# Public Health England, Covid-19 API

We are going to work with the UK Covid-19 API provided by Public Health England. The API has a Python wrapper, so we do not have to write the access code ourselves. However, to get used to the process of accessing an API, we first access it without the wrapper, and then use the package afterwards.

API documentation: https://coronavirus.data.gov.uk/details/developers-guide

API endpoint: https://api.coronavirus.data.gov.uk/v1/data

uk-covid19 package documentation: https://pypi.org/project/uk-covid19/


## Load packages

In [None]:
from urllib.request import urlopen
import json
import gzip
import pandas as pd

## Load example URL

In [None]:
# Example request: filter to areaType=nation / areaName=england and ask for
# the date, area name/code and the new/cumulative case and death metrics.
# The %22 sequences are URL-encoded double quotes around each JSON name.
example_url = (
    "https://api.coronavirus.data.gov.uk/v1/data"
    "?filters=areaType=nation;areaName=england"
    "&structure={"
    "%22date%22:%22date%22,"
    "%22areaName%22:%22areaName%22,"
    "%22areaCode%22:%22areaCode%22,"
    "%22newCasesByPublishDate%22:%22newCasesByPublishDate%22,"
    "%22cumCasesByPublishDate%22:%22cumCasesByPublishDate%22,"
    "%22newDeaths28DaysByPublishDate%22:%22newDeaths28DaysByPublishDate%22,"
    "%22cumDeaths28DaysByPublishDate%22:%22cumDeaths28DaysByPublishDate%22"
    "}"
)

In [None]:
# NOTE(review): json.loads is handed the response object itself here, not its
# decoded body; the following cell reads and gzip-decompresses the body before
# parsing. Presumably this cell is kept as a deliberate failing first attempt
# -- confirm before changing it.
response = urlopen(example_url)
js = json.loads(response)

In [None]:
# Fetch the raw body; the context manager closes the connection once the
# body has been read (or if reading fails).
with urlopen(example_url) as response:
    content_gz = response.read()  # raw response bytes (gzip-compressed)
content_text = gzip.decompress(content_gz)  # decompress to the JSON bytes
dic_nat = json.loads(content_text.decode('utf-8'))  # parse the JSON text

In [None]:
# Inspect the 'length' entry of the parsed response
dic_nat['length']

In [None]:
# Look at the first item of the response's 'data' list
dic_nat['data'][0]

In [None]:
# Collect the 'date' value of every record in the response
date = [record['date'] for record in dic_nat['data']]

In [None]:
# Collect the 'newCasesByPublishDate' value of every record in the response
newCasesByPublishDate = [
    record['newCasesByPublishDate'] for record in dic_nat['data']
]

In [None]:
# Pretty-print the whole parsed response with 4-space indentation
print(json.dumps(dic_nat, indent=4))

In [None]:
# Show the first record again before turning it into a DataFrame
dic_nat['data'][0]

In [None]:
# Turn the first record into a one-row DataFrame; index=[0] gives that row
# an explicit index label (NOTE(review): presumably required because the
# record's values are scalars -- confirm against the pandas docs)
pd.DataFrame(dic_nat['data'][0], index=[0])

In [None]:
# One single-row DataFrame per record in the response
list_df = [
    pd.DataFrame(record, index=[0])
    for record in dic_nat['data']
]

In [None]:
# Build one single-row frame per record, then stack them into one table
list_df = [
    pd.DataFrame(record, index=[0])
    for record in dic_nat['data']
]
# ignore_index=True renumbers the rows instead of repeating index label 0
df_england = pd.concat(list_df, ignore_index=True)
df_england.head(20)

## Make functions to get the data

### Function to construct the filter string

- We will write a function that builds a string like `areaType=nation;areaName=england` from a dictionary such as `{'areaType':'nation', 'areaName': 'england'}`.

In [None]:
def construct_filter(filters):
  """Build the API ``filters`` query value from a dictionary.

  Each ``key: value`` pair becomes ``key=value`` and the pairs are joined
  with ``;`` in the dictionary's iteration order, e.g.
  ``{'areaType': 'nation', 'areaName': 'england'}`` gives
  ``'areaType=nation;areaName=england'``.

  Args:
    filters: Mapping of filter name to filter value. Names and values that
      are not strings are converted with their ``str()`` form.

  Returns:
    The joined filter string; an empty string for an empty mapping.
  """
  # Formatting (rather than ``+`` concatenation) lets non-string values such
  # as numbers be used without raising TypeError.
  return ';'.join(f"{key}={value}" for key, value in filters.items())

In [None]:
# Try the helper on the nation/england filters used in the example URL
construct_filter({'areaType':'nation', 'areaName': 'england'})

### Function to construct the URL

In [None]:
def construct_url(filters):
  """Build the full request URL for the Covid-19 data endpoint.

  Args:
    filters: Dictionary of filter name to value; it is passed to
      ``construct_filter`` to build the ``filters`` query value.

  Returns:
    The endpoint URL containing the percent-encoded ``filters`` value and a
    fixed ``structure`` value requesting the date, area name/code and the
    new/cumulative case and death metrics by publish date.
  """
  # Local import: the notebook's import cell only brings in ``urlopen``.
  from urllib.parse import quote

  metrics = (
      "date",
      "areaName",
      "areaCode",
      "newCasesByPublishDate",
      "cumCasesByPublishDate",
      "newDeaths28DaysByPublishDate",
      "cumDeaths28DaysByPublishDate",
  )
  # Percent-encode the filter values (e.g. spaces in an area name) so the
  # result is a valid URL; '=' and ';' stay literal because they are the
  # separators of the filter syntax.
  filter_str = quote(construct_filter(filters), safe="=;")
  # Every metric is requested under its own name: {"name":"name",...}, with
  # the double quotes already written as %22.
  structure = "{" + ",".join(f"%22{name}%22:%22{name}%22" for name in metrics) + "}"
  return (
      "https://api.coronavirus.data.gov.uk/v1/data?filters="
      + filter_str
      + "&structure="
      + structure
  )

In [None]:
# Build the request URL for the nation/england filters and display it
current_url = construct_url({'areaType': 'nation', 'areaName': 'england'})
current_url

### Using the URL function, get the data again

In [None]:
# Fetch the body for the constructed URL; the context manager closes the
# connection once the body has been read (or if reading fails).
with urlopen(current_url) as response:
    content_gz = response.read()  # raw response bytes (gzip-compressed)
content_text = gzip.decompress(content_gz)  # decompress to the JSON bytes
dic_nat = json.loads(content_text.decode('utf-8'))  # parse the JSON text
# Show the first three records as a quick check
dic_nat['data'][0:3]

### Function to get the data

In [None]:
def get_data(filters):
  """Download the records matching ``filters`` and return them as a DataFrame.

  Args:
    filters: Dictionary of filter name to value, forwarded to
      ``construct_url`` to build the request URL.

  Returns:
    A ``pandas.DataFrame`` with one row per item in the response's ``data``
    list, indexed 0..n-1. An empty ``data`` list gives an empty DataFrame.
  """
  c_url = construct_url(filters)
  # The context manager closes the connection even if reading fails.
  with urlopen(c_url) as response:
    payload = response.read()
  # A gzip stream starts with the bytes 1f 8b; decompress only when the body
  # really is compressed so an uncompressed reply is still parsed.
  if payload[:2] == b'\x1f\x8b':
    payload = gzip.decompress(payload)
  dic_nat = json.loads(payload.decode('utf-8'))
  # Build the frame from the whole list of records in one call rather than
  # creating and concatenating one single-row frame per record.
  df_out = pd.DataFrame(dic_nat['data'])
  return df_out


In [None]:
# Fetch the nation/england data through the helper and preview the first rows
df_new = get_data({'areaType':'nation','areaName': 'england'})
df_new.head()

In [None]:
# Alternative queries, kept for reference and not run:
#   df_colchester = get_data({'areaType':'ltla', 'areaName':'colchester'})
#   get_data({'areaType':'ltla', 'date':'2021-11-01'})
# Select the area by its area code instead of by its name
df_colchester2 = get_data({'areaType':'ltla', 'areaCode':'E07000071'})



In [None]:
# Preview the first rows of the area-code query result
df_colchester2.head()

In [None]:
import seaborn as sns

# Convert the 'date' column to datetimes (presumably it holds date strings
# from the API) and store the result in a new 'date2' column for the x-axis.
df_colchester2['date2'] = pd.to_datetime(df_colchester2.date)
# Line plot of newly published cases over time
sns.lineplot(data=df_colchester2, x="date2", y="newCasesByPublishDate")


## Use the package

In [None]:
# Install the uk-covid19 wrapper package from within the notebook
!pip install uk-covid19

In [None]:
from uk_covid19 import Cov19API

In [None]:
# Requested response structure: every metric is returned under its own name,
# so each key maps to the identical string.
c_structure = {
    metric: metric
    for metric in (
        "date",
        "areaName",
        "areaCode",
        "newCasesByPublishDate",
        "cumCasesByPublishDate",
        "newDeaths28DaysByDeathDate",
        "cumDeaths28DaysByDeathDate",
    )
}

In [None]:
# Filters for the wrapper, one "name=value" string per condition:
# areas of type 'ltla' on the date 2020-11-01
c_filters = ['areaType=ltla', 'date=2020-11-01']

In [None]:
# Create the wrapper client from the c_filters and c_structure values, then
# retrieve the result as a DataFrame
api = Cov19API(filters=c_filters, structure=c_structure)
df_covid_2 = api.get_dataframe()

In [None]:
# Display the DataFrame returned by the wrapper
df_covid_2