In [1]:
# Download the England COVID-19 "cases by day" metric from the UKHSA dashboard API.
# wget stores the JSON response in the working directory, in a file named after
# the last URL path segment (COVID-19_cases_casesByDay).
!wget https://api.ukhsa-dashboard.data.gov.uk/themes/infectious_disease/sub_themes/respiratory/topics/COVID-19/geography_types/Nation/geographies/England/metrics/COVID-19_cases_casesByDay


--2024-11-20 22:11:56--  https://api.ukhsa-dashboard.data.gov.uk/themes/infectious_disease/sub_themes/respiratory/topics/COVID-19/geography_types/Nation/geographies/England/metrics/COVID-19_cases_casesByDay
Resolving api.ukhsa-dashboard.data.gov.uk (api.ukhsa-dashboard.data.gov.uk)... 108.138.217.88, 108.138.217.92, 108.138.217.42, ...
Connecting to api.ukhsa-dashboard.data.gov.uk (api.ukhsa-dashboard.data.gov.uk)|108.138.217.88|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2061 (2.0K) [application/json]
Saving to: ‘COVID-19_cases_casesByDay’


2024-11-20 22:11:56 (7.67 MB/s) - ‘COVID-19_cases_casesByDay’ saved [2061/2061]



In [2]:
import requests
import time

class APIwrapper:
    """Paginated client for a single metric endpoint of the UKHSA dashboard API.

    An instance is bound to one metric URL, built from the six path parameters
    given to the constructor. Pages are fetched with get_page() (one page per
    call) or get_all_pages() (everything). Requests made through any instance
    are paced so that at most ~3 requests per second are sent.
    """
    # class variables shared among all instances
    _access_point="https://api.ukhsa-dashboard.data.gov.uk"
    _last_access=0.0 # time of last api access
    _min_interval=0.33 # minimum seconds between two requests (max 3 requests/second)
    _max_page_size=365 # largest page size accepted by get_page
    _timeout=30 # seconds to wait for the server before giving up on a request

    def __init__(self, theme, sub_theme, topic, geography_type, geography, metric):
        """Build the endpoint URL for one metric.

        All six arguments are path segments of the endpoint; query filters
        (anything that is not part of the path) are passed to get_page() /
        get_all_pages() instead.
        """
        # build the path with all the required structure parameters
        url_path=(f"/themes/{theme}/sub_themes/{sub_theme}/topics/{topic}/geography_types/" +
                  f"{geography_type}/geographies/{geography}/metrics/{metric}")
        # our starting API endpoint
        self._start_url=APIwrapper._access_point+url_path
        self._filters=None
        self._page_size=-1
        # URL of the next page to fetch; None once the last page has been read
        self._next_url=self._start_url
        # will contain the number of items
        self.count=None

    def error_detection(self, url, params):
        """Perform one GET request, reporting (not raising) request failures.

        Returns the requests response object on success. If the request fails
        (connection problem, timeout, HTTP error status), the error is printed
        and an empty page is returned as a plain dict with the keys
        'results', 'next' and 'count', so that callers can carry on.
        """
        try:
            response = requests.get(url, params=params, timeout=APIwrapper._timeout)
            response.raise_for_status()
            return response
        except requests.exceptions.RequestException as error:
            # RequestException is the base class of every error raised by requests
            print(f"Unable to access API due to {error}")
            return {"results": [], "next": None, "count": 0}

    def get_page(self, filters=None, page_size=5):
        """Fetch and return the next page of results as a list.

        filters   -- optional dict of query filters; entries whose value is
                     None are not sent. Defaults to no filters.
        page_size -- number of items per page (at most _max_page_size).

        Calling again with the same filters and page_size returns the
        following page; changing either restarts from the first page. Returns
        [] once the last page has been fetched or if the request failed.
        Raises ValueError if page_size is too large.
        """
        if page_size>APIwrapper._max_page_size:
            raise ValueError(f"Max supported page size is {APIwrapper._max_page_size}")
        # Work on a private copy: the caller may mutate its dict afterwards and
        # we must still be able to detect that the filters changed.
        filters={} if filters is None else dict(filters)
        if filters!=self._filters or page_size!=self._page_size:
            self._filters=filters
            self._page_size=page_size
            self._next_url=self._start_url
        if self._next_url is None:
            return [] # we already fetched the last page

        # Pacing: wait until _min_interval has elapsed since the last request.
        deltat=time.time()-APIwrapper._last_access
        if deltat<APIwrapper._min_interval:
            time.sleep(APIwrapper._min_interval-deltat)
        # record the time *after* any sleep, i.e. when the request really goes out
        APIwrapper._last_access=time.time()

        parameters={x: y for x, y in filters.items() if y is not None}
        parameters['page_size']=page_size

        outcome=self.error_detection(self._next_url, params=parameters)
        # error_detection hands back its fallback dict on failure, a response otherwise
        payload=outcome if isinstance(outcome, dict) else outcome.json()

        self._next_url=payload.get('next')
        self.count=payload.get('count')
        # data are in the nested 'results' list
        return payload.get('results', [])

    def get_all_pages(self, filters=None, page_size=365):
        """Fetch every page for the given filters and return all items in one list.

        Always starts from the first page, so repeated calls return the full
        data set each time. If a request fails part-way, the items collected
        so far are returned (the failure is printed by error_detection).
        """
        # forget any previous position so that get_page restarts from page one
        self._filters=None
        self._page_size=-1
        data=[] # build up all data here
        while True:
            # use get_page to do the job, including the pacing
            next_page=self.get_page(filters, page_size)
            if not next_page:
                break # we are done
            data.extend(next_page)
        return data





# {"theme":"infectious_disease","sub_theme":"respiratory","topic":"Influenza","geography_type":"Nation",
#  "geography":"England","metric":"influenza_testing_positivityByWeek","age":"all","stratum":"default","sex":"all",
#  "year":2024,"date":"2024-11-04","metric_value":"3.5600","in_reporting_delay_period":false}

# Path parameters selecting the weekly influenza test-positivity metric for England.
structure1=dict(
    theme="infectious_disease",
    sub_theme="respiratory",
    topic="Influenza",
    geography_type="Nation",
    geography="England",
    metric="influenza_testing_positivityByWeek",
)

# {"theme":"infectious_disease","sub_theme":"respiratory","topic":"Influenza","geography_type":"Nation",
# "geography":"England","metric":"influenza_healthcare_ICUHDUadmissionRateByWeek","age":"all","stratum":"default",
# "sex":"all","year":2024,"date":"2024-11-04","metric_value":"0.0300","in_reporting_delay_period":false}


# {"theme":"infectious_disease","sub_theme":"respiratory","topic":"Influenza","geography_type":"Nation","geography":"England",
#  "metric":"influenza_healthcare_ICUHDUadmissionRateByWeek","age":"00-04","stratum":"default","sex":"all",
#  "year":2024,"date":"2024-11-04","metric_value":"0.0800",
#  "in_reporting_delay_period":false},
# Parameters for the weekly influenza ICU/HDU admission-rate metric for England.
# Besides the six path parameters this dict also carries an "age" entry, which
# APIwrapper.__init__ does not have a parameter for.
structure2=dict(
    theme="infectious_disease",
    sub_theme="respiratory",
    topic="Influenza",
    geography_type="Nation",
    geography="England",
    metric="influenza_healthcare_ICUHDUadmissionRateByWeek",
    age="00-04",
)





# Smoke test: fetch a single page and show the total item count reported by
# the API followed by the records of that page.
api=APIwrapper(**structure1)
data=api.get_page() # default size is 5
print(api.count, data, sep="\n")

3240
[{'theme': 'infectious_disease', 'sub_theme': 'respiratory', 'topic': 'Influenza', 'geography_type': 'Nation', 'geography': 'England', 'geography_code': 'E92000001', 'metric': 'influenza_testing_positivityByWeek', 'metric_group': 'testing', 'stratum': 'default', 'sex': 'all', 'age': '00-04', 'year': 2017, 'month': 7, 'epiweek': 27, 'date': '2017-07-03', 'metric_value': 0.35, 'in_reporting_delay_period': False}, {'theme': 'infectious_disease', 'sub_theme': 'respiratory', 'topic': 'Influenza', 'geography_type': 'Nation', 'geography': 'England', 'geography_code': 'E92000001', 'metric': 'influenza_testing_positivityByWeek', 'metric_group': 'testing', 'stratum': 'default', 'sex': 'all', 'age': '05-14', 'year': 2017, 'month': 7, 'epiweek': 27, 'date': '2017-07-03', 'metric_value': 0.0, 'in_reporting_delay_period': False}, {'theme': 'infectious_disease', 'sub_theme': 'respiratory', 'topic': 'Influenza', 'geography_type': 'Nation', 'geography': 'England', 'geography_code': 'E92000001', 'm

In [3]:

# Keys that are path segments of the endpoint, i.e. the parameters accepted by
# APIwrapper.__init__. Any other key in a structure dict is a query filter and
# has to be passed to get_all_pages() instead.
PATH_KEYS=("theme", "sub_theme", "topic", "geography_type", "geography", "metric")

# Weekly influenza test positivity, all records.
api=APIwrapper(**structure1)
positive_cases=api.get_all_pages()

# structure2 may carry extra entries such as "age"; unpacking it directly into
# APIwrapper(...) would raise TypeError, so split it into path and filters.
admissions_path={key: structure2[key] for key in PATH_KEYS}
admissions_filters={key: value for key, value in structure2.items() if key not in PATH_KEYS}
api2=APIwrapper(**admissions_path)
admissions=api2.get_all_pages(filters=admissions_filters)

# NOTE(review): this is the same query as `api` above (same structure, no
# filters); kept so that api3 / positive_cases_by_age remain defined.
api3=APIwrapper(**structure1)
positive_cases_by_age=api3.get_all_pages()

# Sanity check: the number of downloaded items should match the API's count.
print(f"Data points expected: {api.count}")
print(f"Data points retrieved: {len(positive_cases)}")

Data points expected: 3240
Data points retrieved: 3240


In [4]:
import json
# Save the downloaded positivity records to disk as a single JSON document.
with open("positive_cases.json", mode="wt") as out_file:
    out_file.write(json.dumps(positive_cases))


In [5]:
import json
# Save the downloaded admission-rate records to disk as a single JSON document.
with open("admissions.json", mode="wt") as out_file:
    out_file.write(json.dumps(admissions))