In [3]:
import pandas as pd
import requests

In [42]:
def get_data(data_url: str, cbs: bool = False, timeout: float = 60.0) -> pd.DataFrame:
    """Download a JSON dataset and return it as a DataFrame.

    Parameters
    ----------
    data_url : str
        The url that points to the dataset.

    cbs : bool, default False
        Whether the data is hosted by CBS. When True the response is read
        as a paged payload: the rows are taken from the ``'value'`` key and
        the ``'@odata.nextLink'`` key, when present, is followed to fetch
        the next page until no further link is returned. When False the
        whole JSON body is handed to ``pd.DataFrame`` as-is.

    timeout : float, default 60.0
        Value forwarded to ``requests.get`` as ``timeout`` so that a stalled
        connection raises instead of blocking the kernel indefinitely.

    Returns
    -------
    pd.DataFrame
        Dataframe containing data retrieved from data_url. For paged CBS
        data the pages are stacked in download order and every page keeps
        the row labels ``pd.DataFrame`` gave it (labels are not renumbered).
        An empty frame is returned when ``cbs`` is True and ``data_url`` is
        empty.

    Raises
    ------
    requests.HTTPError
        If the server answers any request with an error status.
    """
    def fetch_json(url):
        # Fail loudly on 4xx/5xx instead of trying to parse an error page.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()

    if not cbs:
        return pd.DataFrame(fetch_json(data_url))

    # Collect every page first and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and re-copied all previous rows on each call.
    pages = []
    while data_url:
        payload = fetch_json(data_url)
        pages.append(pd.DataFrame(payload['value']))
        # A missing next link yields None, which ends the loop.
        data_url = payload.get('@odata.nextLink')

    if not pages:
        return pd.DataFrame()
    return pd.concat(pages)

In [22]:
# Source locations used by the loading cells below.
# JSON file served from data.rivm.nl; fetched in one request.
covid19_link = "https://data.rivm.nl/covid-19/COVID-19_aantallen_gemeente_cumulatief.json"
# Base URL of the CBS table; endpoint names such as "/Observations" are
# appended to it by the cells that download the data.
cbs_link = "https://beta-odata4.cbs.nl/CBS/70072ned"


In [51]:
# Single-request download: the JSON body is turned into a frame as-is.
covid19_df = get_data(data_url=covid19_link, cbs=False)

In [67]:
# Paged download of the observations endpoint; get_data follows the
# '@odata.nextLink' entries until the last page.
cbs_kerncijfers = get_data(data_url=f"{cbs_link}/Observations", cbs=True)

In [72]:
# Keep only rows whose region code contains 'GM' (presumably municipality
# codes such as 'GM0003' -- confirm) and whose period contains '202'.
# regex=False matches the text literally; na=False turns missing values into
# "no match" instead of a NaN mask, which boolean indexing rejects.
cbs_kerncijfers = cbs_kerncijfers[
    cbs_kerncijfers['RegioS'].str.contains('GM', regex=False, na=False)
    & cbs_kerncijfers['Perioden'].str.contains('202', regex=False, na=False)
]

In [76]:
# NOTE: the export of `cbs_kerncijfers_meta` that used to live here ran before
# the frame exists -- it is first assigned by the pd.merge cell further down --
# so a top-to-bottom run on a fresh kernel stopped with a NameError.
# The identical write to 'Data/CBS_kerncijfers.csv' is performed by the cell
# that directly follows the merge.

In [58]:
# Download the two measure metadata endpoints of the same CBS table.
groups = get_data(f"{cbs_link}/MeasureGroups", cbs=True)
codes = get_data(f"{cbs_link}/MeasureCodes", cbs=True)

In [77]:
# Attach the measure metadata to every observation: rows are matched where the
# observation's "Measure" equals the code's "Identifier". The join is an inner
# join (the pd.merge default), so observations without a matching code are
# dropped.
cbs_kerncijfers_meta = pd.merge(
    left=cbs_kerncijfers,
    right=codes,
    how="inner",
    left_on="Measure",
    right_on="Identifier",
)


In [79]:
# Persist the merged frame; the path is relative to the working directory and
# the row index is written as the first column (to_csv default).
cbs_kerncijfers_meta.to_csv(path_or_buf='Data/CBS_kerncijfers.csv')

In [93]:
# Narrow the merged frame to rows whose period string contains '2020'.
cbs_kerncijfers_meta = cbs_kerncijfers_meta[
    cbs_kerncijfers_meta['Perioden'].str.contains('2020')
]

In [97]:
# Spot check: show the rows for one region and one title. The displayed result
# has two rows with the same title but different identifiers and units.
cbs_kerncijfers_meta[
    (cbs_kerncijfers_meta['RegioS'] == 'GM0003')
    & (cbs_kerncijfers_meta['Title'] == '5 tot 10 jaar')
]

Unnamed: 0,Id,Measure,ValueAttribute,Value,RegioS,Perioden,Identifier,Index,Title,Description,MeasureGroupId,DataType,Unit,Decimals,PresentationType
5848,597472,70200_1,,561.0,GM0003,2020JJ00,70200_1,11,5 tot 10 jaar,,10000_2,Double,aantal,0,Absolute
18862,597481,70200_2,,4.8,GM0003,2020JJ00,70200_2,21,5 tot 10 jaar,,10000_3,Double,%,1,Relative


In [96]:
# Count the rows per (RegioS, Perioden, Title) combination, then tally how
# often each count occurs. Counts above 1 mean that Title alone does not
# identify a single row within a region and period.
(
    cbs_kerncijfers_meta
    .groupby(['RegioS', 'Perioden', 'Title'])
    .size()
    .value_counts()
)

1    106394
2     20967
3     12291
4       723
dtype: int64

In [99]:
# Reshape to wide form: one row per (RegioS, Perioden), one column per
# (Identifier, Title, Description), cells filled from 'Value'.
# Identifier is part of the column key because Title alone is not unique
# within a region and period (see the duplicate count above).
cbs_kerncijfers_meta_pivot = cbs_kerncijfers_meta.pivot(
    index=['RegioS', 'Perioden'],
    columns=['Identifier', 'Title', 'Description'],
    values='Value',
)

In [100]:
# Persist the wide table; the path is relative to the working directory.
cbs_kerncijfers_meta_pivot.to_csv(path_or_buf='Data/CBS_kerncijfers_pivot.csv')