In [3]:
from pathlib import Path

import pandas as pd
import requests

In [42]:
def get_data(data_url: str, cbs: bool = False, timeout: float = 60) -> pd.DataFrame:
    """Download a JSON dataset from the internet into a DataFrame.

    Parameters
    ----------
    data_url : string
        The url that points to the dataset

    cbs : bool
        Whether the data is hosted by CBS. When True the response is read as
        a paged OData document: the rows are taken from the ``'value'`` key
        and ``'@odata.nextLink'`` is followed until no further page is given.

    timeout : float
        Seconds to wait for the server before giving up on a request, so a
        stalled connection cannot block the notebook indefinitely.

    Returns
    -------
    pd.DataFrame
        Dataframe containing data retrieved from data_url. For paged (CBS)
        downloads the rows of all pages are stacked under one fresh
        0..n-1 index; an empty frame is returned if there was nothing to
        fetch.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    KeyError
        If ``cbs`` is True and a page has no ``'value'`` entry.
    """
    if not cbs:
        response = requests.get(data_url, timeout=timeout)
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        response.raise_for_status()
        return pd.DataFrame(response.json())

    # Collect every page first and concatenate once: DataFrame.append no
    # longer exists in pandas >= 2.0, and growing a frame inside the loop
    # copies all previously fetched rows on every iteration.
    pages = []
    while data_url:
        response = requests.get(data_url, timeout=timeout)
        response.raise_for_status()
        payload = response.json()
        pages.append(pd.DataFrame(payload['value']))
        # A missing next link marks the last page and ends the loop.
        data_url = payload.get('@odata.nextLink')

    if not pages:
        return pd.DataFrame()
    # ignore_index avoids the duplicate row labels that per-page indexes
    # (each starting at 0) would otherwise produce.
    return pd.concat(pages, ignore_index=True)

In [22]:
# Data sources used in this notebook:
# - the RIVM JSON file with cumulative COVID-19 counts per municipality
# - the root of CBS table 70072ned (entity sets such as /Observations hang off it)
covid19_link = "https://data.rivm.nl/covid-19/COVID-19_aantallen_gemeente_cumulatief.json"
cbs_link = "https://beta-odata4.cbs.nl/CBS/70072ned"


In [51]:
# Plain (non-paged) JSON download of the COVID-19 dataset.
covid19_df = get_data(data_url=covid19_link, cbs=False)

In [67]:
# Paged download of every observation in the CBS table.
cbs_kerncijfers = get_data(data_url=cbs_link + "/Observations", cbs=True)

In [72]:
# Keep only municipality rows (region codes starting with 'GM') whose period
# starts with '202' (e.g. '2020JJ00', '2021JJ00').
# startswith anchors the match at the beginning of the code instead of
# searching anywhere in it, and na=False turns missing codes into "no match"
# rather than a NaN mask, which boolean indexing rejects.
is_municipality = cbs_kerncijfers['RegioS'].str.startswith('GM', na=False)
is_2020s = cbs_kerncijfers['Perioden'].str.startswith('202', na=False)
cbs_kerncijfers = cbs_kerncijfers[is_municipality & is_2020s]

In [76]:
# Persist the filtered observations. The output folder is created first so
# the cell also works when the Data/ directory does not exist yet.
output_path = Path('Data/CBS_kerncijfers.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
cbs_kerncijfers.to_csv(output_path)

In [58]:
# Download the two metadata tables that describe the measures of the CBS table.
groups = get_data(f"{cbs_link}/MeasureGroups", cbs=True)
codes = get_data(f"{cbs_link}/MeasureCodes", cbs=True)

In [77]:
# Attach the measure metadata to each observation by matching the
# observation's Measure code against the Identifier of the code table.
# Inner join: observations without a matching code are dropped.
cbs_kerncijfers_meta = cbs_kerncijfers.merge(
    codes,
    how="inner",
    left_on="Measure",
    right_on="Identifier",
)


In [78]:
# Preview the first five merged rows.
cbs_kerncijfers_meta.head(n=5)

Unnamed: 0,Id,Measure,ValueAttribute,Value,RegioS,Perioden,Identifier,Index,Title,Description,MeasureGroupId,DataType,Unit,Decimals,PresentationType
0,382099,M000352_3,,25445.0,GM1680,2020JJ00,M000352_3,4,Totale bevolking,Bevolking op 1 januari. Betreft het geregistre...,M000352_2,Double,aantal,0,Absolute
1,382350,M000352_3,,25399.0,GM1680,2021JJ00,M000352_3,4,Totale bevolking,Bevolking op 1 januari. Betreft het geregistre...,M000352_2,Double,aantal,0,Absolute
2,388739,M000352_3,Impossible,,GM0738,2020JJ00,M000352_3,4,Totale bevolking,Bevolking op 1 januari. Betreft het geregistre...,M000352_2,Double,aantal,0,Absolute
3,389008,M000352_3,Impossible,,GM0738,2021JJ00,M000352_3,4,Totale bevolking,Bevolking op 1 januari. Betreft het geregistre...,M000352_2,Double,aantal,0,Absolute
4,395396,M000352_3,,31859.0,GM0358,2020JJ00,M000352_3,4,Totale bevolking,Bevolking op 1 januari. Betreft het geregistre...,M000352_2,Double,aantal,0,Absolute


In [65]:
# Display the measure-group table downloaded from the /MeasureGroups endpoint.
groups

Unnamed: 0,Id,Index,Title,Description,ParentId
0,10000_1,8,Leeftijd,Leeftijd per 1 januari.,M000352_2
1,10000_2,9,Leeftijdsgroepen,,10000_1
2,10000_3,19,"Leeftijdsgroepen, relatief",,10000_1
3,1050010_1,107,Particuliere huishoudens,Particuliere huishoudens bestaan uit één of me...,M000352_1
4,1050010_2,108,Particuliere huishoudens,,1050010_1
...,...,...,...,...,...
76,T001132,124,Woningen naar eigendom,Peildatum: 1 januari van het betreffende jaar....,M000297
77,T001455_1,316,Bodemgebruik,,M000348
78,T001455_2,317,Oppervlakte,De gegevens over de totale oppervlakte volgens...,T001455_1
79,T001455_4,326,Oppervlakte,,M000344_1


In [45]:
# NOTE(review): `test` is not assigned in any cell visible in this notebook —
# presumably leftover kernel state from a deleted cell. Define it explicitly
# or drop this cell, otherwise Restart & Run All fails here with a NameError.
test

Unnamed: 0,Id,Measure,ValueAttribute,Value,RegioS,Perioden
0,0,M000352_3,,15424122.0,NL01,1995JJ00
1,1,3000,,7627482.0,NL01,1995JJ00
2,2,4000,,7796640.0,NL01,1995JJ00
3,3,40000_1,,988709.0,NL01,1995JJ00
4,4,70200_1,,945288.0,NL01,1995JJ00
...,...,...,...,...,...,...
95,95,1013400_2,Impossible,,NL01,1995JJ00
96,96,1013601_2,Impossible,,NL01,1995JJ00
97,97,1013650_2,Impossible,,NL01,1995JJ00
98,98,1014800_2,Impossible,,NL01,1995JJ00


In [48]:
# NOTE(review): this URL has no scheme or host, so it does not look fetchable
# as written; the recorded output presumably came from an earlier version of
# this cell. Confirm the intended dataset URL before re-running.
get_data("aObservations?$top=100", cbs = True)

Unnamed: 0,Id,Measure,ValueAttribute,Value,WijkenEnBuurten
0,0,T001036,,17407585.0,NL00
1,1,3000,,8648031.0,NL00
2,2,4000,,8759554.0,NL00
3,3,10680,,2726099.0,NL00
4,4,53050,,2143743.0,NL00
...,...,...,...,...,...
95,95,1016040,,4361.0,GM1680
96,96,1016030,,3563.0,GM1680
97,97,M000114,,2.2,GM1680
98,98,M000100,,92.0,GM1680


In [None]:
# NOTE(review): unfinished, never-executed scratch cell — `get_` is not a
# name defined in this notebook. Complete the call or delete the cell.
get_

In [35]:
# NOTE(review): the recorded output of this cell is a JSONDecodeError, i.e.
# the response body could not be parsed as JSON. Presumably this endpoint does
# not return JSON for a plain GET — confirm the correct URL/format, or remove
# this experiment so the notebook runs top to bottom.
pd.DataFrame(requests.get("https://opendata.cbs.nl/ODataFeed/odata/84583NED/TypedDataSet").json()['value'])

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [27]:
# NOTE(review): relies on `test`, which no visible cell defines — presumably a
# raw response frame from an earlier experiment. Recreate it explicitly or
# remove this cell.
test['value']

0     {'Id': 0, 'Measure': 'M000352_3', 'ValueAttrib...
1     {'Id': 1, 'Measure': '3000', 'ValueAttribute':...
2     {'Id': 2, 'Measure': '4000', 'ValueAttribute':...
3     {'Id': 3, 'Measure': '40000_1', 'ValueAttribut...
4     {'Id': 4, 'Measure': '70200_1', 'ValueAttribut...
                            ...                        
95    {'Id': 95, 'Measure': '1013400_2', 'ValueAttri...
96    {'Id': 96, 'Measure': '1013601_2', 'ValueAttri...
97    {'Id': 97, 'Measure': '1013650_2', 'ValueAttri...
98    {'Id': 98, 'Measure': '1014800_2', 'ValueAttri...
99    {'Id': 99, 'Measure': '1014850_3', 'ValueAttri...
Name: value, Length: 100, dtype: object

In [14]:
# Display the frame downloaded from covid19_link.
covid19_df

Unnamed: 0,Date_of_report,Municipality_code,Municipality_name,Province,Total_reported,Hospital_admission,Deceased
0,2020-03-13 10:00:00,GM0003,Appingedam,Groningen,0,0,0
1,2020-03-13 10:00:00,GM0010,Delfzijl,Groningen,0,0,0
2,2020-03-13 10:00:00,GM0014,Groningen,Groningen,3,0,0
3,2020-03-13 10:00:00,GM0024,Loppersum,Groningen,0,0,0
4,2020-03-13 10:00:00,GM0034,Almere,Flevoland,1,1,0
...,...,...,...,...,...,...,...
221843,2021-11-09 10:00:00,,,Noord-Holland,1785,24,2
221844,2021-11-09 10:00:00,,,Zuid-Holland,3616,68,15
221845,2021-11-09 10:00:00,,,Zeeland,118,5,1
221846,2021-11-09 10:00:00,,,Noord-Brabant,1792,38,8


In [16]:
# Show all report rows for the municipality with code GM0003.
covid19_df.loc[covid19_df['Municipality_code'].eq('GM0003')]

Unnamed: 0,Date_of_report,Municipality_code,Municipality_name,Province,Total_reported,Hospital_admission,Deceased
0,2020-03-13 10:00:00,GM0003,Appingedam,Groningen,0,0,0
367,2020-03-14 10:00:00,GM0003,Appingedam,Groningen,0,0,0
734,2020-03-15 10:00:00,GM0003,Appingedam,Groningen,0,0,0
1101,2020-03-16 10:00:00,GM0003,Appingedam,Groningen,0,0,0
1468,2020-03-17 10:00:00,GM0003,Appingedam,Groningen,0,0,0
...,...,...,...,...,...,...,...
108265,2021-01-02 10:00:00,GM0003,Appingedam,Groningen,305,9,1
108632,2021-01-03 10:00:00,GM0003,Appingedam,Groningen,307,9,1
108999,2021-01-04 10:00:00,GM0003,Appingedam,Groningen,312,9,1
109366,2021-01-05 10:00:00,GM0003,Appingedam,Groningen,321,9,2
