In [1]:
# IMPORTS
import sys
import numpy as np
import pandas as pd
import yaml
from pathlib import Path
import glob
import os
from functools import partial

In [2]:
def get_config(file) -> dict:
    """
    Read in config file and return it as a dictionary.

    :parameter
    ----------
    file - String
        Location of config file
    
    :returns
    --------
    config - dict
        Configuration file in dictionary form.
    """
    try:
        with open(file, 'r') as stream:
            config = yaml.safe_load(stream)
    
        return config
    except FileNotFoundError as e:
        print(f"File: could not be found. Error {e}")
        sys.exit(1)

In [3]:
config = get_config("config.yaml")
data_dir = config['data']

# Load the data

First, load in the data from the year 2020. Then, load in the data of the year 2021 by combine all the seperate files into one data frame. 

In [4]:
file_2020 = Path(data_dir) / "2020" / "2020_NO2.csv"
df_2020 = pd.read_csv(file_2020, skiprows=7, sep=";")

In [5]:
df_2020.head()

Unnamed: 0,Component,Bep.periode,Eenheid,Begindatumtijd,Einddatumtijd,NL01485,NL01487,NL01488,NL01489,NL01491,...,NL49022,NL49546,NL49551,NL49553,NL49561,NL49564,NL49565,NL49701,NL49703,NL49704
0,NO2,uur,�g/m�,20200101 00:00,20200101 01:00,45.8,48.5,47.8,36.9,41.4,...,30.4,26.8,35.2,21.5,27.8,13.1,22.9,49.2,21.6,17.1
1,NO2,uur,�g/m�,20200101 01:00,20200101 02:00,32.3,55.8,45.1,43.4,47.4,...,24.6,33.7,16.4,16.0,24.3,24.0,30.2,54.3,21.2,26.6
2,NO2,uur,�g/m�,20200101 02:00,20200101 03:00,32.3,42.8,32.9,39.3,37.4,...,22.9,39.6,23.9,24.8,27.6,26.9,30.6,50.9,22.4,32.6
3,NO2,uur,�g/m�,20200101 03:00,20200101 04:00,25.4,40.3,32.1,26.4,37.1,...,20.4,31.1,22.9,22.7,29.5,28.5,27.9,38.8,22.3,28.4
4,NO2,uur,�g/m�,20200101 04:00,20200101 05:00,24.3,31.3,24.3,23.1,27.1,...,25.1,26.7,26.3,25.0,29.1,25.8,27.0,29.1,25.8,28.7


### Clean data

In [6]:
# Rename the date columns
df_2020.rename(columns = {"Begindatumtijd": "date_start", "Einddatumtijd": "date_end"}, inplace = True)

# Set the datatype of the date and time colum to datetime.
df_2020.date_start = df_2020.date_start.str.replace(" ", "").str.replace(":","")
df_2020.date_start = pd.to_datetime(df_2020.date_start.astype(str), format='%Y%m%d%H%M')

df_2020.date_end = df_2020.date_end.str.replace(" ", "").str.replace(":","")
df_2020.date_end = pd.to_datetime(df_2020.date_end.astype(str), format='%Y%m%d%H%M')

df_2020.head()

Unnamed: 0,Component,Bep.periode,Eenheid,date_start,date_end,NL01485,NL01487,NL01488,NL01489,NL01491,...,NL49022,NL49546,NL49551,NL49553,NL49561,NL49564,NL49565,NL49701,NL49703,NL49704
0,NO2,uur,�g/m�,2020-01-01 00:00:00,2020-01-01 01:00:00,45.8,48.5,47.8,36.9,41.4,...,30.4,26.8,35.2,21.5,27.8,13.1,22.9,49.2,21.6,17.1
1,NO2,uur,�g/m�,2020-01-01 01:00:00,2020-01-01 02:00:00,32.3,55.8,45.1,43.4,47.4,...,24.6,33.7,16.4,16.0,24.3,24.0,30.2,54.3,21.2,26.6
2,NO2,uur,�g/m�,2020-01-01 02:00:00,2020-01-01 03:00:00,32.3,42.8,32.9,39.3,37.4,...,22.9,39.6,23.9,24.8,27.6,26.9,30.6,50.9,22.4,32.6
3,NO2,uur,�g/m�,2020-01-01 03:00:00,2020-01-01 04:00:00,25.4,40.3,32.1,26.4,37.1,...,20.4,31.1,22.9,22.7,29.5,28.5,27.9,38.8,22.3,28.4
4,NO2,uur,�g/m�,2020-01-01 04:00:00,2020-01-01 05:00:00,24.3,31.3,24.3,23.1,27.1,...,25.1,26.7,26.3,25.0,29.1,25.8,27.0,29.1,25.8,28.7


### Create tidy data

In [7]:
# Example link: https://cmdlinetips.com/2019/06/reshaping-dataframes-with-pandas-melt-and-wide_to_long/
tidy_2020 = df_2020.melt(id_vars=["date_start", "date_end"],
            value_vars=df_2020.columns[5:],
                        var_name = "site",
                        value_name = "no2")
tidy_2020.head()

Unnamed: 0,date_start,date_end,site,no2
0,2020-01-01 00:00:00,2020-01-01 01:00:00,NL01485,45.8
1,2020-01-01 01:00:00,2020-01-01 02:00:00,NL01485,32.3
2,2020-01-01 02:00:00,2020-01-01 03:00:00,NL01485,32.3
3,2020-01-01 03:00:00,2020-01-01 04:00:00,NL01485,25.4
4,2020-01-01 04:00:00,2020-01-01 05:00:00,NL01485,24.3


In [79]:
# There is some missing data.
tidy_2020.isnull().sum().sum()

8794

In [14]:
# Grab the mean NO2 value per day for each site
mean_no2_day = tidy_2020.groupby(['site', tidy_2020['date_start'].dt.date]).agg({'no2': ['mean']})
mean_no2_day

Unnamed: 0_level_0,Unnamed: 1_level_0,no2
Unnamed: 0_level_1,Unnamed: 1_level_1,mean
site,date_start,Unnamed: 2_level_2
NL01485,2020-01-01,22.029167
NL01485,2020-01-02,18.970833
NL01485,2020-01-03,19.004167
NL01485,2020-01-04,39.550000
NL01485,2020-01-05,17.685714
...,...,...
NL49704,2020-12-27,18.854167
NL49704,2020-12-28,32.970833
NL49704,2020-12-29,36.295833
NL49704,2020-12-30,28.425000


In [8]:
tidy_2020['date_start'].dt.date

0         2020-01-01
1         2020-01-01
2         2020-01-01
3         2020-01-01
4         2020-01-01
             ...    
641227    2020-12-31
641228    2020-12-31
641229    2020-12-31
641230    2020-12-31
641231    2020-12-31
Name: date_start, Length: 641232, dtype: object

## Interpolation and taking average

"The daily data was interpolated using inverse distance weighted interpolation with a maximum distance of 0.05 degrees to fill in gaps between valid observations but avoiding extrapolation outside areas with valid observations. After interpolation, the daily observations were averaged over time to obtain June-August average NO2 levels." - https://www.greenpeace.org.au/research/new-satellite-data-reveals-worlds-largest-air-pollution-emission-hotspots-greenpeace-media-briefing/

In [None]:
# Interpolate the missing data, and take the avarage NO2 value per day.

In [12]:
def read_csv_file(file, *, skiprows = 0, sep = ",", encoding = None) -> pd.DataFrame:
    """
    Read in a csv file.
    
    :parameters
    -----------
    file - String
        File location
    skiprows - int
        Number of rows to skip
    sep - str
        Seperator
    encoding - str
        Encoding to use for UTF when reading/writing (ex. ‘utf-8’).
        
    :return
    -------
    df - pd.DataFrame
        Pandas data frame
    """
    df = pd.read_csv(file, skiprows=7, sep=";", encoding='unicode_escape')
    return df
   
# Map does not take keyword arugments: solution create a partial.
map_func = partial(read_csv_file, skiprows=7, sep=";", encoding='unicode_escape')

# Merging the files 2021 csv files
list_2021 = glob.glob(os.path.join(data_dir, "2021", "*.csv"))
df_2021 = pd.concat(map(map_func, list_2021), ignore_index=True)
df_2021

Unnamed: 0,Component,Bep.periode,Eenheid,Begindatumtijd,Einddatumtijd,NL01485,NL01487,NL01488,NL01489,NL01491,...,NL49022,NL49546,NL49551,NL49553,NL49561,NL49564,NL49565,NL49701,NL49703,NL49704
0,NO2,uur,µg/m³,20210101 00:00,20210101 01:00,41.4,57.3,50.2,46.4,58.1,...,39.4,54.6,30.3,19.6,47.5,35.0,43.4,45.8,42.0,51.5
1,NO2,uur,µg/m³,20210101 01:00,20210101 02:00,41.8,51.4,47.8,43.0,56.2,...,36.0,53.4,39.7,17.9,49.2,33.6,44.4,40.2,43.3,48.9
2,NO2,uur,µg/m³,20210101 02:00,20210101 03:00,35.5,51.0,45.1,40.7,52.0,...,42.2,54.1,8.9,3.9,53.0,35.6,44.9,47.6,41.9,50.5
3,NO2,uur,µg/m³,20210101 03:00,20210101 04:00,20.4,52.9,49.9,45.3,54.6,...,41.2,55.1,9.0,3.0,56.6,38.9,56.0,42.7,39.3,47.1
4,NO2,uur,µg/m³,20210101 04:00,20210101 05:00,14.5,46.2,45.6,43.5,54.4,...,48.8,56.4,12.5,2.4,58.3,40.3,55.8,46.1,41.8,47.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8011,NO2,uur,µg/m³,20211130 19:00,20211130 20:00,,,,,,...,,,,,,,,,,
8012,NO2,uur,µg/m³,20211130 20:00,20211130 21:00,,,,,,...,,,,,,,,,,
8013,NO2,uur,µg/m³,20211130 21:00,20211130 22:00,,,,,,...,,,,,,,,,,
8014,NO2,uur,µg/m³,20211130 22:00,20211130 23:00,,,,,,...,,,,,,,,,,


In [80]:
df_2021.isnull().sum().sum()

107994

## Meta data

In [23]:
meta_data = pd.read_csv(file_2020, nrows=6, sep=";", encoding='unicode_escape').iloc[:, 4:].T
# Set first row as column names
meta_data.columns = meta_data.iloc[0,:]

# Remove the first row
meta_data.drop("StationsCode", axis = 0, inplace = True)
# meta_data
meta_data

StationsCode,Stationsnaam,"Latitude,Longitude",Stationsgebied,Stationstype,Meetprincipe,Meetopstelling
NL01485,Hoogvliet-Leemkuil,"(51.867411,4.355242)",stad,achtergrond,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL01487,Rotterdam Zuid-Pleinweg,"(51.891147,4.48069)",regionaal,verkeer,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL01488,Rotterdam Zuid-Zwartewaalstraat,"(51.893617,4.487528)",stad,achtergrond,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL01489,Ridderkerk-Hogeweg,"(51.869431,4.580058)",stad,verkeer,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL01491,Rotterdam-Oost Sidelinge A13,"(51.938472,4.430692)",stad,verkeer,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
...,...,...,...,...,...,...
NL49564,Hoofddorp-Hoofdweg,"(52.327464,4.715008)",onbekend,onbekend,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL49565,Oude Meer-Aalsmeerderdijk,"(52.279991,4.770773)",regionaal,achtergrond,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL49701,Zaandam-Wagenschotpad,"(52.448011,4.816706)",stad,achtergrond,Chemiluminescentie,Thermo model 42w NO/Nox analyser
NL49703,Amsterdam-Spaarnwoude,"(52.398437,4.728581)",regionaal,achtergrond,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser


### Clean meta data

In [24]:
# Seperate the latitude and longitude column and assign it to their own column
latitude_longitude = meta_data["Latitude,Longitude"].str.strip("()").str.split(",", n = 1, expand = True)

# Remove old Lat-long column
meta_data.drop("Latitude,Longitude", axis = 1, inplace=True)

latitude_longitude[[0,1]] = latitude_longitude[[0,1]].astype(float)

# Insert the chunk number column into the dataframe
meta_data.insert(1, column = "Latitude", value = latitude_longitude.iloc[:, 0])

# Insert the patient id column into the dataframe
meta_data.insert(2, column = "Longitude", value = latitude_longitude.iloc[:, 1])

In [25]:
meta_data

StationsCode,Stationsnaam,Latitude,Longitude,Stationsgebied,Stationstype,Meetprincipe,Meetopstelling
NL01485,Hoogvliet-Leemkuil,51.867411,4.355242,stad,achtergrond,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL01487,Rotterdam Zuid-Pleinweg,51.891147,4.480690,regionaal,verkeer,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL01488,Rotterdam Zuid-Zwartewaalstraat,51.893617,4.487528,stad,achtergrond,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL01489,Ridderkerk-Hogeweg,51.869431,4.580058,stad,verkeer,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL01491,Rotterdam-Oost Sidelinge A13,51.938472,4.430692,stad,verkeer,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
...,...,...,...,...,...,...,...
NL49564,Hoofddorp-Hoofdweg,52.327464,4.715008,onbekend,onbekend,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL49565,Oude Meer-Aalsmeerderdijk,52.279991,4.770773,regionaal,achtergrond,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL49701,Zaandam-Wagenschotpad,52.448011,4.816706,stad,achtergrond,Chemiluminescentie,Thermo model 42w NO/Nox analyser
NL49703,Amsterdam-Spaarnwoude,52.398437,4.728581,regionaal,achtergrond,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser


In [14]:
test = pd.read_csv("test_file.csv", sep = ";")
test

Unnamed: 0,Component,Bep.periode,Eenheid,Begindatumtijd,Einddatumtijd,NL01485,NL01487,NL01488,NL01489,NL01491,...,NL49022,NL49546,NL49551,NL49553,NL49561,NL49564,NL49565,NL49701,NL49703,NL49704
0,NO2,uur,�g/m�,20210101 00:00,20210101 01:00,41.4,57.3,50.2,46.4,58.1,...,39.4,54.6,30.3,19.6,47.5,35.0,43.4,45.8,42.0,51.5
1,NO2,uur,�g/m�,20210101 01:00,20210101 02:00,41.8,51.4,47.8,43.0,56.2,...,36.0,53.4,39.7,17.9,49.2,33.6,44.4,40.2,43.3,48.9
2,NO2,uur,�g/m�,20210101 02:00,20210101 03:00,35.5,51.0,45.1,40.7,52.0,...,42.2,54.1,8.9,3.9,53.0,35.6,44.9,47.6,41.9,50.5
3,NO2,uur,�g/m�,20210101 03:00,20210101 04:00,20.4,52.9,49.9,45.3,54.6,...,41.2,55.1,9.0,3.0,56.6,38.9,56.0,42.7,39.3,47.1
4,NO2,uur,�g/m�,20210101 04:00,20210101 05:00,14.5,46.2,45.6,43.5,54.4,...,48.8,56.4,12.5,2.4,58.3,40.3,55.8,46.1,41.8,47.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
739,NO2,uur,�g/m�,20210131 19:00,20210131 20:00,37.1,31.3,30.4,21.8,27.5,...,21.2,18.5,18.8,16.3,19.4,24.6,14.9,12.9,18.1,17.9
740,NO2,uur,�g/m�,20210131 20:00,20210131 21:00,29.6,31.9,28.8,24.3,25.6,...,26.0,19.8,18.5,18.4,20.3,29.5,15.8,14.2,19.8,17.1
741,NO2,uur,�g/m�,20210131 21:00,20210131 22:00,28.5,27.1,25.6,22.2,24.4,...,20.9,18.8,18.2,20.0,18.5,28.1,15.2,14.6,19.5,18.2
742,NO2,uur,�g/m�,20210131 22:00,20210131 23:00,24.8,24.1,22.2,21.2,22.7,...,17.8,17.4,19.3,17.8,15.4,23.1,14.0,14.8,18.8,16.7


In [17]:
test_header = pd.read_csv("test_file_header.csv", sep = ";")
test_header

Unnamed: 0,Datum export,20210928 10:00,Unnamed: 2,Unnamed: 3,StationsCode,NL01485,NL01487,NL01488,NL01489,NL01491,...,NL49022,NL49546,NL49551,NL49553,NL49561,NL49564,NL49565,NL49701,NL49703,NL49704
0,Periode,20210101 00:00 - 20210201 00:00,,,Stationsnaam,Hoogvliet-Leemkuil,Rotterdam Zuid-Pleinweg,Rotterdam Zuid-Zwartewaalstraat,Ridderkerk-Hogeweg,Rotterdam-Oost Sidelinge A13,...,Amsterdam-Sportpark Ookmeer (Osdorp),Zaanstad-Hemkade,IJmuiden-Kanaaldijk,Wijk aan Zee-Burgemeester Rothestraat,Badhoevedorp-Sloterweg,Hoofddorp-Hoofdweg,Oude Meer-Aalsmeerderdijk,Zaandam-Wagenschotpad,Amsterdam-Spaarnwoude,Amsterdam-Hoogtij
1,Bron,https://data.rivm.nl/data/luchtmeetnet,,,"Latitude,Longitude","(51.867411,4.355242)","(51.891147,4.48069)","(51.893617,4.487528)","(51.869431,4.580058)","(51.938472,4.430692)",...,"(52.366811,4.793344)","(52.42023,4.83206)","(52.463039,4.601842)","(52.493992,4.601986)","(52.334003,4.774006)","(52.327464,4.715008)","(52.279991,4.770773)","(52.448011,4.816706)","(52.398437,4.728581)","(52.428017,4.773478)"
2,Beschrijving data,https://data.rivm.nl/data/luchtmeetnet/readme.pdf,,,Stationsgebied,stad,regionaal,stad,stad,stad,...,stad,stad,stad,stad,stad,onbekend,regionaal,stad,regionaal,stad
3,,,,,Stationstype,achtergrond,verkeer,achtergrond,verkeer,verkeer,...,achtergrond,industrie,industrie,industrie,achtergrond,onbekend,achtergrond,achtergrond,achtergrond,industrie
4,,,,,Meetprincipe,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,...,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie
5,,,,,Meetopstelling,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,...,Thermo model 42w NO/Nox analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Thermo model 42w NO/Nox analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser


In [44]:
meta_data = test_header.iloc[:, 5:].T
meta_data.columns = test_header.StationsCode
meta_data

StationsCode,Stationsnaam,"Latitude,Longitude",Stationsgebied,Stationstype,Meetprincipe,Meetopstelling
NL01485,Hoogvliet-Leemkuil,"(51.867411,4.355242)",stad,achtergrond,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL01487,Rotterdam Zuid-Pleinweg,"(51.891147,4.48069)",regionaal,verkeer,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL01488,Rotterdam Zuid-Zwartewaalstraat,"(51.893617,4.487528)",stad,achtergrond,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL01489,Ridderkerk-Hogeweg,"(51.869431,4.580058)",stad,verkeer,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL01491,Rotterdam-Oost Sidelinge A13,"(51.938472,4.430692)",stad,verkeer,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
...,...,...,...,...,...,...
NL49564,Hoofddorp-Hoofdweg,"(52.327464,4.715008)",onbekend,onbekend,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL49565,Oude Meer-Aalsmeerderdijk,"(52.279991,4.770773)",regionaal,achtergrond,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL49701,Zaandam-Wagenschotpad,"(52.448011,4.816706)",stad,achtergrond,Chemiluminescentie,Thermo model 42w NO/Nox analyser
NL49703,Amsterdam-Spaarnwoude,"(52.398437,4.728581)",regionaal,achtergrond,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser


In [45]:
# Seperate the latitude and longitude column and assign it to their own column
latitude_longitude = meta_data["Latitude,Longitude"].str.strip("()").str.split(",", n = 1, expand = True)

# Remove old Lat-long column
meta_data.drop("Latitude,Longitude", axis = 1, inplace=True)

latitude_longitude[[0,1]] = latitude_longitude[[0,1]].astype(float)

# Insert the chunk number column into the dataframe
meta_data.insert(1, column = "Latitude", value = latitude_longitude.iloc[:, 0])

# Insert the patient id column into the dataframe
meta_data.insert(2, column = "Longitude", value = latitude_longitude.iloc[:, 1])

In [46]:
meta_data

StationsCode,Stationsnaam,Latitude,Longitude,Stationsgebied,Stationstype,Meetprincipe,Meetopstelling
NL01485,Hoogvliet-Leemkuil,51.867411,4.355242,stad,achtergrond,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL01487,Rotterdam Zuid-Pleinweg,51.891147,4.480690,regionaal,verkeer,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL01488,Rotterdam Zuid-Zwartewaalstraat,51.893617,4.487528,stad,achtergrond,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL01489,Ridderkerk-Hogeweg,51.869431,4.580058,stad,verkeer,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL01491,Rotterdam-Oost Sidelinge A13,51.938472,4.430692,stad,verkeer,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
...,...,...,...,...,...,...,...
NL49564,Hoofddorp-Hoofdweg,52.327464,4.715008,onbekend,onbekend,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL49565,Oude Meer-Aalsmeerderdijk,52.279991,4.770773,regionaal,achtergrond,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser
NL49701,Zaandam-Wagenschotpad,52.448011,4.816706,stad,achtergrond,Chemiluminescentie,Thermo model 42w NO/Nox analyser
NL49703,Amsterdam-Spaarnwoude,52.398437,4.728581,regionaal,achtergrond,Chemiluminescentie,Teledyne API 200E chemiluminescent Nox Analyser


In [81]:
pd.read_csv(Path(data_dir + "2021_01_NO2.csv"), nrows=6, sep=";", encoding='unicode_escape')

Unnamed: 0,Datum export,20210928 10:00,Unnamed: 2,Unnamed: 3,StationsCode,NL01485,NL01487,NL01488,NL01489,NL01491,...,NL49022,NL49546,NL49551,NL49553,NL49561,NL49564,NL49565,NL49701,NL49703,NL49704
0,Periode,20210101 00:00 - 20210201 00:00,,,Stationsnaam,Hoogvliet-Leemkuil,Rotterdam Zuid-Pleinweg,Rotterdam Zuid-Zwartewaalstraat,Ridderkerk-Hogeweg,Rotterdam-Oost Sidelinge A13,...,Amsterdam-Sportpark Ookmeer (Osdorp),Zaanstad-Hemkade,IJmuiden-Kanaaldijk,Wijk aan Zee-Burgemeester Rothestraat,Badhoevedorp-Sloterweg,Hoofddorp-Hoofdweg,Oude Meer-Aalsmeerderdijk,Zaandam-Wagenschotpad,Amsterdam-Spaarnwoude,Amsterdam-Hoogtij
1,Bron,https://data.rivm.nl/data/luchtmeetnet,,,"Latitude,Longitude","(51.867411,4.355242)","(51.891147,4.48069)","(51.893617,4.487528)","(51.869431,4.580058)","(51.938472,4.430692)",...,"(52.366811,4.793344)","(52.42023,4.83206)","(52.463039,4.601842)","(52.493992,4.601986)","(52.334003,4.774006)","(52.327464,4.715008)","(52.279991,4.770773)","(52.448011,4.816706)","(52.398437,4.728581)","(52.428017,4.773478)"
2,Beschrijving data,https://data.rivm.nl/data/luchtmeetnet/readme.pdf,,,Stationsgebied,stad,regionaal,stad,stad,stad,...,stad,stad,stad,stad,stad,onbekend,regionaal,stad,regionaal,stad
3,,,,,Stationstype,achtergrond,verkeer,achtergrond,verkeer,verkeer,...,achtergrond,industrie,industrie,industrie,achtergrond,onbekend,achtergrond,achtergrond,achtergrond,industrie
4,,,,,Meetprincipe,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,...,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie
5,,,,,Meetopstelling,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,...,Thermo model 42w NO/Nox analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Thermo model 42w NO/Nox analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser


In [4]:
pd.read_csv(Path(data_dir + "2020_NO2.csv"), nrows=6, sep=";", encoding='unicode_escape')

Unnamed: 0,Datum export,20210316 13:33,Unnamed: 2,Unnamed: 3,StationsCode,NL01485,NL01487,NL01488,NL01489,NL01491,...,NL49022,NL49546,NL49551,NL49553,NL49561,NL49564,NL49565,NL49701,NL49703,NL49704
0,Periode,20200101 00:00 - 20210101 00:00,,,Stationsnaam,Hoogvliet-Leemkuil,Rotterdam Zuid-Pleinweg,Rotterdam Zuid-Zwartewaalstraat,Ridderkerk-Hogeweg,Rotterdam-Oost Sidelinge A13,...,Amsterdam-Sportpark Ookmeer (Osdorp),Zaanstad-Hemkade,IJmuiden-Kanaaldijk,Wijk aan Zee-Burgemeester Rothestraat,Badhoevedorp-Sloterweg,Hoofddorp-Hoofdweg,Oude Meer-Aalsmeerderdijk,Zaandam-Wagenschotpad,Amsterdam-Spaarnwoude,Amsterdam-Hoogtij
1,Bron,https://data.rivm.nl/data/luchtmeetnet,,,"Latitude,Longitude","(51.867411,4.355242)","(51.891147,4.48069)","(51.893617,4.487528)","(51.869431,4.580058)","(51.938472,4.430692)",...,"(52.366811,4.793344)","(52.42023,4.83206)","(52.463039,4.601842)","(52.493992,4.601986)","(52.334003,4.774006)","(52.327464,4.715008)","(52.279991,4.770773)","(52.448011,4.816706)","(52.398437,4.728581)","(52.428017,4.773478)"
2,Beschrijving data,https://data.rivm.nl/data/luchtmeetnet/readme.pdf,,,Stationsgebied,stad,regionaal,stad,stad,stad,...,stad,stad,stad,stad,stad,onbekend,regionaal,stad,regionaal,stad
3,,,,,Stationstype,achtergrond,verkeer,achtergrond,verkeer,verkeer,...,achtergrond,industrie,industrie,industrie,achtergrond,onbekend,achtergrond,achtergrond,achtergrond,industrie
4,,,,,Meetprincipe,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,...,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie,Chemiluminescentie
5,,,,,Meetopstelling,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,...,Thermo model 42w NO/Nox analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser,Thermo model 42w NO/Nox analyser,Teledyne API 200E chemiluminescent Nox Analyser,Teledyne API 200E chemiluminescent Nox Analyser


# Test wide to long

In [47]:
data_url = "https://goo.gl/ioc2Td"
gapminder = pd.read_csv(data_url)
print(gapminder.head(3))

  continent  country  gdpPercap_1952  gdpPercap_1957  gdpPercap_1962  \
0    Africa  Algeria     2449.008185     3013.976023     2550.816880   
1    Africa   Angola     3520.610273     3827.940465     4269.276742   
2    Africa    Benin     1062.752200      959.601080      949.499064   

   gdpPercap_1967  gdpPercap_1972  gdpPercap_1977  gdpPercap_1982  \
0     3246.991771     4182.663766     4910.416756     5745.160213   
1     5522.776375     5473.288005     3008.647355     2756.953672   
2     1035.831411     1085.796879     1029.161251     1277.897616   

   gdpPercap_1987  ...    pop_1962    pop_1967    pop_1972    pop_1977  \
0     5681.358539  ...  11000948.0  12760499.0  14760787.0  17152804.0   
1     2430.208311  ...   4826015.0   5247469.0   5894858.0   6162675.0   
2     1225.856010  ...   2151895.0   2427334.0   2761407.0   3168267.0   

     pop_1982    pop_1987    pop_1992    pop_1997  pop_2002  pop_2007  
0  20033753.0  23254956.0  26298373.0  29072015.0  31287142  3333

In [48]:
lifeExp = gapminder.loc[:, gapminder.columns.str.contains('^life|^c')]
print(lifeExp.head(n=3))

  continent  country  lifeExp_1952  lifeExp_1957  lifeExp_1962  lifeExp_1967  \
0    Africa  Algeria        43.077        45.685        48.303        51.407   
1    Africa   Angola        30.015        31.999        34.000        35.985   
2    Africa    Benin        38.223        40.358        42.618        44.885   

   lifeExp_1972  lifeExp_1977  lifeExp_1982  lifeExp_1987  lifeExp_1992  \
0        54.518        58.014        61.368        65.799        67.744   
1        37.928        39.483        39.942        39.906        40.647   
2        47.014        49.190        50.904        52.337        53.919   

   lifeExp_1997  lifeExp_2002  lifeExp_2007  
0        69.152        70.994        72.301  
1        40.963        41.003        42.731  
2        54.777        54.406        56.728  


In [50]:
gapminder_tidy = lifeExp.melt(id_vars=["continent", "country"], 
                              var_name="year", 
                              value_name="lifeExp")
gapminder_tidy.head(n=10)

Unnamed: 0,continent,country,year,lifeExp
0,Africa,Algeria,lifeExp_1952,43.077
1,Africa,Angola,lifeExp_1952,30.015
2,Africa,Benin,lifeExp_1952,38.223
3,Africa,Botswana,lifeExp_1952,47.622
4,Africa,Burkina Faso,lifeExp_1952,31.975
5,Africa,Burundi,lifeExp_1952,39.031
6,Africa,Cameroon,lifeExp_1952,38.523
7,Africa,Central African Republic,lifeExp_1952,35.463
8,Africa,Chad,lifeExp_1952,38.092
9,Africa,Comoros,lifeExp_1952,40.715
