In [10]:
import pandas as pd
import seaborn as sns
import openaq

In [11]:
# Create the OpenAQ client object; every query below goes through `api`.
api = openaq.OpenAQ()

In [12]:
# Cities whose measurement locations are requested; df=True asks for a tabular result.
cities_of_interest = ['Antwerpen', 'Paris', 'London']
stations = api.locations(city=cities_of_interest, df=True)

In [13]:
# Preview the first rows of the locations table.
stations.head()

Unnamed: 0,city,coordinates.latitude,coordinates.longitude,count,country,distance,firstUpdated,lastUpdated,location,parameters,sourceName,sourceNames
0,Antwerpen,51.236195,4.385224,4179,BE,5902293,2017-09-22 01:00:00+00:00,2019-08-05 03:00:00+00:00,BELAL01,"[pm10, pm25]",EEA Belgium,[EEA Belgium]
1,Antwerpen,51.1703,4.341005,8052,BE,5902428,2017-09-22 01:00:00+00:00,2019-08-05 03:00:00+00:00,BELHB23,"[so2, pm10, no2, pm25]",EEA Belgium,[EEA Belgium]
2,Antwerpen,51.109978,5.004864,1641,BE,5947480,2017-09-22 01:00:00+00:00,2019-01-09 01:00:00+00:00,BELLD01,[no2],EEA Belgium,[EEA Belgium]
3,Antwerpen,51.120384,5.021546,1973,BE,5948067,2017-09-22 01:00:00+00:00,2019-08-05 03:00:00+00:00,BELLD02,[no2],EEA Belgium,[EEA Belgium]
4,Antwerpen,51.32766,4.362261,1923,BE,5896736,2017-09-23 01:00:00+00:00,2019-08-05 03:00:00+00:00,BELR833,[no2],EEA Belgium,[EEA Belgium]


In [14]:
# Retrieve the list of measured parameters (id, name, description, preferred unit)
# and display it as the cell output.
parameters = api.parameters(df=True)
parameters

Unnamed: 0,description,id,name,preferredUnit
0,Black Carbon,bc,BC,µg/m³
1,Carbon Monoxide,co,CO,ppm
2,Nitrogen Dioxide,no2,NO2,ppm
3,Ozone,o3,O3,ppm
4,Particulate matter less than 10 micrometers in...,pm10,PM10,µg/m³
5,Particulate matter less than 2.5 micrometers i...,pm25,PM2.5,µg/m³
6,Sulfur Dioxide,so2,SO2,ppm


Pick one location each in London, Antwerp and Paris: 'London Westminster', 'BETR801', 'FR04014'

In [15]:
# Location identifiers whose measurements are downloaded below.
# NOTE(review): this rebinds `stations`, which until now held the result of
# api.locations(...), to a plain list — any later cell that expects the
# locations table under this name will get this list instead.
stations = ['FR04014', 'BETR801', 'London Westminster']

## Prepare short format example data set

In [16]:
# Query arguments shared by every NO2 request; only the location differs per call.
no2_query = dict(parameter='no2', date_to='2019-06-21', limit=10000, df=True)

# One measurements result per selected station, in the order of `stations`.
no2_stations = [
    api.measurements(location=station_id, **no2_query)
    for station_id in stations
]

In [17]:
# Stack the per-station NO2 results into a single table (rows appended station by station).
air_quality_data = pd.concat(no2_stations)

In [18]:
# Sanity check: (number of rows, number of columns) of the combined table.
air_quality_data.shape

(2068, 9)

In [19]:
# Preview the combined NO2 measurements.
air_quality_data.head()

Unnamed: 0_level_0,city,coordinates.latitude,coordinates.longitude,country,date.utc,location,parameter,unit,value
date.local,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019-06-21 02:00:00,Paris,48.837243,2.393902,FR,2019-06-21 00:00:00+00:00,FR04014,no2,b'\xc2\xb5g/m\xc2\xb3',20.0
2019-06-21 01:00:00,Paris,48.837243,2.393902,FR,2019-06-20 23:00:00+00:00,FR04014,no2,b'\xc2\xb5g/m\xc2\xb3',21.8
2019-06-21 00:00:00,Paris,48.837243,2.393902,FR,2019-06-20 22:00:00+00:00,FR04014,no2,b'\xc2\xb5g/m\xc2\xb3',26.5
2019-06-20 23:00:00,Paris,48.837243,2.393902,FR,2019-06-20 21:00:00+00:00,FR04014,no2,b'\xc2\xb5g/m\xc2\xb3',24.9
2019-06-20 22:00:00,Paris,48.837243,2.393902,FR,2019-06-20 20:00:00+00:00,FR04014,no2,b'\xc2\xb5g/m\xc2\xb3',21.4


In [20]:
# Map each location id to the column label used in the exported data set.
# Renaming by key (instead of assigning a list of labels by position) does not
# depend on the order in which `pivot` happens to arrange the location columns.
station_labels = {
    "BETR801": "station_antwerp",
    "FR04014": "station_paris",
    "London Westminster": "station_london",
}

# Reshape to wide format: one row per timestamp (the existing index), one column per station.
no2_data = (
    air_quality_data
    .pivot(columns='location', values='value')
    .rename(columns=station_labels)
)
# Drop the leftover 'location' label on the column axis and give the index a generic name.
no2_data.columns.name = None
no2_data.index.name = 'datetime'

In [21]:
# Preview the wide-format table (one column per station).
no2_data.head()

Unnamed: 0_level_0,station_antwerp,station_paris,station_london
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-05-07 02:00:00,,,23.0
2019-05-07 03:00:00,50.5,25.0,19.0
2019-05-07 04:00:00,45.0,27.7,19.0
2019-05-07 05:00:00,,50.4,16.0
2019-05-07 06:00:00,,61.9,


In [22]:
# Write the wide-format NO2 table to disk; the 'datetime' index is written as the first column.
no2_data.to_csv("../data/air_quality_no2.csv")

## Prepare long format example data set

In [23]:
# Columns kept in the long-format data set, in output order.
long_format_columns = ["city", "country", "date.utc", "location", "parameter", "value", "unit"]

# Start again from the raw per-station results: move the index into a regular
# column, turn the byte-string units into text, and keep only the selected columns.
air_quality_data = (
    pd.concat(no2_stations)
    .reset_index()
    .assign(unit=lambda frame: frame["unit"].str.decode("utf-8"))
    [long_format_columns]
)

In [24]:
# Preview the long-format NO2 table.
air_quality_data.head()

Unnamed: 0,city,country,date.utc,location,parameter,value,unit
0,Paris,FR,2019-06-21 00:00:00+00:00,FR04014,no2,20.0,µg/m³
1,Paris,FR,2019-06-20 23:00:00+00:00,FR04014,no2,21.8,µg/m³
2,Paris,FR,2019-06-20 22:00:00+00:00,FR04014,no2,26.5,µg/m³
3,Paris,FR,2019-06-20 21:00:00+00:00,FR04014,no2,24.9,µg/m³
4,Paris,FR,2019-06-20 20:00:00+00:00,FR04014,no2,21.4,µg/m³


### Prepare long format two variable example data set

In [25]:
# Fetch the PM2.5 measurements for every selected station, one result per station.
pm25_stations = []
for station_id in stations:
    station_pm25 = api.measurements(
        location=station_id,
        parameter='pm25',
        date_to='2019-06-21',
        limit=10000,
        df=True,
    )
    pm25_stations.append(station_pm25)

# Combine both parameters into one table: PM2.5 rows first, then NO2 rows.
combined_results = pm25_stations + no2_stations
air_quality_data = pd.concat(combined_results)

In [26]:
# Rebuild the table from the raw per-station results instead of modifying the
# frame left behind by the previous cell. Decoding in place made this cell
# non-idempotent: running it a second time would call `.str.decode` on values
# that are already text, which yields NaN for every unit.
air_quality_data = (
    pd.concat(pm25_stations + no2_stations)
    # Units arrive as UTF-8 byte strings; convert them to text.
    .assign(unit=lambda frame: frame["unit"].str.decode("utf-8"))
    # Keep only the columns used in the long-format example data set.
    [["city", "country", "date.utc", "location", "parameter", "value", "unit"]]
)

In [28]:
# Preview the combined PM2.5 + NO2 long-format table.
air_quality_data.head()

Unnamed: 0_level_0,city,country,date.utc,location,parameter,value,unit
date.local,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-06-18 08:00:00,Antwerpen,BE,2019-06-18 06:00:00+00:00,BETR801,pm25,18.0,µg/m³
2019-06-17 10:00:00,Antwerpen,BE,2019-06-17 08:00:00+00:00,BETR801,pm25,6.5,µg/m³
2019-06-17 09:00:00,Antwerpen,BE,2019-06-17 07:00:00+00:00,BETR801,pm25,18.5,µg/m³
2019-06-17 08:00:00,Antwerpen,BE,2019-06-17 06:00:00+00:00,BETR801,pm25,16.0,µg/m³
2019-06-17 07:00:00,Antwerpen,BE,2019-06-17 05:00:00+00:00,BETR801,pm25,7.5,µg/m³


In [102]:
# Write the combined long-format table; index=False leaves the index out of the file,
# so only the selected columns (including date.utc) are stored.
air_quality_data.to_csv("../data/air_quality_long.csv", index=False)

Save the two data sets separately:

In [35]:
# Target file for each parameter's subset of the long-format table.
long_format_targets = {
    "pm25": "../data/air_quality_pm25_long.csv",
    "no2": "../data/air_quality_no2_long.csv",
}

# Filter the rows of one parameter at a time and write them without the index.
for parameter_id, csv_path in long_format_targets.items():
    is_parameter = air_quality_data["parameter"] == parameter_id
    air_quality_data[is_parameter].to_csv(csv_path, index=False)

Save the air quality stations and parameter metadata as well:

In [8]:
# `stations` is rebound to a plain list of location ids earlier in the notebook,
# so calling `.to_csv` on it fails when the notebook is run top to bottom.
# Query the station metadata again under its own name before saving it.
station_metadata = api.locations(city=['Antwerpen', 'Paris', 'London'], df=True)
station_metadata.to_csv("../data/air_quality_stations.csv", index=False)

# Parameter metadata retrieved with api.parameters(df=True).
parameters.to_csv("../data/air_quality_parameters.csv", index=False)