In [15]:
import pandas as pd
import seaborn as sns
import openaq

In [16]:
api = openaq.OpenAQ()

In [17]:
stations = api.locations(city=['Antwerpen', 'Paris', 'London'], df=True)

In [18]:
stations.head()

Unnamed: 0,location,city,country,count,sourceNames,lastUpdated,firstUpdated,parameters,distance,sourceName,coordinates.latitude,coordinates.longitude
0,BELAL01,Antwerpen,BE,4423,[EEA Belgium],2019-09-15 03:00:00+00:00,2017-09-22 01:00:00+00:00,"[pm25, pm10]",6699516,EEA Belgium,51.236195,4.385224
1,BELHB23,Antwerpen,BE,8540,[EEA Belgium],2019-09-15 03:00:00+00:00,2017-09-22 01:00:00+00:00,"[pm10, pm25, no2, so2]",6700304,EEA Belgium,51.1703,4.341005
2,BELLD01,Antwerpen,BE,1641,[EEA Belgium],2019-01-09 01:00:00+00:00,2017-09-22 01:00:00+00:00,[no2],6744160,EEA Belgium,51.109978,5.004864
3,BELLD02,Antwerpen,BE,2095,[EEA Belgium],2019-09-15 03:00:00+00:00,2017-09-22 01:00:00+00:00,[no2],6744619,EEA Belgium,51.120384,5.021546
4,BELR833,Antwerpen,BE,2045,[EEA Belgium],2019-09-15 03:00:00+00:00,2017-09-23 01:00:00+00:00,[no2],6693263,EEA Belgium,51.32766,4.362261


In [47]:
parameters = api.parameters(df=True)
parameters

Unnamed: 0,id,name,description,preferredUnit
0,bc,BC,Black Carbon,µg/m³
1,co,CO,Carbon Monoxide,ppm
2,no2,NO2,Nitrogen Dioxide,ppm
3,o3,O3,Ozone,ppm
4,pm10,PM10,Particulate matter less than 10 micrometers in...,µg/m³
5,pm25,PM2.5,Particulate matter less than 2.5 micrometers i...,µg/m³
6,so2,SO2,Sulfur Dioxide,ppm


Pick location in Londen, Antwerp and France: 'London Westminster', 'BETR801', 'FR04014'

In [48]:
stations = ['FR04014', 'BETR801', 'London Westminster']

## Prepare short format example data set

In [36]:
no2_stations = [api.measurements(location=station, parameter='no2', 
                                 date_to='2019-06-21', 
                                 limit=10000, df=True) for station in stations]

In [50]:
no2_stations[2]

Unnamed: 0_level_0,city,coordinates.latitude,coordinates.longitude,country,date.utc,location,parameter,unit,value
date.local,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1


In [37]:
air_quality_data = pd.concat(no2_stations)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [38]:
air_quality_data.shape

(26, 9)

In [43]:
air_quality_data.location.unique()

array(['FR04014'], dtype=object)

In [40]:
no2_data = air_quality_data.pivot(columns='location', values='value')

In [41]:
no2_data

location,FR04014
date.local,Unnamed: 1_level_1
2019-06-18 03:00:00,60.1
2019-06-18 04:00:00,39.8
2019-06-18 05:00:00,45.5
2019-06-18 06:00:00,26.5
2019-06-18 07:00:00,33.8
2019-06-18 08:00:00,51.4
2019-06-18 09:00:00,52.6
2019-06-18 10:00:00,49.6
2019-06-18 21:00:00,15.3
2019-06-18 22:00:00,17.0


In [42]:
no2_data.columns = ["station_antwerp", "station_paris", "station_london"]
no2_data.index.name = 'datetime'

ValueError: Length mismatch: Expected axis has 1 elements, new values have 3 elements

In [21]:
no2_data.head()

Unnamed: 0_level_0,station_antwerp,station_paris,station_london
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-05-07 02:00:00,,,23.0
2019-05-07 03:00:00,50.5,25.0,19.0
2019-05-07 04:00:00,45.0,27.7,19.0
2019-05-07 05:00:00,,50.4,16.0
2019-05-07 06:00:00,,61.9,


In [22]:
no2_data.to_csv("../data/air_quality_no2.csv")

## Prepare long format example data set

In [23]:
air_quality_data = pd.concat(no2_stations)
air_quality_data = air_quality_data.reset_index()
air_quality_data["unit"] = air_quality_data["unit"].str.decode("utf-8")
air_quality_data = air_quality_data[["city", "country", "date.utc", "location", "parameter", "value", "unit"]]

In [24]:
air_quality_data.head()

Unnamed: 0,city,country,date.utc,location,parameter,value,unit
0,Paris,FR,2019-06-21 00:00:00+00:00,FR04014,no2,20.0,µg/m³
1,Paris,FR,2019-06-20 23:00:00+00:00,FR04014,no2,21.8,µg/m³
2,Paris,FR,2019-06-20 22:00:00+00:00,FR04014,no2,26.5,µg/m³
3,Paris,FR,2019-06-20 21:00:00+00:00,FR04014,no2,24.9,µg/m³
4,Paris,FR,2019-06-20 20:00:00+00:00,FR04014,no2,21.4,µg/m³


### Prepare long format two variable example data set

In [25]:
pm25_stations = [api.measurements(location=station, parameter='pm25', 
                                date_to='2019-06-21', 
                                limit=10000, df=True) for station in stations]
air_quality_data = pd.concat(pm25_stations + no2_stations)

In [26]:
air_quality_data["unit"] = air_quality_data["unit"].str.decode("utf-8")
air_quality_data = air_quality_data[["city", "country", "date.utc", "location", "parameter", "value", "unit"]]

In [28]:
air_quality_data.head()

Unnamed: 0_level_0,city,country,date.utc,location,parameter,value,unit
date.local,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-06-18 08:00:00,Antwerpen,BE,2019-06-18 06:00:00+00:00,BETR801,pm25,18.0,µg/m³
2019-06-17 10:00:00,Antwerpen,BE,2019-06-17 08:00:00+00:00,BETR801,pm25,6.5,µg/m³
2019-06-17 09:00:00,Antwerpen,BE,2019-06-17 07:00:00+00:00,BETR801,pm25,18.5,µg/m³
2019-06-17 08:00:00,Antwerpen,BE,2019-06-17 06:00:00+00:00,BETR801,pm25,16.0,µg/m³
2019-06-17 07:00:00,Antwerpen,BE,2019-06-17 05:00:00+00:00,BETR801,pm25,7.5,µg/m³


In [102]:
air_quality_data.to_csv("../data/air_quality_long.csv", index=False)

Save the two data sets separately:

In [35]:
air_quality_data[air_quality_data["parameter"] == "pm25"].to_csv("../data/air_quality_pm25_long.csv", index=False)
air_quality_data[air_quality_data["parameter"] == "no2"].to_csv("../data/air_quality_no2_long.csv", index=False)

Save the air quality stations and parameter metadata as well:

In [8]:
stations.to_csv("../data/air_quality_stations.csv", index=False)
parameters.to_csv("../data/air_quality_parameters.csv", index=False)