In [1]:
import geopandas as gpd
import plotly.express as px
import pandas as pd


In [2]:

df_air_quality = gpd.read_file("data.geojson")
df_air_quality.head()

Unnamed: 0,OBJECTID,EoICode,StationNam,Longitude,Latitude,Altitude,StationTyp,StationAre,avg15,geometry
0,1,AT0ENK1,Enzenkirchen im Sauwald,13.67114,48.39172,525.0,background,rural-regional,13.353289,POINT (13.67114 48.39172)
1,2,AT0ZOE2,Zöbelboden im Reichraminger Hintergebirge - Wi...,14.441389,47.838611,899.0,background,rural-remote,7.257276,POINT (14.44139 47.83861)
2,3,AT30407,Glinzendorf im Marchfeld,16.636944,48.236667,150.0,background,rural-near_city,14.789538,POINT (16.63694 48.23667)
3,4,AT31902,Zwentendorf im Tullnerfeld,15.903611,48.331111,200.0,background,rural,14.524676,POINT (15.90361 48.33111)
4,5,AT4S108,Grünbach bei Freistadt,14.574722,48.531111,918.0,background,rural-regional,9.099422,POINT (14.57472 48.53111)


In [3]:
df_air_quality.nunique()

OBJECTID      710
EoICode       710
StationNam    710
Longitude     707
Latitude      707
Altitude      343
StationTyp      1
StationAre      7
avg15         710
geometry      707
dtype: int64

In [5]:
df_air_quality['avg15']

0      13.353289
1       7.257276
2      14.789538
3      14.524676
4       9.099422
         ...    
705    28.486592
706    18.477178
707    20.180000
708    18.397000
709    22.678000
Name: avg15, Length: 710, dtype: float64

In [4]:
fig = px.histogram(df_air_quality, x="avg15")

fig.update_layout(
    title={
        'text': "Distribution of PM 2,5 registered over various European stations",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.update_yaxes(showgrid=False)  # turning off the grid
fig.show()

In [6]:
fig = px.box(data_frame = df_air_quality, y = "avg15")
fig.update_layout(
    title={
        'text': "Detecting Outliers of PM 2,5 registered over various European stations",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.update_yaxes(showgrid=False)
fig.show()

In [10]:
df_air_quality[["avg15","Altitude"]].corr()
df_air_quality[["avg15","Latitude"]].corr()
df_air_quality[["avg15","Longitude"]].corr()

Unnamed: 0,avg15,Longitude
avg15,1.0,0.423205
Longitude,0.423205,1.0


In [16]:
fig = px.scatter(data_frame = df_air_quality, y="avg15", x="Latitude")
fig.update_layout(
    title={
        'text': "Distribution of PM 2,5 registered over various Latitudes",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.update_traces(marker=dict(color="crimson"))
fig.update_traces(marker={"opacity": 0.5})  # Thus we see which combination of PM 2,5 and Altitude values exists the most
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

In [18]:
fig = px.scatter(data_frame = df_air_quality, y="avg15", x="Longitude")
fig.update_layout(
    title={
        'text': "Distribution of PM 2,5 registered over various Longitudes",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.update_traces(marker=dict(color="crimson"))
fig.update_traces(marker={"opacity": 0.5})  # Thus we see which combination of PM 2,5 and Altitude values exists the most
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

In [19]:
fig = px.scatter(data_frame = df_air_quality, y="avg15", x="Altitude")
fig.update_layout(
    title={
        'text': "Distribution of PM 2,5 registered over various Altitudes",
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.update_traces(marker=dict(color="crimson"))
fig.update_traces(marker={"opacity": 0.5})  # Thus we see which combination of PM 2,5 and Altitude values exists the most
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()

df_air_quality["StationAre"].value_counts()

In [20]:
df_air_quality["StationAre"].value_counts()

StationAre
urban              406
suburban           137
rural              112
rural-regional      36
rural-near_city      8
rural-remote         7
rural-nearcity       4
Name: count, dtype: int64

In [21]:
df_air_quality.groupby("StationAre")["avg15"].mean()

StationAre
rural              11.455986
rural-near_city    16.169133
rural-nearcity     18.019500
rural-regional      9.318929
rural-remote        8.609008
suburban           14.612287
urban              15.620849
Name: avg15, dtype: float64

In [22]:
#filter urban data and extract info
df_air_quality[df_air_quality["StationAre"] == "urban"][["avg15"]].describe()

Unnamed: 0,avg15
count,406.0
mean,15.620848
std,6.146706
min,3.677026
25%,11.674706
50%,13.853458
75%,19.242335
max,36.010959


In [23]:
fig = px.scatter_mapbox(df_air_quality,
                        lat="Latitude",
                        lon="Longitude",
                        hover_name="StationNam",
                        hover_data=["Altitude"],
                        color="StationAre",
                        zoom=4,
                        height=900,
                        size="avg15",
                        size_max=12,
                        opacity=0.4,
                        width=1300)
fig.update_layout(mapbox_style='carto-positron')
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
fig.update_layout(title_text="Air quality level in Europe 2018")
fig.show()

In [27]:
!curl -o europe.geojson https://opendata.arcgis.com/datasets/784fc60a00fa41cb9babb52

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   174  100   174    0     0    605      0 --:--:-- --:--:-- --:--:--   635


In [28]:
df_europe = gpd.read_file("europe.geojson")

DriverError: 'europe.geojson' not recognized as a supported file format.