In [None]:
import pandas as pd
import cufflinks
cufflinks.go_offline()

In [None]:
# Path of the HDF5 store that the loading cell reads with pd.read_hdf
# (relative path, so it is resolved against the kernel's working directory).
data_h5 = "../data/data.h5"

In [None]:
# Indicators analysed in this notebook; they key every per-indicator dict below.
indicators = ["PM10", "PM2.5"]

# Per-indicator threshold; daily values above it are counted as unsafe
# by the unsafe-days plot.
unsafe_levels = {"PM10": 50, "PM2.5": 25}

# HDF5 key under which each indicator's table is stored.
titles = dict((i, f"24g/{i}") for i in indicators)

# One frame per indicator, read from the store, plus the shared metadata table.
all_data = {
    indicator: pd.read_hdf(data_h5, key=hdf_key)
    for indicator, hdf_key in titles.items()
}
metadata = pd.read_hdf(data_h5, key="metadata")

In [None]:
# One row per 'Miejscowość' value, holding the list of its 'Kod stacji' values
# (presumably locality -> station codes; the column names are Polish).
cities = (
    metadata
    .groupby('Miejscowość')['Kod stacji']
    .apply(list)
    .to_frame()
)
# Keep only the cities that have at least 3 stations.
large_cities = cities.loc[lambda frame: frame['Kod stacji'].map(len) >= 3]

In [None]:
def get_means(all_data, city, codes, freq):
    """Return the row-wise mean of the station columns listed in ``codes``.

    Only the codes that actually exist as columns of ``all_data`` are used.
    Selecting a list that contains a missing label with ``.loc`` raises
    ``KeyError`` on current pandas, so missing codes are filtered out first.

    Parameters
    ----------
    all_data : pandas.DataFrame
        Measurements with one column per station code.
    city : str
        Unused; kept so existing callers keep working.
    codes : iterable
        Station codes to average over.
    freq : str
        Unused; kept so existing callers keep working.

    Returns
    -------
    pandas.Series
        Mean across the available station columns for every row (NaNs are
        skipped). If none of the codes is present, an empty float Series is
        returned whose index has the same type as ``all_data.index``.
    """
    available = [code for code in codes if code in all_data.columns]
    if not available:
        # Explicit dtype avoids the dtype-less ``pd.Series()`` deprecation;
        # slicing the index keeps its type (e.g. DatetimeIndex) when the
        # result is later combined with the other cities.
        return pd.Series(index=all_data.index[:0], dtype=float)
    return all_data.loc[:, available].mean(axis=1)

In [None]:
# For every indicator, build a frame with one column per large city holding the
# mean over that city's stations, then drop the rows and the columns that are
# entirely NaN.
# dropna is called once per axis with keyword arguments: a list-valued axis is
# no longer accepted by pandas and the arguments are keyword-only in pandas 2.
# The two sequential calls (rows first, then columns) match what the old
# list form did.
# 'W' is passed as ``freq``, which get_means currently ignores.
data_averaged = {
    i: (
        pd.DataFrame({
            city: get_means(all_data[i], city, codes, 'W')
            for city, codes in large_cities['Kod stacji'].items()
        })
        .dropna(axis=0, how='all')
        .dropna(axis=1, how='all')
    )
    for i in indicators
}

In [None]:
def plot_means(df, i):
    """Draw a heatmap of the monthly means of every column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Values with a datetime-like index (required by the monthly grouper)
        and one column per series to plot.
    i : str
        Indicator name, used only in the plot title.

    Columns whose monthly means are missing in 80% or more of the months are
    dropped; the remaining columns are ordered by their overall mean,
    ascending. Nothing is returned; the figure is rendered through the
    DataFrame's ``iplot`` method (presumably provided by cufflinks).
    """
    (df
     # A MonthEnd offset object is what the 'M' alias resolves to; 'M' is
     # deprecated in recent pandas in favour of 'ME', while the object form
     # works on every version.
     .groupby(pd.Grouper(freq=pd.offsets.MonthEnd()))
     .mean()
     # keep only columns with less than 80% missing months
     .pipe(lambda df: df.loc[:, df.isnull().mean() < 0.8])
     # order columns by their overall mean
     # NOTE(review): the transpose round trip may also change the index dtype
     # seen by iplot; it is kept exactly as-is so the plot input is unchanged.
     .pipe(lambda df: df.T.assign(mean=df.mean()).sort_values('mean').drop('mean', axis=1).T)
     .iplot(kind='heatmap', colorscale='reds', dimensions=(1000, 1700), title=f'{i} [μg/m3] averaged over a month')
    )
    
def plot_days_unsafe(df, i, unsafe_level):
    """Draw a heatmap of how many rows per month exceed ``unsafe_level``.

    Parameters
    ----------
    df : pandas.DataFrame
        Values with a datetime-like index (required by the monthly grouper)
        and one column per series to plot. The plot title describes the rows
        as days.
    i : str
        Indicator name, used only in the plot title.
    unsafe_level : number
        Threshold; a value strictly greater than it counts as unsafe.

    Missing values stay missing instead of being counted as safe: a month
    without any measurement yields NaN rather than 0. This is what lets the
    "less than 80% missing months" column filter take effect. Remaining
    columns are ordered by their mean monthly count, ascending. Nothing is
    returned; the figure is rendered through the DataFrame's ``iplot`` method
    (presumably provided by cufflinks).
    """
    # NaN > level evaluates to False, so restore NaN where the input had none.
    exceeded = (df > unsafe_level).astype(float).where(df.notna())
    (exceeded
     # A MonthEnd offset object is what the 'M' alias resolves to; 'M' is
     # deprecated in recent pandas in favour of 'ME', while the object form
     # works on every version.
     .groupby(pd.Grouper(freq=pd.offsets.MonthEnd()))
     # min_count=1: a month with no valid measurement sums to NaN, not 0
     .sum(min_count=1)
     # keep only columns with less than 80% missing months
     .pipe(lambda df: df.loc[:, df.isnull().mean() < 0.8])
     # order columns by their mean monthly count
     # NOTE(review): the transpose round trip may also change the index dtype
     # seen by iplot; it is kept exactly as-is so the plot input is unchanged.
     .pipe(lambda df: df.T.assign(mean=df.mean()).sort_values('mean').drop('mean', axis=1).T)
     .iplot(kind='heatmap', colorscale='reds', dimensions=(1000, 1700), title=f'Days per month with high levels of {i} (>{unsafe_level} μg/m3/day)')
    )

In [None]:
# Draw both heatmaps for every indicator: the monthly means first, then the
# per-month count of values above that indicator's unsafe threshold.
for i in data_averaged:
    df = data_averaged[i]
    plot_means(df, i)
    plot_days_unsafe(df, i, unsafe_level=unsafe_levels[i])