In [None]:
#import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob
import os


In [None]:
#list the monthly station-data CSV files
#glob() returns matches in arbitrary (filesystem) order, so sort the paths
#to make the order in which the files are loaded and merged reproducible
files = sorted(glob("./data/HLL_data*.csv"))
files

In [None]:
#collect one trimmed dataframe per input file
df_list = []
for file in files:

    #read file and keep only the timestamp, the id and the pollutant columns
    df = pd.read_csv(file)
    subset = df[['time','id',"no2_mean","pm10_mean","pm25_mean"]]

    #add dataframe to df_list
    df_list.append(subset)

#pd.concat raises an opaque "No objects to concatenate" on an empty list;
#fail early with a message that points at the actual cause instead
if not df_list:
    raise ValueError(
        "No input files to merge: `files` is empty "
        "(check the glob pattern and the data directory)"
    )

#stack the per-file frames row wise (axis=0) and renumber the index 0..n-1
df_merged = pd.concat(df_list, axis=0, ignore_index=True)

#convert time column to datetime objects
df_merged['time'] = pd.to_datetime(df_merged['time'])

#show merged data
df_merged.head(5)

In [None]:
#create new columns with the calendar date and the month number
#(vectorised .dt accessors instead of a per-row Python lambda)
df_merged['date'] = df_merged['time'].dt.date
df_merged['month'] = df_merged['time'].dt.month

#aggregate hourly data to daily / monthly averages
#numeric_only=True restricts the mean to numeric columns: since pandas 2.0
#the default is False, and the object-dtype 'date' column (or a string 'id')
#would make .mean() raise a TypeError
#NOTE: 'month' is the month number only, so the same month of different
#years is pooled into one group
df_merged_days = df_merged.groupby(by='date').mean(numeric_only=True).reset_index()
df_merged_months = df_merged.groupby(by='month').mean(numeric_only=True).reset_index()

In [None]:
#preview the first five rows of the daily averages
df_merged_days.head()

In [None]:
#preview the first five rows of the monthly averages
df_merged_months.head()

In [None]:
#function to plot data
def plot_data(df, time_column, title,
              col_names=('no2_mean','pm10_mean','pm25_mean'),
              colors=('red','blue','green')):
    """Plot each column in ``col_names`` against ``time_column``, one subplot per column.

    Every subplot shows the values as coloured scatter points joined by a
    dashed black line. The subplots are stacked vertically in one figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``time_column`` and every column in ``col_names``.
    time_column : str
        Name of the column used for the x-axis.
    title : str
        Title placed above the top subplot.
    col_names : sequence of str, optional
        Columns to plot, one subplot each. Defaults to the three pollutant
        columns used in this notebook.
    colors : sequence of str, optional
        Scatter colour per column; needs at least ``len(col_names)`` entries.

    Raises
    ------
    ValueError
        If ``col_names`` is empty or ``colors`` has fewer entries than
        ``col_names``.
    """
    if len(col_names) == 0:
        raise ValueError("col_names must contain at least one column")
    if len(colors) < len(col_names):
        raise ValueError("colors must provide one colour per entry in col_names")

    #shared x-axis values
    x = df[time_column]

    #setup figure with one row per column; squeeze=False always returns a
    #2-D array of axes, even when only a single column is plotted
    fig, axs = plt.subplots(len(col_names), 1, figsize=(15, 8), squeeze=False)
    axs = axs[:, 0]

    #loop over data
    for ax, col, color in zip(axs, col_names, colors):

        #get data
        y = df[col]

        #plot scatterplot and line
        ax.scatter(x, y, label=col, color=color)
        ax.plot(x, y, color='black', ls='--')

        #plot legend
        ax.legend()

    #set y and x axes labels: y label on the middle subplot, x label on the
    #bottom one (explicit axes instead of pyplot's implicit "current axes")
    #NOTE(review): unit assumed to be micrograms per cubic metre - confirm
    #against the station data documentation
    axs[len(axs) // 2].set_ylabel('Concentration (µg/m³)')
    axs[-1].set_xlabel('Time')
    axs[0].set_title(title)

In [None]:
#plot hourly data
plot_data(df_merged,'time','Hourly time-series')

#plot daily data
plot_data(df_merged_days,'date','Daily time-series')

#plot monthly data (uses the monthly aggregate, not the daily one)
plot_data(df_merged_months,'month','Monthly time-series')