In [1]:
import pandas as pd
import datetime as dt
import plotly.express as px
from dstack import create_frame
from dstack import push_frame

In [2]:
# Data runs through May 15th
#https://aqicn.org/data-platform/covid19/verify/44b4316d-6a53-46ee-8238-4e23f8cce63a

In [3]:
# Values of the `Specie` column that the analysis keeps.
kpis = ["co", "dew", "no2", "o3", "pm10", "pm25", "so2"]

# Comparison windows (midnight timestamps): Jan 1 to May 15 of each year.
start_date_2019 = dt.datetime(2019, 1, 1)
end_date_2019 = dt.datetime(2019, 5, 15)
start_date_2020 = dt.datetime(2020, 1, 1)
end_date_2020 = dt.datetime(2020, 5, 15)

In [4]:
def load_data(period):
    """
    Read one raw air-quality CSV and parse its ``Date`` column.

    :param
    period - string suffix of the file name; the file read is
             ``waqi-covid19-airqualitydata-{period}.csv`` in the working directory
    :returns
    raw data frame for the selected period, with ``Date`` converted from
    day/month/year text to datetime
    """
    csv_name = f"waqi-covid19-airqualitydata-{period}.csv"
    raw = pd.read_csv(csv_name, encoding='ISO-8859-1')
    # Dates are stored as dd/mm/YYYY text; an explicit format avoids ambiguity.
    parsed_dates = pd.to_datetime(raw['Date'], format='%d/%m/%Y')
    raw['Date'] = parsed_dates
    return raw

def consolidate_data(df_list):
    """
    Stack several data frames on top of each other.

    :param
    df_list - list of pandas dataframes to concatenate
    :returns
    consolidated pandas dataframe; each input keeps its original index labels
    """
    return pd.concat(df_list)
    
def clean_data(data, start_date, end_date, kpis):
    """
    Keep only the rows for the requested indicators inside a date window.

    :param
    data - raw pandas df with at least "Specie" and "Date" columns
    start_date - datetime, lower bound of the window (exclusive)
    end_date - datetime, upper bound of the window (exclusive)
    kpis - list of Species to keep
    :returns
    independent copy of the matching rows
    """
    wanted_specie = data['Specie'].isin(kpis)
    after_start = data['Date'] > start_date
    before_end = data['Date'] < end_date
    # Both bounds are strict, so rows dated exactly on either bound are dropped.
    return data.loc[wanted_specie & after_start & before_end].copy()

In [5]:
# Read the raw extracts: two quarterly files for 2019 and a single file for 2020
data2019Q1 = load_data(period='2019Q1')
data2019Q2 = load_data(period='2019Q2')
data2020 = load_data(period='2020')

# Stitch the 2019 quarters together, then filter each year the same way
data2019 = consolidate_data(df_list=[data2019Q1, data2019Q2])
clean_data2019 = clean_data(
    data=data2019,
    start_date=start_date_2019,
    end_date=end_date_2019,
    kpis=kpis,
)
clean_data2020 = clean_data(
    data=data2020,
    start_date=start_date_2020,
    end_date=end_date_2020,
    kpis=kpis,
)

## Map stack creation

In [30]:
def map_plot(data, kpi, year, title):
    """
    Build a world choropleth for a single indicator.

    :param
    data - data frame with "Specie", "Country3", "CountryName" columns and the
           column named by `year`
    kpi - value of the "Specie" column to plot
    year - name of the column used to color the map
           (e.g. "median2020", "median2019" or "difference")
    title - figure title
    :returns
    fig - plotly figure
    """
    subset = data[data["Specie"] == kpi]
    # NOTE(review): "Country3" is presumably an ISO-3 country code, which is
    # what px.choropleth expects for `locations` by default - confirm.
    fig = px.choropleth(subset, locations="Country3",
                        color=year,
                        hover_name="CountryName",
                        color_continuous_scale=px.colors.diverging.Geyser,
                        # `title` used to be accepted but silently ignored.
                        title=title)
    return fig

# Data preparation: composite country+indicator key used to join the two years
clean_data2019['key'] = clean_data2019['Country'] + clean_data2019['Specie']
clean_data2020['key'] = clean_data2020['Country'] + clean_data2020['Specie']

# Mean of `median` per indicator and country, one frame per year,
# with the value column renamed so both years can sit side by side
data2019_grouped = (
    clean_data2019[["Specie", "CountryName", "Country3", "median", "key"]]
    .groupby(["Specie", "CountryName", "Country3", "key"], as_index=False)
    .mean()
    .rename(columns={"median": "median2019"}, errors="raise")
)
data2020_grouped = (
    clean_data2020[["Specie", "CountryName", "Country3", "median", "key"]]
    .groupby(["Specie", "CountryName", "Country3", "key"], as_index=False)
    .mean()
    .rename(columns={"median": "median2020"}, errors="raise")
)

# Data combining: 2020 rows drive the result, 2019 values are attached by key
data_grouped = data2020_grouped.merge(
    data2019_grouped.drop(columns=["Specie", "Country3", "CountryName"]),
    on="key",
    how="left",
)
data_grouped['difference'] = data_grouped["median2020"] - data_grouped["median2019"]

fig = map_plot(data_grouped, 'no2', 'difference', 'World map')
fig.show()

In [37]:
# Creation of Map stack: one choropleth per (indicator, measure) pair
frame = create_frame("world_map")

for kpi in kpis:
    for year in ('median2020', 'median2019', 'difference'):
        world_map = map_plot(data_grouped, kpi, year, 'Air Indicator Map')
        frame.commit(
            world_map,
            f"World map colored by {kpi}",
            {"Air Indicator": kpi, "Measure": year},
        )

frame.push()

'https://dstack.ai/otipita/world_map'

## Radar stack creation

In [7]:
def radar_plot(data, country):
    """
    Build a closed polar line chart of the indicators for one country.

    :param
    data - data frame with "CountryName", "Specie", "median" and "Year" columns
    country - value of "CountryName" to plot
    :returns
    plotly figure with one trace per "Year" value
    """
    country_rows = data[data["CountryName"] == country]
    year_colors = ["#3deb34", "#eb4034"]
    return px.line_polar(
        country_rows,
        r="median",
        theta="Specie",
        color="Year",
        line_close=True,
        hover_name="CountryName",
        color_discrete_sequence=year_colors,
    )

def _week_of_year(dates):
    """Return the ISO week number for a datetime Series.

    `Series.dt.weekofyear` was deprecated in pandas 1.1 and removed in 2.0,
    so `isocalendar().week` is preferred; the old accessor is used only on
    pandas versions that do not provide `isocalendar()`.
    """
    if hasattr(dates.dt, "isocalendar"):
        return dates.dt.isocalendar().week
    return dates.dt.weekofyear


# Calendar helper columns, added in place to both yearly frames
for yearly_frame in (clean_data2019, clean_data2020):
    # e.g. "Jan 5": month abbreviation plus day, shared between the two years
    yearly_frame['DayMonth'] = (
        yearly_frame["Date"].dt.month_name().str[:3]
        + ' '
        + yearly_frame["Date"].dt.day.astype(str)
    )
    yearly_frame['DayOfYear'] = yearly_frame["Date"].dt.dayofyear
    yearly_frame['WeekOfYear'] = _week_of_year(yearly_frame["Date"])

# Mean per indicator and country for each year, tagged with a "Year" label
data2019_grouped = clean_data2019[["Date", "Specie", "CountryName", "Country3", "median"]].groupby(["Specie", "CountryName", "Country3"], as_index=False).mean()
data2020_grouped = clean_data2020[["Date", "Specie", "CountryName", "Country3", "median"]].groupby(["Specie", "CountryName", "Country3"], as_index=False).mean()
data2019_grouped['Year'] = 'Y2019'
data2020_grouped['Year'] = 'Y2020'

data_grouped_time = pd.concat([data2019_grouped, data2020_grouped])

fig = radar_plot(data_grouped_time, 'China')
fig.show()

In [46]:
# Creation of radar stack: one chart per country, in sorted country order
frame = create_frame("radar_chart")

for country in data_grouped_time['CountryName'].sort_values().unique():
    frame.commit(
        radar_plot(data_grouped_time, country),
        f"Air quality measure for {country}",
        {"Country": country},
    )

frame.push()

'https://dstack.ai/otipita/radar_chart'

## Line stack creation

In [18]:
def line_plot(data, kpi, country):
    """
    Build a per-day line chart of one indicator for one country.

    :param
    data - data frame with "Specie", "CountryName", "DayMonth", "DayOfYear",
           "median" and "Year" columns
    kpi - value of "Specie" to plot
    country - value of "CountryName" to plot
    :returns
    fig - plotly figure with one line per "Year" value

    Errors (e.g. a missing column) propagate to the caller. The previous bare
    ``except: pass`` hid the real error and then failed on ``return fig`` with
    an UnboundLocalError.
    """
    subset = data[(data["Specie"] == kpi) & (data["CountryName"] == country)]
    # Keep only the day labels that occur more than once in the subset.
    repeated_days = subset.groupby('DayMonth').DayMonth.transform('count') > 1
    sub_df = subset[repeated_days].copy()
    fig = px.line(sub_df.sort_values(["DayOfYear"]), x="DayMonth", y="median", color="Year")
    fig.update_xaxes(tickangle=-45, title_text='Month and Day')
    fig.update_yaxes(title_text='Indicator Median')
    return fig

# Mean of `median` per calendar day, indicator and country for each year,
# ordered by day of year and tagged with a "Year" label
data2019_grouped = (
    clean_data2019[["DayOfYear", "DayMonth", "Specie", "Country3", "CountryName", "median"]]
    .groupby(["DayOfYear", "DayMonth", "Specie", "Country3", "CountryName"], as_index=False)
    .mean()
    .sort_values('DayOfYear', ascending=True)
    .assign(Year='Y2019')
)
data2020_grouped = (
    clean_data2020[["DayOfYear", "DayMonth", "Specie", "Country3", "CountryName", "median"]]
    .groupby(["DayOfYear", "DayMonth", "Specie", "Country3", "CountryName"], as_index=False)
    .mean()
    .sort_values('DayOfYear', ascending=True)
    .assign(Year='Y2020')
)

data_grouped_days = pd.concat([data2019_grouped, data2020_grouped])

figure = line_plot(data_grouped_days, 'co', 'Argentina')
figure.show()

In [21]:
# Creation of line stack: one chart per (indicator, country) pair that has data
frame = create_frame("line_chart")

for kpi in kpis:
    for country in data_grouped_days['CountryName'].sort_values().unique():
        # Same filter line_plot applies; used here only to skip empty charts
        data = data_grouped_days[(data_grouped_days["Specie"] == kpi)
                                 & (data_grouped_days["CountryName"] == country)]
        sub_df = data[data.groupby('DayMonth').DayMonth.transform('count') > 1].copy()
        if len(sub_df) == 0:
            continue
        frame.commit(
            line_plot(data_grouped_days, kpi, country),
            f"Air quality dynamics for {country}",
            {"Air Indicator": kpi, "Country": country},
        )

frame.push()

'https://dstack.ai/otipita/line_chart'

## Bar stack creation

In [25]:
def bar_plot(data, kpi, top_n=15):
    """
    Build a horizontal bar chart of the countries with the lowest
    2020-vs-2019 difference for one indicator.

    :param
    data - data frame with "Specie", "CountryName" and "difference" columns
    kpi - value of "Specie" to plot
    top_n - number of countries to show, taken from the smallest
            "difference" upwards (default 15)
    :returns
    fig - plotly figure

    Errors (e.g. a missing column) propagate to the caller. The previous
    ``except KeyError: pass`` hid the KeyError and then failed on
    ``return fig`` with an UnboundLocalError.
    """
    subset = data[data["Specie"] == kpi]
    lowest = subset.sort_values(['difference'], ascending=True).head(top_n)
    fig = px.bar(lowest, x="difference", y="CountryName", orientation='h')
    # Reversed axis puts the first (smallest-difference) row at the top.
    fig.update_yaxes(dtick='1', autorange="reversed", title_text="")
    fig.update_xaxes(title_text='2020 vs 2019 Mean Difference')
    return fig


# Preview the bar chart for a single indicator
figure = bar_plot(data=data_grouped, kpi='pm10')
figure.show()

In [27]:
# Creation of bar stack: one chart per indicator
frame = create_frame("bar_plot")

for kpi in kpis:
    frame.commit(
        bar_plot(data_grouped, kpi),
        f"Countries air indicator difference 2020 vs 2019 for Indicator - {kpi}",
        {"Air Indicator": kpi},
    )

frame.push()

'https://dstack.ai/otipita/bar_plot'