In [None]:
import matplotlib as mpl
import sys
import json

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

import tensorflow as tf

# Put the particle scripts directory on the module search path so the
# project-local imports in the next cell can be resolved.
sys.path += ['../scripts/particles/']

# Render every figure at 200 dpi.
mpl.rcParams.update({'figure.dpi': 200})


In [None]:
import data_handler as dh
import metrics
import utils

In [None]:
# Target names; the result CSV file names below are built from these.
outputs = ['PM1', 'PM2.5', 'PM10']

# Presumably the model's input feature columns -- not referenced by the
# visible cells of this notebook (TODO confirm against the training script).
inputs = [
    'PM1_2.5_OUT',
    'PM1_2.5_H_OUT',
    'PM2.5_OUT',
    'PM2.5_H_OUT',
    'PM2.5_10_OUT',
    'PM2.5_10_H_OUT',
    'PERSON_NUMBER',
    'AIR_PURIFIER',
    'WINDOW',
    'AIR_CONDITIONER',
    'DOOR',
    'WIND_DEG',
    'HUMIDITY'
]

# Parameter grids in steps of 0.05.
# np.linspace fixes the number of points and includes both end points, so the
# grid length cannot change because of floating-point rounding (np.arange with
# a fractional step gives no such guarantee). Rounding to two decimals removes
# artefacts such as 0.15000000000000002 from the values.
cis = list(np.round(np.linspace(0, 1, 21), 2))           # 0.00, 0.05, ..., 1.00
dropouts = list(np.round(np.linspace(0.2, 0.6, 9), 2))   # 0.20, 0.25, ..., 0.60

In [None]:
def plot_ci(_df, title=None, show_real=False):
    """Plot the prediction as a red line with its interval as a shaded band.

    Parameters
    ----------
    _df : pandas.DataFrame
        Must contain the columns ``pred``, ``lower`` and ``upper`` (and
        ``real`` when ``show_real`` is true). The frame's index is used as
        the x axis.
    title : str, optional
        Figure title; nothing is drawn when ``None``.
    show_real : bool, default False
        Also draw the ``real`` column as a blue line on the same axes.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    ax = _df.plot(y="pred", figsize=(30, 10), color="r")
    if show_real:
        _df.plot(y="real", color="b", ax=ax)
    ax.fill_between(
        _df.index,
        _df["lower"],
        _df["upper"],
        facecolor="green",
        alpha=0.2,
        interpolate=True,
    )
    if title is not None:
        # Title the figure that owns these axes rather than whatever pyplot
        # currently considers the "current" figure.
        ax.figure.suptitle(title, fontsize=22)
    ax.legend(fontsize=17)
    # Raw string: "\m" is not a valid escape sequence in a normal literal.
    ax.set_ylabel(r'$PM \mu g/m^3$', fontsize=17)
    plt.show()

In [None]:
# Interval tables for PM1 at dropout 0.55 / CI 0.95, one per result directory
# ("pm-15", "pm", "pm15"). The "pm-5" and "pm05" variants are left disabled.
ci_df_n15 = pd.read_csv("../../projects/particle/ci_result/pm-15/ci/d_0.55_ci_0.95_PM1.csv")
# ci_df_n5 = pd.read_csv("../../projects/particle/ci_result/pm-5/ci/d_0.55_ci_0.95_PM1.csv")
ci_df_org = pd.read_csv("../../projects/particle/ci_result/pm/ci/d_0.55_ci_0.95_PM1.csv")
# ci_df_5 = pd.read_csv("../../projects/particle/ci_result/pm05/ci/d_0.55_ci_0.95_PM1.csv")
ci_df_15 = pd.read_csv("../../projects/particle/ci_result/pm15/ci/d_0.55_ci_0.95_PM1.csv")

In [None]:
# Interval width (upper - lower) for every loaded frame; the two "5" variants
# are not loaded in this notebook, so they are not processed here either.
for _ci_frame in (ci_df_n15, ci_df_org, ci_df_15):
    _ci_frame['width'] = _ci_frame['upper'] - _ci_frame['lower']

In [None]:
# Side-by-side interval widths, one column per run, labelled by run.
_width_by_run = {
    '-15': ci_df_n15['width'],
    'org': ci_df_org['width'],
    '+15': ci_df_15['width'],
}
width_df = pd.concat(list(_width_by_run.values()), axis=1)
width_df.columns = list(_width_by_run)

In [None]:
# Overlaid histograms of the interval widths of the three runs.
ax = width_df.plot.hist(bins=300, alpha=0.6, figsize=(22, 10))
ax.set_title('Deep learning model prediction width distribution', fontsize=17)

In [None]:
# Summary statistics of the "pm" run, one row per column.
ci_df_org.describe().T

In [None]:
# Summary statistics of the "pm-15" run, one row per column.
ci_df_n15.describe().T

In [None]:
# Summary statistics of the "pm15" run, one row per column.
ci_df_15.describe().T

In [None]:
# Scatter both interval bounds against the prediction on one set of axes.
ax = None
for _bound, _colour in (('lower', "lightcoral"), ('upper', "cornflowerblue")):
    ax = ci_df_15.plot.scatter(x='pred', y=_bound, figsize=(8, 8), c=_colour, ax=ax)

ax.set_title('pred-lower,upper', fontsize=17)
ax.set_xlabel('pred', fontsize=17)
ax.set_ylabel('lower, upper', fontsize=17)

# Identity line spanning the combined x/y range, drawn on a square aspect so
# that points above/below the prediction are easy to tell apart.
_axis_limits = [ax.get_xlim(), ax.get_ylim()]
_diagonal = [np.min(_axis_limits), np.max(_axis_limits)]

ax.plot(_diagonal, _diagonal, "r-", linewidth=2, alpha=0.75, zorder=2)
ax.set_aspect("equal")

In [None]:
# Same pred-vs-bounds view issued as a single scatter call with list-valued
# x / y column selections.
ci_df_15.plot(kind='scatter', x=['pred', 'pred'], y=['lower', 'upper'], figsize=(8, 8))

In [None]:
# ci_df_n15 is loaded from the "pm-15" result directory, not the original run.
# NOTE(review): wording assumes "pm-15" means PM_OUT lowered by 15 ug/m^3 -- confirm.
plot_ci(ci_df_n15, r'95% CI with dropout 0.55 in decreased PM_OUT by $15 \mu g/m^3$')

In [None]:
# ci_df_org is loaded from the unshifted "pm" result directory.
# NOTE(review): title assumes "pm" is the unmodified-input run -- confirm.
plot_ci(ci_df_org, '95% CI with dropout 0.55 in original input')

In [None]:
# ci_df_15 is loaded from the "pm15" result directory, not the original run.
# NOTE(review): wording assumes "pm15" means PM_OUT raised by 15 ug/m^3 -- confirm.
plot_ci(ci_df_15, r'95% CI with dropout 0.55 in increased PM_OUT by $15 \mu g/m^3$')

In [None]:
names = ["pm-15", "pm", "pm15"]
root_dir = "../../projects/particle/ci_result/"


def _coverage_score(csv_path):
    """Return the percentage of rows whose ``real`` lies in [``lower``, ``upper``]."""
    frame = pd.read_csv(csv_path)[["real", "lower", "upper", "pred"]]
    inside = (frame["real"] <= frame["upper"]) & (frame["real"] >= frame["lower"])
    return int(inside.sum()) / len(frame) * 100


# For every result directory and every target, score each (dropout, ci)
# combination and write the table to "<proj_dir>/ci_<target>.csv".
for proj_dir in (root_dir + name for name in names):
    for idx in range(3):
        rows = [
            [
                np.round(dropout, 3),
                ci,
                _coverage_score(
                    f"{proj_dir}/ci/d_{dropout:.2f}_ci_{ci:.2f}_{outputs[idx]}.csv"
                ),
            ]
            for dropout in dropouts
            for ci in cis
        ]
        score_table = pd.DataFrame(
            np.array(rows, dtype=float).reshape(-1, 3),
            columns=["dropout", "ci", "score"],
        )
        score_table.to_csv(f"{proj_dir}/ci_{outputs[idx]}.csv", index=False)


In [None]:
# PM1 coverage table of the "pm" run (names[1]): one row per (dropout, ci)
# with the percentage of real values that fell inside the interval.
pm1_ci = pd.read_csv(f'{root_dir + names[1]}/ci_PM1.csv')

ax = None
for dropout in dropouts:
    # The dropout column went through a CSV round trip, so match it with a
    # tolerance instead of exact float equality.
    rows_at_dropout = pm1_ci[np.isclose(pm1_ci["dropout"], dropout)]
    # ax is None on the first pass, so pandas creates the figure; later passes
    # draw onto it. Labelling each line here keeps the legend tied to its data.
    ax = rows_at_dropout.plot(
        x="ci", y="score", ax=ax, figsize=(10, 10), label=f"dropout={dropout:.2f}"
    )

ax.set_title('PM1: empirical coverage vs. nominal CI level', fontsize=17)
ax.set_xlabel('ci')
ax.set_ylabel('score (% of real values inside the interval)')
ax.legend()

In [None]:
# PM2.5 coverage table of the "pm" run (names[1]): one row per (dropout, ci)
# with the percentage of real values that fell inside the interval.
pm25_ci = pd.read_csv(f'{root_dir + names[1]}/ci_PM2.5.csv')

ax = None
for dropout in dropouts:
    # The dropout column went through a CSV round trip, so match it with a
    # tolerance instead of exact float equality.
    rows_at_dropout = pm25_ci[np.isclose(pm25_ci["dropout"], dropout)]
    # ax is None on the first pass, so pandas creates the figure; later passes
    # draw onto it. Labelling each line here keeps the legend tied to its data.
    ax = rows_at_dropout.plot(
        x="ci", y="score", ax=ax, figsize=(10, 10), label=f"dropout={dropout:.2f}"
    )

ax.set_title('PM2.5: empirical coverage vs. nominal CI level', fontsize=17)
ax.set_xlabel('ci')
ax.set_ylabel('score (% of real values inside the interval)')
ax.legend()

In [None]:
# PM10 coverage table of the "pm" run (names[1]): one row per (dropout, ci)
# with the percentage of real values that fell inside the interval.
pm10_ci = pd.read_csv(f'{root_dir + names[1]}/ci_PM10.csv')

ax = None
for dropout in dropouts:
    # The dropout column went through a CSV round trip, so match it with a
    # tolerance instead of exact float equality.
    rows_at_dropout = pm10_ci[np.isclose(pm10_ci["dropout"], dropout)]
    # ax is None on the first pass, so pandas creates the figure; later passes
    # draw onto it. Labelling each line here keeps the legend tied to its data.
    ax = rows_at_dropout.plot(
        x="ci", y="score", ax=ax, figsize=(10, 10), label=f"dropout={dropout:.2f}"
    )

ax.set_title('PM10: empirical coverage vs. nominal CI level', fontsize=17)
ax.set_xlabel('ci')
ax.set_ylabel('score (% of real values inside the interval)')
ax.legend()