In [1]:
import matplotlib as mpl
import sys
import json

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

import tensorflow as tf

# Add the project's script directory to the import search path. The path is
# relative, so it is resolved against the kernel's working directory. The local
# modules imported in the next cell (data_handler, metrics, utils) are
# presumably found here -- TODO confirm.
sys.path.append("codes/scripts/particles/")

# Set matplotlib's default figure DPI to 200 for figures created afterwards.
mpl.rcParams['figure.dpi'] = 200


In [2]:
import data_handler as dh
import metrics
import utils

In [3]:
# Column names listed as the model's outputs and inputs.
outputs = ['PM1']
inputs = [
    'PM1_OUT', 'PM1_H_OUT',
    'PERSON_NUMBER',
    'AIR_PURIFIER', 'WINDOW', 'AIR_CONDITIONER', 'DOOR',
    'WIND_DEG', 'HUMIDITY',
]

# Location of the saved model: <model_dir>/<model_name>.
model_dir = 'projects/particle/model'
model_name = 'conv_20'
model_path = '/'.join((model_dir, model_name))

# Load the saved model configuration (JSON) from the model directory.
# The context manager closes the file even when json.load() raises, which the
# previous manual open()/close() pair did not guarantee.
config_path = model_path + "/config.json"
with open(config_path, "r") as f:
    config = json.load(f)


In [4]:
# Window size and offset come from the "model" section of the loaded config;
# out_time_step is hard-coded to 1.
in_time_step, out_time_step, offset = (
    config["model"]["window_size"],
    1,
    config["model"]["offset"],
)

In [5]:
# Prediction table, indexed by the DATE column parsed as dates.
pred_df = pd.read_csv(
    f'{model_dir}/{model_name}/result/predict/predict.csv',
    index_col='DATE',
    parse_dates=True,
)

# One metric table per label, in the order pm1, pm2.5, pm10, each indexed by
# its Metric column.
metric_dfs = [
    pd.read_csv(f'{model_dir}/{model_name}/result/metric/result_{label}.csv', index_col='Metric')
    for label in ['pm1', 'pm2.5', 'pm10']
]

In [6]:
def get_cond_df(_df, cond_in):
    """Return the rows of ``_df`` that match a five-character condition code.

    Each character of ``cond_in`` addresses one column, in this order:
    ``PERSON_NUMBER``, ``AIR_PURIFIER``, ``AIR_CONDITIONER``, ``WINDOW``,
    ``DOOR``.

    * Position 0 (``PERSON_NUMBER``): ``'0'`` keeps rows whose value is 0,
      ``'1'`` keeps rows whose value is non-zero, and any other character
      (e.g. ``'x'``) leaves the column unfiltered.
    * Positions 1-4: ``'x'`` / ``'X'`` leaves the column unfiltered; any other
      character is converted with ``int()`` and only rows equal to that value
      are kept.

    Args:
        _df: DataFrame containing the five condition columns.
        cond_in: Condition code of length 5, e.g. ``'00000'`` or ``'1x0xx'``.

    Returns:
        A new DataFrame holding the matching rows; ``_df`` itself is left
        untouched.

    Raises:
        ValueError: If ``cond_in`` does not have exactly five entries, or a
            character in positions 1-4 is neither ``x``/``X`` nor an integer.
    """
    cond_cols = ['PERSON_NUMBER', 'AIR_PURIFIER', 'AIR_CONDITIONER', 'WINDOW', 'DOOR']
    if len(cond_in) != len(cond_cols):
        # ValueError subclasses Exception, so callers catching Exception still work.
        raise ValueError('Invalid condition length')

    cond_df = _df.copy()

    # The person column is filtered as absent / present rather than by exact
    # count. Both branches must look at position 0: the previous code tested
    # cond_in[1] (the AIR_PURIFIER flag) in the "present" branch, so '1xxxx'
    # never filtered on persons and 'x1xxx' wrongly dropped empty-room rows.
    person_flag = cond_in[0]
    if person_flag == '0':
        cond_df = cond_df[cond_df[cond_cols[0]] == 0]
    elif person_flag == '1':
        cond_df = cond_df[cond_df[cond_cols[0]] != 0]

    # Remaining columns are matched on their exact integer value unless wildcarded.
    for col, flag in zip(cond_cols[1:], cond_in[1:]):
        if flag in ('x', 'X'):
            continue
        cond_df = cond_df[cond_df[col] == int(flag)]
    return cond_df

In [None]:
# Plot the PM columns and their *_PRED counterparts for the rows selected by
# condition code '00000'.
_ = utils.plot(
    get_cond_df(pred_df, '00000'),
    ['PM1', 'PM2.5', 'PM10', 'PM1_PRED', 'PM2.5_PRED', 'PM10_PRED'],
)

In [7]:
# Keep only rows matching condition '00000', then split them by the TYPE
# column into independent copies.
df = get_cond_df(pred_df, '00000')
train_df, val_df, test_df = (
    df[df['TYPE'] == split_name].copy()
    for split_name in ('train', 'val', 'test')
)

In [8]:
pms = ['PM1', 'PM2.5', 'PM10']

# Print metrics.calc_r2 for each PM column of the test split, passing the
# column itself first and its <name>_PRED column second.
for pm in pms:
    print(
        metrics.calc_r2(
            test_df[pm].values,
            test_df[f'{pm}_PRED'].values,
        )
    )

0.8054361502068589
0.8130286713072087
0.8114279345677691


In [None]:
# Plot every PM column of the test split followed by its *_PRED column.
_ = utils.plot(
    test_df,
    [*pms, *(f'{pm_name}_PRED' for pm_name in pms)],
)

# ODE Model

In [10]:
# Read the three ODE prediction files with identical options: DATE as a parsed
# date index.
ode_pm1_df, ode_pm25_df, ode_pm10_df = (
    pd.read_csv(csv_path, index_col='DATE', parse_dates=True)
    for csv_path in (
        'projects/particle/ode/pm1_pred.csv',
        'projects/particle/ode/pm2.5_pred.csv',
        'projects/particle/ode/pm10_pred.csv',
    )
)

In [11]:
# Put the three ODE prediction series side by side, aligned on their index.
ode_pred_df = pd.concat(
    [
        ode_pm1_df['PM1_PRED'],
        ode_pm25_df['PM2.5_PRED'],
        ode_pm10_df['PM10_PRED'],
    ],
    axis='columns',
)

In [12]:
# Prefix the ODE columns with ODE_ so they stay distinguishable from the
# identically named *_PRED columns of pred_df once both frames are combined.
ode_pred_df.columns = ['ODE_PM1_PRED', 'ODE_PM2.5_PRED', 'ODE_PM10_PRED']

In [13]:
# Join the model predictions and the ODE predictions column-wise on their index.
df = pd.concat([pred_df, ode_pred_df], axis=1)

# For each PM size keep the PM column, the model prediction and the ODE
# prediction, dropping rows where any of the three is missing.
pm1_df, pm25_df, pm10_df = (
    df[list(column_group)].dropna()
    for column_group in (
        ('PM1', 'PM1_PRED', 'ODE_PM1_PRED'),
        ('PM2.5', 'PM2.5_PRED', 'ODE_PM2.5_PRED'),
        ('PM10', 'PM10_PRED', 'ODE_PM10_PRED'),
    )
)

In [None]:
# Plot all columns of pm1_df (PM1, PM1_PRED, ODE_PM1_PRED). The return value is
# bound to _ so it is not echoed as the cell's output.
_ = utils.plot(pm1_df, pm1_df.columns)

In [None]:
# Plot all columns of pm25_df (PM2.5, PM2.5_PRED, ODE_PM2.5_PRED). The return
# value is bound to _ so it is not echoed as the cell's output.
_ = utils.plot(pm25_df, pm25_df.columns)

In [None]:
# Plot all columns of pm10_df (PM10, PM10_PRED, ODE_PM10_PRED). The return
# value is bound to _ so it is not echoed as the cell's output.
_ = utils.plot(pm10_df, pm10_df.columns)

In [16]:
# NOTE(review): this passes the model's PM1_PRED column as the first argument
# and ODE_PM1_PRED as the second, so the score compares the two predictions
# with each other. The earlier R^2 cell passes the PM1 column first -- confirm
# that prediction-vs-prediction agreement (not ODE vs PM1) is intended here.
metrics.calc_r2(pm1_df['PM1_PRED'].values, pm1_df['ODE_PM1_PRED'].values)

0.8702661206977642

In [18]:
# NOTE(review): this passes the model's PM2.5_PRED column as the first argument
# and ODE_PM2.5_PRED as the second, so the score compares the two predictions
# with each other. The earlier R^2 cell passes the PM column first -- confirm
# that prediction-vs-prediction agreement (not ODE vs PM2.5) is intended here.
metrics.calc_r2(pm25_df['PM2.5_PRED'].values, pm25_df['ODE_PM2.5_PRED'].values)

0.8848492260056826

In [19]:
# NOTE(review): this passes the model's PM10_PRED column as the first argument
# and ODE_PM10_PRED as the second, so the score compares the two predictions
# with each other. The earlier R^2 cell passes the PM column first -- confirm
# that prediction-vs-prediction agreement (not ODE vs PM10) is intended here.
metrics.calc_r2(pm10_df['PM10_PRED'].values, pm10_df['ODE_PM10_PRED'].values)

0.8810195190981019