# Differential Equation Modeling
1. List 자르기
2. $p',\, k'$ 최적화
3. $\frac{dC_{in}(t)}{dt}=p'C_{out}-k'C_{in}+\dot s$

In [None]:
import scipy.optimize as op
import pandas as pd
import numpy as np

In [None]:
import sys

sys.path.append("codes/scripts/particles/")

import data_handler as dh
import metrics
import utils

In [None]:
# Smoothing configuration forwarded to dh.apply_moving_average below.
moving_average_window = 20
moving_average_method = 'mean'

# Condition columns: passed to apply_moving_average as `excludes` and then
# re-attached from the raw frame, so they are taken unsmoothed.
excludes = ['PERSON_NUMBER', 'AIR_PURIFIER',
            'AIR_CONDITIONER', 'WINDOW', 'DOOR']
# Raw measurements, indexed by the parsed DATE column.
df_org = pd.read_csv('storage/particle/data.csv', index_col='DATE', parse_dates=True)
df = dh.apply_moving_average(df_org,
                             window=moving_average_window,
                             method=moving_average_method,
                             excludes=excludes)
# NOTE(review): presumably apply_moving_average returns the frame without the
# `excludes` columns; if it keeps them, this concat duplicates those columns.
# Confirm against data_handler.
df = pd.concat([df, df_org[excludes]], axis=1)


In [None]:
def get_cond_df(_df, cond_in):
    """Return the rows of ``_df`` that match a five-character condition code.

    The characters of ``cond_in`` address, in order, the columns
    PERSON_NUMBER, AIR_PURIFIER, AIR_CONDITIONER, WINDOW and DOOR.

    * Position 0 (PERSON_NUMBER): ``'0'`` keeps rows whose value is 0,
      ``'1'`` keeps rows whose value is non-zero, and any other character
      applies no filter.
    * Positions 1-4: ``'x'`` / ``'X'`` applies no filter; any other character
      is converted with ``int`` and the column must equal that value.

    Args:
        _df: DataFrame containing the five condition columns.
        cond_in: Condition code of length five, e.g. ``'00000'`` or ``'1xx0x'``.

    Returns:
        A new DataFrame with the matching rows; ``_df`` is not modified.

    Raises:
        ValueError: If ``cond_in`` does not have exactly five entries, or a
            character in positions 1-4 is neither ``x``/``X`` nor an integer.
    """
    cond_cols = ['PERSON_NUMBER', 'AIR_PURIFIER', 'AIR_CONDITIONER', 'WINDOW', 'DOOR']
    if len(cond_in) != len(cond_cols):
        raise ValueError('Invalid condition length')

    # Copy so that an all-wildcard code still returns a frame independent of
    # the caller's.
    cond_df = _df.copy()

    # PERSON_NUMBER is a count, so its flag means "empty" vs "occupied"
    # rather than an exact value. Both branches must look at position 0.
    person_col, person_flag = cond_cols[0], cond_in[0]
    if person_flag == '0':
        cond_df = cond_df[cond_df[person_col] == 0]
    elif person_flag == '1':
        cond_df = cond_df[cond_df[person_col] != 0]

    for col, flag in zip(cond_cols[1:], cond_in[1:]):
        if flag in ('x', 'X'):
            continue
        cond_df = cond_df[cond_df[col] == int(flag)]
    return cond_df

In [None]:
def catch_dates(_df, min_length=10, columns=('PM2.5', 'PM2.5_H_OUT', 'PM2.5_OUT')):
    """Locate contiguous runs of rows in which all required columns are present.

    The frame is scanned in row order. A run starts at the first row where
    none of ``columns`` is missing and ends at the last such row before the
    next row with a missing value (or the end of the frame). Only runs of at
    least ``min_length`` rows are reported; this also applies to the run that
    reaches the end of the frame.

    Args:
        _df: DataFrame to scan; its index labels are reported as run limits.
        min_length: Minimum number of rows a run must contain to be kept.
        columns: Names of the columns that must all be non-missing.

    Returns:
        ``{"start": [...], "end": [...]}`` with the first and last index
        label of every kept run, in order of appearance.
    """
    dates = {"start": [], "end": []}

    def _record(start, end, length):
        # Keep a finished run only if it exists and is long enough.
        if start is not None and length >= min_length:
            dates["start"].append(start)
            dates["end"].append(end)

    # One vectorised pass instead of a per-row NaN test inside the loop.
    valid = _df[list(columns)].notna().all(axis=1).to_numpy()

    start_date = None
    end_date = None
    cnt = 0
    for idx, ok in zip(_df.index, valid):
        if ok:
            if start_date is None:
                start_date = idx
            end_date = idx
            cnt += 1
        else:
            _record(start_date, end_date, cnt)
            start_date = None
            end_date = None
            cnt = 0
    # The run still open at the end of the frame obeys the same length rule.
    _record(start_date, end_date, cnt)
    return dates

In [None]:
# Rows matching condition code '00000' (see get_cond_df), filtered once and
# shared by both splits.
cond_df = get_cond_df(df, '00000')

# Split in time, then resample each side to one-minute means. '1min' is the
# non-deprecated spelling of the former '1T' alias.
# NOTE(review): rows stamped strictly between 13:45:00 and 13:46:00 fall into
# neither split -- confirm this gap is intended.
train_df = cond_df.loc[:pd.to_datetime('2022-07-07 13:45')].resample('1min').mean()
test_df = cond_df.loc[pd.to_datetime('2022-07-07 13:46'):].resample('1min').mean()

# Gap-free runs of at least 60 rows in each split.
train_dates = catch_dates(train_df, 60)
test_dates = catch_dates(test_df, 60)

train_dates_df = pd.DataFrame(train_dates)
test_dates_df = pd.DataFrame(test_dates)

# Hand-picked runs are discarded by position; the index is then renumbered
# from 0 so later positional lookups line up.
train_dates_df = train_dates_df.drop(38).reset_index(drop=True)

test_dates_df = test_dates_df.drop(list(range(6, 12)) + [28, 29]).reset_index(drop=True)

In [None]:
# One frame per retained run: every row of the split whose timestamp lies
# within [start, end] of that run (both ends inclusive).
train_dfs = [
    train_df[(train_df.index >= run_start) & (train_df.index <= run_end)]
    for run_start, run_end in zip(train_dates_df['start'], train_dates_df['end'])
]
#_ = utils.plot(pd.concat(train_dfs), ['PM2.5', 'PM2.5_OUT', 'PM2.5_H_OUT'])

test_dfs = [
    test_df[(test_df.index >= run_start) & (test_df.index <= run_end)]
    for run_start, run_end in zip(test_dates_df['start'], test_dates_df['end'])
]
#_ = utils.plot(pd.concat(test_dfs), ['PM2.5', 'PM2.5_OUT', 'PM2.5_H_OUT'])

In [None]:
# Column-name stem of the modelled quantity. The functions in this cell read
# '<target>', '<target>_OUT' and '<target>_H_OUT' and write '<target>_PRED'.
# NOTE(review): catch_dates selects gap-free runs on the PM2.5 columns, not on
# this target's columns -- confirm the target columns are gap-free as well.
target = 'PM1'

def predict(par, _df, target_col=None):
    """Integrate the indoor-concentration model with outdoor and hall inputs.

    Solves ``dC/dt = p1 * C_out + p2 * C_hall - k * C`` over the timestamps
    of ``_df`` with the trapezoidal (Crank-Nicolson) rule, starting from the
    first measured value of the target column:

        C[i+1] = (C[i] * (1 - k*dt/2) + dt * S) / (1 + k*dt/2)

    where ``dt`` is the step in hours and ``S`` is the average of the source
    term ``p1 * C_out + p2 * C_hall`` at the two ends of the step. The
    implicit denominator carries ``dt``; without it the scheme is not the
    trapezoidal rule for any step other than one hour.

    Args:
        par: Sequence ``(p1, p2, k)``.
        _df: DataFrame with a datetime index and the columns ``<col>``,
            ``<col>_OUT`` and ``<col>_H_OUT``.
        target_col: Column-name stem ``<col>``; defaults to the module-level
            ``target``.

    Returns:
        1-D ``numpy`` array of predictions, one per row of ``_df`` (empty if
        ``_df`` has no rows).
    """
    col = target if target_col is None else target_col
    p1, p2, k = par[0], par[1], par[2]

    n_rows = _df.shape[0]
    pred = np.zeros(n_rows)
    if n_rows == 0:
        return pred

    # Pull the inputs out of pandas once; per-step .iloc lookups dominate the
    # cost when an optimiser calls this function many times.
    outdoor = _df[col + '_OUT'].to_numpy(dtype=float)
    hall = _df[col + '_H_OUT'].to_numpy(dtype=float)
    delt = np.diff(_df.index.values) / np.timedelta64(1, 'h')  # step size in hours
    source = (p1 * (outdoor[1:] + outdoor[:-1]) / 2
              + p2 * (hall[1:] + hall[:-1]) / 2)

    pred[0] = _df[col].iloc[0]
    for i in range(n_rows - 1):
        half_decay = k * delt[i] / 2
        pred[i + 1] = (pred[i] * (1 - half_decay) + delt[i] * source[i]) / (1 + half_decay)

    return pred


def predict_without_hall(par, _df, target_col=None):
    """Integrate the indoor-concentration model driven by outdoor air only.

    Solves ``dC/dt = p1 * C_out - k * C`` over the timestamps of ``_df``
    with the trapezoidal (Crank-Nicolson) rule, starting from the first
    measured value of the target column:

        C[i+1] = (C[i] * (1 - k*dt/2) + dt * S) / (1 + k*dt/2)

    where ``dt`` is the step in hours and ``S`` is the average of
    ``p1 * C_out`` at the two ends of the step. This is the same scheme as
    the three-parameter ``predict`` with the hall term removed.

    Args:
        par: Sequence ``(p1, k)``.
        _df: DataFrame with a datetime index and the columns ``<col>`` and
            ``<col>_OUT``.
        target_col: Column-name stem ``<col>``; defaults to the module-level
            ``target``.

    Returns:
        1-D ``numpy`` array of predictions, one per row of ``_df`` (empty if
        ``_df`` has no rows).
    """
    col = target if target_col is None else target_col
    p1, k = par[0], par[1]

    n_rows = _df.shape[0]
    pred = np.zeros(n_rows)
    if n_rows == 0:
        return pred

    # Pull the inputs out of pandas once; per-step .iloc lookups dominate the
    # cost when an optimiser calls this function many times.
    outdoor = _df[col + '_OUT'].to_numpy(dtype=float)
    delt = np.diff(_df.index.values) / np.timedelta64(1, 'h')  # step size in hours
    source = p1 * (outdoor[1:] + outdoor[:-1]) / 2

    pred[0] = _df[col].iloc[0]
    for i in range(n_rows - 1):
        half_decay = k * delt[i] / 2
        pred[i + 1] = (pred[i] * (1 - half_decay) + delt[i] * source[i]) / (1 + half_decay)

    return pred


def loss_func(par, _dfs=None):
    """Objective for the optimiser: variance of the pooled prediction residuals.

    Each frame in ``_dfs`` is simulated independently, restarting from its
    own first measurement. Measurements and predictions of all frames are
    then concatenated and the variance of ``measured - predicted`` is
    returned. Because this is a variance and not a mean squared error, a
    constant offset between prediction and measurement is not penalised.

    Args:
        par: Parameter vector. Three entries select ``predict``; any other
            length selects ``predict_without_hall``.
        _dfs: Iterable of DataFrames to evaluate. When omitted, the
            module-level ``test_dfs`` is looked up at call time, so a
            re-built ``test_dfs`` is picked up without redefining this
            function.

    Returns:
        The residual variance as a float.

    Raises:
        ValueError: From ``np.concatenate`` when ``_dfs`` is empty.
    """
    if _dfs is None:
        _dfs = test_dfs

    predictor = predict if len(par) == 3 else predict_without_hall

    _meas = np.concatenate([_df[target].values for _df in _dfs], axis=None)
    _pred = np.concatenate([predictor(par, _df) for _df in _dfs], axis=None)

    return np.var(_meas - _pred)


def get_results(par, _dfs):
    """Attach model predictions to every frame and stack them into one frame.

    Args:
        par: Parameter vector. Three entries select ``predict``; any other
            length selects ``predict_without_hall``.
        _dfs: Iterable of DataFrames to simulate.

    Returns:
        The concatenation of copies of the input frames, each extended with
        a ``<target>_PRED`` column holding that frame's prediction.

    Raises:
        ValueError: From ``pd.concat`` when ``_dfs`` is empty.
    """
    predictor = predict if len(par) == 3 else predict_without_hall

    _res_dfs = []
    for _df in _dfs:
        # Copy before adding the column: the inputs are slices of larger
        # frames and must not be modified as a side effect.
        _res = _df.copy()
        _res[target + '_PRED'] = predictor(par, _df)
        _res_dfs.append(_res)

    return pd.concat(_res_dfs)


In [None]:
# Two (0, 2) bounds give a two-element parameter vector, so loss_func takes
# its `len(par) != 3` branch and fits predict_without_hall (p1, k).
bounds = [(0, 2)] * 2
# NOTE(review): this empty dict is overwritten by the next statement and is
# never read.
results = dict()

# NOTE(review): loss_func is called without `_dfs`, so the optimiser fits
# against its default (test_dfs), not train_dfs -- confirm this is intended.
results = op.shgo(loss_func, bounds=bounds)

In [None]:
# Notebook display: the optimiser's solution vector as a plain list.
results.x.tolist()

In [None]:
# Notebook display: the resampled frame for the test period.
test_df

In [None]:
# 3x3 zero-initialised scratch array; a later cell writes the fitted
# parameters into one of its rows.
data = np.zeros((3, 3))

In [None]:
# Store the fitted parameters in row 2.
# NOTE(review): each row of `data` holds three values, while a fit with the
# two-element `bounds` yields a two-element `results.x`; that assignment
# cannot be broadcast and raises ValueError. Confirm which fit this row is for.
data[2] = results.x.tolist()

In [None]:
# Simulate every train and test run with the fitted parameters; each result
# frame carries the measurements plus a '<target>_PRED' column.
train_res_df = get_results(results.x.tolist(), train_dfs)
test_res_df = get_results(results.x.tolist(), test_dfs)

In [None]:
# Tag each row with the split it came from, then stack both splits.
train_res_df['TYPE'] = 'train'
test_res_df['TYPE'] = 'test'
res_df = pd.concat([train_res_df, test_res_df])

In [None]:
#res_df.to_csv('projects/particle/ode/pm10_pred.csv', index_label='DATE')

In [None]:
# Plot the measured target column against its prediction; the return value
# of utils.plot is assigned to `_` and not used.
_ = utils.plot(res_df, [target, target + '_PRED'])