In [None]:
import sys

import pandas as pd
import tensorflow as tf

sys.path.append("../scripts/")

In [None]:
import particle_data
from particle_utils import *

In [None]:
# ---------------------------------------------------------------------------
# Column roles
# ---------------------------------------------------------------------------
# Columns the model predicts.
TARGET_LABEL = ["PM1", "PM2.5", "PM10"]

# Input columns, assembled from three groups. The concatenation order is the
# feature order, so the groups must stay in this sequence.
_PM_OUT_FEATURES = [
    "PM1_2.5_OUT",
    "PM1_2.5_H_OUT",
    "PM2.5_OUT",
    "PM2.5_H_OUT",
    "PM2.5_10_OUT",
    "PM2.5_10_H_OUT",
]
_STATE_FEATURES = [
    "PERSON_NUMBER",
    "AIR_PURIFIER",
    "WINDOW",
    "AIR_CONDITIONER",
    "DOOR",
]
_WEATHER_FEATURES = [
    "TEMPERATURE",
    "WIND_SPEED",
    "WIND_DEG",
    "HUMIDITY",
]
FEATURE_LABEL = _PM_OUT_FEATURES + _STATE_FEATURES + _WEATHER_FEATURES

# ---------------------------------------------------------------------------
# Windowing / batching constants
# ---------------------------------------------------------------------------
WINDOW_SIZE = 30
OFFSET = 0
OUTPUT_SIZE = 1
BATCH_SIZE = 32

# ---------------------------------------------------------------------------
# Run configuration (serialised to config.json further down the notebook)
# ---------------------------------------------------------------------------
# (start, end) timestamps of the data segments that are kept.
_USED_PERIODS = [
    ("2022-05-07 09:40", "2022-05-17 08:38"),
    ("2022-05-17 11:25", "2022-05-30 23:26"),
    ("2022-06-01 22:40", "2022-07-02 07:00"),
    ("2022-07-02 16:40", "2022-07-09 07:13"),
    ("2022-07-09 14:30", "2022-07-12 10:00"),
    ("2022-07-25 12:00", "2022-08-01 10:00"),
    ("2022-08-03 09:00", "2022-08-11 22:18"),
    ("2022-08-12 12:14", "2022-08-20 00:00"),
]

# Where each kind of artifact goes, relative to the run's root directory.
_OUTPUT_DIRS = {
    "weights": "training/weights",
    "history": "training/history",
    "metric": "result/metric",
    "model": "result/model",
    "predict": "result/predict",
}

# Smoothing, split fractions and the kept time ranges.
_DATA_CONFIG = {
    "moving_average_window": 15,
    "moving_average_method": 'median',
    "train": 0.60,
    "validation": 0.15,
    "test": 0.25,
    "used_data": [
        {"start": period_start, "end": period_end}
        for period_start, period_end in _USED_PERIODS
    ],
    # Placeholder; assigned once the training statistics have been computed.
    "meta": None,
}

# Optimiser / training settings plus the nested data section.
_MODEL_CONFIG = {
    "lr": 1e-05,
    "batch_size": BATCH_SIZE,
    "epochs": 300,
    "window_size": WINDOW_SIZE,
    "offset": OFFSET,
    "loss": "MSE",
    "data": _DATA_CONFIG,
}

hyper_params = {
    "name": "Conv_Weather",
    "description": "CONV WITH WEATHER",
    "version": "06",
    "root_dir": "project/Conv",
    "dirs": _OUTPUT_DIRS,
    "model": _MODEL_CONFIG,
}

In [None]:
# Read the weather table with DATE parsed as the index and keep only the four
# columns that are used later as model inputs.
# NOTE(review): `np` is not imported explicitly in this notebook; it is
# presumably re-exported by `from particle_utils import *` — confirm.
weather_columns = ['TEMPERATURE', 'WIND_DEG', 'WIND_SPEED', 'HUMIDITY']
weather_df = pd.read_csv(
    '../data/weather.csv',
    index_col='DATE',
    parse_dates=True,
)[weather_columns]

# Scale WIND_DEG by pi/180 (degrees -> radians, assuming the CSV stores degrees).
weather_df['WIND_DEG'] = weather_df['WIND_DEG'].values * np.pi / 180

In [None]:
# Load the particulate-matter measurements through the project helper `load_pm`.
df_org = load_pm("../data/data.csv")
# NOTE(review): the return value of `add_diff` is not assigned, so it presumably
# adds its derived columns to `df_org` in place — confirm against the helper.
add_diff(df_org)

In [None]:
# Shorthand for the data section of the run configuration.
data_cfg = hyper_params["model"]["data"]

# Join the PM frame and the weather frame column-wise (aligned on the index),
# then smooth with the configured moving-average method and window.
# NOTE(review): the meaning of the trailing positional `True` is defined by
# `apply_moving_average` — confirm against the helper.
df = apply_moving_average(
    pd.concat([df_org, weather_df], axis=1),
    data_cfg["moving_average_method"],
    data_cfg["moving_average_window"],
    True,
)

# Forward-fill gaps in the room-state columns before dropping incomplete rows.
# The previous code called `.fillna(method='ffill')` without assigning the
# result, so nothing was filled and every row with a gap in these columns was
# silently discarded by the `dropna` below.
state_cols = ['PERSON_NUMBER', 'AIR_PURIFIER', 'AIR_CONDITIONER', 'WINDOW', 'DOOR']
df[state_cols] = df[state_cols].ffill()
df.dropna(inplace=True)

# Keep only the configured time ranges, then split them into train/val/test.
dfs = trim_df(df, data_cfg["used_data"])
train_dfs, val_dfs, test_dfs = train_test_split_df(dfs, data_cfg["validation"], data_cfg["test"])

# Compute `meta` from the training split only, for the columns listed here, so
# that validation and test data do not influence it.
meta_cols = [
    "PERSON_NUMBER",
    "PM2.5_OUT",
    "PM2.5_H_OUT",
    "PM1_2.5_OUT",
    "PM1_2.5_H_OUT",
    "PM2.5_10_OUT",
    "PM2.5_10_H_OUT",
    "PM1_OUT",
    "PM1_H_OUT",
    "PM10_OUT",
    "PM10_H_OUT",
    "TEMPERATURE",
    "WIND_SPEED",
    "WIND_DEG",
    "HUMIDITY",
]
meta = get_meta(train_dfs, meta_cols)
# Record it in the configuration so it is written out with the run settings.
hyper_params['model']['data']['meta'] = meta

# NOTE(review): the training split is converted WITHOUT `meta`, while the
# validation and test splits (and every later `get_result_df` call) pass it.
# Unless `translate_to_dataset` derives the same statistics itself when `meta`
# is omitted, the model is trained on differently scaled inputs than it is
# evaluated on — confirm, and pass `meta` here if so.
X_train, y_train = translate_to_dataset(train_dfs)
X_val, y_val = translate_to_dataset(val_dfs, meta)
X_test, y_test = translate_to_dataset(test_dfs, meta)

print("X_train, y_train shape: ", X_train.shape, y_train.shape)
print("X_val, y_val shape: ", X_val.shape, y_val.shape)
print("X_test, y_test shape: ", X_test.shape, y_test.shape)

In [None]:
import json

# Every artifact of this run is written below <root_dir>/<name><version>.
root_dir = (
    hyper_params["root_dir"] + "/" + hyper_params["name"] + hyper_params["version"]
)

# Create the output sub-directories (same creation order as before).
for dir_key in ("weights", "history", "predict", "model", "metric"):
    create_folder(root_dir + "/" + hyper_params["dirs"][dir_key])

# Snapshot the full configuration next to the results. The `with` block closes
# the file on exit, so no explicit close() is needed.
# NOTE(review): `hyper_params["model"]["data"]["meta"]` is filled in by an
# earlier cell; json.dump raises TypeError if it holds non-JSON types — confirm.
with open(f"{root_dir}/config.json", "w") as outfile:
    json.dump(hyper_params, outfile)

In [None]:
from tensorflow.keras import backend
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau


# Multiply the learning rate by 0.2 after 5 epochs without val_loss
# improvement, never going below 1e-6.
rlr_cb = ReduceLROnPlateau(
    monitor="val_loss", factor=0.2, patience=5, mode="min", verbose=1, min_lr=1e-6
)

# Stop training after 15 epochs without val_loss improvement.
ely_cb = EarlyStopping(monitor="val_loss", patience=15, mode="min", verbose=1)

# Checkpoint file name; the {epoch}/{val_loss} placeholders are filled in by
# Keras at save time, so this must stay a plain string (not an f-string).
weights_pattern = (
    root_dir
    + "/"
    + hyper_params["dirs"]["weights"]
    + "/e{epoch:02d}-v{val_loss:.2f}.h5"
)

# Save the weights after every epoch. `save_freq="epoch"` replaces the
# deprecated `period=1` keyword and has the same effect.
mcp_cb = ModelCheckpoint(
    filepath=weights_pattern,
    monitor="val_loss",
    save_weights_only=True,
    mode="min",
    save_freq="epoch",
    verbose=0,
)

In [None]:
from tensorflow.keras import Model
from tensorflow.keras.layers import (
    GRU,
    LSTM,
    Conv1D,
    Dense,
    Dropout,
    Flatten,
    GlobalMaxPooling1D,
    Input,
    LeakyReLU,
    MaxPooling1D,
    Attention,
    Permute,
)
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam

def attention_3d_block(inputs):
    """Re-weight a 3-D tensor with softmax weights learned along its axis 1.

    Args:
        inputs: Keras tensor of rank 3 — presumably (batch, steps, features);
            ``inputs.shape[1]`` must be a known (non-None) size because it is
            used as the Dense layer width.

    Returns:
        A tensor with the same shape as ``inputs``: the element-wise product of
        ``inputs`` and the learned weights.

    Note:
        The second Permute layer carries the fixed name ``'attention_vec'``, so
        this block can be used at most once per model (Keras requires layer
        names to be unique within a model).
    """
    # Swap the two non-batch axes so the Dense layer (which acts on the last
    # axis) mixes values along axis 1 of `inputs`.
    a = Permute((2, 1))(inputs)
    # One unit per position on that axis; softmax normalises across them.
    a = Dense(inputs.shape[1], activation='softmax')(a)

    # Swap back so the weights line up element-wise with `inputs`.
    a_probs = Permute((2, 1), name='attention_vec')(a)

    return tf.keras.layers.multiply([inputs, a_probs])

def build_model(inputs, n_outputs=None):
    """Build and compile the 1-D convolutional regression model.

    Architecture: Conv1D(256, kernel 3) -> MaxPooling1D(3, stride 1) -> Flatten
    -> Dense(256, LeakyReLU(0.25)) -> Dropout(0.5) -> Dense(n_outputs, relu).

    Args:
        inputs: Array-like whose ``shape[1]`` and ``shape[2]`` give the
            per-sample input shape. Only the shape is read.
        n_outputs: Width of the output layer. When omitted it falls back to
            ``y_train.shape[2]`` from the notebook's global ``y_train``, which
            is what this function always used before the parameter existed.

    Returns:
        A compiled ``tf.keras.Model`` named ``<name>_v<version>`` (taken from
        ``hyper_params``), using Adam with the configured learning rate, the
        configured loss, and RootMeanSquaredError as a metric.
    """
    if n_outputs is None:
        # Backward-compatible default: read the target width from the global.
        n_outputs = y_train.shape[2]

    input_tensor = Input(shape=(inputs.shape[1], inputs.shape[2]), name="input")

    # Feature extractor. Earlier GRU / attention experiments that lived here as
    # commented-out code were removed; `attention_3d_block` is still defined in
    # this notebook if they need to be revived.
    x = Conv1D(256, kernel_size=3, kernel_initializer='he_uniform', activation='relu', strides=1, padding="valid")(input_tensor)
    x = MaxPooling1D(pool_size=3, strides=1)(x)
    x = Flatten()(x)

    # Regression head.
    x = Dense(256, kernel_initializer='he_uniform', activation=LeakyReLU(0.25))(x)
    x = Dropout(0.5)(x)
    # NOTE(review): `y_train` is indexed as a 3-D array (shape[2]) while this
    # layer emits a 2-D (batch, n_outputs) tensor — confirm that the loss does
    # not silently broadcast the two against each other.
    output = Dense(n_outputs, kernel_initializer='he_uniform', activation="relu", name="output")(x)

    _model = Model(
        inputs=input_tensor,
        outputs=output,
        name=f'{hyper_params["name"].lower()}_v{hyper_params["version"]}',
    )

    _model.compile(
        optimizer=Adam(learning_rate=hyper_params["model"]["lr"]),
        loss=hyper_params["model"]["loss"].lower(),
        # `metrics` is documented as a list; a bare metric object is rejected
        # by newer Keras releases.
        metrics=[RootMeanSquaredError()],
    )

    return _model


# Build the network from the shape of the training inputs and print its
# layer-by-layer summary.
model = build_model(X_train)
model.summary()

In [None]:
# Fit on GPU 0 with LR scheduling, early stopping and per-epoch checkpoints.
# Only the TensorFlow work needs to run inside the device scope.
with tf.device("/device:GPU:0"):
    training_res = model.fit(
        x=X_train,
        y=y_train,
        batch_size=BATCH_SIZE,
        epochs=hyper_params["model"]["epochs"],
        validation_data=(X_val, y_val),
        callbacks=[rlr_cb, ely_cb, mcp_cb],
    )

# Persist the per-epoch history (loss and metrics) for later inspection.
pd.DataFrame(training_res.history).to_csv(
    root_dir + "/" + hyper_params["dirs"]["history"] + "/history.csv", index=False
)

# Learning curves for the training and validation loss.
# NOTE(review): `plt` is not imported explicitly in this notebook; it is
# presumably re-exported by `from particle_utils import *` — confirm.
plt.figure(figsize=(28, 10))
plt.plot(training_res.history["loss"], "o--", label="train")
plt.plot(training_res.history["val_loss"], "o--", label="valid")
plt.xlabel("Epochs", fontsize=15)
# The plotted quantity is the compiled loss (MSE in the configuration), not the
# RMSE metric, so the label follows the configured loss name.
plt.ylabel(f'Loss - {hyper_params["model"]["loss"]}', fontsize=15)
# Trailing semicolon keeps the Legend repr out of the cell output.
plt.legend(fontsize=15);

In [None]:
# NOTE(review): this rebinds `model` to a model stored under the hard-coded
# Conv_Weather04 directory, while `hyper_params["version"]` is "06". Everything
# below (predictions, metrics, and the final save) therefore uses this loaded
# model, not the one fitted above — confirm this is intended.
model = tf.keras.models.load_model("project/Conv/Conv_Weather04/result/model/conv_weather_04.h5")

# Run the model on each split and tag every row with the split it came from.
split_results = {}
for split_name, split_dfs in (("train", train_dfs), ("val", val_dfs), ("test", test_dfs)):
    split_res = get_result_df(model, split_dfs, meta)
    split_res['TYPE'] = split_name
    split_results[split_name] = split_res

train_res, val_res, test_res = (
    split_results["train"],
    split_results["val"],
    split_results["test"],
)

In [None]:
# # model.load_weights('project/GRU/GRU09/training/weights/e23-v17.85.h5')
# model = tf.keras.models.load_model("project/GRU/GRUkt01/result/model/gru_kt01.h5")
# train_res = get_result_df(model, train_dfs, meta)
# val_res = get_result_df(model, val_dfs, meta)
# test_res = get_result_df(model, test_dfs, meta)

In [None]:
# Training split: predicted vs. measured PM2.5 together with the *_OUT PM2.5
# columns and the room-state columns.
train_plot_columns = [
    'PM2.5_PRED',
    'PM2.5',
    'PM2.5_OUT',
    'PM2.5_H_OUT',
    'PERSON_NUMBER',
    'AIR_PURIFIER',
    'AIR_CONDITIONER',
    'WINDOW',
    'DOOR',
]
plot(train_res, train_plot_columns)

In [None]:
# Validation split: predicted vs. measured PM2.5 together with the *_OUT PM2.5
# columns and the room-state columns.
val_plot_columns = [
    'PM2.5_PRED',
    'PM2.5',
    'PM2.5_OUT',
    'PM2.5_H_OUT',
    'PERSON_NUMBER',
    'AIR_PURIFIER',
    'AIR_CONDITIONER',
    'WINDOW',
    'DOOR',
]
plot(val_res, val_plot_columns)

In [None]:
# Test split: predicted vs. measured PM2.5 together with the *_OUT PM2.5
# columns and the room-state columns.
test_plot_columns = [
    'PM2.5_PRED',
    'PM2.5',
    'PM2.5_OUT',
    'PM2.5_H_OUT',
    'PERSON_NUMBER',
    'AIR_PURIFIER',
    'AIR_CONDITIONER',
    'WINDOW',
    'DOOR',
]
plot(test_res, test_plot_columns)

In [None]:
# Predicted-vs-measured PM2.5: validation rows in yellow, test rows in green,
# both drawn on the same axes.
scatter_kwargs = dict(x="PM2.5", y="PM2.5_PRED", figsize=(15, 15))
ax = val_res.plot.scatter(c="y", **scatter_kwargs)
test_res.plot.scatter(c="g", ax=ax, **scatter_kwargs)

# Span of the identity line: the smallest and largest value over both axes.
axis_limits = [ax.get_xlim(), ax.get_ylim()]
lims = [np.min(axis_limits), np.max(axis_limits)]

# y = x reference line; equal aspect so it is drawn at 45 degrees.
ax.plot(lims, lims, "r-", linewidth=2, alpha=0.75, zorder=2)
ax.set_aspect("equal")

In [None]:
# Set to False to print the metric tables without writing any files.
save = True

# Lower-case target names; upper-cased later to address the result columns.
cols = ["pm1", "pm2.5", "pm10"]
total_res = pd.concat([train_res, val_res, test_res])

# (column label, result frame) pairs, in the order the table columns appear.
split_table = [
    ("Total", total_res),
    ("Train", train_res),
    ("Validation", val_res),
    ("Test", test_res),
]
res_dfs = [split_frame for _label, split_frame in split_table]
res_indices = [split_label for split_label, _frame in split_table]

# (row label, metric function) pairs, in the order the table rows appear.
metric_table = [
    ("R Square", calc_r2),
    ("Corr", calc_corrcoef),
    ("NMSE", calc_nmse),
    ("FB", calc_fb),
    ("B", calc_b),
    ("a/C", calc_a_div_co),
]
metrics = [metric_fn for _label, metric_fn in metric_table]
metrics_indices = [metric_label for metric_label, _fn in metric_table]


def calc_metric(_f, _df, _col):
    """Evaluate one metric for one target column of a result frame.

    Args:
        _f: Callable taking two arrays; it is called with the values of
            column ``_col`` first and the values of ``_col + "_PRED"`` second.
        _df: DataFrame containing both columns.
        _col: Name of the target column.

    Returns:
        Whatever ``_f`` returns.
    """
    observed = _df[_col].values
    predicted = _df[_col + "_PRED"].values
    return _f(observed, predicted)


# One table per target: rows are metrics, columns are the data splits.
for col in cols:
    print(f"======== {col} prediction results ========")
    target = col.upper()  # result columns use the upper-case target name

    # First column holds the metric labels, then one empty list per split.
    res_dict = {"Metric": metrics_indices}
    for split_label in res_indices:
        res_dict[split_label] = []

    # Metric-major order, so each split's list lines up with `metrics_indices`.
    for metric_fn in metrics:
        for split_label, split_df in zip(res_indices, res_dfs):
            res_dict[split_label].append(calc_metric(metric_fn, split_df, target))

    r_df = pd.DataFrame(res_dict)
    print(r_df)
    print()

    if not save:
        continue
    r_df.to_csv(
        f'{root_dir}/{hyper_params["dirs"]["metric"]}/result_{col}.csv',
        index=False,
        float_format="%.3f",
    )

In [None]:
# Write the combined predictions and the model itself into the run directory.
# NOTE(review): `model` is whatever the evaluation cell left bound — in this
# notebook that is the model loaded from the Conv_Weather04 path — confirm that
# this is the model meant to be stored under the current version.
if save:
    predict_dir = hyper_params["dirs"]["predict"]
    total_res.to_csv(f"{root_dir}/{predict_dir}/predict.csv", index_label="DATE")

    model_dir = hyper_params["dirs"]["model"]
    model_file = f'{hyper_params["name"].lower()}_{hyper_params["version"]}.h5'
    model.save(f"{root_dir}/{model_dir}/{model_file}")