In [None]:
import sys

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf

# Extend the import search path so the next cell can import the helper modules
# (data_handler, metrics, utils) — presumably they live in this directory; confirm.
sys.path.append("./scripts/particles/")

In [None]:
import data_handler as dh
import metrics
import utils

In [None]:
# Target columns the models predict.
outputs = ['PM1', 'PM2.5', 'PM10']

# Feature columns fed to the models.
inputs = [
    'PM1_2.5_OUT',
    'PM1_2.5_H_OUT',
    'PM2.5_OUT',
    'PM2.5_H_OUT',
    'PM2.5_10_OUT',
    'PM2.5_10_H_OUT',
    'PERSON_NUMBER',
    'AIR_PURIFIER',
    'WINDOW',
    'AIR_CONDITIONER',
    'DOOR',
    'WIND_DEG',
    'HUMIDITY'
]

# Forwarded to dh.dfs_to_dataset as `offset` and `out_time_step`.
offset = 1
out_time_step = 1

# Time ranges handed to dh.trim_df (presumably one contiguous segment per entry).
dates = [
    {"start": "2022-05-07 09:40", "end": "2022-05-17 08:38"},
    {"start": "2022-05-17 11:25", "end": "2022-05-30 23:26"},
    {"start": "2022-06-01 22:40", "end": "2022-07-02 07:00"},
    {"start": "2022-07-02 16:40", "end": "2022-07-09 07:13"},
    {"start": "2022-07-09 14:30", "end": "2022-07-12 10:00"},
    {"start": "2022-07-25 12:00", "end": "2022-08-01 10:00"},
    {"start": "2022-08-03 09:00", "end": "2022-08-11 22:18"},
    {"start": "2022-08-12 12:14", "end": "2022-08-20 00:00"},
    {"start": "2022-08-20 09:38", "end": "2022-09-01 00:00"},
]

# Smoothing settings forwarded to dh.apply_moving_average.
moving_average_window = 20
moving_average_method = 'mean'

# Split fractions; the training share is whatever remains after validation and test.
val_size = 0.15
test_size = 0.25
train_size = 1 - val_size - test_size

In [None]:
# Weather covariates indexed by timestamp. WIND_DEG is replaced by the sine of the
# angle (degrees converted to radians).
weather_df = pd.read_csv(
    '../storage/particle/weather.csv', index_col='DATE', parse_dates=True
)[['TEMPERATURE', 'WIND_DEG', 'WIND_SPEED', 'HUMIDITY']]
weather_df['WIND_DEG'] = np.sin(weather_df['WIND_DEG'].values * np.pi / 180)

df_org = dh.load_data("../storage/particle/data.csv")
df_org = dh.add_pm_diff(df_org)

# Columns passed as `excludes` to the moving average and then re-attached from the
# unsmoothed frame below.
excludes = ['PERSON_NUMBER', 'AIR_PURIFIER', 'AIR_CONDITIONER', 'WINDOW', 'DOOR']
df = dh.apply_moving_average(
    pd.concat([df_org, weather_df], axis=1),
    window=moving_average_window,
    method=moving_average_method,
    excludes=excludes,
    min_periods=1,
)
df = pd.concat([df, df_org[excludes]], axis=1)
# DataFrame.ffill() replaces the deprecated fillna(method='ffill'); same result.
df[excludes] = df[excludes].ffill()
# Assign instead of mutating in place so re-running the cell stays predictable.
df = df.dropna()

dfs = dh.trim_df(df, dates)
train_dfs, val_dfs, test_dfs = dh.train_test_split_df(dfs, val_size, test_size)
# Summary statistics computed from the training frames only.
meta_df = pd.concat(train_dfs).describe()

In [None]:
win_sizes = [12, 16, 30, 45, 60]


def to_dataset(_dfs, in_time_step):
    """Convert a list of frames into a dataset for one input window length.

    Thin wrapper around ``dh.dfs_to_dataset`` that pins everything except the
    frames and the input window: ``meta_df``, the ``inputs``/``outputs`` column
    lists, ``out_time_step`` and ``offset`` come from the notebook globals, and
    the output columns are also passed as ``excludes``.
    """
    return dh.dfs_to_dataset(
        _dfs,
        meta_df,
        inputs,
        outputs,
        in_time_step=in_time_step,
        out_time_step=out_time_step,
        offset=offset,
        excludes=outputs,
    )


# One dataset per candidate window size, keyed by the size rendered as a string.
train_ds, val_ds, test_ds = {}, {}, {}

for win_size in win_sizes:
    ds_key = str(win_size)
    train_ds[ds_key] = to_dataset(train_dfs, win_size)
    val_ds[ds_key] = to_dataset(val_dfs, win_size)
    test_ds[ds_key] = to_dataset(test_dfs, win_size)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau


# Multiply the learning rate by 0.2 (never below 1e-6) after 10 epochs without a
# better val_loss.
rlr_cb = ReduceLROnPlateau(
    monitor="val_loss",
    mode="min",
    factor=0.2,
    patience=10,
    min_lr=1e-6,
    verbose=False,
)
# Stop after 20 epochs without a better val_loss and restore the best weights.
ely_cb = EarlyStopping(
    monitor="val_loss",
    mode="min",
    patience=20,
    restore_best_weights=True,
    verbose=False,
)

In [None]:
import keras_tuner as kt
from keras.layers import Input, Dense, Conv1D, MaxPooling1D, Dropout, GlobalMaxPooling1D, GlobalAveragePooling1D, LSTM, BatchNormalization, LeakyReLU, SimpleRNN, GRU, Flatten
from keras.optimizers import Adam
from keras import Model
from keras.metrics import RootMeanSquaredError

# Recurrent layer classes, indexed by the 'rnn_type' hyperparameter.
rnn_types = [SimpleRNN, LSTM, GRU]


class ConvPredictor(kt.HyperModel):
    """Tunable Conv1D -> RNN -> MaxPooling1D -> Dense regressor.

    The input window length is itself a hyperparameter ('window_size'); the
    matching pre-built arrays are looked up in the notebook-level ``train_ds`` /
    ``val_ds`` dictionaries, which are keyed by the window size as a string.
    """

    def __init__(self):
        super().__init__()
        # Window size chosen by the most recent build(); kept for inspection only.
        self.win_size = None

    def build(self, hp):
        """Build and compile a model for the hyperparameters in ``hp``.

        Stacks 0-2 strided Conv1D layers and 0-2 recurrent layers (type chosen by
        'rnn_type'), then max-pools, flattens, and applies a Dense/Dropout head
        and a ReLU output layer. Compiled with Adam, MSE loss and an RMSE metric.
        """
        self.win_size = hp.Choice('window_size', win_sizes, default=win_sizes[0])
        n_conv_layers = hp.Choice('n_conv_layers', [0, 1, 2], default=1)
        n_rnn_layers = hp.Choice('n_rnn_layers', [0, 1, 2], default=0)
        rnn_type = hp.Choice('rnn_type', [0, 1, 2], default=2)

        # Only the array shapes are needed here, to size the input and output layers.
        x_train = train_ds[str(self.win_size)][0]
        y_train = train_ds[str(self.win_size)][1]

        input_tensor = Input(shape=(x_train.shape[1], x_train.shape[2]), name='input')
        x = input_tensor

        for i in range(n_conv_layers):
            x = Conv1D(hp.Int(f'conv_filters_{i}', min_value=16, max_value=128, step=16),
                       kernel_size=hp.Int(f'kernel_size_{i}', min_value=3, max_value=5, step=1),
                       activation='relu',
                       strides=hp.Int(f'conv_strides_{i}', min_value=1, max_value=3, step=1),
                       padding='same')(x)

        # The hyperparameter names keep the 'gru_' prefix for every RNN type so
        # that existing tuner projects stay readable.
        for i in range(n_rnn_layers):
            x = rnn_types[rnn_type](
                units=hp.Int(f'gru_units_{i}', min_value=32, max_value=256, step=32),
                activation='tanh',
                kernel_initializer='he_uniform',
                return_sequences=True,
                dropout=hp.Float(f'gru_dropout_{i}', min_value=0.0, max_value=0.5, step=0.05)
            )(x)

        pool_size = hp.Int('max_pool_size', min_value=2, max_value=5, step=1)
        # 0 is a sentinel for "let Keras default the stride to the pool size".
        pool_strides = hp.Choice('max_pool_strides', [0, 1, 2, 3, 4])
        if pool_strides == 0:
            pool_strides = None
        # Strided convolutions can shrink the time axis below the sampled pool size
        # (e.g. window 12 with two stride-3 convs leaves 2 steps), which would make
        # the pooling layer fail to build. Clamp so every sampled trial is valid.
        time_steps = x.shape[1]
        if time_steps is not None:
            pool_size = min(pool_size, time_steps)
        x = MaxPooling1D(pool_size=pool_size, strides=pool_strides)(x)
        x = Flatten()(x)

        fc_units = hp.Int('fc_units', min_value=32, max_value=256, step=32)
        leaky_relu = hp.Float('leaky_relu', min_value=0.0, max_value=0.5, step=0.05)
        fc_dropout = hp.Float('fc_dropout', min_value=0.0, max_value=0.5, step=0.05)

        x = Dense(fc_units, kernel_initializer='he_uniform', activation=LeakyReLU(alpha=leaky_relu))(x)
        x = Dropout(fc_dropout)(x)

        # NOTE(review): the targets are indexed as 3-D (shape[2]) while this layer
        # emits 2-D predictions — confirm the loss does not broadcast across the batch.
        output = Dense(y_train.shape[2], kernel_initializer='he_uniform', activation='relu', name='output')(x)

        model = Model(inputs=input_tensor, outputs=output, name='model')

        hp_learning_rate = hp.Choice('learning_rate', [0.001, 0.0001, 0.00001])
        model.compile(
            optimizer=Adam(learning_rate=hp_learning_rate),
            loss='mse',
            # compile() documents `metrics` as a list; newer Keras rejects a bare metric.
            metrics=[RootMeanSquaredError()],
        )
        return model

    def fit(self, hp, model, *args, **kwargs):
        """Train ``model`` on the arrays matching the trial's window size.

        The window size is read from ``hp`` rather than from ``self.win_size``, so
        the data always matches the hyperparameters of the model being fitted.
        Validation uses the validation split: scoring trials on the test split
        would leak it into hyperparameter selection. The batch size is tuned too.
        """
        ds_key = str(hp.get('window_size'))
        return model.fit(
            train_ds[ds_key][0],
            train_ds[ds_key][1],
            *args,
            validation_data=(val_ds[ds_key][0], val_ds[ds_key][1]),
            batch_size=hp.Int('batch_size', min_value=32, max_value=256, step=32),
            **kwargs,
        )

In [None]:
import datetime as dt

new_proj = 'conv_02'
# exist_proj = 'KTBO_2022-08-21_14:57'

# Name handed to the tuner as project_name (together with `directory` below).
proj = new_proj

# Bayesian-optimisation search over ConvPredictor's hyperparameters, ranked by
# val_loss: up to 120 trials, each executed 3 times, seeded with 42.
tuner = kt.tuners.BayesianOptimization(
    ConvPredictor(),
    seed=42,
    objective='val_loss',
    max_trials=120,
    executions_per_trial=3,
    directory='../projects/particle/kt/bo',
    project_name=proj
)

# No arrays are passed here: ConvPredictor.fit looks the data up itself. The keyword
# arguments are presumably forwarded to that fit() call — confirm against KerasTuner.
tuner.search(epochs=100, shuffle=False, callbacks=[rlr_cb, ely_cb])

In [None]:
best_hp = tuner.get_best_hyperparameters()[0]

# Print every hyperparameter the best trial actually recorded. The previous
# hard-coded list asked for names such as 'conv_filters', 'kernel_size' and
# 'pool_size', which ConvPredictor never registers (it uses per-layer names like
# 'conv_filters_0' and 'max_pool_size'), so those lookups could not succeed.
print("\n".join(f"{hp_name} : {hp_value}" for hp_name, hp_value in best_hp.values.items()))

In [None]:
# NOTE(review): 'struct', 'conv_units_*', 'conv_strides_*', 'kernel_size_*' and
# 'pool_size' are not registered by the ConvPredictor defined in this notebook
# (it uses 'conv_filters_{i}', 'kernel_size_{i}' from index 0, 'max_pool_size', ...).
# This cell looks like a leftover from an earlier search space — confirm or remove.
best_hp = tuner.get_best_hyperparameters()[0]
print(f'''
struct: {best_hp.get('struct')}
conv_units_1: {best_hp.get('conv_units_1')}
conv_strides_1: {best_hp.get('conv_strides_1')}
kernel_size_1: {best_hp.get('kernel_size_1')}
conv_units_2: {best_hp.get('conv_units_2')}
conv_strides_2: {best_hp.get('conv_strides_2')}
kernel_size_2: {best_hp.get('kernel_size_2')}
pool_size: {best_hp.get('pool_size')}
fc_units: {best_hp.get('fc_units')}
leaky_relu: {best_hp.get('leaky_relu')}
fc_dropout: {best_hp.get('fc_dropout')}
''')

In [None]:
# NOTE(review): 'struct', 'conv_units_1' and 'conv_strides' are not registered by the
# ConvPredictor defined in this notebook, and 'gru_units_2' / 'gru_dropout_2' can never
# exist there because its RNN layers are numbered 0 and 1. This cell looks like a
# leftover from an earlier search space — confirm or remove.
best_hp = tuner.get_best_hyperparameters()[0]

print(
    f"""
struct : {best_hp.get('struct')}
conv_units_1 : {best_hp.get('conv_units_1')}
kernel_size_1 : {best_hp.get('kernel_size_1')}
conv_strides : {best_hp.get('conv_strides')}
gru_units_1 : {best_hp.get('gru_units_1')}
gru_units_2 : {best_hp.get('gru_units_2')}
fc_units : {best_hp.get('fc_units')}
leaky_relu : {best_hp.get('leaky_relu')}
gru_dropout_1 : {best_hp.get('gru_dropout_1')}
gru_dropout_2 : {best_hp.get('gru_dropout_2')}
fc_dropout : {best_hp.get('fc_dropout')}
learning_rate : {best_hp.get('learning_rate')}
batch_size : {best_hp.get('batch_size')}
"""
)

In [None]:
# NOTE(review): `best_model` is not assigned in any cell visible above — presumably it
# should come from the tuner; define it before this cell so Run All works.
best_model.summary()

In [None]:
# Predict each split separately.
# NOTE(review): X_train / X_val / X_test are not defined in the visible cells.
train_pred = best_model.predict(X_train)
val_pred = best_model.predict(X_val)
test_pred = best_model.predict(X_test)

# All predictions stacked along the sample axis in train -> val -> test order.
pred = np.concatenate((train_pred, val_pred, test_pred), axis=0)

In [None]:
# Collect per-segment predictions of `best_model` and scatter predicted vs. measured PM2.5.
# NOTE(review): conv_to_dataset, min_max_scale, meta, FEATURE_LABEL, TARGET_LABEL,
# WINDOW_SIZE, OUTPUT_SIZE, OFFSET and X_train / X_val are not defined in the visible
# cells — this cell presumably predates the `dh`-based pipeline above; confirm.
results = []

for dd in dfs:
    x_ele, y_real = conv_to_dataset(
        min_max_scale(
            dd, [
                'PM2.5_OUT',
                'PM2.5_H_OUT',
                'PM1_2.5_OUT',
                'PM1_2.5_H_OUT',
                'PM2.5_10_OUT',
                'PM2.5_10_H_OUT',
                'PERSON_NUMBER',
            ], meta
        ), FEATURE_LABEL, TARGET_LABEL, WINDOW_SIZE, OUTPUT_SIZE, OFFSET, verbose=False
    )
    y_hat = best_model.predict(x_ele, verbose=0)
    # Drop the leading rows so the frame lines up with the predictions
    # (assumes exactly WINDOW_SIZE + OFFSET + 1 rows have no prediction — TODO confirm).
    _tmp_df = dd.iloc[WINDOW_SIZE + OFFSET + 1:].copy()
    _tmp_df['PM1_PRED'] = y_hat[:, 0]
    _tmp_df['PM2.5_PRED'] = y_hat[:, 1]
    _tmp_df['PM10_PRED'] = y_hat[:, 2]
    results.append(_tmp_df)

res_df = pd.concat(results)
# Rows are sliced by the train/val sample counts: the val-sized slice is drawn in
# yellow ('y'), everything after it in green ('g').
ax = res_df[len(X_train):len(X_train) + len(X_val)].plot.scatter(x='PM2.5', y='PM2.5_PRED', c='y', figsize=(15, 15))
res_df[len(X_train) + len(X_val):].plot.scatter(x='PM2.5', y='PM2.5_PRED', c='g', figsize=(15, 15), ax=ax)
# Common range covering both axes, used to draw the y = x reference line.
lims = [
    np.min([ax.get_xlim(), ax.get_ylim()]),
    np.max([ax.get_xlim(), ax.get_ylim()]),
]

ax.plot(lims, lims, 'r-', linewidth=2, alpha=0.75, zorder=2)
ax.set_aspect('equal')

In [None]:
def _result_frame(real, predicted):
    """Pair measured and predicted PM1 / PM2.5 / PM10 columns in one DataFrame.

    Both arguments are indexed as 2-D arrays whose columns 0, 1, 2 hold PM1,
    PM2.5 and PM10 respectively.
    """
    return pd.DataFrame(
        {
            'pm1 real': real[:, 0],
            'pm2.5 real': real[:, 1],
            'pm10 real': real[:, 2],
            'pm1 pred': predicted[:, 0],
            'pm2.5 pred': predicted[:, 1],
            'pm10 pred': predicted[:, 2],
        }
    )


# `y` carries an extra axis in position 1; only its first entry is reported.
total_result = _result_frame(y[:, 0], pred)
train_result = _result_frame(y_train, train_pred)
val_result = _result_frame(y_val, val_pred)
test_result = _result_frame(y_test, test_pred)

In [None]:
from matplotlib import pyplot as plt

# Nine stacked panels: for each split (train, validation, test) one panel per
# pollutant showing the measured and the predicted series.
fig, axes = plt.subplots(nrows=9, ncols=1, figsize=(26, 40))

split_frames = [(train_result, 'Training'), (val_result, 'Validation'), (test_result, 'Testing')]
pollutants = [('pm1', 'PM1'), ('pm2.5', 'PM2.5'), ('pm10', 'PM10')]

for split_idx, (frame, split_title) in enumerate(split_frames):
    for pollutant_idx, (column_prefix, label) in enumerate(pollutants):
        panel = axes[split_idx * 3 + pollutant_idx]
        frame.plot(
            kind='line',
            y=[f'{column_prefix} real', f'{column_prefix} pred'],
            ax=panel,
            title=f'{label} {split_title}',
        )
        # Raw string: '\m' is not a valid Python escape, so the non-raw form
        # triggers an invalid-escape warning on current interpreters.
        panel.set_ylabel(rf'{label} [$\mu g/m^3$]', fontsize=13)

# Applies to the current axes only, as in the original cell.
plt.xlabel('Time [min]', fontsize=13)

In [None]:
cols = ['pm1', 'pm2.5', 'pm10']

# Validation and test rows are reported together as "Test".
# `drop` is a boolean flag; the original passed the string 'index', which only
# worked because any non-empty string is truthy.
val_test_res = pd.concat([val_result, test_result]).reset_index(drop=True)

# (printed label, metric function) in report order; also the CSV row order.
metric_table = [
    ('R square', calc_r2),
    ('NMSE', calc_nmse),
    ('FB', calc_fb),
    ('B', calc_b),
    ('Correlation Coeff', calc_corrcoef),
]
# (printed label / CSV column, frame) in report order.
split_table = [
    ('Total', total_result),
    ('Train', train_result),
    ('Test', val_test_res),
]

for col in cols:
    print(f'======== {col} prediction results ========')
    real_col, pred_col = col + ' real', col + ' pred'
    report = {split_name: [] for split_name, _ in split_table}

    for metric_name, metric_fn in metric_table:
        for split_name, frame in split_table:
            value = metric_fn(frame[real_col].values, frame[pred_col].values)
            report[split_name].append(value)
            print(f'{split_name} {metric_name}: ', value)
        print()

    # NOTE(review): absolute, machine-specific output path — consider a
    # project-relative directory so the notebook runs on other machines.
    pd.DataFrame(report).to_csv(
        f'C:/Users/son/Desktop/lstm/lstm/training/result/result_{col}.csv', index=False, float_format='%.3f')



In [None]:
from keras.layers import Input, Dense, Conv1D, MaxPooling1D, Dropout, GlobalMaxPooling1D, GlobalAveragePooling1D, LSTM, BatchNormalization, LeakyReLU, TimeDistributed, GRU
from keras.optimizers import Adam
from keras import Model
from keras.metrics import RootMeanSquaredError


def build_model():
    """Build and compile the Conv1D -> GRU regressor.

    The input shape is taken from ``X_train`` and the output width from
    ``y.shape[2]``. The model name and the learning rate come from
    ``hyper_params``; the loss is ``root_mean_squared_error`` — all notebook
    globals that must exist before this function is called.
    """
    input_tensor = Input(shape=(X_train.shape[1], X_train.shape[2]), name='input')

    x = Conv1D(512, kernel_size=3, strides=1, padding='valid')(input_tensor)
    # Feed the convolution output into the GRU. It used to be wired to
    # `input_tensor`, which left the Conv1D layer disconnected from the model.
    x = GRU(units=256, activation='tanh', kernel_initializer='he_uniform', return_sequences=True)(x)
    # x = GRU(units=192, activation='tanh', kernel_initializer='he_uniform', return_sequences=True)(x)
    x = GlobalMaxPooling1D()(x)
    x = Dense(128, activation=LeakyReLU(alpha=0.05))(x)
    x = Dropout(0.45)(x)
    # Activation passed by keyword rather than as a bare positional string.
    output = Dense(y.shape[2], activation='relu', name='output')(x)

    model = Model(inputs=input_tensor, outputs=output, name=f'lstm_v{hyper_params["version"]}')

    model.compile(
        optimizer=Adam(learning_rate=hyper_params["lr"]),
        loss=root_mean_squared_error,
        # compile() documents `metrics` as a list; newer Keras rejects a bare metric.
        metrics=[RootMeanSquaredError()],
    )

    return model

In [None]:
# Settings for the hand-built model: build_model reads "lr" and "version", the training
# cell reads "epochs", and the checkpoint directory name is derived from "version".
# NOTE(review): BATCH_SIZE, WINDOW_SIZE and OFFSET are not defined in the visible cells.
hyper_params = {
    "name": "PM2.5 Prediction test",
    "lr": 0.0001,
    "batch_size": BATCH_SIZE,
    "epochs": 300,
    "version": "C1",
    "window_size": WINDOW_SIZE,
    "offset": OFFSET,
}

In [None]:
from keras import backend
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
import os
import shutil


def root_mean_squared_error(y_true, y_pred):
    """Keras loss: square root of the mean squared difference over all elements."""
    squared_error = backend.square(y_pred - y_true)
    return backend.sqrt(backend.mean(squared_error))


# Per-version output directory for the checkpoints written during training.
res_dir = f'training/v{hyper_params["version"]}'

# Start from an empty directory: anything left there by an earlier run is deleted.
if os.path.exists(res_dir):
    shutil.rmtree(res_dir)

os.makedirs(res_dir)

# These rebind the `rlr_cb` / `ely_cb` names created for the tuner earlier in the notebook.
rlr_cb = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, mode='min', verbose=1)
ely_cb = EarlyStopping(monitor='val_loss', patience=15, mode='min', verbose=1)
# Weights-only checkpoint with epoch number and val_loss in the file name.
# The deprecated `period=1` argument is dropped: saving every epoch is the default,
# and recent Keras releases no longer accept `period`.
mcp_cb = ModelCheckpoint(
    filepath=res_dir + '/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
    monitor='val_loss',
    save_weights_only=True,
    mode='min',
    verbose=0
)

In [None]:
from keras.layers import Input, Dense, Conv1D, MaxPooling1D, Dropout, GlobalMaxPooling1D, GlobalAveragePooling1D, LSTM, BatchNormalization, LeakyReLU, TimeDistributed
from keras.optimizers import Adam
from keras import Model
from keras.metrics import RootMeanSquaredError


def build_model():
    """Build and compile the Conv1D -> LSTM -> LSTM regressor.

    This definition rebinds ``build_model`` and therefore replaces the GRU
    variant defined earlier in the notebook. The input shape is taken from
    ``X_train`` and the output width from ``y.shape[2]``; the model name and the
    learning rate come from ``hyper_params`` and the loss is
    ``root_mean_squared_error`` — all notebook globals.
    """
    input_tensor = Input(shape=(X_train.shape[1], X_train.shape[2]), name='input')

    x = Conv1D(128, kernel_size=3, strides=1, padding='valid')(input_tensor)
    x = LSTM(units=256, activation='tanh', kernel_initializer='he_uniform', return_sequences=True)(x)
    x = LSTM(units=192, activation='tanh', kernel_initializer='he_uniform', return_sequences=True)(x)
    # Collapse the time axis by keeping each feature's maximum.
    x = GlobalMaxPooling1D()(x)
    x = Dense(128, activation=LeakyReLU(alpha=0.05))(x)
    x = Dropout(0.45)(x)
    # Linear output layer (no activation).
    output = Dense(y.shape[2], name='output')(x)

    model = Model(inputs=input_tensor, outputs=output, name=f'lstm_v{hyper_params["version"]}')

    model.compile(
        optimizer=Adam(learning_rate=hyper_params["lr"]),
        loss=root_mean_squared_error,
        # compile() documents `metrics` as a list; newer Keras rejects a bare metric.
        metrics=[RootMeanSquaredError()],
    )

    return model

In [None]:
# Collect per-segment predictions of `lstm` and scatter predicted vs. measured PM2.5.
# NOTE(review): `lstm` is only created in a later cell, and conv_to_dataset,
# min_max_scale, meta, FEATURE_LABEL, TARGET_LABEL, WINDOW_SIZE, OUTPUT_SIZE, OFFSET and
# X_train / X_val are not defined in the visible cells — confirm the intended cell order.
results = []

for dd in dfs:
    x_ele, y_real = conv_to_dataset(
        min_max_scale(
            dd, [
                'PM2.5_OUT',
                'PM2.5_H_OUT',
                'PM1_2.5_OUT',
                'PM1_2.5_H_OUT',
                'PM2.5_10_OUT',
                'PM2.5_10_H_OUT',
                'PERSON_NUMBER',
            ], meta
        ), FEATURE_LABEL, TARGET_LABEL, WINDOW_SIZE, OUTPUT_SIZE, OFFSET, verbose=False
    )
    y_hat = lstm.predict(x_ele, verbose=0)
    # Drop the leading rows so the frame lines up with the predictions
    # (assumes exactly WINDOW_SIZE + OFFSET + 1 rows have no prediction — TODO confirm).
    _tmp_df = dd.iloc[WINDOW_SIZE + OFFSET + 1:].copy()
    _tmp_df['PM1_PRED'] = y_hat[:, 0]
    _tmp_df['PM2.5_PRED'] = y_hat[:, 1]
    _tmp_df['PM10_PRED'] = y_hat[:, 2]
    results.append(_tmp_df)

res_df = pd.concat(results)
# Rows are sliced by the train/val sample counts: the val-sized slice is drawn in
# yellow ('y'), everything after it in green ('g').
ax = res_df[len(X_train):len(X_train) + len(X_val)].plot.scatter(x='PM2.5', y='PM2.5_PRED', c='y', figsize=(15, 15))
res_df[len(X_train) + len(X_val):].plot.scatter(x='PM2.5', y='PM2.5_PRED', c='g', figsize=(15, 15), ax=ax)
# Common range covering both axes, used to draw the y = x reference line.
lims = [
    np.min([ax.get_xlim(), ax.get_ylim()]),
    np.max([ax.get_xlim(), ax.get_ylim()]),
]

ax.plot(lims, lims, 'r-', linewidth=2, alpha=0.75, zorder=2)
ax.set_aspect('equal')

In [None]:
# Instantiate whichever build_model definition was executed most recently
# (the notebook defines that name twice) and show its layer summary.
lstm = build_model()
lstm.summary()

In [None]:
# with tf.device('/device:GPU:0'):
# Train without shuffling, validating on (X_val, y_val), with the LR-reduction,
# early-stopping and checkpoint callbacks.
# NOTE(review): the batch size comes from the BATCH_SIZE global, not hyper_params["batch_size"].
history = lstm.fit(x=X_train, y=y_train, batch_size=BATCH_SIZE, shuffle=False, epochs=hyper_params["epochs"],
                   validation_data=(X_val, y_val),
                   callbacks=[rlr_cb, ely_cb, mcp_cb])

# Learning curves: training vs. validation loss per epoch.
plt.figure(figsize=(28, 10))
plt.plot(history.history['loss'], "o--", label='train')
plt.plot(history.history['val_loss'], "o--", label='valid')
plt.xlabel('Epochs', fontsize=15)
plt.ylabel('Loss - RMSE', fontsize=15)
plt.legend(fontsize=15)

# Predict each split with the trained model.
train_pred = lstm.predict(X_train)
val_pred = lstm.predict(X_val)
test_pred = lstm.predict(X_test)

# All predictions stacked along the sample axis in train -> val -> test order.
pred = np.concatenate((train_pred, val_pred, test_pred), axis=0)

In [None]:
from sklearn.metrics import r2_score


def calc_nmse(real, pred):
    """Normalised mean squared error between two NumPy arrays.

    Computed as ``sum((real - pred)**2) * n / (sum(real) * sum(pred))``, which
    equals the mean squared error divided by the product of the two means.
    """
    squared_error_total = np.sum(np.square(real - pred))
    sample_count = len(real)
    normaliser = real.sum() * pred.sum()
    return squared_error_total * sample_count / normaliser


def calc_b(real, pred):
    """Slope of the least-squares line of ``pred`` regressed on ``real``.

    Sum of the products of the two centred series divided by the sum of squares
    of the centred ``real`` series.
    """
    real_centred = real - real.mean()
    pred_centred = pred - pred.mean()
    real_sum_of_squares = np.sum(np.square(real_centred))
    return np.sum(pred_centred * real_centred) / real_sum_of_squares

def calc_fb(_real, _pred):
    """Fractional bias: ``2 * (mean(pred) - mean(real)) / (mean(pred) + mean(real))``."""
    mean_real = _real.mean()
    mean_pred = _pred.mean()
    difference = mean_pred - mean_real
    total = mean_pred + mean_real
    return 2 * difference / total

def calc_r2(real, pred):
    """Coefficient of determination of ``pred`` against ``real`` (sklearn ``r2_score``)."""
    score = r2_score(real, pred)
    return score


def calc_corrcoef(real, pred):
    """Pearson correlation coefficient between ``real`` and ``pred``."""
    correlation_matrix = np.corrcoef(real, pred)
    # Off-diagonal entry of the 2x2 matrix is the real-vs-pred correlation.
    return correlation_matrix[0, 1]

In [None]:
cols = ['pm1', 'pm2.5', 'pm10']

# Validation and test rows are reported together as "Test".
# `drop` is a boolean flag; the original passed the string 'index', which only
# worked because any non-empty string is truthy.
val_test_res = pd.concat([val_result, test_result]).reset_index(drop=True)

# (printed label, metric function) in report order.
metric_table = [
    ('R square', calc_r2),
    ('NMSE', calc_nmse),
    ('FB', calc_fb),
    ('B', calc_b),
    ('Correlation Coeff', calc_corrcoef),
]
# (printed label, frame) in report order.
split_table = [
    ('Total', total_result),
    ('Train', train_result),
    ('Test', val_test_res),
]

for col in cols:
    print(f'======== {col} prediction results ========')
    real_col, pred_col = col + ' real', col + ' pred'
    # One list of metric values per split, in metric_table order.
    report = {split_name: [] for split_name, _ in split_table}

    for metric_name, metric_fn in metric_table:
        for split_name, frame in split_table:
            value = metric_fn(frame[real_col].values, frame[pred_col].values)
            report[split_name].append(value)
            print(f'{split_name} {metric_name}: ', value)
        print()

    # CSV export is disabled in this cell. To re-enable it:
    # pd.DataFrame(report).to_csv(
    #     f'/content/drive/MyDrive/result_{col}.csv', index=False, float_format='%.3f')

In [None]:
from keras.utils import plot_model

# Render the layer graph of `lstm`, including tensor shapes.
plot_model(lstm, show_shapes=True)

In [None]:
# Save the model as an .h5 file.
# NOTE(review): absolute, machine-specific Windows path — consider a project-relative one.
lstm.save('C:/Users/son/Desktop/lstm/lstm/training/model_G2.h5')

In [None]:
# Save only the weights.
# NOTE(review): absolute, machine-specific Windows path — consider a project-relative one.
lstm.save_weights('C:/Users/son/Desktop/lstm/lstm/training/model_G2_weights.h5')

In [None]:
from tensorflow.keras.utils import plot_model

# Render the layer graph of `lstm` again, this time with plot_model imported from
# tensorflow.keras.utils — duplicates the earlier plot_model cell.
plot_model(lstm, show_shapes=True)

In [None]:
# Save the model as an .h5 file under /content/drive/MyDrive.
# NOTE(review): presumably a mounted Google Drive in Colab — the path will not exist elsewhere.
lstm.save('/content/drive/MyDrive/saved_models/modelA-3.h5')

In [None]:
# Save only the weights under /content/drive/MyDrive.
# NOTE(review): presumably a mounted Google Drive in Colab — the path will not exist elsewhere.
lstm.save_weights('/content/drive/MyDrive/saved_weights/modelA-3_weights.h5')