# Pre-training and Reinforcement Learning

## Imports

In [None]:
import time
import gc
import warnings
warnings.filterwarnings('ignore')

'''Main'''
import numpy as np
import pandas as pd
# pd.options.display.float_format = '{:.2f}'.format
import glob

'''Data Viz'''
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

'''Data Prep and Model Evaluation'''
from sklearn.model_selection import StratifiedKFold 
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import log_loss
from sklearn.metrics import precision_recall_curve, average_precision_score
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.metrics import confusion_matrix, accuracy_score


'''Tensorflow and Keras'''
import tensorflow as tf
from tensorflow import keras
# K = keras.backend

from keras.models import Sequential, Model
from keras.layers import Activation, Dense, Dropout, LSTM, Conv1D
from keras.layers import BatchNormalization, Input, Lambda
from keras.callbacks import EarlyStopping
from keras import regularizers
from keras.losses import mse
from keras.utils.vis_utils import plot_model

'''My module'''
# from models.AE import Autoencoder
from utils.preprocess import create_windows, load_csv, scaler, synthesize_vectors
from utils.visualize import visualize_loss

'''check gpu'''
# from tensorflow.python.client import device_lib
# device_lib.list_local_devices()

## Load data

In [None]:
# Placeholder event-id -> name mapping and name list covering ids 0-11;
# every name is left as an empty string here.
event_dir = {event_id: "" for event_id in range(12)}
event_list = [""] * 12

# Twelve colour names (presumably one per event id, for plotting).
colors = [
    "red", "blue", "orange", "green", "black", "tan",
    "gray", "purple", "cyan", "yellow", "pink", "magenta",
]

# Column names of the left/right CSVs: three acceleration axes, bpm, temp
# and the event label.
left_columns_name = ["L_" + field for field in ("accX", "accY", "accZ", "bpm", "temp")] + ["event"]
right_columns_name = ["R_" + field for field in ("accX", "accY", "accZ", "bpm", "temp")] + ["event"]

### Train

In [None]:
# Collect the CSV paths of the training recordings (directories data_00 .. data_29).
# glob.glob() returns matches in arbitrary, filesystem-dependent order, so both
# lists are sorted: the row order of df_train becomes reproducible, and the left
# and right lists line up by directory (assuming each directory holds matching
# left*/right* files -- TODO confirm how load_csv pairs them).
left_train_files = sorted(glob.glob("../../../../../data/data_[0-2][0-9]/left*.csv"))
right_train_files = sorted(glob.glob("../../../../../data/data_[0-2][0-9]/right*.csv"))

df_train = load_csv(left_files=left_train_files, right_files=right_train_files)
print(pd.unique(df_train["event"]))  # event labels present in the training data
df_train

### Test

In [None]:
# Collect the CSV paths of the held-out recordings (directories data_45 .. data_49).
# glob.glob() returns matches in arbitrary, filesystem-dependent order, so both
# lists are sorted: the row order of df_test becomes reproducible, and the left
# and right lists line up by directory (assuming each directory holds matching
# left*/right* files -- TODO confirm how load_csv pairs them).
left_files = sorted(glob.glob("../../../../../data/data_[4][5-9]/left*.csv"))
right_files = sorted(glob.glob("../../../../../data/data_[4][5-9]/right*.csv"))

df_test = load_csv(left_files=left_files, right_files=right_files)
print(pd.unique(df_test["event"]))  # event labels present in the test data
df_test

## Pre-process

### Standardize

#### Train

In [None]:
# Scale the training frame with the project scaler (method="nrm"), then print
# each event label's share of the rows, ordered by label.
df_std_train = scaler(df_train, method="nrm")
l = df_std_train["event"].value_counts()
print(l.sort_index().div(len(df_std_train)))
df_std_train

#### Test

In [None]:
# Scale the test frame with the project scaler (method="nrm"), then print
# each event label's share of the rows, ordered by label.
# NOTE(review): the test frame is scaled by its own call, separately from the
# training frame -- confirm this is intended.
df_std_test = scaler(df_test, method="nrm")
l = df_std_test["event"].value_counts()
print(l.sort_index().div(len(df_std_test)))
df_std_test

### Synthesize vectors

#### Train

In [None]:
# X_train_left = synthesize_vectors(x=df_std_train["L_accX"], y=df_std_train["L_accY"], z=df_std_train["L_accZ"])
# X_train_right = synthesize_vectors(x=df_std_train["R_accX"], y=df_std_train["R_accY"], z=df_std_train["R_accZ"])

# X_train_left, X_train_right

#### Test

In [None]:
# X_test_left = synthesize_vectors(x=df_std_test["L_accX"], y=df_std_test["L_accY"], z=df_std_test["L_accZ"])
# X_test_right = synthesize_vectors(x=df_std_test["R_accX"], y=df_std_test["R_accY"], z=df_std_test["R_accZ"])

# X_test_left, X_test_right

#### Or: use the three acceleration axes directly

In [None]:
# Keep only the three scaled acceleration columns of each side as model
# features, for the training and the test frame.
X_train_left, X_train_right = (
    df_std_train.loc[:, ["L_accX", "L_accY", "L_accZ"]],
    df_std_train.loc[:, ["R_accX", "R_accY", "R_accZ"]],
)

X_test_left, X_test_right = (
    df_std_test.loc[:, ["L_accX", "L_accY", "L_accZ"]],
    df_std_test.loc[:, ["R_accX", "R_accY", "R_accZ"]],
)

### Sliding window

In [None]:
# Sliding-window length and step, both passed to create_windows
# (presumably counted in samples).
window_size, step_size = 48, 16

#### Train

In [None]:
# Cut the left features, the right features and the (unscaled) event column of
# the training data into windows with identical window/step settings.
X_train_left_win, X_train_right_win, y_train_win = (
    create_windows(source, window_size=window_size, step_size=step_size)
    for source in (X_train_left, X_train_right, df_train["event"])
)

X_train_left_win.shape, X_train_right_win.shape, y_train_win.shape

In [None]:
# Label every training window with its most frequent event id
# (np.argmax resolves ties in favour of the smallest id).
label = [np.argmax(np.bincount(window)) for window in y_train_win]

# One-hot encode the window labels as a dense array. The `sparse` keyword was
# renamed to `sparse_output` in scikit-learn 1.2 and removed in 1.4, so try the
# new spelling first and fall back to the old one on older installations.
try:
    OE = OneHotEncoder(sparse_output=False)
except TypeError:
    OE = OneHotEncoder(sparse=False)
y_train = OE.fit_transform(pd.DataFrame(label))
y_train.shape

#### Test

In [None]:
# Cut the left features, the right features and the (unscaled) event column of
# the test data into windows with identical window/step settings.
X_test_left_win, X_test_right_win, y_test_win = (
    create_windows(source, window_size=window_size, step_size=step_size)
    for source in (X_test_left, X_test_right, df_test["event"])
)

X_test_left_win.shape, X_test_right_win.shape, y_test_win.shape

In [None]:
# Label every test window with its most frequent event id
# (np.argmax resolves ties in favour of the smallest id).
label = [np.argmax(np.bincount(window)) for window in y_test_win]

# Encode with the encoder that was fitted on the training labels instead of
# fitting a new one: a freshly fitted encoder orders its columns by the classes
# present in the test set, so a class missing from the test data would shift the
# column indices away from the ones the classifier was trained on.
# transform() raises if the test data contains a label never seen in training.
y_test = OE.transform(pd.DataFrame(label))
y_test.shape

In [None]:
# Collapse the one-hot rows back to class indices (used by the sklearn metrics
# further down). Re-running this cell on the already collapsed array fails.
y_test = y_test.argmax(axis=1)

#### Reshape

In [None]:
# Flatten every window into a single row of window_size*3 values.
# NOTE(review): assuming the windows are (n_windows, window_size, 3), order="F"
# lays each row out axis by axis (all X samples, then Y, then Z) -- confirm
# against create_windows.
X_train_left_win, X_train_right_win, X_test_left_win, X_test_right_win = (
    windows.reshape(windows.shape[0], window_size * 3, order="F")
    for windows in (X_train_left_win, X_train_right_win, X_test_left_win, X_test_right_win)
)

X_train_left_win.shape, X_train_right_win.shape, X_test_left_win.shape, X_test_right_win.shape

## Stacked AutoEncoder

### Modeling

#### First part

In [None]:
# Layer 1: flattened window -> 100-unit sigmoid code (batch-normalised) ->
# sigmoid reconstruction of the window_size*3 inputs.
# `shape` has to be a tuple; `(n)` is just the int n, so the one-element tuple
# is written out explicitly.
input_layer_1 = Input(shape=(X_train_left_win.shape[1],), name="input")
encoding_1 = Dense(100, activation="sigmoid", name="encoder_1")(input_layer_1)
encoding_1 = BatchNormalization()(encoding_1)
decoding_1 = Dense(window_size*3, activation="sigmoid", name="decoder_1")(encoding_1)

# whole autoencoder
autoencoder_1 = Model(inputs=input_layer_1, outputs=decoding_1)

# only encoder part; its output is the batch-normalised code. The model name is
# set through the constructor rather than the private `_name` attribute.
encoder_1 = Model(inputs=input_layer_1, outputs=encoding_1, name="first")

# autoencoder_1.summary()

In [None]:
# Render the layer graph of the first autoencoder, top to bottom, with shapes.
plot_model(
    autoencoder_1,
    show_shapes=True,
    show_dtype=False,
    show_layer_names=False,
    show_layer_activations=False,
    rankdir="TB",
    expand_nested=False,
    layer_range=None,
)
# rankdir="LR" gives a horizontal plot
# optional extra argument: dpi=120

In [None]:
# plot_model(encoder_1, show_shapes=True, show_dtype=False, show_layer_names=False, show_layer_activations=False,\
#             rankdir="TB", expand_nested=False, layer_range=None)

#### Second part

In [None]:
# Layer 2: 100-value code -> 50-unit sigmoid code (batch-normalised) ->
# sigmoid reconstruction of the 100 inputs.
# `shape` has to be a tuple; `(100)` is just the int 100, so the one-element
# tuple is written out explicitly.
input_layer_2 = Input(shape=(100,), name="input")

encoding_2 = Dense(50, activation="sigmoid", name="encoder_2")(input_layer_2)
encoding_2 = BatchNormalization()(encoding_2)
decoding_2 = Dense(100, activation="sigmoid", name="decoder_2")(encoding_2)

# whole autoencoder
autoencoder_2 = Model(inputs=input_layer_2, outputs=decoding_2)

# only encoder part; its output is the batch-normalised code. The model name is
# set through the constructor rather than the private `_name` attribute.
encoder_2 = Model(inputs=input_layer_2, outputs=encoding_2, name="second")

# encoder_2.summary()

In [None]:
# Render the layer graph of the second autoencoder, top to bottom, with shapes.
plot_model(
    autoencoder_2,
    show_shapes=True,
    show_dtype=False,
    show_layer_names=False,
    show_layer_activations=False,
    rankdir="TB",
    expand_nested=False,
    layer_range=None,
)
# rankdir="LR" gives a horizontal plot
# optional extra argument: dpi=120

In [None]:
# plot_model(encoder_2, show_shapes=True, show_dtype=False, show_layer_names=False, show_layer_activations=False,\
#             rankdir="TB", expand_nested=False, layer_range=None)

#### Final part

In [None]:
# Stack: two new Dense layers with the same sizes as the two encoders (the
# pre-trained encoder weights are copied into them before fine-tuning),
# followed by a softmax classification layer.
stack_encoding_1 = Dense(100, activation="sigmoid")(input_layer_1)
stack_encoding_2 = Dense(50, activation="sigmoid")(stack_encoding_1)

# One output unit per one-hot column of the training target. `label` is not
# used for this because by now it holds the test-window labels, whose number of
# distinct classes can differ from the width of y_train.
output = Dense(y_train.shape[1], activation="softmax")(stack_encoding_2)

stacked_autoencoder = Model(inputs=input_layer_1, outputs=output)
stacked_autoencoder.summary()

In [None]:
# Both autoencoders are trained on reconstruction error (MSE) and report MAE.
autoencoder_1.compile(
    loss="mse",
    optimizer="adam",
    metrics=["mae"],
)
autoencoder_2.compile(
    loss="mse",
    optimizer="adam",
    metrics=["mae"],
)

# The encoder-only models get the same loss/optimizer settings.
encoder_1.compile(optimizer="adam", loss="mse")
encoder_2.compile(optimizer="adam", loss="mse")

# The stacked classifier is trained on one-hot targets.
stacked_autoencoder.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=['accuracy'],
)

### Learning

#### First autoencoder

In [None]:
start = time.time()

# Stop once val_loss has not improved for 3 epochs; this callback object is
# reused by the later training cells.
early_stopping = EarlyStopping(
    monitor="val_loss",
    min_delta=0.0,
    patience=3,
    verbose=1,
)

# Train the first autoencoder to reconstruct its own input.
epochs, validation_split = 30, 0.1
batch_size = window_size * 3  # 144
history = autoencoder_1.fit(
    X_train_left_win,
    X_train_left_win,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=validation_split,
    callbacks=[early_stopping],
    shuffle=False,
)

# `end` holds the elapsed wall-clock time in seconds.
end = time.time() - start
print(f"\n{round(end, 2)} sec taken")

In [None]:
# Plot the training history of the first autoencoder with the project helper;
# the second argument is passed as an empty string.
visualize_loss(history, "")

In [None]:
# Encode the training windows with the first encoder (Dense followed by
# BatchNormalization); these codes are the training data of the second autoencoder.
first_layer_code = encoder_1.predict(X_train_left_win)
print(first_layer_code.shape)

#### Second autoencoder

In [None]:
start = time.time()

# Train the second autoencoder to reconstruct the first-layer codes.
# `validation_split` and `early_stopping` are the objects defined in the
# first-autoencoder training cell.
epochs, batch_size = 30, 64
# validation_split = 0.1
history = autoencoder_2.fit(
    first_layer_code,
    first_layer_code,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=validation_split,
    callbacks=[early_stopping],
    shuffle=False,
)

# `end` holds the elapsed wall-clock time in seconds.
end = time.time() - start
print(f"\n{round(end, 2)} sec taken")

In [None]:
# Plot the training history of the second autoencoder with the project helper;
# the second argument is passed as an empty string.
visualize_loss(history, "")

#### Stacked

In [None]:
# encoder_1.layers[1].weights

In [None]:
# Initialise the two hidden Dense layers of the classifier with the weights of
# the pre-trained encoder Dense layers (index 1 in each encoder model, right
# after its input layer) and freeze them so that only the softmax layer trains.
stacked_autoencoder.layers[1].set_weights(encoder_1.layers[1].get_weights())
stacked_autoencoder.layers[2].set_weights(encoder_2.layers[1].get_weights())
stacked_autoencoder.layers[1].trainable = False
stacked_autoencoder.layers[2].trainable = False

# Keras only guarantees that a changed `trainable` flag is honoured after
# compile() has been called again, so recompile with the same settings as the
# earlier compile call. No training has happened on this model yet, so
# resetting the optimizer loses nothing.
stacked_autoencoder.compile(loss="categorical_crossentropy", optimizer="adam", metrics=['accuracy'])
stacked_autoencoder.summary()

In [None]:
start = time.time()

# Train the stacked classifier on the one-hot training labels.
# `validation_split` and `early_stopping` are the objects defined in the
# first-autoencoder training cell.
epochs, batch_size = 20, 64
# validation_split = 0.1
history = stacked_autoencoder.fit(
    X_train_left_win,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=validation_split,
    callbacks=[early_stopping],
    shuffle=False,
)

# `end` holds the elapsed wall-clock time in seconds.
end = time.time() - start
print(f"\n{round(end, 2)} sec taken")

In [None]:
# stacked_autoencoder.save("my_model.h5")

In [None]:
# visualize_loss(history, "autoencoder")
# Plot training vs. validation accuracy per epoch of the last fit.
# NOTE: despite their names, `loss` and `val_loss` hold accuracy values here.
loss, val_loss = history.history["accuracy"], history.history["val_accuracy"]
# mae = history.history["mae"]
epochs = range(1, len(loss) + 1)  # 1-based epoch numbers for the x axis

plt.figure(figsize=(6, 4))
plt.plot(epochs, loss, "b", label="Training acc")
plt.plot(epochs, val_loss, "r", label="Validation acc")
plt.title("")
# For short runs, put a tick on every epoch.
if len(epochs) < 15:
    plt.xticks(list(epochs))
plt.xlabel("Epochs")
plt.ylabel("Acc")
plt.legend()
plt.show()

In [None]:
# Softmax outputs of the stacked classifier for the test windows in X_test_left_win.
y_pred = stacked_autoencoder.predict(X_test_left_win)
print(y_pred.shape)

In [None]:
# Reduce each row of softmax outputs to the index of its largest entry.
# Re-running this cell on the already reduced array fails.
y_pred = y_pred.argmax(axis=1)

In [None]:
# Event names used to label the confusion-matrix rows and columns
# (replaces the blank placeholder list defined near the top).
event_list = [
    "work",
    "walk",
    "down_stairs",
    "up_stairs",
    "drive",
    "shower",
    "meal",
    "toilet",
    "sleep",
]

In [None]:
# Display the predicted class indices.
y_pred 

In [None]:
# Fraction of test windows whose predicted class index equals the true one.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Display the predicted class indices again.
y_pred

In [None]:
# Confusion matrix of true vs. predicted class indices, labelled with event_list.
# Without an explicit `labels` argument the matrix only covers the classes that
# actually occur in y_test/y_pred, so a class missing from both would make the
# matrix smaller than event_list and the DataFrame construction below fail.
n_classes = len(event_list)

# confusion_matrix silently ignores samples whose label is not in `labels`,
# so reject out-of-range indices explicitly instead of dropping them.
if max(int(y_test.max()), int(y_pred.max())) >= n_classes:
    raise ValueError(
        f"class index out of range for event_list ({n_classes} names)"
    )

mat = confusion_matrix(y_test, y_pred, labels=list(range(n_classes)))
mat = pd.DataFrame(data=mat, index=event_list, columns=event_list)
sns.heatmap(mat, square=True, cbar=True, annot=True, cmap='Blues')
plt.yticks(rotation=0)
plt.xlabel("Prediction", fontsize=13, rotation=0)
plt.ylabel("True", fontsize=13)

In [None]:
# mat.to_csv("baseline_result_2.csv")

In [None]:
# mat_dec = np.round(mat / np.sum(mat, axis=1), decimals=2)

# fig, axes = plt.subplots(1, 2, figsize=(10, 10))
# kwargs = dict(square=True, annot=True, cbar=False, cmap='RdPu')

# # Draw the two heatmaps
# for i, dat in enumerate([mat, mat_dec]):
#     sns.heatmap(dat, **kwargs, ax=axes[i])

# # Set the plot titles and the x- and y-axis labels
# for ax, t in zip(axes, ['Real number', 'Percentage(per row)']):
#     plt.axes(ax)
#     plt.title(t)
#     plt.xlabel('predicted value')
#     plt.ylabel('true value')