In [None]:
import os
import datetime
import time

import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint
from keras.preprocessing.sequence import TimeseriesGenerator


# Notebook-wide plotting defaults: figure size and grid, then the ggplot style sheet.
# NOTE(review): the style sheet is applied after these rcParams are set, so any key
# it defines itself takes precedence over the values given here — confirm that
# 'axes.grid' ends up as intended.
mpl.rcParams.update({
    'figure.figsize': (20, 8),
    'axes.grid': False,
})
plt.style.use('ggplot')

In [None]:

# Read the exported CSV, then discard its first column and its last three columns
# and renumber the rows from zero.
data_2 = pd.read_csv('../output/zero4_outputs.csv')
data_2 = (
    data_2
    .drop(columns=data_2.columns[0])
    .iloc[:, :-3]
    .reset_index(drop=True)
)

# Parse the 'Time' column into pandas datetimes (no explicit unit is passed).
data_2['Time'] = pd.to_datetime(data_2['Time'])

# '%S' field of every timestamp, as strings; later cells use this Series as the
# index (x-axis labels) of the plotted frames.
date_time = data_2['Time'].dt.strftime('%S')

# Preview the first rows of the prepared frame.
data_2.head()


In [None]:
plot_cols = ['Subcar_6', 'Subcar_7', 'Subcar_8']

# Whole recording: one subplot per selected column, indexed by the timestamp labels.
plot_features = data_2.loc[:, plot_cols]
plot_features.index = date_time
_ = plot_features.plot(subplots=True)
plt.savefig('../images/raw_data_1.png', dpi=500)

# Same view restricted to the first 480 rows.
plot_features = data_2.loc[:, plot_cols].iloc[:480]
plot_features.index = date_time.iloc[:480]
_ = plot_features.plot(subplots=True)
plt.savefig('../images/raw_data_2.png', dpi=500)



In [None]:
# Eleven subcarrier columns drawn together on a single set of axes.
plot_cols = [
    'Subcar_6', 'Subcar_7', 'Subcar_9', 'Subcar_10', 'Subcar_11', 'Subcar_13',
    'Subcar_15', 'Subcar_16', 'Subcar_17', 'Subcar_18', 'Subcar_19',
]
plot_features = data_2.loc[:, plot_cols]
plot_features.index = date_time
_ = plot_features.plot(subplots=False)

# Axis labels, tick sizes and legend, then export as PDF.
plt.ylabel("Amplitude", horizontalalignment='center', fontweight='bold', fontsize=20)
plt.xlabel("Time", horizontalalignment='center', fontweight='bold', fontsize=20)
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.legend(fontsize=13)
plt.savefig('../images/raw_data.pdf', dpi=300)


In [None]:
# Use the 'Time' column as the index of the DataFrame.
# Note: this cell is not re-runnable as-is — once 'Time' is the index it is no
# longer a column, so a second run of set_index('Time') fails.
data_2 = data_2.set_index('Time')

# Print the summary statistics of the DataFrame
print(data_2.describe())


In [None]:
# Model inputs: the eleven subcarrier columns, in this fixed order. Later cells
# address columns of the scaled array by position, so the order matters.
df_input = data_2[
    [
        'Subcar_6', 'Subcar_7', 'Subcar_9', 'Subcar_10', 'Subcar_11', 'Subcar_13',
        'Subcar_15', 'Subcar_16', 'Subcar_17', 'Subcar_18', 'Subcar_19',
    ]
]
df_input.head()


In [None]:
# Summary statistics of the selected input columns.
df_input.describe()


In [None]:
# Show the rows whose 'Subcar_13' value is greater than 511.
df_input[df_input['Subcar_13'] > 511]


In [None]:
def mape(actual, pred):
    """Return the mean absolute percentage error between `actual` and `pred`.

    Entries whose actual value is exactly zero are left out of the average,
    because their percentage error is undefined (division by zero) and would
    otherwise turn the whole result into inf/NaN.

    Args:
        actual: array-like of observed values.
        pred: array-like of predicted values, broadcastable against `actual`.

    Returns:
        The MAPE in percent. NaN when no entry has a non-zero actual value
        (this includes empty input).
    """
    actual = np.asarray(actual, dtype=float)
    pred = np.asarray(pred, dtype=float)
    # Bring both operands to a common shape so one mask can index both.
    actual, pred = np.broadcast_arrays(actual, pred)
    usable = actual != 0
    if not usable.any():
        return np.nan
    relative_error = (actual[usable] - pred[usable]) / actual[usable]
    return np.mean(np.abs(relative_error)) * 100


In [None]:
# Fit a StandardScaler on the input columns and scale them with it.
# NOTE(review): the scaler is fitted on the complete data set, before the
# train/test split performed further down — confirm this is acceptable.
scaler = StandardScaler().fit(df_input)
data_scaled = scaler.transform(df_input)
data_scaled


In [None]:
# Every scaled column is a model input; the regression target is scaled column 1
# ('Subcar_7' in df_input's column order).
features = data_scaled
target = features[:, 1]


In [None]:
# Peek at the second batch of a small demo generator (window of 6 rows, one
# sample per batch) to see what an (inputs, target) pair looks like.
TimeseriesGenerator(
    data=features,
    targets=target,
    length=6,
    sampling_rate=1,
    batch_size=1,
)[1]


In [None]:
# 80/20 split with shuffle=False, so the rows keep their original order and the
# test part is the trailing 20 % of the data.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.20,
    shuffle=False,
    random_state=120,
)
print(f'X_train.shape:  {x_train.shape}')
print(f'X_test.shape:  {x_test.shape}')


In [None]:
# Sliding-window configuration shared by the training and test generators.
# Original note on the window length: "1 day = 144 of data, 5 day = 720 of data"
# — TODO confirm against this data set's sampling rate.
win_length = 144
batch_size = 32
# Take the feature count from the data rather than hard-coding 11, so it cannot
# drift away from the column selection the features were built from.
num_features = x_train.shape[1]

# Each sample is a window of `win_length` consecutive rows paired with the target
# value of the row that follows the window.
train_generator = tf.keras.preprocessing.sequence.TimeseriesGenerator(
    x_train, y_train, length=win_length, sampling_rate=1, batch_size=batch_size)
test_generator = tf.keras.preprocessing.sequence.TimeseriesGenerator(
    x_test, y_test, length=win_length, sampling_rate=1, batch_size=batch_size)

In [None]:
# Inspect the first batch produced by the training generator.
train_generator[0]


In [None]:
# Stacked LSTM regressor: two sequence-returning LSTM layers (each followed by a
# LeakyReLU), a final LSTM that returns only its last output, dropout in between,
# and a single linear output unit.
lstm_model = tf.keras.Sequential()
lstm_model.add(tf.keras.layers.LSTM(
    128, return_sequences=True, input_shape=(win_length, num_features)))
lstm_model.add(tf.keras.layers.LeakyReLU(alpha=0.5))
lstm_model.add(tf.keras.layers.LSTM(128, return_sequences=True))
lstm_model.add(tf.keras.layers.LeakyReLU(alpha=0.5))
lstm_model.add(tf.keras.layers.Dropout(0.3))
lstm_model.add(tf.keras.layers.LSTM(64, return_sequences=False))
lstm_model.add(tf.keras.layers.Dropout(0.3))
lstm_model.add(tf.keras.layers.Dense(1, activation='linear'))

lstm_model.summary()


In [None]:
# Callbacks: checkpoint the model to disk (best only) and stop training once
# val_loss has not improved for two consecutive epochs.
cp1 = ModelCheckpoint(filepath='../save/lstm_model_standard/', save_best_only=True)
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', mode='min', patience=2)

# Mean squared error loss, Adam optimiser, mean absolute error as extra metric.
lstm_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.MeanAbsoluteError()],
)

# Result stores filled by the evaluation cells: model name -> evaluate() output.
val_performance = {}
performance = {}

In [None]:
# Train the LSTM for up to 15 epochs.
# NOTE(review): the test generator doubles as validation data here, so early
# stopping and checkpoint selection are driven by the test set — confirm intended.
history = lstm_model.fit(
    x=train_generator,
    validation_data=test_generator,
    epochs=15,
    shuffle=False,
    callbacks=[early_stopping, cp1],
)


In [None]:
# Evaluate the trained LSTM once on each data set. evaluate() returns the loss
# followed by the compiled metrics.
lstm_eval = lstm_model.evaluate(test_generator, verbose=0)

# `val_performance` holds the scores on the training generator (plotted as
# 'Training' in the bar chart); `performance` holds the test scores. The test
# result is reused rather than computing the same evaluation a second time.
val_performance['LSTM'] = lstm_model.evaluate(train_generator)
performance['LSTM'] = lstm_eval


In [None]:
# Predict the target for every window produced by the test generator.
predictions_lstm = lstm_model.predict(test_generator)

In [None]:
# Reload the LSTM checkpoint saved under the path used by the ModelCheckpoint
# callback. It is bound to its own name instead of `gru_model`: that name belongs
# to a different model defined in a later cell, and reusing it here would
# mislabel an LSTM checkpoint.
lstm_model_best = tf.keras.models.load_model('../save/lstm_model_standard/')

In [None]:
# Number of predictions (one per test window).
predictions_lstm.shape[0]


In [None]:
# Display the LSTM predictions array.
predictions_lstm


In [None]:
# Display the (scaled) test targets.
y_test


In [None]:
# Display the (scaled) test features.
x_test


In [None]:
# Test features from row `win_length` onwards, without the first column.
x_test[win_length:, 1:]


In [None]:
# Build a full-width scaled matrix for the predicted rows so the scaler can
# invert it. The first `win_length` test rows have no prediction (they only seed
# the first window), hence the slice.
# The prediction is written into column 1 — the column `target` was taken from —
# so that it is un-scaled with that column's statistics and so that column 1 of
# the inverse-transformed result really is the prediction. Prepending it as
# column 0 instead would leave column 1 holding the actual test values.
scaled_pred_lstm = x_test[win_length:].copy()
scaled_pred_lstm[:, 1] = np.ravel(predictions_lstm)
df_pred_lstm = pd.DataFrame(scaled_pred_lstm)
df_pred_lstm


In [None]:
# Map the scaled matrix back to the original units with the fitted scaler.
rev_trans = scaler.inverse_transform(df_pred_lstm)
rev_trans


In [None]:
# Trailing rows of df_input, one per prediction. `.iloc` makes the positional
# slice explicit, and `.copy()` yields an independent frame so that a column can
# be added to it later without pandas' SettingWithCopyWarning.
df_final_lstm = df_input.iloc[-predictions_lstm.shape[0]:].copy()


In [None]:
# Non-null value count per column of the prediction frame.
df_final_lstm.count()


In [None]:
# Store column 1 of the inverse-transformed array under 'Subcar_13_pred'.
# NOTE(review): `target` is taken from scaled column 1, which is 'Subcar_7' in
# df_input's column order, yet the new column is labelled 'Subcar_13' — confirm
# which subcarrier is meant.
df_final_lstm['Subcar_13_pred'] = rev_trans[:, 1]
df_final_lstm


In [None]:
# All input columns over the whole recording.
plot_cols = ['Subcar_6', 'Subcar_7', 'Subcar_9',
             'Subcar_10', 'Subcar_11', 'Subcar_13', 'Subcar_15', 'Subcar_16', 'Subcar_17', 'Subcar_18', 'Subcar_19']
plot_features = df_input[plot_cols]
plot_features.index = date_time
_ = plot_features.plot(subplots=False)
plt.ylabel("Amplitude")

# df_final_lstm holds the trailing rows of df_input, so label it with the
# trailing timestamps of the same length. A fixed `[:617]` would take the first
# timestamps instead and fail outright whenever the row count is not 617.
# NOTE(review): every column of df_final_lstm is plotted; restrict to the
# actual/predicted pair if only that comparison is wanted.
df_final_lstm.index = date_time.iloc[len(date_time) - len(df_final_lstm):]
_ = df_final_lstm.plot(subplots=False)
plt.ylabel("Amplitude")


In [None]:
# Bidirectional LSTM regressor.
# NOTE(review): the variable is called `gru_model`, but the layers are LSTMs.
# `input_shape` is passed to the Bidirectional wrapper (the layer Sequential
# actually sees first) rather than to the wrapped LSTM, so the model knows its
# input shape at construction time.
gru_model = tf.keras.Sequential([
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(128, return_sequences=True),
        input_shape=(win_length, num_features)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='linear')
])


In [None]:
# Callbacks for the bidirectional model: best-only checkpoint plus early stopping
# after two epochs without val_loss improvement.
cp3 = ModelCheckpoint(filepath='../save/gru_model_standard/', save_best_only=True)
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', mode='min', patience=2)

# Same loss, optimiser and metric as the other models in this notebook.
gru_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.MeanAbsoluteError()],
)


In [None]:
# Train the bidirectional model for up to 10 epochs, validating on the test
# generator.
history_2 = gru_model.fit(
    x=train_generator,
    validation_data=test_generator,
    epochs=10,
    shuffle=False,
    callbacks=[early_stopping, cp3],
)


In [None]:
# Replace the in-memory model with the checkpoint saved during training.
# `tf` is already imported at the top of the notebook, so no additional
# mid-notebook import is needed.
gru_model = tf.keras.models.load_model('../save/gru_model_standard/')

In [None]:
# Evaluate the bidirectional model once on each data set. evaluate() returns the
# loss followed by the compiled metrics.
gru_eval = gru_model.evaluate(test_generator, verbose=0)

# Record this model's own scores under 'BiLSTM' (evaluating `lstm_model` here
# would just duplicate the 'LSTM' entry). `val_performance` holds the
# training-generator scores, `performance` the test scores.
val_performance['BiLSTM'] = gru_model.evaluate(train_generator)
performance['BiLSTM'] = gru_eval


In [None]:
# Predict the target for every window produced by the test generator and show
# the result.
predictions_gru = gru_model.predict(test_generator)
predictions_gru

In [None]:
# Number of predictions (one per test window).
predictions_gru.shape[0]


In [None]:
# Display the bidirectional model's predictions array.
predictions_gru

In [None]:
# Build a full-width scaled matrix for the predicted rows so the scaler can
# invert it; the first `win_length` test rows have no prediction.
# The prediction goes into column 1 — the column `target` was taken from — so it
# is un-scaled with that column's statistics and column 1 of the
# inverse-transformed result really is the prediction. Prepending it as column 0
# instead would leave column 1 holding the actual test values.
scaled_pred_gru = x_test[win_length:].copy()
scaled_pred_gru[:, 1] = np.ravel(predictions_gru)
df_pred_gru = pd.DataFrame(scaled_pred_gru)
df_pred_gru

In [None]:
# Map the scaled matrix back to the original units with the fitted scaler.
# Note: this rebinds `rev_trans`, replacing the array computed earlier for the
# LSTM predictions.
rev_trans = scaler.inverse_transform(df_pred_gru)
rev_trans


In [None]:
# Trailing rows of df_input, one per prediction. `.iloc` makes the positional
# slice explicit, and `.copy()` yields an independent frame so that a column can
# be added to it later without pandas' SettingWithCopyWarning.
df_final_gru = df_input.iloc[-predictions_gru.shape[0]:].copy()

In [None]:
# Store column 1 of the inverse-transformed array under 'Subcar_13_pred'.
# NOTE(review): `target` is taken from scaled column 1, which is 'Subcar_7' in
# df_input's column order, yet the new column is labelled 'Subcar_13' — confirm
# which subcarrier is meant.
df_final_gru['Subcar_13_pred'] = rev_trans[:, 1]
df_final_gru


In [None]:

# 1-D convolutional regressor over a window of `win_length` time steps.
conv_model = tf.keras.Sequential([
    tf.keras.layers.Conv1D(filters=32,
                           kernel_size=(2,),
                           activation='relu', input_shape=(win_length, num_features)),
    # The convolution still returns one vector per time step. Flatten them into a
    # single vector per window so the Dense head produces exactly one value per
    # sample, matching the one-target-per-window data from the generators.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=1),
])


In [None]:
# Callbacks for the convolutional model: best-only checkpoint plus early stopping
# after two epochs without val_loss improvement.
cp4 = ModelCheckpoint(filepath='../save/conv_model_standard/', save_best_only=True)
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', mode='min', patience=2)

# Same loss, optimiser and metric as the other models in this notebook.
conv_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.MeanAbsoluteError()],
)


In [None]:
# Train the convolutional model for up to 15 epochs, validating on the test
# generator.
history_3 = conv_model.fit(
    x=train_generator,
    validation_data=test_generator,
    epochs=15,
    shuffle=False,
    callbacks=[early_stopping, cp4],
)


In [None]:
# Replace the in-memory convolutional model with the checkpoint saved during
# training. The result is bound to `conv_model`; binding it to `gru_model` would
# silently swap the bidirectional model for the convolutional one.
conv_model = tf.keras.models.load_model('../save/conv_model_standard/')


In [None]:
# Evaluate the convolutional model once on each data set. evaluate() returns the
# loss followed by the compiled metrics.
conv_eval = conv_model.evaluate(test_generator, verbose=0)

# Record this model's own scores under 'CONV' (evaluating `lstm_model` here would
# just duplicate the 'LSTM' entry). `val_performance` holds the
# training-generator scores, `performance` the test scores.
val_performance['CONV'] = conv_model.evaluate(train_generator)
performance['CONV'] = conv_eval


In [None]:
# Predict with the convolutional model for every window produced by the test
# generator and show the result.
predictions_conv = conv_model.predict(test_generator)
predictions_conv

In [None]:
# Use the convolutional model's own predictions (not those of another model).
# One value per window is needed: if the model returns several values per window
# (for example one per time step), the last one is taken.
conv_pred_per_window = np.asarray(predictions_conv).reshape(
    len(predictions_conv), -1)[:, -1]

# Build a full-width scaled matrix for the predicted rows so the scaler can
# invert it; the first `win_length` test rows have no prediction.
# The prediction goes into column 1 — the column `target` was taken from — so it
# is un-scaled with that column's statistics and column 1 of the
# inverse-transformed result really is the prediction.
scaled_pred_conv = x_test[win_length:].copy()
scaled_pred_conv[:, 1] = conv_pred_per_window
df_pred_conv = pd.DataFrame(scaled_pred_conv)
df_pred_conv


In [None]:
# Map the scaled matrix back to the original units with the fitted scaler.
rev_trans_conv = scaler.inverse_transform(df_pred_conv)
rev_trans_conv


In [None]:
# Trailing rows of df_input, one per prediction. `.iloc` makes the positional
# slice explicit, and `.copy()` yields an independent frame so that a column can
# be added to it later without pandas' SettingWithCopyWarning.
df_final_conv = df_input.iloc[-predictions_conv.shape[0]:].copy()


In [None]:
# Store column 1 of the inverse-transformed array under 'Subcar_13_pred'.
# NOTE(review): `target` is taken from scaled column 1, which is 'Subcar_7' in
# df_input's column order, yet the new column is labelled 'Subcar_13' — confirm
# which subcarrier is meant.
df_final_conv['Subcar_13_pred'] = rev_trans_conv[:, 1]
df_final_conv


In [None]:

def _save_prediction_curve(frame, out_path):
    """Plot all columns of `frame` on one set of axes and save the figure.

    The frame's index is replaced in place by the trailing entries of
    `date_time` (as many as the frame has rows), because the prediction frames
    hold the trailing rows of the input data.

    Args:
        frame: DataFrame to plot; its index is overwritten.
        out_path: file path handed to `plt.savefig`.
    """
    frame.index = date_time.iloc[len(date_time) - len(frame):]
    frame.plot(subplots=False)
    plt.ylabel("Amplitude", fontsize=20, fontweight='bold',
               horizontalalignment='center')
    plt.xlabel("Time", fontsize=20, fontweight='bold',
               horizontalalignment='center')
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)
    plt.legend(fontsize=15)
    plt.savefig(out_path, dpi=500)


# One figure per model. The LSTM curve is drawn from the LSTM frame (not from the
# bidirectional model's frame).
# NOTE(review): every column of each frame is plotted; restrict to the
# actual/predicted pair if only that comparison is wanted.
_save_prediction_curve(df_final_lstm, '../images/lstm_curve_standard.pdf')
_save_prediction_curve(df_final_gru, '../images/gru_curve_standard.pdf')
_save_prediction_curve(df_final_conv, '../images/conv_curve_standard.pdf')


In [None]:
# Grouped bar chart of the mean absolute error per model, on the training
# generator (`val_performance`) and on the test generator (`performance`).
x = np.arange(len(performance))
width = 0.3
metric_name = 'mean_absolute_error'
# Position of the metric inside each evaluate() result.
metric_index = lstm_model.metrics_names.index(metric_name)
val_mae = [v[metric_index] for v in val_performance.values()]
test_mae = [v[metric_index] for v in performance.values()]

plt.ylabel(f'{metric_name} [Subcar_7, normalized]',
           fontsize=16, fontweight='bold', horizontalalignment='center')
plt.bar(x - 0.17, val_mae, width, label='Training')
plt.bar(x + 0.17, test_mae, width, label='Testing')
# Pass the tick labels as a real list rather than a dict view.
plt.xticks(ticks=x, labels=list(performance.keys()),
           rotation=45, fontsize=16)
plt.yticks(fontsize=16)
_ = plt.legend(fontsize=16)
plt.savefig('../images/bar_graph_standard.pdf', dpi=500)


In [None]:
# Print entry 1 of each model's test evaluation result, four decimals.
for name, value in performance.items():
    print('{:12s}: {:0.4f}'.format(name, value[1]))


In [None]:
# Print entry 1 of each model's training-generator evaluation result, four
# decimals.
for name, value in val_performance.items():
    print('{:12s}: {:0.4f}'.format(name, value[1]))
