In [None]:
!pip install tensorflow==2.0.0-beta1 
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.offline as py
import plotly.graph_objs as go
import plotly.tools as tls
import plotly.figure_factory as ff
import re
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV, KFold
import lightgbm as lgb
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
import warnings

warnings.filterwarnings("ignore")

%matplotlib inline
sns.set()
py.init_notebook_mode(connected=True)

In [None]:
os.listdir("../input")

In [None]:
df = pd.read_csv('../input/pmsm_temperature_data.csv')
df.head()

In [None]:
df.describe()

In [None]:
df.columns

In [None]:
df.isnull().sum()

In [None]:
df.info()

In [None]:
exp_time_count = df.profile_id.value_counts().sort_values()
exp_time_count

In [None]:
fig = plt.figure(figsize=(10, 12))
sns.barplot(y=exp_time_count.index, x=exp_time_count.values, order=exp_time_count.index, orient="h")
plt.title("Experiment time per profile_id", fontsize=16)
plt.ylabel("Profile ID",fontsize=14)
plt.xlabel("Experiment time", fontsize=14)

In [None]:
df_20 = df[df.profile_id==20].drop("profile_id", axis=1).reset_index(drop=True)
df_20.head()

In [None]:
corr_matrix = df_20.corr()
figure = ff.create_annotated_heatmap(z=corr_matrix.values,
                                     x=list(corr_matrix.columns),
                                     y=list(corr_matrix.index), 
                                     annotation_text=np.round(corr_matrix.values, 2),
                                    colorscale="YlOrRd",
                                    showscale=True)
figure["layout"]["yaxis"].update({"tickangle": -45})
figure["layout"]["xaxis"].update({"tickangle": -45})
py.iplot(figure)

In [None]:
# Pairs of *distinct* variables whose correlation (profile 20) is at least 0.5.
# The diagonal is filtered with a comprehension: removing items from a list
# while iterating over it skips the element that follows every removal, which
# can leave some (var, var) pairs behind.
corr_20 = df_20.corr()
list_cor = [
    (row_var, col_var)
    for row_var, col_var in corr_20[corr_20 >= 0.5].stack().index
    if row_var != col_var
]
list_cor

In [None]:
list(df_20.corr()[df_20.corr() <= -0.3].stack().index)

In [None]:
# with sns.plotting_context(font_scale=12):
#     sns.pairplot(df_20)

In [None]:
df_20.describe()

In [None]:
fig = tls.make_subplots(rows=1, cols=len(df_20.columns), horizontal_spacing=0.05)
for i, var in enumerate(df_20.columns):
    fig.append_trace(go.Box(y=df_20[var].values, name=var), 1, i+1)
fig["layout"].update(height=400, width=2000)
py.iplot(fig)

In [None]:
df.head()

### Create `exp_time` variable

In [None]:
# Running sample counter inside each profile_id (0, 1, 2, ... per profile),
# appended as the last column under the name "time_idx".
df = df.assign(time_idx=df.groupby(by="profile_id").cumcount())
df.head()

In [None]:
df["exp_time"] = df["time_idx"]*0.5
df = df.drop("time_idx", axis=1)
df.head()

In [None]:
plt.figure(figsize=(10, 8))
sns.heatmap(df.drop("profile_id", axis=1).corr(), annot=True)

In [None]:
target_vars = re.findall(r"stator_\w*|torque|pm", " ".join(df.columns))
target_df = df[target_vars]
attr_df = df.drop(target_vars, axis=1)

In [None]:
target_df.head()

In [None]:
attr_df.head()

## Case Study (`torque`)

In [None]:
corr_vars = ["i_q", "i_d", "u_d", "u_q"]

### Univariate Analysis (`torque`)

In [None]:
sns.distplot(df["torque"], bins=10, kde=False)

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=len(corr_vars), figsize=(10, 6))
for i, var in enumerate(corr_vars):
    sns.distplot(df[var], ax=ax[i], bins=8, kde=False)
plt.show()

#### Checking for autocorrelation

In [None]:
plot_acf(df_20["torque"], title="Auto correlation (Torque) for profile_id = 20", lags=1250)
_ = plt.show()

In [None]:
plot_pacf(df_20["torque"], title=" Partial auto correlation (Torque) for profile_id = 20", lags=5)
_ = plt.show()

The `torque` series shows strong autocorrelation. Since the PACF crosses the x-axis at `lag = 5` and the ACF decays geometrically, an AR(4) model could be used for this time series (provided it is __stationary__).

### Multivariate Analysis (`torque`)

In [None]:
vars_ = corr_vars[:]
vars_.append("torque")
# sns.pairplot(vars=vars_, hue="profile_id", data=df)
sns.pairplot(vars=vars_, data=df)
_ = plt.show()

In [None]:
df_analysis = df[vars_]
corr_matrix = df_analysis.corr()
figure = ff.create_annotated_heatmap(z=corr_matrix.values,
                                     x=list(corr_matrix.columns),
                                     y=list(corr_matrix.index), 
                                     annotation_text=np.round(corr_matrix.values, 2),
                                    colorscale="YlOrRd",
                                    showscale=True)
figure["layout"]["yaxis"].update({"tickangle": -45})
figure["layout"]["xaxis"].update({"tickangle": -45})
py.iplot(figure)

## OLS (`torque`)

In [None]:
# Hold out 20% of the rows for testing. The fixed seed makes the shuffled split
# (and every metric computed from it below) reproducible across kernel restarts.
# NOTE(review): rows are shuffled across time, so neighbouring samples of the
# same profile can land in both sets -- confirm this is acceptable for the
# autocorrelated series shown above.
X_train, X_test, y_train, y_test = train_test_split(
    attr_df, target_df, test_size=.2, shuffle=True, random_state=42
)

# Keep the profile ids of each split for reference, then remove them from the
# feature matrices so they are not used as predictors.
profile_train = X_train["profile_id"]
profile_test = X_test["profile_id"]
X_train = X_train.drop("profile_id", axis=1)
X_test = X_test.drop("profile_id", axis=1)

In [None]:
def ols(target, corr_vars, add_const):
    """Fit an OLS regression of a standardized target on standardized predictors.

    Reads the notebook-level ``X_train`` and ``y_train`` frames.

    Parameters
    ----------
    target : str
        Column of ``y_train`` used as the dependent variable.
    corr_vars : list of str
        Columns of ``X_train`` used as regressors.
    add_const : bool
        When true, an intercept column is added with ``sm.add_constant``.

    Returns
    -------
    tuple
        ``(results, y_scaler, x_scaler, var_names)``: the fitted statsmodels
        results, the ``StandardScaler`` fitted on the target, the
        ``StandardScaler`` fitted on the regressors, and the regressor column
        names (without the constant).
    """
    # Use the requested target column instead of a hard-coded "torque".
    target_train = y_train[target].values.reshape(-1, 1)
    y_scaler = StandardScaler().fit(target_train)
    target_train = y_scaler.transform(target_train)

    var_train = X_train[corr_vars]
    var_names = var_train.columns
    x_scaler = StandardScaler().fit(var_train)
    # Rebuild a DataFrame so the summary reports the regressor names.
    var_train = pd.DataFrame(x_scaler.transform(var_train), columns=var_names)
    if add_const:
        var_train = sm.add_constant(var_train)

    model = sm.OLS(target_train, var_train).fit()
    return model, y_scaler, x_scaler, var_names

In [None]:
model, _, _, _ = ols("torque", corr_vars, True)
model.summary()

The p-value for the `const` term is considerably high, so the null hypothesis that its coefficient is equal to 0 cannot be rejected. This is expected, since both the target and the predictors were standardized before fitting. So we'll discard the `const` term (constant) and fit the model again.

In [None]:
model, y_scaler, x_scaler, var_names = ols("torque", corr_vars, False)
model.summary()

In [None]:
y_20 = y_scaler.transform(df.loc[df["profile_id"]==20, "torque"].values.reshape(-1, 1))
X_20 = x_scaler.transform(df.loc[df["profile_id"]==20, corr_vars])
X_20 = pd.DataFrame(X_20, columns=var_names)

In [None]:
model_20 = sm.OLS(y_20, X_20).fit()
model_20.summary()

When only one `profile_id` is used, the __Durbin-Watson__ statistic is considerably small, suggesting strong positive autocorrelation in the residuals.

In [None]:
ols_params = model.params

In [None]:
# Number of rows recorded for each profile_id, in order of first appearance.
# A single groupby replaces one full-frame scan per profile and avoids growing
# a dtype-less empty Series element by element.
profiles = df["profile_id"].unique()
profile_series = df.groupby("profile_id", sort=False).size()
profile_lens = profile_series.to_dict()
profile_series.head()

In [None]:
def yhat(X):
    """Predict the standardized target from the fitted OLS coefficients.

    Parameters
    ----------
    X : pandas.DataFrame
        Standardized regressors; must contain the ``corr_vars`` columns.

    Returns
    -------
    pandas.Series
        ``sum_i X[corr_vars[i]] * ols_params[i]`` for every row, indexed like ``X``.
    """
    # A linear prediction is the *sum* of coefficient * regressor terms: each
    # coefficient already carries its own sign, so no term is subtracted.
    # np.asarray makes the pairing positional (corr_vars order == fit order).
    return (X[corr_vars] * np.asarray(ols_params)).sum(axis=1)

In [None]:
# trace1 = go.Scatter3d(x=df_sample[corr_vars[0]],
#                      y=df_sample[corr_vars[1]],
#                      z=yhat(df_sample),
#                      mode="lines",
#                      name="Regression Line")

# trace2 = go.Scatter3d(x=df_sample[corr_vars[0]],
#                      y=df_sample[corr_vars[1]],
#                      z=df_sample["torque"],
#                      mode="markers",
#                      name="True Values")

# data=[trace1, trace2]

# layout = go.Layout(title={"text": "Regression (Torque)"},
#                   scene=go.layout.Scene(xaxis=go.layout.scene.XAxis(title=corr_vars[0]),
#                                 yaxis=go.layout.scene.YAxis(title=corr_vars[1]),
#                                 zaxis=go.layout.scene.ZAxis(title="Torque")))

# fig = go.Figure(data=data, layout=layout)

# py.iplot(fig)

In [None]:
var_names

In [None]:
y_test_scaled = y_scaler.transform(y_test["torque"].values.reshape(-1, 1))
X_test_scaled = pd.DataFrame(x_scaler.transform(X_test[corr_vars]), columns=var_names)

In [None]:
mae = mean_absolute_error(y_test_scaled, yhat(X_test_scaled))
mae

In [None]:
mse = mean_squared_error(y_test_scaled, yhat(X_test_scaled))
mse

In [None]:
r2 = r2_score(y_test_scaled, yhat(X_test_scaled))
r2

In [None]:
# Bring the standardized predictions back to the original torque scale.
# StandardScaler works on 2-D (n_samples, n_features) input, so the 1-D
# predictions are reshaped to a column first and flattened afterwards.
pred_torque = y_scaler.inverse_transform(
    np.asarray(yhat(X_test_scaled)).reshape(-1, 1)
).ravel()
# Positional difference; the result keeps the index of y_test["torque"].
error_df = pred_torque - y_test["torque"]
error_df.describe()

In [None]:
sns.distplot(error_df)

The errors are mainly concentrated close to 0, but they show considerable variance relative to the true values.

In [None]:
error_df_perc = error_df / y_test["torque"] * 100
error_df_perc.describe()

In [None]:
sns.distplot(error_df_perc)

In [None]:
# Fit a 1-component PCA on the standardized OLS predictors so predictions can
# be drawn against a single axis. A dedicated name is used for the scaled array
# so the `X_test_scaled` DataFrame that `yhat` consumes is not overwritten.
X_test_pca_input = x_scaler.transform(X_test[corr_vars])
var_pca = PCA(n_components=1).fit(X_test_pca_input)
var_pca.explained_variance_ratio_

In [None]:
sample = df.sample(n=1000)
sample_X_scaled = pd.DataFrame(x_scaler.transform(sample[corr_vars]), columns=var_names)

In [None]:
# The PCA was fitted on standardized predictors, so the sample has to be
# projected from the same standardized space rather than from the raw columns.
x_plot = var_pca.transform(sample_X_scaled.values).ravel()

In [None]:
# Predictions for the sample on the original torque scale. StandardScaler
# works on 2-D input, so reshape to a column and flatten the result again.
y_plot = y_scaler.inverse_transform(
    np.asarray(yhat(sample_X_scaled)).reshape(-1, 1)
).ravel()

In [None]:
y_true = sample["torque"]

In [None]:
# Draw the prediction line left to right: the sample rows are in random order,
# so plotting them unsorted produces a zig-zag instead of a curve.
order = np.argsort(x_plot)
plt.plot(x_plot[order], np.asarray(y_plot).ravel()[order], alpha=0.8, label="OLS prediction")
plt.scatter(x_plot, y_true, c="red", label="True values")
plt.xlabel("First principal component of the predictors")
plt.ylabel("Torque")
plt.legend()
plt.show()

### LightGBM (`torque`)

In [None]:
# kfold = KFold(n_splits=5).split(X=X_train, y=y_train["torque"])

In [None]:
# params_grid = {"num_leaves": [10, 20, 30],
#               "learning_rate": 0.1,
#               "n_estimators": [100, 150],
#               "boosting_type": ["gbdt", "dart"],
#               "reg_alpha": [1, 1.2],
#               "reg_lambda": [1, 1.2, 1.4]}

In [None]:
# gbm = lgb.LGBMRegressor()

In [None]:
# gbm_cv = GridSearchCV(gbm, param_grid=params_grid,
#                       cv=kfold, return_train_score=True,
#                       scoring="neg_mean_squared_error")

In [None]:
# gbm_cv.fit(X_train, y_train["torque"])

In [None]:
# gbm_cv.best_score_

In [None]:
# gbm_cv.best_params_

In [None]:
params = {'boosting_type': 'gbdt',
         'learning_rate': 0.1,
         'n_estimators': 150,
         'num_leaves': 30,
         'reg_alpha': 1,
         'reg_lambda': 1.2}

In [None]:
x_scaler = StandardScaler().fit(X_train)
X_train_scaled = x_scaler.transform(X_train)
y_train_scaled = y_scaler.transform(y_train["torque"].values.reshape(-1, 1))

In [None]:
# gbm = lgb.LGBMRegressor(**gbm_cv.best_params_)
gbm = lgb.LGBMRegressor(**params)

In [None]:
# y_train_scaled is an (n, 1) column (StandardScaler output); the regressor
# expects a 1-D target of length n, so flatten it before fitting.
gbm.fit(X_train_scaled, y_train_scaled.ravel())

In [None]:
sns.barplot(x=gbm.feature_importances_, y=X_train.columns)
plt.xlabel("Feature Importance", fontsize=12)
plt.ylabel("Label", fontsize=12)
plt.title("Feature Importance (Torque)", fontsize=16)
_ = plt.show()

As seen before in the correlation matrix and the OLS regression, the `i_q` variable is the most important predictor of `torque`.

In [None]:
X_test_scaled = x_scaler.transform(X_test)

In [None]:
# Predict on the standardized scale, then map back to the original torque
# scale. StandardScaler works on 2-D input, so the 1-D predictions are reshaped
# to a column and flattened again afterwards.
pred_gbm = y_scaler.inverse_transform(
    gbm.predict(X=X_test_scaled).reshape(-1, 1)
).ravel()
mae = mean_absolute_error(y_test["torque"], pred_gbm)
mse = mean_squared_error(y_test["torque"], pred_gbm)
r2 = r2_score(y_test["torque"], pred_gbm)
print("MAE: %f" %mae)
print("MSE: %f" %mse)
print("R2 Score: %f" %r2)

It seems that LightGBM has fitted the data well through brute force.

In [None]:
error_df = pd.Series(pred_gbm) - y_test["torque"].reset_index(drop=True)
error_df.describe()

In [None]:
sns.distplot(error_df)

In [None]:
pca_x = PCA(n_components=1).fit(X_test_scaled)
pca_x.explained_variance_ratio_

In [None]:
sns.lineplot(x=pca_x.transform(X_test_scaled).reshape(-1), y=pred_gbm, color="red", alpha=0.8)
sns.scatterplot(x=pca_x.transform(X_test_scaled).reshape(-1), y=y_test["torque"], color="blue")

As can be observed in the graph above, the LightGBM model has fitted the data well.

### GRU (`torque`) using `profile_id = 20` for training

The number of timesteps will be equal to 60, representing 0.5 minutes (30 seconds) of experiment time, since consecutive samples are 0.5 seconds apart.

In [None]:
def prepare_series(profile_id, df, target_var, test_size, series_size):
    """Build sliding-window train/test tensors for one profile.

    The rows of ``profile_id`` are split chronologically: the first
    ``int((1 - test_size) * n_rows)`` rows form the training part, the rest the
    test part. Inside each part every run of ``series_size`` consecutive rows
    becomes one sample, and its target is the value of ``target_var`` at the
    last row of that run.

    Parameters
    ----------
    profile_id : scalar
        Value of the ``profile_id`` column selecting the rows to use.
    df : pandas.DataFrame
        Frame with a ``profile_id`` column, the feature columns and the
        dependent columns (``stator_*``, ``pm``, ``torque``), which are
        excluded from the features.
    target_var : str
        Name of the column to predict.
    test_size : float
        Fraction of the profile's rows assigned to the test part.
    series_size : int
        Number of consecutive rows per window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(train_series, test_series, target_train, target_test)``; the series
        have shape ``(n_windows, series_size, n_features)`` and the targets
        have length ``n_windows``. A part shorter than ``series_size`` yields
        zero windows.
    """
    df_profile = df[df["profile_id"] == profile_id].drop("profile_id", axis=1)
    dependent_vars = re.findall(r"stator_\w*|pm|torque", " ".join(df.columns))
    target = df_profile[target_var].values
    features = df_profile.drop(dependent_vars, axis=1).values
    split = int((1 - test_size) * features.shape[0])

    def _windows(block):
        # Every run of `series_size` consecutive rows of `block`, oldest first.
        n_windows = max(block.shape[0] - series_size + 1, 0)
        windows = np.zeros(shape=(n_windows, series_size, block.shape[-1]))
        for start in range(n_windows):
            windows[start, ...] = block[start:start + series_size, ...]
        return windows

    # Both parts go through the same helper, so the last test window is filled
    # as well (it used to be left as zeros because the loop stopped one short).
    train_series = _windows(features[:split, ...])
    test_series = _windows(features[split:, ...])
    # The first `series_size - 1` rows of each part never end a window.
    target_train = target[:split][series_size - 1:]
    target_test = target[split:][series_size - 1:]

    return train_series, test_series, target_train, target_test

In [None]:
df.head()

In [None]:
# Two stacked GRU layers (60 units each) followed by one linear output unit.
# X_train still carries `exp_time`, which is dropped from the windowed series
# fed to the network, hence the "- 1" on the feature count.
n_features = X_train.shape[1] - 1

gru_model = tf.keras.models.Sequential([
    tf.keras.layers.GRU(units=60, return_sequences=True, use_bias=True,
                        input_shape=(60, n_features), dropout=0.3),
    tf.keras.layers.GRU(units=60, return_sequences=False, use_bias=True,
                        dropout=0.3, activation="linear"),
    tf.keras.layers.Dense(units=1, activation="linear", use_bias=True),
])

gru_model.compile(optimizer="adam", loss="mse", metrics=["mae", "mse"])

gru_model.summary()

In [None]:
train_series, test_series, target_train, target_test = prepare_series(20, 
                                                                      df.drop("exp_time", axis=1), 
                                                                      "torque", 0.3, 60)

In [None]:
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [None]:
history_gru = gru_model.fit(x=train_series, y=target_train,
                            batch_size=512, epochs=50, 
                            validation_data=(test_series, target_test), callbacks=[callback])

In [None]:
history_gru.history.keys()

In [None]:
epochs = np.arange(len(history_gru.history["loss"]))+1

In [None]:
trace1 = go.Scatter(x=epochs, y=history_gru.history["loss"], mode="lines", name="Train Loss (MSE)")

trace2 = go.Scatter(x=epochs, y=history_gru.history["val_loss"], mode="lines", name="Validation Loss (MSE)")

data = [trace1, trace2]

# 2-D scatter traces take their axis titles from layout.xaxis / layout.yaxis;
# `scene` only applies to 3-D traces, so titles placed there are never drawn.
# The model is compiled with loss="mse", so the y-axis is labelled accordingly.
layout = go.Layout(title={"text": "Loss Curves (GRU)"},
                   xaxis={"title": "Epoch"},
                   yaxis={"title": "Mean Squared Error (LOSS)"})

fig = go.Figure(data=data, layout=layout)

py.iplot(fig)

In [None]:
pred_gru = gru_model.predict(test_series)
mae = mean_absolute_error(target_test, pred_gru.reshape(-1))
mse = mean_squared_error(target_test, pred_gru.reshape(-1))
r2_gru = r2_score(target_test, pred_gru.reshape(-1))

In [None]:
print("MAE: %f" %mae)
print("MSE: %f" %mse)
print("R2 Score: %f" %r2_gru)

In [None]:
error = pred_gru.reshape(-1) - target_test
print("MAX error: %f" %error.max())
print("MIN error: %f" %error.min())

In [None]:
sns.distplot(error, kde=False)

### LSTM (`torque`) using `profile_id = 20` for training

In [None]:
# Two stacked LSTM layers (60 units each) followed by one linear output unit.
# X_train still carries `exp_time`, which is dropped from the windowed series
# fed to the network, hence the "- 1" on the feature count.
n_features = X_train.shape[1] - 1

lstm_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(units=60, return_sequences=True, use_bias=True,
                         input_shape=(60, n_features), dropout=0.3),
    tf.keras.layers.LSTM(units=60, return_sequences=False, use_bias=True,
                         dropout=0.3, activation="linear"),
    tf.keras.layers.Dense(units=1, activation="linear", use_bias=True),
])

lstm_model.compile(optimizer="adam", loss="mse", metrics=["mae", "mse"])

lstm_model.summary()

In [None]:
train_series, test_series, target_train, target_test = prepare_series(20, 
                                                                      df.drop("exp_time", axis=1), 
                                                                      "torque", 0.3, 60)

In [None]:
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [None]:
history_lstm = lstm_model.fit(x=train_series, y=target_train,
                             batch_size=512, epochs=50, 
                              validation_data=(test_series, target_test), callbacks=[callback])

In [None]:
epochs = np.arange(len(history_lstm.history["loss"]))+1

In [None]:
trace1 = go.Scatter(x=epochs, y=history_lstm.history["loss"], mode="lines", name="Train Loss (MSE)")

trace2 = go.Scatter(x=epochs, y=history_lstm.history["val_loss"], mode="lines", name="Validation Loss (MSE)")

data = [trace1, trace2]

# 2-D scatter traces take their axis titles from layout.xaxis / layout.yaxis;
# `scene` only applies to 3-D traces, so titles placed there are never drawn.
# The model is compiled with loss="mse", so the y-axis is labelled accordingly.
layout = go.Layout(title={"text": "Loss Curves (LSTM)"},
                   xaxis={"title": "Epoch"},
                   yaxis={"title": "Mean Squared Error (LOSS)"})

fig = go.Figure(data=data, layout=layout)

py.iplot(fig)

In [None]:
pred_lstm = lstm_model.predict(test_series)
mae = mean_absolute_error(target_test, pred_lstm.reshape(-1))
mse = mean_squared_error(target_test, pred_lstm.reshape(-1))
r2_lstm = r2_score(target_test, pred_lstm.reshape(-1))

In [None]:
print("MAE: %f" %mae)
print("MSE: %f" %mse)
print("R2 Score: %f" %r2_lstm)

In [None]:
error = pred_lstm.reshape(-1) - target_test
print("MAX error: %f" %error.max())
print("MIN error: %f" %error.min())

In [None]:
sns.distplot(error, kde=False)

## Training the models for all profile_id's

In [None]:
def predict_and_evaluate_profiles(model, df, target_var):
    """Evaluate an already trained model on the test windows of every profile.

    For each ``profile_id`` in ``df`` the test windows are built with
    ``prepare_series`` (test fraction 0.3, window length 60), the model
    predicts them, and R2 / MAE / MSE are printed. The averages of the
    per-profile MAE and MSE are printed at the end.

    Parameters
    ----------
    model : object
        Fitted model exposing ``predict(array)``.
    df : pandas.DataFrame
        Frame containing ``profile_id``, ``exp_time``, the features and
        ``target_var``.
    target_var : str
        Name of the column being predicted.

    Returns
    -------
    numpy.ndarray
        1-D array with ``prediction - target`` for every test window of every
        profile, in the order the profiles appear in ``df``.
    """
    # Loop-invariant: the windowed series never include `exp_time`.
    series_df = df.drop("exp_time", axis=1)
    errors, mae_scores, mse_scores = [], [], []
    for profile in df["profile_id"].unique():
        _, test_series, _, target_test = prepare_series(profile, series_df,
                                                        target_var, 0.3, 60)
        prediction = model.predict(test_series).reshape(-1)
        errors.append(prediction - target_test)
        # float() accepts both NumPy scalars and plain Python floats, so this
        # does not depend on which of the two the metric functions return.
        r2 = float(r2_score(target_test, prediction))
        mae = float(mean_absolute_error(target_test, prediction))
        mse = float(mean_squared_error(target_test, prediction))
        mae_scores.append(mae)
        mse_scores.append(mse)
        print("Model fitted to profile: %d ----VAL STATS >> R2: %f | MAE: %f | MSE: %f" %(profile, r2, mae, mse))
    print("\n---------------------------------\n")
    print("Average MAE: %f" %np.mean(mae_scores))
    print("Average MSE: %f" %np.mean(mse_scores))
    if not errors:
        return np.array([], dtype=np.float32)
    return np.concatenate(errors, axis=0)

In [None]:
def steps_per_epoch(df, series_size, batch_size, test_size):
    """Number of full batches per pass for the training and test generators.

    The per-profile window counts mirror what ``train_gen`` and ``test_gen``
    yield: with ``split = int((1 - test_size) * n_rows)``, the training
    generator produces ``split - series_size`` windows and the test generator
    ``n_rows - split - series_size + 1``. A profile too short to produce a
    window contributes zero instead of a negative count.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``profile_id`` column.
    series_size : int
        Number of consecutive rows per window.
    batch_size : int
        Number of windows per batch.
    test_size : float
        Fraction of each profile's rows assigned to the test part.

    Returns
    -------
    tuple of int
        ``(train_steps, test_steps)``, each the total window count floor-divided
        by ``batch_size``.
    """
    total_size_train, total_size_test = 0, 0
    # One groupby pass instead of filtering the whole frame twice per profile.
    for n_rows in df.groupby("profile_id").size():
        split = int((1 - test_size) * n_rows)
        total_size_train += max(split - series_size, 0)
        total_size_test += max(n_rows - split - series_size + 1, 0)
    return int(total_size_train // batch_size), int(total_size_test // batch_size)

In [None]:
def train_gen(series_size, test_size, target_var="torque", df=df.drop("exp_time", axis=1)):
    """Yield ``(window, target)`` training pairs, one profile after another.

    For every profile the first ``int((1 - test_size) * n_rows) - series_size``
    window start positions are used. Each window holds ``series_size``
    consecutive feature rows, and its target is a one-element array with the
    value of ``target_var`` at the window's last row.

    Note that the default ``df`` is evaluated once, when this definition runs,
    so it reflects the notebook's ``df`` at that moment.
    """
    dep_vars = re.findall(r"stator_\w*|pm|torque", " ".join(df.columns))
    for profile_id in df.profile_id.unique():
        rows = df[df["profile_id"] == profile_id]
        features = rows.drop(dep_vars, axis=1).drop("profile_id", axis=1).values
        targets = rows[target_var].values
        n_windows = int((1 - test_size) * rows.shape[0]) - series_size
        for start in range(n_windows):
            stop = start + series_size
            yield features[start:stop, :], np.array([targets[stop - 1]])

In [None]:
def test_gen(series_size, test_size, target_var="torque", df=df.drop("exp_time", axis=1)):
    """Yield ``(window, target)`` test pairs, one profile after another.

    For every profile the window start positions run from
    ``int((1 - test_size) * n_rows)`` up to and including
    ``n_rows - series_size``. Each window holds ``series_size`` consecutive
    feature rows, and its target is a one-element array with the value of
    ``target_var`` at the window's last row.

    Note that the default ``df`` is evaluated once, when this definition runs,
    so it reflects the notebook's ``df`` at that moment.
    """
    dep_vars = re.findall(r"stator_\w*|pm|torque", " ".join(df.columns))
    for profile_id in df.profile_id.unique():
        rows = df[df["profile_id"] == profile_id]
        features = rows.drop(dep_vars, axis=1).drop("profile_id", axis=1).values
        targets = rows[target_var].values
        first_start = int((1 - test_size) * rows.shape[0])
        last_start = rows.shape[0] - series_size
        for start in range(first_start, last_start + 1):
            stop = start + series_size
            yield features[start:stop, :], np.array([targets[stop - 1]])

### GRU Model

In [None]:
# Two stacked GRU layers (50 units each) followed by one linear output unit.
# X_train still carries `exp_time`, which is dropped from the windowed series
# fed to the network, hence the "- 1" on the feature count.
n_features = X_train.shape[1] - 1

gru_model = tf.keras.models.Sequential([
    tf.keras.layers.GRU(units=50, return_sequences=True, use_bias=True,
                        input_shape=(60, n_features), dropout=0.3),
    tf.keras.layers.GRU(units=50, return_sequences=False, use_bias=True,
                        dropout=0.3, activation="linear"),
    tf.keras.layers.Dense(units=1, activation="linear", use_bias=True),
])

gru_model.compile(optimizer="adam", loss="mse", metrics=["mae", "mse"])

gru_model.summary()

In [None]:
steps_train_val = steps_per_epoch(df, series_size=60, batch_size=512, test_size=.3)

In [None]:
# Stream (window, target) pairs from the generators; the two `args` are passed
# on as series_size=60 and test_size=0.3. Each pipeline shuffles within a
# 500-element buffer, groups 512 windows per batch and repeats the data 10 times.
train_dataset = (
    tf.data.Dataset.from_generator(generator=train_gen,
                                   output_types=(tf.float32, tf.float32),
                                   args=[60, .3])
    .shuffle(500)
    .batch(512)
    .repeat(10)
)

test_dataset = (
    tf.data.Dataset.from_generator(generator=test_gen,
                                   output_types=(tf.float32, tf.float32),
                                   args=[60, .3])
    .shuffle(500)
    .batch(512)
    .repeat(10)
)

In [None]:
# Stop once the validation loss has not improved for 5 epochs and roll back to
# the best weights seen.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# `fit` consumes tf.data datasets directly; `fit_generator` is deprecated.
history_gru = gru_model.fit(x=train_dataset,
                            steps_per_epoch=int(steps_train_val[0]),
                            callbacks=[callback],
                            validation_data=test_dataset,
                            validation_steps=int(steps_train_val[1]),
                            epochs=10)

In [None]:
epochs = np.arange(len(history_gru.history["loss"]))+1
trace1 = go.Scatter(x=epochs, y=history_gru.history["loss"], mode="lines", name="Train Loss (MSE)")

trace2 = go.Scatter(x=epochs, y=history_gru.history["val_loss"], mode="lines", name="Validation Loss (MSE)")

data = [trace1, trace2]

# 2-D scatter traces take their axis titles from layout.xaxis / layout.yaxis;
# `scene` only applies to 3-D traces, so titles placed there are never drawn.
# The model is compiled with loss="mse", so the y-axis is labelled accordingly.
layout = go.Layout(title={"text": "Loss Curves (GRU)"},
                   xaxis={"title": "Epoch"},
                   yaxis={"title": "Mean Squared Error (LOSS)"})

fig = go.Figure(data=data, layout=layout)

py.iplot(fig)

#### `Profile_id = 20`

In [None]:
train_series, test_series, target_train, target_test = prepare_series(20, 
                                                                      df.drop("exp_time", axis=1), 
                                                                      "torque", 0.3, 60)
gru_model.evaluate(x=test_series, y=target_test)

In [None]:
pred_gru = gru_model.predict(x=test_series)
mae = mean_absolute_error(target_test, pred_gru.reshape(-1))
mse = mean_squared_error(target_test, pred_gru.reshape(-1))
r2 = r2_score(target_test, pred_gru.reshape(-1))
print("MAE: %f" %mae)
print("MSE: %f" %mse)
print("R2 Score: %f" %r2)

#### `Profile_id = 4`

In [None]:
train_series, test_series, target_train, target_test = prepare_series(4, 
                                                                      df.drop("exp_time", axis=1), 
                                                                      "torque", 0.3, 60)

In [None]:
pred_gru = gru_model.predict(x=test_series)
mae = mean_absolute_error(target_test, pred_gru.reshape(-1))
mse = mean_squared_error(target_test, pred_gru.reshape(-1))
r2 = r2_score(target_test, pred_gru.reshape(-1))
print("MAE: %f" %mae)
print("MSE: %f" %mse)
print("R2 Score: %f" %r2)

#### ALL `profile_id`

In [None]:
error_gru = predict_and_evaluate_profiles(gru_model, df, "torque")

In [None]:
sns.distplot(error_gru)

### LSTM Model

In [None]:
# Two stacked LSTM layers (50 units each) followed by one linear output unit.
# X_train still carries `exp_time`, which is dropped from the windowed series
# fed to the network, hence the "- 1" on the feature count.
n_features = X_train.shape[1] - 1

lstm_model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(units=50, return_sequences=True, use_bias=True,
                         input_shape=(60, n_features), dropout=0.3),
    tf.keras.layers.LSTM(units=50, return_sequences=False, use_bias=True,
                         dropout=0.3, activation="linear"),
    tf.keras.layers.Dense(units=1, activation="linear", use_bias=True),
])

lstm_model.compile(optimizer="adam", loss="mse", metrics=["mae", "mse"])

lstm_model.summary()

In [None]:
# Stream (window, target) pairs from the generators; the two `args` are passed
# on as series_size=60 and test_size=0.3. Each pipeline shuffles within a
# 500-element buffer, groups 512 windows per batch and repeats the data 10 times.
train_dataset = (
    tf.data.Dataset.from_generator(generator=train_gen,
                                   output_types=(tf.float32, tf.float32),
                                   args=[60, .3])
    .shuffle(500)
    .batch(512)
    .repeat(10)
)

test_dataset = (
    tf.data.Dataset.from_generator(generator=test_gen,
                                   output_types=(tf.float32, tf.float32),
                                   args=[60, .3])
    .shuffle(500)
    .batch(512)
    .repeat(10)
)

In [None]:
# Stop once the validation loss has not improved for 5 epochs and roll back to
# the best weights seen.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# `fit` consumes tf.data datasets directly; `fit_generator` is deprecated.
# `steps_train_val` is the (train, test) step pair computed by `steps_per_epoch`.
history_lstm = lstm_model.fit(x=train_dataset,
                              steps_per_epoch=int(steps_train_val[0]),
                              callbacks=[callback],
                              validation_data=test_dataset,
                              validation_steps=int(steps_train_val[1]),
                              epochs=10)

In [None]:
epochs = np.arange(len(history_lstm.history["loss"]))+1
trace1 = go.Scatter(x=epochs, y=history_lstm.history["loss"], mode="lines", name="Train Loss (MSE)")

trace2 = go.Scatter(x=epochs, y=history_lstm.history["val_loss"], mode="lines", name="Validation Loss (MSE)")

data = [trace1, trace2]

# 2-D scatter traces take their axis titles from layout.xaxis / layout.yaxis;
# `scene` only applies to 3-D traces, so titles placed there are never drawn.
# The model is compiled with loss="mse", so the y-axis is labelled accordingly.
layout = go.Layout(title={"text": "Loss Curves (LSTM)"},
                   xaxis={"title": "Epoch"},
                   yaxis={"title": "Mean Squared Error (LOSS)"})

fig = go.Figure(data=data, layout=layout)

py.iplot(fig)

#### `Profile_id = 20`

In [None]:
train_series, test_series, target_train, target_test = prepare_series(20, 
                                                                      df.drop("exp_time", axis=1), 
                                                                      "torque", 0.3, 60)
lstm_model.evaluate(x=test_series, y=target_test)

In [None]:
pred_lstm = lstm_model.predict(x=test_series)
mae = mean_absolute_error(target_test, pred_lstm.reshape(-1))
mse = mean_squared_error(target_test, pred_lstm.reshape(-1))
r2 = r2_score(target_test, pred_lstm.reshape(-1))
print("MAE: %f" %mae)
print("MSE: %f" %mse)
print("R2 Score: %f" %r2)

#### `Profile_id = 4`

In [None]:
train_series, test_series, target_train, target_test = prepare_series(4, 
                                                                      df.drop("exp_time", axis=1), 
                                                                      "torque", 0.3, 60)

In [None]:
pred_lstm = lstm_model.predict(x=test_series)
mae = mean_absolute_error(target_test, pred_lstm.reshape(-1))
mse = mean_squared_error(target_test, pred_lstm.reshape(-1))
r2 = r2_score(target_test, pred_lstm.reshape(-1))
print("MAE: %f" %mae)
print("MSE: %f" %mse)
print("R2 Score: %f" %r2)

#### ALL `profile_id`

In [None]:
error_lstm = predict_and_evaluate_profiles(lstm_model, df, "torque")

In [None]:
sns.distplot(error_lstm)