# Importing packages

In [1]:
from utils.model_summary_functions import feature_importance, metrics, summary_plot,multivariate_importance

In [2]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import math 
import ppscore as pps
from tqdm import tqdm
import pickle
import tensorflow as tf

from sklearn.metrics import mean_squared_error, f1_score, accuracy_score, mean_absolute_error, r2_score
from sklearn.model_selection import cross_val_score, train_test_split, KFold
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder, RobustScaler
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor,GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.linear_model import LassoLarsCV,LassoCV, Lasso


from yellowbrick.model_selection import FeatureImportances
from yellowbrick.regressor import residuals_plot
from yellowbrick.regressor import prediction_error
from yellowbrick.model_selection import learning_curve
from yellowbrick.model_selection import feature_importances

import xgboost as xgb

In [3]:
# Enable on-demand GPU memory allocation for every GPU TensorFlow can see.
# Iterating (instead of indexing [0]) makes this cell a no-op on CPU-only
# machines rather than an IndexError, and keeps the setting consistent when
# more than one GPU is present.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

In [4]:
import warnings  # Ignore alert messages
# Installs a global "ignore" filter: from this cell on, every warning category
# raised in this kernel is suppressed.
warnings.filterwarnings('ignore')

In [5]:
# Start from matplotlib's stock style, then layer the notebook-wide defaults
# on top, one rcParams namespace at a time.
plt.style.use('default')

# Fonts
plt.rcParams.update({'font.size': 16, 'font.family': 'Arial'})

# Axes frame, titles, labels and grid
plt.rcParams.update({
    'axes.linewidth': 2,
    'axes.titlesize': 20,
    'axes.edgecolor': 'black',
    'axes.labelsize': 20,
    'axes.grid': True,
})

# Line / marker defaults
plt.rcParams.update({'lines.linewidth': 1.5, 'lines.markersize': 6})

# Figure size and tick labels
plt.rcParams.update({
    'figure.figsize': (15, 6),
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
})

# Legend appearance
plt.rcParams.update({
    'legend.fontsize': 13,
    'legend.framealpha': 1,
    'legend.edgecolor': 'black',
    'legend.shadow': False,
    'legend.fancybox': True,
    'legend.frameon': True,
})

# Importing dataset after EDA

In [6]:
# Read the zipped CSV produced by the earlier cleaning / feature-engineering
# step; the bare `df` on the last line renders the frame as the cell output.
df = pd.read_csv("Processed_data/FRA3-FRA6_cleaned_feature_engineered.csv.zip")
df

Unnamed: 0,Trail_right_z_sec_susp,Lead_left_z_sec_susp,Trail_left_z_sec_susp,Lead_right_z_sec_susp,Triaxial_x,Triaxial_z,Lead_z_accel_right_side,Susp_travel_left,Susp_travel_right,Pitch_accel,...,Triaxial_x_RMA_55 samples,Triaxial_x_MA_55 samples,Triaxial_x_RMS_55 samples,Triaxial_z_mean_55 samples,Susp_travel_mean_55 samples,Susp_travel_skew_55 samples,Pitch_left_susp_mean_55 samples,Bounce_susp_mean_55 samples,Front_Roll_susp_skew_55 samples,Vert_irreg_right_rail
0,-0.034100,0.030900,0.021800,-0.0218,-0.000116,-0.000749,-0.001817,0.030900,-0.034100,-0.000459,...,0.008054,0.000065,0.000457,-0.000293,0.004538,-1.143003,0.000911,0.001606,0.941525,-0.0712
1,-0.035600,0.031600,0.022800,-0.0228,-0.000139,-0.000749,-0.001812,0.031600,-0.035600,-0.000542,...,0.008238,0.000068,0.000457,-0.000307,0.003883,-1.156428,0.000941,0.001485,0.893110,-0.0753
2,-0.036700,0.032000,0.023700,-0.0239,-0.000162,-0.000727,-0.001605,0.032000,-0.036700,-0.000613,...,0.008436,0.000071,0.000458,-0.000320,0.003208,-1.139831,0.000970,0.001362,0.843661,-0.0791
3,-0.037000,0.031400,0.024400,-0.0248,-0.000182,-0.000650,-0.000912,0.031400,-0.037000,-0.000654,...,0.008647,0.000075,0.000459,-0.000332,0.002527,-1.100159,0.000993,0.001242,0.787577,-0.0827
4,-0.037300,0.030900,0.025100,-0.0257,-0.000202,-0.000574,-0.000219,0.030900,-0.037300,-0.000695,...,0.008870,0.000079,0.000459,-0.000342,0.001840,-1.048016,0.001013,0.001125,0.728316,-0.0858
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68096,-0.111903,-0.231038,0.133286,0.0503,0.001834,0.000079,0.004336,-0.231038,-0.111903,-0.003638,...,0.053665,0.002880,0.004202,-0.009225,-0.722654,-0.835219,-0.063069,-0.147773,-0.773285,0.0486
68097,-0.113222,-0.225703,0.140285,0.0422,0.001846,-0.000082,0.003567,-0.225703,-0.113222,-0.003363,...,0.053726,0.002887,0.004204,-0.008988,-0.693827,-0.847119,-0.063281,-0.143451,-0.788629,0.0511
68098,-0.114092,-0.220937,0.148758,0.0347,0.001823,-0.000321,0.002644,-0.220937,-0.114092,-0.003317,...,0.053553,0.002868,0.004196,-0.008838,-0.666117,-0.856692,-0.063529,-0.139384,-0.803025,0.0535
68099,-0.114961,-0.216170,0.157232,0.0272,0.001800,-0.000559,0.001721,-0.216170,-0.114961,-0.003272,...,0.053190,0.002829,0.004170,-0.008807,-0.639543,-0.864403,-0.063821,-0.135586,-0.816822,0.0558


In [7]:
# Work on a copy so the frame loaded from disk (`df`) stays untouched.
df_aux = df.copy()

In [8]:
def create_scaled_dataset(X, y, test_size=0.3, random_state=0):
    """Split features/target into train and test sets and standardize the features.

    The ``StandardScaler`` is fitted on the training features only and then
    applied to the test features, so no test-set statistics leak into the
    scaling. The targets are returned unscaled.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix; its column names and row index are preserved in the
        returned frames.
    y : array-like
        Target values aligned with the rows of ``X``.
    test_size : float or int, default 0.3
        Forwarded to ``train_test_split``.
    random_state : int or None, default 0
        Seed forwarded to ``train_test_split`` for a reproducible shuffle.

    Returns
    -------
    tuple
        ``(X_scaled_train, X_scaled_test, y_train, y_test)`` where the first
        two items are DataFrames of standardized features.
    """
    # NOTE(review): rows are shuffled before splitting. If consecutive rows are
    # correlated samples (e.g. rolling-window features), neighbouring rows end
    # up on both sides of the split -- confirm this is the intended protocol.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, shuffle=True
    )

    std_scaler = StandardScaler()
    X_scaled_train = pd.DataFrame(
        std_scaler.fit_transform(X_train),
        columns=X_train.columns,
        index=X_train.index,
    )
    # Reuse the statistics learned on the training split.
    X_scaled_test = pd.DataFrame(
        std_scaler.transform(X_test),
        columns=X_test.columns,
        index=X_test.index,
    )
    return X_scaled_train, X_scaled_test, y_train, y_test

In [9]:
# Features are every column except the target; the target is the
# 'Vert_irreg_right_rail' column.
X = df_aux.drop(columns=['Vert_irreg_right_rail'])
y = df_aux['Vert_irreg_right_rail']

# Train/test split with standardized features (targets stay unscaled).
X_scaled_train, X_scaled_test, y_train, y_test = create_scaled_dataset(X, y)

# Training and optimizing traditional machine learning models

In [10]:
from utils.Regression_models import RegModels

In [11]:
# Models handed to RegModels.fit_models, keyed by model name. Only the
# uncommented entries are processed; the rest are kept as a menu of options.
# NOTE(review): the integer is presumably the number of optimisation trials
# (the Extra_Trees run logged in this notebook stops at trial 39 for a value
# of 40) -- confirm in utils.Regression_models.
modelList = {#'Lasso': 50,
             #'Ridge':   50,
             #'Elastic_Net':   50,
             #'SGD_Reg':100,
              #'Random_Forest':   40,
              'Extra_Trees':  40,
              #'Ada_Boost':   100,
              #'XGBR':  100,
              #'Cat_Boost':  100,
              #'Light_Boost':  100,
              #'Ridge_CV': 2
}

In [12]:
# RegModels helper in "optimize" mode, given the standardized train/test
# features and their (unscaled) targets.
obj = RegModels(modo="optimize", X_trainS=X_scaled_train,X_testS=X_scaled_test, y_trainS=y_train, y_testS=y_test)

In [13]:
# Run the optimisation for every entry in modelList. Per the logged output the
# resulting estimator is saved as '<model name>.sav' (presumably inside `path`
# -- confirm in utils.Regression_models). This is the slow cell of the
# notebook: the logged Extra_Trees run took about 12 minutes.
obj.fit_models(modelList, path='models/ml_models')

  0%|                                                                                                                                                                                                                 | 0/1 [00:00<?, ?it/s][32m[I 2021-08-24 17:43:34,097][0m A new study created in memory with name: no-name-5bea07b5-b220-4fc9-a2ce-79ce6035977b[0m


Optimizing model Extra_Trees


[32m[I 2021-08-24 17:43:50,796][0m Trial 0 finished with value: 0.8732164621503636 and parameters: {'n_estimators': 440, 'max_depth': 4, 'min_samples_split': 4, 'min_samples_leaf': 3, 'bootstrap': True}. Best is trial 0 with value: 0.8732164621503636.[0m
[32m[I 2021-08-24 17:43:54,702][0m Trial 1 finished with value: 0.8697472395581805 and parameters: {'n_estimators': 80, 'max_depth': 1, 'min_samples_split': 4, 'min_samples_leaf': 3, 'bootstrap': False}. Best is trial 0 with value: 0.8732164621503636.[0m
[32m[I 2021-08-24 17:43:58,519][0m Trial 2 finished with value: 0.8147519505040242 and parameters: {'n_estimators': 120, 'max_depth': 3, 'min_samples_split': 2, 'min_samples_leaf': 6, 'bootstrap': False}. Best is trial 0 with value: 0.8732164621503636.[0m
[32m[I 2021-08-24 17:44:06,247][0m Trial 3 finished with value: 0.8526375616109421 and parameters: {'n_estimators': 220, 'max_depth': 4, 'min_samples_split': 4, 'min_samples_leaf': 4, 'bootstrap': False}. Best is trial 0 wi

[32m[I 2021-08-24 17:54:10,809][0m Trial 36 finished with value: 0.9061317778893246 and parameters: {'n_estimators': 400, 'max_depth': 6, 'min_samples_split': 2, 'min_samples_leaf': 1, 'bootstrap': False}. Best is trial 35 with value: 0.9062711868958608.[0m
[32m[I 2021-08-24 17:54:29,125][0m Trial 37 pruned. [0m
[32m[I 2021-08-24 17:54:49,664][0m Trial 38 pruned. [0m
[32m[I 2021-08-24 17:55:07,304][0m Trial 39 pruned. [0m


FrozenTrial(number=35, values=[0.9062711868958608], datetime_start=datetime.datetime(2021, 8, 24, 17, 53, 17, 997354), datetime_complete=datetime.datetime(2021, 8, 24, 17, 53, 40, 507355), params={'n_estimators': 420, 'max_depth': 6, 'min_samples_split': 2, 'min_samples_leaf': 1, 'bootstrap': True}, distributions={'n_estimators': IntUniformDistribution(high=600, low=60, step=20), 'max_depth': IntUniformDistribution(high=6, low=1, step=1), 'min_samples_split': IntUniformDistribution(high=6, low=2, step=1), 'min_samples_leaf': IntUniformDistribution(high=6, low=1, step=1), 'bootstrap': CategoricalDistribution(choices=(True, False))}, user_attrs={'best_model': ExtraTreesRegressor(bootstrap=True, n_estimators=420, n_jobs=-1, oob_score=True,
                    random_state=0)}, system_attrs={}, intermediate_values={0: 0.9062711868958608}, trial_id=35, state=TrialState.COMPLETE, value=None)


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [11:49<00:00, 709.82s/it]



Model concluded:  ExtraTreesRegressor(bootstrap=True, n_estimators=420, n_jobs=-1, oob_score=True,
                    random_state=0) saved as 'Extra_Trees.sav' 
 





In [14]:
# Show the MAE / MAPE / RMSE / R2 table, one row per fitted model.
# ('performace' is the method's actual spelling in the project API.)
obj.models_performace()

Unnamed: 0,MAE,MAPE,RMSE,R2
Extra_Trees,1.270207,2.100391,1.738592,0.906271


# Loading best model

In [None]:
# Second RegModels helper, this time in "load" mode, built on the same
# standardized split used for optimisation.
a = RegModels(modo="load", X_trainS=X_scaled_train,X_testS=X_scaled_test, y_trainS=y_train, y_testS=y_test)

# Names of the models to bring in.
# NOTE(review): only 'Extra_Trees' is enabled in this notebook's optimisation
# cell, so the other models presumably rely on files saved by earlier runs in
# 'models/ml_models' -- confirm they exist before running.
models = ['Lasso',
          'Ridge',
          'Elastic_Net',
          'SGD_Reg',
          'Random_Forest',
          'Extra_Trees',
          'Ada_Boost',
          'XGBR',
          'Cat_Boost',
          'Light_Boost']


# Same directory that the optimisation step was pointed at.
a.fit_models(models, path='models/ml_models')

In [None]:
# Display whatever RegModels reports as its fitted models (the return format is
# defined in utils.Regression_models).
a.get_Fitted_Models()

In [None]:
# Performance table restricted to the model names listed in `models`; kept in
# `table` so it can be exported, and echoed as the cell output.
table = a.models_performace(models)
table

In [None]:
# Export the comparison table with a project-relative path (the previous
# absolute, user-specific Windows path only worked on one machine), next to the
# saved models in a directory this notebook already uses.
# The index is written as well: the performance table displayed earlier in this
# notebook is indexed by model name, so dropping it would leave the rows
# unlabeled. NOTE(review): confirm `table` has the same layout.
table.to_csv('models/ml_models/models_performance.csv', index=True, index_label='Model')

In [None]:
def load_model(name, path):
    """Unpickle and return the object stored in the file ``path/name``.

    Parameters
    ----------
    name : str
        File name of the pickled model, e.g. ``'Light_Boost.sav'``.
    path : str
        Directory that contains the file.

    Returns
    -------
    object
        Whatever object was pickled into the file.
    """
    filename = path + '/' + name
    # SECURITY: unpickling can execute arbitrary code -- only load files that
    # were produced by this project.
    with open(filename, 'rb') as model_file:  # closes the handle after loading
        return pickle.load(model_file)


path = 'models/ml_models'
# The cells below reference `knn`, so it has to be defined here for the
# notebook to run on a fresh kernel.
# NOTE(review): despite its name, `knn` holds the model from 'Light_Boost.sav';
# the name is kept only because later cells use it.
knn = load_model('Light_Boost.sav', path=path)

In [None]:
# 5-fold cross-validation splitter, shuffled with a fixed seed.
cv = KFold(n_splits=5, shuffle=True,random_state=0)
# Diagnostic plots for the estimator bound to `knn`, which must already be
# defined when this cell runs.
# NOTE(review): `lc=False` presumably disables the learning-curve panel, making
# `train_sizes` irrelevant -- confirm in utils.model_summary_functions.
summary_plot(knn, X_scaled_train, y_train, X_scaled_test,y_test, cv, train_sizes=np.linspace(0.1, 1, 5),lc=False)

In [None]:
# Test targets re-ordered by their original row index and converted to a
# single-column NumPy array.
y_test_ordered = y_test.sort_index(ascending=True).to_frame().to_numpy()
y_test_ordered

In [None]:
# Sort the standardized test features by row index (ascending is the default)
# so the predictions follow the same order as y_test_ordered.
X_test_ordered = X_scaled_test.sort_index()
yhat = knn.predict(X_test_ordered)
yhat

In [None]:
# Score the index-ordered predictions against the index-ordered targets using
# the project's `metrics` helper (utils.model_summary_functions).
metrics(y_test_ordered,yhat)

In [None]:
# Overlay measured and predicted irregularity on the current axes.
ax = plt.gca()
ax.plot(y_test_ordered, label='Real irregularity')
ax.plot(yhat, label='Predicted Irregularity')
ax.legend()

In [None]:
# Residuals: flatten the single-column target array so it subtracts
# element-wise from the 1-D predictions.
error = y_test_ordered.ravel() - yhat

ax = plt.gca()
ax.plot(error)
ax.set_ylabel('Error (mm)')
ax.set_xlabel('Index')

In [None]:
# plt.subplots returns (figure, axes) in that order; the names were previously
# swapped, which left `ax` bound to the Figure and `fig` to the Axes.
fig, ax = plt.subplots(1, 1, figsize=(20, 6))

# Measured vs. predicted irregularity on the explicit Axes.
ax.plot(y_test_ordered, label='Real irregularity')
ax.plot(yhat, label='Predicted Irregularity')
ax.legend()
ax.set_ylabel('Vertical irregularity (mm)')
ax.set_xlabel('Index')

# Zoom into samples 200-400 and clip the vertical range for readability.
ax.set_xlim([200, 400])
ax.set_ylim([-15, 15])

# Ensemble

In [None]:
#obj.get_Fitted_Models()

In [None]:
#ensemble_model = obj.stack_models()

In [None]:
# ensemble_model.fit(X_scaled_train,y_train)
# ensemble_model.score(X_scaled_test, y_test)