# MODEL TRAINING:


In [13]:
import pandas as pd
import numpy as np

from imblearn.combine import SMOTETomek
from sklearn.impute import KNNImputer
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
#from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split


In [2]:
df = pd.read_csv("filter.csv")

In [7]:
df.head()

Unnamed: 0,Sensor-1,Sensor-2,Sensor-3,Sensor-4,Sensor-5,Sensor-7,Sensor-8,Sensor-9,Sensor-10,Sensor-11,...,Sensor-582,Sensor-583,Sensor-584,Sensor-585,Sensor-586,Sensor-587,Sensor-588,Sensor-589,Sensor-590,Good/Bad
0,2968.33,2476.58,2216.7333,1748.0885,1.1127,97.5822,0.1242,1.53,-0.0279,-0.004,...,,0.5004,0.012,0.0033,2.4069,0.0545,0.0184,0.0055,33.7876,-1
1,2961.04,2506.43,2170.0666,1364.5157,1.5447,96.77,0.123,1.3953,0.0084,0.0062,...,,0.4994,0.0115,0.0031,2.302,0.0545,0.0184,0.0055,33.7876,1
2,3072.03,2500.68,2205.7445,1363.1048,1.0518,101.8644,0.122,1.3896,0.0138,0.0,...,,0.4987,0.0118,0.0036,2.3719,0.0545,0.0184,0.0055,33.7876,-1
3,3021.83,2419.83,2205.7445,1363.1048,1.0518,101.8644,0.122,1.4108,-0.0046,-0.0024,...,,0.4934,0.0123,0.004,2.4923,0.0545,0.0184,0.0055,33.7876,-1
4,3006.95,2435.34,2189.8111,1084.6502,1.1993,104.8856,0.1234,1.5094,-0.0046,0.0121,...,,0.4987,0.0145,0.0041,2.8991,0.0545,0.0184,0.0055,33.7876,-1


In [3]:
df.shape


(100, 465)

In [4]:
X = df.drop(columns="Good/Bad", axis =1)
y = df["Good/Bad"]

In [11]:
X.shape,y.shape

((100, 464), (100,))

In [19]:
y

0    -1
1     1
2    -1
3    -1
4    -1
     ..
95   -1
96    1
97   -1
98   -1
99    1
Name: Good/Bad, Length: 100, dtype: int64

In [24]:
target_col_mapping = {'-1': 0}
y1=np.where(y==-1,0,1)

#y1 = y.map(target_col_mapping)

In [15]:
nan_count = np.isnan(y).sum()
print(nan_count)

0


In [25]:
y1

array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1])

In [5]:
## Pipeline

feature_pipeline = Pipeline(
    steps=[
        ('imputer',KNNImputer(n_neighbors=3)),
        ('scaler',RobustScaler())
    ]
)



In [6]:
feature_pipeline

In [7]:
X_pipe = feature_pipeline.fit_transform(X)


In [8]:
X_pipe.shape

(100, 464)

In [53]:
from imblearn.combine import SMOTETomek

resampler = SMOTETomek(sampling_strategy="auto")
X_res, y_res = resampler.fit_resample(X_pipe, y)

In [54]:
X_res.shape,y_res.shape

((188, 464), (188,))

In [51]:
y_res.value_counts()

Good/Bad
-1    94
 1    94
Name: count, dtype: int64

In [55]:
X_train,X_test,y_train,y_test = train_test_split(X_res,y_res,test_size=0.25,random_state=42)

In [56]:
print(X_train.shape,X_test.shape)

print(y_train.shape,y_test.shape)

(141, 464) (47, 464)
(141,) (47,)


# Model Selection and Model Training:

In [80]:
def evaluate_model(true, predicted):
    cl_report = classification_report(true, predicted)
    con_mat = confusion_matrix(true, predicted)
    roc_score = roc_auc_score(true,predicted)
    acc_score = accuracy_score(true, predicted)
    return cl_report, con_mat,roc_score, acc_score

In [81]:
models = {  "Random Forest": RandomForestClassifier(),
            "Decision Tree": DecisionTreeClassifier(),
            "Gradient Boosting": GradientBoostingClassifier(),
            "K-Neighbors Classifier": KNeighborsClassifier(),
            "AdaBoost Classifier": AdaBoostClassifier()
}
trained_model_list=[]
model_list=[]
acc_list =[]

for i in range(len(list(models))):
    model=list(models.values())[i]
    model.fit(X_train,y_train)

    #Make Predictions
    #y_train_pred = model.predict(X_train)
    y_test_pred=model.predict(X_test)

    cl_report, con_mat,roc_score, acc_score = evaluate_model(y_test, y_test_pred)

    print(list(models.keys())[i])
    model_list.append(list(models.keys())[i])

    print('Model Training Performance')
    print("Classification Report:",cl_report)
    print('*'*15)
    print("Confusion Matrix:",con_mat)
    print('*'*15)
    print("ROC Score:",roc_score)
    print('*'*15)
    print("Accuracy score:",acc_score*100)

    acc_list.append(acc_score)
    
    print('*'*35)
    print('\n')

Random Forest
Model Training Performance
Classification Report:               precision    recall  f1-score   support

          -1       1.00      1.00      1.00        25
           1       1.00      1.00      1.00        22

    accuracy                           1.00        47
   macro avg       1.00      1.00      1.00        47
weighted avg       1.00      1.00      1.00        47

***************
Confusion Matrix: [[25  0]
 [ 0 22]]
***************
ROC Score: 1.0
***************
Accuracy score: 100.0
***********************************


Decision Tree
Model Training Performance
Classification Report:               precision    recall  f1-score   support

          -1       1.00      1.00      1.00        25
           1       1.00      1.00      1.00        22

    accuracy                           1.00        47
   macro avg       1.00      1.00      1.00        47
weighted avg       1.00      1.00      1.00        47

***************
Confusion Matrix: [[25  0]
 [ 0 22]]
*****

In [82]:
model_list

['Random Forest',
 'Decision Tree',
 'Gradient Boosting',
 'K-Neighbors Classifier',
 'AdaBoost Classifier']

In [83]:
acc_list

[1.0, 1.0, 0.9787234042553191, 0.6382978723404256, 1.0]