In [26]:
# Basic Import
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Modelling
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import  GridSearchCV
from sklearn.preprocessing import label_binarize
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import confusion_matrix
from imblearn.over_sampling import SMOTE
import warnings
import pickle

In [10]:
df=pd.read_csv('Predictive_Maintenance_Binary_Model.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,Temperature Difference,Power,Mean Temperature,Wear strain
0,0,1,0.304348,0.358025,0.222934,0.535714,0.0,0,0.644444,0.622557,0.331395,0.0
1,1,2,0.315217,0.37037,0.139697,0.583791,0.011858,0,0.644444,0.609161,0.343023,0.00842
2,2,2,0.304348,0.345679,0.192084,0.626374,0.019763,0,0.622222,0.708143,0.325581,0.014972
3,3,2,0.315217,0.358025,0.154249,0.490385,0.027668,0,0.622222,0.512694,0.337209,0.016761
4,4,2,0.315217,0.37037,0.139697,0.497253,0.035573,0,0.644444,0.509509,0.343023,0.021822


In [11]:
df['Machine failure'].value_counts()

Machine failure
0    9661
1     339
Name: count, dtype: int64

In [12]:
df.drop(['Unnamed: 0'],axis=1,inplace=True)

In [13]:
X = df.drop(columns=['Machine failure'],axis=1)
X.head()

Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Temperature Difference,Power,Mean Temperature,Wear strain
0,1,0.304348,0.358025,0.222934,0.535714,0.0,0.644444,0.622557,0.331395,0.0
1,2,0.315217,0.37037,0.139697,0.583791,0.011858,0.644444,0.609161,0.343023,0.00842
2,2,0.304348,0.345679,0.192084,0.626374,0.019763,0.622222,0.708143,0.325581,0.014972
3,2,0.315217,0.358025,0.154249,0.490385,0.027668,0.622222,0.512694,0.337209,0.016761
4,2,0.315217,0.37037,0.139697,0.497253,0.035573,0.644444,0.509509,0.343023,0.021822


In [14]:
y = df['Machine failure']
y

0       0
1       0
2       0
3       0
4       0
       ..
9995    0
9996    0
9997    0
9998    0
9999    0
Name: Machine failure, Length: 10000, dtype: int64

In [15]:
X.shape

(10000, 10)

In [18]:
# separate dataset into train and test

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)
X_train.shape, X_test.shape

((8000, 10), (2000, 10))

# Logistic Regression

In [20]:
LR_clf = LogisticRegression()

# Train the model
LR_clf.fit(X_train, y_train)

# Evaluate the model
y_pred = LR_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))

Accuracy: 0.9705
              precision    recall  f1-score   support

           0       0.97      1.00      0.99      1939
           1       0.75      0.05      0.09        61

    accuracy                           0.97      2000
   macro avg       0.86      0.52      0.54      2000
weighted avg       0.96      0.97      0.96      2000



# Decision Tree Classifier

In [21]:
dt_model = DecisionTreeClassifier()
dt_model.fit(X_train, y_train)


y_pred_dt = dt_model.predict(X_test)
accuracy_dt = accuracy_score(y_test, y_pred_dt)
print("Decision Tree Accuracy:", accuracy_dt)
print(classification_report(y_test, y_pred_dt))

Decision Tree Accuracy: 0.985
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1939
           1       0.74      0.79      0.76        61

    accuracy                           0.98      2000
   macro avg       0.87      0.89      0.88      2000
weighted avg       0.99      0.98      0.99      2000



#Random Forest Classifier

In [22]:
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)


y_pred_rf = rf_model.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print("Random Forest Accuracy:", accuracy_rf)
print(classification_report(y_test, y_pred_rf))

Random Forest Accuracy: 0.9925
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      1939
           1       0.94      0.80      0.87        61

    accuracy                           0.99      2000
   macro avg       0.97      0.90      0.93      2000
weighted avg       0.99      0.99      0.99      2000



# AdaBoost Classifier

In [27]:
ada_clf = AdaBoostClassifier(n_estimators=100, random_state=42)
ada_clf.fit(X_train, y_train)

y_pred = ada_clf.predict(X_test)

print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      1.00      0.99      1939
           1       0.82      0.61      0.70        61

    accuracy                           0.98      2000
   macro avg       0.90      0.80      0.84      2000
weighted avg       0.98      0.98      0.98      2000

[[1931    8]
 [  24   37]]


# Gradient Boosting Classifier

In [28]:
gb_clf = GradientBoostingClassifier(n_estimators=100, random_state=42)
gb_clf.fit(X_train, y_train)

y_pred = gb_clf.predict(X_test)


print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.99      1.00      1.00      1939
           1       0.96      0.82      0.88        61

    accuracy                           0.99      2000
   macro avg       0.98      0.91      0.94      2000
weighted avg       0.99      0.99      0.99      2000

[[1937    2]
 [  11   50]]


# imbalance data by using SMOTE technique

In [29]:
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)


rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train_resampled, y_train_resampled)


y_pred = rf_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print(classification_report(y_test, y_pred))

Accuracy: 0.972
              precision    recall  f1-score   support

           0       1.00      0.97      0.99      1939
           1       0.52      0.89      0.66        61

    accuracy                           0.97      2000
   macro avg       0.76      0.93      0.82      2000
weighted avg       0.98      0.97      0.98      2000



HyperTurning in Random Forest Classifer

In [31]:
rf_classifier = RandomForestClassifier(random_state=42)

param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4] }

grid_search = GridSearchCV(estimator=rf_classifier, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(classification_report(y_test, y_pred))

Best Hyperparameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}
              precision    recall  f1-score   support

           0       0.99      1.00      1.00      1939
           1       0.94      0.82      0.88        61

    accuracy                           0.99      2000
   macro avg       0.97      0.91      0.94      2000
weighted avg       0.99      0.99      0.99      2000



In [32]:
Pkl_Filename = "Predictive_grid.pkl"
pickle.dump(best_model, open(Pkl_Filename, 'wb'))

In [33]:
with open(Pkl_Filename, 'rb') as file:
    Pickled_GRID_Model = pickle.load(file)

In [34]:
score = Pickled_GRID_Model.score(X_test, y_test)
# Print the Score
print("Test score: {0:.2f} %".format(100 * score))

# Predict the Labels using the reloaded Model
Ypredict = Pickled_GRID_Model.predict(X_test)

Ypredict

Test score: 99.30 %


array([0, 0, 0, ..., 0, 1, 0])