In [1]:
import pandas as pd
import numpy as np
df = pd.read_csv('predictive_maintenance.csv')

In [2]:
df.head()

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure


In [3]:
columns_to_drop = ['UDI' , 'Product ID']
df.drop(columns=columns_to_drop , axis = 1 , inplace=True)


In [4]:
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
# columnns_to_encode = ['Type', 'Failure Type']
df['Type'] = encoder.fit_transform(df['Type'])

le = LabelEncoder()
df['Failure Type'] = le.fit_transform(df['Failure Type'])
import joblib
joblib.dump(encoder , 'type_encoder.pkl')
joblib.dump(le , 'failure_type.pkl')

['failure_type.pkl']

In [5]:
df.head()

Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,2,298.1,308.6,1551,42.8,0,0,1
1,1,298.2,308.7,1408,46.3,3,0,1
2,1,298.1,308.5,1498,49.4,5,0,1
3,1,298.2,308.6,1433,39.5,7,0,1
4,1,298.2,308.7,1408,40.0,9,0,1


In [6]:
columns_to_normalize = ['Air temperature [K]','Process temperature [K]' , 'Rotational speed [rpm]','Torque [Nm]' , 'Tool wear [min]']
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
df[columns_to_normalize] = scaler.fit_transform(df[columns_to_normalize])
joblib.dump(scaler , 'scaler.pkl')

['scaler.pkl']

In [7]:
df.head()

Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,2,0.304348,0.358025,0.222934,0.535714,0.0,0,1
1,1,0.315217,0.37037,0.139697,0.583791,0.011858,0,1
2,1,0.304348,0.345679,0.192084,0.626374,0.019763,0,1
3,1,0.315217,0.358025,0.154249,0.490385,0.027668,0,1
4,1,0.315217,0.37037,0.139697,0.497253,0.035573,0,1


In [8]:
from sklearn.model_selection import train_test_split , GridSearchCV
from sklearn.ensemble import RandomForestClassifier
x = df.drop('Failure Type' , axis = 1)
y = df['Failure Type']
x_train , x_test , y_train , y_test = train_test_split(x , y , test_size=0.2 , random_state=42)

param_grid =  {
    'n_estimators':[10 , 20 , 50 , 100 , 200]
    }
model = RandomForestClassifier(random_state=42)
grid_param = GridSearchCV(param_grid=param_grid , verbose=5 , estimator= model  , cv = 5 , n_jobs=1)

grid_param.fit(x_train , y_train)
print("Best parameters" , grid_param.best_params_)
print("Best Score" , grid_param.best_score_)

best_model = grid_param.best_estimator_
test_score = best_model.score(x_test , y_test)

print("Test Score" , grid_param.best_score_)

Fitting 5 folds for each of 5 candidates, totalling 25 fits
[CV 1/5] END ...................n_estimators=10;, score=0.995 total time=   0.0s
[CV 2/5] END ...................n_estimators=10;, score=0.996 total time=   0.0s
[CV 3/5] END ...................n_estimators=10;, score=0.993 total time=   0.0s
[CV 4/5] END ...................n_estimators=10;, score=0.994 total time=   0.0s
[CV 5/5] END ...................n_estimators=10;, score=0.992 total time=   0.0s
[CV 1/5] END ...................n_estimators=20;, score=0.996 total time=   0.0s
[CV 2/5] END ...................n_estimators=20;, score=0.998 total time=   0.0s
[CV 3/5] END ...................n_estimators=20;, score=0.993 total time=   0.0s
[CV 4/5] END ...................n_estimators=20;, score=0.994 total time=   0.0s
[CV 5/5] END ...................n_estimators=20;, score=0.993 total time=   0.0s
[CV 1/5] END ...................n_estimators=50;, score=0.995 total time=   0.1s
[CV 2/5] END ...................n_estimators=50;,

In [9]:
best_model.fit(x_train , y_train)
pred = best_model.predict(x_test)
from sklearn.metrics import accuracy_score , r2_score , f1_score , classification_report , recall_score
print("accuracy",accuracy_score(y_test , pred))
print("r2 score",r2_score(y_test , pred))
print("f1 score",f1_score(y_test , pred , average='macro'))
print("classification",classification_report(y_test , pred))
print("recall ",recall_score(y_test , pred , average='macro'))

accuracy 0.995
r2 score 0.7687687687687688
f1 score 0.7829492965616006
classification               precision    recall  f1-score   support

           0       0.94      1.00      0.97        15
           1       1.00      1.00      1.00      1935
           2       0.80      0.92      0.86        13
           3       1.00      0.95      0.97        20
           4       0.00      0.00      0.00         6
           5       1.00      0.82      0.90        11

    accuracy                           0.99      2000
   macro avg       0.79      0.78      0.78      2000
weighted avg       0.99      0.99      0.99      2000

recall  0.781876456876457


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [10]:
joblib.dump(best_model , 'rfc_best_model.pkl')

['rfc_best_model.pkl']