## Machine Predictive Maintenance Classification Dataset

##### Source: https://www.kaggle.com/datasets/shivamb/machine-predictive-maintenance-classification/data

In [27]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

warnings.filterwarnings('ignore')


In [28]:
df=pd.read_csv('dataset\predictive_maintenance.csv',index_col=False)

In [29]:
df.head()

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure


In [30]:
df['Target'].value_counts()

Target
0    9661
1     339
Name: count, dtype: int64

In [31]:
df['Type'].value_counts()

Type
L    6000
M    2997
H    1003
Name: count, dtype: int64

Since we will be performing binary classification, therefore failure type is removed.

#### Creating X and y

In [32]:
X=df.drop(columns=['Target','Failure Type','UDI','Product ID','Type'],axis=1)
y=df['Target']

#### Performing train-test split

In [33]:
from sklearn.model_selection import train_test_split

In [34]:
# separate dataset into train and test
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42,stratify=y)
X_train.shape, X_test.shape

((8000, 5), (2000, 5))

### Model Selection

In [35]:

from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report,confusion_matrix, \
                            precision_score, recall_score, f1_score, roc_auc_score,roc_curve 

In [36]:
models={
    "Logisitic Regression":LogisticRegression(),
    "Decision Tree":DecisionTreeClassifier(),

}
for i in range(len(list(models))):
    model = list(models.values())[i]
    model.fit(X_train, y_train) # Train model

    # Make predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Training set performance
    model_train_accuracy = accuracy_score(y_train, y_train_pred) # Calculate Accuracy
    model_train_f1 = f1_score(y_train, y_train_pred, average='weighted') # Calculate F1-score
    model_train_precision = precision_score(y_train, y_train_pred) # Calculate Precision
    model_train_recall = recall_score(y_train, y_train_pred) # Calculate Recall
    model_train_rocauc_score = roc_auc_score(y_train, y_train_pred)


    # Test set performance
    model_test_accuracy = accuracy_score(y_test, y_test_pred) # Calculate Accuracy
    model_test_f1 = f1_score(y_test, y_test_pred, average='weighted') # Calculate F1-score
    model_test_precision = precision_score(y_test, y_test_pred) # Calculate Precision
    model_test_recall = recall_score(y_test, y_test_pred) # Calculate Recall
    model_test_rocauc_score = roc_auc_score(y_test, y_test_pred) #Calculate Roc

    print(list(models.keys())[i])
    
    print('Model performance for Training set')
    print("- Accuracy: {:.4f}".format(model_train_accuracy))
    print('- F1 score: {:.4f}'.format(model_train_f1))
    
    print('- Precision: {:.4f}'.format(model_train_precision))
    print('- Recall: {:.4f}'.format(model_train_recall))
    print('- Roc Auc Score: {:.4f}'.format(model_train_rocauc_score))

    
    
    print('----------------------------------')
    
    print('Model performance for Test set')
    print('- Accuracy: {:.4f}'.format(model_test_accuracy))
    print('- F1 score: {:.4f}'.format(model_test_f1))
    print('- Precision: {:.4f}'.format(model_test_precision))
    print('- Recall: {:.4f}'.format(model_test_recall))
    print('- Roc Auc Score: {:.4f}'.format(model_test_rocauc_score))

    
    print('='*35)
    print('\n')

Logisitic Regression
Model performance for Training set
- Accuracy: 0.9709
- F1 score: 0.9630
- Precision: 0.7436
- Recall: 0.2140
- Roc Auc Score: 0.6057
----------------------------------
Model performance for Test set
- Accuracy: 0.9690
- F1 score: 0.9596
- Precision: 0.6875
- Recall: 0.1618
- Roc Auc Score: 0.5796


Decision Tree
Model performance for Training set
- Accuracy: 1.0000
- F1 score: 1.0000
- Precision: 1.0000
- Recall: 1.0000
- Roc Auc Score: 1.0000
----------------------------------
Model performance for Test set
- Accuracy: 0.9745
- F1 score: 0.9746
- Precision: 0.6232
- Recall: 0.6324
- Roc Auc Score: 0.8094




### Perfroming Hyperparameter Tuning

In [37]:
param={
    'criterion':['gini', 'entropy', 'log_loss'],
    'splitter':['best', 'random'],
    'max_depth':[1,2,3,4,5],
    'max_features':['sqrt','log2'],
}

In [38]:
from sklearn.model_selection import GridSearchCV
treemodel=DecisionTreeClassifier()

In [39]:
grid=GridSearchCV(treemodel,param_grid=param,cv=5,scoring='accuracy')

In [40]:
grid.fit(X_train,y_train)

In [41]:
y_pred=grid.predict(X_test)

In [42]:
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))
print(accuracy_score(y_pred,y_test))

[[1925    7]
 [  47   21]]
              precision    recall  f1-score   support

           0       0.98      1.00      0.99      1932
           1       0.75      0.31      0.44        68

    accuracy                           0.97      2000
   macro avg       0.86      0.65      0.71      2000
weighted avg       0.97      0.97      0.97      2000

0.973


In [43]:
grid.predict([[300,308.7,2000,500,3]])

array([1])