In [2]:
import sklearn
import pandas as pd 
import numpy as np 
import warnings 
warnings.filterwarnings('ignore')

In [3]:
data = pd.read_csv("predictive_maintenance.csv")
data.head(5)

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure


In [4]:
data.tail(5)

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
9995,9996,M24855,M,298.8,308.4,1604,29.5,14,0,No Failure
9996,9997,H39410,H,298.9,308.4,1632,31.8,17,0,No Failure
9997,9998,M24857,M,299.0,308.6,1645,33.4,22,0,No Failure
9998,9999,H39412,H,299.0,308.7,1408,48.5,25,0,No Failure
9999,10000,M24859,M,299.0,308.7,1500,40.2,30,0,No Failure


In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 10 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   UDI                      10000 non-null  int64  
 1   Product ID               10000 non-null  object 
 2   Type                     10000 non-null  object 
 3   Air temperature [K]      10000 non-null  float64
 4   Process temperature [K]  10000 non-null  float64
 5   Rotational speed [rpm]   10000 non-null  int64  
 6   Torque [Nm]              10000 non-null  float64
 7   Tool wear [min]          10000 non-null  int64  
 8   Target                   10000 non-null  int64  
 9   Failure Type             10000 non-null  object 
dtypes: float64(3), int64(4), object(3)
memory usage: 781.4+ KB


In [6]:
data.describe()

Unnamed: 0,UDI,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,5000.5,300.00493,310.00556,1538.7761,39.98691,107.951,0.0339
std,2886.89568,2.000259,1.483734,179.284096,9.968934,63.654147,0.180981
min,1.0,295.3,305.7,1168.0,3.8,0.0,0.0
25%,2500.75,298.3,308.8,1423.0,33.2,53.0,0.0
50%,5000.5,300.1,310.1,1503.0,40.1,108.0,0.0
75%,7500.25,301.5,311.1,1612.0,46.8,162.0,0.0
max,10000.0,304.5,313.8,2886.0,76.6,253.0,1.0


In [7]:
data.describe(exclude='number')

Unnamed: 0,Product ID,Type,Failure Type
count,10000,10000,10000
unique,10000,3,6
top,M14860,L,No Failure
freq,1,6000,9652


In [8]:
data.shape

(10000, 10)

In [10]:
data.isnull().sum()

UDI                        0
Product ID                 0
Type                       0
Air temperature [K]        0
Process temperature [K]    0
Rotational speed [rpm]     0
Torque [Nm]                0
Tool wear [min]            0
Target                     0
Failure Type               0
dtype: int64

In [11]:
columns = data.columns.tolist()
columns

['UDI',
 'Product ID',
 'Type',
 'Air temperature [K]',
 'Process temperature [K]',
 'Rotational speed [rpm]',
 'Torque [Nm]',
 'Tool wear [min]',
 'Target',
 'Failure Type']

In [12]:


X = data.iloc[:,2:8]
y = data.iloc[:,-1]

from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2,random_state = 42)


from sklearn.preprocessing import OrdinalEncoder
oe = OrdinalEncoder(categories=[['L', 'M', 'H']])
oe.fit(X_train[['Type']])
X_train['Type'] = oe.transform(X_train[['Type']]).astype(int)
X_test['Type'] = oe.transform(X_test[['Type']]).astype(int)


In [13]:
X_train.head()

Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min]
9254,0,298.3,309.1,1616,31.1,195
1561,0,298.2,308.4,1388,53.8,137
1670,0,298.2,307.8,1528,31.1,194
6087,1,300.9,310.8,1599,33.0,7
6669,0,301.4,310.5,1571,33.9,208


In [14]:
X_train['Type'].unique()

array([0, 1, 2])

In [15]:
data['Failure Type'].unique()

array(['No Failure', 'Power Failure', 'Tool Wear Failure',
       'Overstrain Failure', 'Random Failures',
       'Heat Dissipation Failure'], dtype=object)

In [16]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()

y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)


In [17]:
label_mapping = pd.DataFrame({
    'Category': le.classes_,
    'Encoded Value': le.transform(le.classes_)
})
label_mapping

Unnamed: 0,Category,Encoded Value
0,Heat Dissipation Failure,0
1,No Failure,1
2,Overstrain Failure,2
3,Power Failure,3
4,Random Failures,4
5,Tool Wear Failure,5


In [18]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix


In [19]:
model = LogisticRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy = round(accuracy_score(y_pred,y_test) * 100,2)
print(f"Accuracy score for the Logistic model is: {accuracy}\n")
print("-----------------------------------------------------------")
print(f"Classification report: {classification_report(y_pred,y_test)}")

Accuracy score for the Logistic model is: 96.75

-----------------------------------------------------------
Classification report:               precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       1.00      0.97      0.98      1987
           2       0.15      0.29      0.20         7
           3       0.10      1.00      0.18         2
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         3

    accuracy                           0.97      2000
   macro avg       0.21      0.38      0.23      2000
weighted avg       0.99      0.97      0.98      2000



In [20]:
from sklearn.tree import DecisionTreeClassifier
model1 = DecisionTreeClassifier()
model1.fit(X_train,y_train)
y_pred1 = model1.predict(X_test)

accuracy1 = round(accuracy_score(y_pred1,y_test) * 100,4)
print(f"Accuracy score for Decsion Tree is {accuracy1}")

Accuracy score for Decsion Tree is 97.15


In [21]:
from sklearn.ensemble import RandomForestClassifier
random_forest = RandomForestClassifier(n_estimators=500)
random_forest.fit(X_train, y_train)
y_pred2 = random_forest.predict(X_test)

random_forest_accuracy = round(accuracy_score(y_pred2, y_test) * 100, 2)
print(f"Accuracy score for Random Forest is {random_forest_accuracy}")


Accuracy score for Random Forest is 98.2


In [22]:
import pickle
pickle.dump(random_forest,open('model.pkl','wb'))