In [115]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
## Grid search CV (hyperparameter tuning)
from sklearn.model_selection import GridSearchCV

In [96]:
url = "https://drive.google.com/uc?id=1_4GJkuna6iceQqGLLBe4RH7IuiN68XD1"
data = pd.read_csv(url)

In [97]:
data.head()

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure


In [98]:
data.describe()

Unnamed: 0,UDI,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,5000.5,300.00493,310.00556,1538.7761,39.98691,107.951,0.0339
std,2886.89568,2.000259,1.483734,179.284096,9.968934,63.654147,0.180981
min,1.0,295.3,305.7,1168.0,3.8,0.0,0.0
25%,2500.75,298.3,308.8,1423.0,33.2,53.0,0.0
50%,5000.5,300.1,310.1,1503.0,40.1,108.0,0.0
75%,7500.25,301.5,311.1,1612.0,46.8,162.0,0.0
max,10000.0,304.5,313.8,2886.0,76.6,253.0,1.0


In [99]:
unique_type = data["Type"].unique()
unique_type

array(['M', 'L', 'H'], dtype=object)

In [100]:
data.isna().value_counts()

UDI    Product ID  Type   Air temperature [K]  Process temperature [K]  Rotational speed [rpm]  Torque [Nm]  Tool wear [min]  Target  Failure Type
False  False       False  False                False                    False                   False        False            False   False           10000
Name: count, dtype: int64

In [101]:
priority_mapping = {'L': 0, 'M': 1, 'H': 2}
data["Type"] = data["Type"].map(priority_mapping)

In [102]:
data.head()

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,1,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,0,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,0,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,0,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,0,298.2,308.7,1408,40.0,9,0,No Failure


In [103]:
data.Type.unique()

array([1, 0, 2])

In [104]:
X = data.iloc[:, 2:-2]
y = data.iloc[:, -2:-1]
y

Unnamed: 0,Target
0,0
1,0
2,0
3,0
4,0
...,...
9995,0
9996,0
9997,0
9998,0


In [105]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [132]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
y_train = np.array(y_train)
y_train = y_train.flatten()

In [136]:
## Logistic Regression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

lr = LogisticRegression(random_state=42)
lr.fit(X_train, y_train)
y_lr = lr.predict(X_test)
accuracy_lr = accuracy_score(y_test, y_lr)
print(accuracy_lr)

param_grid = {"penalty": ("l1",'l2',), 'C': [1,5,10]}
lr_cv = GridSearchCV(estimator=LogisticRegression(solver='liblinear') ,param_grid=param_grid)
lr_cv.fit(X_train, y_train)

y_lr_cv = lr.predict(X_test)
accuracy_lr_cv = accuracy_score(y_test, y_lr_cv)
print(accuracy_lr_cv)

print(lr_cv.best_params_)


0.973
0.973
{'C': 1, 'penalty': 'l1'}


In [138]:
## Decision Tree
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
y_dt = lr.predict(X_test)
accuracy_dt = accuracy_score(y_test, y_dt)
print(accuracy_dt)

param_grid = {"criterion": ("gini", "entropy", "log_loss"), 'max_depth': [1,2,5]}
dt_cv = GridSearchCV(estimator=DecisionTreeClassifier() ,param_grid=param_grid)
dt_cv.fit(X_train, y_train)

y_dt_cv = lr.predict(X_test)
accuracy_dt_cv = accuracy_score(y_test, y_dt_cv)
print(accuracy_dt_cv)

print(dt_cv.best_params_)

0.973
0.973
{'criterion': 'entropy', 'max_depth': 5}


In [140]:
## SVM
from sklearn.svm import SVC

svc = SVC(random_state=42)
svc.fit(X_train, y_train)
y_svc = svc.predict(X_test)
accuracy_svc = accuracy_score(y_test, y_svc)
print(accuracy_svc)

param_grid = {"kernel": ('linear', 'poly', 'rbf', 'sigmoid'), 'C': [1,2,5]}
svc_cv = GridSearchCV(estimator=SVC() ,param_grid=param_grid)
svc_cv.fit(X_train, y_train)

y_svc_cv = lr.predict(X_test)
accuracy_svc_cv = accuracy_score(y_test, y_svc_cv)
print(accuracy_svc_cv)

print(svc_cv.best_params_)

0.976
0.973
{'C': 5, 'kernel': 'rbf'}


In [141]:
## Random Forest
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
y_rf = lr.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_rf)
accuracy_rf

0.973