In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import math
from sklearn.metrics import roc_curve, roc_auc_score, confusion_matrix, f1_score

In [None]:
# Load the raw machine-failure log.
# The recorded preview shows the derived date columns (Date.year, Date.month,
# ...) with a stray U+FEFF (byte-order mark) in front of their names. The
# character is invisible when displayed but makes lookups by the visible name
# (e.g. data['Date.year']) raise KeyError, so it is stripped from every header.
data = pd.read_csv('machine-failure.csv').rename(
    columns=lambda name: name.replace('\ufeff', '')
)
data.head()

Unnamed: 0,Date,Temperature,Humidity,Operator,Measure1,Measure2,Measure3,Measure4,Measure5,Measure6,...,Measure15,Hours Since Previous Failure,Failure,﻿Date.year,﻿Date.month,﻿Date.day-of-month,﻿Date.day-of-week,﻿Date.hour,﻿Date.minute,﻿Date.second
0,2016-01-01 00:00:00,67,82,Operator1,291,1,1,1041,846,334,...,1842,90,No,2016,1,1,5,0,0,0
1,2016-01-01 01:00:00,68,77,Operator1,1180,1,1,1915,1194,637,...,748,91,No,2016,1,1,5,1,0,0
2,2016-01-01 02:00:00,64,76,Operator1,1406,1,1,511,1577,1121,...,1689,92,No,2016,1,1,5,2,0,0
3,2016-01-01 03:00:00,63,80,Operator1,550,1,1,1754,1834,1413,...,711,93,No,2016,1,1,5,3,0,0
4,2016-01-01 04:00:00,65,81,Operator1,1928,1,2,1326,1082,233,...,507,94,No,2016,1,1,5,4,0,0


In [None]:
# Per-column overview of the dataset: one row per column of `data`, giving its
# dtype, the number of distinct values and the number of missing values.
#
# The null counts use DataFrame.sum() directly instead of np.sum(DataFrame):
# np.sum forwards axis=None to pandas, which recent pandas versions deprecate
# (it is slated to reduce over both axes and return a single scalar), and that
# would break building the per-column 'NULL' field.
unique_values = pd.DataFrame({
    "Column": data.columns,
    "Type": data.dtypes.values,
    "UniqueValues": data.nunique().values,
    "NULL": data.isnull().sum().values,
})
unique_values

Unnamed: 0,Column,Type,UniqueValues,NULL
0,Date,object,8784,0
1,Temperature,int64,23,0
2,Humidity,int64,35,0
3,Operator,object,8,0
4,Measure1,int64,1843,0
5,Measure2,int64,4,0
6,Measure3,int64,3,0
7,Measure4,int64,1837,0
8,Measure5,int64,1839,0
9,Measure6,int64,1843,0


In [None]:
# Number of rows per value of the 'Failure' target, to check class balance.
failure_counts = data['Failure'].value_counts()
failure_counts

No     8703
Yes      81
Name: Failure, dtype: int64

In [None]:
# Remove the 'Date' column from the working frame; `data` is rebound to the
# result, so this cell cannot be run twice without reloading the data.
data = data.drop(columns=['Date'])

In [None]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode every object-typed column of `data` in place.
# Each column gets its own fitted LabelEncoder, stored in `encoders` under the
# column name, so the code -> label mapping can be inspected or inverted later
# (e.g. encoders['Failure'].classes_). Re-fitting one shared encoder would keep
# only the mapping of the last column processed.
# `le` is still bound to the most recently fitted encoder after the loop.
encoders = {}
for col in data.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    encoders[col] = le
    print('done')

done
done


# Temperature vs Failure

In [None]:
# Contingency table: rows are Failure values, columns are Temperature values,
# cells are the number of records with that combination.
pd.crosstab(index=data["Failure"], columns=data["Temperature"])

Temperature,5,12,19,28,60,61,62,63,64,65,...,69,70,71,72,73,74,75,76,77,78
Failure,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1,1,1,1,981,964,977,976,942,957,...,1,2,2,4,2,1,1,0,0,0
1,0,0,0,0,1,0,1,4,2,3,...,9,3,11,15,5,2,3,2,3,1


In [None]:
# Separate the label column from the predictors:
#   target   - the 'Failure' column
#   training - every remaining column of `data`
target = data['Failure']
training = data.drop(columns=['Failure'])

In [None]:
# Hold out 40% of the rows for testing. The split is stratified on the target
# so both partitions keep the same class proportions, and the seed is fixed so
# the partition is reproducible.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    training,
    target,
    test_size=0.4,
    stratify=target,
    random_state=10,
)

# Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression classifier with class weights set to 'balanced' and a
# fixed random_state.
logreg_params = {'random_state': 0, 'class_weight': 'balanced'}
clf = LogisticRegression(**logreg_params)

In [None]:
# Train on the training partition, then predict labels for the held-out rows.
# fit() returns the estimator itself, so the two calls can be chained.
y_pred = clf.fit(Xtrain, ytrain).predict(Xtest)

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Print, in order: macro-averaged recall, precision and F1 of the logistic
# regression predictions, followed by plain accuracy.
for macro_metric in (recall_score, precision_score, f1_score):
    print(macro_metric(ytest, y_pred, average='macro'))
print(accuracy_score(ytest, y_pred))

0.8777642159678346
0.5308800657334859
0.5267722473604827
0.8804780876494024


# SVM with radial basis function (RBF) kernel

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# RBF-kernel SVM with balanced class weights.
# SVMs are not scale invariant, and the features here live on very different
# scales (the data preview shows Temperature in the tens next to Measure
# columns in the thousands), so the inputs are standardized first. Placing the
# scaler inside the pipeline means it is fitted on the training data only when
# clf2.fit() is called and is reused unchanged by clf2.predict().
clf2 = make_pipeline(
    StandardScaler(),
    SVC(kernel='rbf', class_weight='balanced'),
)

In [None]:
# Fit the model bound to clf2 on the training partition and label the held-out
# rows; fit() hands back the estimator, so predict() is chained onto it.
y_pred2 = clf2.fit(Xtrain, ytrain).predict(Xtest)

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Report macro-averaged recall, precision and F1 for y_pred2 (one value per
# line, in that order), then overall accuracy.
macro_scorers = [recall_score, precision_score, f1_score]
for scorer in macro_scorers:
    print(scorer(ytest, y_pred2, average='macro'))
print(accuracy_score(ytest, y_pred2))

0.5
0.4954467842914058
0.4977129788450543
0.9908935685828116


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


# SVM with polynomial kernel

In [None]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Polynomial-kernel SVM with balanced class weights.
# The inputs are standardized before reaching the SVM because SVMs are not
# scale invariant and the features span very different ranges; the scaler is
# part of the pipeline, so it is fitted only on whatever data fit() receives.
# NOTE: this rebinds the name clf2, replacing any model it referred to before.
clf2 = make_pipeline(
    StandardScaler(),
    SVC(kernel='poly', class_weight='balanced'),
)

In [None]:
# Train the current clf2 model and predict the held-out labels in one chained
# call (fit() returns the fitted estimator).
y_pred3 = clf2.fit(Xtrain, ytrain).predict(Xtest)

In [None]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Macro-averaged recall, precision and F1 for y_pred3, printed in that order,
# followed by overall accuracy.
for score_fn in (recall_score, precision_score, f1_score):
    macro_value = score_fn(ytest, y_pred3, average='macro')
    print(macro_value)
print(accuracy_score(ytest, y_pred3))

0.5267985353245261
0.525993923724461
0.5263889349197406
0.9826408651109846
