In [None]:
from sklearn.base import BaseEstimator, TransformerMixin
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score, f1_score
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.tree import DecisionTreeClassifier
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn import metrics
import warnings

In [None]:
# Mount Google Drive into the runtime at /content/drive so the dataset can be read.
# NOTE(review): `google.colab` is presumably only importable inside Colab — confirm
# before running this notebook in another environment.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the comma-separated dataset from the mounted Drive folder, then discard
# its first column.
# NOTE(review): the first column is presumably a saved row index — confirm.
df = pd.read_csv('drive/My Drive/.csv', sep=',')
df = df.drop(columns=[df.columns[0]])

In [None]:
# Fraction of the rows assigned to the training split.
# NOTE(review): 17/22 presumably mirrors how the recordings were collected
# (17 of 22 units) — confirm and consider naming the constant.
train_percentage = 17/22
# Number of training rows; int() truncates the float product toward zero.
train_index = int(len(df)*train_percentage)
# Every remaining row belongs to the test split.
test_index = len(df)-train_index

In [None]:
# Ordered (unshuffled) split: the first `train_index` rows train the models and
# every row after them is held out for testing.
df_train = df.iloc[:train_index]
# Slice forward from `train_index` instead of backward with `-test_index`:
# a negative offset of 0 (`df[-0:]`) would silently return the whole frame.
df_test = df.iloc[train_index:]

In [None]:
# Test split: the "Y" column is the target, every other column is a feature.
y_test = df_test["Y"]
X_test = df_test.drop(columns=["Y"])

In [None]:
# Training split: the 'Y' column is the target, every other column is a feature.
y_train = df_train['Y']
X_train = df_train.drop(columns='Y')

In [None]:
def average(y_pred, window=240):
  """Smooth binary predictions in place with a 3-point neighbour vote.

  The sequence is treated as consecutive segments of `window` samples. The
  first and last sample of every segment are left untouched; every other
  sample becomes 1 when the mean of itself and its two neighbours is at
  least 0.5, and 0 otherwise.

  The update runs left to right on `y_pred` itself, so the left neighbour
  used for sample i has already been smoothed.

  Args:
    y_pred: Mutable sequence of 0/1 predictions (e.g. list or numpy array).
      It is modified in place.
    window: Segment length whose boundary samples are skipped. Defaults to
      240, the value previously hard-coded.

  Returns:
    The same `y_pred` object, after smoothing.

  Raises:
    ValueError: If `window` is smaller than 1.
  """
  if window < 1:
    raise ValueError("window must be a positive integer")
  last = len(y_pred) - 1
  for i in range(len(y_pred)):
    # Skip segment boundaries, and also the final sample: when the length is
    # not a multiple of `window`, reading y_pred[i+1] there would raise
    # IndexError.
    if i % window == 0 or (i+1) % window == 0 or i == last:
      continue
    mean = float(y_pred[i-1] + y_pred[i] + y_pred[i+1])/3
    y_pred[i] = 1 if mean >= 0.5 else 0
  return y_pred

In [None]:
# Logistic Regression: fit on the training split, evaluate on the test split.
clf = LogisticRegression()
clf.fit(X_train, y_train)

# Hard labels are passed through the neighbour smoothing; the class-1
# probabilities used for ROC AUC are left raw.
y_pred_1 = average(clf.predict(X_test))
y_score_1 = clf.predict_proba(X_test)[:, 1]

acc1 = accuracy_score(y_test, y_pred_1)
f1_score_1 = f1_score(y_test, y_pred_1)
roc_1 = roc_auc_score(y_test, y_score_1)

print([acc1, f1_score_1, roc_1])
print(confusion_matrix(y_test, y_pred_1))

In [None]:
# Gaussian Naive Bayes: fit on the training split, evaluate on the test split.
clf_NB = GaussianNB().fit(X_train, y_train)

# Smoothed hard labels for accuracy / F1; raw class-1 probabilities for ROC AUC.
pred_NB = average(clf_NB.predict(X_test))
y_score_2 = clf_NB.predict_proba(X_test)[:, 1]

acc2 = accuracy_score(y_test, pred_NB)
f1_score_2 = f1_score(y_test, pred_NB)
roc_2 = roc_auc_score(y_test, y_score_2)

print([acc2, f1_score_2, roc_2])
print(confusion_matrix(y_test, pred_NB))

In [None]:
# KNN: sweep n_neighbors over 1..29 and record the test-split metrics per value.
from sklearn.neighbors import KNeighborsClassifier

acc3_list, f1_score3_list, roc_3_list = [], [], []
for i in range(1, 30):
    neigh = KNeighborsClassifier(n_neighbors=i).fit(X_train, y_train)
    # Smoothed hard labels for accuracy / F1; raw class-1 probabilities for AUC.
    pred_KN = average(neigh.predict(X_test))
    y_score_3 = neigh.predict_proba(X_test)[:, 1]
    acc3_list.append(accuracy_score(y_test, pred_KN))
    f1_score3_list.append(f1_score(y_test, pred_KN))
    roc_3_list.append(roc_auc_score(y_test, y_score_3))

In [None]:
# n_neighbors with the highest recorded accuracy (position i of the list holds
# the result for k = i + 1; the first maximum wins on ties).
max(enumerate(acc3_list, start=1), key=lambda pair: pair[1])[0]

In [None]:
# Refit KNN with the n_neighbors value that scored the highest accuracy in the
# sweep (first maximum wins on ties), then report its test-split metrics.
neigh = KNeighborsClassifier(
    n_neighbors=max(enumerate(acc3_list, start=1), key=lambda pair: pair[1])[0]
)
neigh.fit(X_train, y_train)

pred_KN = average(neigh.predict(X_test))
y_score_3 = neigh.predict_proba(X_test)[:, 1]

acc3 = accuracy_score(y_test, pred_KN)
f1_score_3 = f1_score(y_test, pred_KN)
roc_3 = roc_auc_score(y_test, y_score_3)

print([acc3, f1_score_3, roc_3])
print(confusion_matrix(y_test, pred_KN))

In [None]:
# Random Forest: sweep max_depth over 1..9 and record the test-split accuracy
# obtained at each depth.
from sklearn.ensemble import RandomForestClassifier
acc4 = []
max_depth = []
for i in range(1, 10):
    # A fixed seed makes the depth ranking reproducible; an unseeded forest
    # can pick a different "best" depth on every run.
    clf_RF = RandomForestClassifier(max_depth=i, random_state=42)
    clf_RF.fit(X_train, y_train)
    pred_RF = average(clf_RF.predict(X_test))
    # Arguments in (y_true, y_pred) order, matching every other call here.
    acc4.append(accuracy_score(y_test, pred_RF))
    max_depth.append(i)
print(max(acc4))

In [None]:
# Depth whose forest reached the highest accuracy in the sweep (first maximum
# wins on ties).
# NOTE: `acc4` is rebound to a single float by the final Random Forest cell, so
# this cell has to run before that one.
best_depth_4 = max(zip(max_depth, acc4), key=lambda pair: pair[1])[0]

In [None]:
# Refit the Random Forest at the selected depth and report test-split metrics.
# The seed is fixed so that repeated runs rebuild the same forest instead of a
# different random one each time.
clf_RF = RandomForestClassifier(max_depth=best_depth_4, random_state=42)
clf_RF.fit(X_train, y_train)

# Smoothed hard labels for accuracy / F1; raw class-1 probabilities for ROC AUC.
pred_RF = average(clf_RF.predict(X_test))
y_score_4 = clf_RF.predict_proba(X_test)[:, 1]

# NOTE: this rebinds `acc4` from the per-depth list to a single float.
acc4 = accuracy_score(y_test, pred_RF)
f1_score_4 = metrics.f1_score(y_test, pred_RF)
roc_4 = metrics.roc_auc_score(y_test, y_score_4)
print([acc4, f1_score_4, roc_4])
print(confusion_matrix(y_test, pred_RF))

# Per-feature importances of the fitted forest, most important first.
feature_importances = pd.DataFrame(
    clf_RF.feature_importances_,
    index=X_train.columns,
    columns=['importance'],
).sort_values('importance', ascending=False)

In [None]:
# Append a trailing axis of length 1 to the training features (the network
# below declares input_shape=(8, 1)).
X_train_shaped = np.asanyarray(X_train)[:, :, np.newaxis]

In [None]:
# Append a trailing axis of length 1 to the test features, mirroring the
# training array.
X_test_shaped = np.asanyarray(X_test)[:, :, np.newaxis]

In [None]:
# Show both shapes: a notebook cell only displays its last expression, so two
# separate bare expressions would hide the training shape.
X_train_shaped.shape, X_test_shaped.shape

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers.convolutional import Conv1D
from keras.optimizers import Adam, RMSprop
from keras.layers import Dropout

# 1-D CNN for binary classification: one convolution over the (8, 1) input,
# flattened into two dense layers, dropout, and a single sigmoid output unit.
model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(8, 1)),
    Flatten(),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid'),
])

optimizer = Adam(lr=0.00001)
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

# NOTE(review): the test split is also passed as validation data here.
model.fit(
    X_train_shaped,
    y_train,
    validation_data=(X_test_shaped, y_test),
    epochs=100,
)

In [None]:
# Print the summary of the Sequential network built above.
model.summary()

In [None]:
#CNN
# One `predict` call yields the sigmoid outputs. `predict_classes` and
# `predict_proba` are no longer available on recent Keras models, and calling
# both ran inference twice. `ravel()` gives 1-D scores like the other models.
y_score_5 = model.predict(X_test_shaped).ravel()
# Same 0.5 cut-off that `predict_classes` applied to a single sigmoid unit,
# followed by the neighbour smoothing used for every other model.
pred_cnn = average((y_score_5 > 0.5).astype('int32'))

acc5 = accuracy_score(y_test, pred_cnn)
f1_score_5 = metrics.f1_score(y_test, pred_cnn)
roc_5 = metrics.roc_auc_score(y_test, y_score_5)

print([acc5,f1_score_5,roc_5])
print(confusion_matrix(y_test, pred_cnn))

In [None]:
# Collect the five test accuracies in a frame indexed by model name, plot them,
# and display the table as the cell output.
accuracy_total = pd.DataFrame(
    {'Accuracy': [acc1, acc2, acc3, acc4, acc5]},
    index=pd.Index(
        ['Logistic Regression', 'Naive Bayes', 'KNN', 'Random Forest', 'CNN'],
        name='Model',
    ),
)
plt.plot(accuracy_total['Accuracy'])
plt.xticks(rotation=45)
accuracy_total

In [None]:
import matplotlib.pyplot as plt

# ROC curves of all five classifiers on the test split, drawn on one figure.
plt.figure(figsize=(8,8))
# Dashed diagonal from (0, 0) to (1, 1) as the reference line.
plt.plot([0, 1], [0, 1],'r--')
fpr_1, tpr_1, thresholds = roc_curve(y_test, y_score_1)
fpr_2, tpr_2, thresholds = roc_curve(y_test, y_score_2)
fpr_3, tpr_3, thresholds = roc_curve(y_test, y_score_3)
fpr_4, tpr_4, thresholds = roc_curve(y_test, y_score_4)
fpr_5, tpr_5, thresholds = roc_curve(y_test, y_score_5)

for fpr, tpr, label in (
    (fpr_1, tpr_1, "Logistic Regression"),
    (fpr_2, tpr_2, "Naive Bayes"),
    (fpr_3, tpr_3, "KNN"),
    (fpr_4, tpr_4, "Random Forest"),
    (fpr_5, tpr_5, "CNN"),
):
    plt.plot(fpr, tpr, label=label)

# The figure compares the five models above; none of them is an LSTM.
plt.title('ROC curves by model')
plt.xlabel('FP Rate')
plt.ylabel('TP Rate')

plt.legend()

In [None]:
from sklearn.calibration import calibration_curve

plt.figure(figsize=(8,8))
plt.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")

# One calibration curve per model (10 bins each), drawn in a fixed order so
# the legend entries and line colours stay stable.
for model_name, model_scores in [
    ('Logistic Regression', y_score_1),
    ('Naive Bayes', y_score_2),
    ('KNN', y_score_3),
    ('Random Forest', y_score_4),
    ('CNN', y_score_5),
]:
    fraction_of_positives, mean_predicted_value = calibration_curve(
        y_test, model_scores, n_bins=10
    )
    plt.plot(mean_predicted_value, fraction_of_positives, "s-", label=model_name)

plt.legend(loc="lower right")