In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from numpy import array 
# Read the dataset from bend.csv: the "Value" column is the only feature and
# the "State" column holds the class label for each row.
bend_df = pd.read_csv("bend.csv")
X = bend_df["Value"]

# Map the State labels to integer codes. The fitted encoder is kept in `le`
# so the codes can be translated back with le.inverse_transform(...).
le = LabelEncoder()
Y = le.fit_transform(bend_df["State"])

In [2]:
from sklearn.model_selection import train_test_split
# 70% of the rows go to training, the remainder to testing; the fixed
# random_state keeps the split identical from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, train_size=0.7, random_state=1
)

# model.fit / model.predict need a 2-D feature matrix ([[v], [v], ...]), so
# each 1-D feature series is turned into a single-column array.
X_train, X_test = (array(part).reshape(-1, 1) for part in (X_train, X_test))

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

# Candidate classifiers as (display name, unfitted estimator) pairs. The
# display name is reused for console output, the CSV report, and the file name
# of the saved model.
#
# The random forest and the decision tree draw random numbers while fitting,
# so both are seeded (same seed as the train/test split) to make the reported
# metrics reproducible on a fresh "Restart & Run All".
models = [
    ('Gaussian N-Bayes', GaussianNB()),
    ('Support Vector Machine', SVC(gamma='auto')),
    ('Random Forest', RandomForestClassifier(n_estimators=15, random_state=1)),
    ('K Nearest Neighbors', KNeighborsClassifier(n_neighbors=20)),
    ('Decision Tree', DecisionTreeClassifier(random_state=1)),
]

In [5]:
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
import joblib
# Evaluate every candidate model on the held-out split. For each model:
#   1. fit it on the training data,
#   2. save the fitted estimator to "<name with underscores>.sav",
#   3. append its test-set metrics as one row of Analysis.csv and print them.
#
# The report is written inside a `with` block so the file is flushed and closed
# even if a model raises part-way through. `csv_file` stays bound after the
# block, so a later csv_file.close() is a harmless no-op.
with open("Analysis.csv", "w") as csv_file:
    csv_file.write(",".join(["Name",
                             "Accuracy",
                             "Misclassification Rate",
                             "f1_score",
                             "Precision",
                             "Recall"]) + "\n")

    for name, model in models:
        print(name)

        model.fit(X_train, Y_train)
        print("Fitting Done")

        # e.g. "Random Forest" -> "Random_Forest.sav". The handle is closed as
        # soon as the dump finishes instead of being left to the garbage collector.
        filename = "_".join(name.split(" ")) + '.sav'
        with open(filename, 'wb') as model_file:
            joblib.dump(model, model_file)

        Y_pred = model.predict(X_test)

        # Compute each metric once and reuse it for both the CSV row and the
        # console output. Macro averaging weights every class equally.
        accuracy = accuracy_score(Y_test, Y_pred)
        metrics = [
            ("Accuracy", accuracy),
            ("Misclassification Rate", 1 - accuracy),
            ("f1_score", f1_score(Y_test, Y_pred, average="macro")),
            ("Precision", precision_score(Y_test, Y_pred, average="macro")),
            ("Recall", recall_score(Y_test, Y_pred, average="macro")),
        ]

        csv_file.write(",".join([name] + [str(value) for _, value in metrics]) + "\n")

        for label, value in metrics:
            print(label + ":", value)
        print("\n")

Gaussian N-Bayes
Fitting Done
Accuracy: 0.8
Misclassification Rate: 0.19999999999999996
f1_score: 0.8056426332288401
Precision: 0.8056426332288401
Recall: 0.8056426332288401


Support Vector Machine
Fitting Done
Accuracy: 0.8
Misclassification Rate: 0.19999999999999996
f1_score: 0.8056426332288401
Precision: 0.8056426332288401
Recall: 0.8056426332288401


Random Forest
Fitting Done
Accuracy: 0.8333333333333334
Misclassification Rate: 0.16666666666666663
f1_score: 0.8383311603650586
Precision: 0.8381944444444445
Recall: 0.8387321490769767


K Nearest Neighbors
Fitting Done
Accuracy: 0.8444444444444444
Misclassification Rate: 0.15555555555555556
f1_score: 0.8493055555555555
Precision: 0.8494623655913979
Recall: 0.85022640195054


Decision Tree
Fitting Done
Accuracy: 0.8
Misclassification Rate: 0.19999999999999996
f1_score: 0.8064516129032256
Precision: 0.8084291187739464
Recall: 0.8084291187739464




In [None]:
# Close the Analysis.csv handle held in csv_file; closing flushes any rows
# still sitting in the write buffer to disk.
csv_file.close()