# Fazua classifier

For solving this exercise, we would use popular scikit-learn

In [5]:
#Import needed packages
import matplotlib.pyplot as plt
import pandas as pd 
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression

%matplotlib inline

In [7]:
#Load data
data = pd.read_excel('./HR-motor-samples.xlsx')

#Rename columns and preprocessing
data.columns = ['index','input1','input2','result']
data.drop(columns=['index'],inplace=True)
data['result'] = data['result'].apply(lambda value: value == 'pass')

#Divide in input and output
X = np.array(data[['input1','input2']])
Y = np.array(data['result'])


names = ["Logistic Regresion","Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
         "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
         "Naive Bayes", "QDA"]

#First, let's find a classifier that better fits out data 
classifiers = [
    LogisticRegression(solver='lbfgs'),
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()]

for name, clf in zip(names, classifiers):
    model = clf.fit(X, Y)
    accuracy = (clf.score(X,Y))
    

    print("For {} classifier we have {} % false negative ratio".format(name,(1-accuracy) * 100))


For Logistic Regresion classifier we have 27.428571428571423 % false negative ratio
For Nearest Neighbors classifier we have 17.14285714285714 % false negative ratio
For Linear SVM classifier we have 28.000000000000004 % false negative ratio
For RBF SVM classifier we have 0.5714285714285672 % false negative ratio
For Gaussian Process classifier we have 28.57142857142857 % false negative ratio
For Decision Tree classifier we have 17.714285714285715 % false negative ratio
For Random Forest classifier we have 16.000000000000004 % false negative ratio
For Neural Net classifier we have 42.85714285714286 % false negative ratio
For AdaBoost classifier we have 16.000000000000004 % false negative ratio
For Naive Bayes classifier we have 36.57142857142858 % false negative ratio
For QDA classifier we have 36.0 % false negative ratio


In [11]:
#As seen before, the best performance was given by Radial Basis Function kernel Support Vector Machine
rbfSVM = SVC(gamma=2, C=1)
rbfSVM.fit(X,Y)

print("Enter input 1")
a = input() 
print("Enter input 2")
b = input()
pred = np.array([[a,b]])
print("For input {} we have that result will be {}".format(pred,rbfSVM.predict(pred)))

Enter input 1
90
Enter input 2
95
For input [['90' '95']] we have that result will be [ True]
