In [57]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [48]:
dataset = pd.read_csv('../dataset/train_preprocessing.csv')
dataset.head()

Unnamed: 0,Survived,Pclass,Sex,Age,Fare,Embarked,Relatives,Alone
0,0,3,1,22,7.25,0,1,0
1,1,1,0,38,71.2833,1,1,0
2,1,3,0,26,7.925,0,0,1
3,1,1,0,35,53.1,0,1,0
4,0,3,1,35,8.05,0,0,1


In [49]:
X = dataset.drop('Survived', axis=1)
Y = dataset.Survived

In [50]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=101)

In [51]:
steps = [('scaller', MinMaxScaler()),
         ('clf', RandomForestClassifier())]

predictor = Pipeline(steps)
predictor.fit(x_train, y_train)

print(predictor.score(x_train, y_train))
print(accuracy_score(predictor.predict(x_test), y_test))
print(confusion_matrix(predictor.predict(x_test), y_test))
print(classification_report(predictor.predict(x_test), y_test))

0.9864864864864865
0.8295964125560538
[[119  21]
 [ 17  66]]
              precision    recall  f1-score   support

           0       0.88      0.85      0.86       140
           1       0.76      0.80      0.78        83

    accuracy                           0.83       223
   macro avg       0.82      0.82      0.82       223
weighted avg       0.83      0.83      0.83       223



In [52]:
steps = [('scaller', MinMaxScaler()),
         ('clf', LogisticRegression())]

predictor = Pipeline(steps)
predictor.fit(x_train, y_train)

print(predictor.score(x_train, y_train))
print(accuracy_score(predictor.predict(x_test), y_test))
print(confusion_matrix(predictor.predict(x_test), y_test))
print(classification_report(predictor.predict(x_test), y_test))

0.8018018018018018
0.820627802690583
[[125  29]
 [ 11  58]]
              precision    recall  f1-score   support

           0       0.92      0.81      0.86       154
           1       0.67      0.84      0.74        69

    accuracy                           0.82       223
   macro avg       0.79      0.83      0.80       223
weighted avg       0.84      0.82      0.83       223



In [64]:
steps = [('scaller', MinMaxScaler()),
         ('clf', SVC())]

predictor = Pipeline(steps)
predictor.fit(x_train, y_train)

print(predictor.score(x_train, y_train))
print(accuracy_score(predictor.predict(x_test), y_test))
print(pd.DataFrame(confusion_matrix(predictor.predict(x_test), y_test)))
print(classification_report(predictor.predict(x_test), y_test))

pd.DataFrame(confusion_matrix(predictor.predict(x_test), y_test), columns=['Predict 0', 'Predict 1'], index=['Actual 0', 'Actual 1'])

0.8123123123123123
0.7982062780269058
     0   1
0  128  37
1    8  50
              precision    recall  f1-score   support

           0       0.94      0.78      0.85       165
           1       0.57      0.86      0.69        58

    accuracy                           0.80       223
   macro avg       0.76      0.82      0.77       223
weighted avg       0.85      0.80      0.81       223



Unnamed: 0,Predict 0,Predict 1
Actual 0,128,37
Actual 1,8,50


In [60]:
pd.DataFrame(confusion_matrix(predictor.predict(x_test), y_test), column=[''])

Unnamed: 0,0,1
0,128,37
1,8,50


In [54]:
steps = [('scaller', MinMaxScaler()),
         ('clf', MultinomialNB())]

predictor = Pipeline(steps)
predictor.fit(x_train, y_train)

print(predictor.score(x_train, y_train))
print(accuracy_score(predictor.predict(x_test), y_test))
print(confusion_matrix(predictor.predict(x_test), y_test))
print(classification_report(predictor.predict(x_test), y_test))

0.7027027027027027
0.6995515695067265
[[131  62]
 [  5  25]]
              precision    recall  f1-score   support

           0       0.96      0.68      0.80       193
           1       0.29      0.83      0.43        30

    accuracy                           0.70       223
   macro avg       0.63      0.76      0.61       223
weighted avg       0.87      0.70      0.75       223



In [56]:
steps = [('scaller', MinMaxScaler()),
         ('clf', KNeighborsClassifier())]

predictor = Pipeline(steps)
predictor.fit(x_train, y_train)

print(predictor.score(x_train, y_train))
print(accuracy_score(predictor.predict(x_test), y_test))
print(confusion_matrix(predictor.predict(x_test), y_test))
print(classification_report(predictor.predict(x_test), y_test))

0.8498498498498499
0.8071748878923767
[[119  26]
 [ 17  61]]
              precision    recall  f1-score   support

           0       0.88      0.82      0.85       145
           1       0.70      0.78      0.74        78

    accuracy                           0.81       223
   macro avg       0.79      0.80      0.79       223
weighted avg       0.81      0.81      0.81       223



In [58]:
steps = [('scaller', MinMaxScaler()),
         ('clf', DecisionTreeClassifier())]

predictor = Pipeline(steps)
predictor.fit(x_train, y_train)

print(predictor.score(x_train, y_train))
print(accuracy_score(predictor.predict(x_test), y_test))
print(confusion_matrix(predictor.predict(x_test), y_test))
print(classification_report(predictor.predict(x_test), y_test))

0.9864864864864865
0.726457399103139
[[103  28]
 [ 33  59]]
              precision    recall  f1-score   support

           0       0.76      0.79      0.77       131
           1       0.68      0.64      0.66        92

    accuracy                           0.73       223
   macro avg       0.72      0.71      0.72       223
weighted avg       0.72      0.73      0.73       223



In [66]:
data = ['0.7982062780269058', '0.6995515695067265', '0.8071748878923767', '0.8295964125560538']
index = ['SVM', 'Naive Bayes', 'KNN', 'Random Forest']
column = ['Accuracy']
pd.DataFrame(data, index, column).sort_values('Accuracy')

Unnamed: 0,Accuracy
Naive Bayes,0.6995515695067265
SVM,0.7982062780269058
KNN,0.8071748878923767
Random Forest,0.8295964125560538
