In [902]:
import pandas as pd
import numpy as np
import joblib
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB


data = pd.read_csv("train.csv")
data = data.drop(columns = ["PassengerId","Name","Ticket","Cabin"])
data = data.dropna()



genderEncode = preprocessing.LabelEncoder().fit_transform(data["Sex"]) 
data["Sex"] = genderEncode


embarkedEncode = preprocessing.LabelEncoder().fit_transform(data["Embarked"]) 
data["Embarked"] = embarkedEncode

data["Family"] = data["SibSp"] + data["Parch"]
data["Age_Group"] = pd.cut(data["Age"],bins = [0,21,55,80], labels = [0,1,2])#0 = child, 1 = adult, 2 = old



x = data.drop(columns = ["Survived","Age","SibSp","Parch"])
y = data["Survived"]

x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=.2)

sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.fit_transform(x_test)


RF = RandomForestClassifier(max_depth=2)
RF.fit(x_train,y_train)

DT = DecisionTreeClassifier()
DT.fit(x_train,y_train)

LR = LogisticRegression(random_state=0)
LR.fit(x_train,y_train)

KN = KNeighborsClassifier(n_neighbors = 5, metric = "minkowski", p=2)
KN.fit(x_train,y_train)

svcLin = SVC(kernel="linear")#, random_state=0
svcLin.fit(x_train,y_train)

svcRBF = SVC(kernel="rbf")#, random_state=0
svcRBF.fit(x_train,y_train)

Ga = GaussianNB()
Ga.fit(x_train,y_train)

print("RF: ",RF.score(x_test,y_test))
print("DT: ",DT.score(x_test,y_test))
print("LR: ",LR.score(x_test,y_test))
print("KN: ",KN.score(x_test,y_test))
print("SVCLIN: ",svcLin.score(x_test,y_test))
print("SVCRBF: ",svcRBF.score(x_test,y_test))
print("GA: ",Ga.score(x_test,y_test),"\n")

#class, gender,fare, embark, family, age
person = [[1,0,13,0,1,38],[1,1,13,0,1,3]]
print("RF: ",RF.predict(person))
print("DT: ",DT.predict(person))
print("LR: ",LR.predict(person))
print("KN: ",KN.predict(person))
print("SVCLIN:",svcLin.predict(person))
print("SVCRBF:",svcRBF.predict(person))
print("GA: ",Ga.predict(person))




RF:  0.7762237762237763
DT:  0.7762237762237763
LR:  0.7482517482517482
KN:  0.7692307692307693
SVCLIN:  0.7482517482517482
SVCRBF:  0.7832167832167832
GA:  0.7552447552447552 

RF:  [0 0]
DT:  [1 1]
LR:  [0 0]
KN:  [1 1]
SVCLIN: [0 0]
SVCRBF: [1 1]
GA:  [1 1]
