In [44]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, preprocessing, linear_model, neighbors, tree
from sklearn.model_selection import train_test_split

In [45]:
### Important functions and variables

features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Cabin']


def create_num_row(df, col):  # Convert string columns into numeric values. Returns dictionary 
    if df[col].dtype == np.object:
        col_dict = {k: v for v, k in enumerate(df[col].unique())}
        df[col] = df[col].map(col_dict)
        
        
def create_df(filename):
    df = pd.read_csv(filename)  # Open training data
    df = df.dropna()  # Drop NaN columns
    df['Cabin'] = df['Cabin'].astype(str).str[0]  # Isolate cabin row, remove number
    return df

In [60]:
### Create X and Y data

df = create_df("data/train.csv")

for feature in features:
    create_num_row(df, feature)

X = df[features]
y = df['Survived']

X = preprocessing.scale(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [61]:
### Support Vector Machine

clf = svm.SVC()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = clf.score(X_test, y_test)

print("SVM Accuracy: {}".format(accuracy * 100))

SVM Accuracy: 81.08108108108108


In [62]:
### Logistic Regression

clf = linear_model.LogisticRegression(n_jobs=-1)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = clf.score(X_test, y_test)

print("Logistic Regression Accuracy: {}".format(accuracy * 100))

Logistic Regression Accuracy: 75.67567567567568


In [63]:
### K Nearest Neighbors

clf = neighbors.KNeighborsClassifier(n_neighbors=5)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = clf.score(X_test, y_test)

print("K Nearest Neighbors Accuracy: {}".format(accuracy * 100))

K Nearest Neighbors Accuracy: 81.08108108108108


In [64]:
### Decision Tree

clf = tree.DecisionTreeClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = clf.score(X_test, y_test)

print("Decision Tree Accuracy: {}".format(accuracy * 100))

Decision Tree Accuracy: 91.8918918918919
