In [None]:
import sklearn.model_selection as ms
import sklearn.feature_extraction
from sklearn import svm, tree, neighbors, neural_network
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV 
import pandas as pd
import numpy as np

In [None]:
# Read the two pre-cleaned CSVs: the labelled training set and the holdout set
# that predictions are written for at the end of the notebook.
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ("clean_train_data.csv", "clean_test_data.csv")
)

In [None]:
# Separate the features from the 'Survived' target column.
is_feature = train_data.columns != 'Survived'
X = train_data.loc[:, is_feature]
y = train_data['Survived']

# Hold out roughly a third of the labelled rows for evaluation; the fixed seed
# keeps the split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=10,
)

In [None]:
# Grid search for the best RBF-kernel SVC hyperparameters.
# Both C and gamma are explored on a logarithmic scale.
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['rbf'],
}

grid = GridSearchCV(svm.SVC(), param_grid)

# Tune on the training split only. Fitting on the full X / y would let the
# rows reserved in X_test / y_test influence hyperparameter selection, which
# makes any later score on that split optimistically biased.
grid.fit(X_train, y_train)

In [None]:
# Display the parameter combination the grid search selected as best.
grid.best_params_

In [None]:
# Support vector classifier with an RBF kernel, trained on the training split.
# NOTE(review): C=1000 and gamma=1e-5 are outside the ranges covered by the
# grid search earlier in the notebook; confirm these values are intentional.
svm_clf = svm.SVC(C=1000, gamma=1e-5, kernel='rbf')
svm_clf.fit(X_train.values, y_train.values)

# Keep the true labels and the model's predictions for the confusion matrix
# computed in the next cell, and report accuracy on the held-out split.
y_truth = y_test.values
y_pred = svm_clf.predict(X_test.values)
print(svm_clf.score(X_test.values, y_truth))

In [None]:
# Build the confusion matrix once with the label order pinned to [0, 1].
# Pinning the labels guarantees a 2x2 result, so the four-way unpacking below
# cannot fail when only one class happens to appear in y_truth / y_pred, and
# the printed matrix is the same object the counts are read from.
cm = confusion_matrix(y_truth, y_pred, labels=[0, 1])

# Row-major order of a 2x2 matrix with labels [0, 1]: tn, fp, fn, tp.
tn, fp, fn, tp = cm.ravel()

print("Confusion Matrix")
print(cm)
print("")
print("True Negatives", tn)
print("False Positives", fp)
print("False Negatives", fn)
print("True Positives", tp)

In [None]:
# MLP model
# Fit on the training split only. Training on the full X / y and then scoring
# on X_test (a subset of X) would evaluate the network on rows it has already
# seen and report an inflated accuracy.
# random_state fixes the weight initialisation and shuffling so the score is
# reproducible across runs (same seed as the train/test split).
NN_clf = neural_network.MLPClassifier(random_state=10)
NN_clf.fit(X_train.values, y_train.values)
print(NN_clf.score(X_test.values, y_test.values))

# Predictions and ground truth for the confusion matrix in the next cell.
# Note: this fitted model is also the one used for the holdout predictions
# at the end of the notebook.
y_pred = NN_clf.predict(X_test.values)
y_truth = y_test.values

In [None]:
# Build the confusion matrix once with the label order pinned to [0, 1].
# Pinning the labels guarantees a 2x2 result, so the four-way unpacking below
# cannot fail when only one class happens to appear in y_truth / y_pred, and
# the printed matrix is the same object the counts are read from.
cm = confusion_matrix(y_truth, y_pred, labels=[0, 1])

# Row-major order of a 2x2 matrix with labels [0, 1]: tn, fp, fn, tp.
tn, fp, fn, tp = cm.ravel()

print("Confusion Matrix")
print(cm)
print("")
print("True Negatives", tn)
print("False Positives", fp)
print("False Negatives", fn)
print("True Positives", tp)

In [None]:
# Predict on the holdout set with the MLP and save the predictions to CSV.
# (Swap in svm_clf.predict(...) here to export the SVM's predictions instead.)
predictions = NN_clf.predict(test_data.values)

# One 'Survived' column, indexed like the holdout frame; to_csv defaults
# (header row, comma separator) match the desired output format.
pred_df = pd.DataFrame({'Survived': predictions}, index=test_data.index)
pred_df.to_csv('predictions-ml.csv')