In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
from preprocess import *
from useful_tools import *
from sklearn.svm import SVC
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import KFold
import matplotlib.pyplot as plt
plt.style.use('bmh')
%matplotlib inline

In [2]:
# Load the training table from a path relative to the notebook's working
# directory. Presumably this is the output of the project's preprocessing
# step (going by the file name) -- TODO confirm.
train = pd.read_csv('data_sets/preprocessed_v1.csv')

In [3]:
# Separate the 'Survived' label column from the remaining feature columns.
# Labels are kept as a plain NumPy array, features stay a DataFrame.
train_y = train['Survived'].values
train_x = train.drop('Survived', axis=1)

In [4]:
# Hold out 15% of the rows for validation; the fixed seed keeps the split
# identical from run to run.
X_train, X_valid, Y_train, Y_valid = train_test_split(
    train_x,
    train_y,
    random_state=0,
    test_size=0.15,
)

In [5]:
def cross_validation(train_x,train_y,p_range,k=50,parameter='C',silence=True,**kwargs):
    """Pick the best value of one SVC hyper-parameter by k-fold cross-validation.

    Parameters
    ----------
    train_x : pandas.DataFrame
        Feature rows; folds are selected positionally with ``.iloc``.
    train_y : array-like
        Labels aligned row-for-row with ``train_x``.
    p_range : iterable
        Candidate values for ``parameter``.
    k : int
        Number of folds.
    parameter : str
        Name of the ``SVC`` keyword argument being tuned.
    silence : bool
        When falsy, print every candidate together with its mean error.
    **kwargs
        Extra keyword arguments forwarded unchanged to ``SVC``.

    Returns
    -------
    tuple
        ``(best, errors)`` where ``errors`` holds the mean validation
        misclassification rate of each candidate (in input order) and
        ``best`` is the candidate with the smallest one (first on ties).

    Raises
    ------
    ValueError
        If ``p_range`` contains no candidates.
    """
    # Materialise the candidates so generators and other non-indexable
    # iterables can still be looked up by position at the end.
    candidates = list(p_range)
    if not candidates:
        raise ValueError('p_range must contain at least one candidate value')

    # Index labels positionally, matching the .iloc access used on train_x;
    # a pandas Series would otherwise be indexed by label.
    labels = np.asarray(train_y)

    # Draw the shuffled folds once so that every candidate is scored on the
    # same splits; redrawing them per candidate adds split-to-split noise
    # to the comparison.
    folds = list(KFold(len(train_x), n_folds=k, shuffle=True))

    errors = []
    for p in candidates:
        params = dict({parameter: p}, **kwargs)
        fold_errors = []
        for train_index, val_index in folds:
            clf = SVC(**params)
            clf.fit(train_x.iloc[train_index], labels[train_index])
            predictions = clf.predict(train_x.iloc[val_index])
            # Fraction of misclassified validation rows in this fold.
            fold_errors.append(np.mean(predictions != labels[val_index]))
        # Unweighted mean over folds (same weighting as summing err / k).
        error = float(np.mean(fold_errors))
        if not silence:
            # A single formatted string prints the same on Python 2 and 3.
            print('%s : %s' % (p, error))
        errors.append(error)

    best = candidates[int(np.argmin(errors))]
    return best,errors

In [6]:
# Value passed as the C argument of the SVC fitted in the next cell.
# NOTE(review): this is hard-coded; no call to cross_validation() appears in
# the visible cells, so confirm where 1e-5 came from.
best_c = 1e-5

In [None]:
# Fit a linear-kernel SVC on the training split, using best_c as its C value.
lrc = SVC(kernel='linear', C=best_c)
lrc.fit(X_train, Y_train)

In [None]:
# Predict labels for the held-out validation split and report the result of
# error_rate() on them. error_rate is not defined in the visible cells --
# presumably it comes from one of the star imports; verify.
predictions = lrc.predict(X_valid)
# Parenthesised single-argument form: identical output on Python 2, and
# valid syntax on Python 3 (the bare print statement is not).
print(error_rate(predictions, Y_valid))

In [None]:
# Write the predictions as a single column of integers, one per line.
# Note: despite the "train" in the file name, `predictions` holds the
# outputs for the validation split (X_valid).
np.savetxt(
    'Predictions/train_svm1_predictions.csv',
    np.c_[predictions],
    fmt='%d',
    delimiter=',',
    comments='',
)

In [None]:
# Load the test table from a path relative to the working directory.
# NOTE(review): the frame is passed straight to lrc.predict(), so its columns
# are assumed to match train_x's features in number and order -- confirm.
test = pd.read_csv('data_sets/preprocessed_test_v1.csv')

In [None]:
# Predict labels for the test table with the fitted model, then write them
# as a single column of integers, one per line.
test_predictions = lrc.predict(test)
np.savetxt(
    'Predictions/test_svm1_predictions.csv',
    np.c_[test_predictions],
    fmt='%d',
    delimiter=',',
    comments='',
)