# Packages and Data Imports

In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
#import matplotlib.pyplot as plt
#import seaborn as sns
#%matplotlib inline

#Stats and other tools
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline


#Models we will test and try
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm

# Number of folds (k) passed as `cv=` to every GridSearchCV in this notebook.
cv_k_global = 10


# Disabled: earlier direct load of the raw train.csv, indexed by PassengerId,
# plus a dtype-based split of its columns into quantitative / qualitative lists.
# train_full= pd.read_csv('train.csv')
# train_full.set_index('PassengerId',inplace=True)

# quantitative = [f for f in train_full.columns if train_full.dtypes[f] != 'object']
# quantitative.remove('Survived')#Survived is target label
# #quantitative.remove('PassengerId')#PassengerId will be turned into index
# qualitative = [f for f in train_full.columns if train_full.dtypes[f] == 'object']

# Name suffixes of the training sets. Each grid-search cell reads
# ./data_train_<suffix>.csv and writes ./CV_<MODEL>_<suffix>.csv.
datasets=['1','2','deck_pred_1','deck_pred_2']

# Cross-validated hyper-parameter searches

## Logistic Regression

In [5]:
# Cross-validated grid search for plain logistic regression, run once per
# entry of `datasets`. Reads ./data_train_<dataset>.csv and writes the full
# GridSearchCV.cv_results_ table to ./CV_LR_<dataset>.csv.
for dataset in datasets:
    print('Grid-Searching for Data-Set '+dataset)
    data=pd.read_csv('./data_train_'+dataset+'.csv',index_col='PassengerId')
    # The 'deck_pred' files only have Missing_Embark dropped; the others also
    # lose Missing_Deck. The two cases are complementary, hence if/else.
    if 'deck_pred' in dataset:
        data=data.drop(['Missing_Embark'],axis=1)
    else:
        data=data.drop(['Missing_Embark','Missing_Deck'],axis=1)
    # .as_matrix() was removed in pandas 1.0; .values yields the same ndarray
    # on both old and current pandas versions.
    X=data.drop('Survived',axis=1).values
    Y=data['Survived'].values
    logmodel = LogisticRegression()
    # Two sub-grids so that each penalty is only combined with the solvers
    # listed alongside it (7 C values x 2 solvers + 7 C values x 3 solvers
    # = 35 candidates).
    param_grid = [{
        'C': [10**x for x in range(-3,4)],
        'penalty':['l1'],
        'solver' :['liblinear','saga']},{
        'C': [10**x for x in range(-3,4)],
        'penalty':['l2'],
        'solver':['newton-cg','lbfgs','sag']}]
    gscv=GridSearchCV(logmodel,param_grid,scoring='accuracy',cv=cv_k_global,n_jobs=-1,verbose=1)
    gscv.fit(X,Y)
    # Persist every candidate's fold scores, not just the best one.
    results_df=pd.DataFrame(gscv.cv_results_)
    results_df.to_csv('./CV_LR_'+dataset+'.csv')

Grid-Searching for Data-Set 1
Fitting 10 folds for each of 35 candidates, totalling 350 fits
Grid-Searching for Data-Set 2
Fitting 10 folds for each of 35 candidates, totalling 350 fits


[Parallel(n_jobs=-1)]: Done 350 out of 350 | elapsed:    1.5s finished


Grid-Searching for Data-Set deck_pred_1
Fitting 10 folds for each of 35 candidates, totalling 350 fits


[Parallel(n_jobs=-1)]: Done 350 out of 350 | elapsed:    3.5s finished
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.2s


Grid-Searching for Data-Set deck_pred_2
Fitting 10 folds for each of 35 candidates, totalling 350 fits


[Parallel(n_jobs=-1)]: Done 350 out of 350 | elapsed:    1.5s finished
[Parallel(n_jobs=-1)]: Done 350 out of 350 | elapsed:    3.6s finished


## Logistic Regression with polynomial features

In [6]:
# Cross-validated grid search for logistic regression on polynomially
# expanded features, run once per entry of `datasets`. Reads
# ./data_train_<dataset>.csv and writes cv_results_ to ./CV_LR_X_<dataset>.csv.
for dataset in datasets:
    print('Grid-Searching for Data-Set '+dataset)
    data=pd.read_csv('./data_train_'+dataset+'.csv',index_col='PassengerId')
    # The 'deck_pred' files only have Missing_Embark dropped; the others also
    # lose Missing_Deck. The two cases are complementary, hence if/else.
    if 'deck_pred' in dataset:
        data=data.drop(['Missing_Embark'],axis=1)
    else:
        data=data.drop(['Missing_Embark','Missing_Deck'],axis=1)
    # .as_matrix() was removed in pandas 1.0; .values yields the same ndarray
    # on both old and current pandas versions.
    X=data.drop('Survived',axis=1).values
    Y=data['Survived'].values
    # Pipeline step names ('AddPoly', 'LR') are the prefixes used in the
    # '<step>__<param>' keys of param_grid below.
    estimators = [('AddPoly',PolynomialFeatures()),('LR',LogisticRegression())]
    # NOTE: despite the name, only polynomial degrees 2 and 3 are searched.
    quartic_log_reg = Pipeline(estimators)
    # Same penalty/solver split as the plain LR search, crossed with the two
    # polynomial degrees (2 x 7 x 2 + 2 x 7 x 3 = 70 candidates).
    param_grid = [{
        'AddPoly__degree':[2,3],
        'LR__C': [10**x for x in range(-3,4)],
        'LR__penalty':['l1'],
        'LR__solver' :['liblinear','saga']
        },{
        'AddPoly__degree':[2,3],
        'LR__C': [10**x for x in range(-3,4)],
        'LR__penalty':['l2'],
        'LR__solver' :['newton-cg','lbfgs','sag']
        }]
    gscv=GridSearchCV(quartic_log_reg,param_grid,scoring='accuracy',cv=cv_k_global,verbose=1,n_jobs=-1)
    gscv.fit(X,Y)
    # Persist every candidate's fold scores, not just the best one.
    results_df=pd.DataFrame(gscv.cv_results_)
    results_df.to_csv('./CV_LR_X_'+dataset+'.csv')

Grid-Searching for Data-Set 1
Fitting 10 folds for each of 70 candidates, totalling 700 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 700 out of 700 | elapsed:  3.5min finished


Grid-Searching for Data-Set 2
Fitting 10 folds for each of 70 candidates, totalling 700 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 450 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 669 out of 700 | elapsed: 15.0min remaining:   41.8s
[Parallel(n_jobs=-1)]: Done 700 out of 700 | elapsed: 18.2min finished


Grid-Searching for Data-Set deck_pred_1
Fitting 10 folds for each of 70 candidates, totalling 700 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.2s


Grid-Searching for Data-Set deck_pred_2
Fitting 10 folds for each of 70 candidates, totalling 700 fits


[Parallel(n_jobs=-1)]: Done 700 out of 700 | elapsed:  4.0min finished
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 370 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 642 tasks      | elapsed: 10.6min
[Parallel(n_jobs=-1)]: Done 669 out of 700 | elapsed: 18.1min remaining:   50.3s
[Parallel(n_jobs=-1)]: Done 700 out of 700 | elapsed: 24.6min finished


## AdaBC

In [7]:
# Cross-validated grid search for AdaBoostClassifier, run once per entry of
# `datasets`. Reads ./data_train_<dataset>.csv and writes cv_results_ to
# ./CV_ABC_<dataset>.csv.
for dataset in datasets:
    print('Grid-Searching for Data-Set '+dataset)
    data=pd.read_csv('./data_train_'+dataset+'.csv',index_col='PassengerId')
    # The 'deck_pred' files only have Missing_Embark dropped; the others also
    # lose Missing_Deck. The two cases are complementary, hence if/else.
    if 'deck_pred' in dataset:
        data=data.drop(['Missing_Embark'],axis=1)
    else:
        data=data.drop(['Missing_Embark','Missing_Deck'],axis=1)
    # .as_matrix() was removed in pandas 1.0; .values yields the same ndarray
    # on both old and current pandas versions.
    X=data.drop('Survived',axis=1).values
    Y=data['Survived'].values
    classif_ABC = AdaBoostClassifier()
    # n_estimators 10..200 in steps of 10, learning_rate 1e-4..1e4 by decades,
    # both boosting variants: 20 x 9 x 2 = 360 candidates.
    param_grid ={'n_estimators':[10*x for x in range(1,21)],
             'learning_rate':[10**x for x in range(-4,5)],
             'algorithm':['SAMME','SAMME.R']}
    gscv=GridSearchCV(classif_ABC,param_grid,scoring='accuracy',cv=cv_k_global,n_jobs=-1,verbose=1)
    gscv.fit(X,Y)
    # Persist every candidate's fold scores, not just the best one.
    results_df=pd.DataFrame(gscv.cv_results_)
    results_df.to_csv('./CV_ABC_'+dataset+'.csv')

Grid-Searching for Data-Set 1
Fitting 10 folds for each of 360 candidates, totalling 3600 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 862 tasks      | elapsed:   11.0s
[Parallel(n_jobs=-1)]: Done 1118 tasks      | elapsed:   14.5s
[Parallel(n_jobs=-1)]: Done 1468 tasks      | elapsed:   19.0s
[Parallel(n_jobs=-1)]: Done 2182 tasks      | elapsed:   29.3s
[Parallel(n_jobs=-1)]: Done 3282 tasks      | elapsed:   44.5s


Grid-Searching for Data-Set 2
Fitting 10 folds for each of 360 candidates, totalling 3600 fits


[Parallel(n_jobs=-1)]: Done 3600 out of 3600 | elapsed:   49.7s finished
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 905 tasks      | elapsed:   12.2s
[Parallel(n_jobs=-1)]: Done 1655 tasks      | elapsed:   22.3s
[Parallel(n_jobs=-1)]: Done 2705 tasks      | elapsed:   38.0s
[Parallel(n_jobs=-1)]: Done 3569 out of 3600 | elapsed:   51.8s remaining:    0.4s


Grid-Searching for Data-Set deck_pred_1
Fitting 10 folds for each of 360 candidates, totalling 3600 fits


[Parallel(n_jobs=-1)]: Done 3600 out of 3600 | elapsed:   52.4s finished
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 730 tasks      | elapsed:    9.1s
[Parallel(n_jobs=-1)]: Done 1230 tasks      | elapsed:   15.5s
[Parallel(n_jobs=-1)]: Done 1930 tasks      | elapsed:   24.5s
[Parallel(n_jobs=-1)]: Done 2830 tasks      | elapsed:   37.7s
[Parallel(n_jobs=-1)]: Done 3600 out of 3600 | elapsed:   49.1s finished


Grid-Searching for Data-Set deck_pred_2
Fitting 10 folds for each of 360 candidates, totalling 3600 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 676 tasks      | elapsed:    9.0s
[Parallel(n_jobs=-1)]: Done 926 tasks      | elapsed:   12.4s
[Parallel(n_jobs=-1)]: Done 1276 tasks      | elapsed:   17.1s
[Parallel(n_jobs=-1)]: Done 1726 tasks      | elapsed:   23.2s
[Parallel(n_jobs=-1)]: Done 2276 tasks      | elapsed:   31.5s
[Parallel(n_jobs=-1)]: Done 2926 tasks      | elapsed:   41.6s
[Parallel(n_jobs=-1)]: Done 3600 out of 3600 | elapsed:   52.7s finished


## GBC

In [8]:
# Cross-validated grid search for GradientBoostingClassifier, run once per
# entry of `datasets`. Reads ./data_train_<dataset>.csv and writes
# cv_results_ to ./CV_GBC_<dataset>.csv.
for dataset in datasets:
    print('Grid-Searching for Data-Set '+dataset)
    data=pd.read_csv('./data_train_'+dataset+'.csv',index_col='PassengerId')
    # The 'deck_pred' files only have Missing_Embark dropped; the others also
    # lose Missing_Deck. The two cases are complementary, hence if/else.
    if 'deck_pred' in dataset:
        data=data.drop(['Missing_Embark'],axis=1)
    else:
        data=data.drop(['Missing_Embark','Missing_Deck'],axis=1)
    # .as_matrix() was removed in pandas 1.0; .values yields the same ndarray
    # on both old and current pandas versions.
    X=data.drop('Survived',axis=1).values
    Y=data['Survived'].values
    classif_GBC = GradientBoostingClassifier()
    # Exhaustive grid: 2 losses x 9 learning rates x 2 criteria x 20 ensemble
    # sizes x 10 subsample fractions x 4 max_features settings = 28800
    # candidates, i.e. 288000 fits per dataset with 10 folds.
    param_grid ={'loss' : ['deviance', 'exponential'],
             'learning_rate':[10**x for x in range(-4,5)],
             'criterion':['friedman_mse','mse'],
             'n_estimators':[10*x for x in range(1,21)],
             'subsample':[x*0.1 for x in range(1,11)],
             'max_features':['auto','log2','sqrt',None]}
    gscv=GridSearchCV(classif_GBC,param_grid,scoring='accuracy',cv=cv_k_global,n_jobs=-1,verbose=1)
    gscv.fit(X,Y)
    # Persist every candidate's fold scores, not just the best one.
    results_df=pd.DataFrame(gscv.cv_results_)
    results_df.to_csv('./CV_GBC_'+dataset+'.csv')

Grid-Searching for Data-Set 1
Fitting 10 folds for each of 28800 candidates, totalling 288000 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 1500 tasks      | elapsed:   10.0s
[Parallel(n_jobs=-1)]: Done 2500 tasks      | elapsed:   17.6s
[Parallel(n_jobs=-1)]: Done 3900 tasks      | elapsed:   28.0s
[Parallel(n_jobs=-1)]: Done 5700 tasks      | elapsed:   38.3s
[Parallel(n_jobs=-1)]: Done 7900 tasks      | elapsed:   56.0s
[Parallel(n_jobs=-1)]: Done 10434 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 13434 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 16834 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done 20634 tasks      | elapsed:  2.4min
[Parallel(n_jobs=-1)]: Done 24834 tasks      | elapsed:  3.0min
[Parallel(n_jobs=-1)]: Done 29386 tasks      | elapsed:  3.5min
[Parallel(n_jobs=-1)]: Done 34386 tasks      | elapsed:  4.2min
[Parallel(n_jobs=-1)]: Done 39786 tasks      | elapsed:  4.8min
[Parallel(n_jobs=-1)]: Done 45586 tasks      | elapsed:  5.5min
[Parallel(n_jobs=-1)]: Done 51786 tasks      | 

Grid-Searching for Data-Set 2
Fitting 10 folds for each of 28800 candidates, totalling 288000 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 1455 tasks      | elapsed:   10.9s
[Parallel(n_jobs=-1)]: Done 1955 tasks      | elapsed:   18.8s
[Parallel(n_jobs=-1)]: Done 3913 tasks      | elapsed:   31.3s
[Parallel(n_jobs=-1)]: Done 5713 tasks      | elapsed:   41.7s
[Parallel(n_jobs=-1)]: Done 7887 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 9998 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 13689 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 16812 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 20703 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 25032 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 28806 tasks      | elapsed:  3.8min
[Parallel(n_jobs=-1)]: Done 33045 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done 39240 tasks      | elapsed:  5.3min
[Parallel(n_jobs=-1)]: Done 43650 tasks      | elapsed:  5.9min
[Parallel(n_jobs=-1)]: Done 49662 tasks      | e

Grid-Searching for Data-Set deck_pred_1
Fitting 10 folds for each of 28800 candidates, totalling 288000 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 1340 tasks      | elapsed:    8.1s
[Parallel(n_jobs=-1)]: Done 2090 tasks      | elapsed:   16.5s
[Parallel(n_jobs=-1)]: Done 3971 tasks      | elapsed:   28.8s
[Parallel(n_jobs=-1)]: Done 6224 tasks      | elapsed:   41.9s
[Parallel(n_jobs=-1)]: Done 8543 tasks      | elapsed:   59.4s
[Parallel(n_jobs=-1)]: Done 10676 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 14411 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 18173 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 23084 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 27074 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 31922 tasks      | elapsed:  4.0min
[Parallel(n_jobs=-1)]: Done 37286 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 43217 tasks      | elapsed:  5.3min
[Parallel(n_jobs=-1)]: Done 49274 tasks      | elapsed:  6.1min
[Parallel(n_jobs=-1)]: Done 56656 tasks      | 

Grid-Searching for Data-Set deck_pred_2
Fitting 10 folds for each of 28800 candidates, totalling 288000 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 1336 tasks      | elapsed:    9.6s
[Parallel(n_jobs=-1)]: Done 2086 tasks      | elapsed:   19.7s
[Parallel(n_jobs=-1)]: Done 3913 tasks      | elapsed:   32.0s
[Parallel(n_jobs=-1)]: Done 6082 tasks      | elapsed:   45.8s
[Parallel(n_jobs=-1)]: Done 8293 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 10243 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 12745 tasks      | elapsed:  1.7min
[Parallel(n_jobs=-1)]: Done 15817 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 19018 tasks      | elapsed:  2.6min
[Parallel(n_jobs=-1)]: Done 24055 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 27505 tasks      | elapsed:  3.8min
[Parallel(n_jobs=-1)]: Done 32512 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done 36961 tasks      | elapsed:  5.0min
[Parallel(n_jobs=-1)]: Done 42331 tasks      | elapsed:  5.9min
[Parallel(n_jobs=-1)]: Done 48283 tasks      | 

## Random Forests

In [9]:
# Cross-validated grid search for RandomForestClassifier, run once per entry
# of `datasets`. Reads ./data_train_<dataset>.csv and writes cv_results_ to
# ./CV_RFC_<dataset>.csv.
for dataset in datasets:
    print('Grid-Searching for Data-Set '+dataset)
    data=pd.read_csv('./data_train_'+dataset+'.csv',index_col='PassengerId')
    # The 'deck_pred' files only have Missing_Embark dropped; the others also
    # lose Missing_Deck. The two cases are complementary, hence if/else.
    if 'deck_pred' in dataset:
        data=data.drop(['Missing_Embark'],axis=1)
    else:
        data=data.drop(['Missing_Embark','Missing_Deck'],axis=1)
    # .as_matrix() was removed in pandas 1.0; .values yields the same ndarray
    # on both old and current pandas versions.
    X=data.drop('Survived',axis=1).values
    Y=data['Survived'].values
    rfc = RandomForestClassifier()
    # n_estimators 10..200 in steps of 10, both split criteria, four
    # max_features settings: 20 x 2 x 4 = 160 candidates.
    param_grid ={
        'n_estimators':[10*x for x in range(1,21)],
        'criterion':['gini','entropy'],
        'max_features':['auto','log2','sqrt',None]}
    gscv=GridSearchCV(rfc,param_grid,scoring='accuracy',cv=cv_k_global,verbose=1,n_jobs=-1)
    gscv.fit(X,Y)
    # Persist every candidate's fold scores, not just the best one.
    results_df=pd.DataFrame(gscv.cv_results_)
    results_df.to_csv('./CV_RFC_'+dataset+'.csv')

Grid-Searching for Data-Set 1
Fitting 10 folds for each of 160 candidates, totalling 1600 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 612 tasks      | elapsed:    9.5s
[Parallel(n_jobs=-1)]: Done 1112 tasks      | elapsed:   18.1s


Grid-Searching for Data-Set 2
Fitting 10 folds for each of 160 candidates, totalling 1600 fits


[Parallel(n_jobs=-1)]: Done 1600 out of 1600 | elapsed:   28.2s finished
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 612 tasks      | elapsed:    9.7s
[Parallel(n_jobs=-1)]: Done 969 tasks      | elapsed:   16.9s
[Parallel(n_jobs=-1)]: Done 1319 tasks      | elapsed:   22.5s
[Parallel(n_jobs=-1)]: Done 1600 out of 1600 | elapsed:   30.3s finished


Grid-Searching for Data-Set deck_pred_1
Fitting 10 folds for each of 160 candidates, totalling 1600 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 612 tasks      | elapsed:    9.4s
[Parallel(n_jobs=-1)]: Done 1112 tasks      | elapsed:   18.0s
[Parallel(n_jobs=-1)]: Done 1600 out of 1600 | elapsed:   28.4s finished


Grid-Searching for Data-Set deck_pred_2
Fitting 10 folds for each of 160 candidates, totalling 1600 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 783 tasks      | elapsed:   13.9s
[Parallel(n_jobs=-1)]: Done 1033 tasks      | elapsed:   18.0s
[Parallel(n_jobs=-1)]: Done 1383 tasks      | elapsed:   24.5s
[Parallel(n_jobs=-1)]: Done 1600 out of 1600 | elapsed:   30.7s finished


## KNN

In [10]:
# Cross-validated grid search for KNeighborsClassifier, run once per entry of
# `datasets`. Reads ./data_train_<dataset>.csv and writes cv_results_ to
# ./CV_KNN_<dataset>.csv.
for dataset in datasets:
    print('Grid-Searching for Data-Set '+dataset)
    data=pd.read_csv('./data_train_'+dataset+'.csv',index_col='PassengerId')
    # The 'deck_pred' files only have Missing_Embark dropped; the others also
    # lose Missing_Deck. The two cases are complementary, hence if/else.
    if 'deck_pred' in dataset:
        data=data.drop(['Missing_Embark'],axis=1)
    else:
        data=data.drop(['Missing_Embark','Missing_Deck'],axis=1)
    # .as_matrix() was removed in pandas 1.0; .values yields the same ndarray
    # on both old and current pandas versions.
    X=data.drop('Survived',axis=1).values
    Y=data['Survived'].values
    knn = KNeighborsClassifier()
    # Odd n_neighbors and leaf_size from 3 to 101, three search algorithms,
    # Manhattan/Euclidean (p=1/2), two weightings:
    # 50 x 3 x 2 x 2 x 50 = 30000 candidates.
    # NOTE(review): leaf_size presumably has no effect with algorithm='brute',
    # so a third of this grid may be redundant - confirm before trimming.
    param_grid = [{
        'n_neighbors':[2*x+1 for x in range(1,51)],
        'algorithm':[ 'ball_tree', 'kd_tree','brute'],
        'p':[1,2],
        'weights':['uniform','distance'],
        'leaf_size':[2*x+1 for x in range(1,51)]}]
    gscv=GridSearchCV(knn,param_grid,scoring='accuracy',cv=cv_k_global,verbose=1,n_jobs=-1)
    gscv.fit(X,Y)
    # Persist every candidate's fold scores, not just the best one.
    results_df=pd.DataFrame(gscv.cv_results_)
    results_df.to_csv('./CV_KNN_'+dataset+'.csv')

Grid-Searching for Data-Set 1
Fitting 10 folds for each of 30000 candidates, totalling 300000 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 1664 tasks      | elapsed:    4.8s
[Parallel(n_jobs=-1)]: Done 4664 tasks      | elapsed:   11.7s
[Parallel(n_jobs=-1)]: Done 8864 tasks      | elapsed:   21.1s
[Parallel(n_jobs=-1)]: Done 14264 tasks      | elapsed:   33.3s
[Parallel(n_jobs=-1)]: Done 20864 tasks      | elapsed:   47.8s
[Parallel(n_jobs=-1)]: Done 28664 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 37664 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 47864 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 59264 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 71864 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 85664 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 100664 tasks      | elapsed:  3.8min
[Parallel(n_jobs=-1)]: Done 109418 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done 121850 tasks      | elapsed:  4.7min
[Parallel(n_jobs=-1)]: Done 140450 tasks  

Grid-Searching for Data-Set 2
Fitting 10 folds for each of 30000 candidates, totalling 300000 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 1664 tasks      | elapsed:    6.0s
[Parallel(n_jobs=-1)]: Done 4664 tasks      | elapsed:   14.9s
[Parallel(n_jobs=-1)]: Done 8864 tasks      | elapsed:   26.9s
[Parallel(n_jobs=-1)]: Done 14264 tasks      | elapsed:   41.9s
[Parallel(n_jobs=-1)]: Done 20864 tasks      | elapsed:   59.8s
[Parallel(n_jobs=-1)]: Done 28664 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 37664 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 47864 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 59264 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 71864 tasks      | elapsed:  3.4min
[Parallel(n_jobs=-1)]: Done 85664 tasks      | elapsed:  4.0min
[Parallel(n_jobs=-1)]: Done 100664 tasks      | elapsed:  4.7min
[Parallel(n_jobs=-1)]: Done 108896 tasks      | elapsed:  5.4min
[Parallel(n_jobs=-1)]: Done 117596 tasks      | elapsed:  5.8min
[Parallel(n_jobs=-1)]: Done 126896 tasks  

Grid-Searching for Data-Set deck_pred_1
Fitting 10 folds for each of 30000 candidates, totalling 300000 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 1664 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-1)]: Done 4664 tasks      | elapsed:   12.0s
[Parallel(n_jobs=-1)]: Done 8864 tasks      | elapsed:   21.6s
[Parallel(n_jobs=-1)]: Done 14264 tasks      | elapsed:   34.0s
[Parallel(n_jobs=-1)]: Done 20864 tasks      | elapsed:   48.6s
[Parallel(n_jobs=-1)]: Done 28664 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 37664 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 47864 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 59264 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 71864 tasks      | elapsed:  2.7min
[Parallel(n_jobs=-1)]: Done 85664 tasks      | elapsed:  3.3min
[Parallel(n_jobs=-1)]: Done 100664 tasks      | elapsed:  3.8min
[Parallel(n_jobs=-1)]: Done 109274 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done 117974 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 127274 tasks  

Grid-Searching for Data-Set deck_pred_2
Fitting 10 folds for each of 30000 candidates, totalling 300000 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 1392 tasks      | elapsed:    4.8s
[Parallel(n_jobs=-1)]: Done 3892 tasks      | elapsed:   13.1s
[Parallel(n_jobs=-1)]: Done 7392 tasks      | elapsed:   22.8s
[Parallel(n_jobs=-1)]: Done 11892 tasks      | elapsed:   36.0s
[Parallel(n_jobs=-1)]: Done 17392 tasks      | elapsed:   50.7s
[Parallel(n_jobs=-1)]: Done 23892 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done 31392 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 39892 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done 49392 tasks      | elapsed:  2.3min
[Parallel(n_jobs=-1)]: Done 59892 tasks      | elapsed:  2.8min
[Parallel(n_jobs=-1)]: Done 71392 tasks      | elapsed:  3.4min
[Parallel(n_jobs=-1)]: Done 83892 tasks      | elapsed:  3.9min
[Parallel(n_jobs=-1)]: Done 97392 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done 106462 tasks      | elapsed:  5.2min
[Parallel(n_jobs=-1)]: Done 114212 tasks    

## SVM

In [None]:
# Cross-validated grid search for svm.SVC, run once per entry of `datasets`.
# Reads ./data_train_<dataset>.csv and writes cv_results_ to
# ./CV_SVM_<dataset>.csv.
for dataset in datasets:
    print('Grid-Searching for Data-Set '+dataset)
    data=pd.read_csv('./data_train_'+dataset+'.csv',index_col='PassengerId')
    # The 'deck_pred' files only have Missing_Embark dropped; the others also
    # lose Missing_Deck. The two cases are complementary, hence if/else.
    if 'deck_pred' in dataset:
        data=data.drop(['Missing_Embark'],axis=1)
    else:
        data=data.drop(['Missing_Embark','Missing_Deck'],axis=1)
    # .as_matrix() was removed in pandas 1.0; .values yields the same ndarray
    # on both old and current pandas versions.
    X=data.drop('Survived',axis=1).values
    Y=data['Survived'].values
    svm_class = svm.SVC()
    # 'degree' is only searched for the polynomial kernel, so it lives in its
    # own sub-grid: 7 x 4 x 2 (poly) + 7 x 3 x 2 (rbf/linear/sigmoid) = 98
    # candidates.
    # NOTE(review): if 'Survived' is binary, decision_function_shape
    # presumably does not change the fitted model - confirm before trimming.
    param_grid =[
        {'C':[10**x for x in range(-3,4)],
        'kernel':['poly'],
        'degree':[2,3,4,5],
        'decision_function_shape':['ovo','ovr']
        },
         {'C':[10**x for x in range(-3,4)],
        'kernel':['rbf','linear','sigmoid'],
        'decision_function_shape':['ovo','ovr']
         }
        ]
    gscv=GridSearchCV(svm_class,param_grid,scoring='accuracy',cv=cv_k_global,verbose=1,n_jobs=-1)
    gscv.fit(X,Y)
    # Persist every candidate's fold scores, not just the best one.
    results_df=pd.DataFrame(gscv.cv_results_)
    results_df.to_csv('./CV_SVM_'+dataset+'.csv')

Grid-Searching for Data-Set 1
Fitting 10 folds for each of 98 candidates, totalling 980 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 980 out of 980 | elapsed:  3.2min finished


Grid-Searching for Data-Set 2
Fitting 10 folds for each of 98 candidates, totalling 980 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.2s


Grid-Searching for Data-Set deck_pred_1
Fitting 10 folds for each of 98 candidates, totalling 980 fits


[Parallel(n_jobs=-1)]: Done 980 out of 980 | elapsed:  1.6min finished
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 980 out of 980 | elapsed:  3.0min finished


Grid-Searching for Data-Set deck_pred_2
Fitting 10 folds for each of 98 candidates, totalling 980 fits


[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.1s
