In [420]:
import pandas as pd

# import train and test files
csv_train = pd.read_csv('train.csv', sep=',')
csv_test = pd.read_csv('test.csv', sep=',')
csv_train.head(1)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S


In [421]:
csv_train.describe()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
count,891.0,891.0,891.0,714.0,891.0,891.0,891.0
mean,446.0,0.383838,2.308642,29.699118,0.523008,0.381594,32.204208
std,257.353842,0.486592,0.836071,14.526497,1.102743,0.806057,49.693429
min,1.0,0.0,1.0,0.42,0.0,0.0,0.0
25%,223.5,0.0,2.0,20.125,0.0,0.0,7.9104
50%,446.0,0.0,3.0,28.0,0.0,0.0,14.4542
75%,668.5,1.0,3.0,38.0,1.0,0.0,31.0
max,891.0,1.0,3.0,80.0,8.0,6.0,512.3292


In [422]:
csv_test.describe()

Unnamed: 0,PassengerId,Pclass,Age,SibSp,Parch,Fare
count,418.0,418.0,332.0,418.0,418.0,417.0
mean,1100.5,2.26555,30.27259,0.447368,0.392344,35.627188
std,120.810458,0.841838,14.181209,0.89676,0.981429,55.907576
min,892.0,1.0,0.17,0.0,0.0,0.0
25%,996.25,1.0,21.0,0.0,0.0,7.8958
50%,1100.5,3.0,27.0,0.0,0.0,14.4542
75%,1204.75,3.0,39.0,1.0,0.0,31.5
max,1309.0,3.0,76.0,8.0,9.0,512.3292


In [423]:
# start building X_data. Filter from the csv_train, only the OK fields
X_data_train = csv_train.filter(items=["Age", "SibSp", "Parch", "Fare"])
X_data_test = csv_test.filter(items=["Age", "SibSp", "Parch", "Fare"])

In [424]:
# let's also create our y_data_train for fitting and scoring purposes
y_data_train = csv_train.filter(items=["Survived"]).values
y_data_train = y_data_train.reshape(-1,1)

In [425]:
# concatenate train and test together. I HOPE THE INDICES DON'T CHANGE
X_data = pd.concat([X_data_train, X_data_test], axis=0)
print("X_data length :" + str(X_data.shape[0]))

X_data length :1309


In [426]:
# now we want to apply regex to separate the title from the the rest of the name with the help of regex
import re

# first, we get store the column values from each of original dataframes
passenger_titles_train = csv_train['Name'].values
passenger_titles_test = csv_test['Name'].values

# now we filter the title of each passenger with the help of RegEx
regex = r', (.*)\. '
for name_index in range(0, len(passenger_titles_train)):
    passenger_titles_train[name_index] = re.search(regex, passenger_titles_train[name_index]).group(1)

for name_index in range(0, len(passenger_titles_test)):
    passenger_titles_test[name_index] = re.search(regex, passenger_titles_test[name_index]).group(1)

# for some reason, the dataframe itself was changed, so the passenger names are already up to date.
# let's test this hypothesis
csv_train.head(1)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,Mr,male,22.0,1,0,A/5 21171,7.25,,S


In [427]:
# now we just need to concatenate both dfs to one single column and add them to the X_data
temp_df_titles = pd.concat([csv_train['Name'], csv_test['Name']], axis=0)
# X_data = pd.concat([temp_df_titles, X_data], axis=1)
print(X_data.shape)
X_data.describe()


(1309, 4)


Unnamed: 0,Age,SibSp,Parch,Fare
count,1046.0,1309.0,1309.0,1308.0
mean,29.881138,0.498854,0.385027,33.295479
std,14.413493,1.041658,0.86556,51.758668
min,0.17,0.0,0.0,0.0
25%,21.0,0.0,0.0,7.8958
50%,28.0,0.0,0.0,14.4542
75%,39.0,1.0,0.0,31.275
max,80.0,8.0,9.0,512.3292


In [428]:
# now, we need to transform all the categorical features into numerical
from sklearn.preprocessing import LabelEncoder

# we'll initiate Label Encoder and create useful variables for this process
le = LabelEncoder()

# list of columns we want to tranform, concatenate both csv files
cols_to_label = ['Sex', 'Name']
train_test_concat = pd.concat([csv_train, csv_test], axis=0)

# iterate through columns selected above and fit_transform each of them
for col in cols_to_label:
    le.fit(train_test_concat[col].values)
    train_test_concat[col] = le.transform(train_test_concat[col])

#let's confirm that Sex and Name are now categorical values
train_test_concat.describe()

Unnamed: 0,Age,Fare,Name,Parch,PassengerId,Pclass,Sex,SibSp,Survived
count,1046.0,1308.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,891.0
mean,29.881138,33.295479,11.278839,0.385027,655.0,2.294882,0.644003,0.498854,0.383838
std,14.413493,51.758668,1.856311,0.86556,378.020061,0.837836,0.478997,1.041658,0.486592
min,0.17,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
25%,21.0,7.8958,9.0,0.0,328.0,2.0,0.0,0.0,0.0
50%,28.0,14.4542,12.0,0.0,655.0,3.0,1.0,0.0,0.0
75%,39.0,31.275,12.0,0.0,982.0,3.0,1.0,1.0,1.0
max,80.0,512.3292,18.0,9.0,1309.0,3.0,1.0,8.0,1.0


In [429]:
train_test_concat.head(6)

Unnamed: 0,Age,Cabin,Embarked,Fare,Name,Parch,PassengerId,Pclass,Sex,SibSp,Survived,Ticket
0,22.0,,S,7.25,12,0,1,3,1,1,0.0,A/5 21171
1,38.0,C85,C,71.2833,13,0,2,1,0,1,1.0,PC 17599
2,26.0,,S,7.925,9,0,3,3,0,0,1.0,STON/O2. 3101282
3,35.0,C123,S,53.1,13,0,4,1,0,1,1.0,113803
4,35.0,,S,8.05,12,0,5,3,1,0,0.0,373450
5,,,Q,8.4583,12,0,6,3,1,0,0.0,330877


In [430]:
#### transform all the numerical/categorical features into a column for each category of its own
from sklearn.preprocessing import OneHotEncoder

# now we use onehotencoder on Sex, Pclass and Name to create column for the category of each feature
enc = OneHotEncoder(sparse=False)
cols_to_ohe = ['Sex', 'Pclass', 'Name']
for col in cols_to_ohe:
    data = train_test_concat[[col]]
    enc.fit(data)
    temp = enc.transform(data[[col]])
    temp = pd.DataFrame(temp,columns=[(col+"_"+str(i)) for i in data[col].value_counts().index])
    temp = temp.set_index(train_test_concat.index.values)
    X_data = pd.concat([X_data, temp], axis=1)

# let's print the head of X_data
X_data.describe()

Unnamed: 0,Age,SibSp,Parch,Fare,Sex_1,Sex_0,Pclass_3,Pclass_1,Pclass_2,Name_12,...,Name_7,Name_5,Name_2,Name_3,Name_18,Name_6,Name_17,Name_11,Name_14,Name_0
count,1046.0,1309.0,1309.0,1308.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,...,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0
mean,29.881138,0.498854,0.385027,33.295479,0.355997,0.644003,0.246753,0.211612,0.541635,0.000764,...,0.198625,0.001528,0.000764,0.578304,0.149733,0.000764,0.001528,0.006112,0.000764,0.000764
std,14.413493,1.041658,0.86556,51.758668,0.478997,0.478997,0.431287,0.408607,0.498454,0.027639,...,0.399117,0.039073,0.027639,0.494019,0.356946,0.027639,0.039073,0.077967,0.027639,0.027639
min,0.17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,21.0,0.0,0.0,7.8958,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,28.0,0.0,0.0,14.4542,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,39.0,1.0,0.0,31.275,1.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
max,80.0,8.0,9.0,512.3292,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [431]:
# imputer on columns with NaN
from sklearn.preprocessing import Imputer

# X_data still has csv_trian and csv_test data (basically all), so we can run the imputer on it, before splitting
# define which columns have NaN values
rows_imputer = ['Age', 'Fare']

# define, fit the Imputer
imputer = Imputer()
imputer.fit(X_data[rows_imputer])

# transform and match in oneline
X_data[rows_imputer] = imputer.transform(X_data[rows_imputer])
X_data.describe()

Unnamed: 0,Age,SibSp,Parch,Fare,Sex_1,Sex_0,Pclass_3,Pclass_1,Pclass_2,Name_12,...,Name_7,Name_5,Name_2,Name_3,Name_18,Name_6,Name_17,Name_11,Name_14,Name_0
count,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,...,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0
mean,29.881138,0.498854,0.385027,33.295479,0.355997,0.644003,0.246753,0.211612,0.541635,0.000764,...,0.198625,0.001528,0.000764,0.578304,0.149733,0.000764,0.001528,0.006112,0.000764,0.000764
std,12.883193,1.041658,0.86556,51.738879,0.478997,0.478997,0.431287,0.408607,0.498454,0.027639,...,0.399117,0.039073,0.027639,0.494019,0.356946,0.027639,0.039073,0.077967,0.027639,0.027639
min,0.17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,22.0,0.0,0.0,7.8958,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,29.881138,0.0,0.0,14.4542,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,35.0,1.0,0.0,31.275,1.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
max,80.0,8.0,9.0,512.3292,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [432]:
X_data.describe()

Unnamed: 0,Age,SibSp,Parch,Fare,Sex_1,Sex_0,Pclass_3,Pclass_1,Pclass_2,Name_12,...,Name_7,Name_5,Name_2,Name_3,Name_18,Name_6,Name_17,Name_11,Name_14,Name_0
count,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,...,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0
mean,29.881138,0.498854,0.385027,33.295479,0.355997,0.644003,0.246753,0.211612,0.541635,0.000764,...,0.198625,0.001528,0.000764,0.578304,0.149733,0.000764,0.001528,0.006112,0.000764,0.000764
std,12.883193,1.041658,0.86556,51.738879,0.478997,0.478997,0.431287,0.408607,0.498454,0.027639,...,0.399117,0.039073,0.027639,0.494019,0.356946,0.027639,0.039073,0.077967,0.027639,0.027639
min,0.17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,22.0,0.0,0.0,7.8958,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,29.881138,0.0,0.0,14.4542,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,35.0,1.0,0.0,31.275,1.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
max,80.0,8.0,9.0,512.3292,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [433]:
# feature scalling
from sklearn.preprocessing import MinMaxScaler

# define the scaller
scaller = MinMaxScaler()

for col in X_data.columns.values:
    #print(col)
    #print(X_data[col].describe())
    scaller.fit(X_data[col].values.reshape(-1,1))
    X_data[col] = scaller.transform(X_data[col].values.reshape(-1,1))

X_data.describe()



Unnamed: 0,Age,SibSp,Parch,Fare,Sex_1,Sex_0,Pclass_3,Pclass_1,Pclass_2,Name_12,...,Name_7,Name_5,Name_2,Name_3,Name_18,Name_6,Name_17,Name_11,Name_14,Name_0
count,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,...,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0,1309.0
mean,0.37218,0.062357,0.042781,0.064988,0.355997,0.644003,0.246753,0.211612,0.541635,0.000764,...,0.198625,0.001528,0.000764,0.578304,0.149733,0.000764,0.001528,0.006112,0.000764,0.000764
std,0.161383,0.130207,0.096173,0.100988,0.478997,0.478997,0.431287,0.408607,0.498454,0.027639,...,0.399117,0.039073,0.027639,0.494019,0.356946,0.027639,0.039073,0.077967,0.027639,0.027639
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.273456,0.0,0.0,0.015412,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.37218,0.0,0.0,0.028213,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.436302,0.125,0.0,0.061045,1.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [434]:
X_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1309 entries, 0 to 417
Data columns (total 28 columns):
Age         1309 non-null float64
SibSp       1309 non-null float64
Parch       1309 non-null float64
Fare        1309 non-null float64
Sex_1       1309 non-null float64
Sex_0       1309 non-null float64
Pclass_3    1309 non-null float64
Pclass_1    1309 non-null float64
Pclass_2    1309 non-null float64
Name_12     1309 non-null float64
Name_9      1309 non-null float64
Name_13     1309 non-null float64
Name_8      1309 non-null float64
Name_16     1309 non-null float64
Name_4      1309 non-null float64
Name_1      1309 non-null float64
Name_15     1309 non-null float64
Name_10     1309 non-null float64
Name_7      1309 non-null float64
Name_5      1309 non-null float64
Name_2      1309 non-null float64
Name_3      1309 non-null float64
Name_18     1309 non-null float64
Name_6      1309 non-null float64
Name_17     1309 non-null float64
Name_11     1309 non-null float64
Name_14   

In [435]:
X_data.head(10)

Unnamed: 0,Age,SibSp,Parch,Fare,Sex_1,Sex_0,Pclass_3,Pclass_1,Pclass_2,Name_12,...,Name_7,Name_5,Name_2,Name_3,Name_18,Name_6,Name_17,Name_11,Name_14,Name_0
0,0.273456,0.125,0.0,0.014151,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.473882,0.125,0.0,0.139136,1.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,0.323563,0.0,0.0,0.015469,1.0,0.0,0.0,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.436302,0.125,0.0,0.103644,1.0,0.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,0.436302,0.0,0.0,0.015713,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.37218,0.0,0.0,0.01651,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.674308,0.0,0.0,0.101229,0.0,1.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.022924,0.375,0.111111,0.041136,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.336089,0.0,0.222222,0.021731,1.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
9,0.173243,0.125,0.0,0.058694,1.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [436]:
# is there something else we can try?

In [437]:
# we need to separate the known that from the unseen data from X_data.

# store the lenght of both train and test for later separation
train_length = X_data_train.shape[0]
test_length = X_data_test.shape[0]
print("train length: " + str(train_length))
print("test length: " + str(test_length))

# for this, we need the initial length of the known data 
X_data_train = X_data[:train_length]
X_data_train.shape
X_data_test = X_data[train_length:]
X_data_test.shape

train length: 891
test length: 418


(418, 28)

In [438]:
# create a "local" train and test with the csv_train data. we'll call the test portion, a "cross validation cv"
from sklearn.model_selection import train_test_split
from random import randint

# define the random state
randstate = randint(0,100)

# now we use the model_selection to split our data in trainning and cross validation
X_train, X_cv, y_train, y_cv = train_test_split(X_data_train, y_data_train, test_size=0.7, random_state=randstate)

# transform X_train, X_cv, X_data_train into Numpy
X_train = X_train.values
X_cv = X_cv.values
X_data_test = X_data_test.values

In [439]:
y_cv = y_cv.flatten()
print(y_temp.shape)
print(y_temp.ravel())
print(y_temp.shape)
y_temp = y_cv

(446,)
[1 0 0 1 0 0 1 0 0 1 0 0 1 1 1 1 0 1 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0
 0 0 1 0 0 0 1 1 0 0 0 0 1 1 0 0 1 1 1 0 0 0 1 0 0 1 0 0 1 1 1 0 0 1 0 1 1
 1 0 0 0 0 0 1 1 0 0 0 1 1 1 0 1 0 0 0 1 0 0 0 0 0 0 0 1 1 1 1 0 1 0 0 1 0
 0 0 0 0 1 0 1 1 1 0 0 0 0 0 1 0 1 1 0 1 0 0 0 0 0 0 0 0 1 1 1 0 0 1 1 1 0
 0 0 0 1 0 1 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0
 1 1 0 0 0 0 1 0 0 1 0 0 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 0 0 1
 1 0 1 0 1 0 1 1 1 0 1 1 0 1 1 0 1 1 0 1 0 0 0 0 0 1 0 1 1 0 1 1 1 1 0 0 0
 1 1 0 0 0 1 0 1 1 1 0 0 1 1 0 1 1 0 0 1 1 0 1 0 1 0 1 1 0 0 1 1 0 0 0 0 1
 1 0 0 1 0 1 1 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 1 1 0 1 0 0 0 1 0 0 0 0 1 0
 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 1 0 1 0 1 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 1
 0 0 0 0 1 0 0 1 0 0 1 1 1 0 0 1 0 0 0 0 1 1 1 0 1 0 1 0 0 0 0 1 0 1 1 0 0
 1 1 1 1 1 0 0 0 0 0 0 0 1 0 0 1 1 1 0 0 0 1 0 1 1 1 0 1 0 1 0 0 1 1 0 0 0
 0 1]
(446,)


In [440]:
X_cv.shape

(446, 28)

In [441]:
c_values = [0.01, 0.1, 1, 10]
kernel_types = ['linear', 'poly', 'rbf', 'sigmoid']

## Now we try the models

#### LINEAR REGRESSION

In [442]:
# start trying with linear regression
from sklearn.linear_model import LogisticRegression
import numpy as np

# for some reason .reshape(-1,1) to transoform into a 1D array, isn't working, so we create a copy of y_train
# and y_cv with .flatten() numpy methon.
y_train = y_train.flatten()
y_cv = y_cv.flatten()

# start a loop to itirate over all the parameters we are trying out
# i change y_train first due to an error that tells me to use gravel on the 
for c in c_values:
    clf = LogisticRegression(C=c, random_state=randstate)
    clf.fit(X_train, y_train)
    score = clf.score(X_cv, y_cv)
    print("Linear Regression | C = %f | Score = %f"% (c, score))

Linear Regression | C = 0.010000 | Score = 0.793722
Linear Regression | C = 0.100000 | Score = 0.840807
Linear Regression | C = 1.000000 | Score = 0.836323
Linear Regression | C = 10.000000 | Score = 0.827354


#### SUPPORT VECTOR MACHINES

In [443]:
# continue our models test with SVMs
from sklearn.svm import SVC

for c in c_values:
    for kern in kernel_types:
        clf = SVC(C=c, kernel=kern, gamma='auto')
        clf.fit(X_train, y_train)
        score = clf.score(X_cv, y_cv)
        print("Support Vector Machines | C = %f | Kernel = %s | Score = %f"% (c, kern, score))

Support Vector Machines | C = 0.010000 | Kernel = linear | Score = 0.820628
Support Vector Machines | C = 0.010000 | Kernel = poly | Score = 0.607623
Support Vector Machines | C = 0.010000 | Kernel = rbf | Score = 0.607623
Support Vector Machines | C = 0.010000 | Kernel = sigmoid | Score = 0.607623
Support Vector Machines | C = 0.100000 | Kernel = linear | Score = 0.804933
Support Vector Machines | C = 0.100000 | Kernel = poly | Score = 0.607623
Support Vector Machines | C = 0.100000 | Kernel = rbf | Score = 0.820628
Support Vector Machines | C = 0.100000 | Kernel = sigmoid | Score = 0.607623
Support Vector Machines | C = 1.000000 | Kernel = linear | Score = 0.816143
Support Vector Machines | C = 1.000000 | Kernel = poly | Score = 0.607623
Support Vector Machines | C = 1.000000 | Kernel = rbf | Score = 0.811659
Support Vector Machines | C = 1.000000 | Kernel = sigmoid | Score = 0.811659
Support Vector Machines | C = 10.000000 | Kernel = linear | Score = 0.840807
Support Vector Machines