In [1]:
from time import time

import numpy as np

from sklearn.cross_validation import train_test_split
from sklearn.grid_search import ParameterGrid
from sklearn.feature_extraction import DictVectorizer as DV
from sklearn.svm import SVC

from get_data import data

In [2]:
# Load the dataset through the project-local get_data.data() helper.
# Later cells index the result by column name and call
# .to_dict(orient='records') on it, so it is presumably a pandas
# DataFrame containing a 'class' label column -- TODO confirm in get_data.
df = data()

In [3]:
# Predictors are every column except the label; 'class' is the target.
feature_columns = [col for col in df if col != 'class']
X = df[feature_columns]
y = df['class'].values

# DictVectorizer expects one dict per row, so hand it the frame as a list of
# records. sparse=False makes it return a dense array rather than a sparse
# matrix.
vectorizer = DV(sparse=False)
row_dicts = X.to_dict(orient='records')
X_binarized = vectorizer.fit_transform(row_dicts)


# 80% of the rows go to training; random_state pins the shuffle so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X_binarized,
    y,
    train_size=.8,
    random_state=42,
)

In [5]:
# Coarse sweep: RBF kernel with C at each power of ten from 1 to 1000.
# ParameterGrid expands the dict into one {'C': ..., 'kernel': ...} dict per
# combination; list() materialises it so it can be iterated more than once.
coarse_search_space = {'C': [1, 10, 100, 1000], 'kernel': ['rbf']}
param_grid = list(ParameterGrid(coarse_search_space))

In [6]:
# Fit one SVC per parameter setting in param_grid and report its held-out
# error rate together with the wall-clock time spent in fit().
#
# NOTE(review): every candidate is scored on X_test, so choosing C from these
# numbers uses the test split for model selection and the best error shown is
# likely optimistic. Consider a separate validation split or cross-validation.
for params in param_grid:
    classifier = SVC(**params)

    # Only the fit call sits between the two timestamps.
    t0 = time()
    classifier.fit(X_train, y_train)
    t1 = time()

    # score() gives mean accuracy on the held-out rows; error is its complement.
    accuracy = classifier.score(X_test, y_test)
    error = 1 - accuracy

    # Single-argument print(...) prints the same text on Python 2 and is valid
    # on Python 3, where the bare print statement is a SyntaxError.
    print('\nTest error: {} Time to train: {}'.format(error, t1 - t0))
    print('Params: {}'.format(params))


Test error: 0.246737294641 Time to train: 245.705627918
Params: {'kernel': 'rbf', 'C': 1}

Test error: 0.256870873637 Time to train: 306.675590038
Params: {'kernel': 'rbf', 'C': 10}

Test error: 0.257638569016 Time to train: 303.641685009
Params: {'kernel': 'rbf', 'C': 100}

Test error: 0.257638569016 Time to train: 326.941473961
Params: {'kernel': 'rbf', 'C': 1000}


In [8]:
# Finer sweep below C = 1: nine evenly spaced values from 0.1 to 0.9, again
# with the RBF kernel. .tolist() converts the NumPy floats to plain Python
# floats before they are placed in the grid.
fine_c_values = np.linspace(.1, .9, 9).tolist()
param_grid = list(ParameterGrid({'C': fine_c_values, 'kernel': ['rbf']}))

In [9]:
# Repeat the train-and-score sweep over whatever param_grid currently holds,
# printing the held-out error rate and fit time for each setting.
#
# NOTE(review): the comparison between settings is made on X_test, i.e. the
# test split doubles as the model-selection set, so the lowest error printed
# here is probably an optimistic estimate of generalisation error.
for params in param_grid:
    classifier = SVC(**params)

    # Wall-clock time of fit() only; scoring happens after t1 is taken.
    t0 = time()
    classifier.fit(X_train, y_train)
    t1 = time()

    # score() gives mean accuracy on the held-out rows; error is its complement.
    accuracy = classifier.score(X_test, y_test)
    error = 1 - accuracy

    # Parenthesised single-argument print works identically on Python 2 and
    # avoids the SyntaxError the print statement causes on Python 3.
    print('\nTest error: {} Time to train: {}'.format(error, t1 - t0))
    print('Params: {}'.format(params))


Test error: 0.241209887916 Time to train: 198.407383204
Params: {'kernel': 'rbf', 'C': 0.1}

Test error: 0.241209887916 Time to train: 229.189439058
Params: {'kernel': 'rbf', 'C': 0.2}

Test error: 0.241209887916 Time to train: 231.348419905
Params: {'kernel': 'rbf', 'C': 0.30000000000000004}

Test error: 0.240595731614 Time to train: 337.798237085
Params: {'kernel': 'rbf', 'C': 0.4}

Test error: 0.240288653462 Time to train: 260.568063021
Params: {'kernel': 'rbf', 'C': 0.5}

Test error: 0.23967449716 Time to train: 213.04496789
Params: {'kernel': 'rbf', 'C': 0.6}

Test error: 0.239367419008 Time to train: 215.627848864
Params: {'kernel': 'rbf', 'C': 0.7000000000000001}

Test error: 0.240595731614 Time to train: 217.814664125
Params: {'kernel': 'rbf', 'C': 0.8}

Test error: 0.244127130355 Time to train: 218.145750999
Params: {'kernel': 'rbf', 'C': 0.9}
