In [19]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Load the data

In [8]:
datafile = 'data/ionosphere.data'
colnames = [i for i in range(34)]
colnames.append('class')
df = pd.read_csv(datafile, names=colnames)

In [9]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25,26,27,28,29,30,31,32,33,class
0,1,0,0.99539,-0.05889,0.85243,0.02306,0.83398,-0.37708,1.0,0.0376,...,-0.51171,0.41078,-0.46168,0.21266,-0.3409,0.42267,-0.54487,0.18641,-0.453,g
1,1,0,1.0,-0.18829,0.93035,-0.36156,-0.10868,-0.93597,1.0,-0.04549,...,-0.26569,-0.20468,-0.18401,-0.1904,-0.11593,-0.16626,-0.06288,-0.13738,-0.02447,b
2,1,0,1.0,-0.03365,1.0,0.00485,1.0,-0.12062,0.88965,0.01198,...,-0.4022,0.58984,-0.22145,0.431,-0.17365,0.60436,-0.2418,0.56045,-0.38238,g
3,1,0,1.0,-0.45161,1.0,1.0,0.71216,-1.0,0.0,0.0,...,0.90695,0.51613,1.0,1.0,-0.20099,0.25682,1.0,-0.32382,1.0,b
4,1,0,1.0,-0.02401,0.9414,0.06531,0.92106,-0.23255,0.77152,-0.16399,...,-0.65158,0.1329,-0.53206,0.02431,-0.62197,-0.05707,-0.59573,-0.04608,-0.65697,g


## Split into `train` and `test`

In [11]:
train, test = train_test_split(df, )

In [15]:
xcols = df.columns[:-1]
ycol = df.columns[-1]

# Support Vector Classifier

In [16]:
svc = Pipeline([
    ('scale', StandardScaler()),
    ('svc', SVC())
])

In [20]:
parameters = {
    'svc__C': np.logspace(-3, 3),
    'svc__gamma': np.logspace(-2, 1)
}

In [28]:
gs = GridSearchCV(estimator=svc, param_grid=parameters,)

In [29]:
gs.fit(train[xcols], train[ycol])

GridSearchCV(cv=None, error_score='raise',
       estimator=Pipeline(memory=None,
     steps=[('scale', StandardScaler(copy=True, with_mean=True, with_std=True)), ('svc', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))]),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'svc__C': array([1.00000e-03, 1.32571e-03, 1.75751e-03, 2.32995e-03, 3.08884e-03,
       4.09492e-03, 5.42868e-03, 7.19686e-03, 9.54095e-03, 1.26486e-02,
       1.67683e-02, 2.22300e-02, 2.94705e-02, 3.90694e-02, 5.17947e-02,
       6.86649e-02, 9.10298e-02, 1.20679e-01, 1.59986e-01, 2.1...746,
        3.72759,  4.29193,  4.94171,  5.68987,  6.55129,  7.54312,
        8.68511, 10.     ])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [30]:
gs.cv_results_



{'mean_fit_time': array([0.00399677, 0.00295297, 0.00310008, ..., 0.00437252, 0.00396514,
        0.00393709]),
 'std_fit_time': array([0.00033002, 0.00020112, 0.00024576, ..., 0.0003898 , 0.0001775 ,
        0.00040842]),
 'mean_score_time': array([0.00126632, 0.00097855, 0.00102607, ..., 0.00131885, 0.00118136,
        0.00119185]),
 'std_score_time': array([8.56825056e-05, 5.81390548e-05, 8.86084252e-05, ...,
        6.98449342e-05, 2.59624587e-05, 2.19974789e-05]),
 'param_svc__C': masked_array(data=[0.001, 0.001, 0.001, ..., 1000.0, 1000.0, 1000.0],
              mask=[False, False, False, ..., False, False, False],
        fill_value='?',
             dtype=object),
 'param_svc__gamma': masked_array(data=[0.01, 0.011513953993264475, 0.013257113655901088, ...,
                    7.543120063354615, 8.68511373751352, 10.0],
              mask=[False, False, False, ..., False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'svc__C': 0.001, 'svc__gamm

In [31]:
gs.best_params_

{'svc__C': 1.151395399326447, 'svc__gamma': 0.04714866363457394}

In [40]:
gs.predict(train[xcols].iloc[0:5])

array(['g', 'g', 'g', 'g', 'g'], dtype=object)

In [41]:
train.iloc[0:5]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25,26,27,28,29,30,31,32,33,class
62,1,0,1.0,0.16801,0.99352,0.16334,0.94616,0.33347,0.91759,0.2261,...,0.55236,0.43317,0.69129,0.35684,0.76147,0.33921,0.66844,0.22101,0.78685,g
117,1,0,0.98182,0.0,0.88627,0.03131,0.86249,0.04572,0.8,0.0,...,0.22727,0.64581,0.15088,0.67273,0.02727,0.60715,0.16465,0.5884,0.17077,g
303,1,0,0.94052,-0.01531,0.9417,0.01001,0.94994,-0.01472,0.95878,-0.0106,...,-0.04476,0.92695,-0.05827,0.90342,-0.07479,0.91991,-0.07244,0.92049,-0.0742,g
310,1,0,0.93658,0.35107,0.75254,0.6564,0.45571,0.88576,0.15323,0.95776,...,-0.84951,-0.04578,-0.91221,0.2733,-0.85762,0.54827,-0.69613,0.74828,-0.44173,g
74,1,0,1.0,0.0507,1.0,0.10827,1.0,0.19498,1.0,0.28453,...,0.63942,0.59417,0.69435,0.49538,0.72684,0.47027,0.71689,0.33381,0.75243,g


In [43]:
train[train['class'] == 'g'].shape

(170, 35)

In [44]:
train[train['class'] != 'g'].shape

(93, 35)