
___
# Support Vector Machines with Python

#### https://www.kaggle.com/c/leaf-classification/data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

## Get the Data


In [2]:
leaf_df = pd.read_csv('train.csv')

In [3]:
leaf_df.head()

Unnamed: 0,id,species,margin1,margin2,margin3,margin4,margin5,margin6,margin7,margin8,...,texture55,texture56,texture57,texture58,texture59,texture60,texture61,texture62,texture63,texture64
0,1,Acer_Opalus,0.007812,0.023438,0.023438,0.003906,0.011719,0.009766,0.027344,0.0,...,0.007812,0.0,0.00293,0.00293,0.035156,0.0,0.0,0.004883,0.0,0.025391
1,2,Pterocarya_Stenoptera,0.005859,0.0,0.03125,0.015625,0.025391,0.001953,0.019531,0.0,...,0.000977,0.0,0.0,0.000977,0.023438,0.0,0.0,0.000977,0.039062,0.022461
2,3,Quercus_Hartwissiana,0.005859,0.009766,0.019531,0.007812,0.003906,0.005859,0.068359,0.0,...,0.1543,0.0,0.005859,0.000977,0.007812,0.0,0.0,0.0,0.020508,0.00293
3,5,Tilia_Tomentosa,0.0,0.003906,0.023438,0.005859,0.021484,0.019531,0.023438,0.0,...,0.0,0.000977,0.0,0.0,0.020508,0.0,0.0,0.017578,0.0,0.047852
4,6,Quercus_Variabilis,0.005859,0.003906,0.048828,0.009766,0.013672,0.015625,0.005859,0.0,...,0.09668,0.0,0.021484,0.0,0.0,0.0,0.0,0.0,0.0,0.03125


In [4]:
leaf_df['species'].value_counts()

Acer_Circinatum                10
Alnus_Viridis                  10
Tilia_Platyphyllos             10
Alnus_Sieboldiana              10
Quercus_Imbricaria             10
Magnolia_Heptapeta             10
Ilex_Aquifolium                10
Ilex_Cornuta                   10
Morus_Nigra                    10
Acer_Opalus                    10
Quercus_Phellos                10
Crataegus_Monogyna             10
Prunus_Avium                   10
Tilia_Tomentosa                10
Quercus_Cerris                 10
Populus_Adenopoda              10
Lithocarpus_Edulis             10
Quercus_Castaneifolia          10
Quercus_Rubra                  10
Quercus_Trojana                10
Quercus_Palustris              10
Quercus_Alnifolia              10
Acer_Rubrum                    10
Quercus_x_Hispanica            10
Quercus_Nigra                  10
Acer_Mono                      10
Cytisus_Battandieri            10
Betula_Austrosinensis          10
Quercus_Pyrenaica              10
Liriodendron_T

In [5]:
leaf_df.drop('id', axis=1, inplace=True)

In [6]:
leaf_df.head(2)

Unnamed: 0,species,margin1,margin2,margin3,margin4,margin5,margin6,margin7,margin8,margin9,...,texture55,texture56,texture57,texture58,texture59,texture60,texture61,texture62,texture63,texture64
0,Acer_Opalus,0.007812,0.023438,0.023438,0.003906,0.011719,0.009766,0.027344,0.0,0.001953,...,0.007812,0.0,0.00293,0.00293,0.035156,0.0,0.0,0.004883,0.0,0.025391
1,Pterocarya_Stenoptera,0.005859,0.0,0.03125,0.015625,0.025391,0.001953,0.019531,0.0,0.0,...,0.000977,0.0,0.0,0.000977,0.023438,0.0,0.0,0.000977,0.039062,0.022461


In [7]:
predictors = leaf_df.drop('species', axis = 1)
target = leaf_df['species']

## Train Test Split

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.30, random_state=101, stratify = target)

# Train the Support Vector Classifier

In [10]:
from sklearn.svm import SVC

In [11]:
svc = SVC()

In [12]:
svc.fit(X_train,y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

## Predictions and Evaluations

Now let's predict using the trained model.

In [13]:
predictions = svc.predict(X_test)

In [14]:
from sklearn.metrics import classification_report,confusion_matrix, accuracy_score

In [15]:
print(confusion_matrix(y_test,predictions))

[[3 0 0 ..., 0 0 0]
 [0 3 0 ..., 0 0 0]
 [0 0 3 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 3 0 0]
 [0 0 0 ..., 0 3 0]
 [0 1 0 ..., 0 0 2]]


In [16]:
print(classification_report(y_test,predictions))

                              precision    recall  f1-score   support

             Acer_Capillipes       1.00      1.00      1.00         3
             Acer_Circinatum       0.75      1.00      0.86         3
                   Acer_Mono       1.00      1.00      1.00         3
                 Acer_Opalus       0.67      0.67      0.67         3
               Acer_Palmatum       1.00      1.00      1.00         3
                 Acer_Pictum       1.00      0.67      0.80         3
             Acer_Platanoids       1.00      0.67      0.80         3
                 Acer_Rubrum       0.60      1.00      0.75         3
              Acer_Rufinerve       1.00      1.00      1.00         3
            Acer_Saccharinum       1.00      1.00      1.00         3
               Alnus_Cordata       0.75      1.00      0.86         3
          Alnus_Maximowiczii       0.75      1.00      0.86         3
                 Alnus_Rubra       1.00      0.67      0.80         3
           Alnus_Si

  'precision', 'predicted', average, warn_for)


In [17]:
print(accuracy_score(y_test, predictions))

0.774410774411


# Gridsearch

Finding the right parameters (like what C or gamma values to use) is a tricky task! But luckily, we can be a little lazy and just try a bunch of combinations and see what works best! This idea of creating a 'grid' of parameters and just trying out all the possible combinations is called a Gridsearch, this method is common enough that Scikit-learn has this functionality built in with GridSearchCV! The CV stands for cross-validation

GridSearchCV takes a dictionary that describes the parameters that should be tried and a model to train. The grid of parameters is defined as a dictionary, where the keys are the parameters and the values are the settings to be tested. 

In [18]:
param_grid = {'C': [0.1,1, 10, 100, 1000], 'gamma': [1,0.1,0.01,0.001,0.0001], 'kernel': ['rbf', 'poly']} 

In [19]:
from sklearn.model_selection import GridSearchCV

One of the great things about GridSearchCV is that it is a meta-estimator. It takes an estimator like SVC, and creates a new estimator, that behaves exactly the same - in this case, like a classifier. You should add refit=True and choose verbose to whatever number you want, higher the number, the more verbose (verbose just means the text output describing the process).

In [20]:
grid = GridSearchCV(SVC(),param_grid,refit=True, verbose=3)
#grid = GridSearchCV(SVC(),param_grid,refit=True, cv =5, verbose=3)

What fit does is a bit more involved then usual. First, it runs the same loop with cross-validation, to find the best parameter combination. Once it has the best combination, it runs fit again on all data passed to fit (without cross-validation), to built a single new model using the best parameter setting.

In [21]:
# May take awhile!
grid.fit(X_train,y_train)

Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] ....... C=0.1, gamma=1, kernel=rbf, score=0.747475, total=   0.1s
[CV] C=0.1, gamma=1, kernel=rbf ......................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s


[CV] ....... C=0.1, gamma=1, kernel=rbf, score=0.747475, total=   0.2s
[CV] C=0.1, gamma=1, kernel=rbf ......................................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.7s remaining:    0.0s


[CV] ....... C=0.1, gamma=1, kernel=rbf, score=0.808081, total=   0.2s
[CV] C=0.1, gamma=1, kernel=poly .....................................
[CV] ...... C=0.1, gamma=1, kernel=poly, score=0.619529, total=   0.1s
[CV] C=0.1, gamma=1, kernel=poly .....................................
[CV] ...... C=0.1, gamma=1, kernel=poly, score=0.671717, total=   0.1s
[CV] C=0.1, gamma=1, kernel=poly .....................................
[CV] ...... C=0.1, gamma=1, kernel=poly, score=0.681818, total=   0.1s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ..... C=0.1, gamma=0.1, kernel=rbf, score=0.750842, total=   0.1s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ..... C=0.1, gamma=0.1, kernel=rbf, score=0.737374, total=   0.2s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ..... C=0.1, gamma=0.1, kernel=rbf, score=0.803030, total=   0.2s
[CV] C=0.1, gamma=0.1, kernel=poly ...................................
[CV] .

[CV] ........ C=10, gamma=1, kernel=rbf, score=0.835017, total=   0.1s
[CV] C=10, gamma=1, kernel=rbf .......................................
[CV] ........ C=10, gamma=1, kernel=rbf, score=0.858586, total=   0.2s
[CV] C=10, gamma=1, kernel=rbf .......................................
[CV] ........ C=10, gamma=1, kernel=rbf, score=0.914141, total=   0.1s
[CV] C=10, gamma=1, kernel=poly ......................................
[CV] ....... C=10, gamma=1, kernel=poly, score=0.619529, total=   0.1s
[CV] C=10, gamma=1, kernel=poly ......................................
[CV] ....... C=10, gamma=1, kernel=poly, score=0.671717, total=   0.1s
[CV] C=10, gamma=1, kernel=poly ......................................
[CV] ....... C=10, gamma=1, kernel=poly, score=0.681818, total=   0.1s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV] ...... C=10, gamma=0.1, kernel=rbf, score=0.750842, total=   0.1s
[CV] C=10, gamma=0.1, kernel=rbf .....................................
[CV] .

[CV] . C=100, gamma=0.0001, kernel=poly, score=0.671717, total=   0.1s
[CV] C=100, gamma=0.0001, kernel=poly ................................
[CV] . C=100, gamma=0.0001, kernel=poly, score=0.681818, total=   0.1s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ...... C=1000, gamma=1, kernel=rbf, score=0.868687, total=   0.2s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ...... C=1000, gamma=1, kernel=rbf, score=0.893939, total=   0.2s
[CV] C=1000, gamma=1, kernel=rbf .....................................
[CV] ...... C=1000, gamma=1, kernel=rbf, score=0.954545, total=   0.2s
[CV] C=1000, gamma=1, kernel=poly ....................................
[CV] ..... C=1000, gamma=1, kernel=poly, score=0.784512, total=   0.1s
[CV] C=1000, gamma=1, kernel=poly ....................................
[CV] ..... C=1000, gamma=1, kernel=poly, score=0.803030, total=   0.2s
[CV] C=1000, gamma=1, kernel=poly ....................................
[CV] .

[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:   55.0s finished


GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'C': [0.1, 1, 10, 100, 1000], 'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 'kernel': ['rbf', 'poly']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=3)

You can inspect the best parameters found by GridSearchCV in the best_params_ attribute, and the best estimator in the best\_estimator_ attribute:

In [22]:
grid.best_params_

{'C': 100, 'gamma': 1, 'kernel': 'rbf'}

In [23]:
grid.best_estimator_

SVC(C=100, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [24]:
grid.best_score_

0.90043290043290047

Then you can re-run predictions on this grid object just like you would with a normal model.

In [25]:
grid_predictions = grid.predict(X_test)

In [26]:
print(confusion_matrix(y_test,grid_predictions))

[[3 0 0 ..., 0 0 0]
 [0 3 0 ..., 0 0 0]
 [0 0 3 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 3 0 0]
 [0 0 0 ..., 0 3 0]
 [0 0 0 ..., 0 0 3]]


In [27]:
print(classification_report(y_test,grid_predictions))

                              precision    recall  f1-score   support

             Acer_Capillipes       1.00      1.00      1.00         3
             Acer_Circinatum       1.00      1.00      1.00         3
                   Acer_Mono       1.00      1.00      1.00         3
                 Acer_Opalus       1.00      0.67      0.80         3
               Acer_Palmatum       1.00      1.00      1.00         3
                 Acer_Pictum       1.00      1.00      1.00         3
             Acer_Platanoids       1.00      1.00      1.00         3
                 Acer_Rubrum       0.75      1.00      0.86         3
              Acer_Rufinerve       1.00      1.00      1.00         3
            Acer_Saccharinum       1.00      1.00      1.00         3
               Alnus_Cordata       0.75      1.00      0.86         3
          Alnus_Maximowiczii       0.75      1.00      0.86         3
                 Alnus_Rubra       1.00      1.00      1.00         3
           Alnus_Si

  'precision', 'predicted', average, warn_for)


In [28]:
print(accuracy_score(y_test, grid_predictions))

0.93265993266


## Lets test on Stratified Shuffling

In [29]:
leaf_df = pd.read_csv('train.csv')

In [30]:
from sklearn.model_selection import StratifiedShuffleSplit
    
sss = StratifiedShuffleSplit(n_splits=3, test_size=0.3, random_state=10)   

#sss.get_n_splits(predictors, y)

svc = SVC(C=100, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape=None, degree=3, gamma=1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

acc_rate = []
X = leaf_df.drop(['species'], axis = 1)
y = leaf_df['species']

for train_index, test_index in sss.split(X, y):
    #print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y[train_index], y[test_index]
    svc.fit(X_train, y_train)
    preds = svc.predict(X_test)
    acc_rate.append(accuracy_score(y_test, preds)) 

In [31]:
acc_rate

[0.016835016835016835, 0.010101010101010102, 0.0033670033670033669]

In [32]:
# You can also try the combination of CV and GridSearch to find the best params

In [None]:
C_range = [0.1,1, 10]
gamma_range = [1,0.1,0.01]
kernel_range = ['poly','rbf']
param_grid = dict(gamma=gamma_range, C=C_range, kernel= kernel_range)

cv = StratifiedShuffleSplit(n_splits=3, test_size=0.2, random_state=42)
grid = GridSearchCV(SVC(), param_grid=param_grid, cv=cv, verbose=2)
grid.fit(X, y)



In [None]:
print("The best parameters are %s with a score of %0.2f"
      % (grid.best_params_, grid.best_score_))

## Comparision of various classifiers

#### https://www.kaggle.com/jeffd23/10-classifier-showdown-in-scikit-learn