In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.svm import SVC
from utilities import visualize_classifier
import seaborn as sns

In [2]:
data = pd.read_csv("data_banknote_authentication.txt")

In [3]:
data.shape

(1371, 5)

In [4]:
data.columns =['F1', 'F2', 'F3', 'F4','Class']

In [5]:

X = data.drop('Class', axis=1)
y = data['Class']

In [6]:
X.head()

Unnamed: 0,F1,F2,F3,F4
0,4.5459,8.1674,-2.4586,-1.4621
1,3.866,-2.6383,1.9242,0.10645
2,3.4566,9.5228,-4.0112,-3.5944
3,0.32924,-4.4552,4.5718,-0.9888
4,4.3684,9.6718,-3.9606,-3.1625


In [7]:
y.head()


0    0
1    0
2    0
3    0
4    0
Name: Class, dtype: int64

In [8]:
class_0 = (X[y==0])
class_1 = (X[y==1])


In [9]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 5)

In [10]:
from sklearn.svm import SVC
svclassifier = SVC(kernel='linear',C=1.0)
svclassifier.fit(X_train, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [11]:
y_pred = svclassifier.predict(X_test)

In [12]:
from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y_test,y_pred))
print(classification_report(y_test,y_pred))

[[212   3]
 [  7 190]]
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       215
           1       0.98      0.96      0.97       197

    accuracy                           0.98       412
   macro avg       0.98      0.98      0.98       412
weighted avg       0.98      0.98      0.98       412



In [13]:
from sklearn.model_selection import GridSearchCV 

# defining parameter range 
param_grid = {'C': [0.1, 0.2, 0.3, 1, 5, 8, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 1000], 
      'gamma': [1, 0.1, 0.01, 0.001, 0.0001, 0.00001], 
      'kernel': ['rbf']} 

grid = GridSearchCV(SVC(), param_grid, refit = True, verbose = 3) 

In [14]:
# fitting the model for grid search 
grid.fit(X_train, y_train)

Fitting 5 folds for each of 102 candidates, totalling 510 fits
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.698, total=   0.0s
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.672, total=   0.0s
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.677, total=   0.0s
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.672, total=   0.0s
[CV] C=0.1, gamma=1, kernel=rbf ......................................
[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.702, total=   0.0s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=1.000, total=   0.0s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ 

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s


[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.995, total=   0.0s
[CV] C=0.1, gamma=0.1, kernel=rbf ....................................
[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.979, total=   0.0s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.990, total=   0.0s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.995, total=   0.0s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.995, total=   0.0s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.984, total=   0.0s
[CV] C=0.1, gamma=0.01, kernel=rbf ...................................
[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.963, total=   0.0s
[CV] C

[Parallel(n_jobs=1)]: Done 510 out of 510 | elapsed:    8.4s finished


GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [0.1, 0.2, 0.3, 1, 5, 8, 10, 20, 30, 40, 50, 60,
                               70, 80, 90, 100, 1000],
                         'gamma': [1, 0.1, 0.01, 0.001, 0.0001, 1e-05],
                         'kernel': ['rbf']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)

In [15]:
# print best parameter after tuning 
print(grid.best_params_) 

# print how our model looks after hyper-parameter tuning 
print(grid.best_estimator_) 

{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}
SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)


In [16]:
grid_predictions = grid.predict(X_test) 

# print classification report 
print(confusion_matrix(y_test,grid_predictions))
print(classification_report(y_test, grid_predictions))

[[215   0]
 [  0 197]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       215
           1       1.00      1.00      1.00       197

    accuracy                           1.00       412
   macro avg       1.00      1.00      1.00       412
weighted avg       1.00      1.00      1.00       412



In [17]:
from sklearn.model_selection import GridSearchCV 

# defining parameter range 
param_grid = {'kernel': ['poly', 'rbf', 'sigmoid', 'linear'], 
      'C': [1, 2, 3, 4, 5, 6]}

In [18]:
grid = GridSearchCV(SVC(), param_grid, refit = True, verbose = 3)

In [19]:
grid.fit(X_train, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s


Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV] C=1, kernel=poly ................................................
[CV] .................... C=1, kernel=poly, score=0.974, total=   0.0s
[CV] C=1, kernel=poly ................................................
[CV] .................... C=1, kernel=poly, score=0.974, total=   0.0s
[CV] C=1, kernel=poly ................................................
[CV] .................... C=1, kernel=poly, score=0.974, total=   0.0s
[CV] C=1, kernel=poly ................................................
[CV] .................... C=1, kernel=poly, score=0.974, total=   0.0s
[CV] C=1, kernel=poly ................................................
[CV] .................... C=1, kernel=poly, score=0.942, total=   0.0s
[CV] C=1, kernel=rbf .................................................
[CV] ..................... C=1, kernel=rbf, score=1.000, total=   0.0s
[CV] C=1, kernel=rbf .................................................
[CV] ..........

[Parallel(n_jobs=1)]: Done 120 out of 120 | elapsed:    1.5s finished


GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='rbf', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [1, 2, 3, 4, 5, 6],
                         'kernel': ['poly', 'rbf', 'sigmoid', 'linear']},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)

In [20]:
# print best parameter after tuning 
print(grid.best_params_) 

# print how our model looks after hyper-parameter tuning 
print(grid.best_estimator_)

{'C': 3, 'kernel': 'rbf'}
SVC(C=3, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)


In [21]:
grid_predictions = grid.predict(X_test) 

# print classification report 
print(confusion_matrix(y_test,grid_predictions))
print(classification_report(y_test, grid_predictions))

[[215   0]
 [  0 197]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       215
           1       1.00      1.00      1.00       197

    accuracy                           1.00       412
   macro avg       1.00      1.00      1.00       412
weighted avg       1.00      1.00      1.00       412

