## Import Dataset

In [1]:
import pandas as pd

dataset = pd.read_csv(
    'data/banknote_authentication.txt', 
    header=None, 
    names=['Variance', 'Skewness', 'Curtosis', 'Entropy', 'Class']
)

print(dataset.shape[0], 'records')
dataset.head()

1372 records


Unnamed: 0,Variance,Skewness,Curtosis,Entropy,Class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


## Data Preprocessing

### Prepare features and labels

In [2]:
X = dataset.loc[:, dataset.columns != 'Class']
y = dataset['Class']

### Split data into Training and Test sets

In [3]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

print('Training data:', len(X_train))
print('Test data:', len(X_test))

Training data: 1097
Test data: 275


### Feature scaling

In [4]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

## Create and Train model

In [5]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

classifier = SVC(random_state=0)

grid_params = {
    'C': [0.01, 0.1, 1.0, 10],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree': [1, 2, 3, 4],  # for 'poly' kernel
    'gamma': ['auto', 'scale'] # for 'poly' kernel
}

grid_search = GridSearchCV(
    estimator=classifier,
    param_grid=grid_params,
    scoring='accuracy',
    cv=5, # number of folds
    n_jobs=-1 # all available computing power
)

grid_search.fit(X_train, y_train)

means = grid_search.cv_results_['mean_test_score']
stds = grid_search.cv_results_['std_test_score']

print('Grid Search scores:\n')
for mean, std, params in zip(means, stds, grid_search.cv_results_['params']):
    print('%0.3f (+/-%0.03f) for %r' % (mean, std * 2, params))

model = grid_search.best_estimator_

best_parameters = grid_search.best_params_
print('\nBest parameters:\n', best_parameters)

best_result = grid_search.best_score_
print('\nTraining accuracy:', best_result*100,'%')

Grid Search scores:

0.974 (+/-0.021) for {'C': 0.01, 'degree': 1, 'gamma': 'auto', 'kernel': 'linear'}
0.941 (+/-0.027) for {'C': 0.01, 'degree': 1, 'gamma': 'auto', 'kernel': 'poly'}
0.924 (+/-0.019) for {'C': 0.01, 'degree': 1, 'gamma': 'auto', 'kernel': 'rbf'}
0.907 (+/-0.029) for {'C': 0.01, 'degree': 1, 'gamma': 'auto', 'kernel': 'sigmoid'}
0.974 (+/-0.021) for {'C': 0.01, 'degree': 1, 'gamma': 'scale', 'kernel': 'linear'}
0.941 (+/-0.027) for {'C': 0.01, 'degree': 1, 'gamma': 'scale', 'kernel': 'poly'}
0.923 (+/-0.021) for {'C': 0.01, 'degree': 1, 'gamma': 'scale', 'kernel': 'rbf'}
0.907 (+/-0.029) for {'C': 0.01, 'degree': 1, 'gamma': 'scale', 'kernel': 'sigmoid'}
0.974 (+/-0.021) for {'C': 0.01, 'degree': 2, 'gamma': 'auto', 'kernel': 'linear'}
0.582 (+/-0.018) for {'C': 0.01, 'degree': 2, 'gamma': 'auto', 'kernel': 'poly'}
0.924 (+/-0.019) for {'C': 0.01, 'degree': 2, 'gamma': 'auto', 'kernel': 'rbf'}
0.907 (+/-0.029) for {'C': 0.01, 'degree': 2, 'gamma': 'auto', 'kernel': 's

## Evaluate the model

In [6]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

y_pred = model.predict(X_test)

acc = accuracy_score(y_test, y_pred)*100
print('Test Accuracy:', acc,'%')

cm = confusion_matrix(y_test,y_pred)
print('\nConfusion matrix:\n', cm)

cr = classification_report(y_test,y_pred)
print('\nClassification report:\n', cr)

Test Accuracy: 100.0 %

Confusion matrix:
 [[157   0]
 [  0 118]]

Classification report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       157
           1       1.00      1.00      1.00       118

    accuracy                           1.00       275
   macro avg       1.00      1.00      1.00       275
weighted avg       1.00      1.00      1.00       275

