# Imports

In [1]:
import time
import psutil
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.svm import SVC
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from sklearn.decomposition import PCA
from scipy.stats import chi2_contingency
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Loading the dataset

In [2]:
!rm -rf ./ENCS5141Datasets
!git clone https://github.com/mkjubran/ENCS5141Datasets.git

Cloning into 'ENCS5141Datasets'...
remote: Enumerating objects: 60, done.[K
remote: Counting objects: 100% (60/60), done.[K
remote: Compressing objects: 100% (51/51), done.[K
remote: Total 60 (delta 14), reused 50 (delta 7), pack-reused 0[K
Unpacking objects: 100% (60/60), 22.99 MiB | 9.97 MiB/s, done.


In [3]:
df = pd.read_csv("/kaggle/working/ENCS5141Datasets/BankNotesDataset.csv",index_col=0)
df.describe()

Unnamed: 0,v_0,v_1,v_2,v_3,v_4,v_5,v_6,v_7,v_8,v_9,...,v_246,v_247,v_248,v_249,v_250,v_251,v_252,v_253,v_254,v_255
count,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,...,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0
mean,1.212995,1.204855,1.614074,1.603912,1.754493,0.772892,1.061728,1.029191,1.399374,1.379324,...,1.360101,1.075888,1.420618,1.672021,0.886281,1.193987,0.915244,1.033586,1.188619,1.660768
std,1.46075,1.417384,1.578595,1.633488,1.828556,1.164312,1.49337,1.475949,1.591379,1.561145,...,1.491093,1.435099,1.661894,1.688684,1.340346,1.577617,1.335918,1.395005,1.461009,1.620754
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.081231
50%,0.643254,0.703254,1.276952,1.247524,1.294879,0.0,0.240614,0.31988,0.873371,0.891365,...,0.927966,0.406859,0.786744,1.259272,0.051424,0.464241,0.097816,0.398608,0.558941,1.276748
75%,2.092841,2.041682,2.751267,2.593737,3.01894,1.269562,1.765016,1.633481,2.354793,2.228684,...,2.209718,1.78471,2.504917,2.788023,1.42862,1.988529,1.494967,1.663317,2.059749,2.739542
max,10.361509,9.754471,7.938003,9.515128,9.583521,8.533129,10.700576,10.720987,10.997601,8.869824,...,9.516924,9.747485,9.462851,10.49802,10.097586,9.408921,7.514771,11.567399,8.275887,9.846251


## Separating the denomination and orientation columns

In [4]:
new_columns = df['Denomination'].str.split('_', n=1, expand=True)
new_columns.columns = ['Denomination', 'Orientation']
df = pd.concat([df, new_columns['Orientation']], axis=1)
df['Denomination'] = new_columns['Denomination']
df['Orientation'] = df['Orientation']

## Creating a column that combines the three outputs

In [5]:
df['currency_denomination_orientation'] = df['Currency'] + '_' + df['Denomination'] + '_' + df['Orientation']

## Scaling the training data

In [6]:
scaler = MinMaxScaler()
numerical_columns = df.select_dtypes(include=['float64']).columns
df[numerical_columns] = scaler.fit_transform(df[numerical_columns])
df.describe()

Unnamed: 0,v_0,v_1,v_2,v_3,v_4,v_5,v_6,v_7,v_8,v_9,...,v_246,v_247,v_248,v_249,v_250,v_251,v_252,v_253,v_254,v_255
count,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,...,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0,24826.0
mean,0.117067,0.123518,0.203335,0.168564,0.183074,0.090575,0.099222,0.095998,0.127244,0.155507,...,0.142914,0.110376,0.150126,0.15927,0.087772,0.126899,0.121793,0.089353,0.143624,0.16867
std,0.140979,0.145306,0.198865,0.171673,0.190802,0.136446,0.13956,0.137669,0.144702,0.176006,...,0.156678,0.147228,0.175623,0.160857,0.132739,0.167672,0.177772,0.120598,0.176538,0.164606
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00825
50%,0.062081,0.072096,0.160866,0.131109,0.135115,0.0,0.022486,0.029837,0.079415,0.100494,...,0.097507,0.04174,0.08314,0.119953,0.005093,0.049341,0.013016,0.03446,0.067539,0.129668
75%,0.201982,0.209307,0.346594,0.272591,0.315014,0.14878,0.164946,0.152363,0.214119,0.251266,...,0.232188,0.183094,0.264711,0.265576,0.141481,0.211345,0.198937,0.143794,0.248886,0.278232
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


## Marking the training and the output data

In [7]:
X = df.drop(columns=['Currency', 'Denomination', 'Orientation', 'currency_denomination_orientation'])

## Categorizing the output data using Label Encoder

In [8]:
label_encoder = LabelEncoder()
categorical_columns = df.select_dtypes(include=['object']).columns

for column in categorical_columns:
    df[column] = label_encoder.fit_transform(df[column])

df.head()

Unnamed: 0,v_0,v_1,v_2,v_3,v_4,v_5,v_6,v_7,v_8,v_9,...,v_250,v_251,v_252,v_253,v_254,v_255,Currency,Denomination,Orientation,currency_denomination_orientation
0,0.0,0.082299,0.604584,0.369966,0.407621,0.347456,0.212743,0.044037,0.0,0.039585,...,0.509492,0.0,0.095429,0.0,0.570889,0.0,0,2,0,0
1,0.123728,0.01737,0.550206,0.470436,0.589134,0.25476,0.059676,0.0,0.013626,0.058281,...,0.264589,0.0,0.042307,0.0,0.320075,0.066663,0,2,0,0
2,0.188815,0.0,0.660703,0.37957,0.530699,0.44139,0.0,0.173959,0.259462,0.207532,...,0.244119,0.0,0.026333,0.0,0.09956,0.156396,0,2,0,0
3,0.036937,0.382155,0.555482,0.32512,0.654996,0.0,0.127884,0.0,0.099614,0.108101,...,0.027049,0.046646,0.322481,0.0,0.208345,0.0,0,2,0,0
4,0.059797,0.007307,0.476299,0.382274,0.432507,0.448753,0.075867,0.036131,0.091449,0.122241,...,0.476096,0.001822,0.0,0.0,0.358825,0.0,0,2,0,0


In [9]:
y_currency = df['Currency']
y_denomination = df['Denomination']
y_orientation = df['Orientation']
y_currency_denomination_orientation = df['currency_denomination_orientation']

# Applying PCA for dimensionality reduction

In [10]:
num_components = 100
pca = PCA(n_components=num_components)
pca.fit(X)
pca_data = pca.transform(X)
pca_df = pd.DataFrame(data=pca_data)
X = pca_df

In [11]:
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# Classification Function

In [12]:
def Classifier(X_train, y_train, classifier):    
    mlp_classifier = classifier
    memory_usage_b = psutil.virtual_memory()[3]/1000000000
    start_time = time.time()
    mlp_classifier.fit(X_train, y_train)
    end_time = time.time()
    memory_usage_a = psutil.virtual_memory()[3]/1000000000
    training_time = end_time - start_time
    memory_usage = (memory_usage_a - memory_usage_b)*1000
    return mlp_classifier, training_time, memory_usage

# Grid Search Function

In [13]:
def GridSearch(param_grid, classifier, X_train, y_train):
    grid_search = GridSearchCV(classifier, param_grid=param_grid, n_jobs=10, verbose=1)
    grid_search.fit(X_train, y_train)
    return grid_search

# MLP Classification

## Currency

In [66]:
X_train, X_test, y_train, y_test = train_test_split(X, y_currency, test_size= 0.2, random_state=0)
mlp_classifier, training_time, memory_usage = Classifier(X_train, y_train, MLPClassifier(hidden_layer_sizes=(50, 50), max_iter=200, random_state=42))
y_pred= mlp_classifier.predict(X_test)

In [67]:
print(f'Predecting for Currency without grid search:\n')
print(classification_report(y_pred, y_test))
fs = f1_score(y_pred, y_test, average='weighted')
print(f"F1 score: {fs:.4f}")
print(f"MLP Training time: {training_time} seconds")
print(f"MLP Memory usage: {abs(memory_usage)} MB")

Predecting for Currency without grid search:

              precision    recall  f1-score   support

           0       0.89      0.94      0.91       292
           1       0.97      0.96      0.97       426
           2       0.98      0.94      0.96       219
           3       0.98      0.96      0.97       414
           4       0.89      0.89      0.89       232
           5       0.98      0.99      0.99       251
           6       0.89      0.89      0.89       364
           7       0.97      0.98      0.97       342
           8       0.88      0.92      0.90       215
           9       0.98      0.98      0.98       253
          10       0.86      0.89      0.87       181
          11       0.90      0.84      0.87       238
          12       1.00      0.99      1.00       210
          13       0.98      0.95      0.96       243
          14       0.97      0.97      0.97       202
          15       0.97      0.98      0.97       583
          16       0.94      0.95  

In [68]:
param_grid = {
    'hidden_layer_sizes': [(100,), (100, 50), (50, 50)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.01, 0.1, 1, 10],
}
grid_search = GridSearch(param_grid, MLPClassifier(), X_train, y_train)
print(grid_search.best_params_)

Fitting 5 folds for each of 48 candidates, totalling 240 fits




{'activation': 'relu', 'alpha': 0.1, 'hidden_layer_sizes': (100, 50), 'solver': 'adam'}


In [69]:
classifier = grid_search.best_estimator_
mlp_classifier, training_time, memory_usage = Classifier(X_train, y_train, classifier)
y_pred= mlp_classifier.predict(X_test)

In [70]:
print(f'Predecting for Currency with grid search:\n')
print(classification_report(y_pred, y_test))
fs = f1_score(y_pred, y_test, average='weighted')
print(f"F1 score: {fs:.4f}")
print(f"MLP Training time: {training_time} seconds")
print(f"MLP Memory usage: {abs(memory_usage)} MB")

Predecting for Currency with grid search:

              precision    recall  f1-score   support

           0       0.94      0.94      0.94       309
           1       0.97      0.98      0.97       418
           2       0.98      0.95      0.96       216
           3       0.99      0.98      0.99       408
           4       0.93      0.93      0.93       231
           5       0.99      1.00      0.99       251
           6       0.92      0.92      0.92       364
           7       0.97      0.99      0.98       339
           8       0.91      0.97      0.94       210
           9       0.99      0.99      0.99       252
          10       0.91      0.90      0.90       190
          11       0.94      0.90      0.92       233
          12       1.00      1.00      1.00       207
          13       0.98      0.97      0.97       238
          14       0.98      0.97      0.97       203
          15       0.98      0.98      0.98       590
          16       0.95      0.95     

## Denomination

In [71]:
X_train, X_test, y_train, y_test = train_test_split(X, y_denomination, test_size= 0.2, random_state=0)
mlp_classifier, training_time, memory_usage = Classifier(X_train, y_train, MLPClassifier(hidden_layer_sizes=(50, 50), max_iter=200, random_state=42))
y_pred= mlp_classifier.predict(X_test)

In [72]:
print(f'Predecting for Denomination without grid search:\n')
print(classification_report(y_pred, y_test))
fs = f1_score(y_pred, y_test, average='weighted')
print(f"F1 score: {fs:.4f}")
print(f"MLP Training time: {training_time} seconds")
print(f"MLP Memory usage: {abs(memory_usage)} MB")

Predecting for Denomination without grid search:

              precision    recall  f1-score   support

           0       0.91      0.91      0.91       134
           1       0.90      0.91      0.90       695
           2       0.92      0.90      0.91       748
           3       0.95      0.96      0.95       252
           4       1.00      0.92      0.96        59
           5       0.95      1.00      0.98        42
           6       0.93      0.93      0.93       136
           7       0.91      0.90      0.90       732
           8       0.91      0.92      0.91       343
           9       0.96      0.96      0.96       178
          10       0.98      0.98      0.98        44
          11       0.87      0.88      0.88       585
          12       0.91      0.89      0.90       677
          13       0.81      0.89      0.85       122
          14       0.97      0.97      0.97       181
          15       1.00      0.95      0.97        38

    accuracy                  

In [73]:
param_grid = {
    'hidden_layer_sizes': [(100,), (100, 50), (50, 50)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.01, 0.1, 1, 10],
}
grid_search = GridSearch(param_grid, MLPClassifier(), X_train, y_train)
print(grid_search.best_params_)

Fitting 5 folds for each of 48 candidates, totalling 240 fits




{'activation': 'relu', 'alpha': 0.1, 'hidden_layer_sizes': (100, 50), 'solver': 'adam'}


In [74]:
classifier = grid_search.best_estimator_
mlp_classifier, training_time, memory_usage = Classifier(X_train, y_train, classifier)
y_pred= mlp_classifier.predict(X_test)

In [75]:
print(f'Predecting for Denomination with grid search:\n')
print(classification_report(y_pred, y_test))
fs = f1_score(y_pred, y_test, average='weighted')
print(f"F1 score: {fs:.4f}")
print(f"MLP Training time: {training_time} seconds")
print(f"MLP Memory usage: {abs(memory_usage)} MB")

Predecting for Denomination with grid search:

              precision    recall  f1-score   support

           0       0.93      0.95      0.94       130
           1       0.92      0.95      0.94       689
           2       0.94      0.95      0.95       726
           3       0.95      0.98      0.97       248
           4       1.00      1.00      1.00        54
           5       1.00      1.00      1.00        44
           6       0.96      0.90      0.93       145
           7       0.94      0.93      0.94       727
           8       0.95      0.95      0.95       350
           9       0.98      0.97      0.97       181
          10       0.98      1.00      0.99        43
          11       0.94      0.91      0.92       612
          12       0.92      0.92      0.92       671
          13       0.87      0.91      0.89       129
          14       0.98      0.98      0.98       182
          15       0.97      1.00      0.99        35

    accuracy                     

## Orientation

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y_orientation, test_size= 0.2, random_state=0)
mlp_classifier, training_time, memory_usage = Classifier(X_train, y_train, MLPClassifier(hidden_layer_sizes=(50, 50), max_iter=200, random_state=42))
y_pred= mlp_classifier.predict(X_test)

In [15]:
print(f'Predecting for Orientation without grid search:\n')
print(classification_report(y_pred, y_test))
fs = f1_score(y_pred, y_test, average='weighted')
print(f"F1 score: {fs:.4f}")
print(f"MLP Training time: {training_time} seconds")
print(f"MLP Memory usage: {abs(memory_usage)} MB")

Predecting for Orientation without grid search:

              precision    recall  f1-score   support

           0       0.95      0.95      0.95      2354
           1       0.76      0.77      0.77       104
           2       0.79      0.80      0.80        61
           3       0.95      0.95      0.95      2245
           4       0.76      0.82      0.79       125
           5       0.83      0.84      0.84        77

    accuracy                           0.94      4966
   macro avg       0.84      0.85      0.85      4966
weighted avg       0.94      0.94      0.94      4966

F1 score: 0.9355
MLP Training time: 25.138287782669067 seconds
MLP Memory usage: 12.484608000000064 MB


In [16]:
param_grid = {
    'hidden_layer_sizes': [(100,), (100, 50), (50, 50)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.01, 0.1, 1, 10],
}
grid_search = GridSearch(param_grid, MLPClassifier(), X_train, y_train)
print(grid_search.best_params_)

Fitting 5 folds for each of 48 candidates, totalling 240 fits




{'activation': 'tanh', 'alpha': 0.01, 'hidden_layer_sizes': (100,), 'solver': 'adam'}


In [17]:
classifier = grid_search.best_estimator_
mlp_classifier, training_time, memory_usage = Classifier(X_train, y_train, classifier)
y_pred= mlp_classifier.predict(X_test)

In [18]:
print(f'Predecting for Orientation with grid search:\n')
print(classification_report(y_pred, y_test))
fs = f1_score(y_pred, y_test, average='weighted')
print(f"F1 score: {fs:.4f}")
print(f"MLP Training time: {training_time} seconds")
print(f"MLP Memory usage: {abs(memory_usage)} MB")

Predecting for Orientation with grid search:

              precision    recall  f1-score   support

           0       0.97      0.94      0.96      2400
           1       0.81      0.84      0.83       101
           2       0.69      0.80      0.74        54
           3       0.95      0.96      0.96      2219
           4       0.74      0.89      0.81       111
           5       0.86      0.83      0.84        81

    accuracy                           0.95      4966
   macro avg       0.84      0.88      0.85      4966
weighted avg       0.95      0.95      0.95      4966

F1 score: 0.9460
MLP Training time: 37.262279987335205 seconds
MLP Memory usage: 49.24211199999995 MB


## All combined

In [81]:
X_train, X_test, y_train, y_test = train_test_split(X, y_currency_denomination_orientation, test_size= 0.2, random_state=0)
mlp_classifier, training_time, memory_usage = Classifier(X_train, y_train, MLPClassifier(hidden_layer_sizes=(50, 50), max_iter=200, random_state=42))
y_pred= mlp_classifier.predict(X_test)

In [82]:
print(f'Predecting for Currency, Denomination, and Orientation without grid search:\n')
print(classification_report(y_pred, y_test))
fs = f1_score(y_pred, y_test, average='weighted')
print(f"F1 score: {fs:.4f}")
print(f"MLP Training time: {training_time} seconds")
print(f"MLP Memory usage: {abs(memory_usage)} MB")

Predecting for Currency, Denomination, and Orientation without grid search:

              precision    recall  f1-score   support

           0       0.90      0.87      0.89        31
           1       0.84      0.90      0.87        30
           2       0.79      0.84      0.81        31
           3       0.69      0.80      0.74        30
           4       0.71      0.71      0.71        34
           5       0.58      0.70      0.63        27
           6       0.88      0.92      0.90        24
           7       0.90      0.84      0.87        31
           8       0.89      0.71      0.79        34
           9       0.75      0.89      0.81        27
          10       0.89      0.84      0.86        57
          11       0.74      0.78      0.76        18
          12       1.00      0.97      0.99        37
          13       1.00      1.00      1.00        24
          14       1.00      1.00      1.00        24
          15       0.97      1.00      0.98        29
    

In [83]:
param_grid = {
    'hidden_layer_sizes': [(100,), (100, 50), (50, 50)],
    'activation': ['relu', 'tanh'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.01, 0.1, 1, 10],
}
grid_search = GridSearch(param_grid, MLPClassifier(), X_train, y_train)
print(grid_search.best_params_)

Fitting 5 folds for each of 48 candidates, totalling 240 fits




{'activation': 'tanh', 'alpha': 0.01, 'hidden_layer_sizes': (100,), 'solver': 'adam'}


In [84]:
classifier = grid_search.best_estimator_
mlp_classifier, training_time, memory_usage = Classifier(X_train, y_train, classifier)
y_pred= mlp_classifier.predict(X_test)

In [85]:
print(f'Predecting for Currency, Denomination, and Orientation with grid search:\n')
print(classification_report(y_pred, y_test))
fs = f1_score(y_pred, y_test, average='weighted')
print(f"F1 score: {fs:.4f}")
print(f"MLP Training time: {training_time} seconds")
print(f"MLP Memory usage: {abs(memory_usage)} MB")

Predecting for Currency, Denomination, and Orientation with grid search:

              precision    recall  f1-score   support

           0       0.87      0.93      0.90        28
           1       0.88      0.85      0.86        33
           2       0.88      0.88      0.88        33
           3       0.83      0.85      0.84        34
           4       0.65      0.76      0.70        29
           5       0.73      0.75      0.74        32
           6       0.88      0.92      0.90        24
           7       0.97      0.85      0.90        33
           8       0.93      0.76      0.83        33
           9       0.81      0.93      0.87        28
          10       0.87      0.96      0.91        49
          11       0.95      0.78      0.86        23
          12       1.00      0.97      0.99        37
          13       1.00      1.00      1.00        24
          14       1.00      1.00      1.00        24
          15       0.97      1.00      0.98        29
       

# SVC Classification

## Currency

In [86]:
X_train, X_test, y_train, y_test = train_test_split(X, y_currency, test_size= 0.2, random_state=0)
svc_classifier, training_time, memory_usage = Classifier(X_train, y_train, SVC(kernel='linear', random_state=42))
y_pred= svc_classifier.predict(X_test)

In [87]:
print(f'Predecting for Currency without grid search:\n')
print(classification_report(y_pred, y_test))
fs = f1_score(y_pred, y_test, average='weighted')
print(f"F1 score: {fs:.4f}")
print(f"SVC Training time: {training_time} seconds")
print(f"SVC Memory usage: {abs(memory_usage)} MB")

Predecting for Currency without grid search:

              precision    recall  f1-score   support

           0       0.92      0.91      0.92       315
           1       0.97      0.96      0.96       429
           2       0.97      0.92      0.94       222
           3       0.98      0.98      0.98       405
           4       0.86      0.91      0.88       217
           5       1.00      1.00      1.00       253
           6       0.89      0.85      0.87       378
           7       0.98      0.99      0.99       341
           8       0.88      0.95      0.91       206
           9       0.99      0.99      0.99       251
          10       0.80      0.89      0.84       170
          11       0.92      0.87      0.90       235
          12       1.00      1.00      1.00       208
          13       0.97      0.97      0.97       234
          14       0.97      0.98      0.98       199
          15       0.98      0.96      0.97       602
          16       0.95      0.96  

In [88]:
param_grid = {
    'C': [0.01, 0.1, 1],  
    'gamma': [1, 0.1, 0.01],
    'kernel': ['rbf', 'linear', 'poly']
}
grid_search = GridSearch(param_grid, SVC(), X_train, y_train)
print(grid_search.best_params_)

Fitting 5 folds for each of 27 candidates, totalling 135 fits
{'C': 1, 'gamma': 1, 'kernel': 'poly'}


In [89]:
classifier = grid_search.best_estimator_
svc_classifier, training_time, memory_usage = Classifier(X_train, y_train, classifier)
y_pred= svc_classifier.predict(X_test)

In [90]:
print(f'Predecting for Currency with grid search:\n')
print(classification_report(y_pred, y_test))
fs = f1_score(y_pred, y_test, average='weighted')
print(f"F1 score: {fs:.4f}")
print(f"SVC Training time: {training_time} seconds")
print(f"SVC Memory usage: {abs(memory_usage)} MB")

Predecting for Currency with grid search:

              precision    recall  f1-score   support

           0       0.95      0.93      0.94       318
           1       0.98      0.99      0.98       418
           2       0.98      0.99      0.98       208
           3       0.99      0.98      0.98       410
           4       0.95      0.96      0.95       228
           5       1.00      1.00      1.00       252
           6       0.94      0.92      0.93       372
           7       0.99      0.99      0.99       344
           8       0.93      0.98      0.95       212
           9       0.99      1.00      0.99       250
          10       0.94      0.89      0.91       198
          11       0.94      0.93      0.94       226
          12       1.00      1.00      1.00       208
          13       0.97      0.99      0.98       231
          14       0.99      0.99      0.99       201
          15       0.98      0.98      0.98       591
          16       0.95      0.97     

## Denomination

In [91]:
X_train, X_test, y_train, y_test = train_test_split(X, y_denomination, test_size= 0.2, random_state=0)
svc_classifier, training_time, memory_usage = Classifier(X_train, y_train, SVC(kernel='linear', random_state=42))
y_pred= svc_classifier.predict(X_test)

In [92]:
print(f'Predecting for Denomination without grid search:\n')
print(classification_report(y_pred, y_test))
fs = f1_score(y_pred, y_test, average='weighted')
print(f"F1 score: {fs:.4f}")
print(f"SVC Training time: {training_time} seconds")
print(f"SVC Memory usage: {abs(memory_usage)} MB")

Predecting for Denomination without grid search:

              precision    recall  f1-score   support

           0       0.94      0.91      0.93       138
           1       0.87      0.81      0.84       760
           2       0.86      0.86      0.86       730
           3       0.91      0.97      0.94       240
           4       1.00      1.00      1.00        54
           5       1.00      1.00      1.00        44
           6       0.90      0.88      0.89       137
           7       0.86      0.82      0.84       759
           8       0.90      0.93      0.92       337
           9       0.94      0.95      0.95       177
          10       0.98      1.00      0.99        43
          11       0.80      0.84      0.82       569
          12       0.82      0.83      0.83       658
          13       0.75      0.94      0.83       108
          14       0.96      1.00      0.98       175
          15       1.00      0.97      0.99        37

    accuracy                  

In [93]:
param_grid = {
    'C': [0.01, 0.1, 1],  
    'gamma': [1, 0.1, 0.01],
    'kernel': ['rbf', 'linear', 'poly']
}
grid_search = GridSearch(param_grid, SVC(), X_train, y_train)
print(grid_search.best_params_)

Fitting 5 folds for each of 27 candidates, totalling 135 fits
{'C': 0.1, 'gamma': 1, 'kernel': 'poly'}


In [94]:
classifier = grid_search.best_estimator_
svc_classifier, training_time, memory_usage = Classifier(X_train, y_train, classifier)
y_pred= svc_classifier.predict(X_test)

In [95]:
print(f'Predecting for Denomination with grid search:\n')
print(classification_report(y_pred, y_test))
fs = f1_score(y_pred, y_test, average='weighted')
print(f"F1 score: {fs:.4f}")
print(f"SVC Training time: {training_time} seconds")
print(f"SVC Memory usage: {abs(memory_usage)} MB")

Predecting for Denomination with grid search:

              precision    recall  f1-score   support

           0       0.96      0.96      0.96       133
           1       0.95      0.93      0.94       720
           2       0.95      0.96      0.95       726
           3       0.96      1.00      0.98       245
           4       1.00      1.00      1.00        54
           5       0.98      1.00      0.99        43
           6       0.97      0.96      0.96       137
           7       0.96      0.94      0.95       737
           8       0.95      0.98      0.97       337
           9       0.96      0.99      0.97       174
          10       0.98      1.00      0.99        43
          11       0.95      0.93      0.94       602
          12       0.95      0.94      0.95       677
          13       0.87      0.97      0.92       120
          14       0.99      0.99      0.99       182
          15       1.00      1.00      1.00        36

    accuracy                     

## Orientation

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y_orientation, test_size= 0.2, random_state=0)
svc_classifier, training_time, memory_usage = Classifier(X_train, y_train, SVC(kernel='linear', random_state=42))
y_pred= svc_classifier.predict(X_test)

In [20]:
print(f'Predecting for Orientation without grid search:\n')
print(classification_report(y_pred, y_test))
fs = f1_score(y_pred, y_test, average='weighted')
print(f"F1 score: {fs:.4f}")
print(f"SVC Training time: {training_time} seconds")
print(f"SVC Memory usage: {abs(memory_usage)} MB")

Predecting for Orientation without grid search:

              precision    recall  f1-score   support

           0       0.90      0.89      0.89      2377
           1       0.71      0.79      0.75        95
           2       0.56      0.81      0.67        43
           3       0.91      0.89      0.90      2299
           4       0.60      0.88      0.71        91
           5       0.73      0.93      0.82        61

    accuracy                           0.88      4966
   macro avg       0.74      0.87      0.79      4966
weighted avg       0.89      0.88      0.89      4966

F1 score: 0.8863
SVC Training time: 11.66326904296875 seconds
SVC Memory usage: 210.46067199999996 MB


For some reason, applying SVC to the orientation column takes a lot of time. We will take the previous grid and assume it is the best grid.

In [21]:
classifier = SVC(C=0.1, gamma=1, kernel='poly')
svc_classifier, training_time, memory_usage = Classifier(X_train, y_train, classifier)
y_pred= svc_classifier.predict(X_test)

In [22]:
print(f'Predecting for Orientation with grid search:\n')
print(classification_report(y_pred, y_test))
fs = f1_score(y_pred, y_test, average='weighted')
print(f"F1 score: {fs:.4f}")
print(f"SVC Training time: {training_time} seconds")
print(f"SVC Memory usage: {abs(memory_usage)} MB")

Predecting for Orientation with grid search:

              precision    recall  f1-score   support

           0       0.96      0.95      0.96      2380
           1       0.86      0.94      0.90        96
           2       0.73      0.90      0.80        50
           3       0.96      0.95      0.96      2255
           4       0.79      0.92      0.85       115
           5       0.85      0.94      0.89        70

    accuracy                           0.95      4966
   macro avg       0.86      0.93      0.89      4966
weighted avg       0.95      0.95      0.95      4966

F1 score: 0.9514
SVC Training time: 10.344505071640015 seconds
SVC Memory usage: 300.01151999999996 MB


## All combined

In [100]:
X_train, X_test, y_train, y_test = train_test_split(X, y_currency_denomination_orientation, test_size= 0.2, random_state=0)
svc_classifier, training_time, memory_usage = Classifier(X_train, y_train, SVC(kernel='linear', random_state=42))
y_pred= svc_classifier.predict(X_test)

In [101]:
print(f'Predecting for Currency, Denomination, and Orientation without grid search:\n')
print(classification_report(y_pred, y_test))
fs = f1_score(y_pred, y_test, average='weighted')
print(f"F1 score: {fs:.4f}")
print(f"SVC Training time: {training_time} seconds")
print(f"SVC Memory usage: {abs(memory_usage)} MB")

Predecting for Currency, Denomination, and Orientation without grid search:

              precision    recall  f1-score   support

           0       0.90      0.93      0.92        29
           1       0.91      0.91      0.91        32
           2       0.88      0.71      0.78        41
           3       0.80      0.85      0.82        33
           4       0.65      0.63      0.64        35
           5       0.58      0.59      0.58        32
           6       0.92      0.88      0.90        26
           7       0.97      0.90      0.93        31
           8       0.93      0.83      0.88        30
           9       0.84      0.96      0.90        28
          10       0.87      1.00      0.93        47
          11       1.00      0.79      0.88        24
          12       1.00      1.00      1.00        36
          13       1.00      0.96      0.98        25
          14       1.00      1.00      1.00        24
          15       0.97      1.00      0.98        29
    

In [102]:
param_grid = {
    'C': [0.01, 0.1, 1],  
    'gamma': [1, 0.1, 0.01],
    'kernel': ['rbf', 'linear', 'poly']
}
grid_search = GridSearch(param_grid, SVC(), X_train, y_train)
print(grid_search.best_params_)

Fitting 5 folds for each of 27 candidates, totalling 135 fits
{'C': 1, 'gamma': 1, 'kernel': 'linear'}


In [103]:
classifier = grid_search.best_estimator_
svc_classifier, training_time, memory_usage = Classifier(X_train, y_train, classifier)
y_pred= svc_classifier.predict(X_test)

In [104]:
print(f'Predecting for Currency, Denomination, and Orientation with grid search:\n')
print(classification_report(y_pred, y_test))
fs = f1_score(y_pred, y_test, average='weighted')
print(f"F1 score: {fs:.4f}")
print(f"SVC Training time: {training_time} seconds")
print(f"SVC Memory usage: {abs(memory_usage)} MB")

Predecting for Currency, Denomination, and Orientation with grid search:

              precision    recall  f1-score   support

           0       0.90      0.93      0.92        29
           1       0.91      0.91      0.91        32
           2       0.88      0.71      0.78        41
           3       0.80      0.85      0.82        33
           4       0.65      0.63      0.64        35
           5       0.58      0.59      0.58        32
           6       0.92      0.88      0.90        26
           7       0.97      0.90      0.93        31
           8       0.93      0.83      0.88        30
           9       0.84      0.96      0.90        28
          10       0.87      1.00      0.93        47
          11       1.00      0.79      0.88        24
          12       1.00      1.00      1.00        36
          13       1.00      0.96      0.98        25
          14       1.00      1.00      1.00        24
          15       0.97      1.00      0.98        29
       