In [None]:
##########################
# 1. IMPORT ALL PACKAGES #
##########################

import pandas as pd
import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
import math
from collections import Counter
import seaborn as sns
import matplotlib.pyplot as plt #for plotting
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import multilabel_confusion_matrix
# importing mean() 
from statistics import mean

In [None]:
########################################
# 2. LOAD TESTING AND TRAINING DATASET #
########################################

# Read the letter-recognition CSV; the first row is used as the column names.
data = pd.read_csv(
    '../input/letterrecognition-using-svm/letter-recognition.csv',
    header=0,
)

# Replace the 'letter' column with integer codes. The fitted encoder is kept
# in `labelencoder` so the codes can be mapped back to the original labels.
labelencoder = LabelEncoder().fit(data['letter'])
data['letter'] = labelencoder.transform(data['letter'])

print("Dimensions: ", data.shape, "\n")

# Preview the first ten rows
data.head(10)

In [None]:
# Plot the class distribution of the encoded 'letter' column.
# The column is passed as `x=` explicitly: seaborn 0.11 warns about positional
# use, and from 0.12 the only positional parameter of countplot is `data`,
# so a positionally passed Series is no longer interpreted as the x variable.
sns.countplot(x=data['letter'])

In [None]:
###################################
# 3. SHARE TO TEST AND TRAIN DATA #
###################################

# Number of leading rows held out for testing; every remaining row is used
# for training. The split is positional (no shuffling is applied here).
n_test = 5000

# Features are every column after the first; labels are the encoded 'letter' column.
x = data.iloc[:, 1:]
y = data['letter'].tolist()
print(x)

# Feature matrices as float32 arrays
x_test = x.iloc[:n_test, :].values.astype('float32')
x_train = x.iloc[n_test:, :].values.astype('float32')

# Matching label lists
y_test, y_train = y[:n_test], y[n_test:]

# Show the feature values of the first row
data.iloc[:1, 1:].values

In [None]:
from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import mean_squared_error


In [None]:
# Train a polynomial-kernel SVM on the training split.
# `fit` returns the estimator itself, so construction and training are chained.
support_vector_classifier = SVC(C=500, gamma=0.2, kernel='poly').fit(x_train, y_train)

# Display the fitted estimator as the cell output
support_vector_classifier

In [None]:
#################################################
# 5. APPLY THE TRAINED LEARNER TO TEST NEW DATA #
#################################################
# Apply the trained support vector classifier to predict a label for every
# row of the held-out test features; the predictions are kept in `y_pred`.
y_pred = support_vector_classifier.predict(x_test)

In [None]:
##################################################
# 6. MULTI-CLASS CONFUSION MATRIX FOR EACH CLASS #
##################################################
# Actual and predicted classes
lst_actual_class = y_test
lst_predicted_class = y_pred

# Encoded class labels 0..25, in ascending order so that the position in
# `arr_out_matrix` equals the class label.
lst_classes = list(range(26))

# One 2x2 one-vs-rest confusion matrix per class, laid out as [[tn, fp], [fn, tp]]
arr_out_matrix = multilabel_confusion_matrix(lst_actual_class, lst_predicted_class, labels=lst_classes)


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero.

    A zero denominator occurs when a class never appears in the test labels
    or is never predicted. Reporting 0.0 keeps NaN/inf out of the per-class
    lists, which are averaged afterwards.
    """
    return numerator / denominator if denominator else 0.0


# Per-class results. Values are stored UNROUNDED so that later averaging does
# not accumulate rounding error; rounding is applied only when printing.
store_sens = []
store_spec = []
store_acc = []
store_bal_acc = []
store_prec = []
store_fscore = []
store_mcc = []

# Loop over each target label
for no_class, arr_data in zip(lst_classes, arr_out_matrix):
    print("Predicted Performance of Digit Label/Class: {0}".format(no_class))

    # Convert the numpy counts to Python ints: the MCC denominator multiplies
    # four counts together, and fixed-width numpy integers can overflow silently.
    tn, fp, fn, tp = (int(count) for count in arr_data.ravel())

    sensitivity = _safe_ratio(tp, tp + fn)
    specificity = _safe_ratio(tn, tn + fp)
    accuracy = _safe_ratio(tp + tn, tp + fp + tn + fn)
    # Derived metrics use the unrounded inputs
    balanced_accuracy = (sensitivity + specificity) / 2
    precision = _safe_ratio(tp, tp + fp)
    fscore = _safe_ratio(2 * precision * sensitivity, precision + sensitivity)
    mcc = _safe_ratio(
        (tp * tn) - (fp * fn),
        math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)),
    )

    store_sens.append(sensitivity)
    store_spec.append(specificity)
    store_acc.append(accuracy)
    store_bal_acc.append(balanced_accuracy)
    store_prec.append(precision)
    store_fscore.append(fscore)
    store_mcc.append(mcc)

    print("TP={0}, FP={1}, TN={2}, FN={3}".format(tp, fp, tn, fn))
    print("Sensitivity: {0}".format(round(sensitivity, 3)))
    print("Specificity: {0}".format(round(specificity, 3)))
    print("Accuracy: {0}".format(round(accuracy, 3)))
    print("Balanced Accuracy: {0}".format(round(balanced_accuracy, 3)))
    print("Precision: {0}".format(round(precision, 3)))
    print("F1-Score: {0}".format(round(fscore, 3)))
    print("MCC: {0}\n".format(round(mcc, 3)))

In [None]:
# Summary over all classes: every figure is the unweighted mean of the
# per-class values collected in the store_* lists. Note that "Accuracy" here is
# the mean of the one-vs-rest accuracies, which count true negatives.
print("Overall Performance Prediction:")

# Metrics reported as percentages: (output template, per-class scores)
percent_rows = [
    ("Sensitivity: {0}%", store_sens),
    ("Specificity: {0}%", store_spec),
    ("Accuracy: {0}%", store_acc),
    ("Balanced Accuracy: {0}%", store_bal_acc),
    ("Precision: {0}%", store_prec),
    ("F1-Score: {0}%", store_fscore),
]
for template, per_class_scores in percent_rows:
    print(template.format(round(mean(per_class_scores) * 100, 4)))

# MCC is reported on its native scale rather than as a percentage
print("MCC: {0}\n".format(round(mean(store_mcc), 4)))