# SVM Classification: Determining Whether Mushrooms Are Edible or Poisonous

## Import libraries and prepare data

In [None]:
# START: OWN CODE

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
# Load the raw mushroom table from the CSV next to the notebook.
data = pd.read_csv('mushrooms.csv')
# Drop the two columns excluded from the analysis (noted by the author as containing null values).
data = data.drop(columns=['stalk-root', 'veil-type'])
# Encode every cell as its character's offset from 'a' (e.g. 'p' -> 15, 'e' -> 4).
# ord() requires each cell to be a single-character string.
# Column-wise Series.map replaces DataFrame.applymap, which is deprecated in recent pandas.
data = data.apply(lambda column: column.map(lambda ch: ord(ch) - ord('a')))
# Relabel the target: code 15 ('p', poisonous) becomes 0 and code 4 ('e', edible) becomes 1.
data['class'] = data['class'].map({15: 0, 4: 1})

data # preview data

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,0,23,18,13,19,15,5,2,13,10,...,18,18,22,22,22,14,15,10,18,20
1,1,23,18,24,19,0,5,2,1,10,...,18,18,22,22,22,14,15,13,13,6
2,1,1,18,22,19,11,5,2,1,13,...,18,18,22,22,22,14,15,13,13,12
3,0,23,24,22,19,15,5,2,13,13,...,18,18,22,22,22,14,15,10,18,20
4,1,23,18,6,5,13,5,22,1,10,...,18,18,22,22,22,14,4,13,0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,1,10,18,13,5,13,0,2,1,24,...,18,18,14,14,14,14,15,1,2,11
8120,1,23,18,13,5,13,0,2,1,24,...,18,18,14,14,13,14,15,1,21,11
8121,1,5,18,13,5,13,0,2,1,13,...,18,18,14,14,14,14,15,1,2,11
8122,0,10,24,13,5,24,5,2,13,1,...,18,10,22,22,22,14,4,22,21,11


In [3]:
# Separate the predictor columns from the target column ('class').
features = data.drop(columns=['class'])
labels = data.loc[:, 'class']

## Perform SVM with all features

In [4]:
# 80/20 split of data into training and testing data.
# A fixed random_state makes the split, and every score computed from it, reproducible on re-run.
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.20, random_state=42
)

### Linear SVM

In [5]:
# Fit a linear-kernel SVM on the training split and predict the held-out split.
lin_all = SVC(kernel='linear')
lin_all.fit(train_features, train_labels)

label_pred = lin_all.predict(test_features)

# Rows of the confusion matrix are true classes and columns are predicted classes,
# so the diagonal counts the correctly classified samples.
c = confusion_matrix(test_labels, label_pred)
accuracy = np.trace(c) / np.sum(c)

# The value is the fraction of correct predictions, so it is labelled as accuracy, not error.
print(f'Accuracy: {accuracy}')
print(f'Confusion Matrix:\n{c}')
print(f'Classification Report:\n {classification_report(test_labels, label_pred)}')

Prediction Error: 0.9581538461538461
Confusion Matrix:
[[785  29]
 [ 39 772]]
Classification Report:
               precision    recall  f1-score   support

           0       0.95      0.96      0.96       814
           1       0.96      0.95      0.96       811

    accuracy                           0.96      1625
   macro avg       0.96      0.96      0.96      1625
weighted avg       0.96      0.96      0.96      1625



### SVM with Quadratic Polynomial Kernel

In [6]:
# Fit a degree-2 polynomial-kernel SVM on the training split and predict the held-out split.
quad_all = SVC(kernel='poly',degree=2)
quad_all.fit(train_features, train_labels)

label_pred = quad_all.predict(test_features)

# Rows of the confusion matrix are true classes and columns are predicted classes,
# so the diagonal counts the correctly classified samples.
c = confusion_matrix(test_labels, label_pred)
accuracy = np.trace(c) / np.sum(c)

# The value is the fraction of correct predictions, so it is labelled as accuracy, not error.
print(f'Accuracy: {accuracy}')
print(f'Confusion Matrix:\n{c}')
print(f'Classification Report:\n {classification_report(test_labels, label_pred)}')

Prediction Error: 0.9975384615384615
Confusion Matrix:
[[811   3]
 [  1 810]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       814
           1       1.00      1.00      1.00       811

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625



### SVM with Cubic Polynomial Kernel

In [7]:
# Fit a degree-3 polynomial-kernel SVM on the training split and predict the held-out split.
cub_all = SVC(kernel='poly',degree=3)
cub_all.fit(train_features, train_labels)

label_pred = cub_all.predict(test_features)

# Rows of the confusion matrix are true classes and columns are predicted classes,
# so the diagonal counts the correctly classified samples.
c = confusion_matrix(test_labels, label_pred)
accuracy = np.trace(c) / np.sum(c)

# The value is the fraction of correct predictions, so it is labelled as accuracy, not error.
print(f'Accuracy: {accuracy}')
print(f'Confusion Matrix:\n{c}')
print(f'Classification Report:\n {classification_report(test_labels, label_pred)}')

Prediction Error: 1.0
Confusion Matrix:
[[814   0]
 [  0 811]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       814
           1       1.00      1.00      1.00       811

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625



### SVM with Radial Basis Function (RBF) Kernel

In [8]:
# Fit an RBF-kernel SVM on the training split and predict the held-out split.
rbf_all = SVC(kernel='rbf')
rbf_all.fit(train_features, train_labels)

label_pred = rbf_all.predict(test_features)

# Rows of the confusion matrix are true classes and columns are predicted classes,
# so the diagonal counts the correctly classified samples.
c = confusion_matrix(test_labels, label_pred)
accuracy = np.trace(c) / np.sum(c)

# The value is the fraction of correct predictions, so it is labelled as accuracy, not error.
print(f'Accuracy: {accuracy}')
print(f'Confusion Matrix:\n{c}')
print(f'Classification Report:\n {classification_report(test_labels, label_pred)}')

Prediction Error: 0.9975384615384615
Confusion Matrix:
[[814   0]
 [  4 807]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       814
           1       1.00      1.00      1.00       811

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625



## Greedy Forward Feature Selection (with RBF kernel)

### Define function to perform greedy forward feature selection

In [9]:
def greedy_forward(data=data, labels=labels, features=features):
    """Greedily select feature columns that lower the error of an RBF-kernel SVM.

    Starting from an empty selection, each round fits one ``SVC(kernel='rbf')``
    per remaining column (trained on the already-selected columns plus that
    candidate) and scores it on a held-out 20% split. The candidate with the
    lowest misclassification rate is added only if it strictly improves on the
    best error reached so far; otherwise the search stops. The search also
    stops once every column has been selected.

    Parameters
    ----------
    data : object
        Unused. Kept only so existing calls that pass it keep working.
    labels : array-like
        Target values, aligned row-for-row with ``features``.
    features : pandas.DataFrame
        Frame whose columns are the candidate attributes.

    Returns
    -------
    list
        Names of the selected columns, in the order they were added.

    Notes
    -----
    * The default arguments are the module-level objects as they existed when
      this function was defined; re-run the definition after changing them.
    * The train/held-out split is not seeded, so the selection can differ
      between calls.
    * The held-out split drives the selection itself, so the error seen here
      is an optimistic estimate for the chosen columns.
    """
    # A plain list lets chosen columns actually be removed from the candidates
    # (Index.drop returns a new Index and leaves the original untouched).
    remaining = list(features.columns)
    selected = []
    best_error = 1.0  # the first pick must beat a 100% error rate

    # split data into training and held-out sets once; every round reuses them
    train_features, test_features, train_labels, test_labels = train_test_split(features, labels, test_size = 0.20)
    true_labels = np.asarray(test_labels)

    while remaining:
        round_error, round_attr = best_error, None
        for attr in remaining:
            candidate = selected + [attr]
            classifier = SVC(kernel='rbf')
            classifier.fit(train_features[candidate], train_labels)

            label_pred = classifier.predict(test_features[candidate])

            # misclassification rate = fraction of held-out rows predicted wrongly
            error = np.mean(np.asarray(label_pred) != true_labels)

            if error < round_error:
                round_error, round_attr = error, attr

        # stop as soon as no remaining column strictly improves the error
        if round_attr is None:
            break

        selected.append(round_attr)
        remaining.remove(round_attr)
        best_error = round_error

    return selected

### Show results of a single greedy forward feature selection

In [10]:
# Run one greedy forward selection using greedy_forward's default arguments
# (the data/labels/features objects bound when the function was defined).
greedy_attributes = greedy_forward()

# Display the selected column names, in the order they were added.
greedy_attributes

['odor',
 'spore-print-color',
 'habitat',
 'bruises',
 'stalk-surface-above-ring',
 'gill-spacing']

### Perform same SVM classifiers as before with greedily selected features

In [11]:
# keep only the columns picked by the greedy search
greedy_features = data.loc[:, greedy_attributes]

### Linear SVM

In [12]:
# Repeat the 80/20 split / fit / score cycle for a linear-kernel SVM on the greedy features.
# Seeding each repetition with its index keeps the reported mean and spread reproducible.
greedy_linear_res = []
for seed in range(10):
    train_features, test_features, train_labels, test_labels = train_test_split(
        greedy_features, labels, test_size=0.20, random_state=seed
    )
    greedy_linear = SVC(kernel='linear')
    greedy_linear.fit(train_features, train_labels)

    label_pred = greedy_linear.predict(test_features)

    # diagonal of the confusion matrix = correctly classified samples
    c = confusion_matrix(test_labels, label_pred)
    accuracy = np.trace(c) / np.sum(c)
    greedy_linear_res.append(accuracy)

print(f'Mean Accuracy: {np.mean(greedy_linear_res)}')
print(f'Standard Deviation of Accuracy: {np.std(greedy_linear_res)}\n')


Mean Accuracy: 0.8830153846153846
Standard Deviation of Accuracy: 0.007319973485902772



### SVM with Quadratic Polynomial Kernel

In [13]:
# Repeat the 80/20 split / fit / score cycle for a degree-2 polynomial SVM on the greedy features.
# Seeding each repetition with its index keeps the reported mean and spread reproducible.
greedy_quad_res = []
for seed in range(10):
    train_features, test_features, train_labels, test_labels = train_test_split(
        greedy_features, labels, test_size=0.20, random_state=seed
    )
    greedy_quad = SVC(kernel='poly', degree=2)
    greedy_quad.fit(train_features, train_labels)

    label_pred = greedy_quad.predict(test_features)

    # diagonal of the confusion matrix = correctly classified samples
    c = confusion_matrix(test_labels, label_pred)
    accuracy = np.trace(c) / np.sum(c)
    greedy_quad_res.append(accuracy)

print(f'Mean Accuracy: {np.mean(greedy_quad_res)}')
print(f'Standard Deviation of Accuracy: {np.std(greedy_quad_res)}\n')


Mean Accuracy: 0.9630769230769232
Standard Deviation of Accuracy: 0.006650710688544639



### SVM with Cubic Polynomial Kernel

In [14]:
# Repeat the 80/20 split / fit / score cycle for a degree-3 polynomial SVM on the greedy features.
# Seeding each repetition with its index keeps the reported mean and spread reproducible.
greedy_cub_res = []
for seed in range(10):
    train_features, test_features, train_labels, test_labels = train_test_split(
        greedy_features, labels, test_size=0.20, random_state=seed
    )
    greedy_cub = SVC(kernel='poly', degree=3)
    greedy_cub.fit(train_features, train_labels)

    label_pred = greedy_cub.predict(test_features)

    # diagonal of the confusion matrix = correctly classified samples
    c = confusion_matrix(test_labels, label_pred)
    accuracy = np.trace(c) / np.sum(c)
    greedy_cub_res.append(accuracy)

print(f'Mean Accuracy: {np.mean(greedy_cub_res)}')
print(f'Standard Deviation of Accuracy: {np.std(greedy_cub_res)}\n')


Mean Accuracy: 0.9915692307692308
Standard Deviation of Accuracy: 0.0016523965024120063



### SVM with RBF Kernel

In [15]:
# Repeat the 80/20 split / fit / score cycle for an RBF-kernel SVM on the greedy features.
# Seeding each repetition with its index keeps the reported mean and spread reproducible.
# Note: this cell runs 100 repetitions, whereas the other kernel cells run 10.
greedy_rbf_res = []
for seed in range(100):
    train_features, test_features, train_labels, test_labels = train_test_split(
        greedy_features, labels, test_size=0.20, random_state=seed
    )
    greedy_rbf = SVC(kernel='rbf')
    greedy_rbf.fit(train_features, train_labels)

    label_pred = greedy_rbf.predict(test_features)

    # diagonal of the confusion matrix = correctly classified samples
    c = confusion_matrix(test_labels, label_pred)
    accuracy = np.trace(c) / np.sum(c)
    greedy_rbf_res.append(accuracy)

print(f'Mean Accuracy: {np.mean(greedy_rbf_res)}')
print(f'Standard Deviation of Accuracy: {np.std(greedy_rbf_res)}\n')


Mean Accuracy: 0.9962276923076923
Standard Deviation of Accuracy: 0.003508939049689784



## Sigmoid Kernel

### Sigmoid Kernel on All Features

In [16]:
# Repeat the 80/20 split / fit / score cycle for a sigmoid-kernel SVM on all (unscaled) features.
# Seeding each repetition with its index keeps the reported mean and spread reproducible.
sigmoid_res = []
for seed in range(10):
    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.20, random_state=seed
    )
    sigmoid = SVC(kernel='sigmoid')
    sigmoid.fit(train_features, train_labels)

    label_pred = sigmoid.predict(test_features)

    # diagonal of the confusion matrix = correctly classified samples
    c = confusion_matrix(test_labels, label_pred)
    accuracy = np.trace(c) / np.sum(c)
    sigmoid_res.append(accuracy)

print(f'Mean Accuracy: {np.mean(sigmoid_res)}')
print(f'Standard Deviation of Accuracy: {np.std(sigmoid_res)}\n')


Mean Accuracy: 0.5605538461538462
Standard Deviation of Accuracy: 0.12923412449856753



### Sigmoid Kernel on Greedily Chosen Features

In [17]:
# Repeat the 80/20 split / fit / score cycle for a sigmoid-kernel SVM on the greedy (unscaled) features.
# Seeding each repetition with its index keeps the reported mean and spread reproducible.
greedy_sigmoid_res = []
for seed in range(10):
    train_features, test_features, train_labels, test_labels = train_test_split(
        greedy_features, labels, test_size=0.20, random_state=seed
    )
    greedy_sigmoid = SVC(kernel='sigmoid')
    greedy_sigmoid.fit(train_features, train_labels)

    label_pred = greedy_sigmoid.predict(test_features)

    # diagonal of the confusion matrix = correctly classified samples
    c = confusion_matrix(test_labels, label_pred)
    accuracy = np.trace(c) / np.sum(c)
    greedy_sigmoid_res.append(accuracy)

print(f'Mean Accuracy: {np.mean(greedy_sigmoid_res)}')
print(f'Standard Deviation of Accuracy: {np.std(greedy_sigmoid_res)}\n')


Mean Accuracy: 0.5345846153846154
Standard Deviation of Accuracy: 0.13630708108909673



In [18]:
# Min-max scale each feature column: subtract the column minimum, then divide by the
# column range (max - min). A column with zero range would come out as NaN.
norm_features = features.sub(features.min()).div(features.max() - features.min())

norm_features.head()

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,1.0,0.684211,0.521739,1.0,0.625,1.0,0.0,1.0,0.391304,0.0,0.684211,0.684211,0.913043,0.913043,0.818182,0.166667,1.0,0.391304,0.75,0.894737
1,1.0,0.684211,1.0,1.0,0.0,1.0,0.0,0.0,0.391304,0.0,0.684211,0.684211,0.913043,0.913043,0.818182,0.166667,1.0,0.521739,0.541667,0.157895
2,0.0,0.684211,0.913043,1.0,0.458333,1.0,0.0,0.0,0.521739,0.0,0.684211,0.684211,0.913043,0.913043,0.818182,0.166667,1.0,0.521739,0.541667,0.473684
3,1.0,1.0,0.913043,1.0,0.625,1.0,0.0,1.0,0.521739,0.0,0.684211,0.684211,0.913043,0.913043,0.818182,0.166667,1.0,0.391304,0.75,0.894737
4,1.0,0.684211,0.217391,0.0,0.541667,1.0,1.0,0.0,0.391304,1.0,0.684211,0.684211,0.913043,0.913043,0.818182,0.166667,0.0,0.521739,0.0,0.157895


In [19]:
# Repeat the 80/20 split / fit / score cycle for a sigmoid-kernel SVM on the min-max scaled features.
# Seeding each repetition with its index keeps the reported mean and spread reproducible.
# Note: this reuses the name sigmoid_res, replacing the results of the unscaled run.
sigmoid_res = []
for seed in range(10):
    train_features, test_features, train_labels, test_labels = train_test_split(
        norm_features, labels, test_size=0.20, random_state=seed
    )
    sigmoid = SVC(kernel='sigmoid')
    sigmoid.fit(train_features, train_labels)

    label_pred = sigmoid.predict(test_features)

    # diagonal of the confusion matrix = correctly classified samples
    c = confusion_matrix(test_labels, label_pred)
    accuracy = np.trace(c) / np.sum(c)
    sigmoid_res.append(accuracy)

print(f'Mean Accuracy: {np.mean(sigmoid_res)}')
print(f'Standard Deviation of Accuracy: {np.std(sigmoid_res)}\n')


Mean Accuracy: 0.5124923076923077
Standard Deviation of Accuracy: 0.010792239157630954



In [20]:
# Repeat the 80/20 split / fit / score cycle for a sigmoid-kernel SVM on the min-max scaled greedy features.
# Seeding each repetition with its index keeps the reported mean and spread reproducible.
# Note: this reuses the name greedy_sigmoid_res, replacing the results of the unscaled run.
greedy_sigmoid_res = []
for seed in range(10):
    train_features, test_features, train_labels, test_labels = train_test_split(
        norm_features[greedy_attributes], labels, test_size=0.20, random_state=seed
    )
    greedy_sigmoid = SVC(kernel='sigmoid')
    greedy_sigmoid.fit(train_features, train_labels)

    label_pred = greedy_sigmoid.predict(test_features)

    # diagonal of the confusion matrix = correctly classified samples
    c = confusion_matrix(test_labels, label_pred)
    accuracy = np.trace(c) / np.sum(c)
    greedy_sigmoid_res.append(accuracy)

print(f'Mean Accuracy: {np.mean(greedy_sigmoid_res)}')
print(f'Standard Deviation of Accuracy: {np.std(greedy_sigmoid_res)}\n')

Mean Accuracy: 0.6968
Standard Deviation of Accuracy: 0.02374467738551129



In [None]:
# END: OWN CODE