In [1]:
import pandas as pd
import numpy as np

In [2]:
# read in mushrooms dataset
# 'stalk-root' marks missing values with '?'; instead of filtering out the
# affected rows, the whole column is dropped so every row is kept.
# drop() returns a new frame, so re-running this cell is always safe.
data = pd.read_csv('mushrooms.csv').drop(columns='stalk-root')
data

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,e,k,s,n,f,n,a,c,b,y,...,s,o,o,p,o,o,p,b,c,l
8120,e,x,s,n,f,n,a,c,b,y,...,s,o,o,p,n,o,p,b,v,l
8121,e,f,s,n,f,n,a,c,b,n,...,s,o,o,p,o,o,p,b,c,l
8122,p,k,y,n,f,y,f,c,n,b,...,k,w,w,p,w,o,e,w,v,l


In [3]:
# separate labels and encode (poisonous = 0, edible = 1)
labels = data['class'].map({'p': 0, 'e': 1})

# Series.map turns any class code that is missing from the dict into NaN
# (and the column into float); fail here with a clear message rather than
# later inside the classifier.
assert labels.notna().all(), "unexpected value in 'class' column (expected 'p' or 'e')"

labels

0       0
1       1
2       1
3       0
4       1
       ..
8119    1
8120    1
8121    1
8122    0
8123    1
Name: class, Length: 8124, dtype: int64

In [4]:
# separate features and encode them: every single-letter category code is
# replaced by its offset from 'a' (ord(x) - 97, so 'a' -> 0, 'b' -> 1, ...)
# NOTE(review): this imposes an arbitrary numeric order on nominal categories;
# one-hot encoding may be a more principled choice - confirm before reuse.

# select by name rather than by position so the cell does not silently depend
# on 'class' being the first column
features = data.drop(columns='class')

# column-wise Series.map is the element-wise equivalent of the deprecated
# DataFrame.applymap and works on both old and new pandas versions
features = features.apply(lambda column: column.map(lambda x: ord(x) - ord('a')))
features


Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,23,18,13,19,15,5,2,13,10,4,...,18,22,22,15,22,14,15,10,18,20
1,23,18,24,19,0,5,2,1,10,4,...,18,22,22,15,22,14,15,13,13,6
2,1,18,22,19,11,5,2,1,13,4,...,18,22,22,15,22,14,15,13,13,12
3,23,24,22,19,15,5,2,13,13,4,...,18,22,22,15,22,14,15,10,18,20
4,23,18,6,5,13,5,22,1,10,19,...,18,22,22,15,22,14,4,13,0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,10,18,13,5,13,0,2,1,24,4,...,18,14,14,15,14,14,15,1,2,11
8120,23,18,13,5,13,0,2,1,24,4,...,18,14,14,15,13,14,15,1,21,11
8121,5,18,13,5,13,0,2,1,13,4,...,18,14,14,15,14,14,15,1,2,11
8122,10,24,13,5,24,5,2,13,1,19,...,10,22,22,15,22,14,4,22,21,11


In [5]:
# training/testing split
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

# hold out 20% of the rows for testing; the fixed random_state makes the split
# (and therefore every number printed below) reproducible across kernel restarts
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.20, random_state=42
)

# SVM with a degree-2 polynomial kernel
svclassifier = SVC(kernel='poly', degree=2)
svclassifier.fit(train_features, train_labels)

label_pred = svclassifier.predict(test_features)

# rows of the confusion matrix are true classes, columns are predictions;
# the off-diagonal cells are the misclassified test samples
c = confusion_matrix(test_labels, label_pred)
error = (c[0][1]+c[1][0])/np.sum(c)
print(error)
print(c)
print(classification_report(test_labels, label_pred))

0.0
[[766   0]
 [  0 859]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       766
           1       1.00      1.00      1.00       859

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625



In [6]:
# refit on the same train/test split with an RBF kernel for comparison
# (fit() returns the estimator itself, so construction and fitting chain)
svclassifier = SVC(kernel='rbf').fit(train_features, train_labels)
label_pred = svclassifier.predict(test_features)

# misclassification rate = off-diagonal confusion counts / all test samples
c = confusion_matrix(test_labels, label_pred)
error = (c[0, 1] + c[1, 0]) / c.sum()

# one print with newline separators emits the same three outputs in order
print(error, c, classification_report(test_labels, label_pred), sep='\n')

0.0012307692307692308
[[766   0]
 [  2 857]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       766
           1       1.00      1.00      1.00       859

    accuracy                           1.00      1625
   macro avg       1.00      1.00      1.00      1625
weighted avg       1.00      1.00      1.00      1625

