In [1]:
# Import dependencies
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn import svm, datasets
import sklearn.model_selection as model_selection
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
import numpy as np
import os

In [2]:
zoo_df = pd.read_csv(os.path.join("data", "zoo_animals.csv"))
zoo_df = zoo_df.drop('animal name', axis = 1)
zoo_df.head()

Unnamed: 0,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize,type
0,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
1,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,1
2,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,4
3,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
4,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,1


In [3]:
target = zoo_df['type']
target_names = ['Mammal', 'Bird', 'Reptile', 'Fish', 'Amphibian', 'Insect', 'Invertebrate']

In [4]:
data = zoo_df.drop('type', axis=1)
feature_names = data.columns
data.head()

Unnamed: 0,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize
0,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1
1,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1
2,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0
3,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1
4,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1


In [5]:
X_train, X_test, y_train, y_test = model_selection.train_test_split(data, target, train_size=0.80, test_size=0.20, random_state=42)

In [6]:
# Testing with RBF Kernel

In [7]:
rbf = svm.SVC(kernel='rbf', gamma=0.5, C=0.1).fit(X_train, y_train)

In [8]:
rbf_pred = rbf.predict(X_test)

In [9]:
rbf_accuracy = accuracy_score(y_test, rbf_pred)
rbf_f1 = f1_score(y_test, rbf_pred, average='weighted')
print('Accuracy (RBF Kernel): ', "%.2f" % (rbf_accuracy*100))
print('F1 (RBF Kernel): ', "%.2f" % (rbf_f1*100))
print(classification_report(y_test, rbf_pred, labels=[1,2,3,4,5,6,7],
                            target_names=target_names))

Accuracy (RBF Kernel):  66.67
F1 (RBF Kernel):  53.76
              precision    recall  f1-score   support

      Mammal       0.63      1.00      0.77        12
        Bird       1.00      1.00      1.00         2
     Reptile       0.00      0.00      0.00         1
        Fish       0.00      0.00      0.00         2
   Amphibian       0.00      0.00      0.00         0
      Insect       0.00      0.00      0.00         3
Invertebrate       0.00      0.00      0.00         1

   micro avg       0.67      0.67      0.67        21
   macro avg       0.23      0.29      0.25        21
weighted avg       0.46      0.67      0.54        21



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
cm_rbf = confusion_matrix(y_test, rbf_pred)
print(cm_rbf)

[[12  0  0  0  0  0]
 [ 0  2  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 2  0  0  0  0  0]
 [ 3  0  0  0  0  0]
 [ 1  0  0  0  0  0]]


In [11]:
# Testing with Polynomial Kernel

In [12]:
poly = svm.SVC(kernel='poly', degree=3, C=1).fit(X_train, y_train)

In [13]:
poly_pred = poly.predict(X_test)

In [14]:
poly_accuracy = accuracy_score(y_test, poly_pred)
poly_f1 = f1_score(y_test, poly_pred, average='weighted')
print('Accuracy (Polynomial Kernel): ', "%.2f" % (poly_accuracy*100))
print('F1 (Polynomial Kernel): ', "%.2f" % (poly_f1*100))
print(classification_report(y_test, poly_pred, labels=[1,2,3,4,5,6,7],
                            target_names=target_names))

Accuracy (Polynomial Kernel):  85.71
F1 (Polynomial Kernel):  83.91
              precision    recall  f1-score   support

      Mammal       1.00      0.92      0.96        12
        Bird       1.00      1.00      1.00         2
     Reptile       0.00      0.00      0.00         1
        Fish       0.40      1.00      0.57         2
   Amphibian       0.00      0.00      0.00         0
      Insect       1.00      1.00      1.00         3
Invertebrate       0.00      0.00      0.00         1

   micro avg       0.86      0.86      0.86        21
   macro avg       0.49      0.56      0.50        21
weighted avg       0.85      0.86      0.84        21



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
cm_poly = confusion_matrix(y_test, poly_pred)
print(cm_poly)

[[11  0  0  1  0  0]
 [ 0  2  0  0  0  0]
 [ 0  0  0  1  0  0]
 [ 0  0  0  2  0  0]
 [ 0  0  0  0  3  0]
 [ 0  0  0  1  0  0]]
