In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold

In [2]:
def model_fit_predict(X_train, y_train):
    kf = StratifiedKFold(n_splits = 4, random_state=42, shuffle=True)
    accuracies = []
    precisions = []
    recalls = []
    f1s = []
    for train_index, test_index in kf.split(X_train, y_train):
        x_train = X_train.iloc[train_index]
        Y_train = y_train.iloc[train_index]
        x_test = X_train.iloc[test_index]
        Y_test = y_train.iloc[test_index]
        model = SVC()
        model.fit(x_train, Y_train)
        predictions = model.predict(x_test)
        accuracies.append(accuracy_score(predictions, Y_test))
        precisions.append(precision_score(predictions, Y_test, average = None, zero_division=1))
        recalls.append(recall_score(predictions, Y_test, average = None, zero_division=1))
        f1s.append(f1_score(predictions, Y_test, average = None, zero_division=1))
    precision = np.sum(precisions, axis=0)/len(precisions)
    recall = np.sum(recalls, axis=0)/len(recalls)
    f1 =  np.sum(f1s, axis=0)/len(f1s)
    print('Accuracy Score : ', sum(accuracies)/len(accuracies))
    print('Precision Score : ', precision)
    print('Recall Score : ', recall)
    print('F1 Score : ', f1)
    print('Avg Precision Score : ', np.mean(np.array(precision)))
    print('Avg Recall Score : ', np.mean(np.array(recall)))
    print('Avg F1 Score : ', np.mean(np.array(f1)))

In [3]:
with open('zoo.names') as fp:
    line = fp.readlines(1)
    col = line[0].splitlines()
    cols = list(col[0].split(','))

options = {'header': None, 'names': cols, 'skipinitialspace': True}


zoo_df = pd.read_csv('zoo.data', **options)

In [4]:
for col in zoo_df.columns:
    print(col, ':', zoo_df[col].unique())

hair : [1 0]
feathers : [0 1]
eggs : [0 1]
milk : [1 0]
airborne : [0 1]
aquatic : [0 1]
predator : [1 0]
toothed : [1 0]
backbone : [1 0]
breathes : [1 0]
enomous : [0 1]
fins : [0 1]
legs : [4 0 2 6 8 5]
tail : [0 1]
domestic : [0 1]
catsize : [1 0]
class : [1 4 2 7 6 5 3]


In [5]:
one_hot = pd.get_dummies(zoo_df['legs'])
labels = zoo_df['class']

zoo_df = zoo_df.drop(['legs', 'class'],axis = 1)

zoo_df = zoo_df.join(one_hot)
zoo_df = zoo_df.join(labels)
zoo_df  

Unnamed: 0,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,...,tail,domestic,catsize,0,2,4,5,6,8,class
0,1,0,0,1,0,0,1,1,1,1,...,0,0,1,0,0,1,0,0,0,1
1,1,0,0,1,0,0,0,1,1,1,...,1,0,1,0,0,1,0,0,0,1
2,0,0,1,0,0,1,1,1,1,0,...,1,0,0,1,0,0,0,0,0,4
3,1,0,0,1,0,0,1,1,1,1,...,0,0,1,0,0,1,0,0,0,1
4,1,0,0,1,0,0,1,1,1,1,...,1,0,1,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,1,0,0,1,0,0,0,1,1,1,...,1,0,1,0,1,0,0,0,0,1
97,1,0,1,0,1,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,6
98,1,0,0,1,0,0,1,1,1,1,...,1,0,1,0,0,1,0,0,0,1
99,0,0,1,0,0,0,0,0,0,1,...,0,0,0,1,0,0,0,0,0,7


In [6]:
col_names = list(one_hot.columns)
name_map = { i : 'legs_'+str(i) for i in col_names }
zoo_df.rename(columns = name_map, inplace=True)
zoo_df

Unnamed: 0,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,...,tail,domestic,catsize,legs_0,legs_2,legs_4,legs_5,legs_6,legs_8,class
0,1,0,0,1,0,0,1,1,1,1,...,0,0,1,0,0,1,0,0,0,1
1,1,0,0,1,0,0,0,1,1,1,...,1,0,1,0,0,1,0,0,0,1
2,0,0,1,0,0,1,1,1,1,0,...,1,0,0,1,0,0,0,0,0,4
3,1,0,0,1,0,0,1,1,1,1,...,0,0,1,0,0,1,0,0,0,1
4,1,0,0,1,0,0,1,1,1,1,...,1,0,1,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,1,0,0,1,0,0,0,1,1,1,...,1,0,1,0,1,0,0,0,0,1
97,1,0,1,0,1,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,6
98,1,0,0,1,0,0,1,1,1,1,...,1,0,1,0,0,1,0,0,0,1
99,0,0,1,0,0,0,0,0,0,1,...,0,0,0,1,0,0,0,0,0,7


In [7]:
X = zoo_df.iloc[:,:-1]
y = zoo_df.iloc[:,-1]

In [8]:
model_fit_predict(X, y)

Accuracy Score :  0.98
Precision Score :  [1.  1.  0.5 1.  1.  1.  1. ]
Recall Score :  [0.97727273 1.         1.         0.9375     1.         1.
 1.        ]
F1 Score :  [0.98809524 1.         0.5        0.96428571 1.         1.
 1.        ]
Avg Precision Score :  0.9285714285714286
Avg Recall Score :  0.9878246753246753
Avg F1 Score :  0.9217687074829932
