In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
# Suppress every warning category globally; note that this also hides any
# warnings raised by pandas or scikit-learn in the cells below.
warnings.filterwarnings('ignore')

In [3]:
# Read the zoo dataset from 'Zoo.csv' (path is relative to the working
# directory) and display the resulting DataFrame.
zoo_data = pd.read_csv('Zoo.csv')
zoo_data

Unnamed: 0,animal name,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize,type
0,aardvark,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
1,antelope,1,0,0,1,0,0,0,1,1,1,0,0,4,1,0,1,1
2,bass,0,0,1,0,0,1,1,1,1,0,0,1,0,1,0,0,4
3,bear,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1
4,boar,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,wallaby,1,0,0,1,0,0,0,1,1,1,0,0,2,1,0,1,1
97,wasp,1,0,1,0,1,0,0,0,0,1,1,0,6,0,0,0,6
98,wolf,1,0,0,1,0,0,1,1,1,1,0,0,4,1,0,1,1
99,worm,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,7


In [4]:
# (rows, columns) of the loaded dataset.
zoo_data.shape

(101, 18)

In [5]:
# Count missing values per column.
zoo_data.isna().sum()

animal name    0
hair           0
feathers       0
eggs           0
milk           0
airborne       0
aquatic        0
predator       0
toothed        0
backbone       0
breathes       0
venomous       0
fins           0
legs           0
tail           0
domestic       0
catsize        0
type           0
dtype: int64

In [6]:
# Inspect the dtype of each column.
zoo_data.dtypes

animal name    object
hair            int64
feathers        int64
eggs            int64
milk            int64
airborne        int64
aquatic         int64
predator        int64
toothed         int64
backbone        int64
breathes        int64
venomous        int64
fins            int64
legs            int64
tail            int64
domestic        int64
catsize         int64
type            int64
dtype: object

## Model Building

In [14]:
# Features: every column except the animal's name and the class label.
x = zoo_data.drop(columns=['animal name', 'type'])

# Target: the class label, kept as a single-column DataFrame.
y = zoo_data[['type']]

In [15]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions similar in both splits; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=12,
)

In [17]:
# Shapes of the training and test feature matrices.
x_train.shape,x_test.shape

((80, 16), (21, 16))

In [19]:
# Shapes of the training and test targets.
y_train.shape,y_test.shape

((80, 1), (21, 1))

## Model Training | Testing | Evaluation

In [20]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [23]:
# Baseline model: k-nearest neighbours with k=3, fitted on the training split.
# The split cell defines lowercase `x_train`; the previous `X_train` is not
# defined anywhere in this notebook and raised NameError on a fresh kernel.
knn_classifier = KNeighborsClassifier(n_neighbors=3)
knn_classifier.fit(x_train, y_train)

KNeighborsClassifier(n_neighbors=3)

In [24]:
# Score the baseline model on the held-out split.
# Uses lowercase `x_test` (the name produced by the split cell); the previous
# `X_test` is not defined anywhere in this notebook.
y_pred = knn_classifier.predict(x_test)
print('Accuracy Score:', round(accuracy_score(y_test, y_pred), 4))

Accuracy Score: 0.9524


### Choosing the Optimal Number of Neighbours

In [32]:
# Sweep the odd neighbour counts 1, 3, ..., 29: fit on the training split and
# print the accuracy on the held-out split for each value.
# NOTE(review): k is being compared on the same split used for the final
# score; confirm this is intended rather than using a separate validation set.
for k in range(1, 30, 2):
    knn_model = KNeighborsClassifier(n_neighbors=k)
    knn_model.fit(x_train, y_train)
    y_pred = knn_model.predict(x_test)
    holdout_accuracy = accuracy_score(y_test, y_pred)
    print('K value is: ', k)
    print(holdout_accuracy)

K value is:  1
1.0
K value is:  3
0.9523809523809523
K value is:  5
0.9523809523809523
K value is:  7
0.9047619047619048
K value is:  9
0.8095238095238095
K value is:  11
0.8095238095238095
K value is:  13
0.8095238095238095
K value is:  15
0.8095238095238095
K value is:  17
0.8095238095238095
K value is:  19
0.8095238095238095
K value is:  21
0.8095238095238095
K value is:  23
0.7142857142857143
K value is:  25
0.7142857142857143
K value is:  27
0.7142857142857143
K value is:  29
0.5714285714285714


In [25]:
from sklearn.model_selection import cross_val_score

In [33]:
# Mean cross-validated score on the full dataset (x, y) for each odd k in
# 1, 3, ..., 29. cross_val_score is left at its default `cv` and `scoring`.
cv_scores = [
    cross_val_score(estimator=KNeighborsClassifier(n_neighbors=k), X=x, y=y).mean()
    for k in range(1, 31, 2)
]

In [34]:
# Mean cross-validation score per k, in the order k = 1, 3, ..., 29.
cv_scores

[0.97,
 0.93,
 0.8509523809523811,
 0.8414285714285714,
 0.7919047619047619,
 0.781904761904762,
 0.781904761904762,
 0.781904761904762,
 0.781904761904762,
 0.781904761904762,
 0.7419047619047618,
 0.7128571428571429,
 0.7028571428571427,
 0.7028571428571427,
 0.6928571428571428]

From the results above, the model performs well when the value of k is 3 or 5.

In [38]:
# Refit with k=3 on the training split and report accuracy on the held-out split.
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_model.fit(x_train, y_train)
y_pred = knn_model.predict(x_test)
print('K value is: ', knn_model.n_neighbors)
print(accuracy_score(y_test, y_pred))

K value is:  3
0.9523809523809523


In [39]:
# Refit with k=5 on the training split and report accuracy on the held-out split.
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(x_train, y_train)
y_pred = knn_model.predict(x_test)
print('K value is: ', knn_model.n_neighbors)
print(accuracy_score(y_test, y_pred))

K value is:  5
0.9523809523809523
