In [None]:
import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for file_name in files:
        full_path = os.path.join(folder, file_name)
        print(full_path)

In [None]:
# Importing libraries
import warnings
warnings.filterwarnings('ignore')
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

In [None]:
# Load the per-animal feature table and the class lookup table.
data_zoo = pd.read_csv('/kaggle/input/zoo-animal-classification/zoo.csv')
data_class = pd.read_csv('/kaggle/input/zoo-animal-classification/class.csv')

# Report how many distinct animal names and distinct class labels are present
# (missing values are not counted).
print("Zoo Animal count", data_zoo['animal_name'].nunique())
print("Class type", data_zoo['class_type'].nunique())

In [None]:
# Show the first rows of the animal table followed by the whole class table.
display(data_zoo.head(), data_class)

### Scaling the features using MinMaxScaler

In [None]:
# Target: the last column. Features: everything between the first and last
# columns (presumably animal_name is first and class_type is last - verify
# against the CSV layout).
y = data_zoo.iloc[:, -1]
X = data_zoo.iloc[:, 1:-1]

# Rescale every feature with MinMaxScaler (default range [0, 1]) and keep the
# original column names.
# NOTE(review): the scaler is fit on all rows before the train/test split made
# later in the notebook, so test rows influence the scaling - consider fitting
# on the training split only.
scaler = MinMaxScaler().fit(X)
X = pd.DataFrame(scaler.transform(X), columns=X.columns)

### Visualizing the count of animals in each class type

In [None]:
# Bar chart of the number of animals per class_type, with the count written
# just above each bar.
plt.figure(figsize=(12, 8))
plt.grid(True)
ax = sns.countplot(data=data_zoo, x='class_type', palette='Spectral_r')
for bar in ax.patches:
    bar_height = bar.get_height()
    # Offset the label slightly right of the bar's left edge and above its top.
    ax.annotate(f'{bar_height:.0f}', (bar.get_x() + 0.3, bar_height + 0.5))

### Checking the correlation of features using Heat map

In [None]:
# Pairwise correlation of the numeric columns only. data_zoo also carries the
# animal_name column (presumably strings), and since pandas 2.0
# DataFrame.corr() no longer drops non-numeric columns silently - it raises.
# Selecting the numeric/bool columns first gives the same matrix on older
# pandas and keeps the cell working on newer versions.
corr_matirx = data_zoo.select_dtypes(include=['number', 'bool']).corr()

plt.figure(figsize=(20, 12))
ax = sns.heatmap(corr_matirx, annot=True, cmap="Set1")

# Pin the y-axis to the full grid (row 0 at the top). The previous
# get_ylim() +/- 0.5 adjustment only compensated for the Matplotlib 3.1.1
# bug that clipped the first and last rows; on other versions it adds a blank
# half-row margin at the top and bottom. Explicit limits are correct on both.
ax.set_ylim(len(corr_matirx), 0)

### Checking the correlation of each feature with the target variable using a bar chart

In [None]:
# Bar chart of each numeric column's correlation with class_type, sorted from
# most positive to most negative.
plt.figure(figsize=(25, 10))
(
    # Restrict to numeric/bool columns first: DataFrame.corr() raises on
    # non-numeric columns (such as animal_name) in pandas >= 2.0, whereas older
    # versions dropped them silently - the result is the same either way.
    data_zoo.select_dtypes(include=['number', 'bool'])
    .corr()['class_type']
    .sort_values(ascending=False)
    .plot(kind='bar')
)
plt.show()

## Building Models

### Splitting into train and test sets

In [None]:
# Hold out 30% of the rows for testing. The split is seeded for repeatability
# and stratified on y so both parts keep the class proportions.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=2,
    stratify=y,
)

### Building Support Vector Machine (SVM)

In [None]:
# Fit a support vector classifier with its default hyperparameters
# (fit() returns the estimator itself, so construction and fitting are chained).
model = SVC().fit(x_train, y_train)

# Training accuracy is printed; test accuracy is the cell's last expression and
# is therefore rendered as the cell output.
print(f' Training Accuracy {model.score(x_train, y_train)}')
f'Test Accuracy {model.score(x_test, y_test)}'

### Building a model using KNN

In [None]:
# Sweep k = 1..19 for a distance-weighted KNN with p=1 (Manhattan distance under
# the default Minkowski metric) and record accuracy on both splits for each k.
# NOTE(review): choosing k from the test-split curve uses the test set for
# model selection; the cross-validated grid search is the safer guide.
k = range(1, 20)
trainingAccuracy = []
testAccuracy = []
for i in k:
    # n_jobs=-1 adapts to the available cores instead of a hard-coded worker
    # count, matching the setting used by the MLP grid search in this notebook.
    knn = KNeighborsClassifier(n_neighbors=i, n_jobs=-1, p=1, weights='distance')
    knn.fit(x_train, y_train)
    trainingAccuracy.append(knn.score(x_train, y_train))
    testAccuracy.append(knn.score(x_test, y_test))

# Plot both accuracy curves on one explicit Axes; labels are attached to the
# lines themselves so the legend cannot get out of sync with the plot order.
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 4))
axes.plot(k, trainingAccuracy, label="Training Accuracy")
axes.plot(k, testAccuracy, label="Test Accuracy")
axes.set_xlabel("value of K")
axes.set_ylabel("Accuracy of test and training")
axes.set_title("Select best value of k")
axes.legend()

print("Best TrainAccuracy:- ", max(trainingAccuracy))
print("Best TestAccuracy:- ", max(testAccuracy))

#### Finding the best KNN hyperparameters

In [None]:
# Search space: 1..49 neighbours, both weighting schemes, and p=1 / p=2
# (Manhattan / Euclidean distance under the default Minkowski metric).
k_range = list(range(1, 50))
weight_options = ["uniform", "distance"]
pe = [1, 2]

param_grid = {'n_neighbors': k_range, 'weights': weight_options, 'p': pe}
knn = KNeighborsClassifier()

# 10-fold cross-validated accuracy on the training split. n_jobs=-1 uses all
# available cores rather than a hard-coded worker count, consistent with the
# MLP grid search in this notebook; it does not affect the results.
knngrid = GridSearchCV(knn, param_grid, cv=10, scoring='accuracy', n_jobs=-1)
knngrid.fit(x_train, y_train)

print("Best score on 10 folds split Data on Train split:- ", knngrid.best_score_)
print("Best Param:- ", knngrid.best_params_)
print("Best KNN Metric:- ", knngrid.best_estimator_)

# score() delegates to the best estimator refit on the whole training split.
# Training accuracy is printed; test accuracy is shown as the cell output.
print(f' Training Accuracy {knngrid.score(x_train,y_train)}')
f'Test Accuracy {knngrid.score(x_test,y_test)}'

### Using a Logistic Regression model

In [None]:
from sklearn.linear_model import LogisticRegression
# Fit a logistic regression with the lbfgs solver, allowing up to 1000
# iterations (fit() returns the estimator, so it is chained onto construction).
log_model = LogisticRegression(solver='lbfgs', max_iter=1000).fit(x_train, y_train)

# Training accuracy is printed; test accuracy is the cell's last expression and
# is rendered as the cell output.
print(f' Training Accuracy {log_model.score(x_train, y_train)}')
f'Test Accuracy {log_model.score(x_test, y_test)}'

### Using Neural Network

#### Choosing the best parameters using GridSearchCV

In [None]:
# Shuffled, seeded 5-fold stratified splitter used for the grid search.
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=4)

# Candidate activation functions and solvers: 4 x 3 = 12 combinations.
param_grid = {
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'sgd', 'adam'],
}

# MLPClassifier initialises its weights randomly (and shuffles batches for the
# sgd/adam solvers), so without a seed the selected parameters and reported
# accuracies change from run to run. Fixing random_state makes the search
# reproducible under Restart & Run All.
clf = GridSearchCV(
    MLPClassifier(random_state=4),
    param_grid,
    cv=folds,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1,
)
clf.fit(x_train, y_train)

# score() delegates to the best estimator refit on the whole training split.
# Training accuracy is printed; test accuracy is shown as the cell output.
print(f' Training Accuracy {clf.score(x_train,y_train)}')
f'Test Accuracy {clf.score(x_test,y_test)}'

In [None]:
# Collect the per-candidate cross-validation results of the MLP grid search
# into a DataFrame for inspection.
cv_results = pd.DataFrame.from_dict(clf.cv_results_)

# Report the winning hyperparameter combination and the estimator refit with it.
print('Best hyperparameters: ', clf.best_params_)
print("Best Estimator:- ", clf.best_estimator_)