In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from pandas.plotting import radviz
from pandas.plotting import lag_plot
import warnings

## Make a simple sklearn classifier
First, read the data in using `pandas.read_csv()`.
Note that the final column contains the `class_type` field that we are interested in.

In [None]:
# Load the zoo dataset into a DataFrame and preview its first six rows.
data = pd.read_csv(
    "../input/zoo-animal-classification/zoo.csv",
)
data.head(n=6)

In [None]:
# Bar chart of the loaded frame on a wide (20 x 10) canvas.
data.plot.bar(
    title="Zoo Animal Classification",
    figsize=(20, 10),
)

In [None]:
# Plot the "milk" column against the "eggs" column.
data.plot(
    x="eggs",
    y="milk",
)

In [None]:
# Colour assigned to each element of the box plot.
color = dict(
    boxes="DarkGreen",
    whiskers="DarkOrange",
    medians="DarkBlue",
    caps="Gray",
)
# Box plot of the frame's columns, drawn with the palette above.
data.plot.box(color=color, sym="r+", figsize=(20, 10))

In [None]:
# Replace the values of the first column with numeric codes produced by a
# LabelEncoder, stored as float64.
label_encoder = LabelEncoder()
# Assign by column label rather than through ``iloc``: with pandas >= 2.0,
# ``data.iloc[:, 0] = ...`` writes into the existing column in place, so an
# object-dtype column would keep its object dtype even though it now holds
# floats. Assigning by label replaces the column and gives it a float64 dtype.
data[data.columns[0]] = label_encoder.fit_transform(data.iloc[:, 0]).astype('float64')

In [None]:
# Pairwise correlation between the columns, drawn as an annotated heatmap.
# ``corr`` is kept as a notebook-level name because the next cell reads it.
corr = data.corr()

plt.figure(figsize=(12, 10))
sns.heatmap(
    corr,
    annot=True,
    cmap=plt.cm.Reds,
)
plt.show()

In [None]:
# Build a keep-mask over the columns: column j is dropped when any earlier
# column i (i < j) has a correlation of at least 0.9 with it.
# Only the strict upper triangle of the matrix is inspected, so a column is
# never compared with itself and each pair is considered once.
# NOTE(review): the comparison is signed, so strongly *negative* correlations
# never cause a drop - confirm that this is intended.
above_threshold = np.triu(corr.to_numpy() >= 0.9, k=1)
columns = ~above_threshold.any(axis=0)

selected_columns = data.columns[columns]
data = data[selected_columns]
data.head(5)

## Preprocess the data
Split the data up for training and evaluation.

In [None]:
def preprocess(data):
    """Split a dataset frame into a feature matrix and a label vector.

    The first column is skipped, the last column is returned as the label,
    and every column in between is returned as a feature. Columns are located
    relative to the frame's width instead of at fixed positions, so the
    function keeps working when columns have been removed upstream (fixed
    positions ``1:17`` / ``17`` raise ``IndexError`` on a narrower frame).

    Args:
        data: DataFrame laid out as ``[first column, feature..., label]``.

    Returns:
        Tuple ``(X, y)`` where ``X`` is a DataFrame with the feature columns
        and ``y`` is a Series with the label column, both covering all rows.

    Raises:
        ValueError: If ``data`` has fewer than three columns, because at
            least one feature column is needed between the first and last.
    """
    if data.shape[1] < 3:
        raise ValueError(
            "preprocess() needs at least three columns "
            "(first column, one or more features, label); "
            f"got {data.shape[1]}"
        )
    X = data.iloc[:, 1:-1]  # all rows, all the features and no labels
    y = data.iloc[:, -1]  # all rows, label only
    return X, y


In [None]:
# Shuffle the rows, then split them into a training and an evaluation frame.
# These frames are what the following cells train and score on.
# (scikit-learn's train_test_split could replace this manual split.)

# A fixed random_state makes the shuffle - and therefore every score and plot
# derived from the split - reproducible across kernel restarts.
data = data.sample(frac=1, random_state=42).reset_index(drop=True)
print("Data",data)
data_total_len = len(data)
print("Length",data_total_len)
data_train_frac = 0.8
# Number of leading rows that go to the training frame (rounded down).
split_index = math.floor(data_total_len*data_train_frac)

train_data = data.iloc[:split_index]
eval_data = data.iloc[split_index:]

print(train_data,"\nE ", eval_data)

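Next, separate the features from the labels for the training and evaluation frames using `preprocess`. (scikit-learn's `train_test_split` could replace the manual shuffle-and-split above with fewer lines.)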

In [None]:
# Separate features from labels for the training and evaluation frames.
(train_X, train_Y), (test_X, test_Y) = (
    preprocess(train_data),
    preprocess(eval_data),
)

In [None]:
# Dump the four pieces of the split: training features/labels, then
# evaluation features/labels.
print(
    train_X,
    train_Y,
    test_X,
    test_Y,
)

In [None]:
# Shapes of the four pieces of the split, in the order
# train features, train labels, evaluation features, evaluation labels.
print(*(part.shape for part in (train_X, train_Y, test_X, test_Y)))

## Train and Evaluate the model
It's easy to swap in a different model of your choice.

In [None]:
# Fit a logistic-regression classifier (default settings) on the training split.
clf = LogisticRegression()
clf.fit(X=train_X, y=train_Y)

In [None]:
# Score the fitted logistic-regression model on the evaluation split.
clf.score(X=test_X, y=test_Y)

In [None]:
# Register the filter before any fitting happens; placed after the training
# code it could not silence warnings raised while the model was being fitted.
warnings.filterwarnings('ignore', category=UserWarning, append=True)

# The loop index was never passed to the model, so all 39 iterations trained
# the same default classifier on the same data. Fit once and reuse that
# result for every point of the plot instead of refitting 39 times.
# NOTE(review): this assumes the default solver is deterministic, so the
# repeated fits would have produced identical predictions - confirm.
clf = LogisticRegression()
clf.fit(train_X, train_Y)
pred_i = clf.predict(test_X)

runs = range(1, 40)
# Fraction of evaluation rows whose prediction differs from the true label,
# repeated once per run so the plot keeps its original 39 points.
error = [np.mean(pred_i != test_Y)] * len(runs)

plt.figure(figsize=(12, 6))
plt.plot(runs, error, color='red', linestyle='dashed', marker='o',
         markerfacecolor='blue', markersize=10)
plt.ylabel('Mean Error')
plt.show()

In [None]:
# Logistic-regression predictions for the evaluation rows at positions 1-24
# (the row at position 0 is not included in the slice).
clf.predict(test_X.iloc[1:25])

In [None]:
# Fit a k-nearest-neighbours classifier (default settings) on the training split.
kn = KNeighborsClassifier()
kn.fit(X=train_X, y=train_Y)

In [None]:
# Score the fitted k-nearest-neighbours model on the evaluation split.
kn.score(X=test_X, y=test_Y)

In [None]:
# Sweep the neighbourhood size K and record the evaluation error for each K.
# K cannot exceed the number of training rows, so the sweep is capped there.
k_values = range(1, min(40, len(train_X) + 1))
error = []

for i in k_values:
    # Pass the loop index as K. Without it every iteration trained the same
    # default classifier, which made the error curve a flat line.
    knn = KNeighborsClassifier(n_neighbors=i)
    knn.fit(train_X, train_Y)
    pred_i = knn.predict(test_X)
    # Fraction of evaluation rows whose prediction differs from the true label.
    error.append(np.mean(pred_i != test_Y))

plt.figure(figsize=(12, 6))
plt.plot(k_values, error, color='red', linestyle='dashed', marker='o',
         markerfacecolor='blue', markersize=10)
plt.title('Error Rate ')
plt.xlabel('K Value')
plt.ylabel('Mean Error')

In [None]:
# k-nearest-neighbours predictions for the evaluation rows at positions 1-24
# (the row at position 0 is not included in the slice).
kn.predict(test_X.iloc[1:25])

In [None]:
# Fit a Gaussian naive-Bayes classifier (default settings) on the training split.
nb = GaussianNB()
nb.fit(X=train_X, y=train_Y)

In [None]:
# Score the fitted naive-Bayes model on the evaluation split.
nb.score(X=test_X, y=test_Y)

In [None]:
# The loop index was never passed to the model, so all 39 iterations trained
# the same classifier on the same data. Fit once and reuse that result for
# every point of the plot instead of refitting 39 times.
nb = GaussianNB()
nb.fit(train_X, train_Y)
pred_i = nb.predict(test_X)

runs = range(1, 40)
# Fraction of evaluation rows whose prediction differs from the true label,
# repeated once per run so the plot keeps its original 39 points.
error = [np.mean(pred_i != test_Y)] * len(runs)

plt.figure(figsize=(12, 6))
plt.plot(runs, error, color='red', linestyle='dashed', marker='o',
         markerfacecolor='blue', markersize=10)
plt.ylabel('Mean Error')

In [None]:
# Naive-Bayes predictions for the evaluation rows at positions 1-24
# (the row at position 0 is not included in the slice).
nb.predict(test_X.iloc[1:25])

In [None]:
# True labels for the same evaluation rows (positions 1-24) that the
# prediction cells above were run on.
test_Y.iloc[1:25]

In [None]:
# Ask for one integer per feature and assemble the answers into a single-row
# DataFrame that the fitted models can predict on.
#
# The feature names are taken from ``train_X`` so the frame always has exactly
# the columns, in the order, that the models were trained with. The answers
# are kept in their own variable instead of rebinding ``data``, which holds
# the dataset used by the earlier cells.
# ``int()`` raises ValueError if an answer is not a valid integer.
feature_names = list(train_X.columns)
feature_values = [int(input(f" Value of {name}: ")) for name in feature_names]
df = pd.DataFrame([feature_values], columns=feature_names)
df

In [None]:
# Logistic-regression prediction for the manually entered row.
clf.predict(X=df)

In [None]:
# k-nearest-neighbours prediction for the manually entered row.
kn.predict(X=df)

In [None]:
# Naive-Bayes prediction for the manually entered row.
nb.predict(X=df)

In [None]:
# Translate each model's numeric prediction for ``df`` into a class name.
class_type = pd.read_csv("../input/zoo-animal-classification/class.csv")
# Build the number -> name mapping once, so each lookup yields a plain label
# instead of printing a whole filtered Series (index, name and dtype).
class_names = dict(zip(class_type['Class_Number'], class_type['Class_Type']))
model_prediction = [clf, kn, nb]
for model in model_prediction:
    for predicted in model.predict(df):
        # Prefix the label with the estimator's class name so the three
        # output lines can be told apart; fall back to a marker if the
        # predicted number is missing from the lookup table.
        label = class_names.get(predicted, 'unknown')
        print(f"{type(model).__name__}: {label}")