In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os

In [None]:
# Load the zoo dataset from the Resources folder and display it
zoo_csv_path = os.path.join('Resources', 'zoo.csv')
zoo = pd.read_csv(zoo_csv_path)
zoo

In [None]:
# Move the animal name into the row index so it is no longer a data column
zoo_df = zoo.set_index('animal_name')

In [None]:
# Hand-built samples to classify with each fitted model.
# Every sample is a single row (shape (1, 16)) so it can be passed straight to
# predict(). NOTE(review): the 16 values are presumably in the same order as
# the feature columns of zoo.csv - confirm against the CSV header.
kangaroo = np.array([[1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 2, 1, 0, 1]])
bald_eagle = np.array([[0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 2, 1, 0, 0]])
rattle_snake = np.array([[0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0]])
goldfish = np.array([[0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0]])
mountain_chicken_frog = np.array([[0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 4, 0, 0, 0]])
vampire_moth = np.array([[0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 6, 0, 0, 0]])
coral = np.array([[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
sea_turtle = np.array([[0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 4, 1, 0, 1]])

In [None]:
# Separate the feature columns (X) from the label column (y)
target_column = "class_type"
X = zoo_df.drop(columns=[target_column, "class"])
y = zoo_df[target_column]
print(X.shape, y.shape)

In [None]:
# Hold out part of the data for testing; the fixed seed keeps the split repeatable
from sklearn.model_selection import train_test_split

split_parts = train_test_split(X, y, random_state=1)
X_train, X_test, y_train, y_test = split_parts

### Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# fit() returns the estimator itself, so construction and training are chained;
# the bare name on the last line shows the fitted model as the cell output.
classifier = LogisticRegression(max_iter=500).fit(X_train, y_train)
classifier

In [None]:
# Score the logistic regression on the training split and the held-out split
lr_train_score = classifier.score(X_train, y_train)
lr_test_score = classifier.score(X_test, y_test)
print(f"Training score: {lr_train_score}")
print(f"Testing score: {lr_test_score}")

In [None]:
# Hand-built samples to classify, keyed by the label printed next to each result.
new_animals = {
    "Kangaroo": kangaroo,
    "Bald Eagle": bald_eagle,
    "Rattle Snake": rattle_snake,
    "Goldfish": goldfish,
    "Mountain Chicken Frog": mountain_chicken_frog,
    "Vampire Moth": vampire_moth,
    "Coral": coral,
    "Sea Turtle": sea_turtle,
}

print("Predictions")
for animal_label, features in new_animals.items():
    # The model was fitted on a DataFrame, so re-attach the training column
    # names; a bare ndarray makes scikit-learn warn about missing feature names.
    sample = pd.DataFrame(features, columns=X.columns)
    print(f"{animal_label}: {classifier.predict(sample)}")

In [None]:
# Put the logistic-regression predictions for the test split next to the true labels
predictions = classifier.predict(X_test)
class_predictions_table = pd.DataFrame(
    {
        "Prediction": predictions,
        "Actual": y_test,
    }
)
class_predictions_table.head()

In [None]:
# Coefficients of the fitted logistic regression (these are signed weights,
# not normalised importances). `importances` keeps the raw array.
importances = classifier.coef_

# Show the weights with feature names as columns so each value can be read
# without counting positions in a bare array.
coef_table = pd.DataFrame(importances, columns=X.columns)
if len(coef_table) == len(classifier.classes_):
    # One row of weights per class: label the rows with the class values.
    coef_table.index = classifier.classes_
coef_table

### Random Forest 

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Seed the forest so that its random sampling - and therefore the scores,
# predictions and feature importances derived from it - is the same on every
# run. The seed matches the one used for the train/test split.
rf = RandomForestClassifier(n_estimators=100, random_state=1)
rf = rf.fit(X_train, y_train)
rf.score(X_train, y_train)

In [None]:
# Score the random forest on the training split and the held-out split
rf_train_score = rf.score(X_train, y_train)
rf_test_score = rf.score(X_test, y_test)
print(f"Training score: {rf_train_score}")
print(f"Testing score: {rf_test_score}")

In [None]:
# Hand-built samples to classify, keyed by the label printed next to each result.
new_animals = {
    "Kangaroo": kangaroo,
    "Bald Eagle": bald_eagle,
    "Rattle Snake": rattle_snake,
    "Goldfish": goldfish,
    "Mountain Chicken Frog": mountain_chicken_frog,
    "Vampire Moth": vampire_moth,
    "Coral": coral,
    "Sea Turtle": sea_turtle,
}

print("Predictions")
for animal_label, features in new_animals.items():
    # The forest was fitted on a DataFrame, so re-attach the training column
    # names; a bare ndarray makes scikit-learn warn about missing feature names.
    sample = pd.DataFrame(features, columns=X.columns)
    print(f"{animal_label}: {rf.predict(sample)}")

In [None]:
# Put the random-forest predictions for the test split next to the true labels
predictions = rf.predict(X_test)
rf_predictions_table = pd.DataFrame(
    {
        "Prediction": predictions,
        "Actual": y_test,
    }
)
rf_predictions_table.head()

In [None]:
# Feature importances of the fitted random forest. `importances` keeps the raw array.
importances = rf.feature_importances_

# Pair every importance with its feature name and list the most important
# first, so the output can be read without counting positions in a bare array.
pd.Series(importances, index=X.columns, name="importance").sort_values(ascending=False)

### K Nearest Neighbor

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Try odd neighbour counts 1, 3, ..., 19 and record the score on both splits
train_scores, test_scores = [], []
for k in range(1, 20, 2):
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    train_score, test_score = knn.score(X_train, y_train), knn.score(X_test, y_test)
    train_scores += [train_score]
    test_scores += [test_score]
    print(f"k: {k}, Train/Test Score: {train_score:.3f}/{test_score:.3f}")

In [None]:
# Plot training and testing accuracy against k.
# k_values must list the same k's, in the same order, as the sweep that filled
# train_scores and test_scores.
k_values = range(1, 20, 2)
plt.plot(k_values, train_scores, marker='o', label="Training")
plt.plot(k_values, test_scores, marker="x", label="Testing")
plt.xlabel("k neighbors")
# Both curves share this axis, so the label must not say "Testing" only;
# the legend tells the two curves apart.
plt.ylabel("Accuracy score")
plt.legend()
plt.show()

In [None]:
# Refit a single KNN model with k = 11 and report its score on both splits
knn = KNeighborsClassifier(n_neighbors=11).fit(X_train, y_train)
knn_train_acc = knn.score(X_train, y_train)
knn_test_acc = knn.score(X_test, y_test)
print('k=11 Train Acc: %.3f' % knn_train_acc)
print('k=11 Test Acc: %.3f' % knn_test_acc)

In [None]:
# Hand-built samples to classify, keyed by the label printed next to each result.
new_animals = {
    "Kangaroo": kangaroo,
    "Bald Eagle": bald_eagle,
    "Rattle Snake": rattle_snake,
    "Goldfish": goldfish,
    "Mountain Chicken Frog": mountain_chicken_frog,
    "Vampire Moth": vampire_moth,
    "Coral": coral,
    "Sea Turtle": sea_turtle,
}

print("Predictions")
for animal_label, features in new_animals.items():
    # The KNN model was fitted on a DataFrame, so re-attach the training column
    # names; a bare ndarray makes scikit-learn warn about missing feature names.
    sample = pd.DataFrame(features, columns=X.columns)
    print(f"{animal_label}: {knn.predict(sample)}")

In [None]:
# Put the KNN predictions for the test split next to the true labels
predictionsknn = knn.predict(X_test)
knn_predictions_table = pd.DataFrame(
    {
        "Prediction": predictionsknn,
        "Actual": y_test,
    }
)
knn_predictions_table.head()

In [None]:
# Classification report (per-class metrics) for the KNN predictions on the test split
from sklearn.metrics import classification_report

knn_report = classification_report(y_test, predictionsknn)
print(knn_report)

### Support Vector Machine

In [None]:
from sklearn.svm import SVC

# Fit a support vector classifier with a linear kernel and score both splits
svm_model = SVC(kernel='linear').fit(X_train, y_train)

svm_train_acc = svm_model.score(X_train, y_train)
svm_test_acc = svm_model.score(X_test, y_test)
print('Train Acc: %.3f' % svm_train_acc)
print('Test Acc: %.3f' % svm_test_acc)

In [None]:
# Predict the test split with the linear SVM and report per-class metrics.
predictionssvm = svm_model.predict(X_test)
# No hard-coded target_names: a fixed list of seven names raises ValueError
# whenever the test labels and predictions together contain fewer than seven
# classes. By default the report labels each row with str(label), which gives
# the same "1".."7" names for the classes that are actually present.
print(classification_report(y_test, predictionssvm))

In [None]:
# TODO: decide whether this hyper-parameter search should stay in the notebook.
from sklearn.model_selection import GridSearchCV

# Only C is searched. svm_model uses kernel='linear', and SVC applies gamma
# only to the 'rbf', 'poly' and 'sigmoid' kernels, so a gamma grid would just
# refit identical models several times per C value without changing the
# selected C or its score. Add gamma back only together with a non-linear kernel.
param_grid = {'C': [1, 5, 10, 50]}
grid = GridSearchCV(svm_model, param_grid, verbose=3)

grid.fit(X_train, y_train)

In [None]:
# Best parameter combination found by the grid search, then its best score
print(grid.best_params_, grid.best_score_, sep="\n")


In [None]:
# Hand-built samples to classify, keyed by the label printed next to each result.
new_animals = {
    "Kangaroo": kangaroo,
    "Bald Eagle": bald_eagle,
    "Rattle Snake": rattle_snake,
    "Goldfish": goldfish,
    "Mountain Chicken Frog": mountain_chicken_frog,
    "Vampire Moth": vampire_moth,
    "Coral": coral,
    "Sea Turtle": sea_turtle,
}

print("Predictions")
for animal_label, features in new_animals.items():
    # The grid search was fitted on a DataFrame, so re-attach the training column
    # names; a bare ndarray makes scikit-learn warn about missing feature names.
    sample = pd.DataFrame(features, columns=X.columns)
    print(f"{animal_label}: {grid.predict(sample)}")

In [None]:
# Put the SVM predictions for the test split next to the true labels.
# NOTE(review): predictionssvm was produced by svm_model.predict, not by the
# grid search - confirm that the un-tuned model is the one intended here.
svm_predictions_table = pd.DataFrame(
    {
        "Prediction": predictionssvm,
        "Actual": y_test,
    }
)
svm_predictions_table.head()