In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import os

In [None]:
# Read zoo.csv from the Resources folder one level above the notebook
zoo_csv_path = os.path.join('..', 'Resources', 'zoo.csv')
zoo = pd.read_csv(zoo_csv_path)
zoo

In [None]:
# Working frame: same data as `zoo`, with the animal_name column as the row index
zoo_df = zoo.set_index(keys='animal_name')
zoo_df

In [None]:
# Data summary: number of rows recorded under each value of the "class" column
class_column = zoo_df["class"]
class_column.value_counts()


In [None]:
# Hand-built samples used later to try each fitted model on individual animals.
# Every array holds a single sample: one row of 16 integer feature values.
# NOTE(review): the value order presumably matches the feature columns of X - confirm.
kangaroo = np.array([1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 2, 1, 0, 1]).reshape(1, -1)
bald_eagle = np.array([0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 2, 1, 0, 0]).reshape(1, -1)
rattle_snake = np.array([0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0]).reshape(1, -1)
goldfish = np.array([0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0]).reshape(1, -1)
mountain_chicken_frog = np.array([0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 4, 0, 0, 0]).reshape(1, -1)
vampire_moth = np.array([0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 6, 0, 0, 0]).reshape(1, -1)
coral = np.array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]).reshape(1, -1)
sea_turtle = np.array([0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 4, 1, 0, 1]).reshape(1, -1)

In [None]:
# Features (X): every column except the two class columns.
# Target (y): the class_type column.
target_column = "class_type"
X = zoo_df.drop(columns=[target_column, "class"])
y = zoo_df[target_column]
print (X.shape, y.shape)

In [None]:
# Split features and target into training and testing parts.
# random_state=1 fixes the shuffle so the split is the same on every run.
from sklearn.model_selection import train_test_split

split_parts = train_test_split(X, y, random_state=1)
X_train, X_test, y_train, y_test = split_parts


#### Run chi square test on training/testing data sets

In [None]:
# Build the inputs for a chi-squared goodness-of-fit test that compares the
# class distribution of the training and testing splits with the distribution
# of the overall data set.
#
# For each split the result is a frame indexed by class_type with two columns:
#   "Actual"   - number of rows of that class observed in the split
#   "Expected" - overall share of that class multiplied by the split size
# Expected counts are kept as floats so that they sum to the same total as the
# observed counts, which scipy.stats.chisquare requires.

# Row count of every class in the full data set, in sorted class order
class_totals = zoo_df["class_type"].value_counts().sort_index()
class_totals.index.name = "class_type"
class_shares = class_totals / class_totals.sum()


def _observed_counts(labels):
    """Count each class in `labels`; classes that never occur are kept with a count of 0."""
    counts = labels.value_counts().reindex(class_totals.index, fill_value=0)
    counts.index.name = "class_type"
    return counts.rename("Actual").to_frame()


def _expected_counts(n_rows):
    """Scale the overall class shares so that the expected counts sum to `n_rows`."""
    return (class_shares * n_rows).rename("Expected").to_frame()


# Get counts of values in training and testing sets
y_train_df = pd.DataFrame(y_train)
y_train_counts_df = _observed_counts(y_train)

y_test_df = pd.DataFrame(y_test)
y_test_counts_df = _observed_counts(y_test)

# Expected counts derived from the overall zoo dataset
zoo_counts_train_df = _expected_counts(len(y_train))
zoo_counts_test_df = _expected_counts(len(y_test))

# Combine observed and expected counts per split (both share the class_type index)
merged_training_df = y_train_counts_df.join(zoo_counts_train_df)
merged_testing_df = y_test_counts_df.join(zoo_counts_test_df)

In [None]:
# Display the "Actual" and "Expected" class counts for the training split
merged_training_df

In [None]:
# Display the "Actual" and "Expected" class counts for the testing split
merged_testing_df

In [None]:
# Chi-squared goodness-of-fit test on the training split:
# observed class counts ("Actual") against expected class counts ("Expected")
stats.chisquare(
    f_obs=merged_training_df['Actual'],
    f_exp=merged_training_df['Expected'],
)

In [None]:
# Chi-squared goodness-of-fit test on the testing split:
# observed class counts ("Actual") against expected class counts ("Expected")
stats.chisquare(
    f_obs=merged_testing_df['Actual'],
    f_exp=merged_testing_df['Expected'],
)

### Feature Selection

Explanation and Code from https://machinelearningmastery.com/feature-selection-with-categorical-data/

#### Chi-Squared

In [None]:
from sklearn.feature_selection import SelectKBest, chi2, mutual_info_classif

# Score every feature against the target with the chi-squared statistic.
# k='all' keeps all features; the selector is only used to obtain the scores.
fs_chi = SelectKBest(score_func=chi2, k='all')
fit_chi = fs_chi.fit(X_train, y_train)

# Pair each feature name with its score, highest score first
chi_columns_df = pd.DataFrame(X.columns)
chi_scores_df = pd.DataFrame(fit_chi.scores_)

chi_unsorted_df = pd.concat([chi_columns_df, chi_scores_df], axis=1)
chi_unsorted_df.columns = ["Feature", "Score"]
features_df = chi_unsorted_df.sort_values(by="Score", ascending=False)
features_df

In [None]:
# Critical value of the chi-squared distribution.
# Degrees of freedom: 7 rows - 1 = 6.
# Confidence level: 1.00 - 0.05 (p-value threshold) = 0.95.
chi2_six_dof = stats.chi2(df=6)
critical_value = chi2_six_dof.ppf(0.95)
critical_value


#### Mutual Information

In [None]:
# Run mutual information module
from functools import partial

# Score every feature against the target with mutual information.
# With the default discrete_features='auto', dense input is treated as
# continuous and scored with a randomized estimator, so the scores would
# change from run to run. Declaring the features discrete avoids that.
# NOTE(review): assumes every column of X holds discrete codes (flags / small
# integer counts) - confirm against zoo.csv.
discrete_mutual_info = partial(mutual_info_classif, discrete_features=True)

# k='all' keeps all features; the selector is only used to obtain the scores
MI_fs = SelectKBest(score_func=discrete_mutual_info, k='all')
MI_fs_fit = MI_fs.fit(X_train, y_train)
X_train_MI_fs = MI_fs.transform(X_train)
X_test_MI_fs = MI_fs.transform(X_test)

# Pair each feature name with its score, highest score first
MI_scores_df = pd.DataFrame(MI_fs_fit.scores_)
MI_columns_df = pd.DataFrame(X.columns)

MI_features_df = pd.concat([MI_columns_df, MI_scores_df], axis=1)
MI_features_df.columns = ["Feature", "Score"]
MI_features_df = MI_features_df.sort_values(by=['Score'], ascending=False)
MI_features_df

### Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression on the unscaled training features (up to 500 solver iterations)
classifier = LogisticRegression(max_iter=500).fit(X_train, y_train)
classifier

In [None]:
# Score of the unscaled logistic regression on each split
logreg_train_score = classifier.score(X_train, y_train)
logreg_test_score = classifier.score(X_test, y_test)
print(f"Training score: {logreg_train_score}")
print(f"Testing score: {logreg_test_score}")

In [None]:
# Print the class predicted by the unscaled logistic regression for each hand-built animal
logreg_samples = {
    "Kangaroo": kangaroo,
    "Bald Eagle": bald_eagle,
    "Rattle Snake": rattle_snake,
    "Goldfish": goldfish,
    "Mountain Chicken Frog": mountain_chicken_frog,
    "Vampire Moth": vampire_moth,
    "Coral": coral,
    "Sea Turtle": sea_turtle,
}
print(f"Predictions")
for animal_label, animal_features in logreg_samples.items():
    print(f"{animal_label}: {classifier.predict(animal_features)}")

In [None]:
# Predict the test split and put predictions next to the actual classes.
# The table keeps the index of y_test (the animal names).
predictions_logreg = classifier.predict(X_test)
class_predictions_table = pd.DataFrame(
    {"Prediction": predictions_logreg, "Actual": y_test.values},
    index=y_test.index,
)
class_predictions_table.head()

In [None]:
# Scatter plot of actual vs predicted class for every row of
# class_predictions_table (unscaled logistic regression, test split).
fig = plt.figure(figsize=(20,8))
# Draw in the first cell of a 2-row, 1-column subplot grid
plt.subplot(2,1,1)
plt.tight_layout()
plt.subplots_adjust(top=0.90)
# Fixed y-range; presumably chosen to frame the class labels - confirm
plt.ylim(0, 8)
# Rotate the x tick labels by 90 degrees
plt.xticks(rotation=90, ha='right')

# Blue circles = actual class, red crosses = predicted class, both against the table index
plt.scatter(class_predictions_table.index, class_predictions_table.Actual, marker='o', color='b', label='Actual')
plt.scatter(class_predictions_table.index, class_predictions_table.Prediction, marker='x', color='r', label='Prediction')

plt.title('Predicting Animal Class',color='k', size=14, weight='bold')
plt.xlabel("Animal")
plt.ylabel('Class')
plt.legend(loc="best")
plt.grid(alpha=0.5)

# Saving the figure to disk is currently disabled
# plt.savefig('../resources/logreg.png', dpi=fig.dpi)
plt.show()


In [None]:
# Residual plot for the unscaled logistic regression:
# x = predicted class, y = actual class minus predicted class.
# NOTE(review): class labels look like categorical codes, so only zero vs
# non-zero differences are meaningful - confirm this plot is intended.
# Test predictions and actual values as column vectors
xtest_shaped = predictions_logreg.reshape(-1, 1)
ytest_shaped = y_test.values.reshape(-1,1)

# Same for the training split (predictions are computed here)
xtrain = classifier.predict(X_train)
xtrain_shaped = xtrain.reshape(-1, 1)
ytrain_shaped = y_train.values.reshape(-1,1)

fig = plt.figure(figsize=(20,8))
# Draw in the first cell of a 2-row, 1-column subplot grid
plt.subplot(2,1,1)
plt.tight_layout()
plt.subplots_adjust(top=0.90)

plt.scatter(xtest_shaped, ytest_shaped - xtest_shaped, c="orange", label="Testing Data")
plt.scatter(xtrain_shaped, ytrain_shaped - xtrain_shaped, c="blue", label="Training Data")
plt.legend()
# Zero-residual reference line, spanning the range of the test predictions
plt.hlines(y=0, xmin=xtest_shaped.min(), xmax=xtest_shaped.max())
plt.title('Residual Plot',color='k', size=14, weight='bold')

# Saving the figure to disk is currently disabled
# plt.savefig('../resources/residual.png', dpi=fig.dpi)
plt.show()

In [None]:
# Classification report for the unscaled logistic regression on the test split
from sklearn.metrics import classification_report

logreg_report = classification_report(y_test, predictions_logreg)
print(logreg_report)

#### Logistic Regression Scaled

In [None]:
# Min-max scaling: the scaler is fitted on the training features only and then
# applied to both splits.
from sklearn.preprocessing import MinMaxScaler

X_scaler = MinMaxScaler()
X_scaler.fit(X_train)

X_train_scaled, X_test_scaled = (X_scaler.transform(split) for split in (X_train, X_test))

In [None]:
# Logistic regression fitted on the min-max scaled training features
classifier_scaled = LogisticRegression(max_iter=500).fit(X_train_scaled, y_train)
classifier_scaled

In [None]:
# Score of the scaled logistic regression on each (scaled) split
logreg_scaled_train_score = classifier_scaled.score(X_train_scaled, y_train)
logreg_scaled_test_score = classifier_scaled.score(X_test_scaled, y_test)
print(f"Training score: {logreg_scaled_train_score}")
print(f"Testing score: {logreg_scaled_test_score}")

In [None]:
# Print predictions of chosen animals.
# classifier_scaled was fitted on min-max scaled features, so every hand-built
# row goes through the same fitted scaler (X_scaler) before prediction.
logreg_scaled_samples = {
    "Kangaroo": kangaroo,
    "Bald Eagle": bald_eagle,
    "Rattle Snake": rattle_snake,
    "Goldfish": goldfish,
    "Mountain Chicken Frog": mountain_chicken_frog,
    "Vampire Moth": vampire_moth,
    "Coral": coral,
    "Sea Turtle": sea_turtle,
}
print(f"Predictions")
for animal_label, animal_features in logreg_scaled_samples.items():
    animal_features_scaled = X_scaler.transform(animal_features)
    print(f"{animal_label}: {classifier_scaled.predict(animal_features_scaled)}")

In [None]:
# Predictions for the test data.
# classifier_scaled was fitted on scaled features, so it must be given the
# scaled test features (X_test_scaled), not the raw X_test.
predictions_logreg_scaled = classifier_scaled.predict(X_test_scaled)
# The table takes its index (animal names) from y_test
class_predictions_table_scaled = pd.DataFrame({"Prediction": predictions_logreg_scaled, "Actual": y_test})
class_predictions_table_scaled.head()

In [None]:
# Scatter plot of actual vs predicted class for every row of
# class_predictions_table_scaled (logistic regression fitted on scaled features).
fig = plt.figure(figsize=(20,8))
# Draw in the first cell of a 2-row, 1-column subplot grid
plt.subplot(2,1,1)
plt.tight_layout()
plt.subplots_adjust(top=0.90)

# Fixed y-range; presumably chosen to frame the class labels - confirm
plt.ylim(0, 8)
# Rotate the x tick labels by 90 degrees
plt.xticks(rotation=90, ha='right')
# Blue circles = actual class, red crosses = predicted class, both against the table index
plt.scatter(class_predictions_table_scaled.index, class_predictions_table_scaled.Actual, marker='o', color='b', label='Actual')
plt.scatter(class_predictions_table_scaled.index, class_predictions_table_scaled.Prediction, marker='x', color='r', label='Prediction')
plt.title('Min Max Scaler Predicting Animal Class',color='k', size=14, weight='bold')
plt.xlabel("Animal")
plt.ylabel('Class')
plt.legend(loc="best")
plt.grid(alpha=0.5)

# Saving the figure to disk is currently disabled
# plt.savefig('../resources/minmaxlogreg.png', dpi=fig.dpi)
plt.show()

In [None]:
# Residual plot for the logistic regression fitted on scaled features:
# x = predicted class, y = actual class minus predicted class.
# classifier_scaled was fitted on min-max scaled features, so both splits are
# predicted from their scaled versions (X_test_scaled / X_train_scaled).
xtest_shaped = classifier_scaled.predict(X_test_scaled).reshape(-1, 1)
ytest_shaped = y_test.values.reshape(-1,1)

xtrain = classifier_scaled.predict(X_train_scaled)
xtrain_shaped = xtrain.reshape(-1, 1)
ytrain_shaped = y_train.values.reshape(-1,1)

fig = plt.figure(figsize=(20,8))
# Draw in the first cell of a 2-row, 1-column subplot grid
plt.subplot(2,1,1)
plt.tight_layout()
plt.subplots_adjust(top=0.90)

plt.scatter(xtest_shaped, ytest_shaped - xtest_shaped, c="orange", label="Testing Data")
plt.scatter(xtrain_shaped, ytrain_shaped - xtrain_shaped, c="blue", label="Training Data")
plt.legend()
# Zero-residual reference line, spanning the range of the test predictions
plt.hlines(y=0, xmin=xtest_shaped.min(), xmax=xtest_shaped.max())
plt.title('Min Max Scaler Residual Plot',color='k', size=14, weight='bold')

# Saving the figure to disk is currently disabled
# plt.savefig('../resources/minmaxresidual.png', dpi=fig.dpi)
plt.show()

In [None]:
# Classification report for the scaled logistic regression predictions on the test split
logreg_scaled_report = classification_report(y_test, predictions_logreg_scaled)
print(logreg_scaled_report)

### Random Forest 

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 50 trees on the unscaled training features.
# random_state is fixed so the fitted forest (and every score, prediction and
# feature importance derived from it) is the same on every run.
rf = RandomForestClassifier(n_estimators=50, random_state=1)
rf = rf.fit(X_train, y_train)
# Display the training score
rf.score(X_train, y_train)

In [None]:
# Score of the random forest on each split
rf_train_score = rf.score(X_train, y_train)
rf_test_score = rf.score(X_test, y_test)
print(f"Training score: {rf_train_score}")
print(f"Testing score: {rf_test_score}")

In [None]:
# Print the class predicted by the random forest for each hand-built animal
rf_samples = {
    "Kangaroo": kangaroo,
    "Bald Eagle": bald_eagle,
    "Rattle Snake": rattle_snake,
    "Goldfish": goldfish,
    "Mountain Chicken Frog": mountain_chicken_frog,
    "Vampire Moth": vampire_moth,
    "Coral": coral,
    "Sea Turtle": sea_turtle,
}
print(f"Predictions")
for animal_label, animal_features in rf_samples.items():
    print(f"{animal_label}: {rf.predict(animal_features)}")

In [None]:
# Predict the test split with the random forest and put predictions next to
# the actual classes. The table keeps the index of y_test.
predictions_rf = rf.predict(X_test)
rf_predictions_table = pd.DataFrame(
    {"Prediction": predictions_rf, "Actual": y_test.values},
    index=y_test.index,
)
rf_predictions_table.head()

In [None]:
# Feature importances of the fitted random forest `rf`, as exposed by its
# feature_importances_ attribute (one value per feature; no names attached)
importances_rf = rf.feature_importances_
importances_rf

In [None]:
# Classification report for the random forest on the test split
rf_report = classification_report(y_test, predictions_rf)
print(rf_report)

In [None]:
# Scatter plot of actual vs predicted class for every row of
# rf_predictions_table (random forest, test split).
fig_rf = plt.figure(figsize=(20,8))
# Draw in the first cell of a 2-row, 1-column subplot grid
plt.subplot(2,1,1)
plt.tight_layout()
plt.subplots_adjust(top=0.90)
# Fixed y-range; presumably chosen to frame the class labels - confirm
plt.ylim(0, 8)
# Rotate the x tick labels by 90 degrees
plt.xticks(rotation=90, ha='right')

# Blue circles = actual class, red crosses = predicted class, both against the table index
plt.scatter(rf_predictions_table.index, rf_predictions_table.Actual, marker='o', color='b', label='Actual')
plt.scatter(rf_predictions_table.index, rf_predictions_table.Prediction, marker='x', color='r', label='Prediction')

plt.title('Predicting Animal Class',color='k', size=14, weight='bold')
plt.xlabel("Animal")
plt.ylabel('Class')
plt.legend(loc="best")
plt.grid(alpha=0.5)

# Saving the figure to disk is currently disabled
# plt.savefig('../resources/rf.png', dpi=fig_rf.dpi)
plt.show()


In [None]:
# Residual plot for the random forest:
# x = predicted class, y = actual class minus predicted class.
# NOTE(review): class labels look like categorical codes, so only zero vs
# non-zero differences are meaningful - confirm this plot is intended.
# Test predictions and actual values as column vectors
rf_xtest_shaped = predictions_rf.reshape(-1, 1)
rf_ytest_shaped = y_test.values.reshape(-1,1)

# Same for the training split (predictions are computed here)
rf_xtrain = rf.predict(X_train)
rf_xtrain_shaped = rf_xtrain.reshape(-1, 1)
rf_ytrain_shaped = y_train.values.reshape(-1,1)

rf_res_fig = plt.figure(figsize=(20,8))
# Draw in the first cell of a 2-row, 1-column subplot grid
plt.subplot(2,1,1)
plt.tight_layout()
plt.subplots_adjust(top=0.90)

plt.scatter(rf_xtest_shaped, rf_ytest_shaped - rf_xtest_shaped, c="orange", label="Testing Data")
plt.scatter(rf_xtrain_shaped, rf_ytrain_shaped - rf_xtrain_shaped, c="blue", label="Training Data")
plt.legend()
# Zero-residual reference line, spanning the range of the test predictions
plt.hlines(y=0, xmin=rf_xtest_shaped.min(), xmax=rf_xtest_shaped.max())
plt.title('Residual Plot',color='k', size=14, weight='bold')

# Saving the figure to disk is currently disabled
# plt.savefig('../resources/rf_residual.png', dpi=rf_res_fig.dpi)
plt.show()

#### Random Forest Scaled

In [None]:
# Random forest with 50 trees on the min-max scaled training features.
# random_state is fixed so the fitted forest (and every score and prediction
# derived from it) is the same on every run.
rf_scaled = RandomForestClassifier(n_estimators=50, random_state=1)
rf_scaled = rf_scaled.fit(X_train_scaled, y_train)
# Display the training score
rf_scaled.score(X_train_scaled, y_train)

In [None]:
# Score of the scaled random forest on each (scaled) split
rf_scaled_train_score = rf_scaled.score(X_train_scaled, y_train)
rf_scaled_test_score = rf_scaled.score(X_test_scaled, y_test)
print(f"Training score: {rf_scaled_train_score}")
print(f"Testing score: {rf_scaled_test_score}")

In [None]:
# Print predictions for chosen animals.
# rf_scaled was fitted on min-max scaled features, so every hand-built row goes
# through the same fitted scaler (X_scaler) before prediction.
rf_scaled_samples = {
    "Kangaroo": kangaroo,
    "Bald Eagle": bald_eagle,
    "Rattle Snake": rattle_snake,
    "Goldfish": goldfish,
    "Mountain Chicken Frog": mountain_chicken_frog,
    "Vampire Moth": vampire_moth,
    "Coral": coral,
    "Sea Turtle": sea_turtle,
}
print(f"Predictions")
for animal_label, animal_features in rf_scaled_samples.items():
    animal_features_scaled = X_scaler.transform(animal_features)
    print(f"{animal_label}: {rf_scaled.predict(animal_features_scaled)}")

In [None]:
# Predictions for the test data.
# rf_scaled was fitted on scaled features, so it must be given the scaled test
# features (X_test_scaled), not the raw X_test.
predictions_rf_scaled = rf_scaled.predict(X_test_scaled)
# The table takes its index (animal names) from y_test
rf_scaled_predictions_table = pd.DataFrame({"Prediction": predictions_rf_scaled, "Actual": y_test})
rf_scaled_predictions_table.head()

In [None]:
# Classification report for the scaled random forest predictions on the test split
rf_scaled_report = classification_report(y_test, predictions_rf_scaled)
print(rf_scaled_report)

In [None]:
# Scatter plot of actual vs predicted class for every row of
# rf_scaled_predictions_table (random forest fitted on scaled features).
rf_scaled_fig = plt.figure(figsize=(20,8))
# Draw in the first cell of a 2-row, 1-column subplot grid
plt.subplot(2,1,1)
plt.tight_layout()
plt.subplots_adjust(top=0.90)
# Fixed y-range; presumably chosen to frame the class labels - confirm
plt.ylim(0, 8)
# Rotate the x tick labels by 90 degrees
plt.xticks(rotation=90, ha='right')

# Blue circles = actual class, red crosses = predicted class, both against the table index
plt.scatter(rf_scaled_predictions_table.index, rf_scaled_predictions_table.Actual, marker='o', color='b', label='Actual')
plt.scatter(rf_scaled_predictions_table.index, rf_scaled_predictions_table.Prediction, marker='x', color='r', label='Prediction')
plt.title('Min Max Scaler Predicting Animal Class',color='k', size=14, weight='bold')
plt.xlabel("Animal")
plt.ylabel('Class')
plt.legend(loc="best")
plt.grid(alpha=0.5)

# Saving the figure to disk is currently disabled
# plt.savefig('../resources/rf_minmax.png', dpi=rf_scaled_fig.dpi)
plt.show()

In [None]:
# Residual plot for the random forest fitted on scaled features:
# x = predicted class, y = actual class minus predicted class.
# rf_scaled was fitted on min-max scaled features, so both splits are predicted
# from their scaled versions (X_test_scaled / X_train_scaled).
rf_scaled_xtest_shaped = rf_scaled.predict(X_test_scaled).reshape(-1, 1)
rf_scaled_ytest_shaped = y_test.values.reshape(-1,1)

rf_scaled_xtrain = rf_scaled.predict(X_train_scaled)
rf_scaled_xtrain_shaped = rf_scaled_xtrain.reshape(-1, 1)
rf_scaled_ytrain_shaped = y_train.values.reshape(-1,1)

rf_scaled_res_fig = plt.figure(figsize=(20,8))
# Draw in the first cell of a 2-row, 1-column subplot grid
plt.subplot(2,1,1)
plt.tight_layout()
plt.subplots_adjust(top=0.90)

plt.scatter(rf_scaled_xtest_shaped, rf_scaled_ytest_shaped - rf_scaled_xtest_shaped, c="orange", label="Testing Data")
plt.scatter(rf_scaled_xtrain_shaped, rf_scaled_ytrain_shaped - rf_scaled_xtrain_shaped, c="blue", label="Training Data")
plt.legend()
# Zero-residual reference line, spanning the range of the test predictions
plt.hlines(y=0, xmin=rf_scaled_xtest_shaped.min(), xmax=rf_scaled_xtest_shaped.max())
plt.title('Min Max Scaler Residual Plot',color='k', size=14, weight='bold')

# Saving the figure to disk is currently disabled
# plt.savefig('../resources/rf_minmax_residual.png', dpi=rf_scaled_res_fig.dpi)
plt.show()

### K Nearest Neighbor

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Fit one k-nearest-neighbours model for every odd k from 1 to 19 on the
# unscaled features and record its score on both splits.
train_scores, test_scores = [], []
for k in range(1, 20, 2):
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    train_score = knn.score(X_train, y_train)
    test_score = knn.score(X_test, y_test)
    train_scores += [train_score]
    test_scores += [test_score]
    print(f"k: {k}, Train/Test Score: {train_score:.3f}/{test_score:.3f}")

In [None]:
# Line plot of the training and testing scores recorded for each tried k
k_values = range(1, 20, 2)
plt.plot(k_values, train_scores, marker='o', color='#0b7090')
plt.plot(k_values, test_scores, marker="x", color='#c89124')
plt.title("Accuracy Score for Various K Values")
plt.xlabel("k neighbors")
plt.ylabel("Testing accuracy Score")
# Legend entries follow the order in which the two lines were drawn
plt.legend(['Training Data', 'Testing Data'])
#plt.savefig('../resources/knn_kscores.png', dpi=fig.dpi)
plt.show()

In [None]:
# Final k-nearest-neighbours model on the unscaled features, with k=11
knn = KNeighborsClassifier(n_neighbors=11).fit(X_train, y_train)
knn_train_acc = knn.score(X_train, y_train)
knn_test_acc = knn.score(X_test, y_test)
print('k=11 Train Acc: %.3f' % knn_train_acc)
print('k=11 Test Acc: %.3f' % knn_test_acc)

In [None]:
# Print the class predicted by the k=11 KNN model for each hand-built animal
knn_samples = {
    "Kangaroo": kangaroo,
    "Bald Eagle": bald_eagle,
    "Rattle Snake": rattle_snake,
    "Goldfish": goldfish,
    "Mountain Chicken Frog": mountain_chicken_frog,
    "Vampire Moth": vampire_moth,
    "Coral": coral,
    "Sea Turtle": sea_turtle,
}
print(f"Predictions")
for animal_label, animal_features in knn_samples.items():
    print(f"{animal_label}: {knn.predict(animal_features)}")

In [None]:
# Predict the test split with the KNN model and put predictions next to the
# actual classes. The table keeps the index of y_test.
predictions_knn = knn.predict(X_test)
knn_predictions_table = pd.DataFrame(
    {"Prediction": predictions_knn, "Actual": y_test.values},
    index=y_test.index,
)
knn_predictions_table.head()

In [None]:
# Classification report for the KNN model on the test split
knn_report = classification_report(y_test, predictions_knn)
print(knn_report)

In [None]:
# Scatter plot of actual vs predicted class for every row of
# knn_predictions_table (KNN on unscaled features, test split).
knn_fig = plt.figure(figsize=(20,8))
# Draw in the first cell of a 2-row, 1-column subplot grid
plt.subplot(2,1,1)
plt.tight_layout()
plt.subplots_adjust(top=0.90)
# Fixed y-range; presumably chosen to frame the class labels - confirm
plt.ylim(0, 8)
# Rotate the x tick labels by 90 degrees
plt.xticks(rotation=90, ha='right')

# Blue circles = actual class, red crosses = predicted class, both against the table index
plt.scatter(knn_predictions_table.index, knn_predictions_table.Actual, marker='o', color='b', label='Actual')
plt.scatter(knn_predictions_table.index, knn_predictions_table.Prediction, marker='x', color='r', label='Prediction')

plt.title('Predicting Animal Class',color='k', size=14, weight='bold')
plt.xlabel("Animal")
plt.ylabel('Class')
plt.legend(loc="best")
plt.grid(alpha=0.5)

# Saving the figure to disk is currently disabled
# plt.savefig('../resources/knn.png', dpi=knn_fig.dpi)
plt.show()


In [None]:
# Residual plot for the KNN model on unscaled features:
# x = predicted class, y = actual class minus predicted class.
# NOTE(review): class labels look like categorical codes, so only zero vs
# non-zero differences are meaningful - confirm this plot is intended.
# Test predictions and actual values as column vectors
knn_xtest_shaped = predictions_knn.reshape(-1, 1)
knn_ytest_shaped = y_test.values.reshape(-1,1)

# Same for the training split (predictions are computed here)
knn_xtrain = knn.predict(X_train)
knn_xtrain_shaped = knn_xtrain.reshape(-1, 1)
knn_ytrain_shaped = y_train.values.reshape(-1,1)

knn_res_fig = plt.figure(figsize=(20,8))
# Draw in the first cell of a 2-row, 1-column subplot grid
plt.subplot(2,1,1)
plt.tight_layout()
plt.subplots_adjust(top=0.90)

plt.scatter(knn_xtest_shaped, knn_ytest_shaped - knn_xtest_shaped, c="orange", label="Testing Data")
plt.scatter(knn_xtrain_shaped, knn_ytrain_shaped - knn_xtrain_shaped, c="blue", label="Training Data")
plt.legend()
# Zero-residual reference line, spanning the range of the test predictions
plt.hlines(y=0, xmin=knn_xtest_shaped.min(), xmax=knn_xtest_shaped.max())
plt.title('Residual Plot',color='k', size=14, weight='bold')

# Saving the figure to disk is currently disabled
# plt.savefig('../resources/knn_residual.png', dpi=knn_res_fig.dpi)
plt.show()

#### K Nearest Neighbor Scaled

In [None]:
# Fit one k-nearest-neighbours model for every odd k from 1 to 19 on the
# min-max scaled features and record its score on both (scaled) splits.
train_scores_scaled, test_scores_scaled = [], []
for k in range(1, 20, 2):
    knn_scaled = KNeighborsClassifier(n_neighbors=k).fit(X_train_scaled, y_train)
    train_score_scaled = knn_scaled.score(X_train_scaled, y_train)
    test_score_scaled = knn_scaled.score(X_test_scaled, y_test)
    train_scores_scaled += [train_score_scaled]
    test_scores_scaled += [test_score_scaled]
    print(f"k: {k}, Train/Test Score: {train_score_scaled:.3f}/{test_score_scaled:.3f}")

In [None]:
# Final k-nearest-neighbours model on the min-max scaled features, with k=11
knn_scaled = KNeighborsClassifier(n_neighbors=11).fit(X_train_scaled, y_train)
knn_scaled_train_acc = knn_scaled.score(X_train_scaled, y_train)
knn_scaled_test_acc = knn_scaled.score(X_test_scaled, y_test)
print('k=11 Train Acc: %.3f' % knn_scaled_train_acc)
print('k=11 Test Acc: %.3f' % knn_scaled_test_acc)

In [None]:
# Print predictions for chosen animals.
# knn_scaled was fitted on min-max scaled features, so every hand-built row
# goes through the same fitted scaler (X_scaler) before prediction.
knn_scaled_samples = {
    "Kangaroo": kangaroo,
    "Bald Eagle": bald_eagle,
    "Rattle Snake": rattle_snake,
    "Goldfish": goldfish,
    "Mountain Chicken Frog": mountain_chicken_frog,
    "Vampire Moth": vampire_moth,
    "Coral": coral,
    "Sea Turtle": sea_turtle,
}
print(f"Predictions")
for animal_label, animal_features in knn_scaled_samples.items():
    animal_features_scaled = X_scaler.transform(animal_features)
    print(f"{animal_label}: {knn_scaled.predict(animal_features_scaled)}")

In [None]:
# Classification report for the scaled KNN model, predicted from the scaled test features
predictions_knn_scaled = knn_scaled.predict(X_test_scaled)
knn_scaled_report = classification_report(y_test, predictions_knn_scaled)
print(knn_scaled_report)

In [None]:
# Predictions for the test set.
# knn_scaled was fitted on scaled features, so it must be given the scaled test
# features (X_test_scaled), not the raw X_test.
predictions_knn_scaled = knn_scaled.predict(X_test_scaled)
# The table takes its index (animal names) from y_test
knn_scaled_predictions_table = pd.DataFrame({"Prediction": predictions_knn_scaled, "Actual": y_test})
knn_scaled_predictions_table.head()

In [None]:
# Scatter plot of actual vs predicted class for every row of
# knn_scaled_predictions_table (KNN fitted on scaled features).
knn_scaled_fig = plt.figure(figsize=(20,8))
# Draw in the first cell of a 2-row, 1-column subplot grid
plt.subplot(2,1,1)
plt.tight_layout()
plt.subplots_adjust(top=0.90)

# Fixed y-range; presumably chosen to frame the class labels - confirm
plt.ylim(0, 8)
# Rotate the x tick labels by 90 degrees
plt.xticks(rotation=90, ha='right')
# Blue circles = actual class, red crosses = predicted class, both against the table index
plt.scatter(knn_scaled_predictions_table.index, knn_scaled_predictions_table.Actual, marker='o', color='b', label='Actual')
plt.scatter(knn_scaled_predictions_table.index, knn_scaled_predictions_table.Prediction, marker='x', color='r', label='Prediction')
plt.title('Min Max Scaler Predicting Animal Class',color='k', size=14, weight='bold')
plt.xlabel("Animal")
plt.ylabel('Class')
plt.legend(loc="best")
plt.grid(alpha=0.5)

# Saving the figure to disk is currently disabled
# plt.savefig('../resources/knn_scaled.png', dpi=knn_scaled_fig.dpi)
plt.show()

In [None]:
# Residual plot for the KNN model fitted on scaled features:
# x = predicted class, y = actual class minus predicted class.
# knn_scaled was fitted on min-max scaled features, so both splits are
# predicted from their scaled versions (X_test_scaled / X_train_scaled).
knn_scaled_xtest_shaped = knn_scaled.predict(X_test_scaled).reshape(-1, 1)
knn_scaled_ytest_shaped = y_test.values.reshape(-1,1)

knn_scaled_xtrain = knn_scaled.predict(X_train_scaled)
knn_scaled_xtrain_shaped = knn_scaled_xtrain.reshape(-1, 1)
knn_scaled_ytrain_shaped = y_train.values.reshape(-1,1)

knn_scaled_res_fig = plt.figure(figsize=(20,8))
# Draw in the first cell of a 2-row, 1-column subplot grid
plt.subplot(2,1,1)
plt.tight_layout()
plt.subplots_adjust(top=0.90)

plt.scatter(knn_scaled_xtest_shaped, knn_scaled_ytest_shaped - knn_scaled_xtest_shaped, c="orange", label="Testing Data")
plt.scatter(knn_scaled_xtrain_shaped, knn_scaled_ytrain_shaped - knn_scaled_xtrain_shaped, c="blue", label="Training Data")
plt.legend()
# Zero-residual reference line, spanning the range of the test predictions
plt.hlines(y=0, xmin=knn_scaled_xtest_shaped.min(), xmax=knn_scaled_xtest_shaped.max())
plt.title('Min Max Scaler Residual Plot',color='k', size=14, weight='bold')

# Saving the figure to disk is currently disabled
# plt.savefig('../resources/knn_scaled_residual.png', dpi=knn_scaled_res_fig.dpi)
plt.show()

### Support Vector Machine

In [None]:
from sklearn.svm import SVC

# Support vector classifier with a linear kernel on the unscaled features
svm_model = SVC(kernel='linear').fit(X_train, y_train)

svm_train_acc = svm_model.score(X_train, y_train)
svm_test_acc = svm_model.score(X_test, y_test)
print('Train Acc: %.3f' % svm_train_acc)
print('Test Acc: %.3f' % svm_test_acc)

In [None]:
# Classification report for the linear SVM on the test split.
predictions_svm = svm_model.predict(X_test)
# The label list is passed explicitly and the display names are derived from
# it. A fixed list of names without `labels` raises an error whenever a class
# is missing from both the test split and the predictions.
svm_class_labels = sorted(y.unique())
print(classification_report(y_test, predictions_svm,
                            labels=svm_class_labels,
                            target_names=[str(label) for label in svm_class_labels]))

In [None]:
# Tune the SVM with a cross-validated grid search on the unscaled training data.
from sklearn.model_selection import GridSearchCV
# svm_model uses kernel='linear'. gamma is a kernel coefficient for the
# 'rbf', 'poly' and 'sigmoid' kernels only, so searching over it would just
# repeat identical fits; only C is searched.
param_grid = {'C': [1, 5, 10, 50]}
# verbose=3 prints progress for every fit
grid_svm = GridSearchCV(svm_model, param_grid, verbose=3)

grid_svm.fit(X_train, y_train)

In [None]:
# Best parameter combination found by the grid search and its cross-validated score
print(grid_svm.best_params_, grid_svm.best_score_, sep="\n")


In [None]:
# Print the class predicted by the linear SVM (svm_model) for each hand-built animal
svm_samples = {
    "Kangaroo": kangaroo,
    "Bald Eagle": bald_eagle,
    "Rattle Snake": rattle_snake,
    "Goldfish": goldfish,
    "Mountain Chicken Frog": mountain_chicken_frog,
    "Vampire Moth": vampire_moth,
    "Coral": coral,
    "Sea Turtle": sea_turtle,
}
print(f"Predictions")
for animal_label, animal_features in svm_samples.items():
    print(f"{animal_label}: {svm_model.predict(animal_features)}")

In [None]:
# Put the SVM test predictions next to the actual classes.
# The table keeps the index of y_test.
svm_predictions_table = pd.DataFrame(
    {"Prediction": predictions_svm, "Actual": y_test.values},
    index=y_test.index,
)
svm_predictions_table.head()

In [None]:
# Scatter plot of actual vs predicted class for every row of
# svm_predictions_table (linear SVM on unscaled features, test split).
svm_fig = plt.figure(figsize=(20,8))
# Draw in the first cell of a 2-row, 1-column subplot grid
plt.subplot(2,1,1)
plt.tight_layout()
plt.subplots_adjust(top=0.90)
# Fixed y-range; presumably chosen to frame the class labels - confirm
plt.ylim(0, 8)
# Rotate the x tick labels by 90 degrees
plt.xticks(rotation=90, ha='right')

# Blue circles = actual class, red crosses = predicted class, both against the table index
plt.scatter(svm_predictions_table.index, svm_predictions_table.Actual, marker='o', color='b', label='Actual')
plt.scatter(svm_predictions_table.index, svm_predictions_table.Prediction, marker='x', color='r', label='Prediction')

plt.title('Predicting Animal Class',color='k', size=14, weight='bold')
plt.xlabel("Animal")
plt.ylabel('Class')
plt.legend(loc="best")
plt.grid(alpha=0.5)

# Saving the figure to disk is currently disabled
# plt.savefig('../resources/svm.png', dpi=svm_fig.dpi)
plt.show()

In [None]:
# Residual plot for the linear SVM on unscaled features:
# x = predicted class, y = actual class minus predicted class.
# NOTE(review): class labels look like categorical codes, so only zero vs
# non-zero differences are meaningful - confirm this plot is intended.
# Test predictions and actual values as column vectors
svm_xtest_shaped = predictions_svm.reshape(-1, 1)
svm_ytest_shaped = y_test.values.reshape(-1,1)

# Same for the training split (predictions are computed here)
svm_xtrain = svm_model.predict(X_train)
svm_xtrain_shaped = svm_xtrain.reshape(-1, 1)
svm_ytrain_shaped = y_train.values.reshape(-1,1)

svm_res_fig = plt.figure(figsize=(20,8))
# Draw in the first cell of a 2-row, 1-column subplot grid
plt.subplot(2,1,1)
plt.tight_layout()
plt.subplots_adjust(top=0.90)

plt.scatter(svm_xtest_shaped, svm_ytest_shaped - svm_xtest_shaped, c="orange", label="Testing Data")
plt.scatter(svm_xtrain_shaped, svm_ytrain_shaped - svm_xtrain_shaped, c="blue", label="Training Data")
plt.legend()
# Zero-residual reference line, spanning the range of the test predictions
plt.hlines(y=0, xmin=svm_xtest_shaped.min(), xmax=svm_xtest_shaped.max())
plt.title('Residual Plot',color='k', size=14, weight='bold')

# Saving the figure to disk is currently disabled
# plt.savefig('../resources/svm_res.png', dpi=svm_res_fig.dpi)
plt.show()

#### Support Vector Machine Scaled

In [None]:
# Support vector classifier with a linear kernel on the min-max scaled features
svm_model_scaled = SVC(kernel='linear').fit(X_train_scaled, y_train)

svm_scaled_train_acc = svm_model_scaled.score(X_train_scaled, y_train)
svm_scaled_test_acc = svm_model_scaled.score(X_test_scaled, y_test)
print('Train Acc: %.3f' % svm_scaled_train_acc)
print('Test Acc: %.3f' % svm_scaled_test_acc)

In [None]:
# Classification report for the scaled linear SVM, predicted from the scaled test features.
predictions_svm_scaled = svm_model_scaled.predict(X_test_scaled)
# The label list is passed explicitly and the display names are derived from
# it. A fixed list of names without `labels` raises an error whenever a class
# is missing from both the test split and the predictions.
svm_scaled_class_labels = sorted(y.unique())
print(classification_report(y_test, predictions_svm_scaled,
                            labels=svm_scaled_class_labels,
                            target_names=[str(label) for label in svm_scaled_class_labels]))

In [None]:
# Tune the scaled SVM with a cross-validated grid search on the scaled training data.
# The search wraps svm_model_scaled and uses the grid defined in this cell.
# svm_model_scaled uses kernel='linear'. gamma is a kernel coefficient for the
# 'rbf', 'poly' and 'sigmoid' kernels only, so only C is searched.
param_grid_scaled = {'C': [1, 5, 10, 50]}
# verbose=3 prints progress for every fit
grid_svm_scaled = GridSearchCV(svm_model_scaled, param_grid_scaled, verbose=3)

grid_svm_scaled.fit(X_train_scaled, y_train)

In [None]:
# Best parameter combination found by the scaled grid search and its cross-validated score
print(grid_svm_scaled.best_params_, grid_svm_scaled.best_score_, sep="\n")

In [None]:
# Print predictions for chosen animals.
# svm_model_scaled was fitted on min-max scaled features, so every hand-built
# row goes through the same fitted scaler (X_scaler) before prediction.
svm_scaled_samples = {
    "Kangaroo": kangaroo,
    "Bald Eagle": bald_eagle,
    "Rattle Snake": rattle_snake,
    "Goldfish": goldfish,
    "Mountain Chicken Frog": mountain_chicken_frog,
    "Vampire Moth": vampire_moth,
    "Coral": coral,
    "Sea Turtle": sea_turtle,
}
print(f"Predictions")
for animal_label, animal_features in svm_scaled_samples.items():
    animal_features_scaled = X_scaler.transform(animal_features)
    print(f"{animal_label}: {svm_model_scaled.predict(animal_features_scaled)}")

In [None]:
# Put the scaled SVM test predictions next to the actual classes.
# The table keeps the index of y_test.
svm_scaled_predictions_table = pd.DataFrame(
    {"Prediction": predictions_svm_scaled, "Actual": y_test.values},
    index=y_test.index,
)
svm_scaled_predictions_table.head()

In [None]:
# Scatter plot of actual vs predicted class for every row of
# svm_scaled_predictions_table (linear SVM fitted on scaled features).
svm_scaled_fig = plt.figure(figsize=(20,8))
# Draw in the first cell of a 2-row, 1-column subplot grid
plt.subplot(2,1,1)
plt.tight_layout()
plt.subplots_adjust(top=0.90)

# Fixed y-range; presumably chosen to frame the class labels - confirm
plt.ylim(0, 8)
# Rotate the x tick labels by 90 degrees
plt.xticks(rotation=90, ha='right')
# Blue circles = actual class, red crosses = predicted class, both against the table index
plt.scatter(svm_scaled_predictions_table.index, svm_scaled_predictions_table.Actual, marker='o', color='b', label='Actual')
plt.scatter(svm_scaled_predictions_table.index, svm_scaled_predictions_table.Prediction, marker='x', color='r', label='Prediction')
plt.title('Min Max Scaler Predicting Animal Class',color='k', size=14, weight='bold')
plt.xlabel("Animal")
plt.ylabel('Class')
plt.legend(loc="best")
plt.grid(alpha=0.5)

# Saving the figure to disk is currently disabled
# plt.savefig('../resources/svm_scaled.png', dpi=svm_scaled_fig.dpi)
plt.show()

In [None]:
# Residual plot for the linear SVM fitted on scaled features:
# x = predicted class, y = actual class minus predicted class.
# svm_model_scaled was fitted on min-max scaled features, so both splits are
# predicted from their scaled versions (X_test_scaled / X_train_scaled).
svm_scaled_xtest_shaped = svm_model_scaled.predict(X_test_scaled).reshape(-1, 1)
svm_scaled_ytest_shaped = y_test.values.reshape(-1,1)

svm_scaled_xtrain = svm_model_scaled.predict(X_train_scaled)
svm_scaled_xtrain_shaped = svm_scaled_xtrain.reshape(-1, 1)
svm_scaled_ytrain_shaped = y_train.values.reshape(-1,1)

svm_scaled_res_fig = plt.figure(figsize=(20,8))
# Draw in the first cell of a 2-row, 1-column subplot grid
plt.subplot(2,1,1)
plt.tight_layout()
plt.subplots_adjust(top=0.90)

plt.scatter(svm_scaled_xtest_shaped, svm_scaled_ytest_shaped - svm_scaled_xtest_shaped, c="orange", label="Testing Data")
plt.scatter(svm_scaled_xtrain_shaped, svm_scaled_ytrain_shaped - svm_scaled_xtrain_shaped, c="blue", label="Training Data")
plt.legend()
# Zero-residual reference line, spanning the range of the test predictions
plt.hlines(y=0, xmin=svm_scaled_xtest_shaped.min(), xmax=svm_scaled_xtest_shaped.max())
plt.title('Min Max Scaler Residual Plot',color='k', size=14, weight='bold')

# Saving the figure to disk is currently disabled
# plt.savefig('../resources/svm_scaled_res.png', dpi=svm_scaled_res_fig.dpi)
plt.show()