In [1]:
# Initial Data Preprocessing and Importing #####################################################################################
#My first step in preprocessing was picking out what columns were important to include before
#even downloading the dataset, and pulling only those features.
#I did however leave some columns, such as links, for self reference, but will remove them here.

# Imports
import numpy as np
import scipy as sp
import pandas as pd
from IPython.display import display, HTML

df=pd.read_csv('observations.csv')
print(df.shape)
cols=df.columns


# print out and display dataframe as tables in HTML
display(HTML(df.head(5).to_html()))

# remove unnecessary columns
df=df.drop('id',1)
# Url's are only for post reference, and not needed in data.
df=df.drop('url',1)
df=df.drop('image_url',1)

# after reading and searching through the data, these columns were also found unecessary:
# Captive Cultivated: No rows were TRUE. Every value is FALSE, so this data is not useful.
df=df.drop('captive_cultivated',1)
# Geospatial data isn't too important, as the only positional data I would use anyway would
#be latitude and longitude, and not map data, so I don't need positional_accuracy.
df=df.drop('positional_accuracy',1)
# observed_on_string is just a different format of observed_on with extra data, so I'm removing it.
df=df.drop('observed_on_string',1)
# I'm replacing the time column with ONLY times, and filling missing values with averages.
#I also filled missing values with fillna method ffill, which just pulls the most recent time
#value above it for itself. After consideration, to make it a float value, I'm changing it to be 
#only the hour.
df['time_observed_at'] = df['time_observed_at'].replace(to_replace='[0-9]{4}-[0-9]{2}-[0-9]{2}', value='', regex=True)
df['time_observed_at'] = df['time_observed_at'].replace(to_replace='\+[0-9]{4}', value='', regex=True)
df['time_observed_at'] = df['time_observed_at'].replace(to_replace=':[0-9]{2}', value='', regex=True)
df['time_observed_at'].fillna(method='ffill', inplace=True)
df['time_observed_at'] = df['time_observed_at'].astype(float)
# Time values aren't that important directly on my classification, so I'm also using ffill for 
#missing values in time_zone
df['time_zone'].fillna(method='ffill', inplace=True)
# Replacing missing values in place_guess to none
df['place_guess'].fillna(value='None', inplace=True)
# Replacing missing values in species_guess to none, since no exact species was identified.
df['species_guess'].fillna(value='None', inplace=True)


# The last change I need to make in order to use observed_on as my label for months, is to
#remove anything but the month, as we can still tell the month just with one digit.
df['observed_on'] = df['observed_on'].replace(to_replace='/.*', value='', regex=True)
df['observed_on'] = df['observed_on'].astype(float)


# After trying to figure out how to get this into my first model, I realized that if I'm trying
#to determine what month a species is most likely to be found during, the only thing I need is my 
#label, which would be common name or scientific name, (not taxon name because it matches 
#scientific and common name anyway) and observed_on since it's the month. Because of this, I'm
#removing a few more columns so I can run this data. Latitude and Longitude is also unique, so
#it's not going to help here I believe.
df=df.drop('time_zone',1)
df=df.drop('place_guess',1)
df=df.drop('iconic_taxon_name',1)
df=df.drop('latitude',1)
df=df.drop('longitude',1)
df=df.drop('species_guess',1)
df=df.drop('scientific_name',1)


# print out and display dataframe as tables in HTML after removing columns
display(HTML(df.head(5).to_html()))
# Redefine columns
cols=df.columns
# Checking Datatypes of columns and for missing values
print('ColumnName, DataType, MissingValues')
for i in cols:
    print(i, ',', df[i].dtype,',',df[i].isnull().any())
    

(71717, 16)


Unnamed: 0,id,observed_on_string,observed_on,time_observed_at,time_zone,url,image_url,captive_cultivated,place_guess,latitude,longitude,positional_accuracy,species_guess,scientific_name,common_name,iconic_taxon_name
0,1292466,3/14/2015 14:35,3/14/2015,2015-03-14 20:35:00 +0100,Central Time (US & Canada),http://www.inaturalist.org/observations/1292466,https://inaturalist-open-data.s3.amazonaws.com/photos/1621232/medium.JPG,False,Salt Creek Woods,41.827903,-87.884413,49.0,Northern Leopard Frog,Lithobates pipiens,Northern Leopard Frog,Amphibia
1,1297303,3/16/2015 12:26,3/16/2015,2015-03-16 18:26:00 +0100,Central Time (US & Canada),http://www.inaturalist.org/observations/1297303,https://inaturalist-open-data.s3.amazonaws.com/photos/1627861/medium.JPG,False,Warrenville Grove FP,41.82186,-88.172686,31.0,Common Snapping Turtle,Chelydra serpentina,Common Snapping Turtle,Reptilia
2,1297483,3/11/2015,3/11/2015,,Central Time (US & Canada),http://www.inaturalist.org/observations/1297483,http://static.inaturalist.org/photos/1627972/medium.JPG,False,"Marshall Road, Illinois, U.S.A.",37.320585,-88.914714,105.0,Common Snapping Turtle,Chelydra serpentina,Common Snapping Turtle,Reptilia
3,1299187,3/13/2015,3/13/2015,,Central Time (US & Canada),http://www.inaturalist.org/observations/1299187,http://static.inaturalist.org/photos/1630596/medium.JPG,False,"Promised Land Road, Pulaski county, Illinois, U.S.A.",37.119606,-89.304643,1161.0,Green Tree Frog,Hyla cinerea,Green Treefrog,Amphibia
4,1299190,3/13/2015,3/13/2015,,Central Time (US & Canada),http://www.inaturalist.org/observations/1299190,http://static.inaturalist.org/photos/1630599/medium.JPG,False,"Promised Land Road, Pulaski county, Illinois, U.S.A.",37.119606,-89.304643,1161.0,cope's gray treefrog,Hyla chrysoscelis,Cope's Gray Treefrog,Amphibia


Unnamed: 0,observed_on,time_observed_at,common_name
0,3.0,20.0,Northern Leopard Frog
1,3.0,18.0,Common Snapping Turtle
2,3.0,18.0,Common Snapping Turtle
3,3.0,18.0,Green Treefrog
4,3.0,18.0,Cope's Gray Treefrog


ColumnName, DataType, MissingValues
observed_on , float64 , False
time_observed_at , float64 , False
common_name , object , False


In [12]:
# KNN Model ####################################################################################################################

# Data preprocessing ################################################################################
# convert all nominal variables to binary variables
df_knn=df.copy(deep=True) 
# create new binary columns
df_dummies=pd.get_dummies(df_knn['common_name'])
# add them to dataframe
df_knn=df_knn.join(df_dummies)
# drop original columns
df_knn=df_knn.drop('common_name',axis=1)
display('Data Example:',HTML(df_knn.head(3).to_html()))

# Normalized all numerical features
# min-max normalization to scale [0, 1]
for col in df_knn.columns:
    if col != 'observed_on':
        # exclude GradeLetter, since it is label in our data
        df_knn[col]=(df_knn[col]-df_knn[col].min())/(df_knn[col].max()-df_knn[col].min())

display(HTML(df_knn.head(3).to_html()))

# Build KNN models and evaluate the models ############################################################
# Note: for demo and teaching purpose, we present evaluations based on both hold-out and N-fold cross validations

# By hold-out evaluations ###############################################
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import matplotlib as mpl
import seaborn as sns

# preprocess label, since KNN requires label encoding
from sklearn import preprocessing


y = df_knn['observed_on'] # define label as nominal values
le = preprocessing.LabelEncoder()
le.fit(y)
y_encoded = le.transform(y) # encode nominal labels to integers #####################################

print(y_encoded)

df_knn['observed_on'] = y_encoded
df_knn_x = df_knn.drop('observed_on',1)

display(HTML(df_knn.head(10).to_html()))


# KNN format splits here, can use for other models that use KNN
x_train, x_test, y_train, y_test = train_test_split(df_knn_x, y_encoded, 
                                                    test_size=0.2)

# build and eval models
from sklearn import neighbors
from sklearn.metrics import accuracy_score
# API， https://scikit-learn.org/stable/modules/generated/sklearn.metrics.precision_score.html
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

# Hold-out evaluation ###############################################
for k in range(1, 15, 2): 
    clf=neighbors.KNeighborsClassifier(k, weights='uniform')
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print('K =', k, ', Accuracy: ', accuracy_score(y_test, y_pred), ', Precision: ', precision_score(y_test, y_pred, average='micro'),
         ', Recall: ', recall_score(y_test, y_pred, average='micro'))

    
# Visualize the best model on the test set
clf=neighbors.KNeighborsClassifier(1, weights='uniform')
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)


# N-fold cross evaluation ###########################################
from sklearn.model_selection import cross_val_score

for k in range(1, 15, 2): 
    clf=neighbors.KNeighborsClassifier(k, weights='uniform')
    acc=cross_val_score(clf, df_knn_x, y, cv=5, scoring='accuracy').mean()
    print("KNN Accuracy by N-fold Cross Validation:",acc)

'Data Example:'

Unnamed: 0,observed_on,time_observed_at,American Bullfrog,American Softshells,American Toad,American Water Frogs,Bird-voiced Treefrog,Black Kingsnake,Blanchard's Cricket Frog,Blanding's Turtle,Blue Racer,Blue-spotted Salamander,Boreal Chorus Frog,Broad-headed Skink,Brook Salamanders,Brown Anole,Bullsnake,Butler's Garter Snake,Cave Salamander,Central Newt,Chicago Gartersnake,Chorus Frogs,Common Box Turtle,Common Five-lined Skink,Common Garter Snake,Common Mudpuppy,Common Slider,Common Snapping Turtle,Common Watersnake,Cope's Gray Treefrog,Crawfish Frog,Cuban Tree Frog,Deirochelyine Turtles,Dekay's Brownsnake,Diamondback Watersnake,Dwarf American Toad,Eastern American Toad,Eastern Box Turtle,Eastern Copperhead,Eastern Fence Lizard,Eastern Foxsnake,Eastern Garter Snake,Eastern Hognose Snake,Eastern Lesser Siren,Eastern Long-tailed Salamander,Eastern Milksnake,Eastern Mud Turtle,Eastern Musk Turtle,Eastern Narrow-mouthed Toad,Eastern Newt,Eastern Red-backed Salamander,Eastern Ribbon Snake,Eastern River Cooter,Eastern Spadefoot,Eastern Worm Snake,Eastern/Gray Ratsnake Complex,False Map Turtle,Flat-headed Snake,Four-toed Salamander,Fowler's Toad,Garter Snakes,Gopher Snake,Graham's Crayfish Snake,Gray Ratsnake,Gray Treefrog,Gray Treefrog Complex,Green Anole,Green Frog,Green Treefrog,Greenhouse Frog,Holarctic Treefrogs,Illinois Chorus Frog,Jefferson Salamander,Kirtland's Snake,Lesser Siren,Little Brown Skink,Long-tailed Salamander,Map Turtles,Marbled Salamander,Massasauga,Mediterranean Gecko,Midland Painted Turtle,Midland Smooth Softshell Turtle,Midland Watersnake,Midwestern Wormsnake,Mink Frog,Mississippi Green Watersnake,Mississippi Map Turtle,Mississippi Ringneck Snake,Mole Salamander,Mole Salamanders,Mudsnake,North American Racer,North American Ratsnakes,North American Toads,Northern Black Racer,Northern Cottonmouth,Northern Crawfish Frog,Northern Dusky Salamander,Northern False Map Turtle,Northern Leopard Frog,Northern Map Turtle,Northern Prairie Skink,Northern Ravine Salamander,Northern Redbelly Snake,Northern Ribbon Snake,Northern Ringneck Snake,Northern Rough Greensnake,Northern Slimy Salamander,Northern Spiny Softshell Turtle,Northern Watersnake,Northern Zigzag Salamander,Orange-striped Ribbonsnake,Ornate Box Turtle,Ouachita Map Turtle,Painted Turtle,Pickerel Frog,Plain-bellied Watersnake,Plains Box Turtle,Plains Garter Snake,Plains Hognose Snake,Plains Leopard Frog,Prairie Kingsnake,Prairie Racerunner,Prairie Ringneck Snake,Prairie Skink,Queensnake,Red-bellied Snake,Red-eared Slider,Red-spotted Newt,Ribbon Snake,River Cooter,Rough Greensnake,Six-lined Racerunner,Slender Glass Lizard,Small-mouthed Salamander,Smooth Earthsnake,Smooth Greensnake,Smooth Softshell Turtle,Southern Black Racer,Southern Leopard Frog,Southern Painted Turtle,Southern Red-backed Salamander,Southern Two-lined Salamander,Spiny Softshell Turtle,Spotted Dusky Salamander,Spotted Salamander,Spotted Turtle,Spring Peeper,Squirrel Tree Frog,Streamside Salamander,Three-toed Box Turtle,Tiger Salamander,Timber Rattlesnake,Toothy Skinks,Tropical House Gecko,Unisexual Mole Salamander,Watersnakes,Western Chorus Frog,Western Lesser Siren,Western Mudsnake,Western Painted Turtle,Western Ratsnake,Western Ribbon Snake,Western Slender Glass Lizard,Western Smooth Earthsnake,Wood Frog,Wood Turtle,Woodland Salamanders,Yellow-bellied Slider,ring-necked snake
0,3.0,20.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,3.0,18.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,3.0,18.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Unnamed: 0,observed_on,time_observed_at,American Bullfrog,American Softshells,American Toad,American Water Frogs,Bird-voiced Treefrog,Black Kingsnake,Blanchard's Cricket Frog,Blanding's Turtle,Blue Racer,Blue-spotted Salamander,Boreal Chorus Frog,Broad-headed Skink,Brook Salamanders,Brown Anole,Bullsnake,Butler's Garter Snake,Cave Salamander,Central Newt,Chicago Gartersnake,Chorus Frogs,Common Box Turtle,Common Five-lined Skink,Common Garter Snake,Common Mudpuppy,Common Slider,Common Snapping Turtle,Common Watersnake,Cope's Gray Treefrog,Crawfish Frog,Cuban Tree Frog,Deirochelyine Turtles,Dekay's Brownsnake,Diamondback Watersnake,Dwarf American Toad,Eastern American Toad,Eastern Box Turtle,Eastern Copperhead,Eastern Fence Lizard,Eastern Foxsnake,Eastern Garter Snake,Eastern Hognose Snake,Eastern Lesser Siren,Eastern Long-tailed Salamander,Eastern Milksnake,Eastern Mud Turtle,Eastern Musk Turtle,Eastern Narrow-mouthed Toad,Eastern Newt,Eastern Red-backed Salamander,Eastern Ribbon Snake,Eastern River Cooter,Eastern Spadefoot,Eastern Worm Snake,Eastern/Gray Ratsnake Complex,False Map Turtle,Flat-headed Snake,Four-toed Salamander,Fowler's Toad,Garter Snakes,Gopher Snake,Graham's Crayfish Snake,Gray Ratsnake,Gray Treefrog,Gray Treefrog Complex,Green Anole,Green Frog,Green Treefrog,Greenhouse Frog,Holarctic Treefrogs,Illinois Chorus Frog,Jefferson Salamander,Kirtland's Snake,Lesser Siren,Little Brown Skink,Long-tailed Salamander,Map Turtles,Marbled Salamander,Massasauga,Mediterranean Gecko,Midland Painted Turtle,Midland Smooth Softshell Turtle,Midland Watersnake,Midwestern Wormsnake,Mink Frog,Mississippi Green Watersnake,Mississippi Map Turtle,Mississippi Ringneck Snake,Mole Salamander,Mole Salamanders,Mudsnake,North American Racer,North American Ratsnakes,North American Toads,Northern Black Racer,Northern Cottonmouth,Northern Crawfish Frog,Northern Dusky Salamander,Northern False Map Turtle,Northern Leopard Frog,Northern Map Turtle,Northern Prairie Skink,Northern Ravine Salamander,Northern Redbelly Snake,Northern Ribbon Snake,Northern Ringneck Snake,Northern Rough Greensnake,Northern Slimy Salamander,Northern Spiny Softshell Turtle,Northern Watersnake,Northern Zigzag Salamander,Orange-striped Ribbonsnake,Ornate Box Turtle,Ouachita Map Turtle,Painted Turtle,Pickerel Frog,Plain-bellied Watersnake,Plains Box Turtle,Plains Garter Snake,Plains Hognose Snake,Plains Leopard Frog,Prairie Kingsnake,Prairie Racerunner,Prairie Ringneck Snake,Prairie Skink,Queensnake,Red-bellied Snake,Red-eared Slider,Red-spotted Newt,Ribbon Snake,River Cooter,Rough Greensnake,Six-lined Racerunner,Slender Glass Lizard,Small-mouthed Salamander,Smooth Earthsnake,Smooth Greensnake,Smooth Softshell Turtle,Southern Black Racer,Southern Leopard Frog,Southern Painted Turtle,Southern Red-backed Salamander,Southern Two-lined Salamander,Spiny Softshell Turtle,Spotted Dusky Salamander,Spotted Salamander,Spotted Turtle,Spring Peeper,Squirrel Tree Frog,Streamside Salamander,Three-toed Box Turtle,Tiger Salamander,Timber Rattlesnake,Toothy Skinks,Tropical House Gecko,Unisexual Mole Salamander,Watersnakes,Western Chorus Frog,Western Lesser Siren,Western Mudsnake,Western Painted Turtle,Western Ratsnake,Western Ribbon Snake,Western Slender Glass Lizard,Western Smooth Earthsnake,Wood Frog,Wood Turtle,Woodland Salamanders,Yellow-bellied Slider,ring-necked snake
0,3.0,0.869565,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,3.0,0.782609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,3.0,0.782609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


[2 2 2 ... 2 9 6]


Unnamed: 0,observed_on,time_observed_at,American Bullfrog,American Softshells,American Toad,American Water Frogs,Bird-voiced Treefrog,Black Kingsnake,Blanchard's Cricket Frog,Blanding's Turtle,Blue Racer,Blue-spotted Salamander,Boreal Chorus Frog,Broad-headed Skink,Brook Salamanders,Brown Anole,Bullsnake,Butler's Garter Snake,Cave Salamander,Central Newt,Chicago Gartersnake,Chorus Frogs,Common Box Turtle,Common Five-lined Skink,Common Garter Snake,Common Mudpuppy,Common Slider,Common Snapping Turtle,Common Watersnake,Cope's Gray Treefrog,Crawfish Frog,Cuban Tree Frog,Deirochelyine Turtles,Dekay's Brownsnake,Diamondback Watersnake,Dwarf American Toad,Eastern American Toad,Eastern Box Turtle,Eastern Copperhead,Eastern Fence Lizard,Eastern Foxsnake,Eastern Garter Snake,Eastern Hognose Snake,Eastern Lesser Siren,Eastern Long-tailed Salamander,Eastern Milksnake,Eastern Mud Turtle,Eastern Musk Turtle,Eastern Narrow-mouthed Toad,Eastern Newt,Eastern Red-backed Salamander,Eastern Ribbon Snake,Eastern River Cooter,Eastern Spadefoot,Eastern Worm Snake,Eastern/Gray Ratsnake Complex,False Map Turtle,Flat-headed Snake,Four-toed Salamander,Fowler's Toad,Garter Snakes,Gopher Snake,Graham's Crayfish Snake,Gray Ratsnake,Gray Treefrog,Gray Treefrog Complex,Green Anole,Green Frog,Green Treefrog,Greenhouse Frog,Holarctic Treefrogs,Illinois Chorus Frog,Jefferson Salamander,Kirtland's Snake,Lesser Siren,Little Brown Skink,Long-tailed Salamander,Map Turtles,Marbled Salamander,Massasauga,Mediterranean Gecko,Midland Painted Turtle,Midland Smooth Softshell Turtle,Midland Watersnake,Midwestern Wormsnake,Mink Frog,Mississippi Green Watersnake,Mississippi Map Turtle,Mississippi Ringneck Snake,Mole Salamander,Mole Salamanders,Mudsnake,North American Racer,North American Ratsnakes,North American Toads,Northern Black Racer,Northern Cottonmouth,Northern Crawfish Frog,Northern Dusky Salamander,Northern False Map Turtle,Northern Leopard Frog,Northern Map Turtle,Northern Prairie Skink,Northern Ravine Salamander,Northern Redbelly Snake,Northern Ribbon Snake,Northern Ringneck Snake,Northern Rough Greensnake,Northern Slimy Salamander,Northern Spiny Softshell Turtle,Northern Watersnake,Northern Zigzag Salamander,Orange-striped Ribbonsnake,Ornate Box Turtle,Ouachita Map Turtle,Painted Turtle,Pickerel Frog,Plain-bellied Watersnake,Plains Box Turtle,Plains Garter Snake,Plains Hognose Snake,Plains Leopard Frog,Prairie Kingsnake,Prairie Racerunner,Prairie Ringneck Snake,Prairie Skink,Queensnake,Red-bellied Snake,Red-eared Slider,Red-spotted Newt,Ribbon Snake,River Cooter,Rough Greensnake,Six-lined Racerunner,Slender Glass Lizard,Small-mouthed Salamander,Smooth Earthsnake,Smooth Greensnake,Smooth Softshell Turtle,Southern Black Racer,Southern Leopard Frog,Southern Painted Turtle,Southern Red-backed Salamander,Southern Two-lined Salamander,Spiny Softshell Turtle,Spotted Dusky Salamander,Spotted Salamander,Spotted Turtle,Spring Peeper,Squirrel Tree Frog,Streamside Salamander,Three-toed Box Turtle,Tiger Salamander,Timber Rattlesnake,Toothy Skinks,Tropical House Gecko,Unisexual Mole Salamander,Watersnakes,Western Chorus Frog,Western Lesser Siren,Western Mudsnake,Western Painted Turtle,Western Ratsnake,Western Ribbon Snake,Western Slender Glass Lizard,Western Smooth Earthsnake,Wood Frog,Wood Turtle,Woodland Salamanders,Yellow-bellied Slider,ring-necked snake
0,2,0.869565,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,0.782609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,0.782609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2,0.782609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2,0.782609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,2,0.782609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,2,0.782609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,2,0.782609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,2,0.782609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,2,0.782609,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


K = 1 , Accuracy:  0.19046291132180704 , Precision:  0.19046291132180704 , Recall:  0.19046291132180704
K = 3 , Accuracy:  0.19590072504182934 , Precision:  0.19590072504182934 , Recall:  0.19590072504182934
K = 5 , Accuracy:  0.20705521472392638 , Precision:  0.20705521472392638 , Recall:  0.20705521472392638
K = 7 , Accuracy:  0.21597880646960402 , Precision:  0.21597880646960402 , Recall:  0.21597880646960402
K = 9 , Accuracy:  0.22497211377579476 , Precision:  0.22497211377579476 , Recall:  0.22497211377579476
K = 11 , Accuracy:  0.2308979364194088 , Precision:  0.2308979364194088 , Recall:  0.2308979364194088
K = 13 , Accuracy:  0.23257110987172336 , Precision:  0.23257110987172336 , Recall:  0.23257110987172336
Naive Bayes Accuracy by N-fold Cross Validation: 0.16011550570111233
Naive Bayes Accuracy by N-fold Cross Validation: 0.17046170317150927
Naive Bayes Accuracy by N-fold Cross Validation: 0.17608082789908727
Naive Bayes Accuracy by N-fold Cross Validation: 0.183331548521660

In [9]:
# Naive Bayes Model ############################################################################################################
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_validate

df_nb=df.copy(deep=True)


print('Column data types:\n',df_nb.dtypes)
# convert numerical to categorical data #################################
df_nb['common_name'] = df_nb['common_name'].astype(str)
df_nb['time_observed_at'] = pd.cut(df_nb['time_observed_at'],3)


display('Data Example',HTML(df_nb.head(5).to_html()))

y=df_nb['observed_on']
le = preprocessing.LabelEncoder()
le.fit(y)
y_encoded = le.transform(y) # encode nominal labels to integers #####################################


print('Column data types:\n',df_nb.dtypes)

df_nb=pd.get_dummies(df_nb.drop('observed_on',axis=1))
df_nb['observed_on']=y_encoded
display(HTML(df_nb.head(5).to_html()))

nb_x_train, nb_x_test, nb_y_train, nb_y_test = train_test_split(df_nb, y_encoded, test_size=0.2)

# Hold-out evaluation
clf = GaussianNB()
clf.fit(nb_x_train, nb_y_train)
y_pred=clf.predict(nb_x_test)
# in the following coding example, we use accuracy only as the example
print("Naive Bayes Accuracy by Hold-out Eval:",accuracy_score(y_pred,nb_y_test))
print("Naive Bayes Precision by Hold-out Eval:",precision_score(y_pred,nb_y_test, average='micro'))
print("Naive Bayes Recall by Hold-out Eval:",accuracy_score(y_pred,nb_y_test, average='micro'))

accuracy_score(y_test, y_pred), ', Precision: ', precision_score(y_test, y_pred, average='micro'),
         ', Recall: ', recall_score(y_test, y_pred, average='micro')
# N-fold evaluation ###########################################################
y=df_nb['observed_on']
x=df_nb.drop('observed_on',axis=1)
clf = GaussianNB()
acc=cross_val_score(clf, x, y, cv=5, scoring='accuracy').mean()
print("Naive Bayes Accuracy by N-fold Cross Validation:",acc)
# I couldn't tell you why, but after trying many times I just keep getting this error
#when attempting to run the same cross_val_score for precision or recall:
# Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].
#I tried multiple solutions, but none worked. I don't know how to include these.

Column data types:
 observed_on         float64
time_observed_at    float64
common_name          object
dtype: object


'Data Example'

Unnamed: 0,observed_on,time_observed_at,common_name
0,3.0,"(15.333, 23.0]",Northern Leopard Frog
1,3.0,"(15.333, 23.0]",Common Snapping Turtle
2,3.0,"(15.333, 23.0]",Common Snapping Turtle
3,3.0,"(15.333, 23.0]",Green Treefrog
4,3.0,"(15.333, 23.0]",Cope's Gray Treefrog


Column data types:
 observed_on          float64
time_observed_at    category
common_name           object
dtype: object


Unnamed: 0,"time_observed_at_(-0.023, 7.667]","time_observed_at_(7.667, 15.333]","time_observed_at_(15.333, 23.0]",common_name_American Bullfrog,common_name_American Softshells,common_name_American Toad,common_name_American Water Frogs,common_name_Bird-voiced Treefrog,common_name_Black Kingsnake,common_name_Blanchard's Cricket Frog,common_name_Blanding's Turtle,common_name_Blue Racer,common_name_Blue-spotted Salamander,common_name_Boreal Chorus Frog,common_name_Broad-headed Skink,common_name_Brook Salamanders,common_name_Brown Anole,common_name_Bullsnake,common_name_Butler's Garter Snake,common_name_Cave Salamander,common_name_Central Newt,common_name_Chicago Gartersnake,common_name_Chorus Frogs,common_name_Common Box Turtle,common_name_Common Five-lined Skink,common_name_Common Garter Snake,common_name_Common Mudpuppy,common_name_Common Slider,common_name_Common Snapping Turtle,common_name_Common Watersnake,common_name_Cope's Gray Treefrog,common_name_Crawfish Frog,common_name_Cuban Tree Frog,common_name_Deirochelyine Turtles,common_name_Dekay's Brownsnake,common_name_Diamondback Watersnake,common_name_Dwarf American Toad,common_name_Eastern American Toad,common_name_Eastern Box Turtle,common_name_Eastern Copperhead,common_name_Eastern Fence Lizard,common_name_Eastern Foxsnake,common_name_Eastern Garter Snake,common_name_Eastern Hognose Snake,common_name_Eastern Lesser Siren,common_name_Eastern Long-tailed Salamander,common_name_Eastern Milksnake,common_name_Eastern Mud Turtle,common_name_Eastern Musk Turtle,common_name_Eastern Narrow-mouthed Toad,common_name_Eastern Newt,common_name_Eastern Red-backed Salamander,common_name_Eastern Ribbon Snake,common_name_Eastern River Cooter,common_name_Eastern Spadefoot,common_name_Eastern Worm Snake,common_name_Eastern/Gray Ratsnake Complex,common_name_False Map Turtle,common_name_Flat-headed Snake,common_name_Four-toed Salamander,common_name_Fowler's Toad,common_name_Garter Snakes,common_name_Gopher Snake,common_name_Graham's Crayfish Snake,common_name_Gray Ratsnake,common_name_Gray Treefrog,common_name_Gray Treefrog Complex,common_name_Green Anole,common_name_Green Frog,common_name_Green Treefrog,common_name_Greenhouse Frog,common_name_Holarctic Treefrogs,common_name_Illinois Chorus Frog,common_name_Jefferson Salamander,common_name_Kirtland's Snake,common_name_Lesser Siren,common_name_Little Brown Skink,common_name_Long-tailed Salamander,common_name_Map Turtles,common_name_Marbled Salamander,common_name_Massasauga,common_name_Mediterranean Gecko,common_name_Midland Painted Turtle,common_name_Midland Smooth Softshell Turtle,common_name_Midland Watersnake,common_name_Midwestern Wormsnake,common_name_Mink Frog,common_name_Mississippi Green Watersnake,common_name_Mississippi Map Turtle,common_name_Mississippi Ringneck Snake,common_name_Mole Salamander,common_name_Mole Salamanders,common_name_Mudsnake,common_name_North American Racer,common_name_North American Ratsnakes,common_name_North American Toads,common_name_Northern Black Racer,common_name_Northern Cottonmouth,common_name_Northern Crawfish Frog,common_name_Northern Dusky Salamander,common_name_Northern False Map Turtle,common_name_Northern Leopard Frog,common_name_Northern Map Turtle,common_name_Northern Prairie Skink,common_name_Northern Ravine Salamander,common_name_Northern Redbelly Snake,common_name_Northern Ribbon Snake,common_name_Northern Ringneck Snake,common_name_Northern Rough Greensnake,common_name_Northern Slimy Salamander,common_name_Northern Spiny Softshell Turtle,common_name_Northern Watersnake,common_name_Northern Zigzag Salamander,common_name_Orange-striped Ribbonsnake,common_name_Ornate Box Turtle,common_name_Ouachita Map Turtle,common_name_Painted Turtle,common_name_Pickerel Frog,common_name_Plain-bellied Watersnake,common_name_Plains Box Turtle,common_name_Plains Garter Snake,common_name_Plains Hognose Snake,common_name_Plains Leopard Frog,common_name_Prairie Kingsnake,common_name_Prairie Racerunner,common_name_Prairie Ringneck Snake,common_name_Prairie Skink,common_name_Queensnake,common_name_Red-bellied Snake,common_name_Red-eared Slider,common_name_Red-spotted Newt,common_name_Ribbon Snake,common_name_River Cooter,common_name_Rough Greensnake,common_name_Six-lined Racerunner,common_name_Slender Glass Lizard,common_name_Small-mouthed Salamander,common_name_Smooth Earthsnake,common_name_Smooth Greensnake,common_name_Smooth Softshell Turtle,common_name_Southern Black Racer,common_name_Southern Leopard Frog,common_name_Southern Painted Turtle,common_name_Southern Red-backed Salamander,common_name_Southern Two-lined Salamander,common_name_Spiny Softshell Turtle,common_name_Spotted Dusky Salamander,common_name_Spotted Salamander,common_name_Spotted Turtle,common_name_Spring Peeper,common_name_Squirrel Tree Frog,common_name_Streamside Salamander,common_name_Three-toed Box Turtle,common_name_Tiger Salamander,common_name_Timber Rattlesnake,common_name_Toothy Skinks,common_name_Tropical House Gecko,common_name_Unisexual Mole Salamander,common_name_Watersnakes,common_name_Western Chorus Frog,common_name_Western Lesser Siren,common_name_Western Mudsnake,common_name_Western Painted Turtle,common_name_Western Ratsnake,common_name_Western Ribbon Snake,common_name_Western Slender Glass Lizard,common_name_Western Smooth Earthsnake,common_name_Wood Frog,common_name_Wood Turtle,common_name_Woodland Salamanders,common_name_Yellow-bellied Slider,common_name_ring-necked snake,observed_on
0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2
1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2
2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2
3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2
4,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2


Accuracy by Hold-out Eval: 0.9999302844394868
Accuracy by N-fold Cross Validation: 0.012967635726081414


In [15]:
# Other Models #################################################################################################################

# Decision Tree, using Naive Bayes dataframe ###############
from sklearn.tree import DecisionTreeClassifier

# Hold-out evaluation
clf=DecisionTreeClassifier(criterion='entropy', max_depth=10, ccp_alpha = 0.1) 
clf=clf.fit(nb_x_train, nb_y_train)
y_pred=clf.predict(nb_x_test)
print("Tree Accuracy by Hold-out Eval:",accuracy_score(y_pred,nb_y_test))
print("Tree Precision by Hold-out Eval:",precision_score(y_pred,nb_y_test, average='micro'))
print("Tree Recall by Hold-out Eval:",recall_score(y_pred,nb_y_test, average='micro'))

# N-fold cross validation
clf=DecisionTreeClassifier(criterion='entropy', max_depth=10, ccp_alpha = 0.1)
clf=clf.fit(nb_x_train, nb_y_train)
y_pred=clf.predict(nb_x_test)
# by N-fold cross validation
acc=cross_val_score(clf, x, y, cv=5, scoring='accuracy').mean()
print("Tree Accuracy by N-fold Cross Validation:",acc)



# Random Forest, using Naive Bayes dataframe ###############
# Random Forest took too long to respond.
#from sklearn.ensemble import BaggingClassifier

#tree = DecisionTreeClassifier()
#bag = BaggingClassifier(tree, n_estimators=100, max_samples=0.8, random_state=1)
#acc=cross_val_score(bag, x, y, cv=5, scoring='accuracy').mean()
#print("RandomForest Accuracy by N-fold Cross Validation:",acc)


# SVC, using KNN dataframe ################################
# SVC took too long to respond.
#from sklearn.svm import SVC

#x=df_knn.drop('observed_on',axis=1)
#y=df_knn['observed_on']
# by N-fold cross validation
#clf=SVC(kernel='linear', C=1E10)
#acc=cross_val_score(clf, x, y, cv=5, scoring='accuracy').mean()
#print("SVC Accuracy by N-fold Cross Validation:",acc)


# Neural Networks, using KNN dataframe ###################
# Neural Networks took too long to respond.
#from sklearn.neural_network import MLPClassifier

#x=df_knn.drop('observed_on',axis=1)
#y=df_knn['observed_on']
# by N-fold cross validation
#clf=MLPClassifier(solver='lbfgs', alpha=1e-4,hidden_layer_sizes=(500,), random_state=1)
#acc=cross_val_score(clf, x, y, cv=5, scoring='accuracy').mean()
#print("Accuracy by N-fold Cross Validation:",acc)

Tree Accuracy by Hold-out Eval: 0.9739263803680982
Tree Precision by Hold-out Eval: 0.9739263803680982
Tree Recall by Hold-out Eval: 0.9739263803680982
Tree Accuracy by N-fold Cross Validation: 0.19015017362640177
