In [338]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB, CategoricalNB

In [268]:
# Load the features table from features.csv (path is relative to the working directory).
frame = pd.read_csv('features.csv')

In [269]:
# Preview the first rows to sanity-check the load.
frame.head()

Unnamed: 0,Amp_range,Avg_amp,Duration,Zero_Crossings,Species,Sex,Age,Call
0,0.166116,-3.2e-05,1,706,A. jubatus,,A,growl
1,0.466826,-8e-06,4,3456,A. jubatus,,A,
2,0.311871,5e-06,5,2984,A. jubatus,,A,growl
3,0.386908,3e-06,5,3108,A. jubatus,,A,
4,0.327264,-2.2e-05,3,1919,A. jubatus,,A,growl


In [270]:
# List the raw Call labels to see which spellings need normalising.
frame['Call'].unique()

array(['growl', nan, 'growl?', 'hiss', 'hiss?', 'call', 'unknown',
       'growl/hiss', 'call sequence', 'purr', 'purr sequence', 'Growl',
       'Hiss', 'Loud rumble/roar', 'Sharp Hiss', 'call?', 'roar',
       'roar or call', 'growl ', 'roar?', 'main call', 'call/growl'],
      dtype=object)

In [272]:
# Check column dtypes: which columns are numeric and which are object (string) columns.
frame.dtypes

Amp_range         float64
Avg_amp           float64
Duration            int64
Zero_Crossings      int64
Species            object
Sex                object
Age                object
Call               object
dtype: object

In [273]:
# Canonical spelling for each messy Call label seen in the raw data.
# NOTE(review): the compound labels 'growl/hiss' and 'call/growl' are not
# remapped here — confirm they are meant to stay as separate classes.
clean = {
    'growl?': 'growl', 'Growl': 'growl', 'growl ': 'growl',
    'hiss?': 'hiss', 'Hiss': 'hiss', 'Sharp Hiss': 'hiss',
    'call?': 'call', 'main call': 'call', 'call sequence': 'call',
    'purr sequence': 'purr',
    'Loud rumble/roar': 'roar', 'roar or call': 'roar', 'roar?': 'roar', ' roar': 'roar',
    # 'unknown' becomes a missing value. Use np.nan: the np.NaN alias was
    # removed in NumPy 2.0.
    'unknown': np.nan,
}

In [274]:
# Age labels: fold abbreviations, trailing-space and lowercase variants into
# their canonical name.
clean2 = {
    variant: canonical
    for canonical, variants in (
        ('Adult', ('A', 'Adult ')),
        ('Juvenile', ('Juvenile ', 'juvenile')),
    )
    for variant in variants
}

# Sex / grouping labels: same idea, one canonical name per set of variants.
clean3 = {
    variant: canonical
    for canonical, variants in (
        ('Female', ('Female ', 'F')),
        ('Male', ('M', 'male ')),
        ('Pair', ('P', 'Pair (Unknown)')),
        ('Group', ('G', 'G (1 M and 2F)')),
    )
    for variant in variants
}

In [275]:
# Normalise the Call spellings by rebinding `frame` to the replaced copy.
# NOTE(review): the mapping is applied to every column, not just Call — confirm
# no other column holds a value that matches one of its keys by accident.
frame = frame.replace(to_replace=clean)

In [276]:
# Normalise the age labels by rebinding `frame` to the replaced copy.
# NOTE(review): applied to every column, so a bare 'A' anywhere becomes 'Adult'.
frame = frame.replace(to_replace=clean2)

In [277]:
# Normalise the sex / grouping labels by rebinding `frame` to the replaced copy.
# NOTE(review): applied to every column, so a bare 'M', 'F', 'P' or 'G' anywhere
# is rewritten.
frame = frame.replace(to_replace=clean3)

In [302]:
# Missing Sex / Age values become an explicit 'Unknown' category.
# Assign the result back rather than calling fillna(inplace=True) on a column
# selection: under pandas Copy-on-Write that chained form updates a temporary
# object and leaves `frame` unchanged.
frame = frame.fillna({'Sex': 'Unknown', 'Age': 'Unknown'})

In [303]:
# Re-inspect the Call labels after cleaning (missing labels have not been filtered out yet).
frame['Call'].unique()

array(['growl', nan, 'hiss', 'call', 'growl/hiss', 'purr', 'roar',
       'call/growl'], dtype=object)

In [304]:
# Confirm the Age column now holds only the canonical labels.
frame['Age'].unique()

array(['Adult', 'Juvenile', 'Unknown'], dtype=object)

In [305]:
# Confirm the Sex column now holds only the canonical labels.
frame['Sex'].unique()

array(['Unknown', 'Group', 'Pair', 'Female', 'Male'], dtype=object)

In [321]:
# Keep only the rows that have a Call label; unlabelled rows cannot be used for
# supervised training. The original row index is preserved.
train = frame.dropna(subset=['Call'])

In [322]:
# Verify that no missing labels remain in the training subset.
train['Call'].unique()

array(['growl', 'hiss', 'call', 'growl/hiss', 'purr', 'roar',
       'call/growl'], dtype=object)

In [324]:
# Split into features and target by column NAME rather than position, so a
# reordered or extended CSV cannot silently leak the label into the features
# or select the wrong target column.
x = train.drop(columns='Call')
y = train['Call']

In [356]:
# One-hot encode the object-typed feature columns so the models receive a purely numeric matrix.
x_dummies = pd.get_dummies(x)

In [357]:
# Hold out 30% of the labelled rows for evaluation. A fixed seed keeps the
# split, and therefore every score computed from it, reproducible across re-runs.
x_train, x_test, y_train, y_test = train_test_split(
    x_dummies, y, test_size=0.30, random_state=30
)

In [358]:
# Hyper-parameters forwarded to XGBClassifier.
# NOTE(review): 'gpu_hist' needs a CUDA-capable GPU; newer XGBoost releases
# reportedly deprecate it in favour of tree_method='hist' with device='cuda' —
# confirm against the installed version before changing.
parameters = {
    'objective': 'multi:softprob',
    'random_state': 30,
    'max_depth': 11,
    'learning_rate': 0.01,
    'subsample': 0.8,
    'colsample_bytree': 0.4,
    'tree_method': 'gpu_hist',
}

In [359]:
# Build the boosted-tree classifier: 1200 trees plus the shared hyper-parameters.
clf = XGBClassifier(n_estimators=1200, **parameters)

In [354]:
# Inspect the feature dtypes before encoding.
# NOTE(review): this cell's execution count (354) is lower than the cells above
# it, so it was run out of order — move it next to the cell that defines `x`.
x.dtypes

Amp_range         float64
Avg_amp           float64
Duration            int64
Zero_Crossings      int64
Species            object
Sex                object
Age                object
dtype: object

In [360]:
# Train the XGBoost classifier on the training split.
clf.fit(x_train, y_train)





XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=0.4, gamma=0, gpu_id=0,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.01, max_delta_step=0, max_depth=11,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=1200, n_jobs=12, num_parallel_tree=1,
              objective='multi:softprob', random_state=30, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=None, subsample=0.8,
              tree_method='gpu_hist', validate_parameters=1, verbosity=None)

In [367]:
# Predict Call labels for the held-out split.
y_pred = clf.predict(x_test)

In [369]:
# Accuracy on the held-out split: the fraction of predictions equal to the true label.
np.mean(y_pred == y_test)

0.75

In [370]:
# Gaussian Naive Bayes model with default settings, for comparison with the XGBoost classifier.
gnb = GaussianNB()

In [372]:
# Fit on the same training split used for the XGBoost model.
gnb.fit(x_train, y_train)

GaussianNB()

In [373]:
# Predicted Call labels for the held-out split.
# NOTE(review): this displays the whole prediction array and no score is
# computed in this cell — consider storing the result and reporting accuracy
# the same way as for the XGBoost model.
gnb.predict(x_test)

array(['purr', 'call/growl', 'purr', 'call', 'roar', 'growl', 'call',
       'growl', 'call/growl', 'call/growl', 'hiss', 'call/growl',
       'call/growl', 'call', 'hiss', 'call', 'call/growl', 'growl',
       'growl', 'growl', 'call/growl', 'call/growl', 'roar', 'hiss',
       'call/growl', 'growl', 'roar', 'hiss', 'call/growl', 'growl',
       'call', 'roar', 'hiss', 'roar', 'call/growl', 'call/growl',
       'growl', 'call', 'roar', 'call/growl', 'growl', 'hiss', 'hiss',
       'call/growl', 'call/growl', 'growl', 'call/growl', 'call/growl',
       'growl', 'call/growl', 'hiss', 'roar', 'purr', 'growl',
       'call/growl', 'call/growl', 'call', 'growl', 'hiss', 'roar',
       'call/growl', 'call/growl', 'roar', 'growl', 'hiss', 'growl',
       'call/growl', 'hiss', 'roar', 'call/growl', 'roar', 'call', 'call',
       'call/growl', 'hiss', 'roar', 'roar', 'hiss', 'call/growl', 'call',
       'call', 'hiss', 'roar', 'growl', 'call', 'call/growl', 'growl',
       'growl', 'roar', 'ca