# Import Libraries

In [1]:
from __future__ import print_function, unicode_literals, division

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler, LabelEncoder, OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

import warnings
# Suppress every warning for the rest of the session so library notices do not clutter cell output.
# NOTE(review): a blanket 'ignore' also hides deprecation and convergence warnings —
# consider narrowing the filter to specific categories once the notebook is stable.
warnings.filterwarnings('ignore')

# Load Data

In [2]:
# CSV export of the Anuran Calls (MFCCs) dataset, resolved relative to the
# notebook's working directory so the notebook is not tied to one machine.
DATA_PATH = 'Frogs_MFCCs.csv'

df = pd.read_csv(DATA_PATH)

# Preview the first two rows to confirm the file parsed as expected.
df.head(2)

Unnamed: 0,MFCCs_ 1,MFCCs_ 2,MFCCs_ 3,MFCCs_ 4,MFCCs_ 5,MFCCs_ 6,MFCCs_ 7,MFCCs_ 8,MFCCs_ 9,MFCCs_10,...,MFCCs_17,MFCCs_18,MFCCs_19,MFCCs_20,MFCCs_21,MFCCs_22,Family,Genus,Species,RecordID
0,1.0,0.152936,-0.105586,0.200722,0.317201,0.260764,0.100945,-0.150063,-0.171128,0.124676,...,-0.108351,-0.077623,-0.009568,0.057684,0.11868,0.014038,Leptodactylidae,Adenomera,AdenomeraAndre,1
1,1.0,0.171534,-0.098975,0.268425,0.338672,0.268353,0.060835,-0.222475,-0.207693,0.170883,...,-0.090974,-0.05651,-0.035303,0.02014,0.082263,0.029056,Leptodactylidae,Adenomera,AdenomeraAndre,1


In [3]:
# Show the last rows of the frame to check that the end of the file was read completely.
df.tail()

Unnamed: 0,MFCCs_ 1,MFCCs_ 2,MFCCs_ 3,MFCCs_ 4,MFCCs_ 5,MFCCs_ 6,MFCCs_ 7,MFCCs_ 8,MFCCs_ 9,MFCCs_10,...,MFCCs_17,MFCCs_18,MFCCs_19,MFCCs_20,MFCCs_21,MFCCs_22,Family,Genus,Species,RecordID
7190,1.0,-0.554504,-0.337717,0.035533,0.034511,0.443451,0.093889,-0.100753,0.037087,0.081075,...,0.06943,0.071001,0.021591,0.052449,-0.02186,-0.07986,Hylidae,Scinax,ScinaxRuber,60
7191,1.0,-0.517273,-0.370574,0.030673,0.068097,0.40289,0.096628,-0.11646,0.063727,0.089034,...,0.061127,0.068978,0.017745,0.046461,-0.015418,-0.101892,Hylidae,Scinax,ScinaxRuber,60
7192,1.0,-0.582557,-0.343237,0.029468,0.064179,0.385596,0.114905,-0.103317,0.07037,0.081317,...,0.082474,0.077771,-0.009688,0.027834,-0.000531,-0.080425,Hylidae,Scinax,ScinaxRuber,60
7193,1.0,-0.519497,-0.307553,-0.004922,0.072865,0.377131,0.086866,-0.115799,0.056979,0.089316,...,0.051796,0.069073,0.017963,0.041803,-0.027911,-0.096895,Hylidae,Scinax,ScinaxRuber,60
7194,1.0,-0.508833,-0.324106,0.062068,0.078211,0.397188,0.094596,-0.117672,0.058874,0.07618,...,0.061455,0.072983,-0.00398,0.03156,-0.029355,-0.08791,Hylidae,Scinax,ScinaxRuber,60


# Check data

In [4]:
# Summarise column names, non-null counts and dtypes for the whole frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7195 entries, 0 to 7194
Data columns (total 26 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   MFCCs_ 1  7195 non-null   float64
 1   MFCCs_ 2  7195 non-null   float64
 2   MFCCs_ 3  7195 non-null   float64
 3   MFCCs_ 4  7195 non-null   float64
 4   MFCCs_ 5  7195 non-null   float64
 5   MFCCs_ 6  7195 non-null   float64
 6   MFCCs_ 7  7195 non-null   float64
 7   MFCCs_ 8  7195 non-null   float64
 8   MFCCs_ 9  7195 non-null   float64
 9   MFCCs_10  7195 non-null   float64
 10  MFCCs_11  7195 non-null   float64
 11  MFCCs_12  7195 non-null   float64
 12  MFCCs_13  7195 non-null   float64
 13  MFCCs_14  7195 non-null   float64
 14  MFCCs_15  7195 non-null   float64
 15  MFCCs_16  7195 non-null   float64
 16  MFCCs_17  7195 non-null   float64
 17  MFCCs_18  7195 non-null   float64
 18  MFCCs_19  7195 non-null   float64
 19  MFCCs_20  7195 non-null   float64
 20  MFCCs_21  7195 non-null   floa

In [5]:
# Count missing values per column; every entry should be 0 before modelling.
df.isna().sum()

MFCCs_ 1    0
MFCCs_ 2    0
MFCCs_ 3    0
MFCCs_ 4    0
MFCCs_ 5    0
MFCCs_ 6    0
MFCCs_ 7    0
MFCCs_ 8    0
MFCCs_ 9    0
MFCCs_10    0
MFCCs_11    0
MFCCs_12    0
MFCCs_13    0
MFCCs_14    0
MFCCs_15    0
MFCCs_16    0
MFCCs_17    0
MFCCs_18    0
MFCCs_19    0
MFCCs_20    0
MFCCs_21    0
MFCCs_22    0
Family      0
Genus       0
Species     0
RecordID    0
dtype: int64

In [6]:
# Distinct values of the Family label column (first level of the label hierarchy used as a target).
df['Family'].unique()

array(['Leptodactylidae', 'Dendrobatidae', 'Hylidae', 'Bufonidae'],
      dtype=object)

In [7]:
# Distinct values of the Genus label column (second target).
df['Genus'].unique()

array(['Adenomera', 'Ameerega', 'Dendropsophus', 'Hypsiboas',
       'Leptodactylus', 'Osteocephalus', 'Rhinella', 'Scinax'],
      dtype=object)

In [8]:
# Distinct values of the Species label column (third target).
df['Species'].unique()

array(['AdenomeraAndre', 'Ameeregatrivittata', 'AdenomeraHylaedactylus',
       'HylaMinuta', 'HypsiboasCinerascens', 'HypsiboasCordobae',
       'LeptodactylusFuscus', 'OsteocephalusOophagus',
       'Rhinellagranulosa', 'ScinaxRuber'], dtype=object)

In [9]:
# Distinct RecordID values; this column identifies the source record and is not used as a feature.
df['RecordID'].unique()

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
       52, 53, 54, 55, 56, 57, 58, 59, 60], dtype=int64)

In [10]:
# (rows, columns) of the loaded frame.
df.shape

(7195, 26)

# Choose features and labels

In [11]:
# The three taxonomic columns are the prediction targets; RecordID is an
# identifier, so it is excluded from the features together with the targets.
target_columns = ['Family', 'Genus', 'Species']
non_feature_columns = target_columns + ['RecordID']

y = df[target_columns].to_numpy()
X = df.drop(columns=non_feature_columns).to_numpy()

# Sanity check: both arrays must have the same number of rows.
print(X.shape, y.shape)

(7195, 22) (7195, 3)


# Convert categorical labels to numeric codes

In [12]:
# Encode each label column (Family, Genus, Species) as ordinal codes, one
# category list per column.
#
# The encoder is fitted on the string labels read directly from `df`, not on
# `y`: `y` is overwritten with the numeric codes below, so fitting on `y`
# would, on a second run of this cell, learn the codes themselves as
# categories and lose the original label names in `enc.categories_` /
# `enc.inverse_transform`. Reading from `df` keeps the cell safe to re-run.
label_names = df[['Family', 'Genus', 'Species']].values

enc = OrdinalEncoder()
y = enc.fit_transform(label_names)

# Category order defines the code: the i-th entry of each list maps to float(i).
print(enc.categories_)
print(y)

[array(['Bufonidae', 'Dendrobatidae', 'Hylidae', 'Leptodactylidae'],
      dtype=object), array(['Adenomera', 'Ameerega', 'Dendropsophus', 'Hypsiboas',
       'Leptodactylus', 'Osteocephalus', 'Rhinella', 'Scinax'],
      dtype=object), array(['AdenomeraAndre', 'AdenomeraHylaedactylus', 'Ameeregatrivittata',
       'HylaMinuta', 'HypsiboasCinerascens', 'HypsiboasCordobae',
       'LeptodactylusFuscus', 'OsteocephalusOophagus',
       'Rhinellagranulosa', 'ScinaxRuber'], dtype=object)]
[[3. 0. 0.]
 [3. 0. 0.]
 [3. 0. 0.]
 ...
 [2. 7. 9.]
 [2. 7. 9.]
 [2. 7. 9.]]


# Split data into training and test sets

In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split reproducible.
# NOTE(review): the split is row-wise and ignores RecordID, so rows from the same
# record can land in both sets — confirm whether a grouped split is needed.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both sets so no test information leaks into scaling.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Training


## KNeighborsClassifier Model

In [14]:
# 3-nearest-neighbour base estimator; MultiOutputClassifier fits one copy of it
# per label column of y_train.
knn = KNeighborsClassifier(n_neighbors=3)
classifier = MultiOutputClassifier(knn, n_jobs=-1).fit(X_train, y_train)

# For MultiOutputClassifier, score() is the fraction of test rows for which
# every label column is predicted correctly.
score = classifier.score(X_test, y_test)
y_pred = classifier.predict(X_test)

print('Prediction:', y_pred[:10])
print('Score :', score)

Prediction: [[3. 0. 1.]
 [2. 3. 4.]
 [2. 3. 5.]
 [3. 0. 1.]
 [3. 0. 1.]
 [3. 0. 1.]
 [3. 0. 1.]
 [3. 0. 0.]
 [3. 0. 0.]
 [2. 3. 4.]]
Score : 0.9840166782487839


## DecisionTreeClassifier

In [15]:
# Decision-tree base estimator with a fixed seed for reproducible trees;
# MultiOutputClassifier fits one copy of it per label column of y_train.
tree = DecisionTreeClassifier(random_state=42)
classifier = MultiOutputClassifier(tree, n_jobs=-1).fit(X_train, y_train)

# For MultiOutputClassifier, score() is the fraction of test rows for which
# every label column is predicted correctly.
score = classifier.score(X_test, y_test)
y_pred = classifier.predict(X_test)

print('Prediction:', y_pred[:10])
print('Score :', score)

Prediction: [[3. 0. 1.]
 [2. 3. 4.]
 [2. 3. 5.]
 [3. 0. 1.]
 [3. 0. 1.]
 [3. 0. 1.]
 [3. 0. 1.]
 [3. 0. 0.]
 [3. 0. 0.]
 [2. 3. 7.]]
Score : 0.8992355802640722


## RandomForest Classifier

In [16]:
# Random-forest base estimator with a fixed seed for reproducible results;
# MultiOutputClassifier fits one copy of it per label column of y_train.
forest = RandomForestClassifier(random_state=42)
classifier = MultiOutputClassifier(forest, n_jobs=-1).fit(X_train, y_train)

# For MultiOutputClassifier, score() is the fraction of test rows for which
# every label column is predicted correctly.
score = classifier.score(X_test, y_test)
y_pred = classifier.predict(X_test)

print('Prediction:', y_pred[:10])
print('Score :', score)

Prediction: [[3. 0. 1.]
 [2. 3. 4.]
 [2. 3. 5.]
 [3. 0. 1.]
 [3. 0. 1.]
 [3. 0. 1.]
 [3. 0. 1.]
 [3. 0. 0.]
 [3. 0. 0.]
 [2. 3. 4.]]
Score : 0.9777623349548298
