### IMPORT ALL REQUIRED LIBRARIES

In [None]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten,Dropout
from tensorflow.keras.optimizers import Adam
from sklearn import metrics
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint 
from datetime import datetime 
import matplotlib.pyplot as plt

### READ DATA

In [None]:
# Load the feature-vector CSV into a DataFrame.
csv_path = 'feature_vector_specie_df.csv'
extracted_features_df = pd.read_csv(csv_path)
# Preview the first five rows (the notebook renders the last expression).
extracted_features_df.head(n=5)

### CLEAN DATA

In [None]:
# Inspect the raw 'feature' entry of the first row.
extracted_features_df['feature'][0]

In [None]:
# Look at the last character of the first 'feature' entry to see how the
# raw values are terminated.
first_feature = extracted_features_df['feature'][0]
first_feature[-1]

In [None]:
# The 'feature' column holds each vector as text; remove the leading and
# trailing '[', ']' and newline characters from every row in one
# vectorised pass.
#
# The previous per-row `df.feature[i] = ...` writes were chained
# assignment: pandas warns about them and, with copy-on-write enabled, the
# write does not reach the DataFrame. Stripping all three characters in a
# single call also removes a ']' that is followed by a trailing newline,
# which stripping them one after another left behind.
extracted_features_df['feature'] = extracted_features_df['feature'].str.strip('[]\n')

In [None]:
# Sanity check: the first comma-separated token of row 2 parses as a float.
first_token = extracted_features_df['feature'][2].split(",")[0]
float(first_token)

In [None]:
# The feature vectors were read from the file as comma-separated strings;
# convert each one into a list of floats.
#
# A comprehension replaces the manual append loops, and the per-row debug
# print is dropped: it wrote one full vector per row to the output, and the
# column can be inspected directly instead.
features = [
    [float(value) for value in row.split(",")]
    for row in extracted_features_df['feature']
]

# store the parsed vectors back in the dataframe
extracted_features_df['feature'] = features

In [None]:
# Display the 'feature' column to check the data is in the required format.
extracted_features_df.feature

In [None]:
# Input variables: collect the per-row feature lists into one array.
feature_rows = extracted_features_df['feature'].tolist()
X = np.array(feature_rows)
# Target variable: the 'specie' column as an array.
species_labels = extracted_features_df['specie'].tolist()
y = np.array(species_labels)

In [None]:
# Encode each target label as an integer class index, then convert the
# indices to categorical (one-hot) format.
labelencoder = LabelEncoder()
encoded_labels = labelencoder.fit_transform(y)
y = to_categorical(encoded_labels)

### SPLIT DATA TO TRAIN AND TEST SET

In [None]:
# Hold out 25% of the samples for testing and train on the remaining 75%.
# The split is stratified on y and uses a fixed random seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=3,
    stratify=y,
)

### BUILD MODEL

In [None]:
# Derive the layer sizes from the prepared data instead of hard-coding
# them (previously 5 labels and 40 inputs), so the model keeps matching the
# data when the number of categories or the feature-vector length changes.

# number of categories of the target variable: one column per class in the
# categorical y
num_labels = y.shape[1]
# length of each input feature vector
num_features = X.shape[1]

# deep learning model: Sequential, built from the same layers in the same
# order as before
model = Sequential([
    # first layer
    Dense(100, input_shape=(num_features,)),
    # activation layer "relu"
    Activation('relu'),
    # dropout
    Dropout(0.4),
    # final layer: one unit per category
    Dense(num_labels),
    # activation layer "softmax" to convert vectors into probabilities
    Activation('softmax'),
])

In [None]:
# Configure training: categorical cross-entropy loss, the 'adam' optimizer,
# and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Display model architecture summary
model.summary()

# Calculate pre-training accuracy (as a percentage) on the test split.
# score is [loss, accuracy] because the model is compiled with
# metrics=['accuracy'].
score = model.evaluate(X_test, y_test, verbose=1)
accuracy = 100 * score[1]
# The value used to be computed and then discarded; report it so the
# untrained baseline is visible next to the post-training results.
print("Pre-training accuracy: %.4f%%" % accuracy)

In [None]:
import os

# define the number of epochs
num_epochs = 50
# define batch size for each epoch
num_batch_size = 256

# Make sure the checkpoint directory exists before training starts, so that
# saving the first checkpoint cannot fail on a missing folder.
checkpoint_path = 'saved_models/weights.best.sequential.hdf5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# create a checkpointer to save the best model
checkpointer = ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    save_best_only=True,
)

# Fit the model on the training data with the parameters defined above.
# NOTE(review): the test split is used as validation data, so the "best"
# checkpoint is selected on the same data that is later reported as test
# accuracy; consider a separate validation split.
history = model.fit(
    X_train,
    y_train,
    batch_size=num_batch_size,
    epochs=num_epochs,
    callbacks=[checkpointer],
    validation_data=(X_test, y_test),
    verbose=1,
)

In [None]:
# Show which metrics were recorded during model fitting.
recorded_metrics = history.history.keys()
print(recorded_metrics)

### VISUALISE MODEL FITTING

In [None]:
# Plot the training accuracy, then the validation accuracy, per epoch.
for metric_name in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric_name])
# title and axis labels
plt.title('model accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
# legend entries follow the plotting order above
plt.legend(['train', 'test'], loc='upper left')
# show the graph
plt.show()

In [None]:
# Plot the training loss, then the validation loss, per epoch.
for metric_name in ('loss', 'val_loss'):
    plt.plot(history.history[metric_name])
# title and axis labels
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
# legend entries follow the plotting order above
plt.legend(['train', 'test'], loc='upper left')
# show the graph
plt.show()

In [None]:
# Evaluate the trained model on the training set and then on the testing
# set, printing the accuracy (second entry of the returned scores) for each.
evaluation_sets = (
    ("Training Accuracy: ", X_train, y_train),
    ("Testing Accuracy: ", X_test, y_test),
)
for label, inputs, targets in evaluation_sets:
    score = model.evaluate(inputs, targets, verbose=0)
    print(label, score[1])

### OUTPUTS OF VARIOUS TRIAL METHODS

In [None]:
# with different melbourne skyline birds as separate categories + balanced sampling 
# Training Accuracy:  0.4318658411502838
# Testing Accuracy:  0.28125
  
# with different skyline birds as separate categories + oversampling only
# Training Accuracy:  0.4118658411502838
# Testing Accuracy:  0.30125
    
# with different skyline birds as one category + oversampling only
# Training Accuracy:  0.8696969747543335
# Testing Accuracy:  0.8060606122016907

# with different skyline birds as one category + a separate "not birds" category + oversampling only

### TESTING DATA WITH A NEW TEST FILE

In [None]:
# import librosa
# import numpy as np

# filename = "C:\\Users\\pragya\\Downloads\\1334324461.wav"
# audio, sample_rate = librosa.load(filename, res_type='kaiser_fast') 
# mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
# mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)

# print(mfccs_scaled_features, '\n')
# mfccs_scaled_features=mfccs_scaled_features.reshape(1,-1)
# print(mfccs_scaled_features, '\n')
# print(mfccs_scaled_features.shape, '\n')
# predicted_label=model.predict(mfccs_scaled_features)
# print(predicted_label, '\n')
# classes_x=np.argmax(predicted_label,axis=1)
# print(classes_x, '\n')
# prediction_class = labelencoder.inverse_transform(classes_x)
# prediction_class

In [None]:
### check the encodes for each category of the target variable
# labelencoder.inverse_transform([0])
# labelencoder.inverse_transform([1])
# labelencoder.inverse_transform([2])
# labelencoder.inverse_transform([3])
# labelencoder.inverse_transform([4])