In [3]:
#******************************************************************************************************************************************************#
"""
    @author Shaela Khan, Created : 23rd August, Tuesday, Last Updated 24th August, Wednesday 2022.

#  birdy.ipynb  -> Experimentation
#  Bird Recognition from birdsongs using Deep learning ->s
#  DataSource : - https://www.kaggle.com/datasets/rtatman/british-birdsong-dataset  (Aggregated from the original Xeno-Canto Dataset.)
#  Provided dataset has a directory with - ./small Xeno-Canto/songs --> Original audio dataset.
#                                        -  ./small Xeno-Canto/birdsong_metadata.csv  --> Original Dataset with labels.
                                         -   ./img_data --> audio files converted into image files --> contains spectrographs of audio data.
#
#
#  We then create a CNN model with possible usage of pre-trained models, that can identify the difference classes defined - this is a supervised learning
#  problem.
#  Input: birdsong_metadata.csv + songs
#  Output : T
"""
#*******************************************************************************************************************************************************#


'\n    @author Shaela Khan, Created : 23rd August, Tuesday, Last Updated 24th August, Wednesday 2022.\n\n#  birdy.ipynb  -> Experimentation\n#  Bird Recognition from birdsongs using Deep learning ->s\n#  DataSource : - https://www.kaggle.com/datasets/rtatman/british-birdsong-dataset  (Aggregated from the original Xeno-Canto Dataset.)\n#  Provided dataset has a directory with - ./small Xeno-Canto/songs --> Original audio dataset.\n#                                        -  ./small Xeno-Canto/birdsong_metadata.csv  --> Original Dataset with labels.\n                                         -   ./img_data --> audio files converted into image files --> contains spectrographs of audio data.\n#\n#\n#  We then create a CNN model with possible usage of pre-trained models, that can identify the difference classes defined - this is a supervised learning\n#  problem.\n#  Input: birdsong_metadata.csv + songs\n#  Output : T\n'

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from librosa import core, onset, feature, display
import soundfile as sf
import umap
from IPython.display import Audio
import sklearn
import warnings
warnings.filterwarnings('ignore')

print("Hello Experiment.")

Hello Experiment.


In [None]:
df = pd.read_csv("./small Xeno-Canto/birdsong_metadata.csv")
df.head()

In [6]:
def load_audio(file_id1):
    data1, samplerate = sf.read("./small Xeno-Canto/songs/songs/xc"+str(file_id1)+".flac")
    s = len(data1)/samplerate
    sg1 = feature.melspectrogram(data1, sr=samplerate, hop_length=512)
    mme = sg1.mean(axis=0)

    # Take mean amplitude M from frame with highest energy
    centerpoint = np.argmax(mme)
    M = sg1[:,centerpoint].mean()

    # Filter out all frames with energy less than 5% of M
    mask1 = sg1.mean(axis=0)>=M/20

    audio_mask1 = np.zeros(len(data1), dtype=bool)
    for i in range(0,len(mask1)):
        audio_mask1[i*512:] = mask1[i]

    return sg1, mask1, data1, audio_mask1, samplerate



df['length'] = np.zeros(len(df))
waves = {}

for file_id in df['file_id']:
    sg, mask, data, audio_mask, sample_rate = load_audio(file_id)
    waves[file_id] = data[audio_mask]
    df.loc[df['file_id'] == file_id,'length'] = len(data[audio_mask])
    #print(len(data[audio_mask])/sample_rate)

In [None]:
#print(df.length)
df['length'].hist()
plt.show()
#df['length'].describe()
#print(sg)
#df.head()

# We set window to 6.144000e+03 frames as it's the minimum length among our audio files
df['windows'] = df['length'].apply(lambda x: int(x/6.144000e+03))
df.head()

# To relax the problem we'll consider the genus as the label instead of the species
# We use 23 windows per genus to have a balanced data set

n_windows = df.groupby('species')['windows'].sum().min()
n_windows

In [8]:
# First we create all windows for each file and partition them by species
windows = {}

for file_id in df['file_id']:
    wave = waves[file_id]
    species = df[df['file_id']==file_id]['genus'].values[0] + "_" + df[df['file_id']==file_id]['species'].values[0]
    if species not in windows:
        windows[species] = []
    for i in range(0, int(len(wave)/6.144000e+03)):
        windows[species].append(wave[i:int(i+6.144000e+03)])


In [9]:
# We randomly pick 20 windows for each species
# Save other samples for testing
windows_fixed = {}
windows_fixed_test = {}

for species in windows.keys():
    windows_fixed[species] = []
    windows_fixed_test[species] = []
    ws = windows[species]
    index = np.random.choice(len(ws), n_windows, replace=False)
    for i in range(0, len(ws)):
        if i in index:
            windows_fixed[species].append(ws[i])
        else:
            windows_fixed_test[species].append(ws[i])

In [10]:
# Extract Features from Window
new_dataset = pd.DataFrame()

for species in windows_fixed.keys():
    for i in range(0,n_windows):
        data_point = {'species':species.split('_')[1], 'genus':species.split('_')[0]}
        spec_centroid = feature.spectral_centroid(windows_fixed[species][i])[0]
        chroma = feature.chroma_stft(windows_fixed[species][i], sample_rate)
        for j in range(0,13):
            data_point['spec_centr_'+str(j)] = spec_centroid[j]
            for k in range(0,12):
                data_point['chromogram_'+str(k)+"_"+str(j)] = chroma[k,j]
        new_dataset = new_dataset.append(data_point,ignore_index=True)

new_dataset.head()

Unnamed: 0,species,genus,spec_centr_0,chromogram_0_0,chromogram_1_0,chromogram_2_0,chromogram_3_0,chromogram_4_0,chromogram_5_0,chromogram_6_0,...,chromogram_2_12,chromogram_3_12,chromogram_4_12,chromogram_5_12,chromogram_6_12,chromogram_7_12,chromogram_8_12,chromogram_9_12,chromogram_10_12,chromogram_11_12
0,flammea,Acanthis,2443.446204,1.0,0.620579,0.318997,0.151927,0.094071,0.09418,0.06865,...,0.555383,0.392124,0.218584,0.100603,0.040804,0.028532,0.067722,0.145757,0.509797,1.0
1,flammea,Acanthis,2441.397755,1.0,0.62025,0.318581,0.151816,0.094349,0.094949,0.069267,...,0.55378,0.390659,0.218012,0.100694,0.040641,0.028188,0.067299,0.14526,0.510484,1.0
2,flammea,Acanthis,2440.042293,1.0,0.622055,0.320582,0.152584,0.094716,0.095581,0.070223,...,0.553241,0.390372,0.218837,0.101043,0.040824,0.027893,0.066034,0.143134,0.505659,1.0
3,flammea,Acanthis,2439.988776,1.0,0.621762,0.320039,0.152444,0.095059,0.096328,0.070788,...,0.551704,0.388921,0.217786,0.100475,0.040764,0.027681,0.06543,0.142715,0.506232,1.0
4,flammea,Acanthis,2459.276398,1.0,0.621538,0.319594,0.152282,0.09558,0.097411,0.071672,...,0.549077,0.386734,0.216927,0.099844,0.040137,0.027416,0.064454,0.142324,0.507135,1.0


In [11]:
#Extract Features from Window for test
new_dataset_test = pd.DataFrame()

for species in windows_fixed_test.keys():
    for i in range(0,len(windows_fixed_test[species])):
        data_point = {'species':species.split('_')[1], 'genus':species.split('_')[0]}
        spec_centroid = feature.spectral_centroid(windows_fixed_test[species][i])[0]
        chroma = feature.chroma_stft(windows_fixed_test[species][i], sample_rate)
        for j in range(0,13):
            data_point['spec_centr_'+str(j)] = spec_centroid[j]
            for k in range(0,12):
                data_point['chromogram_'+str(k)+"_"+str(j)] = chroma[k,j]
        new_dataset_test = new_dataset_test.append(data_point,ignore_index=True)

new_dataset_test.head()

Unnamed: 0,species,genus,spec_centr_0,chromogram_0_0,chromogram_1_0,chromogram_2_0,chromogram_3_0,chromogram_4_0,chromogram_5_0,chromogram_6_0,...,chromogram_2_12,chromogram_3_12,chromogram_4_12,chromogram_5_12,chromogram_6_12,chromogram_7_12,chromogram_8_12,chromogram_9_12,chromogram_10_12,chromogram_11_12
0,flammea,Acanthis,2442.089677,0.645542,0.34945,0.166184,0.096049,0.093169,0.073407,0.034041,...,0.412498,0.243003,0.112923,0.046037,0.028564,0.061722,0.129256,0.452654,1.0,0.714536
1,flammea,Acanthis,2439.945547,0.645315,0.34925,0.166102,0.096156,0.093512,0.073729,0.034165,...,0.411641,0.242523,0.112759,0.045962,0.028441,0.061449,0.128977,0.452955,1.0,0.714293
2,flammea,Acanthis,2440.628987,1.0,0.620424,0.318773,0.151869,0.094209,0.094567,0.068958,...,0.554592,0.391417,0.218344,0.100698,0.040755,0.028357,0.067543,0.145454,0.510272,1.0
3,flammea,Acanthis,2443.366373,1.0,0.620077,0.318395,0.151747,0.094501,0.095324,0.069563,...,0.552913,0.389868,0.217546,0.100458,0.040529,0.028066,0.066965,0.145106,0.510679,1.0
4,flammea,Acanthis,2444.563339,1.0,0.621917,0.320325,0.152506,0.094887,0.095952,0.070502,...,0.552449,0.389643,0.218332,0.100761,0.040778,0.027789,0.065724,0.142933,0.505951,1.0


In [12]:
# Prepare dataset to fit a simple model
features= list(new_dataset.columns)
features.remove('species')
features.remove('genus')

X = new_dataset[features].values
y = new_dataset['species'].values

X_test = new_dataset_test[features].values
y_test = new_dataset_test['species'].values

In [13]:
# Use Naive Bayes as benchmark

from sklearn import naive_bayes
NB = naive_bayes.GaussianNB()

SSS = sklearn.model_selection.StratifiedShuffleSplit(n_splits=5, test_size=0.2)
accs = []

for train_index, val_index in SSS.split(X, y):
    X_train, X_val = X[train_index], X[val_index]
    y_train, y_val = y[train_index], y[val_index]

    NB.fit(X_train, y_train)

    y_pred = NB.predict(X_val)

    accs.append(sklearn.metrics.accuracy_score(y_pred=y_pred, y_true=y_val))

print(accs)

[0.8948863636363636, 0.9005681818181818, 0.9034090909090909, 0.8778409090909091, 0.9232954545454546]


In [14]:
y_pred = NB.predict(X_test)
sklearn.metrics.accuracy_score(y_pred=y_pred, y_true=y_test)

0.9302297606159028

In [None]:
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
# Converting numpy arrays to tensors
import tensorflow as tf
import numpy as np

# Preparing the dataset for CNN.
new_dataset['labels'] = new_dataset['genus'].astype(str)


datagen=ImageDataGenerator(rescale=1./255,validation_split=0.2) # Training and Validation split 70/30
#test_datagen = ImageDataGenerator(rescale=1./255)


train_generator =datagen.flow_from_dataframe(dataframe=new_dataset,directory='./small Xeno-Canto/songs/songs/',
                                             x_col="file_id",
                                             y_col="genus",
                                             class_mode="categorical",
                                             target_size=(224,224), #target_size=(64,64)
                                             #validation_split=0.2,
                                             subset="training",
                                             seed=1337,batch_size=30,shuffle=True)

print('Train generator created')
val_generator =datagen.flow_from_dataframe(dataframe=new_dataset_test,directory='./small Xeno-Canto/songs/songs/',
                                           x_col="file_id",
                                           y_col="genus",
                                           class_mode="categorical",
                                           target_size=(224,224), #target_size=(64,64)
                                           #subset="validation",
                                           seed=42,batch_size=30,shuffle=True)


print('Validation generator created')
train_image_data, train_labels = train_generator.next()