# **BirdCLEF 2022**

## **Identify bird calls in soundscapes**

### **This is my first time working with audio signals**

### **Version 2: preprocessing in one audio**

#### **--Load --> waveform**
#### **--fft --> Spectrum**
#### **--stft --> Spectrogram**
#### **--MFCCs**

### **Version 5: Generate Json file (Preprocessing)**

### **Version 7: Apply the preprocessing output (data.json) ---> Build a CNN model using the TensorFlow framework**

### ***Now I'm trying to build an LSTM model using data.json***



In [None]:
import numpy as np
import pandas as pd
import librosa,librosa.display
import matplotlib.pyplot as plt

In [None]:
!ls ../input/birdclef-2022

# **Load**

In [None]:
# Path to the single example recording that the waveform cell below loads.
file1 = "../input/birdclef-2022/train_audio/afrsil1/XC125458.ogg"

# **Display_Waveform**

In [None]:
import librosa
import librosa.display

# Load the example recording at a 22050 Hz sample rate.
signal, sr = librosa.load(file1, sr=22050)

# Plot the waveform: amplitude against time.
librosa.display.waveshow(y=signal, sr=sr)
plt.xlabel("Time")
plt.ylabel("Amplitude")
plt.show()


# **FFT**
The "Fast Fourier Transform" (FFT) is an important measurement method in the science of audio and acoustics measurement. It converts a signal into individual spectral components and thereby provides frequency information about the signal.

In [None]:
# FFT --> spectrum of the whole recording

fft = np.fft.fft(signal)

# Magnitude of each complex FFT bin.
magnitude = np.abs(fft)
# One x-axis value per bin, evenly spaced from 0 to the sample rate.
frequency = np.linspace(0, sr, num=len(magnitude))

# Plot the full spectrum.
plt.plot(frequency, magnitude)
plt.xlabel('Frequency')
plt.ylabel('Magnitude')
plt.show()

## **Left_side_Visualize**

In [None]:
# FFT --> spectrum, keeping only the first half of the bins

fft = np.fft.fft(signal)

magnitude = np.abs(fft)
frequency = np.linspace(0, sr, num=len(magnitude))

# Index of the midpoint; everything before it is the "left" half.
half = len(frequency) // 2
left_frequency = frequency[:half]
left_magnitude = magnitude[:half]

# Plot the left half only.
plt.plot(left_frequency, left_magnitude)
plt.xlabel('Frequency')
plt.ylabel('Magnitude')
plt.show()

# **STFT**

The Short-time Fourier transform (STFT) is a Fourier-related transform used to determine the sinusoidal frequency and phase content of local sections of a signal as it changes over time.

In [None]:
# STFT --> spectrogram

n_fft = 2048      # number of samples per FFT window
hop_length = 512  # number of samples the window is shifted between frames

stft = librosa.core.stft(signal, n_fft=n_fft, hop_length=hop_length)

# Magnitude of the complex STFT.
spectrogram = np.abs(stft)

# Convert amplitudes to decibels so that low-energy detail is visible.
log_spectrogram = librosa.amplitude_to_db(spectrogram)

librosa.display.specshow(log_spectrogram, sr=sr, hop_length=hop_length)
plt.xlabel("Time")
plt.ylabel("Frequency")
plt.colorbar()
plt.show()

# **MFCCS**

The MFCC feature extraction technique basically includes windowing the signal, applying the DFT, taking the log of the magnitude, and then warping the frequencies on a Mel scale, followed by applying the inverse DCT. 

In [None]:
# MFCCs
# n_mfcc=13 is a commonly used number of coefficients.
# The audio is passed as ``y=`` because recent librosa releases accept it
# only as a keyword argument (a positional call raises TypeError); ``sr`` is
# forwarded explicitly instead of relying on librosa's default.
MFFCS = librosa.feature.mfcc(y=signal, sr=sr, n_fft=n_fft, hop_length=hop_length, n_mfcc=13)

librosa.display.specshow(MFFCS, sr=sr, hop_length=hop_length)
plt.xlabel("Time")
plt.ylabel("MFFCS")
plt.colorbar()
plt.show()

# **Apply the above preprocessing to all the data**

In [None]:
!ls ../input/birdclef-2022

In [None]:
import json
import os

# Root directory of the competition data.
Path = "../input/birdclef-2022/"

# Parse scored_birds.json from the competition directory.
scored_birds_file = os.path.join(Path, "scored_birds.json")
with open(scored_birds_file) as f:
    scored_birds = json.load(f)

# Number of entries in the file.
print(len(scored_birds))

# Show the entries as a table.
df = pd.DataFrame(scored_birds)
display(df)

In [None]:
# Load the three CSV tables from the competition directory
# (``Path`` already ends with a path separator).
eBird = pd.read_csv(f"{Path}eBird_Taxonomy_v2021.csv")
train_meta = pd.read_csv(f"{Path}train_metadata.csv")
test = pd.read_csv(f"{Path}test.csv")

In [None]:
# Preview three randomly chosen rows of the eBird taxonomy table.
eBird.sample(3)

In [None]:
# Preview three randomly chosen rows of the training metadata.
train_meta.sample(3)

In [None]:
# Preview three randomly chosen rows of the test table.
test.sample(3)

In [None]:
!ls ../input/birdclef-2022/train_audio

# ***Preprocessing whole data***

In [None]:
import os
import librosa
import math

# Directory walked by save_mfcc: one sub-folder of recordings per label.
DATASET_PATH = "../input/birdclef-2022/train_audio"
# Output file that receives the extracted MFCCs.
JSON_PATH = "data.json"

# Sample rate (Hz) every recording is loaded at.
SAMPLE_RATE = 22050
# Length, in seconds, of the portion of each recording that gets segmented.
DURATION = 11
# Number of samples in that portion.
SAMPLE_PER_TRACK = DURATION * SAMPLE_RATE

In [None]:

def save_mfcc(dataset_path,json_path,n_mfcc=13,n_fft=2040,hop_length=512,num_segments=5):
    """Extracts MFCCs from every audio file under dataset_path and saves them to a json file.

    Each sub-directory of dataset_path is treated as one label. The first
    SAMPLE_PER_TRACK samples of every recording are cut into num_segments
    consecutive segments; a segment is stored only if its MFCC matrix has the
    expected number of frames, so all stored segments share the same shape.

        :param dataset_path (str): Root directory containing one sub-directory per label
        :param json_path (str): Path of the json file to write
        :param n_mfcc (int): Number of MFCC coefficients per frame
        :param n_fft (int): FFT window size in samples
        :param hop_length (int): Number of samples between successive frames
        :param num_segments (int): Number of segments each recording is divided into
        :return: None. Writes a json object with the keys "mapping" (label names),
            "mfcc" (one frames x n_mfcc list per segment) and "labels"
            (index into "mapping" for each stored segment)
    """
    # NOTE(review): the default n_fft=2040 differs from the 2048 used in the
    # exploratory STFT cell - possibly a typo. Left unchanged so the signature
    # and default behaviour stay stable.

    # dictionary to store data
    data = {
        "mapping": [],
        "mfcc": [],
        "labels": []
    }

    num_sample_per_segment = int(SAMPLE_PER_TRACK / num_segments)
    expected_num_mfcc_vectors_per_segment = math.ceil(num_sample_per_segment / hop_length) #1.2 -> 2

    # os.walk yields the root first; compare by value (not identity) to skip it
    root_path = os.fspath(dataset_path)

    # loop through all the bird sound folders
    for dirpath, _dirnames, filenames in os.walk(dataset_path):

        # ensure that we are not at the root level
        if dirpath == root_path:
            continue

        # save the semantic label (the folder name)
        semantic_label = os.path.basename(dirpath)
        data["mapping"].append(semantic_label)
        # derive the label from the mapping itself so that labels and mapping
        # stay aligned regardless of the order in which folders are visited
        label_index = len(data["mapping"]) - 1
        print("\nProcessing {}".format(semantic_label))

        # process files for a specific label
        for f in filenames:

            # load audio file
            file_path = os.path.join(dirpath, f)
            signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

            # extract MFCCs segment by segment and store them
            for s in range(num_segments):
                # segment s covers [s * n, (s + 1) * n); without the factor s
                # every segment of a file would be the same slice
                start_sample = num_sample_per_segment * s
                finish_sample = start_sample + num_sample_per_segment

                # recording is shorter than this segment's start: nothing left
                if start_sample >= len(signal):
                    break

                # y= must be passed by keyword (keyword-only in recent librosa)
                mfcc = librosa.feature.mfcc(y=signal[start_sample:finish_sample],
                                            sr=sr,
                                            n_fft=n_fft,
                                            n_mfcc=n_mfcc,
                                            hop_length=hop_length)
                mfcc = mfcc.T

                # store mfcc for segment only if it has the expected length
                if len(mfcc) == expected_num_mfcc_vectors_per_segment:
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(label_index)
                    print("{},segment:{}".format(file_path,s))

    with open(json_path,"w") as fp:
        json.dump(data,fp,indent=4)

if __name__ == "__main__":
    # Build data.json from the training audio, splitting each recording
    # into 10 segments instead of the default 5.
    save_mfcc(DATASET_PATH,JSON_PATH,num_segments=10)
    

        

# **Build the model ---> LSTM-Tensorflow**

In [None]:
import json
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
import matplotlib.pyplot as plt

# json file that prepare_datasets() reads the training data from.
DATA_PATH = "./data.json"


def load_data(data_path):
    """Reads the json dataset and returns its inputs and targets as arrays.
        :param data_path (str): Path to json file containing data
        :return X (ndarray): Inputs, built from the "mfcc" entry
        :return y (ndarray): Targets, built from the "labels" entry
    """

    with open(data_path, "r") as json_file:
        payload = json.load(json_file)

    # convert the nested lists into numpy arrays
    inputs = np.array(payload["mfcc"])
    targets = np.array(payload["labels"])

    return inputs, targets


def plot_history(history):
    """Draws two stacked plots of the training history: accuracy on top, loss below.
        :param history: Training history of model
        :return:
    """

    metrics = history.history
    fig, (accuracy_ax, error_ax) = plt.subplots(2)

    # top panel: accuracy per epoch
    accuracy_ax.plot(metrics["accuracy"], label="train accuracy")
    accuracy_ax.plot(metrics["val_accuracy"], label="test accuracy")
    accuracy_ax.set_ylabel("Accuracy")
    accuracy_ax.legend(loc="lower right")
    accuracy_ax.set_title("Accuracy eval")

    # bottom panel: loss per epoch
    error_ax.plot(metrics["loss"], label="train error")
    error_ax.plot(metrics["val_loss"], label="test error")
    error_ax.set_ylabel("Error")
    error_ax.set_xlabel("Epoch")
    error_ax.legend(loc="upper right")
    error_ax.set_title("Error eval")

    plt.show()


def prepare_datasets(test_size, validation_size):
    """Reads the dataset at DATA_PATH and divides it into train, validation and test sets.
    :param test_size (float): Value in [0, 1] indicating percentage of data set to allocate to test split
    :param validation_size (float): Value in [0, 1] indicating percentage of train set to allocate to validation split
    :return X_train (ndarray): Input training set
    :return X_validation (ndarray): Input validation set
    :return X_test (ndarray): Input test set
    :return y_train (ndarray): Target training set
    :return y_validation (ndarray): Target validation set
    :return y_test (ndarray): Target test set
    """

    inputs, targets = load_data(DATA_PATH)

    # first split: hold out the test set
    X_remaining, X_test, y_remaining, y_test = train_test_split(
        inputs, targets, test_size=test_size
    )

    # second split: carve the validation set out of what remains
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_remaining, y_remaining, test_size=validation_size
    )

    return X_train, X_validation, X_test, y_train, y_validation, y_test


def build_model(input_shape, num_classes=152):
    """Generates RNN-LSTM model
    :param input_shape (tuple): Shape of one input sample, passed to the first LSTM layer
    :param num_classes (int): Number of units in the softmax output layer.
        Defaults to 152, the value that was previously hard-coded, so existing
        calls behave exactly as before
    :return model: RNN-LSTM model (not compiled)
    """

    # build network topology
    model = keras.Sequential()

    # 2 LSTM layers; the first returns the full sequence so the second can consume it
    model.add(keras.layers.LSTM(64, input_shape=input_shape, return_sequences=True))
    model.add(keras.layers.LSTM(64))

    # dense layer followed by dropout
    model.add(keras.layers.Dense(64, activation='relu'))
    model.add(keras.layers.Dropout(0.3))

    # output layer: one softmax unit per class
    model.add(keras.layers.Dense(num_classes, activation='softmax'))

    return model


if __name__ == "__main__":

    # split the data: 25% held out for testing, 20% of the remainder for validation
    X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(
        test_size=0.25, validation_size=0.2
    )

    # the network takes one sample at a time: (axis 1, axis 2) of the training array
    input_shape = (X_train.shape[1], X_train.shape[2])
    model = build_model(input_shape)

    # compile with Adam; targets are integer class indices
    optimiser = keras.optimizers.Adam(learning_rate=1e-4)
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=optimiser,
        metrics=['accuracy'],
    )

    model.summary()

    # train, monitoring the validation split after every epoch
    history = model.fit(
        X_train,
        y_train,
        epochs=30,
        batch_size=32,
        validation_data=(X_validation, y_validation),
    )

    # accuracy/error curves for training and validation
    plot_history(history)

    # final score on the held-out test set
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
    print('\nTest accuracy:', test_acc)

In [None]:
# Save the trained model to 'fg_model.h5' in the current working directory.
model.save('fg_model.h5')

## **Update_Next_ComingSoon.........**

**Reference: [https://youtu.be/szyGiObZymo](https://youtu.be/szyGiObZymo)**