<b>Table of contents</b>
<ul>
    <li>Overview</li>
    <li>Importing the required libraries</li>
    <li>Configuration</li>
    <li>Helper functions</li>
    <li>Loading the data</li>
    <li>Create TensorFlow Dataset</li>
    <li>Model Development</li>
    <li>Model Evaluation</li>
    <li>Submit</li>
</ul>

In [None]:
import glob
import json
import os

import IPython.display as ipd
import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.metrics
import tensorflow as tf
import tensorflow_io as tfio
from IPython.display import Audio
from sklearn.model_selection import train_test_split

In [None]:
# Configuration
class CFG:
    """Notebook-wide settings, exposed as plain class attributes."""

    # Two-element list; both entries are 256.
    image_size = [256, 256]

    # Boolean switch, off by default.
    is_training = False

    # Epoch count.
    epochs = 10

In [None]:
# Function to load audio files
def load_audio(f_path, input_len=32000, sr=32000):
    """Load an audio file and force it to exactly ``input_len`` samples.

    The original signature spelled the first parameter ``f_apth`` while the
    body read ``f_path``, so every call raised ``NameError``; positional
    callers are unaffected by the rename.

    Args:
        f_path: Path of the audio file, handed to ``librosa.load``.
        input_len: Number of samples in the returned array (default 32000,
            the value that used to be hard-coded).
        sr: Sample rate requested from ``librosa.load``. Defaults to 32000,
            the rate the other cells of this notebook use.
            NOTE(review): the original relied on librosa's own default
            rate; confirm 32000 is what the data needs.

    Returns:
        The decoded samples, cut to the first ``input_len`` entries when the
        file is longer, or zero-padded on the right when it is shorter.
    """
    data, _ = librosa.load(f_path, sr=sr)
    if len(data) > input_len:
        return data[:input_len]
    # Reaching here means len(data) <= input_len, so the pad width is >= 0.
    return np.pad(data, (0, input_len - len(data)), "constant")

def visualize_data(data):
    """Plot a raw waveform with matplotlib and show the figure.

    The x axis is ``np.linspace(0, 1, len(data))``, i.e. a normalised
    position inside the clip rather than a time in seconds.

    Args:
        data: Sequence of amplitude values to plot.

    Sample usage:
        data = load_audio("dir")
        visualize_data(data)
        ipd.Audio(data, rate=32000)

    Other things to try: add white noise, stretch the sound.
    """
    # figsize must be a single (width, height) tuple; `figsize=14,8` does
    # not parse.
    plt.figure(figsize=(14, 8))
    plt.title('Raw Wave')
    plt.ylabel('Amplitude')
    plt.plot(np.linspace(0, 1, len(data)), data)
    plt.show()
    
# Preprocessing audio data
def preprocess(audio_dir, label):
    """Read a Vorbis file and cut a random window of ``5 * 32000`` samples.

    Args:
        audio_dir: Path of the audio file, read with ``tf.io.read_file``.
        label: Passed through unchanged.

    Returns:
        ``(audio_tensor, label)``. When the decoded audio has more than
        ``5 * 32000`` samples, ``audio_tensor`` is a window of exactly that
        length starting at a random offset; otherwise it is the whole clip.
        The original fell off the end of the function and returned ``None``.
    """
    clip_len = 5 * 32000

    audio_string = tf.io.read_file(audio_dir)
    audio = tfio.audio.decode_vorbis(audio_string)
    # Drops the last axis; this only succeeds when that axis has size 1.
    audio_tensor = tf.squeeze(audio, axis=[-1])
    total_len = tf.shape(audio_tensor)[0]

    # Surplus samples beyond one window; float so it can scale the draw.
    diff = tf.cast(total_len - clip_len, tf.float32)
    start = tf.cast(tf.random.uniform(shape=()) * diff, tf.int32)
    # With a non-positive surplus the product above is <= 0, so pin the
    # window to the beginning. The original referenced an undefined `begin`
    # here and then sliced with the unguarded offset.
    start = tf.where(diff > 0, start, 0)
    stop = tf.where(diff > 0, start + clip_len, total_len)
    audio_tensor = audio_tensor[start:stop]
    return audio_tensor, label

In [None]:
class MelSpecComputer:
    """Callable that turns a waveform into a dB-scaled mel spectrogram.

    Args:
        sr: Sample rate forwarded to librosa.
        n_mels: Number of mel bands.
        f_min: Lowest frequency, forwarded as librosa's ``fmin``.
        f_max: Highest frequency, forwarded as librosa's ``fmax``.
        **kwargs: Extra keyword arguments forwarded verbatim to
            ``librosa.feature.melspectrogram``. ``n_fft`` defaults to
            ``sr // 40`` when not supplied.
    """

    def __init__(self, sr, n_mels, f_min, f_max, **kwargs):
        self.sr = sr
        self.n_mels = n_mels
        self.f_min = f_min
        self.f_max = f_max
        # Only fill in n_fft when the caller did not choose one.
        kwargs.setdefault("n_fft", self.sr // (10 * 4))
        self.kwargs = kwargs

    def __call__(self, y):
        """Return the mel spectrogram of ``y`` in dB as ``float32``."""
        # The file imports the package as `librosa` (there is no `lb`
        # alias), and librosa spells the band limits `fmin` / `fmax`.
        mel_spec = librosa.feature.melspectrogram(
            y=y,
            sr=self.sr,
            n_mels=self.n_mels,
            fmin=self.f_min,
            fmax=self.f_max,
            **self.kwargs,
        )
        return librosa.power_to_db(mel_spec).astype(np.float32)

In [None]:
# Generate random integers
def random_int(shape=(), min_val=0, max_val=1):
    """Draw ``int32`` values with ``tf.random.uniform``.

    Args:
        shape: Shape of the result; the default ``()`` yields a scalar.
        min_val: Lower bound, forwarded as ``minval``.
        max_val: Upper bound, forwarded as ``maxval``.

    Returns:
        An ``int32`` tensor of the requested shape.
    """
    # tf.random.uniform names its bounds `minval` / `maxval`; passing
    # `min_val=` / `max_val=` raised TypeError.
    return tf.random.uniform(
        shape=shape, minval=min_val, maxval=max_val, dtype=tf.int32
    )

def mono_to_color(X, eps=1e-6, mean=None, std=None):
    """Standardise an array and rescale it to ``uint8`` in ``[0, 255]``.

    NOTE(review): the original cell stopped at a bare ``_min`` (NameError,
    nothing returned). The min-max rescaling below is the presumed intent;
    confirm against the notebook this was adapted from.

    Args:
        X: NumPy array to convert.
        eps: Added to ``std`` before dividing, and used as the smallest
            value range that is still rescaled.
        mean: Value subtracted from ``X``; ``X.mean()`` when ``None``.
        std: Value ``X`` is divided by; ``X.std()`` when ``None``.

    Returns:
        ``uint8`` array with the shape of ``X``. All zeros when the
        standardised range does not exceed ``eps``.
    """
    # `mean or X.mean()` treated an explicit 0 as "not given" and raised on
    # array-valued arguments; test for None instead.
    if mean is None:
        mean = X.mean()
    if std is None:
        std = X.std()
    X = (X - mean) / (std + eps)

    _min, _max = X.min(), X.max()
    if (_max - _min) > eps:
        # Divide first so the maximum maps to exactly 1.0 and then 255.
        scaled = (X - _min) / (_max - _min)
        return (255 * scaled).astype(np.uint8)
    return np.zeros_like(X, dtype=np.uint8)

In [None]:
@tf.function
def crop_pad(audio, tag_len, pad_mode='constant'):
    """Randomly pad or crop ``audio`` along axis 0 to ``tag_len`` samples.

    Args:
        audio: Tensor whose first dimension is the sample axis. It is
            reshaped to ``[tag_len]`` at the end, so it must hold exactly
            that many elements after padding or cropping.
        tag_len: Target number of samples.
        pad_mode: ``mode`` argument forwarded to ``tf.pad``.

    Returns:
        Tensor of shape ``[tag_len]``.
    """
    # Length of the incoming audio.
    audio_len = tf.shape(audio)[0]

    if audio_len < tag_len:
        # Too short: split the missing samples randomly between the two ends.
        diff_len = tag_len - audio_len
        # maxval is exclusive, so +1 lets all the padding land on one side.
        pad1 = tf.random.uniform([], 0, diff_len + 1, dtype=tf.int32)
        pad2 = diff_len - pad1
        audio = tf.pad(audio, paddings=[[pad1, pad2]], mode=pad_mode)
    elif audio_len > tag_len:
        # Too long: keep a random window of tag_len samples. The original
        # compared the tensor `audio` itself instead of its length.
        diff_len = audio_len - tag_len
        # +1 so the window ending on the last sample can also be chosen.
        indx = tf.random.uniform([], 0, diff_len + 1, dtype=tf.int32)
        audio = audio[indx:indx + tag_len]

    # Applied on every path so the result has a known length of tag_len.
    audio = tf.reshape(audio, [tag_len])
    return audio
        
@tf.function
def normalize(data, min_max=True):
    """Standardise ``data`` and optionally rescale it to ``[0, 1]``.

    Args:
        data: Tensor to normalise; statistics are taken over all elements.
        min_max: When true, min-max scale the standardised values.

    Returns:
        The normalised tensor. Divisions use ``tf.math.divide_no_nan``, so
        a zero standard deviation or zero range yields zeros, not NaN.
    """
    # The original stored the mean in `me_an` but read `mean`.
    mean = tf.math.reduce_mean(data)
    std = tf.math.reduce_std(data)
    data = tf.math.divide_no_nan(data - mean, std)

    if min_max:
        min_val = tf.math.reduce_min(data)
        max_val = tf.math.reduce_max(data)
        data = tf.math.divide_no_nan(data - min_val, max_val - min_val)

    # Returned on both paths; previously min_max=False returned None.
    return data

# EA Birds TensorFlow Data Loader

In [None]:
class EABirdsDsLoader:
    """Build a ``tf.data.Dataset`` of (image, label) pairs from audio files.

    Args:
        df_path: Path of a CSV file with ``filename`` and ``primary_label``
            columns, or an already loaded ``pandas.DataFrame`` with them.
            The original signature called this ``df`` but read
            ``df_path``; positional callers are unaffected.
        train_path: Directory that every ``filename`` is joined onto.
        sr, duration, window_size, n_mels, n_fft, hop_size: Stored on the
            instance; no method of this class reads them.
        audio_len: Number of samples each clip is cropped or padded to.
        batch_size: Batch size of the returned dataset.
        shuffle: Whether ``get_dataset`` shuffles before batching.
    """

    def __init__(self, df_path, train_path, sr=32000, duration=10,
                 audio_len=10 * 32000, window_size=1024, n_mels=128,
                 n_fft=2048, hop_size=512, batch_size=32, shuffle=True):
        self.train_path = train_path
        self.df_path = df_path
        self.sr = sr
        self.duration = duration
        self.audio_len = audio_len
        self.n_mels = n_mels
        self.n_fft = n_fft
        self.hop_size = hop_size
        self.batch_size = batch_size
        self.window_size = window_size
        self.shuffle = shuffle
        if isinstance(df_path, pd.DataFrame):
            self.df = df_path
        else:
            self.df = pd.read_csv(df_path)

        # Audio paths and labels, in the row order of the table.
        self.audio_paths = [
            os.path.join(self.train_path, fname) for fname in self.df.filename
        ]
        self.labels = self.df.primary_label.values
        self.indx_labels = {}

    def load_file(self, f_path, label):
        """Decode one Vorbis file into a fixed-length, normalised waveform."""
        audio = tf.io.read_file(f_path)
        audio = tfio.audio.decode_vorbis(audio)
        audio = tf.cast(audio, tf.float32)
        # Drops the last axis; this only succeeds when it has size 1.
        audio = tf.squeeze(audio, axis=-1)
        audio = crop_pad(audio, self.audio_len)
        # `if normalize:` tested the function object and was always true.
        audio = normalize(audio)
        return audio, label

    def spectrog_2_img(self, spectogram, label):
        """Resize a 2-D spectrogram to 224x224 and expand it to 3 channels."""
        spectogram = tf.expand_dims(spectogram, axis=-1)
        spectogram = tf.image.resize(spectogram, [224, 224])
        spectogram = tf.image.grayscale_to_rgb(spectogram)
        return spectogram, label

    def get_dataset(self):
        """Return the mapped, optionally shuffled, batched dataset."""
        ds = tf.data.Dataset.from_tensor_slices(
            (self.audio_paths, self.labels)
        )
        ds = ds.map(self.load_file, num_parallel_calls=tf.data.AUTOTUNE)
        # NOTE(review): AudioAug is not defined in the visible cells. It
        # presumably turns the waveform into the 2-D spectrogram that
        # spectrog_2_img expects -- confirm.
        ds = ds.map(AudioAug, num_parallel_calls=tf.data.AUTOTUNE)
        ds = ds.map(self.spectrog_2_img, num_parallel_calls=tf.data.AUTOTUNE)

        if self.shuffle:
            ds = ds.shuffle(buffer_size=500, reshuffle_each_iteration=True)
        # Batch and prefetch regardless of shuffling; the original only did
        # so inside the shuffle branch.
        ds = ds.batch(self.batch_size)
        ds = ds.prefetch(tf.data.AUTOTUNE)
        return ds

In [None]:
# NOTE(review): the metadata path '' is an empty placeholder and
# `train_path` is not defined in the visible cells -- set both before running.
# The metadata path is passed positionally because a positional argument may
# not follow a keyword argument.
train_data_loader = EABirdsDsLoader('', train_path=train_path)
train_ds = train_data_loader.get_dataset()