<a href="https://colab.research.google.com/gist/keyurparalkar/5a49f696ed36ddce6526ab50e29e04ce/audio-data-augmentation-visualization-librosa-intgration-part-2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook contains the Wild Animals Dataset Augmentation. We worked with the Librosa and Soundfile methods to add noise, shift, time stretch and pitch shift audio files. Then we've augmented the Data with these techniques.

## Importing libraries

In [None]:
from fastai import * 
from fastai.vision import *
import os
import librosa
import IPython.display as ipd
import math
import secrets
import glob
import pandas as pd
import numpy as np
import soundfile as sf
from matplotlib import pyplot as plt
pd.set_option('max_rows', None)

## Creating an ID generator for the dataset

In [25]:
def random_alphanum(length: int) -> str:
        text = secrets.token_hex(nbytes=math.ceil(length / 2))
        isEven = length % 2 == 0
        return text if isEven else text[1:]

## Creating a training set

In [27]:
# assign directory
directory = './dataset'
 
# iterate over files in
# that directory
dataset_dict = {
    'relative_path' : [],
    'classID' : [],
    'id_file' : []
}
sum1 = 0
relative_path = []
classID = []
id_file = []
for filename in glob.iglob(f'{directory}/*'):
    for audio_name in glob.iglob(f'{filename}/*'):
        audio_name = audio_name.replace("\\", "/")
        relative_path.append(audio_name)
        classID.append(filename.split("\\")[-1])

cpt = 0
for filename in glob.iglob(f'{directory}/*'):
    for audio_name in glob.iglob(f'{filename}/*'):  
        cpt += 1

for i in range(cpt):
    id_file.append(random_alphanum(6))
dataset_dict['relative_path'] = relative_path
dataset_dict['classID'] = classID
dataset_dict['id_file'] = id_file

df_dataset = pd.DataFrame(dataset_dict, columns = ['relative_path', 'classID', 'id_file'])

## Creating a prediction set

In [28]:
# assign directory
directory = '.\prediction set'
 
# iterate over files in
# that directory
test_dict = {
    'relative_path' : [],
    'classID' : [],
    'id_file' : []
}
sum1 = 0
relative_path = []
classID = []
id_file = []
for filename in glob.iglob(f'{directory}/*'):
    for audio_name in glob.iglob(f'{filename}/*'):
        audio_name = audio_name.replace("\\", "/")
        print("filename : ", audio_name)
        relative_path.append(audio_name)
        classID.append(filename.split("\\")[-1])
        
cpt = 0
for filename in glob.iglob(f'{directory}/*'):
    for audio_name in glob.iglob(f'{filename}/*'):  
        cpt += 1

for i in range(cpt):
    id_file.append(random_alphanum(6))
    
test_dict['relative_path'] = relative_path
test_dict['classID'] = classID
test_dict['id_file'] = id_file
df_test = pd.DataFrame(test_dict, columns = ['relative_path', 'classID', 'id_file'])
# print("Mean duration : ", sum1/len(list(glob.iglob(f'{directory}/*'))))

filename :  ./prediction set/bear/439441__asteroiderer__bear-mad.wav
filename :  ./prediction set/crow/138344__unclesigmund__crow.wav
filename :  ./prediction set/dolphin/456151__jfournier18__dolphin-noise.wav
filename :  ./prediction set/eagle/104911__kabit__eagle.wav
filename :  ./prediction set/elephant/139875__y89312__44.wav
filename :  ./prediction set/falcon/467704__tallareta__halcon.wav
filename :  ./prediction set/frog/67261__benboncan__frog-croaking.wav
filename :  ./prediction set/hippo/194871__toadie__hippos-2.mp3
filename :  ./prediction set/lion/69570__bidone__lion-loud.mp3
filename :  ./prediction set/monkey/128473__darkozl__a-monkey.wav
filename :  ./prediction set/owl/25945__inchadney__owl.wav
filename :  ./prediction set/panther/black-panther-roaring-128-ytshorts.savetube.me.mp3
filename :  ./prediction set/Penguin/463033__iamaviolin__penguin-calls-noises.wav
filename :  ./prediction set/seal/179586__stormpetrel__a-weddell-seal-pup-laying-on-the-antarctic-ice-shelf-is-

In [30]:
df_test["relative_path"] = df_test["relative_path"].apply(lambda x: x.replace("./prediction set/", "./dataset/"))

## Counting number of samples per category

In [36]:
df_value_counts = pd.DataFrame(df_dataset["classID"].value_counts()).rename(columns={"classID":"occurences"}).reset_index().rename(columns={"index":"classID"})

## Defining the noise addition function

In [41]:
def noise_addition(file_path, cpt_al_sds_agm_all_fct_usd):
    '''
    Noise addition using normal distribution with mean = 0 and std =1

    Permissible noise factor value = x > 0.004
    '''
    wav, sr = librosa.load(file_path,sr=None)
    wav_n = wav + 0.009*np.random.normal(0,1,len(wav))
    extension = os.path.splitext(file_path)[1]
    file_path = file_path.replace(extension, "")
    file_path = file_path + "-noise_add-nb-" + str(cpt_al_sds_agm_all_fct_usd) + extension
    print("Adding noise to the file...")
    if (extension == ".mp3") or (extension == ".m4a") or (extension == ".flac") or (extension == ".ogg"):
        file_path = file_path.replace("-noise_add-nb-" + str(cpt_al_sds_agm_all_fct_usd) + extension, "")
        file_path = file_path + "-noise_add-nb-" + str(cpt_al_sds_agm_all_fct_usd) + ".wav"
    sf.write(file_path, wav_n, 44100, 'PCM_24')

## Defining the shifting function

In [42]:
def shifting(file_path, cpt_al_sds_agm_all_fct_usd):
    '''
    Permissible factor values = sr/10
    '''

    wav, sr = librosa.load(file_path, sr=None)
    wav_shift = np.roll(wav, int(sr / 10))
    extension = os.path.splitext(file_path)[1]
    file_path = file_path.replace(extension, "")
    file_path = file_path + "-shifting-nb-" + str(cpt_al_sds_agm_all_fct_usd) + extension
    print("Shifting the file...")
    if (extension == ".mp3") or (extension == ".m4a") or (extension == ".flac") or (extension == ".ogg"):
        file_path = file_path.replace("-shifting-nb-" + str(cpt_al_sds_agm_all_fct_usd) + extension, "")
        file_path = file_path + "-shifting-nb-" + str(cpt_al_sds_agm_all_fct_usd) + ".wav"
    sf.write(file_path, wav_shift, 44100, 'PCM_24')

## Defining the time stretching function

In [43]:
#Time-stretching the wave
def time_sttch(file_path, cpt_al_sds_agm_all_fct_usd):
    '''
    Permissible factor values = 0 < x < 1.0
    '''
    factor = 0.4
    wav, sr = librosa.load(file_path,sr=None)
    wav_t_sttch = librosa.effects.time_stretch(wav,factor)
    extension = os.path.splitext(file_path)[1]
    file_path = file_path.replace(extension, "")
    file_path = file_path + "-t-sttch-nb-" + str(cpt_al_sds_agm_all_fct_usd) + extension
    print("Time stretching the file...")
    if (extension == ".mp3") or (extension == ".m4a") or (extension == ".flac") or (extension == ".ogg"):
        file_path = file_path.replace("-t-sttch-nb-" + str(cpt_al_sds_agm_all_fct_usd) + extension, "")
        file_path = file_path + "-t-sttch-nb-" + str(cpt_al_sds_agm_all_fct_usd) + ".wav"
    sf.write(file_path, wav_t_sttch, 44100, 'PCM_24')

## Defining the pitch shifting function

In [44]:
#pitch shifting of wav
def ptch_shft(file_path, cpt_al_sds_agm_all_fct_usd, step):
    '''
    Permissible factor values = -5 <= x <= 5
    '''
    wav, sr = librosa.load(file_path,sr=None)
    wav_pitch_sf = librosa.effects.pitch_shift(wav,sr,n_steps=step)
    extension = os.path.splitext(file_path)[1]
    file_path = file_path.replace(extension, "")
    file_path = file_path + "-pitching-nb-" + str(cpt_al_sds_agm_all_fct_usd) + "-step-" + str(step) + extension
    print("Pitch shifting the file for step ,", step, "...")
    if (extension == ".mp3") or (extension == ".m4a") or (extension == ".flac") or (extension == ".ogg"):
        file_path = file_path.replace("-pitching-nb-" + str(cpt_al_sds_agm_all_fct_usd) + "-step-" + str(step) + extension, "")
        file_path = file_path + "-pitching-nb-" + str(cpt_al_sds_agm_all_fct_usd) + "-step-" + str(step) + ".wav"
    sf.write(file_path, wav_pitch_sf, 44100, 'PCM_24')

In [45]:
dict_augm_funct = {
    "funct_name":["noise_addition", "shifting", "time_sttch", "ptch_shft"],
    "funct":[noise_addition, shifting, time_sttch, ptch_shft]
}
df_augm_funct = pd.DataFrame(dict_augm_funct)
df_augm_funct

Unnamed: 0,funct_name,funct
0,noise_addition,<function noise_addition at 0x0000017452AC35E0>
1,shifting,<function shifting at 0x0000017452ADCD30>
2,time_sttch,<function time_sttch at 0x0000017452ADCEE0>
3,ptch_shft,<function ptch_shft at 0x000001745325EE50>


## Augmenting the Dataset

In [46]:
directory = '.\dataset'

all_snds_of_drctry_augm = False
for filename in glob.iglob(f'{directory}/*'):
    print("Processing category ", filename.split("\\")[-1], "...")
    orig_svd_flnms_of_cat = df_dataset[df_dataset["classID"] == filename.split("\\")[-1]]
    cpt_sounds_p_cat = len([name for name in os.listdir(filename) if os.path.isfile(os.path.join(filename, name))])
    orig_n_o_sds_p_cat = len([name for name in os.listdir(filename) if os.path.isfile(os.path.join(filename, name))])
    index_df_augm_func = 0
    if orig_svd_flnms_of_cat.shape[0] == 24:
        continue
    restart = True
    cpt_nb_of_agm_snd_pr_ct = 0
    cpt_al_sds_agm_all_fct_usd = 0
    step = -5
    while restart:
        for index, row in orig_svd_flnms_of_cat.iterrows():
            audio_name = row["relative_path"]
            aug_funct = df_augm_funct["funct"][index_df_augm_func]
            if (index_df_augm_func == 3):
                aug_funct(audio_name, cpt_al_sds_agm_all_fct_usd, step)
            else:
                aug_funct(audio_name, cpt_al_sds_agm_all_fct_usd)
            cpt_nb_of_agm_snd_pr_ct += 1
            if cpt_nb_of_agm_snd_pr_ct == orig_n_o_sds_p_cat:
                index_df_augm_func += 1
                if (index_df_augm_func > 3):
                    index_df_augm_func = 3
                    step += 1
                    cpt_al_sds_agm_all_fct_usd += 1
                all_snds_of_drctry_augm = True
                cpt_sounds_p_cat += 1
                if (cpt_sounds_p_cat == 24):
                    restart = False
                    break
                cpt_nb_of_agm_snd_pr_ct = 0
                restart = True
                break
            else:
                cpt_sounds_p_cat += 1
                if cpt_sounds_p_cat == 24:
                    restart = False
                    break
                else:
                    continue
                    restart = True
                    break

Processing category  bear ...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Shifting the file...
Shifting the file...
Shifting the file...
Time stretching the file...


  wav_t_sttch = librosa.effects.time_stretch(wav,factor)


Time stretching the file...
Time stretching the file...
Pitch shifting the file for step , -5 ...


  wav_pitch_sf = librosa.effects.pitch_shift(wav,sr,n_steps=step)


Pitch shifting the file for step , -5 ...


  wav_pitch_sf = librosa.effects.pitch_shift(wav,sr,n_steps=step)


Pitch shifting the file for step , -5 ...
Pitch shifting the file for step , -4 ...
Pitch shifting the file for step , -4 ...
Pitch shifting the file for step , -4 ...
Pitch shifting the file for step , -3 ...
Pitch shifting the file for step , -3 ...
Pitch shifting the file for step , -3 ...
Pitch shifting the file for step , -2 ...
Pitch shifting the file for step , -2 ...
Pitch shifting the file for step , -2 ...
Processing category  crow ...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Shifting the file...
Shifting the file...
Shifting the file...
Shifting the file...
Processing category  dolphin ...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Shifting the file...


  return f(*args, **kwargs)


Shifting the file...
Shifting the file...
Time stretching the file...
Time stretching the file...
Time stretching the file...
Pitch shifting the file for step , -5 ...


  wav_pitch_sf = librosa.effects.pitch_shift(wav,sr,n_steps=step)


Pitch shifting the file for step , -5 ...
Pitch shifting the file for step , -5 ...
Pitch shifting the file for step , -4 ...
Pitch shifting the file for step , -4 ...
Pitch shifting the file for step , -4 ...
Pitch shifting the file for step , -3 ...
Pitch shifting the file for step , -3 ...
Pitch shifting the file for step , -3 ...
Pitch shifting the file for step , -2 ...
Pitch shifting the file for step , -2 ...
Pitch shifting the file for step , -2 ...
Processing category  eagle ...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Shifting the file...
Shifting the file...
Processing category  elephant ...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Shifting the file...
Shifting the file.

  wav_pitch_sf = librosa.effects.pitch_shift(wav,sr,n_steps=step)


Pitch shifting the file for step , -5 ...
Pitch shifting the file for step , -5 ...
Pitch shifting the file for step , -4 ...
Pitch shifting the file for step , -4 ...
Pitch shifting the file for step , -4 ...
Pitch shifting the file for step , -3 ...
Pitch shifting the file for step , -3 ...
Pitch shifting the file for step , -3 ...
Pitch shifting the file for step , -2 ...
Pitch shifting the file for step , -2 ...
Pitch shifting the file for step , -2 ...
Processing category  falcon ...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Shifting the file...
Shifting the file...
Processing category  frog ...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise to the file...
Adding noise t