## Mount Google Drive

In [1]:
# Mount Google Drive inside the Colab runtime so the dataset stored on Drive
# is reachable under /gdrive by the cells below.
import os
from google.colab import drive
# force_remount=True requests a fresh mount even if /gdrive is already mounted
# (useful when re-running this cell in the same session).
drive.mount('/gdrive', force_remount=True)

Mounted at /gdrive


## Specify root

In [2]:
# Root directory of the ESC-50 dataset on the mounted Drive; later cells build
# their input and output paths from this value.
root = '/gdrive/MyDrive/Irfan/ESC-50-master'
# Sanity check: list the dataset directory. The path is repeated literally
# here, so keep it in sync with `root` when changing the location.
!ls '/gdrive/MyDrive/Irfan/ESC-50-master'

 audio		    esc50.gif   melspectrograms   pytest.ini   requirements.txt
'audio augmented'   LICENSE     meta		  README.md    tests


## Import Libraries

In [3]:
import numpy as np
import pandas as pd
import random

from scipy.io import wavfile
from sklearn import preprocessing

import librosa.display
import librosa

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import soundfile as sf

## Data augmentor

In [4]:
def noise_injection(data, noise_factor):
    """Add scaled standard-normal noise to a 1-D signal.

    Parameters
    ----------
    data : array-like
        Input samples; one noise value is drawn per element (``len(data)``).
    noise_factor : float
        Multiplier applied to the standard-normal noise before it is added.

    Returns
    -------
    np.ndarray
        ``data + noise_factor * noise`` cast to the type of ``data[0]``.
    """
    gaussian = np.random.randn(len(data))
    noisy = data + noise_factor * gaussian
    # The sum is floating point; convert back to the element type of the input.
    element_type = type(data[0])
    return noisy.astype(element_type)

def time_shift(data, sampling_rate, shift_max, shift_direction):
    """Shift a signal by a random number of samples and pad with silence.

    A shift is drawn uniformly from ``[0, int(sampling_rate * shift_max))``,
    the signal is rotated with ``np.roll`` and the samples that wrapped around
    are set to 0.

    Parameters
    ----------
    data : np.ndarray
        1-D audio samples. The input is not modified; a new array is returned.
    sampling_rate : int
        Samples per second of ``data``.
    shift_max : float
        Exclusive upper bound of the shift, in seconds.
    shift_direction : str
        ``'right'`` negates the drawn shift (negative roll: samples move
        towards the start of the array), ``'both'`` negates it with
        probability 1/2, any other value keeps the positive roll.

    Returns
    -------
    np.ndarray
        The shifted signal, same shape as ``data``.
    """
    # randint needs a positive integer bound; a bound below one sample simply
    # means "no shift" instead of raising.
    max_shift = int(sampling_rate * shift_max)
    shift = np.random.randint(max_shift) if max_shift > 0 else 0

    if shift_direction == 'right':
        shift = -shift
    elif shift_direction == 'both':
        # Flip the direction for roughly half of the calls.
        if np.random.randint(0, 2) == 1:
            shift = -shift

    augmented_data = np.roll(data, shift)
    # Silence the samples that wrapped around. A zero shift must leave the
    # signal untouched (slicing with [0:] would blank the whole array).
    if shift > 0:
        augmented_data[:shift] = 0
    elif shift < 0:
        augmented_data[shift:] = 0
    return augmented_data

def pitch_changer(data, sr, n_steps):
    """Return ``data`` pitch-shifted by ``n_steps`` steps.

    Thin wrapper around ``librosa.effects.pitch_shift``; ``sr`` is the
    sampling rate of ``data`` and is forwarded unchanged.
    """
    shifted = librosa.effects.pitch_shift(data, sr=sr, n_steps=n_steps)
    return shifted

def speed_changer(data, speed_factor):
    """Return ``data`` time-stretched by ``speed_factor``.

    Thin wrapper around ``librosa.effects.time_stretch``; ``speed_factor`` is
    passed as its ``rate`` argument.
    """
    stretched = librosa.effects.time_stretch(data, rate=speed_factor)
    return stretched

In [5]:
def _save_log_melspectrogram_image(signal, sr, output_path, n_mels=128):
    """Render the log-scaled mel spectrogram of ``signal`` to ``output_path``."""
    melspec = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=n_mels)
    log_melspec = librosa.power_to_db(melspec, ref=np.max)
    # Draw on a dedicated figure and close it afterwards: reusing pyplot's
    # implicit current figure would paint each spectrogram on top of the
    # previous one and leak one figure per processed clip.
    fig, ax = plt.subplots()
    try:
        librosa.display.specshow(log_melspec, sr=sr, ax=ax)
        fig.savefig(output_path)
    finally:
        plt.close(fig)


def save_melspectrogram(directory_path, file_name, dataset_split, label, sampling_rate=44100):
    """Save mel-spectrogram images of one clip and two augmented variants.

    The clip is loaded as mono at ``sampling_rate``, standardised with
    ``sklearn.preprocessing.scale`` and written as three PNG files under
    ``<root>/audio augmented/<dataset_split>/<label>/``:

    * ``<stem>.png``     - the original signal
    * ``<stem>aug1.png`` - time-stretched by a random rate
    * ``<stem>aug2.png`` - pitch-shifted by a random integer in [-7, 7] steps

    where ``<stem>`` is ``file_name`` without its extension. Relies on the
    notebook-level variable ``root``.

    If the testing and training folders of ``label`` both exist and hold
    exactly 24 and 96 files respectively, the label is treated as already
    processed and the function returns without doing anything.

    Parameters
    ----------
    directory_path : str
        Directory that contains ``file_name``.
    file_name : str
        Name of the audio file to process.
    dataset_split : str
        Sub-folder to write to (the caller passes 'training' or 'testing').
    label : str
        Category name, used as the innermost output folder.
    sampling_rate : int, optional
        Sampling rate the audio is resampled to on load.
    """
    # Smallest time-stretch rate that is still drawn. random.uniform(0, 3) can
    # return 0 (rejected by librosa) or a tiny value (enormous output signal).
    min_speed_factor = 0.1
    # Resume guard: 3 images per clip with an 80/20 split of the clips of one
    # category (presumably 40 clips per category, giving 8 test / 32 train).
    expected_test_files = 24
    expected_train_files = 96

    testdir = root + '/audio augmented/testing/{label}'.format(label=label)
    traindir = root + '/audio augmented/training/{label}'.format(label=label)

    if os.path.isdir(testdir) and os.path.isdir(traindir):
        already_done = (len(os.listdir(testdir)) == expected_test_files
                        and len(os.listdir(traindir)) == expected_train_files)
        if already_done:
            return

    path_to_file = os.path.join(directory_path, file_name)
    data, sr = librosa.load(path_to_file, sr=sampling_rate, mono=True)
    data = data.astype(float)
    data = preprocessing.scale(data)

    # Same draw order and range as before; only pathological rates are clamped.
    speed_factor = max(random.uniform(0, 3), min_speed_factor)
    pitch_steps = random.randint(-7, 7)
    data_aug1 = speed_changer(data, speed_factor)
    data_aug2 = pitch_changer(data, sr, pitch_steps)

    # create saving directory
    directory = root + '/audio augmented/{dataset}/{label}'.format(dataset=dataset_split, label=label)
    os.makedirs(directory, exist_ok=True)

    # str.strip('.wav') removes any of the characters '.', 'w', 'a', 'v' from
    # both ends of the name; splitext removes exactly the extension.
    stem = os.path.splitext(file_name)[0]

    _save_log_melspectrogram_image(data, sr, directory + '/' + stem + '.png')
    _save_log_melspectrogram_image(data_aug1, sr, directory + '/' + stem + 'aug1.png')
    _save_log_melspectrogram_image(data_aug2, sr, directory + '/' + stem + 'aug2.png')

In [6]:
def _train_test_split(filenames, train_pct):
    """Create train and test splits for ESC-50 data"""
    random.seed(2018)
    n_files = len(filenames)
    n_train = int(n_files*train_pct)
    train = np.random.choice(n_files, n_train, replace=False)
        
    # split on training indices
    training_idx = np.isin(range(n_files), train)
    training_set = np.array(filenames)[training_idx]
    testing_set = np.array(filenames)[~training_idx]
    
    return {'training': training_set, 'testing': testing_set}

In [7]:
# The dataset lives directly under the configured root.
dataset_dir = root

# Read the ESC-50 metadata table that ships with the dataset
meta_data = pd.read_csv(
    dataset_dir + '/meta/esc50.csv'
)

# Category column and the distinct category names, in order of appearance
labs = meta_data['category']
unique_labels = pd.unique(labs)
meta_data.head()

Unnamed: 0,filename,fold,target,category,esc10,src_file,take
0,1-100032-A-0.wav,1,0,dog,True,100032,A
1,1-100038-A-14.wav,1,14,chirping_birds,False,100038,A
2,1-100210-A-36.wav,1,36,vacuum_cleaner,False,100210,A
3,1-100210-B-36.wav,1,36,vacuum_cleaner,False,100210,B
4,1-101296-A-19.wav,1,19,thunderstorm,False,101296,A


In [8]:
# Generate the spectrogram images for every category and report how long
# each category took.
import time
count = 0  # incremented once per category; not read anywhere in this cell
for label in unique_labels:
    count += 1
    start_time = time.time()
    print("Proccesing {} audio files".format(label))
    # Metadata rows that belong to the current category only
    current_label_meta_data = meta_data[meta_data.category == label]
    # Split this category's file names 80/20 into 'training' and 'testing'
    datasets = _train_test_split(current_label_meta_data.filename, train_pct=0.8)
    for dataset_split, audio_files in datasets.items():
        for filename in audio_files:
            # Source clips are read from <dataset_dir>/audio/
            directory_path = dataset_dir + '/audio/'
            save_melspectrogram(directory_path, filename, dataset_split, label, sampling_rate=44100)
    print("---Time elapsed: %s seconds ---" % (time.time() - start_time))

Proccesing dog audio files
---Time elapsed: 0.12777113914489746 seconds ---
Proccesing chirping_birds audio files
---Time elapsed: 0.13430547714233398 seconds ---
Proccesing vacuum_cleaner audio files
---Time elapsed: 0.11284708976745605 seconds ---
Proccesing thunderstorm audio files
---Time elapsed: 0.1151270866394043 seconds ---
Proccesing door_wood_knock audio files
---Time elapsed: 0.1196897029876709 seconds ---
Proccesing can_opening audio files
---Time elapsed: 0.12384700775146484 seconds ---
Proccesing crow audio files
---Time elapsed: 0.15959930419921875 seconds ---
Proccesing clapping audio files
---Time elapsed: 0.13207077980041504 seconds ---
Proccesing fireworks audio files
---Time elapsed: 0.12416219711303711 seconds ---
Proccesing chainsaw audio files
---Time elapsed: 0.12426352500915527 seconds ---
Proccesing airplane audio files
---Time elapsed: 0.12716102600097656 seconds ---
Proccesing mouse_click audio files
---Time elapsed: 0.12152314186096191 seconds ---
Proccesin