<a href="https://colab.research.google.com/github/puneat/Audio_Sentiment/blob/puneet/data_augmentation_2d.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **MOUNTING DRIVE SYSTEMS AND IMPORTING LIBRARIES**

In [1]:
# Mount Google Drive at /gdrive; every CSV, audio and .npy path used below is rooted there.
# force_remount=True is passed so the mount is redone on every run of this cell.
from google.colab import drive
drive.mount('/gdrive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /gdrive


In [2]:
# Keras
import keras
from keras import regularizers
from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential, Model, model_from_json
from keras.layers import Dense, Embedding, LSTM
from keras.layers import Input, Flatten, Dropout, Activation, BatchNormalization
from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
from keras.utils import np_utils, to_categorical
from keras.callbacks import (EarlyStopping, LearningRateScheduler,
                             ModelCheckpoint, TensorBoard, ReduceLROnPlateau)
from keras import losses, models, optimizers
from keras.activations import relu, softmax
from keras.layers import (Convolution2D, GlobalAveragePooling2D, BatchNormalization, Flatten, Dropout,
                          GlobalMaxPool2D, MaxPool2D, concatenate, Activation, Input, Dense)

# sklearn
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Other  
from tqdm import tqdm, tqdm_pandas
import scipy
from scipy.stats import skew
import librosa
import librosa.display
import json
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from matplotlib.pyplot import specgram
import pandas as pd
import seaborn as sns
import glob 
import os
import sys
import IPython.display as ipd  # To play sound in the notebook
import warnings
# Silence all warnings, but only when the interpreter was started without explicit
# -W options (sys.warnoptions is empty in that case). Note this blanket filter also
# hides deprecation notices from the audio/ML libraries imported above.
if not sys.warnoptions:
    warnings.simplefilter("ignore")

Using TensorFlow backend.
  import pandas.util.testing as tm


### **DATA AUGMENTATION**

In [None]:
################################################################################
''' Adding White Noise '''

def noise(data):
    '''Return a float64 copy of ``data`` with Gaussian white noise added.

    The noise amplitude is drawn uniformly from [0, 5%) of the largest value in
    ``data`` (original author's note: use 0.5 instead of 0.05 for more noise).
    ``data`` is expected to be one-dimensional, since the noise vector is sized
    from ``data.shape[0]``. The input array itself is not modified.

    Any distribution from numpy's random sampling routines could replace the
    normal draw: https://docs.scipy.org/doc/numpy-1.13.0/reference/routines.random.html
    '''
    # Draw order (amplitude first, samples second) is kept so seeded runs reproduce.
    amplitude = 0.05 * np.random.uniform() * np.amax(data)
    white = np.random.normal(size=data.shape[0])
    noisy = data.astype('float64') + amplitude * white
    return noisy

################################################################################
''' Random Shifting '''

def shift(data):
    '''Circularly rotate ``data`` by a random whole number of samples.

    The offset is a uniform draw from [-5, 5) scaled by 1000 and truncated
    toward zero by ``int``, so it lies strictly between -5000 and 5000.
    Samples rolled past one end re-enter at the other (``np.roll``); a new
    array is returned and the input is left as is.
    '''
    fraction = np.random.uniform(low=-5, high=5)
    n_samples = int(fraction * 1000)  # original note: "default at 500"
    rolled = np.roll(data, n_samples)
    return rolled

################################################################################
''' Stretching the Sound. This expands the audio size '''

def stretch(data, rate=0.8):
    '''Time-stretch ``data`` without changing its pitch.

    Parameters
    ----------
    data : np.ndarray
        Audio time series.
    rate : float, default 0.8
        Stretch factor handed to librosa. Values below 1 slow the audio down,
        so the returned signal is longer than the input; values above 1
        shorten it.

    Returns
    -------
    np.ndarray
        The stretched signal; its length differs from ``len(data)`` unless
        ``rate`` is 1.
    '''
    # `rate` is keyword-only in librosa >= 0.10; passing it by name also works on
    # older releases, whereas the positional form raises TypeError on new ones.
    return librosa.effects.time_stretch(data, rate=rate)

################################################################################    
''' Pitch Tuning '''
def pitch(data, sample_rate):
    '''Shift the pitch of ``data`` upward by a random amount, keeping its duration.

    Parameters
    ----------
    data : np.ndarray
        Audio time series; converted to float64 before shifting.
    sample_rate : int
        Sampling rate of ``data`` in Hz.

    Returns
    -------
    np.ndarray
        The pitch-shifted signal.
    '''
    bins_per_octave = 12
    pitch_pm = 2
    # uniform() is in [0, 1), so the shift is in [0, 4) steps, i.e. always upward.
    pitch_change = pitch_pm * 2 * (np.random.uniform())
    # `sr` is keyword-only in librosa >= 0.10; naming it keeps the call valid on
    # both old and new releases (the positional form raises TypeError on new ones).
    data = librosa.effects.pitch_shift(data.astype('float64'),
                                       sr=sample_rate, n_steps=pitch_change,
                                       bins_per_octave=bins_per_octave)
    return data

################################################################################    
''' Dynamic Change'''

def dyn_change(data):
    '''Random value change: scale ``data`` by a single random gain.

    The gain is drawn uniformly from [-0.5, 7). Negative draws invert the
    signal's polarity and draws near zero almost silence it. A new array is
    returned; the input is not modified.
    '''
    # Named `gain` rather than `dyn_change` so the local does not shadow the function.
    gain = np.random.uniform(low=-0.5, high=7)
    return data * gain

################################################################################
''' Speed and Pitch Tuning '''

def speedNpitch(data):
    '''Speed the signal up by linear resampling, keeping the original length.

    A speed factor of ``1.2 / u`` with ``u`` uniform in [0.8, 1) is drawn, i.e.
    a factor in (1.2, 1.5]. The signal is re-read at that stride with linear
    interpolation, which shortens it (and, played at the same sample rate,
    raises its pitch). The shortened signal fills the start of the output and
    the remainder is zero.

    Parameters
    ----------
    data : np.ndarray
        One-dimensional audio time series. It is NOT modified.

    Returns
    -------
    np.ndarray
        A new array with the same shape and dtype as ``data``.
    '''
    length_change = np.random.uniform(low=0.8, high=1)
    speed_fac = 1.2 / length_change
    sample_positions = np.arange(0, len(data), speed_fac)
    resampled = np.interp(sample_positions, np.arange(0, len(data)), data)
    minlen = min(data.shape[0], resampled.shape[0])
    # Write into a fresh buffer: the previous in-place `data *= 0` destroyed the
    # caller's array, so anything computed from it afterwards used the wrong signal.
    out = np.zeros_like(data)
    out[0:minlen] = resampled[0:minlen]
    return out

### **PREPARING 4D NUMPY ARRAY WITH AUGMENTATIONS**

In [13]:
'''Extracting the MFCC feature as an image (Matrix format).'''
def prepare_data(df, n):
    '''Extract MFCC "images" for every clip in ``df``: one unmodified, one per augmentation.

    Reads the module-level globals ``sampling_rate`` (Hz) and ``audio_duration``
    (seconds), which must be defined before this function is called.

    Each file is loaded starting 0.5 s in, for at most ``audio_duration``
    seconds, then randomly cropped or zero-padded to exactly
    ``int(sampling_rate * audio_duration)`` samples. Five augmented copies
    (speed+pitch, noise, shift, pitch, dynamic change) are derived from that
    fixed-length signal, and MFCCs are computed for each copy and for the
    unmodified signal.

    Parameters
    ----------
    df : pandas.DataFrame
        Must expose a ``path`` column holding one audio file path per row.
    n : int
        Number of MFCC coefficients per frame (second axis of every output).

    Returns
    -------
    tuple of six np.ndarray
        ``(X_ref, X_speednpitch, X_pitch, X_shift, X_noise, X_dyn_change)``,
        each shaped ``(len(df), n, n_frames, 1)``. ``n_frames`` is 216 for
        2.5 s at 44.1 kHz.
    '''
    # librosa's default hop; named so the frame count below stays in sync with it.
    hop_length = 512
    # Slice bounds and pad widths must be integers; the raw product is a float.
    input_length = int(sampling_rate * audio_duration)
    # Frame count of a centred STFT over input_length samples.
    n_frames = 1 + input_length // hop_length
    out_shape = (df.shape[0], n, n_frames, 1)

    X_speednpitch = np.empty(shape=out_shape)
    X_noise = np.empty(shape=out_shape)
    X_pitch = np.empty(shape=out_shape)
    X_shift = np.empty(shape=out_shape)
    X_dyn_change = np.empty(shape=out_shape)
    X_ref = np.empty(shape=out_shape)

    for cnt, file_path in enumerate(tqdm(df.path)):
        data, _ = librosa.load(file_path, sr=sampling_rate
                               ,res_type="kaiser_fast"
                               ,duration=audio_duration
                               ,offset=0.5
                              )

        # Random offset / Padding
        if len(data) > input_length:
            offset = np.random.randint(len(data) - input_length)
            data = data[offset:(input_length + offset)]
        elif len(data) < input_length:
            offset = np.random.randint(input_length - len(data))
            data = np.pad(data, (offset, input_length - len(data) - offset), "constant")

        # Augmentation + MFCC extraction run for EVERY clip. They used to sit inside
        # the padding branch, so cropped clips left uninitialised rows in the outputs.
        # speedNpitch gets a copy so `data` stays the true reference signal even if
        # that function writes into its argument.
        variants = (
            (X_speednpitch, speedNpitch(data.copy())),
            (X_noise, noise(data)),
            (X_shift, shift(data)),
            (X_pitch, pitch(data, sampling_rate)),
            (X_dyn_change, dyn_change(data)),
            (X_ref, data),
        )
        for target, signal in variants:
            # `y` must be passed by keyword on librosa >= 0.10.
            MFCC = librosa.feature.mfcc(y=signal, sr=sampling_rate, n_mfcc=n,
                                        hop_length=hop_length)
            target[cnt,] = np.expand_dims(MFCC, axis=-1)

    return X_ref, X_speednpitch, X_pitch, X_shift, X_noise, X_dyn_change

### **MALE AUDIO AUGMENTATION**

In [9]:
# Index of the male recordings. prepare_data reads one audio file per row from
# the `path` column; `labels` and `source` are not used in this notebook.
ref = pd.read_csv("/gdrive/My Drive/Audio_files/Combined_Dataframes/male_df.csv")
ref.head()

Unnamed: 0,labels,source,path
0,male_disgust,SAVEE,/gdrive/My Drive/Audio_files/male_final/SAVEE/...
1,male_disgust,SAVEE,/gdrive/My Drive/Audio_files/male_final/SAVEE/...
2,male_disgust,SAVEE,/gdrive/My Drive/Audio_files/male_final/SAVEE/...
3,male_disgust,SAVEE,/gdrive/My Drive/Audio_files/male_final/SAVEE/...
4,male_disgust,SAVEE,/gdrive/My Drive/Audio_files/male_final/SAVEE/...


In [14]:
# sampling_rate (Hz) and audio_duration (seconds) are read by prepare_data as
# module-level globals, so they must be assigned before the call below.
# n_mfcc is the number of MFCC coefficients and is passed in as `n`.
sampling_rate=44100
audio_duration=2.5
n_mfcc = 30
# The unpacking order mirrors prepare_data's return order. Despite the df_ prefix,
# these six values are NumPy arrays, not DataFrames.
df_ref, df_speednpitch, df_pitch, df_shift, df_noise, df_dyn_change = prepare_data(ref, n = n_mfcc)

100%|██████████| 5130/5130 [1:38:41<00:00,  1.15s/it]


In [36]:
# Print the shape of each of the six feature arrays on one line, space-separated.
print(*(features.shape for features in
        (df_noise, df_speednpitch, df_pitch, df_dyn_change, df_shift, df_ref)))

(5130, 30, 216, 1) (5130, 30, 216, 1) (5130, 30, 216, 1) (5130, 30, 216, 1) (5130, 30, 216, 1) (5130, 30, 216, 1)


In [37]:
# Stack all variants along the sample axis. Rows are grouped by variant in this
# order: noise, speed+pitch, pitch, shift, dynamic change, unmodified. A label
# vector for `result` therefore has to repeat the labels six times in that order.
result=np.concatenate((df_noise,df_speednpitch,df_pitch,df_shift,df_dyn_change,df_ref),axis=0)

In [38]:
# Sanity check: the first dimension should be six times the number of rows in `ref`.
result.shape

(30780, 30, 216, 1)

In [39]:
# Persist the stacked male features to Drive so the long extraction above need not be rerun.
np.save('/gdrive/My Drive/Audio_files/Combined_Dataframes/male_2d_aug.npy',result)

### **FEMALE AUDIO AUGMENTATION**

In [None]:
# Index of the female recordings. This rebinds `ref`, replacing the male index
# loaded earlier; prepare_data reads one audio file per row from the `path` column.
ref = pd.read_csv("/gdrive/My Drive/Audio_files/Combined_Dataframes/female_df.csv")
ref.head()

Unnamed: 0,labels,source,path
0,female_fear,TESS,/gdrive/My Drive/Audio_files/female_final/TESS...
1,female_fear,TESS,/gdrive/My Drive/Audio_files/female_final/TESS...
2,female_fear,TESS,/gdrive/My Drive/Audio_files/female_final/TESS...
3,female_fear,TESS,/gdrive/My Drive/Audio_files/female_final/TESS...
4,female_fear,TESS,/gdrive/My Drive/Audio_files/female_final/TESS...


In [None]:
# Same settings as the male run, repeated so this section can be executed on its own.
# sampling_rate (Hz) and audio_duration (seconds) are read by prepare_data as
# module-level globals; n_mfcc is passed in as `n`.
sampling_rate=44100
audio_duration=2.5
n_mfcc = 30
# This overwrites the male df_* arrays still held in the kernel. The unpacking
# order mirrors prepare_data's return order.
df_ref, df_speednpitch, df_pitch, df_shift, df_noise, df_dyn_change = prepare_data(ref, n = n_mfcc)

100%|██████████| 7029/7029 [1:23:01<00:00,  1.41it/s]


In [None]:
# Print the shape of each of the six feature arrays on one line, space-separated.
print(*(features.shape for features in
        (df_noise, df_speednpitch, df_pitch, df_dyn_change, df_shift, df_ref)))

(7029, 30, 216, 1) (7029, 30, 216, 1) (7029, 30, 216, 1) (7029, 30, 216, 1) (7029, 30, 216, 1) (7029, 30, 216, 1)


In [None]:
# Stack all variants along the sample axis. Rows are grouped by variant in this
# order: noise, speed+pitch, pitch, shift, dynamic change, unmodified. A label
# vector for `result` therefore has to repeat the labels six times in that order.
result=np.concatenate((df_noise,df_speednpitch,df_pitch,df_shift,df_dyn_change,df_ref),axis=0)

In [None]:
# Sanity check: the first dimension should be six times the number of rows in `ref`.
result.shape

(42174, 30, 216, 1)

In [None]:
# Persist the stacked female features to Drive so the long extraction above need not be rerun.
np.save('/gdrive/My Drive/Audio_files/Combined_Dataframes/female_2d_aug.npy', result)