In [1]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
import librosa.display
import audioread
import os
from tqdm import tqdm
import pandas as pd
from tensorflow import keras
from sys import byteorder
from array import array
from struct import pack

import wave

import matplotlib.pyplot as plt
from scipy import signal
from scipy.io import wavfile
import os
import numpy as np
import random

#To find the duration of wave file in seconds
import wave
import contextlib

#Keras imports
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
from keras.models import model_from_json

import time
import datetime

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
def absoluteFilePaths(directory):
    """Lazily yield the absolute path of every file found beneath ``directory``.

    The tree is traversed with ``os.walk``, so files inside nested
    sub-directories are included; directories themselves are never yielded.
    """
    for root, _subdirs, names in os.walk(directory):
        yield from (os.path.abspath(os.path.join(root, name)) for name in names)

In [3]:
class Augmentation:
    """Waveform augmentation, audio I/O and plotting helpers.

    The augmentation methods take a 1-D NumPy array of audio samples and
    return the transformed samples.
    """

    # Fixed number of samples that read_audio_file and stretch crop or
    # zero-pad their result to.
    _INPUT_LENGTH = 36000

    @staticmethod
    def _fit_length(data, length):
        """Crop ``data`` to ``length`` samples, or right-pad it with zeros."""
        if len(data) > length:
            return data[:length]
        return np.pad(data, (0, max(0, length - len(data))), "constant")

    def read_audio_file(self, file_path):
        """Load ``file_path`` with librosa and return exactly 36000 samples.

        No ``sr`` is passed, so librosa's default sampling rate applies.
        Longer clips are truncated, shorter ones are zero-padded at the end.
        """
        data = librosa.core.load(file_path)[0]
        return Augmentation._fit_length(data, Augmentation._INPUT_LENGTH)

    def dyn_change(self, data):
        """Scale the signal by a random gain drawn uniformly from [1.5, 3)."""
        gain = np.random.uniform(low=1.5, high=3)
        return data * gain

    @staticmethod
    def pitch_adv(data, sample_rate):
        """Pitch-shift ``data`` by a random number of steps in [0, 4).

        Declared as a static method because the original signature had no
        ``self``; it can be called on the class or on an instance.

        Parameters
        ----------
        data : numpy.ndarray
            Audio samples; converted to float64 before shifting.
        sample_rate : int
            Sampling rate of ``data``.

        Returns
        -------
        numpy.ndarray
            The pitch-shifted samples (previously computed but never returned).
        """
        bins_per_octave = 12
        pitch_pm = 2
        pitch_change = pitch_pm * 2 * (np.random.uniform())
        # Keyword arguments work with both old (positional) and new
        # (keyword-only) librosa signatures.
        return librosa.effects.pitch_shift(data.astype('float64'),
                                           sr=sample_rate,
                                           n_steps=pitch_change,
                                           bins_per_octave=bins_per_octave)

    def shift_adv(self, data):
        """Circularly shift the samples by a random offset between -2500 and 2500."""
        s_range = int(np.random.uniform(low=-5, high=5) * 500)
        return np.roll(data, s_range)

    def noise_adv(self, data):
        """Add Gaussian noise scaled relative to the signal's maximum value.

        The noise amplitude is ``0.005 * U(0, 1) * max(data)``; the result is
        float64.
        """
        noise_amp = 0.005 * np.random.uniform() * np.amax(data)
        return data.astype('float64') + noise_amp * np.random.normal(size=data.shape[0])

    def add_noise(self, data):
        """Add standard-normal noise scaled by a fixed factor of 0.005."""
        noise = np.random.randn(len(data))
        return data + 0.005 * noise

    def shift(self, data):
        """Circularly shift the samples by a fixed 1600 positions."""
        return np.roll(data, 1600)

    def stretch(self, data, rate=0.8):
        """Time-stretch ``data`` by ``rate`` and fit the result to 36000 samples."""
        stretched = librosa.effects.time_stretch(data, rate=rate)
        return Augmentation._fit_length(stretched, Augmentation._INPUT_LENGTH)

    def pitch(self, data):
        """Pitch-shift ``data`` by 0.9 steps, assuming a 16000 Hz sampling rate."""
        return librosa.effects.pitch_shift(data, sr=16000, n_steps=0.9)

    def speed(self, data):
        """Time-stretch ``data`` with a fixed rate of 1.5 (length is not adjusted)."""
        return librosa.effects.time_stretch(data, rate=1.5)

    def write_audio_file(self, file, data, sample_rate=16000):
        """Write ``data`` to ``file`` as a WAV at ``sample_rate``."""
        # NOTE(review): `librosa.output` is not available in newer librosa
        # releases -- confirm the pinned librosa version before upgrading.
        librosa.output.write_wav(file, data, sample_rate)

    def plot_time_series(self, data):
        """Plot the raw waveform against a normalised 0..1 x-axis and show it."""
        plt.figure(figsize=(14, 8))
        plt.title('Raw wave ')
        plt.ylabel('Amplitude')
        plt.plot(np.linspace(0, 1, len(data)), data)
        plt.show()

    def get_dur(self, file):
        """Return the ``duration`` reported by audioread for ``file`` (presumably seconds)."""
        with audioread.audio_open(file) as f:
            return f.duration

In [19]:
def graph_spectrogram(in_path, out_path, mode=None, nfft=512, noverlap=511,
                      out_img="/home/bigbrain/emotion-recognize/images/Train1/"):
    """Render the spectrogram of a WAV file (optionally augmented) to a PNG.

    Parameters
    ----------
    in_path : str
        WAV file, read with ``scipy.io.wavfile.read``.
    out_path : str
        Unused by this function; kept so existing call sites keep working.
    mode : str or None
        Augmentation applied before plotting: 'stretch', 'pitch', 'noise',
        'shift' or 'speed'. Any other value (including 'defaut' and None)
        leaves the samples untouched. The value is also appended to the
        output file name as ``_<mode>``.
    nfft, noverlap : int
        Forwarded to ``Axes.specgram`` as ``NFFT`` and ``noverlap``.
    out_img : str
        Directory the PNG is written to.

    Returns
    -------
    None
        The only result is the PNG written to ``out_img``.
    """
    rate, data = wavfile.read(in_path)
    aa = Augmentation()
    augmenters = {
        'stretch': aa.stretch,
        'pitch': aa.pitch,
        'noise': aa.noise_adv,
        'shift': aa.shift_adv,
        'speed': aa.speed,
    }
    if mode in augmenters:
        # wavfile.read returns integer samples for PCM files, while the
        # librosa-backed effects expect floating-point audio. Values are
        # converted without rescaling.
        needs_float = mode in ('stretch', 'pitch', 'speed')
        if needs_float and not np.issubdtype(data.dtype, np.floating):
            data = data.astype(np.float32)
        data = augmenters[mode](data)

    # Set the default size BEFORE creating the figure; assigning it afterwards
    # only affected figures created by later calls, so the first image of a
    # session came out with a different size.
    plt.rcParams['figure.figsize'] = [0.75, 0.5]
    fig, ax = plt.subplots(1)
    try:
        fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
        ax.axis('off')
        ax.specgram(x=data, Fs=rate, noverlap=noverlap, NFFT=nfft)
        file_name = os.path.basename(in_path).split(".")[0] + '_{}'.format(mode)
        des_path = os.path.join(out_img, file_name + ".png")
        # `frameon` is deliberately not passed: the previous value was the
        # truthy string 'false' (so the frame was drawn anyway) and newer
        # matplotlib versions no longer accept the keyword.
        fig.savefig(des_path, dpi=300)
    finally:
        # Always release the figure, even when plotting or saving fails.
        plt.close(fig)

In [4]:
# Create spectrogram from .wav file PAEP-000001.wav
def create_spectrogram(in_path, out_path, mode='defaut',
                       out_img="/home/bigbrain/emotion-recognize/images/Train1/"):
    """Save a mel-spectrogram PNG and the (augmented) audio for one clip.

    Parameters
    ----------
    in_path : str
        Audio file, loaded with librosa at 16000 Hz.
    out_path : str
        Directory the (augmented) clip is written to as ``<name>_<mode>.wav``.
    mode : str
        Augmentation applied before plotting: 'stretch', 'pitch', 'noise',
        'shift' or 'speed'. Any other value (including the default
        'defaut') leaves the clip untouched. The value is also appended to
        the output file names.
    out_img : str
        Directory the PNG is written to.

    Returns
    -------
    None
        The results are the PNG in ``out_img`` and the WAV in ``out_path``.
    """
    aa = Augmentation()

    plt.interactive(False)
    clip, sample_rate = librosa.load(in_path, sr=16000)

    augmenters = {
        'stretch': aa.stretch,
        'pitch': aa.pitch,
        'noise': aa.add_noise,
        'shift': aa.shift,
        'speed': aa.speed,
    }
    if mode in augmenters:
        clip = augmenters[mode](clip)

    file_name = os.path.basename(in_path).split(".")[0] + '_{}'.format(mode)
    des_path = os.path.join(out_img, file_name + ".png")

    fig = plt.figure(figsize=[0.72, 0.72])
    try:
        ax = fig.add_subplot(111)
        # Hide axes and frame so the image contains only the spectrogram.
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        ax.set_frame_on(False)
        S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
        librosa.display.specshow(librosa.power_to_db(S, ref=np.max), ax=ax)
        fig.savefig(des_path, dpi=400, bbox_inches='tight', pad_inches=0)
    finally:
        # Close only this figure (also on failure); closing 'all' figures
        # would discard unrelated plots the caller may still have open.
        plt.close(fig)

    aa.write_audio_file(os.path.join(out_path, file_name + '.wav'), clip, sample_rate)
    

In [5]:
# Snapshot of every file currently present under the training directory.
files = [*absoluteFilePaths('/home/bigbrain/emotion-recognize/Handout/Train1/')]

In [7]:
# Directory handed to create_spectrogram as `out_path` (where the augmented
# .wav copies are written).
target_file = "/home/bigbrain/emotion-recognize/Handout/Train1/"

# For every input file, render each mode below in this order.
# "pitch" and "speed" are available in create_spectrogram but currently disabled.
for file in tqdm(files):
    for aug_mode in ("noise", "stretch", "shift", "defaut"):
        create_spectrogram(file, target_file, mode=aug_mode)
print("Done!!!")

100%|██████████| 5229/5229 [20:25<00:00,  4.27it/s]

Done!!!





In [19]:
# Load the label table for the original recordings; the columns used further
# down are `File` (e.g. "PAEP-000001.wav") and `Label`.
data_audio = pd.read_csv('../Handout/train_label.csv')

In [20]:
# Display the label table to check what was loaded.
data_audio

Unnamed: 0,File,Label
0,PAEP-000001.wav,3
1,PAEP-000002.wav,5
2,PAEP-000003.wav,4
3,PAEP-000004.wav,1
4,PAEP-000005.wav,0
...,...,...
5224,PAEP-005225.wav,0
5225,PAEP-005226.wav,2
5226,PAEP-005227.wav,1
5227,PAEP-005228.wav,4


In [21]:
# Empty frame that will hold one row per audio file in the training directory.
# The columns are given as a list: a set has no defined order (so the column
# order varied between runs) and newer pandas versions reject a set here.
df = pd.DataFrame(columns=['File', 'Label'])

In [22]:
files = list(absoluteFilePaths('/home/bigbrain/emotion-recognize/Handout/Train1/'))
# Build the frame in one step from a list of records instead of appending row
# by row: `DataFrame.append` copies the whole frame on every call (quadratic),
# is absent from newer pandas releases, and re-running the cell used to append
# the same rows again. 'x' is a placeholder label that is filled in later.
df = pd.DataFrame(
    [{'File': os.path.basename(file), 'Label': 'x'} for file in files],
    columns=['File', 'Label'],
)

In [23]:
# Display the file list; every row still carries the placeholder label 'x'.
df

Unnamed: 0,Label,File
0,x,PAEP-004667_noise.wav
1,x,PAEP-002135.wav
2,x,PAEP-003204.wav
3,x,PAEP-001005_shift.wav
4,x,PAEP-004512_shift.wav
...,...,...
15682,x,PAEP-004048_noise.wav
15683,x,PAEP-002564_shift.wav
15684,x,PAEP-001220.wav
15685,x,PAEP-003784.wav


In [24]:
# Build the file-name -> label lookup once, instead of scanning the whole
# label table for every row of `df`.
if not data_audio["File"].is_unique:
    raise ValueError("train_label.csv lists the same file more than once")
label_by_file = dict(zip(data_audio["File"], data_audio["Label"]))

# Augmented copies are named "<original>_<mode>.wav"; strip the extension and
# the mode suffix to recover the original recording name, e.g.
# "PAEP-004667_noise.wav" -> "PAEP-004667.wav". A file without a label raises
# KeyError naming the missing entry.
# NOTE: the name `array` is kept because the next cell reads it, although it
# shadows `from array import array` from the import cell.
array = [
    label_by_file[name.split('.')[0].split('_')[0] + ".wav"]
    for name in df['File']
]

  after removing the cwd from sys.path.


In [25]:
# Replace the placeholder 'x' labels with the labels collected in `array`
# (one entry per row of `df`, in row order).
df["Label"] = array

In [26]:
# Display the frame again to check that every file now has its label.
df

Unnamed: 0,Label,File
0,5,PAEP-004667_noise.wav
1,3,PAEP-002135.wav
2,3,PAEP-003204.wav
3,5,PAEP-001005_shift.wav
4,0,PAEP-004512_shift.wav
...,...,...
15682,2,PAEP-004048_noise.wav
15683,4,PAEP-002564_shift.wav
15684,3,PAEP-001220.wav
15685,2,PAEP-003784.wav


In [27]:
# Persist the file/label table; index=False keeps the row index out of the CSV.
df.to_csv('../Handout/train_last.csv', index=False)