In [33]:
import librosa
import librosa.display
from IPython.display import Audio
import soundfile
import os, glob, pickle
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from scipy.io import wavfile
import librosa.display
from IPython.display import Audio
import matplotlib.pyplot as plt
import soundfile as sf
import shutil

In [34]:
# RAVDESS emotion codes (third dash-separated field of a file name) -> label.
emotions = dict([
    ("01", "neutral"),
    ("02", "calm"),
    ("03", "happy"),
    ("04", "sad"),
    ("05", "angry"),
    ("06", "fearful"),
    ("07", "disgust"),
    ("08", "surprised"),
])

# Only clips carrying one of these labels are processed; all others are skipped.
observed_emotions = ["calm", "happy", "fearful", "disgust"]

In [35]:
# Sampling rate (Hz) shared by the loading, pitch-shifting and
# feature-extraction helpers in this notebook.
sample_rate = 16000

In [36]:
def preProcess(file_name):
  """Load an audio file and return its pre-emphasised, silence-trimmed samples.

  Args:
    file_name: Path of the audio file to read.

  Returns:
    The samples, loaded at the notebook-wide ``sample_rate``, after a
    pre-emphasis filter (coefficient 0.99) and after trimming the leading and
    trailing stretches that lie more than 20 dB below the peak.
  """
  # `sr` has to be passed by keyword: librosa >= 0.10 made every argument of
  # `load` after the path keyword-only, so the positional form raises TypeError.
  samples, _ = librosa.load(file_name, sr=sample_rate)
  emphasised = librosa.effects.preemphasis(samples, coef=0.99)
  # `trim` also returns the kept [start, end] interval, which is not needed here.
  trimmed, _ = librosa.effects.trim(emphasised, top_db=20)
  return trimmed

In [37]:
def pitch_and_speed_change(abc, low=0.8, high=1.0):
  """Resample a signal by a random factor, changing pitch and speed together.

  A length factor is drawn uniformly from ``[low, high)`` and the signal is
  re-read at steps of ``1 / factor`` samples using linear interpolation. The
  result keeps the length of the input: it is zero-padded at the end when the
  resampled signal is shorter and truncated when it is longer.

  Args:
    abc: 1-D array of audio samples. It is not modified.
    low: Lower bound of the random length factor (default 0.8).
    high: Upper bound of the random length factor (default 1.0).

  Returns:
    A new array with the same shape and dtype as ``abc``.
  """
  # np.interp rejects an empty set of sample points, so there is nothing to do.
  if len(abc) == 0:
    return abc.copy()

  length_change = np.random.uniform(low=low, high=high)
  speed_fac = 1.0  / length_change
  print("resample length_change = ",length_change)

  read_positions = np.arange(0, len(abc), speed_fac)
  resampled = np.interp(read_positions, np.arange(0, len(abc)), abc)
  minlen = min(abc.shape[0], resampled.shape[0])
  # Start from real zeros: multiplying a copy by 0 would keep any NaN/inf
  # from the input alive in the padded tail.
  y_pitch_speed = np.zeros_like(abc)
  y_pitch_speed[0:minlen] = resampled[0:minlen]
  return y_pitch_speed

In [38]:
def pitch_changing(abc):
  """Shift the pitch of a signal by a random number of semitones.

  The shift is drawn as ``pitch_pm * 2 * U`` with ``U`` uniform on ``[0, 1)``,
  i.e. between 0 and 4 steps of a 12-bins-per-octave scale.
  NOTE(review): the draw is never negative, so the pitch is only ever raised;
  if a symmetric +/- ``pitch_pm`` range was intended, confirm and centre it.

  Args:
    abc: Array of audio samples at the notebook-wide ``sample_rate``.

  Returns:
    The pitch-shifted samples as returned by ``librosa.effects.pitch_shift``.
  """
  bins_per_octave = 12
  pitch_pm = 2
  pitch_change =  pitch_pm * 2*(np.random.uniform())
  print("pitch_change = ",pitch_change)
  # `sr` must be a keyword argument: librosa >= 0.10 made everything after the
  # signal keyword-only, so the positional form raises TypeError.
  return librosa.effects.pitch_shift(abc,
                                     sr=sample_rate,
                                     n_steps=pitch_change,
                                     bins_per_octave=bins_per_octave)

In [39]:
def time_stretching(abc, factor=0.5):
  """Time-stretch a signal by a fixed rate.

  Args:
    abc: Array of audio samples.
    factor: Stretch rate handed to ``librosa.effects.time_stretch``
      (default 0.5, the value this notebook has always used).

  Returns:
    The stretched samples; their length generally differs from the input.
  """
  # `rate` must be a keyword argument: librosa >= 0.10 made it keyword-only,
  # so the positional form raises TypeError.
  return librosa.effects.time_stretch(abc, rate=factor)

In [40]:
def noise_addition(abc):
  """Return ``abc`` with low-amplitude Gaussian noise mixed in.

  One standard-normal value is drawn per sample, scaled by 0.0005 and added
  to the signal. The input array itself is left untouched.
  """
  gaussian = np.random.normal(0, 1, len(abc))
  noisy = abc + 0.0005 * gaussian
  return noisy

In [41]:
def speed_changing(abc):
  """Time-stretch a signal by a random rate while keeping its length.

  A rate is drawn uniformly from ``[0.9, 1.1)`` and applied with
  ``librosa.effects.time_stretch``. The result is then fitted back to the
  input length: zero-padded at the end if the stretched signal is shorter,
  truncated if it is longer.

  Args:
    abc: 1-D array of audio samples. It is not modified.

  Returns:
    A new array with the same shape and dtype as ``abc``.
  """
  speed_change = np.random.uniform(low=0.9,high=1.1)
  print("speed_change = ",speed_change)
  # `rate` must be a keyword argument: librosa >= 0.10 made it keyword-only,
  # so the positional form raises TypeError.
  stretched = librosa.effects.time_stretch(abc, rate=speed_change)
  minlen = min(abc.shape[0], stretched.shape[0])
  # Start from real zeros: multiplying a copy by 0 would keep any NaN/inf
  # from the input alive in the padded tail.
  y_speed = np.zeros_like(abc)
  y_speed[0:minlen] = stretched[0:minlen]
  return y_speed

In [42]:
def extract_feature(abc,mfcc,chroma,mel):
  """Build one flat feature vector from time-averaged spectral features.

  Each enabled feature is computed per frame, averaged over all frames, and
  appended to the result in the fixed order MFCC -> chroma -> mel.

  Args:
    abc: Array of audio samples at the notebook-wide ``sample_rate``.
    mfcc: If truthy, include the mean of 40 MFCCs.
    chroma: If truthy, include the mean chromagram of the magnitude STFT.
    mel: If truthy, include the mean mel spectrogram.

  Returns:
    1-D array with the selected features concatenated; empty when every
    flag is falsy.
  """
  result=np.array([])
  if mfcc:
    mfccs=np.mean(librosa.feature.mfcc(y=abc, sr=sample_rate, n_mfcc=40).T, axis=0)
    result=np.hstack((result, mfccs))
  if chroma:
    # The magnitude STFT is only needed for the chroma feature.
    stft=np.abs(librosa.stft(abc))
    # Separate local name so the boolean `chroma` flag is not overwritten.
    chroma_mean=np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
    result=np.hstack((result, chroma_mean))
  if mel:
    # `y` must be a keyword argument: melspectrogram takes keyword-only
    # arguments in librosa >= 0.10, so the positional form raises TypeError.
    mel_mean=np.mean(librosa.feature.melspectrogram(y=abc, sr=sample_rate).T,axis=0)
    result=np.hstack((result, mel_mean))
  return result

In [43]:
def load_data_noise(src_pattern="/content/drive/MyDrive/RAVDESS/Speech/Actor_*/*.wav",
                    dest_dir="/content/drive/MyDrive/RAVDESS/Actor_27"):
    """Write a noise-augmented copy of every observed-emotion clip.

    Every file matching ``src_pattern`` whose emotion code (third
    dash-separated field of the file name) maps to a label in
    ``observed_emotions`` is pre-processed, mixed with Gaussian noise and
    saved under its original base name in ``dest_dir``.

    Args:
        src_pattern: Glob pattern selecting the source ``.wav`` files.
        dest_dir: Directory that receives the augmented files. With the
            defaults it lies outside the ``Speech/`` tree, so the source
            glob never picks up the augmented copies.

    Returns:
        None. The function is used for its files on disk.

    Raises:
        KeyError / IndexError: If a matched file name does not follow the
            dash-separated naming scheme with a known emotion code.
    """
    # Create the target first. Copying into a missing directory would create a
    # single *file* of that name and overwrite it on every iteration.
    os.makedirs(dest_dir, exist_ok=True)
    for file in glob.glob(src_pattern):
        file_name=os.path.basename(file)
        print(file_name)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        noise_added = noise_addition(preProcess(file))
        # splitext keeps the whole stem even if the name contains extra dots.
        stem = os.path.splitext(file_name)[0]
        # Write straight to the destination, so the result does not depend on
        # the working directory being /content, and label the file with the
        # rate the audio was actually loaded at.
        sf.write(os.path.join(dest_dir, stem+".wav"), noise_added, sample_rate)

In [44]:
def load_data_pitch(src_pattern="/content/drive/MyDrive/RAVDESS/Speech/Actor_*/*.wav",
                    dest_dir="/content/drive/MyDrive/RAVDESS/Actor_28"):
    """Write a pitch-shifted copy of every observed-emotion clip.

    Every file matching ``src_pattern`` whose emotion code (third
    dash-separated field of the file name) maps to a label in
    ``observed_emotions`` is pre-processed, randomly pitch-shifted and saved
    under its original base name in ``dest_dir``.

    Args:
        src_pattern: Glob pattern selecting the source ``.wav`` files.
        dest_dir: Directory that receives the augmented files. With the
            defaults it lies outside the ``Speech/`` tree, so the source
            glob never picks up the augmented copies.

    Returns:
        None. The function is used for its files on disk.

    Raises:
        KeyError / IndexError: If a matched file name does not follow the
            dash-separated naming scheme with a known emotion code.
    """
    # Create the target first. Copying into a missing directory would create a
    # single *file* of that name and overwrite it on every iteration.
    os.makedirs(dest_dir, exist_ok=True)
    for file in glob.glob(src_pattern):
        file_name=os.path.basename(file)
        print(file_name)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        pitch_changed = pitch_changing(preProcess(file))
        # splitext keeps the whole stem even if the name contains extra dots.
        stem = os.path.splitext(file_name)[0]
        # Write straight to the destination, so the result does not depend on
        # the working directory being /content, and label the file with the
        # rate the audio was actually loaded at.
        sf.write(os.path.join(dest_dir, stem+".wav"), pitch_changed, sample_rate)

In [45]:
def load_data_speed(src_pattern="/content/drive/MyDrive/RAVDESS/Speech/Actor_*/*.wav",
                    dest_dir="/content/drive/MyDrive/RAVDESS/Actor_29"):
    """Write a speed-changed copy of every observed-emotion clip.

    Every file matching ``src_pattern`` whose emotion code (third
    dash-separated field of the file name) maps to a label in
    ``observed_emotions`` is pre-processed, passed through
    ``speed_changing`` and saved under its original base name in
    ``dest_dir``.

    Args:
        src_pattern: Glob pattern selecting the source ``.wav`` files.
        dest_dir: Directory that receives the augmented files. With the
            defaults it lies outside the ``Speech/`` tree, so the source
            glob never picks up the augmented copies.

    Returns:
        None. The function is used for its files on disk.

    Raises:
        KeyError / IndexError: If a matched file name does not follow the
            dash-separated naming scheme with a known emotion code.
    """
    # Create the target first. Copying into a missing directory would create a
    # single *file* of that name and overwrite it on every iteration.
    os.makedirs(dest_dir, exist_ok=True)
    for file in glob.glob(src_pattern):
        file_name=os.path.basename(file)
        print(file_name)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        speed_changed = speed_changing(preProcess(file))
        # splitext keeps the whole stem even if the name contains extra dots.
        stem = os.path.splitext(file_name)[0]
        # Write straight to the destination, so the result does not depend on
        # the working directory being /content, and label the file with the
        # rate the audio was actually loaded at.
        sf.write(os.path.join(dest_dir, stem+".wav"), speed_changed, sample_rate)

In [46]:
def load_data_time(src_pattern="/content/drive/MyDrive/RAVDESS/Speech/Actor_*/*.wav",
                   dest_dir="/content/drive/MyDrive/RAVDESS/Actor_30"):
    """Write a time-stretched copy of every observed-emotion clip.

    Every file matching ``src_pattern`` whose emotion code (third
    dash-separated field of the file name) maps to a label in
    ``observed_emotions`` is pre-processed, passed through
    ``time_stretching`` and saved under its original base name in
    ``dest_dir``.

    Args:
        src_pattern: Glob pattern selecting the source ``.wav`` files.
        dest_dir: Directory that receives the augmented files. With the
            defaults it lies outside the ``Speech/`` tree, so the source
            glob never picks up the augmented copies.

    Returns:
        None. The function is used for its files on disk.

    Raises:
        KeyError / IndexError: If a matched file name does not follow the
            dash-separated naming scheme with a known emotion code.
    """
    # Create the target first. Copying into a missing directory would create a
    # single *file* of that name and overwrite it on every iteration.
    os.makedirs(dest_dir, exist_ok=True)
    for file in glob.glob(src_pattern):
        file_name=os.path.basename(file)
        print(file_name)
        emotion=emotions[file_name.split("-")[2]]
        if emotion not in observed_emotions:
            continue
        time_stretched = time_stretching(preProcess(file))
        # splitext keeps the whole stem even if the name contains extra dots.
        stem = os.path.splitext(file_name)[0]
        # Write straight to the destination, so the result does not depend on
        # the working directory being /content, and label the file with the
        # rate the audio was actually loaded at.
        sf.write(os.path.join(dest_dir, stem+".wav"), time_stretched, sample_rate)

In [47]:
# Run the four augmentation passes back to back, in the same order as before:
# noise, pitch, speed, time-stretch.
for run_augmentation in (load_data_noise, load_data_pitch, load_data_speed, load_data_time):
    run_augmentation()

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
03-01-06-02-01-02-03.wav
03-01-07-01-01-01-03.wav
03-01-07-02-02-01-03.wav
03-01-08-01-01-02-03.wav
03-01-06-02-02-02-03.wav
03-01-08-01-01-01-03.wav
03-01-07-01-01-02-03.wav
03-01-06-02-01-01-03.wav
03-01-07-01-02-01-03.wav
03-01-07-01-02-02-03.wav
03-01-08-02-02-01-03.wav
03-01-08-02-01-02-03.wav
03-01-08-01-02-01-03.wav
03-01-08-02-02-02-03.wav
03-01-08-02-01-01-03.wav
03-01-08-01-02-02-03.wav
03-02-01-01-02-02-03.wav
03-02-02-01-01-02-03.wav
03-02-02-01-02-01-03.wav
03-02-01-01-01-01-03.wav
03-02-02-01-01-01-03.wav
03-02-02-01-02-02-03.wav
03-02-01-01-01-02-03.wav
03-02-01-01-02-01-03.wav
03-02-02-02-02-01-03.wav
03-02-04-01-02-01-03.wav
03-02-03-02-02-01-03.wav
03-02-03-02-01-02-03.wav
03-02-04-01-01-01-03.wav
03-02-02-02-02-02-03.wav
03-02-03-01-02-02-03.wav
03-02-03-02-02-02-03.wav
03-02-03-02-01-01-03.wav
03-02-02-02-01-02-03.wav
03-02-04-01-01-02-03.wav
03-02-02-02-01-01-03.wav
03-02-03-01-01-01-03.wav
03-02-03-0

In [48]:
# koulik = '/content/drive/MyDrive/RAVDESS/Speech/Actor_01/03-01-03-01-01-02-01.wav'
# kd,k = librosa.load(koulik,sr=sample_rate)
# plt.figure(figsize=(12, 4))
# print("raw data")
# librosa.display.waveplot(kd.astype('float'), sr=sample_rate)
# plt.show()

# koulikDas = preProcess(koulik)
# plt.figure(figsize=(12, 4))
# print("preprocessed data")
# librosa.display.waveplot(koulikDas.astype('float'), sr=sample_rate)
# plt.show()

# noise_added = noise_addition(koulikDas)
# print("noise added")
# plt.figure(figsize=(12, 4))
# librosa.display.waveplot(noise_added.astype('float'), sr=sample_rate)
# plt.show()

# pitch_and_speed_changed = pitch_and_speed_change(koulikDas)
# print("pitch and speed changed")
# plt.figure(figsize=(12, 4))
# librosa.display.waveplot(pitch_and_speed_changed, sr=sample_rate)
# plt.show()

# time_stretched = time_stretching(koulikDas)
# print("Time stretched")
# Audio(time_stretched,rate = 16000)
# plt.figure(figsize=(12, 4))
# librosa.display.waveplot(time_stretched, sr=sample_rate)
# plt.show()

# pitch_changed = pitch_changing(koulikDas)
# print("pitch changed")
# plt.figure(figsize=(12, 4))
# librosa.display.waveplot(pitch_changed, sr=sample_rate)
# plt.show()

# speed_changed = speed_changing(koulikDas)
# print("speed changed")
# plt.figure(figsize=(12, 4))
# librosa.display.waveplot(speed_changed, sr=sample_rate)
# plt.show()