<a href="https://colab.research.google.com/github/shehadeh7/APS360-Project/blob/main/APS360_proj.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
! pip install kaggle



In [2]:
# -p makes this cell safe to re-run: no error if ~/.kaggle already exists.
! mkdir -p ~/.kaggle

In [3]:
# Copy kaggle.json from the working directory into ~/.kaggle/.
! cp kaggle.json ~/.kaggle/

In [4]:
# Restrict the token file to owner read/write only (mode 600).
! chmod 600 ~/.kaggle/kaggle.json

In [5]:
# Download the ejlok1/cremad dataset archive (cremad.zip) with the Kaggle CLI.
! kaggle datasets download -d ejlok1/cremad

Downloading cremad.zip to /content
 99% 446M/451M [00:05<00:00, 70.9MB/s]
100% 451M/451M [00:05<00:00, 84.4MB/s]


In [None]:
# -q: do not list every extracted file in the cell output.
# -n: never overwrite existing files, so a re-run does not stop at an
#     interactive "replace?" prompt.
! unzip -q -n cremad.zip

In [7]:
import os
# Integer class label for each emotion code that appears in the file names.
# For reference: NEU=neutral, HAP=happy, SAD=sad, ANG=angry, FEA=fear, DIS=disgust.
crema_emotions = {
    code: label
    for label, code in enumerate(('NEU', 'HAP', 'SAD', 'ANG', 'FEA', 'DIS'))
}

crema_path = './AudioWAV'
processed_data = []
# Names of all entries found in the audio directory.
crema_data = os.listdir(crema_path)

In [20]:
! pip install pydub
! pip install noisereduce

Collecting noisereduce
  Downloading noisereduce-2.0.0-py3-none-any.whl (15 kB)
Installing collected packages: noisereduce
Successfully installed noisereduce-2.0.0


In [29]:
import librosa
import numpy as np

from pydub import AudioSegment, effects
import noisereduce as nr

# Frame and hop sizes (presumably in samples); the cells visible here do not
# pass them to any call.
frame_length, hop_length = 2048, 512
# Number of samples every clip is padded or cropped to before feature extraction.
total_length = 180000  # TODO: verify this value

def extract_feature(file_name):
    """Compute five frame-level feature matrices for one audio file.

    The clip is peak-normalized, trimmed of leading/trailing silence, padded
    or cropped to ``total_length`` samples and noise-reduced; the features
    are then computed on the cleaned signal.

    Parameters
    ----------
    file_name : str
        Path to an audio file that pydub can decode.

    Returns
    -------
    tuple of np.ndarray
        ``(mfccs, chroma, mel, contrast, tonnetz)`` exactly as returned by
        the corresponding librosa feature functions.
    """
    rawsound = AudioSegment.from_file(file_name)
    # Take the native sample rate from the decoded segment instead of
    # decoding the whole file a second time with librosa.load.
    sample_rate = rawsound.frame_rate

    # Peak-normalize, leaving 5 dB of headroom (peak ends up at -5 dBFS).
    normalizedsound = effects.normalize(rawsound, headroom=5.0)
    # Transform the normalized audio to a float32 array of samples.
    normal_x = np.array(normalizedsound.get_array_of_samples(), dtype='float32')
    # Trim silence from the beginning and the end; the returned index
    # interval is not needed.
    xt, _ = librosa.effects.trim(normal_x, top_db=30)
    # Pad (or crop) so every clip has the same number of samples.
    padded_x = librosa.util.fix_length(xt, size=total_length)
    # Noise reduction.
    final_x = nr.reduce_noise(y=padded_x, sr=sample_rate)

    # Feature extraction. The magnitude spectrogram is shared by the chroma
    # and spectral-contrast features.
    stft = np.abs(librosa.stft(final_x))
    mfccs = librosa.feature.mfcc(y=final_x, sr=sample_rate, n_mfcc=15)
    chroma = librosa.feature.chroma_stft(S=stft, sr=sample_rate)
    # The signal must be passed as y=...: melspectrogram takes keyword-only
    # arguments in librosa >= 0.10, so a positional signal raises TypeError.
    mel = librosa.feature.melspectrogram(y=final_x, sr=sample_rate)
    contrast = librosa.feature.spectral_contrast(S=stft, sr=sample_rate)
    tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(final_x), sr=sample_rate)
    return mfccs, chroma, mel, contrast, tonnetz


In [57]:
# Run feature extraction over the audio files, keeping one list per feature type
labels = []
features = []

mfccs, chroma, mel, contrast, tonnetz = [], [], [], [], []
mylists = [mfccs, chroma, mel, contrast, tonnetz]

i = 0
for i, file in enumerate(crema_data, start=1):
    # The third '_'-separated field of the file name is the emotion code.
    emotion = crema_emotions[file.split('_')[2]]
    labels.append(emotion)
    result = extract_feature(crema_path + '/' + file)
    # Route each returned feature matrix to its matching list.
    for bucket, feature in zip(mylists, result):
        bucket.append(feature)
    # Stop once 100 files have been processed.
    if i >= 100:
        break


In [58]:
# Change feature lists to float32 np arrays with the last two axes swapped,
# so each file's matrix goes from librosa's layout to timestamp x features.
def _to_time_major(feature_list):
    """Stack a list of per-file feature matrices and swap axes 1 and 2."""
    stacked = np.asarray(feature_list).astype('float32')
    return np.swapaxes(stacked, 1, 2)

a_mfccs = _to_time_major(mfccs)
a_chroma = _to_time_major(chroma)
a_mel = _to_time_major(mel)
a_contrast = _to_time_major(contrast)
a_tonnetz = _to_time_major(tonnetz)

# Labels name the arrays actually printed (previously mislabeled ZCR / RMS).
print('MFCC shape:', a_mfccs.shape)
print('Chroma shape:', a_chroma.shape)

ZCR shape: (100, 352, 15)
RMS shape: (100, 352, 12)


In [59]:
# Join the five feature arrays along the last axis to form the data array,
# and turn the label list into an array.
feature_blocks = (a_mfccs, a_chroma, a_mel, a_contrast, a_tonnetz)
X = np.concatenate(feature_blocks, axis=-1)
Y = np.array(labels)

for arr in (X, Y):
    print(arr.shape)

(100, 352, 168)
(100,)


In [60]:
import sklearn
# Import the random forest model
from sklearn.ensemble import RandomForestClassifier

# Create a random forest classifier that uses entropy as its split criterion.
# A fixed random_state makes the fitted forest, and therefore the score
# reported below, reproducible across runs.
clf = RandomForestClassifier(criterion='entropy', random_state=0)

# Flatten X to a 2d array (one row per sample) for the random forest
nsamples, nx, ny = X.shape
X_2d = X.reshape((nsamples, nx*ny))

# Train on the first 80 samples; the remaining samples are held out for evaluation.
clf.fit(X_2d[:80,:], Y[:80])



RandomForestClassifier(criterion='entropy')

In [61]:
# Import the submodule explicitly: a bare `import sklearn` does not guarantee
# that sklearn.metrics is loaded.
import sklearn.metrics

# Predict on the samples that were not used for fitting (index 80 onwards).
y_pred = clf.predict(X_2d[80:,:])

# accuracy_score takes (y_true, y_pred); accuracy is the same either way,
# but the documented order keeps this safe if the metric is swapped later.
pscore = sklearn.metrics.accuracy_score(Y[80:], y_pred)
print(pscore)


0.45
