In [1]:
import librosa
import librosa.display
import IPython.display as ipd
from IPython.core.display import display
import matplotlib.pyplot as plt
import seaborn as sb
import numpy as np
import pandas as pd
import os
from random import randint

# Visualise evaluation dataset

Note: We use randomly generated labels first to fit the OneHotEncoder() that will be used later on

In [2]:
test_path = "NLP Final Evaluation Dataset/NLP"

file_name = []
emotion_guess = []
emotion_list = ['angry', 'fear', 'happy', 'neutral', 'sad']

for file in os.listdir(test_path):
  audio_file = test_path + '/' + file
  if (file[-3:] == "wav"):
    file_name.append(file)
    emotion_guess.append(emotion_list[randint(0,4)]) #dummy values but is never used

# dataframe for file name
name_df = pd.DataFrame(file_name, columns=['FileName'])

# add another column for emotions prediction and concat
emotion_df = pd.DataFrame(emotion_guess, columns=['Emotion'])
test_df = pd.concat([name_df, emotion_df], axis=1)

test_df.tail(10)

Unnamed: 0,FileName,Emotion
690,fdf37bbd11.wav,neutral
691,fe3c41f573.wav,fear
692,fe96772cca.wav,angry
693,fed46ebc2f.wav,fear
694,feeef44bf3.wav,fear
695,ff1ef342b5.wav,angry
696,ff495c9312.wav,fear
697,ff85bf9fd5.wav,sad
698,ff9f3fe0b8.wav,sad
699,ffe72dcaa1.wav,neutral


# Feature extraction

At each time t, a vector of length 20 is produced. Therefore, for each sample, we have a 2D matrix. Each sample has different durations, so the matrix has the dimensions 20 x t. We need to make sure that each feature has the same dimensions though padding or truncating.

In [3]:
# Each feature is a 2D matrix
# We need to make sure that each feature has the same dimensions though padding or truncating

def pad_or_cut(matrix, desired_height, desired_width):
    h = matrix.shape[0]
    w = matrix.shape[1]
    if h <= desired_height:
        matrix = np.pad(matrix, pad_width=((0, desired_height - h)), mode='constant')
    else:
        matrix = matrix[:desired_height, :]
    if w <= desired_width:
        matrix = np.pad(matrix, pad_width=((0, 0), (0, desired_width - w)), mode='constant')
    else:
        matrix = matrix[:, :desired_width]

    return matrix

# Test pad_or_cut
temp_mat = np.arange(24).reshape((4, 6))
assert pad_or_cut(temp_mat, 5, 5).shape == (5,5)

def get_features(path):
    # duration and offset are used to take care of the no audio in start and the ending of each audio files as seen above.
    data, sample_rate = librosa.load(path, offset=0.1)
    res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))    
    return pad_or_cut(res, 20, 120)


In [4]:
from tqdm import tqdm
X, y, Z = [], [], []

for path, emotion in tqdm(zip(test_df.FileName, test_df.Emotion), total=len(test_df.FileName)):
    file_path = test_path + '/' + path
    feature = get_features(file_path)
    X.append(feature)
    y.append(emotion) #dummy data
    Z.append(path) #filename
X = np.array(X)
y = np.array(y)
Z = np.array(Z)

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
 -4.6178866e-06  0.0000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ]

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res =

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  2.7585081e-03  0.0000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result i

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res =

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyw

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res =

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
 -0.01117528], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  9.77583113e-05  0.00000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.       

  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
 -1.41166975e-05  0.00000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.       

  2.4370526e-04  0.0000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  8.3886582e-05  0.0000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional argum

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
 -2.2651013e-05  0.0000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
 0.0000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.       

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
 -1.99997194e-05  0.00000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result

  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.featur

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
 -1.3523160e-03  0.0000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ]

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  5.2221494e-05  0.0000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ]

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.04541026], sr=22050 as keyword args. From version 0

  6.5710679e-05  0.0000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result i

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
 -1.3124634e-07  0.0000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ]

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  1.1104554e-03  0.0000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
 -3.5556750e-05  0.0000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an e

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.00458004], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res =

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res =

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res =

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res =

 -7.2420655e-05  0.0000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ]

  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  1.5298813e-04  0.0000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.ar

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
 0.0000000e+00], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feat

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res =

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyw

  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyword args. From version 0.10 passing these as positional arguments will result in an error
  res = np.array(librosa.feature.mfcc(data, sample_rate, n_fft=2048, hop_length=512, n_mfcc=20))
  0.        ], sr=22050 as keyw

In [5]:
X.shape, y.shape, Z.shape

((700, 20, 120), (700,), (700,))

# Scale data

I scaled the data by a factor of 0.01.

In [6]:
X = X/100

In [7]:
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, KFold

import keras
from keras.callbacks import ReduceLROnPlateau
from keras.models import Sequential
from keras.layers import Dense, Conv1D, MaxPooling1D, Flatten, Dropout, BatchNormalization
from keras.utils import np_utils
from tensorflow.keras.utils import to_categorical
from keras.callbacks import ModelCheckpoint

import warnings
if not os.sys.warnoptions:
    warnings.simplefilter("ignore")
warnings.filterwarnings("ignore", category=DeprecationWarning) 

# Create OneHotEncoder

In [8]:
# As this is a multiclass classification problem onehotencoding our Y.
from sklearn.preprocessing import StandardScaler, OneHotEncoder
encoder = OneHotEncoder()
y = encoder.fit_transform(np.array(y).reshape(-1,1)).toarray()

# Predict on test data

In [9]:
saved_model = keras.models.load_model('./nlp_model.h5')

In [10]:
# predicting on test data.
pred_test = saved_model.predict(X)
print(pred_test)
y_pred = encoder.inverse_transform(pred_test)
print(y_pred)

[[6.42691020e-07 1.06287924e-04 1.53168148e-05 9.96254206e-01
  3.62355937e-03]
 [1.07539227e-14 1.98441612e-07 5.34468962e-11 1.64681282e-08
  9.99999762e-01]
 [8.39743661e-05 3.33222151e-02 9.04658635e-04 5.66559429e-05
  9.65632498e-01]
 ...
 [1.26298051e-04 2.93389987e-02 9.70455766e-01 3.22179367e-05
  4.66943311e-05]
 [7.68873811e-01 3.53899226e-02 1.86616585e-01 6.92640664e-03
  2.19328888e-03]
 [8.89290310e-03 2.81765964e-02 4.63186949e-02 8.32714438e-01
  8.38973373e-02]]
[['neutral']
 ['sad']
 ['sad']
 ['angry']
 ['angry']
 ['angry']
 ['angry']
 ['sad']
 ['neutral']
 ['sad']
 ['angry']
 ['sad']
 ['sad']
 ['neutral']
 ['sad']
 ['sad']
 ['neutral']
 ['neutral']
 ['angry']
 ['sad']
 ['happy']
 ['angry']
 ['sad']
 ['sad']
 ['neutral']
 ['neutral']
 ['happy']
 ['sad']
 ['sad']
 ['sad']
 ['fear']
 ['happy']
 ['sad']
 ['sad']
 ['fear']
 ['happy']
 ['sad']
 ['sad']
 ['sad']
 ['angry']
 ['angry']
 ['sad']
 ['sad']
 ['neutral']
 ['sad']
 ['sad']
 ['sad']
 ['happy']
 ['sad']
 ['angry']


# Store data

In [11]:
df = pd.DataFrame(columns=['FileName','Predicted Emotion']) #,'Dummy Labels'])
df['FileName'] = Z
df['Predicted Emotion'] = y_pred.flatten()
#df['Dummy Labels'] = y_test.flatten()

# .csv format, no headers, sorted in ascending order by the first column (Do for submission csv)
df.sort_values(by=['FileName'], inplace=True)

# Cleaning (because of x3 attributes)
#df.drop_duplicates(inplace=True)
#df.drop_duplicates(subset='FileName', keep="last", inplace=True)

# Output save
df.to_csv("ouput_chris_nlp.csv", index=False, header=False)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):  # more options can be specified also
    print(df.head(100))


          FileName Predicted Emotion
0   0028c4fbc5.wav           neutral
1   00b924e5b4.wav               sad
2   0116a30694.wav               sad
3   014259dcfd.wav             angry
4   01bfa4103d.wav             angry
5   026c0f62db.wav             angry
6   02990f092b.wav             angry
7   02b750b8f1.wav               sad
8   03022e40b3.wav           neutral
9   03403158f8.wav               sad
10  035f9b2e70.wav             angry
11  044370bd33.wav               sad
12  04ec2f4600.wav               sad
13  04f22c7b9e.wav           neutral
14  0561683c1f.wav               sad
15  05d9201963.wav               sad
16  0606db6199.wav           neutral
17  065680dc7d.wav           neutral
18  07192d646f.wav             angry
19  076eb57ae9.wav               sad
20  0803950294.wav             happy
21  086c232666.wav             angry
22  08c6fbf5fc.wav               sad
23  090ad52a07.wav               sad
24  097debeccb.wav           neutral
25  09b8baab63.wav           neutral
2