In [2]:
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
tqdm_notebook().pandas()
from tqdm.notebook import tqdm
import pandas as pd
import numpy as np
import math
import os
import librosa
import librosa.display

# Human Emotion Detection

# 1.0 Input Data

<h3> 1.1 Helper Methods to find wav file path and emotion </h3>

In [3]:
# Gets the emotion label from audio title
def get_emotion(filename):
    # Emotion Tag
    emote = filename.split('_')[2]
    if emote=='ANG':
        return 'angry'
    elif emote=='DIS':
        return 'disgust'
    elif emote=='FEA':
        return 'fear'
    elif emote=='HAP':
        return ('happy')
    elif emote=='NEU':
        return ('neutral')
    else:
        return ('sad')

# Creates the main dataframe
def create_dataframe(directory):
    emotions = []
    paths = []
    # Loop through folder
    for filename in tqdm(os.listdir(directory)):
        # Get file complete path
        f = os.path.join(directory, filename)
        # Checks if path is a file or not   
        if os.path.isfile(f):
            emotions.append(get_emotion(filename))
            paths.append(f)
    # Output as Pandas Dataframe
    return pd.DataFrame.from_dict({'Emotion':emotions,'Path':paths})

<h3> 1.2 Helper Methods to get the frequency values from wav file. </h3>

In [4]:
# Gets the frequencies for each path in the dataframe
def get_data():
    audios = []
    samples = []
    for x in tqdm(df['Path']):
        # Cuts all audios to include the sound only
        data, sampling = librosa.load(x, duration=2.5, offset=0.35)
        audios.append(list(data))
        samples.append(sampling)
    return audios, samples

In [5]:
# Get wav files path and emotion type
df = create_dataframe('data/audio')
# Get sound frequencies for each file
audios, samples = get_data()
# Append to a new column in dataframe
df['freq'] = audios
df['sampling'] = samples

  0%|          | 0/7442 [00:00<?, ?it/s]

  0%|          | 0/7442 [00:00<?, ?it/s]

# 2.0 Initial Feature Engineering

This Jupyter Script contains the features individually for easy break down of each feature. For the more efficient run containing all the features please check the python file "emotion_feature_engineering.py".  

<h3> 2.1 Basic Statistics of time-series </h3>

Which includes the Min, Max and Standard Deviation.

In [6]:
def std_value(xs):
    xbar = sum(xs) / len(xs)
    return math.sqrt(sum([(x - xbar) ** 2 for x in xs]) / len(xs))

def basic_stats(xs):
    minV =  min(xs)
    maxV = max(xs)
    std = std_value(xs)
    return np.array([minV, maxV, std])

# Calculates the basic stats without the need to repeat for each one
stats = df['freq'].progress_apply(basic_stats).tolist()
stats = np.array(stats)

df['min_value'] = stats[:,0]
df['max_value'] = stats[:,1]
df['std_value'] = stats[:,2]

  0%|          | 0/7442 [00:00<?, ?it/s]

<h3> 2.2 Total Energy of Time-series </h3>

$$X = \sum \limits _{n=1} ^{N} x_n^2$$
where $x_n$ denotes each element from the time-series 

In [7]:
def total_energy(xs):
    return sum([x**2 for x in xs])

df['total_energy'] = df['freq'].progress_apply(total_energy)

  0%|          | 0/7442 [00:00<?, ?it/s]

<h3> 2.3 Zero Crossing Rate </h3>

(Rate at which a signal changes from positive to negative or vice versa)

$$ ZCR = \frac{1}{T - 1} \sum \limits _{t=1} ^{T-1} 1_{\mathbb{R}_{<0}} (s_t s_{t-1})$$

where $s$ is a signal of length $T$ and $1_{\mathbb{R}_{<0}}$ is an indicator function where $<0$ is $1$ and $>0$ is $0$.

https://en.wikipedia.org/wiki/Zero-crossing_rate

In [8]:
# Returns the number of times a signal changes from positive to negative or negative to positive
def zero_crossing(xs):
    xs = np.array(xs)
    #'xs[1:]' is a list not containing the first element
    #'xs[:-1]' is a list not containing the last element
    return ((xs[1:] * xs[:-1]) < 0).sum()

# Returns the rate for the zero crossing
# In cases that the zero_crossing is not calculated this function can handle it 
def zero_crossing_rate(xs, precompute=None):
    if (precompute == None):
        return (zero_crossing(xs) / (len(xs) - 1))
    else:
        return (precompute / (len(xs) - 1))

def zero_crossing_stats(xs, both=True):
    # Convert to numpy array
    xs = np.array(xs)
    # Zero Crossing Count
    z_crossing_val = zero_crossing(xs)
    # Zero Crossing Rate
    z_crossing_rate_val = zero_crossing_rate(xs, precompute=z_crossing_val)
    return np.array([z_crossing_val, z_crossing_rate_val])

# Calculates the values without the need to repeat for each one
crossing_stats = df['freq'].progress_apply(zero_crossing_stats).tolist()
crossing_stats = np.array(crossing_stats)

df['zero_crossing'] = crossing_stats[:,0]
df['zero_crossing_rate'] = crossing_stats[:,1]

  0%|          | 0/7442 [00:00<?, ?it/s]

<h3> 2.4 RMS (Root Mean Square) </h3>

$$ RMS = \sqrt{\frac{1}{N} (\sum \limits _{n=0} ^{N} x_n^2)}$$

where $x_n$ denotes each element from the time-series

In [9]:
def rms(xs):
     return math.sqrt(sum([x**2 for x in xs]) / len(xs))
    
df['rms'] = df['freq'].progress_apply(rms)

  0%|          | 0/7442 [00:00<?, ?it/s]

<h3> 2.5 Mel-frequency cepstral coefficients [MFCC] </h3>

In [10]:
def mfcc(xs, sampling=22050):
    xs = np.array(xs)
    mfcc = librosa.feature.mfcc(y=xs, sr=sampling).T
    # Gets the mean value for each Coefficient [Gets mean of column]
    return list(np.mean(mfcc, axis=0))

mfcc_features = []
for x in df['freq']:
    mfcc_features.append(mfcc(x))

mfcc_features = np.array(mfcc_features)

# convert to pandas dataframe and append to the main dataframe
df = pd.concat([df, pd.DataFrame(mfcc_features, columns=['mfcc_%i' % i for i in range(20)])], axis=1)

<h3> 2.6 Mel Spectogram </h3>

In [11]:
def mel_spec(xs, sampling=22050):
    xs = np.array(xs)
    melspec = librosa.feature.melspectrogram(y=xs, sr=sampling).T
    # Gets the mean value for each Coefficient [Gets mean of column]
    return list(np.mean(melspec, axis=0))

mel_spec_features = []
for x in df['freq']:
    mel_spec_features.append(mel_spec(x))

mel_spec_features = np.array(mel_spec_features)

columns = ['mel_spec_%i' % i for i in range(mel_spec_features[0].shape[0])]
# convert to pandas dataframe and append to the main dataframe
df = pd.concat([df, pd.DataFrame(mel_spec_features, columns=columns)], axis=1)

# 3.0 Initial Machine Learning Models

<h3> 3.1 Creating Training and Testing Set </h3>

In [13]:
from sklearn.model_selection import train_test_split
X = df[df.columns[4:]].copy()
y = df['Emotion']

In [14]:
# Normalise Dataset
X=(X-X.min())/(X.max()-X.min())
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

<h3> 3.2 ML Models </h3>

<h3> Random Forest </h3>

In [15]:
from sklearn.ensemble import RandomForestClassifier
# Create Model
model = RandomForestClassifier(verbose=1, max_depth=50, class_weight='balanced')
model.fit(X_train, y_train)
# Make Predictions
preds = model.predict(X_test)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    4.1s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:    0.0s finished


In [16]:
from sklearn.metrics import precision_score, recall_score, f1_score
# Evalulate Model
print(precision_score(y_test, preds, average='macro'))
print(recall_score(y_test, preds, average='macro'))
print(f1_score(y_test, preds, average='macro'))

0.46375248910322947
0.47019230573230447
0.45558644497709233


<h3> Gradient Boosting </h3>

In [17]:
from sklearn.ensemble import GradientBoostingClassifier
# Create Model
model = GradientBoostingClassifier(verbose=1, n_estimators=200, learning_rate=1.0, max_depth=1)
model.fit(X_train, y_train)
# Make Predictions
preds = model.predict(X_test)

      Iter       Train Loss   Remaining Time 
         1           1.5447            1.35m
         2           1.5023            1.32m
         3           1.4475            1.32m
         4           1.4169            1.30m
         5           1.3877            1.28m
         6           1.3726            1.28m
         7           1.3499            1.28m
         8           1.3402            1.28m
         9           1.3281            1.27m
        10           1.3173            1.27m
        20           1.2213            1.21m
        30           1.1620            1.14m
        40           1.1147            1.05m
        50           1.0766           58.35s
        60           1.0435           53.89s
        70           1.0124           49.61s
        80           0.9865           45.59s
        90           0.9608           41.54s
       100           0.9357           37.61s
       200           0.7609            0.00s


In [18]:
from sklearn.metrics import precision_score, recall_score, f1_score
print(precision_score(y_test, preds, average='macro'))
print(recall_score(y_test, preds, average='macro'))
print(f1_score(y_test, preds, average='macro'))

0.4784672212608357
0.47904140090719866
0.47827565914340786
