<a href="https://colab.research.google.com/github/lmtkhanh/Week3-Machine_Learning/blob/notebook/Assignment3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Loading Important Libraries**

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import cv2
import nltk
import librosa

### **Feature Extraction**

In [11]:
# Get the critical imports out of the way
import librosa.display
import soundfile
import os
# matplotlib complains about the behaviour of librosa.display, so we'll ignore those warnings:
import warnings; warnings.filterwarnings('ignore')
from IPython.core.display import HTML
# Center matplotlib figures...
HTML("""
<style>
.output_png {
    display: table-cell;
    text-align: center;
    vertical-align: middle;
}
</style>
""")

In [12]:
def feature_chromagram(waveform, sample_rate):
    # STFT computed here explicitly; mel spectrogram and MFCC functions do this under the hood
    stft_spectrogram=np.abs(librosa.stft(waveform))
    #print(stft_spectrogram.shape)
    # Produce the chromagram for all STFT frames and get the mean of each column of the resulting matrix to create a feature array
    chromagram=np.mean(librosa.feature.chroma_stft(S=stft_spectrogram, sr=sample_rate).T,axis=0)
    #print(chromagram.shape)
    return chromagram

def feature_melspectrogram(waveform, sample_rate):
    # Produce the mel spectrogram for all STFT frames and get the mean of each column of the resulting matrix to create a feature array
    # Using 8khz as upper frequency bound should be enough for most speech classification tasks
    melspectrogram=np.mean(librosa.feature.melspectrogram(y=waveform, sr=sample_rate, n_mels=128, fmax=8000).T,axis=0)
    return melspectrogram

def feature_mfcc(waveform, sample_rate):
    # Compute the MFCCs for all STFT frames and get the mean of each column of the resulting matrix to create a feature array
    # 40 filterbanks = 40 coefficients
    mfc_coefficients=np.mean(librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=40).T, axis=0)
    return mfc_coefficients

In [13]:
def get_features(file):
    # load an individual soundfile
     with soundfile.SoundFile(file) as audio:
        waveform = audio.read(dtype="float32")
        sample_rate = audio.samplerate
        # compute features of soundfile
        chromagram = feature_chromagram(waveform, sample_rate)
        melspectrogram = feature_melspectrogram(waveform, sample_rate)
        mfc_coefficients = feature_mfcc(waveform, sample_rate)
        feature_matrix=np.array([])

        # use np.hstack to stack our feature arrays horizontally to create a feature matrix
        feature_matrix = np.hstack((chromagram, melspectrogram, mfc_coefficients))

        return feature_matrix

### **Loading Data from RAVDESS**

In [14]:
import os, glob
import numpy as np
import soundfile

common_emotions = {
    '01': 'neutral',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust'
}


def load_ravdess_data():
    X,y=[],[]
    count = 0
    for file in glob.glob("/content/drive/MyDrive/IAT 481/Actor_*/*.wav"):
        file_name=os.path.basename(file)
        emotion_code = file_name.split("-")[2]
        #Only load the emotion shared by 2 datasets
        if emotion_code in common_emotions:
          emotionR = common_emotions[emotion_code]
          featuresR = get_features(file)
          X.append(featuresR)
          y.append(emotionR)
          count += 1

         # '\r' + end='' results in printing over same line
          print('\r' + f' Processed {count}/{1435} audio samples',end=' ')
      # Return arrays to plug into sklearn's cross-validation algorithms
    return np.array(X), np.array(y)

In [15]:
featuresR, emotionR = load_ravdess_data()

 Processed 1054/1435 audio samples 

In [25]:
print(f'\nAudio samples represented: {featuresR.shape[0]}')
print(f'Numerical features extracted per sample: {featuresR.shape[1]}')
featuresR_df = pd.DataFrame(featuresR) # make it pretty for display
featuresR_df


Audio samples represented: 1054
Numerical features extracted per sample: 180


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,170,171,172,173,174,175,176,177,178,179
0,0.636684,0.628297,0.631292,0.651613,0.698669,0.767675,0.781398,0.741129,0.741146,0.705466,...,-1.673998,-0.503829,0.026309,0.215442,1.812259,2.770561,1.646000,2.532423,0.569808,1.190669
1,0.619704,0.639446,0.650099,0.693850,0.721269,0.756798,0.789992,0.768492,0.766670,0.698196,...,-1.135965,-0.554937,-1.294901,-0.990536,-0.618531,0.289876,-0.120279,0.519724,1.181626,2.576108
2,0.615388,0.598130,0.632746,0.687002,0.735016,0.746685,0.735427,0.738268,0.771911,0.697520,...,-3.166279,0.292470,-1.585585,-0.819369,-1.223765,0.182237,0.581931,2.988665,3.700677,4.860653
3,0.666090,0.641014,0.625757,0.668821,0.702518,0.748449,0.786031,0.716457,0.713515,0.666831,...,-1.627202,1.990687,1.660290,1.184242,1.871421,1.095554,0.522881,0.699638,0.359175,1.215944
4,0.601642,0.617534,0.665111,0.690098,0.711811,0.743393,0.746468,0.811313,0.819656,0.768950,...,-1.931422,0.705211,-1.421736,0.109918,0.826748,3.198303,3.540084,4.285282,5.466786,6.817317
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1049,0.714570,0.642242,0.599776,0.559592,0.535722,0.593334,0.677825,0.642308,0.637836,0.697389,...,4.606691,7.415055,5.400178,2.796613,5.055417,3.130010,0.871994,0.887742,-0.218687,1.141505
1050,0.558261,0.633760,0.607081,0.564991,0.606804,0.697466,0.776295,0.745406,0.698547,0.687153,...,-3.290008,-4.192609,-2.975512,-1.233714,-1.482003,-2.032330,0.065089,-0.486623,-0.640735,-0.860338
1051,0.626034,0.594640,0.617562,0.573329,0.557500,0.615959,0.686903,0.675111,0.661743,0.663048,...,3.803571,3.034509,2.019023,0.320287,1.755269,1.537050,0.793371,2.439653,-1.614806,-0.646167
1052,0.600810,0.569928,0.585620,0.623596,0.645769,0.695486,0.685356,0.638389,0.611997,0.660883,...,0.920214,2.389385,0.042414,-0.168045,1.884149,0.347379,-0.083077,1.599976,0.338264,-0.604630


### **Loading Data from EmoDB**

In [26]:
import os, glob
import numpy as np
import soundfile

common_emotions = {
    'N': 'neutral',
    'F': 'happy',
    'T': 'sad',
    'W': 'angry',
    'A': 'fearful',
    'E': 'disgust'
}


def load_emodb_data():
    X,y=[],[]
    count = 0
    for file in glob.glob("/content/drive/MyDrive/IAT 481/EmoDB/*.wav"):
        file_name=os.path.basename(file)
        emotion_code = file_name[5]
        #Only load the emotion shared by 2 datasets
        if emotion_code in common_emotions:
          emotionE = common_emotions[emotion_code]
          featuresE = get_features(file)
          X.append(featuresE)
          y.append(emotionE)
          count += 1

         # '\r' + end='' results in printing over same line
          print('\r' + f' Processed {count}/{535} audio samples',end=' ')
      # Return arrays to plug into sklearn's cross-validation algorithms
    return np.array(X), np.array(y)

In [28]:
featuresE, emotionE = load_emodb_data()

 Processed 454/535 audio samples 

In [None]:
print(f'\nAudio samples represented: {featuresE.shape[0]}')
print(f'Numerical features extracted per sample: {featuresE.shape[1]}')
featuresE_df = pd.DataFrame(featuresE) # make it pretty for display
featuresE_df

### **Merging two features matrices**

In [None]:
# Merge the feature matrices
features_combined = np.concatenate((featuresR, featuresE), axis=0)

# Merge the label arrays
emotions_combined = np.concatenate((emotionR, emotionE), axis=0)

In [None]:
print(f'\nAudio samples represented: {features_combined.shape[0]}')
print(f'Numerical features extracted per sample: {features_combined.shape[1]}')
#Make Dataframe for features
features_combined_df = pd.DataFrame(features_combined) # make it pretty for display

#Make Dataframe for emotions
emotions_combined_df = pd.DataFrame(emotions_combined)


In [29]:
#Check if it loads right
features_combined_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,170,171,172,173,174,175,176,177,178,179
0,0.636684,0.628297,0.631292,0.651613,0.698669,0.767675,0.781398,0.741129,0.741146,0.705466,...,-1.673998,-0.503829,0.026309,0.215442,1.812259,2.770561,1.646000,2.532423,0.569808,1.190669
1,0.619704,0.639446,0.650099,0.693850,0.721269,0.756798,0.789992,0.768492,0.766670,0.698196,...,-1.135965,-0.554937,-1.294901,-0.990536,-0.618531,0.289876,-0.120279,0.519724,1.181626,2.576108
2,0.615388,0.598130,0.632746,0.687002,0.735016,0.746685,0.735427,0.738268,0.771911,0.697520,...,-3.166279,0.292470,-1.585585,-0.819369,-1.223765,0.182237,0.581931,2.988665,3.700677,4.860653
3,0.666090,0.641014,0.625757,0.668821,0.702518,0.748449,0.786031,0.716457,0.713515,0.666831,...,-1.627202,1.990687,1.660290,1.184242,1.871421,1.095554,0.522881,0.699638,0.359175,1.215944
4,0.601642,0.617534,0.665111,0.690098,0.711811,0.743393,0.746468,0.811313,0.819656,0.768950,...,-1.931422,0.705211,-1.421736,0.109918,0.826748,3.198303,3.540084,4.285282,5.466786,6.817317
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1503,0.545511,0.604831,0.644693,0.675429,0.716208,0.726433,0.676241,0.672165,0.674903,0.716056,...,-2.045649,-1.179349,-2.833190,-1.940967,-2.509913,-2.486703,-3.354582,-1.943447,-1.464825,-2.969693
1504,0.498928,0.451786,0.517057,0.605375,0.563523,0.537695,0.497537,0.484230,0.511041,0.603973,...,5.097405,1.399691,-2.035277,0.764548,2.037185,0.918335,-0.946199,-1.225412,0.948814,5.614808
1505,0.479394,0.504327,0.587494,0.613058,0.626845,0.626645,0.578185,0.491933,0.457327,0.462098,...,-2.463939,1.045352,-0.204357,2.378903,-1.186556,0.181454,1.126310,1.671440,1.369183,0.590503
1506,0.481175,0.437888,0.420873,0.488118,0.562618,0.604745,0.576610,0.624957,0.663843,0.650717,...,7.207660,4.627536,3.303271,3.778844,2.242899,1.586491,-0.263002,0.830273,0.451080,-0.407189


In [30]:
#Check if it loads right
emotions_combined_df

Unnamed: 0,0
0,neutral
1,neutral
2,neutral
3,neutral
4,sad
...,...
1503,neutral
1504,neutral
1505,fearful
1506,disgust


### **Save the merged data set to excel**

In [24]:
features_combined_df.to_excel('/content/drive/MyDrive/IAT 481/featuresCombined.xlsx')
emotions_combined_df.to_excel('/content/drive/MyDrive/IAT 481/emotionsCombined.xlsx')


### **Load pre-saved dataset**

In [5]:
#Skip every cell above and load the excel file
features_combined_df = pd.read_excel('/content/drive/MyDrive/IAT 481/featuresCombined.xlsx',index_col=0)
emotions_combined_df = pd.read_excel('/content/drive/MyDrive/IAT 481/emotionsCombined.xlsx',index_col=0)

In [6]:
features_combined_df.head() #Check if it loads right

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,170,171,172,173,174,175,176,177,178,179
0,0.636684,0.628297,0.631292,0.651613,0.698669,0.767675,0.781398,0.741129,0.741146,0.705466,...,-1.673998,-0.503829,0.026309,0.215442,1.812259,2.770561,1.646,2.532423,0.569808,1.190669
1,0.619704,0.639446,0.650099,0.69385,0.721269,0.756798,0.789992,0.768492,0.76667,0.698196,...,-1.135965,-0.554937,-1.294901,-0.990536,-0.618531,0.289876,-0.120279,0.519724,1.181626,2.576108
2,0.615388,0.59813,0.632746,0.687002,0.735016,0.746685,0.735427,0.738268,0.771911,0.69752,...,-3.166279,0.29247,-1.585585,-0.819369,-1.223765,0.182237,0.581931,2.988665,3.700677,4.860653
3,0.66609,0.641014,0.625757,0.668821,0.702518,0.748449,0.786031,0.716457,0.713515,0.666831,...,-1.627202,1.990687,1.66029,1.184242,1.871421,1.095554,0.522881,0.699638,0.359175,1.215944
4,0.601642,0.617534,0.665111,0.690098,0.711811,0.743393,0.746468,0.811313,0.819656,0.76895,...,-1.931422,0.705211,-1.421736,0.109918,0.826748,3.198303,3.540084,4.285282,5.466786,6.817317


### **Feature Scaling**

In [7]:
# We would usually use df.describe(), but it provides a bit of a mess of information we don't need at the moment.
def print_features(df):
    # Check chromagram feature values
    features_combined_df_chromagram = df.loc[:,:11]
    chroma_min = features_combined_df_chromagram.min().min()
    chroma_max = features_combined_df_chromagram.max().max()
    # stack all features into a single series so we don't get a mean of means or stdev of stdevs
    chroma_mean = features_combined_df_chromagram.stack().mean()
    chroma_stdev = features_combined_df_chromagram.stack().std()
    print(f'12 Chromagram features:       \
    min = {chroma_min:.3f}, \
    max = {chroma_max:.3f}, \
    mean = {chroma_mean:.3f}, \
    deviation = {chroma_stdev:.3f}')

    # Check mel spectrogram feature values
    features_combined_df_melspectrogram = df.loc[:,12:139]
    mel_min = features_combined_df_melspectrogram.min().min()
    mel_max = features_combined_df_melspectrogram.max().max()
    # stack all features into a single series so we don't get a mean of means or stdev of stdevs
    mel_mean = features_combined_df_melspectrogram.stack().mean()
    mel_stdev = features_combined_df_melspectrogram.stack().std()
    print(f'\n128 Mel Spectrogram features: \
    min = {mel_min:.3f}, \
    max = {mel_max:.3f}, \
    mean = {mel_mean:.3f}, \
    deviation = {mel_stdev:.3f}')

    # Check MFCC feature values
    features_combined_df_mfcc = df.loc[:,140:179]
    mfcc_min = features_combined_df_mfcc.min().min()
    mfcc_max = features_combined_df_mfcc.max().max()
    # stack all features into a single series so we don't get a mean of means or stdev of stdevs
    mfcc_mean = features_combined_df_mfcc.stack().mean()
    mfcc_stdev = features_combined_df_mfcc.stack().std()
    print(f'\n40 MFCC features:             \
    min = {mfcc_min:.3f},\
    max = {mfcc_max:.3f},\
    mean = {mfcc_mean:.3f},\
    deviation = {mfcc_stdev:.3f}')

print_features(features_combined_df)

12 Chromagram features:           min = 0.276,     max = 0.888,     mean = 0.640,     deviation = 0.093

128 Mel Spectrogram features:     min = 0.000,     max = 886.647,     mean = 1.328,     deviation = 9.748

40 MFCC features:                 min = -863.639,    max = 121.174,    mean = -11.163,    deviation = 82.575


In [8]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

scaler = StandardScaler()
# keep our unscaled features just in case we need to process them alternatively
features_combined_scaled = features_combined_df
features_combined_scaled = scaler.fit_transform(features_combined_scaled)

scaler = MinMaxScaler()
# keep our unscaled features just in case we need to process them alternatively
features_minmax = features_combined_df
features_minmax = scaler.fit_transform(features_minmax)

In [9]:
print('\033[1m'+'Standard Scaling:\n'+'\033[0m')
features_combined_scaled_df = pd.DataFrame(features_combined_scaled)
print_features(features_combined_scaled_df)

print('\n\n\033[1m'+'MinMax Scaling:\n'+'\033[0m')
features_minmax_df = pd.DataFrame(features_minmax)
print_features(features_minmax_df)

[1mStandard Scaling:
[0m
12 Chromagram features:           min = -4.046,     max = 2.602,     mean = 0.000,     deviation = 1.000

128 Mel Spectrogram features:     min = -0.475,     max = 33.092,     mean = 0.000,     deviation = 1.000

40 MFCC features:                 min = -4.719,    max = 6.582,    mean = 0.000,    deviation = 1.000


[1mMinMax Scaling:
[0m
12 Chromagram features:           min = 0.000,     max = 1.000,     mean = 0.599,     deviation = 0.179

128 Mel Spectrogram features:     min = 0.000,     max = 1.000,     mean = 0.024,     deviation = 0.068

40 MFCC features:                 min = 0.000,    max = 1.000,    mean = 0.393,    deviation = 0.177


# **Classical Machine Learning Models**

### Splitting 80/20 for Training and Testing

In [31]:
from sklearn.model_selection import train_test_split


############# Unscaled test/train set #############
X_train, X_test, y_train, y_test =train_test_split(
    features_combined,
    emotions_combined,
    test_size=0.2,
    random_state=69
)

############ Standard Scaled test/train set ###########
# The labels/classes (y_train, y_test) never change, keep old values
X_train_scaled, X_test_scaled, _, _ = train_test_split(
    features_combined_scaled,
    emotions_combined,
    test_size=0.2,
    random_state=69
)

############# MinMax Scaled test/train set ###############
# The labels/classes (y_train, y_test) never change, keep old values
X_train_minmax, X_test_minmax, _, _ = train_test_split(
    features_minmax,
    emotions_combined,
    test_size=0.2,
    random_state=69
)

### Comparing Model

In [32]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

classification_models = [
    KNeighborsClassifier(),#(3),
    SVC(kernel='linear'),#, C=0.025),
    SVC(kernel='rbf'),
    DecisionTreeClassifier(),#max_depth=5),
    RandomForestClassifier(),#max_depth=5, n_estimators=10, max_features=1),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()]

scores = []
for model in classification_models:
    model.fit(X_train_scaled, y_train)
    score = model.score(X_test_scaled, y_test)
    model_name = type(model).__name__
    if model_name=='SVC' and model.kernel=='rbf': model_name+=' RBF kernel'
    scores.append((model_name,(f'{100*score:.2f}%')))
# Make it pretty
scores_df = pd.DataFrame(scores,columns=['Classifier','Accuracy Score'])
scores_df.sort_values(by='Accuracy Score',axis=0,ascending=False)

Unnamed: 0,Classifier,Accuracy Score
4,RandomForestClassifier,65.23%
1,SVC,61.92%
0,KNeighborsClassifier,53.97%
2,SVC RBF kernel,45.70%
3,DecisionTreeClassifier,45.36%
5,AdaBoostClassifier,37.09%
6,GaussianNB,33.44%
7,QuadraticDiscriminantAnalysis,29.47%


### The Support Vector Machine Classifier

In [33]:
from sklearn.svm import SVC

model = SVC(
    C=10,  #higher the value tighter the margin
    gamma='auto',
    kernel='rbf',
    random_state=69
)

model.fit(X_train, y_train)

print(f'SVC Model\'s accuracy on training set is {100*model.score(X_train, y_train):.2f}%')
print(f'SVC Model\'s accuracy on test set is {100*model.score(X_test, y_test):.2f}%')

SVC Model's accuracy on training set is 100.00%
SVC Model's accuracy on test set is 41.39%


### k Nearest Neighbors

In [34]:
from sklearn.neighbors import KNeighborsClassifier

####### Default kNN  ########
model = KNeighborsClassifier(
)

model.fit(X_train, y_train)

print(f'Default kNN Model\'s accuracy on training set is {100*model.score(X_train, y_train):.2f}%')
print(f'Default kNN Model\'s accuracy on test set is {100*model.score(X_test, y_test):.2f}%\n')

##### (hastily) tuned kNN ######
model = KNeighborsClassifier(
    n_neighbors = 5,
    weights = 'distance',
    algorithm = 'brute',
    n_jobs=4
)

model.fit(X_train, y_train)

print(f'kNN Model\'s accuracy on training set is {100*model.score(X_train, y_train):.2f}%')
print(f'kNN Model\'s accuracy on test set is {100*model.score(X_test, y_test):.2f}%')

Default kNN Model's accuracy on training set is 70.98%
Default kNN Model's accuracy on test set is 50.99%

kNN Model's accuracy on training set is 100.00%
kNN Model's accuracy on test set is 59.93%


### Random Forests

In [35]:
from sklearn.ensemble import RandomForestClassifier

####### Default Random Forest ########
model = RandomForestClassifier(
    random_state=69
)

model.fit(X_train, y_train)

print(f'Default Random Forest Model\'s accuracy on training set is {100*model.score(X_train, y_train):.2f}%')
print(f'Default Random Forest Model\'s accuracy on test set is {100*model.score(X_test, y_test):.2f}%\n')


########## Tuned Random Forest #######
model = RandomForestClassifier(
    n_estimators = 500,
    criterion ='entropy',
    warm_start = True,
    max_features = 'sqrt',
    oob_score = True, # more on this below
    random_state=69
)

model.fit(X_train, y_train)

print(f'Random Forest Model\'s accuracy on training set is {100*model.score(X_train, y_train):.2f}%')
print(f'Random Forest Model\'s accuracy on test set is {100*model.score(X_test, y_test):.2f}%')

Default Random Forest Model's accuracy on training set is 100.00%
Default Random Forest Model's accuracy on test set is 63.91%

Random Forest Model's accuracy on training set is 100.00%
Random Forest Model's accuracy on test set is 66.23%
