In [41]:
# Import libraries
import os
import sys
import random
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

import audio_utils

from bumblebee_client.Reservoir import Reservoir


In [37]:
# Dataset: RAVDESS emotional speech audio, available from
# https://www.kaggle.com/datasets/uwrfkaggler/ravdess-emotional-speech-audio
# Copy the files into a directory called "ravdess_files" under the working directory.
AUDIO_DIR = "ravdess_files/"

# Parameters related to the audio files
NUM_LABELS = 8
NUM_ACTORS = 24
NUM_TRAIN_ACTORS = 19
DECIMATION_FACTOR = 77
MAX_LENGTH = None
ACTORS = list(range(1, NUM_ACTORS + 1))  # actor ids 1..NUM_ACTORS

# Parameters for the reservoir
IP_ADDR = "172.26.26.11"  # Change this to the correct IP address for Bumblebee
VBIAS = 0.3
GAIN = 0.55
NUM_NODES = 1500  # Number of reservoir nodes
NUM_TAPS = NUM_NODES
FEATURE_SCALING = 0.5
DENSITY = 1
DYN_SCALE = 0
NUM_F = 77  # Expected cochleagram width (asserted before the reservoir run)
RC_PARAMETERS = {
    "vbias": VBIAS,
    "gain": GAIN,
    "regression": "linear",
}

# File-name codes (statement / intensity / repetition) that are kept when the
# linear model is built and evaluated
STATEMENTS = ["01", "02"]
INTENSITIES = ["01", "02"]
REPETITIONS = ["01", "02"]

In [38]:
# Create the training and testing set.
# The split is made per actor: a file goes to the training set when its actor
# id is one of the NUM_TRAIN_ACTORS sampled ids, otherwise to the testing set.
SPLIT_SEED = 0  # fixed seed so a fresh "Restart & Run All" reproduces the split

# Sample from a sorted copy with a private RNG instead of shuffling ACTORS in
# place: the config constant stays untouched and the outcome does not depend
# on how many times this cell was run before.
train_actors = random.Random(SPLIT_SEED).sample(sorted(ACTORS), NUM_TRAIN_ACTORS)

audio_files = audio_utils.search_audio_files(AUDIO_DIR)

train_set = {}
test_set = {}

for audio_file in audio_files:

    print(audio_file)

    # File names are dash-separated codes; field 2 is read as the emotion code
    # and field 6 as the actor id. Strip any directory part and the extension
    # with os.path so dots elsewhere in the path cannot truncate the name.
    base_name = os.path.splitext(os.path.basename(audio_file))[0]
    fields = base_name.split("-")
    emotion = int(fields[2])
    actor = int(fields[6])

    cochleagram, status = audio_utils.convert_to_cochleagram(
        os.path.join(AUDIO_DIR, audio_file),
        decimation_factor=DECIMATION_FACTOR,
        maxLength=MAX_LENGTH,
    )

    # Files for which no cochleagram was produced are skipped
    if cochleagram is None:
        continue

    # Emotion codes in the file name start at 1; shift to a 0-based class index
    # (to_categorical presumably returns a one-hot vector -- confirm in audio_utils)
    label = audio_utils.to_categorical(
        emotion - 1,
        num_classes=NUM_LABELS,
    )

    target_set = train_set if actor in train_actors else test_set
    target_set[audio_file] = {"coch": cochleagram, "label": label}

03-01-06-01-02-02-02.wav
03-01-05-01-02-01-16.wav
03-01-08-01-01-01-14.wav
03-01-06-01-02-02-16.wav
03-01-05-01-02-01-02.wav
03-01-01-01-02-02-06.wav
03-01-02-01-02-01-12.wav
03-01-01-01-02-02-12.wav
03-01-02-01-02-01-06.wav
03-01-02-02-01-01-06.wav
03-01-02-02-01-01-12.wav
03-01-06-02-01-02-16.wav
03-01-05-02-01-01-02.wav
03-01-08-02-02-01-14.wav
03-01-06-02-01-02-02.wav
03-01-05-02-01-01-16.wav
03-01-05-01-01-01-22.wav
03-01-08-01-02-01-20.wav
03-01-06-01-01-02-22.wav
03-01-08-01-02-01-08.wav
03-01-08-02-01-01-08.wav
03-01-06-02-02-02-22.wav
03-01-08-02-01-01-20.wav
03-01-05-02-02-01-22.wav
03-01-03-01-01-02-06.wav
03-01-03-01-01-02-12.wav
03-01-04-01-01-02-02.wav
03-01-07-01-01-01-16.wav
03-01-04-01-01-02-16.wav
03-01-07-01-01-01-02.wav
03-01-04-02-02-02-16.wav
03-01-07-02-02-01-02.wav
03-01-04-02-02-02-02.wav
03-01-07-02-02-01-16.wav
03-01-03-02-02-02-12.wav
03-01-03-02-02-02-06.wav
03-01-07-01-02-01-22.wav
03-01-04-01-02-02-22.wav
03-01-04-02-01-02-22.wav
03-01-07-02-01-01-22.wav


03-01-07-01-01-01-12.wav
03-01-02-02-02-01-22.wav
03-01-08-02-01-01-18.wav
03-01-08-02-01-01-24.wav
03-01-08-01-02-01-24.wav
03-01-01-01-01-02-22.wav
03-01-08-01-02-01-18.wav
03-01-02-01-01-01-22.wav
03-01-02-02-01-01-16.wav
03-01-02-02-01-01-02.wav
03-01-06-02-01-02-06.wav
03-01-05-02-01-01-12.wav
03-01-08-02-02-01-04.wav
03-01-08-02-02-01-10.wav
03-01-06-02-01-02-12.wav
03-01-05-02-01-01-06.wav
03-01-06-01-02-02-12.wav
03-01-05-01-02-01-06.wav
03-01-08-01-01-01-10.wav
03-01-08-01-01-01-04.wav
03-01-06-01-02-02-06.wav
03-01-05-01-02-01-12.wav
03-01-01-01-02-02-16.wav
03-01-02-01-02-01-02.wav
03-01-01-01-02-02-02.wav
03-01-02-01-02-01-16.wav
03-01-02-01-01-02-14.wav
03-01-01-01-01-01-14.wav
03-01-08-01-02-02-06.wav
03-01-06-01-01-01-04.wav
03-01-05-01-01-02-10.wav
03-01-06-01-01-01-10.wav
03-01-05-01-01-02-04.wav
03-01-08-01-02-02-12.wav
03-01-08-02-01-02-12.wav
03-01-06-02-02-01-10.wav
03-01-05-02-02-02-04.wav
03-01-06-02-02-01-04.wav
03-01-05-02-02-02-10.wav
03-01-08-02-01-02-06.wav


03-01-07-02-02-02-09.wav
03-01-04-02-02-01-09.wav
03-01-04-02-02-01-08.wav
03-01-07-02-02-02-08.wav
03-01-03-02-02-01-24.wav
03-01-07-02-02-02-20.wav
03-01-03-02-02-01-18.wav
03-01-04-02-02-01-20.wav
03-01-04-01-01-01-20.wav
03-01-03-01-01-01-18.wav
03-01-07-01-01-02-20.wav
03-01-03-01-01-01-24.wav
03-01-07-01-01-02-08.wav
03-01-04-01-01-01-08.wav
03-01-03-02-01-01-04.wav
03-01-03-02-01-01-10.wav
03-01-07-02-01-02-14.wav
03-01-04-02-01-01-14.wav
03-01-04-01-02-01-14.wav
03-01-07-01-02-02-14.wav
03-01-03-01-02-01-10.wav
03-01-03-01-02-01-04.wav
03-01-02-02-01-02-18.wav
03-01-05-02-01-02-20.wav
03-01-06-02-01-01-20.wav
03-01-08-02-02-02-22.wav
03-01-06-02-01-01-08.wav
03-01-02-02-01-02-24.wav
03-01-05-02-01-02-08.wav
03-01-05-01-02-02-08.wav
03-01-01-01-02-01-24.wav
03-01-02-01-02-02-24.wav
03-01-06-01-02-01-08.wav
03-01-08-01-01-02-22.wav
03-01-01-01-02-01-18.wav
03-01-06-01-02-01-20.wav
03-01-05-01-02-02-20.wav
03-01-02-01-02-02-18.wav
03-01-08-02-01-02-02.wav
03-01-05-02-02-02-14.wav


03-01-02-01-02-02-10.wav
03-01-02-02-01-02-10.wav
03-01-02-02-01-02-04.wav
03-01-05-02-01-02-14.wav
03-01-08-02-02-02-02.wav
03-01-08-02-02-02-16.wav
03-01-06-02-01-01-14.wav
03-01-07-01-02-02-08.wav
03-01-03-01-02-01-24.wav
03-01-04-01-02-01-08.wav
03-01-03-01-02-01-18.wav
03-01-04-01-02-01-20.wav
03-01-07-01-02-02-20.wav
03-01-07-02-01-02-20.wav
03-01-04-02-01-01-20.wav
03-01-03-02-01-01-18.wav
03-01-04-02-01-01-08.wav
03-01-03-02-01-01-24.wav
03-01-07-02-01-02-08.wav
03-01-03-01-01-01-10.wav
03-01-03-01-01-01-04.wav
03-01-04-01-01-01-14.wav
03-01-07-01-01-02-14.wav
03-01-07-02-02-02-14.wav
03-01-04-02-02-01-14.wav
03-01-03-02-02-01-04.wav
03-01-03-02-02-01-10.wav
03-01-03-02-02-01-11.wav
03-01-03-02-02-01-05.wav
03-01-07-02-02-02-01.wav
03-01-04-02-02-01-15.wav
03-01-07-02-02-02-15.wav
03-01-04-02-02-01-01.wav
03-01-07-01-01-02-15.wav
03-01-04-01-01-01-01.wav
03-01-07-01-01-02-01.wav
03-01-04-01-01-01-15.wav
03-01-03-01-01-01-05.wav
03-01-03-01-01-01-11.wav
03-01-07-02-01-02-09.wav


03-01-03-01-01-02-08.wav
03-01-02-02-02-01-14.wav
03-01-05-02-02-01-10.wav
03-01-06-02-02-02-04.wav
03-01-08-02-01-01-06.wav
03-01-08-02-01-01-12.wav
03-01-05-02-02-01-04.wav
03-01-06-02-02-02-10.wav
03-01-05-01-01-01-04.wav
03-01-06-01-01-02-10.wav
03-01-08-01-02-01-12.wav
03-01-08-01-02-01-06.wav
03-01-05-01-01-01-10.wav
03-01-06-01-01-02-04.wav
03-01-01-01-01-02-14.wav
03-01-02-01-01-01-14.wav
03-01-02-02-01-01-20.wav
03-01-05-02-01-01-18.wav
03-01-06-02-01-02-18.wav
03-01-05-02-01-01-24.wav
03-01-02-02-01-01-08.wav
03-01-06-02-01-02-24.wav
03-01-06-01-02-02-24.wav
03-01-02-01-02-01-08.wav
03-01-01-01-02-02-08.wav
03-01-05-01-02-01-24.wav
03-01-01-01-02-02-20.wav
03-01-06-01-02-02-18.wav
03-01-05-01-02-01-18.wav
03-01-02-01-02-01-20.wav
03-01-08-01-01-01-18.wav
03-01-01-01-02-02-22.wav
03-01-02-01-02-01-22.wav
03-01-08-01-01-01-24.wav
03-01-08-02-02-01-24.wav
03-01-02-02-01-01-22.wav
03-01-08-02-02-01-18.wav
03-01-01-01-01-02-16.wav
03-01-02-01-01-01-02.wav
03-01-01-01-01-02-02.wav


In [39]:
# Write both splits to disk. np.save stores each dict as a pickled object
# array, which is why they are read back with allow_pickle=True and .item().
for npy_path, split_data in (
    ("train_set_ravdess.npy", train_set),
    ("test_set_ravdess.npy", test_set),
):
    np.save(npy_path, split_data)

In [None]:
# Run data through reservoir


def _run_through_reservoir(reservoir, dataset):
    """Replace every cochleagram in ``dataset`` with its reservoir response.

    Args:
        reservoir: object exposing ``GetTransientResponse(features,
            feature_scaling, dyn_scale)`` that returns a
            ``(response, dyn_scale)`` pair.
        dataset: dict mapping file name -> {"coch": 2-D array, "label": ...}.
            It is modified in place: each "coch" entry is overwritten with
            the response returned by the reservoir.

    Raises:
        AssertionError: if an input does not have NUM_F columns or a response
            does not have NUM_NODES columns.
    """
    for item in dataset.keys():
        coch = dataset[item]["coch"]
        assert coch.shape[1] == NUM_F, (
            "%s: expected %d input columns, got %d" % (item, NUM_F, coch.shape[1])
        )

        # The second return value is not used by this notebook
        response, _unused_dyn_scale = reservoir.GetTransientResponse(
            coch, FEATURE_SCALING, DYN_SCALE
        )

        dataset[item]["coch"] = response
        assert response.shape[1] == NUM_NODES, (
            "%s: expected %d response columns, got %d"
            % (item, NUM_NODES, response.shape[1])
        )


# Instantiate the reservoir
reservoir_inst = Reservoir(
    NUM_NODES, NUM_TAPS, NUM_F, RC_PARAMETERS, IP_ADDR, density=DENSITY,
)
reservoir_inst.SetGain(RC_PARAMETERS["gain"])
reservoir_inst.SetVbias(RC_PARAMETERS["vbias"])

# Load training and testing data.
# NOTE: allow_pickle=True unpickles arbitrary objects; only load files that
# were written by this notebook.
train_set = np.load("train_set_ravdess.npy", allow_pickle=True).item()
test_set = np.load("test_set_ravdess.npy", allow_pickle=True).item()

# Run the features through the reservoir: training data first, then testing
# data (same order as before, in case the device keeps state between calls)
_run_through_reservoir(reservoir_inst, train_set)
_run_through_reservoir(reservoir_inst, test_set)

In [None]:
# Write the reservoir responses of both splits to disk
# (each dict is stored by np.save as a pickled object array)
for npy_path, responses in (
    ("train_set_ravdess_reservoir.npy", train_set),
    ("test_set_ravdess_reservoir.npy", test_set),
):
    np.save(npy_path, responses)

In [43]:
# Build a linear model from reservoir responses

# Load reservoir responses.
# NOTE: allow_pickle=True unpickles arbitrary objects; only load files that
# were written by this notebook.
train_set = np.load(
    "train_set_ravdess_reservoir.npy",
    allow_pickle=True,
).item()

test_set = np.load(
    "test_set_ravdess_reservoir.npy",
    allow_pickle=True,
).item()

# Assemble features and labels arrays.
# Every row of a file's reservoir response becomes one training sample, and
# every such row gets that file's label as its target. Chunks are collected in
# lists and concatenated once, instead of re-copying the growing arrays on
# every iteration.
feature_chunks = []
label_chunks = []
for file_name, entry in train_set.items():

    # Dash-separated name fields: [3] intensity, [4] statement, [5] repetition
    fields = file_name.split(".")[0].split("-")

    if fields[5] not in REPETITIONS:
        continue
    if fields[4] not in STATEMENTS:
        continue
    if fields[3] not in INTENSITIES:
        continue

    coch = entry["coch"]

    # Rescale the label as 2 * label - 1 (a 0/1 vector becomes -1/+1)
    label = entry["label"] * 2 - 1

    feature_chunks.append(coch)
    # One copy of the label per row of the response: shape (rows, label size)
    label_chunks.append(np.repeat(label.reshape(1, -1), coch.shape[0], axis=0))

if not feature_chunks:
    raise ValueError(
        "No training files match REPETITIONS / STATEMENTS / INTENSITIES"
    )

X_train = np.concatenate(feature_chunks)
y_train = np.concatenate(label_chunks)

# Train a linear model
clf = LinearRegression(fit_intercept=True)
clf.fit(X_train, y_train)

LinearRegression()

In [44]:
# Success rate of the classifier on the training data
y_train = []
y_train_prd = []
for file_name, entry in train_set.items():

    # Dash-separated name fields: [3] intensity, [4] statement, [5] repetition
    fields = file_name.split(".")[0].split("-")

    selected = (
        fields[5] in REPETITIONS
        and fields[4] in STATEMENTS
        and fields[3] in INTENSITIES
    )
    if not selected:
        continue

    # Target: the file's label rescaled as 2 * label - 1
    y_train.append(entry["label"] * 2 - 1)
    # Prediction: the model's per-row outputs averaged into one vector per file
    y_train_prd.append(clf.predict(entry["coch"]).mean(axis=0))

train_success_rate = audio_utils.WSR_MSE(y_train, y_train_prd)
print("Success rate on train data: %0.3f" % train_success_rate)

Success rate on train data: 0.451


In [46]:
# Success rate of the classifier on the testing data
y_test = []
y_test_prd = []
for file_name, entry in test_set.items():

    # Dash-separated name fields: [3] intensity, [4] statement, [5] repetition
    fields = file_name.split(".")[0].split("-")

    selected = (
        fields[5] in REPETITIONS
        and fields[4] in STATEMENTS
        and fields[3] in INTENSITIES
    )
    if not selected:
        continue

    # Target: the file's label rescaled as 2 * label - 1
    y_test.append(entry["label"] * 2 - 1)
    # Prediction: the model's per-row outputs averaged into one vector per file
    y_test_prd.append(clf.predict(entry["coch"]).mean(axis=0))

test_success_rate = audio_utils.WSR_MSE(y_test, y_test_prd)
print("Success rate on test data: %0.3f" % test_success_rate)

Success rate on test data: 0.450
