### Experiments From "Filtered Feelings: Investigating Frequency Filters in Speech Emotion Recognition Models"
Created by: Teun van Gisteren (s1055104)

### Import libraries

In [None]:
# NOTE: This cell might raise a "TypeError: 'type' object is not subscriptable" error. If you run the cell again, it should work.

import csv
import os
import re
import shutil
from os import mkdir
from os import makedirs
from os.path import isdir

import numpy as np
import torch
from scipy.io import wavfile
from scipy.signal import butter, lfilter, filtfilt, freqz
from sklearn.metrics import accuracy_score

from speechbrain.inference.interfaces import foreign_class

# 5.9 Testing on the MELD Dataset

## MELD Data Handling & Preprocessing
Note: The MELD dataset as used here was already resampled to 16000 Hz to bring it in line with the IEMOCAP dataset.

In [None]:
# Every split is described by three entries: its name, the folder containing its .wav files,
# and the .csv file containing the emotion labels for those files
dev_split = dict(name="dev", directory="...\\dev", csv="...\\dev_sent_emo.csv")
test_split = dict(name="test", directory="...\\test", csv="...\\test_sent_emo.csv")
train_split = dict(name="train", directory="...\\train", csv="...\\train.tar\\train_sent_emo.csv")

# Folder that receives the results .csv files; fill this in before running the experiments
results_location = None

In [None]:
def get_MELD_emotions(split):
    """Read the label .csv of a split and return the emotion of every usable utterance.

    Only rows labelled anger, joy, neutral or sadness are kept; their labels are
    translated to the short codes 'ang', 'hap', 'neu' and 'sad'.

    Args:
        split: Dictionary whose "csv" entry is the path to the label file. The file
            must provide the columns "Emotion", "Dialogue_ID" and "Utterance_ID".

    Returns:
        Dictionary mapping (Dialogue_ID, Utterance_ID) string pairs to the short
        emotion code. When a pair occurs more than once, the last row wins.
    """
    # MELD label -> short label; the keys double as the list of emotions to keep
    label_for = {'anger': 'ang', 'joy': 'hap', 'neutral': 'neu', 'sadness': 'sad'}

    with open(split["csv"], mode='r', newline='', encoding='utf-8') as csv_file:
        return {
            (record["Dialogue_ID"], record["Utterance_ID"]): label_for[record['Emotion']]
            for record in csv.DictReader(csv_file)
            if record['Emotion'] in label_for
        }

def get_MELD_split_files(split):
    """Return the names of all .wav files found in the split's audio directory.

    Args:
        split: Dictionary whose "directory" entry is the folder to list.

    Returns:
        List of file names (not full paths) ending in '.wav', in the order
        reported by os.listdir.
    """
    return [entry for entry in os.listdir(split["directory"]) if entry.endswith('.wav')]

def filter_appropriate_files(emotions, files):
    """Keep only the files whose (Dialogue_ID, Utterance_ID) pair has an emotion label.

    The first two runs of digits in a file name are taken as Dialogue_ID and
    Utterance_ID (e.g. 'dia12_utt3.wav' -> ('12', '3')).

    Args:
        emotions: Dictionary keyed by (Dialogue_ID, Utterance_ID) string pairs.
        files: Iterable of file names to check.

    Returns:
        List of the file names whose ID pair is a key of `emotions`, in input order.
    """
    # Pattern for finding all numbers in a string
    number_pattern = re.compile(r'\d+')

    filtered_files = []
    for filename in files:
        numbers = number_pattern.findall(filename)
        # A name without two numbers cannot identify an utterance; skip it instead of
        # failing with an IndexError on stray files in the audio directory
        if len(numbers) < 2:
            continue
        if (numbers[0], numbers[1]) in emotions:
            filtered_files.append(filename)

    return filtered_files

## CSV Export Function

In [None]:
def results_to_csv(output_path, name, results):
    """Write the classification results to '<output_path>/<name>.csv'.

    Args:
        output_path: Folder to write into; created (including parents) if missing.
        name: File name without the '.csv' extension.
        results: Iterable of dictionaries with the keys "Split", "Dialogue_ID",
            "Utterance_ID", "Emotion" and "Emotion_Guess".
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(output_path, exist_ok=True)

    # os.path.join picks the right separator on every platform; a hard-coded "\\"
    # only produces a file inside output_path on Windows
    csv_path = os.path.abspath(os.path.join(output_path, f"{name}.csv"))

    with open(csv_path, mode='w', newline='', encoding='utf-8') as new_file:
        fieldnames = ["Split", "Dialogue_ID", "Utterance_ID", "Emotion", "Emotion_Guess"]
        writer = csv.DictWriter(new_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(results)

## Accuracy Score Function

In [None]:
# Function to calculate the accuracy of the results, with optional decimal number length parameter
def calculate_accuracy_str(results, dec_len=10):
    """Format the share of correct guesses in `results` as a percentage string.

    Args:
        results: Sequence of dictionaries holding the true label under "Emotion"
            and the predicted label under "Emotion_Guess".
        dec_len: Number of digits printed after the decimal point.

    Returns:
        A string of the form "Accuracy: 12.34%".
    """
    percentage = 100 * accuracy_score(
        [entry["Emotion"] for entry in results],
        [entry["Emotion_Guess"] for entry in results],
    )
    return "Accuracy: {:.{}f}%".format(percentage, dec_len)


## Model Initialization and Function

In [None]:
# Initialize classifier
# Run on CUDA when a capable GPU is available; otherwise fall back to the CPU so the
# notebook still runs without editing this cell.
classifier = foreign_class(source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
                           pymodule_file="custom_interface.py",
                           classname="CustomEncoderWav2vec2Classifier",
                           run_opts={"device": "cuda" if torch.cuda.is_available() else "cpu"})

def run_classifier(split, emotions, files, filtered=False):
    """Classify every .wav file of a split and pair each guess with its true label.

    Args:
        split: Dictionary with the split's "name" and the "directory" holding its files.
        emotions: Dictionary mapping (Dialogue_ID, Utterance_ID) string pairs to the
            true emotion label.
        files: List of .wav file names inside split["directory"]. The first two numbers
            in each name are read as Dialogue_ID and Utterance_ID.
        filtered: Kept for backward compatibility. Filtered and unfiltered files are both
            stored directly inside split["directory"], so the flag does not change how
            the path is built.

    Returns:
        List of dictionaries with the keys "Split", "Dialogue_ID", "Utterance_ID",
        "Emotion" and "Emotion_Guess", one per classified file.

    Raises:
        KeyError: If a file's ID pair has no entry in `emotions`.
    """
    # Pattern for finding all numbers in a string
    number_pattern = re.compile(r'\d+')
    # Matches the file causing memory issues (also in its filtered form). The look-ahead
    # keeps other utterances such as dia38_utt40 from being skipped by accident.
    skipped_file_pattern = re.compile(r'dia38_utt4(?!\d)')

    results = []

    # Count iterations for a nicer display
    iterations = 1

    # Process each file
    for wav_file in files:
        if skipped_file_pattern.search(wav_file):
            continue

        # Show progress text
        progress = iterations / len(files) * 100
        print("\rProcessing: {}, Progress: {:.2f}%".format(wav_file, progress), end="")

        # Previously no path was assigned when filtered=True, which left file_path
        # undefined (or stale from an earlier iteration)
        file_path = os.path.join(split["directory"], wav_file)

        # Classify
        out_prob, score, index, text_lab = classifier.classify_file(file_path)

        # Extract Dialogue_ID and Utterance_ID from the file name
        numbers = number_pattern.findall(wav_file)
        dialogue_id = numbers[0]
        utterance_id = numbers[1]

        # Append data to results list
        results.append({
            "Split": split["name"],
            "Dialogue_ID": dialogue_id,
            "Utterance_ID": utterance_id,
            "Emotion": emotions[(dialogue_id, utterance_id)],
            "Emotion_Guess": text_lab[0],
        })

        iterations += 1

    return results

## Baseline Experiment

In [None]:
all_splits = [dev_split, test_split, train_split]

# Fail before the long classification run rather than after it: the results cannot be
# written while results_location is still unset
if results_location is None:
    raise ValueError("Set results_location before running the baseline experiment.")

combined_results = []

for split in all_splits:
    print(f"Split: {split['name']}")

    # Get the appropriate files and corresponding emotional labels
    emotions = get_MELD_emotions(split)
    files = filter_appropriate_files(emotions, get_MELD_split_files(split))

    # Run the classifier
    results = run_classifier(split, emotions, files)

    # Combine results with other data
    combined_results.extend(results)

    # Display classification stats per split
    print("\n", calculate_accuracy_str(results, 2))

    # Print a nice line between splits. shutil.get_terminal_size falls back to a default
    # width when no terminal is attached, where os.get_terminal_size raises OSError.
    term_size = shutil.get_terminal_size()
    print('=' * term_size.columns)

print("Overall")
print(calculate_accuracy_str(combined_results, 2))

results_to_csv(results_location, "MELD_Base", combined_results)

### Audio Filter

In [None]:
# The following code is a modified version of the code from this stack overflow post:
# https://stackoverflow.com/questions/21871834/adding-effects-to-make-voice-sound-like-it-s-over-a-telephone

# Creates a Butterworth filter from the low and high boundaries with a given order
def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a Butterworth band-pass filter.

    Args:
        lowcut: Lower cut-off frequency, in the same unit as `fs`.
        highcut: Upper cut-off frequency, in the same unit as `fs`.
        fs: Sampling rate of the signal the filter will be applied to.
        order: Order passed to scipy.signal.butter.

    Returns:
        The (b, a) coefficient arrays returned by scipy.signal.butter.
    """
    # butter expects the band edges as fractions of the Nyquist frequency
    nyquist = 0.5 * fs
    normalized_band = [lowcut / nyquist, highcut / nyquist]
    return butter(order, normalized_band, btype='band')

# Applies the Butterworth filter
def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Band-pass filter `data` with a Butterworth filter applied via filtfilt.

    Args:
        data: Signal to filter.
        lowcut: Lower cut-off frequency.
        highcut: Upper cut-off frequency.
        fs: Sampling rate of `data`.
        order: Order of the Butterworth design.

    Returns:
        The filtered signal as returned by scipy.signal.filtfilt.
    """
    coefficients = butter_bandpass(lowcut, highcut, fs, order=order)
    return filtfilt(*coefficients, data)

def bandpass_filter(buffer, lowcut, highcut, FRAME_RATE):
    """Band-pass filter `buffer` with a third-order Butterworth filter.

    Args:
        buffer: Signal to filter.
        lowcut: Lower cut-off frequency.
        highcut: Upper cut-off frequency.
        FRAME_RATE: Sampling rate of `buffer`.

    Returns:
        Whatever butter_bandpass_filter returns for these arguments with order=3.
    """
    return butter_bandpass_filter(buffer, lowcut, highcut, fs=FRAME_RATE, order=3)

def filter_audio(low, high, sample, input_path, output_path, files):
    """Band-pass filter a list of .wav files and write the results to a fresh folder.

    The output folder is emptied first: if it already exists it is deleted and
    recreated. Each output file is named '<original name>_l<low>_h<high>.wav'.

    Args:
        low: Lower cut-off frequency of the band-pass filter.
        high: Upper cut-off frequency of the band-pass filter.
        sample: Sampling rate every input file is required to have.
        input_path: Folder containing the input files.
        output_path: Folder to write the filtered files to.
        files: Names of the .wav files inside `input_path` to filter.

    Raises:
        ValueError: If `input_path` is `output_path` or lies inside it, because
            clearing the output folder would delete the input files.
        AssertionError: If a file's sampling rate differs from `sample`.
    """
    # Guard against wiping the source data when the output folder is cleared below
    source_dir = os.path.normcase(os.path.realpath(input_path))
    target_dir = os.path.normcase(os.path.realpath(output_path))
    if source_dir == target_dir or source_dir.startswith(os.path.join(target_dir, "")):
        raise ValueError(
            f"output_path ({output_path!r}) must not be or contain input_path ({input_path!r}): "
            "the output folder is deleted before filtering."
        )

    if os.path.isdir(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path)

    int16_limits = np.iinfo(np.int16)

    for file in files:
        # os.path.join instead of a hard-coded "\\" so the path is valid on every platform
        samplerate, data = wavfile.read(os.path.join(input_path, file))
        assert samplerate == sample, f"{file}: expected {sample} Hz, found {samplerate} Hz"
        filtered = np.apply_along_axis(bandpass_filter, 0, data, lowcut=low, highcut=high, FRAME_RATE=sample)
        # Filtering can overshoot the int16 range; clip first so the cast saturates
        # instead of overflowing.
        # NOTE(review): the int16 cast assumes 16-bit PCM input - confirm for other datasets.
        filtered = np.clip(filtered, int16_limits.min, int16_limits.max).astype('int16')
        wavfile.write(os.path.join(output_path, f'{file[:-4]}_l{low}_h{high}.wav'), samplerate, filtered)

In [None]:
def filter_MELD_split_files(split, output_path, low, high, sample_rate=16000):
    """Band-pass filter all .wav files of a split into `output_path`.

    Args:
        split: Dictionary whose "directory" entry is the folder with the input files.
        output_path: Folder that receives the filtered files (normalized before use).
        low: Lower cut-off frequency of the band-pass filter.
        high: Upper cut-off frequency of the band-pass filter.
        sample_rate: Sampling rate the input files are required to have.
    """
    # Only hand .wav files to filter_audio; any other file in the folder cannot be
    # read as audio
    wav_files = [file for file in os.listdir(split["directory"]) if file.endswith('.wav')]
    filter_audio(low, high, sample_rate,
                 split["directory"],
                 os.path.normpath(output_path), wav_files)

## Systematically Altering The Frequency Range on Audio Filters

In [None]:
# Separate split descriptions that point at the filtered audio files.
# filter_audio deletes and recreates the folder it writes to, so every "directory" here
# has to be a different folder from the one holding the unfiltered files of that split.
dev_split_filtered = dict(name="dev", directory="...\\dev", csv="...\\dev_sent_emo.csv")
test_split_filtered = dict(name="test", directory="...\\test", csv="...\\test_sent_emo.csv")
train_split_filtered = dict(name="train", directory="...\\train", csv="...\\train.tar\\train_sent_emo.csv")

In [None]:
def systematically_classify_filtered_files(low_bound, high_bound, step_size):
    """Filter and classify all MELD splits once per frequency band of a grid.

    Low cut-offs run over range(low_bound[0], low_bound[1], step_size) and high
    cut-offs over range(high_bound[0], high_bound[1], step_size); only bands whose
    high cut-off lies above the low cut-off are used. For each band the three splits
    are filtered into their filtered directories and classified, and the combined
    results are written to 'MELD_Filtered_l<low>_h<high>.csv' in results_location.

    Args:
        low_bound: Pair [start, stop) of the low cut-off range.
        high_bound: Pair [start, stop) of the high cut-off range.
        step_size: Distance between consecutive cut-offs in both ranges.
    """
    low_cutoffs = range(low_bound[0], low_bound[1], step_size)
    high_cutoffs = range(high_bound[0], high_bound[1], step_size)

    # Every (low, high) pair of the grid that forms a non-empty band
    bands = ((low, high) for low in low_cutoffs for high in high_cutoffs if high > low)

    for low, high in bands:
        # Write the filtered version of every split for this band
        print("Filtering audio files...")
        filter_MELD_split_files(dev_split, dev_split_filtered["directory"], low, high)
        filter_MELD_split_files(train_split, train_split_filtered["directory"], low, high)
        filter_MELD_split_files(test_split, test_split_filtered["directory"], low, high)

        combined_results = []
        print(f"Frequency Range between {low} and {high} Hz")
        # Classify the filtered files of each split
        for split in (dev_split_filtered, test_split_filtered, train_split_filtered):
            print(f"Split: {split['name']}")

            emotions = get_MELD_emotions(split)
            files = filter_appropriate_files(emotions, get_MELD_split_files(split))

            combined_results.extend(run_classifier(split, emotions, files))
            print("\n")

        results_to_csv(results_location, f"MELD_Filtered_l{low}_h{high}", combined_results)

In [None]:
# Sweep low cut-offs 1, 201, ..., 4001 Hz against high cut-offs 1, 201, ..., 7801 Hz
# (the second value of each bound is exclusive)
systematically_classify_filtered_files(low_bound=[1, 4002], high_bound=[1, 7999], step_size=200)