In [1]:
import os
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import tqdm as tqdm

import librosa
import librosa.display
import IPython.display as ipd
from sklearn.utils import resample

import warnings
warnings.filterwarnings('ignore')

<div style="text-align: center; font-size: 30px; color: red;">Audio Data Loading & Basic Inspection</div>

In [None]:
# Load sample
# sr=None keeps the file's native sampling rate; mono=False keeps the original
# channel layout so the channel check further down inspects the real data.
data, sr = librosa.load("../data/1001_DFA_DIS_XX.wav", sr = None, mono=False)

# Plot waveform
# The time axis is waveshow's default, so the `x_axis` keyword (deprecated in
# librosa 0.10 in favour of `axis`) is simply omitted; this works on both
# older and newer librosa releases.
plt.figure(figsize=(15, 6))
librosa.display.waveshow(data, sr=sr)
plt.title("Waveform Example")
plt.show()

In [None]:
# Play Audio: embed a player for the same clip that was loaded and plotted above
ipd.Audio("../data/1001_DFA_DIS_XX.wav")

In [None]:
# Check number of channels
# The rank of the loaded array decides the message: 1-D is reported as mono,
# 2-D as stereo; any other rank prints nothing.
channel_messages = {
    1: "File is a Mono Channel",
    2: "File is Stereo Channel",
}
if data.ndim in channel_messages:
    print(channel_messages[data.ndim])

<div style="text-align: center; font-size: 30px; color: red;">Exploratory Data Analysis</div>

In [None]:
# Check for Class Imbalance
# Maps the emotion code in a file name (the second-to-last "_"-separated
# token, e.g. "DIS" in "1001_DFA_DIS_XX.wav") to a readable label.
emotion_map = {
        'ANG': 'angry', 'DIS': 'disgust', 'FEA': 'fearful',
        'HAP': 'happy', 'NEU': 'neutral', 'SAD': 'sad'
}
emotions = []
for dirpath, dirnames, filenames in os.walk("../data"):
    for file in filenames:
        # Skip anything that is not an audio clip (hidden/system files, notes,
        # ...); such names would otherwise break the filename parsing below.
        if not file.lower().endswith(".wav"):
            continue
        emotion = emotion_map[file.split('_')[-2]]
        emotions.append(emotion)
# Build the frame with its column name in one step (also valid when no clip
# was found, in which case the frame is simply empty).
df = pd.DataFrame({"emotion": emotions})
df["emotion"].value_counts().plot(kind='bar')
plt.title("Class Balance");

**NOTE**
- All classes are balanced except `neutral`, which has fewer samples. Consider resampling to even out the classes?

In [None]:
# Sample Audio for each emotion (one player per cell; the trailing comment names the emotion)
ipd.Audio("../data/1001_DFA_ANG_XX.wav") # Anger sample

In [None]:
ipd.Audio("../data/1001_DFA_FEA_XX.wav") # Fear sample

In [None]:
ipd.Audio("../data/1001_IEO_DIS_HI.wav") # Disgust sample

In [None]:
ipd.Audio("../data/1001_DFA_HAP_XX.wav") # Happy sample

In [None]:
ipd.Audio("../data/1001_IEO_SAD_MD.wav") # Sad sample

**NOTE**
- Audio length is **NOT CONSISTENT** across clips, so clips need to be truncated or padded to a common length.

In [None]:
def fix_length(y, target_len = sr*3):
    """Truncate or zero-pad an audio signal to exactly ``target_len`` samples.

    The sample (time) axis is taken to be the last axis, so 1-D mono signals
    and 2-D arrays whose last axis is time are both handled. The 2-D case is
    the layout expected from ``librosa.load(..., mono=False)`` (confirm against
    the librosa documentation). Leading axes are never truncated or padded.

    Parameters
    ----------
    y : array-like
        Audio samples; the last axis is treated as time.
    target_len : int
        Desired number of samples along the last axis. The default is three
        seconds at the notebook-level ``sr``. Python evaluates that default
        once, when this ``def`` runs, so rebinding ``sr`` afterwards does not
        change it.

    Returns
    -------
    np.ndarray
        The first ``target_len`` samples when the signal is longer than the
        target, otherwise the signal with trailing zeros appended.
    """
    y = np.asarray(y)
    n_samples = y.shape[-1]
    if n_samples > target_len:
        # Too long: keep only the first target_len samples of every channel.
        return y[..., :target_len]
    # Too short (or exact): append zeros on the time axis only.
    pad_width = [(0, 0)] * (y.ndim - 1) + [(0, target_len - n_samples)]
    return np.pad(y, pad_width)

<div style="font-size: 25px; color: green;">Waveform and Spectrogram Samples</div>

In [None]:
# Instantiate Samples: one representative clip path per emotion
fearful, happy, sad = (
    "../data/1001_DFA_FEA_XX.wav",
    "../data/1001_IEO_HAP_HI.wav",
    "../data/1001_DFA_SAD_XX.wav",
)
angry, neutral, disgust = (
    "../data/1001_DFA_ANG_XX.wav",
    "../data/1001_DFA_NEU_XX.wav",
    "../data/1001_DFA_DIS_XX.wav",
)
# Put in list for easy iteration
samples_list = [fearful, happy, sad, angry, neutral, disgust]
samples_list

In [None]:
def visualize_audio(path):
    """Plot the waveform, STFT spectrogram and mel spectrogram of one clip.

    Three separate figures are drawn and shown, each titled with the emotion
    parsed from the file name.

    Parameters
    ----------
    path : str
        Path to an audio file whose base name carries the emotion code as its
        second-to-last "_"-separated token (a key of ``emotion_map``).

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If the emotion code in the file name is not a key of ``emotion_map``.
    """
    # Get the emotion from the file name
    file = os.path.basename(path)
    emotion = emotion_map[file.split("_")[-2]]
    label = emotion.title()

    # Load at the file's native sampling rate (sr=None disables resampling)
    y, sr = librosa.load(path, sr=None)

    # Plot waveform
    plt.figure(figsize=(15, 6))
    librosa.display.waveshow(y, sr=sr)
    plt.title(f"{label} Waveform")
    plt.show()

    # Plot spectrogram (STFT magnitude converted to dB)
    X = librosa.stft(y)
    Xdb = librosa.amplitude_to_db(np.abs(X))
    plt.figure(figsize=(15, 6))
    librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='hz')
    plt.title(f"{label} Spectrogram")
    plt.show()

    # Plot mel-spectrogram
    # One hop length is shared by the transform and the plot so the time axis
    # of the figure matches the frames that were actually computed.
    hop_length = 512
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=2048, hop_length=hop_length, n_mels=128)
    # Scale to db
    mel_db = librosa.power_to_db(mel_spec, ref=np.max)
    # Mel Spectrogram Plot
    plt.figure(figsize=(15, 6))
    # Use the clip's own sampling rate: a fixed 22050 would mislabel the time
    # and frequency axes whenever the native rate differs.
    librosa.display.specshow(data=mel_db,
                             sr=sr,
                             hop_length=hop_length,
                             x_axis="time",
                             y_axis="mel",
                             cmap="viridis")
    plt.colorbar(format='%+2.0f dB')
    plt.title(f"{label} Mel Spectrogram")
    # Show explicitly so each clip's three figures are flushed together
    # instead of the last one lingering until the next show() call.
    plt.show()

In [None]:
# Draw the waveform, spectrogram and mel spectrogram for every sample clip in turn
for sample in samples_list:
    visualize_audio(sample)

In [None]:
# Check fix length function
# Note: this rebinds the notebook-level `y` and `sr` to the neutral clip
# (loaded at its native sampling rate).
y, sr = librosa.load(neutral, sr=None)
print(f'Initial length: {len(y)}')
# No target_len is passed, so fix_length uses its default target length.
fixed_len = fix_length(y)
print(f'Fixed length: {len(fixed_len)}')

<div style="font-size: 25px; color: green;">Resampling</div>

**NOTE**
- There's a slight imbalance. I'll have to oversample the neutral class to bring it level with the other classes.

In [None]:
# Create a reference list for your data. Item e.g, (path, speaker, emotion)
data = []
for dirpath, dirnames, filenames in os.walk("../data"):
    # os.walk yields entries in an arbitrary, filesystem-dependent order.
    # Sorting (directories in place, files per directory) makes the list, and
    # the seeded resampling applied to it later, reproducible across machines.
    dirnames.sort()
    for file in sorted(filenames):
        # Skip anything that is not an audio clip; such names would otherwise
        # break the filename parsing below.
        if not file.lower().endswith(".wav"):
            continue
        path = os.path.join(dirpath, file)
        speaker = file.split('_')[0] # Need to identify speaker since dataset is speaker-independent
        emotion = emotion_map[file.split('_')[-2]]
        data.append((path, speaker, emotion))
data[:5]

In [None]:
# Put data into a dataframe
# Column names are passed at construction time: one step instead of two, and
# an empty list still yields a correctly labelled (empty) frame instead of
# failing on the column assignment.
data_df = pd.DataFrame(data, columns=["path", "speaker", "emotion"])

data_df.head()

In [None]:
# Split into majority and minority classes for resampling
# A single boolean mask marks the neutral rows; its negation selects the rest.
neutral_mask = data_df["emotion"].eq("neutral")
majority_df = data_df.loc[~neutral_mask]
minority_df = data_df.loc[neutral_mask]

In [None]:
# Perform oversampling
# Upsample the neutral rows (sampling with replacement) to the size of the
# largest remaining class. The target is derived from the data rather than
# hard-coded, so it stays correct if the set of files changes.
# NOTE(review): this oversampling runs before the splitting section, so copies
# of the same neutral clip may end up on both sides of a later train/test
# split. Consider resampling only the training portion.
target_count = int(majority_df["emotion"].value_counts().max())
neutral_upsampled = resample(
    minority_df,
    replace = True,
    n_samples = target_count,
    random_state = 42
)

In [None]:
# Combine back to one df
# ignore_index=True renumbers the rows 0..n-1. The neutral rows were drawn with
# replacement, so keeping their original labels would leave duplicate index
# labels in the combined frame and make label-based selection ambiguous.
# Note: this rebinds `data_df`; re-running the majority/minority split cell
# after this one would operate on the already balanced frame.
data_df = pd.concat([majority_df, neutral_upsampled], ignore_index=True)
data_df["emotion"].value_counts()

In [None]:
# Preview the first rows of the rebalanced frame
data_df.head()

**NOTE**
- This dataset **IS NOT** inherently speaker-dependent! The `speaker` column is kept so that speakers can be identified when splitting.

<div style="font-size: 25px; color: green;">Splitting</div>

In [None]:
target = 