## Performing Resampling, Normalization & Windowing

In [1]:
import os
import soundfile as sf
import librosa
import numpy as np

INPUT_DIR = "/kaggle/input/dataset-separated"
OUTPUT_DIR = "/kaggle/working/dataset-windowed"
SAMPLE_RATE = 22050  # Hz; every clip is resampled to this rate before windowing
WINDOW_SIZE = 1.0    # window length, in seconds
HOP_SIZE = 0.5       # distance between consecutive window starts, in seconds

# Window geometry in samples. It must be derived from SAMPLE_RATE (the rate of
# the resampled signal that is actually sliced), not from the rate of the
# original file, otherwise windows are not WINDOW_SIZE seconds long.
N_SAMPLES_PER_WINDOW = int(WINDOW_SIZE * SAMPLE_RATE)
WINDOW_HOP_SAMPLES = int(HOP_SIZE * SAMPLE_RATE)


def standardize(signal):
    """Return `signal` shifted to zero mean and scaled to unit variance.

    An empty signal is returned unchanged. A constant signal (standard
    deviation 0, e.g. pure silence) is returned as all zeros instead of
    dividing by zero and producing NaN/inf samples.
    """
    if len(signal) == 0:
        return signal
    centered = signal - np.mean(signal)
    spread = np.std(signal)
    if spread == 0:
        return centered
    return centered / spread


def window_starts(n_samples, window_len, hop_len):
    """Return the start offsets of every full window that fits in `n_samples`.

    Offsets advance by `hop_len`; a trailing partial window is not included,
    so a signal shorter than `window_len` yields no offsets.
    """
    return range(0, n_samples - window_len + 1, hop_len)


# Loop through all the emotion subdirectories
for root, dirs, files in os.walk(INPUT_DIR):
    for file in files:
        # Load the audio file at its native sample rate
        audio_path = os.path.join(root, file)
        audio, sr = librosa.load(audio_path, sr=None)

        # Resample the audio to a common sample rate
        audio_resampled = librosa.resample(audio, orig_sr=sr, target_sr=SAMPLE_RATE)

        # Normalize the audio data (zero mean, unit variance)
        audio_norm = standardize(audio_resampled)

        # Windows are written to a folder named after the source directory
        window_dir = os.path.join(OUTPUT_DIR, os.path.basename(root))

        # Loop through the audio signal in overlapping windows
        for i in window_starts(len(audio_norm), N_SAMPLES_PER_WINDOW, WINDOW_HOP_SAMPLES):
            # Extract the current window
            audio_window = audio_norm[i:i + N_SAMPLES_PER_WINDOW]

            # Save the windowed audio to a new file
            new_audio_path = os.path.join(window_dir, f"{file}_{i}.wav")
            os.makedirs(window_dir, exist_ok=True)
            # The data is at SAMPLE_RATE, so that is the rate the file must
            # declare. FLOAT is used because the standardized signal is not
            # confined to [-1, 1]; an integer PCM subtype would clip it.
            sf.write(new_audio_path, audio_window, SAMPLE_RATE, subtype='FLOAT')


## Feature Extraction

In [2]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
from tqdm import tqdm
# Windowed clips are read from INPUT_DIR; one feature file per clip is written under OUTPUT_DIR
INPUT_DIR = "/kaggle/working/dataset-windowed"
OUTPUT_DIR = "/kaggle/working/features"

# MFCC extraction settings
N_MFCC = 13
N_MELS = 128
HOP_LENGTH = 512

# Visit every directory below INPUT_DIR; the progress bar advances once per directory
for root, dirs, files in tqdm(os.walk(INPUT_DIR)):
    # Feature files mirror the name of the directory the clip was read from
    source_folder = os.path.basename(root)

    for file in files:
        # Read the clip at the sample rate stored in the file
        audio_path = os.path.join(root, file)
        audio, sr = librosa.load(audio_path, sr=None)

        # MFCC matrix for the clip
        mfcc = librosa.feature.mfcc(
            y=audio,
            sr=sr,
            n_mfcc=N_MFCC,
            n_mels=N_MELS,
            hop_length=HOP_LENGTH,
        )

        # Collapse the time axis: one mean value per MFCC coefficient
        feature_vector = mfcc.mean(axis=1)

        # Write the vector as a single header-less CSV row
        output_file = os.path.join(OUTPUT_DIR, source_folder, f"{file}.csv")
        os.makedirs(os.path.dirname(output_file), exist_ok=True)
        feature_row = pd.DataFrame([feature_vector])
        feature_row.to_csv(output_file, index=False, header=False)


9it [02:18, 15.36s/it]


## Mel Spectrogram

In [3]:
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt

# Windowed clips are read from INPUT_DIR; one PNG per clip is written under OUTPUT_DIR
INPUT_DIR = "/kaggle/working/dataset-windowed"
OUTPUT_DIR = "/kaggle/working/mel-spectrograms"

# Sample rate the clips are loaded at
SR = 22050

# Number of mel bands in each spectrogram
N_MELS = 128

# Visit every directory below INPUT_DIR
for root, dirs, files in os.walk(INPUT_DIR):
    # Images mirror the name of the directory the clip was read from
    source_folder = os.path.basename(root)

    for file in files:
        # Read the clip, resampling to SR on load
        audio, sr = librosa.load(os.path.join(root, file), sr=SR)

        # Mel power spectrogram, expressed in dB relative to its own peak
        S = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=N_MELS)
        log_S = librosa.power_to_db(S, ref=np.max)

        # Draw the spectrogram on its own 5x5 inch figure with the axes hidden
        fig, ax = plt.subplots(figsize=(5, 5))
        librosa.display.specshow(log_S, sr=sr, x_axis='time', y_axis='mel', ax=ax)
        ax.axis('off')
        fig.tight_layout()

        # Write the image, then release the figure so figures do not pile up in memory
        new_audio_path = os.path.join(OUTPUT_DIR, source_folder, f"{file}.png")
        os.makedirs(os.path.dirname(new_audio_path), exist_ok=True)
        fig.savefig(new_audio_path, bbox_inches='tight', pad_inches=0)
        plt.close(fig)


## LABEL CSV

In [4]:
import os
import csv

# Path to directory containing the mel-spectrograms
INPUT_DIR = "/kaggle/working/mel-spectrograms"

# Collect every file path below INPUT_DIR. Sorted so that the row order of the
# labels file does not depend on the order the filesystem lists entries in.
mel_files = sorted(
    os.path.join(root, file)
    for root, dirs, files in os.walk(INPUT_DIR)
    for file in files
)

# Define the emotions corresponding to each identifier
emotions = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised"
}

# Define the labels file path (relative, so it is created in the working directory)
LABELS_FILE = "labels.csv"


def emotion_from_file_name(file_name):
    """Return the emotion name encoded in a spectrogram file name.

    The part of the name before the first "." and the first "_" is split on
    "-", and its third field is looked up in `emotions`.

    Raises:
        IndexError: if that part has fewer than three "-"-separated fields.
        KeyError: if the third field is not a key of `emotions`.
    """
    recording_id = file_name.split(".")[0].split("_")[0]
    emotion_id = recording_id.split("-")[2]
    return emotions[emotion_id]


def label_rows(paths):
    """Return one `(file_name, emotion)` tuple per path in `paths`.

    `file_name` is the complete base name of the path, so each row identifies
    exactly one image on disk. Truncating the name at its first "." would drop
    the window offset and make every window of a recording share one name.
    """
    rows = []
    for path in paths:
        file_name = os.path.basename(path)
        rows.append((file_name, emotion_from_file_name(file_name)))
    return rows


# Write the header row followed by one row per mel-spectrogram
with open(LABELS_FILE, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["file_name", "emotion"])
    writer.writerows(label_rows(mel_files))

## Splitting Dataset

In [5]:
import os
import random
import shutil

# Input and output directories
INPUT_DIR = "/kaggle/working/mel-spectrograms"
OUTPUT_DIR = "/kaggle/working/dataset-split"

# Training, validation, and test set sizes (fractions of the recordings in each
# directory; the test set receives whatever remains after train and val)
TRAIN_SIZE = 0.7
VAL_SIZE = 0.2
TEST_SIZE = 0.1

# Seed for random number generator
RANDOM_SEED = 42


def recording_key(file_name):
    """Return the part of `file_name` before its last "_".

    Window images are named "<recording>_<offset>.wav.png", so this key is
    shared by all windows cut from one recording. A name without "_" is its
    own key.
    """
    return file_name.rsplit("_", 1)[0]


def split_by_recording(files, train_size=TRAIN_SIZE, val_size=VAL_SIZE, seed=RANDOM_SEED):
    """Split `files` into train/val/test lists without separating a recording.

    Files are grouped by `recording_key`, the groups are shuffled with a
    generator seeded by `seed`, and whole groups are assigned to the splits.
    Consecutive windows of a recording overlap, so splitting individual
    windows would put near-identical images in both train and test.

    The names are sorted before shuffling so the result depends only on the
    set of names and the seed, not on the order they were listed in.

    Returns:
        dict mapping "train", "val" and "test" to lists of file names.
    """
    groups = {}
    for name in sorted(files):
        groups.setdefault(recording_key(name), []).append(name)

    keys = list(groups)
    random.Random(seed).shuffle(keys)

    n_train = int(len(keys) * train_size)
    n_val = int(len(keys) * val_size)
    keys_by_split = {
        "train": keys[:n_train],
        "val": keys[n_train:n_train + n_val],
        "test": keys[n_train + n_val:],
    }
    return {
        split: [name for key in split_keys for name in groups[key]]
        for split, split_keys in keys_by_split.items()
    }


# Loop through all the emotion subdirectories
for root, dirs, files in os.walk(INPUT_DIR):
    class_name = os.path.basename(root)

    # Copy the files to their respective sets
    for split, split_names in split_by_recording(files).items():
        if not split_names:
            continue
        dst_dir = os.path.join(OUTPUT_DIR, split, class_name)
        os.makedirs(dst_dir, exist_ok=True)
        for file in split_names:
            shutil.copyfile(os.path.join(root, file), os.path.join(dst_dir, file))