# Root Mean Square Audio Data Pipeline

Authors: Sylas Chacko, Omari Motta, Ashley Chen

In [1]:
# Importing Packages

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import wave
import soundfile as sf
from glob import glob
import pickle
import librosa 
import librosa.display
import IPython.display as ipd
from itertools import cycle
import shutil



The TESS (Toronto Emotional Speech Set) dataset is already clean. Below is the feature extraction process.

### Feature Extraction: Root Mean Square

In [2]:
def calculate_rms(file_path):
    """Return the mean RMS (Root Mean Square) energy of one audio file.

    The file is decoded with ``librosa.load`` using its default settings,
    the RMS energy is computed with ``librosa.feature.rms``, and the
    resulting values are averaged into a single number.

    Parameters
    ----------
    file_path : str
        Path of the audio file to analyse.

    Returns
    -------
    The mean of the RMS values computed for the file.
    """
    # librosa.load also returns the sample rate, which is not needed here
    samples, _ = librosa.load(file_path)

    rms_values = librosa.feature.rms(y=samples)
    return rms_values.mean()

def process_audio_files(folder_path, output_csv):
    """Compute the mean RMS energy of every WAV file in a folder and save it as CSV.

    Each ``.wav`` file directly inside ``folder_path`` is passed to
    ``calculate_rms`` (defined earlier in this notebook) and the results are
    written to ``output_csv`` with the columns ``filename`` and ``rms``.

    Parameters
    ----------
    folder_path : str
        Directory that is scanned (non-recursively) for WAV files.
    output_csv : str
        Path of the CSV file to create.

    Raises
    ------
    FileNotFoundError
        Raised by ``os.listdir`` if ``folder_path`` does not exist.

    Notes
    -----
    * Files are processed in sorted filename order so the CSV is identical
      from run to run; ``os.listdir`` returns entries in arbitrary order.
    * The extension check is case-insensitive, so ``.WAV`` files are included.
    * When the folder holds no WAV files the CSV still gets its header row,
      so it can be read back with ``pd.read_csv`` without raising an error.
    """
    # Sort for a reproducible row order; lower() so '.WAV' is not silently skipped
    wav_filenames = sorted(
        name for name in os.listdir(folder_path)
        if name.lower().endswith('.wav')
    )

    # One record per audio file
    results = [
        {'filename': name, 'rms': calculate_rms(os.path.join(folder_path, name))}
        for name in wav_filenames
    ]

    # Explicit columns guarantee a header even when `results` is empty
    df = pd.DataFrame(results, columns=['filename', 'rms'])

    # Save the DataFrame to a CSV file
    df.to_csv(output_csv, index=False)
    print(f'CSV file created: {output_csv}')


# Folder containing the TESS .wav files to analyse.
# NOTE(review): this is an absolute, machine-specific path; it has to be
# edited before the notebook can run on another computer.
folder_path = r'C:\Users\sylas\OneDrive\Projects\emo-db-project\TESS_Audio'
# Output CSV; a bare filename, so it is created in the current working directory.
output_csv = 'TESS_rms_values.csv'


# Compute the mean RMS of every .wav file in the folder and write the CSV.
process_audio_files(folder_path, output_csv)


CSV file created: TESS_rms_values.csv


### Adding the Emotion Column

In [3]:
import pandas as pd

# Load the RMS table produced by the feature-extraction step.
# That step writes 'TESS_rms_values.csv' relative to the working directory,
# so the same relative filename is read here instead of a machine-specific
# absolute path that only exists on one computer.
csv_file = 'TESS_rms_values.csv'
df = pd.read_csv(csv_file)

# Function to extract emotion from the filename
def extract_emotion(filename):
    """Return the emotion label encoded at the end of a filename.

    The label is the text after the last underscore and before the first
    following dot, e.g. ``'OAF_back_angry.wav'`` -> ``'angry'``. If the name
    has no underscore, the text before the first dot is returned.
    """
    # Text after the last underscore (the whole name if there is none)
    last_segment = filename.rpartition('_')[2]
    # Cut the label off at the first dot, i.e. drop the extension
    return last_segment.partition('.')[0]

# Derive each row's emotion label from its filename and attach it as a new column
df = df.assign(Emotion=df['filename'].apply(extract_emotion))

# Write the labelled table to a separate CSV so the RMS-only file is left untouched
output_csv = 'TESS_rms_final.csv'
df.to_csv(output_csv, index=False)

print(f'Updated CSV file created: {output_csv}')


Updated CSV file created: TESS_rms_final.csv
