# In [1]:
import glob
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis

# Directory containing the individual song CSV files
# song_directory = "path_to_your_song_csv_files"  # Replace with the path to your CSV files
output_file = "master_rms_energy_statistics.csv"  # Master output file

# List to store statistics for each song
rms_stats_list = []
files = glob.glob('*-MFCC.csv')
# Process each song CSV file in the directory
for filename in files:

        # Load the MFCC data for the song
       
        mfcc_data = pd.read_csv(filename)

        # Compute RMS energy per time frame (column-wise)
        rms_energy_approx = np.sqrt(np.sum(mfcc_data**2, axis=0))

        # Calculate RMS energy statistics
        rms_stats = {
            "Song": filename,
            "Mean_RMS": np.mean(rms_energy_approx),
            "STD_RMS": np.std(rms_energy_approx),
            "Min_RMS": np.min(rms_energy_approx),
            "Max_RMS": np.max(rms_energy_approx),
            "Skew_RMS": skew(rms_energy_approx),
            "Kurtosis_RMS": kurtosis(rms_energy_approx),
            "25th_Percentile_RMS": np.percentile(rms_energy_approx, 25),
            "Median_RMS": np.percentile(rms_energy_approx, 50),
            "75th_Percentile_RMS": np.percentile(rms_energy_approx, 75)
        }

        # Append stats to the list
        rms_stats_list.append(rms_stats)

# Create a DataFrame from the list of statistics
master_df = pd.DataFrame(rms_stats_list)

# Save the DataFrame to a master CSV file
master_df.to_csv(output_file, index=False)

print(f"RMS energy statistics saved to {output_file}")


# Output: RMS energy statistics saved to master_rms_energy_statistics.csv
