In [5]:
import librosa
import numpy as np

# Relative path to a single sample track. Nothing in this cell reads it, and the
# batch loop further down rebinds `wav_file` for every file it processes.
wav_file = "wav/3 Nights.wav"

##### Feature extraction: compute all raw librosa features for one track first
def extract_features(wav_file_path):
    """
    Load one audio file and compute its raw (un-averaged) librosa features.

    Parameters:
    wav_file_path (str): Path of the audio file, passed to `librosa.load` with
        librosa's default loading settings.

    Returns:
    dict: Maps feature name to value. 'tempo' is the first element of the tempo
        estimate; 'zcr', 'spectral_centroid', 'spectral_bandwidth',
        'spectral_rolloff', 'chroma_stft', 'mfcc', 'spectral_contrast' and
        'tonnetz' hold the arrays exactly as librosa returns them.
    """
    # Load the audio file
    x, sr = librosa.load(wav_file_path)

    # librosa 0.10 moved tempo estimation from `librosa.beat.tempo` to
    # `librosa.feature.tempo`; the old name is deprecated and scheduled for
    # removal. Prefer the new location and fall back for older installations.
    estimate_tempo = getattr(librosa.feature, 'tempo', None)
    if estimate_tempo is None:
        estimate_tempo = librosa.beat.tempo

    # Extract various audio features without averaging
    features = {
        'tempo': estimate_tempo(y=x, sr=sr)[0],
        'zcr': librosa.feature.zero_crossing_rate(x),
        'spectral_centroid': librosa.feature.spectral_centroid(y=x, sr=sr),
        'spectral_bandwidth': librosa.feature.spectral_bandwidth(y=x, sr=sr),
        'spectral_rolloff': librosa.feature.spectral_rolloff(y=x, sr=sr),
        'chroma_stft': librosa.feature.chroma_stft(y=x, sr=sr),
        'mfcc': librosa.feature.mfcc(y=x, sr=sr),
        'spectral_contrast': librosa.feature.spectral_contrast(y=x, sr=sr),
        'tonnetz': librosa.feature.tonnetz(y=x, sr=sr)
    }

    return features


In [6]:
def analyze_zcr(zcr, threshold=0.1):
    """
    Summarise a Zero Crossing Rate (ZCR) array with five descriptive statistics.

    Parameters:
    zcr (numpy.ndarray): Zero Crossing Rate values; all elements are pooled together.
    threshold (float): Cut-off used for the "proportion above" statistic.

    Returns:
    list: [mean, median, std deviation, max, proportion of values strictly above
          `threshold`], each rounded to 4 decimal places.
    """
    # Evaluated left to right, in the same order the statistics are reported.
    raw_stats = (
        np.mean(zcr),
        np.median(zcr),
        np.std(zcr),
        np.max(zcr),
        np.mean(zcr > threshold),  # mean of a boolean mask == fraction of True entries
    )
    return [round(stat, 4) for stat in raw_stats]



def analyze_spectral_feature(spectral_feature, threshold):
    """
    Summarise a spectral feature (e.g. spectral rolloff, bandwidth, or centroid).

    Parameters:
    spectral_feature (numpy.ndarray): Spectral feature values; all elements are pooled together.
    threshold (float): Cut-off used for the "proportion above" statistic.

    Returns:
    list: [mean, median, std deviation, max, proportion of values strictly above
          `threshold`], each rounded to 4 decimal places.
    """
    reducers = (np.mean, np.median, np.std, np.max)
    summary = [reduce_fn(spectral_feature) for reduce_fn in reducers]

    # Fraction of values exceeding the threshold (mean of the boolean mask).
    summary.append(np.mean(spectral_feature > threshold))

    return [round(value, 4) for value in summary]



def analyze_chroma_stft(chroma_stft):
    """
    Compute the standard deviation of each row (pitch class) of a Chroma STFT array.

    Parameters:
    chroma_stft (numpy.ndarray): 2-D array of Chroma STFT features, one row per pitch class.

    Returns:
    list: One standard deviation per row, each rounded to 4 decimal places.
    """
    n_rows = chroma_stft.shape[0]
    return [round(np.std(chroma_stft[row_idx, :]), 4) for row_idx in range(n_rows)]


def analyze_mfcc(mfcc):
    """
    Summarise how MFCCs change over time and how much they vary overall.

    Parameters:
    mfcc (numpy.ndarray): 2-D array of MFCC features, one row per coefficient.

    Returns:
    tuple: (mean_delta, total_variability) where
           mean_delta is an array holding, per coefficient, the mean absolute
           first-order delta rounded to 4 decimal places, and
           total_variability is the sum of the per-coefficient standard
           deviations rounded to 4 decimal places.
    """
    # First-order delta (temporal derivative) of the MFCCs
    first_order = librosa.feature.delta(mfcc)

    # Average magnitude of change for every coefficient
    abs_change_per_coeff = np.mean(np.abs(first_order), axis=1)

    # Overall spread: per-coefficient standard deviations added together
    summed_std = np.sum(np.std(mfcc, axis=1))

    return np.round(abs_change_per_coeff, 4), round(summed_std, 4)


def analyze_spectral_contrast(spectral_contrast):
    """
    Summarise spectral contrast: per-band spread, temporal change, and overall spread.

    Parameters:
    spectral_contrast (numpy.ndarray): 2-D array of spectral contrast features,
        one row per frequency band.

    Returns:
    tuple: (per-band standard deviations as a list, rounded to 4 decimals;
            mean absolute delta over all bands and frames, rounded to 4 decimals;
            mean of the already-rounded per-band standard deviations, rounded to 6 decimals).
    """
    # Spread of each frequency band over time
    per_band_std = np.round(np.std(spectral_contrast, axis=1), 4).tolist()

    # Temporal dynamics: magnitude of the frame-to-frame delta
    abs_delta = np.abs(librosa.feature.delta(spectral_contrast))

    return (
        per_band_std,
        round(np.mean(abs_delta), 4),
        round(np.mean(per_band_std), 6),  # averaged from the rounded per-band values
    )


def analyze_tonnetz(tonnetz):
    """
    Summarise Tonnetz features: per-dimension spread, temporal change, and overall spread.

    Parameters:
    tonnetz (numpy.ndarray): 2-D array of Tonnetz features, one row per tonal band.

    Returns:
    tuple: (per-band standard deviations as a list, rounded to 4 decimals;
            mean absolute delta over all bands and frames, rounded to 6 decimals;
            mean of the already-rounded per-band standard deviations, rounded to 6 decimals).
    """
    # Spread of each tonal band over time
    band_spread = np.std(tonnetz, axis=1)
    band_spread = np.round(band_spread, 4).tolist()

    # Temporal dynamics: average magnitude of the frame-to-frame delta
    mean_abs_delta = round(np.mean(np.abs(librosa.feature.delta(tonnetz))), 6)

    # Variability measure: average of the rounded per-band spreads
    mean_spread = round(np.mean(band_spread), 6)

    return band_spread, mean_abs_delta, mean_spread


In [None]:
import pandas as pd
import os

# Folder that holds the .wav tracks to analyse.
# NOTE(review): absolute, machine-specific path — point it at your own data folder before re-running.
wav_files_path = "C:/Users/megha/Documents/Sem3/881 Data Mining/Project/Final/Mine/wav"

# Output column names, grouped by the analysis function that fills them.
zcr_col = ['mean_zcr', 'median_zcr', 'std_zcr', 'max_zcr', 'aboveThr_zcr']
sc_col = ['mean_sc', 'median_sc', 'std_sc', 'max_sc', 'aboveThr_sc']
sb_col = ['mean_sb', 'median_sb', 'std_sb', 'max_sb', 'aboveThr_sb']
sr_col = ['mean_sr', 'median_sr', 'std_sr', 'max_sr', 'aboveThr_sr']
chroma = [f'pitch{n}_intensity' for n in range(1, 13)]
mfcc_col = [f'MFCC{n}' for n in range(1, 21)] + ['MFCC_Variability']
contrast_col = [f'Band{n}' for n in range(1, 7)] + ['contrast_del_mean', 'contrast_avg_sd']
ton_col = [f'Tone{n}' for n in range(1, 7)] + ['Tone_deltaMean', 'Tone_avg_sd']

columns = ['track_name', 'tempo'] + zcr_col + sc_col + sb_col + sr_col + chroma + mfcc_col + contrast_col + ton_col


def _named(names, values):
    """Map each column name to the value at the same position in `values`.

    Surplus values are ignored; too few values raise IndexError.
    """
    return {name: values[pos] for pos, name in enumerate(names)}


# Rows are collected in a plain list and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on every call.
rows = []

i = 1
# Process each WAV file
for filename in os.listdir(wav_files_path):
    if not filename.endswith('.wav'):
        continue

    wav_file = os.path.join(wav_files_path, filename)

    # splitext strips only the trailing extension; str.replace would also remove
    # a '.wav' that happens to appear in the middle of a file name.
    new_row = {'track_name': os.path.splitext(filename)[0]}

    features = extract_features(wav_file)
    new_row['tempo'] = features['tempo']

    # The second argument is the cut-off for the "proportion above" statistic
    # (presumably Hz — TODO confirm the units and the choice of values).
    new_row.update(_named(zcr_col, analyze_zcr(features['zcr'])))
    new_row.update(_named(sc_col, analyze_spectral_feature(features['spectral_centroid'], 2000)))
    new_row.update(_named(sb_col, analyze_spectral_feature(features['spectral_bandwidth'], 2500)))
    new_row.update(_named(sr_col, analyze_spectral_feature(features['spectral_rolloff'], 5000)))

    # One standard deviation per chroma row, stored under the pitchN_intensity columns.
    new_row.update(_named(chroma, analyze_chroma_stft(features['chroma_stft'])))

    # MFCC1..MFCC20 hold the per-coefficient mean absolute delta, not the raw MFCCs.
    mfcc_mean_delta, mfcc_variability = analyze_mfcc(features['mfcc'])
    new_row.update(_named(mfcc_col[:-1], mfcc_mean_delta))
    new_row['MFCC_Variability'] = mfcc_variability

    # NOTE(review): librosa documents spectral_contrast as returning n_bands + 1 rows
    # (7 by default), so a seventh band is likely dropped here — confirm whether a
    # Band7 column is wanted. Only Band1..Band6 are stored, as before.
    band_sd, contrast_delta_mean, contrast_avg_sd = analyze_spectral_contrast(features['spectral_contrast'])
    new_row.update(_named(contrast_col[:6], band_sd))
    new_row['contrast_del_mean'] = contrast_delta_mean
    new_row['contrast_avg_sd'] = contrast_avg_sd

    tone_sd, tone_delta_mean, tone_avg_sd = analyze_tonnetz(features['tonnetz'])
    new_row.update(_named(ton_col[:6], tone_sd))
    new_row['Tone_deltaMean'] = tone_delta_mean
    new_row['Tone_avg_sd'] = tone_avg_sd

    print(f"File {filename} done, Number: {i}")
    i += 1

    rows.append(new_row)

# Passing `columns` fixes the column order and keeps the header even when no file was found.
df = pd.DataFrame(rows, columns=columns)

df.to_csv("Song_Analytics.csv",index=False)


In [8]:
# Preview the first two rows of the feature table as a quick sanity check.
df.head(2)

Unnamed: 0,track_name,tempo,mean_zcr,median_zcr,std_zcr,max_zcr,aboveThr_zcr,mean_sc,median_sc,std_sc,...,contrast_del_mean,contrast_avg_sd,Tone1,Tone2,Tone3,Tone4,Tone5,Tone6,Tone_deltaMean,Tone_avg_sd
0,0800 HEAVEN,143.554688,0.0918,0.0913,0.0527,0.6323,0.4358,2196.7699,2259.6592,862.0266,...,0.6804,6.128429,0.1183,0.1248,0.1614,0.127,0.0527,0.0522,0.00906,0.106067
1,1 2 3 feat Jason Derulo De La Ghetto,95.703125,0.1278,0.1172,0.0813,0.6621,0.6041,2811.9937,2768.6767,1057.0624,...,0.7764,6.554386,0.1012,0.0931,0.1172,0.1136,0.0526,0.0427,0.008302,0.086733
