In [2]:
import librosa
import numpy as np
import os
import csv
from scipy.stats import kurtosis, skew, entropy

# EXTRACT FEATURES FROM AUDIO FILE

def extract_features(audio_path):
    y, sr = librosa.load(audio_path)

    # Frame-wise features
    S = np.abs(librosa.stft(y))
    centroid = librosa.feature.spectral_centroid(S=S)[0]
    bandwidth = librosa.feature.spectral_bandwidth(S=S)[0]
    rolloff = librosa.feature.spectral_rolloff(S=S)[0]
    flatness = librosa.feature.spectral_flatness(S=S)[0]
    rms = librosa.feature.rms(S=S)[0]
    zcr = librosa.feature.zero_crossing_rate(y)[0]
    flux = np.sqrt(np.sum(np.diff(S, axis=1)**2, axis=0))  # Spectral flux

    # Tempo and beats
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)

    # Harmonic-Percussive Source Separation
    y_harmonic, _ = librosa.effects.hpss(y)

    # Pitch salience proxy using harmonic energy
    pitch_salience = np.mean(librosa.feature.rms(y=y_harmonic))

    # Chroma
    chroma = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)
    chroma_entropy = entropy(np.mean(chroma, axis=1))
    chroma_crest = np.max(chroma, axis=0) / (np.mean(chroma, axis=0) + 1e-6)

    # Tonal centroid (approx)
    tonal_centroid = librosa.feature.tonnetz(y=y_harmonic, sr=sr)

    # Silence rate approximation
    silence_threshold = 0.01
    frame_energy = librosa.feature.rms(y=y)[0]
    silence_rate = np.mean(frame_energy < silence_threshold)

    features = {
        'spectral_centroid_mean': np.mean(centroid),
        'spectral_bandwidth_mean': np.mean(bandwidth),
        'spectral_rolloff_mean': np.mean(rolloff),
        'spectral_flatness_mean': np.mean(flatness),
        'spectral_rms_mean': np.mean(rms),
        'zero_crossing_rate_mean': np.mean(zcr),
        'spectral_flux_mean': np.mean(flux),
        'spectral_flux_std': np.std(flux),
        'rms_kurtosis': kurtosis(rms),
        'rms_skewness': skew(rms),
        'pitch_salience': pitch_salience,
        'tempo': float(tempo),
        'chroma_entropy': chroma_entropy,
        'chroma_crest_mean': np.mean(chroma_crest),
        'chroma_crest_std': np.std(chroma_crest),
        'silence_rate': silence_rate,
    }

    # Add Tonnetz values as individual features
    for i, val in enumerate(np.mean(tonal_centroid, axis=1)):
        features[f'tonnetz_mean_{i}'] = val
    for i, val in enumerate(np.std(tonal_centroid, axis=1)):
        features[f'tonnetz_std_{i}'] = val

    return features

def process_folder(folder_path, output_csv):
    feature_list = []
    filenames = []

    for filename in os.listdir(folder_path):
        if filename.lower().endswith('.mp3'):
            filepath = os.path.join(folder_path, filename)
            print(f"Processing: {filename}")
            try:
                features = extract_features(filepath)
                features['filename'] = filename
                feature_list.append(features)
                filenames.append(filename)
            except Exception as e:
                print(f"Error processing {filename}: {e}")

    # Write to CSV
    if feature_list:
        keys = ['filename'] + [k for k in feature_list[0].keys() if k != 'filename']
        with open(output_csv, 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=keys)
            writer.writeheader()
            writer.writerows(feature_list)
        print(f"Features saved to {output_csv}")
    else:
        print("No features extracted.")

# Example usage
if __name__ == "__main__":
    folder_path = ""     # CHANGE THIS
    output_csv = ""
    process_folder(folder_path, output_csv)


FileNotFoundError: [WinError 3] The system cannot find the path specified: ''

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_distances

# COMPUTE ILD SCORES USING COSINE DISTANCE

def compute_intra_list_diversity(csv_path, id_column='filename'):
    # Load features from CSV
    df = pd.read_csv(csv_path)
    
    # Separate out the song ID / name column
    if id_column in df.columns:
        song_ids = df[id_column]
        df = df.drop(columns=[id_column])
    else:
        song_ids = df.index

    # Min-Max normalization (scales all features to [0, 1])
    scaler = MinMaxScaler()
    features_normalized = scaler.fit_transform(df)

    # Compute pairwise cosine distances
    pairwise_dist = cosine_distances(features_normalized)

    display(pairwise_dist)

    # Extract upper triangle of the matrix (excluding diagonal)
    triu_indices = np.triu_indices_from(pairwise_dist, k=1)
    pairwise_values = pairwise_dist[triu_indices]

    # Intra-list diversity = average pairwise distance
    diversity_score = np.mean(pairwise_values)

    print(f"Intra-list diversity (cosine distance, min-max scaled): {diversity_score:.4f}")
    return diversity_score