In [1]:
!pip install librosa
!pip install scikit-learn
!pip install umap-learn
!pip install plotly
!pip install pandas
!pip install numpy
!pip install tqdm



In [8]:
import librosa
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import plotly.express as px
import umap
from tqdm.notebook import tqdm
import os

In [1]:
def extract_audio_features(audio_path):
    """
    Extract common audio features using librosa
    """
    try:
        print(f"Processing file: {audio_path}")  # Debug print
        y, sr = librosa.load(audio_path)

        features = {}
        # Spectral features
        features['spectral_centroids'] = float(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)))
        features['spectral_rolloff'] = float(np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)))
        features['spectral_bandwidth'] = float(np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr)))

        # Rhythm features
        tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
        features['tempo'] = float(tempo)

        # MFCC
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        for i, mfcc in enumerate(mfccs):
            features[f'mfcc_{i}'] = float(np.mean(mfcc))

        # Chroma features
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        features['chroma_mean'] = float(np.mean(chroma))

        # Energy features
        features['rms_energy'] = float(np.mean(librosa.feature.rms(y=y)))

        return features
    except Exception as e:
        print(f"Error processing {audio_path}: {str(e)}")
        return None

In [2]:
def process_audio_directory(audio_dir):
    """Process audio files with detailed logging"""
    features_list = []
    file_paths = []

    # Supported audio formats
    audio_extensions = {'.mp3', '.wav', '.m4a', '.ogg'}

    print(f"Scanning directory: {audio_dir}")
    found_files = 0
    processed_files = 0

    for root, _, files in os.walk(audio_dir):
        for file in files:
            if any(file.lower().endswith(ext) for ext in audio_extensions):
                found_files += 1
                file_path = os.path.join(root, file)
                print(f"\nProcessing {file_path}")

                features = extract_audio_features(file_path)
                if features is not None:
                    features_list.append(features)
                    file_paths.append(file)
                    processed_files += 1

    print(f"\nFound {found_files} audio files")
    print(f"Successfully processed {processed_files} files")

    if not features_list:
        raise ValueError("No audio files were successfully processed")

    df = pd.DataFrame(features_list)
    df['file_name'] = file_paths
    return df

In [3]:
audio_dir = "/path/to/your/audio/files"  # Replace with your actual path

# Process files
try:
    feature_df = process_audio_directory(audio_dir)
    print("\nFeature DataFrame shape:", feature_df.shape)
    print("\nFeature columns:", feature_df.columns.tolist())

    # Scale features
    numeric_columns = feature_df.select_dtypes(include=[np.number]).columns
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(feature_df[numeric_columns])
    print("\nScaled features shape:", scaled_features.shape)
except Exception as e:
    print(f"Error in processing: {str(e)}")

Scanning directory: /path/to/your/audio/files
Error in processing: name 'os' is not defined


In [10]:
import os
def plot_elbow_curve(scaled_features, max_clusters=10):
    """Plot elbow curve for optimal k selection"""
    inertias = []
    K = range(1, max_clusters + 1)

    for k in K:
        kmeans = KMeans(n_clusters=k, random_state=42)
        kmeans.fit(scaled_features)
        inertias.append(kmeans.inertia_)

    fig = px.line(x=K, y=inertias,
                  title='Elbow Method for Optimal k',
                  labels={'x': 'Number of Clusters (k)',
                         'y': 'Inertia'})
    fig.show()

# Keep all the previous imports and functions the same, but modify the main execution part:

# Main execution
audio_dir = "./audio_files"  # Using your current directory

try:
    # Process audio files
    print("Processing audio files...")
    feature_df = process_audio_directory(audio_dir)
    print("\nFeature DataFrame shape:", feature_df.shape)

    # Scale features
    print("\nScaling features...")
    numeric_columns = feature_df.select_dtypes(include=[np.number]).columns
    scaler = StandardScaler()
    scaled_features = scaler.fit_transform(feature_df[numeric_columns])

    # Adjust max_clusters based on dataset size
    n_samples = len(feature_df)
    max_clusters = min(4, n_samples - 1)  # Maximum clusters should be less than number of samples

    # Plot elbow curve with adjusted range
    print(f"\nGenerating elbow curve (testing 1 to {max_clusters} clusters)...")
    plot_elbow_curve(scaled_features, max_clusters=max_clusters)

    # Perform clustering with smaller number of clusters
    print("\nPerforming clustering...")
    n_clusters = min(3, n_samples - 1)  # Use 3 clusters or less based on sample size
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    clusters = kmeans.fit_predict(scaled_features)

    # Reduce dimensionality and visualize
    print("\nGenerating visualization...")
    reducer = umap.UMAP(random_state=42, n_neighbors=min(3, n_samples-1))
    embedding = reducer.fit_transform(scaled_features)

    # Create scatter plot
    fig = px.scatter(
        x=embedding[:, 0],
        y=embedding[:, 1],
        color=clusters,
        hover_data=[feature_df['file_name']],
        title='Audio Clusters Visualization',
        labels={'color': 'Cluster'},
        color_continuous_scale='viridis'
    )
    fig.show()

    # Print cluster information
    print("\nCluster Distribution:")
    for cluster in range(n_clusters):
        files_in_cluster = feature_df['file_name'][clusters == cluster].tolist()
        print(f"\nCluster {cluster} ({len(files_in_cluster)} files):")
        print(", ".join(files_in_cluster))

    # Print feature analysis for each cluster
    print("\nCluster Characteristics:")
    for cluster in range(n_clusters):
        print(f"\nCluster {cluster} characteristics:")
        cluster_data = feature_df[clusters == cluster]
        for column in numeric_columns:
            mean_value = cluster_data[column].mean()
            print(f"{column}: {mean_value:.2f}")

except Exception as e:
    print(f"An error occurred: {str(e)}")

Processing audio files...
Scanning directory: ./audio_files

Processing ./audio_files/4 Parker Solar Probe - Whistler Mode Waves 2.mp3
Processing file: ./audio_files/4 Parker Solar Probe - Whistler Mode Waves 2.mp3


  features['tempo'] = float(tempo)



Processing ./audio_files/2 Kepler Star KIC7671081B.mp3
Processing file: ./audio_files/2 Kepler Star KIC7671081B.mp3

Processing ./audio_files/1 Kepler Star KIC12268220C.mp3
Processing file: ./audio_files/1 Kepler Star KIC12268220C.mp3

Processing ./audio_files/5 Juno Mission Europa Flyby.mp3
Processing file: ./audio_files/5 Juno Mission Europa Flyby.mp3

Processing ./audio_files/3 NASA - Whistler Waves.mp3
Processing file: ./audio_files/3 NASA - Whistler Waves.mp3

Found 5 audio files
Successfully processed 5 files

Feature DataFrame shape: (5, 20)

Scaling features...

Generating elbow curve (testing 1 to 4 clusters)...



Performing clustering...

Generating visualization...



n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.




Cluster Distribution:

Cluster 0 (2 files):
2 Kepler Star KIC7671081B.mp3, 1 Kepler Star KIC12268220C.mp3

Cluster 1 (1 files):
3 NASA - Whistler Waves.mp3

Cluster 2 (2 files):
4 Parker Solar Probe - Whistler Mode Waves 2.mp3, 5 Juno Mission Europa Flyby.mp3

Cluster Characteristics:

Cluster 0 characteristics:
spectral_centroids: 217.17
spectral_rolloff: 416.95
spectral_bandwidth: 325.46
tempo: 139.78
mfcc_0: -374.13
mfcc_1: 162.99
mfcc_2: 91.35
mfcc_3: 48.97
mfcc_4: 35.66
mfcc_5: 22.23
mfcc_6: 1.15
mfcc_7: -8.21
mfcc_8: -2.22
mfcc_9: 5.70
mfcc_10: 7.88
mfcc_11: 10.73
mfcc_12: 15.34
chroma_mean: 0.33
rms_energy: 0.28

Cluster 1 characteristics:
spectral_centroids: 2651.19
spectral_rolloff: 5131.90
spectral_bandwidth: 2441.03
tempo: 143.55
mfcc_0: -118.95
mfcc_1: 78.12
mfcc_2: -31.07
mfcc_3: -0.20
mfcc_4: -11.46
mfcc_5: 2.94
mfcc_6: 5.08
mfcc_7: 7.19
mfcc_8: 4.82
mfcc_9: 8.02
mfcc_10: -2.17
mfcc_11: 1.90
mfcc_12: -3.06
chroma_mean: 0.46
rms_energy: 0.06

Cluster 2 characteristics:
sp