In [6]:
import librosa
import numpy as np
import os
import csv
import time

In [7]:
def get_processed_song_ids(csv_file):
    processed_song_ids = set()
    if os.path.exists(csv_file):
        with open(csv_file, 'r', newline='') as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                processed_song_ids.add(row['song_id'])
    return processed_song_ids

In [8]:
def extract_features(song_path, song_id):
    y, sr = librosa.load(song_path)
    
    chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr) #pitch
    mfccs = librosa.feature.mfcc(y=y, sr=sr) #spectral charecteristics and timbral information
    spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr) #spectrum center of mass
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr) #width of spectral envelope
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr) #spectral texture, difference in amplitude between peaks and valleys in the spectrum
    tonnetz = librosa.feature.tonnetz(y=y, sr=sr) #tonal content and harmonic relationships
    rms = librosa.feature.rms(y=y) #energy of the signal
    spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr) #frequency below which a certain percentage of the total spectral energy lies
    zero_crossing_rate = librosa.feature.zero_crossing_rate(y) #rate of changes in the signal
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    
    beat_histogram = np.histogram(beat_frames, bins=len(y))
    
    features = {
        'song_id': song_id,
        'chroma_stft_mean': np.mean(chroma_stft),
        'chroma_stft_var': np.var(chroma_stft),
        'mfccs_mean': np.mean(mfccs),
        'mfccs_var': np.var(mfccs),
        'spectral_centroid_mean': np.mean(spectral_centroid),
        'spectral_centroid_var': np.var(spectral_centroid),
        'spectral_bandwidth_mean': np.mean(spectral_bandwidth),
        'spectral_bandwidth_var': np.var(spectral_bandwidth),
        'spectral_contrast_mean': np.mean(spectral_contrast),
        'spectral_contrast_var': np.var(spectral_contrast),
        'tonnetz_mean': np.mean(tonnetz),
        'tonnetz_var': np.var(tonnetz),
        'rms_mean': np.mean(rms),
        'rms_var': np.var(rms),
        'spectral_rolloff_mean': np.mean(spectral_rolloff),
        'spectral_rolloff_var': np.var(spectral_rolloff),
        'zero_crossing_rate_mean': np.mean(zero_crossing_rate),
        'zero_crossing_rate_var': np.var(zero_crossing_rate),
        'tempo': tempo,
        'beat_histogram': beat_histogram[0]
    }

    return features

In [12]:
def process_files(audio_dir, output_csv):
    processed_song_ids = get_processed_song_ids(output_csv)
    
    with open(output_csv, 'a', newline='') as csvfile:
        feature_names = [
            'song_id', 
            'chroma_stft_mean', 'chroma_stft_var',
            'mfccs_mean', 'mfccs_var',
            'spectral_centroid_mean', 'spectral_centroid_var',
            'spectral_bandwidth_mean', 'spectral_bandwidth_var',
            'spectral_contrast_mean', 'spectral_contrast_var',
            'tonnetz_mean', 'tonnetz_var',
            'rms_mean', 'rms_var',
            'spectral_rolloff_mean', 'spectral_rolloff_var',
            'zero_crossing_rate_mean', 'zero_crossing_rate_var',
            'tempo', 'beat_histogram'
        ]
        writer = csv.DictWriter(csvfile, fieldnames=feature_names)
        if os.stat(output_csv).st_size == 0:
            writer.writeheader()
        
        for root, dirs, files in os.walk(audio_dir):
            for file in files:
                if file.endswith(".mp3"):
                    song_path = os.path.join(root, file)
                    song_id = os.path.splitext(os.path.basename(file))[0]
                    if song_id not in processed_song_ids:
                        try:
                            features = extract_features(song_path, song_id)
                            writer.writerow(features)
                            processed_song_ids.add(song_id)
                        except (librosa.util.exceptions.ParameterError, ValueError) as e:
                            print(f"Error processing {song_path}: {e}")

In [13]:
audio_dir = '/Volumes/Elemental/genre-id/songs/'
output_csv = 'features.csv'
process_files(audio_dir, output_csv)

Error processing /Volumes/Elemental/genre-id/songs/afro-cuban_percussion_44.mp3: can't extend empty axis 0 using modes other than 'constant' or 'empty'


KeyboardInterrupt: 