In [1]:
# Load IPython's autoreload extension and set it to mode 2, so that edits to
# imported modules (such as utils.py) are picked up automatically without
# having to restart the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
from tqdm import tqdm
import utils as u

# Input directory that is scanned for audio files, and the CSV file that the
# extracted features are written to.
AUDIOFILES_PATH = "audio2"
FEATURES_FILE_PATH = "data/features2.csv"

In [3]:
# Collect every audio file found under the audio directory
audio_files = u.search_audio_files(AUDIOFILES_PATH)

# Report how many files were found
print(f"Number of audio files found: {len(audio_files)}")

# Create the features CSV, or empty it if it already exists.
# NOTE: opening in 'w' mode truncates the file, so re-running this cell
# discards any features written by a previous run.
with open(FEATURES_FILE_PATH, 'w'):
    pass

Number of audio files found: 330


In [4]:
# Initialise the Essentia helper once at module level. analyze_audio_files()
# reuses this single instance for every file, calling ess.extract_features()
# and ess.write_features_dict() on it.
ess = u.EssentiaClasses()

def analyze_audio_files(audio_files, features_path=None, min_duration_s=3, sample_rate=16000):
    """Extract features from each audio file and append them to a CSV file.

    Every path in ``audio_files`` is loaded with ``u.load_audio_file`` and
    passed to the module-level ``ess`` extractor; the resulting feature dict
    is appended to the features CSV as one row without a header line.
    Progress is displayed with a tqdm progress bar.

    Args:
        audio_files: Iterable of audio file paths to analyze.
        features_path: CSV file that rows are appended to. When ``None``
            (the default) the notebook-level ``FEATURES_FILE_PATH`` is used,
            looked up at call time.
        min_duration_s: Files shorter than this many seconds are skipped.
        sample_rate: Sample rate used to turn ``min_duration_s`` into a
            sample count. NOTE(review): the 16 kHz default comes from the
            original inline comment; confirm it matches the audio returned
            by ``u.load_audio_file``.

    Returns:
        None.
    """
    if features_path is None:
        features_path = FEATURES_FILE_PATH

    # With the defaults this is 3 s * 16000 Hz = 48000 samples.
    min_samples = min_duration_s * sample_rate

    pbar = tqdm(audio_files)
    for audio_file in pbar:
        pbar.set_description(f"Analyzing {audio_file}")

        # Load the audio in both stereo and mono form
        audio_stereo, audio_mono = u.load_audio_file(audio_file)

        # Skip files that are too short to analyze
        if len(audio_mono) < min_samples:
            # tqdm.write prints the message without breaking the active progress bar
            tqdm.write(
                f"Audio file {audio_file} is less than {min_duration_s:g} seconds long. Skipping..."
            )
            continue

        ess.extract_features(audio_mono, audio_stereo)

        # Append this file's features as a single header-less CSV row
        features_dict = ess.write_features_dict(audio_file)
        df_features = pd.DataFrame([features_dict])
        df_features.to_csv(features_path, mode='a', header=False, index=False)

    print("Finished analyzing all audio files")

In [5]:
# Run feature extraction over every discovered audio file.
# NOTE: this is a long-running cell; the recorded run below took roughly
# 1h28m for 330 files.
analyze_audio_files(audio_files)

Analyzing audio2/archive/musicnet/musicnet/train_data/2241.wav: 100%|██████████| 330/330 [1:27:36<00:00, 15.93s/it]

Finished analyzing all audio files



