In [None]:
import pickle
import os
import csv
import re
import string
import librosa
import matplotlib.pyplot as plt
import numpy as np
import shutil
import soundfile as sf
import torch
import h5py
import torchaudio
import torchaudio.transforms as T
from PIL import Image
from scipy.ndimage import zoom
from librosa.util import normalize
from librosa.util import fix_length
from sklearn.model_selection import train_test_split
from google.colab import drive
from google.colab import runtime
from collections import defaultdict

# Mount Google Drive at /content/drive; the dataset and CSV paths used by this
# notebook all live under that mount point.
drive.mount('/content/drive')

Mounted at /content/drive


Paths

In [None]:
# Every input lives under one project folder on the mounted Drive, so both
# locations are derived from that single root.
project_dir = '/content/drive/My Drive/Projects/NeuraBeat'

# Track metadata CSV (one row per track).
csv_path = f'{project_dir}/tracks.csv'

# Directory holding the mp3 files; the trailing slash is kept on purpose.
mp3_data_path = f'{project_dir}/Data/fma_small/'

Create File-to-Genre Map

In [None]:
# Build a mapping from track id to genre for every mp3 present on disk.
file_genre_map = {}  # Dictionary to store file-genre mapping

# Track ids are the mp3 file names without extension and without leading zeros,
# which is the form compared against the first CSV column below.
track_ids = [file_name.split('.')[0].lstrip('0') for file_name in os.listdir(mp3_data_path) if file_name.endswith('.mp3')]

# Set copy for O(1) membership tests; scanning the list for every CSV row is
# quadratic in practice. `track_ids` itself stays a list for any later use.
track_id_set = set(track_ids)

# Index of the CSV column read as the genre label.
# NOTE(review): presumably the top-level genre column of tracks.csv - confirm.
genre_column = 40

# Number of leading rows skipped as headers before the data rows start.
num_header_rows = 3

# Read CSV file and create file-genre mapping.
# newline='' lets the csv module handle quoted fields containing line breaks;
# the explicit encoding avoids depending on the platform default.
with open(csv_path, 'r', newline='', encoding='utf-8') as csvfile:
    csvreader = csv.reader(csvfile)
    for _ in range(num_header_rows):  # Skip headers
        next(csvreader, None)
    for row in csvreader:
        # Ignore blank or truncated rows instead of failing with IndexError.
        if len(row) <= genre_column:
            continue
        if row[0] in track_id_set:
            file_genre_map[row[0]] = row[genre_column]

In [None]:
# Tally how many mapped tracks carry each genre label
genre_dist = {}
for label in file_genre_map.values():
    genre_dist[label] = genre_dist.get(label, 0) + 1

# Each entry of the map is one song, so its size is the song total
total_songs = len(file_genre_map)

# Report the per-genre counts, then the overall total
print("Genre distribution:")
for name, n_tracks in genre_dist.items():
    print(f"{name}: {n_tracks}")
print(f"Total number of songs: {total_songs}")

Genre distribution:
Hip-Hop: 1000
Pop: 1000
Folk: 1000
Experimental: 1000
Rock: 1000
International: 1000
Electronic: 1000
Instrumental: 1000
Total number of songs: 8000


Preprocess Training Data

In [None]:
# Turn every usable mp3 into fixed-length mono chunks with a numeric genre label.
data = []    # one 1-D float array of `chunk_length` samples per chunk
labels = []  # numeric genre label for the chunk at the same index in `data`

genre_counts = defaultdict(int)  # songs accepted so far, per genre
max_songs_per_genre = 990        # cap on accepted songs per genre
target_sr = 16000                # sample rate (Hz) every track is resampled to
chunk_duration = 3               # seconds per chunk
full_song_length = 27            # seconds required from (and kept of) each track
num_chunks = full_song_length // chunk_duration
chunk_length = target_sr * chunk_duration       # samples per chunk
song_samples = target_sr * full_song_length     # minimum samples a track must have

genre_to_number = {'Electronic': 0, 'Experimental': 1, 'Folk': 2, 'Hip-Hop': 3, 'Instrumental': 4, 'International': 5, 'Pop': 6, 'Rock': 7}

# One resampler per distinct source sample rate, built on first use, so the
# transform is not reconstructed for every file.
resamplers = {}

# Sorted so that which songs fall under the per-genre cap does not depend on
# the arbitrary order returned by os.listdir.
for mp3_file in sorted(os.listdir(mp3_data_path)):
    if not mp3_file.endswith('.mp3'):
        continue

    track_id = mp3_file.split('.')[0].lstrip('0')
    genre = file_genre_map.get(track_id)
    if genre not in genre_to_number:
        # No CSV entry for this file, or a genre outside the label table.
        print(f"Skipped file with unknown genre: {mp3_file}")
        continue

    if genre_counts[genre] >= max_songs_per_genre:
        continue

    try:
        audio, sr = torchaudio.load(os.path.join(mp3_data_path, mp3_file))
        if sr not in resamplers:
            resamplers[sr] = T.Resample(orig_freq=sr, new_freq=target_sr)
        audio = resamplers[sr](audio)
        audio = torch.mean(audio, dim=0)  # average the channels down to mono
    except Exception as e:
        # Best effort: report the reason and move on to the next file.
        print(f"Skipped corrupt file: {mp3_file} ({e})")
        continue

    if len(audio) < song_samples:
        print(f"Skipped short file: {mp3_file}")
        continue

    # Keep only the leading samples that fill whole chunks and convert to
    # NumPy once per song, so `data` holds arrays rather than torch tensors.
    song = audio[:num_chunks * chunk_length].numpy()
    data.extend(song.reshape(num_chunks, chunk_length))
    labels.extend([genre_to_number[genre]] * num_chunks)
    genre_counts[genre] += 1

data = np.array(data)
labels = np.array(labels)
print(genre_counts)

Skipped short file: 098565.mp3
Skipped corrupt file: 133297.mp3
Skipped short file: 098569.mp3
Skipped corrupt file: 108925.mp3
Skipped corrupt file: 099134.mp3
Skipped short file: 098567.mp3
defaultdict(<class 'int'>, {'Folk': 990, 'Instrumental': 990, 'Electronic': 990, 'Rock': 990, 'Hip-Hop': 990, 'Pop': 990, 'Experimental': 990, 'International': 990})


In [None]:
def _save_split(path, split_data, split_labels):
    """Write one split to `path` as an HDF5 file with 'data' and 'labels' datasets.

    np.asarray is used so inputs that are already NumPy arrays are written
    without first being duplicated in memory.
    """
    with h5py.File(path, 'w') as f:
        f.create_dataset('data', data=np.asarray(split_data))
        f.create_dataset('labels', data=np.asarray(split_labels))


# First split: 80% training, 20% held out, stratified by label.
# NOTE(review): rows of `data` are per-chunk, so chunks cut from the same song
# can end up in different splits - confirm this is acceptable for evaluation.
training_data, val_data, training_labels, val_labels = train_test_split(data,
                                                                        labels,
                                                                        test_size=0.2,
                                                                        stratify=labels,
                                                                        random_state=42)

# Release the full arrays now that the splits exist. This makes the cell
# non-rerunnable without re-running the preprocessing cell first.
del data, labels

_save_split('/content/drive/My Drive/Projects/NeuraBeat/Data/torchaudio_train_data.h5',
            training_data, training_labels)

# Second split: halve the held-out 20% into validation and test (10% each).
val_data, test_data, val_labels, test_labels = train_test_split(val_data,
                                                                val_labels,
                                                                test_size=0.5,
                                                                stratify=val_labels,
                                                                random_state=42)

_save_split('/content/drive/My Drive/Projects/NeuraBeat/Data/torchaudio_val_data.h5',
            val_data, val_labels)

_save_split('/content/drive/My Drive/Projects/NeuraBeat/Data/torchaudio_test_data.h5',
            test_data, test_labels)

In [None]:
# Last step of the notebook, run after the HDF5 splits have been written.
# NOTE(review): presumably this releases the Colab runtime so it stops
# consuming resources - confirm against the google.colab.runtime documentation.
runtime.unassign()