In [1]:
import librosa
import math
import os
import json
import random


In [2]:
# parameters

DATASET_PATH = "dataset"
JSON_PATH = "data.json"

# AUDIO_SAMPLE_RATE = 44100

SEGMENT_DURATION = 10 # in seconds
NUM_SEGMENTS_PER_TRACK = 20

n_mfcc=13
n_fft=2048
hop_length=512


In [3]:
# get common smallest discography size

sizes = []
print_dirs_contents = False

for root, dirs, files in os.walk(DATASET_PATH):
    if root != DATASET_PATH:
        if print_dirs_contents == True:
            path_components = root.split("/")
            print(f"{path_components[1]} [{len(files)}]")

            for f in files:
                name_components = f.split("-")
                print(f"    - {name_components[1]}")
            
        sizes.append(len(files))

COMMON_SMALLEST_SIZE = min(sizes)
print(f"common smallest size: {COMMON_SMALLEST_SIZE}")


# for root, dirs, files in os.walk(DATASET_PATH):
#     print(root)


common smallest size: 47


In [7]:
# create json dataset

data = {
    "mapping": [], # different artists labels
    "mfcc": [], # training inputs
    "labels": [] # outputs, targets
}

for i, (root, dirs, files) in enumerate(os.walk(DATASET_PATH)):
    if root != DATASET_PATH:
        # save the semantic label - mapping
        path_segments = root.split("/") # dataset / artist
        artist = path_segments[-1]
        data["mapping"].append(artist)
        print(artist)
        
        # process files for each artist
        for j in range(COMMON_SMALLEST_SIZE):
            track = random.choice(files)
            files.remove(track)

            track_path = os.path.join(root, track)
            signal, sr = librosa.load(track_path) # signal, sr = librosa.load(track_path, sr=None)
            track_duration = librosa.get_duration(y=signal, sr=sr, n_fft=n_fft, hop_length=hop_length) # in seconds
            
            num_samples = int(sr * track_duration)
            num_samples_in_segment = int(sr * SEGMENT_DURATION)
            expected_num_mfcc_vectors_in_segment = math.ceil(num_samples_in_segment / hop_length)

            for k in range(NUM_SEGMENTS_PER_TRACK):
                print(f"{i-1} {j} {k}")
                # choose start point
                segment_start = random.randint(0, num_samples)
                # calculate end point
                segment_end = segment_start + num_samples_in_segment
                if segment_end > num_samples:
                    segment_start = segment_start - num_samples_in_segment
                    segment_end = segment_end - num_samples_in_segment
                
                # get mfcc from that segment
                mfcc = librosa.feature.mfcc(y=signal[segment_start:segment_end], sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)
                mfcc = mfcc.T
                # save results
                # check expected length
                if len(mfcc) == expected_num_mfcc_vectors_in_segment:
                    data["mfcc"].append(mfcc.tolist()) #np array -> list
                    data["labels"].append(i-1)
                    # print(f"{i-1} {j} {k} {track_path.split('/')[2]}, start: {segment_start}, end: {segment_end}, length: {segment_end - segment_start}")
                else:
                    print("length does not match expected!")

with open(JSON_PATH, "w") as fp:
    json.dump(data, fp, indent=4)

['judas_priest', 'acdc', 'korpiklaani', 'slayer', 'metallica', 'system_of_a_down', 'ensiferum', 'rammstein', 'korn', 'slipknot']
[]
judas_priest
0 0 0
0 0 1
0 0 2
0 0 3
0 0 4
0 0 5
0 0 6
0 0 7
0 0 8
0 0 9
0 0 10
0 0 11
0 0 12
0 0 13
0 0 14
0 0 15
0 0 16
0 0 17
0 0 18
0 0 19
0 1 0
0 1 1
0 1 2
0 1 3
0 1 4
0 1 5
0 1 6
0 1 7
0 1 8
0 1 9
0 1 10
0 1 11
0 1 12
0 1 13
0 1 14
0 1 15
0 1 16
0 1 17
0 1 18
0 1 19
0 2 0
0 2 1
0 2 2
0 2 3
0 2 4
0 2 5
0 2 6
0 2 7
0 2 8
0 2 9
0 2 10
0 2 11
0 2 12
0 2 13
0 2 14
0 2 15
0 2 16
0 2 17
0 2 18
0 2 19
0 3 0
0 3 1
0 3 2
0 3 3
0 3 4
0 3 5
0 3 6
0 3 7
0 3 8
0 3 9
0 3 10
0 3 11
0 3 12
0 3 13
0 3 14
0 3 15
0 3 16
0 3 17
0 3 18
0 3 19
0 4 0
0 4 1
0 4 2
0 4 3
0 4 4
0 4 5
0 4 6
0 4 7
0 4 8
0 4 9
0 4 10
0 4 11
0 4 12
0 4 13
0 4 14
0 4 15
0 4 16
0 4 17
0 4 18
0 4 19


KeyboardInterrupt: 