In [19]:
import librosa
import os
import math
import json
import numpy as np

In [2]:
# Audio configuration shared by the feature-extraction cells below.
SAMPLE_RATE = 22050  # rate every track is resampled to when loaded
DURATION = 30        # track length the segmentation assumes (seconds at SAMPLE_RATE)
SAMPLES_PER_TRACK = DURATION * SAMPLE_RATE  # total samples expected per track

In [4]:
# Root folder of the dataset; save_mfcc treats each sub-directory as one label.
data_path = "Data/genres_original"

In [34]:
def save_mfcc(dataset_path, json_path, n_mfcc=13, n_fft=2048,
             hop_length=512, num_segments=5):
    """Extract per-segment MFCCs for every label folder and save them as JSON.

    Walks ``dataset_path`` and treats each directory below the root as one
    label. Every file in such a directory is loaded at ``SAMPLE_RATE``, cut
    into ``num_segments`` equal slices of ``SAMPLES_PER_TRACK / num_segments``
    samples, and the MFCC matrix of each slice is stored together with the
    index of its label in ``mapping``.

    Args:
        dataset_path: Root directory containing one sub-directory per label.
        json_path: File the resulting dictionary is written to.
        n_mfcc: Number of MFCC coefficients, forwarded to librosa.
        n_fft: FFT window size, forwarded to librosa.
        hop_length: Hop between frames in samples, forwarded to librosa.
        num_segments: Number of equal slices each track is cut into.

    Returns:
        None. The dictionary with keys ``"mapping"``, ``"mfcc"`` and
        ``"labels"`` is written to ``json_path``.
    """
    # Data storage dictionary
    data = {
        "mapping": [],
        "mfcc": [],
        "labels": [],
    }
    samples_ps = int(SAMPLES_PER_TRACK / num_segments)  # ps = per segment
    expected_vects_ps = math.ceil(samples_ps / hop_length)

    # os.walk yields the root exactly as it was passed in, so a plain string
    # equality check identifies it (identity comparison is not guaranteed).
    root = os.fspath(dataset_path)

    # loop through all the label directories
    for dirpath, dirnames, filenames in os.walk(dataset_path):
        # Sorting in place fixes the traversal order, so label indices are
        # the same on every run regardless of filesystem ordering.
        dirnames.sort()

        # ensuring not at root
        if dirpath == root:
            continue

        # save the semantic label; basename works with any path separator
        semantic_label = os.path.basename(dirpath)
        data["mapping"].append(semantic_label)
        # Tie the numeric label to the position in "mapping" rather than to
        # the walk counter, so the two can never drift apart.
        label_index = len(data["mapping"]) - 1
        print(f"Processing: {semantic_label}")

        # process files for this label
        for f in filenames:
            # NOTE(review): this file is skipped because it could not be
            # loaded; the original note blamed a file-size limit in librosa.
            # Confirm the actual cause (the file may simply be corrupt).
            if f == "jazz.00054.wav":
                continue

            # load audio file
            file_path = os.path.join(dirpath, f)
            signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

            for s in range(num_segments):
                start_sample = samples_ps * s
                finish_sample = start_sample + samples_ps

                mfcc = librosa.feature.mfcc(y=signal[start_sample:finish_sample],
                                            sr=sr,
                                            n_fft=n_fft,
                                            n_mfcc=n_mfcc,
                                            hop_length=hop_length)
                mfcc = mfcc.T

                # Store the segment only if it has the expected number of
                # frames, so every stored matrix has the same shape.
                if len(mfcc) == expected_vects_ps:
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(label_index)
                    print(f"{file_path}, segment: {s+1}")

    # Persist the features; without this json_path is never used and later
    # cells have nothing to load.
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)

In [35]:
# Extract features with ten segments per track, targeting data.json.
json_path = "data.json"
save_mfcc(data_path, json_path, num_segments=10)

[]
['hiphop.00023.wav', 'hiphop.00005.wav', 'hiphop.00038.wav', 'hiphop.00089.wav', 'hiphop.00011.wav', 'hiphop.00050.wav', 'hiphop.00020.wav', 'hiphop.00057.wav', 'hiphop.00025.wav', 'hiphop.00013.wav', 'hiphop.00030.wav', 'hiphop.00079.wav', 'hiphop.00053.wav', 'hiphop.00068.wav', 'hiphop.00060.wav', 'hiphop.00044.wav', 'hiphop.00071.wav', 'hiphop.00058.wav', 'hiphop.00018.wav', 'hiphop.00095.wav', 'hiphop.00034.wav', 'hiphop.00076.wav', 'hiphop.00015.wav', 'hiphop.00032.wav', 'hiphop.00052.wav', 'hiphop.00026.wav', 'hiphop.00008.wav', 'hiphop.00092.wav', 'hiphop.00078.wav', 'hiphop.00086.wav', 'hiphop.00040.wav', 'hiphop.00075.wav', 'hiphop.00002.wav', 'hiphop.00027.wav', 'hiphop.00056.wav', 'hiphop.00085.wav', 'hiphop.00062.wav', 'hiphop.00029.wav', 'hiphop.00039.wav', 'hiphop.00024.wav', 'hiphop.00031.wav', 'hiphop.00066.wav', 'hiphop.00021.wav', 'hiphop.00049.wav', 'hiphop.00098.wav', 'hiphop.00063.wav', 'hiphop.00017.wav', 'hiphop.00081.wav', 'hiphop.00093.wav', 'hiphop.00000.wa

KeyboardInterrupt: 

In [20]:
def load_data(dataset_path):
    """Read a JSON feature file and return its contents as numpy arrays.

    Args:
        dataset_path: Path to a JSON file holding "mfcc" and "labels" entries.

    Returns:
        A tuple ``(inputs, targets)`` built from the "mfcc" and "labels"
        entries respectively.
    """
    with open(dataset_path, "r") as handle:
        payload = json.loads(handle.read())

    # Turn the nested Python lists back into arrays.
    features = np.array(payload["mfcc"])
    labels = np.array(payload["labels"])
    return features, labels



In [21]:
# Load the saved features and their integer labels back into memory.
inputs, targets = load_data("./data.json")

TypeError: object of type 'numpy.float64' has no len()