In [None]:
import json
import os
import math
import librosa

In [None]:
# --- audio parameters ---
# `sample_number` is handed to librosa.load as its `sr` argument further down,
# i.e. it is the sampling rate (samples per second) every track is loaded at.
sample_number = 22050
# length of one track, in seconds
song_time = 30
# total number of samples in one full-length track
samples_per_song = song_time * sample_number

# --- locations ---
# root folder that is walked for audio files (one sub-folder per genre)
data_loc = "/Users/Aysha/Downloads/Data/genres_original/data"
# JSON file the extracted features are written to
json_loc = "/Users/Aysha/Downloads/Data/genres_original/data/data_10.json"

In [None]:
def audio_features(data_loc, json_loc, features_num=13, fft_num=2048, hop_length=512, segments_num=5):
    """Extract MFCC features from every audio file under ``data_loc`` and save them as JSON.

    Every directory below ``data_loc`` (at any depth) is treated as one genre:
    its folder name is appended to ``"mapping"`` and its position in that list
    is the integer label of the segments extracted from its files. Each file is
    cut into ``segments_num`` equal-length segments and one MFCC matrix is
    computed per segment.

    The function reads two module-level settings: ``sample_number`` (the rate
    passed to ``librosa.load`` as ``sr``) and ``samples_per_song`` (the number
    of samples a full-length track is expected to have).

    Args:
        data_loc: Root directory of the dataset. Files directly inside it are ignored.
        json_loc: Path of the JSON file to write.
        features_num: Number of MFCC coefficients per frame (``n_mfcc``).
        fft_num: FFT window size in samples (``n_fft``).
        hop_length: Number of samples between successive frames.
        segments_num: Number of segments each track is divided into.

    Returns:
        None. The result is written to ``json_loc`` as a dictionary with the keys
        ``"mapping"`` (genre names), ``"labels"`` (one genre index per kept
        segment) and ``"mfcc"`` (one ``frames x features_num`` nested list per
        kept segment).
    """

    # dictionary to store information
    data = {
        "mapping": [],
        "labels": [],
        "mfcc": []
    }

    samples_per_segment = int(samples_per_song / segments_num)
    # librosa centres its analysis frames by default, so n samples produce
    # 1 + n // hop_length frames. (math.ceil(n / hop_length) gives the same number
    # except when n is an exact multiple of hop_length, where it is one too few and
    # every segment would be rejected below.)
    features_num_vectors_per_segment = 1 + samples_per_segment // hop_length

    # os.walk reports directories as plain strings; normalise the root the same
    # way so the comparison below also works when a path object is passed in
    root = os.fspath(data_loc)

    # go through info in sub-folders
    for dirpath, dirnames, filenames in os.walk(root):

        # os.walk's native order depends on the filesystem; sorting in place makes
        # the traversal, and therefore the genre -> label assignment, reproducible
        dirnames.sort()

        # the root folder itself is not a genre (compare by value, not identity)
        if dirpath == root:
            continue

        # save genre name in the "mapping" portion of dictionary
        genre_label = os.path.basename(dirpath)
        data["mapping"].append(genre_label)
        label_index = len(data["mapping"]) - 1
        print("\nProcessing: {}".format(genre_label))

        # get the audio file info from sub-folders
        for f in sorted(filenames):

            file_path = os.path.join(dirpath, f)
            # keep the returned rate in its own local name: assigning to
            # `sample_number` here would turn it into a local variable and make
            # this very line fail with UnboundLocalError
            signal, sample_rate = librosa.load(file_path, sr=sample_number)

            for d in range(segments_num):

                # get the start and the finish sample index of this segment
                start = samples_per_segment * d
                finish = start + samples_per_segment
                segment = signal[start:finish]

                # the track ended before this segment starts, so every later
                # segment is empty as well
                if len(segment) == 0:
                    break

                # get the features; transpose so rows are frames
                features = librosa.feature.mfcc(
                    y=segment,
                    sr=sample_rate,
                    n_mfcc=features_num,
                    n_fft=fft_num,
                    hop_length=hop_length,
                )
                features = features.T

                # only keep full-length segments so every sample has the same shape
                if len(features) == features_num_vectors_per_segment:
                    data["mfcc"].append(features.tolist())
                    data["labels"].append(label_index)
                    print("{}, segment:{}".format(file_path, d+1))

    # save features to json file
    with open(json_loc, "w") as fp:
        json.dump(data, fp, indent=4)
        
        
if __name__ == "__main__":
    # Run the extraction with 10 segments per track. The function defined in this
    # file is `audio_features`; the previous call to `save_features` raised NameError.
    audio_features(data_loc, json_loc, segments_num=10)