<a href="https://colab.research.google.com/github/kartiknarayansahoo/deep_learning_specialization/blob/main/deep%20learning%20for%20audio/5.%20music_genre_classification_preparing_the_dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import os
import librosa
import math
import json

# --- audio settings ---
SAMPLE_RATE = 22050  # sampling rate handed to librosa.load
DURATION = 30  # given in seconds
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION  # samples in one full-length track

# --- input / output locations ---
DATASET_PATH = "genres_original"  # root folder walked by save_mfcc
JSON_PATH = "data.json"  # file the extracted data is written to

In [6]:

def save_mfcc(dataset_path, json_path, n_mfcc=13, n_fft=2048, hop_length=512, num_segments=5):
  """Extract MFCCs for every audio file under ``dataset_path`` and save them as JSON.

  Each sub-directory found below ``dataset_path`` is treated as one class: its
  folder name is appended to ``"mapping"`` and its position in that list is the
  integer label of every segment extracted from the files it contains. Each
  file is cut into ``num_segments`` consecutive slices of equal length, and
  only slices that produce the expected number of MFCC frames are kept.

  Args:
    dataset_path: Root directory of the dataset (str or path-like).
    json_path: Path of the JSON file to write.
    n_mfcc: Number of coefficients requested from ``librosa.feature.mfcc``.
    n_fft: FFT window size forwarded to ``librosa.feature.mfcc``.
    hop_length: Hop length forwarded to ``librosa.feature.mfcc``.
    num_segments: Number of slices each track is divided into.

  Returns:
    None. The result is written to ``json_path`` as a dict with the keys
    ``"mapping"`` (folder names), ``"mfcc"`` (one nested list per kept
    segment) and ``"labels"`` (index into ``"mapping"`` per kept segment).
  """
  # dictionary to store data
  data = {
      "mapping": [],
      "mfcc": [],
      "labels": []
  }

  num_samples_per_segment = int(SAMPLES_PER_TRACK/num_segments)
  # NOTE(review): presumably librosa's centred framing yields 1 + n // hop_length
  # frames, which differs from ceil() when n is an exact multiple of hop_length
  # -- confirm before using such a combination, otherwise every segment is dropped.
  expected_num_mfcc_per_segment = math.ceil(num_samples_per_segment/hop_length)

  # normalise so the root comparison below also works for path-like arguments
  root = os.fspath(dataset_path)

  # loop through all the genres
  for dirpath, dirnames, filenames in os.walk(root):
    # sort in place so os.walk visits sub-folders in a reproducible order;
    # the label indices then no longer depend on filesystem listing order
    dirnames.sort()

    # ensure that we are not at root level (compare by value, not identity)
    if dirpath == root:
      continue

    # save the genre name (semantic label); basename is separator-agnostic
    semantic_label = os.path.basename(dirpath)  # "genre/blues" => "blues"
    data["mapping"].append(semantic_label)
    label = len(data["mapping"]) - 1  # index of this genre inside "mapping"
    print("\nProcessing: {} ".format(semantic_label))

    # process files for a specific genre
    for f in sorted(filenames):

      # load audio file
      file_path = os.path.join(dirpath, f)
      signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

      # process segments extracting mfcc and storing data
      for s in range(num_segments):
        start_sample = num_samples_per_segment*s
        finish_sample = start_sample + num_samples_per_segment

        # the signal must be passed as keyword `y`: newer librosa releases
        # reject positional audio arguments
        mfcc = librosa.feature.mfcc(y=signal[start_sample:finish_sample],
                                    sr=sr,
                                    n_mfcc=n_mfcc,
                                    n_fft=n_fft,
                                    hop_length=hop_length)
        mfcc = mfcc.T

        # store the mfcc only if it has the expected length (e.g. tracks that
        # are too short to fill this segment are skipped)
        if len(mfcc) == expected_num_mfcc_per_segment:
          # .tolist() because a numpy array cannot be serialised by json
          data["mfcc"].append(mfcc.tolist())
          data["labels"].append(label)
          print("{} segment:{} ".format(file_path, s))

  with open(json_path, "w") as fp:
    json.dump(data, fp, indent=4)


In [None]:
# Run the extraction only when executed directly, cutting each track into 10 segments.
if __name__ == "__main__":
  save_mfcc(
      dataset_path=DATASET_PATH,
      json_path=JSON_PATH,
      num_segments=10,
  )