# Preprocessing on the Music dataset
- extract the features (input, targets, labels, MFCCs)
- store them in a JSON file

In [None]:
import json
import os
import math
import librosa

# Where the genre sub-folders live and where the extracted features are written.
DATASET_PATH = "/content/drive/MyDrive/audio-genre-classification-main/dataset/genres_original"
JSON_PATH = "data_10.json"

# Audio geometry used to split every track into equally sized segments.
SAMPLE_RATE = 22050  # samples per second; passed to librosa.load as `sr`
TRACK_DURATION = 30  # measured in seconds
SAMPLES_PER_TRACK = TRACK_DURATION * SAMPLE_RATE  # sample count of one full-length track


def save_mfcc(dataset_path, json_path, num_mfcc=13, n_fft=2048, hop_length=512, num_segments=5):
    """Extracts per-segment audio features from a music dataset and saves them into a json file
    along with genre labels.

    Every directory found below ``dataset_path`` is treated as one genre: its name is appended
    to ``mapping`` and each file inside it is loaded with librosa and cut into ``num_segments``
    equally sized slices. For every slice whose MFCC matrix has the expected number of frames,
    the MFCCs are stored as nested lists (frames x coefficients), the chroma, spectral centroid
    and spectral contrast matrices are stored flattened, and the index of the genre inside
    ``mapping`` is stored as the label. Slices with a different frame count are dropped.

        :param dataset_path (str): Path to dataset
        :param json_path (str): Path to json file used to save the features
        :param num_mfcc (int): Number of coefficients to extract
        :param n_fft (int): Interval we consider to apply FFT. Measured in # of samples
        :param hop_length (int): Sliding window for FFT. Measured in # of samples
        :param num_segments (int): Number of segments we want to divide sample tracks into
        :return: None. The collected data is written to ``json_path``.
        """

    # dictionary to store the genre mapping, labels and per-segment features
    data = {
        "mapping": [],
        "labels": [],
        "mfcc": [],
        "chroma": [],
        "scen": [],
        "scon": []
    }

    samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length)

    # os.walk yields the dataset root itself first; recognise it by value
    # (string identity is an implementation detail and must not be relied on)
    root = os.path.normpath(dataset_path)

    # loop through all genre sub-folders
    for dirpath, dirnames, filenames in os.walk(dataset_path):
        # sorting in place makes os.walk visit sub-folders in a stable order,
        # so the label assigned to each genre is reproducible between runs
        dirnames.sort()

        # ensure we're processing a genre sub-folder level
        if os.path.normpath(dirpath) == root:
            continue

        # save genre label (i.e., sub-folder name) in the mapping
        semantic_label = os.path.basename(os.path.normpath(dirpath))
        data["mapping"].append(semantic_label)
        # the label is the genre's position in the mapping, so both always agree
        label = len(data["mapping"]) - 1

        print('\n================================================')
        print("Processing: {}".format(semantic_label))

        # process all audio files in genre sub-dir
        for f in sorted(filenames):

            # load audio file
            file_path = os.path.join(dirpath, f)
            signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE)

            # process all segments of audio file
            for d in range(num_segments):

                # calculate start and finish sample for current segment
                start = samples_per_segment * d
                finish = start + samples_per_segment
                segment = signal[start:finish]

                # the track ended before this segment starts: nothing left to analyse
                if len(segment) == 0:
                    break

                # extract mfcc (y/sr passed by keyword: positional use was removed in librosa 0.10)
                mfcc = librosa.feature.mfcc(y=segment, sr=sample_rate, n_mfcc=num_mfcc,
                                            n_fft=n_fft, hop_length=hop_length).T

                # only keep segments with the expected number of vectors; checking
                # before the remaining features avoids computing them for dropped segments
                if len(mfcc) != num_mfcc_vectors_per_segment:
                    continue

                # Chroma STFT
                chroma = librosa.feature.chroma_stft(y=segment, sr=sample_rate, n_chroma=12,
                                                     n_fft=n_fft, hop_length=hop_length).T

                # Spectral Centroid
                scen = librosa.feature.spectral_centroid(y=segment, sr=sample_rate,
                                                         n_fft=n_fft, hop_length=hop_length).T

                # Spectral Contrast
                scon = librosa.feature.spectral_contrast(y=segment, sr=sample_rate,
                                                         n_fft=n_fft, hop_length=hop_length).T

                data["mfcc"].append(mfcc.tolist())
                data["chroma"].append(chroma.flatten().tolist())
                data["scen"].append(scen.flatten().tolist())
                data["scon"].append(scon.flatten().tolist())
                data["labels"].append(label)

        print("Completed: {}".format(semantic_label))

    # save features to json file
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
        
        
if __name__ == "__main__":
    # Run the extraction with ten segments per track instead of the default five.
    save_mfcc(dataset_path=DATASET_PATH, json_path=JSON_PATH, num_segments=10)


In [None]:

# Where the genre sub-folders live and where the extracted features are written.
DATASET_PATH = "Data/genres_original/"
JSON_PATH = "data_10.json"

# Audio geometry used to split every track into equally sized segments.
SAMPLE_RATE = 22050  # samples per second; passed to librosa.load as `sr`
TRACK_DURATION = 30  # measured in seconds
SAMPLES_PER_TRACK = TRACK_DURATION * SAMPLE_RATE  # sample count of one full-length track


def save_mfcc(dataset_path, json_path, num_mfcc=13, n_fft=2048, hop_length=512, num_segments=5):
    """Extracts per-segment audio features from a music dataset and saves them into a json file
    along with genre labels.

    Every directory found below ``dataset_path`` is treated as one genre: its name is appended
    to ``mapping`` and each file inside it is loaded with librosa and cut into ``num_segments``
    equally sized slices. For every slice whose MFCC matrix has the expected number of frames,
    the MFCCs are stored as nested lists (frames x coefficients), the chroma, spectral centroid
    and spectral contrast matrices are stored flattened, and the index of the genre inside
    ``mapping`` is stored as the label. Slices with a different frame count are dropped.

        :param dataset_path (str): Path to dataset
        :param json_path (str): Path to json file used to save the features
        :param num_mfcc (int): Number of coefficients to extract
        :param n_fft (int): Interval we consider to apply FFT. Measured in # of samples
        :param hop_length (int): Sliding window for FFT. Measured in # of samples
        :param num_segments (int): Number of segments we want to divide sample tracks into
        :return: None. The collected data is written to ``json_path``.
        """

    # dictionary to store the genre mapping, labels and per-segment features
    data = {
        "mapping": [],
        "labels": [],
        "mfcc": [],
        "chroma": [],
        "scen": [],
        "scon": []
    }

    samples_per_segment = int(SAMPLES_PER_TRACK / num_segments)
    num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length)

    # os.walk yields the dataset root itself first; recognise it by value
    # (string identity is an implementation detail and must not be relied on)
    root = os.path.normpath(dataset_path)

    # loop through all genre sub-folders
    for dirpath, dirnames, filenames in os.walk(dataset_path):
        # sorting in place makes os.walk visit sub-folders in a stable order,
        # so the label assigned to each genre is reproducible between runs
        dirnames.sort()

        # ensure we're processing a genre sub-folder level
        if os.path.normpath(dirpath) == root:
            continue

        # save genre label (i.e., sub-folder name) in the mapping
        semantic_label = os.path.basename(os.path.normpath(dirpath))
        data["mapping"].append(semantic_label)
        # the label is the genre's position in the mapping, so both always agree
        label = len(data["mapping"]) - 1

        print('\n================================================')
        print("Processing: {}".format(semantic_label))

        # process all audio files in genre sub-dir
        for f in sorted(filenames):

            # load audio file
            file_path = os.path.join(dirpath, f)
            signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE)

            # process all segments of audio file
            for d in range(num_segments):

                # calculate start and finish sample for current segment
                start = samples_per_segment * d
                finish = start + samples_per_segment
                segment = signal[start:finish]

                # the track ended before this segment starts: nothing left to analyse
                if len(segment) == 0:
                    break

                # extract mfcc (y/sr passed by keyword: positional use was removed in librosa 0.10)
                mfcc = librosa.feature.mfcc(y=segment, sr=sample_rate, n_mfcc=num_mfcc,
                                            n_fft=n_fft, hop_length=hop_length).T

                # only keep segments with the expected number of vectors; checking
                # before the remaining features avoids computing them for dropped segments
                if len(mfcc) != num_mfcc_vectors_per_segment:
                    continue

                # Chroma STFT
                chroma = librosa.feature.chroma_stft(y=segment, sr=sample_rate, n_chroma=12,
                                                     n_fft=n_fft, hop_length=hop_length).T

                # Spectral Centroid
                scen = librosa.feature.spectral_centroid(y=segment, sr=sample_rate,
                                                         n_fft=n_fft, hop_length=hop_length).T

                # Spectral Contrast
                scon = librosa.feature.spectral_contrast(y=segment, sr=sample_rate,
                                                         n_fft=n_fft, hop_length=hop_length).T

                data["mfcc"].append(mfcc.tolist())
                data["chroma"].append(chroma.flatten().tolist())
                data["scen"].append(scen.flatten().tolist())
                data["scon"].append(scon.flatten().tolist())
                data["labels"].append(label)

        print("Completed: {}".format(semantic_label))

    # save features to json file
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
        
        
if __name__ == "__main__":
    # Run the extraction with ten segments per track instead of the default five.
    save_mfcc(dataset_path=DATASET_PATH, json_path=JSON_PATH, num_segments=10)
