## Importing libraries

In [13]:
import json
import os
import math
import librosa

## save_mfcc Function

The `save_mfcc` function takes the following parameters:
- `dataset_path`: The directory path of audio files,
- `sample_rate`: The sampling rate for audio processing,
- `track_duration`: The duration of each audio track in seconds,
- `json_path`: File path to save the output in JSON format,
- `num_mfcc`: Number of MFCC features to extract,
- `n_fft`: The window size for the Fourier Transform,
- `hop_length`: The hop length for STFT,
- `num_segments`: The number of equal-length segments into which each audio track is divided.

These parameters allow customization of the MFCC extraction process to suit different datasets and requirements.


In [14]:
def save_mfcc(dataset_path, sample_rate, track_duration, json_path, num_mfcc, n_fft, hop_length, num_segments):
    """Extract MFCCs from every audio file under ``dataset_path`` and save them as JSON.

    Every directory below ``dataset_path`` is treated as one genre: its base
    name is appended to ``data["mapping"]`` and its position in that list is
    the integer label stored for each of its segments. Each audio file is
    loaded at ``sample_rate`` and cut into ``num_segments`` consecutive slices
    of ``sample_rate * track_duration / num_segments`` samples. A slice is
    kept only if it yields exactly ``ceil(samples_per_segment / hop_length)``
    MFCC frames, so slices from files shorter than ``track_duration`` are
    dropped.

    Args:
        dataset_path: Root directory containing one sub-directory per genre
            (``str`` or any ``os.PathLike``).
        sample_rate: Sampling rate passed to ``librosa.load`` and
            ``librosa.feature.mfcc``.
        track_duration: Expected duration of each track, in seconds.
        json_path: Path of the JSON file to write.
        num_mfcc: Number of MFCC coefficients per frame.
        n_fft: FFT window size passed to ``librosa.feature.mfcc``.
        hop_length: Hop length passed to ``librosa.feature.mfcc``.
        num_segments: Number of segments each track is divided into.

    Returns:
        None. The result is written to ``json_path`` as a dict with the keys
        ``"mapping"``, ``"labels"`` and ``"mfcc"``.
    """
    # os.walk always yields plain strings, so normalise PathLike input first;
    # otherwise the root comparison below would never match for a Path.
    dataset_path = os.fspath(dataset_path)

    data = {"mapping": [], "labels": [], "mfcc": []}
    samples_per_track = sample_rate * track_duration
    samples_per_segment = int(samples_per_track / num_segments)
    num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length)
    last_saved_shape = None  # shape of the most recently stored segment, if any

    for current_directory, sub_directories, audio_files in os.walk(dataset_path):
        # Sorting in place makes os.walk visit sub-directories alphabetically,
        # so the genre -> label assignment is reproducible across filesystems.
        sub_directories.sort()

        if current_directory == dataset_path:
            continue  # the root itself is not a genre

        genre_label = os.path.basename(current_directory)
        genre_index = len(data["mapping"])  # label == position in "mapping"
        data["mapping"].append(genre_label)

        for audio_file_name in sorted(audio_files):
            file_path = os.path.join(current_directory, audio_file_name)
            try:
                signal, _ = librosa.load(file_path, sr=sample_rate)
                for segment_index in range(num_segments):
                    segment_start = samples_per_segment * segment_index
                    segment_end = segment_start + samples_per_segment
                    mfcc_coefficients = librosa.feature.mfcc(
                        y=signal[segment_start:segment_end],
                        sr=sample_rate,
                        n_mfcc=num_mfcc,
                        n_fft=n_fft,
                        hop_length=hop_length).T
                    # Keep only full-length segments so every stored sample
                    # has the same number of frames.
                    if len(mfcc_coefficients) == num_mfcc_vectors_per_segment:
                        data["mfcc"].append(mfcc_coefficients.tolist())
                        data["labels"].append(genre_index)
                        last_saved_shape = mfcc_coefficients.shape
            except Exception as e:
                # Best effort: report unreadable files and carry on.
                print(f"Error processing {file_path}: {e}")
        print(f"{genre_label} - done")

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)

    # Report the shape of what was actually stored; guard against the case
    # where nothing was extracted (empty dataset or every file failed).
    if last_saved_shape is None:
        print("No MFCC segments were extracted.")
    else:
        print(last_saved_shape)

## Running the MFCC Extraction

The `save_mfcc` function is executed with parameters specific to our dataset. The output is a JSON file containing the genre names (`mapping`), the MFCCs for each audio segment (`mfcc`), and the integer genre label of each segment (`labels`).


In [15]:
# Extraction settings for this run, gathered in one place so they are easy to tweak.
extraction_settings = {
    "dataset_path": 'gztan_dataset/genres_original',
    "sample_rate": 22050,
    "track_duration": 30,
    "json_path": 'git.json',
    "num_mfcc": 13,
    "n_fft": 2048,
    "hop_length": 512,
    "num_segments": 10,
}

# Run the extraction; the result is written to the JSON file named above.
save_mfcc(**extraction_settings)

blues - done
classical - done
country - done
disco - done
hiphop - done
jazz - done
metal - done
pop - done
reggae - done
rock - done
(130, 13)
