# MusicAI
## Sztuczna Inteligencja - projekt 
### Część 2 - przygotowanie danych
Autorzy: Jakub Ochnik, Adam Karabiniewicz, Marcel Bieniek
___


# Part 2: preparing the dataset

Importing necessary libraries and packages

In [15]:
import librosa, librosa.display # audio loading and feature extraction (librosa.display adds plotting helpers)
import math
import os
import json

Defining constants

In [16]:
# Paths are built with os.path.join so they use the separator of the OS the
# notebook runs on (the values are unchanged on Windows).
# The trailing "" keeps a trailing separator on DATASET_PATH, as before.
DATASET_PATH = os.path.join("Data", "genres_original", "")
JSON_PATH = os.path.join("Data", "data.json")

SAMPLE_RATE = 22050  # sampling rate (Hz) passed to librosa.load
DURATION = 30  # seconds
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION  # number of samples in one full-length track

A function that extracts MFCC features from every track in the dataset and saves them, together with the genre labels, to a JSON file

In [17]:
def save_mfcc(dataset_path, json_path, n_mfcc=13, n_fft=2048, hop_length=512, num_segments=5):
    """Extract MFCCs for every audio file under ``dataset_path`` and dump them to JSON.

    Every sub-directory of ``dataset_path`` is treated as one genre. Each file
    in it is loaded at ``SAMPLE_RATE`` and cut into ``num_segments`` equal
    chunks; every chunk is stored as an independent training example.

    The JSON document written to ``json_path`` has three keys:
      * ``"mapping"`` - genre names; a label is an index into this list
      * ``"mfcc"``    - one list of MFCC vectors per stored segment
      * ``"labels"``  - the genre index of each entry in ``"mfcc"``

    Args:
        dataset_path: Root directory containing one sub-directory per genre.
        json_path: Destination of the generated JSON file.
        n_mfcc: Number of MFCC coefficients, forwarded to librosa.
        n_fft: FFT window size, forwarded to librosa.
        hop_length: Hop between successive frames, forwarded to librosa.
        num_segments: Number of chunks each track is split into.

    Returns:
        None. The result is written to ``json_path`` and progress is printed.
    """
    # dictionary that accumulates the whole dataset before it is serialised
    data = {
        "mapping": [],  # genre names, indexed by label
        "mfcc": [],  # training inputs
        "labels": [],  # outputs, targets
    }

    num_samples_per_segment = SAMPLES_PER_TRACK // num_segments
    # librosa centres its analysis frames by default, which yields
    # 1 + n // hop_length frames for n samples. The previous ceil(n / hop)
    # gives the same number except when n is an exact multiple of hop_length,
    # where it is one too low and every segment would be rejected below.
    expected_num_mfcc_vectors_per_segment = 1 + num_samples_per_segment // hop_length

    # loop through all the genres
    for dirpath, dirnames, filenames in os.walk(dataset_path):
        # sorting in place makes os.walk visit genres in a deterministic order
        dirnames.sort()

        # the root level holds the genre folders, not audio files
        if dirpath == dataset_path:
            continue

        # genre name = last path component, whatever separator the OS uses
        # (splitting on "/" left the full path as the label on Windows)
        semantic_label = os.path.basename(os.path.normpath(dirpath))
        data["mapping"].append(semantic_label)
        # take the label from the mapping itself so both always stay aligned
        label_index = len(data["mapping"]) - 1
        print("\nProcessing {}".format(semantic_label))

        # process files for a specific genre
        for f in sorted(filenames):
            # load audio file
            file_path = os.path.join(dirpath, f)
            signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

            # divide into segments
            for s in range(num_segments):
                start_sample = num_samples_per_segment * s
                finish_sample = start_sample + num_samples_per_segment

                # the audio must be passed as y=...: recent librosa releases
                # accept it as a keyword argument only
                mfcc = librosa.feature.mfcc(
                    y=signal[start_sample:finish_sample],
                    sr=sr,
                    n_mfcc=n_mfcc,
                    n_fft=n_fft,
                    hop_length=hop_length,
                )
                # transpose so that len(mfcc) counts frames, one vector per frame
                mfcc = mfcc.T

                # store mfcc for segment only if it has the expected length
                # (segments cut from tracks shorter than DURATION are dropped)
                if len(mfcc) == expected_num_mfcc_vectors_per_segment:
                    data["mfcc"].append(mfcc.tolist())  # np array -> list for JSON
                    data["labels"].append(label_index)
                    print("{}, segment: {}".format(file_path, s))

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)

Generate the dataset (careful: the resulting JSON file has almost 20 million lines and takes up more than 600 MB)

In [19]:
# Build the JSON dataset, cutting every track into 10 segments.
save_mfcc(
    dataset_path=DATASET_PATH,
    json_path=JSON_PATH,
    num_segments=10,
)

62.wav, segment: 2
Data\genres_original\rock\rock.00062.wav, segment: 3
Data\genres_original\rock\rock.00062.wav, segment: 4
Data\genres_original\rock\rock.00062.wav, segment: 5
Data\genres_original\rock\rock.00062.wav, segment: 6
Data\genres_original\rock\rock.00062.wav, segment: 7
Data\genres_original\rock\rock.00062.wav, segment: 8
Data\genres_original\rock\rock.00062.wav, segment: 9
Data\genres_original\rock\rock.00063.wav, segment: 0
Data\genres_original\rock\rock.00063.wav, segment: 1
Data\genres_original\rock\rock.00063.wav, segment: 2
Data\genres_original\rock\rock.00063.wav, segment: 3
Data\genres_original\rock\rock.00063.wav, segment: 4
Data\genres_original\rock\rock.00063.wav, segment: 5
Data\genres_original\rock\rock.00063.wav, segment: 6
Data\genres_original\rock\rock.00063.wav, segment: 7
Data\genres_original\rock\rock.00063.wav, segment: 8
Data\genres_original\rock\rock.00063.wav, segment: 9
Data\genres_original\rock\rock.00064.wav, segment: 0
Data\genres_original\rock\r