### Import Packages:

In [1]:
import json
import random
import time
import warnings

import librosa as lr
import librosa.display as lrd
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.preprocessing
from IPython.display import Audio

In [2]:
warnings.filterwarnings('ignore')

In [3]:
# Base directory of the project; the blacklisted fma_small paths further down
# are built by prefixing this string.
root = '.'

### Load Utils:

In [4]:
def _load_json(path):
    """Return the parsed contents of the JSON file at `path`.

    The file is opened explicitly as UTF-8 rather than with the platform's
    locale-dependent default encoding, so the tables decode the same way on
    every machine.
    """
    with open(path, 'r', encoding='utf-8') as file:
        return json.load(file)


# Lookup tables produced by an earlier preprocessing step.
# NOTE(review): filepath2songid is not used in the visible cells; presumably
# the inverse of songid2filepath -- confirm.
filepath2songid = _load_json('./preprocessed/filepath2songid.json')

# song id -> path of the audio file (passed to Path(...) and lr.load later on).
songid2filepath = _load_json('./preprocessed/songid2filepath.json')

# song id -> genre name (used as a key into genre2genreid).
songid2genre = _load_json('./preprocessed/songid2genre.json')

# genre name -> genre id (used as a key into genre2songlist).
genre2genreid = _load_json('./preprocessed/genre2genreid.json')

# genre id -> genre name; being JSON object keys, the ids arrive as strings.
genreid2genre = _load_json('./preprocessed/genreid2genre.json')

### Blacklisted: (Corrupt Files Deleted Manually)

In [5]:
# Tracks excluded up front (the section heading describes them as corrupt
# files that were deleted manually). Kept as a list because later cells
# extend it in place.
_corrupt_tracks = (
    '/fma_small/099/099134.mp3',
    '/fma_small/108/108925.mp3',
    '/fma_small/133/133297.mp3',
    '/fma_small/098/098567.mp3',
    '/fma_small/098/098569.mp3',
    '/fma_small/011/011298.mp3',
    '/fma_small/098/098565.mp3',
)
blacklists = [root + relative_path for relative_path in _corrupt_tracks]

### Loading Songs:

In [6]:
from pathlib import Path

In [7]:
# Extend the blacklist with every not-yet-blacklisted track whose file on
# disk is smaller than 400 KiB.
for sid in songid2genre:
    track_path = songid2filepath[sid]
    if track_path in blacklists:
        continue
    size_kib = Path(track_path).stat().st_size / 1024
    if size_kib < 400:
        blacklists.append(track_path)

### Separating Genres:

In [8]:
# One empty bucket per genre id. The ids come from JSON object keys, i.e.
# strings, so they are converted to int here.
genre2songlist = {int(genre_id): [] for genre_id in genreid2genre}

In [9]:
# File every non-blacklisted song id under the id of its genre.
for sid, track_path in songid2filepath.items():
    if track_path in blacklists:
        continue
    genre_id = genre2genreid[songid2genre[sid]]
    genre2songlist[genre_id].append(sid)

### Creating 1000 tracks for each genre (oversampling existing tracks with replacement):

In [10]:
# Shortfall of each genre (ids 0..7) against the 1000-track target;
# req[k] is the number of tracks genre k is still missing.
req = [1000 - len(genre2songlist[genre_id]) for genre_id in range(8)]

In [11]:
# Top every genre up to 1000 tracks by re-drawing, with replacement, from the
# tracks it already has.
#
# The shortfall is computed from the bucket itself rather than looked up in
# `req` by loop position: that keeps the count paired with the right genre
# whatever order the dictionary iterates in, and makes the cell safe to
# re-run (a second run finds nothing left to add).
for g, songs in genre2songlist.items():
    n_original = len(songs)
    shortfall = 1000 - n_original
    if shortfall <= 0:
        # Already at (or above) the target: nothing to add.
        continue
    if n_original == 0:
        # Nothing to draw from; fail with a readable message instead of the
        # "low >= high" error np.random.randint would raise.
        raise ValueError('Genre {} has no usable tracks to oversample from.'.format(g))
    # Indices are restricted to the original tracks, so every one of them is
    # equally likely to be duplicated.
    picks = np.random.randint(0, n_original, size=shortfall)
    songs.extend([songs[k] for k in picks])

In [12]:
# Recompute the per-genre shortfall (ids 0..7) after oversampling, using the
# same 1000-track target as before.
req = [1000 - len(genre2songlist[genre_id]) for genre_id in range(8)]

### Shuffling the Lists:

In [13]:
# Randomise the track order inside every genre bucket, in place.
for songs in genre2songlist.values():
    random.shuffle(songs)

### Padding Tracks:

In [14]:
def pad_tracks(track, sr, duration):
    """Force a 1-D signal to exactly ``duration`` seconds at sample rate ``sr``.

    Parameters
    ----------
    track : numpy.ndarray
        One-dimensional array of audio samples.
    sr : int
        Sample rate in samples per second.
    duration : int or float
        Target length in seconds; ``sr * duration`` is rounded to the
        nearest whole number of samples.

    Returns
    -------
    numpy.ndarray
        Array of ``round(sr * duration)`` samples with the dtype of
        ``track``. Longer input keeps only its first samples; shorter input
        is centred between zeros, with the extra zero going to the end when
        the amount of padding is odd.
    """
    # Rounding to int lets fractional durations work; slicing and padding
    # both need an integer sample count.
    target = int(round(sr * duration))
    n_samples = track.shape[0]

    if n_samples > target:
        return track[0:target]

    missing = target - n_samples
    left = missing // 2
    # np.pad writes the zeros directly in the input's dtype, so padded and
    # truncated tracks come back with the same dtype and no intermediate
    # Python lists of zeros are built.
    return np.pad(track, (left, missing - left), mode='constant')

### Audio >> MFCC: (genre-wise)

In [None]:
# Make sure the output directory exists before the slow extraction starts, so
# a missing folder cannot throw away finished work when np.save is reached.
Path('./acoustic_features/').mkdir(parents=True, exist_ok=True)

# Number of tracks between two progress messages.
report_every = 250

# For every genre: load each track at 44.1 kHz, force it to 30 s, compute its
# MFCC matrix, then save the raw and the standardised features as
# '<genre id>.npy' and '<genre id>n.npy'.
for g in genre2songlist:

    mfcc_features = []

    tic = toc = time.time()

    count = 0
    for sid in genre2songlist[g]:

        if count % report_every == 0:
            toc = time.time()
            # The label states the actual reporting interval.
            print('Songs Loaded: {:04d}.'.format(count), 'Load time for {:d} songs: {:03d}.'.format(report_every, int(toc-tic)), 'sec')
            tic = time.time()

        if songid2filepath[sid] not in blacklists:
            # temp is the (samples, sample rate) pair returned by librosa.
            temp = lr.load(songid2filepath[sid], sr=44100)
            # Public lr.feature.mfcc with the signal passed by keyword, which
            # newer librosa releases require. A 441-sample hop is 10 ms at
            # 44.1 kHz; the slice caps every track at 3000 frames so all
            # matrices stack into one array (presumably librosa's default
            # framing yields 3001 frames for 30 s -- TODO confirm).
            mfcc_features += [lr.feature.mfcc(y=pad_tracks(temp[0], 44100, 30), sr=44100, hop_length=441)[:, 0:3000]]
        count += 1

    # saving mfcc in numpy format:
    np.save('./acoustic_features/' + str(g) + '.npy', np.array(mfcc_features), allow_pickle=False)

    mfcc_normalized = []

    # creating mfcc normalized: axis=1 standardises each coefficient (row)
    # along the time axis, independently for every track.
    for mfcc in mfcc_features:
        mfcc_normalized += [sklearn.preprocessing.scale(mfcc, axis=1)]

    # saving normalized mfcc in numpy format:
    np.save('./acoustic_features/' + str(g) + 'n' + '.npy', np.array(mfcc_normalized), allow_pickle=False)

### Visualization: