In [15]:
# imports
import os
import numpy as np
import librosa
import librosa.display
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

# Unzip file

In [None]:
# Extract the FMA "small" audio archive.
from zipfile import ZipFile

# Archive to unpack.
file_name = "../data/fma_small.zip"

# The handle is named `archive` rather than `zip`: binding `zip` here would
# shadow the builtin zip() for every cell executed afterwards.
with ZipFile(file_name, 'r') as archive:
    # List the archive contents.
    archive.printdir()

    # extractall() without a path unpacks into the current working directory.
    # NOTE(review): the other cells read from '../data/...' — confirm that the
    # working directory is where the audio is expected to end up.
    print('Extracting all the files now...')
    archive.extractall()
    print('Done!')

# Data transformation

In [2]:
# Root directory that create_spectogram() hands to get_audio_path() to locate mp3s.
# NOTE(review): this is a 'sample' folder under a different root than the
# '../data/...' paths used by the other cells — confirm it points at the full
# audio set before building the arrays.
AUDIO_DIR = 'Music-Genre-Classification/data/sample'

In [16]:
def get_tids_from_directory(audio_dir):
    """Get track IDs from the mp3s in a directory.

    Only leaf directories (directories without sub-directories) are scanned.
    Files that do not carry an ``.mp3`` extension (for example ``.DS_Store``
    or checksum files) are ignored instead of aborting the scan.

    Parameters
    ----------
    audio_dir : str
        Path to the directory where the audio files are stored.
    Returns
    -------
        A list of track IDs (int), in the order ``os.walk`` yields the files.
    Raises
    ------
    ValueError
        If the name of an mp3 file (without extension) is not an integer.
    """
    tids = []
    for _, dirnames, files in os.walk(audio_dir):
        if dirnames:
            # Not a leaf directory: the mp3s live one level further down.
            continue
        for file in files:
            stem, ext = os.path.splitext(file)
            if ext.lower() != '.mp3':
                continue
            tids.append(int(stem))
    return tids

def get_audio_path(audio_dir, track_id):
    """
    Build the path of a track's mp3 below ``audio_dir``.

    The track ID is zero-padded to six digits; the first three digits name
    the sub-folder and the full six digits name the file.

    Examples
    --------
    >>> get_audio_path('../data/fma_small', 2)
    '../data/fma_small/000/000002.mp3'
    """
    padded_id = format(track_id, '06d')
    folder = padded_id[:3]
    mp3_name = padded_id + '.mp3'
    return os.path.join(audio_dir, folder, mp3_name)

def create_spectogram(track_id):
    """Compute the dB-scaled mel spectrogram of one track.

    The mp3 is located with ``get_audio_path(AUDIO_DIR, track_id)`` and decoded
    with ``librosa.load``. The mel spectrogram uses ``n_fft=2048`` and
    ``hop_length=1024`` and is converted to dB relative to its own maximum.

    Returns
    -------
        The transpose of the dB spectrogram (librosa lays spectrograms out as
        (mel bands, frames), so the result is (frames, mel bands)).
    """
    audio_path = get_audio_path(AUDIO_DIR, track_id)
    samples, sample_rate = librosa.load(audio_path)
    mel_power = librosa.feature.melspectrogram(
        y=samples, sr=sample_rate, n_fft=2048, hop_length=1024)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    return mel_db.T

def plot_spect(track_id):
    """Plot the mel spectrogram of one track.

    Prints the shape of the array returned by ``create_spectogram`` and then
    draws it with a dB colour bar.

    Parameters
    ----------
    track_id : int
        ID of the track to load and display.
    """
    spect = create_spectogram(track_id)
    print(spect.shape)
    plt.figure(figsize=(10, 4))
    # The axes must be described with the parameters the spectrogram was built
    # with in create_spectogram(): hop_length=1024 (specshow assumes 512, which
    # would halve every time label) and the default mel range up to sr / 2
    # (so no fmax cap, which would mislabel the frequency axis). The sample
    # rate is librosa's default on both sides.
    librosa.display.specshow(spect.T, y_axis='mel', x_axis='time', hop_length=1024)
    plt.title('Mel spectrogram (track {})'.format(track_id))
    plt.colorbar(format='%+2.0f dB')
    plt.show()
    

# Integer class label for each top-level genre, numbered in the order listed;
# create_array() uses it to encode the ('track', 'genre_top') column.
dict_genres = {
    genre: label
    for label, genre in enumerate([
        'Electronic', 'Experimental', 'Folk', 'Hip-Hop',
        'Instrumental', 'International', 'Pop', 'Rock',
    ])
}

def create_array(df, n_frames=640, n_mels=128):
    """Build the spectrogram tensor and the label vector for a set of tracks.

    For every row the track's spectrogram is computed with
    ``create_spectogram``, cut to the first ``n_frames`` frames and stored
    together with the integer label of its genre (see ``dict_genres``).
    Rows that cannot be processed (unreadable audio, unknown genre, clip
    shorter than ``n_frames`` frames, ...) are skipped and reported with the
    reason, so the two returned arrays always have the same length.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a 'track_id' column and a ('track', 'genre_top') column.
    n_frames : int, optional
        Number of time frames kept per track (default 640).
    n_mels : int, optional
        Expected number of mel bands per frame (default 128).

    Returns
    -------
    X_spect : numpy.ndarray, float64, shape (n_ok, n_frames, n_mels)
    y_arr : numpy.ndarray, shape (n_ok,)
        Genre labels aligned with ``X_spect``.
    """
    spects = []
    genres = []
    count = 0
    # Code skips records in case of errors
    for _, row in df.iterrows():
        count += 1
        try:
            # With MultiIndex columns row['track_id'] is a one-element Series,
            # with flat columns it is a scalar; np.ravel handles both.
            track_id = int(np.ravel(row['track_id'])[0])
            genre = str(row[('track', 'genre_top')])
            # Resolve the label before anything is stored so that an unknown
            # genre cannot leave a spectrogram without its label.
            label = dict_genres[genre]
            spect = create_spectogram(track_id)

            # Normalize for small shape differences
            spect = spect[:n_frames, :]
            if spect.shape != (n_frames, n_mels):
                raise ValueError(
                    "unexpected spectrogram shape {}".format(spect.shape))
        except Exception as err:
            # Report why the record failed; KeyboardInterrupt is not an
            # Exception subclass and therefore still stops the loop.
            print("Couldn't process: ", count, "-", repr(err))
            continue
        spects.append(spect)
        genres.append(label)
        if count % 100 == 0:
            print("Currently processing: ", count)
    # Stack once at the end instead of re-copying the whole tensor per track.
    if spects:
        X_spect = np.array(spects, dtype=np.float64)
    else:
        X_spect = np.empty((0, n_frames, n_mels))
    y_arr = np.array(genres)
    return X_spect, y_arr

In [13]:
filepath = '../data/fma_metadata/tracks.csv'
# The first two rows form a two-level column header; the first column is the index.
tracks = pd.read_csv(filepath, index_col=0, header=[0, 1])
keep_cols = [('set', 'split'),
('set', 'subset'),('track', 'genre_top')]

df_all = tracks[keep_cols]
# Explicit copy: a column is added below, and assigning onto a filtered slice
# is what triggers pandas' SettingWithCopyWarning.
df_all = df_all[df_all[('set', 'subset')] == 'small'].copy()

# Expose the index as a regular column so create_array() can read it per row.
df_all['track_id'] = df_all.index
df_all.head()

Unnamed: 0_level_0,set,set,track,track_id
Unnamed: 0_level_1,split,subset,genre_top,Unnamed: 4_level_1
track_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2,training,small,Hip-Hop,2
5,training,small,Hip-Hop,5
10,training,small,Pop,10
140,training,small,Folk,140
141,training,small,Folk,141


In [14]:
# Partition the metadata by the value of the ('set', 'split') column.
df_train, df_valid, df_test = (
    df_all[df_all[('set', 'split')] == split_name]
    for split_name in ('training', 'validation', 'test')
)

print(df_train.shape, df_valid.shape, df_test.shape)

(6400, 4) (800, 4) (800, 4)


In [17]:
# Build the spectrogram tensor and label vector for the test split.
# NOTE(review): the output recorded below ends with (15, 640, 128) although
# df_test has 800 rows, i.e. almost every track was skipped — confirm that
# AUDIO_DIR points at the audio files before saving these arrays.
X_test, y_test = create_array(df_test)

print(X_test.shape, y_test.shape)

20it [00:23,  2.17it/s]

Couldn't process:  16
Couldn't process:  17
Couldn't process:  18
Couldn't process:  19
Couldn't process:  20


25it [00:23,  4.76it/s]

Couldn't process:  21
Couldn't process:  22
Couldn't process:  23
Couldn't process:  24
Couldn't process:  25


30it [00:23,  8.05it/s]

Couldn't process:  26
Couldn't process:  27
Couldn't process:  28
Couldn't process:  29
Couldn't process:  30


35it [00:24, 11.95it/s]

Couldn't process:  31
Couldn't process:  32
Couldn't process:  33
Couldn't process:  34
Couldn't process:  35


38it [00:24, 13.95it/s]

Couldn't process:  36
Couldn't process:  37
Couldn't process:  38
Couldn't process:  39
Couldn't process:  40


44it [00:24, 17.10it/s]

Couldn't process:  41
Couldn't process:  42
Couldn't process:  43
Couldn't process:  44
Couldn't process:  45


47it [00:24, 18.04it/s]

Couldn't process:  46
Couldn't process:  47
Couldn't process:  48
Couldn't process:  49
Couldn't process:  

52it [00:24, 18.60it/s]

50
Couldn't process:  51
Couldn't process:  52
Couldn't process:  53
Couldn't process:  54


58it [00:25, 19.72it/s]

Couldn't process:  55
Couldn't process:  56
Couldn't process:  57
Couldn't process:  58
Couldn't process:  59


64it [00:25, 19.68it/s]

Couldn't process:  60
Couldn't process:  61
Couldn't process:  62
Couldn't process:  63
Couldn't process:  64


69it [00:25, 20.20it/s]

Couldn't process:  65
Couldn't process:  66
Couldn't process:  67
Couldn't process:  68
Couldn't process:  69


72it [00:25, 20.46it/s]

Couldn't process:  70
Couldn't process:  71
Couldn't process:  72
Couldn't process:  73
Couldn't process:  74


78it [00:26, 20.64it/s]

Couldn't process:  75
Couldn't process:  76
Couldn't process:  77
Couldn't process:  78
Couldn't process:  79


84it [00:26, 20.75it/s]

Couldn't process:  80
Couldn't process:  81
Couldn't process:  82
Couldn't process:  83
Couldn't process:  84


87it [00:26, 20.82it/s]

Couldn't process:  85
Couldn't process:  86
Couldn't process:  87
Couldn't process:  88
Couldn't process:  89


93it [00:26, 19.93it/s]

Couldn't process:  90
Couldn't process:  91
Couldn't process:  92
Couldn't process:  93


98it [00:27, 20.06it/s]

Couldn't process:  94
Couldn't process:  95
Couldn't process:  96
Couldn't process:  97
Couldn't process:  98


101it [00:27, 19.80it/s]

Couldn't process:  99
Couldn't process:  100
Couldn't process:  101
Couldn't process:  102
Couldn't process:  103


107it [00:27, 20.27it/s]

Couldn't process:  104
Couldn't process:  105
Couldn't process:  106
Couldn't process:  107
Couldn't process:  108


113it [00:27, 20.24it/s]

Couldn't process:  109
Couldn't process:  110
Couldn't process:  111
Couldn't process:  112
Couldn't process:  113


116it [00:28, 20.47it/s]

Couldn't process:  114
Couldn't process:  115
Couldn't process:  116
Couldn't process:  117
Couldn't process:  118


122it [00:28, 20.49it/s]

Couldn't process:  119
Couldn't process:  120
Couldn't process:  121
Couldn't process:  122
Couldn't process:  123


128it [00:28, 20.67it/s]

Couldn't process:  124
Couldn't process:  125
Couldn't process:  126
Couldn't process:  127
Couldn't process:  128


131it [00:28, 20.84it/s]

Couldn't process:  129
Couldn't process:  130
Couldn't process:  131
Couldn't process:  132
Couldn't process:  133


137it [00:29, 20.69it/s]

Couldn't process:  134
Couldn't process:  135
Couldn't process:  136
Couldn't process:  137


140it [00:29, 20.23it/s]

Couldn't process:  138
Couldn't process:  139
Couldn't process:  140
Couldn't process:  141
Couldn't process:  142


146it [00:29, 20.34it/s]

Couldn't process:  143
Couldn't process:  144
Couldn't process:  145
Couldn't process:  146
Couldn't process:  147


152it [00:29, 20.41it/s]

Couldn't process:  148
Couldn't process:  149
Couldn't process:  150
Couldn't process:  151
Couldn't process:  152


155it [00:29, 20.37it/s]

Couldn't process:  153
Couldn't process:  154
Couldn't process:  155
Couldn't process:  156
Couldn't process:  157


161it [00:30, 20.54it/s]

Couldn't process:  158
Couldn't process:  159
Couldn't process:  160
Couldn't process:  161
Couldn't process:  162


167it [00:30, 20.58it/s]

Couldn't process:  163
Couldn't process:  164
Couldn't process:  165
Couldn't process:  166
Couldn't process:  167


170it [00:30, 20.59it/s]

Couldn't process:  168
Couldn't process:  169
Couldn't process:  170
Couldn't process:  171
Couldn't process:  172


176it [00:30, 20.63it/s]

Couldn't process:  173
Couldn't process:  174
Couldn't process:  175
Couldn't process:  176
Couldn't process:  177


182it [00:31, 20.98it/s]

Couldn't process:  178
Couldn't process:  179
Couldn't process:  180
Couldn't process:  181
Couldn't process:  182


185it [00:31, 21.16it/s]

Couldn't process:  183
Couldn't process:  184
Couldn't process:  185
Couldn't process:  186
Couldn't process:  187


191it [00:31, 21.36it/s]

Couldn't process:  188
Couldn't process:  189
Couldn't process:  190
Couldn't process:  191
Couldn't process:  192


197it [00:31, 21.43it/s]

Couldn't process:  193
Couldn't process:  194
Couldn't process:  195
Couldn't process:  196
Couldn't process:  197


200it [00:32, 21.28it/s]

Couldn't process:  198
Couldn't process:  199
Couldn't process:  200
Couldn't process:  201
Couldn't process:  202


206it [00:32, 21.35it/s]

Couldn't process:  203
Couldn't process:  204
Couldn't process:  205
Couldn't process:  206
Couldn't process:  207


212it [00:32, 21.28it/s]

Couldn't process:  208
Couldn't process:  209
Couldn't process:  210
Couldn't process:  211
Couldn't process:  212


215it [00:32, 21.25it/s]

Couldn't process:  213
Couldn't process:  214
Couldn't process:  215
Couldn't process:  216
Couldn't process:  217


221it [00:33, 21.23it/s]

Couldn't process:  218
Couldn't process:  219
Couldn't process:  220
Couldn't process:  221
Couldn't process:  222


227it [00:33, 20.79it/s]

Couldn't process:  223
Couldn't process:  224
Couldn't process:  225
Couldn't process:  226
Couldn't process:  227


230it [00:33, 20.67it/s]

Couldn't process:  228
Couldn't process:  229
Couldn't process:  230
Couldn't process:  231
Couldn't process:  232


236it [00:33, 20.48it/s]

Couldn't process:  233
Couldn't process:  234
Couldn't process:  235
Couldn't process:  236
Couldn't process:  237


242it [00:34, 20.71it/s]

Couldn't process:  238
Couldn't process:  239
Couldn't process:  240
Couldn't process:  241
Couldn't process:  242


245it [00:34, 20.60it/s]

Couldn't process:  243
Couldn't process:  244
Couldn't process:  245
Couldn't process:  246
Couldn't process:  247


251it [00:34, 20.73it/s]

Couldn't process:  248
Couldn't process:  249
Couldn't process:  250
Couldn't process:  251
Couldn't process:  252


257it [00:34, 20.50it/s]

Couldn't process:  253
Couldn't process:  254
Couldn't process:  255
Couldn't process:  256
Couldn't process:  257


260it [00:34, 20.68it/s]

Couldn't process:  258
Couldn't process:  259
Couldn't process:  260
Couldn't process:  261
Couldn't process:  262


266it [00:35, 20.88it/s]

Couldn't process:  263
Couldn't process:  264
Couldn't process:  265
Couldn't process:  266
Couldn't process:  267


272it [00:35, 21.02it/s]

Couldn't process:  268
Couldn't process:  269
Couldn't process:  270
Couldn't process:  271
Couldn't process:  272


275it [00:35, 20.93it/s]

Couldn't process:  273
Couldn't process:  274
Couldn't process:  275
Couldn't process:  276
Couldn't process:  277


281it [00:36, 20.79it/s]

Couldn't process:  278
Couldn't process:  279
Couldn't process:  280
Couldn't process:  281
Couldn't process:  282


287it [00:36, 20.61it/s]

Couldn't process:  283
Couldn't process:  284
Couldn't process:  285
Couldn't process:  286
Couldn't process:  287


290it [00:36, 20.76it/s]

Couldn't process:  288
Couldn't process:  289
Couldn't process:  290
Couldn't process:  291
Couldn't process:  292


296it [00:36, 20.44it/s]

Couldn't process:  293
Couldn't process:  294
Couldn't process:  295
Couldn't process:  296
Couldn't process:  297


299it [00:36, 20.40it/s]

Couldn't process:  298
Couldn't process:  299
Couldn't process:  300
Couldn't process:  301


305it [00:37, 20.61it/s]

Couldn't process:  302
Couldn't process:  303
Couldn't process:  304
Couldn't process:  305
Couldn't process:  306


311it [00:37, 20.40it/s]

Couldn't process:  307
Couldn't process:  308
Couldn't process:  309
Couldn't process:  310
Couldn't process:  311


314it [00:37, 20.53it/s]

Couldn't process:  312
Couldn't process:  313
Couldn't process:  314
Couldn't process:  315
Couldn't process:  316


320it [00:37, 20.62it/s]

Couldn't process:  317
Couldn't process:  318
Couldn't process:  319
Couldn't process:  320
Couldn't process:  321


326it [00:38, 20.42it/s]

Couldn't process:  322
Couldn't process:  323
Couldn't process:  324
Couldn't process:  325
Couldn't process:  326


329it [00:38, 20.48it/s]

Couldn't process:  327
Couldn't process:  328
Couldn't process:  329
Couldn't process:  330
Couldn't process:  331


335it [00:38, 20.82it/s]

Couldn't process:  332
Couldn't process:  333
Couldn't process:  334
Couldn't process:  335
Couldn't process:  336


341it [00:38, 20.80it/s]

Couldn't process:  337
Couldn't process:  338
Couldn't process:  339
Couldn't process:  340
Couldn't process:  341


344it [00:39, 20.47it/s]

Couldn't process:  342
Couldn't process:  343
Couldn't process:  344
Couldn't process:  345
Couldn't process:  346


350it [00:39, 20.69it/s]

Couldn't process:  347
Couldn't process:  348
Couldn't process:  349
Couldn't process:  350
Couldn't process:  351


356it [00:39, 20.60it/s]

Couldn't process:  352
Couldn't process:  353
Couldn't process:  354
Couldn't process:  355
Couldn't process:  356


359it [00:39, 20.59it/s]

Couldn't process:  357
Couldn't process:  358
Couldn't process:  359
Couldn't process:  360
Couldn't process:  361


365it [00:40, 20.15it/s]

Couldn't process:  362
Couldn't process:  363
Couldn't process:  364
Couldn't process:  365


368it [00:40, 20.21it/s]

Couldn't process:  366
Couldn't process:  367
Couldn't process:  368
Couldn't process:  369
Couldn't process:  370


374it [00:40, 20.51it/s]

Couldn't process:  371
Couldn't process:  372
Couldn't process:  373
Couldn't process:  374
Couldn't process:  375


380it [00:40, 20.89it/s]

Couldn't process:  376
Couldn't process:  377
Couldn't process:  378
Couldn't process:  379
Couldn't process:  380


383it [00:40, 20.97it/s]

Couldn't process:  381
Couldn't process:  382
Couldn't process:  383
Couldn't process:  384
Couldn't process:  385


389it [00:41, 20.79it/s]

Couldn't process:  386
Couldn't process:  387
Couldn't process:  388
Couldn't process:  389
Couldn't process:  390


395it [00:41, 20.84it/s]

Couldn't process:  391
Couldn't process:  392
Couldn't process:  393
Couldn't process:  394
Couldn't process:  395


398it [00:41, 20.75it/s]

Couldn't process:  396
Couldn't process:  397
Couldn't process:  398
Couldn't process:  399
Couldn't process:  400


404it [00:41, 20.83it/s]

Couldn't process:  401
Couldn't process:  402
Couldn't process:  403
Couldn't process:  404
Couldn't process:  405


410it [00:42, 20.98it/s]

Couldn't process:  406
Couldn't process:  407
Couldn't process:  408
Couldn't process:  409
Couldn't process:  410


413it [00:42, 20.83it/s]

Couldn't process:  411
Couldn't process:  412
Couldn't process:  413
Couldn't process:  414
Couldn't process:  415


419it [00:42, 20.13it/s]

Couldn't process:  416
Couldn't process:  417
Couldn't process:  418
Couldn't process:  419
Couldn't process:  420


425it [00:43, 20.30it/s]

Couldn't process:  421
Couldn't process:  422
Couldn't process:  423
Couldn't process:  424
Couldn't process:  425


428it [00:43, 20.10it/s]

Couldn't process:  426
Couldn't process:  427
Couldn't process:  428
Couldn't process:  429
Couldn't process:  430


434it [00:43, 20.11it/s]

Couldn't process:  431
Couldn't process:  432
Couldn't process:  433
Couldn't process:  434
Couldn't process:  435


440it [00:43, 20.03it/s]

Couldn't process:  436
Couldn't process:  437
Couldn't process:  438
Couldn't process:  439
Couldn't process:  440


443it [00:43, 19.31it/s]

Couldn't process:  441
Couldn't process:  442
Couldn't process:  443
Couldn't process:  444


447it [00:44, 19.08it/s]

Couldn't process:  445
Couldn't process:  446
Couldn't process:  447
Couldn't process:  448


452it [00:44, 19.37it/s]

Couldn't process:  449
Couldn't process:  450
Couldn't process:  451
Couldn't process:  452


456it [00:44, 19.46it/s]

Couldn't process:  453
Couldn't process:  454
Couldn't process:  455
Couldn't process:  456


460it [00:44, 19.24it/s]

Couldn't process:  457
Couldn't process:  458
Couldn't process:  459
Couldn't process:  460


464it [00:45, 19.52it/s]

Couldn't process:  461
Couldn't process:  462
Couldn't process:  463
Couldn't process:  464
Couldn't process:  465


469it [00:45, 19.66it/s]

Couldn't process:  466
Couldn't process:  467
Couldn't process:  468
Couldn't process:  469


473it [00:45, 19.24it/s]

Couldn't process:  470
Couldn't process:  471
Couldn't process:  472
Couldn't process:  473
Couldn't process:  474


479it [00:45, 19.93it/s]

Couldn't process:  475
Couldn't process:  476
Couldn't process:  477
Couldn't process:  478
Couldn't process:  479


484it [00:46, 20.05it/s]

Couldn't process:  480
Couldn't process:  481
Couldn't process:  482
Couldn't process:  483
Couldn't process:  484


488it [00:46, 19.76it/s]

Couldn't process:  485
Couldn't process:  486
Couldn't process:  487
Couldn't process:  488


493it [00:46, 19.71it/s]

Couldn't process:  489
Couldn't process:  490
Couldn't process:  491
Couldn't process:  492
Couldn't process:  493


496it [00:46, 20.06it/s]

Couldn't process:  494
Couldn't process:  495
Couldn't process:  496
Couldn't process:  497
Couldn't process:  498


502it [00:46, 20.46it/s]

Couldn't process:  499
Couldn't process:  500
Couldn't process:  501
Couldn't process:  502
Couldn't process:  503


505it [00:47, 20.36it/s]

Couldn't process:  504
Couldn't process:  505
Couldn't process:  506
Couldn't process:  507


510it [00:47, 19.55it/s]

Couldn't process:  508
Couldn't process:  509
Couldn't process:  510
Couldn't process:  511


515it [00:47, 19.50it/s]

Couldn't process:  512
Couldn't process:  513
Couldn't process:  514
Couldn't process:  515
Couldn't process:  516


521it [00:47, 20.15it/s]

Couldn't process:  517
Couldn't process:  518
Couldn't process:  519
Couldn't process:  520
Couldn't process:  521


524it [00:48, 19.71it/s]

Couldn't process:  522
Couldn't process:  523
Couldn't process:  524
Couldn't process:  525


528it [00:48, 19.37it/s]

Couldn't process:  526
Couldn't process:  527
Couldn't process:  528
Couldn't process:  529


534it [00:48, 20.13it/s]

Couldn't process:  530
Couldn't process:  531
Couldn't process:  532
Couldn't process:  533
Couldn't process:  534


537it [00:48, 20.03it/s]

Couldn't process:  535
Couldn't process:  536
Couldn't process:  537
Couldn't process:  538
Couldn't process:  539


542it [00:48, 19.88it/s]

Couldn't process:  540
Couldn't process:  541
Couldn't process:  542
Couldn't process:  543
Couldn't process:  544


548it [00:49, 20.27it/s]

Couldn't process:  545
Couldn't process:  546
Couldn't process:  547
Couldn't process:  548
Couldn't process:  549


554it [00:49, 20.34it/s]

Couldn't process:  550
Couldn't process:  551
Couldn't process:  552
Couldn't process:  553
Couldn't process:  554


557it [00:49, 20.31it/s]

Couldn't process:  555
Couldn't process:  556
Couldn't process:  557
Couldn't process:  558
Couldn't process:  559


563it [00:49, 20.65it/s]

Couldn't process:  560
Couldn't process:  561
Couldn't process:  562
Couldn't process:  563
Couldn't process:  564


569it [00:50, 20.54it/s]

Couldn't process:  565
Couldn't process:  566
Couldn't process:  567
Couldn't process:  568
Couldn't process:  569


572it [00:50, 20.59it/s]

Couldn't process:  570
Couldn't process:  571
Couldn't process:  572
Couldn't process:  573
Couldn't process:  574


578it [00:50, 20.63it/s]

Couldn't process:  575
Couldn't process:  576
Couldn't process:  577
Couldn't process:  578
Couldn't process:  579


584it [00:50, 20.88it/s]

Couldn't process:  580
Couldn't process:  581
Couldn't process:  582
Couldn't process:  583
Couldn't process:  584


587it [00:51, 20.87it/s]

Couldn't process:  585
Couldn't process:  586
Couldn't process:  587
Couldn't process:  588
Couldn't process:  589


593it [00:51, 21.02it/s]

Couldn't process:  590
Couldn't process:  591
Couldn't process:  592
Couldn't process:  593
Couldn't process:  594


599it [00:51, 21.24it/s]

Couldn't process:  595
Couldn't process:  596
Couldn't process:  597
Couldn't process:  598
Couldn't process:  599


602it [00:51, 21.11it/s]

Couldn't process:  600
Couldn't process:  601
Couldn't process:  602
Couldn't process:  603
Couldn't process:  604


608it [00:52, 21.30it/s]

Couldn't process:  605
Couldn't process:  606
Couldn't process:  607
Couldn't process:  608
Couldn't process:  609


614it [00:52, 20.89it/s]

Couldn't process:  610
Couldn't process:  611
Couldn't process:  612
Couldn't process:  613
Couldn't process:  614


617it [00:52, 20.69it/s]

Couldn't process:  615
Couldn't process:  616
Couldn't process:  617
Couldn't process:  618
Couldn't process:  619


623it [00:52, 20.95it/s]

Couldn't process:  620
Couldn't process:  621
Couldn't process:  622
Couldn't process:  623
Couldn't process:  624


629it [00:53, 20.95it/s]

Couldn't process:  625
Couldn't process:  626
Couldn't process:  627
Couldn't process:  628
Couldn't process:  629


632it [00:53, 20.85it/s]

Couldn't process:  630
Couldn't process:  631
Couldn't process:  632
Couldn't process:  633
Couldn't process:  634


638it [00:53, 20.92it/s]

Couldn't process:  635
Couldn't process:  636
Couldn't process:  637
Couldn't process:  638
Couldn't process:  639


641it [00:53, 20.15it/s]

Couldn't process:  640
Couldn't process:  641
Couldn't process:  642
Couldn't process:  643


647it [00:54, 19.00it/s]

Couldn't process:  644
Couldn't process:  645
Couldn't process:  646
Couldn't process:  647
Couldn't process:  648


652it [00:54, 19.58it/s]

Couldn't process:  649
Couldn't process:  650
Couldn't process:  651
Couldn't process:  652
Couldn't process:  653


657it [00:54, 19.61it/s]

Couldn't process:  654
Couldn't process:  655
Couldn't process:  656
Couldn't process:  657
Couldn't process:  658


661it [00:54, 19.35it/s]

Couldn't process:  659
Couldn't process:  660
Couldn't process:  661
Couldn't process:  662


668it [00:55, 19.79it/s]

Couldn't process:  663
Couldn't process:  664
Couldn't process:  665
Couldn't process:  666
Couldn't process:  667
Couldn't process:  668


672it [00:55, 19.44it/s]

Couldn't process:  669
Couldn't process:  670
Couldn't process:  671
Couldn't process:  672
Couldn't process:  673


678it [00:55, 19.86it/s]

Couldn't process:  674
Couldn't process:  675
Couldn't process:  676
Couldn't process:  677
Couldn't process:  678


683it [00:55, 20.19it/s]

Couldn't process:  679
Couldn't process:  680
Couldn't process:  681
Couldn't process:  682
Couldn't process:  683


686it [00:56, 20.51it/s]

Couldn't process:  684
Couldn't process:  685
Couldn't process:  686
Couldn't process:  687
Couldn't process:  688


692it [00:56, 20.55it/s]

Couldn't process:  689
Couldn't process:  690
Couldn't process:  691
Couldn't process:  692
Couldn't process:  693


698it [00:56, 20.82it/s]

Couldn't process:  694
Couldn't process:  695
Couldn't process:  696
Couldn't process:  697
Couldn't process:  698


701it [00:56, 20.80it/s]

Couldn't process:  699
Couldn't process:  700
Couldn't process:  701
Couldn't process:  702
Couldn't process:  703


707it [00:57, 20.44it/s]

Couldn't process:  704
Couldn't process:  705
Couldn't process:  706
Couldn't process:  707
Couldn't process:  708


713it [00:57, 20.54it/s]

Couldn't process:  709
Couldn't process:  710
Couldn't process:  711
Couldn't process:  712
Couldn't process:  713


716it [00:57, 20.69it/s]

Couldn't process:  714
Couldn't process:  715
Couldn't process:  716
Couldn't process:  717
Couldn't process:  718


722it [00:57, 20.82it/s]

Couldn't process:  719
Couldn't process:  720
Couldn't process:  721
Couldn't process:  722
Couldn't process:  723


728it [00:58, 21.06it/s]

Couldn't process:  724
Couldn't process:  725
Couldn't process:  726
Couldn't process:  727
Couldn't process:  728


731it [00:58, 20.92it/s]

Couldn't process:  729
Couldn't process:  730
Couldn't process:  731
Couldn't process:  732
Couldn't process:  733


737it [00:58, 21.15it/s]

Couldn't process:  734
Couldn't process:  735
Couldn't process:  736
Couldn't process:  737
Couldn't process:  738


743it [00:58, 21.31it/s]

Couldn't process:  739
Couldn't process:  740
Couldn't process:  741
Couldn't process:  742
Couldn't process:  743


746it [00:58, 21.33it/s]

Couldn't process:  744
Couldn't process:  745
Couldn't process:  746
Couldn't process:  747
Couldn't process:  748


752it [00:59, 21.25it/s]

Couldn't process:  749
Couldn't process:  750
Couldn't process:  751
Couldn't process:  752
Couldn't process:  753


758it [00:59, 21.16it/s]

Couldn't process:  754
Couldn't process:  755
Couldn't process:  756
Couldn't process:  757
Couldn't process:  758


761it [00:59, 21.06it/s]

Couldn't process:  759
Couldn't process:  760
Couldn't process:  761
Couldn't process:  762
Couldn't process:  763


767it [00:59, 21.13it/s]

Couldn't process:  764
Couldn't process:  765
Couldn't process:  766
Couldn't process:  767
Couldn't process:  768


773it [01:00, 20.73it/s]

Couldn't process:  769
Couldn't process:  770
Couldn't process:  771
Couldn't process:  772
Couldn't process:  773


776it [01:00, 20.82it/s]

Couldn't process:  774
Couldn't process:  775
Couldn't process:  776
Couldn't process:  777
Couldn't process:  778


782it [01:00, 20.85it/s]

Couldn't process:  779
Couldn't process:  780
Couldn't process:  781
Couldn't process:  782
Couldn't process:  783


788it [01:00, 20.50it/s]

Couldn't process:  784
Couldn't process:  785
Couldn't process:  786
Couldn't process:  787
Couldn't process:  788


791it [01:01, 20.59it/s]

Couldn't process:  789
Couldn't process:  790
Couldn't process:  791
Couldn't process:  792
Couldn't process:  793


797it [01:01, 20.44it/s]

Couldn't process:  794
Couldn't process:  795
Couldn't process:  796
Couldn't process:  797
Couldn't process:  798


800it [01:01, 13.01it/s]

Couldn't process:  799
Couldn't process:  800
(15, 640, 128) (15,)





In [7]:
# Persist the test arrays. np.savez appends '.npz' to the name and stores
# positional arguments under the keys 'arr_0' (X) and 'arr_1' (y).
np.savez('../data/test_arr', X_test, y_test)

In [None]:
# Build the spectrogram tensor and label vector for the validation split.
X_valid, y_valid = create_array(df_valid)

Currently processing:  100
Couldn't process:  144


In [None]:
# Persist the validation arrays (read back further down as 'arr_0' / 'arr_1').
np.savez('../data/valid_arr', X_valid, y_valid)

In [None]:
def splitDataFrameIntoSmaller(df, chunkSize = 1600):
    """Split ``df`` into consecutive row chunks of at most ``chunkSize`` rows.

    Parameters
    ----------
    df : pandas.DataFrame (or any sliceable sequence)
        Data to split; rows keep their original order.
    chunkSize : int, optional
        Maximum number of rows per chunk (default 1600).

    Returns
    -------
    list
        ``ceil(len(df) / chunkSize)`` chunks; only the last one may be shorter.
        An empty input gives an empty list.

    Raises
    ------
    ValueError
        If ``chunkSize`` is not positive.
    """
    if chunkSize <= 0:
        raise ValueError("chunkSize must be a positive integer")
    # Stepping through the start offsets never produces the empty trailing
    # chunk that `len(df) // chunkSize + 1` gives for exact multiples.
    return [df[start:start + chunkSize] for start in range(0, len(df), chunkSize)]

In [None]:
# Cut the training split into row chunks and give the first four their own names.
listDf = splitDataFrameIntoSmaller(df_train)
df1_train, df2_train, df3_train, df4_train = (
    listDf[position] for position in range(4)
)
print(df1_train.shape, df2_train.shape, df3_train.shape, df4_train.shape)

In [None]:
# Build and save the four training chunks one after the other. Each chunk is
# written to '../data/train<N>_arr.npz' and released before the next one is
# built, so only a single chunk is held in memory at a time; the arrays are
# read back from disk in the consolidation section below.
for chunk_no, df_chunk in enumerate(
        (df1_train, df2_train, df3_train, df4_train), start=1):
    X_chunk, y_chunk = create_array(df_chunk)
    np.savez('../data/train{}_arr'.format(chunk_no), X_chunk, y_chunk)

# Consolidate train files and shuffle train/validation splits

In [None]:
def load_saved_arrays(path):
    """Load an (X, y) pair that was written with ``np.savez(path, X, y)``.

    Prints the keys stored in the archive and the shapes of both arrays.

    Parameters
    ----------
    path : str
        Path of the ``.npz`` file.

    Returns
    -------
    tuple of numpy.ndarray
        The arrays stored under 'arr_0' and 'arr_1'.
    """
    # np.load keeps the archive open until it is closed; the context manager
    # releases the file handle as soon as both arrays have been read.
    with np.load(path) as npzfile:
        print(npzfile.files)
        X = npzfile['arr_0']
        y = npzfile['arr_1']
    print(X.shape, y.shape)
    return X, y


X_train1, y_train1 = load_saved_arrays('../data/train1_arr.npz')
X_train2, y_train2 = load_saved_arrays('../data/train2_arr.npz')
X_train3, y_train3 = load_saved_arrays('../data/train3_arr.npz')
X_train4, y_train4 = load_saved_arrays('../data/train4_arr.npz')
X_valid, y_valid = load_saved_arrays('../data/valid_arr.npz')

In [None]:
# Join the four training chunks back into one tensor / one label vector
# (np.concatenate joins along the first axis by default).
X_train = np.concatenate([X_train1, X_train2, X_train3, X_train4])
y_train = np.concatenate([y_train1, y_train2, y_train3, y_train4])
print(X_train.shape, y_train.shape)

In [None]:
### Convert the scale of training data
X_train_raw = librosa.core.db_to_power(X_train, ref=1.0)
print(np.amin(X_train_raw), np.amax(X_train_raw), np.mean(X_train_raw))

In [None]:
# Natural logarithm of the power values; print minimum, maximum and mean.
X_train_log = np.log(X_train_raw)
print(X_train_log.min(), X_train_log.max(), X_train_log.mean())

In [None]:
# Same dB -> power -> natural-log conversion as for the training data,
# applied to the validation set.
X_valid_raw = librosa.core.db_to_power(X_valid, ref=1.0)
X_valid_log = np.log(X_valid_raw)

In [None]:
def unison_shuffled_copies(a, b, seed=None):
    """Return copies of ``a`` and ``b`` shuffled with one shared permutation.

    Element ``i`` of both results comes from the same original position, so
    samples stay aligned with their labels.

    Parameters
    ----------
    a, b : numpy.ndarray
        Arrays with the same length along the first axis.
    seed : int, optional
        Seed for a private random generator, making the shuffle reproducible.
        With the default ``None`` the permutation is drawn from NumPy's
        global random state, as before.

    Returns
    -------
    tuple of numpy.ndarray
        The shuffled copies of ``a`` and ``b``.
    """
    assert len(a) == len(b), "a and b must have the same length"
    if seed is None:
        p = np.random.permutation(len(a))
    else:
        # A dedicated generator leaves the global random state untouched.
        p = np.random.RandomState(seed).permutation(len(a))
    return a[p], b[p]

# Shuffle each split while keeping every spectrogram aligned with its label.
# X_train / X_valid are rebound here to the log-scaled, shuffled arrays.
# NOTE(review): no random seed is set anywhere in the visible notebook, so the
# resulting order differs from run to run.
X_train, y_train = unison_shuffled_copies(X_train_log, y_train)
X_valid, y_valid = unison_shuffled_copies(X_valid_log, y_valid)

In [None]:
# Sanity check: print the shapes of the shuffled features and labels for both splits.
print("Shapes are: ", X_train.shape, X_valid.shape, y_train.shape, y_valid.shape)

In [None]:
# Persist the shuffled, log-scaled splits; np.savez appends '.npz' and stores
# the positional arrays under 'arr_0' (X) and 'arr_1' (y).
np.savez('../data/shuffled_train', X_train, y_train)
np.savez('../data/shuffled_valid', X_valid, y_valid)