This Jupyter notebook is meant to be opened in Google Colab and is the main entry point for training models there.

In [4]:
# primary importing code block to utilize the python files
import os
import json
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, Input, LeakyReLU, ZeroPadding2D, BatchNormalization, MaxPool2D
from pydub import AudioSegment   # main class from pydub package used to upload mp3 into Python and then get a NumPy array
import IPython.display as ipd    # ability to play audio in Jupyter Notebooks if needed
import librosa as lb             # loads the librosa package
import librosa.display
from src.configs import *
from src.utils import MusicAlignedTab, create_FullSet_df, one_hot_encode, collapse_class, clean_labels
from src.dataset import Dataset

In [None]:
mat = MusicAlignedTab('mookies_last_christmas')
df = mat.MAT

In [None]:
# Run the single-song frame through clean_labels and then collapse_class, calling
# MusicAlignedTab.labels_summary after each step so the two label sets can be compared.
# NOTE(review): `df` is rebound twice in this cell, so it is not idempotent -- re-running it
# feeds the already-collapsed frame back into clean_labels. Restart from the cell that builds
# `df` from `mat.MAT` before re-running.
df = clean_labels(df)
MusicAlignedTab.labels_summary(df)
# All optional label distinctions are switched off here (dynamics, bells, separate toms);
# hihat_classes=1 / cymbal_classes=1 presumably merge each family into one class -- confirm in src.utils.
df = collapse_class(df, keep_dynamics = False, keep_bells = False, keep_toms_separate = False, hihat_classes=1, cymbal_classes=1)
MusicAlignedTab.labels_summary(df)

In [None]:
encode_df = one_hot_encode(df)
print(encode_df.columns)
encode_df.head()

In [None]:
# Build the target array (and its index -> label mapping) for the encoded single-song frame, timing the call.
# NOTE(review): neither `S` nor `create_targets` is defined or imported by name in the visible
# cells (create_targets is not in the explicit `from src.utils import ...` list). They presumably
# come from a deleted cell or from `from src.configs import *` -- confirm, otherwise this cell
# raises NameError on a fresh kernel.
%time targets, target_dict = create_targets(S, encode_df)

In [None]:
# For every (row index -> label name) entry, count the non-zero windows in channel 0 of that row.
for idx, val in target_dict.items():
    n_positive = np.count_nonzero(targets[idx,:,0])
    print(f'total windows that {val} are labeled 1 = {n_positive}')

# Axis 1 of `targets` is the window axis.
n_total_windows = targets.shape[1]
print(f'total windows = {n_total_windows}')

In [None]:
# Show index 1 along the last axis of `targets` as an image on an explicit 10x10 inch figure/axes pair.
fig, ax = plt.subplots(figsize = (10,10))
ax.imshow(targets[:,:,1])
plt.show()

In [None]:
MusicAlignedTab.labels_summary(encode_df)

In [None]:
# Scratch array used to check what slicing a single position on the middle axis returns.
# NOTE(review): this rebinds `targets`, replacing the array assigned from create_targets(...)
# in an earlier cell. Any cell that reads `targets` after this one sees the all-zero (7, 10, 3)
# array instead -- consider a different name for the scratch array.
targets = np.zeros((7, 10, 3), dtype=int)
print(targets.shape)
targets[:, 0, :]

In [None]:
# Summarise only the label columns: drop the audio-slice / sample-position columns ONCE
# (the original expression built the same dropped frame twice), then mask out '-' entries so
# describe() counts only the remaining labels (pandas boolean-frame indexing turns masked cells into NaN).
label_cols = df.drop(columns = ['song slice', 'sample start'])
label_cols[label_cols != '-'].describe()

In [None]:
mat.random_alignment_checker(['BD', 'SD', 'CC'], 14)

In [None]:
librosa.display.specshow(S[:,:,0], sr=44100, x_axis='time', y_axis = 'mel')

### FullSet Testing

In [None]:
def play(samples, sr=44100):
    '''
    Shorthand so that typing play(samples) in a cell yields a playable audio output

    Args:
        samples: audio data, handed unchanged to IPython.display.Audio
        sr [int]: sample rate forwarded as the `rate` argument. Default 44100

    Returns:
        IPython.display.Audio: the audio object built from samples at the given rate
    '''
    audio_player = ipd.Audio(samples, rate = sr)
    return audio_player

In [None]:
# Stage 1: build the full-set frame from SONGS_PATH
FullSet = create_FullSet_df(SONGS_PATH)

# Stage 2: clean the labels, then summarise them
FullSet_clean = clean_labels(FullSet)
MusicAlignedTab.labels_summary(FullSet_clean)

# Stage 3: collapse classes (same settings as the single-song cell), then summarise again
FullSet_collapse = collapse_class(
    FullSet_clean,
    keep_dynamics = False,
    keep_bells = False,
    keep_toms_separate = False,
    hihat_classes = 1,
    cymbal_classes = 1,
)
MusicAlignedTab.labels_summary(FullSet_collapse)

# Stage 4: one-hot encode the collapsed labels
FullSet_encoded = one_hot_encode(FullSet_collapse)

In [None]:
tset = Dataset('train', FullSet_encoded)
vset = Dataset('val', FullSet_encoded)

In [None]:
song_idx = 1

In [None]:
%%time
# Pull the rows of the selected training song and stitch its slices back together.
song_df = tset.subset_df.loc[tset.song_list[song_idx]].copy()
song = np.vstack(song_df['song slice'].to_numpy()).T   # single array of shape (channels, samples)
mono_song = lb.core.to_mono(song)

# channels is either [mono_song] or [mono, L_song, R_song] depending on INCLUDE_LR_CHANNELS
lr_channels = [song[0,:], song[1,:]] if INCLUDE_LR_CHANNELS else []
channels = [mono_song] + lr_channels

aug_channels = tset.augment_audio_cp(channels, tset.aug_comp, sr=SAMPLE_RATE)

In [None]:
play(aug_channels[0])

In [None]:
# Preprocess one training song (index 7 of tset.song_list) and time it.
# NOTE(review): the assignment below rebinds INCLUDE_LR_CHANNELS only in the notebook namespace.
# If the name arrived via `from src.configs import *`, code inside src.dataset presumably reads
# its own imported copy, so preprocess_song may ignore this override -- confirm before relying on it.
INCLUDE_LR_CHANNELS = False
%time spectrogram, target, target_dict = tset.preprocess_song(tset.song_list[7])

In [None]:
print(spectrogram.shape)
print(target.shape)
print(target_dict)
tset.song_list[7]

In [None]:
vset.subset_df

In [None]:
labels = np.zeros((5,5))

In [None]:
labels_df = vset.subset_df.loc['four_years'].drop(columns = ['sample start', 'song slice']).copy()

In [None]:
labels_df

In [None]:
np.stack([labels_df.loc[0].to_numpy() for _ in range(3)], axis = -1).shape

In [None]:
targets = np.zeros((7, 15,3))

In [None]:
%%time
# Iterate the validation Dataset once, keeping every (spectrogram, target) pair it yields.
save_spec, save_tar = [], []
for spec, tar in vset:
    save_spec.append(spec)
    save_tar.append(tar)
    print(f'shape of target = {tar.shape}')

In [None]:
# Name the first two collected validation results; "fy"/"mb" presumably stand for the
# four_years and misery_business validation songs -- confirm against the iteration order.
fy_spec, mb_spec = save_spec[0], save_spec[1]
fy_tar, mb_tar = save_tar[0], save_tar[1]

In [None]:
numpysave_fp = 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Experimental/'

In [None]:
# Save each array to its own file under numpysave_fp (np.save appends '.npy' to the name).
# File names mirror the variable names; the third one used to be 'm_spec', which did not
# match either the `mb_spec` variable or the sibling 'mb_tar' file.
np.save(numpysave_fp + 'fy_spec', fy_spec)
np.save(numpysave_fp + 'fy_tar', fy_tar)
np.save(numpysave_fp + 'mb_spec', mb_spec)
np.save(numpysave_fp + 'mb_tar', mb_tar)

In [None]:
np.savez(numpysave_fp + 'zip_uncomp', fy_spec, fy_tar, mb_spec, mb_tar)

In [None]:
np.savez_compressed(numpysave_fp + 'zip_comp', fy_spec, fy_tar, mb_spec, mb_tar)

In [None]:
fy_spec.shape

In [None]:
fy_tar.shape

In [None]:
one_chan = fy_spec[:,:,0]

In [None]:
def expand_spectrogram(spectrogram, pre_context, post_context):
    '''
    Expands a 2D spectrogram into slices of the correct shape to be input into the model

    Window idx of the input becomes one slice holding input columns idx - pre_context
    through idx + post_context (inclusive). Columns that would fall before the first
    window or after the last window are filled with the minimum value of the spectrogram.
    This also holds when a slice overruns BOTH ends (n_windows < 1 + pre_context + post_context),
    a case the previous loop-based version rejected with a shape-mismatch error.

    Args:
        spectrogram [np.array]: 2D array of shape (n_features, n_windows)
        pre_context [int]: number of windows before the current one to include (>= 0)
        post_context [int]: number of windows after the current one to include (>= 0)

    Returns:
        np.array: array of shape (n_windows, n_features, 1 + pre_context + post_context) with the
                  dtype of the input; result[idx, :, pre_context] equals spectrogram[:, idx]

    Raises:
        ValueError: if spectrogram is not 2D, or if either context length is negative
    '''

    spectrogram = np.asarray(spectrogram)
    if spectrogram.ndim != 2:
        raise ValueError(f'spectrogram must be 2D (n_features, n_windows), got {spectrogram.ndim}D')
    if pre_context < 0 or post_context < 0:
        raise ValueError('pre_context and post_context must be non-negative')

    n_features, n_windows = spectrogram.shape
    input_width = 1 + pre_context + post_context

    # np.min is undefined on an empty array; there is nothing to slice, so return an empty result
    if spectrogram.size == 0:
        return np.empty((n_windows, n_features, input_width), dtype = spectrogram.dtype)

    min_value = np.min(spectrogram)

    # Pad the window axis once so that every slice is a plain in-bounds read
    padded = np.pad(spectrogram, ((0, 0), (pre_context, post_context)),
                    mode = 'constant', constant_values = min_value)

    # Row idx of this index table lists the padded columns belonging to window idx
    window_cols = np.arange(n_windows)[:, np.newaxis] + np.arange(input_width)[np.newaxis, :]

    # Gather gives (n_features, n_windows, input_width); reorder to (n_windows, n_features, input_width)
    expanded_spectrogram = padded[:, window_cols].transpose(1, 0, 2)

    return np.ascontiguousarray(expanded_spectrogram)

In [None]:
%%time
exp_spec = expand_spectrogram(one_chan, 5,5)
exp_spec[10000,0,:]

In [None]:
# The original cell ended in a dangling `exp_spec.` (a SyntaxError that stops Run All);
# completed as a shape inspection to match the neighbouring cells.
exp_spec.shape

In [None]:
exp_spec[:,:,0]

In [None]:
one_chan.shape

## Testing train.py

In [1]:
%run train.py

subdirs = ['C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\ancient_tombs', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\best_of_me', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\boulevard_of_broken_dreams', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\cant_be_saved', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\face_down', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\family_tradition', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\fireworks_at_dawn', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\forever_at_last', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\four_years', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\garden_state', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\gunpowder', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\hair_of_the_dog', 'C:/User



tab length = 2882     datatype: <class 'int'>
len(song_slices_tab_indexed) = 2882     datatype of object: <class 'list'>
song_slices_tab_indexed[0].shape = (3392, 2)     datatype of [0]: <class 'numpy.ndarray'>
np.array(song_slices_tab_indexed).shape = (2882,)
len(sample_start_list) = 2882
tab length = 1628     datatype: <class 'int'>
len(song_slices_tab_indexed) = 1628     datatype of object: <class 'list'>
song_slices_tab_indexed[0].shape = (6182, 2)     datatype of [0]: <class 'numpy.ndarray'>
np.array(song_slices_tab_indexed).shape = (1628,)
len(sample_start_list) = 1628
tab length = 1440     datatype: <class 'int'>
len(song_slices_tab_indexed) = 1440     datatype of object: <class 'list'>
song_slices_tab_indexed[0].shape = (7922, 2)     datatype of [0]: <class 'numpy.ndarray'>
np.array(song_slices_tab_indexed).shape = (1440,)
len(sample_start_list) = 1440
tab length = 2110     datatype: <class 'int'>
len(song_slices_tab_indexed) = 2110     datatype of object: <class 'list'>
song_s

---dataframe.describe() without blank_chars---
           tk     BD    SD    HH    RD    CC    C2   LT   MT   HT  CH   C3  SC
count   49917  11829  7551  5351  1718  3275  2331  994  391  298  97  290  26
unique      9      2     3     4     4     3     3    2    2    2   3    2   2
top         +      o     o     x     x     X     X    o    o    o   X    X   X
freq    12461  11399  6752  2907  1104  2016  1891  916  334  262  81  264  20

---Unique values and frequencies by column name---
        +      e      a     1     2     3     4   t   s
tk  12461  12459  12446  3206  3202  3199  2867  47  30

        -      o    O
BD  38088  11399  430

        -     o    g    O
SD  42366  6752  527  272

        -     x     X    o   g
HH  44566  2907  2188  230  26

        -     x    X    b   g
RD  48199  1104  476  113  25

        -     X     x  b
CC  46642  2016  1252  7

        -     X    x  b
C2  47586  1891  439  1

        -    o   O
LT  48923  916  78

        -    o   O
MT  49526  33

  mask = arr == x


---dataframe.describe() without blank_chars---
           tk     BD    SD    HH    at    ac
count   12474  11829  7024  2907  1540  9801
unique      2      1     1     1     1     1
top         c      o     o     x     o     x
freq     9268  11829  7024  2907  1540  9801

---Unique values and frequencies by column name---
        -     c     C
tk  37443  9268  3206

        -      o
BD  38088  11829

        -     o
SD  42893  7024

        -     x
HH  47010  2907

        -     o
at  48377  1540

        -     x
ac  40116  9801

one_hot_encode: col_list before encoding = ['tk', 'BD', 'SD', 'HH', 'at', 'ac']
one_hot_encode: col_list after encoding = ['song slice', 'sample start', 'tk_beat', 'tk_downbeat', 'BD_o', 'SD_o', 'HH_x', 'at_o', 'ac_x']
train.py main(): FullSet_encoded created!
train.py main(): drum_tabber model created!
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (Inp



Epoch: 0 Song  6/23, lr:0.000026, song_loss:0.951794
Dataset class __next__: preprocessing fireworks_at_dawn
Epoch: 0 Song  7/23, lr:0.000030, song_loss:0.935100
Dataset class __next__: preprocessing forever_at_last
Epoch: 0 Song  8/23, lr:0.000035, song_loss:0.946203
Dataset class __next__: preprocessing garden_state
Epoch: 0 Song  9/23, lr:0.000039, song_loss:0.931625
Dataset class __next__: preprocessing gunpowder
Epoch: 0 Song 10/23, lr:0.000043, song_loss:0.851899
Dataset class __next__: preprocessing hair_of_the_dog
Epoch: 0 Song 11/23, lr:0.000048, song_loss:0.802483
Dataset class __next__: preprocessing lungs_like_gallows
Epoch: 0 Song 12/23, lr:0.000052, song_loss:0.785794
Dataset class __next__: preprocessing mookies_last_christmas
Epoch: 0 Song 13/23, lr:0.000057, song_loss:0.795409
Dataset class __next__: preprocessing planning_a_prison_break
Epoch: 0 Song 14/23, lr:0.000061, song_loss:0.772324
Dataset class __next__: preprocessing rollercoaster
Epoch: 0 Song 15/23, lr:0.00



Epoch: 0 Song 20/23, lr:0.000087, song_loss:0.728565
Dataset class __next__: preprocessing the_kill
Epoch: 0 Song 21/23, lr:0.000091, song_loss:0.728903
Dataset class __next__: preprocessing the_rapture
Epoch: 0 Song 22/23, lr:0.000096, song_loss:0.721049
Dataset class __next__: preprocessing wolves_at_the_door
Epoch: 0 Song  0/23, lr:0.000100, song_loss:0.714724
Dataset class __next__: preprocessing four_years
Dataset class __next__: preprocessing misery_business


Epoch:  0 val_loss:0.763304 


Starting Epoch 1/4
Dataset class __next__: preprocessing lungs_like_gallows
Epoch: 1 Song  1/23, lr:0.000100, song_loss:0.708597
Dataset class __next__: preprocessing the_kill
Epoch: 1 Song  2/23, lr:0.000100, song_loss:0.707649
Dataset class __next__: preprocessing the_rapture
Epoch: 1 Song  3/23, lr:0.000100, song_loss:0.708833
Dataset class __next__: preprocessing face_down
Epoch: 1 Song  4/23, lr:0.000099, song_loss:0.704507
Dataset class __next__: preprocessing family_tradition
Epoch: 1 S



Epoch: 1 Song 11/23, lr:0.000094, song_loss:0.694508
Dataset class __next__: preprocessing planning_a_prison_break




Epoch: 1 Song 12/23, lr:0.000093, song_loss:0.694306
Dataset class __next__: preprocessing rollercoaster




Epoch: 1 Song 13/23, lr:0.000092, song_loss:0.694149
Dataset class __next__: preprocessing cant_be_saved




Epoch: 1 Song 14/23, lr:0.000090, song_loss:0.693888
Dataset class __next__: preprocessing best_of_me
Epoch: 1 Song 15/23, lr:0.000089, song_loss:0.693793
Dataset class __next__: preprocessing boulevard_of_broken_dreams
Epoch: 1 Song 16/23, lr:0.000087, song_loss:0.693957
Dataset class __next__: preprocessing mookies_last_christmas




Epoch: 1 Song 17/23, lr:0.000086, song_loss:0.693621
Dataset class __next__: preprocessing wolves_at_the_door
Epoch: 1 Song 18/23, lr:0.000084, song_loss:0.693597
Dataset class __next__: preprocessing forever_at_last
Epoch: 1 Song 19/23, lr:0.000083, song_loss:0.693645
Dataset class __next__: preprocessing sugar_were_going_down
Epoch: 1 Song 20/23, lr:0.000081, song_loss:0.693459
Dataset class __next__: preprocessing surprise_surprise
Epoch: 1 Song 21/23, lr:0.000079, song_loss:0.693453
Dataset class __next__: preprocessing sow
Epoch: 1 Song 22/23, lr:0.000077, song_loss:0.693508
Dataset class __next__: preprocessing the_dark
Epoch: 1 Song  0/23, lr:0.000075, song_loss:0.693361
Dataset class __next__: preprocessing four_years
Dataset class __next__: preprocessing misery_business


Epoch:  1 val_loss:0.718876 


Starting Epoch 2/4
Dataset class __next__: preprocessing the_rapture
Epoch: 2 Song  1/23, lr:0.000073, song_loss:0.693345
Dataset class __next__: preprocessing mookies_last_chri



Epoch: 2 Song  2/23, lr:0.000071, song_loss:0.693328
Dataset class __next__: preprocessing garden_state




Epoch: 2 Song  3/23, lr:0.000069, song_loss:0.693312
Dataset class __next__: preprocessing fireworks_at_dawn
Epoch: 2 Song  4/23, lr:0.000067, song_loss:0.693304
Dataset class __next__: preprocessing gunpowder
Epoch: 2 Song  5/23, lr:0.000065, song_loss:0.693294
Dataset class __next__: preprocessing the_dark




Epoch: 2 Song  6/23, lr:0.000063, song_loss:0.693284
Dataset class __next__: preprocessing sugar_were_going_down
Epoch: 2 Song  7/23, lr:0.000061, song_loss:0.693264
Dataset class __next__: preprocessing lungs_like_gallows
Epoch: 2 Song  8/23, lr:0.000058, song_loss:0.693255
Dataset class __next__: preprocessing surprise_surprise




Epoch: 2 Song  9/23, lr:0.000056, song_loss:0.693262
Dataset class __next__: preprocessing face_down
Epoch: 2 Song 10/23, lr:0.000054, song_loss:0.693241
Dataset class __next__: preprocessing planning_a_prison_break
Epoch: 2 Song 11/23, lr:0.000052, song_loss:0.693245
Dataset class __next__: preprocessing forever_at_last




Epoch: 2 Song 12/23, lr:0.000049, song_loss:0.693229
Dataset class __next__: preprocessing ancient_tombs
Epoch: 2 Song 13/23, lr:0.000047, song_loss:0.693223
Dataset class __next__: preprocessing best_of_me
Epoch: 2 Song 14/23, lr:0.000045, song_loss:0.693213
Dataset class __next__: preprocessing rollercoaster
Epoch: 2 Song 15/23, lr:0.000043, song_loss:0.693209
Dataset class __next__: preprocessing sow
Epoch: 2 Song 16/23, lr:0.000040, song_loss:0.693242
Dataset class __next__: preprocessing cant_be_saved
Epoch: 2 Song 17/23, lr:0.000038, song_loss:0.693200
Dataset class __next__: preprocessing thats_what_you_get




Epoch: 2 Song 18/23, lr:0.000036, song_loss:0.693203
Dataset class __next__: preprocessing wolves_at_the_door
Epoch: 2 Song 19/23, lr:0.000034, song_loss:0.693193
Dataset class __next__: preprocessing the_kill
Epoch: 2 Song 20/23, lr:0.000032, song_loss:0.693198
Dataset class __next__: preprocessing boulevard_of_broken_dreams
Epoch: 2 Song 21/23, lr:0.000030, song_loss:0.693193
Dataset class __next__: preprocessing family_tradition




Epoch: 2 Song 22/23, lr:0.000028, song_loss:0.693193
Dataset class __next__: preprocessing hair_of_the_dog
Epoch: 2 Song  0/23, lr:0.000026, song_loss:0.693193
Dataset class __next__: preprocessing four_years
Dataset class __next__: preprocessing misery_business


Epoch:  2 val_loss:0.738158 


Starting Epoch 3/4
Dataset class __next__: preprocessing sow
Epoch: 3 Song  1/23, lr:0.000024, song_loss:0.693193
Dataset class __next__: preprocessing cant_be_saved
Epoch: 3 Song  2/23, lr:0.000022, song_loss:0.693181
Dataset class __next__: preprocessing hair_of_the_dog


KeyboardInterrupt: 

In [29]:
int1 = 0
int2 = 100
Song = 1
lr= 0.00023453
song_l = 234.4566

In [33]:
print('Epoch: {:2}/{} Song{:3}, lr:{:.6f}, song_loss:{:8.2f}'.format(int1, int2, Song, lr, song_l))

Epoch:  0/100 Song  1, lr:0.000235, song_loss:  234.46
