This Jupyter notebook is intended to be run in Google Colab and is the main entry point for training models there.

In [1]:
# primary importing code block to utilize the python files
import os
import json
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pydub import AudioSegment   # main class from pydub package used to upload mp3 into Python and then get a NumPy array
import IPython.display as ipd    # ability to play audio in Jupyter Notebooks if needed
import librosa as lb             # loads the librosa package
import librosa.display
from src.configs import *
from src.utils import MusicAlignedTab, create_FullSet_df, one_hot_encode, collapse_class, clean_labels
from src.dataset import Dataset

In [None]:
mat = MusicAlignedTab('mookies_last_christmas')
df = mat.MAT

In [None]:
df = clean_labels(df)
MusicAlignedTab.labels_summary(df)
df = collapse_class(df, keep_dynamics = False, keep_bells = False, keep_toms_separate = False, hihat_classes=1, cymbal_classes=1)
MusicAlignedTab.labels_summary(df)

In [None]:
encode_df = one_hot_encode(df)
print(encode_df.columns)
encode_df.head()

In [None]:
# Time the construction of `targets` and `target_dict`, which the following cells inspect.
# NOTE(review): `create_targets` and `S` are not defined or explicitly imported in any cell
# shown here; unless `from src.configs import *` supplies them, this cell raises NameError
# on a fresh kernel — confirm where they come from.
%time targets, target_dict = create_targets(S, encode_df)

In [None]:
for idx, val in target_dict.items():
    print(f'total windows that {val} are labeled 1 = {np.count_nonzero(targets[idx,:,0])}')
print(f'total windows = {targets.shape[1]}')

In [None]:
# Draw the index-1 slice of the last axis of `targets` as an image on a 10x10-inch figure.
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(targets[:, :, 1])
plt.show()

In [None]:
MusicAlignedTab.labels_summary(encode_df)

In [None]:
targets = np.zeros((7, 10, 3), dtype=int)
print(targets.shape)
targets[:, 0, :]

In [None]:
# Describe the label columns only; entries equal to '-' are masked out before describe().
(
    df.drop(columns=['song slice', 'sample start'])
      .pipe(lambda labels: labels[labels != '-'])
      .describe()
)

In [None]:
mat.random_alignment_checker(['BD', 'SD', 'CC'], 14)

In [None]:
librosa.display.specshow(S[:,:,0], sr=44100, x_axis='time', y_axis = 'mel')

### FullSet Testing

In [2]:
def play(samples, sr=44100):
    '''
    Convenience wrapper so that play(samples) is all that is needed to get an
    audio player for a clip inside the notebook.

    Args:
        samples: audio data handed straight to IPython.display.Audio
        sr [int]: sample rate passed to Audio as `rate` (default 44100)

    Returns:
        IPython.display.Audio: the player object built from `samples`
    '''
    player = ipd.Audio(samples, rate=sr)
    return player

In [3]:
# Build the FullSet dataframe from SONGS_PATH, then clean, collapse and one-hot encode its
# labels. labels_summary is called after the clean step and again after the collapse step.
FullSet = create_FullSet_df(SONGS_PATH)
FullSet_clean = clean_labels(FullSet)
MusicAlignedTab.labels_summary(FullSet_clean)
FullSet_collapse = collapse_class(
    FullSet_clean,
    keep_dynamics=False,
    keep_bells=False,
    keep_toms_separate=False,
    hihat_classes=1,
    cymbal_classes=1,
)
MusicAlignedTab.labels_summary(FullSet_collapse)
FullSet_encoded = one_hot_encode(FullSet_collapse)

subdirs = ['C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\ancient_tombs', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\best_of_me', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\boulevard_of_broken_dreams', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\cant_be_saved', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\face_down', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\family_tradition', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\fireworks_at_dawn', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\forever_at_last', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\four_years', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\garden_state', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\gunpowder', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\hair_of_the_dog', 'C:/User



All the following prints are from combine_tab_and_song function:
first drum note row = 0
# of song slices post fdn = 3062
# of song slices pre fdn = 3
Produced number of song slices = 3065
Expected number of song slices (should be same for non-triplet songs) = 3068.207528344671
tab length = 2882     datatype: <class 'int'>
len(song_slices_tab_indexed) = 2882     datatype of object: <class 'list'>
song_slices_tab_indexed[0].shape = (3392, 2)     datatype of [0]: <class 'numpy.ndarray'>
np.array(song_slices_tab_indexed).shape = (2882,)
len(sample_start_list) = 2882
All the following prints are from combine_tab_and_song function:
first drum note row = 56
# of song slices post fdn = 1680
# of song slices pre fdn = 162
Produced number of song slices = 1842
Expected number of song slices (should be same for non-triplet songs) = 1842.9039455782313
tab length = 1628     datatype: <class 'int'>
len(song_slices_tab_indexed) = 1628     datatype of object: <class 'list'>
song_slices_tab_indexed[0]

All the following prints are from combine_tab_and_song function:
first drum note row = 0
# of song slices post fdn = 3860
# of song slices pre fdn = 0
Produced number of song slices = 3860
Expected number of song slices (should be same for non-triplet songs) = 3859.859026455026
tab length = 3792     datatype: <class 'int'>
len(song_slices_tab_indexed) = 3792     datatype of object: <class 'list'>
song_slices_tab_indexed[0].shape = (3634, 2)     datatype of [0]: <class 'numpy.ndarray'>
np.array(song_slices_tab_indexed).shape = (3792,)
len(sample_start_list) = 3792
All the following prints are from combine_tab_and_song function:
first drum note row = 0
# of song slices post fdn = 1863
# of song slices pre fdn = 1
Produced number of song slices = 1864
Expected number of song slices (should be same for non-triplet songs) = 1865.376
tab length = 1826     datatype: <class 'int'>
len(song_slices_tab_indexed) = 1826     datatype of object: <class 'list'>
song_slices_tab_indexed[0].shape = (612

  mask = arr == x


---dataframe.describe() without blank_chars---
           tk     BD    SD    HH    at    ac
count   12474  11829  7024  2907  1540  9801
unique      2      1     1     1     1     1
top         c      o     o     x     o     x
freq     9268  11829  7024  2907  1540  9801

---Unique values and frequencies by column name---
        -     c     C
tk  37443  9268  3206

        -      o
BD  38088  11829

        -     o
SD  42893  7024

        -     x
HH  47010  2907

        -     o
at  48377  1540

        -     x
ac  40116  9801

one_hot_encode: col_list before encoding = ['tk', 'BD', 'SD', 'HH', 'at', 'ac']
one_hot_encode: col_list after encoding = ['song slice', 'sample start', 'tk_beat', 'tk_downbeat', 'BD_o', 'SD_o', 'HH_x', 'at_o', 'ac_x']


In [4]:
tset = Dataset('train', FullSet_encoded)
vset = Dataset('val', FullSet_encoded)

In [None]:
song_idx = 1

In [None]:
%%time
song_df = tset.subset_df.loc[tset.song_list[song_idx]].copy()
song = np.vstack(song_df['song slice'].to_numpy()).T   # stacks the song slices back into a single numpy array of shape (channels, samples)
mono_song = lb.core.to_mono(song)
channels = [mono_song]              # channels is a list of either [mono_song] or [mono, L_song, R_song]
if INCLUDE_LR_CHANNELS:             # appending the LR channels to the channels variable
    channels.append(song[0,:])
    channels.append(song[1,:])
aug_channels = tset.augment_audio_cp(channels, tset.aug_comp, sr=SAMPLE_RATE)

In [None]:
play(aug_channels[0])

In [None]:
# Switch off the extra L/R channels, then time the preprocessing of one training song.
# NOTE(review): this only rebinds INCLUDE_LR_CHANNELS in the notebook's own namespace. If
# Dataset.preprocess_song reads the flag from its own module (e.g. through its own import
# of src.configs), the assignment here has no effect on it — confirm.
INCLUDE_LR_CHANNELS = False
%time spectrogram, target, target_dict = tset.preprocess_song(tset.song_list[7])

In [None]:
print(spectrogram.shape)
print(target.shape)
print(target_dict)
tset.song_list[7]

In [None]:
vset.subset_df

In [None]:
labels = np.zeros((5,5))

In [None]:
labels_df = vset.subset_df.loc['four_years'].drop(columns = ['sample start', 'song slice']).copy()

In [None]:
labels_df

In [None]:
np.stack([labels_df.loc[0].to_numpy() for _ in range(3)], axis = -1).shape

In [None]:
targets = np.zeros((7, 15,3))

In [10]:
%%time
save_spec = []
save_tar = []
for spec, tar in vset:
    save_spec.append(spec)
    save_tar.append(tar)
    print(f'shape of target = {tar.shape}')

four_years
create_spectrogram: spectro.shape = (150, 25001)
create_spectrogram: spectro.shape = (150, 25001)
create_spectrogram: spectro.shape = (150, 25001)
create_spectrogram: spectrogram.shape after ftd = (150, 25001, 3)
shape of target = (7, 25001, 3)
misery_business
create_spectrogram: spectro.shape = (150, 20804)
create_spectrogram: spectro.shape = (150, 20804)
create_spectrogram: spectro.shape = (150, 20804)
create_spectrogram: spectrogram.shape after ftd = (150, 20804, 3)
shape of target = (7, 20804, 3)
Wall time: 14.5 s


In [13]:
fy_spec = save_spec[0]
mb_spec = save_spec[1]
fy_tar = save_tar[0]
mb_tar = save_tar[1]

In [23]:
numpysave_fp = 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Experimental/'

In [24]:
# Persist each array as its own .npy file (np.save appends the extension when missing).
# File names mirror the variable names so each file can be matched back to its array.
np.save(numpysave_fp + 'fy_spec', fy_spec)
np.save(numpysave_fp + 'fy_tar', fy_tar)
np.save(numpysave_fp + 'mb_spec', mb_spec)   # was 'm_spec', which did not match the other three names
np.save(numpysave_fp + 'mb_tar', mb_tar)

In [17]:
# Bundle the four arrays into a single uncompressed .npz archive. The arrays are passed
# positionally, so np.savez stores them under the default keys arr_0 ... arr_3 in this order.
np.savez(numpysave_fp + 'zip_uncomp', fy_spec, fy_tar, mb_spec, mb_tar)

In [18]:
# Same bundle as a compressed .npz archive. The arrays are passed positionally, so they are
# stored under the default keys arr_0 ... arr_3 in this order.
np.savez_compressed(numpysave_fp + 'zip_comp', fy_spec, fy_tar, mb_spec, mb_tar)

In [21]:
fy_spec.shape

(150, 25001, 3)

In [22]:
fy_tar.shape

(7, 25001, 3)

In [25]:
from datetime import date

In [26]:
date.today()

datetime.date(2020, 10, 27)

In [29]:
date.today().strftime("-%b-%d")

'-Oct-27'

In [30]:
ints = np.array([1,2,3,4,5,6,7])

In [31]:
print(ints)

[1 2 3 4 5 6 7]


In [32]:
ints[-5:]

array([3, 4, 5, 6, 7])

In [43]:
np.zeros(shape = (2,2))

array([[0., 0.],
       [0., 0.]])

In [44]:
fy_spec.shape

(150, 25001, 3)

In [46]:
one_chan = fy_spec[:,:,0]

In [53]:
one_chan[1:3, 1:3]

array([[-3.976284 , -0.7534218],
       [-1.8763809, -0.791687 ]], dtype=float32)

In [57]:
def expand_spectrogram(spectrogram, pre_context, post_context):
    '''
    Expands a 2D spectrogram into slices of the correct shape to be input into the model.

    Every window gets its own slice made of the pre_context windows before it, the window
    itself, and the post_context windows after it. Positions that would fall before the
    first window or after the last window are left as zeros.

    Args:
        spectrogram [np.array]: 2D array of shape (n_features, n_windows)
        pre_context [int]: number of windows before the current one to include (>= 0)
        post_context [int]: number of windows after the current one to include (>= 0)

    Returns:
        np.array: float64 array of shape (n_features, 1 + pre_context + post_context, n_windows),
                  where [:, :, idx] is the context slice for window idx

    Raises:
        ValueError: if spectrogram is not 2D or if either context size is negative
    '''
    spectrogram = np.asarray(spectrogram)
    if spectrogram.ndim != 2:
        raise ValueError(f'spectrogram must be 2D (n_features, n_windows), got shape {spectrogram.shape}')
    if pre_context < 0 or post_context < 0:
        raise ValueError('pre_context and post_context must be non-negative')

    n_features, n_windows = spectrogram.shape
    input_width = 1 + pre_context + post_context

    # Zero-pad along the window axis once, so every window (including the first and last
    # few) has a full-width neighbourhood and no per-edge special cases are needed.
    padded = np.zeros(shape=(n_features, n_windows + pre_context + post_context))
    padded[:, pre_context:pre_context + n_windows] = spectrogram

    expanded_spectrogram = np.zeros(shape=(n_features, input_width, n_windows))

    # expanded[:, offset, idx] == padded[:, idx + offset]; filling one context offset at a
    # time loops over input_width instead of over every window.
    for offset in range(input_width):
        expanded_spectrogram[:, offset, :] = padded[:, offset:offset + n_windows]

    return expanded_spectrogram