This Jupyter notebook is meant to be run in Google Colab, where it serves as the main entry point for training models.

In [1]:
# primary importing code block to utilize the python files
import os
import json
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pydub import AudioSegment   # main class from pydub package used to upload mp3 into Python and then get a NumPy array
import IPython.display as ipd    # ability to play audio in Jupyter Notebooks if needed
import librosa as lb             # loads the librosa package
import librosa.display
from src.configs import *
from src.utils import MusicAlignedTab, create_FullSetMAT
from src.dataset import *

In [None]:
# Build the MusicAlignedTab for a single song and keep its MAT attribute as the
# working frame (it is passed to clean_labels and .drop() in later cells, so it
# is presumably a pandas DataFrame).
mat = MusicAlignedTab('mookies_last_christmas')
df = mat.MAT

In [None]:
# Clean the tab labels, then collapse them into fewer classes, printing a label
# summary after each step so the two stages can be compared.
# NOTE(review): `df` is re-bound to its own processed result, so re-running this
# cell feeds already-processed labels back through clean_labels/collapse_class.
df = clean_labels(df)
MusicAlignedTab.labels_summary(df)
df = collapse_class(df, keep_dynamics = False, keep_bells = False, keep_toms_separate = False, hihat_classes=1, cymbal_classes=1)
MusicAlignedTab.labels_summary(df)

In [None]:
# One-hot encode the collapsed labels, then inspect the resulting column names
# and the first rows.
encode_df = one_hot_encode(df)
print(encode_df.columns)
encode_df.head()

In [None]:
# Build the target array and the target_dict that maps a target row index to its
# label name (see how the next cell iterates it), timing the call.
# NOTE(review): `S` is not defined in any visible cell of this notebook, so this
# fails on a fresh kernel — presumably a spectrogram from a removed cell; confirm.
%time targets, target_dict = create_targets(S, encode_df)

In [None]:
# For every (row index, label name) pair in target_dict, count the windows whose
# value in slot 0 of the last axis is non-zero, then print the number of windows
# (the size of axis 1).
for idx, val in target_dict.items():
    print(f'total windows that {val} are labeled 1 = {np.count_nonzero(targets[idx,:,0])}')
print(f'total windows = {targets.shape[1]}')

In [None]:
# Show slot 1 of the targets' last axis as an image
# (rows = target index, columns = window).
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(targets[:, :, 1])
plt.show()

In [None]:
# Print the label summary for the one-hot encoded frame.
MusicAlignedTab.labels_summary(encode_df)

In [None]:
# Scratch check of 3-D indexing on an all-zero integer array of shape (7, 10, 3).
# It is stored under its own name so that it does not overwrite the real
# `targets` returned by create_targets, which earlier cells still read.
dummy_targets = np.zeros((7, 10, 3), dtype=int)
print(dummy_targets.shape)
# Taking index 0 on the middle axis leaves a (7, 3) slice.
dummy_targets[:, 0, :]

In [None]:
# Describe only the label columns; cells equal to '-' are masked out (they become
# NaN under boolean-frame indexing) so describe() only counts real labels.
label_cols = df.drop(columns=['song slice', 'sample start'])
label_cols[label_cols != '-'].describe()

In [None]:
# Spot-check the tab/audio alignment for the BD, SD and CC labels.
# NOTE(review): the meaning of the second argument (14) is not visible in this
# notebook — see MusicAlignedTab.random_alignment_checker to confirm.
mat.random_alignment_checker(['BD', 'SD', 'CC'], 14)

In [None]:
# Display index 0 of S's last axis with a time x-axis and a mel y-axis.
# NOTE(review): `S` is not defined in any visible cell, and sr is hard-coded to
# 44100 rather than taken from the project configuration — confirm both.
librosa.display.specshow(S[:,:,0], sr=44100, x_axis='time', y_axis = 'mel')

### FullSet Testing

In [None]:
def play(samples, sr=44100):
    '''
    Wrap audio samples in an IPython audio player so a cell can end with play(samples).

    samples: audio data handed straight to ipd.Audio
    sr: sample rate passed as the player's rate (defaults to 44100)

    Returns the ipd.Audio object; Jupyter renders it when it is the cell's last expression.
    '''
    audio_player = ipd.Audio(data=samples, rate=sr)
    return audio_player

In [2]:
# Build the combined frame for every song under SONGS_PATH, then run it through
# the same pipeline as the single-song cells: clean -> collapse -> one-hot encode.
# Each stage gets its own name, so the raw FullSet stays available.
FullSet = create_FullSetMAT(SONGS_PATH)
FullSet_clean = clean_labels(FullSet)
# Label summary before collapsing the classes.
MusicAlignedTab.labels_summary(FullSet_clean)
FullSet_collapse = collapse_class(FullSet_clean, keep_dynamics = False, keep_bells = False, keep_toms_separate = False, hihat_classes=1, cymbal_classes=1)
# Label summary after collapsing, for comparison with the one above.
MusicAlignedTab.labels_summary(FullSet_collapse)
FullSet_encoded = one_hot_encode(FullSet_collapse)

subdirs = ['C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\ancient_tombs', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\best_of_me', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\boulevard_of_broken_dreams', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\cant_be_saved', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\face_down', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\family_tradition', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\fireworks_at_dawn', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\forever_at_last', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\four_years', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\garden_state', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\gunpowder', 'C:/Users/Thomas/Python Projects/Drum-Tabber-Support-Data/Songs\\hair_of_the_dog', 'C:/User



All the following prints are from combine_tab_and_song function:
first drum note row = 0
# of song slices post fdn = 3062
# of song slices pre fdn = 3
Produced number of song slices = 3065
Expected number of song slices (should be same for non-triplet songs) = 3068.207528344671
tab length = 2882     datatype: <class 'int'>
len(song_slices_tab_indexed) = 2882     datatype of object: <class 'list'>
song_slices_tab_indexed[0].shape = (3392, 2)     datatype of [0]: <class 'numpy.ndarray'>
np.array(song_slices_tab_indexed).shape = (2882,)
len(sample_start_list) = 2882
All the following prints are from combine_tab_and_song function:
first drum note row = 56
# of song slices post fdn = 1680
# of song slices pre fdn = 162
Produced number of song slices = 1842
Expected number of song slices (should be same for non-triplet songs) = 1842.9039455782313
tab length = 1628     datatype: <class 'int'>
len(song_slices_tab_indexed) = 1628     datatype of object: <class 'list'>
song_slices_tab_indexed[0]

All the following prints are from combine_tab_and_song function:
first drum note row = 0
# of song slices post fdn = 3860
# of song slices pre fdn = 0
Produced number of song slices = 3860
Expected number of song slices (should be same for non-triplet songs) = 3859.859026455026
tab length = 3792     datatype: <class 'int'>
len(song_slices_tab_indexed) = 3792     datatype of object: <class 'list'>
song_slices_tab_indexed[0].shape = (3634, 2)     datatype of [0]: <class 'numpy.ndarray'>
np.array(song_slices_tab_indexed).shape = (3792,)
len(sample_start_list) = 3792
All the following prints are from combine_tab_and_song function:
first drum note row = 0
# of song slices post fdn = 1863
# of song slices pre fdn = 1
Produced number of song slices = 1864
Expected number of song slices (should be same for non-triplet songs) = 1865.376
tab length = 1826     datatype: <class 'int'>
len(song_slices_tab_indexed) = 1826     datatype of object: <class 'list'>
song_slices_tab_indexed[0].shape = (612

  mask = arr == x


---dataframe.describe() without blank_chars---
           tk     BD    SD    HH    at    ac
count   12474  11829  7024  2907  1540  9801
unique      2      1     1     1     1     1
top         c      o     o     x     o     x
freq     9268  11829  7024  2907  1540  9801

---Unique values and frequencies by column name---
        -     c     C
tk  37443  9268  3206

        -      o
BD  38088  11829

        -     o
SD  42893  7024

        -     x
HH  47010  2907

        -     o
at  48377  1540

        -     x
ac  40116  9801

one_hot_encode: col_list before encoding = ['tk', 'BD', 'SD', 'HH', 'at', 'ac']
one_hot_encode: col_list after encoding = ['song slice', 'sample start', 'tk_beat', 'tk_downbeat', 'BD_o', 'SD_o', 'HH_x', 'at_o', 'ac_x']


In [3]:
# Create the 'train' Dataset from the fully encoded frame.
tset = Dataset('train', FullSet_encoded)

In [11]:
# Position in tset.song_list of the song inspected in the next cell.
song_idx = 1

In [12]:
# Rebuild one song's audio from its slices and run it through the dataset's
# audio augmentation.
song_df = tset.subset_df.loc[tset.song_list[song_idx]].copy()
# Stacking the slices and transposing gives a single array of shape (channels, samples).
song = np.vstack(song_df['song slice'].to_numpy()).T
mono_song = lb.core.to_mono(song)
# The mono mix always comes first; the two rows of `song` (L and R) follow only
# when INCLUDE_LR_CHANNELS is set.
channels = [mono_song] + ([song[0, :], song[1, :]] if INCLUDE_LR_CHANNELS else [])
aug_channels = tset.augment_audio_cp(channels, tset.aug_comp, sr=SAMPLE_RATE)

NameError: name 'mono_song' is not defined

In [10]:
# Parse the first song in tset.song_list; the result is unpacked into a
# spectrogram, a target and a target dictionary.
# NOTE(review): this uses index 0 rather than the `song_idx` set above — confirm
# that is intended.
spectrogram, target, target_dict = tset.parse_song(tset.song_list[0])

{'should_apply': None}

In [None]:
# Preview the first rows of the encoded full set.
FullSet_encoded.head()

In [None]:
# Select the rows of a single song by its index label.
FullSet_encoded.loc['ancient_tombs']

In [None]:
# Copy the rows of two songs into a small working subset, so that later edits
# cannot touch FullSet_encoded.
subset_df = FullSet_encoded.loc[['ancient_tombs', 'sow']].copy()

In [None]:
# Display the two-song subset.
subset_df

In [None]:
# Independent copy of one song's rows; the cells below modify it in place.
ancient_tombs = subset_df.loc['ancient_tombs'].copy()

In [None]:
# Display the single-song frame before 'sample start' is rebased.
ancient_tombs

In [None]:
# Rebase 'sample start' so the row labelled 0 starts at sample 0. The reference
# value is read once and subtracted from the whole column, instead of being
# looked up again for every row inside an apply/lambda.
first_sample_start = ancient_tombs.at[0, 'sample start']
ancient_tombs['sample start'] = ancient_tombs['sample start'] - first_sample_start

In [None]:
# Check the first rows after rebasing 'sample start'.
ancient_tombs.head()

In [None]:
# Pull the 'song slice' column out as a NumPy array.
full_slice = ancient_tombs['song slice'].to_numpy()

In [None]:
# Inspect its shape (presumably one entry per row of the frame — confirm).
full_slice.shape

In [None]:
# Stack the slices end to end and transpose. Rows 0 and 1 of the result are used
# as the left and right channels in a later cell.
re_song = np.vstack(ancient_tombs['song slice'].to_numpy()).T

In [None]:
# Listen to the reconstructed song.
play(re_song)

In [None]:
# Inspect the shape of the reconstructed audio array.
re_song.shape

In [None]:
# Mix the reconstructed audio down to mono with librosa.
mono_song = lb.core.to_mono(re_song)

In [None]:
# Row 0 is kept as the left channel and row 1 as the right channel.
L_song, R_song = re_song[0, :], re_song[1, :]

In [None]:
# Inspect the shape of the left-channel array.
L_song.shape

In [None]:
# Channel list in mono, left, right order.
MLR_song = [ mono_song, L_song, R_song]

In [None]:
# Check the type of the first (mono) entry.
type(MLR_song[0])

In [None]:
# Preview the frame again before the shift experiment below.
ancient_tombs.head()

In [None]:
# Pick a random row at which to split the song. Both bounds of randint are
# inclusive, so the split point is never the very first row.
n_rows = len(ancient_tombs)
shift_idx = random.randint(1, n_rows - 1)
print(shift_idx, n_rows)

In [None]:
# Circularly shift the song: the rows from shift_idx onward come first, followed
# by the rows that used to precede them. pd.concat replaces DataFrame.append,
# which was removed in pandas 2.0.
tail_rows = ancient_tombs[shift_idx:]
head_rows = ancient_tombs[:shift_idx]
shifted_song = pd.concat([tail_rows, head_rows], ignore_index=True)
print(len(tail_rows), len(head_rows))
# Recompute every slice's start as the number of samples in the slices that now
# come before it. The lambda that used to be here had no body (a SyntaxError).
# NOTE(review): assumes each 'song slice' is an array whose first axis is samples
# and that 'sample start' should index into the re-stacked audio — confirm.
slice_lengths = shifted_song['song slice'].map(len)
shifted_song['sample start'] = slice_lengths.cumsum() - slice_lengths

In [None]:
# Preview the first rows of the shifted song.
shifted_song.head()

In [None]:
# Split the song at shift_idx. Both halves are explicit copies so that the
# column assignments in the following cells modify independent frames instead of
# slices of ancient_tombs (which raises SettingWithCopyWarning).
first_part = ancient_tombs[:shift_idx].copy()
second_part = ancient_tombs[shift_idx:].copy()
# The split above is positional, so the first row of the second part is read
# positionally as well; a label lookup with shift_idx is only equivalent when the
# index is the default 0..n-1 range.
sample_start_shift = second_part['sample start'].iloc[0]
print(sample_start_shift)
second_part.head()

In [None]:
# Rebase the second part so that it starts at sample 0. The subtraction is
# vectorized, and assign() builds a new frame, so nothing is written into a
# slice of ancient_tombs (no SettingWithCopyWarning).
# NOTE(review): re-running this cell subtracts the shift a second time.
second_part = second_part.assign(**{'sample start': second_part['sample start'] - sample_start_shift})

In [None]:
# After the shift the first part plays right after the second part, so its
# starts must move forward by the second part's total length in samples. Adding
# sample_start_shift (the first part's own length) is only right when the split
# falls exactly at the midpoint of the song.
# NOTE(review): assumes each 'song slice' is an array whose first axis is samples
# and that the first part's 'sample start' values begin at 0 — confirm.
second_part_samples = sum(len(song_slice) for song_slice in second_part['song slice'])
# assign() builds a new frame, so nothing is written into a slice of ancient_tombs.
# NOTE(review): re-running this cell adds the offset a second time.
first_part = first_part.assign(**{'sample start': first_part['sample start'] + second_part_samples})

In [None]:
# Check the first part's updated 'sample start' values.
first_part.head()

In [None]:
# Put the second part in front of the first and renumber the rows from 0.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
shifted_song = pd.concat([second_part, first_part], ignore_index=True)

In [None]:
# Re-stack the shifted slices (same stack-and-transpose as for re_song) and
# listen to the result.
play(np.vstack(shifted_song['song slice'].to_numpy()).T)

In [None]:
# Preview the first rows of the recombined, shifted song.
shifted_song.head()