# Create standard subdivisions for Harmonix

## Load libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import os

## Setup Colab

In [2]:
COLAB = 'google.colab' in str(get_ipython())

if COLAB:
    from google.colab import drive
    drive.mount('/content/drive')
    ROOT = '/content/drive/MyDrive/fyp/collabs/'
else:
    ROOT = os.path.join(os.getcwd(), '..', '..') 

## Load Colab libraries

In [3]:
if COLAB:
    !pip install pickle5
    import pickle5 as pickle

## Load custom modules

In [4]:
from nnssa.constants import *
from nnssa.sub_divisions import *

## Progress bars!

In [5]:
from tqdm import tqdm
tqdm.pandas()

  from pandas import Panel


## Load dataset

In [6]:
harmonix = pd.read_pickle(os.path.join(ROOT, BEATS_DIR, 'harmonix.p'))
harmonix.head()

Unnamed: 0,File,Duration,BPM,Time_Signature,Genre,Labels,Sections,Beat_times,Beat_frames
0,0001_12step,142.47,113,4|4,R&B,"[0.0, 8.495567999999999, 25.486704, 42.4753280...","[intro, verse, chorus, verse, chorus, verse, c...","[0.0, 0.5309729999999999, 1.0619459999999998, ...","[0, 11, 22, 34, 45, 57, 68, 80, 91, 102, 114, ..."
1,0003_6foot7foot,157.347,84,4|4,Hip-Hop,"[2.857108, 8.571396, 31.428548, 37.14283599999...","[chorus, verse, chorus, verse, chorus, verse, ...","[2.857108, 3.571394, 4.28568, 4.99996600000000...","[61, 76, 92, 107, 123, 138, 153, 169, 184, 199..."
2,0004_abc,180.955,94,4|4,Pop-Rock,"[2.666656, 28.300542999999998, 58.263180000000...","[verse, chorus, verse, chorus, bridge, chorus,...","[2.666656, 3.238084, 3.952369, 4.597529, 5.242...","[57, 69, 85, 98, 112, 126, 140, 154, 169, 183,..."
3,0006_aint2proud2beg,181.034,105,4|4,R&B,"[0.0, 27.4652, 45.203726, 63.518522999999995, ...","[intro, verse, transition, chorus, verse, brid...","[0.0, 0.572203, 1.144406, 1.716609, 2.288812, ...","[0, 12, 24, 36, 49, 61, 73, 86, 98, 110, 123, ..."
4,0008_america,222.683,136,4|4,Metal,"[3.871208, 10.56504, 33.217138, 56.85190400000...","[intro, verse, verse, bridge, solo/instrumenta...","[3.871208, 4.359011, 4.846814, 5.338616, 5.830...","[83, 93, 104, 114, 125, 136, 146, 156, 166, 17..."


## Load spectrograms

In [7]:
harmonix['Songs'] = harmonix.File.progress_map(lambda f: np.load(os.path.join(ROOT, HARMONIX_MELS, f + '-mel.npy')))

100%|██████████| 885/885 [00:23<00:00, 37.26it/s]


## Helper functions

In [8]:
def sync(row):
    mel = row.Songs
    frames = row.Beat_frames
    return librosa.util.sync(mel, frames) 

#### Smear the labels
This is a form of oversampling, we add triangular weights  
around the ground truth labels and then set the labels to  
true. This function changes quite a bit so it is left in  
the notebook for easier access

In [9]:
def label_smear(row):
    weights = np.ones(shape=(row.Binary_Labels.shape))
    labels = row.Binary_Labels.copy()
    truthy = np.where(labels == 1)[0]
    for t in (truthy - 1):
        if (t > 0):
            labels[t] = 2
            weights[t] = 0.5
    for t in (truthy + 1):
        if (t  < len(labels)):
            labels[t] = 1
            weights[t] = 0.5
    mid_segments = (truthy[1:] + truthy[:-1]) / 2
    for l in ((truthy[1:] + truthy[:-1]) / 2):
        labels[t] = -1
    labels[truthy] = 1
    weights[truthy] = 3
    return labels, weights

## Sync beats

In [10]:
harmonix['Sync'] = harmonix.progress_apply(sync, axis=1)

100%|██████████| 885/885 [00:05<00:00, 170.43it/s]


## Process Bars

In [11]:
bars = harmonix.copy()

In [12]:
bars['Sub_Divisions'] = bars['Sync'].progress_map(lambda t: create_spec_windows(t, 16, 4))

100%|██████████| 885/885 [00:05<00:00, 171.19it/s]


### Standard scale spectrograms

In [13]:
bars['Sub_Divisions'] = bars['Sub_Divisions'].progress_map(normalize)

100%|██████████| 885/885 [00:27<00:00, 31.77it/s]


### Line-up labels and context windows

In [14]:
bars['Binary_Labels'] = bars.progress_apply(lambda t: subdivide_labels(t, 16, 4), axis=1)

100%|██████████| 885/885 [00:00<00:00, 4948.57it/s]


#### Check that shape is the same

In [15]:
bars_sample = bars.iloc[1]
bars_sample['Sub_Divisions'].shape, bars_sample['Binary_Labels'].shape

((52, 80, 4, 33), (52,))

### Apply label smearing

In [16]:
bars['Weighted_Labels'] = bars.progress_apply(label_smear, axis=1)

100%|██████████| 885/885 [00:00<00:00, 17649.86it/s]


In [17]:
bars['Weights'] = bars['Weighted_Labels'].progress_map(lambda t: t[1])

100%|██████████| 885/885 [00:00<00:00, 550735.76it/s]


In [18]:
bars['Weighted_Labels'] = bars['Weighted_Labels'].progress_map(lambda t: t[0])

100%|██████████| 885/885 [00:00<00:00, 679223.98it/s]


#### Check that labels line up

In [19]:
bars_sample = bars.iloc[0]
truthy = np.where(bars_sample['Binary_Labels'] == 1)[0] * 4
beat_times = bars_sample['Beat_times']
times = beat_times[truthy]
print(f"Beat label times: {times} \nLabel times: {bars_sample['Labels']}")

Beat label times: [  0.         8.495568  25.486704  42.475328  59.47014   78.594744
  95.585708 112.578716 129.565932] 
Label times: [  0.         8.495568  25.486704  42.475328  59.47014   78.594744
  95.585708 112.578716 129.565932]


#### Check the number of truthy labels

In [20]:
bars_sample = bars.iloc[4]
weighted = np.where(bars_sample['Weighted_Labels'] == 1)[0]
normal = np.where(bars_sample['Binary_Labels'] == 1)[0]
weighted.shape, normal.shape, bars_sample['Labels'].shape

((21,), (11,), (11,))

### Create IDS vector

In [21]:
bars['IDS'] = bars.progress_apply(lambda r: np.full(r.Binary_Labels.shape, r.File), axis=1)

100%|██████████| 885/885 [00:00<00:00, 27316.31it/s]


### Save bar mel spectrograms

In [22]:
for _, feature in tqdm(bars.iterrows(), total=bars.shape[0]):
    np.save(os.path.join(ROOT, SUB_DIVS_DIR, 'bars', 'harmonix', feature.File + '.npy'), feature.Sub_Divisions)

100%|██████████| 885/885 [01:23<00:00, 10.55it/s]


#### Save path instead of melspec

In [23]:
bars['Sub_Divisions'] = bars['File'].progress_map(lambda file: os.path.join('bars', 'harmonix', file + '.npy'))

100%|██████████| 885/885 [00:00<00:00, 364955.17it/s]


In [24]:
bars['Oversamples'] = bars['File'].progress_map(lambda file: os.path.join('over_bars', 'harmonix', file + '.npy'))

100%|██████████| 885/885 [00:00<00:00, 338836.97it/s]


#### Save metadata

In [25]:
bars = bars[['File', 'Sub_Divisions', 'Binary_Labels', 'Weighted_Labels', 'Weights', 'IDS', 'Beat_times', 'Labels', 'BPM']]
bars.to_pickle(os.path.join(ROOT, SUB_DIVS_DIR, 'bars', 'harmonix.p'))