# Create standard subdivisions for Harmonix

## Load libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import librosa

from IPython.display import display, Markdown
plt.style.use('seaborn-darkgrid')
pd.set_option('display.max_columns', None) 

DEBUG = False

## Load local modules

In [2]:
import os
import sys

ROOT = os.path.join(os.getcwd(), '..', '..')

src_dir = os.path.join(ROOT, 'src')
sys.path.append(src_dir)

from constants import *
from sub_divisions import *

## Progress bars!

In [3]:
from tqdm import tqdm
tqdm.pandas()

  from pandas import Panel


## Load dataset

In [4]:
harmonix = pd.read_pickle(os.path.join(ROOT, BEATS_DIR, 'harmonix.p'))
# harmonix = harmonix.head(50)
harmonix.head()

Unnamed: 0,File,Duration,BPM,Time_Signature,Genre,Labels,Sections,Beat_times,Beat_frames
0,0001_12step,142.47,113,4|4,R&B,"[0.0, 8.495567999999999, 25.486704, 42.4753280...","[intro, verse, chorus, verse, chorus, verse, c...","[0.0, 0.5309729999999999, 1.0619459999999998, ...","[0, 11, 22, 34, 45, 57, 68, 80, 91, 102, 114, ..."
1,0003_6foot7foot,157.347,84,4|4,Hip-Hop,"[2.857108, 8.571396, 31.428548, 37.14283599999...","[chorus, verse, chorus, verse, chorus, verse, ...","[2.857108, 3.571394, 4.28568, 4.99996600000000...","[61, 76, 92, 107, 123, 138, 153, 169, 184, 199..."
2,0004_abc,180.955,94,4|4,Pop-Rock,"[2.666656, 28.300542999999998, 58.263180000000...","[verse, chorus, verse, chorus, bridge, chorus,...","[2.666656, 3.238084, 3.952369, 4.597529, 5.242...","[57, 69, 85, 98, 112, 126, 140, 154, 169, 183,..."
3,0006_aint2proud2beg,181.034,105,4|4,R&B,"[0.0, 27.4652, 45.203726, 63.518522999999995, ...","[intro, verse, transition, chorus, verse, brid...","[0.0, 0.572203, 1.144406, 1.716609, 2.288812, ...","[0, 12, 24, 36, 49, 61, 73, 86, 98, 110, 123, ..."
4,0008_america,222.683,136,4|4,Metal,"[3.871208, 10.56504, 33.217138, 56.85190400000...","[intro, verse, verse, bridge, solo/instrumenta...","[3.871208, 4.359011, 4.846814, 5.338616, 5.830...","[83, 93, 104, 114, 125, 136, 146, 156, 166, 17..."


In [5]:
harmonix = harmonix.head(50)

## Load spectrograms

In [6]:
harmonix['Songs'] = harmonix.File.progress_map(lambda f: np.load(os.path.join(ROOT, HARMONIX_MELS, f + '-mel.npy')))

100%|██████████| 50/50 [00:01<00:00, 27.11it/s]


## Helper functions

In [7]:
def sync(row):
    mel = row.Songs
    frames = row.Beat_frames
    return librosa.util.sync(mel, frames) 

#### Smear the labels
This is a form of oversampling, we add triangular weights  
around the ground truth labels and then set the labels to  
true. This function changes quite a bit so it is left in  
the notebook for easier access

In [8]:
def label_smear(row, window):
    full_window = int((window * 2) + 1)
    weights = np.zeros(shape=(row.Binary_Labels.shape))
    labels = row.Binary_Labels.copy()
    truthy = np.where(labels == 1)[0]
    labels[truthy[:-1] + 1] = 1
    weights[truthy[:-1] + 1] = 0.25
    labels[truthy[1:] - 1] = 1
    weights[truthy[1:] - 1] = 0.25
    weights[truthy] = 1
    return labels, weights

## Sync beats

In [9]:
harmonix['Sync'] = harmonix.progress_apply(sync, axis=1)

100%|██████████| 50/50 [00:00<00:00, 129.50it/s]


## Process Frames

### Line-up labels and context windows

#### Check that shape is the same

#### Check that labels line up

### Apply label smearing

#### Check the number of truthy labels

### Save frames

## Process Beats

### Standard scale spectrograms

### Line-up labels and context windows

#### Check that shape is the same

### Apply label smearing
This is a form of oversampling, we add triangular weights  
around the ground truth labels and then set the labels to  
true.

#### Check that labels line up

#### Check the number of truthy labels

### Create ID vector

### Save beats

## Process Bars

In [10]:
bars = harmonix.copy()

In [11]:
bars['Sub_Divisions'] = bars['Sync'].progress_map(lambda t: create_spec_windows(t, 8, 4))

100%|██████████| 50/50 [00:00<00:00, 285.72it/s]


### Standard scale spectrograms

In [12]:
bars['Sub_Divisions'] = bars['Sub_Divisions'].progress_map(normalize)

NameError: name 'scale' is not defined

### Line-up labels and context windows

In [None]:
bars['Binary_Labels'] = bars.progress_apply(lambda t: subdivide_labels(t, 8, 4), axis=1)

#### Check that shape is the same

In [None]:
bars_sample = bars.iloc[1]
bars_sample['Sub_Divisions'].shape, bars_sample['Binary_Labels'].shape

### Apply label smearing

In [None]:
bars['Weighted_Labels'] = bars.progress_apply(lambda t: label_smear(t, 3), axis=1)

In [None]:
bars['Weights'] = bars['Weighted_Labels'].progress_map(lambda t: t[1])

In [None]:
bars['Weighted_Labels'] = bars['Weighted_Labels'].progress_map(lambda t: t[0])

#### Check that labels line up

In [None]:
bars_sample = bars.iloc[0]
truthy = np.where(bars_sample['Binary_Labels'] == 1)[0] * 4
beat_times = bars_sample['Beat_times']
times = beat_times[truthy]
print(f"Beat label times: {times} \nLabel times: {bars_sample['Labels']}")

#### Check the number of truthy labels

In [None]:
bars_sample = bars.iloc[4]
weighted = np.where(bars_sample['Weighted_Labels'] == 1)[0]
normal = np.where(bars_sample['Binary_Labels'] == 1)[0]
weighted.shape, normal.shape, bars_sample['Labels'].shape

### Create IDS vector

In [None]:
bars['IDS'] = bars.progress_apply(lambda r: np.full(r.Binary_Labels.shape, r.File), axis=1)

### Save bars

In [None]:
bars = bars[['File', 'Sub_Divisions', 'Binary_Labels', 'Weighted_Labels', 'Weights', 'IDS', 'Beat_times', 'Labels', 'BPM']]
bars.to_pickle(os.path.join(ROOT, SUB_DIVS_DIR, 'bars', 'harmonix.p'))