# Create standard subdivisions for Salami

## Load libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import os

ROOT = os.path.join(os.getcwd(), '..', '..')

## Load custom modules

In [2]:
from nnssa.constants import *
from nnssa.sub_divisions import *

## Progress bars!

In [3]:
from tqdm import tqdm
tqdm.pandas()

  from pandas import Panel


## Load dataset

In [4]:
salami = pd.read_pickle(os.path.join(ROOT, BEATS_DIR, 'salami.p'))
salami.head()

Unnamed: 0,File,Song_duration,Song_title,Artist,Format,Labels,Sections,Beat_times,Beat_frames
0,956,320.0,Revolution,Burden_Hand,mp3,"[28.746303854, 49.357959183, 91.03056689299999...","[intro, verse, chorus, transition, verse, chor...","[0.0, 2.5541950113378684, 3.9009523809523814, ...","[0, 55, 84, 107, 131, 163, 187, 212, 238, 266,..."
1,958,344.0,Rap_Medley,5point,mp3,"[0.045, 26.129208333, 71.4750625, 116.7300625,...","[intro, verse, verse, verse, transition, verse...","[0.0, 0.18575963718820865, 0.8823582766439909,...","[0, 4, 19, 34, 49, 64, 79, 93, 109, 124, 138, ..."
2,960,293.0,Woman_Across_The_River,Absolute_Gruv,mp3,"[0.048979590999999996, 76.260068027, 108.70204...","[live, solo/instrumental, bridge, solo/instrum...","[0.0, 0.18575963718820865, 1.3931972789115643,...","[0, 4, 29, 56, 82, 107, 135, 163, 191, 219, 24..."
3,962,579.0,_,Afrissippi,mp3,"[15.18585034, 56.47031746, 104.25839002200001,...","[intro, verse, verse, solo/instrumental, verse...","[0.0, 0.3715192743764173, 1.0216780045351477, ...","[0, 8, 22, 36, 50, 65, 79, 93, 107, 122, 135, ..."
6,968,377.0,Quinn_the_Eskimo_The_Mighty_Quinn,Al_and_the_Transamericans,mp3,"[0.865306122, 6.802834467, 27.484149659, 47.89...","[intro, verse, chorus, verse, chorus, transiti...","[0.0, 0.4643990929705216, 1.7182766439909296, ...","[0, 10, 37, 65, 93, 123, 151, 179, 207, 236, 2..."


In [5]:
salami = salami.head(50)

## Load spectrograms

In [6]:
salami['Songs'] = salami.File.progress_map(lambda f: np.load(os.path.join(ROOT, SALAMI_MELS, f'{f}-mel.npy')))

100%|██████████| 50/50 [00:00<00:00, 668.89it/s]


## Helper functions

In [7]:
def sync(row):
    mel = row.Songs
    frames = row.Beat_frames
    return librosa.util.sync(mel, frames) 

#### Smear the labels
This is a form of oversampling, we add triangular weights  
around the ground truth labels and then set the labels to  
true. This function changes quite a bit so it is left in  
the notebook for easier access

In [8]:
def label_smear(row):
    weights = np.ones(shape=(row.Binary_Labels.shape))
    labels = row.Binary_Labels.copy()
    truthy = np.where(labels == 1)[0]
    for t in (truthy - 1):
        if (t > 0):
            labels[t] = 2
            weights[t] = 0.5
    for t in (truthy + 1):
        if (t  < len(labels)):
            labels[t] = 1
            weights[t] = 0.5
    mid_segments = (truthy[1:] + truthy[:-1]) / 2
    for l in ((truthy[1:] + truthy[:-1]) / 2):
        labels[t] = -1
    labels[truthy] = 1
    weights[truthy] = 3
    return labels, weights

## Sync beats

In [9]:
salami['Sync'] = salami.progress_apply(sync, axis=1)

100%|██████████| 50/50 [00:00<00:00, 181.69it/s]


## Process Frames

### Line-up labels and context windows

#### Check that shape is the same

#### Check that labels line up

### Apply label smearing

#### Check the number of truthy labels

### Save frames

## Process Beats

### Standard scale spectrograms

### Line-up labels and context windows

#### Check that shape is the same

### Apply label smearing
This is a form of oversampling, we add triangular weights  
around the ground truth labels and then set the labels to  
true.

#### Check that labels line up

#### Check the number of truthy labels

### Create ID vector

### Save beats

## Process Bars

In [10]:
bars = salami.copy()

In [11]:
bars['Sub_Divisions'] = bars['Sync'].progress_map(lambda t: create_spec_windows(t, 16, 4))

100%|██████████| 50/50 [00:00<00:00, 252.28it/s]


### Standard scale spectrograms

In [12]:
bars['Sub_Divisions'] = bars['Sub_Divisions'].progress_map(normalize)

100%|██████████| 50/50 [00:00<00:00, 71.90it/s]


### Line-up labels and context windows

In [13]:
bars['Binary_Labels'] = bars.progress_apply(lambda t: subdivide_labels(t, 16, 4), axis=1)

100%|██████████| 50/50 [00:00<00:00, 4485.89it/s]


#### Check that shape is the same

In [14]:
bars_sample = bars.iloc[1]
bars_sample['Sub_Divisions'].shape, bars_sample['Binary_Labels'].shape

((124, 80, 4, 33), (124,))

### Apply label smearing

In [15]:
bars['Weighted_Labels'] = bars.progress_apply(label_smear, axis=1)

100%|██████████| 50/50 [00:00<00:00, 7477.28it/s]


In [16]:
bars['Weights'] = bars['Weighted_Labels'].progress_map(lambda t: t[1])

100%|██████████| 50/50 [00:00<00:00, 122568.79it/s]


In [17]:
bars['Weighted_Labels'] = bars['Weighted_Labels'].progress_map(lambda t: t[0])

100%|██████████| 50/50 [00:00<00:00, 91738.93it/s]


#### Check that labels line up

In [18]:
bars_sample = bars.iloc[0]
truthy = np.where(bars_sample['Binary_Labels'] == 1)[0] * 4
beat_times = bars_sample['Beat_times']
times = beat_times[truthy]
print(f"Beat label times: {times} \nLabel times: {bars_sample['Labels']}")

Beat label times: [ 25.82058957  49.50494331  88.51446712 132.30730159 151.53342404
 191.65750567 230.43482993 258.99537415] 
Label times: [ 28.74630385  49.35795918  91.03056689 132.45095238 152.5884127
 192.22426304 231.20195011 259.62206349]


#### Check the number of truthy labels

In [19]:
bars_sample = bars.iloc[4]
weighted = np.where(bars_sample['Weighted_Labels'] == 1)[0]
normal = np.where(bars_sample['Binary_Labels'] == 1)[0]
weighted.shape, normal.shape, bars_sample['Labels'].shape

((31,), (17,), (17,))

### Create IDS vector

In [20]:
bars['IDS'] = bars.progress_apply(lambda r: np.full(r.Binary_Labels.shape, r.File), axis=1)

100%|██████████| 50/50 [00:00<00:00, 17210.93it/s]


### Save bar mel spectrograms

In [21]:
for _, feature in tqdm(bars.iterrows(), total=bars.shape[0]):
    np.save(os.path.join(ROOT, SUB_DIVS_DIR, 'bars', 'salami', f'{feature.File}.npy'), feature.Sub_Divisions)

100%|██████████| 50/50 [00:00<00:00, 81.40it/s]


#### Save path instead of melspec

In [22]:
bars['Sub_Divisions'] = bars['File'].progress_map(lambda file: os.path.join(ROOT, SUB_DIVS_DIR, 'bars', 'salami', f'{file}.npy'))

100%|██████████| 50/50 [00:00<00:00, 87272.24it/s]


#### Save metadata

In [23]:
bars = bars[['File', 'Sub_Divisions', 'Binary_Labels', 'Weighted_Labels', 'Weights', 'IDS', 'Beat_times', 'Labels']]
bars.to_pickle(os.path.join(ROOT, SUB_DIVS_DIR, 'bars', 'salami.p'))