## beatThis (madmom is required if I want to apply the DBN post-processing at the end)

downbeats = 1 3 5 7

upbeats   = 2 4 6 8

In [2]:
import numpy as np
# Compact array display: 3 decimals, fixed-point instead of scientific notation.
np.set_printoptions(precision = 3, suppress = True)

from beat_this.inference import File2Beats, Audio2Frames
from beat_this.model.postprocessor import Postprocessor
# NOTE(review): unfinished draft of a direct madmom import; the module path after
# `madmom.features.` is incomplete, so it cannot be uncommented as written.
# from madmom.features. import DBNBeatTrackingProcessor


import torchaudio
from pathlib import Path
from IPython.display import Audio

import warnings
# Hide UserWarning messages emitted by code that runs after this point.
warnings.filterwarnings("ignore", category = UserWarning)

annotatedBeats = 1 3 5 7

| Song # | The old model | The new model, minimal type | The new model, dbn type | 
| - | - | - | - | 
| #37 | oddBeats[::2] (+/-)= 1 3 5 7 => oddBeats = 1-8 | same as the DBN type| beats = 1-8, downbeats = 1 5|  
| #76 | oddBeats (+/-)= 1 3 5 7 | same as DBN | beats = 1 3 5 7, downbeats = 1 | 
| #96 | evenBeats[::2] = 1, 3, 5, 7 => evenBeats = 1-8 => oddBeats = 0.5 1.5 ... 7.5 (somehow) | ??? looks a bit disastrous - at some point it just starts identifying non-existent beats & downbeats, and I'm not sure why | beats = +/- 1 3 5 7, except that for a few beats it has quite a big error (~20% of a beat interval); same for downbeats|

In [3]:
# Song number; selects both the audio fragment and its annotation file.
N = 96
songPath = Path(f"theSalsaProject/audioFragments/{N}.wav")
song, sr = torchaudio.load(songPath)

# The annotation file sits two directory levels above the audio file, under beatAnnotations/.
# Dividing by 1000 presumably converts milliseconds to seconds (later cells report seconds) - TODO confirm.
annotatedBeats = np.loadtxt(songPath.parents[1] / f"beatAnnotations/{N}.txt") / 1000
# Keep only annotations strictly below 30 (presumably the fragment length in seconds - TODO confirm).
annotatedBeats = annotatedBeats[np.less(annotatedBeats, 30)]

# Last expression of the cell: renders an inline audio player for the fragment.
Audio(filename = songPath)

In [4]:
# Build the frame-level model and the two post-processors, then decode the same
# logits twice so the 'dbn' and 'minimal' results can be compared directly.
audio2frames = Audio2Frames()
dbnPostProc = Postprocessor(type = 'dbn')
minPostProc = Postprocessor(type = 'minimal')
# song is passed transposed; presumably Audio2Frames expects (samples, channels) - TODO confirm.
beatLogits, downBeatLogits = audio2frames(song.T, sr)
beatsDBN, downBeatsDBN = dbnPostProc(beatLogits, downBeatLogits)
beatsMin, downBeatsMin = minPostProc(beatLogits, downBeatLogits)

print(f"The shapes are: \n beatsDBN \t -> {beatsDBN.shape} \n downBeatsDBN \t -> {downBeatsDBN.shape} \n beatsMin \t -> {beatsMin.shape} \n downBeatsMin \t -> {downBeatsMin.shape} \n annotatedBeats\t -> {annotatedBeats.shape}")

# The cutoff lies half an annotated beat interval before the first annotation;
# every predicted (down)beat earlier than that is discarded.
startTime, delta = annotatedBeats[0], annotatedBeats[1] - annotatedBeats[0]
cutoff = startTime - delta / 2
beatsDBN, downBeatsDBN, beatsMin, downBeatsMin = [
    times[times >= cutoff]
    for times in (beatsDBN, downBeatsDBN, beatsMin, downBeatsMin)
]

print(f"\nEstablished a cutoff at {cutoff:.3f} seconds with a delta of {delta:.3f} seconds.")
print(f"The new shapes are: \n beatsDBN \t -> {beatsDBN.shape} \n downBeatsDBN \t -> {downBeatsDBN.shape} \n beatsMin \t -> {beatsMin.shape} \n downBeatsMin \t -> {downBeatsMin.shape} \n annotatedBeats\t -> {annotatedBeats.shape}")

The shapes are: 
 beatsDBN 	 -> (43,) 
 downBeatsDBN 	 -> (11,) 
 beatsMin 	 -> (58,) 
 downBeatsMin 	 -> (19,) 
 annotatedBeats	 -> (33,)

Established a cutoff at 7.650 seconds with a delta of 0.591 seconds.
The new shapes are: 
 beatsDBN 	 -> (33,) 
 downBeatsDBN 	 -> (9,) 
 beatsMin 	 -> (48,) 
 downBeatsMin 	 -> (14,) 
 annotatedBeats	 -> (33,)


In [5]:
# Display the annotated beat times next to the 'minimal' post-processor beat times
# (both arrays are sorted) so they can be compared by eye.
# TODO: interleave the two arrays into one sorted sequence that records the source of each time, e.g.
#       np.array(sorted([(t, 'annot') for t in annotatedBeats] + [(t, 'min') for t in beatsMin]), dtype = object)
(annotatedBeats, beatsMin)

(array([ 7.945,  8.536,  9.189,  9.863, 10.539, 11.239, 11.873, 12.576,
        13.232, 13.932, 14.599, 15.271, 15.976, 16.652, 17.305, 17.996,
        18.681, 19.385, 20.033, 20.709, 21.391, 22.092, 22.761, 23.431,
        24.099, 24.833, 25.489, 26.165, 26.879, 27.532, 28.193, 28.885,
        29.567]),
 array([ 7.82,  8.5 ,  9.18,  9.86, 10.54, 11.22, 11.74, 11.9 , 12.56,
        13.08, 13.24, 13.76, 13.92, 14.44, 14.6 , 15.3 , 15.8 , 15.96,
        16.48, 16.64, 17.16, 17.32, 17.84, 18.02, 18.52, 18.7 , 19.38,
        20.06, 20.74, 21.42, 22.1 , 22.62, 22.78, 23.46, 23.98, 24.14,
        24.82, 25.36, 25.5 , 26.18, 26.72, 26.86, 27.38, 27.54, 28.06,
        28.24, 28.76, 29.44]))

In [6]:
# For every 'minimal' beat, the index in annotatedBeats where it would be inserted to keep
# the annotations sorted; repeated indices mean several predictions fall between the same
# pair of annotated beats.
annotatedBeats.searchsorted(beatsMin)

array([ 0,  1,  2,  3,  5,  5,  6,  7,  7,  8,  9,  9,  9, 10, 11, 12, 12,
       12, 13, 13, 14, 15, 15, 16, 16, 17, 17, 19, 20, 21, 22, 22, 23, 24,
       24, 25, 25, 26, 27, 28, 28, 28, 29, 30, 30, 31, 31, 32])

In [7]:
def nearestBeatDeviation(reference, estimated):
    """Signed offset from each reference beat to its closest estimated beat.

    Pairing by nearest neighbour instead of by array index keeps the comparison
    valid when the two arrays have different lengths (extra or missing
    detections), where a plain `reference - estimated` raises a broadcasting error.

    Parameters
    ----------
    reference : array-like of float
        Beat times to evaluate against (e.g. the annotations).
    estimated : array-like of float
        Predicted beat times; sorted internally, must not be empty.

    Returns
    -------
    np.ndarray
        `reference - nearest estimated beat`, one value per reference beat.

    Raises
    ------
    ValueError
        If `estimated` contains no beats.
    """
    reference = np.atleast_1d(np.asarray(reference, dtype = float))
    estimated = np.sort(np.atleast_1d(np.asarray(estimated, dtype = float)))
    if estimated.size == 0:
        raise ValueError("estimated must contain at least one beat time")

    # Candidate neighbours: the estimate just before and just after each reference beat.
    insertAt = np.searchsorted(estimated, reference)
    left     = np.clip(insertAt - 1, 0, estimated.size - 1)
    right    = np.clip(insertAt,     0, estimated.size - 1)
    # Ties go to the earlier estimate.
    useRight = np.abs(estimated[right] - reference) < np.abs(reference - estimated[left])
    return reference - np.where(useRight, estimated[right], estimated[left])


# Deviation of the DBN beats from the annotations, computed once and reused below.
deviationDBN    = nearestBeatDeviation(annotatedBeats, beatsDBN)
absDeviationDBN = np.abs(deviationDBN)

print("Biggest deviation occurs at ", absDeviationDBN.argmax())
# delta is the first annotated beat interval, used here as the reference beat length.
print(f"It represents {absDeviationDBN.max() / delta * 100:.2f}% of a beat interval")
deviationDBN

Biggest deviation occurs at  30
It represents 22.50% of a beat interval


array([ 0.125,  0.036, -0.011,  0.003, -0.001,  0.019, -0.007,  0.016,
       -0.008,  0.012, -0.001, -0.029,  0.016,  0.012, -0.035, -0.024,
       -0.019,  0.005, -0.007, -0.031, -0.029, -0.008, -0.019, -0.029,
       -0.041,  0.013, -0.011, -0.015,  0.019,  0.092,  0.133,  0.125,
        0.127])