In [1]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pickle
from scipy.sparse import csr_matrix

import torch
import torch.nn as nn
import torch.nn.functional as F

import IPython.display as ipd
from librosa import clicks
from scipy.io import wavfile

# Put the parent directory on the import path so the project-local `python`
# package (imported right below) can be resolved from this notebook.
module_path = os.path.abspath('..')
if module_path not in sys.path:
    sys.path.append(module_path)
    
from python.postprocessing_boeck import activations2beats
from python.models import ModelBoeck

import madmom

In [2]:
# Report the PyTorch build this kernel is running.
print('Torch version: {}'.format(torch.__version__))

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print('Device: %s' % (device))

Torch version: 1.0.0
Device: cpu


## Load data

In [11]:
# Load the pre-computed Ballroom features and labels.
# NOTE: despite the `.npy` suffix these files are read with `pickle`, not
# `np.load`. `pickle.load` can execute arbitrary code while loading, so only
# point these paths at files you created yourself or otherwise trust.
# The `with` blocks make sure both file handles are closed after reading.
with open('../data/pickle/ballroom_features_boeck.npy', 'rb') as features_file:
    features = pickle.load(features_file)
with open('../data/pickle/ballroom_labels_boeck.npy', 'rb') as labels_file:
    labels = pickle.load(labels_file)

In [4]:
# Sorted directory listings of the audio files and of the annotation files.
# NOTE(review): later cells index `files` and `annotated_beats` with the same
# position, which presumably relies on both directories sorting into the same
# track order -- confirm.
files = os.listdir('../data/audio/Ballroom/')
files.sort()
annotations = os.listdir('../data/annotations/Ballroom/')
annotations.sort()

# One array per annotation file, holding the first column of that file
# (presumably the beat times). `ndmin=2` keeps a single-row file 2-D so the
# column slice still works.
annotated_beats = [
    np.loadtxt('../data/annotations/Ballroom/' + ann_name, ndmin=2)[:, 0]
    for ann_name in annotations
]

## Select fold

In [4]:
# Fold to evaluate. It selects the split file read below and the per-fold
# prediction directory used further down in the notebook.
fold = 0

# Integer indices read from this fold's split file. Later cells use them to
# index `features`, `files` and `annotated_beats`.
indices_test = np.loadtxt('../data/splits/Ballroom/indices_'+str(fold)+'.fold', dtype=np.int64)

## Model

In [8]:
# Build the network and restore the trained weights for the selected fold.
# The checkpoint name is derived from `fold` so that the weights always match
# the test split chosen in the "Select fold" cell.
# NOTE(review): assumes every fold's checkpoint follows the
# `mb_ballroom_bs-100_fold-<fold>.pt` naming -- confirm.
model = ModelBoeck()
checkpoint_path = '../models/mb_ballroom_bs-100_fold-' + str(fold) + '.pt'
# `map_location=device` remaps the stored tensors onto the device chosen above.
model.load_state_dict(torch.load(checkpoint_path, map_location=device))

# Switch to evaluation mode; as the cell's last expression this also displays
# the model summary.
model.eval()

ModelBoeck(
  (lstm): LSTM(120, 25, num_layers=3, batch_first=True, bidirectional=True)
  (fc): Linear(in_features=50, out_features=2, bias=True)
)

## Calculate predictions

In [13]:
# Run the model on every test file of the fold and write the detected beats
# to one text file per track.
output_dir = '../data/predictions/fold_'+str(fold)+'/'
# np.savetxt does not create missing directories, so make sure the per-fold
# output directory exists before the loop starts.
os.makedirs(output_dir, exist_ok=True)

for i, example in enumerate(indices_test):
    
    # "\r" rewrites the same output line instead of printing one line per file.
    print('Evaluate file {} of {}'.format(i+1, len(indices_test)), end="\r")

    # Reshape to a leading batch dimension of size 1.
    test_input = features[example].view(1, len(features[example]),-1)

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        out = model(test_input)

    # Take index 1 on the second axis of the single batch item and
    # exponentiate it.
    # NOTE(review): presumably the model emits log-probabilities and index 1
    # is the "beat" class -- confirm against ModelBoeck.
    activations = np.exp(np.array(out[0,1,:]))
    
    pred_beats = activations2beats(activations)
    
    # File name = audio file name minus its last four characters
    # (e.g. a ".wav" suffix) plus ".beats"; values are written with two decimals.
    np.savetxt(output_dir+files[example][:-4]+'.beats', 
               pred_beats, delimiter=',',fmt='%.2f',)

Evaluate file 87 of 87

## Load predictions

In [5]:
# Read the stored beat predictions for this fold back from disk, one array per
# file, in sorted file-name order.
# `ndmin=1` keeps every entry one-dimensional: without it np.loadtxt returns a
# 0-d array for a file that contains a single value.
predictions = sorted(os.listdir('../data/predictions/fold_'+str(fold)+'/'))
pred_beats = [np.loadtxt('../data/predictions/fold_'+str(fold)+'/'+ pred, ndmin=1) for pred in predictions]

In [6]:
# Score every test file of the fold with madmom's P-score.
#
# Predictions are looked up by file name rather than by position: the
# prediction list is in sorted file-name order, which only lines up with
# `indices_test` if those indices are sorted too and the prediction directory
# holds exactly this fold's files. A missing prediction now raises a KeyError
# instead of silently scoring the wrong track.
beats_by_prediction_file = dict(zip(predictions, pred_beats))

p_scores = []
for example in indices_test:
    # Same naming rule the prediction cell uses when writing the files:
    # audio file name minus its last four characters, plus ".beats".
    prediction_file = files[example][:-4] + '.beats'
    pred = beats_by_prediction_file[prediction_file]
    true = annotated_beats[example]
    p_scores.append(madmom.evaluation.beats.pscore(pred, true))

NameError: name 'annotated_beats' is not defined

In [7]:
# Display the first entry of `pred_beats` as a quick sanity check.
pred_beats[0]

array([ 0.23,  0.83,  1.43,  2.04,  2.64,  3.24,  3.84,  4.44,  5.04,
        5.64,  6.24,  6.84,  7.47,  8.04,  8.65,  9.26,  9.86, 10.46,
       11.06, 11.66, 12.26, 12.86, 13.45, 14.05, 14.65, 15.26, 15.87,
       16.47, 17.07, 17.68, 18.29, 18.89, 19.49, 20.09, 20.69, 21.24,
       21.82, 22.34, 22.94, 23.54, 24.14, 24.74, 25.34, 25.94, 26.54,
       27.14, 27.74, 28.34, 28.94, 29.54, 30.14])

In [30]:
# Mean of the collected P-scores.
np.mean(p_scores)

0.517583715713869

In [62]:
# Listen to one track with a click track mixed in.
# Sorted list of audio file names (the same listing is also built in the
# "Load data" section).
files = sorted(os.listdir('../data/audio/Ballroom/'))

# NOTE(review): `example` is not assigned in this cell; it is whatever value an
# earlier cell's loop left behind -- confirm which track is intended.
sr, signal = wavfile.read('../data/audio/Ballroom/' + files[example], mmap=False)

# Scale the samples by the largest sample value.
signal = signal/np.max(signal)

# Click track with the same number of samples as the audio.
# NOTE(review): `beats` is not defined anywhere in the visible notebook, so
# this line raises NameError on a fresh kernel -- confirm whether predicted or
# annotated beats were meant.
metronome = clicks(beats, sr=sr, length=len(signal))

# Overlay the clicks on the audio.
test_signal = signal + metronome

# Render an audio player as the cell output.
ipd.Audio(test_signal, rate=sr)

In [63]:
# P-score of `pred_beats` against `true_beats`.
# NOTE(review): `true_beats` is not defined anywhere in the visible notebook,
# and `pred_beats` is rebound by several cells (a list of arrays in
# "Load predictions", a single array elsewhere) -- confirm which values this
# cell is meant to compare.
madmom.evaluation.beats.pscore(pred_beats,true_beats)

0.125

In [12]:
# Predict beats for one hard-coded example (index 300 of `features`), using
# the same steps as the loop in "Calculate predictions".
# Reshape to a leading batch dimension of size 1.
test_input = features[300].view(1, len(features[300]),-1)

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    out = model(test_input)

# Take index 1 on the second axis of the single batch item and exponentiate it.
# NOTE(review): presumably log-probabilities with index 1 as the "beat" class
# -- confirm against ModelBoeck.
activations = np.exp(np.array(out[0,1,:]))
    
# NOTE: this rebinds `pred_beats` to a single array, replacing the list built
# in "Load predictions"; re-run that cell before scoring the whole fold again.
pred_beats = activations2beats(activations)