## Loading Libraries


In [None]:
!pip install import_ipynb

Collecting import_ipynb
  Downloading https://files.pythonhosted.org/packages/63/35/495e0021bfdcc924c7cdec4e9fbb87c88dd03b9b9b22419444dc370c8a45/import-ipynb-0.1.3.tar.gz
Building wheels for collected packages: import-ipynb
  Building wheel for import-ipynb (setup.py) ... [?25l[?25hdone
  Created wheel for import-ipynb: filename=import_ipynb-0.1.3-cp37-none-any.whl size=2976 sha256=f4d15acfc3c58e998d396ee2de228e1f31480966cf6430a73b8f6d4f319c8544
  Stored in directory: /root/.cache/pip/wheels/b4/7b/e9/a3a6e496115dffdb4e3085d0ae39ffe8a814eacc44bbf494b5
Successfully built import-ipynb
Installing collected packages: import-ipynb
Successfully installed import-ipynb-0.1.3


In [None]:
%matplotlib inline

import glob
import os
import pickle
import random
import warnings

import import_ipynb
import librosa
import madmom
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import progressbar
import seaborn as sns


In [None]:
# Mount Google Drive into the Colab filesystem at /content/gdrive; the data
# paths used by this notebook live below that mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# CUDA device settings: enumerate devices by PCI bus id and leave the list of
# visible devices empty.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "",
})

# root folder of the onset data on the mounted drive
ONSET_PATH = '/content/gdrive/My Drive/randomsample/'
# frame rate, passed as `fps` wherever annotations are quantized
FPS = 100

In [None]:
class Dataset(object):
    """Pairs every annotation file found under ``path`` with its audio file.

    Attributes set by the constructor:
        path: the root folder that was passed in.
        files: base names of the matched annotation files, suffix removed.
        audio_files: paths of the matched audio files.
        annotation_files: paths of the matched annotation files.
        ind: numpy array with the positions ``0..N-1`` of the matched pairs,
            in shuffled order.
    """

    def __init__(self, path, audio_suffix='.wav', annotation_suffix='.onsets'):
        """Search ``path`` for audio/annotation pairs.

        Args:
            path: root folder containing an ``audio`` and an ``annotations``
                sub-folder; a trailing separator is optional.
            audio_suffix: suffix of the audio files to look for.
            annotation_suffix: suffix of the annotation files to look for.

        An annotation file without exactly one matching audio file is skipped
        with a warning.
        """
        self.path = path
        # os.path.join resolves both sub-folders the same way whether or not
        # `path` ends with a separator (plain concatenation produced
        # '<path>audio' for a path without a trailing slash).
        audio_files = madmom.utils.search_files(
            os.path.join(self.path, 'audio'), audio_suffix)
        annotation_files = madmom.utils.search_files(
            os.path.join(self.path, 'annotations'), annotation_suffix,
            recursion_depth=1)

        self.files = []              # base names without suffix
        self.audio_files = []        # audio file locations
        self.annotation_files = []   # annotation file locations
        self.ind = []                # running index of each matched pair
        for annotation_file in annotation_files:
            # search the matching audio file
            matches = madmom.utils.match_file(
                annotation_file, audio_files,
                suffix=annotation_suffix, match_suffix=audio_suffix)
            if len(matches) != 1:
                warnings.warn('skipping %s, no audio file found' % annotation_file)
                continue
            self.audio_files.append(matches[0])
            self.annotation_files.append(annotation_file)
            # the next free position is the number of pairs stored so far
            self.ind.append(len(self.ind))
            self.files.append(os.path.basename(annotation_file[:-len(annotation_suffix)]))
        print(self.annotation_files)
        # shuffle the positions in place, then expose them as an array
        random.shuffle(self.ind)
        self.ind = np.array(self.ind)
        

### Loading Processed Datasets


In [None]:
# Load the previously pickled onset dataset object from Google Drive.
# NOTE(review): unpickling can execute arbitrary code, so only load pickle
# files that come from a trusted source.
with open('/content/gdrive/My Drive/onset_db.pkl', 'rb') as f:
    onsets_db = pickle.load(f)


In [None]:
from tensorflow.keras.utils import Sequence

class DataSequence(Sequence):
    """Keras ``Sequence`` that serves (features, targets) batches.

    The annotations in ``y`` are converted once, at construction time, with
    ``madmom.utils.quantize_events`` so that each target has the same length
    as its feature sequence.
    """

    # padding value; only needed for batch sizes > 1
    mask_value = -999

    def __init__(self, x, y, batch_size=1, max_seq_length=None, fps=FPS):
        self.x = x
        targets = []
        for events, features in zip(y, self.x):
            targets.append(
                madmom.utils.quantize_events(events, fps=fps, length=len(features)))
        self.y = targets
        self.batch_size = batch_size
        self.max_seq_length = max_seq_length

    def __len__(self):
        # number of batches, counting a final partial batch
        num_batches = np.ceil(len(self.x) / float(self.batch_size))
        return int(num_batches)

    def __getitem__(self, idx):
        # the sequences that belong to batch `idx`
        batch = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        x, y = self.x[batch], self.y[batch]
        if self.batch_size > 1:
            # bring all sequences of the batch to a common length
            pad = keras.preprocessing.sequence.pad_sequences
            shared = dict(maxlen=self.max_seq_length, truncating='post',
                          value=self.mask_value)
            x = pad(x, dtype=np.float32, **shared)
            y = pad(y, dtype=np.int32, **shared)
        # targets get a trailing axis of size one
        return np.array(x), np.array(y)[..., np.newaxis]

In [None]:
# base folder from which the notebook derives its checkpoint/log folder and its
# predictions folder
basedir = 'models/onsets/'

In [None]:
lr = 0.01

# show the dataset's index array followed by its train / val / test splits
for split_indices in (onsets_db.ind, onsets_db.train, onsets_db.val, onsets_db.test):
    print(split_indices)


def _sequence_for(indices):
    """Wrap the features and annotations selected by ``indices`` in a DataSequence."""
    return DataSequence([onsets_db.x[i] for i in indices],
                        [onsets_db.annotations[i] for i in indices],
                        batch_size=1, max_seq_length=60 * FPS)


train = _sequence_for(onsets_db.train)
val = _sequence_for(onsets_db.val)


[197  69 412 444 191 151 126 179 233  37 137 100 364 426 132 114 360 476
 474 437  23 139 116 404 164  53  25 368 465 187 276  59 248  21 375 348
 334 281  73 323  71 366 260 321 283 409  79 190  19 211 193 274 419 290
 168 308 367 445   5 347 216  99 236 292  31 225 146 256 235 449  65 381
  22 372  50   3 261 241 189 159 282 425  35 433 207 387 119 335  26  42
 329  20  64 153 407 112 222 316 304 286  84 230 370 128 406 384 319 102
 432  41 353 359 262 257  74  75 416  49 106  30 202 302  55 217  83 346
 395 399   7 229  45  67 460  80 455  78  38 109  33  40 439 450 472 243
  36 376  32 258 342 447 457 194 242 184 354 199 280 313 138  92 471 134
 212 303 358 420 446 206 268 273 414 339 272 269 325 461 101 178 270  44
 470 228 220 355 314 267 284 459 351 400 115 410 397 380 396 393 245 305
 204 300  62 124 123 173   2 149 361  29 391  91 251 259  89 110 451 266
 464 333 154 278 392 129 365 443 297 105  10 440 265 186 417 104 401 462
 307 160 477 176 158 311 279  97  85 144 327 122  5

In [None]:
from tensorflow import keras
from tensorflow. keras import Sequential
from tensorflow.keras.layers import Input, SimpleRNN, Bidirectional, Masking, LSTM, Dense

In [None]:
# Network: a masking layer, three stacked bidirectional SimpleRNN layers and a
# sigmoid output unit applied to every frame.
model = keras.Sequential()
# The feature dimension is read from the first training sample; frames equal to
# the sequence's mask_value are masked out.
model.add(Masking(input_shape=(None, train[0][0].shape[-1]), mask_value=train.mask_value))
for _ in range(3):
    model.add(Bidirectional(SimpleRNN(units=25, return_sequences=True)))
model.add(Dense(units=1, activation='sigmoid'))

# `learning_rate` replaces the deprecated `lr` keyword of the optimizer.
model.compile(loss=keras.losses.binary_crossentropy,
              optimizer=keras.optimizers.SGD(learning_rate=lr, clipvalue=5, momentum=0.9),
              metrics=['binary_accuracy'])

  "The `lr` argument is deprecated, use `learning_rate` instead.")


In [None]:
verbose = 0
# Output folder of this run. os.path.join avoids the doubled separator that
# plain '%s/...' formatting produces when `basedir` already ends with '/'; the
# trailing '' keeps the final separator so file names can simply be appended.
name = os.path.join(basedir, 'lr_%s' % str(lr).replace('.', ''), 'fold_%s' % 0, '')

# checkpoint after every epoch
mca = keras.callbacks.ModelCheckpoint(name + 'model_{epoch:02d}.h5', monitor='loss', save_best_only=False, verbose=verbose)
# checkpoint of the best training loss
mcb = keras.callbacks.ModelCheckpoint(name + 'model_best.h5', monitor='loss', save_best_only=True, verbose=verbose)
# checkpoint of the best validation loss
mcv = keras.callbacks.ModelCheckpoint(name + 'model_best_val.h5', monitor='val_loss', save_best_only=True, verbose=verbose)
# stop once the validation loss has not improved by 1e-4 for 20 epochs
es = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=20, verbose=verbose)
tb = keras.callbacks.TensorBoard(log_dir=name + 'logs', write_graph=True, write_images=True)

print(name)

models/onsets//lr_001/fold_0/


In [None]:
# Model.fit accepts keras Sequence objects directly; fit_generator is deprecated.
history = model.fit(train, steps_per_epoch=len(train), epochs=2, shuffle=True,
                    validation_data=val, validation_steps=len(val),
                    callbacks=[mca, mcb, mcv, es, tb])

# keep the weights of the last epoch next to the checkpoints
model.save(name + 'model_final.h5')



Epoch 1/2
Epoch 2/2


In [None]:

# folder name derived from the learning rate with the dot removed,
# e.g. lr = 0.01 -> '<basedir>lr_001_predictions/'
outdir = '{}lr_{}_predictions/'.format(basedir, str(lr).replace('.', ''))


In [None]:
# Peak picker applied to the network output. The frame rate comes from the
# notebook-wide FPS constant instead of a second hard-coded 100, so it cannot
# drift away from the rate used when the targets were quantized.
rnn_peak_picking = madmom.features.onsets.OnsetPeakPickingProcessor(
    threshold=0.35, pre_max=0.001, post_max=0.001, smooth=0.07, combine=0.03, fps=FPS)

In [None]:
from madmom.processors import ParallelProcessor, SequentialProcessor
from madmom.audio.signal import SignalProcessor, FramedSignalProcessor
from madmom.audio.stft import ShortTimeFourierTransformProcessor
from madmom.audio.spectrogram import FilteredSpectrogramProcessor, LogarithmicSpectrogramProcessor, SpectrogramDifferenceProcessor
        
# define pre-processor
class OnsetPreProcessor(SequentialProcessor):
    """Pre-processor that turns an audio signal into stacked spectrogram features.

    For every (frame size, number of bands) pair a chain of framing, STFT,
    filtering, logarithmic scaling and positive-difference stacking is built;
    the chains run in parallel and their outputs are joined with ``np.hstack``.
    """

    def __init__(self, frame_sizes=[1024, 2048, 4096], num_bands=[3, 6, 12]):
        # one branch per resolution, collected in a parallel processor
        branches = ParallelProcessor([])
        for size, bands in zip(frame_sizes, num_bands):
            chain = (
                # split the audio signal into overlapping frames
                FramedSignalProcessor(frame_size=size),
                # compute the STFT
                ShortTimeFourierTransformProcessor(),
                # filter the magnitudes
                FilteredSpectrogramProcessor(num_bands=bands),
                # scale them logarithmically
                LogarithmicSpectrogramProcessor(),
                # stack the positive differences
                SpectrogramDifferenceProcessor(positive_diffs=True, stack_diffs=np.hstack),
            )
            branches.append(SequentialProcessor(chain))
        # a fixed sample rate always yields the same number of filter bins
        signal = SignalProcessor(num_channels=1, sample_rate=44100)
        # signal -> parallel branches -> horizontal stacking
        super(OnsetPreProcessor, self).__init__((signal, branches, np.hstack))


# create a callable pre-processor
pp = OnsetPreProcessor()

In [None]:
# pre-process one recording from the dataset folder
data = pp(os.path.join(ONSET_PATH, 'audio', 'mridanga16_60bpm.wav'))
print(data.shape)

# Run the network once and reuse the result for both the shape and the values
# (the input gets a leading batch axis, which squeeze() removes again).
act = model.predict(data[np.newaxis, ...]).squeeze()
print(act.shape)
print(act)

# pick the peaks of the network output
det = rnn_peak_picking(act)
print(det)

(1600, 314)
(1600,)
[9.6011579e-01 2.1688700e-02 2.5497973e-02 ... 4.2873621e-04 4.1359663e-04
 2.1741688e-03]
[ 0.    1.49  1.99  2.99  3.99  4.99  5.99  6.99  7.99 10.99 11.99 12.99
 13.49 13.99 14.49 14.99]


In [None]:
def evaluate_onsets(predictions, annotations, verbose=False, ann_suffix='.onsets', det_suffix='.onsets.txt'):
    """Evaluate detection files against their annotation files.

    Args:
        predictions: list of detection file names.
        annotations: list of annotation file names.
        verbose: if true, print the evaluation of every matched file.
        ann_suffix: suffix of the annotation files.
        det_suffix: suffix of the detection files.

    Returns:
        Tuple ``(sum_evaluation, mean_evaluation)`` built from the evaluations
        of all annotations that have exactly one matching detection file;
        other annotations are skipped.
    """
    evals = []
    for ann_file in annotations:
        name = os.path.basename(ann_file)
        # Look the detection up in the list passed by the caller, not in a
        # module-level variable.
        matches = madmom.utils.match_file(ann_file, predictions, ann_suffix, det_suffix)
        if len(matches) != 1:
            continue
        det = madmom.io.load_onsets(matches[0])
        ann = madmom.io.load_onsets(ann_file)
        e = madmom.evaluation.onsets.OnsetEvaluation(
            det, ann, combine=0.03, window=0.025, name=name)
        evals.append(e)
        # Print only evaluations computed in this iteration; printing outside
        # the match branch would fail or repeat a stale result for unmatched
        # annotations.
        if verbose:
            print(e)
    se = madmom.evaluation.onsets.OnsetSumEvaluation(evals)
    me = madmom.evaluation.onsets.OnsetMeanEvaluation(evals)
    return se, me

In [None]:
# collect the annotation files and the detection files to compare
annotations = madmom.utils.search_files('%s/annotations' % ONSET_PATH, '.onsets')
detections = madmom.utils.search_files(outdir, '.onsets.txt')

# sum evaluation first, then mean evaluation
se, me = evaluate_onsets(detections, annotations)
for report in (se, me):
    print(report)

