In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to the project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Move the working directory up one level; the `src.*` imports and the
# relative "data/processed/..." path used below presumably resolve from there.
# NOTE(review): `%cd ..` is relative, so re-running this cell climbs another
# directory each time - run it exactly once per kernel.
%cd ..

/media/minhduc0711/Libraries/Codes/EURECOM/MALIS_project


In [27]:
from mir_eval.transcription import precision_recall_f1_overlap as eval_notes
from mir_eval.transcription_velocity import precision_recall_f1_overlap as eval_notes_with_velocity
import numpy as np
import torch
import torch.nn as nn
from torchaudio import transforms
from torch.utils.data import DataLoader, Subset

from src.data.datasets import MAPSDataset
from src.data.data_modules import MAPSDataModule
from src.utils import Lambda
from src.models import OnsetsAndFrames

In [3]:
# Audio front end: mel spectrogram (229 mel bands, 2048-point FFT, hop of 512
# samples) followed by an element-wise natural log.
# NOTE(review): torch.log of an exactly-zero mel bin is -inf; the sample shown
# below bottoms out at a finite value, but confirm behaviour on silent audio.
audio_transform = nn.Sequential(
    transforms.MelSpectrogram(
        n_mels=229, hop_length=512, n_fft=2048
    ),
    Lambda(lambda x: torch.log(x))
)
# Restrict the dataset to the "AkPnBcht" subset.
# NOTE(review): max_steps presumably caps the excerpt length in frames -
# confirm in MAPSDataset.
ds = MAPSDataset("data/processed/MAPS_MUS/",
                 subsets=["AkPnBcht"],
                 max_steps=640,
                 audio_transform=audio_transform)

# Take the first item, show which file it came from, and unpack the model
# input and the onset / frame labels.
sample = ds[0] 
print(sample["audio_path"])
X, onset_true, frame_true = sample["audio"], sample["onsets"], sample["frames"]

# Forward pass through a freshly constructed model (no weights are loaded
# here); unsqueeze(0) adds a leading dimension of size 1, presumably the batch
# dimension the model expects.
model = OnsetsAndFrames(in_feats=229)
onset_pred, frame_pred, vel_pred = model(X.unsqueeze(0))
# Display the value range of the model input.
X.min(), X.max()

/media/minhduc0711/Libraries/Codes/EURECOM/MALIS_project/data/processed/MAPS_MUS/AkPnBcht/MAPS_MUS-alb_se3_AkPnBcht.wav


(tensor(-17.9050), tensor(5.1918))

In [10]:
# Build the data module with batch size 1 and prepare it.
# NOTE(review): lazy_loading presumably defers reading audio until an item is
# requested - confirm in MAPSDataModule.
dm = MAPSDataModule(batch_size=1, lazy_loading=True)
dm.setup()

# Pull a single batch from the training loader and unpack input and labels.
# This rebinds `sample`, `X`, `onset_true` and `frame_true` from the earlier
# single-item cell.
sample = next(iter(dm.train_dataloader()))
X, onset_true, frame_true = sample["audio"], sample["onsets"], sample["frames"]

# Freshly constructed model again; X is passed as-is (no unsqueeze),
# presumably because the loader already adds the batch dimension.
model = OnsetsAndFrames(in_feats=229)
onset_pred, frame_pred, vel_pred = model(X)
# Display the range of the onset outputs.
onset_pred.max(), onset_pred.min()

(tensor(0.6361, grad_fn=<MaxBackward1>),
 tensor(0.3641, grad_fn=<MinBackward1>))

In [11]:
def extract_notes(onsets, frames, velocity,
                  onset_threshold=0.5, frame_threshold=0.5,
                  sample_rate=16000, hop_length=512, min_midi=21):
    """
    Decode note events from frame-level onset and frame activations.

    A note starts at every rising edge of the thresholded onsets (adjacent
    onset steps of the same pitch are squashed into one start) and lasts for
    as long as either the thresholded frame activation or an onset start is
    active for that pitch.

    Parameters
    ----------
    onsets : torch.Tensor
        2-D tensor indexed as [time step, pitch] holding onset activations.
    frames : torch.Tensor
        2-D tensor of the same shape holding frame activations.
    velocity : torch.Tensor
        Indexed as [time step, pitch]; the value at each note's onset step is
        reported as that note's velocity.
    onset_threshold, frame_threshold : float
        Activations strictly greater than these count as active.
    sample_rate : int
        Audio sample rate in Hz, used to convert steps to seconds.
    hop_length : int
        Number of audio samples per time step.
    min_midi : int
        MIDI note number assigned to pitch index 0.

    Returns
    -------
    pitches : np.ndarray, shape (n,)
        Note pitches in Hz.
    intervals : np.ndarray, shape (n, 2)
        Onset and offset time of each note in seconds. The shape is (0, 2)
        when no note is found, so the result can be handed to mir_eval as-is.
    velocities : np.ndarray, shape (n,)
        Velocity read at each note's onset step.
    """
    onset_mask = (onsets > onset_threshold).cpu().to(torch.int)
    frame_mask = (frames > frame_threshold).cpu()

    # squashing adjacent onsets: only a 0 -> 1 transition (or an onset active
    # on the very first step) starts a note
    onset_starts = torch.cat(
        [onset_mask[:1, :], onset_mask[1:, :] - onset_mask[:-1, :]], dim=0
    ) == 1

    # Pre-compute the "note is still sounding" mask once instead of calling
    # .item() on two tensors for every step of every note.
    active = (onset_starts | frame_mask).numpy()
    n_steps = active.shape[0]

    pitches = []
    # list of [onset, offset] pairs, in time steps
    intervals = []
    velocities = []

    for onset, pitch in onset_starts.nonzero().tolist():
        offset = onset
        while offset < n_steps and active[offset, pitch]:
            offset += 1

        if offset > onset:
            pitches.append(pitch)
            intervals.append([onset, offset])
            # .item() yields a plain Python number, so the NumPy conversion
            # below also works for tensors that require grad or live on GPU.
            velocities.append(velocity[onset, pitch].item())

    scale_factor = hop_length / sample_rate
    # Standard equal-temperament MIDI -> Hz conversion (A4 = MIDI 69 = 440 Hz),
    # the same formula as mir_eval.util.midi_to_hz.
    midi_numbers = np.asarray(pitches, dtype=float) + min_midi
    pitches = 440.0 * 2.0 ** ((midi_numbers - 69.0) / 12.0)
    # reshape(-1, 2) keeps the (n, 2) layout even when no notes were found.
    intervals = np.asarray(intervals, dtype=float).reshape(-1, 2) * scale_factor
    velocities = np.asarray(velocities)

    return pitches, intervals, velocities

In [15]:
# Decode the reference notes from the label tensors; squeeze() drops the
# size-1 batch dimension.
pitches_true, intervals_true, vels_true = extract_notes(onset_true.squeeze(),
                                                        frame_true.squeeze(),
                                                        sample["velocity"].squeeze())
# Decode the estimated notes from the model outputs. Velocities come from the
# model's own velocity output (vel_pred) rather than the ground-truth labels,
# so vels_pred really reflects the prediction; detach() drops the autograd
# graph so the values can be converted to NumPy.
pitches_pred, intervals_pred, vels_pred = extract_notes(onset_pred.squeeze(),
                                                        frame_pred.squeeze(),
                                                        vel_pred.detach().squeeze())

# Compare how many notes were found in the reference vs. the prediction.
intervals_true.shape, intervals_pred.shape

((158, 2), (7440, 2))

In [39]:
# Sanity check: score the reference notes against an empty estimate.
# mir_eval needs intervals of shape (n, 2), hence the explicit (0, 2) array.
# NOTE: this rebinds intervals_pred / pitches_pred set by the decoding cell.
intervals_pred = np.empty((0, 2))
pitches_pred = np.empty(0)
# The import cell binds this metric as `eval_notes`; the bare name
# `precision_recall_f1_overlap` is never defined in this notebook.
# offset_ratio=None makes mir_eval ignore note offsets when matching.
eval_notes(intervals_true, pitches_true,
           intervals_pred, pitches_pred,
           offset_ratio=None)

(0.0, 0.0, 0.0, 0.0)

In [31]:
# Velocity-aware variant of the note metric, called with an empty array as the
# estimated velocities.
# NOTE(review): this only lines up if intervals_pred / pitches_pred were
# already replaced by the empty arrays from the empty-estimate cell - confirm
# the intended execution order.
eval_notes_with_velocity(intervals_true, pitches_true, vels_true,
                            intervals_pred, pitches_pred, np.array([]),
                            offset_ratio=None)

(0.0, 0.0, 0.0, 0.0)

In [35]:
# Scratch check: an empty array with zero rows and two columns, the layout
# used for an empty set of note intervals.
np.empty((0,2))

array([], shape=(0, 2), dtype=float64)

In [2]:
import torch
# Zipping two tensors walks them in lockstep along their first dimension, so
# each pair holds one (2, 2) slice of `a` and the matching slice of `b`.
a = torch.randn(32, 2, 2)
b = torch.randn(32, 2, 2)
# Only the first pair is needed to confirm the per-item shapes.
x, y = next(zip(a, b))
print(x.shape, y.shape)

torch.Size([2, 2]) torch.Size([2, 2])


In [8]:
# Iterating over a 1-D tensor yields 0-d tensors, not plain Python floats.
first_row = a[0, 0]
for i in first_row:
    print(type(i), i)

<class 'torch.Tensor'> tensor(-0.2453)
<class 'torch.Tensor'> tensor(-0.7164)
