In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchmetrics as metrics

from models.baseline import Baseline
from preprocessing import audio_preprocessor
from preprocessing.audio_preprocessor import load_metadata
from preprocessing.data_preprocessor import MaestroDataset

# Machine-specific absolute directory from which later cells load saved .npy arrays.
saved_var_path = "D:/dlp/"
# Root of the MAESTRO v3.0.0 dataset, relative to the notebook's working directory.
data_path = "data/maestro-v3.0.0/"
# Dataset metadata as returned by load_metadata for the dataset's JSON index file.
meta = load_metadata(data_path + "maestro-v3.0.0.json")
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Keyword arguments forwarded verbatim to torch.utils.data.DataLoader.
params = dict(batch_size=1, shuffle=True, num_workers=0)



In [2]:
def data_idx(metadata=None):
    """Split the dataset's integer record indices by their assigned split.

    Args:
        metadata: Column-oriented metadata mapping. It must provide a
            ``'duration'`` entry (only its length is used) and a ``'split'``
            mapping keyed by the *string* form of each record index. When
            omitted, the notebook-level ``meta`` loaded in the first cell is
            used, which keeps the original zero-argument call working.

    Returns:
        A ``(train_idx, test_idx, val_idx)`` tuple of integer NumPy arrays.
        Records whose split label is not recognised are left out of all three.
    """
    if metadata is None:
        metadata = meta

    # The validation split is accepted under both spellings: the short 'val'
    # this notebook originally looked for and the full 'validation' label.
    # NOTE(review): MAESTRO's published metadata uses 'validation'; confirm
    # whether load_metadata renames it.
    bucket_for_label = {
        'train': 'train',
        'test': 'test',
        'val': 'val',
        'validation': 'val',
    }
    buckets = {'train': [], 'test': [], 'val': []}

    split_of = metadata['split']
    for i in range(len(metadata['duration'])):
        bucket = bucket_for_label.get(split_of[str(i)])
        if bucket is not None:
            buckets[bucket].append(i)

    # dtype=int keeps empty splits usable as index arrays (np.array([]) would
    # otherwise default to float64).
    return tuple(np.array(buckets[name], dtype=int) for name in ('train', 'test', 'val'))

# Compute the per-split index arrays once; train_idx is used below to build the training set.
train_idx, test_idx, val_idx = data_idx()

In [3]:
# Training dataset restricted to the 'train' split indices.
# `audio_preprocessor` must be imported as a module in the imports cell
# (`from preprocessing import audio_preprocessor`); importing only
# `load_metadata` from it leaves this name undefined on a fresh kernel.
# NOTE(review): the positional 512 is presumably a sequence length - confirm
# against MaestroDataset's signature.
train_set = MaestroDataset(
    train_idx,
    meta,
    data_path,
    512,
    audio_preprocessor.SPECTROGRAM,
)
# Batching/shuffling behaviour comes from the `params` dict defined in the first cell.
training_generator = torch.utils.data.DataLoader(train_set, **params)

In [4]:
# Event vocabulary size: 3 + 88 + 128 + round(time per sequence * 100),
# with 4.088 used as the time per sequence.
# NOTE(review): the meaning of the 3 / 88 / 128 terms is not stated in this
# notebook - confirm against the event encoding used by the preprocessing code.
time_bins = int(np.round(4.088 * 100))
vocab_size = 3 + 88 + 128 + time_bins
model = Baseline(vocab_size=vocab_size).to(device)

# Cross-entropy over the event vocabulary, optimised by Adam with its default settings.
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

In [5]:
def train(model, loss_function, optimizer, epochs=100, data_loader=None,
          checkpoint_path="model", checkpoint_every=10):
    """Train ``model`` and periodically checkpoint it.

    Args:
        model: Module called as ``model(X, y, src_key_padding_mask=...,
            tgt_key_padding_mask=...)``. Its output is permuted with
            ``(0, 2, 1)`` before being handed to ``loss_function``.
        loss_function: Callable taking ``(permuted_outputs, y)`` and returning
            a scalar tensor that supports ``backward()``.
        optimizer: Optimizer over ``model``'s parameters.
        epochs: Number of passes over ``data_loader`` (default 100).
        data_loader: Iterable of batches, each a mapping with the keys
            ``'spectrogram'``, ``'midi'``, ``'src_pad_mask'`` and
            ``'tgt_pad_mask'``. Defaults to the notebook-level
            ``training_generator``.
        checkpoint_path: File the checkpoint is written to (default "model").
        checkpoint_every: Log and checkpoint every this many epochs. The final
            epoch is always checkpointed as well, so the saved weights never
            lag behind the weights the loop ends with.

    The checkpoint is a dict with ``'model_state_dict'``,
    ``'optimizer_state_dict'``, ``'loss'`` (mean batch loss of the epoch, as a
    plain float) and ``'epoch'`` (zero-based index of the saved epoch).
    """
    if data_loader is None:
        data_loader = training_generator

    # Move every batch to wherever the model's weights live.
    model_device = next(model.parameters()).device

    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        n_batches = 0
        for sample_batched in data_loader:
            X = sample_batched['spectrogram'].to(model_device)
            y = sample_batched['midi'].to(model_device)
            src_pad_mask = sample_batched['src_pad_mask'].to(model_device)
            tgt_pad_mask = sample_batched['tgt_pad_mask'].to(model_device)

            # zero out the parameter gradients
            optimizer.zero_grad()
            # NOTE(review): the same y is used as decoder input and as the loss
            # target; confirm the model shifts/masks the target internally,
            # otherwise it can see the token it is asked to predict.
            outs = model(X, y, src_key_padding_mask=src_pad_mask, tgt_key_padding_mask=tgt_pad_mask)
            # Permute to the (batch, classes, d_1) layout cross entropy expects
            # for sequence targets, see
            # https://stackoverflow.com/questions/69367671/correct-use-of-cross-entropy-as-a-loss-function-for-sequence-of-elements
            loss = loss_function(outs.permute(0, 2, 1), y)
            loss.backward()
            optimizer.step()

            # .item() detaches the value so no autograd graph is kept alive or pickled.
            running_loss += loss.item()
            n_batches += 1

        mean_loss = running_loss / n_batches if n_batches else 0.0

        is_last_epoch = epoch == epochs - 1
        if epoch % checkpoint_every == 0 or is_last_epoch:
            log = "Fake epoch " + str(epoch) + ": loss: " + str(mean_loss)
            print(log)
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': mean_loss,
                'epoch': epoch,
            }, checkpoint_path)



In [6]:
def test(model, loss_function):
    model.eval()
    correct = 0
    total = 0
    for i in range(2000):
        X, y = sample_test_batch()
        outs = model(X, y)
        pred = torch.argmax(outs, dim=2)
        y = torch.argmax(y, dim=2)
        correct += torch.eq(pred, y).sum()
        total += X.shape[0]
    print(correct )
    print(total)
    print(correct / total)


In [None]:
# Run the training loop; train() writes its checkpoints to the file "model" by default.
train(model, loss_function, optimizer)

In [None]:
# Restore model and optimizer state from the checkpoint written by train().
# map_location remaps saved tensors onto the currently selected device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
checkpoint = torch.load("model", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# Loss value recorded at the time the checkpoint was saved.
loss = checkpoint['loss']

#test(model, loss_function)
#model.eval()
# X, y = sample_train_batch()
# outs = model(X, y)
# m = nn.Softmax()
# soft = m(outs)
# pred = torch.argmax(outs, dim=2)
# print(soft)
# print(pred)
# print(torch.argmax(y, dim=2))
# print(outs)
#
# del soft
# del pred
# del outs
# del X
# del y
# torch.cuda.empty_cache()

In [None]:
# Reload the trained weights; map_location lets a GPU-written checkpoint load
# on whatever device is selected for this session.
checkpoint = torch.load("model", map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
# Evaluation mode: layers that behave differently during training (e.g.
# dropout, if the model has any) are switched to their inference behaviour.
model.eval()

X = np.load(saved_var_path + "test/spec/test_spec_6.npy")
# NOTE(review): this file is named train_midi_6 but is read from test/midi/ -
# confirm it really pairs with test_spec_6.
y = np.load(saved_var_path + 'test/midi/train_midi_6.npy')

out_seq = []

# No gradients are needed for prediction.
with torch.no_grad():
    for i in range(X.shape[0]):
        # NOTE(review): the target is one-hot encoded with 512 classes here,
        # while train() passes 'midi' batches to the model as-is and the model
        # is built with a different vocab_size - confirm this matches what the
        # model's forward pass expects.
        b = torch.nn.functional.one_hot(torch.tensor(y[i]).to(torch.int64), num_classes=512)
        b = torch.unsqueeze(b, dim=0)
        print(b)
        x = torch.unsqueeze(torch.tensor(X[i]), dim=0)
        # x is already a tensor; move it directly instead of re-wrapping it.
        outs = model(x.to(device), b.to(torch.float32).to(device))
        print(outs)
        pred = torch.argmax(outs, dim=2)
        # Keep results on the CPU so accumulated predictions do not pin GPU memory.
        out_seq.extend(pred.cpu())



In [None]:
# Show how many predicted sequences were collected, followed by all of them.
print(len(out_seq), out_seq, sep="\n")
# Then show the first sequence as a whole and element by element.
first_sequence = out_seq[0]
print(first_sequence)
for token in first_sequence:
    print(token, ' ')