In [1]:
# get the music transformer implementation
!git clone https://github.com/jason9693/MusicTransformer-pytorch.git music_transformer
!cp -a ./music_transformer/. ./


# install dependencies
!pip install --upgrade -q mido fairseq transformers pretty-midi tensorboardX progress
!pip install -r requirements.txt

# fetch the dataset
!wget https://storage.googleapis.com/magentadata/datasets/maestro/v3.0.0/maestro-v3.0.0.json

Cloning into 'music_transformer'...
remote: Enumerating objects: 217, done.[K
remote: Counting objects: 100% (66/66), done.[K
remote: Compressing objects: 100% (26/26), done.[K
remote: Total 217 (delta 49), reused 40 (delta 40), pack-reused 151[K
Receiving objects: 100% (217/217), 491.55 KiB | 12.94 MiB/s, done.
Resolving deltas: 100% (117/117), done.
[K     |████████████████████████████████| 51 kB 5.4 MB/s 
[K     |████████████████████████████████| 1.7 MB 28.6 MB/s 
[K     |████████████████████████████████| 4.0 MB 49.0 MB/s 
[K     |████████████████████████████████| 5.6 MB 22.6 MB/s 
[K     |████████████████████████████████| 125 kB 54.3 MB/s 
[K     |████████████████████████████████| 90 kB 5.8 MB/s 
[K     |████████████████████████████████| 147 kB 45.9 MB/s 
[K     |████████████████████████████████| 84 kB 3.2 MB/s 
[K     |████████████████████████████████| 596 kB 52.4 MB/s 
[K     |████████████████████████████████| 880 kB 51.9 MB/s 
[K     |█████████████████████████████

In [2]:
# show which GPU this runtime provides, together with driver/CUDA version and memory usage
!nvidia-smi

Thu May 12 10:52:35 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P0    23W / 300W |      0MiB / 16160MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
from music_transformer.model import MusicTransformer
import custom
from custom.metrics import *
from custom.criterion import SmoothCrossEntropyLoss, CustomSchedule
from custom.config import config
from data import Data
from mido import MidiFile, MidiTrack, Message, MetaMessage

import utils
import copy
import datetime
import time
import json
import pickle
import glob
import os

from enum import IntEnum
from os import path
from collections import deque

import torch
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [4]:
# Mount Google Drive; the dataset, log and checkpoint paths used below all live
# under /content/gdrive/MyDrive.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [5]:
# Root directory and dataset folder that get_path joins in front of each MIDI file name.
BASE_PATH = "/content"
DATASET_PATH = "maestro-v3.0.0"

# Load the MAESTRO index JSON (downloaded in the setup cell); get_path reads its
# "midi_filename" table.
with open("maestro-v3.0.0.json", "r") as index:
  index_js = json.load(index)

def get_path(index: int) -> str:
  """Return the path of the MIDI file registered under `index` in the dataset index.

  The file name is looked up in ``index_js["midi_filename"]`` using the string
  form of `index`, then joined onto ``BASE_PATH``/``DATASET_PATH``.
  """
  dataset_root = path.join(BASE_PATH, DATASET_PATH)
  midi_name = index_js["midi_filename"][str(index)]
  return path.join(dataset_root, midi_name)

In [6]:
# import serializer definition
# NOTE(review): names used later without an import in this notebook (e.g. SpecialTokens)
# presumably come from the notebook executed here — confirm.
%run /content/gdrive/MyDrive/Colab\ Notebooks/serializer-chord.ipynb

In [7]:
# Sequence length limit: the dataset cuts every item to MAX_TOKENS + 1 tokens, and the
# same value is passed to the model as max_seq and to the generation call as its length.
MAX_TOKENS = 512

In [8]:
class MaestroDataset(Dataset):
    """Dataset over a directory of serialized token tensors stored as ``<index>.pt``.

    The dataset length is the number of ``*.pt`` files found in `ds_path` when the
    object is created. Every item is loaded onto `device`, cut to ``MAX_TOKENS + 1``
    tokens and, unless its last token is padding, terminated with the END token.
    """

    def __init__(self, ds_path: str, device: str):
        self.ds_path = ds_path
        self.device = device
        tensor_pattern = path.join(ds_path, "*.pt")
        self.length = len(glob.glob(tensor_pattern))

    def __len__(self):
        return self.length

    def _load_sequence(self, item_index):
        """Load ``<item_index>.pt``, truncate it and fix up its final token."""
        file_name = "{}.pt".format(item_index)
        sequence = torch.load(os.path.join(self.ds_path, file_name), map_location=self.device)
        sequence = sequence[:MAX_TOKENS + 1]
        # A sequence that does not end in padding gets its last token replaced by END.
        if sequence[-1] != SpecialTokens.PADDING.value:
            sequence[-1] = SpecialTokens.END.value
        return sequence

    def __getitem__(self, idx):
        # Accept a tensor, a single int or a list of indices.
        if torch.is_tensor(idx):
            idx = idx.tolist()
        indices = [idx] if isinstance(idx, int) else idx

        # Several indices are returned as one concatenated tensor.
        return torch.cat([self._load_sequence(i) for i in indices])

In [9]:
# Dataset variant; it selects the dataset directory here and is reused in the
# log and checkpoint paths of the training cell.
DS_TYPE = "chord" # chord or single
DS_PATH = "/content/gdrive/MyDrive/BP/datasets/MAESTRO-" + DS_TYPE
BATCH_SIZE = 1
# NOTE(review): pickle.load can execute arbitrary code contained in the file; only load
# a serializer.pickle that you produced yourself.
with open(path.join(DS_PATH, "serializer.pickle"), "rb") as f:
  serializer = pickle.load(f)
# One dataset per split sub-directory; items are loaded directly onto "cuda:0".
train_ds = MaestroDataset(path.join(DS_PATH, "train"), "cuda:0")
test_ds = MaestroDataset(path.join(DS_PATH, "test"), "cuda:0")
valid_ds = MaestroDataset(path.join(DS_PATH, "validation"), "cuda:0")

In [10]:
# Every split uses the same loader settings: BATCH_SIZE items per batch, shuffled order,
# and loading in the main process (num_workers=0).
_loader_options = dict(batch_size=BATCH_SIZE, shuffle=True, num_workers=0)

training_loader = DataLoader(train_ds, **_loader_options)
test_loader = DataLoader(test_ds, **_loader_options)
valid_loader = DataLoader(valid_ds, **_loader_options)

In [11]:
# load model
# NOTE(review): the datasets are created with the same literal "cuda:0"; keep both in sync.
device = 'cuda:0'

# EMBEDDING_DIM sizes the model's embeddings and is also passed to the LR schedule;
# LABEL_SMOOTH is passed to the smoothed cross-entropy loss.
EMBEDDING_DIM = 256
LABEL_SMOOTH = 0.1

# NOTE(review): this is the string 'true', not the boolean True. Any non-empty string
# (including 'false') is truthy — confirm what MusicTransformer expects for `debug`.
DEBUG = 'true'
# Register the padding token id on the shared config object imported from custom.config.
config.pad_token = SpecialTokens.PADDING.value
# define model
model = MusicTransformer(
            embedding_dim=EMBEDDING_DIM,
            vocab_size=serializer.vocab_size(),
            num_layer=6,
            max_seq=MAX_TOKENS,
            dropout=0.1,
            debug=DEBUG,
            loader_path=None
)
model.to(device)
# The optimizer starts at lr=0; the schedule object wraps it and train() drives all
# updates through scheduler.step().
opt = optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9)
scheduler = CustomSchedule(EMBEDDING_DIM, optimizer=opt)


# init train metric set
# The same metric set is used by both train() and evaluate(); it yields the
# 'accuracy' and 'loss' entries they read.
metric_set = MetricsSet({
    'accuracy': CategoricalAccuracy(),
    'loss': SmoothCrossEntropyLoss(LABEL_SMOOTH, serializer.vocab_size(), SpecialTokens.PADDING.value)
})

In [12]:
# Number of epochs performed by one execution of the training cell.
EPOCHS = 100

In [13]:
# Running counters updated by the training cell: `epoch` is incremented at the start of
# every epoch (so the first epoch is 0) and `cumulative_epoch_time` accumulates seconds.
# They live in their own cell, presumably so the training cell can be re-run to continue
# without resetting them — confirm.
epoch = -1
cumulative_epoch_time = 0

In [14]:
def train():
    """Run one pass over `training_loader` and update the global `model`.

    Returns:
        A ``(mean_loss, mean_accuracy)`` tuple, both averaged over the number of batches.
    """
    model.train()
    loss_sum, accuracy_sum = 0., 0.

    for sequences in training_loader:
        # Next-token prediction: inputs drop the last token, targets drop the first.
        inputs = sequences[:, :-1].contiguous()
        targets = sequences[:, 1:].contiguous()

        scheduler.optimizer.zero_grad()
        prediction = model.forward(inputs)

        batch_metrics = metric_set(prediction, targets)
        batch_loss = batch_metrics['loss']
        batch_loss.backward()
        # The only update call in this loop; the schedule object is expected to apply
        # the optimizer step as well — verify against CustomSchedule.
        scheduler.step()

        accuracy_sum += float(batch_metrics['accuracy'])
        loss_sum += batch_loss.item()

    batch_count = len(training_loader)
    return loss_sum / batch_count, accuracy_sum / batch_count


def evaluate(eval_model, data_source):
    """Compute the mean loss and accuracy of `eval_model` over `data_source`.

    Args:
        eval_model: model exposing ``test()``, an ``infer`` attribute and ``forward``.
        data_source: sized iterable of token batches with the sequence on dimension 1.

    Returns:
        A ``(mean_loss, mean_accuracy)`` tuple, both averaged over ``len(data_source)``.

    Raises:
        ZeroDivisionError: if `data_source` is empty.
    """
    eval_model.test() # Turn on the evaluation mode
    # With infer disabled, forward() returns an indexable result whose first element is
    # used as the prediction below (presumably instead of running generation — confirm).
    eval_model.infer = False
    total_loss = 0.
    total_acc = 0.
    with torch.no_grad():
        for eval_batch in data_source:
            # Next-token prediction: inputs drop the last token, targets drop the first.
            eval_x = eval_batch[:, :-1].contiguous()
            eval_y = eval_batch[:, 1:].contiguous()
            output = eval_model.forward(eval_x)[0]

            metrics = metric_set(output, eval_y)
            total_loss += metrics['loss'].item()
            total_acc += float(metrics['accuracy'])

    return total_loss / len(data_source), total_acc / len(data_source)

In [None]:
# Per-epoch CSV logs and model checkpoints are written to the mounted Google Drive.
log_dir = '/content/gdrive/MyDrive/BP/logs/music-transformer/' + DS_TYPE
checkpoint_dir = '/content/gdrive/MyDrive/BP/checkpoints/music-transformer/' + DS_TYPE
metric_log_path = log_dir + '/training.csv'
time_log_path = log_dir + '/time.csv'
# Neither open(..., "a") nor torch.save creates missing parent directories.
os.makedirs(log_dir, exist_ok=True)
os.makedirs(checkpoint_dir, exist_ok=True)

best_test_loss = float("inf")
best_model = None
best_model_epoch = epoch
# The device name does not change during the run, so look it up once.
gpu_name = torch.cuda.get_device_name(torch.device('cuda:0'))

# Train Start
for _ in range(EPOCHS):
    epoch += 1
    epoch_start_time = time.time()
    train_loss, train_accuracy = train()
    # NOTE(review): the best model is selected on the test split; valid_loader is built
    # above but not used in the visible cells — consider selecting on validation instead.
    test_loss, test_accuracy = evaluate(model, test_loader)
    time_elapsed = time.time() - epoch_start_time
    cumulative_epoch_time += time_elapsed
    print('-' * 112)
    print('| end of epoch {} | time: {:.2f}s | train loss {:.2f} | train accuracy {:.2f} | test loss {:.2f} | test accuracy {:.2f} |'.format(epoch+1, time_elapsed, train_loss, train_accuracy, test_loss, test_accuracy))
    print('-' * 112)

    # write csv metric information (epoch numbers in the logs are 1-based)
    with open(metric_log_path, "a") as logf:
        logf.write(",".join([str(epoch+1), str(train_loss), str(train_accuracy), str(test_loss), str(test_accuracy)]) + "\n")

    # write learning time information
    with open(time_log_path, "a") as logf:
        logf.write(",".join([str(epoch+1), str(cumulative_epoch_time), gpu_name]) + "\n")

    if test_loss < best_test_loss:
        best_test_loss = test_loss
        # Take a real snapshot. `best_model = model` would only alias the live model, so
        # the "best" weights would silently follow every later epoch.
        # Requires `import copy` in the notebook's import cell.
        best_model = copy.deepcopy(model)
        best_model_epoch = epoch

    # No scheduler.step() here: train() already calls it once per batch right after
    # backward() and has no other update call. An extra call at this point would re-apply
    # the last batch's stale gradients after evaluation, so the checkpoint saved below
    # would no longer be the model that was just evaluated.
    # Checkpoint file names use the 0-based epoch counter.
    torch.save(model.state_dict(), checkpoint_dir + '/epoch-{}.pth'.format(epoch))

torch.save(model.state_dict(), checkpoint_dir + '/epoch-{}-final.pth'.format(epoch))
# best_model stays None when no epoch ran (EPOCHS == 0) or no loss compared below infinity.
if best_model is not None:
    torch.save(best_model.state_dict(), checkpoint_dir + '/best-model-epoch-{}.pth'.format(best_model_epoch))

----------------------------------------------------------------------------------------------------------------
| end of epoch 51 | time: 307.83s | train loss 4.10 | train accuracy 0.40 | test loss 5.08 | test accuracy 0.31 |
----------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------
| end of epoch 52 | time: 305.10s | train loss 4.09 | train accuracy 0.40 | test loss 5.06 | test accuracy 0.31 |
----------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------
| end of epoch 53 | time: 304.85s | train loss 4.09 | train accuracy 0.40 | test loss 5.07 | test accuracy 0.31 |
---------------------------------------------------------------------------------------------

In [None]:
# fetch the dataset
!wget https://storage.googleapis.com/magentadata/datasets/maestro/v3.0.0/maestro-v3.0.0-midi.zip
!unzip -q maestro-v3.0.0-midi.zip

--2022-05-11 13:54:47--  https://storage.googleapis.com/magentadata/datasets/maestro/v3.0.0/maestro-v3.0.0-midi.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 142.251.31.128, 142.251.18.128, 142.250.153.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.251.31.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 58416533 (56M) [application/octet-stream]
Saving to: ‘maestro-v3.0.0-midi.zip’


2022-05-11 13:54:51 (32.0 MB/s) - ‘maestro-v3.0.0-midi.zip’ saved [58416533/58416533]



In [22]:
# run inference
import numpy as np
# Described by the notebook author as a deprecated value that the model requires but
# does not use. NOTE(review): confirm against the model's generation code.
config.threshold_len = 500

best_model.test()
# Prompt: a batch holding one sequence that consists of the START token only.
inputs = np.array([[SpecialTokens.START.value]])
inputs = torch.from_numpy(inputs).to(device)
# Generation needs no gradients; no_grad avoids building autograd graphs for each step.
with torch.no_grad():
    result = best_model(inputs, MAX_TOKENS)

CONFIG_FILE_NAME = save.yml
pad_token = 0
threshold_len = 500
512


In [23]:
# Turn the generated token sequence back into a MIDI object with the serializer
# and write it to the Colab working directory.
generated = serializer.deserialize(result)
generated.save('/content/generated.midi')