In [1]:
import numpy as np
import torch
import os
from tqdm.notebook import tqdm

from deepnote import MusicRepr, Constants
from importlib import reload

from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor

# Fix the global random seed up front so that runs of this notebook are repeatable.
seed_everything(42)

  rank_zero_deprecation(
Global seed set to 42


42

## Data

In [2]:
# Tokenisation constants; the same object is handed to the dataset, used to size
# the model vocabulary and used again when decoding generated tokens.
const = Constants(unit=4, num_tempo_bins=20, num_velocity_bins=20)

# The dataset location can be overridden with the POP909_TRAIN_DIR environment
# variable so the notebook is not tied to one machine; the original local path
# stays as the default. `os.path.join(..., '')` guarantees a trailing separator,
# which the prompt cell relies on when it concatenates a file name onto this path.
data_dir = os.path.join(
    os.environ.get('POP909_TRAIN_DIR', '/home/soroosh/data/MIDI/pop909/train/'),
    ''
)

data_config = {
    'data_dir' : data_dir,
    'const' : const,
    'instruments' : ['piano'],
    'mode' : 'remi',
    # NOTE(review): only 2 files are loaded — looks like a debugging limit; confirm before a full run.
    'max_files' : 2,
    'window_len' : 1024,
    'n_jobs' : 20
}

# Run name: used for the TensorBoard log directory and the checkpoint directory.
name = 'remi-small-linear-pop909-win1024'
print('model name:',name)

model name: remi-small-linear-pop909-win1024


In [3]:
import src.data

# Re-execute the module so that edits made on disk are picked up without
# restarting the kernel, then take the dataset class from the fresh module.
MidiDataset = reload(src.data).MidiDataset

# Build the dataset from the settings in `data_config` and show how many items it holds.
dataset = MidiDataset(**data_config)
n = len(dataset)
n

  0%|          | 0/2 [00:00<?, ?it/s]

13783

In [4]:
from torch.utils.data import DataLoader, random_split

# Hold out 10% of the dataset items for validation.
t = int(0.1 * n)

# Use a dedicated, explicitly seeded generator so the split is identical every
# time this cell runs. With the global RNG, re-running the cell in the same
# session reshuffles the split, and a model resumed from a checkpoint could then
# be trained on items it was previously validated on.
split_rng = torch.Generator().manual_seed(42)
td, vd = random_split(dataset, [n-t, t], generator=split_rng)

# Options shared by both loaders; batches are assembled by the dataset's own collate function.
loader_kwargs = dict(pin_memory=True, num_workers=4, collate_fn=dataset.fn)
tl = DataLoader(dataset=td, batch_size=16, shuffle=True, **loader_kwargs)
vl = DataLoader(dataset=vd, batch_size=32, shuffle=False, **loader_kwargs)

In [5]:
# Sanity check: pull one training batch and list the shape of each entry.
b = next(iter(tl))
for key, tensor in b.items():
    print(key, tensor.shape)

X torch.Size([16, 1023])
X_len torch.Size([16])
labels torch.Size([16, 1023])


## Model

In [6]:
import src.config
reload(src.config)
from src.config import make_config

from transformers import GPT2Config

# Earlier configuration built with `make_config`, kept for reference:
# config = make_config(
#     const,
#     mode='remi',
#     model='transformer',
#     d_model=256,
#     dropout=0.1,
#     lr=2e-4,
#     tie_emb=False,
#     pos_emb=True,
#     n_layer=4,
#     n_head=8,
#     d_inner=512,
#     activation='gelu'
# )

# Derive the context size from the dataset window instead of repeating the
# literal 1024: if `window_len` is raised, the position embeddings grow with it
# rather than being indexed out of range by longer sequences.
context_len = data_config['window_len']

config = {
    'lr' : 1e-4,
    'transformer': GPT2Config(
        # One output class per token in the vocabulary defined by `const`.
        vocab_size=len(const.all_tokens),
        n_positions=context_len,
        n_ctx=context_len,
        n_embd=256,
        n_layer=4,
        n_head=8,
        n_inner=512
    )
}
config

{'lr': 0.0001,
 'transformer': GPT2Config {
   "activation_function": "gelu_new",
   "attn_pdrop": 0.1,
   "bos_token_id": 50256,
   "embd_pdrop": 0.1,
   "eos_token_id": 50256,
   "gradient_checkpointing": false,
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",
   "n_ctx": 1024,
   "n_embd": 256,
   "n_head": 8,
   "n_inner": 512,
   "n_layer": 4,
   "n_positions": 1024,
   "resid_pdrop": 0.1,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
   "summary_proj_to_labels": true,
   "summary_type": "cls_index",
   "summary_use_proj": true,
   "transformers_version": "4.5.1",
   "use_cache": true,
   "vocab_size": 351
 }}

In [7]:
from src.models.remi import RemiHFTransformer, RemiLinearTransformer

# Fresh, untrained model built from `config`.
model = RemiHFTransformer(config)
# Alternative: resume the linear-transformer variant from its last checkpoint.
# model = RemiLinearTransformer.load_from_checkpoint(f'weights/{name}/last.ckpt', config=config, is_training=True)

# Display the parameter count reported by the model.
param_count = model.count_parameters()
param_count

2460928

In [8]:
# h, logits, state, loss = model(x=b['X'], x_len=b['X_len'], y=b['labels'])

In [9]:
# TensorBoard logs go to logs/<name>/; the learning rate is recorded at every step.
logger = TensorBoardLogger(save_dir='logs/', name=name)
lr_logger = LearningRateMonitor(logging_interval='step')

# Keep the 5 best checkpoints, ranked by the same metric that appears in their
# file names. Previously the callback monitored 'train_loss' while the file name
# was formatted with 'val_loss', so the loss shown in a file name was not the
# one used to choose which checkpoints to keep.
# NOTE(review): assumes the model logs a metric named 'val_loss' — confirm.
# `period=1` was dropped: it is the default, and the argument is deprecated in
# recent PyTorch Lightning releases.
checkpoint = ModelCheckpoint(
    dirpath=f'weights/{name}/',
    filename='{epoch}-{val_loss:.2f}',
    monitor='val_loss',
    mode='min',
    save_top_k=5
)

# Single-GPU training for 10 epochs with no gradient accumulation.
trainer = Trainer(
    benchmark=True,
    gpus=1,
    accumulate_grad_batches=1,
    logger=logger,
    max_epochs=10,
    callbacks=[checkpoint, lr_logger]
)

  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores


In [None]:
# Train `model` on the training loader `tl`, validating on `vl`.
trainer.fit(model, tl, vl)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type             | Params
-------------------------------------------------
0 | loss_func   | CrossEntropyLoss | 0     
1 | transformer | GPT2LMHeadModel  | 2.5 M 
-------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
9.844     Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Global seed set to 42


Training: 0it [00:00, ?it/s]

In [None]:
# Save a checkpoint under a fixed file name; the generation section loads this path.
trainer.save_checkpoint(f'weights/{name}/last.ckpt')

## Generate

In [None]:
from src.models.remi import RemiHFTransformer

# weights/<name>/last.ckpt is written by the trainer that fitted a
# RemiHFTransformer, and `config` holds a GPT2Config for that class, so the
# checkpoint is restored with the same class instead of RemiLinearTransformer.
# NOTE(review): assumes RemiHFTransformer offers the same `generate(...)` call
# used below — confirm.
gen_model = RemiHFTransformer.load_from_checkpoint(f'weights/{name}/last.ckpt', config=config)

# Switch to inference mode so dropout is disabled while sampling.
gen_model.eval();

In [None]:
# gen_conf = {
#     'p_ttype' : 1.,
#     't_ttype' : 1.,
#     'p_barbeat' : .9,
#     't_barbeat' : .7,
#     'p_tempo' : 1.,
#     't_tempo' : .7,
#     'p_chord' : 1.,
#     't_chord' : .9,
#     'p_pitch' : .8,
#     't_pitch' : .7,
#     'p_duration' : .8,
#     't_duration' : .7,
#     'p_velocity' : 1.,
#     't_velocity' : 1.,
# }

In [None]:
path = data_config['data_dir']

# `os.listdir` returns entries in arbitrary order, so sort before taking the
# first one to get the same file on every run and every machine. `os.path.join`
# also works when `data_dir` has no trailing separator.
first_file = sorted(os.listdir(path))[0]
seq = MusicRepr.from_file(os.path.join(path, first_file), const=const)

# Use the first four bars of that piece as the prompt.
# NOTE(review): the generation cell below passes `prompt=None`, so this prompt
# is currently unused — confirm whether that is intended.
prompt = MusicRepr.concatenate(seq.get_bars()[:4])
# len(prompt)

In [None]:
# Sampling settings passed straight through to `generate`.
sampling_kwargs = {'top_p': 0.8, 'temperature': 0.7}

# Generate up to `max_len` tokens on the GPU without a prompt.
gen_remi = gen_model.generate(prompt=None, max_len=1000, cuda=True, **sampling_kwargs)
gen_remi.shape

In [None]:
# Map every generated index back to its token string and preview the first ten.
vocab = const.all_tokens
tokens = [vocab[token_id] for token_id in gen_remi]
print(tokens[:10])

# Rebuild a MusicRepr from the space-separated token string and show its length.
remi_text = ' '.join(tokens)
gen_seq = MusicRepr.from_string(remi_text, const=const)
len(gen_seq)

In [None]:
# gen_cp = np.concatenate(
#     [
#         gen_cp[:,:4], 
#         np.ones(shape=(gen_cp.shape[0],1))*const.instruments.index('piano'), 
#         gen_cp[:, 4:]
#     ], 
#     axis=1
# )
# gen_seq = MusicRepr.from_cp(gen_cp.astype(int), const=const)

In [None]:
# Export the generated sequence with MusicRepr.to_midi; 'test.mid' is a relative path.
gen_seq.to_midi('test.mid')