In [1]:
import numpy as np
import torch
import os
from tqdm.notebook import tqdm

from deepnote import MusicRepr, Constants
from importlib import reload

from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor

# Set the global seed before any data splitting or model construction.
seed_everything(seed=42)

  rank_zero_deprecation(
Global seed set to 42


42

## Data

In [2]:
# Shared Constants instance: it is passed to the dataset, to make_config and
# to the MusicRepr calls further down in this notebook.
const = Constants(unit=4, num_tempo_bins=20, num_velocity_bins=20)

# Location of the training MIDI files. The original path stays the default;
# set the POP909_TRAIN_DIR environment variable to run on another machine.
# os.path.join(..., '') guarantees the trailing separator the path had before.
DEFAULT_DATA_DIR = '/home/soroosh/data/MIDI/pop909/train/'

data_config = {
    'data_dir' : os.path.join(os.environ.get('POP909_TRAIN_DIR', DEFAULT_DATA_DIR), ''),
    'const' : const,
    'instruments' : ['piano'],
    'mode' : 'remi',
    'max_files' : 2,
    'window_len' : 4096,
    'n_jobs' : 20
}

# Run name used for the log and checkpoint directories. The window length is
# read from data_config so the name cannot drift away from the actual setting.
name = f"remi-small-linear-pop909-win{data_config['window_len']}"
print('model name:',name)

model name: remi-small-linear-pop909-win4096


In [3]:
# Reload src.data so edits to the module are picked up without restarting the
# kernel; the reload has to run before MidiDataset is imported from it.
import src.data
reload(src.data)
from src.data import MidiDataset

# Build the dataset from the settings collected in data_config.
dataset = MidiDataset(**data_config)
n = len(dataset)  # dataset size, reused for the train/validation split
n

  0%|          | 0/2 [00:00<?, ?it/s]

7639

In [6]:
from torch.utils.data import DataLoader, random_split

# Hold out 10% of the samples for validation.
t = int(0.1 * n)
td, vd = random_split(dataset, [n-t, t])

# The two loaders differ only in shuffling: training data is shuffled,
# validation data is not.
loader_kwargs = dict(batch_size=20, pin_memory=True, num_workers=4, collate_fn=dataset.fn)
tl = DataLoader(dataset=td, shuffle=True, **loader_kwargs)
vl = DataLoader(dataset=vd, shuffle=False, **loader_kwargs)

In [7]:
# Pull one training batch and print the shape of every entry in it.
# Assumes the collate function returns a dict — TODO confirm.
b = next(iter(tl))
for key, value in b.items():
    print(key, value.shape)

X torch.Size([10, 4095])
X_len torch.Size([10])
labels torch.Size([10, 4095])


## Model

In [3]:
# Reload src.config before importing from it so local edits take effect.
import src.config
reload(src.config)
from src.config import make_config

# Hyper-parameters, forwarded to make_config as keyword arguments.
hparams = dict(
    mode='remi',
    model='transformer',
    d_model=256,
    dropout=0.1,
    lr=2e-4,
    tie_emb=False,
    pos_emb=True,
    n_layer=6,
    n_head=8,
    d_inner=1024,
    activation='gelu',
)

config = make_config(const, **hparams)
config

{'lr': 0.0002,
 'embedding': {'d_model': 512,
  'dropout': 0.1,
  'max_len': 10000,
  'pos_emb': True,
  'n_vocab': 351},
 'head': {'d_model': 512, 'n_vocab': 351},
 'transformer': {'d_model': 512,
  'n_layer': 6,
  'n_head': 8,
  'd_inner': 1024,
  'dropout': 0.1,
  'activation': 'gelu'},
 'tie_emb': False}

In [9]:
from src.models.remi import RemiLinearTransformer

# Start from a freshly initialised model. To resume from a saved checkpoint
# instead, swap in the commented-out line below.
model = RemiLinearTransformer(config=config)
# model = RemiLinearTransformer.load_from_checkpoint(f'weights/{name}/epoch=29-train_loss=0.53.ckpt', config=config, is_training=True)
model.count_parameters()

12977503

In [8]:
# Disabled smoke test: run the model once on the sample batch `b`.
# h, logits, state, loss = model(x=b['X'], x_len=b['X_len'], y=b['labels'])

In [10]:
# TensorBoard logs for this run go under logs/<name>/.
logger = TensorBoardLogger(save_dir='logs/', name=name)

# Log the learning rate at every optimisation step.
lr_logger = LearningRateMonitor(logging_interval='step')

# Keep the five checkpoints with the lowest training loss. The filename now
# formats the same metric that is monitored (it used `val_loss` before, so the
# number in the filename was not the one used for ranking). The deprecated
# `period=1` argument is dropped: checkpointing every epoch is the default.
checkpoint = ModelCheckpoint(
    dirpath=f'weights/{name}/',
    filename='{epoch}-{train_loss:.2f}',
    monitor='train_loss',
    mode='min',
    save_top_k=5
)

# Single-GPU training for at most 100 epochs, without gradient accumulation.
trainer = Trainer(
    benchmark=True,
    gpus=1,
    accumulate_grad_batches=1,
    logger=logger,
    max_epochs=100,
    callbacks=[checkpoint, lr_logger]
)

  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores


In [None]:
# Train on the training loader (tl) and validate on the held-out loader (vl).
trainer.fit(model, tl, vl)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name        | Type              | Params
--------------------------------------------------
0 | embedding   | RemiEmbedding     | 179 K 
1 | transformer | LinearTransformer | 12.6 M
2 | head        | RemiHead          | 180 K 
3 | loss_func   | CrossEntropyLoss  | 0     
--------------------------------------------------
13.0 M    Trainable params
0         Non-trainable params
13.0 M    Total params
51.910    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Global seed set to 42


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [11]:
# Save the trainer's current state in the run's checkpoint directory.
last_ckpt_path = f'weights/{name}/last.ckpt'
trainer.save_checkpoint(last_ckpt_path)

## Generate

In [7]:
from src.models.remi import RemiLinearTransformer

# Reload the model from the manually saved checkpoint for generation.
last_ckpt = f'weights/{name}/last.ckpt'
gen_model = RemiLinearTransformer.load_from_checkpoint(last_ckpt, config=config)

In [6]:
# Disabled: per-token-type generation settings (ttype, barbeat, tempo, chord,
# pitch, duration, velocity).
# NOTE(review): the p_*/t_* prefixes presumably stand for top-p and
# temperature, like the generate() arguments used later in this notebook —
# confirm before re-enabling.
# gen_conf = {
#     'p_ttype' : 1.,
#     't_ttype' : 1.,
#     'p_barbeat' : .9,
#     't_barbeat' : .7,
#     'p_tempo' : 1.,
#     't_tempo' : .7,
#     'p_chord' : 1.,
#     't_chord' : .9,
#     'p_pitch' : .8,
#     't_pitch' : .7,
#     'p_duration' : .8,
#     't_duration' : .7,
#     'p_velocity' : 1.,
#     't_velocity' : 1.,
# }

In [8]:
# Load one training file as source material for an optional prompt.
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# make "the first file" the same file on every run and machine.
path = data_config['data_dir']
first_file = sorted(os.listdir(path))[0]
seq = MusicRepr.from_file(os.path.join(path, first_file), const=const)

# Generation starts without a prompt; the commented-out expression would use
# the first four bars of `seq` instead.
prompt = None#MusicRepr.concatenate(seq.get_bars()[:4])
# len(prompt)

In [10]:
# Generate on the CPU (cuda=False) with the sampling settings below, then
# show the shape of the returned token array.
sampling = dict(top_p=0.85, temperature=0.8)
gen_remi = gen_model.generate(prompt=prompt, max_len=100, cuda=False, **sampling)
gen_remi.shape

  0%|          | 0/100 [00:00<?, ?it/s]

(101,)

In [11]:
# Map every generated token id to its token string and preview the first ten.
tokens = [const.all_tokens[token_id] for token_id in gen_remi]
print(tokens[:10])

# Decode the space-separated token string into a MusicRepr.
# NOTE(review): the recorded run of this cell ended in an AssertionError after
# the preview was printed — presumably raised while parsing the generated
# tokens; confirm and handle before relying on gen_seq.
token_string = ' '.join(tokens)
gen_seq = MusicRepr.from_string(token_string, const=const)
len(gen_seq)

['Bar', 'BeatTempo_101', 'NoteDuration_1', 'NoteVelocity_60', 'BeatPosition_7', 'NoteInstFamily_piano', 'NotePitch_74', 'NoteDuration_6', 'NoteVelocity_80', 'Bar']


AssertionError: 

In [None]:
# Disabled: alternative decoding path for a `gen_cp` array. It inserts a
# constant column holding the index of 'piano' in const.instruments after the
# first four columns, then decodes the result with MusicRepr.from_cp.
# gen_cp = np.concatenate(
#     [
#         gen_cp[:,:4], 
#         np.ones(shape=(gen_cp.shape[0],1))*const.instruments.index('piano'), 
#         gen_cp[:, 4:]
#     ], 
#     axis=1
# )
# gen_seq = MusicRepr.from_cp(gen_cp.astype(int), const=const)

In [None]:
# Write the decoded sequence to a MIDI file (path relative to the working directory).
midi_path = 'test.mid'
gen_seq.to_midi(midi_path)