In [None]:
# List the GPUs visible to this runtime.
!nvidia-smi -L

In [None]:
# Mount drive and cd to notebook folder
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

%cd "/content/drive/MyDrive/pato/upf-smc/thesis/rhythmic-relationships/scripts"

import sys
COLAB_WORKDIR = "/content/drive/MyDrive/pato/upf-smc/thesis/rhythmic-relationships/scripts"
if COLAB_WORKDIR not in sys.path:
  sys.path.append(COLAB_WORKDIR)
print(sys.path)

In [None]:
rsync -Par ../../datasets/lmdc_17243_2bar_4res/ /tmp/lmdc_17243_2bar_4res

In [None]:
# Install dependencies
!pip install --upgrade pip -qq
!pip install .. -qq
!pip install git+https://github.com/danielgomezmarin/rhythmtoolbox -qq

In [None]:
import torch
import yaml
from model_utils import get_model_name, load_config, save_model, get_loss_fn
from rhythmic_relationships.data import PartDatasetSequential
from rhythmic_relationships.model import TransformerDecoder
from rhythmic_relationships.train import train_transformer_decoder
from torch.utils.data import DataLoader, random_split

# Train on the first CUDA device when one is available; otherwise fall back to
# the CPU so the notebook still runs on a runtime without a GPU.
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
CONFIG_FILEPATH = "transformer_decoder_config.yml"


# Start from the values stored in the YAML file ...
config = load_config(CONFIG_FILEPATH)

# ... then overwrite hyperparameters for this run.
# The 'dataset' and 'model' sections are replaced wholesale, so any keys they
# had in the file are dropped.
config['dataset'] = {
  "dataset_name": "lmdc_17243_2bar_4res",
  "part": "Bass",
  "representation": "onset_roll",
  # NOTE(review): presumably sequence_len - 1 -- confirm.
  "context_len": 31,
}
config['model'] = {
  'vocab_size': 130,
  'n_embed': 64,
  'n_head': 2,
  'n_layer': 3,
  'dropout': 0.3,
}
config['sequence_len'] = 32
config['resolution'] = 4
config['loss_fn'] = 'cross-entropy'
config['loss_reduction'] = 'mean'
config['clip_gradients'] = False
config['num_epochs'] = 15
config['batch_size'] = 256
config['lr'] = 1e-3
config['wandb'] = False
config['n_eval_iters'] = 20
config['n_eval_seqs'] = 100
# These assignments require the loaded config to already have a 'splits' mapping.
config['splits']['train'] = 0.8
config['splits']['val'] = 0.1
config['splits']['test'] = 0.1
config['seed'] = 13


# Show the effective configuration for this run.
print(yaml.dump(config))

# Seed torch's global RNG, which the random split below draws from.
torch.manual_seed(config["seed"])

# The dataset is read from /tmp, where an earlier cell copied it.
dataset = PartDatasetSequential(**config["dataset"], datasets_dir='/tmp')
splits = config["splits"]
# Look the fractions up by name rather than relying on the key order (or the
# exact key set) of the mapping loaded from the config file; a different order
# would silently swap the sizes of the train/val/test subsets.
split_fractions = [splits[name] for name in ("train", "val", "test")]
# test_data is held out; it is not used by the cells shown in this notebook.
train_data, val_data, test_data = random_split(dataset, split_fractions)
print(f"{splits=}: {len(train_data)}, {len(val_data)}, {len(test_data)}")

train_loader = DataLoader(train_data, batch_size=config["batch_size"], shuffle=True)
val_loader = DataLoader(val_data, batch_size=config["batch_size"], shuffle=True)

# Name under which this run is passed to training and, later, to save_model.
model_name = get_model_name()
print(f"{model_name=}")

# The model section takes its context length from the dataset section, so the
# value is defined in exactly one place.
config["model"].update(context_len=config["dataset"]["context_len"])

# Build the model on the target device, then its optimizer and loss.
model = TransformerDecoder(**config["model"])
model = model.to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=config["lr"])
loss_fn = get_loss_fn(config)

# Gather everything the training routine needs, then run it.
train_kwargs = dict(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    config=config,
    device=DEVICE,
    model_name=model_name,
)
epoch_evals = train_transformer_decoder(**train_kwargs)

# Bundle the evaluation results returned by training with the model's total
# parameter count, then pass them to save_model along with the model and config.
n_params = sum(param.nelement() for param in model.parameters())
stats = dict(epoch_evals=epoch_evals, n_params=n_params)
print(stats)

save_model(model, config, model_name, stats)