In [2]:
from copy import deepcopy
from pathlib import Path
from random import shuffle

from evaluate import load as load_metric
from miditok import REMI, TokenizerConfig
from miditok.pytorch_data import DatasetMIDI, DataCollator, split_files_for_training
from miditok.data_augmentation import augment_dataset
from torch import Tensor, argmax
from torch.utils.data import DataLoader
from torch.cuda import is_available as cuda_available, is_bf16_supported
from torch.backends.mps import is_available as mps_available
from transformers import AutoModelForCausalLM, MistralConfig, Trainer, TrainingArguments, GenerationConfig
from transformers.trainer_utils import set_seed
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Fix the RNG seed up front so the run is repeatable.
set_seed(777)

# --- Tokenizer configuration -------------------------------------------------
# Beat resolution table: maps a (start_beat, end_beat) range to the number of
# samples per beat used inside that range.
BEAT_RES = {
    (0, 1): 12,
    (1, 2): 4,
    (2, 4): 2,
    (4, 8): 1,
}
TOKENIZER_PARAMS = dict(
    pitch_range=(21, 109),
    beat_res=BEAT_RES,
    num_velocities=24,
    special_tokens=["PAD", "BOS", "EOS"],
    use_chords=True,
    use_rests=True,
    use_tempos=True,
    use_time_signatures=True,
    use_programs=False,  # no multitrack here
    num_tempos=32,
    tempo_range=(50, 200),  # (min_tempo, max_tempo)
)
config = TokenizerConfig(**TOKENIZER_PARAMS)

# --- Tokenizer ---------------------------------------------------------------
tokenizer = REMI(config)

# --- Training corpus ---------------------------------------------------------
# Collect every file below the dataset directory, recursively: all "*.mid"
# matches first, then all "*.midi" matches.
midi_root = Path("./processed/piano_midi").resolve()
midi_paths = [
    midi_file
    for pattern in ("**/*.mid", "**/*.midi")
    for midi_file in midi_root.glob(pattern)
]

# Learn the vocabulary from the corpus (target size: 30k tokens), then persist
# the tokenizer parameters next to the notebook.
tokenizer.train(vocab_size=30000, files_paths=midi_paths)
tokenizer.save_params("tokenizer.json")

  config = TokenizerConfig(**TOKENIZER_PARAMS)







In [None]:
from torch import randint
from transformers import T5Config, T5ForConditionalGeneration

# Vocabulary size and special-token ids, read from the MIDI tokenizer.
# NOTE(review): per the miditok docs, special tokens are stored as
# "<NAME>_None" and looked up with tokenizer["..."]; the Hugging Face style
# attributes `eos_token_id` / `bos_token_id` / `max_len` used previously are
# not part of that API — confirm against the installed miditok version.
vocab_size = len(tokenizer)
pad_token_id = tokenizer.pad_token_id
bos_token_id = tokenizer["BOS_None"]
eos_token_id = tokenizer["EOS_None"]

# Label smoothing is a training-time option, not a T5Config field: pass this
# value to `TrainingArguments(label_smoothing_factor=...)` when building the
# Trainer.
LABEL_SMOOTHING_FACTOR = 0.1

# Architecture hyper-parameters. T5 uses relative position buckets, so no
# maximum-position arguments are passed here.
model_config = T5Config(
    vocab_size=vocab_size,
    d_model=512,
    d_ff=2048,
    num_heads=8,
    num_layers=6,
    relative_attention_num_buckets=32,
    dropout_rate=0.1,
    eos_token_id=eos_token_id,
    pad_token_id=pad_token_id,
    bos_token_id=bos_token_id,
    decoder_start_token_id=bos_token_id,
    use_cache=True,
    is_encoder_decoder=True,
)

# Instantiate a randomly initialised encoder-decoder model from the config.
# (Previously `model` was bound to the config object itself, which is not
# callable.)
model = T5ForConditionalGeneration(model_config)

# Smoke test: run one forward pass on a small batch of random token ids to
# check that the model and the vocabulary size fit together. The former
# text-prompt example does not apply to a MIDI tokenizer.
SMOKE_BATCH_SIZE, SMOKE_SEQ_LEN = 2, 16
input_ids = randint(0, vocab_size, (SMOKE_BATCH_SIZE, SMOKE_SEQ_LEN))
labels = randint(0, vocab_size, (SMOKE_BATCH_SIZE, SMOKE_SEQ_LEN))

# the forward function automatically creates the correct decoder_input_ids
loss = model(input_ids=input_ids, labels=labels).loss
loss.item()