In [3]:
from miditok import REMI, TokenizerConfig
from miditok.pytorch_data import DatasetMIDI, DataCollator
from miditok.utils import split_files_for_training
from torch.utils.data import DataLoader
from pathlib import Path

# Build a multitrack REMI tokenizer (the MidiTok docs list every available option)
config = TokenizerConfig(
    num_velocities=16,
    use_chords=True,
    use_programs=True,
)
tokenizer = REMI(config)

# Gather every MIDI file found below the dataset directory
# (generic alternative: list(Path("../datasets").glob("**/*.mid")))
dataset_dir = Path("/Users/hapticslab/Programming/humusic/datasets/orchestra")
files_paths = [midi_path for midi_path in dataset_dir.glob("**/*.mid")]

# Train the tokenizer with Byte Pair Encoding (BPE), then write it to disk
tokenizer.train(vocab_size=30000, files_paths=files_paths)
tokenizer_path = Path("/Users/hapticslab/Programming/humusic/token/tokenizer.json")
tokenizer.save(tokenizer_path)
# Optional: publish it on the Hugging Face hub (load it back with .from_pretrained)
# tokenizer.push_to_hub("username/model-name", private=True, token="your_hf_token")

# Cut the MIDI files into smaller chunks suitable for training
dataset_chunks_dir = Path("/Users/hapticslab/Programming/humusic/midi_chunks")
split_files_for_training(
    files_paths=files_paths, tokenizer=tokenizer, save_dir=dataset_chunks_dir, max_seq_len=1024
)

# Dataset over the chunk files; BOS/EOS ids are looked up in the tokenizer vocabulary
chunk_paths = list(dataset_chunks_dir.glob("**/*.mid"))
dataset = DatasetMIDI(
    files_paths=chunk_paths,
    tokenizer=tokenizer,
    max_seq_len=1024,
    bos_token_id=tokenizer["BOS_None"],
    eos_token_id=tokenizer["EOS_None"],
)

# The collator pads with the tokenizer's pad id and copies the inputs as labels
collator = DataCollator(tokenizer.pad_token_id, copy_inputs_as_labels=True)
dataloader = DataLoader(dataset=dataset, batch_size=64, collate_fn=collator)






Splitting music files (/Users/hapticslab/Programming/humusic/midi_chunks): 100%|██████████| 292/292 [00:00<00:00, 766.96it/s]


In [11]:
from LSTMwithAtt import LSTMwithAtt
import torch

from torch.nn.utils.rnn import pad_sequence
import time

import icecream as ic

In [6]:
# Device selection: use CUDA when PyTorch reports it as available, otherwise the CPU

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Train the LSTM-with-attention model on next-token prediction.
#
# Fixes compared with the previous version of this cell:
#   * the batch produced by the collator is a mapping of padded tensors (the
#     GPT-2 cell reads batch["input_ids"] / batch["attention_mask"]), so it is
#     no longer indexed with integers or measured with len(batch);
#   * the model is moved to `device`, matching where its inputs are sent;
#   * the checkpoint directory is created before torch.save is called;
#   * the unused `input` variable (which shadowed the builtin) is gone.
hidden_size = 200
# NOTE(review): the first argument is kept at 4 as before; if LSTMwithAtt
# expects the vocabulary size here it should be len(tokenizer) - confirm.
model = LSTMwithAtt(4, hidden_size).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# Target positions set to -1 are excluded from the loss
criterion = torch.nn.CrossEntropyLoss(ignore_index=-1)

num_epoch = 10

# Checkpoints are written to test/model<epoch>.model
checkpoint_dir = Path("test")
checkpoint_dir.mkdir(parents=True, exist_ok=True)

model.train()

start_time = time.time()

for epoch in range(num_epoch):
    epoch_start_time = time.time()

    for i, batch in enumerate(dataloader):
        token_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)

        # Same slicing as before: the encoder gets tokens[1:], the decoder gets
        # tokens[:-1], and the target is tokens[1:].
        # NOTE(review): the encoder input equals the target sequence - confirm
        # this is intended and not a label leak.
        # NOTE(review): padded positions now hold the collator's pad id rather
        # than the 0 that pad_sequence used - presumably the same value; verify.
        encoder_input = token_ids[:, 1:]
        decoder_input = token_ids[:, :-1]
        # Padded target positions become -1 so the criterion ignores them
        answer = token_ids[:, 1:].masked_fill(attention_mask[:, 1:] == 0, -1)

        out = model(encoder_input, decoder_input)
        # Sum of the per-sample mean losses, as in the original loop over out[h]
        loss = sum(
            criterion(sample_out, sample_answer)
            for sample_out, sample_answer in zip(out, answer)
        )
        print(f'| epoch {epoch:3d} | {i:5d}/{len(dataloader)} batches | loss {loss.item():5.2f} | time {time.time()-epoch_start_time:5.2f}s')
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'epoch {epoch+1}/{num_epoch} time : {time.time()-epoch_start_time}')
    outfile = "test/model"+ str(epoch+1) + ".model"
    torch.save(model.state_dict(),outfile)




TypeError: 'torch.Size' object is not callable

In [None]:

from transformers import GPT2Config, GPT2LMHeadModel

# GPT-2 hyperparameters: the vocabulary size comes from the trained tokenizer
# and the context length is 1024 positions.
# Optional ids that are currently left at their defaults:
#   bos_token_id=tokenizer["BOS_None"],
#   eos_token_id=tokenizer["EOS_None"],
#   pad_token_id=tokenizer.pad_token_id,
config = GPT2Config(vocab_size=tokenizer.vocab_size, n_positions=1024, n_ctx=1024,
                    n_embd=256, n_layer=4, n_head=4)

# Instantiate the language model and place it on the selected device
model = GPT2LMHeadModel(config)
model = model.to(device)

# Loss function and optimizer
# Cross-entropy criterion for the token prediction task
criterion = torch.nn.CrossEntropyLoss()
# AdamW with a learning rate of 5e-5; tune as appropriate
optimizer = torch.optim.AdamW(params=model.parameters(), lr=5e-5)

In [26]:
from tqdm import tqdm  # 進行状況を可視化するライブラリ

# Training loop
epochs = 10  # number of epochs
batch_keys = ("input_ids", "labels", "attention_mask")  # token ids, targets, mask
for epoch in range(epochs):
    # Put the model in training mode at the start of every epoch
    model.train()
    total_loss = 0

    progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")
    for batch in progress_bar:
        # Move the tensors the model consumes onto the training device
        model_inputs = {key: batch[key].to(device) for key in batch_keys}

        # Reset the gradients
        optimizer.zero_grad()

        # Forward pass; GPT2LMHeadModel computes the loss itself when labels are given
        outputs = model(**model_inputs)
        loss = outputs.loss

        # Backpropagate and update the parameters
        loss.backward()
        optimizer.step()

        # Accumulate the loss for the epoch average
        total_loss += loss.item()

    # Report the mean loss per batch for this epoch
    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}")

Epoch 1/10:  29%|██▊       | 4/14 [03:50<09:36, 57.69s/it]


KeyboardInterrupt: 