Import Libraries

In [4]:
import os
import music21 as m21
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import importlib as imp
from torch.utils.data import DataLoader
import tensorflow as tf
import pickle
import time

In [12]:
import a02_transformer
imp.reload(a02_transformer)
import a00_funs_make_symbol_seqs as fmseq
from a01_melody_preprocessor import MelodyPreprocessor
from a02_transformer import TransformerModel
from a04_melody_generator import MelodyGenerator
import a03_train


In [13]:
# Pick the compute device: the first CUDA GPU when one is available, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f'Using device: {device}')

Using device: cpu


設定參數，並且載入資料集

In [5]:
## Parameters for Data Preprocessing
time_signature = '4/4'
beats_per_measure = 4
step_duration = 0.25  # 0.25 = a 1/16 note
# Durations 0.25, 0.50, ..., 8.00; the stop value 8.1 keeps 8.0 inside the range.
acceptable_durations = np.arange(0.25, 8.1, 0.25)


## Import Data and Prepare batches
# Search the music21 corpus for 'bach' entries stored as XML files.
songs = m21.corpus.search('bach', fileExtensions='xml')
# Build the melody sequences from the search results, using the time signature
# and the duration list defined above.
melodies = fmseq.make_melody_symbol_sequences(
    songs, time_signature, acceptable_durations
)
preprocessor = MelodyPreprocessor(melodies)
training_dataset = preprocessor.create_training_dataset()
# Shuffled mini-batches of 128 samples each.
training_batches = DataLoader(dataset=training_dataset, batch_size=128, shuffle=True)

# Print vocabulary size, data size and sequence length, one value per line.
print(preprocessor.vocab_size, preprocessor.data_size, preprocessor.seq_length, sep='\n')

  return self.iter().getElementsByClass(classFilterList)


142
18523
187


儲存成 pickle 檔案，讓 Data 不要每次都重載一次

In [17]:
# Store the preprocessor object and the training dataset as pickle files
# in the current working directory.
for pickle_path, payload in (
    ('preprocessor.pkl', preprocessor),
    ('training_dataset.pkl', training_dataset),
):
    with open(pickle_path, 'wb') as f:
        pickle.dump(payload, f)

In [18]:
# Load the preprocessor object back from its pickle file.
with open('preprocessor.pkl', 'rb') as f:
    preprocessor2 = pickle.loads(f.read())

# Load the training dataset back from its pickle file.
with open('training_dataset.pkl', 'rb') as f:
    training_dataset2 = pickle.loads(f.read())

搭建模型

In [6]:
def key_padding_mask(seq, pad_token=0):
    """Flag the padding positions of a token sequence.

    Args:
        seq: Token ids; compared element-wise against ``pad_token``.
        pad_token: Id that marks padding (default 0).

    Returns:
        The result of ``seq == pad_token``. For a tensor this is a boolean
        mask of the same shape, True wherever the entry equals ``pad_token``.
    """
    is_padding = seq == pad_token
    return is_padding

def look_ahead_mask(dim):
    """Build the causal (look-ahead) attention mask for ``dim`` positions.

    Delegates to ``nn.Transformer.generate_square_subsequent_mask``.

    Args:
        dim: Sequence length; the mask is ``dim x dim``.

    Returns:
        A square float tensor with 0.0 on and below the diagonal and ``-inf``
        above it, so a position cannot attend to later positions.
    """
    causal_mask = nn.Transformer.generate_square_subsequent_mask(dim)
    return causal_mask

In [7]:
def position_encoding(num_pos, d_model):
    """Build the fixed sinusoidal positional-encoding table.

    Even feature columns hold ``sin(pos * w_k)`` and odd columns hold
    ``cos(pos * w_k)``, where ``w_k = 10000 ** (-2k / d_model)``.

    Args:
        num_pos: Number of positions (rows) to encode.
        d_model: Feature dimension of the encoding. Odd values are supported:
            the last even column then has no cosine partner.

    Returns:
        A float tensor of shape ``(1, num_pos, d_model)``; the leading
        dimension is a batch dimension of size 1 for broadcasting.
    """
    position = torch.arange(num_pos).unsqueeze(1)
    div_term = torch.exp(torch.arange(0, d_model, 2).float() *
                         (-torch.log(torch.tensor(10000.0)) / d_model))
    # One column per even feature index: ceil(d_model / 2) columns in total.
    angles = position * div_term
    pos_encoding = torch.zeros(num_pos, d_model)
    pos_encoding[:, 0::2] = torch.sin(angles)
    # There are only floor(d_model / 2) odd columns; for odd d_model the last
    # angle column has no cosine slot, so it is dropped to keep shapes aligned.
    pos_encoding[:, 1::2] = torch.cos(angles[:, :d_model // 2])
    return pos_encoding.unsqueeze(0)  # Add batch dimension

In [8]:
class TransformerModel(nn.Module):
    """Encoder-decoder Transformer that shares one token embedding.

    Source and target token ids go through the same embedding, are scaled by
    ``sqrt(d_model)``, get sinusoidal position encodings added, and are then
    processed by a batch-first ``nn.TransformerEncoder`` /
    ``nn.TransformerDecoder`` pair. A final linear layer maps decoder states
    to logits over ``vocab_size_padding`` classes.
    """

    def __init__(self, d_model, nhead, dropout, dim_feedforward, vocab_size_padding,
                 num_encoder_layers, num_decoder_layers, device):
        """Create the embedding, encoder/decoder stacks and output projection.

        Args:
            d_model: Embedding / model feature dimension.
            nhead: Number of attention heads per layer.
            dropout: Dropout probability used in the layers and on the embeddings.
            dim_feedforward: Hidden size of each layer's feed-forward sub-block.
            vocab_size_padding: Number of embedding rows and output classes.
            num_encoder_layers: Number of stacked encoder layers.
            num_decoder_layers: Number of stacked decoder layers.
            device: Device on which masks and position encodings are placed.
        """
        super(TransformerModel, self).__init__()
        self.d_model = d_model
        self.device = device
        self.embedding = nn.Embedding(vocab_size_padding, d_model).to(device)
        # The prototype layers are kept as attributes, so their parameters are
        # registered on this module and appear in its state_dict as well.
        self.encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward,
                                                        dropout=dropout, batch_first=True)
        self.encoder = nn.TransformerEncoder(self.encoder_layer, num_encoder_layers)
        self.decoder_layer = nn.TransformerDecoderLayer(d_model, nhead, dim_feedforward,
                                                        dropout=dropout, batch_first=True)
        self.decoder = nn.TransformerDecoder(self.decoder_layer, num_decoder_layers)
        self.dropout = nn.Dropout(dropout)
        self.final_layer = nn.Linear(d_model, vocab_size_padding)

    def forward(self, src, tgt):
        """Compute output logits for every target position.

        Args:
            src: Source token ids, batch-first, with the sequence on the last axis.
            tgt: Target token ids with the same layout.

        Returns:
            Logits whose last dimension is ``vocab_size_padding``, one vector
            per target position.
        """
        # Padding positions are those equal to key_padding_mask's default pad token (0).
        src_padding_mask = key_padding_mask(src).to(self.device)
        tgt_padding_mask = key_padding_mask(tgt).to(self.device)
        # Causal mask: a target position must not attend to later positions.
        tgt_mask = look_ahead_mask(tgt.size(-1)).to(self.device)
        scale_factor = torch.sqrt(torch.tensor(self.d_model, dtype=torch.float32, device=self.device))

        x = self.embedding(src)
        x *= scale_factor
        x += position_encoding(src.size(-1), self.d_model).to(self.device)
        x = self.dropout(x)
        enc_output = self.encoder(x, src_key_padding_mask=src_padding_mask)

        y = self.embedding(tgt)
        y *= scale_factor
        y += position_encoding(tgt.size(-1), self.d_model).to(self.device)
        y = self.dropout(y)
        # memory_key_padding_mask keeps cross-attention from attending to the
        # encoder outputs that correspond to padded source positions.
        dec_output = self.decoder(y, enc_output, tgt_mask=tgt_mask,
                                  tgt_key_padding_mask=tgt_padding_mask,
                                  memory_key_padding_mask=src_padding_mask)
        output = self.final_layer(dec_output)
        return output

Training 設定

In [9]:
# Model Specification and Training
# The model gets one more class than the preprocessor's vocabulary size.
vocab_size_padding = preprocessor.vocab_size + 1
model = TransformerModel(
    d_model=128,
    nhead=2,
    dropout=0.1,
    dim_feedforward=128,
    vocab_size_padding=vocab_size_padding,
    num_encoder_layers=6,
    num_decoder_layers=6,
    device=device,
).to(device)
# Cross-entropy loss, optimised with Adam at a learning rate of 5e-4.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)
optimizer = optim.Adam(params=model.parameters(), lr=0.0005)

  from .autonotebook import tqdm as notebook_tqdm


In [24]:
# The commented-out values appear to be the settings for a longer run;
# the active values below give a short run.
# epochs = 200
# save_interval = 20
epochs = 3
save_interval =1
# NOTE: machine-specific absolute path; adjust it before running elsewhere.
save_dir= "/Users/ranli/Documents/python_ve/MS_Pytorch_Thesis/teacher_transformercode_0226/epoch"
start_sequence = ['C4-1.0', 'G4-1.0', 'E4-1.0', 'C4-1.0']

# Create the output directory before training starts. Without this, a missing
# folder is only discovered when np.savetxt runs, after the first expensive
# epochs have already finished.
os.makedirs(save_dir, exist_ok=True)

losses = []       # average loss of each epoch
epoch_times = []  # wall-clock duration of each epoch, in seconds

for epoch in range(epochs):
    start_time = time.time()
    average_loss = a03_train.train_each_step(training_batches, model,
                                             criterion, optimizer, device)
    losses.append(average_loss)  # record this epoch's loss

    end_time = time.time()
    epoch_duration = end_time - start_time
    epoch_times.append(epoch_duration)

    print(f'Epoch {epoch + 1}/{epochs}, Average Loss: {average_loss},Duration: {epoch_duration} seconds')

    # Every `save_interval` epochs (never after the first epoch), generate a
    # melody from the start sequence and save it as <epoch number>.txt.
    if epoch > 0 and (epoch + 1) % save_interval == 0:
        melody_generator = MelodyGenerator(model, preprocessor.tokenizer, device)
        new_melody = melody_generator.generate(start_sequence, preprocessor.tokenizer)
        np.savetxt(f"{save_dir}/{epoch + 1}.txt", new_melody, fmt='%s')



Epoch 1/3, Average Loss: 0.3114804326460279
Epoch 2/3, Average Loss: 0.2570605012877234
Epoch 3/3, Average Loss: 0.21700749911110975


In [None]:
# Write each epoch's average loss and duration into a single text file,
# one line per epoch (epochs are numbered from 1).
with open(f"{save_dir}/loss_and_epoch_times.txt", "w") as log_file:
    for epoch, (loss, duration) in enumerate(zip(losses, epoch_times), start=1):
        print(f'Epoch {epoch}, Average Loss: {loss}, Duration: {duration} seconds',
              file=log_file)

In [None]:
# Save the model's state_dict to 'model.pth' in the current working directory.
torch.save(obj=model.state_dict(), f='model.pth')

其他紀錄：
- CPU: one epoch takes about 30 minutes
- CPU: 3 epochs take about 104 minutes
- 可以嘗試不同的起始值的影響