In [1]:
from data_utils import SeparatedMelHarmTextDataset, MelHarmTextCollatorForSeq2Seq
import os
import numpy as np
from harmony_tokenizers_m21 import ChordSymbolTokenizer, RootTypeTokenizer, \
    PitchClassTokenizer, RootPCTokenizer, GCTRootPCTokenizer, \
    GCTSymbolTokenizer, GCTRootTypeTokenizer, MelodyPitchTokenizer, \
    MergedMelHarmTokenizer
from torch.utils.data import DataLoader
from transformers import BartForConditionalGeneration, BartConfig, DataCollatorForSeq2Seq
import torch
from torch.optim import AdamW
from tqdm import tqdm
from models import TransTextVAE

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
chordSymbolTokenizer = ChordSymbolTokenizer.from_pretrained('saved_tokenizers/ChordSymbolTokenizer')
rootTypeTokenizer = RootTypeTokenizer.from_pretrained('saved_tokenizers/RootTypeTokenizer')
pitchClassTokenizer = PitchClassTokenizer.from_pretrained('saved_tokenizers/PitchClassTokenizer')
rootPCTokenizer = RootPCTokenizer.from_pretrained('saved_tokenizers/RootPCTokenizer')
melodyPitchTokenizer = MelodyPitchTokenizer.from_pretrained('saved_tokenizers/MelodyPitchTokenizer')

In [3]:
m_chordSymbolTokenizer = MergedMelHarmTokenizer(melodyPitchTokenizer, chordSymbolTokenizer)
m_rootTypeTokenizer = MergedMelHarmTokenizer(melodyPitchTokenizer, rootTypeTokenizer)
m_pitchClassTokenizer = MergedMelHarmTokenizer(melodyPitchTokenizer, pitchClassTokenizer)
m_rootPCTokenizer = MergedMelHarmTokenizer(melodyPitchTokenizer, rootPCTokenizer)

In [4]:
# tokenizer = m_chordSymbolTokenizer
# tokenizer_name = 'ChordSymbolTokenizer'
# tokenizer = m_rootTypeTokenizer
# tokenizer_name = 'RootTypeTokenizer'
# tokenizer = m_pitchClassTokenizer
# tokenizer_name = 'PitchClassTokenizer'
tokenizer = m_rootPCTokenizer
tokenizer_name = 'RootPCTokenizer'

root_dir = '/media/maindisk/maximos/data/hooktheory_test'
dataset = SeparatedMelHarmTextDataset(root_dir, tokenizer, max_length=512, num_bars=64)
def create_data_collator(tokenizer, model):
    return MelHarmTextCollatorForSeq2Seq(tokenizer=tokenizer, model=model, padding=True)
# end create_data_collator

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [6]:
bart_config = BartConfig(
    vocab_size=len(tokenizer.vocab),
    pad_token_id=tokenizer.pad_token_id,
    bos_token_id=tokenizer.bos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    decoder_start_token_id=tokenizer.bos_token_id,
    forced_eos_token_id=tokenizer.eos_token_id,
    max_position_embeddings=512,
    encoder_layers=8,
    encoder_attention_heads=8,
    encoder_ffn_dim=512,
    decoder_layers=8,
    decoder_attention_heads=8,
    decoder_ffn_dim=512,
    d_model=512,
    encoder_layerdrop=0.3,
    decoder_layerdrop=0.3,
    dropout=0.3
)

bart = BartForConditionalGeneration(bart_config)

bart_path = 'saved_models/bart/' + tokenizer_name + '/' + tokenizer_name + '.pt'
if device == 'cpu':
    checkpoint = torch.load(bart_path, map_location="cpu", weights_only=True)
else:
    checkpoint = torch.load(bart_path, weights_only=True)
bart.load_state_dict(checkpoint)

bart.to(device)
bart.eval()

bart_encoder, bart_decoder = bart.get_encoder(), bart.get_decoder()
bart_encoder.to(device)
bart_decoder.to(device)

# Freeze BART parameters
for param in bart_encoder.parameters():
    param.requires_grad = False
for param in bart_encoder.parameters():
    param.requires_grad = False

In [7]:
collator = create_data_collator(tokenizer, model=bart)

In [8]:
dataloader = DataLoader(dataset, batch_size=1, shuffle=True, collate_fn=collator)

In [9]:
b = next(iter(dataloader))

  return self.iter().getElementsByClass(classFilterList)
  batch["labels"] = torch.tensor(batch["labels"], dtype=torch.int64)


In [10]:
config = {
    'lstm_dim': 2048,
    'roberta_model': "roberta-base",
    'latent_dim': 2048,
    'freeze_roberta': True
}

model = TransTextVAE(bart, tokenizer=tokenizer, device=device, config=config)
model.to(device)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


TransTextVAE(
  (transformer): BartForConditionalGeneration(
    (model): BartModel(
      (shared): BartScaledWordEmbedding(221, 512, padding_idx=1)
      (encoder): BartEncoder(
        (embed_tokens): BartScaledWordEmbedding(221, 512, padding_idx=1)
        (embed_positions): BartLearnedPositionalEmbedding(514, 512)
        (layers): ModuleList(
          (0-7): 8 x BartEncoderLayer(
            (self_attn): BartSdpaAttention(
              (k_proj): Linear(in_features=512, out_features=512, bias=True)
              (v_proj): Linear(in_features=512, out_features=512, bias=True)
              (q_proj): Linear(in_features=512, out_features=512, bias=True)
              (out_proj): Linear(in_features=512, out_features=512, bias=True)
            )
            (self_attn_layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
            (activation_fn): GELUActivation()
            (fc1): Linear(in_features=512, out_features=512, bias=True)
            (fc2): Linear(in_featu

In [11]:
b['input_ids']

tensor([[  2,   6, 180,  95,  51, 107,  53, 123,  53,   6,  95,  53, 119,  48,
           6,  95,  51, 107,  53, 123,  53,   6,  95,  53, 119,  46,   6,  95,
          51, 107,  53, 119,  46,   6,  95,  46, 119,  48, 123,  41,   6,  95,
          41,   6,  95,  41,   6,  95,  51, 107,  53, 123,  53,   6,  95,  53,
         119,  48,   6,  95,  51, 107,  53, 123,  53,   6,  95,  53, 119,  58,
           6,  95,  55, 107,  51, 119,  46,   6,  95,  46, 119,  48, 123,  53,
           6,  95,  53,   6,  95,  53]])

In [12]:
# from transformers import RobertaModel, RobertaTokenizer
# roberta_model = "roberta-base"
# # Load RoBERTa
# roberta = RobertaModel.from_pretrained(roberta_model)
# text_tokenizer = RobertaTokenizer.from_pretrained(roberta_model)

In [13]:
# txts = ['Bar number 2 begins with a A:(7b9) chord.']
# roberta_inputs = text_tokenizer(
#     txts, padding=True, truncation=True, return_tensors="pt"
# ).to(device)
# print(roberta_inputs)
# print(text_tokenizer.decode(roberta_inputs['input_ids'][0]))

In [14]:
with torch.no_grad():
    input_ids = b['input_ids'].to(device)
    txts = ['Bar number 0 begins with a G:maj chord.']
    num_bars = (input_ids == tokenizer.vocab['<bar>']).sum(dim=1).reshape(input_ids.shape[0],-1)
    outputs = model(input_ids, txts, encoder_attention=None, generate_max_tokens=500, num_bars=num_bars, temperature=1.0)

recon generation
bars_left: tensor([[16]], device='cuda:0')
bars_left: tensor([[16]], device='cuda:0')
bars_left: tensor([[15]], device='cuda:0')
bars_left: tensor([[15]], device='cuda:0')
bars_left: tensor([[15]], device='cuda:0')
bars_left: tensor([[15]], device='cuda:0')
bars_left: tensor([[15]], device='cuda:0')
bars_left: tensor([[14]], device='cuda:0')
bars_left: tensor([[14]], device='cuda:0')
bars_left: tensor([[14]], device='cuda:0')
bars_left: tensor([[14]], device='cuda:0')
bars_left: tensor([[14]], device='cuda:0')
bars_left: tensor([[13]], device='cuda:0')
bars_left: tensor([[13]], device='cuda:0')
bars_left: tensor([[13]], device='cuda:0')
bars_left: tensor([[13]], device='cuda:0')
bars_left: tensor([[13]], device='cuda:0')
bars_left: tensor([[12]], device='cuda:0')
bars_left: tensor([[12]], device='cuda:0')
bars_left: tensor([[12]], device='cuda:0')
bars_left: tensor([[12]], device='cuda:0')
bars_left: tensor([[12]], device='cuda:0')
bars_left: tensor([[11]], device='cud

In [15]:
print(outputs)

{'loss': tensor(0.9263, device='cuda:0'), 'recon_loss': tensor(0.9191, device='cuda:0'), 'kl_loss': tensor(0.0072, device='cuda:0'), 'x': tensor([[[-0.4881, -1.1768,  2.9108,  ..., -0.0986,  0.4619, -0.5124],
         [-0.8871, -0.1297,  0.8752,  ...,  0.0655,  1.2185,  1.3982],
         [ 0.8006, -1.0139,  2.3500,  ..., -1.1259, -0.0338, -0.4932],
         ...,
         [-0.2790, -1.0336,  1.1319,  ..., -0.2008,  0.0918, -1.7273],
         [ 1.0614,  0.9184, -0.6428,  ...,  0.3749, -1.1559, -0.3657],
         [ 0.3564,  0.9066, -1.6436,  ...,  0.5790, -0.9214, -1.1320]]],
       device='cuda:0'), 'recon_x': tensor([[[-0.1933,  0.0223, -0.0198,  ...,  0.0324, -0.0238, -0.0899],
         [-0.2106,  0.0394, -0.0446,  ...,  0.0851, -0.0088, -0.0775],
         [-0.2136,  0.0464, -0.0555,  ...,  0.1146,  0.0002, -0.0728],
         ...,
         [-0.1522,  0.0590, -0.0661,  ...,  0.1593,  0.0190, -0.0620],
         [-0.1316,  0.0609, -0.0657,  ...,  0.1631,  0.0244, -0.0506],
         [-0.09

In [16]:
output_tokens = []
output_recon_tokens = []

for i in outputs['generated_ids'][0]:
    output_tokens.append( tokenizer.ids_to_tokens[ int(i) ].replace(' ','x') )
for i in outputs['generated_recon_ids'][0]:
    output_recon_tokens.append( tokenizer.ids_to_tokens[ int(i) ].replace(' ','x') )

In [17]:
print(output_tokens)
print(output_recon_tokens)

['<s>', '<h>', '<bar>', 'position_0x00', 'chord_root_0', 'chord_pc_3', 'chord_pc_7', '<bar>', 'position_0x00', 'chord_root_0', 'chord_pc_3', 'chord_pc_7', '<bar>', 'position_0x00', 'chord_root_10', 'chord_pc_2', 'chord_pc_5', '<bar>', 'position_0x00', 'chord_root_10', 'chord_pc_2', 'chord_pc_5', '<bar>', 'position_0x00', 'chord_root_0', 'chord_pc_3', 'chord_pc_7', '<bar>', 'position_0x00', 'chord_root_0', 'chord_pc_3', 'chord_pc_7', '<bar>', 'position_0x00', 'chord_root_10', 'chord_pc_2', 'chord_pc_5', '<bar>', 'position_0x00', 'chord_root_10', 'chord_pc_2', 'chord_pc_5', '<bar>', 'position_0x00', 'chord_root_0', 'chord_pc_3', 'chord_pc_7', '<bar>', 'position_0x00', 'chord_root_0', 'chord_pc_3', 'chord_pc_7', '<bar>', 'position_0x00', 'chord_root_10', 'chord_pc_2', 'chord_pc_5', '<bar>', 'position_0x00', 'chord_root_10', 'chord_pc_2', 'chord_pc_5', '<bar>', 'position_0x00', 'chord_root_0', 'chord_pc_3', 'chord_pc_7', '<bar>', 'position_0x00', 'chord_root_0', 'chord_pc_3', 'chord_pc_7',

In [18]:
input_ids = b['input_ids'].to(device)
print(input_ids)
input_tokens = []
for i in input_ids[0]:
    input_tokens.append( tokenizer.ids_to_tokens[ int(i) ].replace(' ','x') )
print(input_tokens)

tensor([[  2,   6, 180,  95,  51, 107,  53, 123,  53,   6,  95,  53, 119,  48,
           6,  95,  51, 107,  53, 123,  53,   6,  95,  53, 119,  46,   6,  95,
          51, 107,  53, 119,  46,   6,  95,  46, 119,  48, 123,  41,   6,  95,
          41,   6,  95,  41,   6,  95,  51, 107,  53, 123,  53,   6,  95,  53,
         119,  48,   6,  95,  51, 107,  53, 123,  53,   6,  95,  53, 119,  58,
           6,  95,  55, 107,  51, 119,  46,   6,  95,  46, 119,  48, 123,  53,
           6,  95,  53,   6,  95,  53]], device='cuda:0')
['<s>', '<bar>', 'ts_4x4', 'position_0x00', 'P:65', 'position_1x50', 'P:67', 'position_3x50', 'P:67', '<bar>', 'position_0x00', 'P:67', 'position_3x00', 'P:62', '<bar>', 'position_0x00', 'P:65', 'position_1x50', 'P:67', 'position_3x50', 'P:67', '<bar>', 'position_0x00', 'P:67', 'position_3x00', 'P:60', '<bar>', 'position_0x00', 'P:65', 'position_1x50', 'P:67', 'position_3x00', 'P:60', '<bar>', 'position_0x00', 'P:60', 'position_3x00', 'P:62', 'position_3x50', 'P:5

In [19]:
os.makedirs('examples', exist_ok=True)
tokenizer.decode( input_tokens + output_tokens[1:], output_format='file', output_path='examples/encdec.mxl' )
tokenizer.decode( input_tokens + output_recon_tokens[1:], output_format='file', output_path='examples/recon.mxl' )

Saved as examples/encdec.mxl
Saved as examples/recon.mxl
