In [1]:
import numpy as np
import torch

from data_preperation import dataset_snapshot
from transformer_decoder_training.dataprep_transformer import dataprep_1
from sklearn.model_selection import train_test_split

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')

# Location of the MIDI files that are turned into snapshots.
maestro_dir = "/home/falaxdb/Repos/minus1/datasets/maestro_v3_split/hands_split_into_seperate_midis"

# Load the songs as snapshots.
# NOTE(review): 0.05 is presumably the snapshot interval and `amount` the number
# of songs to load -- confirm against dataset_snapshot.process_dataset_multithreaded.
dataset_as_snapshots = dataset_snapshot.process_dataset_multithreaded(maestro_dir, 0.05, amount=100)

# Post-processing pipeline, applied in order:
#   1. restrict the snapshots to the piano range (88 notes),
#   2. compress the result to 12 keys (presumably one octave -- verify in dataset_snapshot).
for postprocess in (
    dataset_snapshot.filter_piano_range,
    dataset_snapshot.compress_existing_dataset_to_12keys,
):
    dataset_as_snapshots = postprocess(dataset_as_snapshots)

# Song-level split: 30% is held out first and then halved into validation and
# test, i.e. 70% / 15% / 15%. The fixed seed keeps the split reproducible.
split_options = dict(random_state=42, shuffle=True)
train_data, temp_data = train_test_split(dataset_as_snapshots, test_size=0.3, **split_options)
val_data, test_data = train_test_split(temp_data, test_size=0.5, **split_options)

# Sanity check: report how many songs ended up in each subset.
for label, subset in (
    ("Train data:", train_data),
    ("test data:", test_data),
    ("val data:", val_data),
):
    print(label, len(subset))

Processed dataset (200/200): 100%|██████████| 200/200 [00:02<00:00, 70.88it/s]


Processed 200 of 200 files
Train data: 70
test data: 15
val data: 15


In [2]:
from torch.utils.data import DataLoader
from transformer_decoder_training.dataset_transformer.dataset_2 import AdvancedPianoDataset

# Special tokens. Their width has to match the feature dimension of the data.
token_dim = 24
# Start-of-sequence token: a (1, token_dim) NumPy array filled with ones.
sos_token = np.full((1, token_dim), 1)
# Padding token: a (1, token_dim) tensor filled with twos, placed on `device`.
pad_token = torch.tensor(np.full((1, token_dim), 2), device=device)

# Windowing / batching parameters.
batch_size = 64
seq_length = 512
stride = 256

# One dataset per split, all built with the same windowing parameters and SOS token.
train_dataset, val_dataset, test_dataset = (
    AdvancedPianoDataset(songs, seq_length, stride, sos_token)
    for songs in (train_data, val_data, test_data)
)

print("Check length of datasets. should roughly match split ratio")
for label, dataset in (
    ("train dataset:", train_dataset),
    ("val dataset:", val_dataset),
    ("test dataset:", test_dataset),
):
    print(label, len(dataset))
print("")

# DataLoaders for each subset. drop_last=True discards the final incomplete
# batch; only the training loader shuffles.
common_loader_options = dict(batch_size=batch_size, drop_last=True)
train_loader = DataLoader(train_dataset, shuffle=True, **common_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **common_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **common_loader_options)

Check length of datasets. should roughly match split ratio
train dataset: 2292
val dataset: 478
test dataset: 696



In [3]:
# Embedding size
hidden_size = 256
# Number of transformer blocks
num_layers = 8
# Number of heads used by MultiheadAttention
num_heads = 8

from transformer_decoder_training.models.transformer_decoder_1 import Transformer

# Build the model with the same hyper-parameters used above and move it to `device`.
model = Transformer(num_emb=24, num_layers=num_layers, hidden_size=hidden_size, num_heads=num_heads).to(device)

# map_location remaps the stored tensors onto the active device. Without it, a
# checkpoint written from a CUDA run cannot be deserialised on a machine where
# only the CPU is available (the fallback that `device` explicitly allows).
state_dict = torch.load(
    "/home/falaxdb/Repos/minus1/transformer_decoder_training/saved_files/saved_models/model_1_notebook_v6.pth",
    map_location=device,
)
model.load_state_dict(state_dict)

# Switch to evaluation mode; as the last expression this also displays the model summary.
model.eval()

Transformer(
  (embedding): Linear(in_features=24, out_features=256, bias=True)
  (pos_emb): SinusoidalPosEmb()
  (blocks): ModuleList(
    (0-7): 8 x TransformerBlock(
      (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      (multihead_attn): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
      )
      (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      (mlp): Sequential(
        (0): Linear(in_features=256, out_features=1024, bias=True)
        (1): ELU(alpha=1.0)
        (2): Linear(in_features=1024, out_features=256, bias=True)
      )
    )
  )
  (fc_out): Linear(in_features=256, out_features=24, bias=True)
  (sigmoid): Sigmoid()
)

# TODO: Take another look at iteration `i` during inference — I think something is wrong there.

In [4]:
from transformer_decoder_training.inference.inference_2 import inference

# For every batch of the test loader, run inference on its first sequence only.
# The first `context_length` steps are the context; the remainder is the
# ground-truth continuation.
context_length = 200

for batch in test_loader:
    # Take the first sequence and give it back a leading batch axis of size 1.
    sequence = batch[0].unsqueeze(0)
    print(sequence.shape)

    # Split along the time axis into context and ground-truth continuation.
    context_seq, continuing_seq = sequence[:, :context_length], sequence[:, context_length:]

    # NOTE(review): 0.25 is presumably the activation threshold used by
    # inference() -- confirm against its signature.
    output_tokens, melody_output_tokens = inference(
        model, context_seq, continuing_seq, 0.25, pad_token, device
    )

    print(output_tokens)
    print(melody_output_tokens)

torch.Size([1, 513, 24])
Next token shape: torch.Size([1, 24])
iteration: 0
Next token shape torch.Size([1, 24])
ground truth shape torch.Size([1, 24])
[tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 1.]], device='cuda:0')]
[tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], device='cuda:0')]
torch.Size([1, 513, 24])
Next token shape: torch.Size([1, 24])
iteration: 0
Next token shape torch.Size([1, 24])
ground truth shape torch.Size([1, 24])
[tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
       device='cuda:0')]
[tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]], device='cuda:0')]
torch.Size([1, 513, 24])
Next token shape: torch.Size([1, 24])
iteration: 0
Next token shape torch.Size([1, 24])
ground truth shape torch.Size([1, 24])
[tensor([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.]], dev