In this notebook I try several different methods to get more variance in the output during inference.

Uses the Transformer model without sigmoid output.

## This uses already-known data (sequences from the training loader) -> no true test data

## Get data

In [1]:
import torch
import numpy as np

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed the global NumPy and PyTorch random number generators so that code
# drawing from them (e.g. sampling-based inference) gives repeatable results
# when the notebook is re-run from a fresh kernel.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

In [2]:
# --- Data configuration ---

# Dataset location on disk.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
dataset_dir = "/home/falaxdb/Repos/minus1/datasets/maestro_v3_split/hands_split_into_seperate_midis"

# Snapshot interval handed to the dataloader factory.
snapshot_intervall = 0.05

# Batching / windowing settings handed to the dataloader factory.
batch_size = 1
seq_length = 1024
stride = 256

# Presumably the fraction of data held out from training - confirm against
# prepare_dataset_as_dataloaders.
test_size = 0.3

# Special tokens: arrays of shape (1, 24) filled with a constant value.
# The start-of-sequence token stays a NumPy array, the padding token is
# moved onto `device` as a tensor.
token_shape = (1, 24)
sos_token = np.full(token_shape, 1)
pad_token = torch.tensor(np.full(token_shape, 2), device=device)

In [3]:
from transformer_decoder_training.dataprep_transformer.prepare_dataloader_complete import prepare_dataset_as_dataloaders

# Build the train / validation / test dataloaders from the data parameters
# defined earlier in the notebook.
train_loader, val_loader, test_loader = prepare_dataset_as_dataloaders(
    dataset_dir,
    snapshot_intervall,
    batch_size,
    seq_length,
    stride,
    test_size,
    sos_token,
)

Processed dataset (10/10): 100%|██████████| 10/10 [00:00<00:00, 12.35it/s]


Processed 10 of 10 files


## Load model

In [4]:
# Define model parameters

# Embedding size
hidden_size = 256
# Number of transformer blocks
num_layers = 8
# Number of MultiheadAttention heads
num_heads = 8

# Transformer without sigmoid output
from transformer_decoder_training.models.transformer_decoder_2 import Transformer

model = Transformer(num_emb=24, num_layers=num_layers, hidden_size=hidden_size, num_heads=num_heads).to(device)

# map_location remaps the checkpoint's tensors onto `device`, so a checkpoint
# saved during a GPU run can still be loaded when this notebook falls back to CPU.
# NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
model_path = "/home/falaxdb/Repos/minus1/transformer_decoder_training/saved_files/saved_models/model_1_notebook_v6.1.pth"
model.load_state_dict(torch.load(model_path, map_location=device))

# Switch to evaluation mode; as the cell's last expression this also displays the model.
model.eval()

Transformer(
  (embedding): Linear(in_features=24, out_features=256, bias=True)
  (pos_emb): SinusoidalPosEmb()
  (blocks): ModuleList(
    (0-7): 8 x TransformerBlock(
      (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      (multihead_attn): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
      )
      (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      (mlp): Sequential(
        (0): Linear(in_features=256, out_features=1024, bias=True)
        (1): ELU(alpha=1.0)
        (2): Linear(in_features=1024, out_features=256, bias=True)
      )
    )
  )
  (fc_out): Linear(in_features=256, out_features=24, bias=True)
)

## Inference

In [5]:
# Directory handed to the MIDI export helper by the inference cells below.
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
midi_save_dir = "/home/falaxdb/Repos/minus1/transformer_decoder_training/saved_files/midi_outputs/notebook_6"

In [6]:
from transformer_decoder_training.transformer_inference_eval import inference_and_visualize_1

# Pull one batch from a fresh iterator over the training loader and show its shape.
train_batches = iter(train_loader)
sequence = next(train_batches)
print(sequence.shape)

# Split the batch into context, continuation and original sequence.
# 513 is the second argument of prepare_sequence - presumably the split
# position (1025 - 513 = 512 remaining tokens); TODO confirm against the helper.
split_arg = 513
context_seq, continuing_seq, original_seq = inference_and_visualize_1.prepare_sequence(sequence, split_arg)

torch.Size([1, 1025, 24])


In [7]:
# Inference with threshold

from transformer_decoder_training.inference import inference_3

# 0.17 is the threshold value handed to the inference helper.
output_tokens, harmony_output_tokens, last_input_seq = inference_3.inference(
    model, context_seq, continuing_seq, 0.17, pad_token, device
)

# Export the result as MIDI. The configured `snapshot_intervall` replaces the
# previously repeated literal 0.05 so the export cannot drift from the data
# preparation settings.
# NOTE(review): assumes this argument is the snapshot interval - confirm
# against inference_output_to_midi_one_octave.
inference_and_visualize_1.inference_output_to_midi_one_octave(
    original_seq, context_seq, last_input_seq, snapshot_intervall, midi_save_dir, "threshold_only.mid"
)

Tokens to generate: 512
Token after sigmoid:  tensor([[0.0103, 0.0114, 0.1994, 0.0015, 0.0199, 0.0998, 0.0057, 0.0155, 0.0064,
         0.1296, 0.0232, 0.0042, 0.0064, 0.0185, 0.1375, 0.0029, 0.0266, 0.0487,
         0.0026, 0.0105, 0.0015, 0.0667, 0.0144, 0.0047]], device='cuda:0')
Token after sigmoid:  tensor([[0.0083, 0.0022, 0.6755, 0.0032, 0.0189, 0.1612, 0.0054, 0.0268, 0.0102,
         0.1837, 0.0702, 0.0067, 0.0062, 0.0058, 0.8552, 0.0012, 0.0220, 0.0591,
         0.0031, 0.0074, 0.0019, 0.0374, 0.0157, 0.0068]], device='cuda:0')
Token after sigmoid:  tensor([[3.0288e-03, 1.0059e-03, 6.6390e-01, 5.8821e-04, 7.9946e-03, 2.2131e-01,
         5.9920e-03, 7.8347e-03, 1.3728e-03, 6.0303e-01, 1.2282e-02, 3.0136e-03,
         2.9939e-03, 4.6116e-03, 8.5832e-01, 4.4778e-04, 3.4519e-02, 1.5900e-01,
         1.0080e-02, 1.1726e-02, 2.5960e-03, 9.3731e-02, 1.6529e-02, 9.5156e-03]],
       device='cuda:0')
Token after sigmoid:  tensor([[1.4234e-03, 1.6287e-04, 8.2925e-01, 1.1086e-04, 7.547

In [8]:
# Inference with sampling

from transformer_decoder_training.inference import inference_4

# 0.7 is the fourth argument of the sampling helper - presumably the
# temperature; confirm against inference_with_temperature_sampling.
output_tokens, harmony_output_tokens, last_input_seq = inference_4.inference_with_temperature_sampling(
    model, context_seq, continuing_seq, 0.7, pad_token, device
)

# Export the result as MIDI. The configured `snapshot_intervall` replaces the
# previously repeated literal 0.05 so the export cannot drift from the data
# preparation settings.
# NOTE(review): assumes this argument is the snapshot interval - confirm
# against inference_output_to_midi_one_octave.
inference_and_visualize_1.inference_output_to_midi_one_octave(
    original_seq, context_seq, last_input_seq, snapshot_intervall, midi_save_dir, "temperature_sampling.mid"
)

Tokens to generate: 512
Token after temperature sampling:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
       device='cuda:0')
Token after temperature sampling:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.]], device='cuda:0')
Token after temperature sampling:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.]], device='cuda:0')
Token after temperature sampling:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
       device='cuda:0')
Token after temperature sampling:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
       device='cuda:0')
Token after temperature sampling:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],


In [9]:
# Inference with sampling + max allowed notes
# (relies on `inference_4` having been imported by the preceding sampling cell)

# Positional arguments 0.8, 0.25 and 3 are presumably the temperature, the
# threshold and the maximum number of notes - confirm against
# inference_with_temperature_and_max_notes_sampling.
output_tokens, harmony_output_tokens, last_input_seq = inference_4.inference_with_temperature_and_max_notes_sampling(
    model, context_seq, continuing_seq, 0.8, 0.25, pad_token, device, 3
)

# Export the result as MIDI. The configured `snapshot_intervall` replaces the
# previously repeated literal 0.05 so the export cannot drift from the data
# preparation settings.
# NOTE(review): assumes this argument is the snapshot interval - confirm
# against inference_output_to_midi_one_octave.
inference_and_visualize_1.inference_output_to_midi_one_octave(
    original_seq, context_seq, last_input_seq, snapshot_intervall, midi_save_dir, "temperature_sampling_max_notes.mid"
)

Tokens to generate: 512
Token probabilities after temperature sampling:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
       device='cuda:0')
Binary token after applying threshold and max notes constraint:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
       device='cuda:0')
Token probabilities after temperature sampling:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.]], device='cuda:0')
Binary token after applying threshold and max notes constraint:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.]], device='cuda:0')
Token probabilities after temperature sampling:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
       device='cuda:0')
Binary token after applying threshold an