In this notebook I try several different methods to get more variance during inference.

Uses the Transformer model without a sigmoid on its output.

## This uses already known data -> not a true test set

## Get data

In [1]:
import torch
import numpy as np

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device('cpu')

In [2]:
# Data parameters

# Dataset directory and snapshot interval, both handed to the dataloader helper
dataset_dir = "/home/falaxdb/Repos/minus1/transformer_decoder_training/jupyter_notebooks/evaluation/specific_test_midis/split_into_mid"
snapshot_intervall = 0.05

# Batching and sequence settings passed to the dataloader helper
seq_length, stride = 1024, 128
batch_size = 1

# In the visible cells this is only used by the commented-out train/val/test loader call
test_size = 0.1

# Special tokens: one row of 24 entries, filled with 1 (SOS) or 2 (PAD)
sos_token = np.full(shape=(1, 24), fill_value=1)
# The PAD token is converted to a tensor on `device`; the SOS token stays a NumPy array
pad_token = torch.tensor(np.full(shape=(1, 24), fill_value=2), device=device)

In [3]:
from transformer_decoder_training.dataprep_transformer.prepare_dataloader_complete import (
    prepare_dataset_as_dataloaders,
    prepare_dataset_as_single_loader,
)

# Load data
#
# Alternative returning separate train/val/test loaders (kept for reference):
# train_loader, val_loader, test_loader = prepare_dataset_as_dataloaders(dataset_dir, snapshot_intervall, batch_size, seq_length, stride, test_size, sos_token)

# Build one loader over the whole dataset directory.
# shuffle=False presumably keeps the sequence order fixed between runs — confirm in the helper.
train_loader = prepare_dataset_as_single_loader(
    dataset_dir,
    snapshot_intervall,
    batch_size,
    seq_length,
    stride,
    sos_token,
    shuffle=False,
)

Processed dataset (2/2): 100%|██████████| 2/2 [00:00<00:00,  4.97it/s]

Processed 2 of 2 files





## Load model

In [4]:
# Model parameters.
# load_state_dict is strict by default, so these must describe the same
# architecture as the checkpoint loaded below.

# Embedding size
hidden_size = 256
# Number of transformer blocks
num_layers = 8
# MultiheadAttention heads
num_heads = 8

# Transformer without sigmoid output
from transformer_decoder_training.models.transformer_decoder_2 import Transformer

model = Transformer(num_emb=24, num_layers=num_layers, hidden_size=hidden_size, num_heads=num_heads).to(device)

# map_location=device remaps the stored tensors onto the active device. Without it,
# a checkpoint saved from CUDA tensors cannot be deserialized when the notebook
# has fallen back to the CPU (see the device selection in the first cell).
# NOTE(review): the checkpoint is unpickled by torch.load; only load files you trust.
model.load_state_dict(
    torch.load(
        "/home/falaxdb/Repos/minus1/transformer_decoder_training/saved_files/saved_models/model_1_notebook_v6.1.pth",
        map_location=device,
    )
)
# Switch to evaluation mode; as the last expression of the cell this also
# displays the module structure.
model.eval()

Transformer(
  (embedding): Linear(in_features=24, out_features=256, bias=True)
  (pos_emb): SinusoidalPosEmb()
  (blocks): ModuleList(
    (0-7): 8 x TransformerBlock(
      (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      (multihead_attn): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
      )
      (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      (mlp): Sequential(
        (0): Linear(in_features=256, out_features=1024, bias=True)
        (1): ELU(alpha=1.0)
        (2): Linear(in_features=1024, out_features=256, bias=True)
      )
    )
  )
  (fc_out): Linear(in_features=256, out_features=24, bias=True)
)

## Inference

In [5]:
# Directory that the inference cells pass, together with a file name, to
# inference_output_to_midi_one_octave (presumably where the generated MIDI files are written).
midi_save_dir = "/home/falaxdb/Repos/minus1/transformer_decoder_training/saved_files/midi_outputs/notebook_6"

In [6]:
from transformer_decoder_training.transformer_inference_eval import inference_and_visualize_1

# Take the first batch the loader yields and show its shape
sequence = next(iter(train_loader))
print(sequence.shape)

# Hand the batch to prepare_sequence with 513 as second argument
# (presumably the split position between context and continuation — confirm in the helper)
(context_seq,
 continuing_seq,
 original_seq) = inference_and_visualize_1.prepare_sequence(sequence, 513)

torch.Size([1, 1025, 24])


In [7]:
# Inference with threshold

from transformer_decoder_training.inference import inference_3

# 0.17 is presumably the threshold applied to the model output — confirm in inference_3
output_tokens, harmony_output_tokens, last_input_seq = inference_3.inference(
    model,
    context_seq,
    continuing_seq,
    0.17,
    pad_token,
    device,
)

# Export the result as MIDI; 0.05 equals snapshot_intervall, presumably the time step — confirm
inference_and_visualize_1.inference_output_to_midi_one_octave(
    original_seq,
    context_seq,
    last_input_seq,
    0.05,
    midi_save_dir,
    "threshold_only.mid",
)

Tokens to generate: 512
Token after sigmoid:  tensor([[8.4361e-04, 1.9129e-04, 3.0500e-04, 9.7016e-01, 2.6296e-04, 4.2497e-03,
         5.4911e-03, 8.5550e-04, 6.9373e-03, 8.6189e-04, 2.9786e-02, 6.1674e-03,
         2.4936e-02, 4.6675e-03, 8.9613e-01, 7.7255e-02, 3.3386e-05, 1.3665e-02,
         2.0036e-03, 5.9947e-04, 1.0588e-02, 1.1874e-04, 1.5969e-02, 2.0193e-02]],
       device='cuda:0')
Token after sigmoid:  tensor([[6.6719e-04, 2.5725e-04, 6.5572e-04, 9.7427e-01, 2.1545e-04, 4.8429e-03,
         4.0000e-03, 5.0021e-04, 5.6639e-03, 7.3793e-04, 2.7287e-02, 4.8440e-03,
         2.1479e-02, 3.1799e-03, 8.8513e-01, 6.1513e-02, 3.2100e-05, 1.4749e-02,
         2.6334e-03, 5.0916e-04, 1.6556e-02, 1.6193e-04, 3.2306e-02, 2.6359e-02]],
       device='cuda:0')
Token after sigmoid:  tensor([[4.6093e-04, 2.1518e-04, 8.2443e-04, 9.8042e-01, 2.5611e-04, 3.0688e-03,
         1.3207e-03, 2.0332e-04, 2.4973e-03, 5.8383e-04, 1.9751e-02, 2.4030e-03,
         1.6533e-02, 1.6402e-03, 8.5476e-01, 1.6

In [8]:
# Inference with sampling

from transformer_decoder_training.inference import inference_4

# 0.7 is presumably the sampling temperature — confirm in inference_4
output_tokens, harmony_output_tokens, last_input_seq = inference_4.inference_with_temperature_sampling(
    model,
    context_seq,
    continuing_seq,
    0.7,
    pad_token,
    device,
)

# Export the result as MIDI; 0.05 equals snapshot_intervall, presumably the time step — confirm
inference_and_visualize_1.inference_output_to_midi_one_octave(
    original_seq,
    context_seq,
    last_input_seq,
    0.05,
    midi_save_dir,
    "temperature_sampling.mid",
)

Tokens to generate: 512
Token after temperature sampling:  tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.]], device='cuda:0')
Token after temperature sampling:  tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.]], device='cuda:0')
Token after temperature sampling:  tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.]], device='cuda:0')
Token after temperature sampling:  tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.]], device='cuda:0')
Token after temperature sampling:  tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.]], device='cuda:0')
Token after temperature sampling:  tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0.,

In [9]:
# Inference with sampling + max allowed notes

# 0.8 and 0.25 are presumably temperature and threshold, and the trailing 3 the
# maximum number of simultaneous notes — confirm against inference_4
output_tokens, harmony_output_tokens, last_input_seq = inference_4.inference_with_temperature_and_max_notes_sampling(
    model,
    context_seq,
    continuing_seq,
    0.8,
    0.25,
    pad_token,
    device,
    3,
)

# Export the result as MIDI; 0.05 equals snapshot_intervall, presumably the time step — confirm
inference_and_visualize_1.inference_output_to_midi_one_octave(
    original_seq,
    context_seq,
    last_input_seq,
    0.05,
    midi_save_dir,
    "temperature_sampling_max_notes.mid",
)

Tokens to generate: 512
Token probabilities after temperature sampling:  tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.]], device='cuda:0')
Binary token after applying threshold and max notes constraint:  tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.]], device='cuda:0')
Token probabilities after temperature sampling:  tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.]], device='cuda:0')
Binary token after applying threshold and max notes constraint:  tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.]], device='cuda:0')
Token probabilities after temperature sampling:  tensor([[0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
         0., 0., 0., 0., 0., 0.]], device='cuda:0')
Binary token after applying thresh