In this notebook I try several different methods to get more variance during inference.

Uses the Transformer model without sigmoid output.

## This uses already known (training) data -> no true test data

## Get data

In [1]:
import torch
import numpy as np

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [2]:
# Data configuration

# Special marker rows: a (1, 24) array filled with a constant value.
# The start marker stays a NumPy array; the padding marker is moved onto `device` as a tensor.
sos_token = np.full((1, 24), 1)
pad_token = torch.tensor(np.full((1, 24), 2), device=device)

# Dataset location and the snapshot interval handed to the data preparation
dataset_dir = "/home/falaxdb/Repos/minus1/datasets/maestro_v3_split/hands_split_into_seperate_midis"
snapshot_intervall = 0.05

# Batching / windowing settings handed to the dataloader preparation
batch_size = 1
seq_length = 512
stride = 256

# Split fraction handed to the dataloader preparation
test_size = 0.3

In [3]:
from transformer_decoder_training.dataprep_transformer.prepare_dataloader_complete import prepare_dataset_as_dataloaders

# Build the train / validation / test dataloaders from the configured dataset

train_loader, val_loader, test_loader = prepare_dataset_as_dataloaders(
    dataset_dir,
    snapshot_intervall,
    batch_size,
    seq_length,
    stride,
    test_size,
    sos_token,
    amount=20,
)

Processed dataset (40/40): 100%|██████████| 40/40 [00:00<00:00, 40.09it/s]


Processed 40 of 40 files


## Load model

In [4]:
# Model hyperparameters
# NOTE(review): these presumably have to match the architecture the checkpoint
# below was trained with, otherwise load_state_dict will reject it -- confirm.

# Embedding size
hidden_size = 256
# Number of transformer blocks
num_layers = 8
# Number of heads in the multi-head attention
num_heads = 8

# Transformer without sigmoid output
from transformer_decoder_training.models.transformer_decoder_2 import Transformer

model = Transformer(num_emb=24, num_layers=num_layers, hidden_size=hidden_size, num_heads=num_heads).to(device)
# map_location remaps the stored tensors onto the active device, so a checkpoint
# saved on a CUDA machine still loads when this notebook falls back to the CPU.
# NOTE(review): torch.load unpickles the file -- only load checkpoints from a trusted source.
model.load_state_dict(torch.load("/home/falaxdb/Repos/minus1/transformer_decoder_training/saved_files/saved_models/model_1_notebook_v6.1.pth", map_location=device))
# Switch to evaluation mode; as the last expression of the cell this also displays the module summary
model.eval()

Transformer(
  (embedding): Linear(in_features=24, out_features=256, bias=True)
  (pos_emb): SinusoidalPosEmb()
  (blocks): ModuleList(
    (0-7): 8 x TransformerBlock(
      (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      (multihead_attn): MultiheadAttention(
        (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
      )
      (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
      (mlp): Sequential(
        (0): Linear(in_features=256, out_features=1024, bias=True)
        (1): ELU(alpha=1.0)
        (2): Linear(in_features=1024, out_features=256, bias=True)
      )
    )
  )
  (fc_out): Linear(in_features=256, out_features=24, bias=True)
)

## Inference

In [5]:
# Directory passed to the MIDI export helper in the inference cells
midi_save_dir = (
    "/home/falaxdb/Repos/minus1/transformer_decoder_training/saved_files/midi_outputs/notebook_6"
)

In [6]:
from transformer_decoder_training.transformer_inference_eval import inference_and_visualize_1

# Pull a single batch from the training loader and report its shape
sequence = next(iter(train_loader))
print(sequence.shape)

# Split the batch into the context part, the continuing part and the original sequence.
# NOTE(review): 350 is presumably the context length (513 - 350 = 163 matches the
# "Tokens to generate" count in the inference output) -- confirm in prepare_sequence.
(context_seq,
 continuing_seq,
 original_seq) = inference_and_visualize_1.prepare_sequence(sequence, 350)

torch.Size([1, 513, 24])


In [7]:
#Inference with threshold

from transformer_decoder_training.inference import inference_3

# Generate the continuation with inference_3.
# NOTE(review): 0.17 is presumably the decision threshold applied to the
# sigmoid outputs -- confirm against inference_3.inference.
output_tokens, harmony_output_tokens, last_input_seq = inference_3.inference(
    model,
    context_seq,
    continuing_seq,
    0.17,
    pad_token,
    device,
)

# Export the result through the MIDI helper under the file name "threshold_only.mid"
inference_and_visualize_1.inference_output_to_midi_one_octave(
    original_seq,
    context_seq,
    last_input_seq,
    0.05,
    midi_save_dir,
    "threshold_only.mid",
)

Tokens to generate: 163
Token after sigmoid:  tensor([[5.5471e-02, 1.0775e-01, 3.0094e-02, 7.1517e-03, 2.9430e-01, 3.8756e-03,
         3.8671e-04, 3.7498e-03, 4.4804e-03, 4.9327e-03, 6.5295e-04, 1.6171e-02,
         2.9552e-03, 7.0402e-03, 3.8767e-03, 5.5839e-04, 3.7114e-02, 9.0569e-01,
         1.1452e-03, 2.9747e-03, 9.4299e-01, 4.6548e-03, 2.5806e-04, 9.5700e-01]],
       device='cuda:0')
Token after sigmoid:  tensor([[1.1687e-02, 1.5412e-03, 4.7312e-02, 2.8427e-03, 8.6214e-01, 1.9009e-03,
         2.3901e-04, 1.1103e-03, 1.0081e-03, 5.5176e-04, 1.3259e-04, 1.3555e-03,
         2.0278e-03, 1.0084e-03, 1.6705e-03, 1.0288e-04, 9.2230e-03, 8.8674e-01,
         4.2276e-04, 1.7938e-03, 9.7759e-01, 1.8549e-03, 1.4203e-04, 9.6422e-01]],
       device='cuda:0')
Token after sigmoid:  tensor([[7.4386e-03, 2.6210e-03, 2.4908e-01, 5.4190e-03, 7.7229e-01, 1.8744e-03,
         2.7574e-04, 8.3702e-04, 8.5400e-04, 4.2117e-04, 9.8250e-05, 2.6857e-03,
         9.5472e-04, 8.2004e-04, 1.6850e-03, 1.0

In [8]:
# Inference with sampling

from transformer_decoder_training.inference import inference_4

# Generate the continuation with temperature sampling.
# NOTE(review): 0.7 is presumably the sampling temperature -- confirm against
# inference_4.inference_with_temperature_sampling.
output_tokens, harmony_output_tokens, last_input_seq = inference_4.inference_with_temperature_sampling(
    model,
    context_seq,
    continuing_seq,
    0.7,
    pad_token,
    device,
)

# Export the result through the MIDI helper under the file name "temperature_sampling.mid"
inference_and_visualize_1.inference_output_to_midi_one_octave(
    original_seq,
    context_seq,
    last_input_seq,
    0.05,
    midi_save_dir,
    "temperature_sampling.mid",
)

Tokens to generate: 163
Token after temperature sampling:  tensor([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 1., 0., 0., 1.]], device='cuda:0')
Token after temperature sampling:  tensor([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 1., 0., 0., 1.]], device='cuda:0')
Token after temperature sampling:  tensor([[0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 1., 0., 0., 1.]], device='cuda:0')
Token after temperature sampling:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 1., 0., 0., 1.]], device='cuda:0')
Token after temperature sampling:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 1., 0., 0., 1.]], device='cuda:0')
Token after temperature sampling:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 1.,

In [9]:
# inference with sampling + max allowed notes

# Generate the continuation with temperature sampling plus a max-notes constraint.
# NOTE(review): 0.7 / 1.7 / 3 are presumably temperature, threshold and the
# maximum number of simultaneous notes -- confirm against
# inference_4.inference_with_temperature_and_max_notes_sampling.
output_tokens, harmony_output_tokens, last_input_seq = inference_4.inference_with_temperature_and_max_notes_sampling(
    model,
    context_seq,
    continuing_seq,
    0.7,
    1.7,
    pad_token,
    device,
    3,
)

# Export the result through the MIDI helper under the file name "temperature_sampling_max_notes.mid"
inference_and_visualize_1.inference_output_to_midi_one_octave(
    original_seq,
    context_seq,
    last_input_seq,
    0.05,
    midi_save_dir,
    "temperature_sampling_max_notes.mid",
)

Tokens to generate: 163
Token probabilities after temperature sampling:  tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 1., 0., 0., 1.]], device='cuda:0')
Binary token after applying threshold and max notes constraint:  tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 1., 0., 0., 1.]], device='cuda:0')
Token probabilities after temperature sampling:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 1., 0., 0., 1.]], device='cuda:0')
Binary token after applying threshold and max notes constraint:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0., 1., 0., 0., 1.]], device='cuda:0')
Token probabilities after temperature sampling:  tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 1., 0., 0., 0.]], device='cuda:0')
Binary token after applying thresh