Jetzt ein Versuch mit dem Beispiel aus https://github.com/LukeDitria/pytorch_tutorials/blob/main/section14_transformers/solutions/Pytorch2_Transformer_Text_Generation.ipynb.

Dazugehöriges Video: https://youtu.be/7J4Xn0LnnEA?list=PLyHaDji6oZkV4sRUVoJdvZm2Sk7ohQ9yD

(Eventuelle Idee: Ich baue einen Tokenizer für die einzelnen Töne -> jeder Snapshot wird tokenisiert, so wie die einzelnen Wörter bei einem Transformer für Text. Das sollte bei 12 Tönen in einer Oktave gut machbar sein -> bei 88 Tönen könnten es zu viele mögliche Tokens werden. Aber es werden ja meistens nicht alle Töne gleichzeitig gespielt.)

In [None]:
# imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch import Tensor

from data_preperation import dataset_snapshot

import math
import numpy as np

import random

In [None]:
# Hyperparameters and constants shared by the cells below.

# Start-of-sequence marker: one row of twelve 1s that is stacked on top of
# every chunk before it is handed to the model.
# NOTE(review): an all-ones row could coincide with a real snapshot in which
# all 12 keys are active at once -- confirm this cannot occur in the data.
SOS_TOKEN = np.full((1, 12), 1)  # SOS token representation with 1

# Learning rate for the optimizer
learning_rate = 1e-4

# Number of epochs for training
nepochs = 20

# Batch size for data loaders
batch_size = 128

# Maximum sequence length for inputs; the dataset-preparation cell passes it
# as both the chunk size and the minimum chunk length.
max_len = 64

# Root directory of the dataset
# NOTE(review): not referenced by the cells visible here; the snapshot-creation
# cell passes a different hard-coded path -- confirm which one is current.
data_set_root = "/home/falaxdb/Repos/Learn-ml/Transformer-pytorch/piano_data/maestro_v3/hands_split_into_seperate_midis"

In [None]:
# Create snapshots
# NOTE(review): the path is hard-coded here and differs from `data_set_root`
# defined in the hyperparameter cell -- confirm which directory is intended.
# The second argument (0.1) is presumably the snapshot interval in seconds --
# TODO confirm against dataset_snapshot.process_dataset_multithreaded.
dataset_as_snapshots = dataset_snapshot.process_dataset_multithreaded("/home/falaxdb/Repos/minus1/datasets/maestro_v3_split/hands_split_into_seperate_midis", 0.1)
# filter snapshots to 88 piano notes
dataset_as_snapshots = dataset_snapshot.filter_piano_range(dataset_as_snapshots)
# compress data into one octave
dataset_as_snapshots =  dataset_snapshot.compress_existing_dataset_to_12keys(dataset_as_snapshots)

# Sanity check: print the array shape of every track of every song.
for song in dataset_as_snapshots:
    print("song:")
    for track in song:
        print(track.shape)

In [None]:
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split


def add_sos_eos_to_chunks(chunks, sos_token=None):
    """Prepend a start-of-sequence row to every chunk.

    Despite the name, no end-of-sequence row is appended: only the SOS row
    is stacked on top, so each returned chunk is longer than its input by
    the number of rows in ``sos_token`` (one for the default marker).

    Args:
        chunks: Iterable of arrays that ``np.vstack`` can stack below
            ``sos_token`` (i.e. with a matching number of columns).
        sos_token: Row(s) placed in front of each chunk. When ``None``
            (the default) the module-level ``SOS_TOKEN`` is used, which
            keeps existing single-argument calls working unchanged.

    Returns:
        A new list holding one stacked array per input chunk. The input
        chunks themselves are not modified. An empty input gives ``[]``.
    """
    if sos_token is None:
        # Resolved at call time so the module-level constant stays the default.
        sos_token = SOS_TOKEN
    return [np.vstack([sos_token, chunk]) for chunk in chunks]

def split_into_chunks(sequence, chunk_size):
    """Cut ``sequence`` into consecutive slices of ``chunk_size`` items.

    The slices do not overlap and keep the original order. The last slice
    is shorter when ``len(sequence)`` is not a multiple of ``chunk_size``.
    An empty sequence yields an empty list.
    """
    pieces = []
    for start in range(0, len(sequence), chunk_size):
        stop = start + chunk_size
        pieces.append(sequence[start:stop])
    return pieces

def filter_short_chunks(chunks_1, chunks_2, min_length):
    """Drop chunk pairs in which either member is shorter than ``min_length``.

    The two lists are walked in lockstep, so a pair is kept or discarded as
    a unit and the surviving chunks stay aligned by index.

    Returns:
        A tuple ``(kept_1, kept_2)`` of two equally long lists.
    """
    surviving_pairs = [
        (first, second)
        for first, second in zip(chunks_1, chunks_2)
        if len(first) >= min_length and len(second) >= min_length
    ]
    kept_1 = [first for first, _ in surviving_pairs]
    kept_2 = [second for _, second in surviving_pairs]
    return kept_1, kept_2

class PianoDataset(Dataset):
    """Dataset wrapper around a sequence of ``(x, y)`` pairs.

    Each item is returned as a pair of freshly created float32 tensors.
    """

    def __init__(self, data):
        # Indexable collection whose items unpack into exactly two parts.
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        source, target = self.data[idx]
        source_tensor = torch.tensor(source, dtype=torch.float32)
        target_tensor = torch.tensor(target, dtype=torch.float32)
        return source_tensor, target_tensor

def prepare_dataset(dataset_as_snapshots, chunk_size, min_length):
    """Turn songs into a flat list of aligned ``(x, y)`` chunk pairs.

    Every song must unpack into exactly two tracks of equal length. Both
    tracks are cut into ``chunk_size`` pieces, pairs containing a piece
    shorter than ``min_length`` are discarded, and each remaining piece is
    passed through ``add_sos_eos_to_chunks`` (which prepends the SOS row;
    no EOS row is added).

    Returns:
        A list of ``(x, y)`` tuples, ``x`` from the first track and ``y``
        from the second, in song order.

    Raises:
        AssertionError: if the two tracks of a song differ in length
            (only while assertions are enabled).
    """
    pairs = []
    for first_track, second_track in dataset_as_snapshots:
        assert len(first_track) == len(second_track), "Tracks must have the same length"

        # Chunk both tracks identically, then drop pairs that are too short.
        first_chunks, second_chunks = filter_short_chunks(
            split_into_chunks(first_track, chunk_size),
            split_into_chunks(second_track, chunk_size),
            min_length,
        )

        # Prefix the start marker on both sides and collect the aligned pairs.
        first_chunks = add_sos_eos_to_chunks(first_chunks)
        second_chunks = add_sos_eos_to_chunks(second_chunks)
        pairs.extend(zip(first_chunks, second_chunks))
    return pairs

In [None]:
# Build the (x, y) chunk pairs. max_len is passed as both the chunk size and
# the minimum length, so only full-length chunks are kept.
data = prepare_dataset(dataset_as_snapshots, max_len, max_len)

# Split the dataset using sklearn while maintaining pairs
# 70 % goes to training; the remaining 30 % is halved into validation and test.
# NOTE(review): the split is done per chunk, so chunks of one song can end up
# in different subsets -- confirm this is acceptable for evaluation.
train_data, temp_data = train_test_split(data, test_size=0.3, random_state=42, shuffle=True)
val_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42, shuffle=True)

# Create custom datasets
train_dataset = PianoDataset(train_data)
val_dataset = PianoDataset(val_data)
test_dataset = PianoDataset(test_data)

# Create DataLoaders for each subset with drop_last=True
# drop_last=True discards the final incomplete batch, so a subset with fewer
# than batch_size samples produces no batches at all.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

# Iterate over the DataLoader (example with train_loader)
for batch in train_loader:
    X, y = batch
    print(X.shape, y.shape)
    # X and y should both have shape (batch_size, max_len + 1, feature dimension):
    # only an SOS row is prepended to each chunk, no EOS row is appended.