In [8]:
#import statements 
import glob
import random
from typing import List
from collections import defaultdict

import numpy as np
from numpy.random import choice

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from symusic import Score
from miditok import REMI, TokenizerConfig
from midi2audio import FluidSynth # Import library
from IPython.display import Audio, display

In [35]:
git checkout -b epochtrained
git add .
git commit -m "my code that has a training model on 43 epochs"
git push -u origin epochtrained

ERROR: Could not find a version that satisfies the requirement git (from versions: none)
ERROR: No matching distribution found for git
'git' is not recognized as an internal or external command,
operable program or batch file.
'git' is not recognized as an internal or external command,
operable program or batch file.
'git' is not recognized as an internal or external command,
operable program or batch file.
'git' is not recognized as an internal or external command,
operable program or batch file.


Task 1 <br>
This assignment focuses on symbolic music modeling. The goal is to train a model that learns a distribution \( p(x) \) over symbolic music data (e.g., MIDI)  specifically within the EDM genre. In addition it is capable of sampling new sequences from this learned distribution unconditionally. We will be using the LSTM model for this task. <br>

Get list of files for training/test sets

In [9]:
import os
import glob
print("CWD:", os.getcwd())
print("Train directory exists?", os.path.exists("./train"))
print("Train files (glob):", glob.glob("./train/*.midi"))

train_files = glob.glob("./train/*.midi")
test_files = glob.glob("./test/*.midi")

CWD: C:\Users\charl\Downloads\cse153_task1-addrhythm 2\cse153_task1-addrhythm
Train directory exists? True
Train files (glob): ['./train\\MIDI-Unprocessed_01_R1_2006_01-09_ORIG_MID--AUDIO_01_R1_2006_01_Track01_wav.midi', './train\\MIDI-Unprocessed_01_R1_2006_01-09_ORIG_MID--AUDIO_01_R1_2006_02_Track02_wav.midi', './train\\MIDI-Unprocessed_01_R1_2006_01-09_ORIG_MID--AUDIO_01_R1_2006_03_Track03_wav.midi', './train\\MIDI-Unprocessed_01_R1_2006_01-09_ORIG_MID--AUDIO_01_R1_2006_04_Track04_wav.midi', './train\\MIDI-Unprocessed_01_R1_2006_01-09_ORIG_MID--AUDIO_01_R1_2006_05_Track05_wav.midi', './train\\MIDI-Unprocessed_01_R1_2008_01-04_ORIG_MID--AUDIO_01_R1_2008_wav--1.midi', './train\\MIDI-Unprocessed_01_R1_2008_01-04_ORIG_MID--AUDIO_01_R1_2008_wav--2.midi', './train\\MIDI-Unprocessed_01_R1_2008_01-04_ORIG_MID--AUDIO_01_R1_2008_wav--3.midi', './train\\MIDI-Unprocessed_01_R1_2009_01-04_ORIG_MID--AUDIO_01_R1_2009_01_R1_2009_01_WAV.midi', './train\\MIDI-Unprocessed_01_R1_2009_01-04_ORIG_MID--AU

Construct a PyTorch Dataset

In [10]:
class MIDIDataset(Dataset):
    def __init__(self, file_paths: List[str], tokenizer):
        self.tokenizer = tokenizer
        self.file_paths = file_paths
        
    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        midi = Score(self.file_paths[idx])
        MAX_LEN = 512  # or even 512 to be safe
        tokens = self.tokenizer(midi)
        tokens = tokens[:MAX_LEN]
        # Return as dictionary to match training function expectations
        return {'input_ids': torch.tensor(tokens, dtype=torch.long)}

Configure the Tokenizer in order to be use to 

In [11]:
config = TokenizerConfig(
    num_velocities=32,           # Classical dynamics (was 1)
    use_chords=True,            # Essential for harmony (was False)
    use_programs=False,         # Piano only (was True)
    use_time_signatures=True,   # Classical changes time sigs
    use_rests=True, 
    use_tempos=True            # Important in classical
)
tokenizer = REMI(config)
tokenizer.train(vocab_size=1500, files_paths=train_files)
tokenizer.save("tokenizer.json")

Define PyTorch datasets and dataloaders

In [12]:
from torch.nn.utils.rnn import pad_sequence
from miditok.pytorch_data import DatasetMIDI, DataCollator
dataset = MIDIDataset(train_files, tokenizer)


train_dataset = DatasetMIDI(
    files_paths=train_files,
    tokenizer=tokenizer,
    max_seq_len=1024,
    bos_token_id=tokenizer["BOS_None"],
    eos_token_id=tokenizer["EOS_None"],
)
test_dataset = DatasetMIDI(
    files_paths=test_files,
    tokenizer=tokenizer,
    max_seq_len=1024,
    bos_token_id=tokenizer["BOS_None"],
    eos_token_id=tokenizer["EOS_None"],
)
print(f"# Train files loaded: {len(train_dataset)}")
print(f"# Test files loaded: {len(test_dataset)}")

collator = DataCollator(tokenizer.pad_token_id)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, collate_fn=collator)
test_loader = DataLoader(test_dataset, batch_size=16, collate_fn=collator)


# Train files loaded: 938
# Test files loaded: 105


In [13]:
#for rhythm
def extract_position_ids(token_sequence, tokenizer):
    position_ids = []
    current_pos = 0  # default if no position is found yet

    for token in token_sequence:
        token_str = tokenizer.vocab[token]
        if token_str.startswith("Position_"):
            try:
                current_pos = int(token_str.split("_")[1])
            except:
                current_pos = 0
        position_ids.append(current_pos)

    return torch.tensor(position_ids, dtype=torch.long)



LSTM Model<br>

In [14]:
class MusicRNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim=768, hidden_dim=1024, num_layers=4,
                 dropout=0.3, bidirectional=False, max_position_embeddings=1024):
        super(MusicRNN, self).__init__()
        
        # Larger embeddings for richer representations
        self.token_embedding = nn.Embedding(vocab_size, embedding_dim)
        self.position_embedding = nn.Embedding(max_position_embeddings, embedding_dim)
        
        # Deeper LSTM with residual connections
        self.rnn_layers = nn.ModuleList([
            nn.LSTM(
                input_size=embedding_dim if i == 0 else hidden_dim,
                hidden_size=hidden_dim,
                num_layers=1,
                dropout=0,
                batch_first=True,
                bidirectional=bidirectional
            ) for i in range(num_layers)
        ])
        
        rnn_output_dim = hidden_dim * 2 if bidirectional else hidden_dim
        
        # Enhanced output processing
        self.layer_norms = nn.ModuleList([
            nn.LayerNorm(hidden_dim) for _ in range(num_layers)
        ])
        self.dropout = nn.Dropout(dropout)
        
        # Multi-layer output head
        self.output_projection = nn.Sequential(
            nn.Linear(rnn_output_dim, rnn_output_dim // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(rnn_output_dim // 2, vocab_size)
        )
        
        # Initialize weights properly
        self._init_weights()
    
    def _init_weights(self):
        """Xavier initialization for better training"""
        for module in self.modules():
            if isinstance(module, nn.Linear):
                torch.nn.init.xavier_uniform_(module.weight)
                if module.bias is not None:
                    torch.nn.init.zeros_(module.bias)
            elif isinstance(module, nn.Embedding):
                torch.nn.init.normal_(module.weight, mean=0, std=0.02)
    
    def forward(self, x, position_ids, hidden_states=None):
        # Token and position embeddings
        tok_emb = self.token_embedding(x)
        pos_emb = self.position_embedding(position_ids)
        x = tok_emb + pos_emb
        x = self.dropout(x)
        
        # Pass through LSTM layers with residual connections
        new_hidden_states = []
        for i, (rnn_layer, layer_norm) in enumerate(zip(self.rnn_layers, self.layer_norms)):
            hidden = hidden_states[i] if hidden_states else None
            out, new_hidden = rnn_layer(x, hidden)
            out = layer_norm(out)
            
            # Residual connection (when dimensions match)
            if i > 0 and out.size(-1) == x.size(-1):
                out = out + x
            
            x = self.dropout(out)
            new_hidden_states.append(new_hidden)
        
        # Output projection
        output = self.output_projection(x)
        return output, new_hidden_states

In [15]:
class EarlyStopping:
    def __init__(self, patience=7, min_delta=0.001, restore_best_weights=True):
        self.patience = patience
        self.min_delta = min_delta
        self.restore_best_weights = restore_best_weights
        self.best_loss = None
        self.counter = 0
        self.best_weights = None
        
    def __call__(self, val_loss, model):
        if self.best_loss is None:
            self.best_loss = val_loss
            self.save_checkpoint(model)
        elif val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.counter = 0
            self.save_checkpoint(model)
        else:
            self.counter += 1
            
        if self.counter >= self.patience:
            if self.restore_best_weights:
                model.load_state_dict(self.best_weights)
            return True
        return False
    
    def save_checkpoint(self, model):
        self.best_weights = model.state_dict().copy()

In [16]:
def check_gpu_setup():
    """Check and force GPU usage"""
    print("=== GPU Setup Check ===")
    print(f"CUDA Available: {torch.cuda.is_available()}")
    
    if torch.cuda.is_available():
        print(f"CUDA Device Count: {torch.cuda.device_count()}")
        print(f"Current CUDA Device: {torch.cuda.current_device()}")
        print(f"CUDA Device Name: {torch.cuda.get_device_name(0)}")
        print(f"CUDA Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
        device = torch.device('cuda')
        print("✅ Using GPU")
    else:
        print("❌ CUDA not available!")
        print("Install CUDA PyTorch with:")
        print("pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118")
        device = torch.device('cpu')
        print("⚠️ Falling back to CPU")
    
    print("=" * 30)
    return device

Training<br>

In [17]:
def train_model(model, train_loader, val_loader, vocab_size, num_epochs=50, 
                        initial_lr=1e-3, device='cpu', save_path='best_model.pth'):
    """Advanced training with learning rate scheduling and early stopping"""
    model = model.to(device)
    criterion = nn.CrossEntropyLoss(ignore_index=0)  # Ignore padding tokens
    optimizer = optim.AdamW(model.parameters(), lr=initial_lr, weight_decay=0.01)
    
    # FIXED: Learning rate scheduler - removed 'verbose' parameter
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=5, min_lr=1e-6
    )
    
    # Early stopping to prevent overfitting
    early_stopping = EarlyStopping(patience=10, min_delta=0.001)
    
    # Training history
    train_losses = []
    val_losses = []
    learning_rates = []
    
    print(f"Starting training with {len(train_loader)} batches per epoch")
    print(f"Model has {sum(p.numel() for p in model.parameters())} parameters")
    print(f"Training on: {device}")
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        total_train_loss = 0
        num_batches = 0
        
        for batch_idx, batch in enumerate(train_loader):
            batch = batch['input_ids'].to(device)
            
            # Skip batch if too short
            if batch.size(1) < 2:
                continue
                
            input_ids = batch[:, :-1]
            target_ids = batch[:, 1:]
            
            # Position IDs (simple sequential for now)
            position_ids = torch.arange(input_ids.size(1), device=device).unsqueeze(0).expand_as(input_ids)
            
            optimizer.zero_grad()
            outputs, _ = model(input_ids, position_ids)
            
            # Reshape for loss calculation
            outputs = outputs.reshape(-1, vocab_size)
            targets = target_ids.reshape(-1)
            
            loss = criterion(outputs, targets)
            loss.backward()
            
            # Gradient clipping to prevent exploding gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            optimizer.step()
            
            total_train_loss += loss.item()
            num_batches += 1
            
            # Print progress every 50 batches
            if batch_idx % 50 == 0:
                print(f"Epoch {epoch+1}, Batch {batch_idx}, Loss: {loss.item():.4f}")
        
        avg_train_loss = total_train_loss / num_batches if num_batches > 0 else 0
        
        # Validation phase
        model.eval()
        total_val_loss = 0
        val_batches = 0
        
        with torch.no_grad():
            for batch in val_loader:
                batch = batch['input_ids'].to(device)
                
                if batch.size(1) < 2:
                    continue
                    
                input_ids = batch[:, :-1]
                target_ids = batch[:, 1:]
                position_ids = torch.arange(input_ids.size(1), device=device).unsqueeze(0).expand_as(input_ids)
                
                outputs, _ = model(input_ids, position_ids)
                outputs = outputs.reshape(-1, vocab_size)
                targets = target_ids.reshape(-1)
                
                loss = criterion(outputs, targets)
                total_val_loss += loss.item()
                val_batches += 1
        
        avg_val_loss = total_val_loss / val_batches if val_batches > 0 else 0
        current_lr = optimizer.param_groups[0]['lr']
        
        # Record metrics
        train_losses.append(avg_train_loss)
        val_losses.append(avg_val_loss)
        learning_rates.append(current_lr)
        
        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | LR: {current_lr:.2e}")
        
        # GPU memory info if available
        if torch.cuda.is_available():
            print(f"GPU Memory: {torch.cuda.memory_allocated()/1e9:.2f}GB allocated")
        print("-" * 60)
        
        # Learning rate scheduling
        scheduler.step(avg_val_loss)
        
        # Early stopping check
        if early_stopping(avg_val_loss, model):
            print(f"Early stopping triggered at epoch {epoch+1}")
            break
        
        # Save checkpoint every 10 epochs
        if (epoch + 1) % 10 == 0:
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': avg_train_loss,
                'val_loss': avg_val_loss,
                'vocab_size': vocab_size
            }, f'checkpoint_epoch_{epoch+1}.pth')
    
    # Save final model
    torch.save({
        'model_state_dict': model.state_dict(),
        'vocab_size': vocab_size,
        'train_losses': train_losses,
        'val_losses': val_losses,
        'learning_rates': learning_rates
    }, save_path)
    
    return train_losses, val_losses, learning_rates

In [18]:
def setup_training():
    """Set up and run the improved training pipeline"""
    
    # Force GPU check first
    device = check_gpu_setup()
    
    # Create improved tokenizer
    print("Creating improved tokenizer...")
    config = TokenizerConfig(
        num_velocities=32,           
        use_chords=True,            
        use_programs=False,         
        use_time_signatures=True,   
        use_rests=True, 
        use_tempos=True            
    )
    tokenizer = REMI(config)
    tokenizer.train(vocab_size=1500, files_paths=train_files)
    tokenizer.save("tokenizer.json")
    print(f"Tokenizer vocabulary size: {tokenizer.vocab_size}")
    
    # Create datasets
    train_dataset = DatasetMIDI(
        files_paths=train_files,
        tokenizer=tokenizer,
        max_seq_len=1024,
        bos_token_id=tokenizer["BOS_None"],
        eos_token_id=tokenizer["EOS_None"],
    )
    
    test_dataset = DatasetMIDI(
        files_paths=test_files,
        tokenizer=tokenizer,
        max_seq_len=1024,
        bos_token_id=tokenizer["BOS_None"],
        eos_token_id=tokenizer["EOS_None"],
    )
    
    collator = DataCollator(tokenizer.pad_token_id)
    # Smaller batch size for GPU memory
    batch_size = 16 if device.type == 'cuda' else 8
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collator)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, collate_fn=collator)
    
    model = MusicRNN(
        vocab_size=tokenizer.vocab_size,
        embedding_dim=768,
        hidden_dim=1024,
        num_layers=4,
        dropout=0.3,
        max_position_embeddings=1024
    )
    
    print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    
    train_losses, val_losses, learning_rates = train_model(
        model=model,
        train_loader=train_loader,
        val_loader=test_loader,
        vocab_size=tokenizer.vocab_size,
        num_epochs=50,
        initial_lr=1e-3,
        device=device,  # Use the checked device
        save_path='improved_music_model.pth'
    )
    
    return model, tokenizer, train_losses, val_losses, learning_rates

In [19]:
def plot_training_progress(train_losses, val_losses, learning_rates):
    """Plot training metrics to visualize progress"""
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))
    
    # Loss plot
    axes[0].plot(train_losses, label='Training Loss', color='blue')
    axes[0].plot(val_losses, label='Validation Loss', color='red')
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Loss')
    axes[0].set_title('Training Progress')
    axes[0].legend()
    axes[0].grid(True)
    
    # Learning rate plot
    axes[1].plot(learning_rates, color='green')
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Learning Rate')
    axes[1].set_title('Learning Rate Schedule')
    axes[1].set_yscale('log')
    axes[1].grid(True)
    
    plt.tight_layout()
    plt.show()


# Run the improved training
if __name__ == "__main__":
    model, tokenizer, train_losses, val_losses, learning_rates = setup_training()
    
    # Plot results if matplotlib is available
    try:
        import matplotlib.pyplot as plt
        plot_training_progress(train_losses, val_losses, learning_rates)
    except ImportError:
        print("Install matplotlib to see training plots: pip install matplotlib")

=== GPU Setup Check ===
CUDA Available: True
CUDA Device Count: 1
Current CUDA Device: 0
CUDA Device Name: NVIDIA GeForce RTX 4060
CUDA Memory: 8.6 GB
✅ Using GPU
Creating improved tokenizer...
Tokenizer vocabulary size: 1500
Model parameters: 35,779,548
Starting training with 59 batches per epoch
Model has 35779548 parameters
Training on: cuda
Epoch 1, Batch 0, Loss: 9.6613
Epoch 1, Batch 50, Loss: 5.4455
Epoch 1/50
Train Loss: 6.1816 | Val Loss: 5.0821 | LR: 1.00e-03
GPU Memory: 0.65GB allocated
------------------------------------------------------------
Epoch 2, Batch 0, Loss: 5.2526
Epoch 2, Batch 50, Loss: 4.8264
Epoch 2/50
Train Loss: 4.9140 | Val Loss: 4.6023 | LR: 1.00e-03
GPU Memory: 0.65GB allocated
------------------------------------------------------------
Epoch 3, Batch 0, Loss: 4.7606
Epoch 3, Batch 50, Loss: 4.5603
Epoch 3/50
Train Loss: 4.6035 | Val Loss: 4.5934 | LR: 1.00e-03
GPU Memory: 0.65GB allocated
------------------------------------------------------------
Ep

ValueError: too many values to unpack (expected 4)

In [27]:
# Fixed model loading - the tokenizer loading method was wrong

import os
import torch
from miditok import REMI, TokenizerConfig

def load_trained_model_fixed():
    """Load your trained model with proper tokenizer loading"""
    try:
        model_path = 'improved_music_model.pth'
        print(f"Loading model from {model_path}")
        checkpoint = torch.load(model_path, map_location='cpu')
        
        # Create and load tokenizer - FIXED METHOD
        if os.path.exists("tokenizer.json"):
            print("Loading existing tokenizer...")
            tokenizer = REMI.from_pretrained("./", "tokenizer")  # Correct loading method
            print("✅ Loaded existing tokenizer")
        else:
            print("Creating new tokenizer...")
            config = TokenizerConfig(
                num_velocities=32,           
                use_chords=True,            
                use_programs=False,         
                use_time_signatures=True,   
                use_rests=True, 
                use_tempos=True            
            )
            tokenizer = REMI(config)
            tokenizer.train(vocab_size=1500, files_paths=train_files)
            tokenizer.save("tokenizer.json")
            print("✅ Created and saved new tokenizer")
        
        # Create model with same architecture
        model = MusicRNN(
            vocab_size=tokenizer.vocab_size,
            embedding_dim=768,
            hidden_dim=1024,
            num_layers=4,
            dropout=0.3,
            max_position_embeddings=1024
        )
        
        # Load the trained weights
        model.load_state_dict(checkpoint['model_state_dict'])
        print("✅ Successfully loaded your trained model!")
        
        # Show training info if available
        if 'train_losses' in checkpoint:
            print(f"Model was trained for {len(checkpoint['train_losses'])} epochs")
            print(f"Final training loss: {checkpoint['train_losses'][-1]:.4f}")
        if 'val_losses' in checkpoint:
            print(f"Final validation loss: {checkpoint['val_losses'][-1]:.4f}")
        
        return model, tokenizer
        
    except Exception as e:
        print(f"Error loading model: {e}")
        print("Trying alternative loading method...")
        
        # Alternative: Create new tokenizer if loading fails
        try:
            print("Creating fresh tokenizer...")
            config = TokenizerConfig(
                num_velocities=32,           
                use_chords=True,            
                use_programs=False,         
                use_time_signatures=True,   
                use_rests=True, 
                use_tempos=True            
            )
            tokenizer = REMI(config)
            tokenizer.train(vocab_size=1500, files_paths=train_files)
            
            # Create model
            model = MusicRNN(
                vocab_size=tokenizer.vocab_size,
                embedding_dim=768,
                hidden_dim=1024,
                num_layers=4,
                dropout=0.3,
                max_position_embeddings=1024
            )
            
            # Load weights
            checkpoint = torch.load(model_path, map_location='cpu')
            model.load_state_dict(checkpoint['model_state_dict'])
            
            print("✅ Successfully loaded model with fresh tokenizer!")
            return model, tokenizer
            
        except Exception as e2:
            print(f"Alternative loading also failed: {e2}")
            return None, None

# Load your trained model with the fixed method
model, tokenizer = load_trained_model_fixed()

if model is not None:
    print("\n🎉 Your model is ready for sampling!")
    print(f"Tokenizer vocab size: {tokenizer.vocab_size}")
    print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")
    
    # Test that the tokenizer works
    try:
        test_token = tokenizer["BOS_None"]
        print(f"✅ Tokenizer working - BOS token: {test_token}")
    except:
        print("⚠️ Tokenizer may have issues, but model is loaded")
else:
    print("❌ Could not load the trained model.")

Loading model from improved_music_model.pth
Loading existing tokenizer...
Error loading model: ModelHubMixin.from_pretrained() takes 2 positional arguments but 3 were given
Trying alternative loading method...
Creating fresh tokenizer...
✅ Successfully loaded model with fresh tokenizer!

🎉 Your model is ready for sampling!
Tokenizer vocab size: 1500
Model parameters: 35,779,548
✅ Tokenizer working - BOS token: 1


Sampling<br>

In [28]:

def sample_top_k(model, start_token, max_length, temperature, k, tokenizer, device='cuda'):
    model = model.to(device)
    model.eval()

    generated = [start_token]
    position_ids = [0]  # start with position 0
    input_token = torch.tensor([[start_token]], device=device)
    input_pos = torch.tensor([[0]], device=device)  # position ID
    hidden = None
    current_position = 0
    vocab_size = tokenizer.vocab_size # keeps count of token count to avoid out of bound errors

    for _ in range(max_length):
        with torch.no_grad():
            output, hidden = model(input_token, input_pos, hidden)
            logits = output[:, -1, :] / temperature
            logits = logits[:, :vocab_size]  # Constrain to valid vocab
            
            top_k_logits, top_k_indices = torch.topk(logits, min(k, vocab_size))
            top_k_probs = F.softmax(top_k_logits, dim=-1)
            next_token_idx = torch.multinomial(top_k_probs, 1).item()
            next_token = top_k_indices[0, next_token_idx].item()
            
            if next_token >= vocab_size:
                next_token = tokenizer["EOS_None"]
                
            generated.append(next_token)

            # Update position safely
            token_str = None
            for token_name, token_id in tokenizer._vocab_base.items():
                if token_id == next_token:
                    token_str = token_name
                    break
            
            if token_str and token_str.startswith("Position_"):
                try:
                    current_position = int(token_str.split("_")[1])
                except:
                    current_position = min(current_position + 1, 95)
            else:
                current_position = min(current_position + 1, 95)

            if (next_token == tokenizer["EOS_None"] or 
                next_token == tokenizer["PAD_None"]):
                break

            input_token = torch.tensor([[next_token]], device=device)
            input_pos = torch.tensor([[current_position]], device=device)

    return generated


start_token = tokenizer["BOS_None"]
generated_sequence = sample_top_k(
    model=model,
    start_token=start_token,
    max_length=1024,
    temperature=0.7,
    k=5,
    tokenizer=tokenizer,
    device='cuda' if torch.cuda.is_available() else 'cpu'
)


Generation output of midi files<br>

In [29]:
def generate_midi(tokenizer, generated_sequence, output_filename="rnn.mid"):
    try:
        vocab_size = tokenizer.vocab_size
        valid_sequence = [token for token in generated_sequence if 0 <= token < vocab_size]
        
        print(f"Original sequence length: {len(generated_sequence)}")
        print(f"Valid sequence length: {len(valid_sequence)}")
        
        if len(valid_sequence) < 2:
            print("Sequence too short or no valid tokens found")
            return None
            
        # Fix: Handle both possible return types from tokenizer.decode()
        output_scores = tokenizer.decode([valid_sequence])
        
        # Check if it returned a list or a single score
        if isinstance(output_scores, list):
            output_score = output_scores[0]  # Get the first score from list
        else:
            output_score = output_scores  # It's already a single score
        
        if len(output_score.tracks) == 0:
            print("Generated MIDI has no tracks")
            return None
            
        output_score.dump_midi(output_filename)
        print(f"Successfully generated {output_filename}")
        return output_score
        
    except Exception as e:
        print(f"Error during MIDI generation: {e}")
        print(f"Sequence sample: {generated_sequence[:20]}...")
        return None


# Clean the MIDI file function (defined at module level)
import pretty_midi

def remove_clashing_notes(midi_path_in, midi_path_out):
    pm = pretty_midi.PrettyMIDI(midi_path_in)
    
    for instrument in pm.instruments:
        instrument.notes.sort(key=lambda n: n.start)
        filtered_notes = []
        
        for note in instrument.notes:
            if not filtered_notes:
                filtered_notes.append(note)
            else:
                last_note = filtered_notes[-1]
                # If overlapping and pitch close (within 1 semitone)
                if note.start < last_note.end and abs(note.pitch - last_note.pitch) < 2:
                    # Keep louder note
                    if note.velocity > last_note.velocity:
                        filtered_notes[-1] = note
                else:
                    filtered_notes.append(note)
        
        instrument.notes = filtered_notes
    
    pm.write(midi_path_out)
    print(f"Saved cleaned MIDI to {midi_path_out}")


# Usage with the FIXED sampling function
start_token = tokenizer["BOS_None"]  # Use proper token access
generated_sequence = sample_top_k(  # Use the fixed function
    model=model, 
    start_token=start_token, 
    max_length=512,  # Reduced for testing
    temperature=0.8, 
    k=10,
    tokenizer=tokenizer,
    device='cuda' if torch.cuda.is_available() else 'cpu'
)

# Generate MIDI safely
output_score = generate_midi(tokenizer, generated_sequence, "rnn.mid")

# Use the function after MIDI generation
if output_score:
    remove_clashing_notes("rnn.mid", "cleanedrnn.mid")
else:
    print("MIDI generation failed")

Original sequence length: 513
Valid sequence length: 513
Successfully generated rnn.mid
Saved cleaned MIDI to cleanedrnn.mid
