<a href="https://colab.research.google.com/github/the-crHack/email/blob/main/HW4_REG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!apt install unzip
!pip install py_midicsv==4.1.2
!pip install midi_player==0.5.1
!unzip /content/sample_data/train-20241205T181153Z-001.zip -d /content/sample_data

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, random_split
import random
import glob
import numpy as np
import matplotlib.pyplot as plt


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
unzip is already the newest version (6.0-26ubuntu3.2).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.
Collecting py_midicsv==4.1.2
  Downloading py_midicsv-4.1.2-py3-none-any.whl.metadata (4.7 kB)
Collecting rich-click<2.0.0,>=1.8.3 (from py_midicsv==4.1.2)
  Downloading rich_click-1.8.5-py3-none-any.whl.metadata (7.9 kB)
Downloading py_midicsv-4.1.2-py3-none-any.whl (16 kB)
Downloading rich_click-1.8.5-py3-none-any.whl (35 kB)
Installing collected packages: rich-click, py_midicsv
Successfully installed py_midicsv-4.1.2 rich-click-1.8.5
Collecting midi_player==0.5.1
  Downloading midi_player-0.5.1-py3-none-any.whl.metadata (2.2 kB)
Downloading midi_player-0.5.1-py3-none-any.whl (6.4 kB)
Installing collected packages: midi_player
Successfully installed midi_player-0.5.1
Archive:  /content/sample_data/train-20241205T181153Z-001.zip
  End-of-central-directory signature not fou

4) To ensure an adequate and diverse dataset, we extracted multiple overlapping context windows from each song with the following considerations:

I used Multiple Instances Per Song to better capture the variations in musical patterns. This approach avoided under-utilizing the available data. I used a stride of 4 to avoid overlapping windows, increasing the effective dataset size.I limited max_samples_per_song to ensure that no single song dominated the dataset due to repeated patterns.
Random Sampling: Random sampling of context windows ensured diversity in the extracted data, making the dataset more robust.

In [None]:
!unzip /content/sample_data/train-20241205T181153Z-001.zip -d /content/sample_data

In [None]:


# --- Dataset Definition ---

class SongsDataset(Dataset):
    def __init__(self, files, context_window=64, max_samples_per_song=250):
        self.data = []  # List to store input sequences (X)
        self.labels = []  # List to store corresponding labels (Y)

        # Iterate over each song file
        for file in files:
            # Load the song data (assuming it's stored as a tensor)
            song_data = torch.load(file)  # song_data should be a tensor with shape (num_events, 4)

            # Calculate dynamic stride based on the length of the song
            stride = 4 #max(4, len(song_data) // 300)  # dynamic stride (longer songs get smaller strides)

            # Create indices for sliding window with dynamic stride and sampling
            indices = range(0, len(song_data) - context_window, stride)

            # Sample a few indices for training (to avoid memory overload)
            sampled_indices = random.sample(list(indices), min(max_samples_per_song, len(indices)))

            # Extract data slices and labels
            for i in sampled_indices:
                # Input sequence: slice of notes with size `context_window`
                self.data.append(song_data[i:i + context_window])  # Shape: (context_window, 4)
                # Label: the next note after the context window
                self.labels.append(song_data[i + context_window])  # Shape: (1, 4)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.tensor(self.data[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.float32)


# --- Model Definition ---

class NotePredictionModel(nn.Module):
    def __init__(self, dropout=0.2):
        super(NotePredictionModel, self).__init__()

        # LSTM Layer
        self.lstm = nn.LSTM(input_size=4, hidden_size=128, batch_first=True, dropout=dropout)

        # Fully connected layer
        self.fc = nn.Linear(128, 134)  # We predict 134 values: [µ_t, σ_t, µ_d, σ_d, log(π0), ..., log(π127), µ_v, σ_v]

        # Dropout layer for regularization
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # LSTM layer with dropout
        lstm_out, (h_n, c_n) = self.lstm(x)
        last_output = lstm_out[:, -1, :]  # (batch_size, hidden_size)

        # Apply dropout for regularization
        last_output = self.dropout(last_output)

        # Fully connected layer to predict the required values
        output = self.fc(last_output)  # Shape: (batch_size, 134)

        # Split the output into the predicted values
        µ_t, σ_t, µ_d, σ_d, logits_n, µ_v, σ_v = output.split([1, 1, 1, 1, 128, 1, 1], dim=-1)

        # Return the predicted values
        return torch.cat((µ_t, σ_t, µ_d, σ_d, logits_n, µ_v, σ_v), dim=-1)


# --- Loss Function ---

def nll_loss_continuous(pred_mu, pred_sigma, target):
    epsilon = 1e-6
    pred_sigma = torch.max(pred_sigma, torch.tensor(epsilon))  # Prevent log(0)

    loss = 0.5 * torch.log(2 * torch.tensor(torch.pi)) + torch.log(pred_sigma) + (target - pred_mu) ** 2 / (2 * pred_sigma ** 2)
    return loss.mean()


# --- Training Function ---

def train_model(model, train_loader, val_loader, epochs, learning_rate, scheduler=None):
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    training_losses = []
    validation_losses = []

    for epoch in range(12):
        model.train()
        running_loss = 0.0

        for step, (context, target) in enumerate(train_loader):
            optimizer.zero_grad()

            # Forward pass
            output = model(context)

            # Split the output into predicted values
            µ_t_pred, σ_t_pred, µ_d_pred, σ_d_pred, logits_n_pred, µ_v_pred, σ_v_pred = output.split([1, 1, 1, 1, 128, 1, 1], dim=-1)
            logits_n_pred = F.softmax(logits_n_pred, dim=-1)

            # Calculate the loss for continuous distributions using NLL
            t_loss = nll_loss_continuous(µ_t_pred, σ_t_pred, target[:, 0])
            d_loss = nll_loss_continuous(µ_d_pred, σ_d_pred, target[:, 1])
            v_loss = nll_loss_continuous(µ_v_pred, σ_v_pred, target[:, 2])

            # Calculate the categorical cross entropy for the note value logits
            nll_loss = nn.CrossEntropyLoss()(logits_n_pred, target[:, 3].long())

            # Total loss
            loss = t_loss + d_loss + v_loss + nll_loss

            if step % 100 == 0:
                print(f"Epoch {epoch+1}/{epochs}, Step {step}/{len(train_loader)}, Loss: {loss.item()}")


            # Backward pass
            loss.backward()

            # Optimize the model
            optimizer.step()

            running_loss += loss.item()

        # Log training loss
        training_losses.append(running_loss / len(train_loader))

        # Validate after every epoch
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for context, target in val_loader:
                output = model(context)
                µ_t_pred, σ_t_pred, µ_d_pred, σ_d_pred, logits_n_pred, µ_v_pred, σ_v_pred = output.split([1, 1, 1, 1, 128, 1, 1], dim=-1)
                logits_n_pred = F.softmax(logits_n_pred, dim=-1)

                # Calculate the loss for validation
                t_loss = nll_loss_continuous(µ_t_pred, σ_t_pred, target[:, 0])
                d_loss = nll_loss_continuous(µ_d_pred, σ_d_pred, target[:, 1])
                v_loss = nll_loss_continuous(µ_v_pred, σ_v_pred, target[:, 2])

                nll_loss = nn.CrossEntropyLoss()(logits_n_pred, target[:, 3].long())
                val_loss += t_loss + d_loss + v_loss + nll_loss

        validation_losses.append(val_loss / len(val_loader))

        print(f"Epoch [{epoch+1}/{epochs}], Training Loss: {running_loss / len(train_loader)}, Validation Loss: {val_loss / len(val_loader)}")

    return training_losses, validation_losses


# --- Dataset Preparation ---

# Example of file list with paths (you need to replace these with actual paths)
train_files = glob.glob("/content/sample_data/train/*/*/*/*.pt")  # Replace with your actual data files

# Initialize the dataset
context_window = 64  # Size of the context window
max_samples_per_song = 250  # Max samples per song

dataset = SongsDataset(train_files, context_window=context_window, max_samples_per_song=max_samples_per_song)

# Split the dataset into training and validation sets
train_size = int(0.8 * len(dataset))  # 80% for training
val_size = len(dataset) - train_size  # 20% for validation
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# DataLoader for training and validation
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# --- Model Initialization and Training ---

# Initialize the model
model = NotePredictionModel(dropout=0.3)  # Adding dropout for regularization

# Train the model
training_losses, validation_losses = train_model(model, train_loader, val_loader, epochs=12, learning_rate=0.01)

# --- Plotting the Training and Validation Loss ---

plt.plot(training_losses, label='Training Loss')
plt.plot(validation_losses, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.title('Training and Validation Loss over Epochs')
plt.show()


  song_data = torch.load(file)  # song_data should be a tensor with shape (num_events, 4)
  return torch.tensor(self.data[idx], dtype=torch.float32), torch.tensor(self.labels[idx], dtype=torch.float32)


Epoch 1/12, Step 0/2391, Loss: 3.8997766176768e+16
Epoch 1/12, Step 100/2391, Loss: 4597.6943359375
Epoch 1/12, Step 200/2391, Loss: 1992.455078125
Epoch 1/12, Step 300/2391, Loss: 4134.08203125
Epoch 1/12, Step 400/2391, Loss: 2098.338134765625
Epoch 1/12, Step 500/2391, Loss: 2105.70263671875
Epoch 1/12, Step 600/2391, Loss: 3364.2822265625
Epoch 1/12, Step 700/2391, Loss: 2555.210205078125
Epoch 1/12, Step 800/2391, Loss: 4631.392578125
Epoch 1/12, Step 900/2391, Loss: 1154.1571044921875
Epoch 1/12, Step 1000/2391, Loss: 16236.0126953125
Epoch 1/12, Step 1100/2391, Loss: 583.235107421875
Epoch 1/12, Step 1200/2391, Loss: 1064.77587890625
Epoch 1/12, Step 1300/2391, Loss: 1044.274658203125
Epoch 1/12, Step 1400/2391, Loss: 373.1807861328125
Epoch 1/12, Step 1500/2391, Loss: 468.5412292480469
Epoch 1/12, Step 1600/2391, Loss: 1352.0775146484375
Epoch 1/12, Step 1700/2391, Loss: 867.3375854492188
Epoch 1/12, Step 1800/2391, Loss: 1286.4000244140625
Epoch 1/12, Step 1900/2391, Loss: 648