In [1]:
import random
import os
import csv
from dataclasses import dataclass
import time
from collections import defaultdict

import torch
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt # for making figures
%matplotlib inline
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
import numpy as np
from torch.utils.data import Dataset, DataLoader
from dataclasses import dataclass
from typing import List, Dict, Tuple

In [2]:
# Set device and CPU threads
from multiprocessing import cpu_count
torch.set_num_threads(cpu_count())
print(f'Using {torch.get_num_threads()} threads')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#device = 'cpu'
print(f'Using device {device}')

Using 32 threads
Using device cuda:0


In [3]:
NOT_VOICE_TOKEN = 2100
MIN_TOKEN = 3600
MAX_TOKEN = 8400
VALID_TOKENS = set()
for v in range(MIN_TOKEN, MAX_TOKEN + 1, 10):
    VALID_TOKENS.add(v)
# NUM_SECONDS seconds of data * around 87 pitch readings per second
NUM_SECONDS = 10
BLOCK_SIZE = 87 * NUM_SECONDS
NOISE_WINDOW_SIZE = 10

In [4]:
# CLASS NAMES - NOTE: Do not change numbering.
CLASS_NAMES = {
    'saveri': 0,
    'hemavati': 1,
    'thodi': 2,
    'sindhubhairavi': 3
}

In [5]:
@dataclass
class PitchDataFile:
    file_path: str
    pitches: list

In [6]:
# For initial tests, keep entire data in memory. Keep only the pitch list.
pitch_data_dir = '../../data/simple-test/pitch_data_midi'

X = []
pitch_data = {}
# 10 second segments with overlap of 5 seconds to previous segment
SAMPLE_HOP_LENGTH = BLOCK_SIZE // 2 
print(f'Sample Hop Length: {SAMPLE_HOP_LENGTH}')

num_invalid = 0

pitch_counter = defaultdict(int)

for class_name in os.listdir(pitch_data_dir):
    if class_name not in CLASS_NAMES:
        print(f'{class_name} not included for training')
        continue
    class_dir = os.path.join(pitch_data_dir, class_name)
    for file_name in os.listdir(class_dir):
        file_path = os.path.join(class_dir, file_name)
        data = []
        print(f'processing {file_path}')
        with open(file_path, 'r') as file:
            reader = csv.reader(file)
            for row in reader:
                if len(row) >= 3:
                    data.append(row[2])
            ########################################################
            ## NOTE ################################################
            ## Remove noise token - add noise padding on sides instead
            #########################################################
            pitches = []
            for x in data:
                ix = int(x)
                if ix in VALID_TOKENS:
                    pitches.append(ix)
                elif ix != NOT_VOICE_TOKEN:
                    num_invalid += 1
            for v in pitches:
                pitch_counter[v] += 1
            #pitches = [stoi[NOT_VOICE_TOKEN]] * NOISE_WINDOW_SIZE + pitches + [stoi[NOT_VOICE_TOKEN]] * NOISE_WINDOW_SIZE
            print(f'pitches length: {len(pitches)}')
            #########################################################
            pd = PitchDataFile(file_path=file_path, pitches=pitches)
            sampling_data = [(file_path, i, CLASS_NAMES[class_name]) for i in range(0, len(pitches) - BLOCK_SIZE - 1, SAMPLE_HOP_LENGTH)]
            X.extend(sampling_data)
            pitch_data[file_path] = pd

print(f'Total data: {len(X)}')
assert num_invalid == 0

Sample Hop Length: 435
processing ../../data/simple-test/pitch_data_midi/hemavati/trichur-hemavati-alapana_C#3_minus_1
pitches length: 41884
processing ../../data/simple-test/pitch_data_midi/hemavati/trichur-hemavati-alapana_C#3_plus_1
pitches length: 43699
processing ../../data/simple-test/pitch_data_midi/hemavati/aruna-hemavati_F3
pitches length: 106804
processing ../../data/simple-test/pitch_data_midi/hemavati/aruna-hemavati_F3_plus_3
pitches length: 102215
processing ../../data/simple-test/pitch_data_midi/hemavati/aruna-hemavati_F3_minus_1
pitches length: 99673
processing ../../data/simple-test/pitch_data_midi/hemavati/aruna-hemavati_F3_minus_2
pitches length: 99591
processing ../../data/simple-test/pitch_data_midi/hemavati/trichur-hemavati-alapana_C#3_plus_3
pitches length: 43829
processing ../../data/simple-test/pitch_data_midi/hemavati/trichur-hemavati-alapana_C#3
pitches length: 49792
processing ../../data/simple-test/pitch_data_midi/hemavati/aruna-hemavati_F3_plus_2
pitches le

pitches length: 50646
processing ../../data/simple-test/pitch_data_midi/saveri/amritha-murali-parashakti-manuparada_G3_minus_2
pitches length: 49992
Total data: 5891


In [7]:
class PitchDataset(Dataset):
    def __init__(self, data_samples: List[Tuple], pitch_data: Dict[str, PitchDataFile], 
                 block_size: int, device: torch.device):
        """
        Args:
            data_samples: List of tuples containing (file_path, start_index, class_label)
            pitch_data: Dictionary mapping file paths to PitchDataFile objects
            block_size: Size of each pitch sequence block
            device: torch device to store tensors on
        """
        self.data_samples = data_samples
        self.pitch_data = pitch_data
        self.block_size = block_size
        self.device = device

    def __len__(self):
        return len(self.data_samples)

    def __getitem__(self, idx):
        file_path, start_index, class_label = self.data_samples[idx]
        pitch_sequence = self.pitch_data[file_path].pitches[start_index:start_index + self.block_size]
        return (torch.tensor(pitch_sequence, device=self.device, dtype=torch.int),
                torch.tensor(class_label, device=self.device))

def create_pitch_datasets(X: List[Tuple], pitch_data: Dict[str, PitchDataFile], 
                         block_size: int, device: torch.device, 
                         train_size: float = 0.8, val_size: float = 0.1,
                         random_state: int = 42):
    """
    Create train, validation, and test datasets with computed class weights.
    
    Args:
        X: List of (file_path, start_index, class_label) tuples
        pitch_data: Dictionary mapping file paths to PitchDataFile objects
        block_size: Size of each pitch sequence block
        device: torch device to store tensors on
        train_size: Proportion of data to use for training
        val_size: Proportion of data to use for validation
        random_state: Random seed for reproducibility
    
    Returns:
        train_dataset, val_dataset, test_dataset, class_weights
    """
    # First split into train and temp
    train_data, temp_data = train_test_split(X, train_size=train_size, 
                                           random_state=random_state)
    
    # Then split temp into validation and test
    val_ratio = val_size / (1 - train_size)
    val_data, test_data = train_test_split(temp_data, train_size=val_ratio,
                                         random_state=random_state)
    
    # Create datasets
    train_dataset = PitchDataset(train_data, pitch_data, block_size, device)
    val_dataset = PitchDataset(val_data, pitch_data, block_size, device)
    test_dataset = PitchDataset(test_data, pitch_data, block_size, device)
    
    # Compute class weights
    y_train = [sample[2] for sample in train_data]
    class_weights = compute_class_weight(class_weight="balanced",
                                       classes=np.unique(y_train),
                                       y=y_train)
    class_weights = torch.tensor(class_weights, device=device, dtype=torch.float32)
    
    return train_dataset, val_dataset, test_dataset, class_weights

# Usage example:
def create_data_loaders(X, pitch_data, block_size, device, random_state, batch_size=32):
    # Create datasets
    train_dataset, val_dataset, test_dataset, class_weights = create_pitch_datasets(
        X, pitch_data, block_size, device, random_state=random_state
    )
    
    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    return train_loader, val_loader, test_loader, class_weights

In [8]:
train_loader, val_loader, test_loader, class_weights = create_data_loaders(X, pitch_data, BLOCK_SIZE, device, 42)

In [9]:
import torch.nn as nn
import torch.nn.functional as F

class LSTMNet(nn.Module):
    def __init__(self, out_channels, n_embd, n_tokens, hidden_size, num_layers, device='cpu', dropout=0.1):
        super().__init__()
        self.out_channels = out_channels
        self.n_embd = n_embd
        self.n_tokens = n_tokens
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.emb = nn.Embedding(n_tokens, n_embd, device=device)
        self.lstm = nn.LSTM(input_size=n_embd, hidden_size=hidden_size, num_layers=num_layers, batch_first=True, dropout=dropout if num_layers > 1 else 0, device=device)
        self.task = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(hidden_size, 100, device=device),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(100, out_channels, device=device)
        )

    def forward(self, x):
        x = self.emb(x)
        # x shape: (batch_size, sequence_length, embedding_dim)
        lstm_out, _ = self.lstm(x)
        # lstm_out shape: (batch_size, sequence_length, hidden_size)
        # Take the last hidden state of the LSTM
        last_hidden_state = lstm_out[:, -1, :]
        output = self.task(last_hidden_state)
        return output

    def class_params(self):
        return {
            'out_channels': self.out_channels,
            'n_embd': self.n_embd,
            'n_tokens': self.n_tokens,
            'dropout': self.dropout,
            'hidden_size': self.hidden_size,
            'num_layers': self.num_layers
        }


In [10]:
def save_model(path:str, model, optimizer, class_params, epochs, train_loss, val_loss):
    torch.save({
        'epochs': epochs,
        'train_loss': train_loss,
        'val_loss': val_loss,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'class_params': class_params
    }, path)

In [11]:
# Example usage
out_channels = len(CLASS_NAMES)  # Output channels (e.g., regression output)
n_tokens = len(VALID_TOKENS)
n_embd = 96

# For LSTM
hidden_size = 256
num_layers = 1

# Learning rate
lr = 1e-3
epochs_so_far = [0]
model = LSTMNet(out_channels, n_embd, n_tokens, hidden_size, num_layers, device=device)
optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
#optimizer = optim.SGD(model.parameters(), lr=lr)

IMPORT_PATH = '../models/lstm-midi-1-epochs-[200000]'
MODEL_PATH = '../models/lstm-midi-1'
if os.path.exists(IMPORT_PATH):
    print('Model exists: Loading')
    #MODEL_PATH = '../models/cnn-5-no-noise-token-1e3'
    checkpoint = torch.load(IMPORT_PATH)
    epochs_so_far[0] = checkpoint['epochs']
    train_loss = checkpoint['train_loss']
    val_loss = checkpoint['val_loss']
    print(f'checkpoint after epoch: {epochs_so_far[0]}')
    print(f'train loss: {train_loss}')
    print(f'val loss: {val_loss}')
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
else:
    print(f'Creating new model')
    
model.train()
batch_size = 32
lossi = []
tr_losses = []
v_losses = []
total_params = sum(p.numel() for p in model.parameters())
print(f"Total number of parameters: {total_params}")
print(f'Epochs trained so far: {epochs_so_far[0]}')
ud = []
# Example input - First dimension is number of minibatches, second is embedding dimension, third is context size
#input_tensor = torch.randint(0, n_tokens, (32, 2700), device=device)  # Batch size, channels, sequence length
#logits = model(input_tensor)
#loss = F.cross_entropy(logits, torch.randint(0, out_channels, (32,), device=device))
#print(f'output shape {output.shape}')
#print(f'logits: {logits.shape}')
#print(f'loss: {loss}')
#total_params = sum(param.numel() for param in model.parameters())
#print(f'total params {total_params}')

#for p in model.parameters():
#    p.grad = None
#loss.backward()   

Creating new model
Total number of parameters: 434776
Epochs trained so far: 0


In [12]:
def train(epochs_so_far, max_steps):
    it = iter(train_loader)
    for i in range(0, max_steps):
        start_time = time.time()
        # minibatch construct
        Xb, Yb = next(it)
        #Xb, Yb = next(data_iterator)
        # forward pass
        logits = model(Xb)
        loss = F.cross_entropy(logits, Yb, weight=class_weights) # loss function
        # backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # track stats
        epochs_so_far[0] += 1
        if i % 1000 == 0: # print every once in a while
            print(f'{i:7d}/{max_steps:7d}: {loss.item():.4f}')
        #lossi.append(loss.log10().item())
        #with torch.no_grad():
        #    ud.append([((lr*p.grad).std() / p.data.std()).log10().item() for p in model.parameters()])

In [13]:
def loop(epochs):
    max_steps = 5
    N = 10
    for _ in range(N):
        model.train()
        train(epochs, max_steps)
        model.eval()
        train_loss = split_loss('train')
        val_loss = split_loss('val')
        #train_loss, val_loss = 0,0
        tr_losses.append(train_loss)
        v_losses.append(val_loss)
        print(f'Train loss: {train_loss}, val loss: {val_loss}')
        save_model(MODEL_PATH + '-epochs-' + str(epochs), model, optimizer, model.class_params(), epochs_so_far[0], tr_losses[-1], v_losses[-1])
    plt.figure(figsize=(10, 6))
    plt.plot(torch.tensor(tr_losses), label='Training Loss', color='blue')
    plt.plot(torch.tensor(v_losses), label='Validation Loss', color='red')
    plt.legend()
    plt.title('Training and Validation Losses Over Time')
    plt.xlabel('Steps')
    plt.ylabel('Loss')
    
    # Show the plot
    plt.show()

In [14]:
loop(epochs_so_far)

../aten/src/ATen/native/cuda/Indexing.cu:1308: indexSelectLargeIndex: block: [36,0,0], thread: [32,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1308: indexSelectLargeIndex: block: [36,0,0], thread: [33,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1308: indexSelectLargeIndex: block: [36,0,0], thread: [34,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1308: indexSelectLargeIndex: block: [36,0,0], thread: [35,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1308: indexSelectLargeIndex: block: [36,0,0], thread: [36,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1308: indexSelectLargeIndex: block: [36,0,0], thread: [37,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
../aten/src/ATen/native/cuda/Indexing.cu:1308: indexSelectLargeIndex: block: [36,0,0], t

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
