In [1]:
import os
from data.process_data import MidiEncoder, MIDIEncoderREMI
import pickle as pkl
from torch.utils.data import DataLoader
from data.dataset import TransformerDatasetREMI
import numpy as np
import glob
import torch

In [2]:
path_to_midi = "data/emopia/EMOPIA_2.2/midis/"

In [3]:
# Instantiate your MidiEncoder and MidiEncoderREMI
midi_files_list = [os.path.join(path_to_midi, file) for file in os.listdir(path_to_midi) if file.endswith(".mid")]
midi_encoder = MidiEncoder(steps_per_sec=100, num_vel_bins=32, min_pitch=21, max_pitch=108)
midi_encoder_remi = MIDIEncoderREMI(dict_path="data/encoder_dict.pkl", midi_files_list=midi_files_list)

In [4]:
encoded_sequences_path = "data/encoded_sequences.pkl"
encoded_sequences = midi_encoder.encode_midi_list(midi_files_list, pkl_path=encoded_sequences_path)

data/emopia/EMOPIA_2.2/midis/Q1_9v2WSpn4FCw_10.mid
0.02978801727294922
data/emopia/EMOPIA_2.2/midis/Q2_dtS02mrDMsM_1.mid
0.042944908142089844
data/emopia/EMOPIA_2.2/midis/Q3_3ZnxqCZ7qGg_0.mid
0.009124279022216797
data/emopia/EMOPIA_2.2/midis/Q4_vpTguZtJAFA_2.mid
0.019979238510131836
data/emopia/EMOPIA_2.2/midis/Q3_Ie5koh4qvJc_5.mid
0.013935089111328125
data/emopia/EMOPIA_2.2/midis/Q4_JP3QKZlyQz4_0.mid
0.010724067687988281
data/emopia/EMOPIA_2.2/midis/Q1_Y5JcZQ0xg4Y_3.mid
0.035540103912353516
data/emopia/EMOPIA_2.2/midis/Q1_1Qc15G0ZHIg_3.mid
0.023600101470947266
data/emopia/EMOPIA_2.2/midis/Q1_ZgT7yq2jsBk_0.mid


0.027865886688232422
data/emopia/EMOPIA_2.2/midis/Q2_1kny88W533Q_4.mid
0.02965092658996582
data/emopia/EMOPIA_2.2/midis/Q3_xIsvaT20pZ0_1.mid
0.021212100982666016
data/emopia/EMOPIA_2.2/midis/Q2_k-FNDbK6Qhg_2.mid
0.15807867050170898
data/emopia/EMOPIA_2.2/midis/Q4_YAAxPW1GB7w_1.mid
0.013483524322509766
data/emopia/EMOPIA_2.2/midis/Q2_Q5b5unyP8BM_0.mid
0.035289764404296875
data/emopia/EMOPIA_2.2/midis/Q4_JxSU49jFKwM_1.mid
0.012176990509033203
data/emopia/EMOPIA_2.2/midis/Q4_PK8YIUaV3Xw_1.mid
0.013318538665771484
data/emopia/EMOPIA_2.2/midis/Q1_Jn9r0avp0fY_3.mid
0.033811330795288086
data/emopia/EMOPIA_2.2/midis/Q3_TonQX8XbvX8_2.mid
0.015692949295043945
data/emopia/EMOPIA_2.2/midis/Q4_iHPKusssXzk_2.mid
0.020656347274780273
data/emopia/EMOPIA_2.2/midis/Q1_POaIGvLsp5M_1.mid
0.017627954483032227
data/emopia/EMOPIA_2.2/midis/Q1_eVMSeElk81Q_2.mid
0.014144182205200195
data/emopia/EMOPIA_2.2/midis/Q1_1vjy9oMFa8c_2.mid
0.015867233276367188
data/emopia/EMOPIA_2.2/midis/Q3_1Q3MoBFh6eU_2.mid
0.020051

In [5]:
# Now create the dataset and save it in a NumPy file
dataset_path = "data/datasets/"
midi_encoder_remi.save_dataset(midi_files_list, dataset_path)

In [6]:
# Save the dataset as a single file
single_file_dataset_path = "data/single_file_dataset.npz"
midi_encoder_remi.save_dataset_as_single_file(glob.glob(os.path.join(dataset_path, '*.npy')), single_file_dataset_path)

  val = np.asanyarray(val)


In [7]:
max_seq_len = 256
dataset = TransformerDatasetREMI(single_file_dataset_path, seq_len=max_seq_len)

  self.sequences = torch.Tensor(self.sequences)


In [8]:
train_size = int(0.7 * len(dataset))
valid_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - valid_size
train_dataset, valid_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, valid_size, test_size])

In [9]:
print("Train dataset size: ", len(train_dataset))
print("Validation dataset size: ", len(valid_dataset))
print("Test dataset size: ", len(test_dataset))

Train dataset size:  3507
Validation dataset size:  751
Test dataset size:  753


In [10]:
from torch.utils.data import DataLoader
batch_size = 64

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

In [11]:
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

print("Train dataset size: ", len(train_dataloader))
print("Validation dataset size: ", len(valid_dataloader))
print("Test dataset size: ", len(test_dataloader))

Train dataset size:  54
Validation dataset size:  11
Test dataset size:  11


In [12]:
vocab_set = set()
for dic in train_dataset:
    ipt = dic['input'].numpy()
    vocab_set = vocab_set.union(set(ipt))
vocab_size = len(vocab_set)

In [13]:
# from utils.trainer import TransformerTrainer
# from model.transformer import Generator, Discriminator, PatchDiscriminator
# from utils.losses import MultiCrossEntropyLoss, TransfoCrossEntropyLoss,TransfoL1Loss, wgan_loss

In [14]:
# generator = Generator(vocab_size, max_seq_len, dim=256)
# discriminator = Discriminator(vocab_size, max_seq_len, dim=256)
# patch_discriminator = PatchDiscriminator(vocab_size, max_seq_len, dim=256)

# ce_loss = TransfoCrossEntropyLoss()
# gan_loss = wgan_loss

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# generator.to(device)
# discriminator.to(device)
# patch_discriminator.to(device)

# g_lr = 1e-4
# d_lr = 1e-4

# EPOCHS = 10
# checkpoint_dir = "checkpoints/model.pt"

In [15]:
# trainer = TransformerTrainer(generator, patch_discriminator, train_dataloader, train_dataloader, test_dataloader, ce_loss,
#                gan_loss, device, g_lr, d_lr, vocab_size)

In [16]:
# hist = trainer.train( EPOCHS, checkpoint_dir, validate = False, log_interval=20, load=False, save=True, train_gan=False)

In [17]:
def pad_features(reviews_ints, seq_length):
    ''' Return features of review_ints, where each review is padded with 0's
        or truncated to the input seq_length.
    '''

    # getting the correct rows x cols shape
    features = np.zeros((len(reviews_ints), seq_length), dtype=int)

    # for each review, I grab that review and put it into features
    for i, row in enumerate(reviews_ints):
        features[i, :len(row)] = np.array(row)[:seq_length]

    return features

In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import math
import copy

# Classifier

In [26]:
class ZeroEmbedding(nn.Embedding):
    """
    Used for biases.
    """

    def reset_parameters(self):
        """
        Initialize parameters.
        """

        self.weight.data.zero_()
        if self.padding_idx is not None:
            self.weight.data[self.padding_idx].fill_(0.0)

In [27]:
class PositionalEmbedding(nn.Module):
    """
    PositionalEmbedding is a class for adding positional embeddings to token embeddings.

    Args:
        sequence_length (int): The maximum sequence length.
        input_dim (int): The dimension of input token embeddings (vocabulary size)
        output_dim (int): The dimension of output positional embeddings.

    Attributes:
        token_embeddings (nn.Embedding): The token embedding layer.
        position_embeddings (nn.Embedding): The positional embedding layer.
        sequence_length (int): The maximum sequence length.
    """

    def __init__(self, sequence_length, input_dim, output_dim):
        super(PositionalEmbedding, self).__init__()
        #TODO: add embedding layers to map words to vectors
        self.token_embeddings = nn.Embedding(input_dim, output_dim)
        #TODO: add embedding layers to map position index to vectors
        self.position_embeddings = ZeroEmbedding(sequence_length, output_dim)
        self.sequence_length = sequence_length

    def forward(self, inputs):
        """
        Forward pass of the PositionalEmbedding.

        Args:
            inputs (torch.Tensor): The input tensor of token indices.

        Returns:
            torch.Tensor: The output tensor with positional embeddings added to token embeddings.
        """
        length = inputs.size(-1)
        device = inputs.device
        #TODO: compute the position index 0, 1, ..., seq_len -1
        positions = torch.arange(0, self.sequence_length).unsqueeze(0).to(device) # here we add the batch_size as first dim
        #TODO: compute the word embeddings
        embedded_tokens = self.token_embeddings(inputs)
        #TODO: compute the positional embeddings
        embedded_positions = self.position_embeddings(positions)
        #TODO: return the final embeddings
        return embedded_tokens + embedded_positions   # here the first dim of embedded_positions will be broadcasted to match the batch size

In [28]:
class TransformerEncoder(nn.Module):

    def __init__(self, embed_dim, dense_dim, num_heads):
        super(TransformerEncoder, self).__init__()
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads

        #TODO: Define multihead attention layer
        #Input and output both have size (batch_size, seq_len, embed_dim)
        self.attention = nn.MultiheadAttention(embed_dim, num_heads,batch_first=True)


        #TODO: define a two-layer Feed-forward network with hidden layer size dense_dim and output layer size embed_dim
        self.dense_proj = nn.Sequential(
            nn.Linear(embed_dim, dense_dim),
            nn.ReLU(),
            nn.Linear(dense_dim, embed_dim)
        )
        #TODO: define two layer normalization layers
        self.layernorm_1 = nn.LayerNorm(embed_dim)
        self.layernorm_2 = nn.LayerNorm(embed_dim)

    def forward(self, inputs):
        """
        Forward pass of the TransformerEncoder block.

        Args:
            inputs (torch.Tensor): The input tensor of shape (batch_size, seq_len, embed_dim).

        Returns:
            torch.Tensor: The output tensor of shape ( batch_size,seq_len, embed_dim).
        """
        #TODO: pass inputs through the multihead attention layer
        attention_output, _ = self.attention(query=inputs, key=inputs, value=inputs)
        #TODO: pass the attention output through the add+normalization layer
        proj_input = self.layernorm_1(inputs + attention_output)
        #TODO: pass through the feed-forward network
        proj_output = self.dense_proj(proj_input)
        #TODO: pass through another add+normalization layer and output
        return self.layernorm_2(proj_input + proj_output)

In [29]:
class TransformerEncoderModel(nn.Module):
    """
    TransformerEncoderModel is a class representing a text classification model using Transformer encoder with positional embeddings.

    Args:
        vocab_size (int): The size of the vocabulary.
        embed_dim (int): The dimension of the input embeddings and output embeddings.
        num_heads (int): The number of attention heads in the Transformer encoder.
        dense_dim (int): The dimension of the intermediate dense layer.
        sequence_length (int): The maximum sequence length for positional embeddings.

    Attributes:
        embedding (PositionalEmbedding): The positional embedding layer.
        transformer_encoder (TransformerEncoder): The Transformer encoder block.
        global_max_pooling (nn.AdaptiveMaxPool1d): The global max-pooling layer.
        dropout (nn.Dropout): The dropout layer.
        fc (nn.Linear): The fully connected layer for classification.
        sigmoid (nn.Sigmoid): The sigmoid activation function for binary classification.
    """

    def __init__(self, vocab_size, embed_dim, num_heads, dense_dim, sequence_length, num_classes):
        super(TransformerEncoderModel, self).__init__()

        #TODO: add embedding layer that maps word to vectors
        self.embedding = PositionalEmbedding(sequence_length, vocab_size, embed_dim)

        #TODO: add transformer encoder
        self.transformer_encoder = TransformerEncoder(embed_dim, dense_dim, num_heads)

        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(embed_dim, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, inputs):
        """
        Forward pass of the TransformerEncoderModel for text classification.

        Args:
            inputs (torch.Tensor): The input tensor of shape (batch_size, seq_len).
            mask (torch.Tensor, optional): The attention mask tensor of shape (batch_size, seq_len).
                Defaults to None.

        Returns:
            torch.Tensor: The output tensor with sigmoid activation for binary classification.
        """
        x = self.embedding(inputs)
        x = self.transformer_encoder(x)        # x has shape (Batch, Seq_Len, Embed_dim)
        x,_ = torch.max(x, dim=1)              # x has shape (Batch, Embd_dim)
        x = self.dropout(x)                    # pass dropout layer
        x = self.fc(x)                         # pass a linear layer
        return self.sigmoid(x)                 # pass sigmoid activation


In [30]:
# Model
num_heads = 8
dense_dim = 1024
embed_dim = 512
model = TransformerEncoderModel(vocab_size, embed_dim, num_heads, dense_dim, sequence_length = max_seq_len, num_classes=4)
model.to(device)
print(model)

TransformerEncoderModel(
  (embedding): PositionalEmbedding(
    (token_embeddings): Embedding(198, 512)
    (position_embeddings): ZeroEmbedding(300, 512)
  )
  (transformer_encoder): TransformerEncoder(
    (attention): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
    )
    (dense_proj): Sequential(
      (0): Linear(in_features=512, out_features=1024, bias=True)
      (1): ReLU()
      (2): Linear(in_features=1024, out_features=512, bias=True)
    )
    (layernorm_1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (layernorm_2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
  )
  (dropout): Dropout(p=0.2, inplace=False)
  (fc): Linear(in_features=512, out_features=4, bias=True)
  (sigmoid): Sigmoid()
)


In [19]:
from data.dataset import ClassifierDataset

In [20]:
dataset = ClassifierDataset(single_file_dataset_path, seq_len=max_seq_len, labels_path="data/emopia/EMOPIA_2.2/label.csv")
train_size = int(0.7 * len(dataset))
valid_size = int(0.15 * len(dataset))
test_size = len(dataset) - train_size - valid_size
train_dataset, valid_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, valid_size, test_size])

In [21]:
print("Train dataset size: ", len(train_dataset))
print("Validation dataset size: ", len(valid_dataset))
print("Test dataset size: ", len(test_dataset))

batch_size = 64

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

vocab_set = set()
for dic in train_dataset:
    ipt = dic['input'].numpy()
    vocab_set = vocab_set.union(set(ipt))
vocab_size = len(vocab_set)

Train dataset size:  3520
Validation dataset size:  754
Test dataset size:  755


In [22]:
# criterion = nn.CrossEntropyLoss()

In [23]:
def train(model, train_loader, valid_loader, num_epochs=100):
    optimizer = torch.optim.Adam(model.parameters())
    train_losses = []
    valid_losses = []
    best_loss = 50
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        test_loss = 0
        ### Traing Loop ####
        #0. Loop through all the training batches
        for batch in train_loader:
            inputs = batch['input'].to(device)
            labels = batch['target'].to(device)
            #1. Compute the output
            outputs = model(inputs).squeeze(1)
            #2. Compute the loss
            loss = criterion(outputs, labels)
            #3. Zero out gradients
            optimizer.zero_grad()
            #4. Compute new gradients, by backward propagation
            loss.backward()
            #5. Update the model parameters
            optimizer.step()

            train_loss += loss.item()
        train_losses.append(train_loss / len(train_loader))
        # Print training loss for this epoch
        # print(f"Epoch {epoch + 1}/{num_epochs}, Training Loss: {train_loss / len(train_loader)}")

        ### Validation Loop ###
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for batch in valid_loader:
                inputs = batch['input'].to(device)
                labels = batch['target'].to(device)
                outputs = model(inputs).squeeze(1)
                loss = criterion(outputs, labels)
                predicted = torch.argmax(outputs, dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                test_loss += loss.item()
            valid_losses.append(test_loss / len(valid_loader))

        val_acc = 100 * correct / total
        if test_loss < best_loss:
            best_loss = test_loss
            print("Saving...")
            torch.save(model.state_dict(), "checkpoints/transformer_classifier.pt")
            print("Model saved!")
        print(f"Epoch {epoch + 1}/{num_epochs},Training Loss: {(train_loss / len(train_loader)):.4f}, Validation Loss: {(test_loss / len(valid_loader)):.4f}, Validation Accuracy: {val_acc:.2f}")

    
    return train_losses, valid_losses

In [87]:
train_losses, valid_losses = train(model, train_dataloader, valid_dataloader, num_epochs=100)

Saving...
Model saved!
Epoch 1/100,Training Loss: 1.1627, Validation Loss: 1.1095, Validation Accuracy: 52.97
Saving...
Model saved!
Epoch 2/100,Training Loss: 1.0991, Validation Loss: 1.1025, Validation Accuracy: 52.03
Saving...
Model saved!
Epoch 3/100,Training Loss: 1.0819, Validation Loss: 1.0880, Validation Accuracy: 60.47
Saving...
Model saved!
Epoch 4/100,Training Loss: 1.0689, Validation Loss: 1.0808, Validation Accuracy: 62.50
Saving...
Model saved!
Epoch 5/100,Training Loss: 1.0543, Validation Loss: 1.0781, Validation Accuracy: 63.44
Saving...
Model saved!
Epoch 6/100,Training Loss: 1.0359, Validation Loss: 1.0702, Validation Accuracy: 61.56
Saving...
Model saved!
Epoch 7/100,Training Loss: 1.0261, Validation Loss: 1.0609, Validation Accuracy: 62.66
Epoch 8/100,Training Loss: 1.0177, Validation Loss: 1.0666, Validation Accuracy: 63.59
Saving...
Model saved!
Epoch 9/100,Training Loss: 1.0155, Validation Loss: 1.0598, Validation Accuracy: 62.34
Epoch 10/100,Training Loss: 1.005

In [24]:
from model.classifier import PatchClassifier
from utils.classifier_trainer import ClassifierTrainer

In [25]:
# Assuming you have your model, dataloaders, loss function, device, learning rate, vocabulary size, etc. defined

# Instantiate your ClassifierTrainer
classifier_model = PatchClassifier(vocab_size, max_seq_len, 256)  # Replace with your actual classifier model
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Replace with your actual loss function
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Use GPU if available, else use CPU

classifier_model.to(device)

vocab_size = vocab_size  
learning_rate = 0.001  # Replace with your actual learning rate

# Instantiate the ClassifierTrainer
classifier_trainer = ClassifierTrainer(
    model=classifier_model,
    dataloader=train_dataloader,
    valid_dataloader=valid_dataloader,
    ce_loss=cross_entropy_loss,
    device=device,
    lr=learning_rate,
    vocab_size=vocab_size,
    warmup_steps=1000,  # Replace with your desired warmup steps
    total_iters=30000,  # Replace with your desired total iterations
    schedule='cosine_with_warmup'  # Replace with your desired learning rate schedule
)

In [26]:
classifier_trainer.train(EPOCHS=20, checkpoint_dir='checkpoints/original_classifier.pt')  # Start training

Epoch 1/20
----------
| End of epoch   1  | time: 3.7005s | valid loss 1.320108 | valid accuracy 0.367045 |
Epoch 2/20
----------
| End of epoch   2  | time: 2.4831s | valid loss 0.946805 | valid accuracy 0.526705 |
Epoch 3/20
----------
| End of epoch   3  | time: 2.4765s | valid loss 0.873441 | valid accuracy 0.567330 |
Epoch 4/20
----------
| End of epoch   4  | time: 2.5036s | valid loss 0.791025 | valid accuracy 0.640909 |
Epoch 5/20
----------
| End of epoch   5  | time: 2.4774s | valid loss 0.709347 | valid accuracy 0.690909 |
Epoch 6/20
----------
| End of epoch   6  | time: 2.4580s | valid loss 0.705034 | valid accuracy 0.701136 |
Epoch 7/20
----------
| End of epoch   7  | time: 2.4757s | valid loss 0.642763 | valid accuracy 0.732670 |
Epoch 8/20
----------
| End of epoch   8  | time: 2.4528s | valid loss 0.673216 | valid accuracy 0.700568 |
Epoch 9/20
----------
| End of epoch   9  | time: 2.4249s | valid loss 0.635992 | valid accuracy 0.727273 |
Epoch 10/20
----------
| End

defaultdict(list,
            {'train_acc': [tensor(0.3670, device='cuda:0'),
              tensor(0.5267, device='cuda:0'),
              tensor(0.5673, device='cuda:0'),
              tensor(0.6409, device='cuda:0'),
              tensor(0.6909, device='cuda:0'),
              tensor(0.7011, device='cuda:0'),
              tensor(0.7327, device='cuda:0'),
              tensor(0.7006, device='cuda:0'),
              tensor(0.7273, device='cuda:0'),
              tensor(0.7659, device='cuda:0'),
              tensor(0.7557, device='cuda:0'),
              tensor(0.7514, device='cuda:0'),
              tensor(0.8210, device='cuda:0'),
              tensor(0.8361, device='cuda:0'),
              tensor(0.8401, device='cuda:0'),
              tensor(0.8372, device='cuda:0'),
              tensor(0.8060, device='cuda:0'),
              tensor(0.8540, device='cuda:0'),
              tensor(0.8256, device='cuda:0'),
              tensor(0.8372, device='cuda:0')],
             'train_loss': [

# GAN

In [33]:
# build a generator
class Generator(nn.Module):
    def __init__(self, vocab_size, max_seq_length, dim, num_heads, num_layers, d_ff, dropout):
        super(Generator, self).__init__()
        self.transformer = Transformer(vocab_size, vocab_size, dim, num_heads, num_layers, d_ff, max_seq_length, dropout)
        self.fc = nn.Linear(dim, vocab_size)
        
    def forward(self, src, tgt):
        output = self.transformer(src, tgt)
        output = self.fc(output)
        return output

In [39]:
# build a discriminator
class Discriminator(nn.Module):
    def __init__(self, vocab_size, embed_dim, num_heads, dense_dim, num_classes,sequence_length):
        super(Discriminator, self).__init__()
        self.transformer = TransformerEncoderModel(vocab_size, embed_dim, num_heads, dense_dim, num_classes, sequence_length)
        self.fc = nn.Linear(vocab_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, src, tgt):
        output = self.transformer(src, tgt)
        output = self.sigmoid(self.fc(output))
        return output

In [45]:
# Create the generator

netG = Generator(vocab_size, max_seq_len, dim = 512, num_heads = 8, num_layers = 6, d_ff = 256, dropout = 0.2).to(device)
# Create the Discriminator
num_heads = 8
dense_dim = 1024
embed_dim = 512
netD = Discriminator(vocab_size, embed_dim, num_heads, dense_dim, num_classes=1, sequence_length=max_seq_len).to(device)

# Initialize the ``BCELoss`` function
criterion = nn.BCELoss()

# Create batch of latent vectors that we will use to visualize
#  the progression of the generator
nz = 100
fixed_noise = torch.randn(64, nz, 1, 1, device=device)

# Establish convention for real and fake labels during training
real_label = 1.
fake_label = 0.

lr = 0.0002
beta1 = 0.5

# Setup Adam optimizers for both G and D
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))

In [41]:
num_epochs = 10

In [44]:
target

tensor([[ 53,  60,  83,  ...,  46,  53,  60],
        [ 50,  62,  10,  ...,  69,  26,  42],
        [128,  58,  16,  ...,   0,   0,   0],
        ...,
        [ 49,  65,  74,  ...,   0,   0,   0],
        [ 19,  59,  53,  ..., 162, 102,  69],
        [ 62, 110,  74,  ...,  52,  43,  73]], device='cuda:0')

In [46]:
# Training Loop

# Lists to keep track of progress
txt_list = []
G_losses = []
D_losses = []
iters = 0


print("Starting Training Loop...")
# For each epoch
for epoch in range(num_epochs):
    # For each batch in the dataloader
    for i, data in enumerate(train_dataloader):
        input = data['input']
        target = data['target'].to(device)
        ############################
        # (1) Update D network: maximize log(D(x)) + log(1 - D(G(z)))
        ###########################
        ## Train with all-real batch
        netD.zero_grad()
        # Format batch
        real_cpu = input.to(device)
        b_size = real_cpu.size(0)
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        # Forward pass real batch through D
        output = netD(real_cpu, target).view(-1)
        # Calculate loss on all-real batch
        errD_real = criterion(output, label)
        # Calculate gradients for D in backward pass
        errD_real.backward()
        D_x = output.mean().item()

        ## Train with all-fake batch
        # Generate batch of latent vectors
        noise = torch.randn(b_size, nz, 1, 1, device=device)
        # Generate fake image batch with G
        fake = netG(noise)
        label.fill_(fake_label)
        # Classify all fake batch with D
        output = netD(fake.detach()).view(-1)
        # Calculate D's loss on the all-fake batch
        errD_fake = criterion(output, label)
        # Calculate the gradients for this batch, accumulated (summed) with previous gradients
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        # Compute error of D as sum over the fake and the real batches
        errD = errD_real + errD_fake
        # Update D
        optimizerD.step()

        ############################
        # (2) Update G network: maximize log(D(G(z)))
        ###########################
        netG.zero_grad()
        label.fill_(real_label)  # fake labels are real for generator cost
        # Since we just updated D, perform another forward pass of all-fake batch through D
        output = netD(fake).view(-1)
        # Calculate G's loss based on this output
        errG = criterion(output, label)
        # Calculate gradients for G
        errG.backward()
        D_G_z2 = output.mean().item()
        # Update G
        optimizerG.step()

        # Output training stats
        if i % 50 == 0:
            print('[%d/%d][%d/%d]\tLoss_D: %.4f\tLoss_G: %.4f\tD(x): %.4f\tD(G(z)): %.4f / %.4f'
                  % (epoch, num_epochs, i, len(train_dataloader),
                     errD.item(), errG.item(), D_x, D_G_z1, D_G_z2))

        # Save Losses for plotting later
        G_losses.append(errG.item())
        D_losses.append(errD.item())

        # Check how the generator is doing by saving G's output on fixed_noise
        if (iters % 500 == 0) or ((epoch == num_epochs-1) and (i == len(train_dataloader)-1)):
            with torch.no_grad():
                fake = netG(fixed_noise).detach().cpu()
            txt_list.append(fake)

        iters += 1

Starting Training Loop...


TypeError: forward() takes 2 positional arguments but 3 were given

In [None]:
# Assuming you have your model, dataloaders, loss function, device, learning rate, vocabulary size, etc. defined

# Instantiate your ClassifierTrainer
classifier_model = PatchClassifier()  # Replace with your actual classifier model
train_dataloader = train_dataset  # Replace with your actual training dataloader
valid_dataloader = test_dataset  # Replace with your actual validation dataloader
cross_entropy_loss = torch.nn.CrossEntropyLoss()  # Replace with your actual loss function
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # Use GPU if available, else use CPU

vocab_size = 10000  # Replace with your actual vocabulary size
learning_rate = 0.001  # Replace with your actual learning rate

# Instantiate the ClassifierTrainer
classifier_trainer = ClassifierTrainer(
    model=classifier_model,
    dataloader=train_dataloader,
    valid_dataloader=valid_dataloader,
    ce_loss=cross_entropy_loss,
    device=device,
    lr=learning_rate,
    vocab_size=vocab_size,
    warmup_steps=1000,  # Replace with your desired warmup steps
    total_iters=30000,  # Replace with your desired total iterations
    schedule='cosine_with_warmup'  # Replace with your desired learning rate schedule
)

In [None]:
# Instantiate your MidiEncoder
midi_encoder = MidiEncoder(steps_per_sec=100, num_vel_bins=32, min_pitch=21, max_pitch=108)

# Define a list of MIDI files to encode

midi_files_list = [os.path.join(path_to_midi, file) for file in os.listdir(path_to_midi) if file.endswith(".midi")]

# Encode the MIDI files and save the encoded sequences to a pickle file
encoded_sequences_path = "/workspaces/Transformers_sentiment/data/pickle/encoded_sequences.pkl"
encoded_sequences = midi_encoder.encode_midi_list(midi_files_list, pkl_path=encoded_sequences_path)

# Load the encoded sequences from the pickle file (optional)
with open(encoded_sequences_path, 'rb') as handle:
    loaded_encoded_sequences = pkl.load(handle)

# Instantiate your MIDIEncoderREMI
midi_encoder_remi = MIDIEncoderREMI(dict_path="midi_transcribed/src_001", midi_files_list=midi_files_list)

# Convert MIDI files to REMI words and save the dataset
dataset_dir = "data/train_data"
midi_encoder_remi.save_dataset(midi_files_list, dataset_dir)

# Save the dataset as a single file
single_file_dataset_path = "data/npz_midi/single_file_dataset.npz"
midi_encoder_remi.save_dataset_as_single_file(glob.glob(os.path.join(dataset_dir, '*.npy')), single_file_dataset_path)

In [None]:
# Load the dataset from the single file (optional)
loaded_dataset = np.load(single_file_dataset_path)
loaded_sequences = loaded_dataset['sequences']
loaded_ids = loaded_dataset['ids']

# Convert REMI words back to MIDI and save
output_midi_path = "path/to/save/output_midi.mid"
midi_encoder_remi.words_to_midi(loaded_sequences[0], output_midi_path)

# Calculate scores for a MIDI file using MidiEncoder
midi_file_to_score = "path/to/midi/file1.mid"
scores = midi_encoder.calculate_scores(midi_file_to_score)
print(scores)