In [1]:
import os

import torch
import torchaudio.functional
from torch.utils.data import DataLoader
import numpy as np

from torch import nn, Tensor
import pandas as pd
from torch import optim

from src.data import WaveDataset, SpectrogramDataset
from src.visualization import plot_spectrogram, plot_waveform
import matplotlib.pyplot as plt

# The kernel's working directory is taken as the notebooks folder, so the
# project root is addressed as "<cwd>/..". Backslashes are swapped for forward
# slashes so the paths read the same on Windows.
notebook_dir = os.getcwd().replace("\\", "/")
wdir = notebook_dir + "/.."
print(f"Working Directory: {wdir}")

# Data folders derived from the project root. output_dir is defined here but
# is not used by this cell.
processed_dir = f"{wdir}/data/processed/musicnet"
print(f"Processed Directory: {processed_dir}")
output_dir = f"{wdir}/data/output/musicnet"

Working Directory: D:/lyx20/Documents/LiuYuxuan/universite/Year4/csc413/a2a-music-gen/notebooks/..
Processed Directory: D:/lyx20/Documents/LiuYuxuan/universite/Year4/csc413/a2a-music-gen/notebooks/../data/processed/musicnet


In [2]:
from data import DataLoaders

# Load the dataset
dataset = SpectrogramDataset(f'{processed_dir}/train_data')

# Split the dataset: 60% for training, the remainder divided evenly between
# validation and test (test receives the extra sample when the remainder is odd).
# The split is driven by a seeded generator so that Restart & Run All always
# produces the same train/val/test membership.
SPLIT_SEED = 0
train_size = int(0.6 * len(dataset))
val_size = (len(dataset) - train_size) // 2
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training loader shuffles; evaluation metrics are order-independent,
# so validation/test batches are kept in a fixed order for repeatable runs.
dataloaders = DataLoaders(
    train=DataLoader(train_dataset, batch_size=50, shuffle=True),
    val=DataLoader(val_dataset, batch_size=10, shuffle=False),
    test=DataLoader(test_dataset, batch_size=10, shuffle=False),
)

# Shape of the first element of the first sample; displayed as the cell output.
nfreqs, nframes = dataset[0][0].shape
nfreqs, nframes

(200, 882)

In [3]:
# Setup the model
from src.models import BaseTransformer, ModularizedTransformer
from src.models import TransformerConfig
from src.models.trainer import Trainer

# Instantiate the baseline transformer with nfreqs (the first dimension of a
# dataset sample, computed in the data-loading cell) as its only argument.
model = BaseTransformer(nfreqs)
# Bare expression: the notebook renders the module tree as the cell output.
model

BaseTransformer(
  (embedding): Linear(in_features=200, out_features=200, bias=True)
  (positional_encoding): PositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-5): 6 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=200, out_features=200, bias=True)
          )
          (linear1): Linear(in_features=200, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=2048, out_features=200, bias=True)
          (norm1): LayerNorm((200,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((200,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dropout(p=0.1, inplace=False)
        )
      )
      (norm): LayerNorm((200,), eps=1e-05, elementwise_aff

In [4]:
# Use the first CUDA device when PyTorch reports one, otherwise the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
device

device(type='cpu')

In [5]:
# Scratch cell: pushes a small slice of the first sample through the model's
# sub-modules by hand (embedding -> positional encoding -> transformer ->
# output) and displays the resulting shape.
# (Earlier batched variant, kept for reference.)
# sample_batch = [dataset[i][0] for i in range(1)]
# Source keeps every row and columns 0-9; target is the same window shifted
# one column to the right (columns 1-10).
src = dataset[0][0][:, 0:10] #torch.stack(sample_batch)
tgt = dataset[0][0][:, 1:11] #torch.stack(sample_batch)
print(src.shape)
# len() of a tensor is the size of its FIRST dimension, so these masks are
# sized by the number of rows in the slice, not by the 10 selected columns.
# NOTE(review): neither mask is passed to model.transformer below -- confirm
# whether they were meant to be supplied (e.g. as src_mask / tgt_mask).
src_mask = nn.Transformer.generate_square_subsequent_mask(len(src))
tgt_mask = nn.Transformer.generate_square_subsequent_mask(len(tgt))
# NOTE(review): the saved model repr shows embedding as
# Linear(in_features=200, ...), while the slices above have a last dimension
# of 10 -- a transpose to (columns, rows) may be required; verify.
src = model.embedding(src)
src = model.positional_encoding(src)
tgt = model.embedding(tgt)
tgt = model.positional_encoding(tgt)

# The transformer is called with source and target only (no masks).
out = model.transformer(src, tgt)
out = model.output(out)
# (Alternative: call the model end-to-end.)
# model(sample_batch.T)
out.shape

torch.Size([200, 200])


KeyboardInterrupt: 

In [None]:
def get_accuracy(output: torch.Tensor, target: torch.Tensor) -> Tensor:
    """Score predictions by their mean absolute cosine similarity to the target.

    Cosine similarity is taken along dimension 1 (with ``eps=1e-6`` guarding
    against zero-norm vectors); the absolute values are then averaged over all
    remaining positions.

    Args:
        output: Predicted tensor.
        target: Reference tensor, shaped like ``output``.

    Returns:
        A zero-dimensional tensor holding the averaged score.
    """
    similarity = nn.functional.cosine_similarity(output, target, dim=1, eps=1e-6)
    return similarity.abs().mean()

def _run_epoch(
    model: nn.Module,
    loader,
    criterion: nn.Module,
    device: torch.device,
    optimizer=None,
) -> tuple:
    """Run one pass over ``loader`` and return ``(mean_loss, mean_accuracy)``.

    When ``optimizer`` is given, the pass also back-propagates and updates the
    model parameters; when it is ``None`` the pass is forward-only. Switching
    the model between train/eval mode is left to the caller.

    Args:
        model: Network to run; must already live on ``device``.
        loader: Iterable yielding ``(data, target, names)`` batches and
            exposing ``.dataset`` (used for the per-example averages).
        criterion: Loss applied to ``(output, target)``.
        device: Device each batch is moved to before the forward pass.
        optimizer: Optimizer to step after each batch, or ``None`` to skip
            the backward pass.

    Returns:
        Tuple of per-example average loss and per-example average accuracy.
    """
    is_training = optimizer is not None
    total_loss = 0.0
    total_acc = 0.0

    for data, target, _names in loader:
        # Follow the requested device rather than probing for CUDA, so the
        # same code path works on CPU-only machines.
        data, target = data.to(device), target.to(device)

        if is_training:
            optimizer.zero_grad()

        output = model(data)
        loss = criterion(output, target)

        if is_training:
            loss.backward()
            optimizer.step()

        # The criterion returns a batch mean; weight it by the batch size so
        # the epoch average is per example even if the last batch is short.
        batch_size = data.size(0)
        total_loss += loss.item() * batch_size

        # The accuracy is a metric only, so no autograd graph is needed.
        with torch.no_grad():
            accuracy = get_accuracy(output, target.view_as(output))
        total_acc += accuracy.item() * batch_size

    num_examples = len(loader.dataset)
    return total_loss / num_examples, total_acc / num_examples


def train(
    model: nn.Module,
    weight_decay: float,
    learning_rate: float,
    num_epochs: int,
    device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
) -> pd.DataFrame:
    """Train ``model`` with Adam and MSE loss, validating after every epoch.

    Batches are read from the notebook-level ``dataloaders`` object
    (``dataloaders.train`` and ``dataloaders.val``), and the accuracy figure
    is whatever ``get_accuracy`` returns for each batch.

    Args:
        model: Network to optimise; it is moved to ``device``.
        weight_decay: Weight-decay coefficient passed to Adam.
        learning_rate: Learning rate passed to Adam.
        num_epochs: Number of full passes over the training loader.
        device: Device used for the model and for every batch. Defaults to
            the first CUDA device when available, otherwise the CPU.

    Returns:
        DataFrame with one row per epoch and the columns ``train_loss``,
        ``val_loss``, ``train_acc`` and ``val_acc``.
    """
    model = model.to(device)  # move model to GPU if applicable
    criterion = nn.MSELoss()
    optimizer = optim.Adam(
        model.parameters(), lr=learning_rate, weight_decay=weight_decay
    )
    history = []

    for e in range(num_epochs):
        model.train()
        train_loss, train_acc = _run_epoch(
            model, dataloaders.train, criterion, device, optimizer
        )

        # Validation: evaluation mode and no gradient tracking.
        model.eval()
        with torch.no_grad():
            val_loss, val_acc = _run_epoch(
                model, dataloaders.val, criterion, device
            )

        print(
            f"\nEpoch: {e} \tTraining Loss: {train_loss:.4f} \tValidation Loss: {val_loss:.4f}"
        )
        print(
            f"\t \tTraining Accuracy: {100 * train_acc:.2f}% \tValidation Accuracy: {100 * val_acc:.2f}%"
        )
        history.append([train_loss, val_loss, train_acc, val_acc])

    return pd.DataFrame(
        history, columns=["train_loss", "val_loss", "train_acc", "val_acc"]
    )