In [1]:
from src.models.trainer import Trainer
from src.models.cnn import CNN
from src.data import WaveDataset, SpectrogramDataset
from src.features import WaveProcessor, WaveProcessorConfig

import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, random_split
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd

import os

In [2]:
# Project root taken from the current working directory; back-slashes are
# turned into forward slashes so every derived path uses a single separator.
wdir = "/".join(os.getcwd().split("\\"))
processed_dir = wdir + "/data/processed"

# 44100 // 8 == 5512 -- presumably one eighth of a 44.1 kHz source rate
# (TODO confirm against the raw recordings).
sr = 44100 // 8

# Raw waveform dataset, served one item per batch in its stored order.
wav_dataset = WaveDataset(wdir + "/data/raw/train_data", sr=sr, max_sec=30)
wav_loader = DataLoader(wav_dataset, shuffle=False, batch_size=1)

# Waveform processor configured with the same sample rate as the dataset.
wpconfig = WaveProcessorConfig(sr=sr)
wp = WaveProcessor(wpconfig)

In [3]:
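# One-off preprocessing, kept commented out: converts every raw waveform pair
# to spectrograms with wp.wav2freq and saves inputs, labels and the processor
# config under processed_dir. Uncomment to regenerate the processed data.
# for b, (xs, ys, fnames) in enumerate(wav_loader):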
#     for x, y, fname in zip(xs, ys, fnames):
#         input_spec = wp.wav2freq(x)
#         label_spec = wp.wav2freq(y)

#         SpectrogramDataset.save(input_spec, f"{processed_dir}/train_data", fname)
#         SpectrogramDataset.save(
#             label_spec, f"{processed_dir}/train_labels", fname, is_label=True
#         )
#         SpectrogramDataset.save_metadata(
#             wpconfig.to_dict(), f"{processed_dir}/train_meta", fname
#         )

In [15]:
class CNNTrainer(Trainer):
    """Trainer that fits a ``CNN`` one output step at a time.

    During training the model predicts a single slice of the target along its
    last axis, the loss for that slice is back-propagated, and the input
    window is then advanced by dropping its first slice and appending the
    ground-truth slice. Validation uses ``self.model.generate`` to produce
    the whole output in one call.
    """

    def create_model(self, token_size, seq_size, n_layer, out_size) -> None:
        """Build the CNN from the given sizes and store it on ``self.model``."""
        self.model = CNN(token_size, seq_size, n_layer, out_size)

    def train(
        self,
        weight_decay: float,
        learning_rate: float,
        num_epochs: int,
        device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    ) -> pd.DataFrame:
        """Train and validate the model for ``num_epochs`` epochs.

        Args:
            weight_decay: ``weight_decay`` passed to the Adam optimizer.
            learning_rate: ``lr`` passed to the Adam optimizer.
            num_epochs: Number of passes over the training dataloader.
            device: Device the model and every batch are moved to. Defaults
                to ``cuda:0`` when CUDA is available, otherwise the CPU.

        Returns:
            A DataFrame with one row per epoch and the columns
            ``train_loss``, ``val_loss``, ``train_acc`` and ``val_acc``.
        """
        self.model = self.model.to(device)  # move model to the requested device
        criterion = nn.MSELoss()
        optimizer = optim.Adam(
            self.model.parameters(), lr=learning_rate, weight_decay=weight_decay
        )
        history = []

        for e in range(num_epochs):
            train_loss = 0.0
            val_loss = 0.0

            train_acc = 0
            val_acc = 0

            self.model.train()

            for data, target, _ in self.dataloaders["train"]:
                # Honour the `device` argument instead of keying on global
                # CUDA availability, so model and data always share a device.
                data, target = data.to(device), target.to(device)

                # Clear gradients
                optimizer.zero_grad()

                # Buffer collecting every per-step prediction for the accuracy
                # computation; target is indexed as a rank-3 tensor below.
                output = torch.empty(
                    (target.shape[0], target.shape[1], target.shape[2]),
                    device=device,
                )
                batch_loss = 0.0
                for i in range(target.shape[-1]):
                    # Predict the next slice from the current input window
                    cur_output = self.model(data)

                    # Gradients from every step accumulate until optimizer.step().
                    # NOTE(review): the divisor is target.shape[1], not the number
                    # of steps (target.shape[-1]) -- confirm this scaling is intended.
                    loss = criterion(cur_output, target[:, :, i]) / target.shape[1]
                    loss.backward()
                    batch_loss += loss.item()

                    # Store a detached copy: the buffer is only used for metrics
                    # and must not carry autograd history.
                    output[:, :, i] = cur_output.detach()

                    # Teacher forcing: drop the first slice of the window and
                    # append the ground-truth slice for this step.
                    data = torch.cat(
                        (data[:, :, 1:], torch.unsqueeze(target[:, :, i], dim=-1)),
                        dim=-1,
                    )

                # Update the parameters
                optimizer.step()

                # Weight the batch loss by the number of examples in the batch
                train_loss += batch_loss * data.size(0)
                # Make sure target has the same shape as output
                target = target.detach().view_as(output)
                accuracy = self.get_accuracy(output, target)
                # Weight the batch accuracy by the number of examples in the batch
                train_acc += accuracy.item() * data.size(0)

            # Validation: evaluation mode, no gradient tracking
            self.model.eval()
            with torch.no_grad():
                for data, target, _ in self.dataloaders["val"]:
                    data, target = data.to(device), target.to(device)

                    # Forward pass
                    output = self.model.generate(data)

                    # Validation loss, weighted by the number of examples
                    loss = criterion(output, target)
                    val_loss += loss.item() * data.size(0)

                    # Make sure target has the same shape as output
                    target = target.detach().view_as(output)
                    accuracy = self.get_accuracy(output, target)
                    val_acc += accuracy.item() * data.size(0)

            # Per-example averages over each dataset
            train_loss = train_loss / len(self.dataloaders["train"].dataset)
            val_loss = val_loss / len(self.dataloaders["val"].dataset)

            train_acc = train_acc / len(self.dataloaders["train"].dataset)
            val_acc = val_acc / len(self.dataloaders["val"].dataset)

            print(
                f"\nEpoch: {e} \tTraining Loss: {train_loss:.4f} \tValidation Loss: {val_loss:.4f}"
            )
            print(
                f"\t \tTraining Accuracy: {100 * train_acc:.2f}% \tValidation Accuracy: {100 * val_acc:.2f}%"
            )
            history.append([train_loss, val_loss, train_acc, val_acc])

        return pd.DataFrame(
            history, columns=["train_loss", "val_loss", "train_acc", "val_acc"]
        )

In [4]:
spec_dataset = SpectrogramDataset(
    f"{processed_dir}/train_data", label_dir=f"{processed_dir}/train_labels"
)

# Fetch the first sample once and read every model dimension from it,
# instead of indexing the dataset three separate times.
first_sample = spec_dataset[0]
token_size = first_sample[0].shape[0]
seq_size = first_sample[0].shape[1]
out_size = first_sample[1].shape[1]

# Seed the split so the train/validation partition is identical across
# kernel restarts and results stay comparable between runs.
SPLIT_SEED = 42
train_set, val_set = random_split(
    spec_dataset,
    [0.8, 0.2],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_loader = DataLoader(train_set, batch_size=5, shuffle=True)
# Validation metrics are example-weighted sums, so batch order is irrelevant;
# no need to shuffle.
val_loader = DataLoader(val_set, batch_size=5, shuffle=False)

In [16]:
# Build a 3-layer CNN sized from the dataset, attach the train/validation
# loaders (third loader slot left empty) and run 10 epochs. The returned
# per-epoch history frame is the cell's displayed output.
trainer = CNNTrainer()
trainer.create_model(
    token_size=token_size,
    seq_size=seq_size,
    n_layer=3,
    out_size=out_size,
)
trainer.set_dataloaders(train_loader, val_loader, None)
trainer.train(weight_decay=0.001, learning_rate=0.001, num_epochs=10)


Epoch: 0 	Training Loss: 53.5987 	Validation Loss: 81.5682
	 	Training Accuracy: 29.08% 	Validation Accuracy: 32.77%

Epoch: 1 	Training Loss: 53.2315 	Validation Loss: 144.7879
	 	Training Accuracy: 32.84% 	Validation Accuracy: 33.13%

Epoch: 2 	Training Loss: 52.8900 	Validation Loss: 79.2473
	 	Training Accuracy: 32.55% 	Validation Accuracy: 32.35%

Epoch: 3 	Training Loss: 52.6506 	Validation Loss: 118.3015
	 	Training Accuracy: 31.72% 	Validation Accuracy: 30.95%

Epoch: 4 	Training Loss: 52.4387 	Validation Loss: 74.6868
	 	Training Accuracy: 29.93% 	Validation Accuracy: 27.42%

Epoch: 5 	Training Loss: 52.2427 	Validation Loss: 74.8083
	 	Training Accuracy: 29.70% 	Validation Accuracy: 27.29%

Epoch: 6 	Training Loss: 51.6361 	Validation Loss: 6551.7528
	 	Training Accuracy: 29.90% 	Validation Accuracy: 23.94%

Epoch: 7 	Training Loss: 51.1751 	Validation Loss: 74.2440
	 	Training Accuracy: 28.70% 	Validation Accuracy: 23.06%

Epoch: 8 	Training Loss: 50.7906 	Validation Loss: 

Unnamed: 0,train_loss,val_loss,train_acc,val_acc
0,53.598694,81.568247,0.290772,0.327663
1,53.231481,144.787884,0.328352,0.331297
2,52.890021,79.247265,0.325539,0.323512
3,52.650615,118.301543,0.317167,0.309474
4,52.438657,74.686847,0.299343,0.274247
5,52.242709,74.808338,0.297029,0.272946
6,51.636094,6551.752848,0.299007,0.239435
7,51.175065,74.244013,0.287027,0.230575
8,50.790633,inf,0.277845,0.177391
9,50.683963,74.392654,0.275984,0.206731
