## Setup

In [None]:
import os, sys
# Put ../../src/ (resolved against the current working directory) on the import
# path; presumably this is what lets the `model.*` and `utils.*` imports used by
# this notebook resolve -- confirm against the repository layout.
sys.path.append(os.path.abspath('../../src/'))

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.optim.lr_scheduler import StepLR, ExponentialLR, LinearLR, LambdaLR
from torch.utils.data import DataLoader

from model.spectogram_dataset import SpectrogramDataset

import wandb

# Authenticate with Weights & Biases up front so that later wandb.init() /
# wandb.log() calls in this notebook can talk to the service.
wandb.login()

In [None]:
# Module-level compute device shared by the training and evaluation helpers
# below: the first CUDA GPU when CUDA is available, otherwise the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

## CNN

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small CNN for binary classification of single-channel 2D inputs.

    Two convolution stages (5x5 conv -> optional batch norm -> ReLU ->
    2x2 max-pool -> channel dropout) feed a three-layer dense head whose last
    layer has one unit squashed by a sigmoid.

    ``fc1`` expects ``32 * 18 * 18`` flattened features, i.e. 18x18 maps after
    the second pooling step. An 87x86 (or 86x87) input satisfies this:
    83x82 after conv1, 41x41 after pool1, 37x37 after conv2, 18x18 after pool2.

    Args:
        dropout_conv_p: drop probability of the ``Dropout2d`` applied after
            each pooling step (one shared module is used for both stages).
        dropout_fc_p: drop probability of the ``Dropout`` applied after the
            first and second dense layers (again one shared module).
        use_batchnorm: when true, a ``BatchNorm2d`` follows each convolution;
            otherwise ``bn1`` / ``bn2`` are ``None`` and skipped.
    """

    def __init__(self, dropout_conv_p=0.0, dropout_fc_p=0.0, use_batchnorm=True):
        super().__init__()

        # Stage 1: 1 -> 16 channels.
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5)
        self.bn1 = nn.BatchNorm2d(16) if use_batchnorm else None
        self.pool1 = nn.MaxPool2d(2, 2)

        # Stage 2: 16 -> 32 channels.
        self.conv2 = nn.Conv2d(16, 32, kernel_size=5)
        self.bn2 = nn.BatchNorm2d(32) if use_batchnorm else None
        self.pool2 = nn.MaxPool2d(2, 2)

        # Channel-wise dropout reused after both convolution stages.
        self.dropout_conv = nn.Dropout2d(p=dropout_conv_p)

        # Dense head: 10368 -> 256 -> 128 -> 1.
        self.fc1 = nn.Linear(32 * 18 * 18, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 1)

        # Alternative two-layer head, kept for reference:
        # self.fc1 = nn.Linear(32 * 18 * 18, 128)
        # self.fc2 = nn.Linear(128, 1)

        # Element-wise dropout reused between the dense layers.
        self.dropout_fc = nn.Dropout(p=dropout_fc_p)

    def _conv_stage(self, x, conv, bn, pool):
        """Run conv -> (batch norm) -> ReLU -> pool -> dropout on ``x``."""
        x = conv(x)
        if bn is not None:
            x = bn(x)
        x = pool(F.relu(x))
        return self.dropout_conv(x)

    def forward(self, x, return_features = False, return_layer = 1):
        """Compute the sigmoid output, or an intermediate activation on request.

        Args:
            x: input batch with one channel, shaped so that the flattened
                conv output has ``32 * 18 * 18`` features per sample.
            return_features: when true, stop early and return the activation
                selected by ``return_layer`` instead of the final output.
            return_layer: 1 -> ReLU output of ``fc1``; 2 -> ReLU output of
                ``fc2``; 3 -> raw output of ``fc3`` (before the sigmoid).
                Any other value falls through to the regular sigmoid output.

        Returns:
            The sigmoid of ``fc3``'s output, or the requested activation.
        """
        x = self._conv_stage(x, self.conv1, self.bn1, self.pool1)
        x = self._conv_stage(x, self.conv2, self.bn2, self.pool2)

        # Collapse everything but the batch dimension for the dense head.
        x = torch.flatten(x, 1)

        hidden1 = F.relu(self.fc1(x))
        if return_features and return_layer == 1:
            return hidden1

        hidden2 = F.relu(self.fc2(self.dropout_fc(hidden1)))
        if return_features and return_layer == 2:
            return hidden2

        logits = self.fc3(self.dropout_fc(hidden2))
        if return_features and return_layer == 3:
            return logits

        return torch.sigmoid(logits)


In [13]:
from sklearn.metrics import f1_score

import numpy as np

def compute_feature_sparsity(loader, model, return_layer, n_batches = 20):
    """Return the fraction of strictly positive feature values on a data sample.

    Note that, despite the name, the value is the share of *active* (> 0)
    entries: 1.0 means every feature is positive, 0.0 means none is.

    The model is temporarily switched to evaluation mode so that the probe
    neither applies dropout nor updates batch-norm running statistics; the
    previous train/eval mode is restored before returning.

    Args:
        loader: iterable yielding ``(inputs, labels)`` pairs; labels are ignored.
        model: ``nn.Module`` whose ``forward`` accepts the keyword arguments
            ``return_features`` and ``return_layer``.
        return_layer: forwarded to the model to select which activation to probe.
        n_batches: maximum number of batches to read from ``loader``;
            ``-1`` reads all of them.

    Returns:
        ``numpy.float64`` in ``[0, 1]``: positive entries / total entries over
        all probed batches.

    Raises:
        ValueError: if no batch was processed (empty loader or ``n_batches == 0``).
    """
    # Feed inputs to wherever the model actually lives; only a parameterless
    # model needs the notebook-level `device` as a fallback.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    was_training = model.training
    model.eval()
    features_list = []
    try:
        with torch.no_grad():
            for i, (inputs, _) in enumerate(loader):
                # `>=` so that exactly n_batches batches are used (indices 0..n_batches-1).
                if n_batches != -1 and i >= n_batches:
                    break

                features = model(inputs.to(target_device), return_features=True, return_layer=return_layer)
                features_list.append(features.cpu().numpy())
    finally:
        # Hand the model back in the mode the caller left it in.
        model.train(was_training)

    if not features_list:
        raise ValueError("compute_feature_sparsity: no batches were processed")

    phi = np.vstack(features_list)
    return (phi > 0).sum() / phi.size
        
def _current_lr(optimizer, scheduler):
    """Return the learning rate currently applied to the first parameter group."""
    if scheduler is not None:
        return scheduler.get_last_lr()[0]
    return optimizer.param_groups[0]["lr"]


def train(model, criterion, optimizer, scheduler, num_epochs, train_loader, val_loader, big_batch_loader=None, model_name="cnn", patience=40):
    """Train a binary classifier, log metrics to wandb, and save the final model.

    Per epoch: one pass over ``train_loader`` with windowed train metrics
    logged roughly five times, an optional feature-sparsity probe, one
    scheduler step, and a validation pass. Training stops early once
    validation accuracy has not improved for ``patience`` consecutive epochs.
    ``wandb.log`` is called throughout, so the caller must have an active
    wandb run.

    Args:
        model: module whose output is thresholded at 0.5 to get predictions.
        criterion: loss called as ``criterion(outputs, labels)`` with labels
            reshaped to ``(batch, 1)``.
        optimizer: optimizer stepping ``model``'s parameters.
        scheduler: learning-rate scheduler stepped once per epoch, or ``None``
            to keep the optimizer's learning rate untouched.
        num_epochs: maximum number of epochs.
        train_loader: sized, non-empty iterable of ``(inputs, labels)`` batches.
        val_loader: sized, non-empty iterable of ``(inputs, labels)`` batches.
        big_batch_loader: optional loader passed to
            ``compute_feature_sparsity``; when ``None`` the probe is skipped.
        model_name: the model is saved to ``f"{model_name}.pth"``.
        patience: epochs without validation-accuracy improvement tolerated
            before stopping.

    Raises:
        ValueError: if ``train_loader`` or ``val_loader`` is empty.
    """
    n_train_batches = len(train_loader)
    n_val_batches = len(val_loader)
    if n_train_batches == 0 or n_val_batches == 0:
        raise ValueError("train() needs non-empty train_loader and val_loader")

    model.to(device)

    # Length of one logging window in batches. Clamped to >= 1 so that short
    # loaders cannot produce a zero (modulo by zero) or negative window.
    log_every = max(1, n_train_batches // 5 - 1)
    epochs_without_val_acc_improvement = 0
    best_val_acc = 0.0

    for epoch in range(num_epochs):
        model.train()

        epoch_loss = 0.0      # summed over the whole epoch, for the printed summary
        window_loss = 0.0     # summed over the current logging window only
        window_correct = 0
        window_samples = 0
        all_labels = []
        all_preds = []

        for i, (inputs, labels) in enumerate(train_loader):
            inputs = inputs.to(device)
            labels = labels.to(device).unsqueeze(1)

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            preds = (outputs > 0.5).float()  # Binary prediction with threshold 0.5
            all_labels.extend(labels.cpu().numpy().flatten())
            all_preds.extend(preds.cpu().detach().numpy().flatten())

            batch_loss = loss.item()
            epoch_loss += batch_loss
            window_loss += batch_loss
            # Count samples rather than averaging batch means, so a smaller
            # final batch does not skew the accuracy.
            window_correct += (preds == labels).sum().item()
            window_samples += labels.numel()

            if i % log_every == log_every - 1:
                lr = _current_lr(optimizer, scheduler)
                print(lr)
                wandb.log({
                        "train/accuracy": window_correct / window_samples,
                        "train/loss": window_loss / log_every,
                        "train/learning_rate": lr
                    },
                    step=epoch * n_train_batches + i
                )
                window_loss = 0.0
                window_correct = 0
                window_samples = 0
        # ^ for data in train loader

        # Global step of the last training batch of this epoch.
        last_step = epoch * n_train_batches + (n_train_batches - 1)

        # Evaluation mode from here to the end of the epoch: neither the
        # sparsity probe nor validation should apply dropout or update
        # batch-norm running statistics.
        model.eval()

        if big_batch_loader is not None:
            wandb.log({
                    "sparsity/1": compute_feature_sparsity(big_batch_loader, model, 1),
                    "sparsity/2": compute_feature_sparsity(big_batch_loader, model, 2),
                    "sparsity/3": compute_feature_sparsity(big_batch_loader, model, 3),
                },
                step=last_step
            )

        f1 = f1_score(all_labels, all_preds, average='macro')

        lr = _current_lr(optimizer, scheduler)
        print(lr)
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss / n_train_batches:.4f}, F1 Score: {f1:.4f}")

        if scheduler is not None:
            scheduler.step()

        # Validation
        val_loss = 0.0
        val_correct = 0
        val_samples = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device).unsqueeze(1)

                outputs = model(inputs)
                val_loss += criterion(outputs, labels).item()

                # Binary predictions
                preds = (outputs > 0.5).float()
                val_correct += (preds == labels).sum().item()
                val_samples += labels.numel()

        val_accuracy = val_correct / val_samples
        wandb.log({
                "validation/accuracy": val_accuracy,
                # Mean of per-batch losses, as reported by `criterion`.
                "validation/loss": val_loss / n_val_batches
            },
            step=(epoch + 1) * n_train_batches
        )

        if val_accuracy > best_val_acc:
            best_val_acc = val_accuracy
            epochs_without_val_acc_improvement = 0
        else:
            epochs_without_val_acc_improvement += 1
        if epochs_without_val_acc_improvement >= patience:
            print(f"{patience} epochs without a val accuracy improvement. Stopping the train")
            break

    # NOTE: this pickles the whole module (not just a state_dict) and stores
    # the weights of the last epoch run, not those of the best validation epoch.
    torch.save(model, f"{model_name}.pth")
    print("Training complete.")
    
def test(model, test_loader, criterion):
    """Evaluate ``model`` on ``test_loader`` and report loss, accuracy and macro F1.

    Results are printed and logged to wandb under ``test/accuracy``,
    ``test/loss`` and ``test/f1_score``; nothing is returned.

    Args:
        model: module whose output is thresholded at 0.5 to get predictions.
        test_loader: sized, non-empty iterable of ``(inputs, labels)`` batches.
        criterion: loss called as ``criterion(outputs, labels)`` with labels
            reshaped to ``(batch, 1)``.

    Raises:
        ValueError: if ``test_loader`` is empty.
    """
    n_batches = len(test_loader)
    if n_batches == 0:
        raise ValueError("test() needs a non-empty test_loader")

    # Same device handling as train(), so test() also works on a model that
    # has not been moved yet.
    model.to(device)
    model.eval()

    test_loss = 0.0
    test_correct = 0
    test_samples = 0
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device).unsqueeze(1)

            outputs = model(inputs)
            test_loss += criterion(outputs, labels).item()

            # Binary predictions
            preds = (outputs > 0.5).float()
            all_labels.extend(labels.cpu().numpy().flatten())
            all_preds.extend(preds.detach().cpu().numpy().flatten())

            # Count samples rather than averaging batch means, so a smaller
            # final batch does not skew the accuracy.
            test_correct += (preds == labels).sum().item()
            test_samples += labels.numel()

    accuracy = test_correct / test_samples
    mean_loss = test_loss / n_batches  # mean of per-batch losses
    f1 = f1_score(all_labels, all_preds, average='macro')

    print(f'Test Loss: {mean_loss:.4f}, Test Accuracy: {accuracy:.4f}, Test F1 Score: {f1:.4f}')
    wandb.log({
            "test/accuracy": accuracy,
            "test/loss": mean_loss,
            "test/f1_score": f1
        }
    )

## Data

In [5]:
from utils.dataset_creator import SpecgramsRandomFilter, SpecgramsSilentFilter

# Dataset location, built as ../../<DATASETS_PARENT_PATH>/<DATASET_PATH>.
# The relative path is resolved against the current working directory.
DATASETS_PARENT_PATH = 'datasets'
DATASET_PATH = 'dataset'
DATA_DIR = os.path.join('../../', DATASETS_PARENT_PATH, DATASET_PATH)

In [None]:
# from prepare_datasets import create_datasets

# create_datasets(DATASETS_PARENT_PATH, DATASET_PATH, [
#     SpecgramsSilentFilter(),
# ])

In [None]:
# Input resolution; in this notebook it is only recorded in the wandb run config.
INPUT_WIDTH = 87
INPUT_HEIGHT = 86

# LR_DECAY: multiplicative factor (gamma) for step/exponential style schedulers.
# EPOCHS:   maximum number of training epochs.
# DECAY_AT: fraction of EPOCHS at which the 'decay' / 'warmup' schedules switch phase.
LR_DECAY = 0.95
EPOCHS = 50
DECAY_AT = 0.4

# HIGH_LEARNING_RATE is the peak rate of the 'decay' / 'warmup' schedules;
# LEARNING_RATE is the base rate (and the only rate used by 'const').
HIGH_LEARNING_RATE = 0.01
LEARNING_RATE = 0.001

# BATCH_SIZE is used for the train/validation/test loaders, BATCH_SIZE_EVAL
# for the larger loader used by the feature-sparsity probe.
BATCH_SIZE = 64
BATCH_SIZE_EVAL = 512

# Drop probabilities passed to Net for the convolutional and dense parts.
DROPOUT_CONV = 0.4
DROPOUT_FC = 0.4

# Whether Net inserts BatchNorm2d after each convolution.
USE_BATCHNORM = False

# Learning-rate schedule selector: 'const', 'decay' or 'warmup'.
SCHEDULE = 'const'

print(LEARNING_RATE)

## Training

In [11]:
def _lr_for_epoch(schedule, epoch, epochs, decay_at, low_lr, high_lr):
    """Return the absolute learning rate for ``epoch`` under ``schedule``.

    All breakpoints are expressed as training progress ``epoch / epochs`` in
    ``[0, 1]`` so that the interpolation input and its breakpoints share one
    unit.

    - ``'const'``: always ``low_lr``.
    - ``'decay'``: ``high_lr`` until progress ``decay_at``, then a linear
      ramp down to ``low_lr`` at progress 1.
    - ``'warmup'``: linear ramp from ``low_lr`` (at epoch 1) up to ``high_lr``
      at progress ``decay_at``, then a linear ramp down to ``low_lr``.
      Assumes ``decay_at > 1 / epochs``.

    Raises:
        ValueError: for any other ``schedule``.
    """
    if schedule == 'const':
        return low_lr

    progress = epoch / epochs
    if schedule == 'decay':
        # np.interp clamps to high_lr for progress <= decay_at.
        return float(np.interp(progress, [decay_at, 1.0], [high_lr, low_lr]))
    if schedule == 'warmup':
        if progress <= decay_at:
            return float(np.interp(progress, [1 / epochs, decay_at], [low_lr, high_lr]))
        return float(np.interp(progress, [decay_at, 1.0], [high_lr, low_lr]))

    raise ValueError(f"Unknown schedule {schedule!r}; expected 'const', 'decay' or 'warmup'")


def train_and_test():
    """Run one full experiment configured by the notebook-level constants.

    Builds the run name from ``SCHEDULE``, opens a wandb run, creates the
    model, loss, optimizer, learning-rate scheduler and the train / validation
    / test loaders under ``DATA_DIR``, then calls ``train`` followed by
    ``test``. The wandb run is always closed, even if any of those steps fails.

    Raises:
        ValueError: if ``SCHEDULE`` is not 'const', 'decay' or 'warmup'.
    """
    # Fail before a wandb run is opened if the schedule name is unusable.
    if SCHEDULE not in ('const', 'decay', 'warmup'):
        raise ValueError(f"Unknown SCHEDULE {SCHEDULE!r}; expected 'const', 'decay' or 'warmup'")

    if SCHEDULE == 'const':
        name = f"const {LEARNING_RATE}"
    else:
        name = f"{SCHEDULE} {DECAY_AT * 100}% {HIGH_LEARNING_RATE} {LEARNING_RATE} "

    # name = "debug"
    print(name)

    def lr_schedule(epoch):
        return _lr_for_epoch(SCHEDULE, epoch, EPOCHS, DECAY_AT, LEARNING_RATE, HIGH_LEARNING_RATE)

    wandb.init(
        name=name,
        project="High learning rate",
        config={
            "schedule": SCHEDULE,
            "learning_rate": LEARNING_RATE,
            "high_learning_rate": HIGH_LEARNING_RATE,
            "start_decay_at": DECAY_AT,
            "epochs": EPOCHS,
            "batch_size": BATCH_SIZE,
            "batch_size_eval": BATCH_SIZE_EVAL,
            "dropout_conv_p": DROPOUT_CONV,
            "dropout_fc_p": DROPOUT_FC,
            "use_batchnorm": USE_BATCHNORM,
            "input_resolution": (INPUT_WIDTH, INPUT_HEIGHT),
            "architecture": "CNN",
            "dataset": "DAPS"
        }
    )

    # Everything after wandb.init() sits inside the try so that a failure
    # while building the model or datasets still closes the run.
    try:
        model = Net(dropout_conv_p=DROPOUT_CONV,
                    dropout_fc_p=DROPOUT_FC,
                    use_batchnorm=USE_BATCHNORM)
        criterion = nn.BCELoss()

        # Base lr of 1 turns the LambdaLR factor into the absolute learning
        # rate. Only this one scheduler is attached: every scheduler rewrites
        # the optimizer's lr when it is constructed, so unused ones must not
        # be created on the same optimizer.
        optimizer = optim.Adam(model.parameters(), lr=1)
        scheduler = LambdaLR(optimizer, lr_lambda=lr_schedule)

        transform = transforms.ToTensor()

        train_dataset = SpectrogramDataset(data_dir=os.path.join(DATA_DIR, "train"), transform=transform)
        train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
        big_batch_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE_EVAL, shuffle=False)

        val_dataset = SpectrogramDataset(data_dir=os.path.join(DATA_DIR, "validation"), transform=transform)
        val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

        # Evaluation does not need shuffling; a fixed order keeps it reproducible.
        test_dataset = SpectrogramDataset(data_dir=os.path.join(DATA_DIR, "test"), transform=transform)
        test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

        train(model, criterion, optimizer, scheduler, EPOCHS, train_loader, val_loader, big_batch_loader=big_batch_loader, model_name=name)
        test(model, test_loader, criterion)
    finally:
        wandb.finish()


In [14]:
# Manually close the current wandb run. train_and_test() already does this in
# its `finally`, so this cell is presumably a safety net for runs left open by
# an interrupted or partially executed cell.
wandb.finish()