## Setup

In [1]:
import os, sys
sys.path.append(os.path.abspath('../../src/'))

import torch
import torch.optim as optim
import torch.nn as nn
import itertools
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR, ExponentialLR, LinearLR

from model.spectogram_dataset import SpectrogramDataset

import wandb

wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mmldogs[0m ([33mmldogs-wut[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


## CNN

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self, dropout_conv_p=0.3, dropout_fc_p=0.3, use_batchnorm=True):
        super(Net, self).__init__()

        # Convolutional and pooling layers
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5)  # Output: (16, 83, 82)
        self.bn1 = nn.BatchNorm2d(16) if use_batchnorm else None  # Batch Norm (optional)
        self.pool1 = nn.MaxPool2d(2, 2)              # Output: (16, 41, 41)

        self.conv2 = nn.Conv2d(16, 32, kernel_size=5) # Output: (32, 37, 37)
        self.bn2 = nn.BatchNorm2d(32) if use_batchnorm else None  # Batch Norm (optional)
        self.pool2 = nn.MaxPool2d(2, 2)              # Output: (32, 18, 18)

        # Dropout after convolutional layers
        self.dropout_conv = nn.Dropout2d(p=dropout_conv_p)  # Dropout2d for 2D data

        # Fully connected layers
        self.fc1 = nn.Linear(32 * 18 * 18, 256)      # Fully connected layer 1
        self.fc2 = nn.Linear(256, 128)              # Fully connected layer 2
        self.fc3 = nn.Linear(128, 1)   
        
        self.dropout_fc = nn.Dropout(p=dropout_fc_p)

    def forward(self, x):
        # Convolutional layers
        x = self.conv1(x)
        if self.bn1:  # Apply Batch Norm if enabled
            x = self.bn1(x)
        x = F.relu(x)  # Apply ReLU activation
        x = self.pool1(x)  # Pooling
        x = self.dropout_conv(x)  # Dropout (no effect if p=0.0)

        x = self.conv2(x)
        if self.bn2:  # Apply Batch Norm if enabled
            x = self.bn2(x)
        x = F.relu(x)  # Apply ReLU activation
        x = self.pool2(x)  # Pooling
        x = self.dropout_conv(x)  # Dropout (no effect if p=0.0)

        # Flatten
        x = torch.flatten(x, 1)

        # Fully connected layers
        x = self.fc1(x)
        x = F.relu(x)  # Apply ReLU activation
        x = self.dropout_fc(x)  # Dropout (no effect if p=0.0)

        x = self.fc2(x)
        x = F.relu(x)  # Apply ReLU activation
        x = self.dropout_fc(x)  # Dropout (no effect if p=0.0)
        x = torch.sigmoid(self.fc3(x))  # Output layer
        
        return x


In [4]:
from sklearn.metrics import f1_score

def train(model, criterion, optimizer, scheduler, num_epochs, train_loader, val_loader, model_name = "cnn"):
    model.to(device)
    
    PRINT_STEP = len(train_loader) // 5 - 1
    epochs_without_val_acc_improvement = 0
    best_val_acc = 0.0

    for epoch in range(num_epochs):
        model.train()

        running_loss = 0.0
        correct = 0
        all_labels = []
        all_preds = []

        for  i, data in enumerate(train_loader):
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)
            labels = labels.unsqueeze(1)

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            preds = (outputs > 0.5).float()  # Binary prediction with threshold 0.5
            all_labels.extend(labels.cpu().numpy().flatten())
            all_preds.extend(preds.detach().cpu().numpy().flatten())

            correct += (preds == labels).float().mean().item()
            
            running_loss += loss.item()

            if i % PRINT_STEP == PRINT_STEP-1:
                accuracy = correct / PRINT_STEP
                loss = running_loss / PRINT_STEP
                step = epoch * len(train_loader) + i
                wandb.log({
                        "train/accuracy": accuracy,
                        "train/loss": loss
                    },
                    step=step
                )
                running_loss = 0.0
                correct = 0

        f1 = f1_score(all_labels, all_preds, average='macro')
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader):.4f}, F1 Score: {f1:.4f}")

        if scheduler is not None:
            scheduler.step()

        # Validation
        model.eval()

        val_loss = 0.0
        val_correct = 0

        with torch.no_grad():
            for j, data in enumerate(val_loader):
                inputs, labels = data
                inputs = inputs.to(device)
                labels = labels.to(device)
                labels = labels.unsqueeze(1)

                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()

                # Binary predictions
                preds = (outputs > 0.5).float()
                val_correct += (preds == labels).float().mean().item()

        accuracy = val_correct / len(val_loader)
        loss = val_loss / len(val_loader)
        wandb.log({
                "validation/accuracy": accuracy,
                "validation/loss": loss
            },
            step=(epoch + 1) * len(train_loader)
        )

        if accuracy > best_val_acc:
            best_val_acc = accuracy
            epochs_without_val_acc_improvement = 0
        else:
            epochs_without_val_acc_improvement += 1
        if epochs_without_val_acc_improvement >= 10:
            print("10 epochs without a val accuracy improvement. Stopping the train")
            break
        if epoch > 20 and accuracy < 0.9:
            print("Too low accuracy. Stopping the train")
            break

    print("Training complete.")
    
def test(model, test_loader):
    model.eval()

    correct = 0
    total = 0

    with torch.no_grad():
        for data in test_loader:
            images, labels = data
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on test images: {100 * correct // total} %')

## Data

In [5]:
from utils.dataset_creator import SpecgramsRandomFilter, SpecgramsSilentFilter

DATASETS_PARENT_PATH = 'datasets'
DATASET_PATH = 'dataset'
DATA_DIR = os.path.join('../../', DATASETS_PARENT_PATH, DATASET_PATH)

In [6]:
from prepare_datasets import create_datasets

create_datasets(DATASETS_PARENT_PATH, DATASET_PATH, [
    SpecgramsSilentFilter(0.75)
])

DatasetCreator: Exporting the dataset with the following parameters:
    n_fft=1024
    n_mels=86
    interval_duration=2s
    multithreading=True
Class 0 recordings count: 270
Class 1 recordings count: 630
Image properties:
    width=87px
    height=86px
Removing directory /home/migoox/dev/ml-dogs-sharepoint/src/../datasets/dataset/train/0
Removing directory /home/migoox/dev/ml-dogs-sharepoint/src/../datasets/dataset/train/1
Finished [900/900]Dataset has been exported. Elapsed time: 147.87281608581543s.
DatasetCreator: Exporting the dataset with the following parameters:
    n_fft=1024
    n_mels=86
    interval_duration=2s
    multithreading=True
Class 0 recordings count: 90
Class 1 recordings count: 210
Image properties:
    width=87px
    height=86px
Finished [300/300]Dataset has been exported. Elapsed time: 53.31251573562622s.
DatasetCreator: Exporting the dataset with the following parameters:
    n_fft=1024
    n_mels=86
    interval_duration=2s
    multithreading=True
Class 0 r

In [9]:
INPUT_WIDTH = 67
INPUT_HEIGHT = 66

LEARNING_RATE = 0.001
LR_DECAY = 0.95
BATCH_SIZE = 32
EPOCHS = 25

start_factors = [0.333]
end_factors = [0.1]
total_iters = [30]


## Training

In [10]:
for sf, ef, ti in itertools.product(start_factors, end_factors, total_iters):
    wandb.init(
        project="sched-tests",
        config={
            "learning_rate": LEARNING_RATE,
            "learning_rate_decay": LR_DECAY,
            "batch_size": BATCH_SIZE,
            "scheduler": "linear_real_one",
            "start_factor": sf,
            "end_factor": ef,
            "total_iters": ti,
            "input_resolution": (INPUT_WIDTH, INPUT_HEIGHT),
            "architecture": "CNN",
            "dataset": "DAPS"
        },
        name=f"sched:lin-sf:{sf}-ef:{ef}-ti:{ti}-2"
    )
    model = Net()
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    transform = transforms.ToTensor()
    scheduler = LinearLR(optimizer, start_factor=sf, end_factor=ef, total_iters=ti)
    
    train_dataset = SpectrogramDataset(data_dir=os.path.join(DATA_DIR, "train"), transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    
    val_dataset = SpectrogramDataset(data_dir=os.path.join(DATA_DIR, "validation"), transform=transform)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
    
    test_dataset = SpectrogramDataset(data_dir=os.path.join(DATA_DIR, "test"),transform=transform)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)
    
    train(model, criterion, optimizer, scheduler, ti, train_loader, val_loader, model_name="simple_cnn")

    wandb.finish()


Epoch 1/30, Loss: 0.0004, F1 Score: 0.7691
Epoch 2/30, Loss: 0.0002, F1 Score: 0.8716
Epoch 3/30, Loss: 0.0003, F1 Score: 0.8927
Epoch 4/30, Loss: 0.0002, F1 Score: 0.9044
Epoch 5/30, Loss: 0.0002, F1 Score: 0.9128
Epoch 6/30, Loss: 0.0002, F1 Score: 0.9195
Epoch 7/30, Loss: 0.0002, F1 Score: 0.9242
Epoch 8/30, Loss: 0.0002, F1 Score: 0.9267
Epoch 9/30, Loss: 0.0002, F1 Score: 0.9302
Epoch 10/30, Loss: 0.0001, F1 Score: 0.9339
Epoch 11/30, Loss: 0.0001, F1 Score: 0.9357
Epoch 12/30, Loss: 0.0001, F1 Score: 0.9412
Epoch 13/30, Loss: 0.0001, F1 Score: 0.9426
Epoch 14/30, Loss: 0.0001, F1 Score: 0.9455
Epoch 15/30, Loss: 0.0001, F1 Score: 0.9476
Epoch 16/30, Loss: 0.0001, F1 Score: 0.9499
Epoch 17/30, Loss: 0.0002, F1 Score: 0.9524
Epoch 18/30, Loss: 0.0002, F1 Score: 0.9535
Epoch 19/30, Loss: 0.0001, F1 Score: 0.9548
Epoch 20/30, Loss: 0.0003, F1 Score: 0.9564
Epoch 21/30, Loss: 0.0001, F1 Score: 0.9582
Epoch 22/30, Loss: 0.0001, F1 Score: 0.9602
Epoch 23/30, Loss: 0.0002, F1 Score: 0.96

0,1
train/accuracy,▁▂▃▄▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████
train/loss,█▆▆▅▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
validation/accuracy,▁▄▅▅▆▆▆▆▇▇▇██▇██▇▇█▇██████▇███
validation/loss,█▅▄▃▃▃▃▂▂▂▂▁▁▂▁▁▂▂▁▂▁▁▂▂▁▂▃▂▂▂

0,1
train/accuracy,0.97402
train/loss,0.05937
validation/accuracy,0.93655
validation/loss,0.15803


In [None]:
wandb.finish()