# Misc


In [None]:
import numpy as np

from torch import nn
from torchsummary import summary
from torch.utils.tensorboard import SummaryWriter
from torchmetrics import Accuracy, Precision, Recall, F1, ConfusionMatrix
import torchvision.models as models

from tools.utils import create_dir
from dataset import get_dataset
from model import get_model
from tools.checkpoint import CheckpointManager, Checkpoint
from tools.train_manager import TrainManager
from one_stage.new_dataset import SpectrogramDataset
from dataset import create_data_loader
from tqdm import tqdm

import matplotlib.pyplot as plt

In [None]:
# Mini-batch size shared by the training and validation loaders.
batch_size = 25

# Both splits are built with identical settings; only the directory differs.
# NOTE(review): "mel" and ["other"] are passed straight through to
# SpectrogramDataset — presumably the spectrogram type and a class filter;
# confirm against one_stage.new_dataset.
train_dataset, validation_dataset = [
    SpectrogramDataset(split_dir, "mel", ["other"])
    for split_dir in (
        "/home/dev/dataset/onc/preprocessed/train",
        "/home/dev/dataset/onc/preprocessed/validation",
    )
]

# One loader per split, created in the same order as the datasets.
train_dataloader, validation_dataloader = [
    create_data_loader(split, batch_size)
    for split in (train_dataset, validation_dataset)
]

In [None]:
# Sanity check: pull a single batch and report the tensor shape the models will see.
# An empty loader yields no batch, in which case nothing is printed.
first_batch = next(iter(train_dataloader), None)
if first_batch is not None:
    inputs, labels = first_batch
    print("Input shape:", inputs.shape)

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models
from torchsummary import summary

# Number of output classes and number of channels in the input tensors.
num_of_classes = 5
input_channels = 3

# ImageNet-pretrained VGG16 backbone.
vgg16 = models.vgg16(pretrained=True)

# Freeze the pretrained convolutional feature extractor. This happens BEFORE any
# layer is swapped out, so only pretrained weights are frozen.
for param in vgg16.features.parameters():
    param.requires_grad = False

# Replace the first convolution only when the data's channel count differs from
# what the pretrained layer expects. The replacement starts from random weights,
# so it is intentionally left trainable (requires_grad defaults to True);
# previously it was always re-initialised and then frozen, leaving an untrained,
# untrainable first layer in front of the pretrained stack.
if input_channels != vgg16.features[0].in_channels:
    vgg16.features[0] = nn.Conv2d(
        input_channels, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
    )

# Replace the last classifier layer so the network outputs `num_of_classes` scores.
vgg16.classifier[6] = nn.Linear(4096, num_of_classes)

# Move the model to the GPU and print the layer-by-layer summary.
# `summary` prints the table itself, so its return value is not printed again.
model = vgg16.to("cuda")
print("Model Architecture")
summary(model, (input_channels, 95, 126))

In [None]:
# Step size used by the Adam optimizer below.
learning_rate = 1e-3

# Multi-class classification loss; it is applied to the model outputs and the
# integer labels in the training loop.
criterion = nn.CrossEntropyLoss()

# Adam over every parameter of the VGG16 model.
optimizer = torch.optim.Adam(params=vgg16.parameters(), lr=learning_rate)

In [None]:
num_epochs = 10
device = "cuda"

# ---- Training -------------------------------------------------------------
for epoch in range(num_epochs):
    # Re-enter training mode every epoch so the loop stays correct even if an
    # evaluation pass (model.eval()) is later added inside it.
    model.train()

    running_loss = 0.0
    correct_predictions = 0
    total_samples = 0

    for inputs, labels in tqdm(
        train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False
    ):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # `loss` is a batch mean, so weight it by the batch length to get a sum
        # that can be averaged over all samples afterwards.
        num_in_batch = labels.size(0)
        running_loss += loss.item() * num_in_batch
        predicted = outputs.argmax(dim=1)
        correct_predictions += (predicted == labels).sum().item()
        total_samples += num_in_batch

    # Average over the samples actually seen this epoch. This keeps loss and
    # accuracy on the same denominator even if the loader drops or resamples
    # data, and avoids a ZeroDivisionError on an empty loader.
    epoch_loss = running_loss / total_samples if total_samples else float("nan")
    epoch_accuracy = (
        correct_predictions / total_samples if total_samples else float("nan")
    )

    print(
        f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}"
    )


# ---- Validation -----------------------------------------------------------
# Evaluation mode and no autograd: nothing is learned from the validation data.
model.eval()
val_correct_predictions = 0
val_total_samples = 0

with torch.no_grad():
    for inputs, labels in tqdm(validation_dataloader, desc="Validation", leave=False):
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        predicted = outputs.argmax(dim=1)

        val_correct_predictions += (predicted == labels).sum().item()
        val_total_samples += labels.size(0)

# NaN (rather than a crash) signals that the validation loader produced no samples.
val_accuracy = (
    val_correct_predictions / val_total_samples if val_total_samples else float("nan")
)
print(f"Validation Accuracy: {val_accuracy:.4f}")

# Paper VGGNet


In [32]:
import torch
import torch.nn as nn
import torchvision.models as models


class CNN(nn.Module):
    """Small four-stage convolutional classifier.

    Each stage is ``Conv2d(3x3, stride 1, padding 2) -> BatchNorm2d ->
    LeakyReLU -> MaxPool2d(2)`` with 16, 32, 64 and 128 output channels. The
    final feature map is flattened, projected to ``num_classes`` scores by a
    single linear layer, and normalised with a softmax over dimension 1.

    Args:
        input_channels: Number of channels in the input tensor.
        num_classes: Number of output classes.
        input_size: ``(height, width)`` of the inputs the model will receive.
            It is only used to size the linear layer. The default
            ``(95, 126)`` reproduces the previously hard-coded
            ``128 * 7 * 9`` input features.

    Raises:
        ValueError: If ``input_size`` does not contain exactly two positive
            sizes.
    """

    # Output channels of conv1 .. conv4, in order.
    _STAGE_CHANNELS = (16, 32, 64, 128)

    def __init__(self, input_channels=1, num_classes=5, input_size=(95, 126)):
        super().__init__()

        height, width = (int(side) for side in input_size)
        if height < 1 or width < 1:
            raise ValueError(
                f"input_size must contain positive sizes, got {tuple(input_size)!r}"
            )

        # Stages are created in the same order and under the same attribute
        # names as before, so state_dict keys stay unchanged.
        c1, c2, c3, c4 = self._STAGE_CHANNELS
        self.conv1 = self._conv_block(input_channels, c1)
        self.conv2 = self._conv_block(c1, c2)
        self.conv3 = self._conv_block(c2, c3)
        self.conv4 = self._conv_block(c3, c4)

        self.flatten = nn.Flatten()
        flat_features = (
            c4 * self._size_after_stages(height) * self._size_after_stages(width)
        )
        self.linear = nn.Linear(flat_features, num_classes)
        self.softmax = nn.Softmax(dim=1)

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one Conv -> BatchNorm -> LeakyReLU -> MaxPool stage."""
        return nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=3,
                stride=1,
                padding=2,
            ),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

    @classmethod
    def _size_after_stages(cls, size):
        """Return the spatial extent of one dimension after all stages."""
        for _ in cls._STAGE_CHANNELS:
            # 3x3 conv with padding 2 and stride 1 grows the side by 2;
            # MaxPool2d(2) then halves it, rounding down.
            size = (size + 2) // 2
        return size

    def forward(self, input_data):
        """Return class probabilities of shape ``(batch, num_classes)``.

        NOTE(review): the output is already softmax-normalised. PyTorch's
        ``nn.CrossEntropyLoss`` expects unnormalised logits, so pairing it with
        this output would apply softmax twice — confirm which loss is used.
        """
        x = self.conv1(input_data)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.flatten(x)
        logits = self.linear(x)
        predictions = self.softmax(logits)
        return predictions


# Instantiate the model; the channel count is defined once and shared by the
# model constructor and the summary call so the two cannot drift apart.
input_channels = 1
model = CNN(input_channels=input_channels)

model = model.to("cuda")

# Print the model architecture.
# torchsummary records layer shapes by running a forward pass on random data.
# Doing that in eval mode keeps the BatchNorm running statistics untouched;
# training mode is restored afterwards. `summary` prints the table itself, so
# its return value is not printed again.
print("Model Architecture")
model.eval()
summary(model, (input_channels, 95, 126))
model.train()

Model Architecture
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 16, 97, 128]             160
       BatchNorm2d-2          [-1, 16, 97, 128]              32
         LeakyReLU-3          [-1, 16, 97, 128]               0
         MaxPool2d-4           [-1, 16, 48, 64]               0
            Conv2d-5           [-1, 32, 50, 66]           4,640
       BatchNorm2d-6           [-1, 32, 50, 66]              64
         LeakyReLU-7           [-1, 32, 50, 66]               0
         MaxPool2d-8           [-1, 32, 25, 33]               0
            Conv2d-9           [-1, 64, 27, 35]          18,496
      BatchNorm2d-10           [-1, 64, 27, 35]             128
        LeakyReLU-11           [-1, 64, 27, 35]               0
        MaxPool2d-12           [-1, 64, 13, 17]               0
           Conv2d-13          [-1, 128, 15, 19]          73,856
      BatchNorm2d-14

# Paper ResNet18


In [34]:
# Define a custom ResNet-18 model
class CustomResNet18(nn.Module):
    """Pretrained torchvision ResNet-18 with an adjustable input stem and output head.

    Args:
        num_classes: Number of output scores produced by the final linear layer.
        input_channels: Number of channels in the input tensor. Defaults to 1,
            which matches the previous hard-coded behaviour.

    Notes:
        When ``input_channels`` differs from what the pretrained first
        convolution expects, that layer is replaced by a freshly initialised
        one, so its pretrained weights are not reused. The final ``fc`` layer is
        always replaced and therefore also starts from random weights.
    """

    def __init__(self, num_classes, input_channels=1):
        super().__init__()
        # Load the pre-trained ResNet-18 model
        resnet = models.resnet18(pretrained=True)

        # Swap the stem only when the channel count does not match the
        # pretrained layer; otherwise keep the pretrained weights.
        if input_channels != resnet.conv1.in_channels:
            resnet.conv1 = nn.Conv2d(
                input_channels, 64, kernel_size=7, stride=2, padding=3, bias=False
            )

        # Replace the output layer so it has the desired number of classes
        num_ftrs = resnet.fc.in_features
        resnet.fc = nn.Linear(num_ftrs, num_classes)

        self.resnet = resnet

    def forward(self, x):
        """Run the wrapped ResNet and return its ``(batch, num_classes)`` output."""
        return self.resnet(x)


# Instantiate the model
model = CustomResNet18(num_classes=5)

# Smoke-test the model with random input. Eval mode and no autograd are used so
# the random batch does not overwrite the pretrained BatchNorm running
# statistics and no graph is built for a throw-away forward pass.
model.eval()
with torch.no_grad():
    input_tensor = torch.randn(1, 1, 95, 126)  # Batch size, channels, height, width
    output = model(input_tensor)
print(output.shape)

input_channels = 1
model = model.to("cuda")

# torchsummary also runs a forward pass on random data, so the model stays in
# eval mode until the table is printed. `summary` prints the table itself, so
# its return value is not printed again.
print("Model Architecture")
summary(model, (input_channels, 95, 126))

# Restore the default training mode for any cell that trains this model next.
model.train()

torch.Size([1, 5])
Model Architecture
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 48, 63]           3,136
       BatchNorm2d-2           [-1, 64, 48, 63]             128
              ReLU-3           [-1, 64, 48, 63]               0
         MaxPool2d-4           [-1, 64, 24, 32]               0
            Conv2d-5           [-1, 64, 24, 32]          36,864
       BatchNorm2d-6           [-1, 64, 24, 32]             128
              ReLU-7           [-1, 64, 24, 32]               0
            Conv2d-8           [-1, 64, 24, 32]          36,864
       BatchNorm2d-9           [-1, 64, 24, 32]             128
             ReLU-10           [-1, 64, 24, 32]               0
       BasicBlock-11           [-1, 64, 24, 32]               0
           Conv2d-12           [-1, 64, 24, 32]          36,864
      BatchNorm2d-13           [-1, 64, 24, 32]             128
 