# Model Architecture (CNN Class)

The neural network definition.
Contains:
- __init__: Defines all layers (conv, pooling, fully connected)
- forward: Defines how data flows through the network

Architecture:  
- Convolutional blocks with BatchNorm, ReLU, and MaxPool
- Global Average Pooling
- Fully connected layers with Dropout

In [13]:
import torch.nn as nn
import torch.nn.functional as F

In [14]:
class SpectrogramCNN(nn.Module):
    """Convolutional classifier for spectrogram images.

    Layout:
      1. Four blocks of Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU ->
         MaxPool2d(2, stride 2), widening the channels 32 -> 64 -> 128 -> 256.
      2. Global average pooling down to a 1x1 map per channel, so the
         classifier head does not depend on the input height/width.
      3. Three fully connected layers (256 -> 128 -> 64 -> num_classes),
         with dropout applied to the inputs of the first two.

    Args:
        in_channels: Number of channels of the input images (3 for RGB).
        num_classes: Number of output classes (size of the last layer).
        dropout: Drop probability passed to ``nn.Dropout``.
    """

    def __init__(
        self,
        in_channels: int = 3,
        num_classes: int = 2,
        dropout: float = 0.5,
    ) -> None:
        super().__init__()

        # 1) convolutional blocks (for each block: ConvLayer + BatchNorm + MaxPooling)
        # convolutional layers
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        # batch normalization (one per conv layer, matching its output channels)
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(64)
        self.bn3 = nn.BatchNorm2d(128)
        self.bn4 = nn.BatchNorm2d(256)
        # max pooling (has no parameters, so one instance is shared by all blocks)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # 2) adaptive (global) pooling layer
        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))

        # 3) fully connected layers
        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, num_classes)  # one output per class
        # dropout (shared by both places it is applied in forward)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Run a batch of images through the network.

        Args:
            x: 4D tensor of shape (batch_size, in_channels, height, width).

        Returns:
            Tensor of shape (batch_size, num_classes) holding the raw outputs
            of the last linear layer (no softmax is applied here).
        """
        # convolutional blocks
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))
        x = self.pool(F.relu(self.bn4(self.conv4(x))))

        # adaptive (global) pooling layer: (batch, 256, h, w) -> (batch, 256, 1, 1)
        x = self.global_pool(x)
        # flattening: (batch, 256, 1, 1) -> (batch, 256)
        x = x.view(x.size(0), -1)

        # fully connected layers (dropout is applied to the input of fc1 and fc2)
        x = F.relu(self.fc1(self.dropout(x)))
        x = F.relu(self.fc2(self.dropout(x)))
        x = self.fc3(x)

        return x

In [26]:
# Imports used only by the sanity check below
from torchvision import transforms
from PIL import Image
import pandas as pd

# Minimal preprocessing pipeline for the check: only the PIL image -> tensor conversion.
train_transform_test = transforms.Compose([transforms.ToTensor()])

# Read the labeled-data table and open one sample image (row position 1476) as RGB.
df = pd.read_csv('/Users/hela/Code/pata/data_labeled.csv')
initial_image = Image.open(df['image_path'].iloc[1476]).convert('RGB')

# Apply the pipeline to get the tensor that is fed to the model.
transformed_image = train_transform_test(initial_image)

In [28]:
# Sanity check of the model definition.

# Construction: build the network with its default layer sizes.
model = SpectrogramCNN()
model.conv1

# Forward pass: the network takes a 4D tensor (batch_size, channels, height, width),
# while a single image is only 3D (channels, height, width), so a leading batch
# axis of size 1 is inserted before the call.
# NOTE: the model is left in its default training mode here, so dropout is active.
output = model(transformed_image[None])
output

tensor([[-0.0762, -0.3792]], grad_fn=<AddmmBackward0>)