In [None]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torchinfo as info
import data

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Seed PyTorch's global generators. `torch.manual_seed` covers the CPU generator
# as well as CUDA; seeding only CUDA leaves CPU-side randomness (e.g. the initial
# weights of a module constructed before `.to(device)`) different on every run.
seed = 42
torch.manual_seed(seed)

In [64]:
import torch.nn as nn

class BottleneckLayer(nn.Module):
    """Expand -> depthwise -> project convolution block with an optional skip.

    The block widens the input to ``in_c * exp_f`` channels with a 1x1
    convolution, applies a 3x3 depthwise convolution (one group per channel)
    at the requested stride, then maps to ``out_c`` channels with a second 1x1
    convolution. Every convolution is bias-free and followed by batch
    normalisation; ReLU6 follows the first two stages only, so the projection
    output is not passed through an activation.

    Args:
        in_c: Number of input channels.
        out_c: Number of output channels.
        exp_f: Multiplier giving the hidden width ``in_c * exp_f``.
        stride: Stride of the depthwise 3x3 convolution.
    """

    def __init__(self, in_c, out_c, exp_f, stride=1):
        super().__init__()

        hidden_c = in_c * exp_f
        # The identity shortcut is only used when the block keeps both the
        # channel count and the spatial resolution of its input.
        self.use_res_connect = in_c == out_c and stride == 1

        # 1x1 expansion
        expand = [
            nn.Conv2d(in_c, hidden_c, kernel_size=1, bias=False),
            nn.BatchNorm2d(hidden_c),
            nn.ReLU6(inplace=True),
        ]
        # 3x3 depthwise (groups == channels)
        depthwise = [
            nn.Conv2d(
                hidden_c,
                hidden_c,
                kernel_size=3,
                stride=stride,
                padding=1,
                groups=hidden_c,
                bias=False,
            ),
            nn.BatchNorm2d(hidden_c),
            nn.ReLU6(inplace=True),
        ]
        # 1x1 projection (no activation afterwards)
        project = [
            nn.Conv2d(hidden_c, out_c, kernel_size=1, bias=False),
            nn.BatchNorm2d(out_c),
        ]

        self.block = nn.Sequential(*expand, *depthwise, *project)

    def forward(self, x):
        """Apply the block, adding the input back when the shortcut is enabled."""
        out = self.block(x)
        return x + out if self.use_res_connect else out


In [80]:
class Model(nn.Module):
    """Small convolutional classifier built from ``BottleneckLayer`` stages.

    Layout: a 3x3 stem convolution with ReLU, four ``BottleneckLayer`` blocks
    (two of them with stride 2), global average pooling, and a bias-free
    linear layer producing ``num_classes`` logits.

    Args:
        in_c: Number of channels of the input images.
        num_classes: Number of output logits.
    """

    def __init__(self, in_c=1, num_classes=10):
        super().__init__()

        # Stem: channels in_c -> 8; stride 1 with padding 1 keeps the spatial size.
        self.conv1 = nn.Conv2d(in_c, 8, kernel_size=3, stride=1, padding=1)

        self.block1 = BottleneckLayer(8, 16, exp_f=2, stride=2)   # channels 8 -> 16, stride 2
        self.block2 = BottleneckLayer(16, 16, exp_f=2, stride=1)  # channels 16 -> 16, residual
        self.block3 = BottleneckLayer(16, 32, exp_f=2, stride=2)  # channels 16 -> 32, stride 2
        self.block4 = BottleneckLayer(32, 32, exp_f=2, stride=1)  # channels 32 -> 32, residual

        # Global average pooling: (B, 32, H, W) -> (B, 32, 1, 1) for batched input.
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(32, num_classes, bias=False)

    def forward(self, x):
        """Return class logits for a batch of images (assumes 4-D batched input)."""
        x = F.relu(self.conv1(x))
        x = self.block1(x)
        x = self.block2(x)
        x = self.block3(x)
        x = self.block4(x)
        x = self.pool(x)
        # Collapse everything after the batch dimension; unlike `view`, `flatten`
        # does not depend on the tensor's memory layout.
        x = x.flatten(1)
        return self.fc(x)


In [81]:
# Build the network with its default arguments, then move its parameters and
# buffers to the selected device (`Module.to` returns the module itself).
model = Model()
model = model.to(device)

# Show torchinfo's per-layer parameter summary.
print(info.summary(model))

Layer (type:depth-idx)                   Param #
Model                                    --
├─Conv2d: 1-1                            80
├─BottleneckLayer: 1-2                   --
│    └─Sequential: 2-1                   --
│    │    └─Conv2d: 3-1                  128
│    │    └─BatchNorm2d: 3-2             32
│    │    └─ReLU6: 3-3                   --
│    │    └─Conv2d: 3-4                  144
│    │    └─BatchNorm2d: 3-5             32
│    │    └─ReLU6: 3-6                   --
│    │    └─Conv2d: 3-7                  256
│    │    └─BatchNorm2d: 3-8             32
├─BottleneckLayer: 1-3                   --
│    └─Sequential: 2-2                   --
│    │    └─Conv2d: 3-9                  512
│    │    └─BatchNorm2d: 3-10            64
│    │    └─ReLU6: 3-11                  --
│    │    └─Conv2d: 3-12                 288
│    │    └─BatchNorm2d: 3-13            64
│    │    └─ReLU6: 3-14                  --
│    │    └─Conv2d: 3-15                 512
│    │    └─BatchNorm

In [82]:
# Training configuration: loss function, optimizer and number of epochs.
criterion = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(), lr=1e-3)
# Set to 10 so this constant agrees with the 10-epoch training run performed
# (and logged) further down in the notebook.
epochs = 10

In [83]:
def _evaluate(model, loader, device):
    """Count correct top-1 predictions of `model` over `loader`.

    Puts the model in eval mode and disables gradient tracking.
    Returns a ``(correct, total)`` tuple of Python ints.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for xb, yb in loader:
            xb, yb = xb.to(device), yb.to(device)
            predicted = model(xb).argmax(dim=1)
            correct += (predicted == yb).sum().item()
            total += yb.size(0)
    return correct, total


def _ratio(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float("nan")


def train(model, train_loader, test_loader, criterion, optimizer, device, epochs=10):
    """Train `model` and print loss / accuracy after every epoch.

    Each epoch runs one optimisation pass over `train_loader`, then scores the
    model on `test_loader` without gradients. One summary line is printed per
    epoch; the function returns nothing. The model is left in eval mode after
    the final epoch.

    Args:
        model: Module mapping an input batch to per-class scores.
        train_loader: Iterable of ``(inputs, targets)`` batches used for training.
        test_loader: Iterable of ``(inputs, targets)`` batches used for evaluation.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        optimizer: Optimizer that updates the parameters of `model`.
        device: Device the model and every batch are moved to.
        epochs: Number of passes over `train_loader`.

    An empty loader reports NaN for its metrics instead of raising
    ``ZeroDivisionError``.
    """
    model.to(device)

    for epoch in range(epochs):
        model.train()
        train_correct = 0
        train_total = 0
        train_loss = 0.0

        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)

            optimizer.zero_grad()
            outputs = model(xb)
            loss = criterion(outputs, yb)
            loss.backward()
            optimizer.step()

            # `loss` is passed through as the criterion returns it; weighting by
            # batch size yields a per-sample average when it is a batch mean.
            train_loss += loss.item() * xb.size(0)
            predicted = outputs.argmax(dim=1)
            train_correct += (predicted == yb).sum().item()
            train_total += yb.size(0)

        train_acc = _ratio(100 * train_correct, train_total)
        avg_train_loss = _ratio(train_loss, train_total)

        # --- Evaluation on Test Set ---
        test_correct, test_total = _evaluate(model, test_loader, device)
        test_acc = _ratio(100 * test_correct, test_total)

        print(f"Epoch {epoch+1}/{epochs} | Loss: {avg_train_loss:.4f} | "
              f"Train Acc: {train_acc:.2f}% | Test Acc: {test_acc:.2f}%")


In [84]:
# Run training. The epoch count comes from the `epochs` configuration variable
# rather than a separate hard-coded literal, so there is one place to change it.
train(
    model,
    data.train_loader,
    data.test_loader,
    criterion,
    optim,
    device,
    epochs=epochs,
)

Epoch 1/10 | Loss: 0.4815 | Train Acc: 86.17% | Test Acc: 95.91%
Epoch 2/10 | Loss: 0.1043 | Train Acc: 96.97% | Test Acc: 97.90%
Epoch 3/10 | Loss: 0.0734 | Train Acc: 97.79% | Test Acc: 98.17%
Epoch 4/10 | Loss: 0.0592 | Train Acc: 98.20% | Test Acc: 98.27%
Epoch 5/10 | Loss: 0.0500 | Train Acc: 98.47% | Test Acc: 98.40%
Epoch 6/10 | Loss: 0.0457 | Train Acc: 98.58% | Test Acc: 98.44%
Epoch 7/10 | Loss: 0.0418 | Train Acc: 98.69% | Test Acc: 98.51%
Epoch 8/10 | Loss: 0.0363 | Train Acc: 98.83% | Test Acc: 98.74%
Epoch 9/10 | Loss: 0.0353 | Train Acc: 98.85% | Test Acc: 98.77%
Epoch 10/10 | Loss: 0.0340 | Train Acc: 98.86% | Test Acc: 98.68%
