In [1]:
# ============================
#  CELL 1: Imports & Setup
# ============================
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import numpy as np
import random
import matplotlib.pyplot as plt
import pickle
import csv

In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print("Using device:", device)

Using device: cuda


In [3]:
# One seed shared by every random number generator the notebook touches.
SEED = 1234

# Seed Python, NumPy, PyTorch (CPU) and PyTorch (CUDA), in that order.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(SEED)

# Ask cuDNN to pick deterministic algorithms only.
torch.backends.cudnn.deterministic = True

In [4]:
# Per-channel statistics used to standardize the CIFAR-10 images.
MEAN = [0.4914, 0.4822, 0.4465]
STD  = [0.2023, 0.1994, 0.2010]

# Steps common to both pipelines: PIL image -> float tensor -> standardized tensor.
_tensor_and_normalize = [
    transforms.ToTensor(),
    transforms.Normalize(MEAN, STD),
]

# Training images additionally get a random padded crop and a random flip.
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    *_tensor_and_normalize,
])

# Test images are only converted and standardized (no augmentation).
test_transform = transforms.Compose(list(_tensor_and_normalize))

# Both splits live under the same root and are downloaded when missing.
_cifar_kwargs = dict(root="../cifar-10-python", download=True)
train_dataset = datasets.CIFAR10(train=True, transform=train_transform, **_cifar_kwargs)
test_dataset = datasets.CIFAR10(train=False, transform=test_transform, **_cifar_kwargs)

# Only the training loader shuffles; batch size and worker count are shared.
_loader_kwargs = dict(batch_size=64, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)

In [5]:
class BasicBlock(nn.Module):
    """
    Residual basic block with Squeeze-and-Excitation (SE) channel attention.

    The residual branch is two 3x3 convolutions, each followed by batch
    normalization. Its output is rescaled per channel by the SE gate before
    the shortcut is added and the final ReLU is applied.

    Args:
        in_planes: number of input channels.
        planes: number of channels produced by both convolutions.
        stride: stride of the first convolution (and of the projection
            shortcut, when one is needed).
        reduction: SE bottleneck ratio. The gate squeezes to
            ``planes // reduction`` channels, but never to fewer than one.
    """
    expansion = 1

    def __init__(self, in_planes, planes, stride=1, reduction=16):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3,
            stride=stride, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = nn.Conv2d(
            planes, planes, kernel_size=3,
            stride=1, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(planes)

        # Clamp to at least one channel: for planes < reduction the integer
        # division alone would request a zero-channel bottleneck.
        squeezed = max(1, planes // reduction)

        # SE attention: global average pool -> squeeze -> excite -> sigmoid gate.
        # The Sequential layout is kept unchanged so state_dict keys stay the same.
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(planes, squeezed, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(squeezed, planes, kernel_size=1),
            nn.Sigmoid()
        )

        # Identity shortcut unless the spatial size or channel count changes,
        # in which case a strided 1x1 convolution + BatchNorm projects the input.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes * self.expansion:
            self.shortcut = nn.Sequential(
                nn.Conv2d(
                    in_planes, planes * self.expansion,
                    kernel_size=1, stride=stride, bias=False
                ),
                nn.BatchNorm2d(planes * self.expansion)
            )

    def forward(self, x):
        """Return relu(SE-gated residual branch + shortcut(x))."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Rescale every channel by its SE gate value in (0, 1).
        out = out * self.se(out)

        out += self.shortcut(x)
        return F.relu(out)


class ResNet(nn.Module):
    """
    ResNet-style image classifier with dropout before the linear head.

    Layout: 3x3 stem (3 -> 32 channels), four residual stages of base widths
    32, 64, 128 and 256, global average pooling, dropout, fully connected
    classifier.

    Args:
        block: residual block class. It must expose an ``expansion`` attribute
            and be constructible as ``block(in_planes, planes, stride)``.
        num_blocks: indexable giving the block count of each of the four stages.
        num_classes: number of output logits.
        dropout_rate: dropout probability applied to the pooled features.
    """
    def __init__(self, block, num_blocks, num_classes=10, dropout_rate=0.1):
        super().__init__()
        # Channel count fed to the next block; _make_layer advances it.
        self.in_planes = 32

        # Stem: stride-1 3x3 convolution, so the input resolution is kept.
        self.conv1 = nn.Conv2d(
            3, 32, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(32)

        # Four stages; every stage after the first starts with a stride-2 block.
        self.layer1 = self._make_layer(block, planes=32, num_blocks=num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, planes=64, num_blocks=num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, planes=128, num_blocks=num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, planes=256, num_blocks=num_blocks[3], stride=2)

        # Collapse each feature map to a single value per channel.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # Regularize the pooled features before classification.
        self.dropout = nn.Dropout(dropout_rate)

        self.fc = nn.Linear(256 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """
        Build one stage as an ``nn.Sequential`` of ``block`` instances.

        Only the first block uses ``stride``; the remaining ones use stride 1.
        The stride list always starts with ``stride``, so a stage contains at
        least one block. ``self.in_planes`` is updated after each block.
        """
        stage = []
        for block_stride in [stride, *([1] * (num_blocks - 1))]:
            stage.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*stage)

    def forward(self, x):
        """Map a batch of images to a ``(batch, num_classes)`` tensor of logits."""
        feats = F.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            feats = stage(feats)
        pooled = self.avgpool(feats)
        # Flatten (batch, channels, 1, 1) to (batch, channels).
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(self.dropout(flat))

def ImprovedResNet():
    """Build the 10-class network used in this notebook.

    The four stages hold 4, 4, 4 and 3 ``BasicBlock`` instances respectively,
    and dropout before the classifier is set to 0.2.
    """
    stage_depths = [4, 4, 4, 3]
    return ResNet(
        block=BasicBlock,
        num_blocks=stage_depths,
        num_classes=10,
        dropout_rate=0.2,
    )

# Instantiate the network on the selected device and count its trainable weights
model = ImprovedResNet().to(device)
num_params = 0
for _param in model.parameters():
    if _param.requires_grad:
        num_params += _param.numel()
print(f"Improved model param count: {num_params:,} (<= 5,000,000)")

Improved model param count: 4,791,314 (<= 5,000,000)


In [6]:
def evaluate(model, loader, criterion, device=None):
    """Compute the average loss and the accuracy of ``model`` over ``loader``.

    The model is switched to eval mode (and left in it) and gradients are
    disabled for the whole pass.

    Args:
        model: network to evaluate.
        loader: iterable yielding ``(inputs, labels)`` batches.
        criterion: loss callable ``criterion(outputs, labels)``. Its result is
            treated as a per-batch mean and re-weighted by the batch size, so
            the returned loss is a per-sample average even when batches
            differ in size.
        device: device the batches are moved to. Defaults to the device of
            the model's first parameter (CPU for a parameterless model), so
            the function does not depend on a notebook-level global.

    Returns:
        ``(epoch_loss, epoch_acc)``: mean loss per sample and accuracy in
        percent (0-100).

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for imgs, labels in loader:
            # Batches must live on the same device as the model.
            imgs, labels = imgs.to(device), labels.to(device)

            outputs = model(imgs)
            batch_size = labels.size(0)
            running_loss += criterion(outputs, labels).item() * batch_size

            # Predicted class = index of the largest logit.
            _, pred = outputs.max(1)
            correct += pred.eq(labels).sum().item()
            total += batch_size

    if total == 0:
        # Previously surfaced as an unexplained ZeroDivisionError.
        raise ValueError("evaluate() received a loader that yields no samples")

    epoch_loss = running_loss / total
    epoch_acc = 100.0 * correct / total
    return epoch_loss, epoch_acc

In [7]:
# Rebuild the architecture, then restore the trained weights into it.
best_model = ImprovedResNet().to(device)

# map_location remaps the stored tensors onto the active device, so a checkpoint
# saved on a GPU also loads when this notebook falls back to the CPU.
# Alternative checkpoint kept from the original cell:
#   "best_model_dropoutIncrease_augmentation.pth"
state_dict = torch.load("resnet_model_200epoch_mixcut.pth", map_location=device)
best_model.load_state_dict(state_dict)
best_model.eval()
criterion = nn.CrossEntropyLoss()

# Score the restored model on the CIFAR-10 test split.
final_loss, final_acc = evaluate(best_model, test_loader, criterion)
print(f"Final Test Accuracy (Best Model): {final_acc:.2f}%")

Final Test Accuracy (Best Model): 96.13%


In [8]:
# Path of the pickled, unlabeled test set that the next cell opens and unpickles.
custom_test_path = "../cifar_test_nolabel.pkl"

In [None]:

def custom_transform(img_np):
    """Turn one raw test image into a normalized CHW float tensor.

    Args:
        img_np: NumPy array holding a single image, either flat with
            3*32*32 values (reshaped here as channel-major 3x32x32) or
            already laid out as height x width x channel.

    Returns:
        Float tensor of shape (3, H, W): pixel values divided by 255 and then
        standardized per channel with the notebook's MEAN / STD.
    """
    # Bring flat rows into HWC so both input layouts share one code path.
    if img_np.shape == (3 * 32 * 32,):
        img_np = img_np.reshape(3, 32, 32).transpose(1, 2, 0)
    # HWC -> CHW, then scale to [0, 1].
    tensor_img = torch.from_numpy(img_np.transpose(2, 0, 1)).float() / 255.0
    # Broadcast the per-channel statistics over the spatial dimensions.
    mean = torch.tensor(MEAN).view(3, 1, 1)
    std = torch.tensor(STD).view(3, 1, 1)
    return (tensor_img - mean) / std


# Number of images pushed through the network per forward pass.
INFERENCE_BATCH_SIZE = 256

# Only the dataset read is guarded: a FileNotFoundError raised later (e.g. while
# writing the CSV) must not be reported as a missing test set.
try:
    # SECURITY: pickle.load can execute arbitrary code. Only open files you trust.
    with open(custom_test_path, 'rb') as f:
        custom_test_data = pickle.load(f)
except FileNotFoundError:
    custom_test_data = None
    print(f"File '{custom_test_path}' not found. Please upload your custom test dataset.")

if custom_test_data is not None:
    # b'data' presumably holds the images (flat 3*32*32 rows or HWC arrays;
    # custom_transform handles both) and b'ids' one ID per image — TODO confirm.
    unlabeled_imgs = custom_test_data[b'data']
    unlabeled_ids  = custom_test_data[b'ids']

    best_model.eval()
    predictions = []
    num_images = len(unlabeled_imgs)

    with torch.no_grad():
        # Batched inference instead of one forward pass per image.
        for start in range(0, num_images, INFERENCE_BATCH_SIZE):
            indices = range(start, min(start + INFERENCE_BATCH_SIZE, num_images))
            batch = torch.stack(
                [custom_transform(unlabeled_imgs[i]) for i in indices]
            ).to(device)
            _, pred_labels = torch.max(best_model(batch), 1)
            predictions.extend(
                (unlabeled_ids[i], label)
                for i, label in zip(indices, pred_labels.tolist())
            )

    # Write CSV: one header row, then one (ID, predicted label) row per image.
    csv_filename = "submission_confidence_seb_200.csv"
    with open(csv_filename, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["ID", "Labels"])  # columns
        writer.writerows(predictions)

    print(f"CSV saved as '{csv_filename}' with {len(predictions)} entries.")

CSV saved as 'submission_confidence_seb_350_samridh.csv' with 10000 entries.
