In [None]:
# Declaration of hyperparameters for training a neural network

BATCH_SIZE=64
EPOCHS=32
AUGEMENTATION_PERCENTAGE = 0.2
LEARNING_RATE=0.1
OPTIMIZER='sgd'
DROPOUT=0.1
# SCHEDULER='LambdaLR'
SCHEDULER='StepLR'
STEP_SIZE=16
GAMMA=0.1
# DECRESE_LR=0.95


In [227]:
# Load all dependencies
import torch
import torch.nn as nn
from torchvision import transforms, datasets

In [228]:
# Load CIFar-10 dataset

# Define the transformation
train_dataset = datasets.CIFAR10(root='./data',
                                             train=True,
                                             download=True,
                                             transform=transforms.Compose([
                                                 transforms.RandomApply(
                                                 [
                                                #  transforms.RandomCrop(size=24,padding=4),
                                                 transforms.RandomHorizontalFlip(),
                                                 transforms.RandomRotation(degrees=30),
                                                 transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
                                
                                             ],p=AUGEMENTATION_PERCENTAGE),
                                                    transforms.ToTensor(),
                                                    
                                                ]))
test_dataset = datasets.CIFAR10(root='./data',
                                            train=False,
                                            download=True,
                                            transform=transforms.ToTensor())

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=BATCH_SIZE,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                           batch_size=BATCH_SIZE,
                                           shuffle=False)

print(train_loader.dataset)
print(train_loader.dataset.data.shape)


Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               RandomApply(
               p=0.2
               RandomHorizontalFlip(p=0.5)
               RandomRotation(degrees=[-30.0, 30.0], interpolation=nearest, expand=False, fill=0)
               ColorJitter(brightness=(0.8, 1.2), contrast=(0.8, 1.2), saturation=(0.8, 1.2), hue=(-0.2, 0.2))
           )
               ToTensor()
           )
(50000, 32, 32, 3)


In [229]:
class CNN(nn.Module):
    """4-conv original architecture (NOT copied from any public model)."""
    def __init__(self, p_drop=0.25):
        super().__init__()
        # 32x32 → 16x16
        self.conv_block1 = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, 3, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )
        # 16x16 → 8x8
        self.conv_block2 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, 3, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Dropout2d(p_drop),
        )
        # 8x8 → 4x4
        self.conv_block3 = nn.Sequential(
            nn.Conv2d(128, 256, 3, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, 3, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Dropout2d(p_drop),
        )
        # 4x4 → 1x1
        self.conv_block4 = nn.Sequential(
            nn.Conv2d(256, 512, 3, padding=1, bias=False),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.AdaptiveAvgPool2d(1),
        )
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = self.conv_block3(x)
        x = self.conv_block4(x)
        x = x.flatten(1)
        return self.classifier(x)

In [230]:
DEVICE=torch.device("cuda" if torch.cuda.is_available() else "cpu")

model=CNN(p_drop=DROPOUT).to(DEVICE)
# optimizer=torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
if OPTIMIZER == 'adam':
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
elif OPTIMIZER == 'sgd':
    optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9)

scheduler=torch.optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)
# scheduler=torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda epoch: DECRESE_LR ** epoch)
criterion=nn.CrossEntropyLoss()

In [231]:
def train(model, train_loader,optimizer):
    model.train()
    for batch_idx,(image,label) in enumerate(train_loader):
        image=image.to(DEVICE)
        label=label.to(DEVICE)
        optimizer.zero_grad()

        output=model(image)
        loss=criterion(output,label)
        loss.backward()
        optimizer.step()
    scheduler.step()

In [232]:
def evaluate(model,test_loader):
    model.eval()
    test_loss=0
    correct=0
    with torch.no_grad():
        for image,label in test_loader:
            image=image.to(DEVICE)
            label=label.to(DEVICE)
            output=model(image)
            test_loss+=criterion(output,label).item()
            pred=output.argmax(dim=1, keepdim=True)
            correct+=pred.eq(label.view_as(pred)).sum().item()
    test_loss/=len(test_loader.dataset)
    test_accuracy=100. * correct / len(test_loader.dataset)
    return test_loss, test_accuracy

In [233]:

for epoch in range(1, EPOCHS + 1):
    train(model, train_loader, optimizer)
    test_loss,test_accuracy=evaluate(model, test_loader)
    print(f'Epoch: {epoch}, Test set: Average loss: {test_loss:.4f}, Accuracy: {test_accuracy:.2f}%\n')
test_loss,test_accuracy=evaluate(model, test_loader)

Epoch: 1, Test set: Average loss: 0.0222, Accuracy: 47.47%

Epoch: 2, Test set: Average loss: 0.0154, Accuracy: 65.71%

Epoch: 3, Test set: Average loss: 0.0119, Accuracy: 73.95%

Epoch: 4, Test set: Average loss: 0.0099, Accuracy: 78.15%

Epoch: 5, Test set: Average loss: 0.0087, Accuracy: 81.27%

Epoch: 6, Test set: Average loss: 0.0080, Accuracy: 82.59%

Epoch: 7, Test set: Average loss: 0.0075, Accuracy: 83.76%

Epoch: 8, Test set: Average loss: 0.0072, Accuracy: 84.88%

Epoch: 9, Test set: Average loss: 0.0071, Accuracy: 84.91%

Epoch: 10, Test set: Average loss: 0.0072, Accuracy: 84.52%

Epoch: 11, Test set: Average loss: 0.0065, Accuracy: 86.13%

Epoch: 12, Test set: Average loss: 0.0080, Accuracy: 84.77%

Epoch: 13, Test set: Average loss: 0.0065, Accuracy: 87.24%

Epoch: 14, Test set: Average loss: 0.0067, Accuracy: 86.81%

Epoch: 15, Test set: Average loss: 0.0065, Accuracy: 87.10%

Epoch: 16, Test set: Average loss: 0.0068, Accuracy: 87.22%

Epoch: 17, Test set: Average loss

In [234]:
f=open('./log.txt','a')
f.write(f'BATCH_SIZE={BATCH_SIZE},EPOCHS={EPOCHS},AUGEMENTATION_PERCENTAGE={AUGEMENTATION_PERCENTAGE},LEARNING_RATE={LEARNING_RATE},OPTIMIZER={OPTIMIZER},DROPOUT={DROPOUT},SCHEDULER={SCHEDULER}, STEP_SIZE={STEP_SIZE}, GAMMA={GAMMA}\n')
f.write(f'Test set: Average loss: {test_loss:.4f}, Accuracy: {test_accuracy:.2f}%\n')
f.close()