In [4]:
import torch
import torch.nn as nn
import pandas as pd 
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset,DataLoader
import torch.optim as optim
import matplotlib.pyplot as plt

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [6]:
df = pd.read_csv('fashion-mnist_train.csv')
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [7]:
X = df.iloc[:,1:].values
y = df.iloc[:,0].values

In [8]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

In [9]:
X_train = X_train/255.0
X_test = X_test/255.0

In [10]:
from torchvision import transforms 
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomRotation(10),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomAffine(0,translate=(0.1,0.1)),
    transforms.ToTensor()
])

test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor()
])

In [11]:
class CustomDataset(Dataset):
    def __init__(self,features,labels,transform=None):
        self.features = torch.tensor(features,dtype=torch.float32).reshape(-1,1,28,28)
        self.labels = torch.tensor(labels,dtype=torch.long)
        self.transform = transform

    def __len__(self):
        return len(self.features)
    
    def __getitem__(self, index):
        feature,label = self.features[index] , self.labels[index]
        if self.transform:
            feature = self.transform(feature.squeeze(0).numpy())
        return feature,label
    

In [12]:
train_dataset = CustomDataset(X_train,y_train,transform=train_transform)
test_dataset = CustomDataset(X_test,y_test,transform=test_transform)

In [13]:
train_dataloader = DataLoader(train_dataset,batch_size=32,shuffle=True,pin_memory=True)
test_dataloader = DataLoader(test_dataset,batch_size=32,shuffle=False,pin_memory=True)

In [20]:
class DynamicCNN(nn.Module):
    def __init__(self,num_conv_layers,num_filters,kernel_size,num_fc_layers,fc_layer_size,dropout_rate):
        super(DynamicCNN,self).__init__()
        layers = []
        in_channels = 1 ## grayscale images have 1 input channel

        for _ in range(num_conv_layers):
            layers.append(nn.Conv2d(in_channels,num_filters,kernel_size=kernel_size,padding='same'))
            layers.append(nn.ReLU())
            layers.append(nn.BatchNorm2d(num_filters))
            layers.append(nn.MaxPool2d(kernel_size=2,stride=2))
            in_channels = num_filters ## update input channels for the next layer
        self.features = nn.Sequential(*layers)

        fc_layers = [nn.Flatten()]
        input_size = num_filters * (28 // (2 ** num_conv_layers)) ** 2
        for _ in range(num_fc_layers):
            fc_layers.append(nn.Linear(input_size,fc_layer_size))
            fc_layers.append(nn.ReLU())
            fc_layers.append(nn.Dropout(dropout_rate))
            input_size = fc_layer_size
        fc_layers.append(nn.Linear(input_size,10)) ## Final layer for 10 classes
        self.classifier = nn.Sequential(*fc_layers)
    
    def forward(self,x):
        x = self.features(x)
        x = self.classifier(x)
        return x

In [23]:
def objective(trial):
    ## Hyperparameters to tune 
    num_conv_layers = trial.suggest_int('num_conv_layers',1,3)
    num_filters = trial.suggest_categorical('num_filters',[16,32,64,128])
    kernel_size = trial.suggest_categorical('kernel_size',[3,5])
    num_fc_layers = trial.suggest_int('num_fc_layers',1,3)
    fc_layer_size = trial.suggest_categorical('fc_layer_size',[64,128,256])
    dropout_rate = trial.suggest_uniform('dropout_rate',0.2,0.5)
    weight_decay = trial.suggest_loguniform('weight_decay',1e-5,1e-2)
    learning_rate = trial.suggest_loguniform('learning_rate',1e-4,1e-2)
    optimizer_name = trial.suggest_categorical('optimizer',['SGD','Adam','RMSprop'])
    batch_size = trial.suggest_categorical('batch_size',[32,64,128])
    num_epochs = trial.suggest_int('num_epochs',10,30)

    model = DynamicCNN(num_conv_layers,num_filters,kernel_size,num_fc_layers,fc_layer_size,dropout_rate).to(device)

    train_loader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
    test_loader = DataLoader(test_dataset,batch_size=batch_size,shuffle=True)

    if optimizer_name == 'SGD':
        optimizer = optim.SGD(model.parameters(),lr=learning_rate,weight_decay=weight_decay)
    elif optimizer_name == 'Adam':
        optimizer = optim.Adam(model.parameters(),lr=learning_rate,weight_decay=weight_decay)
    elif optimizer_name == 'RMSprop':
        optimizer = optim.RMSprop(model.parameters(),lr=learning_rate,weight_decay=weight_decay)
    
    criterion = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        model.train()
        for batch_features,batch_labels in train_loader:
            batch_features , batch_labels = batch_features.to(device),batch_labels.to(device)
            optimizer.zero_grad()
            outputs = model(batch_features)
            loss = criterion(outputs,batch_labels)
            loss.backward()
            optimizer.step()
    model.eval()
    correct = 0 
    total = 0
    with torch.no_grad():
        for batch_features,batch_labels in test_loader:
            batch_features,batch_labels = batch_features.to(device),batch_labels.to(device)
            outputs = model(batch_features)
            _,predicted = torch.max(outputs,1)
            total += batch_labels.size(0)
            correct += (predicted == batch_labels).sum().item()

    accuracy = correct/total
    return accuracy

In [25]:
import optuna 
pruner = optuna.pruners.MedianPruner()
study = optuna.create_study(direction='maximize',pruner=pruner)
study.optimize(objective,n_trials=10)

[I 2025-12-27 14:59:07,236] A new study created in memory with name: no-name-65e5740d-6e49-4646-b1c4-249a30a34b32
  dropout_rate = trial.suggest_uniform('dropout_rate',0.2,0.5)
  weight_decay = trial.suggest_loguniform('weight_decay',1e-5,1e-2)
  learning_rate = trial.suggest_loguniform('learning_rate',1e-4,1e-2)
[I 2025-12-27 15:12:55,362] Trial 0 finished with value: 0.8955833333333333 and parameters: {'num_conv_layers': 1, 'num_filters': 128, 'kernel_size': 5, 'num_fc_layers': 2, 'fc_layer_size': 128, 'dropout_rate': 0.30545283599145634, 'weight_decay': 0.007102889643460566, 'learning_rate': 0.003576714542511845, 'optimizer': 'SGD', 'batch_size': 64, 'num_epochs': 29}. Best is trial 0 with value: 0.8955833333333333.
[I 2025-12-27 15:18:06,896] Trial 1 finished with value: 0.8874166666666666 and parameters: {'num_conv_layers': 1, 'num_filters': 16, 'kernel_size': 3, 'num_fc_layers': 2, 'fc_layer_size': 64, 'dropout_rate': 0.2099522461140198, 'weight_decay': 0.0017825175075973163, 'le

In [26]:
best_params = study.best_params

In [27]:
best_params

{'num_conv_layers': 2,
 'num_filters': 16,
 'kernel_size': 5,
 'num_fc_layers': 1,
 'fc_layer_size': 256,
 'dropout_rate': 0.29471470140674794,
 'weight_decay': 4.8991073100665955e-05,
 'learning_rate': 0.0007660861657726818,
 'optimizer': 'RMSprop',
 'batch_size': 128,
 'num_epochs': 26}

In [28]:
# Train a model using the best hyperparameters found by Optuna
model = DynamicCNN(
    num_conv_layers=best_params['num_conv_layers'],
    num_filters=best_params['num_filters'],
    kernel_size=best_params['kernel_size'],
    num_fc_layers=best_params['num_fc_layers'],
    fc_layer_size=best_params['fc_layer_size'],
    dropout_rate=best_params['dropout_rate']
).to(device)

batch_size = best_params['batch_size']
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, pin_memory=True)

opt_name = best_params['optimizer']
lr = best_params['learning_rate']
wd = best_params['weight_decay']

if opt_name == 'SGD':
    optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=wd)
elif opt_name == 'Adam':
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
elif opt_name == 'RMSprop':
    optimizer = optim.RMSprop(model.parameters(), lr=lr, weight_decay=wd)

criterion = nn.CrossEntropyLoss()
num_epochs = best_params['num_epochs']

for epoch in range(1, num_epochs + 1):
    model.train()
    running_loss = 0.0
    for features, labels in train_loader:
        features, labels = features.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * features.size(0)
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f'Epoch {epoch}/{num_epochs} - loss: {epoch_loss:.4f}')

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for features, labels in test_loader:
        features, labels = features.to(device), labels.to(device)
        outputs = model(features)
        _, preds = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (preds == labels).sum().item()

test_accuracy = correct / total
print(f'Test accuracy: {test_accuracy:.4f}')

# Save trained model
torch.save(model.state_dict(), 'best_model.pth')

Epoch 1/26 - loss: 0.6261
Epoch 2/26 - loss: 0.4569
Epoch 3/26 - loss: 0.4134
Epoch 4/26 - loss: 0.3919
Epoch 5/26 - loss: 0.3728
Epoch 6/26 - loss: 0.3633
Epoch 7/26 - loss: 0.3502
Epoch 8/26 - loss: 0.3469
Epoch 9/26 - loss: 0.3387
Epoch 10/26 - loss: 0.3277
Epoch 11/26 - loss: 0.3237
Epoch 12/26 - loss: 0.3232
Epoch 13/26 - loss: 0.3155
Epoch 14/26 - loss: 0.3131
Epoch 15/26 - loss: 0.3109
Epoch 16/26 - loss: 0.3067
Epoch 17/26 - loss: 0.3036
Epoch 18/26 - loss: 0.3010
Epoch 19/26 - loss: 0.3004
Epoch 20/26 - loss: 0.2942
Epoch 21/26 - loss: 0.2935
Epoch 22/26 - loss: 0.2898
Epoch 23/26 - loss: 0.2910
Epoch 24/26 - loss: 0.2909
Epoch 25/26 - loss: 0.2851
Epoch 26/26 - loss: 0.2862
Test accuracy: 0.9127
