In [1]:
import torch
import torch.nn as nn
import pandas as pd 
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset,DataLoader
import torch.optim as optim
import matplotlib.pyplot as plt

In [2]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [3]:
# Load the training CSV from the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='fashion-mnist_train.csv')
# Preview the first five rows (a `label` column followed by pixel1..pixel784).
df.head(n=5)

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# Assumes the first column is the class label and every remaining column is a
# pixel value, as shown in the preview above.
y = df.iloc[:, 0].to_numpy()
X = df.iloc[:, 1:].to_numpy()

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Rescale pixel values by 1/255 (presumably mapping 0-255 intensities to [0, 1]).
# NOTE: this reassigns the same names, so re-running this cell without
# re-running the split divides the data a second time.
X_train, X_test = X_train / 255.0, X_test / 255.0

In [None]:
from torchvision import transforms 
# Training pipeline: light geometric augmentation applied on a PIL image,
# then conversion back to a tensor.
train_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomRotation(degrees=10),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.ToTensor(),
])

# Evaluation pipeline: same PIL round-trip as training, with no augmentation.
test_transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
])

In [None]:
class CustomDataset(Dataset):
    """Dataset of 1x28x28 float images with integer class labels.

    Args:
        features: array-like that can be reshaped to (-1, 1, 28, 28); converted
            to a float32 tensor.
        labels: array-like of class indices; converted to an int64 tensor.
        transform: optional callable. When truthy it is applied to each sample
            as a 2-D (28, 28) numpy array and its result is returned instead
            of the stored tensor.
    """

    def __init__(self, features, labels, transform=None):
        pixel_tensor = torch.tensor(features, dtype=torch.float32)
        self.features = pixel_tensor.reshape(-1, 1, 28, 28)
        self.labels = torch.tensor(labels, dtype=torch.long)
        self.transform = transform

    def __len__(self):
        # Number of samples is the leading dimension of the image tensor.
        return self.features.shape[0]

    def __getitem__(self, index):
        image = self.features[index]
        target = self.labels[index]
        if not self.transform:
            return image, target
        # Drop the channel axis and hand the transform a plain numpy array.
        return self.transform(image.squeeze(0).numpy()), target
    

In [None]:
# Wrap each split in a dataset; only the training split gets augmentation.
train_dataset = CustomDataset(features=X_train, labels=y_train, transform=train_transform)
test_dataset = CustomDataset(features=X_test, labels=y_test, transform=test_transform)

In [24]:
# Batches of 32 with pinned host memory; only the training loader is shuffled.
train_dataloader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True, pin_memory=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False, pin_memory=True)

In [None]:
class DynamicCNN(nn.Module):
    """Configurable CNN mapping 1x28x28 images to 10 class logits.

    The network is a stack of ``num_conv_layers`` blocks of
    Conv2d(padding='same') -> ReLU -> BatchNorm2d -> MaxPool2d(2, 2), followed
    by ``num_fc_layers`` blocks of Linear -> ReLU -> Dropout and a final
    Linear layer with 10 outputs.

    Args:
        num_conv_layers: number of convolutional blocks; each halves the
            spatial size, so 28x28 inputs support at most 4.
        num_filters: output channels of every convolution.
        kernel_size: convolution kernel size.
        num_fc_layers: number of hidden fully connected blocks.
        fc_layer_size: width of each hidden fully connected layer.
        dropout_rate: dropout probability after each hidden layer.

    Raises:
        ValueError: if ``num_conv_layers`` is negative or so large that the
            28x28 feature map would be pooled down to nothing.
    """

    def __init__(self,num_conv_layers,num_filters,kernel_size,num_fc_layers,fc_layer_size,dropout_rate):
        super().__init__()

        if num_conv_layers < 0:
            raise ValueError(f"num_conv_layers must be >= 0, got {num_conv_layers}")
        # Each MaxPool2d(2, 2) floors the spatial size in half.
        spatial_size = 28 // (2 ** num_conv_layers)
        if spatial_size < 1:
            raise ValueError(
                f"num_conv_layers={num_conv_layers} pools a 28x28 input down to nothing"
            )

        layers = []
        in_channels = 1  ## grayscale images have 1 input channel
        for _ in range(num_conv_layers):
            layers.append(nn.Conv2d(in_channels, num_filters, kernel_size=kernel_size, padding='same'))
            layers.append(nn.ReLU())
            layers.append(nn.BatchNorm2d(num_filters))
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            in_channels = num_filters  ## update input channels for the next layer
        self.features = nn.Sequential(*layers)

        fc_layers = [nn.Flatten()]
        # Use the real channel count so the size is also right with zero conv blocks.
        input_size = in_channels * spatial_size ** 2
        for _ in range(num_fc_layers):
            fc_layers.append(nn.Linear(input_size, fc_layer_size))
            fc_layers.append(nn.ReLU())
            fc_layers.append(nn.Dropout(dropout_rate))
            input_size = fc_layer_size
        fc_layers.append(nn.Linear(input_size, 10))  ## Final layer for 10 classes
        # Register the head on the module; otherwise its parameters are never
        # trained or moved to the target device.
        self.classifier = nn.Sequential(*fc_layers)

    def forward(self, x):
        """Return class logits of shape (batch, 10) for a (batch, 1, 28, 28) input."""
        return self.classifier(self.features(x))

In [None]:
def _evaluate_accuracy(model, loader):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    Switches the model to eval mode and disables gradient tracking. Returns
    0.0 when the loader yields no samples.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for batch_features, batch_labels in loader:
            batch_features = batch_features.to(device)
            batch_labels = batch_labels.to(device)
            predicted = model(batch_features).argmax(dim=1)
            # size(0) is the batch length; a bare size() returns a torch.Size,
            # which cannot be added to an int.
            total += batch_labels.size(0)
            correct += (predicted == batch_labels).sum().item()
    return correct / total if total else 0.0


def objective(trial):
    """Optuna objective: train a DynamicCNN with sampled hyperparameters.

    Samples the architecture, optimizer and training settings from ``trial``,
    trains on ``train_dataset`` and evaluates on ``test_dataset`` after every
    epoch. Each epoch's accuracy is reported to the trial so the study's
    pruner can stop unpromising trials early.

    Args:
        trial: the ``optuna.trial.Trial`` supplying hyperparameter suggestions.

    Returns:
        Accuracy (fraction in [0, 1]) after the final epoch.

    Raises:
        optuna.TrialPruned: when the pruner decides to stop this trial.
        ValueError: if the sampled optimizer name is not supported.
    """
    # Function-scope import so the objective does not depend on which cell
    # imported optuna first.
    import optuna

    ## Hyperparameters to tune
    num_conv_layers = trial.suggest_int('num_conv_layers',1,3)
    num_filters = trial.suggest_categorical('num_filters',[16,32,64,128])
    kernel_size = trial.suggest_categorical('kernel_size',[3,5])
    num_fc_layers = trial.suggest_int('num_fc_layers',1,3)
    fc_layer_size = trial.suggest_categorical('fc_layer_size',[64,128,256])
    # suggest_float replaces the deprecated suggest_uniform / suggest_loguniform.
    dropout_rate = trial.suggest_float('dropout_rate',0.2,0.5)
    weight_decay = trial.suggest_float('weight_decay',1e-5,1e-2,log=True)
    learning_rate = trial.suggest_float('learning_rate',1e-4,1e-2,log=True)
    optimizer_name = trial.suggest_categorical('optimizer',['SGD','Adam','RMSprop'])
    batch_size = trial.suggest_categorical('batch_size',[32,64,128])
    num_epochs = trial.suggest_int('num_epochs',10,30)

    model = DynamicCNN(num_conv_layers,num_filters,kernel_size,num_fc_layers,fc_layer_size,dropout_rate).to(device)

    train_loader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
    # Accuracy does not depend on sample order, so evaluation is not shuffled.
    # NOTE(review): hyperparameters are selected on test_dataset, so the
    # reported accuracy is optimistic; consider a separate validation split.
    test_loader = DataLoader(test_dataset,batch_size=batch_size,shuffle=False)

    optimizer_classes = {
        'SGD': optim.SGD,
        'Adam': optim.Adam,
        'RMSprop': optim.RMSprop,
    }
    if optimizer_name not in optimizer_classes:
        raise ValueError(f"Unsupported optimizer: {optimizer_name!r}")
    optimizer = optimizer_classes[optimizer_name](
        model.parameters(), lr=learning_rate, weight_decay=weight_decay
    )

    criterion = nn.CrossEntropyLoss()

    accuracy = 0.0
    for epoch in range(num_epochs):
        model.train()
        for batch_features,batch_labels in train_loader:
            batch_features , batch_labels = batch_features.to(device),batch_labels.to(device)
            optimizer.zero_grad()
            outputs = model(batch_features)
            loss = criterion(outputs,batch_labels)
            loss.backward()
            optimizer.step()

        # Report intermediate results; without them a pruner has nothing to act on.
        accuracy = _evaluate_accuracy(model, test_loader)
        trial.report(accuracy, epoch)
        if trial.should_prune():
            raise optuna.TrialPruned()

    return accuracy

In [None]:
import optuna 
# The pruner only acts on trials whose objective calls trial.report(...) and
# trial.should_prune().
pruner = optuna.pruners.MedianPruner()
# Maximise the value returned by the objective over 50 trials.
study = optuna.create_study(
    pruner=pruner,
    direction='maximize',
)
study.optimize(func=objective, n_trials=50)