# 1. Import required libraries

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
from torch.optim import SGD, Adam
from torch.nn import CrossEntropyLoss
import torchmetrics
import logging
from itertools import product

# 2. Define MLP

In [2]:
class MLP(nn.Module):
    """Fully connected classifier operating on flattened images.

    Architecture: ``Linear -> ReLU -> [Linear -> ReLU] * (n_hidden_layers - 1) -> Linear``.
    The first layer takes ``image_width * image_height * color_channels``
    features and the last layer produces ``n_classes`` outputs.

    Args:
        n_hidden_nodes: Width of every hidden layer.
        n_classes: Number of output units.
        image_width: Width used to compute the flattened input size.
        image_height: Height used to compute the flattened input size.
        color_channels: Channel count used to compute the flattened input size.
        n_hidden_layers: Number of hidden layers; must be at least 1.

    Raises:
        ValueError: If ``n_hidden_layers`` is smaller than 1.
    """

    def __init__(self, n_hidden_nodes, n_classes, image_width=32, image_height=32, color_channels=3, n_hidden_layers=1):
        super(MLP, self).__init__()
        # Previously a value < 1 silently produced a one-hidden-layer network.
        if n_hidden_layers < 1:
            raise ValueError(f"n_hidden_layers must be >= 1, got {n_hidden_layers}")

        input_size = image_width * image_height * color_channels
        self.layers = nn.Sequential(
            nn.Linear(input_size, n_hidden_nodes),
            nn.ReLU(),
            nn.Linear(n_hidden_nodes, n_classes)
        )

        if n_hidden_layers > 1:
            # Extra hidden layers live in their own Sequential that is spliced in
            # just before the output layer. Submodule names and creation order are
            # kept as-is so state_dict keys and seeded initialisation do not change.
            self.added_layers = nn.Sequential()
            for i in range(n_hidden_layers - 1):
                self.added_layers.add_module(str(2 * (i + 1) + 1), nn.Linear(n_hidden_nodes, n_hidden_nodes))
                self.added_layers.add_module(str(2 * (i + 1) + 2), nn.ReLU())
            layers = list(self.layers)
            layers.insert(2, self.added_layers)
            self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten every sample of the batch and return the output of ``self.layers``."""
        # reshape (not view) so that non-contiguous batches, e.g. permuted
        # channels-last images, are accepted instead of raising a RuntimeError.
        x = x.reshape(x.size(0), -1)
        return self.layers(x)

In [3]:
import torchvision
from torchvision import transforms
from torch.utils.data import DataLoader

class DataLoaderFactory:
    """Builds CIFAR-10 ``DataLoader`` objects for the train and test splits.

    Args:
        root: Dataset directory passed to ``torchvision.datasets.CIFAR10``.
        transform: Optional transform applied to BOTH splits. When omitted,
            the training split uses random horizontal flip + random crop
            followed by tensor conversion and normalisation, while the test
            split uses only tensor conversion and normalisation.
        batch_size: Batch size of the returned loaders.
        num_workers: Number of worker processes of the returned loaders.

    Attributes:
        transform: Transform used for the training split.
        eval_transform: Transform used for the test split.
    """

    def __init__(self, root='./data', transform=None, batch_size=32, num_workers=2):
        self.root = root
        if transform is None:
            base_steps = [
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            ]
            self.transform = transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomCrop(32, padding=4),
                *base_steps,
            ])
            # Random augmentation must not be applied at evaluation time:
            # it makes the reported test accuracy noisy and pessimistic.
            self.eval_transform = transforms.Compose(base_steps)
        else:
            # An explicit transform is the caller's choice for both splits.
            self.transform = transform
            self.eval_transform = transform
        self.batch_size = batch_size
        self.num_workers = num_workers

    def load_data(self, train=True):
        """Return a ``DataLoader`` for the requested split.

        Args:
            train: ``True`` for the training split (shuffled), ``False`` for
                the test split (not shuffled).
        """
        dataset = torchvision.datasets.CIFAR10(
            root=self.root,
            train=train,
            download=True,
            transform=self.transform if train else self.eval_transform,
        )
        return DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=train,
            num_workers=self.num_workers,
        )

# Usage: build the train and test loaders with the factory's default settings
# (root='./data', batch_size=32, num_workers=2).
data_loader_factory = DataLoaderFactory()
train_loader = data_loader_factory.load_data(train=True)
test_loader = data_loader_factory.load_data(train=False)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Select the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Bare expression: shown as the cell output.
device

device(type='cuda')

In [None]:
import torch
import torchmetrics
import logging
from torch import nn
from torch.optim import SGD
from torch.nn import CrossEntropyLoss

class Model:
    """Bundles a network with its loss, optimizer and data source for training.

    Args:
        model: Network to train. Defaults to a one-hidden-layer ``MLP`` with
            100 hidden nodes and 10 classes.
        criterion: Loss module. Defaults to ``CrossEntropyLoss()``.
        optimizer: Optimizer over ``model``'s parameters. Defaults to
            ``SGD(lr=0.005)``.
        dataloader: Object exposing ``load_data(train=bool)``. Defaults to a
            ``DataLoaderFactory`` with batch size 32 and 4 workers.
    """

    def __init__(self, model=None, criterion=None, optimizer=None, dataloader=None):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Explicit `is None` checks: `x or default` would discard valid but
        # falsy objects (for example an empty nn.Sequential).
        if model is None:
            model = MLP(n_classes=10, n_hidden_nodes=100, image_width=32, image_height=32, color_channels=3)
        self.model = model
        self.model.to(self.device)
        self.criterion = CrossEntropyLoss() if criterion is None else criterion
        self.optimizer = SGD(self.model.parameters(), lr=0.005) if optimizer is None else optimizer
        if dataloader is None:
            dataloader = DataLoaderFactory(root='./data', batch_size=32, num_workers=4)
        self.dataloader = dataloader
        self.epochs = 0

    def _count_hidden_layers(self):
        """Return the number of hidden layers, i.e. ``nn.Linear`` modules minus the output layer."""
        # modules() yields each module object once, so layers reachable through
        # two attributes are not counted twice.
        n_linear = sum(1 for module in self.model.modules() if isinstance(module, nn.Linear))
        return max(n_linear - 1, 0)

    def train(self, epochs=10, write_log=False):
        """Train for ``epochs`` epochs, evaluating on the test split after each one.

        Args:
            epochs: Number of passes over the training loader.
            write_log: When true, per-epoch metrics are also written through
                the ``logging`` module to ``training_mlp_<n>_hidden_layers.log``.

        Returns:
            Average training loss of the last epoch, or NaN when ``epochs`` is 0.
        """
        self.epochs = epochs
        train_loader = self.dataloader.load_data(train=True)
        train_accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=10).to(self.device)
        test_loader = self.dataloader.load_data(train=False)
        test_accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=10).to(self.device)

        if write_log:
            # The former `len(children) // 2 + 1` formula reported 2 for every
            # network with more than one hidden layer.
            num_layers = self._count_hidden_layers()
            logging.basicConfig(filename=f'training_mlp_{num_layers}_hidden_layers.log', level=logging.INFO)
            logging.info("Training started\n")

        # Defined up front so that epochs == 0 returns NaN instead of raising.
        epoch_loss = float('nan')

        for epoch in range(epochs):
            running_loss = 0.0
            self.model.train()
            train_accuracy.reset()

            for inputs, labels in train_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                self.optimizer.zero_grad()
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()
                running_loss += loss.item()
                train_accuracy.update(outputs.argmax(dim=1), labels)

            epoch_loss = running_loss / len(train_loader)
            final_train_accuracy = train_accuracy.compute()
            print(f'Epoch [{epoch+1}/{epochs}]\n',
                  f'Loss: {epoch_loss:.4f}\n',
                  f'Train Accuracy: {final_train_accuracy * 100:.2f}\n',
                  '--------------------------------------------------\n')

            final_test_accuracy = self.evaluate(test_loader=test_loader, test_accuracy=test_accuracy)
            if write_log:
                logging.info(f"Epoch: {epoch + 1}, Loss: {epoch_loss:.4f}, Train accuracy: {final_train_accuracy}\t|\t Test accuracy: {final_test_accuracy}\n")

        print('======================Finished=========================')
        return epoch_loss

    def evaluate(self, test_loader=None, test_accuracy=None):
        """Compute and print accuracy over ``test_loader``.

        Args:
            test_loader: Loader to evaluate on; defaults to the test split of
                ``self.dataloader``.
            test_accuracy: Metric object to reuse; it is reset before use. A
                new multiclass accuracy metric is created when omitted.

        Returns:
            The value returned by the metric's ``compute()``.
        """
        # `is None` rather than `or`: a DataLoader defines __len__, so an empty
        # loader would be falsy and silently replaced.
        if test_loader is None:
            test_loader = self.dataloader.load_data(train=False)
        if test_accuracy is None:
            test_accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=10).to(self.device)
        self.model.eval()
        test_accuracy.reset()

        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = self.model(inputs)
                test_accuracy.update(outputs.argmax(dim=1), labels)

        final_test_accuracy = test_accuracy.compute()
        print(f'Test Accuracy: {final_test_accuracy * 100:.2f}\n',
              '--------------------------------------------------\n')
        return final_test_accuracy

    def predict(self, data):
        """Return a list with one tensor of predicted class indices per batch in ``data``.

        Args:
            data: Iterable of input batches (tensors); each is moved to
                ``self.device`` before the forward pass.
        """
        predictions = []
        self.model.eval()

        with torch.no_grad():
            for batch in data:
                batch = batch.to(self.device)
                outputs = self.model(batch)
                predictions.append(outputs.argmax(dim=1))

        return predictions

    def save(self, name=None):
        """Save model, optimizer and criterion state plus the epoch count under ``models/``.

        Args:
            name: Checkpoint file name; defaults to ``'mlp_checkpoint.pth'``.
        """
        import os  # local import: keeps this cell independent of the notebook's import cell

        parent = 'models'
        checkpoint_path = name or 'mlp_checkpoint.pth'
        path = f'{parent}/{checkpoint_path}'
        # torch.save does not create missing directories.
        os.makedirs(parent, exist_ok=True)
        checkpoint = {
            'model_state_dict': self.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'criterion_state_dict': self.criterion.state_dict(),
            'epochs': self.epochs
        }
        torch.save(checkpoint, path)
        print(f"Checkpoint saved to {path}")

In [6]:
# Train the default Model for one epoch; write_log=True additionally sends
# the per-epoch metrics to a log file through the logging module.
clf = Model()
clf.train(epochs=1, write_log=True)

Files already downloaded and verified
Files already downloaded and verified
Epoch [1/1]
 Loss: 1.9885
 Train Accuracy: 28.82
 --------------------------------------------------

Test Accuracy: 34.21
 --------------------------------------------------



1.9885345200888256

In [None]:
# clf.evaluate()

In [None]:
def hyper_tuning(model, epochs=1, param_grid=None):
    """Grid-search batch size, learning rate and optimizer by final training loss.

    Every combination is trained on its own deep copy of ``model`` so that all
    trials start from the same weights; ``model`` itself is left untouched.

    Args:
        model: Template network; copied for every trial.
        epochs: Number of epochs each trial is trained for.
        param_grid: Dict with the keys ``'batch_size'``, ``'learning_rate'``
            and ``'optimizer'`` (``'SGD'`` or ``'Adam'``), each mapping to a
            list of candidates. Defaults to the single combination
            ``batch_size=16, learning_rate=5e-3, optimizer='Adam'``.

    Returns:
        ``(best_params, best_loss)``; ``best_params`` is ``None`` when no
        trial produced a loss below infinity.

    Raises:
        ValueError: If an optimizer name is neither ``'SGD'`` nor ``'Adam'``.
    """
    import copy  # local import: keeps this cell independent of the notebook's import cell

    if param_grid is None:
        # Quick default. A wider search would be, for example:
        # {'batch_size': [16, 32, 64], 'learning_rate': [1e-3, 5e-3], 'optimizer': ['SGD', 'Adam']}
        param_grid = {
            'batch_size': [16],
            'learning_rate': [5e-3],
            'optimizer': ['Adam']
        }

    optimizer_classes = {'SGD': SGD, 'Adam': Adam}
    # Look the axes up by key so the result does not depend on dict ordering.
    param_combinations = list(product(
        param_grid['batch_size'],
        param_grid['learning_rate'],
        param_grid['optimizer'],
    ))

    best_params = None
    best_loss = float('inf')
    for batch_size, lr, opt in param_combinations:
        print(f"\nTesting with batch size={batch_size}, learning rate={lr}, optimizer={opt}")

        if opt not in optimizer_classes:
            raise ValueError(f"Unsupported optimizer {opt!r}; expected 'SGD' or 'Adam'")

        dataloader = DataLoaderFactory(root='./data', batch_size=batch_size)

        # Fresh copy per trial: otherwise later combinations would continue
        # from weights already trained by earlier ones and look better.
        trial_model = copy.deepcopy(model)
        optimizer = optimizer_classes[opt](trial_model.parameters(), lr=lr)

        trial = Model(model=trial_model, dataloader=dataloader, optimizer=optimizer)
        avg_loss = trial.train(epochs=epochs)

        if avg_loss < best_loss:
            best_loss = avg_loss
            best_params = {
                'batch_size': batch_size,
                'learning_rate': lr,
                'optimizer': opt
            }

    return best_params, best_loss


In [None]:
# MLP with the default single hidden layer (n_hidden_layers is not passed),
# handed to the hyper-parameter search below.
model = MLP(n_classes=10, n_hidden_nodes=100, image_width=32, image_height=32, color_channels=3)

hyper_tuning(model)

In [None]:
# Write the checkpoint of `clf`; Model.save places it under the 'models' directory.
clf.save('mlp_checkpoint.pth')

In [None]:
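# Refactored version of the code above. It runs directly in this cell: the string wrapper and the file write that would save it to a Python file are commented out below.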

# refactored_code = """
# Import necessary libraries
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision
from torch.optim import SGD, Adam
from torch.nn import CrossEntropyLoss
import torchmetrics
import logging
from itertools import product

# Define MLP model
class MLP(nn.Module):
    """Multi-layer perceptron that classifies flattened images.

    The stack is ``Linear -> ReLU``, followed by ``n_hidden_layers - 1``
    additional ``Linear -> ReLU`` pairs, followed by the output ``Linear``.

    Args:
        n_hidden_nodes: Number of units in each hidden layer.
        n_classes: Number of output units.
        image_width: Width used to compute the flattened input size.
        image_height: Height used to compute the flattened input size.
        color_channels: Channel count used to compute the flattened input size.
        n_hidden_layers: Number of hidden layers; must be at least 1.

    Raises:
        ValueError: If ``n_hidden_layers`` is smaller than 1.
    """

    def __init__(self, n_hidden_nodes, n_classes, image_width=32, image_height=32, color_channels=3, n_hidden_layers=1):
        super(MLP, self).__init__()
        # Values below 1 used to yield a one-hidden-layer network without any warning.
        if n_hidden_layers < 1:
            raise ValueError(f"n_hidden_layers must be >= 1, got {n_hidden_layers}")

        input_size = image_width * image_height * color_channels
        self.layers = nn.Sequential(
            nn.Linear(input_size, n_hidden_nodes),
            nn.ReLU(),
            nn.Linear(n_hidden_nodes, n_classes)
        )

        if n_hidden_layers > 1:
            # The additional hidden layers are grouped in one Sequential that is
            # inserted right before the output layer. Names and creation order
            # are unchanged so existing state_dict keys stay valid.
            self.added_layers = nn.Sequential()
            for i in range(n_hidden_layers - 1):
                self.added_layers.add_module(str(2 * (i + 1) + 1), nn.Linear(n_hidden_nodes, n_hidden_nodes))
                self.added_layers.add_module(str(2 * (i + 1) + 2), nn.ReLU())
            layers = list(self.layers)
            layers.insert(2, self.added_layers)
            self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten each sample and run it through ``self.layers``."""
        # reshape instead of view: view raises on non-contiguous inputs
        # (e.g. a permuted channels-last batch), reshape copies when needed.
        x = x.reshape(x.size(0), -1)
        return self.layers(x)


# Data loading utility
class Data:
    """CIFAR-10 loader factory with separate train and evaluation transforms.

    Args:
        root: Dataset directory passed to ``torchvision.datasets.CIFAR10``.
        batch_size: Batch size of the returned loaders. It is read on every
            ``load_data`` call, so it may be reassigned between calls.
        num_workers: Number of worker processes of the returned loaders.

    Attributes:
        transform: Training transform (random flip + random crop, then tensor
            conversion and normalisation).
        eval_transform: Test transform (tensor conversion and normalisation only).
    """

    def __init__(self, root='./data', batch_size=32, num_workers=2):
        self.root = root
        self.batch_size = batch_size
        self.num_workers = num_workers
        base_steps = [
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]
        self.transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, padding=4),
            *base_steps,
        ])
        # Random augmentation is a training-time technique; applying it to the
        # test split makes the measured accuracy noisy and pessimistic.
        self.eval_transform = transforms.Compose(base_steps)

    def load_data(self, train=True):
        """Return a ``DataLoader`` over the train (shuffled) or test (unshuffled) split."""
        split_transform = self.transform if train else self.eval_transform
        dataset = torchvision.datasets.CIFAR10(root=self.root, train=train, download=True, transform=split_transform)
        return DataLoader(dataset, batch_size=self.batch_size, shuffle=train, num_workers=self.num_workers)

# Model training, evaluation, and saving
class ModelTrainer:
    """Trains, evaluates and saves a classifier.

    Args:
        model: Network to train; moved to the selected device.
        dataloader: Object exposing ``load_data(train=bool)`` that returns an
            iterable of ``(inputs, labels)`` batches.
        optimizer: Optimizer over ``model``'s parameters.
        criterion: Loss callable applied to ``(outputs, labels)``.
        epochs: Number of epochs run by ``train``.
        log_path: When given, ``logging.basicConfig`` is called with this file
            name at INFO level.
    """

    def __init__(self, model, dataloader, optimizer, criterion=CrossEntropyLoss(), epochs=10, log_path=None):
        # Resolve the device once and reuse it for the model and the batches.
        self.device = self._get_device()
        self.model = model.to(self.device)
        self.dataloader = dataloader
        self.optimizer = optimizer
        self.criterion = criterion
        self.epochs = epochs
        if log_path:
            logging.basicConfig(filename=log_path, level=logging.INFO)

    def _get_device(self):
        """Return the CUDA device when available, otherwise the CPU device."""
        return torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def train(self):
        """Run ``self.epochs`` training epochs, printing loss and accuracy per epoch.

        Returns:
            Average training loss of the last epoch, or NaN when
            ``self.epochs`` is 0. Callers that compare trials (such as a
            hyper-parameter search) rely on this value.
        """
        train_loader = self.dataloader.load_data(train=True)
        accuracy_metric = torchmetrics.Accuracy(task="multiclass", num_classes=10).to(self.device)

        # Defined before the loop so that zero epochs still returns a float.
        avg_loss = float('nan')

        for epoch in range(self.epochs):
            running_loss = 0.0
            self.model.train()
            accuracy_metric.reset()

            for inputs, labels in train_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                self.optimizer.zero_grad()

                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()

                running_loss += loss.item()
                accuracy_metric.update(outputs.argmax(dim=1), labels)

            avg_loss = running_loss / len(train_loader)
            avg_accuracy = accuracy_metric.compute().item() * 100
            print(f'Epoch [{epoch+1}/{self.epochs}], Loss: {avg_loss:.4f}, Accuracy: {avg_accuracy:.2f}%')
            # Only emits when the root logger is enabled for INFO.
            if logging.getLogger().isEnabledFor(logging.INFO):
                logging.info(f"Epoch {epoch+1}/{self.epochs}: Loss={avg_loss:.4f}, Accuracy={avg_accuracy:.2f}%")

        return avg_loss

    def evaluate(self):
        """Compute, print and return the accuracy (in percent) on the test split."""
        test_loader = self.dataloader.load_data(train=False)
        accuracy_metric = torchmetrics.Accuracy(task="multiclass", num_classes=10).to(self.device)
        self.model.eval()

        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = self.model(inputs)
                accuracy_metric.update(outputs.argmax(dim=1), labels)

        accuracy = accuracy_metric.compute().item() * 100
        print(f'Test Accuracy: {accuracy:.2f}%')
        return accuracy

    def save_model(self, path='mlp_checkpoint.pth'):
        """Save the model's ``state_dict`` to ``path``."""
        torch.save(self.model.state_dict(), path)
        print(f'Model saved to {path}')

# Hyperparameter tuning function
def hyperparameter_tuning(model, param_grid, dataloader, epochs=1):
    """Grid-search batch size, learning rate and optimizer by final training loss.

    Each combination is trained on its own deep copy of ``model`` so that all
    trials start from identical weights; ``model`` itself is not modified.

    Args:
        model: Template network; copied for every trial.
        param_grid: Dict whose values are, in order, the candidate batch
            sizes, learning rates and optimizer names. ``'SGD'`` selects SGD;
            any other name selects Adam.
        dataloader: Data source; its ``batch_size`` attribute is overwritten
            for every trial.
        epochs: Number of epochs each trial is trained for.

    Returns:
        Dict with the keys ``'batch_size'``, ``'lr'`` and ``'optimizer'`` of
        the best trial, or ``None`` when no trial beat an infinite loss.

    Raises:
        TypeError: If the trainer's ``train()`` does not return a loss.
    """
    import copy  # local import: keeps this cell independent of the notebook's import cell

    best_params = None
    best_loss = float('inf')

    for batch_size, lr, opt_name in product(*param_grid.values()):
        # A real newline; the doubled backslash printed a literal "\n".
        print(f'\nTesting with batch_size={batch_size}, lr={lr}, optimizer={opt_name}')

        # Fresh copy per trial: otherwise later combinations would continue
        # from weights already trained by earlier ones.
        trial_model = copy.deepcopy(model)
        optimizer_cls = SGD if opt_name == 'SGD' else Adam
        optimizer = optimizer_cls(trial_model.parameters(), lr=lr)
        dataloader.batch_size = batch_size
        trainer = ModelTrainer(trial_model, dataloader, optimizer, epochs=epochs)

        loss = trainer.train()
        if loss is None:
            raise TypeError(
                "trainer.train() returned None; it must return the final "
                "average training loss so that trials can be compared"
            )
        if loss < best_loss:
            best_loss = loss
            best_params = {'batch_size': batch_size, 'lr': lr, 'optimizer': opt_name}

    print(f'Best params: {best_params}, Loss: {best_loss:.4f}')
    return best_params

# # Execution script
# if __name__ == "__main__":
#     model = MLP(n_hidden_nodes=100, n_classes=10)
#     data = Data(batch_size=32)
#     optimizer = SGD(model.parameters(), lr=0.005)
#     trainer = ModelTrainer(model, data, optimizer, epochs=10, log_path="training.log")

#     # Training
#     trainer.train()
    
#     # Evaluation
#     trainer.evaluate()

#     # Hyperparameter tuning
#     param_grid = {
#         'batch_size': [16, 32],
#         'lr': [1e-3, 5e-3],
#         'optimizer': ['SGD', 'Adam']
#     }
#     hyperparameter_tuning(model, param_grid, data, epochs=1)

#     # Save model
#     trainer.save_model("mlp_checkpoint.pth")
# # # """

# # Write the refactored code to a Python file
# with open("/mnt/data/refactored_code.py", "w", encoding="utf-8") as file:
#     file.write(refactored_code)

# "/mnt/data/refactored_code.py"


In [None]:
# Build a two-hidden-layer MLP and train it for one epoch with SGD.
# Passing log_path makes ModelTrainer configure file logging to "training.log".
model = MLP(n_hidden_nodes=100, n_classes=10, n_hidden_layers=2)
data = Data(batch_size=32)
optimizer = SGD(model.parameters(), lr=0.005)
trainer = ModelTrainer(model, data, optimizer, epochs=1, log_path="training.log")

# Training
trainer.train()