# Cerebro

#### <font color='blue'>Imports</font>

In [None]:
from cerebro.etl.etl_spec import ETLSpec
from cerebro.experiment import Experiment
from cerebro.mop.minibatch_spec import MiniBatchSpec

### <font color='blue'> Initialize Data Preprocessing </font>

In [None]:
class ImagenetETLSpec(ETLSpec):
    """ETL spec that converts ImageNet metadata rows into normalized image tensors.

    Heavy imports are kept inside the methods, as in the rest of this file,
    so the class definition itself has no import-time dependencies.
    """

    def __init__(self):
        """No state is needed by this spec."""
        pass

    def initialize_worker(self):
        """No per-worker setup is needed by this spec."""
        pass

    def read_misc(self, misc_path):
        """No miscellaneous files are consumed during ETL."""
        pass

    def set_features(self):
        """Return the five per-column boolean flags used by the ETL stage.

        NOTE(review): the meaning of each position is defined by ETLSpec and
        the metadata CSV layout, neither of which is visible here -- presumably
        the single True marks the column that row_prep must process; confirm.
        """
        return [False, False, True, False, False]

    def row_prep(self, row, mode, object_dir):
        """Load, resize and normalize one image and pair it with its label.

        Args:
            row: Mapping with a "filepath" entry (path relative to
                ``object_dir``) and, outside predict mode, a "label" entry.
            mode: When equal to ``'predict'`` no label is read.
            object_dir: Directory that ``row["filepath"]`` is relative to.

        Returns:
            ``(image, label)`` where ``image`` is a float32 tensor of shape
            (3, 112, 112) and ``label`` is ``torch.tensor(row["label"])``,
            or ``None`` in predict mode.
        """
        import torch
        import numpy as np
        from PIL import Image

        input_image_path = object_dir + "/" + str(row["filepath"])

        # Use a context manager so the underlying file handle is released
        # promptly; convert() forces the pixel data to be loaded first.
        with Image.open(input_image_path) as pil_image:
            rgb_image = pil_image.convert('RGB').resize((112, 112))

        # Scale to [0, 1], then standardize each RGB channel. The array is
        # (height, width, channel), so the 3-element lists broadcast over the
        # last axis.
        image = np.asarray(rgb_image) / 255.0
        image = image - [0.485, 0.456, 0.406]
        image = image / [0.229, 0.224, 0.225]

        torch_image = torch.from_numpy(image).float()
        # HWC -> CHW. This must be a permute: a reshape keeps the memory order
        # and merely relabels the axes, which interleaves the colour channels
        # across pixels and corrupts the image.
        image = torch_image.permute(2, 0, 1).contiguous()

        if mode == 'predict':
            return image, None

        label = torch.tensor(row["label"])
        return image, label

### <font color='blue'> Initialize Model Building </font>

In [None]:
class ImagenetTrainingSpec(MiniBatchSpec):
    """Mini-batch training/evaluation/prediction spec for ImageNet classifiers.

    Heavy imports are kept inside the methods, as in the rest of this file,
    so the class definition itself has no import-time dependencies.
    """

    def __init__(self):
        """Create the loss function and the log-softmax used for metrics."""
        import torch.nn as nn

        self.criterion = nn.CrossEntropyLoss()
        # Model outputs are (batch, classes); name the class axis explicitly
        # instead of relying on the deprecated implicit-dim behaviour.
        self.log_softmax = nn.LogSoftmax(dim=1)

    def initialize_worker(self):
        """No per-worker setup is needed by this spec."""
        pass

    def read_misc(self, misc_path):
        """Load the class-name -> index mapping used by ``predict``.

        Args:
            misc_path: Directory containing ``imagenet_label_mapping.json``.
        """
        import os
        import json

        path = os.path.join(misc_path, "imagenet_label_mapping.json")
        with open(path) as f:
            self.class_to_idx = json.load(f)

    def create_model_components(self, hyperparams):
        """Build an untrained model and its Adam optimizer.

        Args:
            hyperparams: Mapping with "learning_rate", "lambda_value" (used as
                Adam weight decay) and "model_type" ("resnet50" or "vgg16").

        Returns:
            Dict with keys "imagenet_model" and "optimizer".

        Raises:
            ValueError: If "model_type" is not one of the supported names.
        """
        import torch
        import warnings
        from torchvision import models
        # NOTE: this silences *all* warnings process-wide (kept from the
        # original behaviour).
        warnings.filterwarnings("ignore")

        learning_rate = hyperparams["learning_rate"]
        lambda_value = hyperparams["lambda_value"]
        model_type = hyperparams["model_type"]

        if model_type == "resnet50":
            model = models.resnet50(pretrained=False)
        elif model_type == "vgg16":
            model = models.vgg16(pretrained=False)
        else:
            # Fail here with a clear message rather than later with
            # "'NoneType' object has no attribute 'parameters'".
            raise ValueError(
                "Unsupported model_type %r; expected 'resnet50' or 'vgg16'"
                % (model_type,)
            )

        # Define the optimizer
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=learning_rate,
                                     weight_decay=lambda_value)

        model_object = {
            "imagenet_model": model,
            "optimizer": optimizer
        }

        return model_object

    def accuracy(self, output, target, topk=(1,), binary=False):
        """Compute the precision@k for the specified values of k.

        Args:
            output: Score tensor of shape (batch, classes).
            target: Tensor of true class indices, one per batch element.
            topk: Iterable of k values; each k is effectively capped at the
                number of classes.
            binary: If True, ignore ``topk`` and return plain top-1 accuracy.

        Returns:
            List of 0-d tensors, one per entry of ``topk`` (a single-element
            list when ``binary`` is True), each the fraction of the batch
            whose target is within the top-k predictions.
        """
        import torch

        if binary:
            batch_size = target.size(0)
            _, pred = torch.max(output.data, 1)
            correct = (pred == target).sum().item()
            res = [torch.tensor(correct / batch_size)]
        else:
            maxk = max(topk)
            maxk = min(maxk, output.shape[1])
            batch_size = target.size(0)

            # pred becomes (maxk, batch): row i holds the (i+1)-th best guess.
            _, pred = output.topk(maxk, 1, True, True)
            pred = pred.t()
            correct = pred.eq(target.view(1, -1).expand_as(pred))
            res = []
            for k in topk:
                correct_k = correct[:k].reshape(-1).float().sum(0)
                res.append(correct_k.mul_(1.0 / batch_size))
        return res

    def metrics_agg(self, mode, hyperparams, metrics):
        """Aggregate per-minibatch metrics into per-epoch metrics and print them.

        Args:
            mode: "train", "val" or "test"; any other value yields ``{}``.
            hyperparams: Mapping with "batch_size".
            metrics: Mapping from metric name to the sequence of per-minibatch
                values ("minibatch_*" keys for train, "epoch_*" keys for
                val/test, matching what ``train`` / ``val_test`` return).

        Returns:
            Dict with "epoch_loss", "epoch_top_1_acc" and "epoch_top_5_acc",
            or an empty dict for an unrecognised mode.
        """
        batch_size = hyperparams["batch_size"]
        updated_metrics = {}
        if mode == "train":
            # Average over the number of recorded minibatches. len(metrics)
            # would be the number of dict keys, not the number of minibatches.
            # max(..., 1) keeps an empty epoch at 0 instead of dividing by 0.
            num_batches = max(len(metrics["minibatch_loss"]), 1)
            # NOTE(review): CrossEntropyLoss defaults to mean reduction, so the
            # extra division by batch_size looks like double normalisation;
            # kept so the reported loss scale does not change -- confirm.
            updated_metrics = {
                "epoch_loss": sum(metrics["minibatch_loss"]) / (batch_size * num_batches),
                "epoch_top_1_acc": sum(metrics["minibatch_top_1_acc"]) / num_batches,
                "epoch_top_5_acc": sum(metrics["minibatch_top_5_acc"]) / num_batches
            }

            stats = "Train Metrics: epoch_loss: %.4f, epoch_top_1_acc: %5.4f, , epoch_top_5_acc: %5.4f"\
                    % (updated_metrics["epoch_loss"], updated_metrics["epoch_top_1_acc"],
                       updated_metrics["epoch_top_5_acc"])
            print(stats)

        elif mode == "val" or mode == "test":
            # Same averaging fix and same batch_size caveat as the train branch.
            num_batches = max(len(metrics["epoch_loss"]), 1)
            updated_metrics = {
                "epoch_loss": sum(metrics["epoch_loss"]) / (batch_size * num_batches),
                "epoch_top_1_acc": sum(metrics["epoch_top_1_acc"]) / num_batches,
                "epoch_top_5_acc": sum(metrics["epoch_top_5_acc"]) / num_batches
            }

            stats = "Validation/Test Metrics:  loss: %.4f, top_1_acc: %5.4f, , top_5_acc: %5.4f" \
                    % (updated_metrics["epoch_loss"], updated_metrics["epoch_top_1_acc"],
                       updated_metrics["epoch_top_5_acc"])
            print(stats)

        return updated_metrics

    def train(self, model_object, minibatch, hyperparams, device):
        """Run one optimisation step on a single minibatch.

        Args:
            model_object: Dict with "imagenet_model" and "optimizer".
            minibatch: Pair of (images tensor, labels).
            hyperparams: Unused here.
            device: Device the model and the minibatch are moved to.

        Returns:
            ``(updated_model_object, metrics)`` where ``metrics`` holds
            "minibatch_loss", "minibatch_top_1_acc" and "minibatch_top_5_acc"
            as Python floats.
        """
        import torch

        model = model_object["imagenet_model"]
        optimizer = model_object["optimizer"]
        model.train()

        model.to(device)
        images, labels = minibatch[0].to(device), torch.tensor(minibatch[1]).to(device)
        outputs = model(images)
        loss = self.criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Gradients are cleared after the step, so the returned model carries
        # no stale gradients into the next call.
        optimizer.zero_grad()
        outputs_softmax = self.log_softmax(outputs)

        # Print the loss value
        print(f'Loss: {loss.item()}')

        top_1_acc, top_5_acc = self.accuracy(outputs_softmax, labels, (1, 5))
        metrics = {
            "minibatch_loss": loss.item(),
            "minibatch_top_1_acc": top_1_acc.item(),
            "minibatch_top_5_acc": top_5_acc.item()
        }

        updated_model_object = {
            "imagenet_model": model,
            "optimizer": optimizer
        }
        return updated_model_object, metrics

    def val_test(self, model_object, minibatch, hyperparams, device):
        """Evaluate the model on one minibatch without updating it.

        Args:
            model_object: Dict with "imagenet_model".
            minibatch: Pair of (images tensor, labels).
            hyperparams: Unused here.
            device: Device the model and the minibatch are moved to.

        Returns:
            Dict with "epoch_loss", "epoch_top_1_acc" and "epoch_top_5_acc"
            for this minibatch, as Python floats.
        """
        import torch

        model = model_object["imagenet_model"]
        model.eval()
        # Match train/predict: inputs are moved to `device`, so the model must
        # be there too or the forward pass fails with a device mismatch.
        model.to(device)

        with torch.no_grad():
            images, labels = minibatch[0].to(device), torch.tensor(minibatch[1]).to(device)
            outputs = model(images)
            loss = self.criterion(outputs, labels)
            outputs_softmax = self.log_softmax(outputs)
            top_1_acc, top_5_acc = self.accuracy(outputs_softmax, labels, (1, 5))

        metrics = {
            "epoch_loss": loss.item(),
            "epoch_top_1_acc": top_1_acc.item(),
            "epoch_top_5_acc": top_5_acc.item()
        }

        return metrics

    def predict(self, model_object, minibatch, hyperparams, device):
        """Predict the most likely class for every image in a minibatch.

        Requires ``read_misc`` to have populated ``self.class_to_idx``.

        Args:
            model_object: Dict with "imagenet_model".
            minibatch: Sequence whose first element is the images tensor.
            hyperparams: Unused here.
            device: Device the model and the images are moved to.

        Returns:
            ``(top_classes, top_probabilities)``: two lists with one entry per
            image -- the predicted class label and its softmax probability.
        """
        import torch

        model = model_object["imagenet_model"]
        model.eval()
        model.to(device)

        images = minibatch[0].to(device)
        with torch.no_grad():
            output = model(images)

        # Normalise over the class axis (dim=1). dim=0 would normalise each
        # class across the batch, so a prediction would depend on which other
        # images happen to share its minibatch.
        probabilities = torch.nn.functional.softmax(output, dim=1)
        top_probabilities, top_indices = torch.topk(probabilities, 1)

        # Convert indices to class labels
        idx_to_class = {idx: label for label, idx in self.class_to_idx.items()}
        top_classes = [idx_to_class[idx.item()] for idx in top_indices]
        top_probabilities = [i.item() for i in top_probabilities]

        return top_classes, top_probabilities

#### <font color='blue'> Model Building specifications </font>

In [None]:
# Number of training epochs, passed to experiment.run_fit.
num_epochs = 2
# Hyperparameter search space, passed to experiment.run_fit. The keys are the
# ones ImagenetTrainingSpec reads: 'learning_rate', 'lambda_value' (Adam weight
# decay) and 'model_type' in create_model_components, 'batch_size' in
# metrics_agg.
# NOTE(review): presumably every combination of the listed values is trained
# (2 x 2 x 2 x 1 = 8 configurations) -- confirm against Experiment.run_fit.
param_grid = {
    'batch_size': [128, 256],
    'learning_rate': [1e-2, 1e-3],
    'lambda_value': [1e-3, 1e-4],
    'model_type': ['resnet50']
}

#### <font color='blue'> Initialize Experiment </font>

In [None]:
# Dataset and output locations handed to Experiment(params).
# NOTE(review): key semantics are defined by Experiment, which is not visible
# here; judging by the names, "*_main" are per-split metadata CSVs and "*_dir"
# the matching image directories -- confirm.
# NOTE: the "test_*" entries point at the same files as the "val_*" entries,
# so run_test evaluates on the validation split.
params = {
    "train_main": "/voyager/ceph/users/prsridha/datasets/imagenet/Metadata/train.csv",
    "val_main": "/voyager/ceph/users/prsridha/datasets/imagenet/Metadata/valid.csv",
    "test_main": "/voyager/ceph/users/prsridha/datasets/imagenet/Metadata/valid.csv",
    "predict_main": "/voyager/ceph/users/prsridha/datasets/imagenet/Metadata/test.csv",
    "train_dir": "/voyager/ceph/users/prsridha/datasets/imagenet/Data/CLS-LOC/train",
    "val_dir": "/voyager/ceph/users/prsridha/datasets/imagenet/Data/CLS-LOC/val",
    "test_dir": "/voyager/ceph/users/prsridha/datasets/imagenet/Data/CLS-LOC/val",
    "predict_dir": "/voyager/ceph/users/prsridha/datasets/imagenet/Data/CLS-LOC/test",
    # Extra files; ImagenetTrainingSpec.read_misc opens
    # "imagenet_label_mapping.json" by name.
    "misc": [
        "/voyager/ceph/users/prsridha/datasets/imagenet/Metadata/imagenet_label_mapping.json"
    ],
    "etl_dir": "/voyager/ceph/users/prsridha/datasets/imagenet/ProcessedData",
    "models_dir": "/voyager/ceph/users/prsridha/datasets/imagenet/SavedModels",
    "output_dir": "/voyager/ceph/users/prsridha/datasets/imagenet/SavedArtifacts"
}

In [None]:
# Create the experiment from the path configuration, plus one instance of each
# user-defined spec to hand to its run_* methods.
experiment = Experiment(params)
imagenet_etl_spec = ImagenetETLSpec()
imagenet_training_spec = ImagenetTrainingSpec()

#### <font color='blue'> Run Data Preprocessing </font>

In [None]:
# Run data preprocessing with the ImageNet ETL spec.
# NOTE(review): fraction=0.1 presumably restricts ETL to 10% of the data --
# confirm against Experiment.run_etl.
experiment.run_etl(imagenet_etl_spec, fraction=0.1)

#### <font color='blue'> Run Model Building </font>

In [None]:
# Train over the hyperparameter grid for num_epochs epochs.
experiment.run_fit(imagenet_training_spec, param_grid, num_epochs)

In [None]:
# Evaluate a saved model with the training spec.
# NOTE(review): the path is presumably relative to params["models_dir"] and
# 128 is presumably the batch size -- confirm against Experiment.run_test.
experiment.run_test(imagenet_training_spec, "model_3/model_object_3.pt", 128)

In [None]:
# Run prediction with the same saved model.
# NOTE(review): the path is presumably relative to params["models_dir"] and
# 128 is presumably the batch size -- confirm against Experiment.run_predict.
experiment.run_predict(imagenet_training_spec, "model_3/model_object_3.pt", 128)