# Cerebro

#### <font color='blue'>Imports</font>

In [None]:
from cerebro.etl.etl_spec import ETLSpec
from cerebro.experiment import Experiment
from cerebro.mop.sub_epoch_spec import SubEpochSpec

### <font color='blue'> Initialize Data Preprocessing </font>

In [None]:
class ImagenetETLSpec(ETLSpec):
    """ETL specification that converts metadata rows into normalized image tensors.

    Each row is expected to carry a ``"filepath"`` entry (image location relative
    to ``object_dir``) and, outside of predict mode, a ``"label"`` entry.
    """

    def __init__(self):
        """No state is needed for this spec."""
        pass

    def initialize_worker(self):
        """Per-worker setup hook; nothing to initialize here."""
        pass

    def read_misc(self, misc_path):
        """Hook for reading miscellaneous files; unused by this spec."""
        pass

    def set_features(self):
        """Return the per-column boolean flags used by the ETL framework.

        NOTE(review): the meaning and column order of these five flags are
        defined by ``ETLSpec`` and are not visible here - confirm against the
        base class before changing them.
        """
        return [False, False, True, False, False]

    def row_prep(self, row, mode, object_dir):
        """Load, resize and normalize the image referenced by ``row``.

        Args:
            row: mapping with a ``"filepath"`` key and, unless
                ``mode == 'predict'``, a ``"label"`` key.
            mode: processing mode; ``'predict'`` suppresses the label.
            object_dir: directory prefix joined to ``row["filepath"]`` with "/".

        Returns:
            ``(image, label)`` where ``image`` is a float32 tensor of shape
            ``(3, 112, 112)`` (channels first) and ``label`` is a tensor built
            from ``row["label"]``, or ``None`` in predict mode.
        """
        import torch
        import numpy as np
        from PIL import Image

        input_image_path = object_dir + "/" + str(row["filepath"])

        # The context manager releases the underlying file handle; the array
        # is materialized inside the block so no lazy-loaded data is lost.
        with Image.open(input_image_path) as pil_image:
            image = np.asarray(pil_image.convert('RGB').resize((112, 112)))

        # Scale to [0, 1], then apply the standard ImageNet per-channel
        # mean / std normalization (broadcast over the trailing RGB axis).
        image = image / 255.0
        image = image - [0.485, 0.456, 0.406]
        image = image / [0.229, 0.224, 0.225]

        torch_image = torch.from_numpy(image).float()
        # HWC -> CHW must be an axis permutation. A reshape would merely
        # reinterpret the flat buffer and interleave pixels across channels.
        image = torch_image.permute(2, 0, 1).contiguous()
        if mode == 'predict':
            return image, None
        else:
            label = torch.tensor(row["label"])
            return image, label

### <font color='blue'> Initialize Model Building </font>

In [None]:
class ImagenetTrainingSpec(SubEpochSpec):
    """Training specification for ImageNet classification with ResNet-50 or VGG-16."""

    def __init__(self):
        """Create the loss function and the log-softmax layer (on GPU when available)."""
        import torch
        import torch.nn as nn

        self.criterion = nn.CrossEntropyLoss()
        # dim=1 is the class axis of (batch, classes) logits; stating it
        # explicitly matches the legacy implicit choice for 2-D input and
        # avoids the deprecation warning.
        self.log_softmax = nn.LogSoftmax(dim=1)
        if torch.cuda.is_available():
            self.criterion = self.criterion.cuda()
            self.log_softmax = self.log_softmax.cuda()

    def initialize_worker(self):
        """Per-worker setup hook; nothing to initialize here."""
        pass

    def create_model_components(self, hyperparams):
        """Build the model and its optimizer from a hyperparameter configuration.

        Args:
            hyperparams: mapping with ``"learning_rate"``, ``"lambda_value"``
                (used as Adam weight decay) and ``"model_type"``
                (``"resnet50"`` or ``"vgg16"``).

        Returns:
            Dict with keys ``"imagenet_model"`` and ``"optimizer"``.

        Raises:
            ValueError: if ``model_type`` is not a supported architecture.
        """
        import torch
        from torchvision import models

        learning_rate = hyperparams["learning_rate"]
        lambda_value = hyperparams["lambda_value"]
        model_type = hyperparams["model_type"]

        if model_type == "resnet50":
            model = models.resnet50(pretrained=False)
        elif model_type == "vgg16":
            model = models.vgg16(pretrained=False)
        else:
            # Fail with a clear message instead of an UnboundLocalError below.
            raise ValueError(
                "Unsupported model_type %r; expected 'resnet50' or 'vgg16'" % (model_type,)
            )

        # Define the optimizer
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=learning_rate,
                                     weight_decay=lambda_value)

        model_object = {
            "imagenet_model": model,
            "optimizer": optimizer
        }

        return model_object

    def accuracy(self, output, target, topk=(1,), binary=False):
        """Compute the precision@k for the specified values of k.

        Args:
            output: per-class scores of shape ``(batch, classes)``.
            target: ground-truth class indices of shape ``(batch,)``.
            topk: iterable of k values to evaluate (ignored when ``binary``).
            binary: when true, return plain top-1 accuracy only.

        Returns:
            List of scalar tensors: one entry per k in ``topk``, or a single
            entry when ``binary`` is true.
        """
        import torch

        if binary:
            batch_size = target.size(0)
            _, pred = torch.max(output.detach(), 1)
            correct = (pred == target).sum().item()
            res = [torch.tensor(correct / batch_size)]
        else:
            # Never request more predictions than there are classes.
            maxk = min(max(topk), output.shape[1])
            batch_size = target.size(0)

            _, pred = output.topk(maxk, 1, True, True)
            pred = pred.t()
            correct = pred.eq(target.view(1, -1).expand_as(pred))
            res = []
            for k in topk:
                correct_k = correct[:k].reshape(-1).float().sum(0)
                res.append(correct_k.mul_(1.0 / batch_size))
        return res

    def metrics_agg(self, mode, hyperparams, metrics):
        """Aggregate per-minibatch metrics into averages and print a summary.

        Args:
            mode: ``"train"`` or ``"val"``.
            hyperparams: mapping providing ``"batch_size"``.
            metrics: mapping from metric name to the sequence of per-minibatch
                values, keyed as produced by ``train`` / ``val_test``.

        Returns:
            Dict of aggregated metrics with the same keys as the inputs.

        Raises:
            ValueError: if ``mode`` is neither ``"train"`` nor ``"val"``.
        """
        batch_size = hyperparams["batch_size"]
        if mode == "train":
            # Average over the number of recorded minibatches. len(metrics)
            # would count the metric names when metrics is a plain dict.
            num_batches = len(metrics["minibatch_loss"])
            # NOTE(review): CrossEntropyLoss already averages over the batch
            # by default, so dividing by batch_size rescales the loss again;
            # kept unchanged for backward compatibility - confirm intent.
            updated_metrics = {
                "minibatch_loss": sum(metrics["minibatch_loss"]) / (batch_size * num_batches),
                "minibatch_top_1_acc": sum(metrics["minibatch_top_1_acc"]) / num_batches,
                "minibatch_top_5_acc": sum(metrics["minibatch_top_5_acc"]) / num_batches
            }

            stats = "Train Metrics: minibatch_loss: %.4f, minibatch_top_1_acc: %5.4f, , minibatch_top_5_acc: %5.4f"\
                    % (updated_metrics["minibatch_loss"], updated_metrics["minibatch_top_1_acc"],
                       updated_metrics["minibatch_top_5_acc"])
            print(stats)

        elif mode == "val":
            num_batches = len(metrics["total_epoch_loss"])
            updated_metrics = {
                "total_epoch_loss": sum(metrics["total_epoch_loss"]) / (batch_size * num_batches),
                "total_epoch_top_1_acc": sum(metrics["total_epoch_top_1_acc"]) / num_batches,
                "total_epoch_top_5_acc": sum(metrics["total_epoch_top_5_acc"]) / num_batches
            }

            stats = "Validation Metrics:  loss: %.4f, top_1_acc: %5.4f, , top_5_acc: %5.4f" \
                    % (updated_metrics["total_epoch_loss"], updated_metrics["total_epoch_top_1_acc"],
                       updated_metrics["total_epoch_top_5_acc"])
            print(stats)

        else:
            # Previously fell through to an UnboundLocalError at the return.
            raise ValueError("Unsupported mode %r; expected 'train' or 'val'" % (mode,))

        return updated_metrics

    def train(self, model_object, minibatch, hyperparams, device):
        """Run one optimization step on a minibatch.

        Args:
            model_object: dict with ``"imagenet_model"`` and ``"optimizer"``.
            minibatch: pair ``(images, labels)``.
            hyperparams: hyperparameter mapping (unused here).
            device: torch device the batch is moved to.

        Returns:
            ``(updated_model_object, metrics)`` where ``metrics`` holds the
            minibatch loss and top-1 / top-5 accuracy as Python floats.
        """
        import torch

        model = model_object["imagenet_model"]
        optimizer = model_object["optimizer"]
        model.train()

        # as_tensor accepts both tensors and plain sequences and avoids the
        # copy (and warning) torch.tensor() incurs on an existing tensor.
        images, labels = minibatch[0].to(device), torch.as_tensor(minibatch[1]).to(device)
        outputs = model(images)
        loss = self.criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # Gradients are cleared after the step so the returned model object
        # carries no stale gradients into the next call.
        model.zero_grad()
        optimizer.zero_grad()
        outputs_softmax = self.log_softmax(outputs)

        top_1_acc, top_5_acc = self.accuracy(outputs_softmax, labels, (1, 5))
        metrics = {
            "minibatch_loss": loss.item(),
            "minibatch_top_1_acc": top_1_acc.item(),
            "minibatch_top_5_acc": top_5_acc.item()
        }

        updated_model_object = {
            "imagenet_model": model,
            "optimizer": optimizer
        }
        return updated_model_object, metrics

    def val_test(self, model_object, minibatch, hyperparams, device):
        """Evaluate a minibatch without gradient tracking.

        Args:
            model_object: dict with ``"imagenet_model"``.
            minibatch: pair ``(images, labels)``.
            hyperparams: hyperparameter mapping (unused here).
            device: torch device the batch is moved to.

        Returns:
            Dict with the loss and top-1 / top-5 accuracy as Python floats.
        """
        import torch

        model = model_object["imagenet_model"]
        model.eval()

        with torch.no_grad():
            images, labels = minibatch[0].to(device), torch.as_tensor(minibatch[1]).to(device)
            outputs = model(images)
            loss = self.criterion(outputs, labels)
            outputs_softmax = self.log_softmax(outputs)
            top_1_acc, top_5_acc = self.accuracy(outputs_softmax, labels, (1, 5))

        metrics = {
            "total_epoch_loss": loss.item(),
            "total_epoch_top_1_acc": top_1_acc.item(),
            "total_epoch_top_5_acc": top_5_acc.item()
        }

        return metrics

    def predict(self, model_object, dataloader, hyperparams, device):
        """Produce log-softmax predictions for every batch of ``dataloader``.

        Args:
            model_object: dict with ``"imagenet_model"``.
            dataloader: iterable of batches whose first element is the image
                tensor; must expose ``dataset`` with a length.
            hyperparams: mapping providing ``"batch_size"`` (used only for
                the progress total).
            device: torch device the images are moved to.

        Returns:
            List with one log-softmax output tensor per batch; empty when the
            dataloader yields nothing.
        """
        import math
        import torch

        predictions = []
        batch_size = hyperparams["batch_size"]

        model = model_object["imagenet_model"]

        model.eval()

        minibatch_total_step = math.ceil(len(dataloader.dataset) / batch_size)

        with torch.no_grad():
            # Count from 1 so progress runs [1/N] .. [N/N].
            for batch_num, batch in enumerate(dataloader, start=1):
                images = batch[0].to(device)
                outputs = model(images)
                outputs_softmax = self.log_softmax(outputs)
                predictions.append(outputs_softmax)

                stats = "Test step [%d/%d]" \
                        % (batch_num, minibatch_total_step)

                # Printed per batch so the carriage return rewrites one
                # progress line, and nothing is printed for an empty loader.
                print("\r" + stats, end="")

        return predictions

#### <font color='blue'> Model Building specifications </font>

In [None]:
# Number of training epochs handed to the model-building run.
num_epochs = 2

# Hyperparameter search space: one list of candidate values per hyperparameter.
# The keys match what ImagenetTrainingSpec reads from `hyperparams`.
param_grid = dict(
    batch_size=[128, 256],
    learning_rate=[0.01, 0.001],
    lambda_value=[0.001, 0.0001],
    model_type=["vgg16", "resnet50"],
)

#### <font color='blue'> Initialize Experiment </font>

In [None]:
# Configuration passed to Experiment(...); left empty in this notebook.
# NOTE(review): the accepted keys are defined by cerebro's Experiment class
# and are not visible here - fill in as required before running.
params = dict()

In [None]:
# Create the experiment from the configuration dict, plus one instance of
# each user-defined spec (data preprocessing and model training).
experiment = Experiment(params)
imagenet_etl_spec = ImagenetETLSpec()
imagenet_training_spec = ImagenetTrainingSpec()

#### <font color='blue'> Run Data Preprocessing </font>

In [None]:
# Run data preprocessing with the ETL spec.
# NOTE(review): `fraction=0.1` presumably restricts ETL to 10% of the data -
# confirm against Experiment.run_etl.
experiment.run_etl(imagenet_etl_spec, fraction=0.1)

#### <font color='blue'> Run Model Building </font>

In [None]:
# Run model building over the hyperparameter grid for `num_epochs` epochs.
experiment.run_fit(imagenet_training_spec, param_grid, num_epochs)

In [None]:
# Evaluate a saved model with the training spec.
# NOTE(review): arguments look like (model file, batch size, output CSV, spec) -
# confirm the positional order against Experiment.run_test.
experiment.run_test("resnet50.pt", 64, "test_output.csv", imagenet_training_spec)

In [None]:
# Run prediction.
# NOTE(review): called without arguments here - verify that
# Experiment.run_predict has no required parameters.
experiment.run_predict()