# How to present cross-validation (CV) results with Neptune

## Before you start

### Install dependencies

In [None]:
! pip install neptune-client torch==1.10.2 torchvision==0.11.3 scikit-learn==1.0.2

**Import libraries**

In [None]:
import neptune.new as neptune
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import SubsetRandomSampler, DataLoader
from torchvision import datasets, transforms
from sklearn.model_selection import KFold
from statistics import mean

## Step 1: Create a Neptune *Run*

To log metadata to the Neptune project, you need the `project name` and the `api_token`.

To make this example easy to follow, we have created a public project **'common/showroom'** and a shared user **'neptuner'** with the API token **'ANONYMOUS'**, as you will see in the code cell below.

**(Optional)** To log to your Neptune project:

* [Create a Neptune account](https://app.neptune.ai/register/)

* [Find your API token](https://docs.neptune.ai/getting-started/installation#authentication-neptune-api-token)
* [Find your project name](https://docs.neptune.ai/getting-started/installation#setting-the-project-name)

Pass your credentials to the `project` and `api_token` arguments of `neptune.init()`:

`run = neptune.init(api_token='<YOUR_API_TOKEN>', project='<YOUR_WORKSPACE/YOUR_PROJECT>')` # pass your credentials


In [None]:
# Open a new run in the public showroom project using the shared anonymous token.
run = neptune.init(
    tags=["Colab Notebook", "cross-validation"],
    api_token="ANONYMOUS",
    project="common/showroom",
)

Running this cell creates a Run in Neptune, and you can log model-building metadata to it.

**Click on the link above to open the Run in the Neptune UI.** For now, it is empty, but you should keep the tab open to see what happens next.

## Step 2: Log config and hyperparameters

In [None]:
# Prefer the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

### Log Hyperparameters

In [None]:
# Hyperparameters and settings used by the rest of the notebook.
parameters = dict(
    epochs=10,
    lr=1e-2,
    bs=10,  # batch size
    input_sz=32 * 32 * 3,  # flattened 32x32 RGB image
    n_classes=10,
    k_folds=5,
    model_name="checkpoint.pth",  # file name used when saving checkpoints
    seed=42,
)

In [None]:
# Store the whole hyperparameter dict on the run under the "global/params" path.
run["global/params"] = parameters

### Log Config
Model and Dataset

In [None]:
class BaseModel(nn.Module):
    """Fully connected classifier operating on flattened inputs.

    The network stacks four linear layers with ReLU activations in between,
    with widths ``input_sz -> 2 * hidden_dim -> hidden_dim ->
    hidden_dim // 2 -> n_classes``.

    Args:
        input_sz: Number of features per sample after flattening.
        hidden_dim: Base width of the hidden layers.
        n_classes: Number of output logits.
    """

    def __init__(self, input_sz, hidden_dim, n_classes):
        super().__init__()
        # Remember the flattened feature count so forward() reshapes to match
        # the first linear layer instead of assuming one fixed image size.
        self.input_sz = input_sz
        self.main = nn.Sequential(
            nn.Linear(input_sz, hidden_dim * 2),
            nn.ReLU(),
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, n_classes),
        )

    def forward(self, input):
        """Flatten ``input`` to ``(-1, input_sz)`` and return the logits."""
        x = input.view(-1, self.input_sz)
        return self.main(x)

In [None]:
# Seed torch before the model is built so the initial weights are reproducible.
torch.manual_seed(parameters["seed"])

# The hidden width is deliberately passed the same value as the flattened input size.
model = BaseModel(
    input_sz=parameters["input_sz"],
    hidden_dim=parameters["input_sz"],
    n_classes=parameters["n_classes"],
)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=parameters["lr"])

Log model, criterion and optimizer name

In [None]:
# Record only the class names of the model, loss and optimizer.
run["global/config/model"] = model.__class__.__name__
run["global/config/criterion"] = criterion.__class__.__name__
run["global/config/optimizer"] = optimizer.__class__.__name__

In [None]:
# Directory the CIFAR-10 files are stored in.
data_dir = "data/CIFAR10"
# Path of the compressed archive inside data_dir (not referenced elsewhere in the visible cells).
compressed_ds = "./data/CIFAR10/cifar-10-python.tar.gz"

# Transform pipelines keyed by split name: random horizontal flip, tensor
# conversion, then per-channel normalization.
data_tfms = dict(
    train=transforms.Compose(
        [
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
            ),
        ]
    )
)

In [None]:
# download=True asks torchvision to fetch CIFAR-10 into data_dir if needed.
# Both splits are built with the "train" transform pipeline.
trainset = datasets.CIFAR10(
    root=data_dir, train=True, transform=data_tfms["train"], download=True
)
validset = datasets.CIFAR10(
    root=data_dir, train=False, transform=data_tfms["train"], download=True
)

# Number of samples in the training split.
dataset_size = len(trainset)

Log dataset details

In [None]:
# Track the dataset directory as files on the run.
run["global/dataset/CIFAR-10"].track_files(data_dir)
# Record the transform pipelines and the training-set size next to it.
run["global/dataset/dataset_transforms"] = data_tfms
run["global/dataset/dataset_size"] = dataset_size

## Step 3: Log losses and metrics 
Training Loop

In [None]:
# Seed the shuffled K-fold split with the logged "seed" hyperparameter so the
# fold membership is identical on every run.
splits = KFold(
    n_splits=parameters["k_folds"], shuffle=True, random_state=parameters["seed"]
)
# Per-fold training accuracy and loss, filled in by the training loop.
epoch_acc_list, epoch_loss_list = [], []

In [None]:
# Train on the training indices of every fold; the held-out indices are not used here.
# NOTE: `model` and `optimizer` are created once before this loop, so the
# weights carry over from one fold to the next.
for fold, (train_ids, _) in enumerate(splits.split(trainset)):
    train_sampler = SubsetRandomSampler(train_ids)
    train_loader = DataLoader(
        trainset, batch_size=parameters["bs"], sampler=train_sampler
    )
    for epoch in range(parameters["epochs"]):
        # Running totals over all batches of the current epoch.
        epoch_acc, epoch_loss = 0, 0.0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(x)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, y)

            batch_loss = loss.item()
            n_correct = torch.sum(preds == y).item()
            acc = n_correct / len(x)

            # Log batch loss and acc as plain Python numbers
            run[f"fold_{fold}/training/batch/loss"].log(batch_loss)
            run[f"fold_{fold}/training/batch/acc"].log(acc)

            loss.backward()
            optimizer.step()

            # Accumulate inside the batch loop so the epoch totals cover every
            # sample, matching the division by the sampler length below.
            epoch_acc += n_correct
            epoch_loss += batch_loss * x.size(0)

    # Totals from the fold's final epoch, as accuracy in percent and mean loss per sample.
    epoch_acc_list.append((epoch_acc / len(train_loader.sampler)) * 100)
    epoch_loss_list.append(epoch_loss / len(train_loader.sampler))

    # Log model checkpoint
    torch.save(model.state_dict(), f"./{parameters['model_name']}")
    run[f"fold_{fold}/checkpoint"].upload(parameters["model_name"])

In [None]:
# Log the mean training accuracy (in percent) and mean training loss across
# the entries collected per fold
run["global/metrics/train/mean_acc"] = mean(epoch_acc_list)
run["global/metrics/train/mean_loss"] = mean(epoch_loss_list)

## Stop the run

<font color=red>**Warning:**</font><br>
Once you are done logging, you should stop tracking the run using the `stop()` method.
This is needed only when logging from a notebook environment. When logging from a script, Neptune automatically stops tracking once the script has finished executing.

In [None]:
# Stop tracking for this run now that logging is finished.
run.stop()