# ModelBox SDK Tutorial with PyTorch
This notebook demonstrates the use of the ModelBox Python SDK with PyTorch and explains the major concepts around working with models and checkpoints.
We will train a CIFAR-10 image classifier, use ModelBox to store the checkpoints during training, and also store the final model, which is meant to be deployed in production.

In [5]:
pip install torch torchvision

Collecting torchvision
  Downloading torchvision-0.13.1-cp310-cp310-manylinux1_x86_64.whl (19.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.1/19.1 MB[0m [31m49.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: torchvision
Successfully installed torchvision-0.13.1
Note: you may need to restart the kernel to use updated packages.


In [27]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
import time

# Import the ModelBox Client and initialize it
from modelbox.modelbox import ModelBoxClient, MLFramework, Artifact, ArtifactMime, MetricValue
import os

# The server address can be overridden through the MODELBOX_ADDR environment
# variable; the fallback is the address this tutorial was originally run against.
client = ModelBoxClient(addr=os.environ.get("MODELBOX_ADDR", "172.21.0.2:8085"))

In [19]:
# Register a new experiment with the ModelBox server. The returned id is used
# as `parent_id` by the metadata and metric calls further down.
resp = client.create_experiment(
    name="cifar10",
    owner="diptanuc@gmail.com",
    namespace="modelbox-demos",
    external_id="",
    framework=MLFramework.PYTORCH,
)
experiment_id = resp.experiment_id

In [23]:
# Dataset locations on disk.
data_dir = "data/CIFAR10"
compressed_ds = "./data/CIFAR10/cifar-10-python.tar.gz"

# Transform pipeline applied to every training image: random horizontal flip,
# conversion to a tensor, then per-channel normalisation.
# NOTE(review): the mean/std values look like the commonly used ImageNet
# statistics - confirm they are the intended ones for this dataset.
train_pipeline = [
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
]
data_tfms = {"train": transforms.Compose(train_pipeline)}

# Hyperparameters and model settings shared by the cells below.
params = dict(
    lr=1e-2,
    bs=128,
    input_sz=32 * 32 * 3,
    n_classes=10,
    model_filename="basemodel",
)
# Log config & parameters against the experiment.
client.update_metadata(parent_id=experiment_id, key="dataset/path", val=data_dir)
# The key was previously misspelled as 'hyperparmas'.
client.update_metadata(parent_id=experiment_id, key="hyperparams", val=params)
# TODO: Log the data transformations - support transforming Python objects into their class name (as a string) and their args (as values).


UpdateMetadataResponse(updated_at=seconds: 1663444672
nanos: 818657530
)

In [9]:
# Model & Dataset
class BaseModel(nn.Module):
    """Fully connected classifier over flattened inputs.

    The network is four ``nn.Linear`` layers separated by ReLU activations,
    with widths
    ``input_sz -> hidden_dim * 2 -> hidden_dim -> hidden_dim // 2 -> n_classes``.

    Args:
        input_sz: Number of features per sample once the input is flattened.
        hidden_dim: Base width from which the hidden layer sizes are derived.
        n_classes: Number of output scores produced per sample.
    """

    def __init__(self, input_sz, hidden_dim, n_classes):
        super().__init__()
        # Remembered so forward() flattens to the width the first layer
        # expects, instead of a hard-coded 32 * 32 * 3.
        self.input_sz = input_sz
        self.main = nn.Sequential(
            nn.Linear(input_sz, hidden_dim * 2),
            nn.ReLU(),
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, n_classes),
        )

    def forward(self, input):
        """Flatten ``input`` to shape ``(-1, input_sz)`` and return the class scores."""
        x = input.view(-1, self.input_sz)
        return self.main(x)

In [25]:

# Build the CIFAR-10 training set (downloading it into `data_dir` when needed)
# with the training transforms applied to each image.
trainset = datasets.CIFAR10(
    data_dir,
    download=True,
    transform=data_tfms["train"],
)

# Shuffled mini-batches of the configured batch size.
trainloader = torch.utils.data.DataLoader(
    trainset,
    shuffle=True,
    batch_size=params["bs"],
)

# Record how many training samples there are as experiment metadata.
dataset_size = dict(train=len(trainset))
client.update_metadata(
    parent_id=experiment_id,
    key="dataset/size",
    val=dataset_size,
)


# Instantiate the model, the loss function and the optimizer.
# The hidden width is set to the same value as the input size.
model = BaseModel(
    input_sz=params["input_sz"],
    hidden_dim=params["input_sz"],
    n_classes=params["n_classes"],
)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=params["lr"])

Files already downloaded and verified


In [28]:
# One pass over the training data. Loss and accuracy of every batch are logged
# to ModelBox, keyed by the batch index.
for i, (x, y) in enumerate(trainloader):
    optimizer.zero_grad()

    # Call the module itself rather than .forward() so registered hooks run.
    outputs = model(x)
    loss = criterion(outputs, y)

    # Predicted class is the index of the largest score for each sample.
    _, preds = torch.max(outputs, 1)
    # Fraction of correct predictions in this batch, as a plain Python float.
    acc = (preds == y).float().mean().item()

    # Log plain Python numbers, not tensors: .item() extracts the scalar so the
    # metric value does not carry a reference to the autograd graph.
    now = int(time.time())
    client.log_metrics(parent_id=experiment_id, key='loss', value=MetricValue(step=i, wallclock_time=now, value=loss.item()))
    client.log_metrics(parent_id=experiment_id, key='accu', value=MetricValue(step=i, wallclock_time=now, value=acc))

    loss.backward()
    optimizer.step()