### Save the MNIST model to Model Catalog

In this notebook we go through the entire deployment life-cycle. We start from a trained model, we save it in the Model Catalog and then we deploy it as a REST service.

Finally, we test it with some sample images.

In [1]:
import tempfile

import ads
import numpy as np
import torch
from ads import set_auth
from ads.common.model_metadata import UseCaseType, MetadataCustomCategory
from ads.model.framework.pytorch_model import PyTorchModel
from pytorch_lightning import LightningModule
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split
from torchmetrics import Accuracy
from torchvision import transforms

# from here we get MNIST dataset
from torchvision.datasets import MNIST

In [2]:
# print the installed ADS version
print(ads.__version__)

2.6.8


In [3]:
# directory where the downloaded dataset is stored
PATH_DATASETS = "."

In [4]:
# The class definition is needed in this notebook so that the trained model
# can be reloaded from its checkpoint.
class LitMNISTCNN(LightningModule):
    """Simple CNN classifier for MNIST digits, wrapped as a LightningModule.

    The network stacks three 5x5 convolutions (with max-pooling, ReLU and
    dropout) and two linear layers. ``forward`` returns log-probabilities
    over ``num_classes`` classes.

    Args:
        data_dir: stored as ``self.data_dir``; not otherwise used by the
            code in this class.
        learning_rate: learning rate handed to the Adam optimizer in
            ``configure_optimizers``.
    """

    def __init__(self, data_dir=PATH_DATASETS, learning_rate=2e-4):
        """Store the init args and build the transform, the CNN and the metrics."""
        super().__init__()

        # Set our init args as class attributes
        self.data_dir = data_dir
        self.learning_rate = learning_rate

        # dataset specific attributes
        self.num_classes = 10
        # shape of input images in MNIST
        self.dims = (1, 28, 28)

        self.transform = transforms.Compose(
            [
                transforms.ToTensor(),
                # the normalization values are explained here:
                # https://discuss.pytorch.org/t/normalization-in-the-mnist-example/457
                transforms.Normalize((0.1307,), (0.3081,)),
            ]
        )

        # Define PyTorch model: a simple CNN
        self.model = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=5),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=5),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Conv2d(32, 64, kernel_size=5),
            nn.MaxPool2d(kernel_size=2),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Flatten(),
            # spatial size for a 28x28 input: 28 -> 24 -> 20 -> (pool) 10
            # -> 6 -> (pool) 3, with 64 channels: 3*3*64 features
            nn.Linear(3 * 3 * 64, 256),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(256, self.num_classes),
        )

        self.val_accuracy = Accuracy()
        self.test_accuracy = Accuracy()

    def forward(self, x):
        """Return the log-probabilities computed by the CNN for the batch ``x``."""
        # self.model produces the raw scores; log_softmax over dim=1 turns
        # them into log-probabilities, which is what F.nll_loss consumes in
        # the *_step methods (working in log space is better for numerical
        # stability)
        scores = self.model(x)
        return F.log_softmax(scores, dim=1)

    def training_step(self, batch, batch_idx):
        """Return the negative log-likelihood loss for one training batch."""
        x, y = batch
        log_probs = self(x)
        return F.nll_loss(log_probs, y)

    def validation_step(self, batch, batch_idx):
        """Compute loss and accuracy on one validation batch and log both."""
        x, y = batch
        log_probs = self(x)
        loss = F.nll_loss(log_probs, y)
        # predicted class = index of the highest log-probability
        preds = torch.argmax(log_probs, dim=1)
        self.val_accuracy.update(preds, y)

        # Calling self.log will surface up scalars for you in TensorBoard
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", self.val_accuracy, prog_bar=True)

    def test_step(self, batch, batch_idx):
        """Compute loss and accuracy on one test batch and log both."""
        x, y = batch
        log_probs = self(x)
        loss = F.nll_loss(log_probs, y)
        # predicted class = index of the highest log-probability
        preds = torch.argmax(log_probs, dim=1)
        self.test_accuracy.update(preds, y)

        # Calling self.log will surface up scalars for you in TensorBoard
        self.log("test_loss", loss, prog_bar=True)
        self.log("test_acc", self.test_accuracy, prog_bar=True)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters, using ``self.learning_rate``."""
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

    # the dataloader part is left out here: in this notebook the class is
    # only needed to reload the trained model

#### Reload the model from a checkpoint and prepare to save to Model Catalog

In [5]:
# reload the model from the checkpoint file stored in ./checkpoint_mnist
model = LitMNISTCNN.load_from_checkpoint("./checkpoint_mnist/best.ckpt")

In [6]:
# display the structure of the module: the layers of the CNN and the metric objects
model

LitMNISTCNN(
  (model): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(5, 5), stride=(1, 1))
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): ReLU()
    (5): Dropout(p=0.5, inplace=False)
    (6): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): ReLU()
    (9): Dropout(p=0.5, inplace=False)
    (10): Flatten(start_dim=1, end_dim=-1)
    (11): Linear(in_features=576, out_features=256, bias=True)
    (12): ReLU()
    (13): Dropout(p=0.1, inplace=False)
    (14): Linear(in_features=256, out_features=10, bias=True)
  )
  (val_accuracy): Accuracy()
  (test_accuracy): Accuracy()
)

In [7]:
# authenticate ADS using the resource principal (RP)
set_auth(auth='resource_principal')

# directory that holds the model artifacts (score.py, runtime.yaml, model.pt, ...)
artifact_dir = "pytorch_artifact_dir"

# wrap the reloaded model in the ADS object used below to prepare, save and deploy it
pytorch_model = PyTorchModel(model, artifact_dir=artifact_dir)

In [8]:
# the inference env has been switched to a published conda env:
# being a published env, it must be referenced through its Object Storage path

# this is the Object Storage path where the published env has been saved
INF_ENV_PATH = "oci://custom_conda_envs@frqap2zhtzbe/conda_environments/gpu/mycomputervision_p37_gpu_/1.0/mycomputervision_p37_gpu_v1_0"

# generate the content of the artifact directory (see the file list in the output below)
pytorch_model.prepare(
    inference_conda_env=INF_ENV_PATH,
    training_conda_env="computervision_p37_cpu_v1",
    use_case_type=UseCaseType.IMAGE_CLASSIFICATION,
    force_overwrite=True,
)



algorithm: LitMNISTCNN
artifact_dir:
  /home/datascience/pytorch-on-oci/ch-04/pytorch_artifact_dir:
  - - score.py
    - test_json_output.json
    - saved_score.py
    - model.pt
    - runtime.yaml
    - .ipynb_checkpoints
    - .ipynb_checkpoints/score-checkpoint.py
    - .ipynb_checkpoints/runtime-checkpoint.yaml
framework: pytorch
model_deployment_id: null
model_id: null

After the `prepare()` step you need to delete the generated model.pt, **copy best.ckpt** to pytorch_artifact_dir and rename it to model.pt.

This is related to the use of Lightning.

#### correctly setting some metadata

In [9]:
# set the correct name of the model file in the custom metadata
pytorch_model.metadata_custom['ModelFileName'].update(value="model.pt", category=MetadataCustomCategory.OTHER, description="model file name")

In [10]:
# record the list of files located in the artifacts folder
pytorch_model.metadata_custom['ModelArtifacts'].update(value="score.py, model.pt, runtime.yaml", category=MetadataCustomCategory.TRAINING_ENV, 
                                                       description="The list of files located in artifacts folder.")

In [11]:
# display the custom metadata entries to check the values set above
pytorch_model.metadata_custom

data:
- category: Training Environment
  description: The list of files located in artifacts folder.
  key: ModelArtifacts
  value: score.py, model.pt, runtime.yaml
- category: Training Environment
  description: The conda environment where the model was trained.
  key: CondaEnvironment
  value: oci://service-conda-packs@id19sfcrra6z/service_pack/cpu/Computer_Vision_for_CPU_on_Python_3.7/1.0/computervision_p37_cpu_v1
- category: Training Profile
  description: The model serialization format.
  key: ModelSerializationFormat
  value: pt
- category: Other
  description: model file name
  key: ModelFileName
  value: model.pt
- category: Training Environment
  description: The URI of the training conda environment.
  key: CondaEnvironmentPath
  value: oci://service-conda-packs@id19sfcrra6z/service_pack/cpu/Computer_Vision_for_CPU_on_Python_3.7/1.0/computervision_p37_cpu_v1
- category: Training Environment
  description: The slug name of the training conda environment.
  key: SlugName
  valu

#### Finally save the model to Model Catalog

Before running this cell you must modify the file score.py so that it loads the model successfully.

Then, from the directory

/home/datascience/pytorch-on-oci/ch-04/checkpoint_mnist

execute the command:

cp best.ckpt ../pytorch_artifact_dir/model.pt

In [12]:
# display name given to the model when it is saved
MODEL_NAME = "pytorch-mnist08"

# save the model to the Model Catalog; the value returned by save() is kept in model_id
model_id = pytorch_model.save(display_name=MODEL_NAME)

Start loading model.pt from model directory /home/datascience/pytorch-on-oci/ch-04/pytorch_artifact_dir ...
loading model.pt is complete.
Model is successfully loaded.
['score.py', 'test_json_output.json', 'saved_score.py', 'model.pt', 'runtime.yaml']


loop1:   0%|          | 0/5 [00:00<?, ?it/s]

In [13]:
# show the status of each step of the workflow (prepare, verify, save, deploy, predict)
pytorch_model.summary_status()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Actions Needed
Step,Status,Details,Unnamed: 3_level_1
initiate,Done,Initiated the model,
prepare(),Done,Generated runtime.yaml,
prepare(),Done,Generated score.py,
prepare(),Done,Serialized model,
prepare(),Done,"Populated metadata(Custom, Taxonomy and Provenance)",
verify(),Available,Local tested .predict from score.py,
save(),Done,Conducted Introspect Test,
save(),Done,Uploaded artifact to model catalog,
deploy(),Available,Deployed the model,
predict(),Not Available,Called deployment predict endpoint,


#### Test the model locally

In [14]:
# we take an input image from the dataset
# when we load the dataset we apply transforms as expected from the model
# train=False selects the test split; "." is the root directory of the dataset
mnist_test = MNIST(".", train=False, transform=model.transform)

In [15]:
# index of the sample picked from the test set
INDEX = 10

# take a sample: the transformed image tensor and its label
img_tensor, label = mnist_test[INDEX]

print(img_tensor.shape)
print()
print(f"Expected label is: {label}")

# make it a batch: unsqueeze(0) adds a leading dimension of size 1
input_batch = img_tensor.unsqueeze(0)

torch.Size([1, 28, 28])

Expected label is: 0


#### Call the model and predict the label from the image

In [16]:
# local test: run the prediction through verify() and read the "prediction" entry of the result
prediction = pytorch_model.verify(input_batch)["prediction"]

print()
# the predicted label is the index of the highest value in the prediction
print(f"Predicted label is: {np.argmax(prediction)}")

Start loading model.pt from model directory /home/datascience/pytorch-on-oci/ch-04/pytorch_artifact_dir ...
loading model.pt is complete.
Model is successfully loaded.

Predicted label is: 0


In [17]:
# show the status of each step again after the local test with verify()
pytorch_model.summary_status()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Actions Needed
Step,Status,Details,Unnamed: 3_level_1
initiate,Done,Initiated the model,
prepare(),Done,Generated runtime.yaml,
prepare(),Done,Generated score.py,
prepare(),Done,Serialized model,
prepare(),Done,"Populated metadata(Custom, Taxonomy and Provenance)",
verify(),Done,Local tested .predict from score.py,
save(),Done,Conducted Introspect Test,
save(),Done,Uploaded artifact to model catalog,
deploy(),Available,Deployed the model,
predict(),Not Available,Called deployment predict endpoint,


#### Ready for Model Deployment

At this point we are ready to deploy the model as a REST service.

In [18]:
# deploy the model; the log group and the access/predict logs are identified by their OCIDs
pytorch_model.deploy(
        display_name="MNIST Model For Classification",
        deployment_log_group_id="ocid1.loggroup.oc1.eu-frankfurt-1.amaaaaaangencdya63i3qhao4bjx754lb3m2jpekev5oc55p5ebjvykbtgya",
        deployment_access_log_id="ocid1.log.oc1.eu-frankfurt-1.amaaaaaangencdyamg6gyfdjofod7hlnbhhjtgeaeyy3fkrmh3cyb4dxx7xa",
        deployment_predict_log_id="ocid1.log.oc1.eu-frankfurt-1.amaaaaaangencdyaddqi3rff7kdbxhxdpi2rx65dynuye36dayz7nivbwsca",
)

loop1:   0%|          | 0/6 [00:00<?, ?it/s]

<ads.model.deployment.model_deployment.ModelDeployment at 0x7fcf3528a950>

In [19]:
# get the URL of the deployed service
pytorch_model.model_deployment.url

'https://modeldeployment.eu-frankfurt-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.eu-frankfurt-1.amaaaaaangencdya2vdsvxogbuqgw2hh5ztq3qooowulxw2y6z6pqh2kndhq'

#### Test the deployed endpoint

In [28]:
# same input batch as in the local test, this time sent to the deployed endpoint through predict()
prediction = pytorch_model.predict(input_batch)["prediction"]
print()
# the predicted label is the index of the highest value in the prediction
print(f"Predicted label is: {np.argmax(prediction)}")


Predicted label is: 0


#### Final check of the status

In [29]:
# final check: show the status of each step after the deployment and the call to the endpoint
pytorch_model.summary_status()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Actions Needed
Step,Status,Details,Unnamed: 3_level_1
initiate,Done,Initiated the model,
prepare(),Done,Generated runtime.yaml,
prepare(),Done,Generated score.py,
prepare(),Done,Serialized model,
prepare(),Done,"Populated metadata(Custom, Taxonomy and Provenance)",
verify(),Done,Local tested .predict from score.py,
save(),Done,Conducted Introspect Test,
save(),Done,Uploaded artifact to model catalog,
deploy(),ACTIVE,Deployed the model,
predict(),Available,Called deployment predict endpoint,
