# Chapter 11: Serving Models with TorchServe

In [None]:
# %pip (instead of !pip) installs into the environment of the running kernel
%pip install torch-model-archiver torchserve captum pyngrok

## 11.3 Archiving and Serving Models

In [6]:
# -nc (no-clobber) skips the download if the file is already there, so
# re-running this cell does not leave fomo_model.pth.1, .2, ... copies behind
!wget -nc https://github.com/dvgodoy/assets/releases/download/model/fomo_model.pth

In [7]:
import torch
import torch.nn as nn

# Untrained ResNet18 from the pinned torchvision release, with its final
# layer replaced by a 4-output linear layer
repo = 'pytorch/vision:v0.15.2'
model = torch.hub.load(repo, 'resnet18', weights=None)
model.fc = nn.Linear(512, 4)

# The checkpoint was downloaded from the internet: weights_only=True keeps
# torch.load from unpickling anything other than tensors and plain containers
state = torch.load('fomo_model.pth', map_location='cpu', weights_only=True)
model.load_state_dict(state)

Downloading: "https://github.com/pytorch/vision/zipball/main" to /root/.cache/torch/hub/main.zip
  warn(


<All keys matched successfully>

### 11.3.2 Model File

In [8]:
from torchvision.models import resnet18
from torchvision.models.resnet import BasicBlock, ResNet

class FOMONet(ResNet):
    """ResNet18 whose final layer outputs the 4 classes of our task.

    The class inherits ``forward`` (and every other method) from ``ResNet``
    instead of borrowing the bound method of a second, throwaway instance.
    Parameter names match those of ``resnet18`` with its ``fc`` layer
    replaced by ``nn.Linear(512, 4)``, so the same state dict can be loaded.
    """
    def __init__(self):
        # BasicBlock with [2, 2, 2, 2] layers is the ResNet18 layout;
        # num_classes=4 sizes the final fully connected layer for our task
        super().__init__(BasicBlock, [2, 2, 2, 2], num_classes=4)

In [9]:
# Fresh FOMONet receiving a copy of the weights held by the hub model;
# strict=True (the default) requires every key to match
fomo = FOMONet()
fomo.load_state_dict(model.state_dict(), strict=True)

<All keys matched successfully>

In [10]:
# Both networks go to evaluation mode before being compared
model.eval()
fomo.eval()

# Fixed seed, so the random input is the same on every run
torch.manual_seed(32)
x = torch.randn((1, 3, 224, 224))

# The two outputs are displayed side by side
(fomo(x), model.cpu()(x))

(tensor([[ 0.2412, -2.8556, -1.1869,  0.8597]], grad_fn=<AddmmBackward0>),
 tensor([[ 0.2412, -2.8556, -1.1869,  0.8597]], grad_fn=<AddmmBackward0>))

In [11]:
# Contents of the model file handed to the archiver: FOMONet is a proper
# ResNet subclass, so it owns its forward method instead of borrowing the
# bound method of a second, throwaway ResNet instance
model_file_script = """
from torchvision.models.resnet import BasicBlock, ResNet

class FOMONet(ResNet):
    def __init__(self):
        # BasicBlock with [2, 2, 2, 2] layers is the ResNet18 layout;
        # num_classes=4 sizes the final fully connected layer for our task
        super().__init__(BasicBlock, [2, 2, 2, 2], num_classes=4)
"""

with open('model_file.py', 'w') as fp:
    fp.write(model_file_script)

### 11.3.3 Scripted Models

In [12]:
# Once the model is scripted, the model class definition is no longer needed
scripted_model = torch.jit.script(model)

### 11.3.4 Serialized File

In [13]:
# The model was already saved to disk in the previous chapter;
# both serialized forms are written here anyway

# Scripted version
scripted_model.save('fomo_model.pt')

# Eager mode version (state dict only)
torch.save(obj=model.state_dict(), f='fomo_model.pth')

### 11.3.5 Inference Handler

#### 11.3.5.3 Preprocess

Let's take a quick look at the `image_processing()` function that's called by the `preprocess()` method:

In [17]:
from ts.torch_handler.image_classifier import ImageClassifier

# Class-level attribute: displaying it shows the transformation pipeline
ImageClassifier.image_processing



Compose(
    Resize(size=256, interpolation=bilinear, max_size=None, antialias=warn)
    CenterCrop(size=(224, 224))
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)

In [18]:
from torchvision.models import get_weight

# Look up the weights entry by name and display the transforms bundled with it
weights = get_weight('ResNet18_Weights.DEFAULT')
weights.transforms()

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BILINEAR
)

In [21]:
# -nc (no-clobber) keeps a re-run from saving duplicate fig_0_100.jpg.1 copies
!wget -nc https://raw.githubusercontent.com/dvgodoy/assets/main/PyTorchInPractice/images/ch9/fig_0_100.jpg

In [22]:
from PIL import Image

img = Image.open('./fig_0_100.jpg')

# Run the same image through both pipelines and check that every
# element of the two resulting tensors is equal
handler_tensor = ImageClassifier.image_processing(img)
weights_tensor = weights.transforms()(img)
(handler_tensor == weights_tensor).all()

tensor(True)

#### 11.3.5.6 Custom Handler

In [23]:
# Source code of the custom handler, written verbatim to handler_file.py below
handler_file_script = """
from ts.torch_handler.image_classifier import ImageClassifier

class FOMOHandler(ImageClassifier):
    def __init__(self):
      super().__init__()

      # By default, ImageClassifier uses top-5 classes
      # but our task has only 4, so we need to tweak it
      self.set_max_result_classes(4)
"""

with open('handler_file.py', mode='w') as fp:
    print(handler_file_script, end='', file=fp)

### 11.3.6 Extra Files

In [26]:
# The dataset isn't loaded in this chapter, so the mapping that would come from
# class_to_idx = datasets['train'].class_to_idx
# is rebuilt by hand: each class name gets its position in the list

class_to_idx = {
    name: idx
    for idx, name in enumerate(['Fig', 'Mandarine', 'Onion White', 'Orange'])
}

In [27]:
# Invert the mapping: class indices become the keys, class names the values
index_to_name = dict(zip(class_to_idx.values(), class_to_idx.keys()))
index_to_name

{0: 'Fig', 1: 'Mandarine', 2: 'Onion White', 3: 'Orange'}

In [28]:
import json

# Serialize the index-to-name mapping and write it to index_to_name.json
with open('index_to_name.json', mode='w') as f:
    f.write(json.dumps(index_to_name))

### 11.3.7 Packaging

In [29]:
# -p makes the cell safe to re-run: no error if the folder already exists
!mkdir -p ./model_store

In [30]:
import sys
from model_archiver.model_packaging import generate_model_archive

# Options for the archiver, as flag -> value pairs
archive_options = {
    '--model-name': 'FOMO',
    '--version': '1.0',
    '--model-file': 'model_file.py',
    '--serialized-file': 'fomo_model.pth',
    '--handler': 'handler_file.py',
    '--extra-files': 'index_to_name.json',
    '--export-path': './model_store',
}

# The archiver is called without arguments, so the options are handed over
# as a fake command line: empty program name, flag/value pairs, then --force
sys.argv = ['',
            *(token for pair in archive_options.items() for token in pair),
            '--force']

generate_model_archive()

## 11.4 TorchServe

In [31]:
# TorchServe settings: address used by the inference API
config_properties = """
inference_address=http://127.0.0.1:7777
"""

with open('config.properties', mode='w') as fp:
    print(config_properties, end='', file=fp)

In [32]:
from ts.model_server import start

# start() is called without arguments, so the options go in a fake
# command line: empty program name followed by the server flags
sys.argv = [''] + [
    '--start',
    '--disable-token-auth',
    '--model-store', './model_store',
    '--models', 'fomo=FOMO.mar',
    '--ts-config', 'config.properties',
]
start()

In [35]:
import time

import requests

# The server may still be starting up when this cell runs (e.g. on "Run All"),
# so poll the health-check endpoint until it answers successfully
for attempt in range(60):
    try:
        if requests.get('http://127.0.0.1:7777/ping', timeout=2).ok:
            break
    except requests.exceptions.RequestException:
        pass  # not accepting connections yet
    time.sleep(1)
else:
    raise RuntimeError('TorchServe did not become healthy at http://127.0.0.1:7777/ping')

# Send the raw bytes of the image to the prediction endpoint of the fomo model
with open('./fig_0_100.jpg', 'rb') as f:
    data = f.read()

response = requests.put('http://127.0.0.1:7777/predictions/fomo', data=data)
response.json()

{'Fig': 0.9934685230255127,
 'Orange': 0.004324017558246851,
 'Onion White': 0.0012627042597159743,
 'Mandarine': 0.0009447108022868633}

In [36]:
# Shell alternative: !torchserve --stop
# Here the same entry point used to start the server is called with --stop
sys.argv = ['', '--stop']
start()

TorchServe has stopped.


### 11.4.1 Ngrok (optional)

In [None]:
# Option 1
# You can register your token by calling ngrok directly
# Uncomment the line below and replace ... with your token
# !ngrok authtoken ...

# Option 2
# Or you can append it to ngrok's configuration file
# Uncomment the line below and replace ... with your token
# !echo "authtoken: ..." >> /root/.ngrok2/ngrok.yml

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml


In [None]:
# Same inference address as before, plus the CORS settings
config_properties = """
inference_address=http://127.0.0.1:7777
cors_allowed_origin=*
cors_allowed_methods=GET, POST, PUT, OPTIONS
"""

with open('config_cors.properties', mode='w') as fp:
    print(config_properties, end='', file=fp)

In [None]:
# Same flags as the first start, pointing to the CORS-enabled configuration.
# --disable-token-auth is kept here as well: the request sent through the
# tunnel carries no token, so it would be rejected with authorization on.
# NOTE: this leaves the public ngrok URL unauthenticated - use it for short
# demos only and stop the tunnel/server afterwards.
sys.argv = ['',
            '--start',
            '--disable-token-auth',
            '--model-store', './model_store',
            '--models', 'fomo=FOMO.mar',
            '--ts-config', 'config_cors.properties']
start()

In [None]:
from pyngrok import ngrok

# Open an HTTP tunnel to local port 7777 (the port of the inference address)
# The returned object looks like this:
# <NgrokTunnel: "http://<public_sub>.ngrok.io" -> "http://localhost:7777">
http_tunnel = ngrok.connect(7777, "http")



In [None]:
# Public address of the tunnel; it is used as the base URL in the request below
http_tunnel.public_url

'https://f295-35-202-252-169.ngrok-free.app'

In [None]:
# Prediction endpoint of the fomo model, reached through the public tunnel URL
tunnel_endpoint = f'{http_tunnel.public_url}/predictions/fomo'

# Raw bytes of the image are the body of the request
with open('./fig_0_100.jpg', mode='rb') as f:
    data = f.read()

response = requests.put(tunnel_endpoint, data=data)
response.json()

{'Fig': 0.9934685230255127,
 'Orange': 0.004324017558246851,
 'Onion White': 0.0012627042597159743,
 'Mandarine': 0.0009447108022868633}

In [None]:
# Close the tunnel identified by its public URL
ngrok.disconnect(http_tunnel.public_url)

In [None]:
# Call the server entry point again, this time with the --stop flag
sys.argv = ['', '--stop']
start()

TorchServe has stopped.
