### single node triton server with load test
1. Loading to and from minio workflow (multiple models)
2. Getting models from [timm](https://timm.fast.ai/)
3. Making the node
4. Load and unload models operations
5. Monitoring
6. Naive load test
7. TODO Add language models from Huggingface


In [134]:
import torch
from PIL import Image
import os
import numpy as np
import pandas as pd

In [11]:
len(timm.list_models())

739

In [138]:
os.system('sudo umount -l ~/my_mounting_point')
os.system('cc-cloudfuse mount ~/my_mounting_point')

data_folder_path = '/home/cc/my_mounting_point/datasets'
dataset_folder_path = os.path.join(
    data_folder_path, 'ILSVRC/Data/DET/test'
)
classes_file_path = os.path.join(
    data_folder_path, 'imagenet_classes.txt'
)

image_names = os.listdir(dataset_folder_path)
image_names.sort()
with open(classes_file_path) as f:
    classes = [line.strip() for line in f.readlines()]

def image_loader(folder_path, image_name):
    image = Image.open(
        os.path.join(folder_path, image_name))
    # if there was a need to filter out only color images
    # if image.mode == 'RGB':
    #     pass
    return image
num_loaded_images = 20
images = {
    image_name: image_loader(
        dataset_folder_path, image_name) for image_name in image_names[
            :num_loaded_images]}

In [140]:
# load and transform model
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225]
)])

batch = torch.stack(list(map(lambda a: transform(a), list(images.values()))))

In [184]:
# load and predict with the timm model
model_name = 'resnet50'
model = timm.create_model(model_name, pretrained=True)

model.eval()
torch_output = model(batch)
torch_output = torch.nn.functional.softmax(torch_output, dim=1) * 100
torch_output = torch_output.detach().numpy()
torch_output = torch_output.argmax(axis=1)
torch_class = np.array(classes)[torch_output]
torch_class

array(['lion', 'studio_couch', 'harp', 'goldfish', 'snowplow',
       'pomegranate', 'alligator_lizard', 'stethoscope', 'banjo', 'junco',
       'shoe_shop', 'albatross', 'notebook', 'lesser_panda', 'microphone',
       'football_helmet', 'damselfly', 'barrel', 'harp', 'mortarboard'],
      dtype='<U30')

In [170]:
# save the onnx model
import torch.onnx

# Standard ImageNet input - 3 channels, 224x224,
# values don't matter as we care about network structure.
# But they can also be real inputs.
dummy_input = torch.randn(1, 3, 224, 224)
# Invoke export
torch.onnx.export(
    model, dummy_input,
    f"{model_name}.onnx",
    input_names = ['input'],
    output_names = ['output'],
    dynamic_axes={'input' : {0 : 'batch_size'},    # variable length axes
                  'output' : {0 : 'batch_size'}})

In [185]:
# use onnx model
import onnx
import onnxruntime

onnx_model = onnx.load(f"{model_name}.onnx")
onnx.checker.check_model(onnx_model)

ort_session = onnxruntime.InferenceSession(f"{model_name}.onnx", providers=['CPUExecutionProvider'])
onnx_output = ort_session.run(None, {'input': batch.numpy()})
onnx_output = torch.nn.functional.softmax(torch.tensor(onnx_output), dim=1)[0] * 100
onnx_output = onnx_output.detach().numpy()
onnx_output = onnx_output.argmax(axis=1)
onnx_class = np.array(classes)[onnx_output]
onnx_class

array(['lion', 'studio_couch', 'slot', 'goldfish', 'snowplow',
       'pomegranate', 'alligator_lizard', 'seat_belt', 'hatchet', 'junco',
       'shoe_shop', 'albatross', 'bookshop', 'lesser_panda', 'marimba',
       'football_helmet', 'damselfly', 'horse_cart', 'harp',
       'mortarboard'], dtype='<U30')

In [187]:
# TODO find out why slightly different

assert np.all(onnx_output == torch_output)
print(onnx_class)

AssertionError: 