*Copyright 2024 Modular, Inc: Licensed under the Apache License v2.0 with LLVM Exceptions.*

# MAX Serve and PyTorch model client example

In [None]:
from python import Python
from tensor import Tensor, TensorShape, TensorSpec
from max.engine import EngineNumpyView

@always_inline
fn numpy_data_pointer[
    type: DType
](numpy_array: PythonObject) raises -> DTypePointer[type]:
    """Wraps the buffer address of a NumPy array in a typed pointer.

    The address is read from the first entry of the array's
    `__array_interface__["data"]` tuple. No data is copied and the array's
    own element type is not inspected, so the caller chooses `type`.

    Parameters:
        type: Element type the returned pointer is typed with.

    Args:
        numpy_array: Python object exposing `__array_interface__`.

    Returns:
        A `DTypePointer[type]` constructed from the array's buffer address.

    Raises:
        If any of the Python attribute/item lookups fail.
    """
    var array_interface = numpy_array.__array_interface__
    var address = array_interface["data"][0].__index__()
    return DTypePointer[type](address=address)

@always_inline
fn memcpy_to_numpy[
    type: DType
](array: PythonObject, tensor: Tensor[type]) raises:
    """Copies every element of `tensor` into the buffer behind `array`.

    Parameters:
        type: Element type of the tensor; also used to type the destination
            pointer.

    Args:
        array: Destination NumPy array.
            NOTE(review): assumed to be contiguous, of matching dtype, and
            to hold at least `tensor.num_elements()` elements - nothing here
            checks that; confirm at call sites.
        tensor: Source tensor.

    Raises:
        If the array's buffer address cannot be obtained.
    """
    memcpy(
        numpy_data_pointer[type](array),
        tensor._ptr,
        tensor.num_elements(),
    )


@always_inline
fn shape_to_python_list(shape: TensorShape) raises -> PythonObject:
    """Builds a Python `list` holding the dimensions of `shape`, in order.

    Args:
        shape: Tensor shape to convert.

    Returns:
        A Python list with one entry per dimension of `shape`.

    Raises:
        If creating the list or appending to it fails on the Python side.
    """
    var dims = Python.evaluate("list()")
    var rank = shape.rank()
    for axis in range(rank):
        # `append` returns a Python object (None); discard it explicitly.
        _ = dims.append(shape[axis])
    return dims^

@always_inline
fn get_np_dtype[type: DType](np: PythonObject) raises -> PythonObject:
    """Maps a Mojo `DType` to the NumPy scalar type of the same name.

    The branch is selected at compile time via `@parameter if`, so only the
    lookup for the requested `type` is emitted.

    Parameters:
        type: The Mojo element type to translate.

    Args:
        np: The imported `numpy` module.

    Returns:
        The matching NumPy type object (e.g. `np.float32`).

    Raises:
        "Unknown datatype" when `type` has no mapping below, or if the
        attribute lookup on `np` fails.
    """
    @parameter
    if type is DType.float32:
        return np.float32
    elif type is DType.int32:
        return np.int32
    elif type is DType.int64:
        return np.int64
    elif type is DType.uint8:
        return np.uint8
    # Remaining fixed-width numeric types, so callers are not limited to the
    # four dtypes this example happens to use.
    elif type is DType.float16:
        return np.float16
    elif type is DType.float64:
        return np.float64
    elif type is DType.int8:
        return np.int8
    elif type is DType.int16:
        return np.int16
    elif type is DType.uint16:
        return np.uint16
    elif type is DType.uint32:
        return np.uint32
    elif type is DType.uint64:
        return np.uint64

    raise "Unknown datatype"

@always_inline
fn tensor_to_numpy[
    type: DType
](tensor: Tensor[type], np: PythonObject) raises -> PythonObject:
    """Creates a NumPy array with the shape and contents of `tensor`.

    A zero-filled array of the tensor's shape and corresponding NumPy dtype
    is allocated with `np.zeros`, then the tensor's elements are copied
    into it.

    Parameters:
        type: Element type of the tensor.

    Args:
        tensor: Source tensor.
        np: The imported `numpy` module.

    Returns:
        The newly created NumPy array.

    Raises:
        If `type` has no NumPy mapping or any Python call fails.
    """
    var dims = shape_to_python_list(tensor.shape())
    var np_dtype = get_np_dtype[type](np)
    var result = np.zeros(dims, np_dtype)
    # Explicitly consume the shape list here, after `np.zeros` has used it.
    _ = dims^
    memcpy_to_numpy[type](result, tensor)
    return result^

@always_inline
fn numpy_to_tensor[
    dtype: DType
](inout np_array: PythonObject) raises -> Tensor[dtype]:
    """Copies a NumPy array into a newly allocated `Tensor[dtype]`.

    Parameters:
        dtype: Element type of the resulting tensor. It must equal the
            element type reported by the array's spec.

    Args:
        np_array: Source NumPy array, viewed through `EngineNumpyView`.

    Returns:
        A tensor built from the view's spec and a fresh buffer holding a
        copy of the array's elements.

    Raises:
        If the array's dtype differs from `dtype`, or if creating the view
        fails.
    """
    var view = EngineNumpyView(np_array)
    # Reject mismatched element types up front: the copy below bitcasts the
    # source buffer to `dtype` and counts in `dtype`-sized elements, so a
    # mismatch would copy the wrong number of bytes and yield a tensor whose
    # spec disagrees with its storage.
    if view.spec().dtype() != dtype:
        raise "numpy array dtype does not match the requested tensor dtype"
    var size = view.spec().num_elements()
    var ptr = DTypePointer[dtype].alloc(size)
    memcpy(ptr, view.unsafe_ptr().bitcast[dtype](), size)
    # The tensor is constructed from the freshly allocated pointer; `view`
    # is referenced here, after the copy, exactly as in the original code.
    return Tensor[dtype](view.spec(), ptr)

## Prepare client/inputs

In [None]:
# Python packages used to build the model inputs.
var open_clip = Python.import_module("open_clip")
var PIL = Python.import_module("PIL")
var requests = Python.import_module("requests")
var torch = Python.import_module("torch")

# `create_model_and_transforms` returns a tuple; only its third element is
# used below, as the image preprocessing callable.
var model_and_transforms = open_clip.create_model_and_transforms(
    "ViT-B-32", pretrained="laion2b_s34b_b79k"
)
var preprocess = model_and_transforms[2]
var tokenizer = open_clip.get_tokenizer("ViT-B-32")

# Example image and the candidate labels to score it against.
var url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
var labels = ["cats", "dogs", "fish"]

# Download the image as a stream and open it with PIL.
var http_response = requests.get(url, stream=True)
var raw_image = PIL.Image.open(http_response.raw)

# Preprocess the image, add a leading dimension with `unsqueeze(0)`, and
# convert both inputs to NumPy arrays.
var image = preprocess(raw_image).unsqueeze(0).detach().numpy()
var text = tokenizer(labels).detach().numpy()

In [None]:
from max.engine import InferenceSession
from max.engine.tensor import EngineNumpyView
from max.serve.kserve.client import GRPCInferenceClient

# Copy the NumPy inputs into Mojo tensors with the element types used for
# the "image" and "text" model inputs.
var image_tensor = numpy_to_tensor[DType.float32](image)
var text_tensor = numpy_to_tensor[DType.int64](text)

# Create the session and a tensor map keyed by input name.
var session = InferenceSession()
var inputs = session.new_tensor_map()
# NOTE(review): `borrow` presumably stores a reference rather than a copy,
# so `image_tensor` / `text_tensor` must outlive `inputs` - confirm against
# the TensorMap documentation.
inputs.borrow("image", image_tensor)
inputs.borrow("text", text_tensor)
print(str(inputs))

## Run an inference

In [None]:
# Names of the model outputs to request from the server.
var req_outputs = List[String]("image_features", "text_features")

# Send the inputs to model "openclip", version "0", over gRPC.
var client = GRPCInferenceClient("0.0.0.0:8000", session)
var response = client.infer("openclip", "0", inputs, req_outputs)
var outputs = response.get_output_tensors()

var np = Python.import_module("numpy")
# Both outputs are floating-point embeddings. Reading "text_features" as
# int64 would reinterpret the buffer with the wrong element type and make
# the in-place division used for normalization fail on an integer array.
var img_feats = tensor_to_numpy(outputs.get[DType.float32]("image_features"), np)
var txt_feats = tensor_to_numpy(outputs.get[DType.float32]("text_features"), np)
fn softmax(np: PythonObject, x: PythonObject) raises -> PythonObject:
    """Computes a softmax over all elements of `x`.

    The global maximum of `x` is subtracted before exponentiating, and the
    exponentials are divided by their total sum; no axis is specified, so
    the normalization runs over the whole array.

    Args:
        np: The imported `numpy` module.
        x: Array of scores.

    Returns:
        An array of the same shape as `x` whose entries sum to 1.

    Raises:
        If any of the NumPy calls fail.
    """
    var shifted = x - np.max(x)
    # Exponentiate once and reuse the result for numerator and denominator.
    var exps = np.exp(shifted)
    return exps / np.sum(exps)

# Scale every embedding row to unit length so the matrix product below
# yields cosine similarities. A norm taken without `axis` is computed over
# the whole matrix, which would not make each label embedding unit length.
# NOTE(review): assumes both feature arrays are laid out as
# (batch, embedding_dim) - confirm against the served model's outputs.
txt_feats /= np.linalg.norm(txt_feats, axis=-1, keepdims=True)
img_feats /= np.linalg.norm(img_feats, axis=-1, keepdims=True)
# Scale the similarities by 100 before the softmax.
var similarity = softmax(np, 100.0 * np.matmul(img_feats, txt_feats.T))
print("Label probs:\n", similarity)