This notebook lets us test Triton's output via the FastAPI proxy from our local machine.

In [4]:
import httpx

# Shared async HTTP client reused by every request helper in this notebook.
ahttp = httpx.AsyncClient()

In [12]:
async def health_check():
    """Query the proxy's ``/health`` endpoint.

    Returns:
        A ``(status_code, body_text)`` tuple taken from the HTTP response.
    """
    health_url = "http://localhost:8005/health"
    response = await ahttp.get(health_url)
    status, body = response.status_code, response.text
    return status, body


async def test_simple_net():
    """POST a fixed two-element ``x`` input to the ``simple_net`` endpoint.

    Returns:
        The response body decoded from JSON.
    """
    endpoint = "http://localhost:8005/models/simple_net"
    payload = {"x": [1.0, 1.0]}
    response = await ahttp.post(endpoint, json=payload)
    return response.json()


async def test_finbert_model(trt_model: bool = False):
    """POST a fixed, pre-tokenized ten-token input to the ``finbert-model`` endpoint.

    Args:
        trt_model: Forwarded unchanged as the ``trt_model`` field of the
            request body.
            NOTE(review): presumably asks the proxy to use a TensorRT
            variant of the model — confirm against the proxy code.

    Returns:
        The response body decoded from JSON.
    """
    endpoint = "http://localhost:8005/models/finbert-model"
    token_ids = [101, 15768, 24356, 1998, 1996, 2329, 9044, 4227, 1012, 102]
    seq_len = len(token_ids)
    payload = {
        "input_ids": token_ids,
        # Mask and segment ids have one entry per token: all ones / all zeros.
        "attention_mask": [1] * seq_len,
        "token_type_ids": [0] * seq_len,
        "trt_model": trt_model,
    }
    response = await ahttp.post(endpoint, json=payload)
    return response.json()


async def test_finbert_tokenizer():
    """POST a fixed sample sentence to the ``finbert-tokenizer`` endpoint.

    Returns:
        The response body decoded from JSON.
    """
    endpoint = "http://localhost:8005/models/finbert-tokenizer"
    payload = {"text": "Stocks rallied and the British pound gained."}
    response = await ahttp.post(endpoint, json=payload)
    return response.json()


async def test_finbert():
    """POST a fixed sample sentence to the ``finbert`` endpoint.

    Returns:
        The response body decoded from JSON.
    """
    endpoint = "http://localhost:8005/models/finbert"
    payload = {"text": "Stocks rallied and the British pound gained."}
    response = await ahttp.post(endpoint, json=payload)
    return response.json()

In [13]:
print(await health_check())
print(await test_simple_net())
print(await test_finbert_model())
print(await test_finbert_model(True))
print(await test_finbert_tokenizer())
print(await test_finbert())

(200, '{"status":"OK"}')
{'prediction': [2.0, 2.0]}
{'error': 'Failed to query Triton Inference Server', 'details': "[StatusCode.NOT_FOUND] Request for unknown model: 'finbert-model' is not found"}
{'error': 'Failed to query Triton Inference Server', 'details': "[StatusCode.NOT_FOUND] Request for unknown model: 'finbert-trt-model' is not found"}
{'error': 'Failed to query Triton Inference Server', 'details': "[StatusCode.NOT_FOUND] Request for unknown model: 'finbert-tokenizer' is not found"}
{'error': 'Failed to query Triton Inference Server', 'details': "[StatusCode.NOT_FOUND] Request for unknown model: 'finbert' is not found"}


In [None]:
# ## [debug]
# resp = await test_finbert()  # already the parsed JSON body (a dict)
# print(resp['details'])