In [1]:
!pip install fastapi Pillow python-multipart torch transformers uvicorn nest-asyncio pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.11-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting 

In [2]:
from huggingface_hub import login
from google.colab import userdata

# Authenticate with the Hugging Face Hub; the token is read from the
# 'HF_TOKEN' entry of Colab's userdata store instead of being hardcoded.
login(token=userdata.get('HF_TOKEN'))

In [15]:
from transformers import ViltProcessor, ViltForQuestionAnswering
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
from PIL import Image
import io
import torch
import requests


# Checkpoint shared by the processor and the model; defined once so the two
# can never be loaded from different checkpoints by accident.
MODEL_ID = "dandelin/vilt-b32-finetuned-vqa"

# Module-level singletons used by model_pipeline(): the processor prepares the
# (image, question) pair, the model scores candidate answers.
processor = ViltProcessor.from_pretrained(MODEL_ID)
model = ViltForQuestionAnswering.from_pretrained(MODEL_ID)

In [17]:
def model_pipeline(text: str, image: Image.Image) -> str:
    """Answer a question about an image using the module-level ViLT objects.

    Args:
        text: The question to ask about the image.
        image: The image the question refers to.

    Returns:
        The label from ``model.config.id2label`` whose logit is highest.
    """
    inputs = processor(image, text, return_tensors="pt")

    # Inference only: skip gradient tracking for the forward pass.
    with torch.no_grad():
        answer_scores = model(**inputs).logits

    # .item() requires a single index, i.e. one (image, question) pair per call.
    best_label_id = answer_scores.argmax(-1).item()
    return model.config.id2label[best_label_id]

# FastAPI application instance; the route decorators below register their
# handlers on it, and it is the object handed to uvicorn to serve.
app = FastAPI()

@app.get("/")
def read_root():
    """Root route: reply with a fixed greeting payload."""
    greeting = {"Hello": "World"}
    return greeting

@app.post("/ask")
async def ask(text: str = Form(...), image: UploadFile = File(...)):
    """Answer a question about an uploaded image.

    Args:
        text: The question, sent as a form field.
        image: The uploaded image file.

    Returns:
        A dict of the form ``{"answer": <predicted label>}``.

    Raises:
        HTTPException: 400 if the upload cannot be read or decoded as an image.
    """
    # Imported locally so this cell stays self-contained; ships with FastAPI.
    from fastapi.concurrency import run_in_threadpool

    try:
        content = await image.read()
        image_data = Image.open(io.BytesIO(content)).convert("RGB")
    except Exception as exc:
        # Keep the decode failure as the cause so it is visible when debugging.
        raise HTTPException(status_code=400, detail="Invalid image format.") from exc

    # model_pipeline is blocking; running it directly in this coroutine would
    # stall the event loop (and every other request) for the whole forward
    # pass, so hand it to the worker thread pool instead.
    result = await run_in_threadpool(model_pipeline, text, image_data)

    print(f"Question: {text}")
    print(f"Answer: {result}")
    return {"answer": result}


import nest_asyncio
from pyngrok import ngrok
import uvicorn
import os

# The ngrok credential is read from the environment so it never lives in the
# notebook. Fail fast with a clear message instead of passing None to pyngrok.
NGROK_TOKEN = os.getenv("NGROK_AUTH_TOKEN")
if not NGROK_TOKEN:
    raise RuntimeError(
        "NGROK_AUTH_TOKEN is not set; define it in the environment before "
        "running this cell."
    )
ngrok.set_auth_token(NGROK_TOKEN)

# Single source of truth for the port shared by the tunnel and the server.
PORT = 8000

# Run
public_url = ngrok.connect(PORT)
print("Public URL:", public_url.public_url)

try:
    # Patch asyncio so uvicorn can start its loop from inside the notebook.
    nest_asyncio.apply()
    uvicorn.run(app, port=PORT)
finally:
    # Close the tunnel when the server stops (or fails to start); otherwise
    # every re-run of this cell leaves another tunnel open.
    ngrok.disconnect(public_url.public_url)

Public URL: https://d4a8-34-81-138-181.ngrok-free.app


INFO:     Started server process [361]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     128.135.204.40:0 - "GET / HTTP/1.1" 200 OK
INFO:     128.135.204.40:0 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     128.135.204.40:0 - "GET /docs HTTP/1.1" 200 OK
INFO:     128.135.204.40:0 - "GET /openapi.json HTTP/1.1" 200 OK
Question: What is the color of the cat?
Answer: brown
INFO:     128.135.204.40:0 - "POST /ask HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [361]
