Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Serve] Issue on page /serve/getting_started.html #27905

Closed
sujit420 opened this issue Aug 16, 2022 · 4 comments
Closed

[Serve] Issue on page /serve/getting_started.html #27905

sujit420 opened this issue Aug 16, 2022 · 4 comments
Labels
serve Ray Serve Related Issue triage Needs triage (eg: priority, bug/not-bug, and owning component)

Comments

@sujit420
Copy link

I am using the code below, as suggested in the tutorial, for my 2 GPUs.
from starlette.requests import Request

import ray
from ray import serve

from transformers import pipeline
from parallel import *


@serve.deployment(num_replicas=2, ray_actor_options={"num_cpus": 0, "num_gpus": 1})
class Translator:
    """Ray Serve deployment wrapping a model provided by the ``parallel`` module.

    Two replicas are requested, each reserving one GPU. NOTE(review): reserving
    a GPU via ``ray_actor_options`` only sets CUDA_VISIBLE_DEVICES for the
    replica — the model itself must still be moved to the GPU (e.g.
    ``self.model.to("cuda")``), otherwise inference runs on CPU.
    """

    # FIX(review): the pasted snippet had ``def init(self)`` — markdown stripped
    # the dunder underscores, so ``__init__`` was never defined and
    # ``self.model`` was never set. Restored to ``__init__``.
    def __init__(self):
        # Load model once per replica.
        self.model = get_model()  # pipeline("translation_en_to_fr", model="t5-small")

    # FIX(review): return annotation corrected from ``int`` — the method
    # returns a string literal.
    def translate(self, count: int) -> str:
        # Run inference
        model_output = predict(self.model, count)  # self.model(text)

        # Post-process output to return only the translation text
        # translation = model_output[0]["translation_text"]

        return 'translation'

    async def __call__(self, http_request: Request) -> str:
        # NOTE(review): annotated ``str`` but ``.json()`` may return any JSON
        # value — confirm what the client actually posts.
        count: str = await http_request.json()
        return self.translate(count)


translator = Translator.bind()

I have another file which loads the model and runs prediction.
This is how the model is loaded:
def get_model():
    """Load the fine-tuned LayoutLMv2 question-answering model.

    Returns the model as loaded by ``from_pretrained`` — i.e. on CPU.
    NOTE(review): the ``.to(device)`` calls below are commented out, so the
    model is never moved to a GPU; this matches the reported symptom of
    inference using only the CPU. Inside a Serve replica with
    ``num_gpus: 1`` a plain ``model.to("cuda")`` should suffice.

    NOTE(review): ``LayoutLMv2ForQuestionAnswering`` and
    ``model_checkpoint_finetuned`` are defined elsewhere — not visible in
    this snippet.
    """
    # FIX(review): restored the function-body indentation lost in the paste;
    # tokens are otherwise unchanged.
    model = LayoutLMv2ForQuestionAnswering.from_pretrained(model_checkpoint_finetuned)
    # model.to(device0)
    # model.to(device1)
    print('model loaded in device')
    return model

I don't see any GPUs being used while predicting; it just uses the CPU.

Can anyone help here?

@james811223ad
Copy link

Here's how I do it

import numpy as np
import re
import requests
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from torch.cuda import device_count as ngpu
from ray import serve, init
from fastapi import FastAPI

# Pick "cuda" when at least one GPU is visible, otherwise "cpu".
device = f"c{'uda' if ngpu() else 'pu'}"
init(include_dashboard=False, ignore_reinit_error=True)
app = FastAPI()
serve.start()


# One GPU per replica, with as many replicas as GPUs (single replica when
# 0 or 1 GPUs are present).
@serve.deployment(ray_actor_options={"num_gpus": 1 if ngpu() else 0}, num_replicas=ngpu() if ngpu() > 1 else 1)
@serve.ingress(app)
class cv:
    def __init__(self):
        # Build a detectron2 predictor from a local config file plus weights,
        # targeting the device chosen above.
        # NOTE(review): ``cur_dir`` is not defined in this snippet —
        # presumably set elsewhere in the full source; verify before running.
        cfg = get_cfg()
        cfg.merge_from_file(cur_dir + "cv_application.yaml")
        weight_path = "/home/ec2-user/SageMaker/cv_application.pth"
        cfg.MODEL.WEIGHTS = weight_path
        cfg.MODEL.DEVICE = device
        self.model_application = DefaultPredictor(cfg)

    @app.get("/application")
    def application_predict(self, img_path):
        # NOTE(review): ``fs`` and ``Image`` are not defined in this snippet
        # (likely an fsspec-style filesystem and PIL.Image) — confirm against
        # the full source.
        with fs.open(img_path) as f:
            im = np.array(Image.open(f))
        outputs = self.model_application(im)
        # Move tensors back to CPU and convert to plain lists so the response
        # is JSON-serializable.
        return {
            'pred_classes': outputs["instances"].pred_classes.cpu().numpy().tolist(),
            'scores': outputs['instances'].get_fields()['scores'].cpu().numpy().tolist(),
            'bbox': outputs['instances'].get_fields()['pred_boxes'].tensor.cpu().numpy().tolist(),
            'img_height': outputs['instances'].image_size[0]
            }

# Deploy the class and smoke-test the HTTP endpoint.
cv.deploy()
resp = requests.get("http://127.0.0.1:8000/cv/application", params={'img_path': 'path/to/image'})

@jjyao jjyao changed the title Issue on page /serve/getting_started.html [Serve] Issue on page /serve/getting_started.html Aug 17, 2022
@jjyao jjyao added triage Needs triage (eg: priority, bug/not-bug, and owning component) serve Ray Serve Related Issue labels Aug 17, 2022
@sujit420
Copy link
Author

Here's how I do it

import numpy as np
import re
import requests
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from torch.cuda import device_count as ngpu
from ray import serve, init
from fastapi import FastAPI

# Pick "cuda" when at least one GPU is visible, otherwise "cpu".
device = f"c{'uda' if ngpu() else 'pu'}"
init(include_dashboard=False, ignore_reinit_error=True)
app = FastAPI()
serve.start()


# One GPU per replica, with as many replicas as GPUs (single replica when
# 0 or 1 GPUs are present).
@serve.deployment(ray_actor_options={"num_gpus": 1 if ngpu() else 0}, num_replicas=ngpu() if ngpu() > 1 else 1)
@serve.ingress(app)
class cv:
    def __init__(self):
        # Build a detectron2 predictor from a local config file plus weights,
        # targeting the device chosen above.
        # NOTE(review): ``cur_dir`` is not defined in this snippet —
        # presumably set elsewhere in the full source; verify before running.
        cfg = get_cfg()
        cfg.merge_from_file(cur_dir + "cv_application.yaml")
        weight_path = "/home/ec2-user/SageMaker/cv_application.pth"
        cfg.MODEL.WEIGHTS = weight_path
        cfg.MODEL.DEVICE = device
        self.model_application = DefaultPredictor(cfg)

    @app.get("/application")
    def application_predict(self, img_path):
        # NOTE(review): ``fs`` and ``Image`` are not defined in this snippet
        # (likely an fsspec-style filesystem and PIL.Image) — confirm against
        # the full source.
        with fs.open(img_path) as f:
            im = np.array(Image.open(f))
        outputs = self.model_application(im)
        # Move tensors back to CPU and convert to plain lists so the response
        # is JSON-serializable.
        return {
            'pred_classes': outputs["instances"].pred_classes.cpu().numpy().tolist(),
            'scores': outputs['instances'].get_fields()['scores'].cpu().numpy().tolist(),
            'bbox': outputs['instances'].get_fields()['pred_boxes'].tensor.cpu().numpy().tolist(),
            'img_height': outputs['instances'].image_size[0]
            }

# Deploy the class and smoke-test the HTTP endpoint.
cv.deploy()
resp = requests.get("http://127.0.0.1:8000/cv/application", params={'img_path': 'path/to/image'})

Does this code use multiple GPUs on your system?

@james811223ad
Copy link

It does

@simon-mo
Copy link
Contributor

simon-mo commented Nov 4, 2022

Seems to be answered? Thanks @sujit420. Feel free to re-open if it is not

@simon-mo simon-mo closed this as completed Nov 4, 2022
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
serve Ray Serve Related Issue triage Needs triage (eg: priority, bug/not-bug, and owning component)
Projects
None yet
Development

No branches or pull requests

4 participants