Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Serve] Issue on page /serve/getting_started.html #27905

Closed
sujit420 opened this issue Aug 16, 2022 · 4 comments
Closed

[Serve] Issue on page /serve/getting_started.html #27905

sujit420 opened this issue Aug 16, 2022 · 4 comments
Labels
serve Ray Serve Related Issue triage Needs triage (eg: priority, bug/not-bug, and owning component)

Comments

@sujit420
Copy link

I am using the code below, as suggested in the tutorial, for my 2 GPUs.
from starlette.requests import Request

import ray
from ray import serve

from transformers import pipeline
from parallel import *


@serve.deployment(num_replicas=2, ray_actor_options={"num_cpus": 0, "num_gpus": 1})
class Translator:
    """Ray Serve deployment wrapping a model provided by the ``parallel`` module.

    Two replicas are requested, each reserving one GPU. NOTE(review): reserving
    a GPU via ``ray_actor_options`` only sets CUDA_VISIBLE_DEVICES for the
    replica — the model itself must still be moved to the GPU (e.g.
    ``self.model.to("cuda")``), otherwise inference runs on CPU.
    """

    # FIX(review): the pasted snippet had ``def init(self)`` — markdown stripped
    # the dunder underscores, so ``__init__`` was never defined and
    # ``self.model`` was never set. Restored to ``__init__``.
    def __init__(self):
        # Load model once per replica.
        self.model = get_model()  # pipeline("translation_en_to_fr", model="t5-small")

    # FIX(review): return annotation corrected from ``int`` — the method
    # returns a string literal.
    def translate(self, count: int) -> str:
        # Run inference
        model_output = predict(self.model, count)  # self.model(text)

        # Post-process output to return only the translation text
        # translation = model_output[0]["translation_text"]

        return 'translation'

    async def __call__(self, http_request: Request) -> str:
        # NOTE(review): annotated ``str`` but ``.json()`` may return any JSON
        # value — confirm what the client actually posts.
        count: str = await http_request.json()
        return self.translate(count)


translator = Translator.bind()

I have another file which loads the model and runs prediction.
This is how the model is loaded:
def get_model():
    """Load the fine-tuned LayoutLMv2 question-answering model.

    Returns the model as loaded by ``from_pretrained`` — i.e. on CPU.
    NOTE(review): the ``.to(device)`` calls below are commented out, so the
    model is never moved to a GPU; this matches the reported symptom of
    inference using only the CPU. Inside a Serve replica with
    ``num_gpus: 1`` a plain ``model.to("cuda")`` should suffice.

    NOTE(review): ``LayoutLMv2ForQuestionAnswering`` and
    ``model_checkpoint_finetuned`` are defined elsewhere — not visible in
    this snippet.
    """
    # FIX(review): restored the function-body indentation lost in the paste;
    # tokens are otherwise unchanged.
    model = LayoutLMv2ForQuestionAnswering.from_pretrained(model_checkpoint_finetuned)
    # model.to(device0)
    # model.to(device1)
    print('model loaded in device')
    return model

I don't see any GPUs being used while predicting; it just uses the CPU.

Can anyone help here?

@james811223ad
Copy link

Here's how I do it

import numpy as np
import re
import requests
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from torch.cuda import device_count as ngpu
from ray import serve, init
from fastapi import FastAPI

# Pick "cuda" when at least one GPU is visible, otherwise "cpu".
device = f"c{'uda' if ngpu() else 'pu'}"
init(include_dashboard=False, ignore_reinit_error=True)
app = FastAPI()
serve.start()


# One GPU per replica, with as many replicas as GPUs (single replica when
# 0 or 1 GPUs are present).
@serve.deployment(ray_actor_options={"num_gpus": 1 if ngpu() else 0}, num_replicas=ngpu() if ngpu() > 1 else 1)
@serve.ingress(app)
class cv:
    def __init__(self):
        # Build a detectron2 predictor from a local config file plus weights,
        # targeting the device chosen above.
        # NOTE(review): ``cur_dir`` is not defined in this snippet —
        # presumably set elsewhere in the full source; verify before running.
        cfg = get_cfg()
        cfg.merge_from_file(cur_dir + "cv_application.yaml")
        weight_path = "/home/ec2-user/SageMaker/cv_application.pth"
        cfg.MODEL.WEIGHTS = weight_path
        cfg.MODEL.DEVICE = device
        self.model_application = DefaultPredictor(cfg)

    @app.get("/application")
    def application_predict(self, img_path):
        # NOTE(review): ``fs`` and ``Image`` are not defined in this snippet
        # (likely an fsspec-style filesystem and PIL.Image) — confirm against
        # the full source.
        with fs.open(img_path) as f:
            im = np.array(Image.open(f))
        outputs = self.model_application(im)
        # Move tensors back to CPU and convert to plain lists so the response
        # is JSON-serializable.
        return {
            'pred_classes': outputs["instances"].pred_classes.cpu().numpy().tolist(),
            'scores': outputs['instances'].get_fields()['scores'].cpu().numpy().tolist(),
            'bbox': outputs['instances'].get_fields()['pred_boxes'].tensor.cpu().numpy().tolist(),
            'img_height': outputs['instances'].image_size[0]
            }

# Deploy the class and smoke-test the HTTP endpoint.
cv.deploy()
resp = requests.get("http://127.0.0.1:8000/cv/application", params={'img_path': 'path/to/image'})

@jjyao jjyao changed the title Issue on page /serve/getting_started.html [Serve] Issue on page /serve/getting_started.html Aug 17, 2022
@jjyao jjyao added triage Needs triage (eg: priority, bug/not-bug, and owning component) serve Ray Serve Related Issue labels Aug 17, 2022
@sujit420
Copy link
Author

Here's how I do it

import numpy as np
import re
import requests
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from torch.cuda import device_count as ngpu
from ray import serve, init
from fastapi import FastAPI

# Pick "cuda" when at least one GPU is visible, otherwise "cpu".
device = f"c{'uda' if ngpu() else 'pu'}"
init(include_dashboard=False, ignore_reinit_error=True)
app = FastAPI()
serve.start()


# One GPU per replica, with as many replicas as GPUs (single replica when
# 0 or 1 GPUs are present).
@serve.deployment(ray_actor_options={"num_gpus": 1 if ngpu() else 0}, num_replicas=ngpu() if ngpu() > 1 else 1)
@serve.ingress(app)
class cv:
    def __init__(self):
        # Build a detectron2 predictor from a local config file plus weights,
        # targeting the device chosen above.
        # NOTE(review): ``cur_dir`` is not defined in this snippet —
        # presumably set elsewhere in the full source; verify before running.
        cfg = get_cfg()
        cfg.merge_from_file(cur_dir + "cv_application.yaml")
        weight_path = "/home/ec2-user/SageMaker/cv_application.pth"
        cfg.MODEL.WEIGHTS = weight_path
        cfg.MODEL.DEVICE = device
        self.model_application = DefaultPredictor(cfg)

    @app.get("/application")
    def application_predict(self, img_path):
        # NOTE(review): ``fs`` and ``Image`` are not defined in this snippet
        # (likely an fsspec-style filesystem and PIL.Image) — confirm against
        # the full source.
        with fs.open(img_path) as f:
            im = np.array(Image.open(f))
        outputs = self.model_application(im)
        # Move tensors back to CPU and convert to plain lists so the response
        # is JSON-serializable.
        return {
            'pred_classes': outputs["instances"].pred_classes.cpu().numpy().tolist(),
            'scores': outputs['instances'].get_fields()['scores'].cpu().numpy().tolist(),
            'bbox': outputs['instances'].get_fields()['pred_boxes'].tensor.cpu().numpy().tolist(),
            'img_height': outputs['instances'].image_size[0]
            }

# Deploy the class and smoke-test the HTTP endpoint.
cv.deploy()
resp = requests.get("http://127.0.0.1:8000/cv/application", params={'img_path': 'path/to/image'})

Does this code use multiple GPUs on your system?

@james811223ad
Copy link

It does

@simon-mo
Copy link
Contributor

simon-mo commented Nov 4, 2022

Seems to be answered? Thanks @sujit420. Feel free to re-open if it is not

@simon-mo simon-mo closed this as completed Nov 4, 2022
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
serve Ray Serve Related Issue triage Needs triage (eg: priority, bug/not-bug, and owning component)
Projects
None yet
Development

No branches or pull requests

4 participants