-
Notifications
You must be signed in to change notification settings - Fork 84
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Paligemma Workflows Block #399
base: main
Are you sure you want to change the base?
Changes from all commits
a61d71b
52a9dd8
887ab54
deafae0
08fd384
b68e587
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
from typing import Any, Dict, List, Literal, Optional, Tuple, Type, Union | ||
|
||
from pydantic import AliasChoices, ConfigDict, Field | ||
|
||
from inference.core.entities.requests.paligemma import PaliGemmaInferenceRequest | ||
from inference.core.managers.base import ModelManager | ||
from inference.core.workflows.core_steps.common.utils import load_core_model | ||
from inference.core.workflows.entities.base import OutputDefinition | ||
from inference.core.workflows.entities.types import ( | ||
BATCH_OF_PARENT_ID_KIND, | ||
BATCH_OF_STRING_KIND, | ||
STRING_KIND, | ||
FlowControl, | ||
StepOutputImageSelector, | ||
WorkflowImageSelector, | ||
WorkflowParameterSelector, | ||
) | ||
from inference.core.workflows.prototypes.block import ( | ||
WorkflowBlock, | ||
WorkflowBlockManifest, | ||
) | ||
|
||
# Long-form, user-facing description of this block; surfaced verbatim in the
# generated block schema/catalog via `json_schema_extra["long_description"]`.
LONG_DESCRIPTION = """
PaliGemmaModel block is used to run PaliGemma model. It takes an image and a prompt as input and returns the model output.
"""
|
||
|
||
class BlockManifest(WorkflowBlockManifest):
    """Manifest declaring the inputs, outputs and metadata of the PaliGemma block.

    The manifest drives schema generation and input validation for the
    workflows execution engine; it performs no inference itself.
    """

    model_config = ConfigDict(
        json_schema_extra={
            "short_description": "Run PaliGemma model.",
            "long_description": LONG_DESCRIPTION,
            "license": "Apache-2.0",
            "block_type": "model",
        }
    )
    # Discriminator the workflows engine matches to pick this block type.
    type: Literal["PaliGemmaModel"]
    # Accepts either a workflow input image or the image output of another step;
    # "image" is kept as an accepted alias for backward compatibility.
    images: Union[WorkflowImageSelector, StepOutputImageSelector] = Field(
        description="Reference an image to be used as input for step processing",
        examples=["$inputs.image", "$steps.cropping.crops"],
        validation_alias=AliasChoices("images", "image"),
    )
    # Free-form text prompt; may be a literal string or a workflow parameter.
    prompt: Union[WorkflowParameterSelector(kind=[STRING_KIND]), str] = Field(
        description="Holds unconstrained text prompt to LMM model",
        examples=["my prompt", "$inputs.prompt"],
    )

    @classmethod
    def describe_outputs(cls) -> List[OutputDefinition]:
        """Declare the two outputs emitted per processed image."""
        return [
            OutputDefinition(name="parent_id", kind=[BATCH_OF_PARENT_ID_KIND]),
            OutputDefinition(name="model_output", kind=[BATCH_OF_STRING_KIND]),
        ]
|
||
|
||
class PaliGemmaModelBlock(WorkflowBlock):
    """Workflow block that runs the PaliGemma model over a batch of images.

    For each input image, one inference request is issued with the shared
    text prompt, and the raw model response is returned alongside the
    image's parent id.
    """

    def __init__(
        self,
        model_manager: ModelManager,
        api_key: Optional[str],
    ):
        # Shared manager used to load the core model and dispatch requests.
        self._model_manager = model_manager
        self._api_key = api_key

    @classmethod
    def get_init_parameters(cls) -> List[str]:
        """Names of the constructor dependencies injected by the engine."""
        return ["model_manager", "api_key"]

    @classmethod
    def get_manifest(cls) -> Type[WorkflowBlockManifest]:
        """Return the manifest class describing this block's interface."""
        return BlockManifest

    async def run_locally(
        self,
        images: List[dict],
        prompt: str,
    ) -> Union[List[Dict[str, Any]], Tuple[List[Dict[str, Any]], FlowControl]]:
        """Run PaliGemma sequentially on every image with the given prompt.

        Returns one result dict per input image, each carrying the image's
        ``parent_id`` and the model's textual output under ``model_output``.
        """
        results: List[Dict[str, Any]] = []
        for image in images:
            model_output = await self._query_model(image=image, prompt=prompt)
            results.append(
                {
                    "parent_id": image["parent_id"],
                    "model_output": model_output,
                }
            )
        return results

    async def _query_model(self, image: dict, prompt: str) -> Any:
        """Load the PaliGemma core model and run a single inference request."""
        request = PaliGemmaInferenceRequest(
            image=image, prompt=prompt, api_key=self._api_key
        )
        model_id = load_core_model(
            model_manager=self._model_manager,
            inference_request=request,
            core_model="paligemma",
        )
        response = await self._model_manager.infer_from_request(model_id, request)
        return response.response
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We may want this one to be set up for both local & async execution since it can't run without an NVIDIA GPU. You may want to be doing realtime video for your workflow (e.g. on a Jetson) but occasionally call out to a beefy server somewhere for an LLM response.