diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/README.md b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/README.md new file mode 100644 index 000000000..d70f082c3 --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/README.md @@ -0,0 +1,73 @@ + +# OCI OpenAI-Compatible Gateway + +Simple FastAPI gateway exposing OCI LLMs and Agents via OpenAI-compatible API. +Big thanks to https://github.com/RETAJD/modelsOCI-toOpenAI/tree/main + +## Quick Start + +1. **Install dependencies**: +```bash +pip install fastapi uvicorn oci pyyaml openai +``` + +2. **Set API key (optional)**: +```bash +export GATEWAY_API_KEYS="ocigenerativeai" #default +``` + +3. **Prepare config files** (`agents.yaml`, `models.yaml`) next to `app.py`: + +Example `agents.yaml`: +```yaml +agents: + - id: "sales-kb" + name: "Sales KB Agent" + description: "Grounded in sales docs" + region: "eu-frankfurt-1" + endpoint_ocid: "ocid1.genaiagentendpoint.oc1.xxx" +``` + +Example `models.yaml`: +```yaml +region: eu-frankfurt-1 +compartment_id: "ocid1.compartment.oc1..xxx" +models: + ondemand: + - name: "cohere.command-r" + model_id: "cohere.command-r" + description: "Command R Model" +``` + +4. **Run the app**: +```bash +uvicorn app:app --host 0.0.0.0 --port 8088 +``` +or +```bash +python app.py +``` + +## Usage Example + +```python +from openai import OpenAI + +client = OpenAI(api_key="ocigenerativeai", base_url="http://localhost:8088/v1/") + +r1 = client.chat.completions.create( + model="ignored", + messages=[{"role": "user", "content": "Reply with 'pong'."}], + extra_body={ + "agent_endpoint_ocid": "ocid1.genaiagentendpoint.oc1.eu-frankfurt-1.", #your genai agent **endpoint** OCID + "region": "eu-frankfurt-1", + }, +) +print(r1.choices[0].message.content) + +``` + +## n8n/Open WebUI Integration + +- URL: `http://localhost:8088/v1` +- Model: `agent:sales-kb` or model names from `models.yaml` diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/agents.yaml b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/agents.yaml new file mode 100644 index 000000000..090c0c7b3 --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/agents.yaml @@ -0,0 +1,7 @@ +# agents.yaml +agents: + - id: "sales-kb" # your local handle + name: "Sales KB Agent" + description: "Grounded in sales docs" + region: "eu-frankfurt-1" # agent region + endpoint_ocid: "ocid1.genaiagentendpoint.oc1.eu-frankfurt-1." 
# ai agent **endpoint OCID** diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/__init__.py b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/auth.py b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/auth.py new file mode 100644 index 000000000..a4ecca0a8 --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/auth.py @@ -0,0 +1,28 @@ +import os +from typing import Annotated + +# import boto3 +from fastapi import Depends, HTTPException, status +from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials + +from api.setting import DEFAULT_API_KEYS + +api_key_param = os.environ.get("API_KEY_PARAM_NAME") +# if api_key_param: +# ssm = boto3.client("ssm") +# api_key = ssm.get_parameter(Name=api_key_param, WithDecryption=True)["Parameter"][ +# "Value" +# ] +# else: +api_key = DEFAULT_API_KEYS + +security = HTTPBearer() + + +def api_key_auth( + credentials: Annotated[HTTPAuthorizationCredentials, Depends(security)] +): + if credentials.credentials != api_key: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid API Key" + ) diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/models/__init__.py b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/models/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/models/base.py b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/models/base.py new file mode 100644 index 000000000..99b571cff --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/models/base.py @@ -0,0 +1,67 @@ +import time +import uuid +from abc import ABC, abstractmethod +from typing import AsyncIterable + +from api.schema import ( + # Chat + ChatResponse, + ChatRequest, + ChatStreamResponse, + # Embeddings + EmbeddingsRequest, + EmbeddingsResponse, +) + + +class BaseChatModel(ABC): + """Represent a basic chat model + + Currently, OCI GenAI model is supported. + """ + + def list_models(self) -> list[str]: + """Return a list of supported models""" + return [] + + def validate(self, chat_request: ChatRequest): + """Validate chat completion requests.""" + pass + + @abstractmethod + def chat(self, chat_request: ChatRequest) -> ChatResponse: + """Handle a basic chat completion requests.""" + pass + + @abstractmethod + def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]: + """Handle a basic chat completion requests with stream response.""" + pass + + @staticmethod + def generate_message_id() -> str: + return "chatcmpl-" + str(uuid.uuid4())[:8] + + @staticmethod + def stream_response_to_bytes( + response: ChatStreamResponse | None = None + ) -> bytes: + if response: + # to populate other fields when using exclude_unset=True + response.system_fingerprint = "fp" + response.object = "chat.completion.chunk" + response.created = int(time.time()) + return "data: {}\n\n".format(response.model_dump_json(exclude_unset=True)).encode("utf-8") + return "data: [DONE]\n\n".encode("utf-8") + + +class BaseEmbeddingsModel(ABC): + """Represents a basic embeddings model. + + Currently, OCI GenAI models are supported. 
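+    Concrete subclasses are expected to translate an OpenAI-style
+    ``EmbeddingsRequest`` into an OCI embed-text call and wrap the result
+    in an ``EmbeddingsResponse``.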
+ """ + + @abstractmethod + def embed(self, embeddings_request: EmbeddingsRequest) -> EmbeddingsResponse: + """Handle a basic embeddings request.""" + pass diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/models/ocigenai.py b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/models/ocigenai.py new file mode 100644 index 000000000..cf0e40e51 --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/models/ocigenai.py @@ -0,0 +1,738 @@ +import base64 +import json +import logging +import re +import time +from abc import ABC +from typing import AsyncIterable, Iterable, Literal + +import oci +from oci.generative_ai_inference import models as oci_models +from api.setting import DEBUG +from api.setting import CLIENT_KWARGS, \ + INFERENCE_ENDPOINT_TEMPLATE, \ + SUPPORTED_OCIGENAI_EMBEDDING_MODELS, \ + SUPPORTED_OCIGENAI_CHAT_MODELS + +import numpy as np +import requests +import tiktoken +from fastapi import HTTPException + +from api.models.base import BaseChatModel, BaseEmbeddingsModel +from api.schema import ( + # Chat + ChatResponse, + ChatRequest, + Choice, + ChatResponseMessage, + Usage, + ChatStreamResponse, + ImageContent, + TextContent, + ToolCall, + ChoiceDelta, + UserMessage, + AssistantMessage, + ToolMessage, + Function, + ResponseFunction, + # Embeddings + EmbeddingsRequest, + EmbeddingsResponse, + EmbeddingsUsage, + Embedding, + Convertor +) +from config import EMBED_TRUNCATE + +logging.basicConfig(format='%(asctime)s,%(msecs)03d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', + datefmt='%Y-%m-%d:%H:%M:%S', + level=logging.DEBUG) +logger = logging.getLogger(__name__) + +generative_ai_inference_client = oci.generative_ai_inference.GenerativeAiInferenceClient( + **CLIENT_KWARGS +) + +ENCODER = tiktoken.get_encoding("cl100k_base") + + +class OCIGenAIModel(BaseChatModel): + # https://docs.oracle.com/en-us/iaas/Content/generative-ai/pretrained-models.htm + # https://docs.oracle.com/en-us/iaas/data-science/using/ai-quick-actions-model-deploy.htm + _supported_models = {} + + for model in SUPPORTED_OCIGENAI_CHAT_MODELS: + model_setting = SUPPORTED_OCIGENAI_CHAT_MODELS[model] + _supported_models[model] = { + "system": model_setting.get('system', True), + "multimodal": model_setting.get('multimodal', False), + "tool_call": model_setting.get('tool_call', False), + "stream_tool_call": model_setting.get('stream_tool_call', False), + } + + def list_models(self) -> list[str]: + return list(self._supported_models.keys()) + + def validate(self, chat_request: ChatRequest): + """Perform basic validation on requests""" + error = "" + # check if model is supported + if chat_request.model not in self._supported_models.keys(): + error = f"Unsupported model {chat_request.model}, please use models API to get a list of supported models" + + # check if tool call is supported + elif chat_request.tools and not self._is_tool_call_supported(chat_request.model, stream=chat_request.stream): + tool_call_info = "Tool call with streaming" if chat_request.stream else "Tool call" + error = f"{tool_call_info} is currently not supported by {chat_request.model}" + + if error: + print(error) + raise HTTPException( + status_code=400, + detail=error, + ) + + def _invoke_genai(self, chat_request: ChatRequest, stream=False): + """Common logic for invoke OCI GenAI models""" + if DEBUG: + logger.info("Raw request:\n" + chat_request.model_dump_json()) + + # convert OpenAI chat request to OCI Generative AI SDK request + chat_detail = self._parse_request(chat_request) + if DEBUG: + logger.info("OCI Generative AI 
request:\n" + json.dumps(json.loads(str(chat_detail)), ensure_ascii=False)) + try: + region = SUPPORTED_OCIGENAI_CHAT_MODELS[chat_request.model]["region"] + # generative_ai_inference_client.base_client.config["region"] = region + generative_ai_inference_client.base_client._endpoint = INFERENCE_ENDPOINT_TEMPLATE.replace("{region}", region) + response = generative_ai_inference_client.chat(chat_detail) + if DEBUG and not chat_detail.chat_request.is_stream: + logger.info("OCI Generative AI response:\n" + json.dumps(json.loads(str(response.data)), ensure_ascii=False)) + except Exception as e: + logger.error(e) + raise HTTPException(status_code=500, detail=str(e)) + return response + + def chat(self, chat_request: ChatRequest) -> ChatResponse: + """Default implementation for Chat API.""" + + # message_id = self.generate_message_id() + response = self._invoke_genai(chat_request) + message_id = response.request_id + + chat_response = self._create_response( + model=response.data.model_id, + message_id=message_id, + chat_response=response.data.chat_response, + input_tokens=0, + output_tokens=0, + ) + if DEBUG: + logger.info("Proxy response :" + chat_response.model_dump_json()) + return chat_response + + def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]: + """Default implementation for Chat Stream API""" + # print("="*20,str(chat_request)) + response = self._invoke_genai(chat_request) + if not response.data: + raise HTTPException(status_code=500, detail="OCI AI API returned empty response") + + # message_id = self.generate_message_id() + message_id = response.request_id + + events = response.data.events() + for stream in events: + chunk = json.loads(stream.data) + stream_response = self._create_response_stream( + model_id=chat_request.model, message_id=message_id, chunk=chunk + ) + if not stream_response: + continue + if DEBUG: + logger.info("Proxy response :" + stream_response.model_dump_json()) + if stream_response.choices: + yield self.stream_response_to_bytes(stream_response) + elif ( + chat_request.stream_options + and chat_request.stream_options.include_usage + ): + # An empty choices for Usage as per OpenAI doc below: + # if you set stream_options: {"include_usage": true}. + # an additional chunk will be streamed before the data: [DONE] message. + # The usage field on this chunk shows the token usage statistics for the entire request, + # and the choices field will always be an empty array. + # All other chunks will also include a usage field, but with a null value. + yield self.stream_response_to_bytes(stream_response) + + # return an [DONE] message at the end. + yield self.stream_response_to_bytes() + + def _parse_system_prompts(self, chat_request: ChatRequest) -> list[dict[str, str]]: + """Create system prompts. + Note that not all models support system prompts. + + example output: [{"text" : system_prompt}] + + See example: + https://docs.oracle.com/en-us/iaas/api/#/EN/generative-ai-inference/20231130/ChatResult/Chat + """ + + system_prompts = [] + for message in chat_request.messages: + if message.role != "system": + # ignore system messages here + continue + assert isinstance(message.content, str) + system_prompts.append(message.content) + + return system_prompts + + def _parse_messages(self, chat_request: ChatRequest) -> list[dict]: + """ + Converse API only support user and assistant messages. 
+ + example output: [{ + "role": "user", + "content": [{"text": input_text}] + }] + + See example: + https://docs.oracle.com/en-us/iaas/api/#/EN/generative-ai-inference/20231130/ChatResult/Chat + """ + messages = [] + for message in chat_request.messages: + if isinstance(message, UserMessage): + messages.append( + { + "role": message.role, + "content": self._parse_content_parts( + message, chat_request.model + ), + } + ) + elif isinstance(message, AssistantMessage): + if message.content: + # Text message + messages.append( + {"role": message.role, "content": [{"text": message.content}]} + ) + elif message.tool_calls: + # Tool use message + # formate https://platform.openai.com/docs/guides/function-calling?api-mode=chat#handling-function-calls + messages.append({"role": message.role,"tool_calls": message.tool_calls}) + elif isinstance(message, ToolMessage): + # Add toolResult to content + # https://docs.oracle.com/en-us/iaas/api/#/EN/generative-ai-inference/20231130/ChatResult/Chat + messages.append( + { + "role": "tool", + "tool_call_id": message.tool_call_id, + "content": message.content + } + ) + + else: + # ignore others, such as system messages + continue + return messages + + def _parse_request(self, chat_request: ChatRequest) -> oci_models.ChatDetails: + """Create default converse request body. + + Also perform validations to tool call etc. + + Ref: https://docs.oracle.com/en-us/iaas/api/#/EN/generative-ai-inference/20231130/ChatResult/Chat + """ + + messages = self._parse_messages(chat_request) + system_prompts = self._parse_system_prompts(chat_request) + + + # Base inference parameters. + inference_config = { + "max_tokens": chat_request.max_tokens, + "is_stream": chat_request.stream, + "frequency_penalty": chat_request.frequency_penalty, + "presence_penalty": chat_request.presence_penalty, + "temperature": chat_request.temperature, + "top_p": chat_request.top_p + } + + model_name = chat_request.model + provider = SUPPORTED_OCIGENAI_CHAT_MODELS[model_name]["provider"] + compartment_id = SUPPORTED_OCIGENAI_CHAT_MODELS[model_name]["compartment_id"] + + if provider == "dedicated": + endpoint = SUPPORTED_OCIGENAI_CHAT_MODELS[model_name]["endpoint"] + servingMode = oci_models.DedicatedServingMode( + serving_type = "DEDICATED", + endpoint_id = endpoint + ) + else: + model_id = SUPPORTED_OCIGENAI_CHAT_MODELS[model_name]["model_id"] + servingMode = oci_models.OnDemandServingMode( + serving_type = "ON_DEMAND", + model_id = model_id + ) + chat_detail = oci_models.ChatDetails( + compartment_id = compartment_id, + serving_mode = servingMode, + # chat_request = chatRequest + ) + + if provider == "cohere": + cohere_chatRequest = oci_models.CohereChatRequest(**inference_config) + if system_prompts: + cohere_chatRequest.preamble_override = ' '.join(system_prompts) + + # add tools + if chat_request.tools: + cohere_tools = Convertor.convert_tools_openai_to_cohere(chat_request.tools) + cohere_chatRequest.tools = cohere_tools + + chatHistory = [] + for i,message in enumerate(messages): + # process chat history + if i < len(messages)-1: + # print("="*22,'\n',message) + # text = text.encode("unicode_escape").decode("utf-8") + try: + text = message["content"][0]["text"] + except: + text = "" + if message["role"] == "user": + message_line = oci_models.CohereUserMessage( + role = "USER", + message = text + ) + elif message["role"] == "assistant": + if "tool_calls" in message: + if not message["tool_calls"]: + message_line = oci_models.CohereChatBotMessage( + role = "CHATBOT", + message = text + ) + else: + 
message_line = oci_models.CohereChatBotMessage( + role = "CHATBOT", + message = text, + tool_calls = Convertor.convert_tool_calls_openai_to_cohere(message["tool_calls"]) + ) + else: + message_line = oci_models.CohereChatBotMessage( + role = "CHATBOT", + message = text + ) + + elif message["role"] == "tool": + cohere_tool_results = [] + cohere_tool_result = Convertor.convert_tool_result_openai_to_cohere(message) + cohere_tool_results.append(cohere_tool_result) + message_line = oci_models.CohereToolMessage( + role = "TOOL", + tool_results = cohere_tool_results + ) + + chatHistory.append(message_line) + # process the last message + elif i == len(messages)-1: + if message["role"] in ("user","assistant","system"): + cohere_chatRequest.message = message["content"][0]["text"] + # text = text.encode("unicode_escape").decode("utf-8") + # input tool result + elif message["role"] == "tool": + cohere_chatRequest.message = "" + cohere_tool_results = [] + cohere_tool_result = Convertor.convert_tool_result_openai_to_cohere(message) + cohere_tool_results.append(cohere_tool_result) + cohere_chatRequest.tool_results = cohere_tool_results + + cohere_chatRequest.chat_history = chatHistory + chat_detail.chat_request = cohere_chatRequest + + elif provider == "meta": + generic_chatRequest = oci_models.GenericChatRequest(**inference_config) + generic_chatRequest.numGenerations = chat_request.n + generic_chatRequest.topK = -1 + + # add tools + if chat_request.tools: + llama_tools = Convertor.convert_tools_openai_to_llama(chat_request.tools) + generic_chatRequest.tools = llama_tools + + meta_messages = [] + for message in messages: + message["role"] = message["role"].upper() + if message["role"] == "TOOL": + message = Convertor.convert_tool_result_openai_to_llama(message) + elif message["role"] == "ASSISTANT": + if message["tool_calls"]: + message = oci_models.AssistantMessage( + role = "ASSISTANT", + content = None, + tool_calls = Convertor.convert_tool_calls_openai_to_llama(message["tool_calls"]) + ) + + meta_messages.append(message) + generic_chatRequest.messages = meta_messages + chat_detail.chat_request = generic_chatRequest + return chat_detail + + def _create_response( + self, + model: str, + message_id: str, + # content: list[dict] = None, + chat_response = None, + # finish_reason: str | None = None, + input_tokens: int = 0, + output_tokens: int = 0, + ) -> ChatResponse: + message = ChatResponseMessage(role="assistant") + if type(chat_response) == oci_models.CohereChatResponse: + finish_reason = chat_response.finish_reason + if chat_response.tool_calls: + oepnai_tool_calls = Convertor.convert_tool_calls_cohere_to_openai(chat_response.tool_calls) + message.tool_calls = oepnai_tool_calls + message.content = None + else: + message.content = chat_response.text + elif type(chat_response) == oci_models.GenericChatResponse: + finish_reason = chat_response.choices[-1].finish_reason + if chat_response.choices[0].finish_reason == "tool_calls": + oepnai_tool_calls = Convertor.convert_tool_calls_llama_to_openai(chat_response.choices[0].message.tool_calls) + message.tool_calls = oepnai_tool_calls + message.content = None + else: + message.content = chat_response.choices[0].message.content[0].text + + response = ChatResponse( + id = message_id, + model = model, + choices = [ + Choice( + index=0, + message=message, + finish_reason=self._convert_finish_reason(finish_reason), + logprobs=None, + ) + ], + usage=Usage( + prompt_tokens=input_tokens, + completion_tokens=output_tokens, + total_tokens=input_tokens + output_tokens, 
+ ), + ) + response.system_fingerprint = "fp" + response.object = "chat.completion" + response.created = int(time.time()) + return response + + def _create_response_stream( + self, model_id: str, message_id: str, chunk: dict + ) -> ChatStreamResponse | None: + """Parsing the OCI GenAI stream response chunk. + + Ref: https://docs.oracle.com/en-us/iaas/api/#/EN/generative-ai-inference/20231130/ChatResult/Chat + """ + if DEBUG: + logger.info("OCI GenAI response chunk: " + str(chunk)) + finish_reason = None + message = None + usage = None + text = None + openai_tool_calls = None + if "finishReason" not in chunk: + if model_id.startswith("cohere"): + if "tooCalls" not in chunk: + text = chunk["text"] + message = ChatResponseMessage( + role="assistant", + content=text, + tool_calls=openai_tool_calls + ) + elif "toolCalls" in chunk: + pass + # openai_tool_calls = Convertor.convert_tool_calls_cohere_to_openai(chunk["toolCalls"]) + # message = ChatResponseMessage( + # tool_calls=openai_tool_calls + # ) + elif model_id.startswith("meta"): + text = chunk["message"]["content"][0]["text"] + message = ChatResponseMessage( + role="assistant", + content=text, + tool_calls=openai_tool_calls + ) + elif "finishReason" in chunk: + message = ChatResponseMessage(role="assistant") + finish_reason = chunk["finishReason"] + if "toolCalls" in chunk: + openai_tool_calls = Convertor.convert_tool_calls_cohere_to_openai(chunk["toolCalls"]) + message.tool_calls = openai_tool_calls + message.content = "" + + # if "contentBlockStart" in chunk: + # # tool call start + # delta = chunk["contentBlockStart"]["start"] + # if "toolUse" in delta: + # # first index is content + # index = chunk["contentBlockStart"]["contentBlockIndex"] - 1 + # message = ChatResponseMessage( + # tool_calls=[ + # ToolCall( + # index=index, + # type="function", + # id=delta["toolUse"]["toolUseId"], + # function=ResponseFunction( + # name=delta["toolUse"]["name"], + # arguments="", + # ), + # ) + # ] + # ) + + if "metadata" in chunk: + # usage information in metadata. + metadata = chunk["metadata"] + if "usage" in metadata: + # token usage + return ChatStreamResponse( + id=message_id, + model=model_id, + choices=[], + usage=Usage( + prompt_tokens=metadata["usage"]["inputTokens"], + completion_tokens=metadata["usage"]["outputTokens"], + total_tokens=metadata["usage"]["totalTokens"], + ), + ) + if message: + return ChatStreamResponse( + id=message_id, + model=model_id, + choices=[ + ChoiceDelta( + index=0, + delta=message, + logprobs=None, + finish_reason=self._convert_finish_reason(finish_reason), + ) + ], + usage=usage, + ) + + return None + + def _parse_content_parts( + self, + message: UserMessage, + model_id: str, + ) -> list[dict]: + if isinstance(message.content, str): + return [ + { + "type": "TEXT", + "text": message.content, + } + ] + content_parts = [] + for part in message.content: + if isinstance(part, TextContent): + content_parts.append( + { + "type": "TEXT", + "text": part.text, + } + ) + elif isinstance(part, ImageContent): + if not self._is_multimodal_supported(model_id): + raise HTTPException( + status_code=400, + detail=f"Multimodal message is currently not supported by {model_id}", + ) + # image_data, content_type = self._parse_image(part.image_url.url) + content_parts.append( + { + "type": "IMAGE", + "imageUrl": {"url": f"{part.image_url.url}"}, + } + ) + else: + # Ignore.. 
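+                # unsupported content part types are silently dropped rather than rejected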
+ continue + return content_parts + + def _is_tool_call_supported(self, model_id: str, stream: bool = False) -> bool: + feature = self._supported_models.get(model_id) + if not feature: + return False + return feature["stream_tool_call"] if stream else feature["tool_call"] + + def _is_multimodal_supported(self, model_id: str) -> bool: + feature = self._supported_models.get(model_id) + if not feature: + return False + return feature["multimodal"] + + def _is_system_prompt_supported(self, model_id: str) -> bool: + feature = self._supported_models.get(model_id) + if not feature: + return False + return feature["system"] + + # def _convert_tool_spec(self, func: Function) -> dict: + + # return { + # "name": func.name, + # "description": func.description, + # "parameter_definitions": { + # "type": + # "description": + # "is_required": + # "json": func.parameters, + # } + # } + + def _convert_finish_reason(self, finish_reason: str | None) -> str | None: + """ + Below is a list of finish reason according to OpenAI doc: + + - stop: if the model hit a natural stop point or a provided stop sequence, + - length: if the maximum number of tokens specified in the request was reached, + - content_filter: if content was omitted due to a flag from our content filters, + - tool_calls: if the model called a tool + """ + if finish_reason: + finish_reason_mapping = { + "tool_use": "tool_calls", + "COMPLETE": "stop", + "ERROR_TOXIC": "content_filter", + "ERROR_LIMIT": "stop", + "ERROR": "stop", + "USER_CANCEL": "stop", + "MAX_TOKENS": "length", + } + return finish_reason_mapping.get(finish_reason.lower(), finish_reason.lower()) + return None + + +class OCIGenAIEmbeddingsModel(BaseEmbeddingsModel, ABC): + accept = "application/json" + content_type = "application/json" + + def _invoke_model(self, args: dict, model_id: str): + # body = json.dumps(args) + compartment_id = SUPPORTED_OCIGENAI_EMBEDDING_MODELS[model_id]["compartment_id"] + region = SUPPORTED_OCIGENAI_EMBEDDING_MODELS[model_id]["region"] + generative_ai_inference_client.base_client._endpoint = INFERENCE_ENDPOINT_TEMPLATE.replace("{region}", region) + body = { + "inputs": args["texts"], + "servingMode": {"servingType": "ON_DEMAND", "modelId": model_id}, + "truncate": args["truncate"], + "compartmentId": compartment_id + } + if DEBUG: + logger.info("Invoke OCI GenAI Model: " + model_id) + logger.info("OCI GenAI request body: " + json.dumps(body)) + try: + + embed_text_response = generative_ai_inference_client.embed_text(body) + return embed_text_response + + except Exception as e: + logger.error("Validation Error: " + str(e)) + raise HTTPException(status_code=400, detail=str(e)) + + def _create_response( + self, + embeddings: list[float], + model: str, + input_tokens: int = 0, + output_tokens: int = 0, + encoding_format: Literal["float", "base64"] = "float", + ) -> EmbeddingsResponse: + data = [] + for i, embedding in enumerate(embeddings): + if encoding_format == "base64": + arr = np.array(embedding, dtype=np.float32) + arr_bytes = arr.tobytes() + encoded_embedding = base64.b64encode(arr_bytes) + data.append(Embedding(index=i, embedding=encoded_embedding)) + else: + data.append(Embedding(index=i, embedding=embedding)) + + response = EmbeddingsResponse( + data=data, + model=model, + usage=EmbeddingsUsage( + prompt_tokens=input_tokens, + total_tokens=input_tokens + output_tokens, + ), + ) + if DEBUG: + logger.info("Proxy response :" + response.model_dump_json()[:100]) + return response + + +class CohereEmbeddingsModel(OCIGenAIEmbeddingsModel): + + def 
_parse_args(self, embeddings_request: EmbeddingsRequest) -> dict: + texts = [] + if isinstance(embeddings_request.input, str): + texts = [embeddings_request.input] + elif isinstance(embeddings_request.input, list): + texts = embeddings_request.input + elif isinstance(embeddings_request.input, Iterable): + # For encoded input + # The workaround is to use tiktoken to decode to get the original text. + encodings = [] + for inner in embeddings_request.input: + if isinstance(inner, int): + # Iterable[int] + encodings.append(inner) + else: + # Iterable[Iterable[int]] + text = ENCODER.decode(list(inner)) + texts.append(text) + if encodings: + texts.append(ENCODER.decode(encodings)) + + # Maximum of 2048 characters + args = { + "texts": texts, + "input_type": "search_document", + "truncate": EMBED_TRUNCATE, # "NONE|START|END" + } + return args + + def embed(self, embeddings_request: EmbeddingsRequest) -> EmbeddingsResponse: + response = self._invoke_model( + args=self._parse_args(embeddings_request), model_id=embeddings_request.model + ) + response_body = response.data + if DEBUG: + logger.info("OCI GenAI response body: " + str(response_body)[:50]) + + return self._create_response( + embeddings=response_body.embeddings, + model=response_body.model_id, + encoding_format=embeddings_request.encoding_format, + ) + + +def get_embeddings_model(model_id: str) -> OCIGenAIEmbeddingsModel: + model_name = SUPPORTED_OCIGENAI_EMBEDDING_MODELS.get(model_id, "") + if model_name: + if DEBUG: + logger.info("model name is " + model_name["name"]) + return CohereEmbeddingsModel() + else: + logger.error("Unsupported model id " + model_id) + raise HTTPException( + status_code=400, + detail="Unsupported embedding model id " + model_id, + ) diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/models/ociodsc.py b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/models/ociodsc.py new file mode 100644 index 000000000..ff8d9ac17 --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/models/ociodsc.py @@ -0,0 +1,493 @@ +import base64 +import json +import logging +import re +import time +from abc import ABC +from typing import AsyncIterable, Iterable, Literal + +import oci +from api.setting import DEBUG +from api.setting import CLIENT_KWARGS, SUPPORTED_OCIODSC_CHAT_MODELS + +import requests, json +import numpy as np +from api.models.odsc_client import DataScienceAiInferenceClient +from fastapi import HTTPException + +from api.models.base import BaseChatModel +from api.schema import ( + # Chat + ChatResponse, + ChatRequest, + Choice, + ChatResponseMessage, + Usage, + ChatStreamResponse, + ImageContent, + TextContent, + ToolCall, + ChoiceDelta, + UserMessage, + AssistantMessage, + ToolMessage, + Function, + ResponseFunction +) + +from api.setting import DEBUG + +logging.basicConfig(format='%(asctime)s,%(msecs)03d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', + datefmt='%Y-%m-%d:%H:%M:%S', + level=logging.DEBUG) +logger = logging.getLogger(__name__) + +odsc_client = DataScienceAiInferenceClient( + **CLIENT_KWARGS + ) + + +class OCIOdscModel(BaseChatModel): + # https://docs.oracle.com/en-us/iaas/data-science/using/ai-quick-actions-model-deploy.htm + _supported_models = {} + + for model in SUPPORTED_OCIODSC_CHAT_MODELS: + _supported_models[model] = { + "system": True, + "multimodal": False, + "tool_call": False, + "stream_tool_call": False, + } + + def list_models(self) -> list[str]: + return list(self._supported_models.keys()) + + def validate(self, chat_request: ChatRequest): + """Perform basic validation 
on requests""" + error = "" + # check if model is supported + if chat_request.model not in self._supported_models.keys(): + error = f"Unsupported model {chat_request.model}, please use models API to get a list of supported models" + + # check if tool call is supported + elif chat_request.tools and not self._is_tool_call_supported(chat_request.model, stream=chat_request.stream): + tool_call_info = "Tool call with streaming" if chat_request.stream else "Tool call" + error = f"{tool_call_info} is currently not supported by {chat_request.model}" + + if error: + raise HTTPException( + status_code=400, + detail=error, + ) + + def _invoke_genai(self, chat_request: ChatRequest, stream=False): + """Common logic for invoke OCI GenAI models""" + if DEBUG: + logger.info("Raw request: " + chat_request.model_dump_json()) + + # convert OpenAI chat request to OCI Generative AI SDK request + args = self._parse_request(chat_request) + if DEBUG: + logger.info("OCI Data Science AI Quick Actions request: " + json.dumps(args)) + try: + endpoint = SUPPORTED_OCIODSC_CHAT_MODELS[chat_request.model]["endpoint"] + response = odsc_client.chat(endpoint, chat_details=args) + except Exception as e: + logger.error(e) + raise HTTPException(status_code=500, detail=str(e)) + return response + + def chat(self, chat_request: ChatRequest) -> ChatResponse: + """Default implementation for Chat API.""" + + # message_id = self.generate_message_id() + try: + response = self._invoke_genai(chat_request) + + texts = [{"text": response["choices"][0]["message"]["content"].strip()}] + + chat_response = self._create_response( + model=chat_request.model, + message_id=response["id"], + content=texts, + finish_reason=response["choices"][0]["finish_reason"], + input_tokens=response["usage"]["prompt_tokens"], + output_tokens=response["usage"]["completion_tokens"], + ) + except Exception as e: + logger.error("Error in _invoke_genai: " + str(response)) + logger.error(e) + raise HTTPException(status_code=500, detail=str(e) + str(response)) + if DEBUG: + logger.info("Proxy response :" + chat_response.model_dump_json()) + return chat_response + + def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]: + """Default implementation for Chat Stream API""" + response = self._invoke_genai(chat_request, stream=True) + + events = response.events() + for stream in events: + try: + chunk = json.loads(stream.data) + except: + break + stream_response = self._create_response_stream( + model_id=chat_request.model, + message_id=chunk["id"], + chunk=chunk + ) + if DEBUG: + logger.info(stream_response) + if not stream_response: + continue + if DEBUG: + logger.info("Proxy response :" + stream_response.model_dump_json()) + if stream_response.choices: + yield self.stream_response_to_bytes(stream_response) + elif ( + chat_request.stream_options + and chat_request.stream_options.include_usage + ): + # An empty choices for Usage as per OpenAI doc below: + # if you set stream_options: {"include_usage": true}. + # an additional chunk will be streamed before the data: [DONE] message. + # The usage field on this chunk shows the token usage statistics for the entire request, + # and the choices field will always be an empty array. + # All other chunks will also include a usage field, but with a null value. + yield self.stream_response_to_bytes(stream_response) + + # return an [DONE] message at the end. + yield self.stream_response_to_bytes() + + def _parse_system_prompts(self, chat_request: ChatRequest) -> list[dict[str, str]]: + """Create system prompts. 
+ Note that not all models support system prompts. + + example output: [{"text" : system_prompt}] + + See example: + https://docs.oracle.com/en-us/iaas/api/#/EN/generative-ai-inference/20231130/ChatResult/Chat + """ + + system_prompts = [] + for message in chat_request.messages: + if message.role != "system": + # ignore system messages here + continue + assert isinstance(message.content, str) + system_prompts.append(message.content) + + return system_prompts + + def _parse_messages(self, chat_request: ChatRequest) -> list[dict]: + """ + Converse API only support user and assistant messages. + + example output: [ + {"role": "user", "content": "What is your favourite condiment?"}, + {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. }, + {"role": "user", "content": "Do you have mayonnaise recipes?"} + ] + + See example: + https://github.com/oracle-samples/oci-data-science-ai-samples/blob/b1e319a935a0b85ccc2f6f1065e63915581c9442/ai-quick-actions/multimodal-models-tips.md + """ + messages = [] + for message in chat_request.messages: + if isinstance(message, UserMessage): + messages.append( + { + "role": message.role, + "content": self._parse_content_parts( + message, chat_request.model + ), + } + ) + elif isinstance(message, AssistantMessage): + if message.content: + # Text message + messages.append( + {"role": message.role, "content": message.content} + ) + # else: + # Tool use message + # tool_input = json.loads(message.tool_calls[0].function.arguments) + # messages.append( + # { + # "role": message.role, + # "content": [ + # { + # "toolUse": { + # "toolUseId": message.tool_calls[0].id, + # "name": message.tool_calls[0].function.name, + # "input": tool_input + # } + # } + # ], + # } + # ) + # elif isinstance(message, ToolMessage): + + # # Add toolResult to content + # # https://docs.oracle.com/en-us/iaas/api/#/EN/generative-ai-inference/20231130/ChatResult/Chat + # messages.append( + # { + # "role": "user", + # "content": [ + # { + # "toolResult": { + # "toolUseId": message.tool_call_id, + # "content": [{"text": message.content}], + # } + # } + # ], + # } + # ) + + else: + # ignore others, such as system messages + continue + return messages + + def _parse_request(self, chat_request: ChatRequest) -> dict: + """Create default converse request body. + + Also perform validations to tool call etc. + + Ref: https://github.com/oracle-samples/oci-data-science-ai-samples/tree/b1e319a935a0b85ccc2f6f1065e63915581c9442/ai-quick-actions + """ + + messages = self._parse_messages(chat_request) + + # Base inference parameters. 
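+        # AI Quick Actions deployments accept an OpenAI-style chat payload;
+        # "odsc-llm" is the placeholder model name the deployment expects, so the
+        # caller's model choice only selects the endpoint, not this field.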
+ chat_detail = { + "model": "odsc-llm", + "messages": messages, + "max_tokens": chat_request.max_tokens, + "stream": chat_request.stream, + "frequency_penalty": chat_request.frequency_penalty, + "presence_penalty": chat_request.presence_penalty, + "temperature": chat_request.temperature, + "top_p": chat_request.top_p, + # "topK": chat_request.top_k + } + + return chat_detail + + def _create_response( + self, + model: str, + message_id: str, + content: list[dict] = None, + finish_reason: str | None = None, + input_tokens: int = 0, + output_tokens: int = 0, + ) -> ChatResponse: + + message = ChatResponseMessage( + role="assistant", + ) + if finish_reason == "tool_use": + # https://docs.oracle.com/en-us/iaas/api/#/EN/generative-ai-inference/20231130/datatypes/CohereChatRequest + tool_calls = [] + for part in content: + if "toolUse" in part: + tool = part["toolUse"] + tool_calls.append( + ToolCall( + id=tool["toolUseId"], + type="function", + function=ResponseFunction( + name=tool["name"], + arguments=json.dumps(tool["input"]), + ), + ) + ) + message.tool_calls = tool_calls + message.content = None + else: + message.content = content[0]["text"] + + response = ChatResponse( + id=message_id, + model=model, + choices=[ + Choice( + index=0, + message=message, + finish_reason=self._convert_finish_reason(finish_reason), + logprobs=None, + ) + ], + usage=Usage( + prompt_tokens=input_tokens, + completion_tokens=output_tokens, + total_tokens=input_tokens + output_tokens, + ), + ) + response.system_fingerprint = "fp" + response.object = "chat.completion" + response.created = int(time.time()) + return response + + def _create_response_stream( + self, model_id: str, message_id: str, chunk: dict + ) -> ChatStreamResponse | None: + """Parsing the OCI GenAI stream response chunk. + + Ref: https://docs.oracle.com/en-us/iaas/api/#/EN/generative-ai-inference/20231130/ChatResult/Chat + """ + # if DEBUG: + # logger.info("OCI GenAI response chunk: " + str(chunk)) + + finish_reason = None + message = None + usage = None + if "choices" in chunk: + finish_reason = chunk["choices"][0]["finish_reason"] + text = chunk["choices"][0]["delta"]["content"] + message = ChatResponseMessage( + role="assistant", + content=text, + ) + + # logger.info("消息:"+str(message)) + if "contentBlockStart" in chunk: + # tool call start + delta = chunk["contentBlockStart"]["start"] + if "toolUse" in delta: + # first index is content + index = chunk["contentBlockStart"]["contentBlockIndex"] - 1 + message = ChatResponseMessage( + tool_calls=[ + ToolCall( + index=index, + type="function", + id=delta["toolUse"]["toolUseId"], + function=ResponseFunction( + name=delta["toolUse"]["name"], + arguments="", + ), + ) + ] + ) + + if "metadata" in chunk: + # usage information in metadata. 
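+            # when usage is present, emit a usage-only chunk with an empty choices list,
+            # matching OpenAI's stream_options {"include_usage": true} behaviour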
+ metadata = chunk["metadata"] + if "usage" in metadata: + # token usage + return ChatStreamResponse( + id=message_id, + model=model_id, + choices=[], + usage=Usage( + prompt_tokens=metadata["usage"]["inputTokens"], + completion_tokens=metadata["usage"]["outputTokens"], + total_tokens=metadata["usage"]["totalTokens"], + ), + ) + + if message: + return ChatStreamResponse( + id=message_id, + model=model_id, + choices=[ + ChoiceDelta( + index=0, + delta=message, + logprobs=None, + finish_reason=self._convert_finish_reason(finish_reason), + ) + ], + usage=usage, + ) + + return None + + def _parse_content_parts( + self, + message: UserMessage, + model_id: str, + ) -> list[dict]: + if isinstance(message.content, str): + return message.content + content_parts = [] + for part in message.content: + if isinstance(part, TextContent): + content_parts.append( + { + "text": part.text, + } + ) + elif isinstance(part, ImageContent): + if not self._is_multimodal_supported(model_id): + raise HTTPException( + status_code=400, + detail=f"Multimodal message is currently not supported by {model_id}", + ) + image_data, content_type = self._parse_image(part.image_url.url) + content_parts.append( + { + "image": { + "format": content_type[6:], # image/ + "source": {"bytes": image_data}, + }, + } + ) + else: + # Ignore.. + continue + return content_parts + + def _is_tool_call_supported(self, model_id: str, stream: bool = False) -> bool: + feature = self._supported_models.get(model_id) + if not feature: + return False + return feature["stream_tool_call"] if stream else feature["tool_call"] + + def _is_multimodal_supported(self, model_id: str) -> bool: + feature = self._supported_models.get(model_id) + if not feature: + return False + return feature["multimodal"] + + def _is_system_prompt_supported(self, model_id: str) -> bool: + feature = self._supported_models.get(model_id) + if not feature: + return False + return feature["system"] + + # def _convert_tool_spec(self, func: Function) -> dict: + + # return { + # "name": func.name, + # "description": func.description, + # "parameter_definitions": { + # "type": + # "description": + # "is_required": + # "json": func.parameters, + # } + # } + + def _convert_finish_reason(self, finish_reason: str | None) -> str | None: + """ + Below is a list of finish reason according to OpenAI doc: + + - stop: if the model hit a natural stop point or a provided stop sequence, + - length: if the maximum number of tokens specified in the request was reached, + - content_filter: if content was omitted due to a flag from our content filters, + - tool_calls: if the model called a tool + """ + if finish_reason: + finish_reason_mapping = { + "tool_calls": "tool_calls", + "stop": "stop", + "content_filter": "content_filter", + "stop": "stop", + "length": "length", + } + return finish_reason_mapping.get(finish_reason.lower(), finish_reason.lower()) + return None diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/models/odsc_client.py b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/models/odsc_client.py new file mode 100644 index 000000000..2675a6489 --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/models/odsc_client.py @@ -0,0 +1,69 @@ +from oci.config import get_config_value_or_default, validate_config +from oci.signer import Signer +from oci.util import get_signer_from_authentication_type, AUTHENTICATION_TYPE_FIELD_NAME +import requests +import json +import sseclient + + +class DataScienceAiInferenceClient(object): + def __init__(self, config, **kwargs): + 
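+        # Signing is resolved in priority order: an explicit `signer` kwarg, a signer
+        # derived from the config's declared authentication_type, and finally an
+        # API-key Signer built from the standard ~/.oci/config fields.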
validate_config(config, signer=kwargs.get('signer')) + if 'signer' in kwargs: + self.signer = kwargs['signer'] + + elif AUTHENTICATION_TYPE_FIELD_NAME in config: + self.signer = get_signer_from_authentication_type(config) + + else: + self.signer = Signer( + tenancy=config["tenancy"], + user=config["user"], + fingerprint=config["fingerprint"], + private_key_file_location=config.get("key_file"), + pass_phrase=get_config_value_or_default(config, "pass_phrase"), + private_key_content=config.get("key_content") + ) + self.session = requests.Session() + + class ChatDetails(object): + def __init__(self, messages, **kwargs): + self.model = "odsc-llm" + self.messages = messages + self.max_tokens = kwargs.get("max_tokens", 1024) + self.temperature = kwargs.get("temperature", 0) + self.top_p = kwargs.get("top_p", 0.75) + self.stream = kwargs.get("stream", True) + self.frequency_penalty = kwargs.get("frequency_penalty", 0) + self.presence_penalty = kwargs.get("frequency_penalty", 0) + + def chat(self, endpoint, chat_details, **kwargs): + is_stream = chat_details["stream"] + + return self.call_api( + endpoint=endpoint, + is_stream=is_stream, + chat_details=chat_details) + + def call_api(self, endpoint, is_stream, chat_details, **kwargs): + if is_stream: + header_params = {'Content-Type': 'application/json', + 'enable-streaming': 'true', + 'Accept': 'text/event-stream'} + else: + header_params = {'Content-Type': 'application/json', + 'enable-streaming': 'false', + 'Accept': 'application/json'} + + response = self.session.request( + method='POST', + url=endpoint, + json=chat_details, + auth=self.signer, + stream=is_stream, + headers=header_params) + if is_stream: + client = sseclient.SSEClient(response) + return client + else: + return json.loads(response.text) diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/routers/__init__.py b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/routers/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/routers/chat.py b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/routers/chat.py new file mode 100644 index 000000000..546b335a8 --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/routers/chat.py @@ -0,0 +1,292 @@ +# app/api/routers/chat.py +from __future__ import annotations + +import os +import time +import json +import yaml +import logging +from typing import Annotated, Any, Dict, List, Optional, Tuple, Union + +import oci +from fastapi import APIRouter, Depends, Body, HTTPException, Request +from fastapi.responses import StreamingResponse, JSONResponse + +from api.auth import api_key_auth +from api.models.ocigenai import OCIGenAIModel +from api.models.ociodsc import OCIOdscModel +from api.schema import ChatRequest, ChatResponse, ChatStreamResponse +from api.setting import ( + SUPPORTED_OCIGENAI_CHAT_MODELS, + SUPPORTED_OCIODSC_CHAT_MODELS, + DEFAULT_MODEL, +) + +router = APIRouter(prefix="/chat", dependencies=[Depends(api_key_auth)]) + +# ---------------- paths & loaders ---------------- +def _root_dir() -> str: + return os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +def _load_yaml(path: str) -> Any: + if not os.path.exists(path): + return None + with open(path, "r", encoding="utf-8") as f: + return yaml.safe_load(f) + +def _load_agents() -> List[Dict[str, Any]]: + data = _load_yaml(os.path.join(_root_dir(), "agents.yaml")) or {} + agents = data.get("agents") or [] + out: List[Dict[str, Any]] = [] + for a in agents: + entry = { + "id": a.get("id"), 
+ "name": a.get("name") or a.get("id"), + "description": a.get("description"), + "region": a.get("region"), + "endpoint_ocid": a.get("endpoint_ocid"), + "agent_ocid": a.get("agent_ocid"), + "compartment_ocid": a.get("compartment_ocid"), + } + if entry["id"] and entry["region"] and (entry["endpoint_ocid"] or entry["agent_ocid"]): + out.append(entry) + return out + +def _match_agent_by_model(model_name: str) -> Optional[Dict[str, Any]]: + """ + Accepts: + - "agent:" (preferred) + - "" + - raw endpoint OCID or agent OCID + """ + agents = _load_agents() + # Direct OCIDs? + if model_name.startswith("ocid1.genaiagentendpoint.") or model_name.startswith("ocid1.genaiagent."): + for a in agents: + if model_name in (a.get("endpoint_ocid"), a.get("agent_ocid")): + return a + # alias forms + alias = model_name.split("agent:", 1)[1] if model_name.startswith("agent:") else model_name + for a in agents: + if a["id"] == alias: + return a + return None + +# ---------------- OCI clients ---------------- +def _base_cfg(region: str) -> Dict[str, Any]: + if not region: + raise HTTPException(status_code=400, detail="`region` is required with agent fields.") + if os.environ.get("OCI_RESOURCE_PRINCIPAL_VERSION"): + return { + "signer": oci.auth.signers.get_resource_principals_signer(), + "config": {}, + "region": region, + } + cfg = oci.config.from_file( + os.environ.get("OCI_CONFIG_FILE", "~/.oci/config"), + os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT"), + ) + cfg["region"] = region + return {"signer": None, "config": cfg, "region": region} + +def _agent_runtime_client(region: str): + base = _base_cfg(region) + if base["signer"] is not None: + return oci.generative_ai_agent_runtime.GenerativeAiAgentRuntimeClient( + config={}, signer=base["signer"], region=base["region"] + ) + return oci.generative_ai_agent_runtime.GenerativeAiAgentRuntimeClient(base["config"]) + +def _agent_mgmt_client(region: str): + base = _base_cfg(region) + if base["signer"] is not None: + return oci.generative_ai_agent.GenerativeAiAgentClient( + config={}, signer=base["signer"], region=base["region"] + ) + return oci.generative_ai_agent.GenerativeAiAgentClient(base["config"]) + +# ---------------- utilities ---------------- +def _extract_user_text(messages: List[Dict[str, Any]] | List[Any]) -> str: + if not messages: + return "" + # normalize + norm: List[Dict[str, Any]] = [] + for m in messages: + if isinstance(m, dict): + norm.append(m) + else: + try: + norm.append(m.model_dump()) + except Exception: + try: + norm.append(dict(m)) + except Exception: + pass + for m in reversed(norm): + if m.get("role") == "user": + c = m.get("content", "") + if isinstance(c, str): + return c + if isinstance(c, list): + return "".join( + part.get("text", "") for part in c if isinstance(part, dict) and part.get("type") == "text" + ) + return "" + +def _resolve_endpoint_ocid(region: str, endpoint_ocid: Optional[str], agent_ocid: Optional[str], compartment_ocid: Optional[str]) -> str: + if endpoint_ocid: + return endpoint_ocid + if not agent_ocid: + raise HTTPException(status_code=400, detail="Provide endpoint OCID or agent OCID.") + mgmt = _agent_mgmt_client(region) + comp_id = compartment_ocid + if not comp_id: + try: + agent = mgmt.get_agent(agent_id=agent_ocid).data + comp_id = getattr(agent, "compartment_id", None) + except Exception as e: + raise HTTPException(status_code=502, detail=f"Failed to get agent {agent_ocid}: {e}") + if not comp_id: + raise HTTPException(status_code=502, detail=f"Agent {agent_ocid} has no compartment_id.") + try: + endpoints 
= oci.pagination.list_call_get_all_results( + mgmt.list_agent_endpoints, + compartment_id=comp_id, + agent_id=agent_ocid, + ).data + except Exception as e: + raise HTTPException(status_code=502, detail=f"Failed to list endpoints for agent {agent_ocid}: {e}") + if not endpoints: + raise HTTPException(status_code=404, detail=f"No endpoints found for agent {agent_ocid} in {region}.") + # prefer ACTIVE newest + active = [ep for ep in endpoints if getattr(ep, "lifecycle_state", "").upper() == "ACTIVE"] + chosen = sorted(active or endpoints, key=lambda ep: getattr(ep, "time_created", 0), reverse=True)[0] + eid = getattr(chosen, "id", None) + if not eid: + raise HTTPException(status_code=502, detail="Resolved endpoint missing id.") + return eid + +def _openai_like_response(text: str, model_tag: str) -> Dict[str, Any]: + now = int(time.time()) + return { + "id": f"chatcmpl-{now}", + "object": "chat.completion", + "created": now, + "model": model_tag, + "choices": [{"index": 0, "message": {"role": "assistant", "content": text}, "finish_reason": "stop"}], + } + +def _stream_one_chunk(text: str, model_tag: str): + now = int(time.time()) + first = { + "id": f"chatcmpl-{now}", + "object": "chat.completion.chunk", + "created": now, + "model": model_tag, + "choices": [{"index": 0, "delta": {"role": "assistant", "content": text}, "finish_reason": None}], + } + yield f"data: {json.dumps(first, ensure_ascii=False)}\n\n" + yield "data: [DONE]\n\n" + +# ---------------- route ---------------- +@router.post("/completions", response_model=ChatResponse | ChatStreamResponse, response_model_exclude_unset=True) +async def chat_completions( + request: Request, + chat_request: Annotated[ + ChatRequest, + Body(examples=[{"model": "agent:sales-kb", "messages": [{"role": "user", "content": "Hello!"}]}]), + ], +): + try: + raw = await request.json() + except Exception: + raw = {} + + # ---- A) MODEL == agent from agents.yaml (preferred for n8n) ---- + agent_cfg = _match_agent_by_model(chat_request.model or "") + if agent_cfg: + region = agent_cfg["region"] + endpoint_id = _resolve_endpoint_ocid(region, agent_cfg.get("endpoint_ocid"), agent_cfg.get("agent_ocid"), agent_cfg.get("compartment_ocid")) + runtime = _agent_runtime_client(region) + user_text = _extract_user_text(chat_request.messages) + + # Ensure session (allow client-provided via extra_body.session_id too) + session_id = (raw.get("extra_body") or {}).get("session_id") + if not session_id: + create_session_details = oci.generative_ai_agent_runtime.models.CreateSessionDetails(description="Gateway session") + session_obj = runtime.create_session(create_session_details=create_session_details, agent_endpoint_id=endpoint_id).data + session_id = getattr(session_obj, "id", None) + if not session_id: + raise HTTPException(status_code=502, detail="Failed to acquire sessionId for agent chat.") + + chat_details = oci.generative_ai_agent_runtime.models.ChatDetails( + user_message=user_text, session_id=session_id, should_stream=False + ) + try: + result = runtime.chat(agent_endpoint_id=endpoint_id, chat_details=chat_details).data + text = "" + if getattr(result, "message", None) and getattr(result.message, "content", None): + text = getattr(result.message.content, "text", "") or "" + except oci.exceptions.ServiceError as se: + raise HTTPException(status_code=502, detail=f"Agent chat failed ({se.status}): {getattr(se,'message',str(se))}") + + tag = f"oci:agentendpoint:{endpoint_id}" + if getattr(chat_request, "stream", False): + return 
StreamingResponse(_stream_one_chunk(text, tag), media_type="text/event-stream", headers={"x-oci-session-id": session_id}) + return JSONResponse(content=_openai_like_response(text, tag), headers={"x-oci-session-id": session_id}) + + # ---- B) EXTRA BODY (agent_ocid/endpoint_ocid) — still supported ---- + extra = (raw.get("extra_body") if isinstance(raw, dict) else None) or {} + endpoint_ocid = raw.get("agent_endpoint_ocid") or extra.get("agent_endpoint_ocid") + agent_ocid = raw.get("agent_ocid") or extra.get("agent_ocid") + region = raw.get("region") or extra.get("region") + compartment_ocid = raw.get("compartment_ocid") or extra.get("compartment_ocid") or os.getenv("OCI_COMPARTMENT_OCID") + + if endpoint_ocid or agent_ocid: + endpoint_id = _resolve_endpoint_ocid(region or "", endpoint_ocid, agent_ocid, compartment_ocid) + runtime = _agent_runtime_client(region or "") + user_text = _extract_user_text(chat_request.messages) + session_id = raw.get("session_id") or extra.get("session_id") + if not session_id: + create_session_details = oci.generative_ai_agent_runtime.models.CreateSessionDetails(description="Gateway session") + session_obj = runtime.create_session(create_session_details=create_session_details, agent_endpoint_id=endpoint_id).data + session_id = getattr(session_obj, "id", None) + if not session_id: + raise HTTPException(status_code=502, detail="Failed to acquire sessionId for agent chat.") + chat_details = oci.generative_ai_agent_runtime.models.ChatDetails(user_message=user_text, session_id=session_id, should_stream=False) + result = runtime.chat(agent_endpoint_id=endpoint_id, chat_details=chat_details).data + text = getattr(getattr(result, "message", None), "content", None) + text = getattr(text, "text", "") if text else "" + tag = f"oci:agentendpoint:{endpoint_id}" + if getattr(chat_request, "stream", False): + return StreamingResponse(_stream_one_chunk(text, tag), media_type="text/event-stream", headers={"x-oci-session-id": session_id}) + return JSONResponse(content=_openai_like_response(text, tag), headers={"x-oci-session-id": session_id}) + + # ---- C) FALLBACK: your existing LLM model routing ---- + model_name = chat_request.model or DEFAULT_MODEL + chat_request.model = model_name + + model_type = None + if model_name in SUPPORTED_OCIGENAI_CHAT_MODELS: + model_type = SUPPORTED_OCIGENAI_CHAT_MODELS[model_name]["type"] + elif model_name in SUPPORTED_OCIODSC_CHAT_MODELS: + model_type = SUPPORTED_OCIODSC_CHAT_MODELS[model_name]["type"] + + if not model_type: + raise HTTPException( + status_code=400, + detail=f"Unknown model '{model_name}'. 
Use an agent (e.g., 'agent:sales-kb') or a supported model.", + ) + + if model_type == "datascience": + model = OCIOdscModel() + elif model_type in ("ondemand", "dedicated"): + model = OCIGenAIModel() + else: + raise HTTPException(status_code=400, detail=f"Unsupported model type '{model_type}'") + + model.validate(chat_request) + + if chat_request.stream: + return StreamingResponse(content=model.chat_stream(chat_request), media_type="text/event-stream") + return model.chat(chat_request) diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/routers/embeddings.py b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/routers/embeddings.py new file mode 100644 index 000000000..5d4fc23b1 --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/routers/embeddings.py @@ -0,0 +1,36 @@ +from typing import Annotated + +from fastapi import APIRouter, Depends, Body + +from api.auth import api_key_auth +from api.models.ocigenai import get_embeddings_model +from api.schema import EmbeddingsRequest, EmbeddingsResponse +from api.setting import DEFAULT_EMBEDDING_MODEL + +router = APIRouter( + prefix="/embeddings", + dependencies=[Depends(api_key_auth)], +) + + +@router.post("", response_model=EmbeddingsResponse) +async def embeddings( + embeddings_request: Annotated[ + EmbeddingsRequest, + Body( + examples=[ + { + "model": "cohere.embed-multilingual-v3", + "input": [ + "Your text string goes here" + ], + } + ], + ), + ] +): + if embeddings_request.model is None: + embeddings_request.model = DEFAULT_EMBEDDING_MODEL + # Exception will be raised if model not supported. + model = get_embeddings_model(embeddings_request.model) + return model.embed(embeddings_request) diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/routers/model.py b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/routers/model.py new file mode 100644 index 000000000..da9864965 --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/routers/model.py @@ -0,0 +1,117 @@ +# app/api/routers/model.py +from __future__ import annotations + +import os +import yaml +from typing import Any, Dict, List + +from fastapi import APIRouter, Depends +from api.auth import api_key_auth + +router = APIRouter(prefix="/models", dependencies=[Depends(api_key_auth)]) + +# ---- helpers ---- +def _here() -> str: + return os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +def _load_yaml(path: str) -> Any: + if not os.path.exists(path): + return None + with open(path, "r", encoding="utf-8") as f: + return yaml.safe_load(f) + +def _flatten_models(models_yaml: Any) -> List[Dict[str, Any]]: + """ + Flattens your existing models.yaml structure into a list of dicts with keys: + - name/model_id/endpoint, plus inferred _region/_bucket/_compartment_id + """ + if not models_yaml: + return [] + def flatten(entry: Dict[str, Any]) -> List[Dict[str, Any]]: + out: List[Dict[str, Any]] = [] + models = (entry or {}).get("models") or {} + for bucket in ("ondemand", "dedicated", "datascience"): + for m in (models.get(bucket) or []): + m2 = dict(m) + m2["_bucket"] = bucket + m2["_region"] = entry.get("region") + m2["_compartment_id"] = entry.get("compartment_id") + out.append(m2) + return out + + if isinstance(models_yaml, list): + flat: List[Dict[str, Any]] = [] + for ent in models_yaml: + if isinstance(ent, dict): + flat.extend(flatten(ent)) + return flat + if isinstance(models_yaml, dict): + return flatten(models_yaml) + return [] + +def _load_agents() -> List[Dict[str, Any]]: + data = _load_yaml(os.path.join(_here(), "agents.yaml")) 
or {} + agents = data.get("agents") or [] + # normalize required fields + out: List[Dict[str, Any]] = [] + for a in agents: + entry = { + "id": a.get("id"), + "name": a.get("name") or a.get("id"), + "description": a.get("description"), + "region": a.get("region"), + "endpoint_ocid": a.get("endpoint_ocid"), + "agent_ocid": a.get("agent_ocid"), + "compartment_ocid": a.get("compartment_ocid"), + } + if entry["id"] and entry["region"] and (entry["endpoint_ocid"] or entry["agent_ocid"]): + out.append(entry) + return out + +# ---- route ---- +@router.get("") +def list_models(): + # 1) Your OCI LLM models (unchanged) + models_yaml = _load_yaml(os.path.join(_here(), "models.yaml")) + llms = _flatten_models(models_yaml) + + llm_items = [ + { + "id": m.get("name") or m.get("model_id") or "unknown", + "object": "model", + "owned_by": f"oci:{m.get('_bucket')}", + "permission": [], + "metadata": { + "region": m.get("_region"), + "bucket": m.get("_bucket"), + "model_id": m.get("model_id"), + "endpoint": m.get("endpoint"), + "description": m.get("description"), + }, + } + for m in llms + ] + + # 2) OCI Agents as "virtual models" + agents = _load_agents() + agent_items = [ + { + "id": f"agent:{a['id']}", # what n8n will select in the dropdown + "object": "model", + "owned_by": "oci-agent", + "permission": [], + "metadata": { + "kind": "agent", + "alias": a["id"], + "name": a["name"], + "description": a.get("description"), + "region": a["region"], + "endpoint_ocid": a.get("endpoint_ocid"), + "agent_ocid": a.get("agent_ocid"), + "compartment_ocid": a.get("compartment_ocid"), + }, + } + for a in agents + ] + + return {"object": "list", "data": llm_items + agent_items} diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/schema.py b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/schema.py new file mode 100644 index 000000000..591681d5c --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/schema.py @@ -0,0 +1,375 @@ +import time +from typing import Literal, Iterable + +from pydantic import BaseModel, Field + +import oci +from oci.generative_ai_inference import models as oci_models +import json +import uuid +import base64 + + +class Model(BaseModel): + id: str + created: int = Field(default_factory=lambda: int(time.time())) + object: str | None = "model" + owned_by: str | None = "ocigenerativeai" + + +class Models(BaseModel): + object: str | None = "list" + data: list[Model] = [] + + +class ResponseFunction(BaseModel): + name: str | None = None + arguments: str + + +class ToolCall(BaseModel): + index: int | None = None + id: str | None = None + type: Literal["function"] = "function" + function: ResponseFunction + + +class TextContent(BaseModel): + type: Literal["text"] = "text" + text: str + + +class ImageUrl(BaseModel): + url: str + detail: str | None = "auto" + + +class ImageContent(BaseModel): + type: Literal["image_url"] = "image" + image_url: ImageUrl + + +class SystemMessage(BaseModel): + name: str | None = None + role: Literal["system"] = "system" + content: str | None = None + + +class UserMessage(BaseModel): + name: str | None = None + role: Literal["user"] = "user" + content: str | list[TextContent | ImageContent] + + +class AssistantMessage(BaseModel): + name: str | None = None + role: Literal["assistant"] = "assistant" + content: str | None = None + tool_calls: list[ToolCall] | None = None + + +class ToolMessage(BaseModel): + role: Literal["tool"] = "tool" + content: str + tool_call_id: str + + +class Function(BaseModel): + name: str + description: str | None = None + parameters: 
object + + +class Tool(BaseModel): + type: Literal["function"] = "function" + function: Function + + +class StreamOptions(BaseModel): + include_usage: bool = True + + +class ChatRequest(BaseModel): + messages: list[SystemMessage | UserMessage | AssistantMessage | ToolMessage] + model: str + frequency_penalty: float | None = Field(default=None, le=2.0, ge=-2.0) # Not used + presence_penalty: float | None = Field(default=None, le=2.0, ge=-2.0) # Not used + stream: bool | None = False + stream_options: StreamOptions | None = None + temperature: float | None = Field(default=None, le=2.0, ge=0.0) + top_p: float | None = Field(default=None, le=1.0, ge=0.0) + user: str | None = None # Not used + max_tokens: int | None = 2048 + n: int | None = 1 # Not used + tools: list[Tool] | None = None + tool_choice: str | object = "auto" + + +class Usage(BaseModel): + prompt_tokens: int + completion_tokens: int + total_tokens: int + + +class ChatResponseMessage(BaseModel): + # tool_calls + role: Literal["assistant"] | None = None + content: str | None = None + tool_calls: list[ToolCall] | None = None + + +class BaseChoice(BaseModel): + index: int | None = 0 + finish_reason: str | None = None + logprobs: dict | None = None + + +class Choice(BaseChoice): + message: ChatResponseMessage + + +class ChoiceDelta(BaseChoice): + delta: ChatResponseMessage + + +class BaseChatResponse(BaseModel): + # id: str = Field(default_factory=lambda: "chatcmpl-" + str(uuid.uuid4())[:8]) + id: str + created: int = Field(default_factory=lambda: int(time.time())) + model: str + system_fingerprint: str = "fp" + + +class ChatResponse(BaseChatResponse): + choices: list[Choice] + object: Literal["chat.completion"] = "chat.completion" + usage: Usage + + +class ChatStreamResponse(BaseChatResponse): + choices: list[ChoiceDelta] + object: Literal["chat.completion.chunk"] = "chat.completion.chunk" + usage: Usage | None = None + + +class EmbeddingsRequest(BaseModel): + input: str | list[str] | Iterable[int | Iterable[int]] + model: str + encoding_format: Literal["float", "base64"] = "float" + dimensions: int | None = None # not used. + user: str | None = None # not used. + + +class Embedding(BaseModel): + object: Literal["embedding"] = "embedding" + embedding: list[float] | bytes + index: int + + +class EmbeddingsUsage(BaseModel): + prompt_tokens: int + total_tokens: int + + +class EmbeddingsResponse(BaseModel): + object: Literal["list"] = "list" + data: list[Embedding] + model: str + usage: EmbeddingsUsage + + +type_mapping = { + "array": "list", + "boolean": "bool", + "null": "NoneType", + "integer": "int", + "number": "float", + "object": "dict", + "regular expressions": "str", + "string": "str" + } + +class Convertor: + def __init__(self): + pass + + @staticmethod + def convert_tools_openai_to_cohere(openai_tools: list) -> list[oci_models.CohereTool]: + """ + Convert a list of OpenAI tool definitions into OCI Cohere tool objects. 
+ """ + cohere_tools = [] + + for tool in openai_tools: + # if tool.get("type") == "function": + func = tool.function + name = func.name.replace("-","_") + description = func.description + parameters_schema = func.parameters + + properties = parameters_schema.get("properties", {}) + required = parameters_schema.get("required", []) + + # Iterate through each property to build parameter definitions + parameter_definitions = {} + for param_name, param_schema in properties.items(): + is_required = param_name in required + # Map the OpenAI JSON schema type to the Python type using type_mapping + openai_type = param_schema.get("type", "string") + mapped_type = type_mapping.get(openai_type, "str") + param_description = param_schema.get("description", "") + parameter_definitions[param_name] = oci_models.CohereParameterDefinition( + is_required = is_required, + type = mapped_type, + description = param_description + ) + + cohere_tool = oci_models.CohereTool( + name = name, + description = description, + parameter_definitions = parameter_definitions + ) + cohere_tools.append(cohere_tool) + + return cohere_tools + + @staticmethod + def convert_tools_openai_to_llama(openai_tools: list) -> list[oci_models.FunctionDefinition]: + """ + Convert a list of OpenAI tool definitions into OCI Llama tool objects. + """ + llama_tools = [] + for tool in openai_tools: + llama_tool = oci_models.FunctionDefinition( + type = "FUNCTION", + name = tool.function.name, + description = tool.function.description, + parameters = tool.function.parameters + ) + llama_tools.append(llama_tool) + return llama_tools + + @staticmethod + def convert_tool_calls_cohere_to_openai(cohere_tool_calls) -> list[ToolCall]: + """ + Convert a list of Cohere tool calls into a list of OpenAI tool calls. + Returns: list: List of OpenAI tool call dictionaries. + """ + openai_tool_calls = [] + for call in cohere_tool_calls: + + # MODYFIKACJA + try: + name = call["name"].replace("_", "-") + arguments = json.dumps(call["parameters"]) + except (TypeError, KeyError): + name = call.name.replace("_", "-") + arguments = json.dumps(call.parameters) + + function = ResponseFunction( + name=name, + arguments=arguments + ) + # MODYFIKACJA + + # Generate a unique id for the OpenAI tool call + tool_id = base64.b64encode(json.dumps(function.model_dump()).encode()) + + openai_call = ToolCall( + index = len(openai_tool_calls), + id = tool_id, + type = "function", + function = function + ) + openai_tool_calls.append(openai_call) + return openai_tool_calls + + @staticmethod + def convert_tool_calls_llama_to_openai(llama_tool_calls) -> list[ToolCall]: + """ + Convert a list of Llama tool calls into a list of OpenAI tool calls. + Returns: list: List of OpenAI tool call dictionaries. + """ + openai_tool_calls = [] + for call in llama_tool_calls: + openai_call = ToolCall( + index = len(openai_tool_calls), + id = call.id, + type = "function", + function = ResponseFunction( + name = call.name, + arguments = call.arguments + ) + ) + openai_tool_calls.append(openai_call) + return openai_tool_calls + + @staticmethod + def convert_tool_calls_openai_to_cohere(openai_tool_calls) -> list[oci_models.CohereToolCall]: + """ + Convert a list of OpenAI tool calls into a list of Cohere tool calls. + Returns: list: List of Cohere tool call dictionaries. 
+ """ + cohere_tool_calls = [] + for call in openai_tool_calls: + function_data = call.function + # Parse the JSON string from the "arguments" field + try: + parameters = json.loads(function_data.arguments) + except json.JSONDecodeError: + parameters = {} + cohere_tool_call = oci_models.CohereToolCall( + name = function_data.name, + parameters = parameters + ) + cohere_tool_calls.append(cohere_tool_call) + return cohere_tool_calls + + @staticmethod + def convert_tool_calls_openai_to_llama(openai_tool_calls) -> list[oci_models.FunctionCall]: + """ + Convert a list of OpenAI tool calls into a list of Llama tool calls. + """ + llama_tool_calls = [] + for call in openai_tool_calls: + function_data = call.function + llama_tool_call = oci_models.FunctionCall( + id = call.id, + type = "FUNCTION", + name = function_data.name, + arguments = function_data.arguments + ) + llama_tool_calls.append(llama_tool_call) + return llama_tool_calls + + @staticmethod + def convert_tool_result_openai_to_llama(openai_tool_message) -> list[oci_models.CohereToolCall]: + """ + Convert an OpenAI tool message into an OCI ToolMessage object. + """ + llama_message = oci_models.ToolMessage( + role = "TOOL", + tool_call_id = openai_tool_message.get("tool_call_id",""), + content = [oci_models.TextContent( + type = "TEXT", + text = openai_tool_message.get("content",""))] + ) + return llama_message + + @staticmethod + def convert_tool_result_openai_to_cohere(openai_tool_message: dict) -> oci_models.CohereToolResult: + """ + Convert an OpenAI tool result message into an OCI CohereToolResult object. + """ + tool_id = openai_tool_message.get("tool_call_id", "{}") + func = json.loads(base64.b64decode(tool_id)) + print(func) + cohere_tool_call = oci_models.CohereToolCall( + name = func["name"], + parameters = json.loads(func["arguments"]), + ) + cohere_tool_result = oci_models.CohereToolResult( + call = cohere_tool_call, + outputs = [{"result": openai_tool_message.get("content", "")}] + ) + return cohere_tool_result diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/setting.py b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/setting.py new file mode 100644 index 000000000..0dbe63081 --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/api/setting.py @@ -0,0 +1,70 @@ +import os, sys, yaml +import oci + +current_dir = os.path.dirname(os.path.realpath(__file__)) +parent_dir = os.path.dirname(current_dir) +sys.path.append(parent_dir) +from config import * + +CLIENT_KWARGS = { + "retry_strategy": oci.retry.DEFAULT_RETRY_STRATEGY, + "timeout": (10, 240), # default timeout config for OCI Gen AI service +} + +if AUTH_TYPE == "API_KEY": + OCI_CONFIG = oci.config.from_file(OCI_CONFIG_FILE, OCI_CONFIG_FILE_KEY) + signer = oci.signer.Signer( + tenancy=OCI_CONFIG['tenancy'], + user=OCI_CONFIG['user'], + fingerprint=OCI_CONFIG['fingerprint'], + private_key_file_location=OCI_CONFIG['key_file'], + pass_phrase=OCI_CONFIG['pass_phrase'] + ) + CLIENT_KWARGS.update({'config': OCI_CONFIG}) + CLIENT_KWARGS.update({'signer': signer}) +elif AUTH_TYPE == 'INSTANCE_PRINCIPAL': + OCI_CONFIG = {} + signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner() + CLIENT_KWARGS.update({'config': OCI_CONFIG}) + CLIENT_KWARGS.update({'signer': signer}) + + +def parse_model_settings(yaml_file): + with open(yaml_file, 'r') as stream: + models = yaml.safe_load(stream) + model_settings = [] + for i in models: + for k, v in i['models'].items(): + for m in v: + m["region"] = i['region'] + m["compartment_id"] = i['compartment_id'] + m["type"] = k + 
model_settings.append(m) + return model_settings + + +MODEL_SETTINGS = parse_model_settings(os.path.join(parent_dir, 'models.yaml')) + +SUPPORTED_OCIGENAI_EMBEDDING_MODELS = {} +SUPPORTED_OCIGENAI_CHAT_MODELS = {} +DEFAULT_EMBEDDING_MODEL, DEFAULT_MODEL = None, None +SUPPORTED_OCIODSC_CHAT_MODELS = {} + +for m in MODEL_SETTINGS: + if m['type'] == "embedding": + if DEFAULT_EMBEDDING_MODEL is None: + DEFAULT_EMBEDDING_MODEL = m["name"] + SUPPORTED_OCIGENAI_EMBEDDING_MODELS[m['name']] = m + elif m['type'] == "ondemand": + if DEFAULT_MODEL is None: + DEFAULT_MODEL = m["name"] + if m["model_id"].startswith("cohere"): + m["provider"] = "cohere" + elif m["model_id"].startswith("meta"): + m["provider"] = "meta" + SUPPORTED_OCIGENAI_CHAT_MODELS[m['name']] = m + elif m['type'] == "dedicated": + m["provider"] = "dedicated" + SUPPORTED_OCIGENAI_CHAT_MODELS[m['name']] = m + elif m['type'] == "datascience": + SUPPORTED_OCIODSC_CHAT_MODELS[m['name']] = m diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/app.py b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/app.py new file mode 100644 index 000000000..b09c347a5 --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/app.py @@ -0,0 +1,166 @@ +# app.py +import os +import sys +import logging +from typing import Any, Dict, List + +import uvicorn +from fastapi import FastAPI, Request +from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import PlainTextResponse, JSONResponse + +API_ROUTE_PREFIX = "/v1" +TITLE = "OCI OpenAI-Compatible Gateway" +DESCRIPTION = "FastAPI service that proxies OCI models/agents behind OpenAI-style endpoints" +SUMMARY = "OpenAI-style API over OCI backends" + +# ---------- App ---------- +app = FastAPI(title=TITLE, description=DESCRIPTION, summary=SUMMARY) + +# CORS (open defaults; tighten in production) +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], allow_credentials=True, + allow_methods=["*"], allow_headers=["*"], +) + +# ---------- Mount routers if available ---------- +have_models_router = False + +try: + # /v1/models (usually backed by models.yaml) + from api.routers import model # type: ignore + app.include_router(model.router, prefix=API_ROUTE_PREFIX) + have_models_router = True +except Exception as e: + logging.getLogger(__name__).warning("Models router not loaded: %s", e) + +try: + # /v1/chat/completions (your handler; now supports agent_ocid if you added that branch) + from api.routers import chat # type: ignore + app.include_router(chat.router, prefix=API_ROUTE_PREFIX) +except Exception as e: + logging.getLogger(__name__).warning("Chat router not loaded: %s", e) + +try: + # /v1/embeddings + from api.routers import embeddings # type: ignore + app.include_router(embeddings.router, prefix=API_ROUTE_PREFIX) +except Exception as e: + logging.getLogger(__name__).warning("Embeddings router not loaded: %s", e) + +try: + # Optional: /v1/oci/agents (list/health/chat via agent endpoint OCIDs) + from api.routers import oci_agents # type: ignore + app.include_router(oci_agents.router, prefix=API_ROUTE_PREFIX) +except Exception as e: + logging.getLogger(__name__).info("OCI agents router not loaded (optional): %s", e) + +# ---------- Fallback /v1/models (if no models router present) ---------- +if not have_models_router: + try: + import yaml # lazy import to avoid dependency if router exists + + def _models_yaml_paths() -> List[str]: + here = os.path.dirname(os.path.abspath(__file__)) + return [ + os.path.join(here, "models.yaml"), + os.path.join(os.path.dirname(here), "models.yaml"), + ] + + 
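+        # Mirrors api/routers/model.py: expand each region entry in models.yaml into
+        # per-model dicts tagged with _bucket, _region and _compartment_id.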
def _load_models_flat() -> List[Dict[str, Any]]: + path = None + for p in _models_yaml_paths(): + if os.path.exists(p): + path = p + break + if not path: + return [] + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) or {} + + def flatten(entry: Dict[str, Any]) -> List[Dict[str, Any]]: + out: List[Dict[str, Any]] = [] + models = (entry or {}).get("models") or {} + for bucket in ("ondemand", "dedicated", "datascience"): + for m in (models.get(bucket) or []): + m2 = dict(m) + m2["_bucket"] = bucket + m2["_region"] = entry.get("region") + m2["_compartment_id"] = entry.get("compartment_id") + out.append(m2) + return out + + if isinstance(data, list): + flat: List[Dict[str, Any]] = [] + for ent in data: + if isinstance(ent, dict): + flat.extend(flatten(ent)) + return flat + if isinstance(data, dict): + return flatten(data) + return [] + + @app.get(f"{API_ROUTE_PREFIX}/models") + def list_models_fallback(): + try: + models = _load_models_flat() + return { + "object": "list", + "data": [ + { + "id": m.get("name") or m.get("model_id") or "unknown", + "object": "model", + "owned_by": "oci-local", + "permission": [], + "metadata": { + "region": m.get("_region"), + "bucket": m.get("_bucket"), + "model_id": m.get("model_id"), + "endpoint": m.get("endpoint"), + "description": m.get("description"), + }, + } + for m in models + ], + } + except Exception as e: + logging.exception("Failed to read models.yaml: %s", e) + return {"object": "list", "data": []} + except Exception as e: + logging.getLogger(__name__).warning("Fallback /v1/models not available: %s", e) + +# ---------- Health & basic routes ---------- +@app.get("/") +def root_ok(): + return {"ok": True, "service": TITLE} + +@app.get("/health") +def health_ok(): + return {"status": "ok"} + +# ---------- Error handler ---------- +@app.exception_handler(Exception) +async def unhandled_exception_handler(request: Request, exc: Exception): + logging.exception("Unhandled error: %s", exc) + return PlainTextResponse(str(exc), status_code=500) + +# ---------- Startup banner ---------- +@app.on_event("startup") +def show_routes(): + try: + print("✅ Available routes:") + for r in app.routes: + print(f" {r.path:30s} → {r.name}") + except Exception: + pass + +# ---------- Entrypoint (exact launcher you requested) ---------- +PORT = int(os.getenv("PORT", "8088")) +RELOAD = os.getenv("RELOAD", "false").lower() in ("1", "true", "yes") + +if __name__ == "__main__": + # Make sure the string target "app:app" resolves to THIS module when run as a script. + # This avoids the common folder/file name collision on Windows. + sys.modules.setdefault("app", sys.modules[__name__]) + uvicorn.run("app:app", host="0.0.0.0", port=PORT, reload=RELOAD) diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/config.py b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/config.py new file mode 100644 index 000000000..e05599c98 --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/config.py @@ -0,0 +1,27 @@ +PORT = 8088 +RELOAD = True +DEBUG = True +DEFAULT_API_KEYS = "ocigenerativeai" +API_ROUTE_PREFIX = "/api/v1" + +EMBED_TRUNCATE = "END" +# One of NONE|START|END to specify how the API will handle inputs longer than the maximum token length. +# START: will discard the start of the input until the remaining input is exactly the maximum input token length for the model. +# END: will discard the end of the input until the remaining input is exactly the maximum input token length for the model. 
+# NONE: when the input exceeds the maximum input token length an error will be returned + + +# AUTH_TYPE can be "API_KEY" or "INSTANCE_PRINCIPAL" +AUTH_TYPE = "API_KEY" +OCI_CONFIG_FILE = "~/.oci/config" +OCI_CONFIG_FILE_KEY = "DEFAULT" +INFERENCE_ENDPOINT_TEMPLATE = "https://inference.generativeai.eu-frankfurt-1.oci.oraclecloud.com/20231130" + +TITLE = "OCI Generative AI Proxy APIs" +SUMMARY = "OpenAI-Compatible RESTful APIs for OCI Generative AI Service" +VERSION = "0.1.0" +DESCRIPTION = """ +Use OpenAI-Compatible RESTful APIs for OCI Generative AI Service models and OCI Data Science AI quick actions models. + +Please edit "models.yaml" to specify your models and their call endpoints. +""" diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/connection_test.ipynb b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/connection_test.ipynb new file mode 100644 index 000000000..fa1c78c13 --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/connection_test.ipynb @@ -0,0 +1,101 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: openai in c:\\python313\\lib\\site-packages (1.84.0)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in c:\\python313\\lib\\site-packages (from openai) (4.8.0)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in c:\\python313\\lib\\site-packages (from openai) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in c:\\python313\\lib\\site-packages (from openai) (0.28.1)\n", + "Requirement already satisfied: jiter<1,>=0.4.0 in c:\\python313\\lib\\site-packages (from openai) (0.8.2)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in c:\\python313\\lib\\site-packages (from openai) (2.10.6)\n", + "Requirement already satisfied: sniffio in c:\\python313\\lib\\site-packages (from openai) (1.3.1)\n", + "Requirement already satisfied: tqdm>4 in c:\\python313\\lib\\site-packages (from openai) (4.67.1)\n", + "Requirement already satisfied: typing-extensions<5,>=4.11 in c:\\python313\\lib\\site-packages (from openai) (4.12.2)\n", + "Requirement already satisfied: idna>=2.8 in c:\\python313\\lib\\site-packages (from anyio<5,>=3.5.0->openai) (3.10)\n", + "Requirement already satisfied: certifi in c:\\python313\\lib\\site-packages (from httpx<1,>=0.23.0->openai) (2025.1.31)\n", + "Requirement already satisfied: httpcore==1.* in c:\\python313\\lib\\site-packages (from httpx<1,>=0.23.0->openai) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in c:\\python313\\lib\\site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in c:\\python313\\lib\\site-packages (from pydantic<3,>=1.9.0->openai) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in c:\\python313\\lib\\site-packages (from pydantic<3,>=1.9.0->openai) (2.27.2)\n", + "Requirement already satisfied: colorama in c:\\python313\\lib\\site-packages (from tqdm>4->openai) (0.4.6)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install openai" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "pong\n" + ] + } + ], + "source": [ + "from openai import OpenAI\n", + "\n", + "client = 
OpenAI(api_key=\"ocigenerativeai\", base_url=\"http://localhost:8088/v1/\")\n", + "\n", + "r1 = client.chat.completions.create(\n", + " model=\"ignored\",\n", + " messages=[{\"role\": \"user\", \"content\": \"Reply with 'pong'.\"}],\n", + " extra_body={\n", + " \"agent_endpoint_ocid\": \"ocid1.genaiagentendpoint.oc1.eu-frankfurt-1.\", #your genai agent **endpoint** OCID\n", + " \"region\": \"eu-frankfurt-1\",\n", + " },\n", + ")\n", + "print(r1.choices[0].message.content)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/kill.sh b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/kill.sh new file mode 100644 index 000000000..931f2be6b --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/kill.sh @@ -0,0 +1 @@ +kill -9 $(lsof -i :8088 | awk 'NR>1 {print $2}') \ No newline at end of file diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/log.txt b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/log.txt new file mode 100644 index 000000000..e69de29bb diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/models.yaml b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/models.yaml new file mode 100644 index 000000000..248bbe768 --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/models.yaml @@ -0,0 +1,88 @@ +- region: eu-frankfurt-1 + compartment_id: ocid1.compartment.oc1.. + models: + ondemand: + - name: cohere.command-r-plus-08-2024 + model_id: cohere.command-r-plus-08-2024 + description: "delivers roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while keeping the hardware footprint the same." + "tool_call": True, + "stream_tool_call": True, + + - name: cohere.command-r-08-2024 + model_id: cohere.command-r-08-2024 + description: "delivers roughly 50% higher throughput and 25% lower latencies as compared to the previous Command R+ version, while keeping the hardware footprint the same." + "tool_call": True, + "stream_tool_call": True, +# - name: cohere.command-r-16k +# model_id: cohere.command-r-16k +# description: "Optimized for conversational interaction and long context tasks. Ideal for text generation, summarization, translation, or text-based classification." + + - name: meta.llama-3.3-70b-instruct + model_id: meta.llama-3.3-70b-instruct + description: "Model has 70 billion parameters.Accepts text-only inputs and produces text-only outputs.Delivers better performance than both Llama 3.1 70B and Llama 3.2 90B for text tasks.Maximum prompt + response length 128,000 tokens for each run.For on-demand inferencing, the response length is capped at 4,000 tokens for each run." + "tool_call": True, + "stream_tool_call": True, +# - name: meta.llama-3.2-11b-vision-instruct +# model_id: meta.llama-3.2-11b-vision-instruct +# description: "Model has 11 billion parameters.Dedicated mode only. (On-demand inferencing not available.) 
For dedicated inferencing, create a dedicated AI cluster and endpoint and host the model on the cluster.Context length 128,000 tokens.Maximum prompt + response length 128,000 tokens for each run.Multimodal support Input text and images and get a text output.English is the only supported language for the image plus text option.Multilingual option supported for the text only option." + +# - name: meta.llama-3.2-90b-vision-instruct +# model_id: meta.llama-3.2-90b-vision-instruct +# description: "Model has 90 billion parameters.Context length: 128,000 tokens.Maximum prompt + response length: 128,000 tokens for each run.For on-demand inferencing, the response length is capped at 4,000 tokens for each run.Multimodal support: Input text and images and get a text output.English is the only supported language for the image plus text option.Multilingual option supported for the text only option." + + - name: meta.llama-3-70b-instruct + model_id: meta.llama-3-70b-instruct + description: "This 70 billion-parameter generation model is perfect for content creation, conversational AI, and enterprise applications." + "tool_call": True, + "stream_tool_call": True, + + #- name: meta.llama-3.1-405b-instruct + # model_id: meta.llama-3.1-405b-instruct + # description: "This 405 billion-parameter model is a high-performance option that offers speed and scalability." + # "tool_call": True, + # "stream_tool_call": True, + +- region: us-chicago-1 + compartment_id: ocid1.compartment.oc1.. + models: + ondemand: + - name: meta.llama-3.2-90b-vision-instruct + model_id: meta.llama-3.2-90b-vision-instruct + description: "Model has 11 billion parameters.Dedicated mode only. (On-demand inferencing not available.) For dedicated inferencing, create a dedicated AI cluster and endpoint and host the model on the cluster.Context length 128,000 tokens.Maximum prompt + response length 128,000 tokens for each run.Multimodal support Input text and images and get a text output.English is the only supported language for the image plus text option.Multilingual option supported for the text only option." + "tool_call": True, + "stream_tool_call": True, + stream_multimodal: True, + multimodal: True, + +# dedicated: +# - name: my-dedicated-model-name +# endpoint: https://ocid1.generativeaiendpoint.... # endpoint url for dedicated model +# description: "my dedicated model description" + +# datascience: +# - name: my-datascience-model-name +# endpoint: https://modeldeployment.xxxxxx/predict # Model deployment endpoint url +# description: "my dedicated model description" + +#- region: us-ashburn-1 +# compartment_id: ocid1.compartment.oc1..xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx +# models: +# datascience: +# - name: ODSC-Mistral-7B-Instruct +# endpoint: https://modeldeployment.us-ashburn-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.iad.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx/predict +# description: "Data science Model deployment for Mistral 7B Instruct model" +# - name: ODSC-DeepSeek-R1-Distill-Qwen-7B +# endpoint: https://modeldeployment.us-ashburn-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.iad.xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx/predict +# description: "Data science Model deployment for DeepSeek-R1-Distill-Qwen-7B model" + +# Modify this file to specify the call information of the model. +# You can define 3 types of models: +# ondemand: pre-trained model on OCI generative AI +# dedicated: dedicated model on OCI generative AI, including dedicated infrastructure, fine-tuned model, etc. 
+# datascience: model deployed by OCI data science service + +# Where: +# region: the region where the model is located +# compartment_id: the compartment where the model is located +# name: any specified model name, use this name to point to different models when calling +# model_id: for ondemand, it is the model id, for dedicated and datascience, it is the call endpoint diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/requirements.txt b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/requirements.txt new file mode 100644 index 000000000..b1a385787 --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/requirements.txt @@ -0,0 +1,10 @@ +fastapi>=0.111.0 +pydantic>=2.7.1 +uvicorn>=0.29.0 +tiktoken>=0.6.0 +requests>=2.32.3 +numpy>=1.26.4 +oci>=2.129.4 +sseclient-py +pyyaml +gunicorn==23.0.0 diff --git a/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/run.sh b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/run.sh new file mode 100644 index 000000000..8a1b91976 --- /dev/null +++ b/ai/gen-ai-agents/agentsOCI-OpenAI-gateway/run.sh @@ -0,0 +1 @@ +gunicorn app:app --workers 16 --worker-class uvicorn.workers.UvicornWorker --timeout 600 --bind 0.0.0.0:8088 \ No newline at end of file