diff --git a/libs/partners/openai/langchain_openai/chat_models/azure.py b/libs/partners/openai/langchain_openai/chat_models/azure.py
index a13ef2b3850664..0d2632b9bf570e 100644
--- a/libs/partners/openai/langchain_openai/chat_models/azure.py
+++ b/libs/partners/openai/langchain_openai/chat_models/azure.py
@@ -57,48 +57,417 @@ def _is_pydantic_class(obj: Any) -> bool:
 class AzureChatOpenAI(BaseChatOpenAI):
-    """`Azure OpenAI` Chat Completion API.
+    """Azure OpenAI chat model integration.
+
+    Setup:
+        Head to
+        https://learn.microsoft.com/en-us/azure/ai-services/openai/chatgpt-quickstart?tabs=command-line%2Cpython-new&pivots=programming-language-python
+        to create your Azure OpenAI deployment.
+
+        Then install ``langchain-openai`` and set environment variables
+        ``AZURE_OPENAI_API_KEY`` and ``AZURE_OPENAI_ENDPOINT``:
+
+        .. code-block:: bash
+
+            pip install -U langchain-openai
+
+            export AZURE_OPENAI_API_KEY="your-api-key"
+            export AZURE_OPENAI_ENDPOINT="https://your-endpoint.openai.azure.com/"
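+
+        If you authenticate with Azure Active Directory instead of an API key, you
+        can pass an ``azure_ad_token_provider``. A minimal sketch, assuming the
+        ``azure-identity`` package is installed:
+
+        .. code-block:: python
+
+            from azure.identity import DefaultAzureCredential, get_bearer_token_provider
+
+            token_provider = get_bearer_token_provider(
+                DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
+            )
+            # The provider is invoked on every request to fetch a fresh token.
+            llm = AzureChatOpenAI(
+                azure_deployment="your-deployment",
+                api_version="2024-05-01-preview",
+                azure_ad_token_provider=token_provider,
+            )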
+
+    Key init args — completion params:
+        azure_deployment: str
+            Name of Azure OpenAI deployment to use.
+        temperature: float
+            Sampling temperature.
+        max_tokens: Optional[int]
+            Max number of tokens to generate.
+        logprobs: Optional[bool]
+            Whether to return logprobs.
+
+    Key init args — client params:
+        api_version: str
+            Azure OpenAI API version to use. See more on the different versions here:
+            https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning
+        timeout: Union[float, Tuple[float, float], Any, None]
+            Timeout for requests.
+        max_retries: int
+            Max number of retries.
+        organization: Optional[str]
+            OpenAI organization ID. If not passed in, will be read from env var
+            OPENAI_ORG_ID.
+
+    See full list of supported init args and their descriptions in the params section.
+
+    Instantiate:
+        .. code-block:: python
+
+            from langchain_openai import AzureChatOpenAI
+
+            llm = AzureChatOpenAI(
+                azure_deployment="your-deployment",
+                api_version="2024-05-01-preview",
+                temperature=0,
+                max_tokens=None,
+                timeout=None,
+                max_retries=2,
+                # organization="...",
+                # other params...
+            )
+
+    **NOTE**: Any param which is not explicitly supported will be passed directly to the
+    ``openai.AzureOpenAI.chat.completions.create(...)`` API every time the model is
+    invoked. For example:
+
+        .. code-block:: python
+
+            from langchain_openai import AzureChatOpenAI
+            import openai
+
+            AzureChatOpenAI(..., logprobs=True).invoke(...)
+
+            # results in underlying API call of:
+
+            openai.AzureOpenAI(...).chat.completions.create(..., logprobs=True)
+
+            # which is also equivalent to:
+
+            AzureChatOpenAI(...).invoke(..., logprobs=True)
+
+    Invoke:
+        .. code-block:: python
+
+            messages = [
+                (
+                    "system",
+                    "You are a helpful translator. Translate the user sentence to French.",
+                ),
+                ("human", "I love programming."),
+            ]
+            llm.invoke(messages)
+
+        .. code-block:: python
+
+            AIMessage(
+                content="J'adore programmer.",
+                usage_metadata={"input_tokens": 28, "output_tokens": 6, "total_tokens": 34},
+                response_metadata={
+                    "token_usage": {
+                        "completion_tokens": 6,
+                        "prompt_tokens": 28,
+                        "total_tokens": 34,
+                    },
+                    "model_name": "gpt-4",
+                    "system_fingerprint": "fp_7ec89fabc6",
+                    "prompt_filter_results": [
+                        {
+                            "prompt_index": 0,
+                            "content_filter_results": {
+                                "hate": {"filtered": False, "severity": "safe"},
+                                "self_harm": {"filtered": False, "severity": "safe"},
+                                "sexual": {"filtered": False, "severity": "safe"},
+                                "violence": {"filtered": False, "severity": "safe"},
+                            },
+                        }
+                    ],
+                    "finish_reason": "stop",
+                    "logprobs": None,
+                    "content_filter_results": {
+                        "hate": {"filtered": False, "severity": "safe"},
+                        "self_harm": {"filtered": False, "severity": "safe"},
+                        "sexual": {"filtered": False, "severity": "safe"},
+                        "violence": {"filtered": False, "severity": "safe"},
+                    },
+                },
+                id="run-6d7a5282-0de0-4f27-9cc0-82a9db9a3ce9-0",
+            )
+
+    Stream:
+        .. code-block:: python
+
+            for chunk in llm.stream(messages):
+                print(chunk)
+
+        .. code-block:: python
+
+            AIMessageChunk(content="", id="run-a6f294d3-0700-4f6a-abc2-c6ef1178c37f")
+            AIMessageChunk(content="J", id="run-a6f294d3-0700-4f6a-abc2-c6ef1178c37f")
+            AIMessageChunk(content="'", id="run-a6f294d3-0700-4f6a-abc2-c6ef1178c37f")
+            AIMessageChunk(content="ad", id="run-a6f294d3-0700-4f6a-abc2-c6ef1178c37f")
+            AIMessageChunk(content="ore", id="run-a6f294d3-0700-4f6a-abc2-c6ef1178c37f")
+            AIMessageChunk(content=" la", id="run-a6f294d3-0700-4f6a-abc2-c6ef1178c37f")
+            AIMessageChunk(content=" programm", id="run-a6f294d3-0700-4f6a-abc2-c6ef1178c37f")
+            AIMessageChunk(content="ation", id="run-a6f294d3-0700-4f6a-abc2-c6ef1178c37f")
+            AIMessageChunk(content=".", id="run-a6f294d3-0700-4f6a-abc2-c6ef1178c37f")
+            AIMessageChunk(
+                content="",
+                response_metadata={
+                    "finish_reason": "stop",
+                    "model_name": "gpt-4",
+                    "system_fingerprint": "fp_811936bd4f",
+                },
+                id="run-a6f294d3-0700-4f6a-abc2-c6ef1178c37f",
+            )
+
+        .. code-block:: python
+
+            stream = llm.stream(messages)
+            full = next(stream)
+            for chunk in stream:
+                full += chunk
+            full
+
+        .. code-block:: python
+
+            AIMessageChunk(
+                content="J'adore la programmation.",
+                response_metadata={
+                    "finish_reason": "stop",
+                    "model_name": "gpt-4",
+                    "system_fingerprint": "fp_811936bd4f",
+                },
+                id="run-ba60e41c-9258-44b8-8f3a-2f10599643b3",
+            )
+
+    Async:
+        .. code-block:: python
+
+            await llm.ainvoke(messages)
+
+            # stream:
+            # async for chunk in llm.astream(messages)
+
+            # batch:
+            # await llm.abatch([messages])
+
+    Tool calling:
+        .. code-block:: python
+
+            from langchain_core.pydantic_v1 import BaseModel, Field
+
+
+            class GetWeather(BaseModel):
+                '''Get the current weather in a given location'''
+
+                location: str = Field(
+                    ..., description="The city and state, e.g. San Francisco, CA"
+                )
+
+
+            class GetPopulation(BaseModel):
+                '''Get the current population in a given location'''
+
+                location: str = Field(
+                    ..., description="The city and state, e.g. San Francisco, CA"
+                )
+
+
+            llm_with_tools = llm.bind_tools([GetWeather, GetPopulation])
+            ai_msg = llm_with_tools.invoke(
+                "Which city is hotter today and which is bigger: LA or NY?"
+            )
+            ai_msg.tool_calls
+
+        .. code-block:: python
+
+            [
+                {
+                    "name": "GetWeather",
+                    "args": {"location": "Los Angeles, CA"},
+                    "id": "call_6XswGD5Pqk8Tt5atYr7tfenU",
+                },
+                {
+                    "name": "GetWeather",
+                    "args": {"location": "New York, NY"},
+                    "id": "call_ZVL15vA8Y7kXqOy3dtmQgeCi",
+                },
+                {
+                    "name": "GetPopulation",
+                    "args": {"location": "Los Angeles, CA"},
+                    "id": "call_49CFW8zqC9W7mh7hbMLSIrXw",
+                },
+                {
+                    "name": "GetPopulation",
+                    "args": {"location": "New York, NY"},
+                    "id": "call_6ghfKxV264jEfe1mRIkS3PE7",
+                },
+            ]
+
+    Structured output:
+        .. code-block:: python
+
+            from typing import Optional
+
+            from langchain_core.pydantic_v1 import BaseModel, Field
+
+
+            class Joke(BaseModel):
+                '''Joke to tell user.'''
 
-    To use this class you
-    must have a deployed model on Azure OpenAI. Use `deployment_name` in the
-    constructor to refer to the "Model deployment name" in the Azure portal.
+                setup: str = Field(description="The setup of the joke")
+                punchline: str = Field(description="The punchline to the joke")
+                rating: Optional[int] = Field(description="How funny the joke is, from 1 to 10")
 
-    In addition, you should have the
-    following environment variables set or passed in constructor in lower case:
-    - ``AZURE_OPENAI_API_KEY``
-    - ``AZURE_OPENAI_ENDPOINT``
-    - ``AZURE_OPENAI_AD_TOKEN``
-    - ``OPENAI_API_VERSION``
-    - ``OPENAI_PROXY``
 
-    For example, if you have `gpt-3.5-turbo` deployed, with the deployment name
-    `35-turbo-dev`, the constructor should look like:
+            structured_llm = llm.with_structured_output(Joke)
+            structured_llm.invoke("Tell me a joke about cats")
 
-    .. code-block:: python
+        .. code-block:: python
 
-        from langchain_openai import AzureChatOpenAI
+            Joke(
+                setup="Why was the cat sitting on the computer?",
+                punchline="To keep an eye on the mouse!",
+                rating=None,
+            )
+
+        See ``AzureChatOpenAI.with_structured_output()`` for more.
 
-        AzureChatOpenAI(azure_deployment="35-turbo-dev", openai_api_version="2023-05-15")
+    JSON mode:
+        .. code-block:: python
+
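+            # Note: JSON mode requires that the prompt itself instruct the model
+            # to produce JSON, as the message below does.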
+            json_llm = llm.bind(response_format={"type": "json_object"})
+            ai_msg = json_llm.invoke(
+                "Return a JSON object with key 'random_ints' and a value of 10 random ints in [0-99]"
+            )
+            ai_msg.content
 
-    Be aware the API version may change.
+        .. code-block:: python
 
-    You can also specify the version of the model using ``model_version`` constructor
-    parameter, as Azure OpenAI doesn't return model version with the response.
+            '\\n{\\n  "random_ints": [23, 87, 45, 12, 78, 34, 56, 90, 11, 67]\\n}'
 
-    Default is empty. When you specify the version, it will be appended to the
-    model name in the response. Setting correct version will help you to calculate the
-    cost properly. Model version is not validated, so make sure you set it correctly
-    to get the correct cost.
+    Image input:
+        .. code-block:: python
 
-    Any parameters that are valid to be passed to the openai.create call can be passed
-    in, even if not explicitly saved on this class.
+            import base64
+            import httpx
+            from langchain_core.messages import HumanMessage
+
+            image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+            image_data = base64.b64encode(httpx.get(image_url).content).decode("utf-8")
+            message = HumanMessage(
+                content=[
+                    {"type": "text", "text": "describe the weather in this image"},
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": f"data:image/jpeg;base64,{image_data}"},
+                    },
+                ]
+            )
+            ai_msg = llm.invoke([message])
+            ai_msg.content
+
+        .. code-block:: python
+
+            "The weather in the image appears to be quite pleasant. The sky is mostly clear"
+
+    Token usage:
+        .. code-block:: python
+
+            ai_msg = llm.invoke(messages)
+            ai_msg.usage_metadata
+
+        .. code-block:: python
+
+            {"input_tokens": 28, "output_tokens": 5, "total_tokens": 33}
+
+    Logprobs:
+        .. code-block:: python
+
+            logprobs_llm = llm.bind(logprobs=True)
+            ai_msg = logprobs_llm.invoke(messages)
+            ai_msg.response_metadata["logprobs"]
+
+        .. code-block:: python
+
+            {
+                "content": [
+                    {
+                        "token": "J",
+                        "bytes": [74],
+                        "logprob": -4.9617593e-06,
+                        "top_logprobs": [],
+                    },
+                    {
+                        "token": "'adore",
+                        "bytes": [39, 97, 100, 111, 114, 101],
+                        "logprob": -0.25202933,
+                        "top_logprobs": [],
+                    },
+                    {
+                        "token": " la",
+                        "bytes": [32, 108, 97],
+                        "logprob": -0.20141791,
+                        "top_logprobs": [],
+                    },
+                    {
+                        "token": " programmation",
+                        "bytes": [
+                            32,
+                            112,
+                            114,
+                            111,
+                            103,
+                            114,
+                            97,
+                            109,
+                            109,
+                            97,
+                            116,
+                            105,
+                            111,
+                            110,
+                        ],
+                        "logprob": -1.9361265e-07,
+                        "top_logprobs": [],
+                    },
+                    {
+                        "token": ".",
+                        "bytes": [46],
+                        "logprob": -1.2233183e-05,
+                        "top_logprobs": [],
+                    },
+                ]
+            }
+
+    Response metadata:
+        .. code-block:: python
+
+            ai_msg = llm.invoke(messages)
+            ai_msg.response_metadata
+
+        .. code-block:: python
+
+            {
+                "token_usage": {
+                    "completion_tokens": 6,
+                    "prompt_tokens": 28,
+                    "total_tokens": 34,
+                },
+                "model_name": "gpt-35-turbo",
+                "system_fingerprint": None,
+                "prompt_filter_results": [
+                    {
+                        "prompt_index": 0,
+                        "content_filter_results": {
+                            "hate": {"filtered": False, "severity": "safe"},
+                            "self_harm": {"filtered": False, "severity": "safe"},
+                            "sexual": {"filtered": False, "severity": "safe"},
+                            "violence": {"filtered": False, "severity": "safe"},
+                        },
+                    }
+                ],
+                "finish_reason": "stop",
+                "logprobs": None,
+                "content_filter_results": {
+                    "hate": {"filtered": False, "severity": "safe"},
+                    "self_harm": {"filtered": False, "severity": "safe"},
+                    "sexual": {"filtered": False, "severity": "safe"},
+                    "violence": {"filtered": False, "severity": "safe"},
+                },
+            }
     """  # noqa: E501
 
     azure_endpoint: Union[str, None] = None
     """Your Azure endpoint, including the resource.
 
     Automatically inferred from env var `AZURE_OPENAI_ENDPOINT` if not provided.
-
     Example: `https://example-resource.azure.openai.com/`
     """
 
     deployment_name: Union[str, None] = Field(default=None, alias="azure_deployment")
@@ -115,7 +484,6 @@ class AzureChatOpenAI(BaseChatOpenAI):
     """Your Azure Active Directory token.
 
     Automatically inferred from env var `AZURE_OPENAI_AD_TOKEN` if not provided.
-
     For more:
     https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id.
     """
@@ -124,13 +492,25 @@ class AzureChatOpenAI(BaseChatOpenAI):
     Will be invoked on every request.
     """
 
     model_version: str = ""
-    """Legacy, for openai<1.0.0 support."""
+    """The version of the model (e.g. "0125" for gpt-3.5-0125).
+
+    Azure OpenAI doesn't return the model version with the response by default, so it
+    must be manually specified if you want to use this information downstream, e.g.
+    when calculating costs.
+
+    When you specify the version, it will be appended to the model name in the
+    response. Setting the correct version will help you calculate the cost properly.
+    Model version is not validated, so make sure you set it correctly to get the
+    correct cost.
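+
+    A minimal sketch of the effect, assuming a ``gpt-35-turbo`` deployment pinned to
+    version ``0125`` (the suffix comes from this field, not from Azure):
+
+    .. code-block:: python
+
+        llm = AzureChatOpenAI(azure_deployment="gpt-35-turbo", model_version="0125")
+        llm.invoke("Hello").response_metadata["model_name"]  # "gpt-35-turbo-0125"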
+    """
 
     openai_api_type: str = ""
     """Legacy, for openai<1.0.0 support."""
 
     validate_base_url: bool = True
-    """For backwards compatibility. If legacy val openai_api_base is passed in, try to
-    infer if it is a base_url or azure_endpoint and update accordingly.
+    """If legacy arg openai_api_base is passed in, try to infer if it is a base_url or
+    azure_endpoint and update client params accordingly.
     """
 
     @classmethod