# Chat completion API

The chat completion API is designed to facilitate conversations with the model. It allows you to send a series of messages and receive a model-generated response.
The API is optimized for chat-based interactions, making it suitable for applications like chatbots, virtual assistants, and interactive dialogue systems.
The chat completion API is designed to handle multi-turn conversations, where the context of the conversation is maintained across multiple messages. This allows for more coherent and contextually relevant responses from the model.
The API is also capable of handling system messages, which can be used to set the behavior or personality of the assistant. This allows for customization of the model's responses based on the specific needs of the application.

In [1]:
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAI
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage

In [2]:
gpt4o_chat = ChatOpenAI(model="gpt-4o", temperature=0)

In [3]:
# Message list
messages = [
    SystemMessage(content="You are a helpful assistant and for each phrase in english you say, you say another in spanish, with a naive joke."),
    HumanMessage(content="What is the capital of France?"),
    AIMessage(content="The capital of France is Paris."),
    HumanMessage(content="What is the capital of Germany?"),
]

# Invoke the model with a list of messages 
response = gpt4o_chat.invoke(messages)
response

AIMessage(content='The capital of Germany is Berlin.  \nLa capital de Alemania es Berlín. ¿Sabes por qué los pájaros no usan Facebook? Porque ya tienen Twitter.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 35, 'prompt_tokens': 65, 'total_tokens': 100, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_de57b65c90', 'finish_reason': 'stop', 'logprobs': None}, id='run-e1e003e0-8092-4dee-8102-87269fe38e10-0', usage_metadata={'input_tokens': 65, 'output_tokens': 35, 'total_tokens': 100, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [4]:
response_attributes = [x for x in dir(response) if not callable(getattr(response, x, None)) and not x.startswith("_") and hasattr(response, x)]
response_attributes.sort()
response_attributes

/tmp/ipykernel_654593/3168877468.py:1: PydanticDeprecatedSince20: The `__fields__` attribute is deprecated, use `model_fields` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  response_attributes = [x for x in dir(response) if not callable(getattr(response, x, None)) and not x.startswith("_") and hasattr(response, x)]
/tmp/ipykernel_654593/3168877468.py:1: PydanticDeprecatedSince20: The `__fields_set__` attribute is deprecated, use `model_fields_set` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  response_attributes = [x for x in dir(response) if not callable(getattr(response, x, None)) and not x.startswith("_") and hasattr(response, x)]


['additional_kwargs',
 'content',
 'example',
 'id',
 'invalid_tool_calls',
 'lc_attributes',
 'lc_secrets',
 'model_computed_fields',
 'model_config',
 'model_extra',
 'model_fields',
 'model_fields_set',
 'name',
 'response_metadata',
 'tool_calls',
 'type',
 'usage_metadata']

In [5]:
for attribute in response_attributes:
    print(f"{attribute}:\n\t {getattr(response, attribute)}")
    print("-" * 20)

additional_kwargs:
	 {'refusal': None}
--------------------
content:
	 The capital of Germany is Berlin.  
La capital de Alemania es Berlín. ¿Sabes por qué los pájaros no usan Facebook? Porque ya tienen Twitter.
--------------------
example:
	 False
--------------------
id:
	 run-e1e003e0-8092-4dee-8102-87269fe38e10-0
--------------------
invalid_tool_calls:
	 []
--------------------
lc_attributes:
	 {'tool_calls': [], 'invalid_tool_calls': []}
--------------------
lc_secrets:
	 {}
--------------------
model_computed_fields:
	 {}
--------------------
model_config:
	 {'extra': 'allow'}
--------------------
model_extra:
	 {}
--------------------
model_fields:
	 {'content': FieldInfo(annotation=Union[str, list[Union[str, dict]]], required=True), 'additional_kwargs': FieldInfo(annotation=dict, required=False, default_factory=dict), 'response_metadata': FieldInfo(annotation=dict, required=False, default_factory=dict), 'type': FieldInfo(annotation=Literal['ai'], required=False, default='ai')

In [18]:
response.content

'The capital of Germany is Berlin.  \nLa capital de Alemania es Berlín. ¿Sabes por qué los pájaros no usan Facebook? Porque ya tienen Twitter.'

In [19]:
response.additional_kwargs

{'refusal': None}

In [20]:
response.response_metadata

{'token_usage': {'completion_tokens': 35,
  'prompt_tokens': 65,
  'total_tokens': 100,
  'completion_tokens_details': {'accepted_prediction_tokens': 0,
   'audio_tokens': 0,
   'reasoning_tokens': 0,
   'rejected_prediction_tokens': 0},
  'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}},
 'model_name': 'gpt-4o-2024-08-06',
 'system_fingerprint': 'fp_6dd05565ef',
 'finish_reason': 'stop',
 'logprobs': None}

In [21]:
response.id

'run-505e1e8d-f82f-44ff-91ba-3b321aa17a56-0'

In [22]:
response.usage_metadata

{'input_tokens': 65,
 'output_tokens': 35,
 'total_tokens': 100,
 'input_token_details': {'audio': 0, 'cache_read': 0},
 'output_token_details': {'audio': 0, 'reasoning': 0}}

In [3]:
messages = [
    {"role": "developer", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"}
  ]

from openai  import OpenAI 

# import langchain_openai

# client = langchain_openai.OpenAI()
client = OpenAI()

def get_completion(
    messages: list[dict[str, str]],
    model: str = "gpt-4",
    max_tokens=500,
    temperature=0,
    stop=None,
    seed=123,
    tools=None,
    logprobs=None,  # whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message..
    top_logprobs=None,
) -> str:
    params = {
        "model": model,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stop": stop,
        "seed": seed,
        "logprobs": logprobs,
        "top_logprobs": top_logprobs,
    }
    if tools:
        params["tools"] = tools

    completion = client.chat.completions.create(**params)
    return completion

In [4]:
res = get_completion(messages, model="gpt-4o", logprobs=True, top_logprobs=5)
res

ChatCompletion(id='chatcmpl-BJOxBZeknU8tRwHSHxMliikCXWjci', choices=[Choice(finish_reason='stop', index=0, logprobs=ChoiceLogprobs(content=[ChatCompletionTokenLogprob(token='Hello', bytes=[72, 101, 108, 108, 111], logprob=-0.023246178403496742, top_logprobs=[TopLogprob(token='Hello', bytes=[72, 101, 108, 108, 111], logprob=-0.023246178403496742), TopLogprob(token='Hi', bytes=[72, 105], logprob=-3.7732462882995605), TopLogprob(token='Good', bytes=[71, 111, 111, 100], logprob=-14.273245811462402), TopLogprob(token='Hey', bytes=[72, 101, 121], logprob=-16.27324676513672), TopLogprob(token='Greetings', bytes=[71, 114, 101, 101, 116, 105, 110, 103, 115], logprob=-17.52324676513672)]), ChatCompletionTokenLogprob(token='!', bytes=[33], logprob=-4.246537173457909e-06, top_logprobs=[TopLogprob(token='!', bytes=[33], logprob=-4.246537173457909e-06), TopLogprob(token=' there', bytes=[32, 116, 104, 101, 114, 101], logprob=-12.375003814697266), TopLogprob(token='.', bytes=[46], logprob=-19.62500381

In [27]:
res.choices[0].logprobs.content[8]

ChatCompletionTokenLogprob(token='?', bytes=[63], logprob=-2.1008713702030946e-06, top_logprobs=[TopLogprob(token='?', bytes=[63], logprob=-2.1008713702030946e-06), TopLogprob(token='?\n', bytes=[63, 10], logprob=-13.125001907348633), TopLogprob(token='?\n\n', bytes=[63, 10, 10], logprob=-17.000001907348633), TopLogprob(token='?\n\n\n', bytes=[63, 10, 10, 10], logprob=-21.500001907348633), TopLogprob(token='?\\', bytes=[63, 92], logprob=-22.000001907348633)])

# Reponse API

The response API is designed to generate text completions based on a given prompt. It is suitable for a wide range of applications, including content generation, text completion, and creative writing. The response API is optimized for generating single-turn completions, where the model generates a response based on a single input prompt without maintaining context from previous messages.
The response API is more straightforward and simpler to use for tasks that do not require multi-turn interactions or conversational context. It is ideal for applications where the focus is on generating text based on a specific input without the need for maintaining a conversation history.


In [14]:
gpt4o_response = OpenAI(model="gpt-4o", temperature=0)

In [15]:
gpt4o_response.invoke("What is the capital of France?")

NotFoundError: Error code: 404 - {'error': {'message': 'This is a chat model and not supported in the v1/completions endpoint. Did you mean to use v1/chat/completions?', 'type': 'invalid_request_error', 'param': 'model', 'code': None}}