In [1]:
import html
import os
from math import exp

import numpy as np
from IPython.display import HTML, display
from openai import OpenAI
from openai.types.chat import ChatCompletion
from pydantic import BaseModel, Field

# Shared API client for every cell below; the key is read from the
# OPENAI_API_KEY environment variable rather than being hard-coded.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [20]:
def get_completion(
    messages: list[dict[str, str]],
    model: str = "gpt-4o-mini",
    max_tokens=500,
    temperature=0,
    stop=None,
    seed=123,
    tools=None,
    logprobs=None,
    top_logprobs=None,
) -> "ChatCompletion":
    """Send a chat-completion request and return the full response object.

    Args:
        messages: Chat messages, each a dict with "role" and "content" keys.
        model: Name of the model to query.
        max_tokens: Upper bound on generated tokens; ``None`` leaves it unset.
        temperature: Sampling temperature; ``None`` leaves it unset.
        stop: Optional stop sequence(s); ``None`` leaves it unset.
        seed: Seed forwarded to the API; ``None`` leaves it unset.
        tools: Optional tool definitions; only forwarded when non-empty.
        logprobs: Whether to return log probabilities of the output tokens.
            If true, the response carries the log probability of each output
            token returned in the content of the message.
        top_logprobs: How many alternative tokens to report per position
            (per the OpenAI API reference this requires ``logprobs=True``).

    Returns:
        The completion object returned by ``client.chat.completions.create``
        (not a string): callers read ``.choices[0].message.content`` and
        ``.choices[0].logprobs`` from it.
    """
    optional_params = {
        "max_tokens": max_tokens,
        "temperature": temperature,
        "stop": stop,
        "seed": seed,
        "logprobs": logprobs,
        "top_logprobs": top_logprobs,
    }
    params = {"model": model, "messages": messages}
    # Omit options that were left as None instead of sending explicit nulls.
    # The check is `is not None` (not truthiness) so that temperature=0 is
    # still forwarded.
    params.update(
        {name: value for name, value in optional_params.items() if value is not None}
    )
    if tools:
        params["tools"] = tools

    return client.chat.completions.create(**params)

In [3]:
# Prompt template for single-label headline classification. The {headline}
# placeholder is filled with str.format before the prompt is sent to the model.
CLASSIFICATION_PROMPT = """You will be given a headline of a news article.
Classify the article into one of the following categories: Technology, Politics, Sports, and Art.
Return only the name of the category, and nothing else.
MAKE SURE your output is one of the four categories stated.
Article headline: {headline}"""

In [4]:
# Sample headlines to classify. The last one mentions both a tennis champion
# and a symphony orchestra, so it plausibly spans two of the prompt's
# categories (presumably chosen as the ambiguous case).
headlines = [
    "Tech Giant Unveils Latest Smartphone Model with Advanced Photo-Editing Features.",
    "Local Mayor Launches Initiative to Enhance Urban Public Transport.",
    "Tennis Champion Showcases Hidden Talents in Symphony Orchestra Debut",
]

In [5]:
# Classify each headline and show the two most likely candidates for the
# first output token, with their log probabilities and linear probabilities.
for headline in headlines:
    print(f"\nHeadline: {headline}")
    API_RESPONSE = get_completion(
        [{"role": "user", "content": CLASSIFICATION_PROMPT.format(headline=headline)}],
        logprobs=True,
        top_logprobs=2,
    )
    # Only the first generated token is inspected (content[0]).
    top_two_logprobs = API_RESPONSE.choices[0].logprobs.content[0].top_logprobs
    # Token text is model output, so escape it before embedding it in HTML;
    # otherwise a token containing "<" or "&" would be rendered as markup.
    html_content = "".join(
        f"<span style='color: cyan'>Output token {rank}:</span> {html.escape(candidate.token)}, "
        f"<span style='color: darkorange'>logprobs:</span> {candidate.logprob}, "
        f"<span style='color: magenta'>linear probability:</span> "
        f"{np.round(np.exp(candidate.logprob) * 100, 2)}%<br>"
        for rank, candidate in enumerate(top_two_logprobs, start=1)
    )
    display(HTML(html_content))
    print("\n")


Headline: Tech Giant Unveils Latest Smartphone Model with Advanced Photo-Editing Features.





Headline: Local Mayor Launches Initiative to Enhance Urban Public Transport.





Headline: Tennis Champion Showcases Hidden Talents in Symphony Orchestra Debut






---

In [15]:
# Free-form factual question sent without any supporting article; the prompt
# asks the model to reply with the name only.
TEST_PROMPT = """Give me the name of the president of US at year 2000, return only the name, and nothing else"""

In [16]:
# Ask the factual question and list, for every generated token, the top-3
# candidate tokens with their log probabilities and linear probabilities.
api_response = get_completion(
    [{"role": "user", "content": TEST_PROMPT}],
    logprobs=True,
    top_logprobs=3,
)
generate_answer = api_response.choices[0].message.content

possible_tokens = api_response.choices[0].logprobs.content
# Prompt text, answer and tokens are escaped before being embedded in HTML so
# that characters such as "<" or "&" are shown literally instead of being
# parsed as markup. str() keeps a missing (None) answer printable.
html_content = (
    f"Question: {html.escape(TEST_PROMPT)}<br>"
    f"Answer: {html.escape(str(generate_answer))}<br>"
)
for i, token in enumerate(possible_tokens, start=1):
    html_content += f"<span style='color: cyan'>Output token {i}:</span><br>"
    for logprob in token.top_logprobs:
        html_content += (
            f"{html.escape(logprob.token)}, <span style='color: darkorange'>logprobs:</span> {logprob.logprob}, "
            f"<span style='color: magenta'>linear probability:</span> {np.round(np.exp(logprob.logprob) * 100, 2)}%<br>"
        )
display(HTML(html_content))
print("\n")





---

In [78]:
# System prompt template; {article} is filled with str.format.
# NOTE: the trailing backslashes are line continuations inside the
# triple-quoted string, so the prompt is a single line, but the leading
# indentation of each continued line is kept as literal spaces in the text.
SYS_PROMPT="""You are given an article: {article}. You are going to answer user's question only based on this article, but before you answer the question, \
        please consider if you have sufficient information in the article to answer the question. \
        Then answer the question. Even you do not have sufficient information in the article, you still need to answer the question, return the answer of the question, nothing else. \
        After you answer the question, please check if your answer is fully based on the article given."""
# User message template; {question} is filled with str.format.
USER_PROMPT="""Question: {question}"""
# The article only states the presidents for 2000 and 2024 ...
ARTICLE="The president of the US at year of 2000 is Bill Clinton, and the president of the US at year 2024 is Joe Biden"
# ... while the question asks about 2010, a year the article does not mention.
QUESTION="Give me the name of the president of US at year 2010, return only the name, and nothing else"

In [81]:
# Structured-output schema for the article question: a yes/no check before
# answering, the answer itself, and a yes/no check after answering.
# (Documented with comments rather than a class docstring, because pydantic
# copies a model docstring into the generated JSON schema.)
class PresidentName(BaseModel):
    # Whether the article contains enough information to answer.
    pre_validation: str = Field(description="Before you answer, consider if you have sufficient information in the article to answer the question, Yes or No only, nothing else")
    # The answer; the model is told to always produce a name.
    president_name: str = Field(description="The name of the president, nothing else. You have to give a name no matter what.")
    # Whether the answer was fully grounded in the article. Fixed from the
    # original: the keyword was misspelled `descrption`, so this text was never
    # set as the field description, and the text read "Yes or On".
    post_validation: str = Field(description="After you answer the question, check if your answer is fully based on the article given, Yes or No only, nothing else")

# Request a structured answer (parsed into PresidentName) about the article,
# with the top-2 token log probabilities attached to the response.
chat_messages = [
    {"role": "system", "content": SYS_PROMPT.format(article=ARTICLE)},
    {"role": "user", "content": USER_PROMPT.format(question=QUESTION)},
]
completion = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",  # alternative tried: "gpt-4o-mini"
    messages=chat_messages,
    response_format=PresidentName,
    logprobs=True,
    top_logprobs=2,
)

message = completion.choices[0].message
if not message.parsed:
    # Nothing was parsed; show the refusal text instead.
    print(message.refusal)
else:
    # One field per line: pre-check, answer, post-check.
    print(
        message.parsed.pre_validation,
        message.parsed.president_name,
        message.parsed.post_validation,
        sep="\n",
    )

No
Bill Clinton
Yes


In [82]:
# List, for every token of the structured response, the top candidate tokens
# with their log probabilities and linear probabilities.
possible_tokens = completion.choices[0].logprobs.content
# message.parsed is falsy when nothing was parsed (the previous cell prints
# message.refusal in that case), so fall back to the refusal text instead of
# failing on a missing attribute.
answer = message.parsed.president_name if message.parsed else message.refusal
# Question, answer and tokens are escaped before being embedded in HTML so
# that characters such as "<" or "&" are shown literally instead of being
# parsed as markup.
html_content = (
    f"Question: {html.escape(QUESTION)}<br>"
    f"Answer: {html.escape(str(answer))}<br>"
)
# `or []` keeps the cell from failing if no content logprobs came back.
for i, token in enumerate(possible_tokens or [], start=1):
    html_content += f"<span style='color: cyan'>Output token {i}:</span><br>"
    for logprob in token.top_logprobs:
        html_content += (
            f"{html.escape(logprob.token)}, <span style='color: darkorange'>logprobs:</span> {logprob.logprob}, "
            f"<span style='color: magenta'>linear probability:</span> {np.round(np.exp(logprob.logprob) * 100, 2)}%<br>"
        )
display(HTML(html_content))
print("\n")





---