# Azure OpenAI Chat Completions - Solved

For more information, see [MS Learn](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/chatgpt?tabs=python-new)

The Azure Key and Endpoint are set in your Codespace's Environment automatically.

In [1]:
# Make the project helper module importable before importing it.
# NOTE(review): the relative path is resolved against the current working
# directory, so this presumably expects the notebook to run from its own folder.
import sys
sys.path.insert(1, '../../../')
import init_creds as creds
 
# Fetch the Azure OpenAI key and endpoint through the project helper.
AZURE_OPENAI_KEY = creds.get_api_key()
AZURE_OPENAI_ENDPOINT = creds.get_endpoint()
 
# Fail fast with a clear message when either value is missing or empty.
if not AZURE_OPENAI_KEY:
    raise ValueError("No AZURE_OPENAI_KEY set for Azure OpenAI API")
if not AZURE_OPENAI_ENDPOINT:
    raise ValueError("No AZURE_OPENAI_ENDPOINT set for Azure OpenAI API")

In [2]:
# Sample Chat Completion using Azure OpenAI API
from openai import AzureOpenAI
# OpenAIError is imported here but not used in this cell.
from openai import OpenAIError

# Client built from the endpoint and key loaded in the previous cell.
client = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_KEY,
    api_version="2024-07-01-preview"
)

# The system message carries the instructions, the user message the question.
prePrompt = "You are a helpful assistant that knows about US geography, topography, flora, and fauna."
prompt = """What is the capital of New York?"""
messages = [
    {"role": "system", "content": prePrompt},
    {"role": "user", "content": prompt}
]
try:
    # NOTE(review): on Azure, `model` presumably has to match the deployment
    # name configured on the resource — confirm.
    chat_completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages
    )
    # Print the full response object first, then only the reply text.
    print(chat_completion)
    print(chat_completion.choices[0].message.content)
    
except Exception as e:
    # Any failure in the request or in reading the response is reported here.
    print(f"An error occurred:\n{e}")

ChatCompletion(id='chatcmpl-BALOvBHlPnRjyoffGGr1fpdvOwHQZ', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The capital of New York is Albany.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None), content_filter_results={'hate': {'filtered': False, 'severity': 'safe'}, 'protected_material_code': {'filtered': False, 'detected': False}, 'protected_material_text': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}})], created=1741805245, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_b705f0c291', usage=CompletionUsage(completion_tokens=8, prompt_tokens=38, total_tokens=46, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_token

Exercise 1: Refactor the code above so that `prePrompt` and `prompt` can be supplied as parameters to a function.

In [None]:
# Refactor the code above so that the prePrompt and Prompt can be supplied as parameters to a function
def chat_completion(prePrompt, prompt):
    """Send one system/user message pair to the model and return the reply text.

    Args:
        prePrompt: Content of the system message.
        prompt: Content of the user message.

    Returns:
        The text of the first choice, or a string starting with ``"Error:"``
        when anything goes wrong; exceptions are never propagated.
    """
    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": prePrompt},
                {"role": "user", "content": prompt},
            ],
        )
        reply = completion.choices[0].message.content
    except Exception as err:
        return f"Error: An error occurred:\n{err}"
    return reply
    
# Call the function defined above and print whatever it returns
# (the reply text or an error string).
prePrompt = "You are a helpful assistant that knows about US geography, topography, flora, and fauna."
prompt = """What is the capital of New York?"""
print(chat_completion(prePrompt, prompt))

Exercise 2: Add error handling to the function to catch `openai.OpenAIError` (imported above as `OpenAIError`).

In [None]:
# Add error handling to the function to catch openai.OpenAIError
def chat_completion(prePrompt, prompt):
    """Ask the model one question, reporting API errors separately from others.

    Args:
        prePrompt: Content of the system message.
        prompt: Content of the user message.

    Returns:
        The text of the first choice. On failure, an ``"Error: ..."`` string
        is returned instead; ``OpenAIError`` gets its own message and every
        other exception falls through to the generic one.
    """
    system_message = {"role": "system", "content": prePrompt}
    user_message = {"role": "user", "content": prompt}
    try:
        result = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[system_message, user_message],
        )
        return result.choices[0].message.content
    except OpenAIError as api_error:
        return f"Error: An error occurred in the OpenAI API:\n{api_error}"
    except Exception as other_error:
        return f"Error: An error occurred:\n{other_error}"
    
# Call the function defined above and print whatever it returns
# (the reply text or an error string).
prePrompt = "You are a helpful assistant that knows about US geography, topography, flora, and fauna."
prompt = """What is the capital of New York?"""
print(chat_completion(prePrompt, prompt))

Exercise 3: Add the **temperature** and **top_p**, also experiment with the **max_tokens**, **n**, and **stop** parameters. When might one want to use the latter three?
- **temperature**: Controls the randomness of the output. Lower values make the output more deterministic, while higher values make it more random.
- **top_p**: Controls diversity via nucleus sampling. 0.9 means only the top 90% of probability mass is considered.
- **max_tokens**: The maximum number of tokens to generate in the completion.
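- **n**: The number of completions to generate for each request. Each one is returned as a separate entry in `choices`, and all of them count toward token usage.
- **stop**: One or more sequences at which the model stops generating. The stop sequence itself is not included in the returned text.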

Try this prompt to see the parameters in action:

```Write a short story about a robot learning to understand human emotions.```

How do the various parameters affect your output?

In [None]:
# Add the **temperature** and **top_p**, also experiment with the **max_tokens**, **n**, and **stop** parameters to the function.
def chat_completion(prePrompt, prompt, temperature=0.7, top_p=1, max_tokens=150, n=1, stop=None):
    """Ask the model one question with explicit sampling parameters.

    Args:
        prePrompt: Content of the system message.
        prompt: Content of the user message.
        temperature, top_p, max_tokens, n, stop: Forwarded unchanged to the
            chat completions request.

    Returns:
        The text of the first choice only, even when ``n`` is greater than 1.
        On failure an ``"Error: ..."`` string is returned instead of raising.
    """
    request = {
        "model": "gpt-4o-mini",
        "messages": [
            {"role": "system", "content": prePrompt},
            {"role": "user", "content": prompt},
        ],
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "n": n,
        "stop": stop,
    }
    try:
        completion = client.chat.completions.create(**request)
        first_choice = completion.choices[0]
        return first_choice.message.content
    except OpenAIError as api_error:
        return f"Error: An error occurred in the OpenAI API:\n{api_error}"
    except Exception as other_error:
        return f"Error: An error occurred:\n{other_error}"
    
# Run the same story prompt twice, changing only the temperature (0.7 vs 0.2),
# and label each output so the two results can be compared.
prePrompt = "You are a helpful assistant that knows how to tell a good story, spin a great yarn, and write a tall tale."
prompt = """Write a short story about a robot learning to understand human emotions."""
print("temperature=0.7, top_p=1, max_tokens=500, n=1, stop=None")
print(chat_completion(prePrompt, prompt, temperature=0.7, top_p=1, max_tokens=500, n=1, stop=None))
print('-----------------------------------')
print("temperature=0.2, top_p=1, max_tokens=500, n=1, stop=None")
print(chat_completion(prePrompt, prompt, temperature=0.2, top_p=1, max_tokens=500, n=1, stop=None))



Exercise 4: Add context to the conversation so that you can continue the dialog instead of each prompt standing on its own.
1) Store conversation history
2) Append new user inputs to the conversation history.
3) Send the entire conversation history as context in each API call. 

In [None]:
# Add context to the conversation so that you can continue the dialog instead of each prompt standing on its own.
# 1) Store conversation history
# 2) Append new user inputs to the conversation history.
# 3) Send the entire conversation history as context in each API call. 

def chat_completion(prePrompt, prompt, conversation_history, temperature=0.7, top_p=1, max_tokens=5000, n=1, stop=None):
    """Continue a conversation and return the reply with the updated history.

    Args:
        prePrompt: Content of the system message. Only used when
            ``conversation_history`` is ``None`` or empty, to seed a new history.
        prompt: Content of the new user message.
        conversation_history: List of message dicts from earlier turns, or
            ``None``/``[]`` to start a new conversation.
        temperature, top_p, max_tokens, n, stop: Forwarded unchanged to the
            chat completions request.

    Returns:
        A ``(text, conversation_history)`` tuple. On success ``text`` is the
        first choice's reply and the history ends with the new user and
        assistant messages. On failure ``text`` is an ``"Error: ..."`` string
        and the new user message has been removed from the history again.
    """
    if not conversation_history:
        conversation_history = [
            {"role": "system", "content": prePrompt}
        ]
    conversation_history.append(
        {"role": "user", "content": prompt}
    )
    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=conversation_history,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens,
            n=n,
            stop=stop
        )
        response = completion.choices[0].message.content
    except OpenAIError as e:
        error = f"Error: An error occurred in the OpenAI API:\n{e}"
    except Exception as e:
        error = f"Error: An error occurred:\n{e}"
    else:
        conversation_history.append(
            {"role": "assistant", "content": response}
        )
        return response, conversation_history
    # Roll back the unanswered prompt so a retry does not send it twice.
    conversation_history.pop()
    return error, conversation_history
    
# Initialize conversation history
conversation_history = []

prePrompt = "You are a helpful assistant that knows about US geography, topography, flora, and fauna."
prompt = """What is the capital of New York?"""
response, conversation_history = chat_completion(prePrompt, prompt, conversation_history)
print(response)
prompt = """How many people live there?"""
response, conversation_history = chat_completion(None, prompt, conversation_history)
print(response)
prompt = """What is the weather like?"""
response, conversation_history = chat_completion(None, prompt, conversation_history)
print(response)
prompt = """What is there to do there?"""
response, conversation_history = chat_completion(None, prompt, conversation_history)
print(response)

Exercise 5: Use TikToken to make sure the conversation (Exercise 4) doesn't exceed the model's token limit.

In [None]:
# Use TikToken to make sure the conversation (Exercise 4) doesn't exceed the model's token limit.
# 1) Add a stop parameter to the function that will be used to stop the conversation when the token limit is reached.
# 2) Use the stop parameter to stop the conversation when the token limit is reached.

import tiktoken

# Upper bound that the measured conversation length is compared against.
# NOTE(review): 128000 presumably mirrors the gpt-4o-mini context window —
# confirm against the deployed model.
maximum_tokens = 128000
# Load the encoding for the model
encoding = tiktoken.encoding_for_model("gpt-4o-mini")

def measure_token_length(text):
    """Return the number of tokens ``text`` encodes to with the module-level ``encoding``."""
    return len(encoding.encode(text))

def chat_completion(prePrompt, prompt, conversation_history=None, temperature=0.7, top_p=1, max_tokens=150, n=1, stop=None):
    """Send a prompt plus optional prior history, refusing over-long conversations.

    Args:
        prePrompt: Content of the system message. Only used when no prior
            history is supplied.
        prompt: Content of the new user message.
        conversation_history: Optional list of earlier message dicts. It is
            copied, never mutated.
        temperature, top_p, max_tokens, n, stop: Forwarded unchanged to the
            chat completions request.

    Returns:
        The text of the first choice, or an ``"Error: ..."`` string when the
        conversation exceeds ``maximum_tokens`` or the request fails.
    """
    # Copy the history: ``list.append`` returns None and would nest the new
    # messages, and a shared default list would leak state between calls.
    messages = list(conversation_history) if conversation_history else []
    if not messages:
        messages.append({"role": "system", "content": prePrompt})
    messages.append({"role": "user", "content": prompt})
    try:
        # Approximate the request size from the message contents only.
        text = " ".join(message["content"] for message in messages)
        token_length = measure_token_length(text)
        if token_length > maximum_tokens:
            return f"Error: The conversation exceeds the token limit of {maximum_tokens}."
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens,
            n=n,
            stop=stop
        )
        return completion.choices[0].message.content
    except OpenAIError as e:
        return f"Error: An error occurred in the OpenAI API:\n{e}"
    except Exception as e:
        return f"Error: An error occurred:\n{e}"
    
def chat_completion(prePrompt, prompt, conversation_history, temperature=0.7, top_p=1, max_tokens=5000, n=1, stop=None):
    """Continue a conversation, checking its token length before each request.

    Args:
        prePrompt: Content of the system message. Only used when
            ``conversation_history`` is ``None`` or empty, to seed a new history.
        prompt: Content of the new user message.
        conversation_history: List of message dicts from earlier turns, or
            ``None``/``[]`` to start a new conversation.
        temperature, top_p, max_tokens, n, stop: Forwarded unchanged to the
            chat completions request.

    Returns:
        Always a ``(text, conversation_history)`` tuple. On success ``text``
        is the first choice's reply and the history ends with the new user and
        assistant messages. When the token limit is exceeded or the request
        fails, ``text`` is an ``"Error: ..."`` string and the new user message
        has been removed from the history again.
    """
    if not conversation_history:
        conversation_history = [
            {"role": "system", "content": prePrompt}
        ]
    conversation_history.append(
        {"role": "user", "content": prompt}
    )
    try:
        # Approximate the request size from the message contents only.
        text = " ".join(message["content"] for message in conversation_history)
        token_length = measure_token_length(text)
        if token_length > maximum_tokens:
            error = f"Error: The conversation exceeds the token limit of {maximum_tokens}."
        else:
            completion = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=conversation_history,
                temperature=temperature,
                top_p=top_p,
                max_tokens=max_tokens,
                n=n,
                stop=stop
            )
            response = completion.choices[0].message.content
            conversation_history.append(
                {"role": "assistant", "content": response}
            )
            return response, conversation_history
    except OpenAIError as e:
        error = f"Error: An error occurred in the OpenAI API:\n{e}"
    except Exception as e:
        error = f"Error: An error occurred:\n{e}"
    # Roll back the unanswered prompt so the history stays usable, and return
    # the same 2-tuple shape that callers unpack.
    conversation_history.pop()
    return error, conversation_history
    
# Initialize conversation history
conversation_history = []

prePrompt = "You are a helpful assistant that knows about US geography, topography, flora, and fauna."
prompt = """What is the capital of New York?"""
response, conversation_history = chat_completion(prePrompt, prompt, conversation_history)
print(response)
