# Part 9

Universal code that we will use for the entire notebook.

In [50]:
# required libraries
from openai import OpenAI

# libraries needed for streaming output
from typing_extensions import override
from openai import AssistantEventHandler

# additional libraries
import time
import pytz
import datetime

In [None]:
# Create an instance of the OpenAI class
# This assumes you have the OPENAI_API_KEY environment variable set
client = OpenAI()

## Creating Assistants, Threads, and Messages Review
Let's create a new assistant, thread, and some messages for us to use later on and to review the code for creating them.

### Creating an Assistant
First, let's make an Assistant we can use to communicate with our run.

In [None]:

# Create an assistant.
assistant = client.beta.assistants.create(
    model="gpt-4o",
    instructions="You are a helpful assistant.",
    name="Son of Run Tester Assistant",
    metadata={
        "holds_threads": "True",
        "likes_threads": "True",
        "holds_messages": "True",
        "likes_messages": "True",
    },
    temperature=1,
    top_p=1,
)

# Print the details of the created assistant to check the properties.
print(assistant)
print("\n\n")
print(assistant.name)
print(assistant.metadata)

### Creating a Thread
Now, let's create a Thread that can be used to hold our messages.

In [None]:
# Create a thread using the OpenAI API and store it in a variable
# The metadata specifies a user identifier
thread = client.beta.threads.create(
    metadata={
        "user": "abc123"
    }
)

# Output the result of the thread creation to the console
print(thread)

### Creating a Message
Finally, let's create a Message that we can go into the Thread for use later.

In [None]:
# Create a message in a specific thread using the client's message creation method.
message = client.beta.threads.messages.create(
    thread_id=thread.id,  # ID of the thread where the message will be posted
    role="user",  # Role of the entity posting the message
    content="Tell me what a penguin is in 100 words or less.",  # The textual content of the message
    metadata={"key": "value"}  # Additional data associated with the message in key-value pairs
)

# Print the entire message object to view its details.
print(message)

# Print a blank line for better readability of the output.
print("\n")

# Print specific attributes of the message.
print(message.id)  # The unique identifier of the message
print(message.content)  # The content of the message
print(message.content[0].text.value)  # Assuming 'content' is a list of text objects, print the value of the first one
print(message.role)  # The role associated with the message

## Creating Streaming Run
Let's create a run and get some output!

In [None]:
# First, we create a EventHandler class to define
# how we want to handle the events in the response stream.
# Normally, you would define this class at the top of your script or in a separate file.
class EventHandler(AssistantEventHandler):    
    @override
    def on_text_created(self, text) -> None:
        print(f"\nassistant text > ", end="", flush=True)
        
    @override
    def on_text_delta(self, delta, snapshot):
        print(delta.value, end="", flush=True)
        
    def on_tool_call_created(self, tool_call):
        print(f"\nassistant tool > {tool_call.type}\n", flush=True)

    def on_tool_call_delta(self, delta, snapshot):
        if delta.type == 'code_interpreter':
            if delta.code_interpreter.input:
                print(delta.code_interpreter.input, end="", flush=True)
            if delta.code_interpreter.outputs:
                print(f"\n\noutput >", flush=True)
            for output in delta.code_interpreter.outputs:
                if output.type == "logs":
                    print(f"\n{output.logs}", flush=True)

In [17]:
# Then, we use the `stream` SDK helper 
# with the `EventHandler` class to create the Run 
# and stream the response.
with client.beta.threads.runs.stream(
    thread_id=thread.id,
    assistant_id=assistant.id,
    event_handler=EventHandler(),
    ) as stream:
    stream.until_done()


assistant text > A penguin is a flightless seabird native to the Southern Hemisphere, particularly Antarctica. Known for their distinctive black and white plumage, penguins have streamlined bodies adapted for swimming and waddle awkwardly on land. They are excellent swimmers, using their flippers to propel through water, where they hunt for fish, krill, and squid. Penguins are social birds, often forming large colonies for breeding and nesting. They endure extreme cold with their dense feathers and a layer of blubber. There are several species of penguins, varying in size and features, such as the Emperor, King, and Adelie penguins.

## Token Management
We have two options for contolling the amount of tokens used:

max_prompt_tokens (integer or null)
Optional
The maximum number of prompt tokens that may be used over the course of the run. The run will make a best effort to use only the number of prompt tokens specified, across multiple turns of the run. If the run exceeds the number of prompt tokens specified, the run will end with status incomplete. 

max_completion_tokens (integer or null)
Optional
The maximum number of completion tokens that may be used over the course of the run. The run will make a best effort to use only the number of completion tokens specified, across multiple turns of the run. If the run exceeds the number of completion tokens specified, the run will end with status incomplete. 

Let's see it in action!

First we start with max_prompt_tokens to limit the input tokens that can be used.

In [8]:
# Let's start by seeing see how many messages we have
messages = client.beta.threads.messages.list(
    thread_id=thread.id
)

# Initialize a counter for the messages
message_count = 0

# Loop through the messages, print the id and content, and count the messages
for message in messages:
    print(message.id)
    print(message.content)
    print("\n")
    message_count += 1  # Increment the message count

# Print the total number of messages
print(f"Total number of messages: {message_count}")

msg_MAVhSfsYWEP9imHTtk2UrXwT
[TextContentBlock(text=Text(annotations=[], value='A penguin is a flightless seabird native to the Southern Hemisphere, particularly Antarctica. Known for their distinctive black and white plumage, penguins have streamlined bodies adapted for swimming and waddle awkwardly on land. They are excellent swimmers, using their flippers to propel through water, where they hunt for fish, krill, and squid. Penguins are social birds, often forming large colonies for breeding and nesting. They endure extreme cold with their dense feathers and a layer of blubber. There are several species of penguins, varying in size and features, such as the Emperor, King, and Adelie penguins.'), type='text')]


msg_2FiIEQV0NWq74ZBbiaykFV4J
[TextContentBlock(text=Text(annotations=[], value='Tell me what a penguin is in 100 words or less.'), type='text')]


Total number of messages: 2


In [9]:
# First let's list our runs to see what we have
runs = client.beta.threads.runs.list(
    thread_id=thread.id
)

# Dump all the runs
print(runs)
print("\n\n")

# Loop throught the runs and print the id and status
for run in runs:
    print(run.id)
    print(run.status)
    print("\n")

SyncCursorPage[Run](data=[Run(id='run_P9L2GTrEVUSFa8FbWKOuMv6i', assistant_id='asst_rN1GCMyXuzrXoxdcbu979T67', cancelled_at=None, completed_at=1715816083, created_at=1715816080, expires_at=None, failed_at=None, incomplete_details=None, instructions='You are a helpful assistant.', last_error=None, max_completion_tokens=None, max_prompt_tokens=None, metadata={}, model='gpt-4o', object='thread.run', required_action=None, response_format='auto', started_at=1715816080, status='completed', thread_id='thread_OAMXvqWnBHgPMlziOoOiukBO', tool_choice='auto', tools=[], truncation_strategy=TruncationStrategy(type='auto', last_messages=None), usage=Usage(completion_tokens=128, prompt_tokens=44, total_tokens=172), temperature=1.0, top_p=1.0, tool_resources={})], object='list', first_id='run_P9L2GTrEVUSFa8FbWKOuMv6i', last_id='run_P9L2GTrEVUSFa8FbWKOuMv6i', has_more=False)



run_P9L2GTrEVUSFa8FbWKOuMv6i
completed




In [10]:
# Now let's do a run limiting the prompt tokens
with client.beta.threads.runs.stream(
    thread_id=thread.id,
    assistant_id=assistant.id,
    max_prompt_tokens=5, # must be set to 256 or more
    # max_prompt_tokens=256,
    event_handler=EventHandler(),
    ) as stream:
    stream.until_done()


assistant text > A penguin is a flightless seabird native to the Southern Hemisphere, particularly Antarctica. Known for their black and white plumage, penguins have streamlined bodies adapted for swimming and waddle awkwardly on land. They are excellent swimmers, using their flippers to propel through water, where they hunt for fish, krill, and squid. Penguins are social birds, often forming large colonies for breeding and nesting. They endure extreme cold with their dense feathers and a layer of blubber. Several species exist, varying in size and features, such as the Emperor, King, and Adelie penguins.

And list the runs again

In [11]:
# Let's list our runs to see what we have
runs = client.beta.threads.runs.list(
    thread_id=thread.id
)


# Loop throught the runs and print the id and status
for run in runs:
    print(run.id)
    print(run.status)
    print(run.incomplete_details)
    print("\n")

run_93rVztkwdkaX4EY9CSJWu1FY
incomplete
IncompleteDetails(reason='max_prompt_tokens')


run_P9L2GTrEVUSFa8FbWKOuMv6i
completed
None




And now limiting the completion tokens

In [12]:
# Now let's do a run limiting the completion tokens
with client.beta.threads.runs.stream(
    thread_id=thread.id,
    assistant_id=assistant.id,
    max_completion_tokens=5, # must be set to 16 or more
    # max_completion_tokens=16,
    event_handler=EventHandler(),
    ) as stream:
    stream.until_done()


assistant text > A penguin is a flightless seabird native to the Southern Hemisphere,

In [13]:
# Now let's list our runs again
runs = client.beta.threads.runs.list(
    thread_id=thread.id
)


# Loop throught the runs and print the id and status
for run in runs:
    print(run.id)
    print(run.status)
    print(run.incomplete_details)
    print("\n")

run_SSgZthpLe616QD6e2NvGD9ct
incomplete
IncompleteDetails(reason='max_completion_tokens')


run_93rVztkwdkaX4EY9CSJWu1FY
incomplete
IncompleteDetails(reason='max_prompt_tokens')


run_P9L2GTrEVUSFa8FbWKOuMv6i
completed
None




Now let's examine truncation_strategy:

truncation_strategy (object)
Optional
Controls for how a thread will be truncated prior to the run. Use this to control the intial context window of the run.

Properties:
type (string)
Required
The truncation strategy to use for the thread. The default is auto. If set to last_messages, the thread will be truncated to the n most recent messages in the thread. When set to auto, messages in the middle of the thread will be dropped to fit the context length of the model, max_prompt_tokens.

last_messages (integer or null)
Optional
The number of most recent messages from the thread when constructing the context for the run.

In [14]:
# Now let's use a truncation strategy on the last set of messages
with client.beta.threads.runs.stream(
    thread_id=thread.id,
    assistant_id=assistant.id,
    max_prompt_tokens=1000,
    truncation_strategy={  # Adding a truncation strategy to control the context of the conversation
        "type": "last_messages",  # Strategy to keep only the most recent messages
        "last_messages": 5  # Keep the last 5 messages in the context window
    },
    event_handler=EventHandler(),
    ) as stream:
    stream.until_done()



assistant text > A penguin is a flightless seabird native to the Southern Hemisphere,

In [15]:
# Now let's use a truncation strategy where the system decides what messages to cut
with client.beta.threads.runs.stream(
    thread_id=thread.id,
    assistant_id=assistant.id,
    max_prompt_tokens=1000,
    truncation_strategy={  # Adding a truncation strategy to control the context of the conversation
        "type": "auto",  # Strategy to keep only prompt
    },
    event_handler=EventHandler(),
    ) as stream:
    stream.until_done()


assistant text > A penguin is a flightless seabird native to the Southern Hemisphere,

In [16]:
# Let's see how many messages we have now
messages = client.beta.threads.messages.list(
    thread_id=thread.id
)

# Initialize a counter for the messages
message_count = 0

# Loop through the messages, print the id and content, and count the messages
for message in messages:
    print(message.id)
    print(message.content)
    print("\n")
    message_count += 1  # Increment the message count

# Print the total number of messages
print(f"Total number of messages: {message_count}")


msg_n07urra4ymORoREqVmbyfx2w
[TextContentBlock(text=Text(annotations=[], value='A penguin is a flightless seabird native to the Southern Hemisphere,'), type='text')]


msg_cXDkEDtEa6fEpN7Ra0YNHINT
[TextContentBlock(text=Text(annotations=[], value='A penguin is a flightless seabird native to the Southern Hemisphere,'), type='text')]


msg_LCX8KU39URj9UfpDrADUTuMd
[TextContentBlock(text=Text(annotations=[], value='A penguin is a flightless seabird native to the Southern Hemisphere,'), type='text')]


msg_J5zf4nlPXhcuYQgyCRIJz9rj
[TextContentBlock(text=Text(annotations=[], value='A penguin is a flightless seabird native to the Southern Hemisphere, particularly Antarctica. Known for their black and white plumage, penguins have streamlined bodies adapted for swimming and waddle awkwardly on land. They are excellent swimmers, using their flippers to propel through water, where they hunt for fish, krill, and squid. Penguins are social birds, often forming large colonies for breeding and nesti

## Controlling Tool Choice and Response Format
In addition to all the other arguments we can modify, we also have tool_choice and response_format:

tool_choice (string or object)

Optional

Controls which (if any) tool is called by the model. none means the model will not call any tools and instead generates a message. auto is the default value and means the model can pick between generating a message or calling one or more tools. required means the model must call one or more tools before responding to the user. Specifying a particular tool like {"type": "file_search"} or {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool.

response_format (string or object)

Optional

Specifies the format that the model must output. Compatible with GPT-4o, GPT-4 Turbo, and all GPT-3.5 Turbo models since gpt-3.5-turbo-1106.

Setting to { "type": "json_object" } enables JSON mode, which guarantees the message the model generates is valid JSON.

Important: when using JSON mode, you must also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if finish_reason="length", which indicates the generation exceeded max_tokens or the conversation exceeded the max context length.



In [19]:
# Then, we use the `stream` SDK helper 
# with the `EventHandler` class to create the Run 
# and stream the response.
with client.beta.threads.runs.stream(
    thread_id=thread.id,
    assistant_id=assistant.id,
    additional_instructions="Make sure your output is in JSON format.",
    response_format={"type" : "json_object"},
    tool_choice=None,
    event_handler=EventHandler(),
    ) as stream:
    stream.until_done()


assistant text > {
    "description": "A penguin is a flightless seabird native to the Southern Hemisphere, particularly Antarctica. Known for their distinctive black and white plumage, penguins have streamlined bodies adapted for swimming and waddle awkwardly on land. They are excellent swimmers, using their flippers to propel through water, where they hunt for fish, krill, and squid. Penguins are social birds, often forming large colonies for breeding and nesting. They endure extreme cold with their dense feathers and a layer of blubber. There are several species of penguins, varying in size and features, such as the Emperor, King, and Adelie penguins."
}

## Listing Runs
thread_id
(string)

Required
The ID of the thread the run belongs to.

limit
(integer)

Optional
Defaults to 20
A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.

order
(string)

Optional
Defaults to desc
Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.

after
(string)

Optional
A cursor for use in pagination. after is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.

before
(string)

Optional
A cursor for use in pagination. before is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.

In [24]:
# Let's list our runs to see what we have
runs = client.beta.threads.runs.list(
    thread_id=thread.id,
    order="desc",
)


# Loop throught the runs and print the id and status
for run in runs:
    print(run.id)
    print(run.status)
    print(run.incomplete_details)
    print("\n")


run_BmslqqGIT1S1cfEH5qJ4mKRo
completed
None


run_8Cq8IebY665PFDU7rDiIzbOI
completed
None


run_K5H7P1b7QSk5LxQhTNYIvH7w
incomplete
IncompleteDetails(reason='max_prompt_tokens')


run_8efzAgLzZBkmA2SVP91dmTwV
completed
None


run_SSgZthpLe616QD6e2NvGD9ct
incomplete
IncompleteDetails(reason='max_completion_tokens')


run_93rVztkwdkaX4EY9CSJWu1FY
incomplete
IncompleteDetails(reason='max_prompt_tokens')


run_P9L2GTrEVUSFa8FbWKOuMv6i
completed
None




## Retrieve Runs

thread_id
(string)

Required
The ID of the thread that was run.

run_id
(string)

Required
The ID of the run to retrieve.

In [27]:
# Let's list our runs to see what we have
runs = client.beta.threads.runs.list(
    thread_id=thread.id,
    order="desc",
)

# Convert the normal SyncCursorPage object we get back to an actual list
runs_list = list(runs)

# Check if there are any runs
if runs_list:
    # Get the run id of the last run (first in the list since it's sorted in descending order)
    last_run_id = runs_list[0].id
    print("Last run ID:", last_run_id)

# Loop through the runs and print the id and status
for run in runs_list:
    print(run.id)
    print(run.status)
    print(run.incomplete_details)
    print("\n")


Last run ID: run_BmslqqGIT1S1cfEH5qJ4mKRo
run_BmslqqGIT1S1cfEH5qJ4mKRo
completed
None


run_8Cq8IebY665PFDU7rDiIzbOI
completed
None


run_K5H7P1b7QSk5LxQhTNYIvH7w
incomplete
IncompleteDetails(reason='max_prompt_tokens')


run_8efzAgLzZBkmA2SVP91dmTwV
completed
None


run_SSgZthpLe616QD6e2NvGD9ct
incomplete
IncompleteDetails(reason='max_completion_tokens')


run_93rVztkwdkaX4EY9CSJWu1FY
incomplete
IncompleteDetails(reason='max_prompt_tokens')


run_P9L2GTrEVUSFa8FbWKOuMv6i
completed
None




In [28]:

run = client.beta.threads.runs.retrieve(
    thread_id=thread.id,
    run_id=runs_list[0].id
)

print(run)


Run(id='run_BmslqqGIT1S1cfEH5qJ4mKRo', assistant_id='asst_rN1GCMyXuzrXoxdcbu979T67', cancelled_at=None, completed_at=1715816584, created_at=1715816580, expires_at=None, failed_at=None, incomplete_details=None, instructions='You are a helpful assistant. Make sure your output is in JSON format.', last_error=None, max_completion_tokens=None, max_prompt_tokens=None, metadata={}, model='gpt-4o', object='thread.run', required_action=None, response_format=AssistantResponseFormat(type='json_object'), started_at=1715816580, status='completed', thread_id='thread_OAMXvqWnBHgPMlziOoOiukBO', tool_choice='auto', tools=[], truncation_strategy=TruncationStrategy(type='auto', last_messages=None), usage=Usage(completion_tokens=134, prompt_tokens=499, total_tokens=633), temperature=1.0, top_p=1.0, tool_resources={})


## Create Thread and Run
We have a helper function that can be used to create a thread and run it in one shot.

assistant_id
(string)

Required

The ID of the assistant to use to execute this run.




In [39]:

run = client.beta.threads.create_and_run(
    assistant_id=assistant.id,
    thread={
    "messages": [
        {"role": "user", "content": "Explain deep learning to me like I'm a 5 year old. In 50 words or less."}
    ]
    }
)

print(run)
print("\n")

print(run.thread_id)
print("\n")

# Get the latest status of the run
run = client.beta.threads.runs.retrieve(
    thread_id=run.thread_id,
    run_id=run.id
)

# sleep for a few seconds to allow the run to complete
time.sleep(10)

print(run.id)
print(run.status)
print("\n")

# Get the messages from the thread
messages = client.beta.threads.messages.list(thread_id=run.thread_id)

# Loop through the messages and print the content
for message in messages:
    print(message.content)
    print("\n")


Run(id='run_ZfAgmBAeNYjMGHRTZAwAeGbj', assistant_id='asst_rN1GCMyXuzrXoxdcbu979T67', cancelled_at=None, completed_at=None, created_at=1715842506, expires_at=1715843106, failed_at=None, incomplete_details=None, instructions='You are a helpful assistant.', last_error=None, max_completion_tokens=None, max_prompt_tokens=None, metadata={}, model='gpt-4o', object='thread.run', required_action=None, response_format='auto', started_at=None, status='queued', thread_id='thread_7Wdi60w4T0SSYlvJXGXzOykH', tool_choice='auto', tools=[], truncation_strategy=TruncationStrategy(type='auto', last_messages=None), usage=None, temperature=1.0, top_p=1.0, tool_resources={})


thread_7Wdi60w4T0SSYlvJXGXzOykH


run_ZfAgmBAeNYjMGHRTZAwAeGbj
in_progress


[TextContentBlock(text=Text(annotations=[], value='Deep learning is like teaching a computer to learn from examples, just like how you learn by seeing things. Imagine showing it lots of pictures of cats and dogs, and it learns to tell which is which all by its

## Modifying Runs
You can only modify the metadata on runs at this time.

thread_id
(string)

Required
The ID of the thread that was run.

run_id
(string)

Required
The ID of the run to modify.

Request body
(metadata)
map

Optional
Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maxium of 512 characters long.

In [41]:
# show our run information before modification
print(run.id)
print(run.metadata)
print("\n")

run = client.beta.threads.runs.update(
    thread_id=run.thread_id,
    run_id=run.id,
    metadata={"user_id": "user_abc123"},
)

# show our run information before modification
print(run.id)
print(run.metadata)


run_ZfAgmBAeNYjMGHRTZAwAeGbj
{'user_id': 'user_abc123'}


run_ZfAgmBAeNYjMGHRTZAwAeGbj
{'user_id': 'user_abc123'}


## Canceling Runs
You can cancel any run that has a state of in_progress at any time. 

thread_id
(string)

Required
The ID of the thread to which this run belongs.

run_id
(string)

Required
The ID of the run to cancel.

In [55]:
# Create a new run with the stream option set to True
# Attempt to create a new run
stream_run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant.id,
    stream=True
)

# variable to hold the cancelled run object
cancelled_run = None

# Continue processing the new run
for event in stream_run:
    if hasattr(event.data, 'status'):  # Check if 'status' is an attribute of the event data
        print(event.data.id)
        print(event.data.status)

        if event.data.status == "in_progress":
            print("Cancelling the run.")
            cancelled_run = client.beta.threads.runs.cancel(thread_id=thread.id, run_id=event.data.id)
    else:
        print(f"Event ID: {event.data.id} does not have a status attribute.")
        print(event.data.delta)
    print("---------------\n")

# retrieve the latest status of the cancelled run
cancelled_run = client.beta.threads.runs.retrieve(
    thread_id=thread.id,
    run_id=cancelled_run.id
)


# Show the run ID and when it was cancelled
print("=============== Cancelled Run Information ================\n")
print(cancelled_run.id)
print(cancelled_run.status)
print(cancelled_run.cancelled_at)

import datetime
import pytz

# Convert the created date/time in Unix format directly to Central Time and format it
formatted_date = datetime.datetime.fromtimestamp(cancelled_run.created_at, tz=pytz.utc)\
    .astimezone(pytz.timezone('America/Chicago'))\
    .strftime('%Y-%m-%d %I:%M:%S %Z')

# Print the formatted date and time
print(formatted_date)


run_KVMkQKNshc9hQwRjYmNtemmV
queued
---------------

run_KVMkQKNshc9hQwRjYmNtemmV
queued
---------------

run_KVMkQKNshc9hQwRjYmNtemmV
in_progress
Cancelling the run.
---------------

run_KVMkQKNshc9hQwRjYmNtemmV
cancelling
---------------

run_KVMkQKNshc9hQwRjYmNtemmV
cancelled
---------------


run_KVMkQKNshc9hQwRjYmNtemmV
cancelled
1715844768
2024-05-16 02:32:48 CDT
