# Part 15

# Using Code Interpreter

Universal code for the entire notebook

In [28]:
# Uncomment the line below to make sure you have all the packages needed
# %pip install -r requirements.txt

In [29]:
# Import necessary libraries
from openai import OpenAI  # Used for interacting with OpenAI's API
from typing_extensions import override  # Used for overriding methods in subclasses
from openai import AssistantEventHandler  # Used for handling events related to OpenAI assistants

In [30]:
# Create an instance of the OpenAI class to interact with the API.
# This assumes you have set the OPENAI_API_KEY environment variable.
client = OpenAI() 

In [31]:
# Event handler class to handle events related to streaming output from the assistant
class EventHandler(AssistantEventHandler):
    @override
    def on_text_created(self, text) -> None:
        print(f"\nASSISTANT MESSAGE >\n", end="", flush=True)

    @override
    def on_tool_call_created(self, tool_call):
        print(f"\nASSISTANT MESSAGE >\n{tool_call.type}\n", flush=True)

    @override
    def on_message_done(self, message) -> None:
        # print a citation to the file searched
        message_content = message.content[0].text
        annotations = message_content.annotations
        citations = []
        for index, annotation in enumerate(annotations):
            message_content.value = message_content.value.replace(
                annotation.text, f"[{index}]"
            )
            if file_citation := getattr(annotation, "file_citation", None):
                cited_file = client.files.retrieve(file_citation.file_id)
                citations.append(f"[{index}] {cited_file.filename}")

        print(message_content.value)
        print("\n".join(citations))

## Creating an Assistant with Code Interpreter Enabled

Our first step is to create an Assistant that can use Code Interpreter

In [32]:
# Create an assistant using the client library.
assistant = client.beta.assistants.create(
    model="gpt-4o",  # Specify the model to be used.
    
    instructions=""" 
        You are a helpful assistant.
    """,
    
    name="Code Interpreter Assistant",  # Give the assistant a name.
    
    tools=[{"type": "code_interpreter"}], # Add the code interpreter capability to the assistant.
    
    metadata={  # Add metadata about the assistant's capabilities.
        "can_be_used_for_code_analysis": "True",
        "can_do_python": "True",
    },
    temperature=1,  # Set the temperature for response variability.
    top_p=1,  # Set the top_p for nucleus sampling.
)

# Print the details of the created assistant to check its properties.
print(assistant)  # Print the full assistant object.
print("\n\n")
print(assistant.name)  # Print the name of the assistant.
print(assistant.metadata)  # Print the metadata of the assistant.

Assistant(id='asst_NhHQtzG0a3e3rOBM1CScNn03', created_at=1718452978, description=None, instructions=' \n        You are a helpful assistant.\n    ', metadata={'can_be_used_for_code_analysis': 'True', 'can_do_python': 'True'}, model='gpt-4o', name='Code Interpreter Assistant', object='assistant', tools=[CodeInterpreterTool(type='code_interpreter')], response_format='auto', temperature=1.0, tool_resources=ToolResources(code_interpreter=ToolResourcesCodeInterpreter(file_ids=[]), file_search=None), top_p=1.0)



Code Interpreter Assistant
{'can_be_used_for_code_analysis': 'True', 'can_do_python': 'True'}


## Passing Files to Code Interpreter

There are a variety of ways to get files for Code Interpreter to use. 
- Assistant files - viewable by all runs that use the assistant.
- Thread files - only viewable by runs that use the thread. 

Let's review the code for the two main approaches.

### Getting Files to the Assistant

First, you have to have a file that has been uploaded so we can pass it to our assistant.

In [33]:
# Upload a file with an "assistants" purpose
assistant_file = client.files.create(
    file=open("./artifacts/penguins_size.csv", "rb"),
    purpose='assistants'
)

print(assistant_file)

FileObject(id='file-3P2mhdFtnDTKpsxawxiWdFBS', bytes=13519, created_at=1718452978, filename='penguins_size.csv', object='file', purpose='assistants', status='processed', status_details=None)


Next, we need to modify our Assistant with the new file information. 

In [34]:
assistant = client.beta.assistants.update(
    assistant_id=assistant.id,
    tools=[{"type": "code_interpreter"}],
    tool_resources={
        "code_interpreter": {
            "file_ids": [assistant_file.id]
        }
    }
)

print(assistant)

Assistant(id='asst_NhHQtzG0a3e3rOBM1CScNn03', created_at=1718452978, description=None, instructions=' \n        You are a helpful assistant.\n    ', metadata={'can_be_used_for_code_analysis': 'True', 'can_do_python': 'True'}, model='gpt-4o', name='Code Interpreter Assistant', object='assistant', tools=[CodeInterpreterTool(type='code_interpreter')], response_format='auto', temperature=1.0, tool_resources=ToolResources(code_interpreter=ToolResourcesCodeInterpreter(file_ids=['file-3P2mhdFtnDTKpsxawxiWdFBS']), file_search=None), top_p=1.0)


Finally, let's run a message and see if it is working.

In [36]:
# Need a thread to send message and get output
assistant_thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "Give me a summary of the file penguins_size.csv."
        },
    ]
)

# stream the output from the assistant
with client.beta.threads.runs.stream(
    thread_id=assistant_thread.id,
    assistant_id=assistant.id,
    event_handler=EventHandler(),
) as stream:
    stream.until_done()


ASSISTANT MESSAGE >
code_interpreter


ASSISTANT MESSAGE >
The dataset `penguins_size.csv` contains measurements and characteristics of 344 penguins. Here’s a summary of the file:

1. **Data Columns and Types:**
   - `species`: (object) The species of the penguin, with 3 unique species.
   - `island`: (object) The island where the penguin was found, with 3 unique islands.
   - `culmen_length_mm`: (float64) Length of the culmen (beak), with some missing values (342 non-null).
   - `culmen_depth_mm`: (float64) Depth of the culmen (beak), with some missing values (342 non-null).
   - `flipper_length_mm`: (float64) Length of the flipper, with some missing values (342 non-null).
   - `body_mass_g`: (float64) Body mass of the penguin in grams, with some missing values (342 non-null).
   - `sex`: (object) Sex of the penguin, with 3 unique values and some missing values (334 non-null).

2. **Statistical Summary:**
   - There are 3 species with `Adelie` being the most frequent.
   - Penguins a

### Getting Files to the Thread

First, we need a file uploaded.


In [45]:
# Upload a file with an "assistants" purpose
thread_file = client.files.create(
    file=open("./artifacts/daily-bike-share.csv", "rb"),
    purpose='assistants'
)

print(thread_file)

FileObject(id='file-ja8z169Xf9X5mU248naSSheD', bytes=43599, created_at=1717720479, filename='daily-bike-share.csv', object='file', purpose='assistants', status='processed', status_details=None)


Second, we need a thread to attach the file to

In [46]:
thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "Give me a summary of the daily-bike-share.csv file."
        },
    ]
)

print(thread)

Thread(id='thread_Wrq2Tj1zHqTOT91mf41HVUFn', created_at=1717720480, metadata={}, object='thread', tool_resources=ToolResources(code_interpreter=None, file_search=None))


Third, we can update the thread with the file information

In [47]:
updated_thread = client.beta.threads.update(
    thread_id=thread.id,
    tool_resources={
        "code_interpreter": {
            "file_ids": [thread_file.id]
        }
    }
)

print(updated_thread)

Thread(id='thread_Wrq2Tj1zHqTOT91mf41HVUFn', created_at=1717720480, metadata={}, object='thread', tool_resources=ToolResources(code_interpreter=ToolResourcesCodeInterpreter(file_ids=['file-ja8z169Xf9X5mU248naSSheD']), file_search=None))


Finally, let's run it against a new assistant and see the results

In [48]:
# Create an assistant using the client library.
thread_assistant = client.beta.assistants.create(
    model="gpt-4o",  # Specify the model to be used.
    
    instructions=""" 
        You are a helpful assistant.
    """,
    
    name="Code Interpreter Assistant Using Thread Data",  # Give the assistant a name.
    
    tools=[{"type": "code_interpreter"}], # Add the code interpreter capability to the assistant.
    
    metadata={  # Add metadata about the assistant's capabilities.
        "can_be_used_for_code_analysis": "True",
        "can_do_python": "True",
    },
    temperature=1,  # Set the temperature for response variability.
    top_p=1,  # Set the top_p for nucleus sampling.
)

# Print the details of the created assistant to check its properties.
print(assistant)  # Print the full assistant object.
print("\n\n")
print(assistant.name)  # Print the name of the assistant.
print(assistant.metadata)  # Print the metadata of the assistant.

Assistant(id='asst_nikNl521MHGPe1CRxj0mrr1i', created_at=1717720458, description=None, instructions=' \n        You are a helpful assistant.\n    ', metadata={'can_be_used_for_code_analysis': 'True', 'can_do_python': 'True'}, model='gpt-4o', name='Code Interpreter Assistant', object='assistant', tools=[CodeInterpreterTool(type='code_interpreter')], response_format='auto', temperature=1.0, tool_resources=ToolResources(code_interpreter=ToolResourcesCodeInterpreter(file_ids=['file-QKiPf2QBRZdfVMkytsid5r2p']), file_search=None), top_p=1.0)



Code Interpreter Assistant
{'can_be_used_for_code_analysis': 'True', 'can_do_python': 'True'}


In [49]:
# stream the output from the assistant
with client.beta.threads.runs.stream(
    thread_id=updated_thread.id,
    assistant_id=thread_assistant.id,
    event_handler=EventHandler(),
) as stream:
    stream.until_done()


ASSISTANT MESSAGE >
code_interpreter


ASSISTANT MESSAGE >
The dataset "daily-bike-share.csv" contains 731 records and the following 13 columns. Below is a summary of each column:

1. **day (integer)**:
   - Count: 731
   - Mean: 15.74
   - Std: 8.81
   - Min: 1
   - 25th percentile: 8
   - 50th percentile: 16
   - 75th percentile: 23
   - Max: 31

2. **mnth (integer)**:
   - Count: 731
   - Mean: 6.52
   - Std: 3.45
   - Min: 1
   - 25th percentile: 4
   - 50th percentile: 7
   - 75th percentile: 10
   - Max: 12

3. **year (integer)**:
   - Count: 731
   - Mean: 2011.50
   - Std: 0.50
   - Min: 2011
   - 25th percentile: 2011
   - 50th percentile: 2012
   - 75th percentile: 2012
   - Max: 2012

4. **season (integer)**:
   - Count: 731
   - Mean: 2.50
   - Std: 1.11
   - Min: 1
   - 25th percentile: 2
   - 50th percentile: 3
   - 75th percentile: 3
   - Max: 4

5. **holiday (integer, binary)**:
   - Count: 731
   - Mean: 0.03
   - Std: 0.17
   - Min: 0
   - 25th percentile: 0
   - 50t

### Text Results from files in Assistants and Threads

Let's see what happens if we use an assistant with a file and a thread with a file together

In [50]:
super_thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "Give me a summary of the penguins-size.csv and daily-bike-share.csv files."
        },
    ]
)

print(super_thread)

Thread(id='thread_HCO2Yi5dpRHtQm4x0lLNhIgs', created_at=1717720503, metadata={}, object='thread', tool_resources=ToolResources(code_interpreter=None, file_search=None))


In [51]:
super_updated_thread = client.beta.threads.update(
    thread_id=super_thread.id,
    tool_resources={
        "code_interpreter": {
            "file_ids": [thread_file.id]
        }
    }
)

print(super_updated_thread)

Thread(id='thread_HCO2Yi5dpRHtQm4x0lLNhIgs', created_at=1717720503, metadata={}, object='thread', tool_resources=ToolResources(code_interpreter=ToolResourcesCodeInterpreter(file_ids=['file-ja8z169Xf9X5mU248naSSheD']), file_search=None))


In [52]:
# stream the output from the assistant
with client.beta.threads.runs.stream(
    thread_id=super_updated_thread.id,
    assistant_id=assistant.id,
    event_handler=EventHandler(),
) as stream:
    stream.until_done()


ASSISTANT MESSAGE >
code_interpreter


ASSISTANT MESSAGE >
### penguins-size.csv Summary

**Statistics:**
- **species**: 344 entries; species include 'Adelie', 'Chinstrap', and 'Gentoo'.
- **island**: 344 entries; islands include 'Biscoe', 'Dream', and 'Torgersen'.
- **culmen_length_mm**: 342 non-null entries, mean of 43.92 mm, ranging from 32.1 to 59.6 mm.
- **culmen_depth_mm**: 342 non-null entries, mean of 17.15 mm, ranging from 13.1 to 21.5 mm.
- **flipper_length_mm**: 342 non-null entries, mean of 200.92 mm, ranging from 172 to 231 mm.
- **body_mass_g**: 342 non-null entries, mean of 4201.75 g, ranging from 2700 to 6300 g.
- **sex**: 334 non-null entries; categories are MALE and FEMALE.

**First Few Rows:**
| species | island    | culmen_length_mm | culmen_depth_mm | flipper_length_mm | body_mass_g | sex    |
|---------|-----------|------------------|-----------------|-------------------|-------------|--------|
| Adelie  | Torgersen | 39.1             | 18.7            | 181.0   

### Full Results Output

We may have other things that are produced

In [70]:
import openai
from IPython.display import Image, display
import json

# Initialize the OpenAI client
client = openai.OpenAI()

# Upload the file
file = client.files.create(
    file=open("./artifacts/penguins_size.csv", "rb"),
    purpose='assistants'
)

# Create the assistant with the Code Interpreter tool
assistant = client.beta.assistants.create(
    instructions="You are a data analyst. When provided with a file, analyze the data and generate visualizations.",
    model="gpt-4o",
    tools=[{"type": "code_interpreter"}],
    tool_resources={
        "code_interpreter": {
            "file_ids": [file.id]
        }
    }
)

# Create a thread to start the analysis
thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "Please analyze the data in the uploaded file and generate relevant visualizations.",
            "attachments": [
                {
                    "file_id": file.id,
                    "tools": [{"type": "code_interpreter"}]
                }
            ]
        }
    ]
)

# Fetch the results of the analysis
response = client.beta.threads.retrieve(thread.id)

# Convert the response to a dictionary to access its attributes
response_dict = response.to_dict()

# Debug: Print the entire response to understand its structure
print(json.dumps(response_dict, indent=2))

# Access messages from the thread
messages = response_dict.get('messages', [])
for message in messages:
    content = message.get('content', '')
    print(content)
    
    # If the response includes images, download and save them
    if "image_file" in message:
        image_file_id = message["image_file"]["file_id"]
        image_data = client.files.content(image_file_id)
        with open("analysis_image.png", "wb") as image_file:
            image_file.write(image_data.read())
        # Display the image in the notebook
        display(Image("analysis_image.png"))


{
  "id": "thread_DvnJkhiUKl6rtR6gI8SQu5XD",
  "created_at": 1717759497,
  "metadata": {},
  "object": "thread",
  "tool_resources": {
    "code_interpreter": {
      "file_ids": [
        "file-mnBlQQ2gtHTHA3o3tztpCXzv"
      ]
    }
  }
}
