# Part 16

# Using Code Interpreter

Universal code for the entire notebook

In [1]:
# Uncomment the line below to make sure you have all the packages needed
# %pip install -r requirements.txt

In [2]:
# Standard library imports
import time  # Used for time-related functions
import threading  # Used for creating and managing threads

# Third-party library imports
from openai import OpenAI  # Used for interacting with OpenAI's API
from openai import AssistantEventHandler  # Used for handling events related to OpenAI assistants
from typing_extensions import override  # Used for overriding methods in subclasses
from IPython.display import display, Markdown, clear_output  # Used for displaying content in Jupyter Notebooks

import base64  # Used for encoding and decoding binary data
import requests  # Used for making HTTP requests
import markdown2  # Used for converting Markdown to HTML
from IPython.display import display, HTML  # Used for displaying HTML content in Jupyter Notebooks


In [3]:
# Create the OpenAI client that the rest of the notebook uses for every API call.
# No arguments are passed here; this assumes you have set the OPENAI_API_KEY
# environment variable so the client can pick up the credentials on its own.
client = OpenAI() 

In [4]:
# Create our custom event handler class that inherits from AssistantEventHandler for streaming assistant output.
class EventHandler(AssistantEventHandler):
    """Print streamed assistant output and keep a plain-text transcript of it.

    ``results`` collects, in arrival order, every string fragment that was
    printed: message text deltas, code interpreter input, and code interpreter logs.
    """

    def __init__(self):
        super().__init__()
        self.results = []  # Ordered list of the str fragments printed so far

    @override
    def on_text_created(self, text) -> None:
        """Print the label that introduces a new block of assistant text."""
        print("\nassistant text > ", end="", flush=True)
        # `text` is deliberately not stored: it is an SDK object rather than a str,
        # and `results` should only hold string fragments. The fragments themselves
        # are recorded by on_text_delta.

    @override
    def on_text_delta(self, delta, snapshot):
        """Print and record one partial piece of assistant text."""
        # A delta may arrive without text (value is optional on the SDK type);
        # skip it rather than printing/recording "None".
        if delta.value is None:
            return
        print(delta.value, end="", flush=True)
        self.results.append(delta.value)

    @override
    def on_tool_call_created(self, tool_call):
        """Print the type of the tool call that has just started."""
        print(f"\nassistant tool > {tool_call.type}\n", flush=True)

    @override
    def on_tool_call_delta(self, delta, snapshot):
        """Print and record code interpreter input and log output as it streams in."""
        # Only code interpreter deltas that actually carry a payload are handled.
        if delta.type != 'code_interpreter' or delta.code_interpreter is None:
            return

        code_interpreter = delta.code_interpreter

        # The code the assistant is writing, streamed piece by piece.
        if code_interpreter.input:
            print(code_interpreter.input, end="", flush=True)
            self.results.append(code_interpreter.input)

        # Output produced by running that code; only textual logs are echoed here.
        if code_interpreter.outputs:
            print("\n\noutput >", flush=True)
            for output in code_interpreter.outputs:
                if output.type == "logs" and output.logs:
                    print(f"\n{output.logs}", flush=True)
                    self.results.append(output.logs)


## Creating an Assistant with Code Interpreter Enabled

Our first step is to create an Assistant that can use Code Interpreter.

In [5]:
# Register a new assistant that has the Code Interpreter tool switched on.
assistant = client.beta.assistants.create(
    name="Code Interpreter Assistant",
    model="gpt-4o",
    # Same system prompt as before, written as a single-line literal.
    instructions=" \n        You are a helpful assistant.\n    ",
    # Code Interpreter is the only tool this assistant gets.
    tools=[dict(type="code_interpreter")],
    # Free-form string tags describing what the assistant is meant for.
    metadata=dict(
        can_be_used_for_code_analysis="True",
        can_do_python="True",
    ),
    # Sampling settings.
    temperature=1,
    top_p=1,
)

# Show the full assistant object, a spacer, then its name and metadata,
# each on its own line.
print(assistant, "\n\n", assistant.name, assistant.metadata, sep="\n")


Assistant(id='asst_OhOeXCXyY3ly7zKBKZYEFQkb', created_at=1719317311, description=None, instructions=' \n        You are a helpful assistant.\n    ', metadata={'can_be_used_for_code_analysis': 'True', 'can_do_python': 'True'}, model='gpt-4o', name='Code Interpreter Assistant', object='assistant', tools=[CodeInterpreterTool(type='code_interpreter')], response_format='auto', temperature=1.0, tool_resources=ToolResources(code_interpreter=ToolResourcesCodeInterpreter(file_ids=[]), file_search=None), top_p=1.0)



Code Interpreter Assistant
{'can_be_used_for_code_analysis': 'True', 'can_do_python': 'True'}


## Passing Files to Code Interpreter

There are a variety of ways to get files for Code Interpreter to use. 
- Assistant files - viewable by all runs that use the assistant.
- Thread files - only viewable by runs that use the thread. 

Let's review the code for the two main approaches.

### Getting Files to the Assistant

First, we need to upload a file so that we can pass it to our assistant.

In [6]:
# Upload a file with an "assistants" purpose.
# The file is opened in binary read mode inside a context manager so the local
# file handle is closed as soon as the upload call returns, instead of being
# left open for the lifetime of the kernel.
with open("./artifacts/penguins_size.csv", "rb") as penguins_csv:
    assistant_file = client.files.create(
        file=penguins_csv,  # The open binary file to upload.
        purpose='assistants'  # Specify the purpose of the file upload.
    )

# Print the details of the uploaded file to check its properties.
print(assistant_file)


FileObject(id='file-aKwzrvGKa8phcqCgDxq5HtYQ', bytes=13519, created_at=1719317311, filename='penguins_size.csv', object='file', purpose='assistants', status='processed', status_details=None)


Next, we need to modify our Assistant with the new file information. 

In [7]:
# Attach the uploaded file to the existing assistant's Code Interpreter tool.
assistant = client.beta.assistants.update(
    assistant_id=assistant.id,
    # Code Interpreter stays the assistant's only tool.
    tools=[dict(type="code_interpreter")],
    # The file IDs listed here are the ones linked to the Code Interpreter tool.
    tool_resources=dict(
        code_interpreter=dict(file_ids=[assistant_file.id]),
    ),
)

# Show the updated assistant so the linked file ID can be checked.
print(assistant)


Assistant(id='asst_OhOeXCXyY3ly7zKBKZYEFQkb', created_at=1719317311, description=None, instructions=' \n        You are a helpful assistant.\n    ', metadata={'can_be_used_for_code_analysis': 'True', 'can_do_python': 'True'}, model='gpt-4o', name='Code Interpreter Assistant', object='assistant', tools=[CodeInterpreterTool(type='code_interpreter')], response_format='auto', temperature=1.0, tool_resources=ToolResources(code_interpreter=ToolResourcesCodeInterpreter(file_ids=['file-aKwzrvGKa8phcqCgDxq5HtYQ']), file_search=None), top_p=1.0)


Finally, let's run a message and see if it is working.

In [8]:
# Start a fresh thread that already contains the user's question.
assistant_thread = client.beta.threads.create(
    messages=[
        dict(
            role="user",
            content="Give me a summary of the file penguins_size.csv.",
        ),
    ]
)

# Run the assistant on that thread and stream events to our handler.
with client.beta.threads.runs.stream(
    event_handler=EventHandler(),  # Prints each event as it arrives.
    instructions="\n    You are a helpful assistant.\n    ",  # Instructions passed with this run.
    assistant_id=assistant.id,
    thread_id=assistant_thread.id,
) as stream:
    # Block until the run has finished streaming.
    stream.until_done()



assistant tool > code_interpreter

import pandas as pd

# Load the CSV file
file_path = '/mnt/data/file-aKwzrvGKa8phcqCgDxq5HtYQ'
penguins_data = pd.read_csv(file_path)

# Display the first few rows and summary of the dataframe
penguins_data.head(), penguins_data.describe()
assistant text > The file `penguins_size.csv` contains data about different species of penguins. Here's a summary of its contents:

### Columns:
1. **species**: The species of the penguin (e.g., Adelie).
2. **island**: The island where the penguin was observed (e.g., Torgersen).
3. **culmen_length_mm**: Length of the culmen (beak) in millimeters.
4. **culmen_depth_mm**: Depth of the culmen (beak) in millimeters.
5. **flipper_length_mm**: Length of the flipper in millimeters.
6. **body_mass_g**: Body mass in grams.
7. **sex**: Sex of the penguin (e.g., MALE, FEMALE).

### Summary Statistics:
- **culmen_length_mm**:
  - Count: 342 entries
  - Mean: 43.92 mm
  - Standard Deviation: 5.46 mm
  - Min: 32.1 mm
  - Max: 59

### Formatting the Output
What if we want to format the markdown output? There are two ways to do it. The "easy" way is to skip streaming: let the run finish, then format and display the complete response afterward.

In [9]:
# Create a new assistant thread with an initial user message.
assistant_thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "Give me a summary of the file penguins_size.csv."
        },
    ]
)

# Create a run for the thread and wait (poll) until it reaches a terminal state.
run = client.beta.threads.runs.create_and_poll(
    thread_id=assistant_thread.id,  # Specify the thread ID.
    assistant_id=assistant.id  # Specify the assistant ID.
)

# A run that did not complete has no assistant answer to format. Fail with a
# readable message here instead of an IndexError further down.
if run.status != "completed":
    raise RuntimeError(f"Run {run.id} ended with status '{run.status}'.")

# Retrieve the messages that this run added to the thread.
messages = list(client.beta.threads.messages.list(
    thread_id=assistant_thread.id,  # Specify the thread ID.
    run_id=run.id  # Specify the run ID.
))

# Use the first text block of the first message; a message can also contain
# non-text blocks (such as images), which have no `.text` attribute.
first_message_blocks = messages[0].content if messages else []
message_content = next(
    (block.text for block in first_message_blocks if block.type == "text"),
    None,
)
if message_content is None:
    raise RuntimeError("The run completed without returning any text content.")

annotations = message_content.annotations  # Extract annotations from the message content.
citations = []  # Footnotes collected for annotations that cite a file.
rendered_text = message_content.value  # Working copy; the API object is left unmodified.

# Process each annotation to replace the text with indexed references and gather citations.
for index, annotation in enumerate(annotations):
    # Replace the annotated text with an indexed reference.
    rendered_text = rendered_text.replace(annotation.text, f"[{index}]")
    # Only annotations that carry a file citation produce a footnote.
    file_citation = getattr(annotation, "file_citation", None)
    if file_citation:
        # Look up the cited file so the footnote can show its file name.
        cited_file = client.files.retrieve(file_citation.file_id)
        citations.append(f"[{index}] {cited_file.filename}")

# Append the collected footnotes so every "[n]" marker in the text has a
# matching entry; blank lines keep each footnote in its own Markdown paragraph.
if citations:
    rendered_text += "\n\n" + "\n\n".join(citations)

# Display the processed message content with indexed references using Markdown.
display(Markdown(rendered_text))


The file `penguins_size.csv` contains data on 344 penguins with the following columns:

1. **species**: The species of the penguin (categorical).
2. **island**: The island where the penguin was observed (categorical).
3. **culmen_length_mm**: The length of the culmen (bill) in millimeters (numerical, float).
4. **culmen_depth_mm**: The depth of the culmen in millimeters (numerical, float).
5. **flipper_length_mm**: The length of the flipper in millimeters (numerical, float).
6. **body_mass_g**: The body mass in grams (numerical, float).
7. **sex**: The sex of the penguin (categorical).

### Summary:
- The dataset has **344 entries**.
- There are some missing values in **culmen_length_mm, culmen_depth_mm, flipper_length_mm, body_mass_g**, and **sex** columns.
- Numerical columns: `culmen_length_mm`, `culmen_depth_mm`, `flipper_length_mm`, `body_mass_g`.
- Categorical columns: `species`, `island`, `sex`.

### Sample Data:
Here are the first few rows of the dataset to give an impression of the values:
| species | island    | culmen_length_mm | culmen_depth_mm | flipper_length_mm | body_mass_g | sex   |
|---------|-----------|------------------|-----------------|-------------------|-------------|-------|
| Adelie  | Torgersen | 39.1             | 18.7            | 181.0             | 3750.0      | MALE  |
| Adelie  | Torgersen | 39.5             | 17.4            | 186.0             | 3800.0      | FEMALE|
| Adelie  | Torgersen | 40.3             | 18.0            | 195.0             | 3250.0      | FEMALE|
| Adelie  | Torgersen | NaN              | NaN             | NaN               | NaN         | NaN   |
| Adelie  | Torgersen | 36.7             | 19.3            | 193.0             | 3450.0      | FEMALE|

If you need further analysis or specific operations on this dataset, please let me know!

The "hard" (but more user-friendly) way is to stream the output and update the display while streaming, so the formatted text appears as it is generated. This is what ChatGPT does when you use it. Doing this requires a more streamlined event handler that formats the accumulated output and refreshes the display on every update.

In [10]:
class EventHandler(AssistantEventHandler):
    """Event handler that renders the streamed assistant text as live Markdown.

    Every text fragment is accumulated in ``results`` and the notebook cell
    output is redrawn from the full accumulated text after each new fragment.
    """

    def __init__(self):
        super().__init__()
        self.results = []  # Text fragments received so far, in arrival order

    @override
    def on_text_delta(self, delta, snapshot):
        """Record one partial piece of assistant text and refresh the display."""
        # A delta may arrive without text (value is optional on the SDK type).
        # Storing None would make the "".join() in update_output raise TypeError,
        # and an empty fragment would only trigger a pointless redraw.
        if not delta.value:
            return
        self.results.append(delta.value)
        self.update_output()

    def update_output(self):
        """Redraw the cell output with everything received so far, as Markdown."""
        # wait=True delays the clear until the replacement output is ready
        clear_output(wait=True)
        # Join all fragments into the complete Markdown text and render it.
        display(Markdown("".join(self.results)))

# Start a fresh thread that already contains the user's formatting-focused request.
assistant_thread = client.beta.threads.create(
    messages=[
        dict(
            role="user",
            content="Give me a summary of the file penguins_size.csv. With at least one small table of data. Make the information well formatted and easy to read.",
        ),
    ]
)

# Run the assistant on that thread and stream events to the Markdown-rendering handler.
with client.beta.threads.runs.stream(
    event_handler=EventHandler(),  # Redraws the cell output as text arrives.
    instructions="\n    You are a helpful assistant.\n    ",  # Instructions passed with this run.
    assistant_id=assistant.id,
    thread_id=assistant_thread.id,
) as stream:
    # Block until the run has finished streaming.
    stream.until_done()


### Summary of `penguins_size.csv`

The dataset consists of penguin measurements including species information and various physical attributes such as culmen length, culmen depth, flipper length, and body mass. Below is a statistical summary and a small sample of the data.

#### Statistical Summary

| Statistic         | Culmen Length (mm) | Culmen Depth (mm) | Flipper Length (mm) | Body Mass (g)  |
|-------------------|--------------------|-------------------|---------------------|----------------|
| Count             | 342                | 342               | 342                 | 342            |
| Mean              | 43.92              | 17.15             | 200.92              | 4201.75        |
| Std Dev           | 5.46               | 1.97              | 14.06               | 801.95         |
| Min               | 32.10              | 13.10             | 172.00              | 2700.00        |
| 25th Percentile   | 39.23              | 15.60             | 190.00              | 3550.00        |
| Median (50th %ile)| 44.45              | 17.30             | 197.00              | 4050.00        |
| 75th Percentile   | 48.50              | 18.70             | 213.00              | 4750.00        |
| Max               | 59.60              | 21.50             | 231.00              | 6300.00        |

#### Sample Data

| Species | Island    | Culmen Length (mm) | Culmen Depth (mm) | Flipper Length (mm) | Body Mass (g) | Sex   |
|---------|-----------|--------------------|-------------------|---------------------|---------------|-------|
| Adelie  | Torgersen | 39.1               | 18.7              | 181.0               | 3750.0        | MALE  |
| Adelie  | Torgersen | 39.5               | 17.4              | 186.0               | 3800.0        | FEMALE|
| Adelie  | Torgersen | 40.3               | 18.0              | 195.0               | 3250.0        | FEMALE|
| Adelie  | Torgersen | NaN                | NaN               | NaN                 | NaN           | NaN   |
| Adelie  | Torgersen | 36.7               | 19.3              | 193.0               | 3450.0        | FEMALE|

This summary and sample data provide a clear overview of the dataset, highlighting essential statistics and giving a glimpse into the data structure.

### Getting Files to the Thread

First, we need a file uploaded.


In [11]:
# Upload a file with an "assistants" purpose.
# The file is opened in binary read mode inside a context manager so the local
# file handle is closed as soon as the upload call returns, instead of being
# left open for the lifetime of the kernel.
with open("./artifacts/daily-bike-share.csv", "rb") as bike_share_csv:
    thread_file = client.files.create(
        file=bike_share_csv,  # The open binary file to upload.
        purpose='assistants'  # Specify the purpose of the file upload.
    )

# Print the details of the uploaded file to check its properties.
print(thread_file)


FileObject(id='file-CXzGmzUz78HYY8vOHucDCzm7', bytes=43599, created_at=1719317366, filename='daily-bike-share.csv', object='file', purpose='assistants', status='processed', status_details=None)


Second, we need a thread to attach the file to.

In [12]:
# Open a thread whose first message asks for a summary of the bike-share file.
thread = client.beta.threads.create(
    messages=[
        dict(
            role="user",
            content="Give me a summary of the daily-bike-share.csv file.",
        ),
    ]
)

# Show the new thread so its properties can be checked.
print(thread)


Thread(id='thread_yvjtV29Eibl5UqR6pQWciG9C', created_at=1719317368, metadata={}, object='thread', tool_resources=ToolResources(code_interpreter=None, file_search=None))


Third, we can update the thread with the file information

In [13]:
# Attach the uploaded file to the existing thread's Code Interpreter resources.
updated_thread = client.beta.threads.update(
    thread_id=thread.id,
    # The file IDs listed here are the ones linked to the Code Interpreter tool.
    tool_resources=dict(
        code_interpreter=dict(file_ids=[thread_file.id]),
    ),
)

# Show the updated thread so the linked file ID can be checked.
print(updated_thread)


Thread(id='thread_yvjtV29Eibl5UqR6pQWciG9C', created_at=1719317368, metadata={}, object='thread', tool_resources=ToolResources(code_interpreter=ToolResourcesCodeInterpreter(file_ids=['file-CXzGmzUz78HYY8vOHucDCzm7']), file_search=None))


Finally, let's run it against a new assistant and see the results

In [14]:
# Register a second assistant that has Code Interpreter enabled.
thread_assistant = client.beta.assistants.create(
    name="Code Interpreter Assistant Using Thread Data",
    model="gpt-4o",
    # Same system prompt as before, written as a single-line literal.
    instructions=" \n        You are a helpful assistant.\n    ",
    # Code Interpreter is the only tool this assistant gets.
    tools=[dict(type="code_interpreter")],
    # Free-form string tags describing what the assistant is meant for.
    metadata=dict(
        can_be_used_for_code_analysis="True",
        can_do_python="True",
    ),
    # Sampling settings.
    temperature=1,
    top_p=1,
)

# Show the full assistant object, a spacer, then its name and metadata,
# each on its own line.
print(thread_assistant, "\n\n", thread_assistant.name, thread_assistant.metadata, sep="\n")


Assistant(id='asst_mCoidHXknJZXBbiA5qZfWSrY', created_at=1719317369, description=None, instructions=' \n        You are a helpful assistant.\n    ', metadata={'can_be_used_for_code_analysis': 'True', 'can_do_python': 'True'}, model='gpt-4o', name='Code Interpreter Assistant Using Thread Data', object='assistant', tools=[CodeInterpreterTool(type='code_interpreter')], response_format='auto', temperature=1.0, tool_resources=ToolResources(code_interpreter=ToolResourcesCodeInterpreter(file_ids=[]), file_search=None), top_p=1.0)



Code Interpreter Assistant Using Thread Data
{'can_be_used_for_code_analysis': 'True', 'can_do_python': 'True'}


In [15]:
# Run the new assistant against the thread that carries the file and stream the reply.
with client.beta.threads.runs.stream(
    event_handler=EventHandler(),  # Handles the streamed events.
    assistant_id=thread_assistant.id,  # The assistant created just above.
    thread_id=updated_thread.id,  # The thread with the bike-share file attached.
) as stream:
    # Block until the run has finished streaming.
    stream.until_done()


The dataset consists of 731 entries with 13 columns. Here's a summary of the columns:

### Columns and Data Types
1. **day**: Integer (non-null)
2. **mnth**: Integer (non-null)
3. **year**: Integer (non-null)
4. **season**: Integer (non-null)
5. **holiday**: Integer (non-null)
6. **weekday**: Integer (non-null)
7. **workingday**: Integer (non-null)
8. **weathersit**: Integer (non-null)
9. **temp**: Float (non-null)
10. **atemp**: Float (non-null)
11. **hum**: Float (non-null)
12. **windspeed**: Float (non-null)
13. **rentals**: Integer (non-null)

### Summary Statistics
For the numeric columns, here are some key statistics:
- **day**:
  - Mean: 15.74
  - Std: 8.81
  - Min: 1
  - Max: 31
- **mnth**:
  - Mean: 6.52
  - Std: 3.45
  - Min: 1
  - Max: 12
- **year**:
  - Mean: 2011.5
  - Std: 0.5
  - Min: 2011
  - Max: 2012
- **season**:
  - Mean: 2.5
  - Std: 1.11
  - Min: 1
  - Max: 4
- **holiday**:
  - Mean: 0.03
  - Std: 0.17
  - Min: 0
  - Max: 1
- **weekday**:
  - Mean: 3.0
  - Std: 2.0
  - Min: 0
  - Max: 6
- **workingday**:
  - Mean: 0.68
  - Std: 0.47
  - Min: 0
  - Max: 1
- **weathersit**:
  - Mean: 1.4
  - Std: 0.54
  - Min: 1
  - Max: 3
- **temp**:
  - Mean: 0.495
  - Std: 0.183
  - Min: 0.059
  - Max: 0.861
- **atemp**:
  - Mean: 0.474
  - Std: 0.163
  - Min: 0.079
  - Max: 0.841
- **hum**:
  - Mean: 0.628
  - Std: 0.142
  - Min: 0.000
  - Max: 0.973
- **windspeed**:
  - Mean: 0.190
  - Std: 0.077
  - Min: 0.022
  - Max: 0.507
- **rentals**:
  - Mean: 848.18
  - Std: 686.62
  - Min: 2
  - Max: 3410

### Memory Usage
- The DataFrame uses approximately 74.4 KB of memory.

The dataset includes various metrics related to bike rentals, such as the day, month, year, season, whether it was a holiday, the weekday, whether it was a working day, the weather situation, temperature, "feels-like" temperature, humidity, wind speed, and the total number of rentals.

### Text Results from files in Assistants and Threads

Let's see what happens if we use an assistant with a file and a thread with a file together.

In [16]:
# Create a new thread with an initial user message requesting a summary of two files.
# The prompt refers to the penguin file by its real uploaded name,
# "penguins_size.csv" (with an underscore), matching the other prompts in this
# notebook, so the assistant is not asked about a file name that does not exist.
super_thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": (
                "Give me a summary of the penguins_size.csv and daily-bike-share.csv files. "
                "Make the information well formatted and easy to read."
            )
        },
    ]
)

# Print the details of the created thread to check its properties.
print(super_thread)


Thread(id='thread_r2QHpzFRud96eEGHHHwHO2qS', created_at=1719317386, metadata={}, object='thread', tool_resources=ToolResources(code_interpreter=None, file_search=None))


In [17]:
# Attach the bike-share file to the new thread's Code Interpreter resources.
super_updated_thread = client.beta.threads.update(
    thread_id=super_thread.id,
    # The file IDs listed here are the ones linked to the Code Interpreter tool.
    tool_resources=dict(
        code_interpreter=dict(file_ids=[thread_file.id]),
    ),
)

# Show the updated thread so the linked file ID can be checked.
print(super_updated_thread)


Thread(id='thread_r2QHpzFRud96eEGHHHwHO2qS', created_at=1719317386, metadata={}, object='thread', tool_resources=ToolResources(code_interpreter=ToolResourcesCodeInterpreter(file_ids=['file-CXzGmzUz78HYY8vOHucDCzm7']), file_search=None))


In [18]:
# Run the file-carrying assistant against the file-carrying thread and stream the reply.
with client.beta.threads.runs.stream(
    event_handler=EventHandler(),  # Handles the streamed events.
    assistant_id=assistant.id,  # The assistant that has the penguin file linked.
    thread_id=super_updated_thread.id,  # The thread that has the bike-share file linked.
) as stream:
    # Block until the run has finished streaming.
    stream.until_done()


Here is the summary of the two datasets: **penguins-size.csv** and **daily-bike-share.csv**

### 1. penguins-size.csv

**General Information:**
- **Total Entries:** 344
- **Total Columns:** 7

**Columns Detail:**
1. **species**
   - **Data Type:** Object
   - **Non-Null Count:** 344
   - **Unique Values:** 3 (Adelie, Chinstrap, Gentoo)
2. **island**
   - **Data Type:** Object
   - **Non-Null Count:** 344
   - **Unique Values:** 3 (Torgersen, Biscoe, Dream)
3. **culmen_length_mm**
   - **Data Type:** Float64
   - **Non-Null Count:** 342
   - **Mean:** 43.92
   - **Standard Deviation:** 5.46
   - **Minimum:** 32.10
   - **Maximum:** 59.60
4. **culmen_depth_mm**
   - **Data Type:** Float64
   - **Non-Null Count:** 342
   - **Mean:** 17.15
   - **Standard Deviation:** 1.97
   - **Minimum:** 13.10
   - **Maximum:** 21.50
5. **flipper_length_mm**
   - **Data Type:** Float64
   - **Non-Null Count:** 342
   - **Mean:** 200.92
   - **Standard Deviation:** 14.06
   - **Minimum:** 172.00
   - **Maximum:** 231.00
6. **body_mass_g**
   - **Data Type:** Float64
   - **Non-Null Count:** 342
   - **Mean:** 4201.75
   - **Standard Deviation:** 801.95
   - **Minimum:** 2700.00
   - **Maximum:** 6300.00
7. **sex**
   - **Data Type:** Object
   - **Non-Null Count:** 334
   - **Unique Values:** 3 (MALE, FEMALE, .) 

### 2. daily-bike-share.csv

**General Information:**
- **Total Entries:** 731
- **Total Columns:** 13

**Columns Detail:**
1. **day**
   - **Data Type:** Int64
   - **Non-Null Count:** 731
   - **Mean:** 15.74
   - **Standard Deviation:** 8.81
   - **Minimum:** 1
   - **Maximum:** 31
2. **mnth**
   - **Data Type:** Int64
   - **Non-Null Count:** 731
   - **Mean:** 6.52
   - **Standard Deviation:** 3.45
   - **Minimum:** 1
   - **Maximum:** 12
3. **year**
   - **Data Type:** Int64
   - **Non-Null Count:** 731
   - **Mean:** 2011.50
   - **Standard Deviation:** 0.50
   - **Minimum:** 2011
   - **Maximum:** 2012
4. **season**
   - **Data Type:** Int64
   - **Non-Null Count:** 731
   - **Mean:** 2.50
   - **Standard Deviation:** 1.11
   - **Minimum:** 1
   - **Maximum:** 4
5. **holiday**
   - **Data Type:** Int64
   - **Non-Null Count:** 731
   - **Mean:** 0.03
   - **Standard Deviation:** 0.17
   - **Minimum:** 0
   - **Maximum:** 1
6. **weekday**
   - **Data Type:** Int64
   - **Non-Null Count:** 731
   - **Mean:** 2.99
   - **Standard Deviation:** 2.00
   - **Minimum:** 0
   - **Maximum:** 6
7. **workingday**
   - **Data Type:** Int64
   - **Non-Null Count:** 731
   - **Mean:** 0.68
   - **Standard Deviation:** 0.47
   - **Minimum:** 0
   - **Maximum:** 1
8. **weathersit**
   - **Data Type:** Int64
   - **Non-Null Count:** 731
   - **Mean:** 1.42
   - **Standard Deviation:** 0.64
   - **Minimum:** 1
   - **Maximum:** 3
9. **temp**
   - **Data Type:** Float64
   - **Non-Null Count:** 731
   - **Mean:** 0.50
   - **Standard Deviation:** 0.18
   - **Minimum:** 0.02
   - **Maximum:** 0.86
10. **atemp**
    - **Data Type:** Float64
    - **Non-Null Count:** 731
    - **Mean:** 0.50
    - **Standard Deviation:** 0.16
    - **Minimum:** 0.10
    - **Maximum:** 0.84
11. **hum**
    - **Data Type:** Float64
    - **Non-Null Count:** 731
    - **Mean:** 0.62
    - **Standard Deviation:** 0.14
    - **Minimum:** 0.00
    - **Maximum:** 0.97
12. **windspeed**
    - **Data Type:** Float64
    - **Non-Null Count:** 731
    - **Mean:** 0.19
    - **Standard Deviation:** 0.08
    - **Minimum:** 0.02
    - **Maximum:** 0.51
13. **rentals**
    - **Data Type:** Int64
    - **Non-Null Count:** 731
    - **Mean:** 848.18
    - **Standard Deviation:** 686.62
    - **Minimum:** 2
    - **Maximum:** 3410

These summaries include the mean, standard deviation, min, and max values for numerical columns as well as the unique values and most frequent values for categorical columns.

### Full Results Output

The Code Interpreter output may also include images. Handling them can be tricky, and getting the text and images in the right sequence is pretty difficult. Here is some sample code that will help, but I don't pretend to be good at the interface stuff. :)

In [19]:

# Create a thread to send a message and get output
assistant_thread = client.beta.threads.create(
    messages=[
        {
            "role": "user",
            "content": "Give me a one paragraph summary of the file penguins_size.csv. With at least one small table of data and one visualization."
        },
    ]
)

# Create the run and wait (poll) until it reaches a terminal state.
run = client.beta.threads.runs.create_and_poll(
    thread_id=assistant_thread.id, assistant_id=assistant.id
)

# Ask for the run's messages oldest-first. The listing is newest-first by
# default, which would render the content in reverse order whenever the run
# produced more than one message.
messages = list(client.beta.threads.messages.list(
    thread_id=assistant_thread.id, run_id=run.id, order="asc"
))

content_blocks = []  # HTML fragments, in display order

# Process each message
for message in messages:
    # Process each content block in the message
    for content_block in message.content:
        if content_block.type == 'text':
            text_content = content_block.text
            annotations = text_content.annotations
            citations = []
            for index, annotation in enumerate(annotations):
                # Replace the text with a footnote
                text_content.value = text_content.value.replace(annotation.text, f' [{index}]')
                # Gather citations based on annotation attributes
                file_citation = getattr(annotation, 'file_citation', None)
                file_path = getattr(annotation, 'file_path', None)
                if file_citation is not None:
                    cited_file = client.files.retrieve(file_citation.file_id)
                    # NOTE(review): `quote` may not be present on every SDK/API
                    # version; fall back to the file name alone when it is missing.
                    quote = getattr(file_citation, 'quote', None)
                    if quote:
                        citations.append(f'[{index}] {quote} from {cited_file.filename}')
                    else:
                        citations.append(f'[{index}] {cited_file.filename}')
                elif file_path is not None:
                    cited_file = client.files.retrieve(file_path.file_id)
                    # The angle brackets are written as HTML entities: this text is
                    # rendered as HTML, where a literal "<here>" would be treated
                    # as an unknown tag and not shown.
                    citations.append(f'[{index}] Click &lt;here&gt; to download {cited_file.filename}')
            # Add footnotes to the end of the message before displaying to user.
            # Blank lines keep each footnote in its own Markdown paragraph.
            if citations:
                text_content.value += '\n\n' + '\n\n'.join(citations)
            # Convert Markdown to HTML and append to content_blocks
            content_blocks.append(markdown2.markdown(text_content.value, extras=["tables"]))

        elif content_block.type == 'image_file':
            image_file = content_block.image_file
            # Download the image bytes directly by file ID and embed them inline.
            image_content = client.files.content(image_file.file_id).content
            image_base64 = base64.b64encode(image_content).decode('utf-8')
            # Append the image HTML to content_blocks. Only the width is fixed so
            # the image keeps its aspect ratio.
            content_blocks.append(f'<img src="data:image/png;base64,{image_base64}" width="700"><br>')

# Join all content blocks into a single HTML string
html_content = ''.join(content_blocks)

# Display the combined content
display(HTML(html_content))


species,island,culmenlengthmm,culmendepthmm,flipperlengthmm,bodymassg,sex
Adelie,Torgersen,39.1,18.7,181.0,3750.0,MALE
Adelie,Torgersen,39.5,17.4,186.0,3800.0,FEMALE
Adelie,Torgersen,40.3,18.0,195.0,3250.0,FEMALE
Adelie,Torgersen,,,,,
Adelie,Torgersen,36.7,19.3,193.0,3450.0,FEMALE


### Streaming Formatted Output
I tried everything I could think of to get the images to stream inline with the text, but it appears that doing so in Jupyter Notebook cells is very difficult. I finally had to throw in the towel. I did post to the forums to see if anyone had an answer but, apparently, no one did. You can see if someone finally answered here: https://community.openai.com/t/streaming-markdown-text-and-images-from-assistant-using-code-interpreter/823042/9