In [None]:
# Learning material: https://github.com/openai/openai-cookbook/blob/main/examples/Assistants_API_overview_python.ipynb

In [1]:
from openai import OpenAI
import os
import json
from dotenv import load_dotenv
load_dotenv()

# pretty printing helper
import json
def show_json(obj):
    """Display an API response object as a plain JSON-compatible structure.

    The object is serialized through its ``model_dump_json()`` method, parsed
    back with ``json.loads`` and passed to the notebook's ``display``.
    Nothing is returned.
    """
    as_text = obj.model_dump_json()
    as_data = json.loads(as_text)
    display(as_data)

In [2]:
# Create the API client; the key is read from the OPENAI_API_KEY environment variable.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [3]:
# Create the "Math Tutor" assistant and display the object returned by the API.
assistant = client.beta.assistants.create(
    name="Math Tutor",
    instructions="You are a personal math tutor. Answer questions briefly, in a sentence or less.",
    model="gpt-3.5-turbo",
)
show_json(assistant)

{'id': 'asst_9AoXVWBxHzWGAb6bYJqyI4tK',
 'created_at': 1713281425,
 'description': None,
 'file_ids': [],
 'instructions': 'You are a personal math tutor. Answer questions briefly, in a sentence or less.',
 'metadata': {},
 'model': 'gpt-3.5-turbo',
 'name': 'Math Tutor',
 'object': 'assistant',
 'tools': [],
 'top_p': 1.0,
 'temperature': 1.0,
 'response_format': 'auto'}

In [4]:
# Create a new thread to hold the conversation and display it.
thread = client.beta.threads.create()
show_json(thread)


{'id': 'thread_UBNuUjvlzX5b4WtA2GCmHxuU',
 'created_at': 1713281430,
 'metadata': {},
 'object': 'thread'}

In [5]:
# Add the user's question to the thread as a message with role "user".
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="I need to solve the equation `3x + 11 = 14`. Can you help me?",
)
show_json(message)


{'id': 'msg_6GBvx7xAhB0IcX9gRMRBAbga',
 'assistant_id': None,
 'completed_at': None,
 'content': [{'text': {'annotations': [],
    'value': 'I need to solve the equation `3x + 11 = 14`. Can you help me?'},
   'type': 'text'}],
 'created_at': 1713281432,
 'file_ids': [],
 'incomplete_at': None,
 'incomplete_details': None,
 'metadata': {},
 'object': 'thread.message',
 'role': 'user',
 'run_id': None,
 'status': None,
 'thread_id': 'thread_UBNuUjvlzX5b4WtA2GCmHxuU'}

In [6]:
# Start a run of the assistant on the thread. The returned object is shown
# as-is, without waiting for the run to finish (the saved output below has
# status 'queued').
run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant.id,
)
show_json(run)

{'id': 'run_yY42hBbv9qXwg8owYrNEChFE',
 'assistant_id': 'asst_9AoXVWBxHzWGAb6bYJqyI4tK',
 'cancelled_at': None,
 'completed_at': None,
 'created_at': 1713281435,
 'expires_at': 1713282035,
 'failed_at': None,
 'file_ids': [],
 'instructions': 'You are a personal math tutor. Answer questions briefly, in a sentence or less.',
 'last_error': None,
 'metadata': {},
 'model': 'gpt-3.5-turbo',
 'object': 'thread.run',
 'required_action': None,
 'started_at': None,
 'status': 'queued',
 'thread_id': 'thread_UBNuUjvlzX5b4WtA2GCmHxuU',
 'tools': [],
 'usage': None,
 'temperature': 1.0,
 'top_p': 1.0,
 'max_completion_tokens': None,
 'max_prompt_tokens': None,
 'truncation_strategy': {'type': 'auto', 'last_messages': None},
 'incomplete_details': None,
 'response_format': 'auto',
 'tool_choice': 'auto'}

In [7]:
import time

def wait_on_run(run, thread, poll_interval=0.5, timeout=None):
    """Poll a Run until it leaves the ``queued`` / ``in_progress`` states.

    Args:
        run: Run object to wait on; only ``status`` and ``id`` are read.
        thread: Thread the run belongs to; only ``id`` is read.
        poll_interval: Seconds to sleep between two polls (default 0.5).
        timeout: Optional upper bound, in seconds, on the total wait.
            ``None`` (the default) waits indefinitely.

    Returns:
        The most recently retrieved Run, whose status is no longer ``queued``
        or ``in_progress``. That status is not necessarily ``completed``;
        callers that care should inspect ``run.status``.

    Raises:
        TimeoutError: if ``timeout`` elapses while the run is still pending.
    """
    pending = ("queued", "in_progress")
    deadline = None if timeout is None else time.monotonic() + timeout
    while run.status in pending:
        run = client.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id,
        )
        if run.status not in pending:
            # Finished: return right away instead of sleeping one more interval.
            break
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"Run {run.id} still {run.status!r} after {timeout} seconds"
            )
        time.sleep(poll_interval)
    return run

In [8]:
# Block until the run is no longer queued or in progress, then show the refreshed run.
run = wait_on_run(run, thread)
show_json(run)

{'id': 'run_yY42hBbv9qXwg8owYrNEChFE',
 'assistant_id': 'asst_9AoXVWBxHzWGAb6bYJqyI4tK',
 'cancelled_at': None,
 'completed_at': 1713281436,
 'created_at': 1713281435,
 'expires_at': None,
 'failed_at': None,
 'file_ids': [],
 'instructions': 'You are a personal math tutor. Answer questions briefly, in a sentence or less.',
 'last_error': None,
 'metadata': {},
 'model': 'gpt-3.5-turbo',
 'object': 'thread.run',
 'required_action': None,
 'started_at': 1713281435,
 'status': 'completed',
 'thread_id': 'thread_UBNuUjvlzX5b4WtA2GCmHxuU',
 'tools': [],
 'usage': {'completion_tokens': 12, 'prompt_tokens': 48, 'total_tokens': 60},
 'temperature': 1.0,
 'top_p': 1.0,
 'max_completion_tokens': None,
 'max_prompt_tokens': None,
 'truncation_strategy': {'type': 'auto', 'last_messages': None},
 'incomplete_details': None,
 'response_format': 'auto',
 'tool_choice': 'auto'}

In [11]:
# NOTE(review): this cell duplicates the next one and was executed out of order
# (In [11]); its saved output shows a different thread ID than the thread
# created above, so it looks like a leftover from an earlier session.
# Consider deleting it.
messages = client.beta.threads.messages.list(thread_id=thread.id)
show_json(messages)

{'data': [{'id': 'msg_GCXFXyuiNvrkj3aeA8ojMwLd',
   'assistant_id': None,
   'completed_at': None,
   'content': [{'text': {'annotations': [],
      'value': 'I need to solve the equation `3x + 11 = 14`. Can you help me?'},
     'type': 'text'}],
   'created_at': 1712875616,
   'file_ids': [],
   'incomplete_at': None,
   'incomplete_details': None,
   'metadata': {},
   'object': 'thread.message',
   'role': 'user',
   'run_id': None,
   'status': None,
   'thread_id': 'thread_oBBkudNMbVsXgpGIpIlgNIO3'}],
 'object': 'list',
 'first_id': 'msg_GCXFXyuiNvrkj3aeA8ojMwLd',
 'last_id': 'msg_GCXFXyuiNvrkj3aeA8ojMwLd',
 'has_more': False}

In [9]:
# List the messages on the thread; in the saved output below the assistant's
# reply is listed before the user's question.
messages = client.beta.threads.messages.list(thread_id=thread.id)
show_json(messages)

{'data': [{'id': 'msg_uf2C8mVHzf4Nb8M7OM2eKigN',
   'assistant_id': 'asst_9AoXVWBxHzWGAb6bYJqyI4tK',
   'completed_at': None,
   'content': [{'text': {'annotations': [],
      'value': 'Yes, the solution is x = 1.'},
     'type': 'text'}],
   'created_at': 1713281436,
   'file_ids': [],
   'incomplete_at': None,
   'incomplete_details': None,
   'metadata': {},
   'object': 'thread.message',
   'role': 'assistant',
   'run_id': 'run_yY42hBbv9qXwg8owYrNEChFE',
   'status': None,
   'thread_id': 'thread_UBNuUjvlzX5b4WtA2GCmHxuU'},
  {'id': 'msg_6GBvx7xAhB0IcX9gRMRBAbga',
   'assistant_id': None,
   'completed_at': None,
   'content': [{'text': {'annotations': [],
      'value': 'I need to solve the equation `3x + 11 = 14`. Can you help me?'},
     'type': 'text'}],
   'created_at': 1713281432,
   'file_ids': [],
   'incomplete_at': None,
   'incomplete_details': None,
   'metadata': {},
   'object': 'thread.message',
   'role': 'user',
   'run_id': None,
   'status': None,
   'thread_id'

Let's ask our Assistant to explain the result a bit further!

In [10]:
# Create a message to append to our thread
message = client.beta.threads.messages.create(
    thread_id=thread.id, role="user", content="Could you explain this to me?"
)

# Execute our run
run = client.beta.threads.runs.create(
    thread_id=thread.id,
    assistant_id=assistant.id,
)

# Wait for completion; keep the refreshed Run so `run` reflects its final
# status instead of the stale object returned by runs.create()
run = wait_on_run(run, thread)

# Retrieve all the messages added after our last user message
messages = client.beta.threads.messages.list(
    thread_id=thread.id, order="asc", after=message.id
)
show_json(messages)

{'data': [{'id': 'msg_rgB5Xhp4j7L90lklI7jciYIB',
   'assistant_id': 'asst_9AoXVWBxHzWGAb6bYJqyI4tK',
   'completed_at': None,
   'content': [{'text': {'annotations': [],
      'value': 'Sure, you can solve for x by subtracting 11 from both sides and then dividing by 3 to isolate x.'},
     'type': 'text'}],
   'created_at': 1713282292,
   'file_ids': [],
   'incomplete_at': None,
   'incomplete_details': None,
   'metadata': {},
   'object': 'thread.message',
   'role': 'assistant',
   'run_id': 'run_XZqYiYRLmqyxI7XHZbZNVvaY',
   'status': None,
   'thread_id': 'thread_UBNuUjvlzX5b4WtA2GCmHxuU'}],
 'object': 'list',
 'first_id': 'msg_rgB5Xhp4j7L90lklI7jciYIB',
 'last_id': 'msg_rgB5Xhp4j7L90lklI7jciYIB',
 'has_more': False}

Example

In [13]:
from openai import OpenAI

# Remember the ID of the assistant created above; create_thread_and_run() below uses it.
MATH_ASSISTANT_ID = assistant.id  # or a hard-coded ID like "asst-..."

# NOTE(review): `client` was already created near the top of the notebook with
# the same arguments; this line rebinds it to a new instance.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def submit_message(assistant_id, thread, user_message):
    """Append a user message to ``thread`` and start a Run on it.

    Args:
        assistant_id: ID of the Assistant that should execute the Run.
        thread: Thread object; only its ``id`` is used.
        user_message: Text added to the thread with role ``"user"``.

    Returns:
        The Run object returned by ``runs.create``.
    """
    client.beta.threads.messages.create(
        role="user",
        content=user_message,
        thread_id=thread.id,
    )
    new_run = client.beta.threads.runs.create(
        assistant_id=assistant_id,
        thread_id=thread.id,
    )
    return new_run

def get_response(thread):
    """Return the message listing for ``thread``, requested in ascending order."""
    listing = client.beta.threads.messages.list(
        order="asc",
        thread_id=thread.id,
    )
    return listing

Notice that creating a Run returns immediately while the Run itself executes asynchronously on the server; this means we can have several Runs in flight at once without using async libraries (e.g. asyncio) in our own code.

In [14]:
def create_thread_and_run(user_input, assistant_id=None):
    """Create a new Thread containing ``user_input`` and start a Run on it.

    Args:
        user_input: Text of the first user message in the new thread.
        assistant_id: ID of the Assistant that should execute the Run.
            When omitted, ``MATH_ASSISTANT_ID`` is used.

    Returns:
        A ``(thread, run)`` tuple: the newly created Thread and the Run
        returned by ``submit_message``. The Run is not waited on.
    """
    if assistant_id is None:
        # Looked up at call time so a re-assigned MATH_ASSISTANT_ID is honoured.
        assistant_id = MATH_ASSISTANT_ID
    thread = client.beta.threads.create()
    run = submit_message(assistant_id, thread, user_input)
    return thread, run


# Emulate concurrent user requests: each call creates its own thread and
# starts a run on it without waiting for that run to finish.
thread1, run1 = create_thread_and_run(
    "I need to solve the equation `3x + 11 = 14`. Can you help me?"
)
thread2, run2 = create_thread_and_run("Could you explain linear algebra to me?")
thread3, run3 = create_thread_and_run("I don't like math. What can I do?")

# All three runs have been submitted; none of them has been waited on yet.

In [15]:
import time

# Pretty printing helper
def pretty_print(messages):
    """Print every message as ``role: text`` under a ``# Messages`` heading.

    All content parts of a message are rendered: text parts contribute their
    text value, any other part (for example an ``image_file`` part) is shown
    as a ``[type]`` placeholder. Parts are separated by newlines. A message
    without content prints just its role.

    Args:
        messages: Iterable of message objects exposing ``role`` and ``content``.
    """
    print("# Messages")
    for m in messages:
        rendered = []
        for part in m.content or []:
            kind = getattr(part, "type", "unknown")
            if kind == "text":
                rendered.append(part.text.value)
            else:
                # Non-text parts have no `.text`; show what kind of part it is.
                rendered.append(f"[{kind}]")
        body = "\n".join(rendered)
        print(f"{m.role}: {body}")
    print()


# Waiting in a loop
def wait_on_run(run, thread, poll_interval=0.5, timeout=None):
    """Poll a Run until it leaves the ``queued`` / ``in_progress`` states.

    Args:
        run: Run object to wait on; only ``status`` and ``id`` are read.
        thread: Thread the run belongs to; only ``id`` is read.
        poll_interval: Seconds to sleep between two polls (default 0.5).
        timeout: Optional upper bound, in seconds, on the total wait.
            ``None`` (the default) waits indefinitely.

    Returns:
        The most recently retrieved Run, whose status is no longer ``queued``
        or ``in_progress``. That status is not necessarily ``completed``;
        callers that care should inspect ``run.status``.

    Raises:
        TimeoutError: if ``timeout`` elapses while the run is still pending.
    """
    pending = ("queued", "in_progress")
    deadline = None if timeout is None else time.monotonic() + timeout
    while run.status in pending:
        run = client.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id,
        )
        if run.status not in pending:
            # Finished: return right away instead of sleeping one more interval.
            break
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                f"Run {run.id} still {run.status!r} after {timeout} seconds"
            )
        time.sleep(poll_interval)
    return run


# Wait for Run 1, then print the conversation on its thread
run1 = wait_on_run(run1, thread1)
pretty_print(get_response(thread1))

# Wait for Run 2, then print the conversation on its thread
run2 = wait_on_run(run2, thread2)
pretty_print(get_response(thread2))

# Wait for Run 3, then print the conversation on its thread
run3 = wait_on_run(run3, thread3)
pretty_print(get_response(thread3))

# Thank our assistant on Thread 3 :) -- a second message and run on the same
# thread, so the printed conversation contains both exchanges
run4 = submit_message(MATH_ASSISTANT_ID, thread3, "Thank you!")
run4 = wait_on_run(run4, thread3)
pretty_print(get_response(thread3))

# Messages
user: I need to solve the equation `3x + 11 = 14`. Can you help me?
assistant: Sure, the solution for the equation `3x + 11 = 14` is `x = 1`.

# Messages
user: Could you explain linear algebra to me?
assistant: Linear algebra is a branch of mathematics that deals with equations representing straight lines or linear relationships between variables, often involving matrices and vectors.

# Messages
user: I don't like math. What can I do?
assistant: Try to approach math with a positive attitude and practice regularly to improve your skills.

# Messages
user: I don't like math. What can I do?
assistant: Try to approach math with a positive attitude and practice regularly to improve your skills.
user: Thank you!
assistant: You're welcome! If you have any more questions, feel free to ask.



How do we use the Code Interpreter tool in the Assistants API to solve a problem with code?

In [16]:
# Update the existing assistant so its tools list contains code_interpreter,
# then display the updated object.
assistant = client.beta.assistants.update(
    MATH_ASSISTANT_ID,
    tools=[{"type": "code_interpreter"}],
)
show_json(assistant)

{'id': 'asst_9AoXVWBxHzWGAb6bYJqyI4tK',
 'created_at': 1713281425,
 'description': None,
 'file_ids': [],
 'instructions': 'You are a personal math tutor. Answer questions briefly, in a sentence or less.',
 'metadata': {},
 'model': 'gpt-3.5-turbo',
 'name': 'Math Tutor',
 'object': 'assistant',
 'tools': [{'type': 'code_interpreter'}],
 'top_p': 1.0,
 'temperature': 1.0,
 'response_format': 'auto'}

In [17]:
# Ask the question on a new thread, wait until the run is no longer pending,
# and print the conversation.
thread, run = create_thread_and_run(
    "Generate the first 20 fibbonaci numbers with code."
)
run = wait_on_run(run, thread)
pretty_print(get_response(thread))

# Messages
user: Generate the first 20 fibbonaci numbers with code.
assistant: The first 20 Fibonacci numbers are: [0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584, 4181].



For some use cases this may be enough – however, if we want more details on what precisely an Assistant is doing we can take a look at a Run's Steps.

In [18]:
# List the steps recorded for the run, requested in ascending order.
run_steps = client.beta.threads.runs.steps.list(
    thread_id=thread.id, run_id=run.id, order="asc"
)

In [19]:
# show_json() displays its argument and returns None, so it is called directly;
# wrapping it in print(json.dumps(...)) only printed an extra "null" per step.
for step in run_steps.data:
    step_details = step.step_details
    show_json(step_details)

{'tool_calls': [{'id': 'call_2QtMnzzgMLwiGY48hgxtCr9h',
   'code_interpreter': {'input': 'def fibonacci(n):\n    fib_list = [0, 1]\n    for i in range(2, n):\n        fib_list.append(fib_list[i-1] + fib_list[i-2])\n    return fib_list\n\nfirst_20_fibonacci = fibonacci(20)\nfirst_20_fibonacci',
    'outputs': [{'logs': '[0,\n 1,\n 1,\n 2,\n 3,\n 5,\n 8,\n 13,\n 21,\n 34,\n 55,\n 89,\n 144,\n 233,\n 377,\n 610,\n 987,\n 1597,\n 2584,\n 4181]',
      'type': 'logs'}]},
   'type': 'code_interpreter'}],
 'type': 'tool_calls'}

null


{'message_creation': {'message_id': 'msg_OqjMe8G4GyLSxDhaZoLSqgWQ'},
 'type': 'message_creation'}

null


In [23]:
# Show the assistant ID that create_thread_and_run() submits runs to.
MATH_ASSISTANT_ID

'asst_9AoXVWBxHzWGAb6bYJqyI4tK'

In [27]:
# Upload the file; the context manager closes the local file handle as soon as
# the upload call returns instead of leaving it open for the kernel's lifetime.
with open("data/cities_for_map.csv", "rb") as csv_file:
    file = client.files.create(
        file=csv_file,
        purpose="assistants",
    )

# Create a new assistant with the uploaded file attached.
# NOTE: this rebinds `assistant` to a different assistant; MATH_ASSISTANT_ID
# still holds the ID of the assistant created at the top of the notebook.
assistant = client.beta.assistants.create(
    name="Math Tutor",
    instructions="You are professional frontend data visualization developer. Please generate code that answers users questions on data",
    model="gpt-3.5-turbo",
    tools=[{"type": "code_interpreter"}, {"type": "retrieval"}],
    file_ids=[file.id],
)
show_json(assistant)

{'id': 'asst_HUCig74tVEzxYoRvyvwwLPq5',
 'created_at': 1713293892,
 'description': None,
 'file_ids': ['file-cY3KNR7ujFVIRlXSEtHhWNRJ'],
 'instructions': 'You are professional frontend data visualization developer. Please generate code that answers users questions on data',
 'metadata': {},
 'model': 'gpt-3.5-turbo',
 'name': 'Math Tutor',
 'object': 'assistant',
 'tools': [{'type': 'code_interpreter'}, {'type': 'retrieval'}],
 'top_p': 1.0,
 'temperature': 1.0,
 'response_format': 'auto'}

In [28]:
# create_thread_and_run() always submits to MATH_ASSISTANT_ID, i.e. the
# original assistant, which no cell in this notebook attaches a file to.
# Create the thread explicitly so the run is executed by the assistant
# currently bound to `assistant`, the one that has the uploaded file.
thread = client.beta.threads.create()
run = submit_message(assistant.id, thread, "Can you describe the data?")
run = wait_on_run(run, thread)
pretty_print(get_response(thread))

# Messages
user: Can you describe the data?
assistant: The dataset consists of 4500 entries with 43 columns. The columns include information such as City, Country, Latitude, Longitude, Population, Waste Generation Rate, Waste Components breakdown, Diversion rates, Precipitation, and Methane Capture details.



Can the assistant handle two csv files?

In [49]:
# Upload both CSV files. Each local file is opened in a context manager so its
# handle is closed as soon as the corresponding upload call returns.
with open("data/cities_for_map.csv", "rb") as cities_csv:
    city_data = client.files.create(
        file=cities_csv,
        purpose="assistants",
    )

with open("data/catalogue.csv", "rb") as catalogue_csv:
    city_data_catalogue = client.files.create(
        file=catalogue_csv,
        purpose="assistants",
    )

In [51]:
# Update the assistant currently bound to `assistant`: new name and
# instructions, and both uploaded files attached through file_ids.
assistant = client.beta.assistants.update(
    name="Data Visualization Developer",
    instructions="You are professional data visualization developer. Please generate code that answers users questions on data",
    model="gpt-3.5-turbo",
    tools=[{"type": "code_interpreter"}, {"type": "retrieval"}],
    assistant_id=assistant.id,
    file_ids=[city_data.id,city_data_catalogue.id],
)
show_json(assistant)

{'id': 'asst_IlfxlF2eIwYoQQ4PZE3U5tBw',
 'created_at': 1713299059,
 'description': None,
 'file_ids': ['file-Z0i9s2HxGCYsSkbLnd7gMhmR',
  'file-zN29c0zJEM4Yf5P82g95QRxx'],
 'instructions': 'You are professional data visualization developer. Please generate code that answers users questions on data',
 'metadata': {},
 'model': 'gpt-3.5-turbo',
 'name': 'Data Visualization Developer',
 'object': 'assistant',
 'tools': [{'type': 'code_interpreter'}, {'type': 'retrieval'}],
 'top_p': 1.0,
 'temperature': 1.0,
 'response_format': 'auto'}

In [52]:
# create_thread_and_run() always submits to MATH_ASSISTANT_ID, not to the
# assistant that was just updated with the two CSV files. Create the thread
# explicitly so the run is executed by `assistant`.
thread = client.beta.threads.create()
run = submit_message(
    assistant.id,
    thread,
    "You are given a data file that describes the emissions of different types of waste from cities. \
        The data is in the cities_for_map.csv and the data catalogue is in catalogue.csv. Please generate \
        code that can make an interactive map using 2022 emissions data. Each bubble on the map represents one city. The size of  \
        the bubble is proportional to the total emissions for the city. When users click on the ciy, it will\
             show the emissions for each type of waste as a pie chart."
)
run = wait_on_run(run, thread)
pretty_print(get_response(thread))

# Messages
user: You are given a data file that describes the emissions of different types of waste from cities.         The data is in the cities_for_map.csv and the data catalogue is in catalogue.csv. Please generate         code that can make an interactive map using 2022 emissions data. Each bubble on the map represents one city. The size of          the bubble is proportional to the total emissions for the city. When users click on the ciy, it will             show the emissions for each type of waste as a pie chart.
assistant: To create an interactive map that shows city bubbles proportional to total emissions and displays a pie chart of waste type emissions upon clicking a city, we will need to use Python with libraries like pandas, plotly, and folium. First, we will load and preprocess the data from cities_for_map.csv, and then create the interactive map. Finally, we will add interactivity to show the pie chart upon clicking a city. Let's start by loading and exploring the data

In [53]:
# List the steps recorded for the latest run, requested in ascending order.
run_steps = client.beta.threads.runs.steps.list(
    thread_id=thread.id, run_id=run.id, order="asc"
)

In [54]:
# show_json() displays its argument and returns None, so it is called directly;
# wrapping it in print(json.dumps(...)) only printed an extra "null" per step.
for step in run_steps.data:
    step_details = step.step_details
    show_json(step_details)

{'message_creation': {'message_id': 'msg_D7z2vtHazA7TjAxk8cka5g9h'},
 'type': 'message_creation'}

null


{'tool_calls': [{'id': 'call_nmSGGpShosmIMLvHjsFpVvC8',
   'code_interpreter': {'input': "import pandas as pd\r\n\r\n# Load the data from cities_for_map.csv\r\ndata_path = '/mnt/data/file-aYruoQVegqZgU2WhOGWfZVC0/cities_for_map.csv'\r\ncities_data = pd.read_csv(data_path)\r\n\r\n# Display the first few rows of the data to understand its structure\r\ncities_data.head()",
      'type': 'logs'}]},
   'type': 'code_interpreter'}],
 'type': 'tool_calls'}

null


{'message_creation': {'message_id': 'msg_d8avLFSe4aqXovY30zBZlalV'},
 'type': 'message_creation'}

null


{'tool_calls': [{'id': 'call_U0tnFprKkWqJBP9FQ46EbUDM',
   'code_interpreter': {'input': "# Correct the file path and load the data\r\ndata_path = '/mnt/data/file-aYruoQVegqZgU2WhOGWfZVC0'\r\ncities_data = pd.read_csv(data_path)\r\n\r\n# Display the first few rows of the data to understand its structure\r\ncities_data.head()",
    'outputs': [{'logs': '       City    Country Country_ISO3   Latitude   Longitude  Population  \\\n0  Belgrade     Serbia          SRB  44.817813   20.456897     1392691   \n1  Tartumaa    Estonia          EST  58.407462   26.734665      150000   \n2   Jakarta  Indonesia          IDN  -6.175247  106.827049     9607787   \n3    Ottawa     Canada          CAN  45.420878  -75.690111      934243   \n4   Hamburg    Germany          DEU  53.550341   10.000654     1831000   \n\n  Input_Data_Source  Year_of_Data_Collection  \\\n0        World Bank                     2012   \n1        World Bank                     2016   \n2        World Bank                     2010

null


{'message_creation': {'message_id': 'msg_gnFKMbRLdea5l373uegctKDe'},
 'type': 'message_creation'}

null


{'message_creation': {'message_id': 'msg_w5WfuQIIGdGXH4QX4VCBFhpM'},
 'type': 'message_creation'}

null


{'tool_calls': [{'id': 'call_e04gSjViNbbnB9vNQhLpkZ3g',
   'code_interpreter': {'input': "import folium\r\nimport plotly.express as px\r\nimport plotly.graph_objects as go\r\n\r\n# Create an initial world map using Plotly\r\nfig = px.scatter_geo(cities_data, lat='Latitude', lon='Longitude', size='Waste_Generation_Rate__tons_year_',\r\n                     hover_name='City', hover_data=['Country', 'Waste_Generation_Rate__tons_year_'],\r\n                     projection='natural earth', title='City Emissions Map')\r\n\r\n# Convert the Plotly figure to a Folium map\r\nfolium_map = folium.Figure(width=1000, height=600)\r\nfolium_map.add_child(fig)\r\n\r\n# Display the Folium map\r\nfolium_map.save('/mnt/data/interactive_map.html')\r\n'/mnt/data/interactive_map.html'",
    'outputs': [{'logs': '---------------------------------------------------------------------------\nAttributeError                            Traceback (most recent call last)\nCell In[3], line 12\n     10 # Convert the Pl

null


{'message_creation': {'message_id': 'msg_tMWDfAJPtzzO1QbirFAiwVwk'},
 'type': 'message_creation'}

null


{'tool_calls': [{'id': 'call_AUbMpd2p0kNlAXomoU6nnPry',
   'code_interpreter': {'input': "import pandas as pd\nimport plotly.express as px\nimport folium\nfrom folium.plugins import MarkerCluster\n\n# Load the data\ndata = pd.read_csv('/mnt/data/New_File_Calculation-datasetCSV.csv')\n\n# Create a Folium map\nm = folium.Map(location=[34.0522, -118.2437], zoom_start=11)\n\n# Create a MarkerCluster map\nmarker_cluster = MarkerCluster().add_to(m)\n\n# Add the locations to the map\nfor i in range(len(data)):\n    folium.Marker(\n        location=[data.loc[i, 'Latitude'], data.loc[i, 'Longitude']],\n        popup=data.loc[i, 'Incident Description']\n    ).add_to(marker_cluster)\n\nm.save('/mnt/data/interactive_map.html')\nm",
      'type': 'logs'}]},
   'type': 'code_interpreter'}],
 'type': 'tool_calls'}

null


In [61]:
# ID of the assistant currently bound to `assistant`.
assistant.id

'asst_IlfxlF2eIwYoQQ4PZE3U5tBw'

In [59]:
# ID of the thread currently bound to `thread`.
thread.id

'thread_7NzJWIKMaEXuQcX5YTc6Ht84'

In [63]:
# Send a follow-up message on the same thread, explicitly targeting
# `assistant.id`, then wait for the run and print the whole conversation.
run = submit_message(assistant.id, thread, "Please print code only")
run = wait_on_run(run, thread)
pretty_print(get_response(thread))

# Messages
user: You are given a data file that describes the emissions of different types of waste from cities.         The data is in the cities_for_map.csv and the data catalogue is in catalogue.csv. Please generate         code that can make an interactive map using 2022 emissions data. Each bubble on the map represents one city. The size of          the bubble is proportional to the total emissions for the city. When users click on the ciy, it will             show the emissions for each type of waste as a pie chart.
assistant: To create an interactive map that shows city bubbles proportional to total emissions and displays a pie chart of waste type emissions upon clicking a city, we will need to use Python with libraries like pandas, plotly, and folium. First, we will load and preprocess the data from cities_for_map.csv, and then create the interactive map. Finally, we will add interactivity to show the pie chart upon clicking a city. Let's start by loading and exploring the data