# Azure OpenAI - AlphaCentauri Project
## Step 1: Setting up your Azure & Langchain
based on https://learn.deeplearning.ai/courses/building-your-own-database-agent

In [1]:
import os
import pandas as pd
from IPython.display import Markdown, HTML, display
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv(override=True)


True

In [2]:

from openai import AzureOpenAI

# Azure OpenAI Configuration (CORRECT endpoint from Azure Portal)
#endpoint = "https://js-alphacentauri-resource.cognitiveservices.azure.com/"
endpoint ="https://js-alphacentauri-resource.openai.azure.com/"
deployment = "gpt-4.1-mini"
v_model = "gpt-4.1-mini"
api_version = "2025-03-01-preview"  # Updated to latest API version for Responses API support

# Get API key from environment
subscription_key = os.getenv("AZURE_OPENAI_API_KEY")

# Create Azure OpenAI client
client = AzureOpenAI(
    api_version=api_version,
    azure_endpoint=endpoint,
    api_key=subscription_key,
)

print("‚úÖ Azure OpenAI client configured")
print(f"Endpoint: {endpoint}")
print(f"Deployment: {deployment}")
print(f"API Version: {api_version}")


‚úÖ Azure OpenAI client configured
Endpoint: https://js-alphacentauri-resource.openai.azure.com/
Deployment: gpt-4.1-mini
API Version: 2025-03-01-preview


In [3]:
# Test the connection
response = client.chat.completions.create(
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant.",
        },
        {
            "role": "user",
            "content": "Say hello and tell me you're working! Tell me not to worry, everything will be fine.",
        }
    ],
    model=deployment
)

print(response.choices[0].message.content)


Hello! I'm working right now, so no need to worry‚Äîeverything will be fine! If you need any help, just let me know.


### LangChain Integration


In [4]:
from langchain_openai import AzureChatOpenAI
from langchain.schema import HumanMessage, SystemMessage

# Create LangChain model
model = AzureChatOpenAI(
    openai_api_version=api_version,
    azure_deployment=deployment,
    azure_endpoint=endpoint,
    api_key=subscription_key
)

# Test with LangChain
messages = [
    SystemMessage(content="You are a helpful assistant."),
    HumanMessage(content="Translate 'Hello, how are you?' to French and Spanish.")
]

response = model.invoke(messages)
print(response.content)


ModuleNotFoundError: No module named 'langchain.schema'

### Streaming Response Example


In [None]:
v_messages = [
    SystemMessage(content="You are a helpful assistant."),
    HumanMessage(content="Translate this sentence from English "
    "to French and Spanish. I like red cars and "
    "blue houses, but my dog is yellow.")
]   

## alternative way to stream the response
    # messages=[
    #     {
    #         "role": "system",
    #         "content": "You are a helpful assistant.",
    #     },
    #     {
    #         "role": "user",
    #         "content": "Translate this sentence from English "
    # "to French and Spanish. I like red cars and "
    # "blue houses, but my dog is yellow.",
    #     }
    # ],

# Stream the output for better user experience
response = client.chat.completions.create(
    stream=True,
    messages=[
        {
            "role": "system",
            "content": "You are a helpful assistant.",
        },
        {
            "role": "user",
            "content": "Translate this sentence from English "
    "to French, West Vlaams and Spanish. I like red cars and "
    "blue houses, but my dog is yellow.",
        }
    ],
    model=deployment,
)

print("Streaming response:")
for update in response:
    if update.choices:
        print(update.choices[0].delta.content or "", end="")


### üîç Debug: Verify Deployment Name


In [None]:
import json
response2 = model.invoke((v_messages))
# Pretty print the response as JSON
print(json.dumps({
    "content": response2.content,
    "usage": response2.usage_metadata,
    "model": response2.response_metadata.get('model_name', 'N/A')
}, indent=2))
print (response2)

## Step 2: Interacting with CSV Data

In [None]:
# Create LangChain model
model = AzureChatOpenAI(
    openai_api_version=api_version,
    azure_deployment=deployment,
    azure_endpoint=endpoint,
    api_key=subscription_key
)


### Load the dataset
- baseline model -->
- could use 
  a) fine tuning for sql tasks 
  b) RAG (database as a source)
    - use Langchain agents to connect to SQL Database or CSV file
    - via Azure OpenAI Assistants API (function calling + code interpreter) (stateful management + short term memory)
    - via Azure OpenAI Fucntion Calling: to perform tasks based on your questions
    - Vai Native Database API

In [None]:
#read in the data from the csv file
#df = pd.read_csv("./data/synthetic_sales_data.csv").fillna(value = 0)
df = pd.read_csv("./data/synthetic_sales_data.csv", sep=";").fillna(value=0)

In [None]:
# Check if the file is read and display the first few rows with headers
print("First 5 rows of the dataset:")
print("=" * 80)
display(df.head())

# Alternative: use this if display() doesn't work
# print(df.head().to_string())

In [None]:
# Show detailed information about the dataset structure
print("üìä Dataset Information:")
print("-" * 80)
print(f"Total Rows: {len(df)}")
print(f"Total Columns: {len(df.columns)}")
print("\nColumn Names and Data Types:")
print("-" * 80)
for i, (col, dtype) in enumerate(zip(df.columns, df.dtypes), 1):
    print(f"{i:2d}. {col:30s} | Type: {dtype}")
print("-" * 80)


#### <span style="color: red">SOME ATTENTION REQUIRED</span> 
create_pandas_dataframe_agent() maak een ‚Äúagent‚Äù die:
- Je dataframe (df) kent
- Je prompt interpreteert (‚ÄúHow many rows are there?‚Äù)
- Daarvoor zelf Python-code schrijft
- En die code uitvoert in een ‚ÄúPython REPL‚Äù (read‚Äìeval‚Äìprint-loop) in jouw proces.
De ‚ÄúREPL‚Äù is in feite een mini-Python-console binnen jouw proces.
En de LLM (of een kwaadaardige prompt) kan daarin eender welke code uitvoeren.
Niet alleen veilige dingen zoals len(df), maar ook bijvoorbeeld:

--> gezien we controle hebben over de data hier kunnen we **allow_dangerous_code=True** toevoegen, MAAR GEBRUIK DIT NIET IN PRODUCTIE!


In [None]:
from langchain.agents.agent_types import AgentType
from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent

agent = create_pandas_dataframe_agent(
    llm=model,
    df=df,
    verbose=True, 
    allow_dangerous_code=True,
    max_iterations=30,  # Increase from default 15
    max_execution_time=300  # 5 minutes timeout
)

agent.invoke("how many rows are there?")

In [None]:
# agent.invoke("What can you tell me about the data?")
# agent.invoke("List me all the diffrent productlines and their individual summed uprevenue")
agent.invoke("List per region which productline sold most units in total and how many units were sold.")


In [None]:
CSV_PROMPT_PREFIX = """
First set the pandas display options to show all the columns,
get the column names, then answer the question.
"""

CSV_PROMPT_SUFFIX = """
- **ALWAYS** before giving the Final Answer, try another method.
Then reflect on the answers of the two methods you did and ask yourself
if it answers correctly the original question.
If you are not sure, try another method.
- If the methods tried do not give the same result,reflect and
try again until you have two methods that have the same result.
- If you still cannot arrive to a consistent result, say that
you are not sure of the answer.
- If you are sure of the correct answer, create a beautiful
and thorough response using Markdown.
- **DO NOT MAKE UP AN ANSWER OR USE PRIOR KNOWLEDGE,
ONLY USE THE RESULTS OF THE CALCULATIONS YOU HAVE DONE**.
- **ALWAYS**, as part of your "Final Answer", explain how you got
to the answer on a section that starts with: "\n\nExplanation:\n".
In the explanation, mention the column names that you used to get
to the final answer.
"""

#QUESTION = "How may patients were hospitalized during July 2020" 
#"in Texas, and nationwide as the total of all states?"
SIMPLE_PROMPT = """
How many patients were hospitalized during July 2020 in Texas, and nationwide as the total of all states?
Use the hospitalizedIncrease column.

Instructions:
1. Filter the data for July 2020 (dates between 2020-07-01 and 2020-07-31)
2. Sum the hospitalizedIncrease column for Texas
3. Sum the hospitalizedIncrease column for all states
4. Provide both answers with a clear explanation
"""

SIMPLE_PROMPT_SALES = """
How many toys units are sold in the month of July, regardless the year?
"""

QUESTION = "How may patients were hospitalized during July 2020" \
"in Texas, and nationwide as the total of all states?" \
"Use the hospitalizedIncrease column" 


output = agent.invoke(CSV_PROMPT_PREFIX + SIMPLE_PROMPT_SALES + CSV_PROMPT_SUFFIX)

In [None]:
# Alternative: Use the complex prompt with reflection (now with increased iteration limit)
# Uncomment the line below to try the more thorough approach with validation
# agent.invoke(CSV_PROMPT_PREFIX + QUESTION + CSV_PROMPT_SUFFIX)


In [None]:
# Access the output from the dictionary
print(output['output'])

# Or display it in markdown for better formatting
# display(Markdown(output['output']))

## Step 3: Connecting to a SQL Database

In [None]:
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase


In [None]:
import urllib.request

# Create data directory if it doesn't exist
os.makedirs("data", exist_ok=True)

# Download the file using Python (works on Windows, Mac, Linux)
# url = "https://covidtracking.com/data/download/all-states-history.csv"
# file_path = "./data/all-states-history.csv"

# print(f"Downloading from {url}...")
# urllib.request.urlretrieve(url, file_path)
# print(f"‚úÖ File downloaded to {file_path}")

# # Load the data
# df = pd.read_csv(file_path).fillna(value=0)
# print(f"‚úÖ Loaded {len(df)} rows of data")
df.head()

In [None]:
from sqlalchemy import create_engine

# Path to your SQLite database file
# database_file_path = "./data/test.db"
database_file_path = "./data/sales_db.db"

# Create an engine to connect to the SQLite database
# SQLite only requires the path to the database file
engine = create_engine(f'sqlite:///{database_file_path}')
# file_url = "./data/all-states-history.csv"
# df = pd.read_csv(file_url).fillna(value = 0)
# df.to_sql(
#     'all_states_history',
#     con=engine,
#     if_exists='replace',
#     index=False
# )
df.to_sql(
    'sales_db',
    con=engine,
    if_exists='replace',
    index=False
)



In [None]:
MSSQL_AGENT_PREFIX = """

You are an agent designed to interact with a SQL database.
## Instructions:
- Given an input question, create a syntactically correct {dialect} query
to run, then look at the results of the query and return the answer.
- Unless the user specifies a specific number of examples they wish to
obtain, **ALWAYS** limit your query to at most {top_k} results.
- You can order the results by a relevant column to return the most
interesting examples in the database.
- Never query for all the columns from a specific table, only ask for
the relevant columns given the question.
- You have access to tools for interacting with the database.
- You MUST double check your query before executing it.If you get an error
while executing a query,rewrite the query and try again.
- DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.)
to the database.
- DO NOT MAKE UP AN ANSWER OR USE PRIOR KNOWLEDGE, ONLY USE THE RESULTS
OF THE CALCULATIONS YOU HAVE DONE.
- Your response should be in Markdown. However, **when running  a SQL Query
in "Action Input", do not include the markdown backticks**.
Those are only for formatting the response, not for executing the command.
- ALWAYS, as part of your final answer, explain how you got to the answer
on a section that starts with: "Explanation:". Include the SQL query as
part of the explanation section.
- If the question does not seem related to the database, just return
"I don\'t know" as the answer.
- Only use the below tools. Only use the information returned by the
below tools to construct your query and final answer.
- Do not make up table names, only use the tables returned by any of the
tools below.

## Tools:

"""

In [None]:
MSSQL_AGENT_FORMAT_INSTRUCTIONS = """

## Use the following format:

Question: the input question you must answer.
Thought: you should always think about what to do.
Action: the action to take, should be one of [{tool_names}].
Action Input: the input to the action.
Observation: the result of the action.
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer.
Final Answer: the final answer to the original input question.

Example of Final Answer:
<=== Beginning of example

Action: query_sql_db
Action Input: 
SELECT TOP (10) [death]
FROM covidtracking 
WHERE state = 'TX' AND date LIKE '2020%'

Observation:
[(27437.0,), (27088.0,), (26762.0,), (26521.0,), (26472.0,), (26421.0,), (26408.0,)]
Thought:I now know the final answer
Final Answer: There were 27437 people who died of covid in Texas in 2020.

Explanation:
I queried the `covidtracking` table for the `death` column where the state
is 'TX' and the date starts with '2020'. The query returned a list of tuples
with the number of deaths for each day in 2020. To answer the question,
I took the sum of all the deaths in the list, which is 27437.
I used the following query

```sql
SELECT [death] FROM covidtracking WHERE state = 'TX' AND date LIKE '2020%'"
```
===> End of Example

"""

In [None]:
# Create LangChain model
model2 = AzureChatOpenAI(
    openai_api_version=api_version,
    azure_deployment=deployment,
    azure_endpoint=endpoint,
    api_key=subscription_key,
    temperature = 0,
    max_tokens=2000  # Increased to allow proper agent format responses with SQL queries and explanations
)
db = SQLDatabase.from_uri(f'sqlite:///{database_file_path}')
toolkit = SQLDatabaseToolkit(db=db, llm=model2)

In [None]:
QUESTION = """How much money did we make?
"""

agent_executor_SQL = create_sql_agent(
    prefix=MSSQL_AGENT_PREFIX,
    format_instructions = MSSQL_AGENT_FORMAT_INSTRUCTIONS,
    llm=model2,
    toolkit=toolkit,
    top_k=30,
    verbose=True,
    handle_parsing_errors=True   # don't crash on parsing errors
)

In [None]:
output_2 = agent_executor_SQL.invoke({"input": QUESTION})

In [None]:

# Or display it in markdown for better formatting
display(Markdown(output_2['input']))
display(Markdown(output_2['output']))

In [None]:
QUESTION = """Which product line had the highest average profit margin in the West region during 2023, and how much higher was it compared to the lowest one?
"""

agent_executor_SQL = create_sql_agent(
    prefix=MSSQL_AGENT_PREFIX,
    format_instructions = MSSQL_AGENT_FORMAT_INSTRUCTIONS,
    llm=model2,
    toolkit=toolkit,
    top_k=30,
    verbose=True
)

In [None]:
output_challenge = agent_executor_SQL.invoke({"input": QUESTION})

In [None]:
display(Markdown(output_challenge['input']))
display(Markdown(output_challenge['output']))

## STEP 4. Azure OpenAI Function Calling
What is the additional values
- provide specific isntructions for finding information 
- prioritzie queries for precise reulst and desired formats
- more control 

In [None]:

import json

## example of fucntion 
def get_current_weather(location, unit="Celsius"):
    """Get the current weather in a given location. 
    The default unit when not specified is Celsius"""
    if "merelbeke" in location.lower():
        return json.dumps(
            {"location": "Merelbeke", "country":"Belgium", "temperature": "20", "unit": unit}
        )
    elif "antwerpen" in location.lower():
        return json.dumps(
            {"location": "Antwerpen", "country":"Belgium", "temperature": "25", "unit": unit}
        )
    elif "las vegas" in location.lower():
        return json.dumps(
            {"location": "Las Vegas", "country":"USA", "temperature": "35", "unit": unit}
        )
    else:
        return json.dumps(
            {"location": location, "country":"unknown", "temperature": "unknown", "unit":  unit}
        )

get_current_weather("Merelbeke")

In [None]:

# user prompt
weather_messages = [
    {"role": "user",
     "content": """What's the weather like in Merelbeke,
                   Antwerpen, and Las Vegass?"""
    }
]

#tool definition

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": """Get the current weather in a given
                              location.The default unit when not
                              specified is Celsius""",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": """The city and state,
                                        e.g. San Francisco, CA""",
                    },
                    "unit": {
                        "type": "string",
                        "default":"Celsius",
                        "enum": [ "Fahrenheit", "Celsius"],
                        "description": """The messuring unit for
                                          the temperature.
                                          If not explicitly specified
                                          the default unit is 
                                          Celsius"""
                    },
                },
                "required": ["location"],
            },
        },
    }
]

In [None]:
#defining the OpenAI clietn class
client = AzureOpenAI(
    api_version=api_version,
    azure_endpoint=endpoint,
    api_key=subscription_key,
)


#just call the class
response = client.chat.completions.create(
    model=v_model,
    messages=weather_messages,
    tools=tools,
    tool_choice="auto", 
)

response_message = response.choices[0].message
print ("Response message: \n" , response_message, "\n\n" )

tool_calls = response_message.tool_calls

print ("TOOLS CALLS: \n" , tool_calls , "\n\n" )

available_functions = {
    "get_current_weather": get_current_weather,
} 

answers = []

if tool_calls:
   
    for tool_call in tool_calls:
        function_name = tool_call.function.name
        function_to_call = available_functions[function_name]
        function_args = json.loads(tool_call.function.arguments)
        function_response = function_to_call(
            location=function_args.get("location"),
            unit=function_args.get("unit", "Celsius")  ## you can see from the function=Fuction that the only argument is location
        )
        answers.append(
            {
                "tool_call_id": tool_call.id,
                "role": "tool",
                "name": function_name,
                "content": function_response,
            }
        )  
    print ("Answers: \n" , answers , "\n\n" ) 
    
   
    def print_arguments(obj):
        "Recursively find and print JSON arguments or content with key/value pairs."
        if isinstance(obj, list):
            for item in obj:
                print_arguments(item)
        elif isinstance(obj, dict):
            # if it looks like a serialized JSON string of arguments/content, try parsing it
            for key, val in obj.items():
                if key in ("arguments", "content") and isinstance(val, str) and val.strip().startswith("{"):
                    try:
                        parsed = json.loads(val)
                        for k, v in parsed.items():
                            if k == "unit":
                                print(f"{k}: {v} \n")
                                v = 1
                            else:
                                print(f"{k}: {v}")
                    except Exception:
                        pass
                else:
                    print_arguments(val)

print_arguments(answers)

In [None]:
import numpy as np
from sqlalchemy import text

def Total_KPI_01(reg, prodline):
    try:
        query = f"""
        SELECT region, productline,  KPI_01
        FROM sales_db
        WHERE region = '{reg}' AND 
              productline = '{prodline}'
        group by region, productline;
        """
        query = text(query)

        with engine.connect() as connection:
            result = pd.read_sql_query(query, connection)
            
        if not result.empty:
            return result.to_dict('records')
        else:
            return np.nan
        return result
    except Exception as e:
        print(e)
        return np.nan
    
    
def sum_profit_cost (reg, prodline):
    try:
        query = f"""
        SELECT region, productline, sum(profit), sum(cost)
        FROM sales_db
        where region = '{reg}'  AND
            productline = '{prodline}'
        group by region, productline;
        """
        query = text(query)

        with engine.connect() as connection:
            result = pd.read_sql_query(query, connection)
            
        if not result.empty:
            return result.to_dict('records')
        else:
            return np.nan
        return result
    except Exception as e:
        print(e)
        return np.nan

In [None]:
print(Total_KPI_01("East","Toys"))

print(sum_profit_cost("East","Toys"))

In [None]:
sql_messages = [
    {"role": "user",
     "content": """ How much what the summed up KPI_01 for Region East and productline Toys?"""
    },
    {"role": "user",
     "content": """ What is the sum of the profits and costs for Region East and productline Clothing"""
    },
    
]

In [None]:
tools_sql = [
    {
        "type": "function",
        "function": {
            "name": "get_total_kpi_01_for_region_productline",
            "description": """Retrieves the sum of the KPI_01 for a specified region and specified productline.""",
            "parameters": {
                "type": "object",
                "properties": {
                    "reg": {
                        "type": "string",
                        "description": """The name of the region
                                          (e.g., 'East', 'West')."""
                    },
                    "prodline": {
                        "type": "string",
                        "description": """The name of the productline 
                                          (e.g. 'Toys','Clothing')."""
                    }
                },
                "required": ["reg", "prodline"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "get_sum_profit_cost_for_region_productline",
            "description": """Retrieves the sum of the profits and the costs for a specified region and specified productline
                                """,
            "parameters": {
                "type": "object",
                "properties": {
                    "reg": {
                        "type": "string",
                        "description": """The name of the region
                                          (e.g., 'East', 'West')."""
                    },
                    "prodline": {
                        "type": "string",
                        "description": """The name of the productline 
                                          (e.g. 'Toys','Clothing')."""
                    }
                },
                "required": ["reg", "prodline"]
            }
        }
    }
]

In [None]:
#just call the class
response = client.chat.completions.create(
    model=v_model,
    messages=sql_messages,
    tools=tools_sql,
    tool_choice="auto", 
)

response_sql_message = response.choices[0].message
tool_calls = response_sql_message.tool_calls

print (tool_calls)
sql_answers = []

available_functions = {
    "get_total_kpi_01_for_region_productline": Total_KPI_01,
    "get_sum_profit_cost_for_region_productline":sum_profit_cost
}  

if tool_calls:
   
    for tool_call in tool_calls:
        function_name = tool_call.function.name
        function_to_call = available_functions[function_name]
        function_args = json.loads(tool_call.function.arguments)
        function_response = function_to_call(
            reg=function_args.get("reg"),
            prodline=function_args.get("prodline"),
        )
        sql_answers.append(
            {
                "tool_call_id": tool_call.id,
                "role": "tool",
                "name": function_name,
                "content": str(function_response),
            }
        ) 
    print ("Answers: \n" , sql_answers , "\n\n" ) 
    
import ast

for ans in sql_answers:
    content = ast.literal_eval(ans['content'])
    
    # handle both list and dict formats safely
    data = content[0] if isinstance(content, list) else content

    print(f"- Request: {ans['name']}")
    print(f"- Region: {data.get('Region', '')}")
    print(f"- ProdLine: {data.get('ProductLine', '')}")
    print(f"- KPI_01: {data.get('KPI_01', '').replace(',', '.')}")
    print(f"- Sum of profit: {data.get('sum(profit)', '')}")
    print(f"- Sum of Cost: {data.get('sum(cost)', '')}")

In [None]:
second_response = client.chat.completions.create(
            model=v_model,
            messages=sql_messages,
        )
print (second_response)

## Step 5: Leveraging Assistants API for SQL Database
### Assistants API
- behoudt conversatie context - statefull vs de chatcomplete niet
- stateful, historiek van interacties
- supporteert functions / tool calling
- Depreciation note from OpenAI --> responses API (mid 2026)
### Code Interpreter
- maakt het mogelijk voor de Assistants API om python code te genereren, uit te voeren en veranderen

Eigenschap	|  Assistants API  |  Responses API

Stateful (threads & runs)  |  ‚úÖ Ja  |  ‚ùå Nee

Contextbeheer  |  Server-side bij OpenAI  |  Client-side door jou

Flexibiliteit  |  Minder  |  Veel groter

Complexiteit  |  Hoog  |  Lager



In [None]:
import Helper
from Helper import get_positive_cases_for_state_on_date
from Helper import get_hospitalized_increase_for_state_on_date

In [None]:
# I) Create assistant
assistant = client.beta.assistants.create(
  instructions="""You are an assistant answering questions 
                  about a Covid dataset.""",
  model=v_model,  # Use the deployment name configured at the top (gpt-4.1-mini)
  tools=Helper.tools_sql)

# response = client.responses.create(
#     model=v_model,
#     instructions="You are an assistant answering questions about a Covid dataset.",
#     tools=Helper.tools_sql,
#     input=[
#         {"role": "user", "content": "What is the average infection rate by region?"}
#     ]
# )

# II) Create thread
thread = client.beta.threads.create()
print(thread)

In [None]:
# III) Add message
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="""how many hospitalized people we had in Alaska
               the 2021-03-05?"""
)
print(message)

In [None]:
messages = client.beta.threads.messages.list(
  thread_id=thread.id
)

print(messages.model_dump_json(indent=2))

In [None]:
# IV) Run assistant on thread

run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id,
)

In [None]:
import time
from IPython.display import clear_output

start_time = time.time()

status = run.status

while status not in ["completed", "cancelled", "expired", "failed"]:
    time.sleep(5)
    run = client.beta.threads.runs.retrieve(
        thread_id=thread.id,run_id=run.id
    )
    print("Elapsed time: {} minutes {} seconds".format(
        int((time.time() - start_time) // 60),
        int((time.time() - start_time) % 60))
         )
    status = run.status
    print(f'Status: {status}')
    if (status=="requires_action"):
        available_functions = {
            "get_positive_cases_for_state_on_date": get_positive_cases_for_state_on_date,
            "get_hospitalized_increase_for_state_on_date":get_hospitalized_increase_for_state_on_date
        }

        tool_outputs = []
        for tool_call in run.required_action.submit_tool_outputs.tool_calls:
            function_name = tool_call.function.name
            function_to_call = available_functions[function_name]
            function_args = json.loads(tool_call.function.arguments)
            function_response = function_to_call(
                state_abbr=function_args.get("state_abbr"),
                specific_date=function_args.get("specific_date"),
            )
            print(function_response)
            print(tool_call.id)
            tool_outputs.append(
                { "tool_call_id": tool_call.id,
                 "output": str(function_response)
                }
            )

        run = client.beta.threads.runs.submit_tool_outputs(
          thread_id=thread.id,
          run_id=run.id,
          tool_outputs = tool_outputs
        )


messages = client.beta.threads.messages.list(
  thread_id=thread.id
)

print(messages)

In [None]:
# Check the error details
print("Run Status:", run.status)
print("\nFull Run Object:")
print(run)

if hasattr(run, 'last_error') and run.last_error:
    print("\n‚ùå ERROR DETAILS:")
    print(f"Error Code: {run.last_error.code}")
    print(f"Error Message: {run.last_error.message}")


In [None]:
print(messages.model_dump_json(indent=2))

###Code Interpreter

In [None]:
file = client.files.create(
  file=open("./data/all-states-history.csv", "rb"),
  purpose='assistants'
)
assistant = client.beta.assistants.create(
  instructions="""You are an assitant answering questions about
                  a Covid dataset.""",
  model=v_model, 
  tool_resources={
        "code_interpreter": {"file_ids": [file.id]}
    } 
  )## new syntx
thread = client.beta.threads.create()
print(thread)
message = client.beta.threads.messages.create(
    thread_id=thread.id,
    role="user",
    content="""how many hospitalized people we had in Alaska
               the 2021-03-05?"""
)
print(message)
run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id,
)

In [None]:
status = run.status
start_time = time.time()
while status not in ["completed", "cancelled", "expired", "failed"]:
    time.sleep(5)
    run = client.beta.threads.runs.retrieve(
        thread_id=thread.id,
        run_id=run.id
    )
    print("Elapsed time: {} minutes {} seconds".format(
        int((time.time() - start_time) // 60),
        int((time.time() - start_time) % 60))
         )
    status = run.status
    print(f'Status: {status}')
    clear_output(wait=True)


messages = client.beta.threads.messages.list(
  thread_id=thread.id
)

print(messages.model_dump_json(indent=2))