In [16]:
# Import necessary libraries
import os
import requests
import json
from dotenv import load_dotenv
from openai import AzureOpenAI
from langfuse.decorators import observe
# Load environment variables
load_dotenv()

# Initialize the Azure OpenAI client
client = AzureOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_API_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.getenv('AZURE_OPENAI_VERSION')
)



In [None]:
import time
import datetime

# Function to retrieve the schema from ROAPI
def get_roapi_schema():
    url = "http://roapi-app-arash.dhbjd9bvccbjedea.eastus.azurecontainer.io:8000/api/schema"
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        print("Error retrieving schema:", response.status_code)
        return None


# Function to get the current date
def get_current_date():
    return datetime.datetime.now().strftime('%Y-%m-%d')

# Function to create an assistant
@observe()
def create_assistant(formatted_date):
    
    assistant_instructions = f"""
Given a user question related to the data in the database, first generate the SQL query that conforms to PostgreSQL SQL dialect. Pay attention to the columns that need to be in double quotes and the ones that should not be. 
Please pay close attention to the following points:
1- 'channel' is an alias to 'device_class', so if the user asks for count of 'channel' you need to do the count of 'device_class'.
2- 'is_live' indicates whether the video is live or not, so use this if you want to calculate on-demand versus live.
3- If you need to compare numbers such as 'duration' or anything else, and the results are all zero, you should not rank them.
4- The format of the date is like '2024-07-03', so you might need to do some formatting to get the date in the right format.
5- If you are asked for analysis over days, months, or time, you might need to look up 'days', 'months', and 'time' columns, and that is not part of 'date' column.
6- For video duration use 'duration' column, not 'video_duration' column.

Below are some examples of the SQL queries:
1- SELECT COUNT(DISTINCT user_id) AS active_user_count FROM bitmovin WHERE date >= to_date(cast(now() AS VARCHAR)) - INTERVAL '200 days' AND user_id IS NOT NULL;
2- SELECT * FROM bitmovin WHERE date >= to_date('{formatted_date}') - INTERVAL '200 days' AND user_id IS NOT NULL LIMIT 10;
3- SELECT "AccountType", "DeviceType", "ApiName", COUNT(*) AS api_usage_count FROM asl GROUP BY "AccountType", "DeviceType", "ApiName" ORDER BY "AccountType", "DeviceType", api_usage_count DESC;
4- SELECT asl."TenantId", asl."DeviceType", bitmovin."browser", COUNT(*) AS usage_count, AVG(bitmovin."page_load_time") AS avg_page_load_time, AVG(bitmovin."startuptime") AS avg_startup_time FROM asl JOIN bitmovin ON asl."TenantId" = bitmovin."tenant" GROUP BY asl."TenantId", asl."DeviceType", bitmovin."browser" ORDER BY usage_count DESC LIMIT 10;

Make sure to make date queries align to today's date {formatted_date}.

Then tell me the SQL query that you will use.
"""

    assistant = client.beta.assistants.create(
        name="SQL Assistant",
        description="An assistant that generates SQL queries for ROAPI.",
        model="gpt-4",  # Replace with your deployed Azure OpenAI model
        tools=[],  # No tools are necessary for SQL generation
        instructions=assistant_instructions
    )
    return assistant


# Function to create a thread
@observe()
def create_thread(user_input, schema):
    # Prepare the initial message with the schema
    initial_message = f"""
Database schema:
{json.dumps(schema, indent=2)}

User request:
{user_input}
"""
    thread = client.beta.threads.create(
        messages=[
            {"role": "user", "content": initial_message}
        ]
    )
    return thread

# Function to run the assistant and process user input
@observe()
def run_thread(thread_id, assistant_id):
    # Start a new run
    run = client.beta.threads.runs.create(
        thread_id=thread_id,
        assistant_id=assistant_id
    )

    # Poll for results
    while run.status not in ["completed", "failed", "cancelled"]:
        time.sleep(1)  # Wait for 1 second before checking the status again
        run = client.beta.threads.runs.retrieve(
            thread_id=thread_id,
            run_id=run.id
        )

    if run.status == "completed":
        # Retrieve the generated SQL query
        messages = client.beta.threads.messages.list(thread_id=thread_id)
        assistant_messages = [m for m in messages.data if m.role == "assistant"]
        if assistant_messages:
            assistant_message = assistant_messages[-1]
            return assistant_message.content
        else:
            print("Error: No response from assistant.")
            return None
    else:
        print(f"Run failed with status: {run.status}")
        return None
    
# Function to execute SQL query against ROAPI
@observe()
def execute_sql_query(sql_query):
    url = "http://roapi-app-arash.dhbjd9bvccbjedea.eastus.azurecontainer.io:8000/api/sql"

    print("SQL Query: ", sql_query)
    response = requests.post(url, data=sql_query, headers={"Content-Type": "application/json"})
    if response.status_code == 200:
        try:
            return response.json()
        except json.JSONDecodeError:
            return response.text
    else:
        print("Error executing SQL query:", response.status_code)
        return None

# Main function
def main():
    # Step 1: Get the schema
    schema = get_roapi_schema()
    if schema is None:
        return

    # Step 2: Create an assistant
    assistant = create_assistant()
    print("Assistant created:", assistant.id)

    # Step 3: User input
    user_input = input("Enter your query request: ")

    # Step 4: Create a thread
    thread = create_thread(user_input=user_input, schema=schema)
    print("Thread created:", thread.id)

    # Step 5: Run the assistant
    sql_query = run_thread(thread_id=thread.id, assistant_id=assistant.id)
    if sql_query:
        print("\nGenerated SQL Query:")
        print(sql_query)

        # Step 6: Execute the SQL query
        result = execute_sql_query(sql_query)
        if result:
            print("\nQuery Result:")
            print(json.dumps(result, indent=2))
        else:
            print("No results returned.")
    else:
        print("Failed to generate a SQL query.")

In [20]:
main()

TypeError: create_assistant() missing 1 required positional argument: 'formatted_date'

In [3]:


# Function to retrieve the schema from ROAPI
def get_roapi_schema():
    url = "http://roapi-app-arash.dhbjd9bvccbjedea.eastus.azurecontainer.io:8000/api/schema"
    response = requests.get(url)
    if response.status_code == 200:
        schema = response.json()
        return schema
    else:
        print("Error retrieving schema:", response.status_code)
        return None

# Function to wait for run completion
def wait_for_run_completion(thread_id, run_id):
    while True:
        run = client.beta.threads.runs.retrieve(thread_id=thread_id, run_id=run_id)
        if run.status in ['completed', 'failed', 'incomplete']:
            return run
        else:
            time.sleep(1)  # Wait before checking again

# Main function to interact with the user
def main():
    # Step 1: Get the schema
    schema = get_roapi_schema()
    if schema is None:
        return

    # Save the schema to a file
    with open("schema.json", "w") as f:
        json.dump(schema, f)

    # Upload the schema file to the Assistant
    schema_file = client.files.create(
        file=open("schema.json", "rb"),
        purpose='assistants'
    )

    # Step 2: Create the Assistant
    assistant = client.beta.assistants.create(
        name="SQL Query Generator",
        instructions="""
You are an assistant that generates SQL queries in PostgreSQL dialect based on user requests.
Use the database schema provided to generate syntactically correct SQL queries that consider the schema.

When providing the SQL query, please enclose it within <SQL></SQL> tags to make it easier to extract.
""",
        model="gpt-4",
        tools=[{"type": "file_search"}],
        tool_resources={
            "file_search": {
                "file_ids": [schema_file.id]
            }
        }
    )

    # Step 3: Get user input
    user_input = input("Please enter your request: ")

    # Create a Thread with the user's message
    thread = client.beta.threads.create(
        messages=[
            {
                "role": "user",
                "content": user_input
            }
        ]
    )

    # Step 4: Run the Assistant on the Thread
    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant.id
    )

    # Wait for the run to complete
    run = wait_for_run_completion(thread.id, run.id)
    if run.status != 'completed':
        print("Run did not complete successfully. Status:", run.status)
        return

    # Step 5: Retrieve the Assistant's response
    messages = client.beta.threads.messages.list(thread_id=thread.id)
    assistant_message = None
    for message in messages.data[::-1]:
        if message.role == 'assistant':
            assistant_message = message
            break

    if assistant_message is None:
        print("No assistant response found.")
        return

    # Extract the content from the assistant's message
    assistant_content = ''.join([item['text']['value'] for item in assistant_message.content if item['type'] == 'text'])

    print("\nAssistant's Response:")
    print(assistant_content)

    # Step 6: Extract SQL query from the assistant's response
    import re
    match = re.search(r'<SQL>(.*?)</SQL>', assistant_content, re.DOTALL)
    if match:
        sql_query = match.group(1).strip()
        print("\nGenerated SQL Query:")
        print(sql_query)
    else:
        print("No SQL query found in the assistant's response.")
        return

    # Step 7: Execute the SQL query against ROAPI
    url = "http://roapi-app-arash.dhbjd9bvccbjedea.eastus.azurecontainer.io:8000/api/sql"
    response = requests.post(url, data=sql_query, headers={"Content-Type": "application/json"})
    if response.status_code == 200:
        try:
            result = response.json()
            print("\nQuery Result:")
            print(json.dumps(result, indent=2))
        except json.JSONDecodeError:
            print("Error decoding JSON response.")
            print(response.text)
    else:
        print("Error executing SQL query:", response.status_code)
        print(response.text)



In [4]:
main()

BadRequestError: Error code: 400 - {'error': {'message': "Unknown parameter: 'tool_resources.file_search.file_ids'.", 'type': 'invalid_request_error', 'param': 'tool_resources.file_search.file_ids', 'code': 'unknown_parameter'}}