In [1]:
import os
import sys
# Make the project importable from this notebook: the project root is taken
# to be two directory levels ABOVE the current working directory.
current_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(current_dir, os.pardir, os.pardir))
paths_to_add = [project_root, os.path.join(project_root, "src", "init")]

# Only append entries that are missing, so re-running the cell does not
# pile up duplicates on sys.path.
for path in paths_to_add:
    if path in sys.path:
        continue
    sys.path.append(path)

In [2]:
# Read the settings this notebook needs from the process environment.
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')
if ANTHROPIC_API_KEY is None:
    # os.environ only accepts strings: assigning None raises an opaque
    # "TypeError: str expected, not NoneType". Report the missing variable instead.
    print("WARNING: ANTHROPIC_API_KEY is not set in the environment")
else:
    os.environ['ANTHROPIC_API_KEY'] = ANTHROPIC_API_KEY
# connection to db (None when CONNECTION_STRING_DB is not set)
connection_string = os.getenv('CONNECTION_STRING_DB')

In [3]:
# Imports

import agent
from langchain_core.messages import AIMessage, HumanMessage

# Short notebook-level aliases for attributes of the `agent` module, so the
# cells below can refer to them without the module prefix.
graph = agent.graph
(
    orchestrator,
    run_control_flow,
    generate_answer,
    create_sql_query_or_queries,
    extract_analytical_intent,
) = (
    agent.orchestrator,
    agent.run_control_flow,
    agent.generate_answer,
    agent.create_sql_query_or_queries,
    agent.extract_analytical_intent,
)

# Import initialization components
from src.init.initialization import (
    llm, llm_fast, create_config, tracer,
    objects_documentation, sql_dialect, connection_string
)

# Initial state handed to the graph. The question is a placeholder here; the
# conversation cells overwrite 'current_question' before invoking the graph.
question = 'placeholder'
test_state = {
    # static context taken from src.init.initialization
    'objects_documentation': objects_documentation,
    'sql_dialect': sql_dialect,
    # per-run fields, all starting out empty
    'messages_log': [],
    'intermediate_steps': [],
    'analytical_intent': [],
    'current_question': question,
    'current_sql_queries': [],
    'generate_answer_details': {},
    'llm_answer': AIMessage(content=''),
}

✅ All terms in synonyms and related_terms exist in key_terms


In [None]:
# start the conversation with the graph
question = 'What is the distribution of assets per household across asset ranges?'

test_state['current_question'] = question
# NOTE(review): this only rebinds a notebook-level name. If the agent keeps its
# own vector store, this assignment does not reset it -- confirm.
vector_store = None  # reset vector store
# thread_id is kept so the follow-up cell can continue on the same thread.
config, thread_id = create_config('Run Agent', True)

result = graph.invoke(test_state, config=config)

# Bound to `run_summary`, not `display`: assigning to `display` would shadow
# IPython's built-in display() helper for the rest of the kernel session.
run_summary = (
    f"Analytical intent: {result['analytical_intent']}\n\n"
    f"SQL query: {result['current_sql_queries']}\n\n"
    f"Generate Answer Details: {result['generate_answer_details']}\n\n"
    f"Answer: {result['llm_answer'].content}"
)
print(run_summary)

In [None]:
# continue the conversation with the graph (followup 1)
question = 'can you say again which one is the best one? sorry i missed it'

test_state['current_question'] = question
# NOTE(review): this only rebinds a notebook-level name. If the agent keeps its
# own vector store, this assignment does not reset it -- confirm.
vector_store = None  # reset vector store
# Re-use the thread_id produced by the first conversation cell (same thread).
config, _ = create_config('Run Agent', False, thread_id)
result = graph.invoke(test_state, config=config)

# Bound to `run_summary`, not `display`: assigning to `display` would shadow
# IPython's built-in display() helper for the rest of the kernel session.
# The answer is printed via `.content`, like the first conversation cell,
# instead of dumping the whole message object.
run_summary = (
    f"Analytical intent: {result['analytical_intent']}\n\n"
    f"SQL query: {result['current_sql_queries']}\n\n"
    f"Generate Answer Details: {result['generate_answer_details']}\n\n"
    f"Answer: {result['llm_answer'].content}"
)
print(run_summary)

In [10]:
import agent
from langchain_core.messages import AIMessage, HumanMessage

# Short notebook-level aliases for attributes of the `agent` module.
(
    orchestrator,
    run_control_flow,
    generate_answer,
    create_sql_query_or_queries,
    extract_analytical_intent,
) = (
    agent.orchestrator,
    agent.run_control_flow,
    agent.generate_answer,
    agent.create_sql_query_or_queries,
    agent.extract_analytical_intent,
)

# Import initialization components
from src.init.initialization import  llm, llm_fast, create_config, tracer, objects_documentation, sql_dialect, connection_string

# Fresh state for stepping through the agent manually with a fixed question.
question = 'What is the distribution of assets per household?'

test_state = {
    # static context taken from src.init.initialization
    'objects_documentation': objects_documentation,
    'sql_dialect': sql_dialect,
    # per-run fields, all starting out empty
    'messages_log': [],
    'intermediate_steps': [],
    'analytical_intent': [],
    'current_question': question,
    'current_sql_queries': [],
    'generate_answer_details': {},
    'llm_answer': AIMessage(content=''),
}

# Manual step-by-step run of the agent (currently disabled):
# orchestrator(test_state)
# test_state = run_control_flow(test_state)  # extract_analytical_intent
# test_state = run_control_flow(test_state)  # create sql query + execute sql query
# orchestrator(test_state)
# test_state = run_control_flow(test_state)  # generate answer + manage memory
# test_state = generate_answer.invoke({'state': test_state})

In [11]:

from langchain_core.agents import AgentAction
from typing_extensions import Annotated,Literal,Union,TypedDict

# Which LLM backend to target: change to 'openai' or 'anthropic' as needed.
# NOTE(review): the recorded output of this cell is
# "NameError: name 'llm_provider' is not defined" even though the name is bound
# here. Presumably a helper defined in another module reads `llm_provider`
# from its own globals, which this assignment does not reach -- confirm.
llm_provider = "anthropic"

def _content_to_text(content) -> str:
    ''' flatten one message's `content` (str, list of blocks, or None) into plain text '''
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # Content given as a list of blocks: keep bare strings and the "text"
        # field of dict blocks, skip everything else (e.g. non-text blocks).
        parts = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict) and isinstance(block.get("text"), str):
                parts.append(block["text"])
        return "".join(parts)
    return str(content)


def extract_msg_content_from_history(messages_log: list):
    ''' from a list of base messages, extract just the content

    Args:
        messages_log: objects exposing a `.content` attribute; None or an
            empty list yields an empty string.

    Returns:
        The contents of all messages joined with newlines. Non-string content
        is flattened to text first, so the join cannot fail on it.
    '''
    return "\n".join(_content_to_text(msg.content) for msg in (messages_log or []))

class ClearOrAmbiguous(TypedDict):
    ''' conclusion about the analytical intent extraction process '''
    # Schema passed to llm.with_structured_output for the first chain.
    # The two literal verdicts are compared verbatim by the branching code in
    # this cell, so they must not be reworded.
    # NOTE(review): docstring and annotation text are presumably forwarded to
    # the model as schema descriptions -- kept verbatim for that reason.
    analytical_intent_clearness: Annotated[
        Literal["Analytical Intent Extracted", "Analytical Intent Ambiguous"],
        "conclusion about the analytical intent extraction process",
    ]

class AnalyticalIntents(TypedDict):
    ''' list of analytical intents '''
    # Schema passed to llm.with_structured_output for the "clear question"
    # chain; the field may be a list of strings or None.
    analytical_intent: Annotated[
        Union[list[str], None],
        "natural language descriptions to capture the analytical intents",
    ]

class AmbiguityAnalysis(TypedDict):
    ''' analysis of ambiguous question with explanation and alternatives '''
    # Schema passed to llm.with_structured_output for the "ambiguous question"
    # chain; both fields are copied into test_state by the code further down.
    ambiguity_explanation: Annotated[
        str,
        "brief explanation of what makes the question ambiguous",
    ]
    agent_questions: Annotated[
        list[str],
        "2-3 alternative analytical intents as questions",
    ]

# System prompt for the first step of this cell: classify the user question as
# CLEAR or AMBIGUOUS with respect to the database schema.
# Template placeholders: {objects_documentation}, {messages_log}, {question}.
# The two response strings at the end of the prompt are the same literals that
# ClearOrAmbiguous declares and that the branching code compares against.
# NOTE(review): the text contains typos ("ambigous", "deduct" for "deduce").
# They are left untouched here because the literal is runtime text.
sys_prompt_clear_or_ambiguous = """Decide whether the user question is clear or ambigous based on this specific database schema:
{objects_documentation}.

Conversation history:
"{messages_log}".

User question:
"{question}".

*** The question is CLEAR if ***
- It has a single, obvious analytical approach in terms of underlying source columns, relationships or past conversations.    
  Example: "what is the revenue?" is clear in a database schema that contains just 1 single metric that can answer the question (ex: net_revenue).

- The column and metric naming in the schema clearly points to one dominant method of interpretation. 
  Example: "what is the top client?" is clear in a database schema that contains just 1 single metric that can answer the question (ex: sales_amount). 

- You can apply reasonable assumptions. Examples:
  No specific time periods indicated -> assume a recent period -> CLEAR.
  No level of details specified -> use highest aggregation level -> CLEAR.

- You can deduct the analytical intent from the conversation history.

*** The question is AMBIGUOUS if ***
- Different source columns would give different insights.     

- Different metrics could answer the same question:
  Example: "What is the top client?" is ambigous in a database schema that contains multiple metrics that can answer the question (highest value of sales / highest number of sales). 

Response format:
If CLEAR -> "Analytical Intent Extracted".
If AMBIGUOUS -> "Analytical Intent Ambiguous". 
"""

# System prompt for the "clear question" path: turn the user ask into one or
# more analytical intents (one sentence each, one SQL query per intent).
# Template placeholders: {objects_documentation}, {messages_log}, {question}.
# NOTE(review): the text contains a typo ("preffered"). It is left untouched
# here because the literal is runtime text.
sys_prompt_clear = """Refine technically the user ask for a sql developer with access to the following database schema:
{objects_documentation}.

Conversation history:
"{messages_log}".

Last user prompt:
"{question}".

Important considerations about creating analytical intents:
    - The analytical intent will be used to create a single sql query.
    - Write it in 1 sentence.
    - Mention just the column names, tables names, grouping levels, aggregation functions (preffered if it doesn't restrict insights) and filters from the database schema.
    - If the user ask is exploratory (ex: "What can you tell me about the dataset?"), create 3-5 analytical intents.
    - If the user ask is non-exploratory, create only one analytical intent.
    - If the user asks for statistical analysis between variables (ex correlation) do not compute the statistical metrics, instead just show a simple side by side or group summary.

Important considerations about time based analysis:
  - If the source columns are from tables showing evolutions of metrics over time, clearly specify the time range to apply the analysis on:
    Example: If the question does not specify a time range, specify a recent period like last 12 months, last 3 months.
  - Use explicit date filters instead of relative expressions like "last 12 months". 
  - Derive actual date ranges from the database schema under "Important considerations about dates available".
  - Group the specified period in time-based groups (monthly, quarterly) and compare first with last group.

Important considerations about complex, multi-steps analytical intents:
- An analytical query is multi-step if it requires sequential data gathering and statistical analysis,
  where each search builds upon previous results to examine relationships, correlations, or comparative patterns between variables.
- Break it down into sequential steps where each represents a distinct analytical intent:
  Template: "Step 1: <analytical intent 1>. Step 2: <analytical intent 2>. Step 3: <analytical intent 3>"
  """

# System prompt for the "ambiguous question" path: explain what makes the
# question ambiguous and propose up to 3 alternative analytical intents.
# Template placeholders: {objects_documentation}, {messages_log}, {question}.
# The chain built from this prompt is parsed into AmbiguityAnalysis.
sys_prompt_ambiguous = """
The latest user question is ambiguous based on the following database schema:
{objects_documentation}.

Here is the conversation history with the user:
"{messages_log}".

Latest user message:
"{question}".

Step 1: Identify what makes the question ambiguous. The question is ambiguous if:

- Different source columns would give substantially different insights:
  Example: pre-aggregated vs computed metrics with different business logic.

- Multiple fundamentally different metrics could answer the same question:
  Example: "What is the top client?" is ambiguous in a database schema that contains multiple metrics that can answer the question (highest value of sales / highest number of sales).

- Different columns with the same underlying source data (check database schema) do NOT create ambiguity.

Step 2: Create maximum 3 alternatives of analytical intents to choose from.
    - Do not include redundant intents, be focused.
    - Each analytical intent is for creating one single sql query.
    - Write each analytical intent using 1 sentence.
    - Mention specific column names, tables names, aggregation functions and filters from the database schema.
    - Mention only the useful info for creating sql queries.

Step 3: Create a brief explanation in this format:
  1. One sentence explaining the ambiguity
  2. Present the 2-3 alternatives as clear options for the user to choose from

Use simple, non-technical language. Be concise.
"""  

# NOTE(review): `create_prompt_template` is not defined or imported anywhere in
# the visible notebook. It presumably comes from an earlier kernel session or
# another module -- confirm, and import it explicitly.
def _structured_chain(system_prompt, schema):
    ''' build a system prompt template and pipe it into the LLM constrained to `schema` '''
    template = create_prompt_template('system', system_prompt)
    return template, template | llm.with_structured_output(schema)


# chain_1: clear vs. ambiguous verdict
prompt_clear_or_ambiguous, chain_1 = _structured_chain(sys_prompt_clear_or_ambiguous, ClearOrAmbiguous)

# chain_2: analytical intents for a clear question
prompt_clear, chain_2 = _structured_chain(sys_prompt_clear, AnalyticalIntents)

# chain_3: explanation + alternatives for an ambiguous question
prompt_ambiguous, chain_3 = _structured_chain(sys_prompt_ambiguous, AmbiguityAnalysis)

# Inputs shared by all three chains; the keys match the placeholders used in
# the system prompts above.
input_data = dict(
    objects_documentation=test_state['objects_documentation'],
    question=test_state['current_question'],
    messages_log=extract_msg_content_from_history(test_state['messages_log']),
)

# determine if clear or ambiguous
result_1 = chain_1.invoke(input_data)
# Read the verdict defensively: a missing/None result or key falls through to
# the explicit error branch below instead of failing with a TypeError/KeyError.
clearness = (result_1 or {}).get('analytical_intent_clearness')

# Based on the verdict, invoke the matching chain and pick the next tool.
if clearness == "Analytical Intent Extracted":
    # create analytical intents
    result_2 = chain_2.invoke(input_data)
    # next tool to call
    tool_name = 'create_sql_query_or_queries'
    output = {
        'scenario': 'A',
        'analytical_intent': result_2['analytical_intent'],
        'agent_questions': None,
    }
elif clearness == "Analytical Intent Ambiguous":
    # create ambiguity analysis (combines both analytical intents and explanation)
    result_3 = chain_3.invoke(input_data)
    # next tool to call
    tool_name = 'generate_answer'
    output = {
        'scenario': 'D',
        'analytical_intent': result_3['agent_questions'],
        'agent_questions': result_3['agent_questions'],
    }
    # store the explanation; 'agent_questions' is written once, in the state
    # update below, for both scenarios
    test_state['generate_answer_details']['ambiguity_explanation'] = result_3['ambiguity_explanation']
else:
    # Without this branch `output` / `tool_name` would be undefined, or stale
    # from a previous run of the cell, and the state update would silently
    # use the wrong values.
    raise ValueError(f"Unexpected analytical_intent_clearness: {clearness!r}")

# update the state
test_state['scenario'] = output['scenario']
test_state['generate_answer_details']['agent_questions'] = output['agent_questions']
test_state['analytical_intent'] = output['analytical_intent']

# control flow: record this step as done, then queue the next tool
# NOTE: re-running the cell appends both entries again.
action = AgentAction(tool='extract_analytical_intent', tool_input='', log='tool ran successfully')
test_state['intermediate_steps'].append(action)
test_state['intermediate_steps'].append(AgentAction(tool=tool_name, tool_input='', log=''))

NameError: name 'llm_provider' is not defined