In [1]:
import os
import sys
# Resolve the project root, which sits two directory levels above the
# notebook's current working directory.
current_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(current_dir, "..", ".."))

# Put the project root and its src/init directory on sys.path so that
# project modules can be imported by the cells that follow.
paths_to_add = [project_root, os.path.join(project_root, "src", "init")]
for path in paths_to_add:
    if path in sys.path:
        continue  # already present; do not add a duplicate entry
    sys.path.append(path)

In [2]:
import warnings

# Read the Anthropic API key from the environment.
# The original code wrote the value straight back into os.environ; that is a
# no-op when the variable is set and raises an opaque
# "TypeError: str expected, not NoneType" when it is not. Warn explicitly
# instead so the missing configuration is obvious.
ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')
if not ANTHROPIC_API_KEY:
    warnings.warn(
        "ANTHROPIC_API_KEY is not set in the environment; "
        "calls to the Anthropic API are expected to fail until it is provided."
    )

# connection to db (None when CONNECTION_STRING_DB is not set)
connection_string = os.getenv('CONNECTION_STRING_DB')

In [6]:
# Imports

import agent
from langchain_core.messages import AIMessage, HumanMessage

# Bind the graph object and the individual agent entry points as
# notebook-level names so they can be used without the `agent.` prefix.
from agent import (
    graph,
    orchestrator,
    run_control_flow,
    generate_answer,
    create_sql_query_or_queries,
    extract_analytical_intent,
)

# Import initialization components
from src.init.initialization import (
    llm, llm_fast, create_config, tracer,
    objects_documentation, sql_dialect, connection_string
)

# Initial agent state shared by the run cells below. The question is only a
# placeholder here: each run cell overwrites 'current_question' before it
# invokes the graph.
question = 'placeholder'
test_state = {
    'objects_documentation': objects_documentation,
    'sql_dialect': sql_dialect,
    'messages_log': [],
    'intermediate_steps': [],
    'analytical_intent': [],
    'current_question': question,
    'current_sql_queries': [],
    'generate_answer_details': {},
    'llm_answer': AIMessage(content=''),
}

In [7]:
# Start the conversation with the graph.
question = 'What is the distribution of assets per household across asset ranges?'

test_state['current_question'] = question
# NOTE(review): this only rebinds a notebook-level name; confirm it actually
# resets anything the agent reads.
vector_store = None  # reset vector store
# thread_id is kept so the follow-up cell can continue on the same thread.
# NOTE(review): the second argument presumably requests a new thread — confirm
# against create_config.
config, thread_id = create_config('Run Agent',True)

result = graph.invoke(test_state, config = config)

# Deliberately NOT named `display`: binding that name at notebook level shadows
# IPython's built-in display() for every later cell in the session.
summary = f'''Analytical intent: {result['analytical_intent']}\n\nSQL query: {result['current_sql_queries']}\n\nGenerate Answer Details: {result['generate_answer_details']}\n\nAnswer: {result['llm_answer'].content}'''
print(summary)

Analytical intent: ["Aggregate household_assets from fact_household_monthly table grouped by asset_range_bucket for the most recent snapshot_date (2025-09-30), filtering for active households where household.household_status = 'Active' and household.to_date = '9999-12-31', and calculate count of households and sum of household_assets for each asset_range_bucket"]

SQL query: [{'query': "SELECT fhm.asset_range_bucket, COUNT(DISTINCT fhm.household_key) AS household_count, SUM(fhm.household_assets) AS total_household_assets FROM public.fact_household_monthly fhm INNER JOIN public.household h ON fhm.household_key = h.household_key WHERE fhm.snapshot_date = '2025-09-30' AND h.household_status = 'Active' AND h.to_date = '9999-12-31' GROUP BY fhm.asset_range_bucket ORDER BY fhm.asset_range_bucket", 'explanation': ['Filtered to snapshot date 2025-09-30', 'Excluded inactive households', 'public.fact_account_monthly, column snapshot_date: account dates between 2024-09-30 and 2025-09-30', 'public

In [None]:
# Continue the conversation with the graph (follow-up 1).
question = 'can you say again which one is the best one? sorry i missed it'

test_state['current_question'] = question
# NOTE(review): this only rebinds a notebook-level name; confirm it actually
# resets anything the agent reads.
vector_store = None  # reset vector store
# Re-use the thread id from the first run so the graph sees the same thread.
# The second return value is unused; it is not bound to `_` because IPython
# uses `_` for the previous cell output.
config, _unused_thread_id = create_config('Run Agent', False, thread_id) # (re-use same thread)
result = graph.invoke(test_state, config)

# Deliberately NOT named `display` (that would shadow IPython's built-in
# display()). The answer is printed via `.content`, matching the first run
# cell, instead of dumping the whole message object.
summary = f'''Analytical intent: {result['analytical_intent']}\n\nSQL query: {result['current_sql_queries']}\n\nGenerate Answer Details: {result['generate_answer_details']}\n\nAnswer: {result['llm_answer'].content}'''
print(summary)

In [6]:
import agent
from langchain_core.messages import AIMessage, HumanMessage

# Bind the individual agent entry points as notebook-level names so they can
# be called step by step without the `agent.` prefix.
from agent import (
    orchestrator,
    run_control_flow,
    generate_answer,
    create_sql_query_or_queries,
    extract_analytical_intent,
)

# Import initialization components
from src.init.initialization import  llm, llm_fast, create_config, tracer, objects_documentation, sql_dialect, connection_string

# Fresh agent state for stepping through the agent manually: empty logs and
# results, with the question below as the current question.
question = 'What is the distribution of assets per household?'

test_state = {
    'objects_documentation': objects_documentation,
    'sql_dialect': sql_dialect,
    'messages_log': [],
    'intermediate_steps': [],
    'analytical_intent': [],
    'current_question': question,
    'current_sql_queries': [],
    'generate_answer_details': {},
    'llm_answer': AIMessage(content=''),
}

#orchestrator(test_state)
#test_state = run_control_flow(test_state) # extract_analytical_intent
#test_state = run_control_flow(test_state) # create sql query + execute sql query
#orchestrator(test_state)
#test_state = run_control_flow(test_state) # generate answer + manage memory
# test_state = generate_answer.invoke({'state':test_state})

In [4]:
from agent import create_prompt_template
from typing_extensions import TypedDict, Annotated, Literal

def extract_msg_content_from_history(messages_log:list):
    """Join the ``content`` of every message in ``messages_log`` with newlines.

    Args:
        messages_log: list of message objects, each exposing a ``content``
            attribute.

    Returns:
        A single string with one message content per line; an empty string
        when the list is empty.
    """
    return "\n".join(message.content for message in messages_log)


def format_sql_query_results_for_prompt (sql_queries : list[dict]) -> str:
    """Render the SQL query entries as numbered insight/result blocks.

    Each entry must provide the keys ``'insight'`` and ``'result'``. Entries
    are numbered from 1 and rendered as
    ``Insight N: ... Raw Result of insight N: ...``; the blocks are separated
    by a blank line.

    Args:
        sql_queries: list of dicts, one per executed query.

    Returns:
        The concatenated blocks, or an empty string for an empty list.
    """
    return "\n\n".join(
        f"Insight {position}:\n{entry['insight']}\n\nRaw Result of insight {position}:\n{entry['result']}"
        for position, entry in enumerate(sql_queries, start=1)
    )

# Structured-output schema for the routing decision; it is handed to
# llm_fast.with_structured_output(...) further down in this cell.
# NOTE(review): the docstring and the Annotated description are presumably
# forwarded to the model as part of the schema — confirm before rewording them.
class ScenarioBC(TypedDict):
  ''' indication of the next step to be performed by the agent '''
  # One of "B", "C" or "Continue". Per the system prompt's decision process:
  # "B" = pleasantry or already-answered question, "C" = requested data is not
  # in the database schema, "Continue" = everything else.
  next_step: Annotated[Literal["B", "C","Continue"],"indication of the next step to be performed by the agent"] 

# System prompt for the routing decision. The doubled braces survive the
# f-string as single-brace placeholders ({question}, {messages_log},
# {insights}, {objects_documentation}), which are filled in at invoke time.
system_prompt = f"""You are a decision support consultant helping users make data-driven decisions.

    Your task is to decide the next action for this question: {{question}}.

    Conversation history: {{messages_log}}. 
    Current insights: "{{insights}}".
    Database schema: {{objects_documentation}}

    Decision process:  

    Step 1. Check if question is non-analytical or already answered:
       - If question is just pleasantries ("thank you", "hello", "how are you") → "B"
       - If the same question was already answered in conversation history → "B"

    Step 2. Check if requested data exists in schema:
      - If the user asks for data/metrics not available in the database schema → "C"
    
    Step 3. Otherwise → "Continue".
    """

# Prompt piped into the fast model, constrained to the ScenarioBC schema.
prompt = create_prompt_template('system', system_prompt)
chain = prompt | llm_fast.with_structured_output(ScenarioBC)

# Values for the four prompt placeholders, all taken from the current test_state.
chain_inputs = {
    'messages_log': extract_msg_content_from_history(test_state['messages_log']),
    'question': test_state['current_question'],
    'insights': format_sql_query_results_for_prompt(test_state['current_sql_queries']),
    'objects_documentation': test_state['objects_documentation'],
}
result = chain.invoke(chain_inputs)