In [1]:
import os
import sys
# Make the parent of the working directory (treated as the project root) and its
# src/init folder importable from this notebook.
current_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(current_dir, os.pardir))
paths_to_add = [project_root, os.path.join(project_root, "src", "init")]
for path in paths_to_add:
    if path in sys.path:
        continue  # already registered; keeps re-running this cell free of duplicates
    sys.path.append(path)

In [2]:
import psycopg2
from contextlib import contextmanager

@contextmanager
def get_db_connection(connection_string):
    """Yield an open psycopg2 connection and close it when the block exits.

    The connection is opened with ``psycopg2.connect(connection_string)`` and
    closed in a ``finally`` clause, so it is released whether the ``with`` body
    finishes normally or raises. If connecting itself fails, the error
    propagates and there is nothing to close. No commit or rollback is issued
    by this helper.
    """
    connection = psycopg2.connect(connection_string)
    try:
        yield connection
    finally:
        connection.close()

def execute_sql_query(query, connection_string, params=None):
    """
    Execute a SQL query and return results.

    Args:
        query: SQL query string to execute. Prefer ``%s`` / ``%(name)s``
            placeholders combined with ``params`` over formatting values
            into the string.
        connection_string: PostgreSQL connection string
        params: Optional sequence or mapping of values bound to the
            placeholders in ``query``. Defaults to None (no parameters), so
            existing two-argument calls behave as before.

    Returns:
        List of results (each row as a tuple), or None if query fails

    Notes:
        This is a best-effort helper: any exception (connection, execution
        or fetch) is printed and converted into a ``None`` return value.
        No commit is issued here.
        NOTE(review): statements that produce no result set are expected to
        end up in the failure path because ``fetchall()`` has nothing to
        fetch - confirm against the psycopg2 cursor documentation.
    """
    try:
        with get_db_connection(connection_string) as conn:
            cursor = conn.cursor()
            try:
                # Values are passed separately so the driver does the quoting.
                cursor.execute(query, params)
                return cursor.fetchall()
            finally:
                # Release the cursor explicitly instead of relying on the
                # connection being closed afterwards.
                cursor.close()
    except Exception as e:
        print(f"Error executing query: {e}")
        return None

In [3]:
# Sanity-check the business glossary before running any term searches.
# NOTE(review): presumably verifies that every term referenced in the synonyms
# also exists in key_terms (the cell output reports exactly that) - confirm
# against the business_glossary module.
from business_glossary import check_glossary_consistency
check_glossary_consistency()

✅ All terms in synonyms exist in key_terms


In [15]:
# User asks using a synonym ("aum") instead of the glossary term name.
# NOTE(review): search_terms, key_terms, synonyms and related_terms are not
# defined in any cell visible above (the execution count jumps from 3 to 15) -
# confirm they are imported/defined before this cell so the notebook survives
# Restart & Run All.
user_question = 'aum by clients'
search_terms_output = search_terms(user_question, key_terms, synonyms, related_terms)
search_terms_output

{'key_terms': [{'name': 'Assets Under Management',
   'definition': 'Total market value of all client investments',
   'query_instructions': "Use account_assets from fact_account_monthly table. \n         Don't aggregate over time as this measure is semi-additive. \n         You can aggregate this measure at advisor, household, or business line level.",
   'exists_in_database': True}],
 'synonym_searched_for': 'aum',
 'synonym': {'name': 'Assets Under Management',
  'definition': 'Total market value of all client investments',
  'query_instructions': "Use account_assets from fact_account_monthly table. \n         Don't aggregate over time as this measure is semi-additive. \n         You can aggregate this measure at advisor, household, or business line level.",
  'exists_in_database': True},
 'synonym_exists_in_db': True,
 'synonym_docu': 'aum is synonym with Assets Under Management',
 'related_term_searched_for': None,
 'related_term_exists_in_db': False,
 'related_terms': None,
 'rel

In [None]:
# User asks using a synonym ("aum"), broken down by practice segments.
user_question = 'aum by practice segments'
# Expected: query for assets under management and do not aggregate it over time.
search_terms_output = search_terms(user_question, key_terms, synonyms, related_terms)
search_terms_output

In [None]:
# User asks for a vague term (undefined at org level) that is related to
# multiple terms available in the db.
user_question = 'Payments associated with advisors from firm Cedar Capital LLC'
# Expected: enter the disambiguation node, explain that "payment" can mean
# net revenue (revenue retained by Capital Partners) or payout (dollar amount
# paid to the advisor), and ask the user which one they prefer.
search_terms_output = search_terms(user_question, key_terms, synonyms, related_terms)
search_terms_output

In [None]:
# User asks with lazy (informal) language: the firm is referred to as
# "Cedar Capital" rather than by its full name.
user_question = 'net revenue associated with Cedar Capital advisors'
# Expected: filter for the sum of net revenue for advisors belonging to the
# firm Cedar Capital LLC.
search_terms_output = search_terms(user_question, key_terms, synonyms, related_terms)
search_terms_output

In [None]:
# User asks for a term that is defined but not in the db, and that is related
# to multiple terms available in the db.
user_question = 'distribution of advisor ID 8'
# Expected: enter the disambiguation node and say that distribution (advisor
# payout after tech fees are deducted) is not in the tables it has access to,
# but that it can offer net revenue (revenue retained by Capital Partners) or
# payout (dollar amount paid to the advisor), and ask which one the user prefers.
search_terms_output = search_terms(user_question, key_terms, synonyms, related_terms)
search_terms_output

In [None]:
# User asks for a vague term (undefined at org level) that is related to a
# single term available in the db: "producing assets".
user_question = 'producing assets by practice segments'
# Expected: query for advisory assets and explain that advisory assets are the
# assets in the Managed Portfolio and SMA business lines.
search_terms_output = search_terms(user_question, key_terms, synonyms, related_terms)
search_terms_output

In [None]:
# User asks for a term that is defined but not in the db, and that is related
# to a single term available in the db.
user_question = 'liquid assets by practice segments'
# Expected: say it does not have access to liquid assets (assets easily
# converted to cash) but return the results for advisory assets (assets in the
# Managed Portfolio and SMA business lines) instead.
# Filter for account_type = 'Custody', account.account_status = 'Active' and account.to_date = '9999-12-31'.
search_terms_output = search_terms(user_question, key_terms, synonyms, related_terms)
search_terms_output

In [None]:
# User asks for a term not in the db, with no synonyms and no related terms in the db:

In [None]:
# Multi-condition question: an advisor age filter combined with a threshold on
# "total assets".
# NOTE(review): the expected outcome is not documented for this case - add an
# expectation comment like the ones in the cells above.
user_question = 'List advisors under age 40 who have more than $300M in total assets.'
search_terms_output = search_terms(user_question, key_terms, synonyms, related_terms)
search_terms_output

In [None]:
# Multi-condition question: an advisor age filter combined with a threshold on
# "compensation".
# NOTE(review): the expected outcome is not documented for this case - add an
# expectation comment like the ones in the cells above.
user_question = 'List advisors under age 40 who have more than $300M in compensation'
search_terms_output = search_terms(user_question, key_terms, synonyms, related_terms)
search_terms_output