In [1]:
# Load variables from a local .env file into the process environment first, so that
# anything imported or constructed afterwards can read them.
# NOTE(review): presumably this supplies the OpenAI credentials used by the agents — confirm.
from dotenv import load_dotenv
load_dotenv()

import logging

# Configure the root logger at INFO so progress messages from the agents (and the HTTP
# client) are shown in the cell outputs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [2]:
import json
import os

from sklearn.metrics.pairwise import cosine_similarity

from insights.agents.text2sql_agent import Text2SQLExecuteAgent
from insights.agents.insight_agent import InsightAgent, ProcessedQuestionResult, StructuredInsight
from insights.agents.db_summary_agent import DatabaseSummaryAgent, ColumnSummary, TableSummary, DatabaseSummary
from insights.agents.insights_consolidation_agent import InsightConsolidationAgent


In [3]:
# Root directory that holds the pipeline artifacts. Set INSIGHTS_DATA_DIR (in the shell or
# in the .env file loaded by load_dotenv() in the first cell) to run this notebook on
# another machine; the default keeps the original location.
INSIGHTS_DATA_DIR = os.environ.get(
    'INSIGHTS_DATA_DIR', '/Users/arshath/play/experiments/insights'
)

# With the default root these resolve to exactly the paths that were previously hard-coded.
db_summary_path = os.path.join(INSIGHTS_DATA_DIR, 'database_summary.json')
questions_path = os.path.join(INSIGHTS_DATA_DIR, 'generated_questions.json')
execution_results_path = os.path.join(INSIGHTS_DATA_DIR, 'nbs', 'sql_execution_results_gpt4.1.json')
insights_path = os.path.join(INSIGHTS_DATA_DIR, 'nbs', 'insights.json')

# Database summary: the model is built from the 'technical_summary' section, then the
# 'natural_language_summary' text is attached as an attribute.
with open(db_summary_path, 'r', encoding='utf-8') as f:
    db_summary_data = json.load(f)
db_summary = DatabaseSummary(**db_summary_data['technical_summary'])
db_summary.natural_language_summary = db_summary_data['natural_language_summary']

# Generated questions: fall back to an empty list when the "questions" key is absent.
with open(questions_path, 'r', encoding='utf-8') as f:
    questions_input = json.load(f)
analysis_questions = questions_input.get("questions", [])

# SQL execution results: keep the raw JSON under its own name so `execution_results`
# is only ever bound to ProcessedQuestionResult objects.
with open(execution_results_path, 'r', encoding='utf-8') as f:
    execution_results_raw = json.load(f)
execution_results = [ProcessedQuestionResult(**result) for result in execution_results_raw]

# Raw insights: same pattern, `insights` is only ever a list of StructuredInsight.
with open(insights_path, 'r', encoding='utf-8') as f:
    insights_raw = json.load(f)
insights = [StructuredInsight(**insight) for insight in insights_raw]

In [4]:
# Build the consolidation agent once, with its LLM provider and embedding model spelled
# out. A second, argument-less InsightConsolidationAgent() call used to follow and
# silently replaced this instance; the recorded log output shows both instances
# initialised with the same embedding settings, so the duplicate only added a redundant
# initialisation and hid which configuration was actually in use.
insights_consolidation_agent = InsightConsolidationAgent(
    llm_provider="openai",
    embedding_model_name="text-embedding-3-large"
)


INFO:insights.utils:Using existing OpenAI client for embeddings (Model: text-embedding-3-large).
INFO:insights.utils:InsightConsolidationAgent initialized. Embedding Provider: OpenAI (Model: text-embedding-3-large), Deduplication: 0.9, Synthesis: True
INFO:insights.utils:Using existing OpenAI client for embeddings (Model: text-embedding-3-large).
INFO:insights.utils:InsightConsolidationAgent initialized. Embedding Provider: OpenAI (Model: text-embedding-3-large), Deduplication: 0.9, Synthesis: True


In [5]:
# Run the consolidation agent over the loaded insights. Per the log output recorded for
# this cell, it deduplicates the raw insights and then attempts to synthesise clusters of
# related ones, issuing OpenAI embeddings and chat-completions requests along the way
# (network access and API credentials are therefore required).
consolidated_insights = insights_consolidation_agent.consolidate(insights)

INFO:insights.utils:Starting consolidation for 49 raw insights...
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:insights.utils:Reduced to 49 unique insights after deduplication.
INFO:insights.utils:Attempting insight synthesis...
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:insights.utils:Found 10 clusters for synthesis. 20 insights remain unclustered.
INFO:insights.utils:Synthesizing cluster 1/10...
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:insights.utils:Successfully synthesized cluster 1.
INFO:insights.utils:Synthesizing cluster 2/10...
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:insights.llm:Retrying... (1/3)
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:insights.utils:Successfully synthesized cluster 2.
INFO:insights.utils:Synthesizing clus

In [11]:
# Spot-check the first consolidated insight by displaying its fields as a plain dict.
consolidated_insights[0].model_dump()

{'insight_id': 'SYN-295270aba7',
 'question_id': 'N/A',
 'question_text': 'N/A',
 'headline': "Apple's M2 Series Solidifies Its Premium Position in the High-End Laptop Market",
 'description': "Apple's laptops, particularly those equipped with M2 processors, command the highest average selling prices in the market. The M2 Max processor, in particular, stands out with an average sales price of $2798.99, underscoring Apple's dominance in the premium segment. This reflects a strong brand premium and consumer perception of value, positioning Apple as a leader in high-end laptops. Competitors must understand these dynamics to effectively compete, while Apple can leverage this premium positioning in its marketing strategies.",
 'tier': <InsightTier.CONTRIBUTION: 'Contribution'>,
 'supporting_metrics': {},
 'supporting_examples': None,
 'comparison_details': 'Apple laptops vs other brands',
 'trend_pattern': None,
 'anomaly_description': None,
 'contribution_details': "Apple's M2 series proce

In [12]:
# Serialise into a separately named list instead of rebinding `consolidated_insights`:
# overwriting the model objects with dicts made this cell fail on a second run (dicts
# have no .model_dump()) and broke any cell that still expects model instances.
# mode="json" asks the model to emit JSON-compatible values (e.g. enum members such as
# the insight tier become their plain values), so json.dump cannot trip over
# non-serialisable objects.
consolidated_insight_records = [
    insight.model_dump(mode="json") for insight in consolidated_insights
]

# Written relative to the notebook's current working directory.
with open('consolidated_insights.json', 'w', encoding='utf-8') as f:
    json.dump(consolidated_insight_records, f, indent=4)