In [11]:
import json
import os
import random
import sqlite3
from datetime import datetime
from typing import Dict, List, TypedDict
from loguru import logger

from langchain.agents import create_sql_agent
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.sql_database import SQLDatabase
from langchain_core.tools import tool
from langgraph.graph import END, START, StateGraph
from pydantic import BaseModel

# Define State Schema
class GraphState(Dict):
    input_query: str
    year: int
    selic_rate: float
    sufficient_analysis: bool
    suggested_metrics: List[str]
    selected_metric: str
    analysis_results: Dict[str, Dict[str, str]]
    sql_data: Dict
    future_data: Dict
    report: str

# Define Input Schema for Tools
class ToolInputSchema(BaseModel):
    state: Dict

# Initialize Logging
logger.add("workflow.log", rotation="1 MB", retention="7 days", level="DEBUG")

# 1. SQL Agent Setup
def connect_sql_agent():
    """
    Sets up the SQL agent using an LLM and a database connection.
    """
    db_path = "synthetic_data.db"
    db = SQLDatabase.from_uri(f"sqlite:///{db_path}")

    llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0)

    sql_agent = create_sql_agent(
        llm=llm,
        db=db,
        agent_type="openai-tools",
        verbose=True,
    )
    return sql_agent, db


# Initialize SQL Agent and Database Connection
sql_agent, db = connect_sql_agent()



def list_available_data(db_path: str = "synthetic_data.db") -> Dict[str, List[str]]:
    """
    Lists all available tables and their columns in the SQLite database.

    Args:
        db_path (str): Path to the SQLite database file. Default is "synthetic_data.db".

    Returns:
        Dict[str, List[str]]: A dictionary where keys are table names and values are lists of column names.
    """
    available_data = {}

    # Connect to the database
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        # Fetch all table names
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = [row[0] for row in cursor.fetchall()]

        # Fetch columns for each table
        for table in tables:
            cursor.execute(f"PRAGMA table_info({table});")
            columns = [row[1] for row in cursor.fetchall()]
            available_data[table] = columns

        conn.close()
    except sqlite3.Error as e:
        logger.error(f"Error accessing the database: {e}")

    return available_data


# Process Input Query with LLM
def process_input_with_llm(state: Dict) -> Dict:
    """
    Processes user input using an LLM to extract parameters for the workflow.

    Updates the state with extracted parameters (year, selic_rate).
    """
    # Get input query from state
    state["input_query"] = input("Enter your query (comma-separated): ").split(',')
    input_query = state.get("input_query", "Analyze the credit environment.")
    
    print("\nProcessing your input query:")
    print(f"Input Query: {input_query}")

    try:
        # Initialize LLM
        llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0)

        # Define the prompt
        prompt_template = f"""
        You are an assistant that extracts financial parameters from user queries.
        Given the following input, identify and extract the 'selic_rate' and 'year':

        Input: {input_query}

        Provide the output in the following JSON format:
        {{
            "year": <year>,
            "selic_rate": <selic_rate>
        }}

        If a parameter is missing or cannot be extracted, return null for that parameter.
        """

        print("\nExtracting parameters using the LLM...")
        llm_response = llm.invoke(prompt_template)

        # Parse the LLM response into a Python dictionary
        extracted_parameters = json.loads(llm_response)
        print("Parameters extracted successfully.")
    except Exception as e:
        # Handle any errors and provide fallback values
        logger.error(f"Error processing input with LLM: {e}")
        print("Error encountered during processing. Using default values.")
        extracted_parameters = {"year": None, "selic_rate": None}

    # Update state with extracted parameters or fallback defaults
    state["year"] = extracted_parameters.get("year", datetime.now().year)  # Default to current year
    state["selic_rate"] = extracted_parameters.get("selic_rate", 14.5)  # Default SELIC rate

    # Log the updated state
    log_state(state, "process_input_with_llm")
    return state



# Define Workflow Functions
def define_objectives(state: Dict) -> Dict:
    """
    Takes user's input and defines the objectives of the credit policy based on business goals.
    """
    print("Please define the business goals for the credit policy:")
    print("For example: Increase revenue, expand market share, or manage risk.")
    business_goals = input("Enter business goals (comma-separated): ").split(',')

    print("\nBased on the business goals, define specific credit policy objectives:")
    print("For example: Reduce bad debts, support sales growth, or ensure compliance.")
    credit_policy_goals = input("Enter credit policy goals (comma-separated): ").split(',')

    state["business_goals"] = [goal.strip() for goal in business_goals if goal.strip()]
    state["credit_policy_goals"] = [goal.strip() for goal in credit_policy_goals if goal.strip()]
    log_state(state, "define_objectives")
    return state

def assess_environment(state: Dict) -> Dict:
    """
    Assesses the current environment by analyzing market conditions, reviewing policies, and understanding customers.
    """
    # Prepare the input query
    input_query = state.get("input_query", "Analyze the credit environment.")
    prompt = (
        f"Based on the following user query:\n"
        f"\"{input_query}\"\n\n"
        "Financial diagnostics is a process of researching the financial condition of the company's main financial performance "
        "in order to identify reserves.\n\n"
        "Assess the current credit environment, focusing on its financials:\n"
        "- Economic conditions and market trends, for credit itself in terms of demand, competition and so on.\n"
        "- Effectiveness of existing policies, how lucrative and costly is the current operations set.\n"
        "- Current and past customer credit scoring and underwriting policies.\n"
        "- Portfolio of defaults: what is the distribution considering the main metrics such as over90, ever180, over180 and so on."
    )

    # Use the LLM to process the prompt
    try:
        llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7)
        environment_assessment = llm.invoke(prompt).content
    except Exception as e:
        logger.error(f"Error during environment assessment: {e}")
        environment_assessment = "Environment assessment could not be completed due to an error."

    # Update and log state
    state["environment_assessment"] = environment_assessment
    log_state(state, "assess_environment")
    return state



def elicit_metrics(state: Dict) -> Dict:
    """
    Uses an LLM to generate a list of relevant metrics to answer the user's query.
    """
    input_query = state.get("input_query", "Analyze the credit environment.")

    # Fetch available data from the database
    try:
        available_data = list_available_data()
        available_data_str = json.dumps(available_data, indent=2)
    except Exception as e:
        logger.error(f"Error fetching available data: {e}")
        available_data_str = ""  # Fallback to empty string if fetching data fails

    try:
        llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7)
        prompt_template = f"""
        Act as an experienced Economist with Credit Management and Collections experience.
        
        Take the following query:
        "{input_query}"
        
        Break down the query. Is it an open question? Is is specific? Rephrase the question so as to provide an objective answer.
        
        The available database contains the following tables and columns:
        {available_data_str}
        
        You must come up with metrics or plain queries (like filtering only unique values) that can be calculated from the available data. 
        
        Suggest at least 5 relevant metrics that should be analyzed to provide a thorough answer.
        Given the {available_data_str} the metrics should be used in the argumentation process to respond to the user's inquiry.
        
        You respond with '\n' separated queries as you were asking a database with no coding.
        
        """
        suggested_metrics = llm.invoke(prompt_template).content
        metrics = [metric.strip() for metric in suggested_metrics.split("\n") if metric.strip()]
        #metrics = metrics[:3]  # Ensure at least 5 metrics
    except Exception as e:
        logger.error(f"Error eliciting metrics: {e}")
        metrics = [
            "Credit score distributions",
            "Default rates",
            "DSO",
            "Concentration risks",
            "Loan approval rates",
        ]

    state["suggested_metrics"] = metrics
    log_state(state, "elicit_metrics")
    return state


def select_top_metric(state: Dict) -> Dict:
    """
    Selects the top metric from suggested_metrics or additional_metrics for analysis.
    """
    suggested_metrics = state.get("suggested_metrics", [])
    additional_metrics = state.get("additional_metrics", [])

    if suggested_metrics:
        # Pop the first metric from the suggested metrics list
        selected_metric = suggested_metrics.pop(0)
        state["selected_metric"] = selected_metric
        state["suggested_metrics"] = suggested_metrics
    elif additional_metrics:
        # Pop the first metric from the additional metrics list
        selected_metric = additional_metrics.pop(0)
        state["selected_metric"] = selected_metric
        state["additional_metrics"] = additional_metrics
    else:
        # No metrics left to analyze
        state["selected_metric"] = None
        logger.warning("No metrics left to analyze. Please provide additional metrics.")
    
    # Log the updated state
    log_state(state, "select_top_metric")
    return state



def ask_for_metric(state: Dict) -> Dict:
    """
    Uses an LLM to select a metric from the suggested metrics to analyze.
    """
    
    additional_metrics = state.get("additional_metrics", [])
        
    suggested_metrics = state.get("suggested_metrics", [])
    

    if additional_metrics:
        suggested_metrics  = additional_metrics

    
    if not suggested_metrics:
        logger.error("No suggested metrics available in the state.")
        state["selected_metric"] = "Default Metric"
        log_state(state, "ask_for_metric")
        return state

    try:
        # Initialize LLM
        llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7)
        
        # Formulate the prompt
        prompt_template = f"""
        You are an expert assistant helping select the most relevant metric for analysis.
        
        The user has suggested the following metrics:
        {', '.join(suggested_metrics)}
        
        Based on the user's query and the current context, suggest an entirely new metric unlike the suggested ones and with a new way of looking at the data in aggregate queries. 
        Provide your response in this format:
        
        Selected Metric: <Metric>
        Explanation: <Why this metric is relevant>
        """
        
        # Query the LLM
        print("Asking the LLM to select the most relevant metric...")
        llm_response = llm.invoke(prompt_template).content
        
        # Parse the LLM's response
        selected_metric = None
        if "Selected Metric:" in llm_response:
            selected_metric = llm_response.split("Selected Metric:")[1].split("\n")[0].strip()
        
        # Default to the first metric if no valid selection is made
        if not selected_metric or selected_metric not in suggested_metrics:
            logger.warning("LLM did not select a valid metric. Defaulting to the first metric.")
            selected_metric = suggested_metrics[0]
    except Exception as e:
        # Log the error and fall back to the first metric
        logger.error(f"Error in LLM-based metric selection: {e}")
        selected_metric = suggested_metrics[0]

    # Update the state with the selected metric
    state["selected_metric"] = selected_metric
    log_state(state, "ask_for_metric")
    return state


def analyze_metrics(state: Dict) -> Dict:
    """
    Iterates through the suggested metrics in the state and performs analysis on each metric.

    Args:
        state (Dict): The current workflow state containing the suggested metrics.

    Returns:
        Dict: Updated state with analysis results for each suggested metric.
    """
    # Ensure suggested_metrics exists in the state
    suggested_metrics: List[str] = state.get("suggested_metrics", [])
    if not suggested_metrics:
        raise ValueError("No suggested metrics available for analysis.")

    logger.info(f"Starting analysis for {len(suggested_metrics)} suggested metrics.")

    for metric in suggested_metrics:
        logger.info(f"Analyzing metric: {metric}")
        try:
            # Set the current metric in state and analyze it
            state["selected_metric"] = metric
            state = analyze_metric(state)
        except Exception as e:
            logger.error(f"Error analyzing metric '{metric}': {e}")
            
            # Store the error in the analysis results
            if "analysis_results" not in state:
                state["analysis_results"] = {}

            state["analysis_results"][metric] = {
                "data": {},
                "analysis": f"Failed to analyze metric. Error: {str(e)}"
            }

    logger.info("Completed analysis for all suggested metrics.")

    # Log the final state
    log_state(state, "analyze_metrics")
    return state



def analyze_metric(state: Dict) -> Dict:
    """
    Retrieves data for the selected metric and performs analysis using the LLM.

    Args:
        state (Dict): The current workflow state containing the selected metric.

    Returns:
        Dict: Updated state with analysis results for the selected metric.
    """
    selected_metric = state.get("selected_metric", "")
    if not selected_metric:
        raise ValueError("No metric selected for analysis.")

    # Initialize variables for SQL data and analysis
    metric_data = {}
    analysis_result = ""

    # Fetch data for the selected metric using the SQL agent
    try:
        query = selected_metric
        logger.debug(f"Running SQL query: {query}")
        llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7)
        prompt = f""" Take the following {query}. Reorganize, reorder,  and rephrase it as a  straightforward question to be made to a Data Engineer. Be concise. Respond only with the new query"""
        enhanced_query = llm.invoke(prompt).content
        metric_data = sql_agent.run(query)
        logger.info(f"Data retrieved for metric '{selected_metric}': {metric_data}")

    except Exception as e:
        logger.error(f"Error retrieving data for metric '{selected_metric}': {e}")
        metric_data = {"error": f"Failed to retrieve data: {str(e)}"}

    # Perform analysis on the retrieved data using the LLM
    try:
        llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7)
        prompt_template = f"""
        Analyze the following data for the metric '{selected_metric}':
        {json.dumps(metric_data, indent=2)}

        Provide insights on trends, risks, and potential problem areas.
        """

        logger.debug(f"Sending analysis prompt to LLM: {prompt_template}")
        analysis_result = llm.invoke(prompt_template).content
        logger.info(f"Analysis completed for metric '{selected_metric}'.")
    except Exception as e:
        logger.error(f"Error during analysis for metric '{selected_metric}': {e}")
        analysis_result = f"Analysis failed for metric '{selected_metric}'. Error: {str(e)}"

    # Update state with SQL data and analysis results
    if "sql_data" not in state:
        state["sql_data"] = {}

    state["sql_data"][selected_metric] = metric_data

    if "analysis_results" not in state:
        state["analysis_results"] = {}

    state["analysis_results"][selected_metric] = {
        "data": metric_data,
        "analysis": analysis_result
    }

    # Log the updated state
    log_state(state, "analyze_metric")
    return state


def decide_analysis_readiness(state: Dict) -> Dict:
    """
    Decides if the analysis is sufficient to proceed based on the input query, elicited metrics, and analyzed metrics.
    Uses an LLM to evaluate sufficiency.
    """
    def flatten_analysis_results(state: Dict) -> str:
        """
        Flattens and formats the analysis results from the state into a readable summary.

        Args:
            state (Dict): The workflow state containing analysis results.

        Returns:
            str: A formatted string summarizing the analysis results.
        """
        analysis_results = state.get("analysis_results", {})

        if not analysis_results:
            return "No analysis results available."

        summary = "### Analyzed Metrics:\n"
        for metric, details in analysis_results.items():
            data_summary = (
                f"Data: {len(details.get('data', []))} records"
                if isinstance(details.get("data"), list)
                else "Data: Unavailable"
            )
            analysis_summary = details.get("analysis", "Analysis: No insights available.")
            summary += f"- Metric: {metric}\n  {data_summary}\n  {analysis_summary}\n\n"

        return summary


    def fetch_available_data() -> str:
        """Fetches available data from the database and returns it as a formatted string."""
        try:
            available_data = list_available_data()
            return json.dumps(available_data, indent=2)
        except Exception as e:
            logger.error(f"Error fetching available data: {e}")
            return "Unavailable"

    def build_prompt(input_query: str, elicited_metrics: List[str], analyzed_metrics: Dict, available_data: str) -> str:
        """Constructs the LLM prompt dynamically based on the state."""
        analyzed_metrics_summary = flatten_analysis_results(state)
        return f"""
        Act as a Senior Credit Analyst. You are to the point and very thorough. You are succint and do not ask for more information if not needed.
        The user has asked the following question:
        "{input_query}"

        ### Suggested Metrics for Analysis:
        {', '.join(elicited_metrics) if elicited_metrics else "None"}

        ### Analyzed Metrics:
        {analyzed_metrics_summary}

        ### Available Data:
        {available_data}

        ### Instructions:
        0. Analyze the setence and its parts
        0.1. Extract what is required, what does the user want?
        1. Determine if the analyzed metrics are sufficient to formulate a logical and comprehensive answer to the user's question: {input_query}.
        2. If not, recommend additional metrics strictly based on the available data: {available_data}.
        3. Provide any specific calculations, including:
        - **Tables and Columns**: Specify the tables and columns required.
        - **Joins**: Mention necessary joins and keys.
        - **Formulas**: Include formulas using the available data.

        ### Output Format:
        Respond in JSON format:
        {{
            "sufficient": <true|false>,
            "additional_metrics": [<metric1>, <metric2>, ...],
            "details": "<any other necessary information>"
        }}
        """

    def parse_llm_response(response: str) -> Dict:
        """Parses the LLM response to extract sufficiency status and additional metrics."""
        try:
            response_data = json.loads(response)
            sufficient = response_data.get("sufficient", False)
            additional_metrics = response_data.get("additional_metrics", [])
            details = response_data.get("details", "")
        except Exception as e:
            logger.error(f"Error parsing LLM response: {e}")
            sufficient = False
            additional_metrics = []
            details = "Parsing error. Check the response format."

        return {"sufficient": sufficient, "additional_metrics": additional_metrics, "details": details}

    # Fetch required data
    available_data_str = fetch_available_data()
    input_query = state.get("input_query", "")
    elicited_metrics = state.get("suggested_metrics", [])
    analyzed_metrics = state.get("analysis_results", {})

    # Build and invoke LLM prompt
    prompt = build_prompt(input_query, elicited_metrics, analyzed_metrics, available_data_str)
    try:
        llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.7)
        llm_response = llm.invoke(prompt).content
        logger.debug(f"LLM Response: {llm_response}")
    except Exception as e:
        logger.error(f"Error during LLM evaluation: {e}")
        llm_response = json.dumps({"sufficient": False, "additional_metrics": [], "details": "LLM invocation error."})

    # Parse response and update state
    parsed_response = parse_llm_response(llm_response)
    state["sufficient_analysis"] = parsed_response["sufficient"]
    state["additional_metrics"] = parsed_response["additional_metrics"]
    state["details"] = parsed_response["details"]

    log_state(state, "decide_analysis_readiness")
    return state




def generate_report(state: Dict) -> Dict:
    """
    Generates a report by using an LLM, leveraging all available data from the current state,
    including query breakdown, SQL data, metrics, and analysis results.
    """
    # Extract required data from state
    sql_data = state.get("sql_data", {})
    analysis_results = state.get("analysis_results", {})
    input_query = state.get("input_query", "")
    sufficient_analysis = state.get("sufficient_analysis", False)
    suggested_metrics = state.get("suggested_metrics", [])
    selected_metric = state.get("selected_metric", "")

    # Summarize available SQL data
    sql_summary = f"SQL Data contains {len(sql_data)} analyses:\n"
    for analysis, results in sql_data.items():
        sql_summary += f"- {analysis}: {len(results) if isinstance(results, list) else 'Unknown'} records\n"

    # Summarize analysis results
    analysis_summary = "\nAnalysis Results:\n"
    for metric, details in analysis_results.items():
        analysis_summary += f"- {metric}: {details.get('analysis', 'No analysis available')}\n"

    # Summarize suggested metrics
    suggested_metrics_summary = (
        "\nSuggested Metrics:\n" + "\n".join(f"- {metric}" for metric in suggested_metrics)
        if suggested_metrics else "\nNo suggested metrics available."
    )

    # Summarize selected metric
    selected_metric_summary = (
        f"\nSelected Metric for Analysis: {selected_metric}\n"
        if selected_metric else "\nNo metric selected for analysis."
    )

    # Summarize sufficient analysis
    sufficiency_summary = (
        "\nAnalysis is deemed sufficient to answer the query."
        if sufficient_analysis else "\nAnalysis is insufficient; additional metrics may be required."
    )

    # Initialize the LLM
    llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

    # Prepare inputs for the LLM
    query_breakdown_prompt = f"""
    Break down the user's query into logical components and identify key areas of focus:
    {input_query}
    """

    reasoning_prompt = f"""
    Reason through the breakdown of the query and the following summaries:
    {sql_summary}
    {analysis_summary}
    {suggested_metrics_summary}
    {selected_metric_summary}
    {sufficiency_summary}
    """

    report_prompt = f"""
    Based on the reasoning, create an executive summary style report that includes:
    - Answer to the original query: {input_query}
    - Key insights and trends
    - Identified risks and opportunities
    - Recommendations for further actions or analyses

    Provide a concise and professional response.
    """

    # Step 1: Query Breakdown
    try:
        breakdown_chain = LLMChain(
            llm=llm,
            prompt=PromptTemplate(template="{query_breakdown}", input_variables=["query_breakdown"])
        )
        breakdown_result = breakdown_chain.run({"query_breakdown": query_breakdown_prompt})
        state["breakdown_result"] = breakdown_result  # Store in state
    except Exception as e:
        logger.error(f"Error during query breakdown: {e}")
        state["breakdown_result"] = "Error during query breakdown."


    # Step 2: Reasoning
    reasoning_prompt = f"""
    Based on the breakdown of the user's query:
    {state.get("breakdown_result", "No breakdown available.")}

    Reason through the available data and analyses with focus on answering {input_query}
    {sql_summary}
    {analysis_summary}
    {sufficiency_summary}
    """

    try:
        reasoning_chain = LLMChain(
            llm=llm,
            prompt=PromptTemplate(template="{reasoning}", input_variables=["reasoning"])
        )
        reasoning_result = reasoning_chain.run({"reasoning": reasoning_prompt})
        state["reasoning_result"] = reasoning_result  # Store in state
    except Exception as e:
        logger.error(f"Error during reasoning: {e}")
        state["reasoning_result"] = "Error during reasoning."


    # Step 3: Final Report Generation
    report_prompt = f"""
    You must provide a complete professional report with appropriate figures and formated numbers.

    ### Direct Response to Query:
    Provide a direct and concise answer to the user's query: {input_query}

    User Query Breakdown:
    {state.get("breakdown_result", "No breakdown available.")}

    Reasoning Insights:
    {state.get("reasoning_result", "No reasoning available.")}

    SQL Data Summary:
    {sql_summary}

    Analysis Results Summary:
    {analysis_summary}

    Recommendations:
    {sufficiency_summary}

    
    """

    try:
        report_chain = LLMChain(
            llm=llm,
            prompt=PromptTemplate(template="{report}", input_variables=["report"])
        )
        final_report = report_chain.run({"report": report_prompt})
        state["final_report"] = final_report  # Store in state
    except Exception as e:
        logger.error(f"Error during report generation: {e}")
        state["final_report"] = "Error during report generation."


    # Update the state with the generated report
    state["report"] = final_report
    log_state(state, "generate_report")

    return state


def log_state(state: Dict, step_name: str):
    """
    Logs the state after a step.
    """
    logger.info(f"--- State after {step_name} ---\n{json.dumps(state, indent=2)}")

# Workflow Builder
# Workflow Builder
#def build_stategraph():
#    """
#    Builds and compiles the workflow graph.
#    """
#    graph = StateGraph(state_schema=GraphState)
#
#    # Add nodes
#    graph.add_node("process_input", process_input_with_llm)
#    graph.add_node("define_objectives", define_objectives)
#    graph.add_node("assess_environment", assess_environment)
#    graph.add_node("elicit_metrics", elicit_metrics)
#    graph.add_node("ask_for_metric", ask_for_metric)
#    graph.add_node("analyze_metrics", analyze_metrics) 
#    graph.add_node("analyze_metric", analyze_metric)
#    graph.add_node("decide_readiness", decide_analysis_readiness)
#    graph.add_node("report_generator", generate_report)
#
#    # Add edges
#    graph.add_edge(START, "process_input")
#    graph.add_edge("process_input", "define_objectives")
#    graph.add_edge("define_objectives", "assess_environment")
#    graph.add_edge("assess_environment", "elicit_metrics")
#    graph.add_edge("elicit_metrics", "analyze_metrics")
#    graph.add_edge("analyze_metrics", "decide_readiness")
#    graph.add_conditional_edges(
#        "decide_readiness",
#        lambda state: "ask_for_metric" if not state["sufficient_analysis"] else "report_generator",
#        path_map={  
#            "ask_for_metric": "ask_for_metric",
#            "report_generator": "report_generator",
#        },
#    )
#    graph.add_edge("ask_for_metric", "analyze_metric")
#    graph.add_edge("analyze_metric", "decide_readiness")  
#    graph.add_edge("report_generator", END)
#
#    return graph.compile()
#
#
def build_stategraph():
    """
    Builds and compiles the updated workflow graph.
    """
    graph = StateGraph(state_schema=GraphState)

    # Add nodes
    graph.add_node("process_input", process_input_with_llm)
    graph.add_node("define_objectives", define_objectives)
    graph.add_node("assess_environment", assess_environment)
    graph.add_node("elicit_metrics", elicit_metrics)
    graph.add_node("select_top_metric", select_top_metric)  # New node to handle top metric selection
    graph.add_node("analyze_metric", analyze_metric)
    graph.add_node("decide_readiness", decide_analysis_readiness)
    graph.add_node("ask_for_metric", ask_for_metric)  # Handles exhausted metrics list
    graph.add_node("report_generator", generate_report)

    # Add edges
    graph.add_edge(START, "process_input")
    graph.add_edge("process_input", "define_objectives")
    graph.add_edge("define_objectives", "assess_environment")
    graph.add_edge("assess_environment", "elicit_metrics")
    graph.add_edge("elicit_metrics", "select_top_metric")
    graph.add_edge("select_top_metric", "analyze_metric")
    graph.add_edge("analyze_metric", "decide_readiness")

    # Add conditional edges for readiness
    graph.add_conditional_edges(
        "decide_readiness",
        lambda state: "report_generator" if state["sufficient_analysis"] else (
            "select_top_metric" if state["suggested_metrics"] or state["additional_metrics"] else "ask_for_metric"
        ),
        path_map={
            "report_generator": "report_generator",
            "select_top_metric": "select_top_metric",
            "ask_for_metric": "ask_for_metric",
        },
    )

    # Add loop from ask_for_metric back to select_top_metric
    graph.add_edge("ask_for_metric", "select_top_metric")

    graph.add_edge("report_generator", END)

    return graph.compile()



# Process the input query with LLM to extract initial state parameters

    
# Step 2: Initialize the initial state
initial_state = {
    "input_query": None,  # User's query
    "year": None,               # Placeholder for year
    "selic_rate": None,         # Placeholder for SELIC rate
    "sufficient_analysis": False,  # Whether sufficient analysis is done
    "suggested_metrics": [],    # List of metrics suggested by the LLM
    "selected_metric": "",      # The metric selected for analysis
    "analysis_results": {},     # Analysis results for metrics
    "sql_data": {},             # SQL data for analysis
    "future_data": {},          # Simulated future data
    "report": "",               # Final report
}

# Build and invoke the workflow
graph = build_stategraph()
result = graph.invoke(initial_state,  config={"recursion_limit": 100})
print(result.get("report", "No report generated."))



Processing your input query:
Input Query: ['What is our current fpd30?']

Extracting parameters using the LLM...


[32m2025-01-05 21:32:56.883[0m | [31m[1mERROR   [0m | [36m__main__[0m:[36mprocess_input_with_llm[0m:[36m138[0m - [31m[1mError processing input with LLM: the JSON object must be str, bytes or bytearray, not AIMessage[0m
[32m2025-01-05 21:32:56.883[0m | [1mINFO    [0m | [36m__main__[0m:[36mlog_state[0m:[36m731[0m - [1m--- State after process_input_with_llm ---
{
  "input_query": [
    "What is our current fpd30?"
  ],
  "year": null,
  "selic_rate": null,
  "sufficient_analysis": false,
  "suggested_metrics": [],
  "selected_metric": "",
  "analysis_results": {},
  "sql_data": {},
  "future_data": {},
  "report": ""
}[0m


Error encountered during processing. Using default values.
Please define the business goals for the credit policy:
For example: Increase revenue, expand market share, or manage risk.

Based on the business goals, define specific credit policy objectives:
For example: Reduce bad debts, support sales growth, or ensure compliance.


[32m2025-01-05 21:33:02.989[0m | [1mINFO    [0m | [36m__main__[0m:[36mlog_state[0m:[36m731[0m - [1m--- State after define_objectives ---
{
  "input_query": [
    "What is our current fpd30?"
  ],
  "year": null,
  "selic_rate": null,
  "sufficient_analysis": false,
  "suggested_metrics": [],
  "selected_metric": "",
  "analysis_results": {},
  "sql_data": {},
  "future_data": {},
  "report": "",
  "business_goals": [
    "learning"
  ],
  "credit_policy_goals": [
    "teaching"
  ]
}[0m
[32m2025-01-05 21:33:05.708[0m | [1mINFO    [0m | [36m__main__[0m:[36mlog_state[0m:[36m731[0m - [1m--- State after assess_environment ---
{
  "input_query": [
    "What is our current fpd30?"
  ],
  "year": null,
  "selic_rate": null,
  "sufficient_analysis": false,
  "suggested_metrics": [],
  "selected_metric": "",
  "analysis_results": {},
  "sql_data": {},
  "future_data": {},
  "report": "",
  "environment_assessment": "\n\n- Analysis of the current fpd30 (First Payment Defau



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3mBorrower_Profiles, Default_Rates, Loan_Products, Market_Trends, Regulatory_Data[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'Borrower_Profiles, Default_Rates'}`


[0m[33;1m[1;3m
CREATE TABLE "Borrower_Profiles" (
	"CPF" TEXT, 
	"Name" TEXT, 
	"Age" INTEGER, 
	"Income" REAL, 
	"Credit_Score" INTEGER, 
	"Debt_to_Income_Ratio" REAL, 
	"Region" TEXT
)

/*
3 rows from Borrower_Profiles table:
CPF	Name	Age	Income	Credit_Score	Debt_to_Income_Ratio	Region
632.015.897-21	Lucas Gabriel Rodrigues	29	2260.910543174554	584	0.1478227645744541	Ceará
658.702.413-08	Davi Lucca da Rocha	68	3030.4776122591534	336	0.5849483486915927	Sergipe
605.142.389-33	Diogo Azevedo	29	2286.101612447934	386	0.5757545300433974	Mato Grosso
*/


CREATE TABLE "Default_Rates" (
	"Borrower_Class" TEXT, 
	"Default_Rate" REAL, 
	"Loss_Given_Default" REAL, 
	"Exposure_at_Defa

[32m2025-01-05 21:33:14.584[0m | [1mINFO    [0m | [36m__main__[0m:[36manalyze_metric[0m:[36m415[0m - [1mData retrieved for metric '1. Calculate the average Credit Score of borrowers with an fpd30 status.': I don't have the data to calculate the average Credit Score of borrowers with an fpd30 status.[0m


[32;1m[1;3mI don't have the data to calculate the average Credit Score of borrowers with an fpd30 status.[0m

[1m> Finished chain.[0m


[32m2025-01-05 21:33:15.509[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36manalyze_metric[0m:[36m431[0m - [34m[1mSending analysis prompt to LLM: 
        Analyze the following data for the metric '1. Calculate the average Credit Score of borrowers with an fpd30 status.':
        "I don't have the data to calculate the average Credit Score of borrowers with an fpd30 status."

        Provide insights on trends, risks, and potential problem areas.
        [0m
[32m2025-01-05 21:33:18.159[0m | [1mINFO    [0m | [36m__main__[0m:[36manalyze_metric[0m:[36m433[0m - [1mAnalysis completed for metric '1. Calculate the average Credit Score of borrowers with an fpd30 status.'.[0m
[32m2025-01-05 21:33:18.159[0m | [1mINFO    [0m | [36m__main__[0m:[36mlog_state[0m:[36m731[0m - [1m--- State after analyze_metric ---
{
  "input_query": [
    "What is our current fpd30?"
  ],
  "year": null,
  "selic_rate": null,
  "sufficient_analysis": false,
  "suggested_metrics": [
 



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3mBorrower_Profiles, Default_Rates, Loan_Products, Market_Trends, Regulatory_Data[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'Borrower_Profiles, Default_Rates'}`


[0m[33;1m[1;3m
CREATE TABLE "Borrower_Profiles" (
	"CPF" TEXT, 
	"Name" TEXT, 
	"Age" INTEGER, 
	"Income" REAL, 
	"Credit_Score" INTEGER, 
	"Debt_to_Income_Ratio" REAL, 
	"Region" TEXT
)

/*
3 rows from Borrower_Profiles table:
CPF	Name	Age	Income	Credit_Score	Debt_to_Income_Ratio	Region
632.015.897-21	Lucas Gabriel Rodrigues	29	2260.910543174554	584	0.1478227645744541	Ceará
658.702.413-08	Davi Lucca da Rocha	68	3030.4776122591534	336	0.5849483486915927	Sergipe
605.142.389-33	Diogo Azevedo	29	2286.101612447934	386	0.5757545300433974	Mato Grosso
*/


CREATE TABLE "Default_Rates" (
	"Borrower_Class" TEXT, 
	"Default_Rate" REAL, 
	"Loss_Given_Default" REAL, 
	"Exposure_at_Defa

[32m2025-01-05 21:33:31.134[0m | [1mINFO    [0m | [36m__main__[0m:[36manalyze_metric[0m:[36m415[0m - [1mData retrieved for metric '2. Determine the percentage of borrowers in each region who have an fpd30 status.': I'm sorry, but it seems that the column "fpd30_status" is not available in the Borrower_Profiles table. Without this column, I am unable to determine the percentage of borrowers in each region who have an fpd30 status.[0m


[32;1m[1;3mI'm sorry, but it seems that the column "fpd30_status" is not available in the Borrower_Profiles table. Without this column, I am unable to determine the percentage of borrowers in each region who have an fpd30 status.[0m

[1m> Finished chain.[0m


[32m2025-01-05 21:33:32.164[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36manalyze_metric[0m:[36m431[0m - [34m[1mSending analysis prompt to LLM: 
        Analyze the following data for the metric '2. Determine the percentage of borrowers in each region who have an fpd30 status.':
        "I'm sorry, but it seems that the column \"fpd30_status\" is not available in the Borrower_Profiles table. Without this column, I am unable to determine the percentage of borrowers in each region who have an fpd30 status."

        Provide insights on trends, risks, and potential problem areas.
        [0m
[32m2025-01-05 21:33:34.136[0m | [1mINFO    [0m | [36m__main__[0m:[36manalyze_metric[0m:[36m433[0m - [1mAnalysis completed for metric '2. Determine the percentage of borrowers in each region who have an fpd30 status.'.[0m
[32m2025-01-05 21:33:34.136[0m | [1mINFO    [0m | [36m__main__[0m:[36mlog_state[0m:[36m731[0m - [1m--- State after analyze_metric ---
{
  "input_



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3mBorrower_Profiles, Default_Rates, Loan_Products, Market_Trends, Regulatory_Data[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'Borrower_Profiles, Default_Rates'}`


[0m[33;1m[1;3m
CREATE TABLE "Borrower_Profiles" (
	"CPF" TEXT, 
	"Name" TEXT, 
	"Age" INTEGER, 
	"Income" REAL, 
	"Credit_Score" INTEGER, 
	"Debt_to_Income_Ratio" REAL, 
	"Region" TEXT
)

/*
3 rows from Borrower_Profiles table:
CPF	Name	Age	Income	Credit_Score	Debt_to_Income_Ratio	Region
632.015.897-21	Lucas Gabriel Rodrigues	29	2260.910543174554	584	0.1478227645744541	Ceará
658.702.413-08	Davi Lucca da Rocha	68	3030.4776122591534	336	0.5849483486915927	Sergipe
605.142.389-33	Diogo Azevedo	29	2286.101612447934	386	0.5757545300433974	Mato Grosso
*/


CREATE TABLE "Default_Rates" (
	"Borrower_Class" TEXT, 
	"Default_Rate" REAL, 
	"Loss_Given_Default" REAL, 
	"Exposure_at_Defa

[32m2025-01-05 21:33:42.136[0m | [1mINFO    [0m | [36m__main__[0m:[36manalyze_metric[0m:[36m415[0m - [1mData retrieved for metric '3. Analyze the Debt-to-Income Ratio distribution among borrowers with fpd30.': I don't know.[0m


[32;1m[1;3mI don't know.[0m

[1m> Finished chain.[0m


[32m2025-01-05 21:33:43.157[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36manalyze_metric[0m:[36m431[0m - [34m[1mSending analysis prompt to LLM: 
        Analyze the following data for the metric '3. Analyze the Debt-to-Income Ratio distribution among borrowers with fpd30.':
        "I don't know."

        Provide insights on trends, risks, and potential problem areas.
        [0m
[32m2025-01-05 21:33:45.187[0m | [1mINFO    [0m | [36m__main__[0m:[36manalyze_metric[0m:[36m433[0m - [1mAnalysis completed for metric '3. Analyze the Debt-to-Income Ratio distribution among borrowers with fpd30.'.[0m
[32m2025-01-05 21:33:45.187[0m | [1mINFO    [0m | [36m__main__[0m:[36mlog_state[0m:[36m731[0m - [1m--- State after analyze_metric ---
{
  "input_query": [
    "What is our current fpd30?"
  ],
  "year": null,
  "selic_rate": null,
  "sufficient_analysis": false,
  "suggested_metrics": [
    "4. Calculate the Loss Given Default for borrowers with fpd30 status."



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3mBorrower_Profiles, Default_Rates, Loan_Products, Market_Trends, Regulatory_Data[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'Borrower_Profiles, Default_Rates'}`


[0m[33;1m[1;3m
CREATE TABLE "Borrower_Profiles" (
	"CPF" TEXT, 
	"Name" TEXT, 
	"Age" INTEGER, 
	"Income" REAL, 
	"Credit_Score" INTEGER, 
	"Debt_to_Income_Ratio" REAL, 
	"Region" TEXT
)

/*
3 rows from Borrower_Profiles table:
CPF	Name	Age	Income	Credit_Score	Debt_to_Income_Ratio	Region
632.015.897-21	Lucas Gabriel Rodrigues	29	2260.910543174554	584	0.1478227645744541	Ceará
658.702.413-08	Davi Lucca da Rocha	68	3030.4776122591534	336	0.5849483486915927	Sergipe
605.142.389-33	Diogo Azevedo	29	2286.101612447934	386	0.5757545300433974	Mato Grosso
*/


CREATE TABLE "Default_Rates" (
	"Borrower_Class" TEXT, 
	"Default_Rate" REAL, 
	"Loss_Given_Default" REAL, 
	"Exposure_at_Defa

[32m2025-01-05 21:33:51.070[0m | [1mINFO    [0m | [36m__main__[0m:[36manalyze_metric[0m:[36m415[0m - [1mData retrieved for metric '4. Calculate the Loss Given Default for borrowers with fpd30 status.': I don't know.[0m


[32;1m[1;3mI don't know.[0m

[1m> Finished chain.[0m


[32m2025-01-05 21:33:52.028[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36manalyze_metric[0m:[36m431[0m - [34m[1mSending analysis prompt to LLM: 
        Analyze the following data for the metric '4. Calculate the Loss Given Default for borrowers with fpd30 status.':
        "I don't know."

        Provide insights on trends, risks, and potential problem areas.
        [0m
[32m2025-01-05 21:33:54.200[0m | [1mINFO    [0m | [36m__main__[0m:[36manalyze_metric[0m:[36m433[0m - [1mAnalysis completed for metric '4. Calculate the Loss Given Default for borrowers with fpd30 status.'.[0m
[32m2025-01-05 21:33:54.200[0m | [1mINFO    [0m | [36m__main__[0m:[36mlog_state[0m:[36m731[0m - [1m--- State after analyze_metric ---
{
  "input_query": [
    "What is our current fpd30?"
  ],
  "year": null,
  "selic_rate": null,
  "sufficient_analysis": false,
  "suggested_metrics": [
    "5. Compare the Interest Rates offered to borrowers with fpd30 status to those without



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3mBorrower_Profiles, Default_Rates, Loan_Products, Market_Trends, Regulatory_Data[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'Borrower_Profiles, Default_Rates, Loan_Products'}`


[0m[33;1m[1;3m
CREATE TABLE "Borrower_Profiles" (
	"CPF" TEXT, 
	"Name" TEXT, 
	"Age" INTEGER, 
	"Income" REAL, 
	"Credit_Score" INTEGER, 
	"Debt_to_Income_Ratio" REAL, 
	"Region" TEXT
)

/*
3 rows from Borrower_Profiles table:
CPF	Name	Age	Income	Credit_Score	Debt_to_Income_Ratio	Region
632.015.897-21	Lucas Gabriel Rodrigues	29	2260.910543174554	584	0.1478227645744541	Ceará
658.702.413-08	Davi Lucca da Rocha	68	3030.4776122591534	336	0.5849483486915927	Sergipe
605.142.389-33	Diogo Azevedo	29	2286.101612447934	386	0.5757545300433974	Mato Grosso
*/


CREATE TABLE "Default_Rates" (
	"Borrower_Class" TEXT, 
	"Default_Rate" REAL, 
	"Loss_Given_Default" REAL, 
	"E

[32m2025-01-05 21:34:03.377[0m | [1mINFO    [0m | [36m__main__[0m:[36manalyze_metric[0m:[36m415[0m - [1mData retrieved for metric '5. Compare the Interest Rates offered to borrowers with fpd30 status to those without.': I don't know.[0m


[32;1m[1;3mI don't know.[0m

[1m> Finished chain.[0m


[32m2025-01-05 21:34:04.356[0m | [34m[1mDEBUG   [0m | [36m__main__[0m:[36manalyze_metric[0m:[36m431[0m - [34m[1mSending analysis prompt to LLM: 
        Analyze the following data for the metric '5. Compare the Interest Rates offered to borrowers with fpd30 status to those without.':
        "I don't know."

        Provide insights on trends, risks, and potential problem areas.
        [0m
[32m2025-01-05 21:34:06.284[0m | [1mINFO    [0m | [36m__main__[0m:[36manalyze_metric[0m:[36m433[0m - [1mAnalysis completed for metric '5. Compare the Interest Rates offered to borrowers with fpd30 status to those without.'.[0m
[32m2025-01-05 21:34:06.284[0m | [1mINFO    [0m | [36m__main__[0m:[36mlog_state[0m:[36m731[0m - [1m--- State after analyze_metric ---
{
  "input_query": [
    "What is our current fpd30?"
  ],
  "year": null,
  "selic_rate": null,
  "sufficient_analysis": false,
  "suggested_metrics": [],
  "selected_metric": "5. Compare the Interest Rates 

ValueError: No metric selected for analysis.

result

In [17]:
result['analysis_results']

{'1. Calculate the total revenue generated by prime customers only by summing up the income from approved loans for prime customers.': {'data': 'The total revenue generated by prime customers, with a credit score of 700 or higher, is approximately $10,822,300.',
  'analysis': "Trends:\n1. The total revenue generated by prime customers with a credit score of 700 or higher is significant at approximately $10,822,300. This indicates that prime customers are a valuable segment for generating revenue for the company.\n2. The fact that prime customers are contributing significantly to the total revenue suggests that the company has a strong customer base with good credit scores who are likely to repay their loans on time.\n\nRisks:\n1. Dependence on prime customers: Relying heavily on prime customers for revenue generation could pose a risk if the company's customer base is not diversified enough. Any changes in the behavior or financial status of prime customers could impact the company's r

In [20]:
print(result['report'])

Based on the analysis conducted on the impact of prime customers on revenue generation and approval rates, the following recommendations are provided:

1. Diversify Customer Segmentation: While prime customers contribute significantly to revenue, it is essential to diversify the customer base to mitigate risks associated with dependence on a single segment. Explore opportunities to attract and retain customers from other segments to ensure sustainable revenue growth.

2. Monitor Default Rates: Continuously monitor default rates among prime customers to identify trends and patterns that may indicate potential risks. Conduct thorough credit assessments and consider factors beyond credit score when making lending decisions to minimize default risks.

3. Optimize Interest Rates: Analyze the average interest rates offered to prime customers compared to non-prime customers to ensure that revenue generation is maximized while considering credit risk. Set interest rates strategically to attrac