In [None]:
from dotenv import load_dotenv
load_dotenv() # This line is crucial for loading your .env file

import os
import numpy as np
from openai import OpenAI
from sklearn.metrics.pairwise import cosine_similarity
# Assuming prepare_chunks.py is a separate file that you control
from prepare_chunks import read_and_chunk_transcripts
import logging
import re # For parsing numbers from input string

# LangChain Imports
from langchain.agents import AgentExecutor, create_openai_tools_agent # Changed to create_openai_tools_agent
from langchain.tools import Tool
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationBufferMemory
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage # Added SystemMessage


# Configure logging for better debugging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Initialize the OpenAI client from the OPENAI_API_KEY environment variable.
# The key is validated BEFORE the client is constructed: with openai>=1.0 the
# constructor raises its own OpenAIError (not ValueError) when no key is
# available, so a post-construction check would never get the chance to run.
_openai_api_key = os.getenv("OPENAI_API_KEY")
if not _openai_api_key:
    logging.error("Configuration Error: OPENAI_API_KEY environment variable not set.")
    exit("Exiting: OpenAI API key is missing. Please set OPENAI_API_KEY environment variable.")

client = OpenAI(api_key=_openai_api_key)


# --- 1. Improve Chunking Strategy (Conceptual Change, implement in prepare_chunks.py) ---
# The way you chunk transcripts is critical. If chunks are too small, they lack context.
# If too large, they might contain irrelevant info diluting the similarity.
# Suggestions for `prepare_chunks.py`:
# A. Fixed size with overlap: e.g., 500 characters per chunk with 100 characters overlap.
#    This helps maintain context across chunk boundaries.
# B. Sentence-based chunking: Ensure chunks don't cut sentences in half.
# C. Paragraph-based chunking: If transcripts are paragraphed, use paragraphs as chunks.
# D. Recursive character text splitter (LangChain concept): Splits by paragraphs, then sentences, then words, etc.
#    This is more advanced but often yields better results.
# Make sure your `read_and_chunk_transcripts` function handles this effectively.
# Load the transcript chunks that form the retrieval knowledge base.
# Any failure, or an empty result, terminates the script with a message.
try:
    chunks = read_and_chunk_transcripts('transcripts/')
    if chunks:
        logging.info(f"Successfully loaded {len(chunks)} chunks.")
    else:
        # Nothing to search over: there is no point continuing.
        logging.warning("No chunks read from 'transcripts/'. Ensure files exist and content is processed.")
        exit("Exiting: No transcript chunks found. Please check 'transcripts/' directory and prepare_chunks.py.")
except Exception as err:
    # exit() above raises SystemExit, which is not an Exception subclass,
    # so it passes through this handler untouched.
    logging.error(f"Error reading and chunking transcripts: {err}")
    exit("Exiting: Failed to process transcripts. Check 'prepare_chunks.py' and 'transcripts/' directory.")


def embed_texts(texts, model="text-embedding-ada-002", batch_size=50):
    """
    Generate embeddings for a list of texts using OpenAI's embedding API.

    Texts are sent in batches of ``batch_size``; each batch is retried up to
    three times with a linearly increasing delay (1s, 3s) between attempts.

    The result is all-or-nothing: callers index the returned list in parallel
    with ``texts``, so a skipped batch would silently shift every later
    embedding onto the wrong text. If any batch still fails after the final
    retry, an empty list is returned instead of a partial one.

    Args:
        texts: List of strings to embed.
        model: Name of the embedding model to request.
        batch_size: Maximum number of texts sent per API call.

    Returns:
        A list with one embedding per input text (same order), or ``[]`` when
        ``texts`` is empty or any batch could not be embedded.
    """
    import time  # local import: `time` is not imported at module level

    if not texts:
        return []

    max_retries = 3
    embeddings = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        batch_number = start // batch_size + 1
        for attempt in range(max_retries):
            try:
                response = client.embeddings.create(
                    model=model,
                    input=batch
                )
                embeddings.extend([item.embedding for item in response.data])
                break  # Batch succeeded; move on to the next one.
            except Exception as e:
                logging.warning(f"Embedding API error on batch {batch_number}, attempt {attempt + 1}: {e}")
                if attempt == max_retries - 1:
                    logging.error(
                        f"Max retries reached for embedding batch {batch_number}. "
                        "Aborting to keep embeddings aligned with their texts."
                    )
                    return []
                # Linear backoff before the next attempt (never after the last).
                time.sleep(1 + attempt * 2)
    return embeddings

# Embed every chunk once at startup; retrieval compares queries against these.
logging.info("Generating embeddings for chunks...")
chunk_embeddings = embed_texts(chunks)
if not chunk_embeddings:
    logging.error("Failed to generate embeddings for chunks. Check API key and network.")
    exit("Exiting: No chunk embeddings generated.")
# Retrieval uses the position of an embedding as the index into `chunks`, so
# the two lists must be the same length or wrong passages would be returned.
if len(chunk_embeddings) != len(chunks):
    logging.error(
        f"Embedding count ({len(chunk_embeddings)}) does not match chunk count ({len(chunks)})."
    )
    exit("Exiting: Chunk embeddings are incomplete.")
logging.info(f"Generated {len(chunk_embeddings)} chunk embeddings.")


# --- Modified function to be used as a LangChain Tool ---
def retrieve_and_answer(query: str, top_k: int = 5) -> str:
    """
    Retrieve the chunks most similar to ``query`` and ask an LLM to answer
    using only that retrieved context. Designed to be called as a LangChain Tool.

    Args:
        query (str): The user's question about financial literacy.
        top_k (int): The number of top similar chunks to retrieve. Values
            below 1 are treated as 1; values above the number of available
            chunks are reduced to that number.

    Returns:
        str: The LLM's answer, a standardized "no information" message when
        the LLM reports that the context is insufficient, or a short error
        message when the query is empty, cannot be embedded, or the LLM call
        fails.
    """
    logging.info(f"Tool 'retrieve_and_answer' received query: '{query}'")

    if not query or not query.strip():
        return "Please provide a non-empty question for the retrieval tool."

    # Embed the query
    embedded_query = embed_texts([query])
    if not embedded_query:
        logging.error("Failed to generate embedding for the query within the tool.")
        return "Sorry, I couldn't process your question for retrieval at the moment."
    query_embedding = embedded_query[0]

    # Rank every chunk by cosine similarity to the query.
    similarities = cosine_similarity([query_embedding], chunk_embeddings)[0]
    # Clamp to at least 1: with 0 the slice below would be `[-0:]`, which
    # selects EVERY chunk instead of none. Also never exceed what is available.
    effective_top_k = max(1, min(top_k, len(chunks), len(similarities)))
    top_indices = np.argsort(similarities)[-effective_top_k:][::-1]

    context = "\n\n".join(
        f"<DOCUMENT_START id={i}>\n{chunks[i]}\n<DOCUMENT_END>" for i in top_indices
    )

    logging.info(f"\n🔎 Retrieved top {effective_top_k} chunks (indices: {[int(i) for i in top_indices]}) for tool:\n{context[:500]}...\n") # Log truncated context

    # Prompt for GPT, specifically tailored for the tool's interaction with the LLM
    # Note: The main agent's prompt will guide the overall conversation.
    tool_llm_prompt = f"""You are a financial literacy expert. Your goal is to answer questions using ONLY the provided context.
If the answer is not directly available or cannot be reasonably inferred from the context, state that you cannot answer based on the provided information.

<RETRIEVED_DOCUMENTS>
{context}
</RETRIEVED_DOCUMENTS>

Question: {query}

Answer:
"""
    logging.info("Calling LLM from within 'retrieve_and_answer' tool...")
    try:
        # Reuse the module-level client rather than constructing a new one.
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": tool_llm_prompt}],
            temperature=0.5,
            max_tokens=700,
        )
        answer = response.choices[0].message.content.strip()
        logging.info("LLM response received by tool.")
        # Collapse the LLM's various "I don't know" phrasings into one fixed
        # message so the calling agent sees a consistent signal.
        lowered = answer.lower()
        if "i apologize, but i don't have enough information" in lowered or \
           "cannot answer based on the provided information" in lowered:
            return "No sufficient information found in the knowledge base to answer that."
        return answer
    except Exception as e:
        logging.error(f"Error calling LLM API from tool: {e}")
        return "Internal tool error: Could not generate an answer."

# --- New and Modified Tool Functions with Calculation Capabilities ---

def _parse_amount(field_pattern: str, text: str):
    """
    Return the first number that follows ``field_pattern`` in ``text``.

    Accepts ``field=123``, ``field: 123``, ``field 123`` and ``field is 123``,
    with an optional ``$`` and thousands separators. Matching is
    case-insensitive. Returns a float, or None when the field is absent or
    the captured text is not a valid number (e.g. ``1.2.3``).
    """
    match = re.search(
        rf"{field_pattern}(?:\s+is)?[=:\s]*\$?\s*(\d[\d,\.]*)",
        text,
        re.IGNORECASE,
    )
    if match is None:
        return None
    try:
        return float(match.group(1).replace(',', ''))
    except ValueError:
        return None


def calculate_debt_details(input_str: str) -> str:
    """
    Calculates estimated time to pay off a debt and total interest paid based on
    principal, annual interest rate, and monthly payment.
    Input format expected: "principal=<amount>, interest_rate=<percentage>, monthly_payment=<amount>"

    The schedule is simulated month by month: interest accrues on the
    remaining balance, and the rest of the payment reduces the principal.
    The final payment is treated as partial (only what is still owed), so
    any amount by which a full payment would overshoot is NOT counted as
    interest.

    Returns:
        A human-readable result, or a message explaining which inputs are
        missing/invalid or why the debt cannot be paid off.
    """
    logging.info(f"Tool 'calculate_debt_details' called with input: {input_str}")

    principal = _parse_amount(r"principal", input_str)
    annual_interest_rate = _parse_amount(r"interest[_\s]?rate", input_str)
    monthly_payment = _parse_amount(r"monthly[_\s]?payment", input_str)

    if any(value is None for value in (principal, annual_interest_rate, monthly_payment)):
        return (
            "To calculate debt details, I need the loan principal, the annual interest rate (as a percentage), "
            "and your monthly payment. "
            "Please provide them, for example: 'principal=10000, interest_rate=5, monthly_payment=200'."
        )

    # A 0% rate is allowed (interest-free debt); only a negative rate is rejected.
    if principal <= 0 or annual_interest_rate < 0 or monthly_payment <= 0:
        return "All input values (principal, interest rate, monthly payment) must be positive."

    # Convert annual rate to monthly decimal rate
    monthly_interest_rate = (annual_interest_rate / 100) / 12

    # If the payment does not exceed the first month's interest, the balance
    # can never shrink.
    if monthly_payment <= (principal * monthly_interest_rate) and annual_interest_rate > 0:
        return "Your monthly payment is too low to ever pay off the principal, or just covers interest. You might need to increase your payment to see progress."

    # --- Amortization Calculation ---
    remaining_principal = principal
    total_interest_paid = 0.0
    months = 0
    max_months = 600 # Cap to prevent infinite loops for very low payments (50 years)

    while remaining_principal > 0 and months < max_months:
        interest_for_month = remaining_principal * monthly_interest_rate
        principal_paid_this_month = monthly_payment - interest_for_month

        if principal_paid_this_month <= 0:
            # Defensive: payment is not reducing the principal at all.
            return "With these inputs, it seems your monthly payment is not sufficient to pay off the principal within a reasonable timeframe (e.g., it only covers interest). You may need to increase your payment."

        remaining_principal -= principal_paid_this_month
        total_interest_paid += interest_for_month
        months += 1

        # The last payment usually overshoots (balance goes negative) or leaves
        # sub-cent residue. Either way the debt is cleared; the overshoot is
        # money the borrower simply does not pay, so interest is left as-is.
        if remaining_principal < 0.01:
            remaining_principal = 0

    if remaining_principal > 0: # Loop exited because max_months was reached
        return (
            f"It would take more than {max_months} months (50 years) to pay off a principal of ${principal:,.2f} "
            f"with an annual interest rate of {annual_interest_rate}% and a monthly payment of ${monthly_payment:,.2f}. "
            "You might consider increasing your monthly payment."
        )

    years = months / 12
    return (
        f"To pay off a principal of ${principal:,.2f} with an annual interest rate of {annual_interest_rate}% "
        f"and a monthly payment of ${monthly_payment:,.2f}:\n"
        f"- Estimated time to pay off: {months} months ({years:.1f} years)\n"
        f"- Estimated total interest paid: ${total_interest_paid:,.2f}"
    )


def recommend_savings(input_str: str) -> str:
    """
    Provides savings recommendations. If monthly income and spending are provided,
    it calculates a recommended 20% savings. Otherwise, it gives general guidelines
    and prompts for input.
    Input format expected: "income=<amount>, spending=<amount>" (the phrasing
    "income is <amount> ... spending is <amount>" is also understood) or just
    a general query.
    """
    logging.info(f"Tool 'recommend_savings' called with input: {input_str}")

    income = _parse_amount(r"income", input_str)
    spending = _parse_amount(r"spending", input_str)

    if income is None or spending is None:
        return (
            "To give you a personalized savings recommendation, I need your monthly income and average monthly spending. "
            "A common guideline is the 50/30/20 rule: 50% for needs, 30% for wants, and 20% for savings and debt repayment. "
            "Please provide your monthly income and spending, for example: 'my income is 3000 and spending is 2000'."
        )

    if income < 0 or spending < 0:
        return "Income and spending must be non-negative values."
    if spending > income:
        return "Your spending seems to exceed your income. While saving is important, focusing on reducing spending or increasing income might be your first step."

    # 50/30/20 rule: 20% goes to savings and debt repayment, the remaining
    # 80% covers needs and wants.
    recommended_savings = 0.20 * income
    needs_wants_budget = 0.80 * income

    return (
        f"Based on your monthly income of ${income:,.2f} and spending of ${spending:,.2f}:\n"
        f"Following the 50/30/20 rule, a recommended monthly savings amount (including debt repayment) is ${recommended_savings:,.2f} (20% of income).\n"
        f"This would leave ${needs_wants_budget:,.2f} for your needs and wants.\n"
        "Remember, consistency is key, and even small amounts add up over time."
    )


def get_budgeting_templates(input_str: str = "") -> str:
    """
    Describes common budgeting templates and methods.

    ``input_str`` is only logged; the returned text is fixed.
    """
    logging.info(f"Tool 'get_budgeting_templates' called with input: {input_str}")
    return (
        "Budgeting templates can help you organize your finances. Common methods include:\n"
        "- **Spreadsheets:** Excel or Google Sheets offer great flexibility for custom budgets. "
        "You can find many free templates online.\n"
        "- **Budgeting Apps:** Apps like Mint, YNAB (You Need A Budget), or Personal Capital offer "
        "features like transaction tracking, goal setting, and visual reports.\n"
        "- **Pen and Paper:** A simple notebook can also work for tracking income and expenses.\n"
        "The key is to choose a method that you find easy to use and stick with."
    )


def get_expense_tracker_info(input_str: str = "") -> str:
    """
    Explains what an expense tracker is and its benefits.

    ``input_str`` is only logged; the returned text is fixed.
    """
    logging.info(f"Tool 'get_expense_tracker_info' called with input: {input_str}")
    return (
        "An expense tracker helps you monitor where your money goes. Its benefits include:\n"
        "- **Understanding Spending Habits:** Reveals where you might be overspending.\n"
        "- **Budget Adherence:** Helps you stick to your budget and identify areas for adjustment.\n"
        "- **Financial Goal Achievement:** By seeing your spending, you can find more money for savings or debt.\n"
        "- **Tax Preparation:** Makes it easier to categorize expenses for tax purposes.\n"
        "Methods range from manual logging to using sophisticated apps."
    )


def get_investment_planning_advice(input_str: str = "") -> str:
    """
    Return a fixed overview of general investment-planning principles.

    ``input_str`` is only written to the log; it does not affect the result.
    """
    logging.info(f"Tool 'get_investment_planning_advice' called with input: {input_str}")

    intro = "Investment planning involves setting financial goals and creating a strategy to achieve them through investments. Key aspects include:"
    key_aspects = [
        "- **Define Your Goals:** What are you saving for? (e.g., retirement, down payment, education)",
        "- **Assess Risk Tolerance:** How comfortable are you with market fluctuations? This influences your asset allocation.",
        "- **Diversification:** Spreading investments across different asset classes (stocks, bonds, real estate) to reduce risk.",
        "- **Long-term vs. Short-term:** Tailor investments based on your timeline.",
        "- **Regular Contributions:** Consistency is often key to compounding returns.",
    ]
    closing = "It's often recommended to consult a financial advisor for personalized investment planning."

    # One line per entry: intro, the five bullets, then the closing advice.
    return "\n".join([intro, *key_aspects, closing])


# --- Define LangChain Tools ---
# Each entry is (tool name, callable, description). The description is the
# text the agent's LLM reads when deciding which tool to invoke.
_TOOL_SPECS = (
    (
        "FinancialLiteracyRetriever",
        retrieve_and_answer,
        "Useful for answering specific financial literacy questions by retrieving information from a knowledge base about topics like 401k, IRA, credit scores, mortgages, etc. Input should be a concise financial literacy question.",
    ),
    (
        "SavingsAdvisor",
        recommend_savings,
        "Calculates a recommended savings amount based on provided monthly income and spending (e.g., 'income=3000, spending=2000'). If numbers are not provided, it gives general savings guidelines and prompts for input. Use this when the user asks about how much they should save or for personalized savings recommendations.",
    ),
    (
        "BudgetingTemplateInfo",
        get_budgeting_templates,
        "Provides information about different types of budgeting templates and methods. Use this when the user asks about budgeting templates, how to start a budget, or tools for budgeting.",
    ),
    (
        "ExpenseTrackerInfo",
        get_expense_tracker_info,
        "Explains what an expense tracker is and its benefits. Use this when the user asks about tracking expenses or managing spending.",
    ),
    (
        "DebtCalculator",
        calculate_debt_details,
        "Calculates the estimated time to pay off a debt and total interest paid. Requires specific inputs: 'principal=<amount>, interest_rate=<percentage>, monthly_payment=<amount>'. Use this when the user asks to calculate debt, loan payoff time, or total interest.",
    ),
    (
        "InvestmentPlanningAdvisor",
        get_investment_planning_advice,
        "Offers general advice and principles for investment planning. Use this when the user asks about how to plan investments, investment strategies, or getting started with investing.",
    ),
)

tools = [
    Tool(name=tool_name, func=tool_func, description=tool_description)
    for tool_name, tool_func, tool_description in _TOOL_SPECS
]

# --- Setup LangChain Agent ---
logging.info("Setting up LangChain Agent...")

# Instructions given to the orchestrating model on every turn.
_SYSTEM_PROMPT = "You are a friendly and helpful financial literacy chatbot. Your goal is to assist users with their questions about personal finance, investing, debt management, budgeting, and savings. You have several specialized tools to help you find information and provide advice. When a calculation is requested, ensure you ask for all necessary numerical inputs clearly, specifying the format (e.g., 'monthly income is 3000, spending is 2000' for savings, or 'principal=10000, interest_rate=5, monthly_payment=200' for debt calculation)."

# gpt-4o acts as the orchestrator that decides which tool to call.
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0.7,
    openai_api_key=os.getenv("OPENAI_API_KEY"),
)

# Conversation history is stored under "chat_history", the same name the
# prompt's placeholder below reads from.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Prompt layout: system instructions, prior turns, the new user input, then
# the scratchpad placeholder that create_openai_tools_agent requires.
prompt = ChatPromptTemplate.from_messages(
    [
        SystemMessage(content=_SYSTEM_PROMPT),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# Build the OpenAI-tools agent and wrap it in an executor that runs the
# tool-calling loop and attaches the conversation memory.
agent = create_openai_tools_agent(llm, tools, prompt)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    memory=memory,
    handle_parsing_errors=True,
)

logging.info("LangChain Agent setup complete.")

if __name__ == "__main__":
    # Interactive console loop: read a question, run it through the agent,
    # print the answer. Ends on 'exit', end-of-input, or Ctrl+C at the prompt.
    print("Welcome to Financial Literacy Chatbot (LangChain Agent Demo). Type 'exit' to quit.")
    if not chunks or not chunk_embeddings:
        print("Chatbot cannot start due to missing data. Please check logs for errors.")
    else:
        while True:
            try:
                query = input("\nAsk your question: ").strip()
            except (EOFError, KeyboardInterrupt):
                # Closed stdin or Ctrl+C at the prompt: leave quietly
                # instead of crashing with a traceback.
                print()
                break
            if not query:
                # Don't spend an API call on a blank line.
                continue
            if query.lower() == "exit":
                break
            try:
                # The agent_executor handles tool selection and memory.
                response = agent_executor.invoke({"input": query})
                print(f"\n💬 Chatbot:\n{response['output']}")
            except Exception as e:
                logging.error(f"Error during agent execution: {e}")
                print("I apologize, I encountered an error trying to answer your question. Please try again.")


2025-06-26 22:42:23,691 - INFO - Successfully loaded 1827 chunks.
2025-06-26 22:42:23,693 - INFO - Generating embeddings for chunks...


✅ Loaded 1827 chunks from transcripts.


2025-06-26 22:42:24,115 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-06-26 22:42:25,131 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-06-26 22:42:25,817 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-06-26 22:42:26,486 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-06-26 22:42:27,264 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-06-26 22:42:27,637 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-06-26 22:42:28,919 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-06-26 22:42:29,559 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-06-26 22:42:30,241 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-06-26 22:42:30,610 - INFO - HTTP

Welcome to Financial Literacy Chatbot (LangChain Agent Demo). Type 'exit' to quit.


[1m> Entering new AgentExecutor chain...[0m


2025-06-26 22:45:01,646 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-06-26 22:45:02,014 - INFO - Tool 'calculate_debt_details' called with input: principal=12000, interest_rate=8, monthly_payment=250


[32;1m[1;3m
Invoking: `DebtCalculator` with `principal=12000, interest_rate=8, monthly_payment=250`


[0m[33;1m[1;3mTo pay off a principal of $12,000.00 with an annual interest rate of 8.0% and a monthly payment of $250.00:
- Estimated time to pay off: 59 months (4.9 years)
- Estimated total interest paid: $2,750.00[0m

2025-06-26 22:45:02,558 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32;1m[1;3mTo pay off your $12,000 loan at an 8% interest rate with monthly payments of $250, it will take approximately 59 months (or about 4.9 years). Over this period, you will pay an estimated total interest of $2,750.[0m

[1m> Finished chain.[0m

💬 Chatbot:
To pay off your $12,000 loan at an 8% interest rate with monthly payments of $250, it will take approximately 59 months (or about 4.9 years). Over this period, you will pay an estimated total interest of $2,750.
