In [2]:
from dotenv import load_dotenv
load_dotenv() # This line is crucial for loading your .env file

import os
import logging
import re
from typing import List

# LangChain Imports
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain.tools import Tool
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationBufferMemory
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_core.documents import Document # To work with LangChain's Document objects

# Pinecone Imports
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore

# --- Configure logging ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# --- Initialize OpenAI client (for LLM and Embeddings) ---
# It's good practice to ensure the API key is available
try:
    import openai
    openai.api_key = os.getenv("OPENAI_API_KEY")
    if not openai.api_key:
        raise ValueError("OPENAI_API_KEY environment variable not set.")
except ValueError as e:
    logging.error(f"Configuration Error: {e}")
    exit("Exiting: OpenAI API key is missing. Please set OPENAI_API_KEY environment variable.")
except Exception as e:
    logging.error(f"Error initializing OpenAI API key: {e}")
    exit("Exiting: Failed to set OpenAI API key.")

# --- Initialize LangChain's OpenAIEmbeddings for Pinecone retrieval ---
try:
    embeddings_model = OpenAIEmbeddings(model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY"))
    logging.info("Initialized OpenAIEmbeddings model for retrieval.")
except Exception as e:
    logging.error(f"Error initializing OpenAIEmbeddings for retrieval: {e}")
    exit("Exiting: Failed to initialize OpenAIEmbeddings. Check API key.")

# --- Pinecone Configuration for Retrieval ---
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_ENVIRONMENT = os.getenv("PINECONE_ENVIRONMENT")
INDEX_NAME = "financial-literacy-chatbot" # Must match the index name used in pinecone_data_loader.py

if not PINECONE_API_KEY or not PINECONE_ENVIRONMENT:
    logging.error("Pinecone API key or environment not set. Please add PINECONE_API_KEY and PINECONE_ENVIRONMENT to your .env file.")
    exit("Exiting: Pinecone credentials missing for chatbot app.")

# --- Connect to Pinecone Vector Store ---
vectorstore = None
retriever = None
try:
    pc = Pinecone(api_key=PINECONE_API_KEY, environment=PINECONE_ENVIRONMENT)
    logging.info("Connected to Pinecone client.")

    # Only connect to existing index, do not create or upload here
    if INDEX_NAME not in pc.list_indexes():
        logging.error(f"Pinecone index '{INDEX_NAME}' does not exist. Please run 'pinecone_data_loader.py' first to create and populate it.")
        exit("Exiting: Pinecone index not found.")
    
    vectorstore = PineconeVectorStore(index_name=INDEX_NAME, embedding=embeddings_model)
    retriever = vectorstore.as_retriever()
    logging.info("Pinecone vector store and retriever initialized for existing index.")

except Exception as e:
    logging.error(f"Error connecting to Pinecone index for chatbot: {e}")
    exit("Exiting: Failed to connect to Pinecone. Ensure index exists and credentials are correct.")

# --- Knowledge Base Query Tool ---
def query_knowledge_base(query: str, top_k: int = 5) -> str:
    """
    Retrieves relevant documents from the Pinecone knowledge base and uses an LLM
    to answer the query. This function is designed to be called as a LangChain Tool.

    Args:
        query (str): The user's question about financial literacy.
        top_k (int): The number of top relevant documents to retrieve.

    Returns:
        str: The AI's answer based on the retrieved context, or a standardized error message.
    """
    logging.info(f"Tool 'query_knowledge_base' received query: '{query}'")

    if not query.strip():
        return "Please provide a non-empty question for the knowledge base tool."

    if retriever is None:
        logging.error("Retriever is not initialized. Cannot query knowledge base.")
        return "The knowledge base is not available right now. Please try again later."

    try:
        retrieved_docs: List[Document] = retriever.get_relevant_documents(query)

        if not retrieved_docs:
            return "No sufficient information found in the knowledge base for that query."

        context_parts = []
        for i, doc in enumerate(retrieved_docs):
            context_parts.append(f"<DOCUMENT_START id={i}>\n{doc.page_content}\n<DOCUMENT_END>")
        context = "\n\n".join(context_parts)

        logging.info(f"\n🔎 Retrieved top {len(retrieved_docs)} documents from Pinecone for tool:\n{context[:500]}...\n")

        llm_for_tool = ChatOpenAI(model="gpt-4o", temperature=0.5, openai_api_key=os.getenv("OPENAI_API_KEY"))
        response = llm_for_tool.invoke(
            tool_llm_prompt = f"""You are a financial literacy expert. Your goal is to answer questions using ONLY the information provided in the following retrieved documents.
If the answer is not directly available or cannot be reasonably inferred from the context, state that you cannot answer based on the provided information.

<RETRIEVED_DOCUMENTS>
{context}
</RETRIEVED_DOCUMENTS>

Question: {query}

Answer:
"""
        )
        answer = response.content.strip() # Get content from AIMessage
        logging.info("LLM response received by tool.")
        
        if "i apologize, but i don't have enough information" in answer.lower() or \
           "cannot answer based on the provided information" in answer.lower() or \
           "no sufficient information found" in answer.lower():
            return "No sufficient information found in the knowledge base to answer that."
        return answer
    except Exception as e:
        logging.error(f"Error calling LLM API or retrieving from Pinecone within tool: {e}")
        return "Internal tool error: Could not generate an answer."

# --- Calculation Tools (remain largely unchanged) ---
def recommend_savings(input_str: str) -> str:
    """
    Provides savings recommendations. If monthly income and spending are provided,
    it calculates a recommended 20% savings. Otherwise, it gives general guidelines
    and prompts for input.
    Input format expected: "income=<amount>, spending=<amount>" or just a general query.
    """
    logging.info(f"Tool 'recommend_savings' called with input: {input_str}")
    income = None
    spending = None

    income_match = re.search(r"income[=\s]*(\d[\d,\.]*)", input_str, re.IGNORECASE)
    spending_match = re.search(r"spending[=\s]*(\d[\d,\.]*)", input_str, re.IGNORECASE)

    if income_match:
        try: income = float(income_match.group(1).replace(',', ''))
        except ValueError: pass
    if spending_match:
        try: spending = float(spending_match.group(1).replace(',', ''))
        except ValueError: pass

    if income is not None and spending is not None:
        if income < 0 or spending < 0: return "Income and spending must be non-negative values."
        if spending > income: return "Your spending seems to exceed your income. While saving is important, focusing on reducing spending or increasing income might be your first step."

        recommended_savings = 0.20 * income
        needs_wants_budget = 0.80 * income

        return (
            f"Based on your monthly income of ${income:,.2f} and spending of ${spending:,.2f}:\n"
            f"Following the 50/30/20 rule, a recommended monthly savings amount (including debt repayment) is ${recommended_savings:,.2f} (20% of income).\n"
            f"This would leave ${needs_wants_budget:,.2f} for your needs and wants.\n"
            "Remember, consistency is key, and even small amounts add up over time."
        )
    else:
        return (
            "To give you a personalized savings recommendation, I need your monthly income and average monthly spending. "
            "A common guideline is the 50/30/20 rule: 50% for needs, 30% for wants, and 20% for savings and debt repayment. "
            "Please provide your monthly income and spending, for example: 'my income is 3000 and spending is 2000'."
        )

def get_budgeting_templates(input_str: str = "") -> str:
    """Describes common budgeting templates and methods."""
    logging.info(f"Tool 'get_budgeting_templates' called with input: {input_str}")
    return (
        "Budgeting templates can help you organize your finances. Common methods include:\n"
        "- **Spreadsheets:** Excel or Google Sheets offer great flexibility for custom budgets. "
        "You can find many free templates online.\n"
        "- **Budgeting Apps:** Apps like Mint, YNAB (You Need A Budget), or Personal Capital offer "
        "features like transaction tracking, goal setting, and visual reports.\n"
        "- **Pen and Paper:** A simple notebook can also work for tracking income and expenses.\n"
        "The key is to choose a method that you find easy to use and stick with."
    )

def get_expense_tracker_info(input_str: str = "") -> str:
    """Explains what an expense tracker is and its benefits."""
    logging.info(f"Tool 'get_expense_tracker_info' called with input: {input_str}")
    return (
        "An expense tracker helps you monitor where your money goes. Its benefits include:\n"
        "- **Understanding Spending Habits:** Reveals where you might be overspending.\n"
        "- **Budget Adherence:** Helps you stick to your budget and identify areas for adjustment.\n"
        "- **Financial Goal Achievement:** By seeing your spending, you can find more money for savings or debt.\n"
        "- **Tax Preparation:** Makes it easier to categorize expenses for tax purposes.\n"
        "Methods range from manual logging to using sophisticated apps."
    )

def calculate_debt_details(input_str: str) -> str:
    """
    Calculates estimated time to pay off a debt and total interest paid based on
    principal, annual interest rate, and monthly payment.
    Input format expected: "principal=<amount>, interest_rate=<percentage>, monthly_payment=<amount>"
    """
    logging.info(f"Tool 'calculate_debt_details' called with input: {input_str}")
    principal = None
    annual_interest_rate = None
    monthly_payment = None

    principal_match = re.search(r"principal[=\s]*(\d[\d,\.]*)", input_str, re.IGNORECASE)
    rate_match = re.search(r"interest_rate[=\s]*(\d[\d,\.]*)", input_str, re.IGNORECASE)
    payment_match = re.search(r"monthly_payment[=\s]*(\d[\d,\.]*)", input_str, re.IGNORECASE)

    if principal_match:
        try: principal = float(principal_match.group(1).replace(',', ''))
        except ValueError: pass
    if rate_match:
        try: annual_interest_rate = float(rate_match.group(1).replace(',', ''))
        except ValueError: pass
    if payment_match:
        try: monthly_payment = float(payment_match.group(1).replace(',', ''))
        except ValueError: pass

    if None in [principal, annual_interest_rate, monthly_payment]:
        return (
            "To calculate debt details, I need the loan principal, the annual interest rate (as a percentage), "
            "and your monthly payment. "
            "Please provide them, for example: 'principal=10000, interest_rate=5, monthly_payment=200'."
        )

    if principal <= 0 or annual_interest_rate < 0 or monthly_payment <= 0:
        return "All input values (principal, interest rate, monthly payment) must be positive."

    monthly_interest_rate = (annual_interest_rate / 100) / 12

    if monthly_payment <= (principal * monthly_interest_rate) and annual_interest_rate > 0:
        return "Your monthly payment is too low to ever pay off the principal, or just covers interest. You might need to increase your payment to see progress."

    remaining_principal = principal
    total_interest_paid = 0
    months = 0
    max_months = 600

    while remaining_principal > 0 and months < max_months:
        interest_for_month = remaining_principal * monthly_interest_rate
        principal_paid_this_month = monthly_payment - interest_for_month

        if principal_paid_this_month <= 0 and remaining_principal > 0:
            return "With these inputs, it seems your monthly payment is not sufficient to pay off the principal within a reasonable timeframe (e.g., it only covers interest). You may need to increase your payment."

        remaining_principal -= principal_paid_this_this_month
        total_interest_paid += interest_for_month
        months += 1

        if remaining_principal < 0.01:
            principal_paid_this_month += remaining_principal
            total_interest_paid -= remaining_principal
            remaining_principal = 0

    if remaining_principal > 0:
        return (
            f"It would take more than {max_months} months (50 years) to pay off a principal of ${principal:,.2f} "
            f"with an annual interest rate of {annual_interest_rate}% and a monthly payment of ${monthly_payment:,.2f}. "
            "You might consider increasing your monthly payment."
        )
    else:
        years = months / 12
        return (
            f"To pay off a principal of ${principal:,.2f} with an annual interest rate of {annual_interest_rate}% "
            f"and a monthly payment of ${monthly_payment:,.2f}:\n"
            f"- Estimated time to pay off: {months} months ({years:.1f} years)\n"
            f"- Estimated total interest paid: ${total_interest_paid:,.2f}"
        )

def get_investment_planning_advice(input_str: str = "") -> str:
    """Provides general advice on investment planning."""
    logging.info(f"Tool 'get_investment_planning_advice' called with input: {input_str}")
    return (
        "Investment planning involves setting financial goals and creating a strategy to achieve them through investments. Key aspects include:\n"
        "- **Define Your Goals:** What are you saving for? (e.g., retirement, down payment, education)\n"
        "- **Assess Risk Tolerance:** How comfortable are you with market fluctuations? This influences your asset allocation.\n"
        "- **Diversification:** Spreading investments across different asset classes (stocks, bonds, real estate) to reduce risk.\n"
        "- **Long-term vs. Short-term:** Tailor investments based on your timeline.\n"
        "- **Regular Contributions:** Consistency is often key to compounding returns.\n"
        "It's often recommended to consult a financial advisor for personalized investment planning."
    )


# --- Define LangChain Tools ---
tools = [
    Tool(
        name="FinancialLiteracyRetriever",
        func=query_knowledge_base, # Pointing to the new Pinecone-integrated function
        description="Useful for answering specific financial literacy questions by retrieving information from a comprehensive knowledge base about topics like 401k, IRA, credit scores, mortgages, etc. Input should be a concise financial literacy question.",
    ),
    Tool(
        name="SavingsAdvisor",
        func=recommend_savings,
        description="Calculates a recommended savings amount based on provided monthly income and spending (e.g., 'income=3000, spending=2000'). If numbers are not provided, it gives general savings guidelines and prompts for input. Use this when the user asks about how much they should save or for personalized savings recommendations.",
    ),
    Tool(
        name="BudgetingTemplateInfo",
        func=get_budgeting_templates,
        description="Provides information about different types of budgeting templates and methods. Use this when the user asks about budgeting templates, how to start a budget, or tools for budgeting.",
    ),
    Tool(
        name="ExpenseTrackerInfo",
        func=get_expense_tracker_info,
        description="Explains what an expense tracker is and its benefits. Use this when the user asks about tracking expenses or managing spending.",
    ),
    Tool(
        name="DebtCalculator",
        func=calculate_debt_details,
        description="Calculates the estimated time to pay off a debt and total interest paid. Requires specific inputs: 'principal=<amount>, interest_rate=<percentage>, monthly_payment=<amount>'. Use this when the user asks to calculate debt, loan payoff time, or total interest.",
    ),
    Tool(
        name="InvestmentPlanningAdvisor",
        func=get_investment_planning_advice,
        description="Offers general advice and principles for investment planning. Use this when the user asks about how to plan investments, investment strategies, or getting started with investing.",
    ),
]

# --- Setup LangChain Agent ---
logging.info("Setting up LangChain Agent...")

llm = ChatOpenAI(model="gpt-4o", temperature=0.7, openai_api_key=os.getenv("OPENAI_API_KEY"))
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Define the agent's prompt for create_openai_tools_agent
prompt = ChatPromptTemplate.from_messages([
    SystemMessage(content="You are a friendly and helpful financial literacy chatbot. Your goal is to assist users with their questions about personal finance, investing, debt management, budgeting, and savings. You have several specialized tools to help you find information and provide advice. When a calculation is requested, ensure you ask for all necessary numerical inputs clearly, specifying the format (e.g., 'monthly income is 3000, spending is 2000' for savings, or 'principal=10000, interest_rate=5, monthly_payment=200' for debt calculation)."),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
    MessagesPlaceholder(variable_name="agent_scratchpad"),
])

# Create the OpenAI Tools agent
agent = create_openai_tools_agent(llm, tools, prompt)

# Create the Agent Executor
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, memory=memory, handle_parsing_errors=True)

logging.info("LangChain Agent setup complete.")

if __name__ == "__main__":
    print("Welcome to Financial Literacy Chatbot (LangChain Agent Demo). Type 'exit' to quit.")
    # Check if vectorstore is initialized before starting interaction
    if vectorstore is None:
        print("Chatbot cannot start: Pinecone vector store was not initialized. Check logs for errors.")
    else:
        while True:
            query = input("\nAsk your question: ")
            if query.lower() == "exit":
                break
            try:
                response = agent_executor.invoke({"input": query})
                print(f"\n💬 Chatbot:\n{response['output']}")
            except Exception as e:
                logging.error(f"Error during agent execution: {e}")
                print("I apologize, I encountered an error trying to answer your question. Please try again.")


2025-06-29 03:55:02,362 - INFO - Initialized OpenAIEmbeddings model for retrieval.
2025-06-29 03:55:02,551 - INFO - Connected to Pinecone client.
2025-06-29 03:55:03,718 - ERROR - Pinecone index 'financial-literacy-chatbot' does not exist. Please run 'pinecone_data_loader.py' first to create and populate it.
2025-06-29 03:55:03,748 - INFO - Pinecone vector store and retriever initialized for existing index.
2025-06-29 03:55:03,761 - INFO - Setting up LangChain Agent...
2025-06-29 03:55:04,068 - INFO - LangChain Agent setup complete.


Welcome to Financial Literacy Chatbot (LangChain Agent Demo). Type 'exit' to quit.


[1m> Entering new AgentExecutor chain...[0m


2025-06-29 03:55:20,054 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[32;1m[1;3mSavings and investments are both essential components of financial planning, but they serve different purposes and have distinct characteristics:

### Savings:
1. **Purpose**: Savings are typically set aside for short-term needs or emergencies. They are meant to be easily accessible when required.
2. **Liquidity**: Savings are usually kept in liquid forms such as cash or savings accounts, making them readily available.
3. **Risk**: Savings generally involve low risk since the money is stored in secure places like banks.
4. **Return**: The return on savings is usually lower, often limited to the interest earned in savings accounts.
5. **Time Horizon**: Savings are intended for short to medium-term needs, such as buying a car, vacation, or emergency fund.

### Investments:
1. **Purpose**: Investments are intended for long-term growth or income. They aim to grow wealth over time.
2. **Liquidity**: Investments can be less liquid, as they are often held in stocks, bonds, real e