# LangChain Explore

In [None]:
import os
import pandas as pd
from fredapi import Fred # For FRED API
from dotenv import load_dotenv # For loading .env file (optional, for local dev)

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import create_tool_calling_agent, AgentExecutor
# from langchain_core.tools import tool # Updated way to define tools
from langchain_core.messages import HumanMessage, AIMessage


from pydantic import BaseModel, Field
from typing import List, Optional

from langchain_core.callbacks import (
    AsyncCallbackManagerForToolRun,
    CallbackManagerForToolRun,
)
from langchain_core.tools import BaseTool
from langchain_core.tools.base import ArgsSchema


import os
from typing import List, Optional, Type, Any 
import pandas as pd
from fredapi import Fred 
from dotenv import load_dotenv 
# import numpy as np # No longer needed for std, mean here

from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.tools import BaseTool
from pydantic import BaseModel, Field, field_validator, model_validator 
from langchain.agents import create_tool_calling_agent, AgentExecutor
from langchain_core.messages import HumanMessage, AIMessage
from datetime import datetime


In [None]:
# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()

# The FRED client cannot be built without a key, so fail fast when it is
# missing or empty.
fred_api_key = os.environ.get("FRED_API_KEY")
if fred_api_key is None or fred_api_key == "":
    raise ValueError("FRED_API_KEY not found in environment variables. Please set it.")

# Shared FRED client used by the tool defined later in the notebook.
fred = Fred(api_key=fred_api_key)

In [None]:
# Data sources: fred.stlouisfed.org, data.nasdaq.com/data/FRED-federal-reserve-economic-data

# Maps the human-readable series names accepted by the tool to FRED series IDs.
# Insertion order matters: the names are joined, in this order, into the tool's
# argument description.
FRED_SERIES_IDS = {
    # Quarterly, billions of chained 2017 dollars.
    "Real GDP": "GDPC1",
    # Quarterly, billions of dollars.
    "Nominal GDP": "GDP",
    # Monthly, Consumer Price Index for All Urban Consumers: All Items.
    "CPI": "CPIAUCSL",
    # Monthly, Consumer Price Index for All Urban Consumers: Less Food & Energy.
    "Core CPI": "CPILFESL",
    # Monthly, percent.
    "Unemployment Rate": "UNRATE",
    # Daily series; summaries often quote FEDFUNDS (monthly average) instead.
    "Federal Funds Rate": "DFF",
    # Daily, 10-Year Treasury Constant Maturity Rate.
    "10-Year Treasury": "DGS10",
    # Monthly, index.
    "Industrial Production": "INDPRO",
    # Monthly, percent.
    "Personal Saving Rate": "PSAVERT",
    # Monthly, millions of dollars.
    "Retail Sales": "RSAFS",
}

In [27]:
# Series fetched when the caller does not name any series explicitly.
DEFAULT_SERIES_TO_FETCH = [
    "Real GDP",
    "CPI",
    "Unemployment Rate",
    "Federal Funds Rate",
]

# --- Input Schema for FRED Tool ---
class MacroDataInput(BaseModel):
    """Argument schema for the FRED macroeconomic data tool.

    All fields are optional. ``start_date`` and ``end_date`` must be
    ``YYYY-MM-DD`` strings when given, and ``end_date`` may not precede
    ``start_date``.
    """

    series_name_list: Optional[List[str]] = Field(
        default=None,
        description=f"Optional list of specific U.S. macroeconomic series names to fetch. Valid options include: {', '.join(FRED_SERIES_IDS.keys())}."
    )
    start_date: Optional[str] = Field(
        default=None, description="Optional start date (YYYY-MM-DD)."
    )
    end_date: Optional[str] = Field(
        default=None, description="Optional end date (YYYY-MM-DD)."
    )
    geo_location: Optional[str] = Field(
        default="USA",
        description="Optional geographical location (e.g., 'USA', 'Texas', 'California'). Note: Most FRED series are national (USA). Specific regional series must be requested by their exact name if available (e.g., 'Texas Unemployment Rate'). This parameter primarily serves as context."
    )
    frequency: Optional[str] = Field(
        default=None,
        description="Optional desired data frequency. Examples: 'D' (Daily), 'W' (Weekly), 'M' (Monthly), 'Q' (Quarterly), 'A' (Annual). The tool will report the series' native frequency."
    )

    @field_validator('start_date', 'end_date', mode='before')
    @classmethod
    def validate_date_format(cls, value: Optional[str]) -> Optional[str]:
        """Check that a supplied date is a ``YYYY-MM-DD`` string.

        Returns the value unchanged (or ``None`` when absent).

        Raises:
            ValueError: if the value is not a string or does not parse as
                ``YYYY-MM-DD``.
        """
        if value is None:
            return None
        # This runs in 'before' mode, so the raw input may not be a string.
        # strptime would raise TypeError on it, which pydantic v2 does not
        # turn into a validation error; reject it explicitly instead.
        if not isinstance(value, str):
            raise ValueError(f"Date '{value}' must be in YYYY-MM-DD format")
        try:
            datetime.strptime(value, '%Y-%m-%d')
        except ValueError:
            raise ValueError(f"Date '{value}' must be in YYYY-MM-DD format") from None
        return value

    @model_validator(mode='after')
    def validate_date_logic(self) -> 'MacroDataInput':
        """Reject a range whose ``end_date`` is earlier than its ``start_date``.

        Raises:
            ValueError: if both dates are set and the end precedes the start.
        """
        if self.start_date and self.end_date:
            range_start = datetime.strptime(self.start_date, '%Y-%m-%d')
            range_end = datetime.strptime(self.end_date, '%Y-%m-%d')
            if range_end < range_start:
                raise ValueError("end_date cannot be before start_date")
        return self

# --- FRED Tool Definition ---
class GetFredMacroeconomicDataTool(BaseTool):
    """LangChain tool that fetches FRED series and renders them as plain text.

    The tool reads the module-level ``fred`` client, ``FRED_SERIES_IDS`` and
    ``DEFAULT_SERIES_TO_FETCH``. Problems (unknown series, empty results,
    fetch errors) are reported inside the returned text rather than raised,
    so the calling agent can read and react to them.
    """

    name: str = "get_fred_macroeconomic_data"
    description: str = (
        "Fetches U.S. macroeconomic data from FRED. Can fetch the latest data, or data over a specified date range (YYYY-MM-DD). "
        "You can also specify a 'geo_location' (e.g., 'USA', 'Texas'; most data is national) and a desired 'frequency' (e.g., 'M' for monthly; tool will report native frequency). "
        "Use for current economic conditions, historical trends, specific U.S. indicators (GDP, CPI, unemployment, etc.). Returns data points, not statistical summaries."
    )
    args_schema: Type[BaseModel] = MacroDataInput
    return_direct: bool = False

    def _resolve_series(self, series_name: str, geo_location: Optional[str], notes: List[str]):
        """Return ``(display_name, series_id)`` for a requested series.

        When a non-USA ``geo_location`` is given and is not already part of
        the series name, a "<geo_location> <series_name>" entry is tried
        first. If none exists, a note is appended to ``notes`` and the
        national series is used. ``series_id`` is ``None`` for unknown names.
        """
        wants_regional = bool(
            geo_location
            and geo_location.lower() != "usa"
            and geo_location.lower() not in series_name.lower()
        )
        if wants_regional:
            regional_name = f"{geo_location} {series_name}"
            regional_id = FRED_SERIES_IDS.get(regional_name)
            if regional_id:
                return regional_name, regional_id
            notes.append(f"Note: For '{series_name}', specific data for '{geo_location}' may require a dedicated regional series ID. Fetching national data if available.")
        return series_name, FRED_SERIES_IDS.get(series_name)

    def _format_points(self, points, units: str) -> List[str]:
        """Render each (date, value) observation as one indented text line."""
        return [
            f"  {date_val.strftime('%Y-%m-%d')}: {value:.2f} {units}"
            for date_val, value in points.items()
        ]

    def _run(
        self,
        series_name_list: Optional[List[str]] = None,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None,
        geo_location: Optional[str] = "USA",
        frequency: Optional[str] = None
    ) -> str:
        """Fetch the requested series and return a newline-joined text report.

        Args:
            series_name_list: Keys of ``FRED_SERIES_IDS``. When empty, the
                default set is used, unless dates or a frequency were also
                given, in which case an error message is returned.
            start_date: Optional inclusive lower bound (YYYY-MM-DD).
            end_date: Optional inclusive upper bound (YYYY-MM-DD).
            geo_location: Context label; also used to look up a regional
                variant of each series name.
            frequency: Echoed in the report only; it is not applied to the
                fetch.

        Returns:
            A report that starts with the fetch time, geo-location and
            requested-frequency lines, followed by one section per series.
            At most 20 observations are printed per series.
        """
        if fred is None:
            return "Error: FRED API client not initialized."

        # astimezone() attaches the local zone so %Z prints a zone name;
        # on a naive datetime %Z is empty and leaves a dangling space.
        fetched_at = datetime.now().astimezone().strftime('%Y-%m-%d %H:%M:%S %Z')
        current_time_info = f"FRED data fetched on: {fetched_at}"
        geo_info = f"Geo-location context: {geo_location if geo_location else 'Not specified (defaulting to USA)'}."
        freq_request_info = f"Requested frequency: {frequency if frequency else 'Not specified'}."

        data_summary_parts = [current_time_info, geo_info, freq_request_info]

        names_to_fetch = series_name_list
        if not names_to_fetch:
            if start_date or end_date or frequency:
                return f"{current_time_info}\n{geo_info}\n{freq_request_info}\nError: Dates, frequency, or geo-location were specified for FRED, but no series names were provided. Please specify which macroeconomic series you are interested in."
            names_to_fetch = DEFAULT_SERIES_TO_FETCH
            data_summary_parts.append(f"No specific FRED series requested. Returning latest for default set: {', '.join(names_to_fetch)}")

        for series_name in names_to_fetch:
            effective_series_name, series_id = self._resolve_series(
                series_name, geo_location, data_summary_parts
            )

            if not series_id:
                data_summary_parts.append(f"Warning: FRED series '{effective_series_name}' not recognized or ID not found.")
                continue

            # Fallback header, used if the fetch fails before metadata is known.
            series_info_str_header = f"--- FRED Data for {effective_series_name} ({series_id}) ---"
            try:
                series_data = fred.get_series(series_id, observation_start=start_date, observation_end=end_date)
                if series_data.empty:
                    data_summary_parts.append(f"{series_info_str_header}\nNo data found for the specified criteria.")
                    continue

                info = fred.get_series_info(series_id)
                units = info.get('units_short', '')
                native_freq_short = info.get('frequency_short', '')
                native_freq_long = info.get('frequency', '')
                series_title = info.get('title', effective_series_name)
                # Keep only the part before the first space of 'last_updated'.
                last_upd_fred = info.get('last_updated', 'N/A').split(" ")[0]

                series_info_str_header = f"--- FRED Data for '{series_title}' ({series_id}, Native Freq: {native_freq_long} ({native_freq_short}), Units: {units}, FRED Last Updated: {last_upd_fred}) ---"
                data_summary_parts.append(series_info_str_header)

                # Re-apply the bounds locally so the report never shows
                # observations outside the requested range.
                if start_date:
                    series_data = series_data[series_data.index >= pd.to_datetime(start_date)]
                if end_date:
                    series_data = series_data[series_data.index <= pd.to_datetime(end_date)]

                if series_data.empty:
                    data_summary_parts.append(f"No data points found within the precise date range: {start_date or 'earliest'} to {end_date or 'latest'}.")
                    continue

                max_points_to_display = 20
                display_data = series_data.dropna()
                total_points = len(display_data)

                if display_data.empty:
                    data_summary_parts.append("No valid (non-NaN) data points found for the period.")
                elif total_points <= max_points_to_display:
                    data_summary_parts.append(f"{total_points} observations found:")
                    data_summary_parts.extend(self._format_points(display_data, units))
                elif start_date or end_date:
                    # An explicit range was requested: show both ends of it.
                    half = max_points_to_display // 2
                    data_summary_parts.append(f"Displaying first {half} and last {half} points of {total_points} total observations:")
                    data_summary_parts.extend(self._format_points(display_data.head(half), units))
                    data_summary_parts.append("  ...")
                    data_summary_parts.extend(self._format_points(display_data.tail(half), units))
                else:
                    # No range means "latest data" per the tool description,
                    # so show the most recent points rather than the start of
                    # the series' full history.
                    data_summary_parts.append(f"Displaying latest {max_points_to_display} of {total_points} total observations:")
                    data_summary_parts.extend(self._format_points(display_data.tail(max_points_to_display), units))

            except Exception as e:
                # Tool boundary: report the failure in the text so one bad
                # series does not abort the others.
                data_summary_parts.append(f"{series_info_str_header}\nError fetching or processing: {str(e)}")

        return "\n".join(data_summary_parts) if len(data_summary_parts) > 3 else f"{current_time_info}\n{geo_info}\n{freq_request_info}\nNo FRED data processed."

    async def _arun(self, series_name_list: Optional[List[str]]=None, start_date: Optional[str]=None, end_date: Optional[str]=None, geo_location: Optional[str]="USA", frequency: Optional[str]=None) -> str:
        """Async entry point; same arguments and result as ``_run``.

        ``_run`` is blocking, so it is executed in a worker thread to keep
        the event loop free.
        """
        import asyncio

        return await asyncio.to_thread(
            self._run, series_name_list, start_date, end_date, geo_location, frequency
        )

# --- Agent Setup (LLM, Prompt, Agent, Executor) ---
# Chat model used by the agent; temperature is set to 0.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

# Name of the prompt variable that carries earlier conversation turns.
MEMORY_KEY = "chat_history"

# The date is formatted once, when this cell runs, and baked into the prompt.
_today_label = datetime.now().strftime('%A, %B %d, %Y')

# System instructions: role, working method, and the one available tool.
_system_instructions = (
    "You are a model performance analyst investigating underwriting models for credit decisioning. "
    "Think step by step. First, understand the anomaly. Then, decide which metrics or data points are needed. "
    "You have access to the 'get_fred_macroeconomic_data' tool: For U.S. macroeconomic context (GDP, CPI, unemployment, etc.). "
    "   - This tool can specify 'series_name_list', 'start_date', 'end_date', 'geo_location' (default USA), and 'frequency' (e.g., M, Q, A). It returns data points and native frequency.\n"
    "After reviewing information from the tool, analyze the implications and decide what other information you need to examine. "
    f"Today's date is {_today_label}."
)

# Message order: system instructions, optional prior turns, the current
# request, then the agent scratchpad.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", _system_instructions),
        MessagesPlaceholder(variable_name=MEMORY_KEY, optional=True),
        ("human", "Investigate the following anomaly: {anomaly_description}\nInitial knowledge: {knowledge}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

# The FRED tool is the only tool handed to the agent.
fred_tool_instance = GetFredMacroeconomicDataTool()
tools_list = [fred_tool_instance]

agent = create_tool_calling_agent(llm, tools=tools_list, prompt=prompt)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools_list,
    verbose=True,
    handle_parsing_errors=True,
)

# --- Chat History and Interaction Logic ---
# Running transcript of earlier investigations, shared across ask_agent calls.
chat_history = []


def ask_agent(anomaly_description: str, knowledge: str) -> str:
    """Run one investigation through the agent and record it in the history.

    Args:
        anomaly_description: The anomaly to investigate.
        knowledge: Any context already known; may be an empty string.

    Returns:
        The agent's final answer (the ``"output"`` field of the response).

    The history is only extended after a successful call, so a failed
    invocation leaves ``chat_history`` untouched.
    """
    # Pass along only human/AI messages from the transcript.
    prior_turns = [msg for msg in chat_history if isinstance(msg, (HumanMessage, AIMessage))]
    response = agent_executor.invoke({
        "anomaly_description": anomaly_description,
        "knowledge": knowledge,
        # Use the same key the prompt's history placeholder is declared with.
        # That placeholder is optional, so a mismatched key would silently
        # drop the history instead of raising.
        MEMORY_KEY: prior_turns,
    })
    answer = response["output"]
    chat_history.append(HumanMessage(content=f"Investigated: {anomaly_description} (Knowledge: {knowledge})"))
    chat_history.append(AIMessage(content=answer))
    return answer

if __name__ == "__main__":
    # Interactive loop: read an anomaly and optional context, ask the agent,
    # print the answer. Typing 'exit' at either prompt ends the session.
    if fred is None:
        print("FRED client not initialized. 'get_fred_macroeconomic_data' tool will error.")

    print(f"Model Performance Analyst Agent Initialized (Today: {datetime.now().strftime('%A, %B %d, %Y')}).")
    print("Type 'exit' to quit.\n")
    while True:
        try:
            anomaly_input = input("Describe the anomaly to investigate: ")
            # Ignore surrounding whitespace so ' exit ' also quits.
            if anomaly_input.strip().lower() == 'exit':
                break
            knowledge_input = input("Provide any current knowledge/context (or press Enter if none): ")
            if knowledge_input.strip().lower() == 'exit':
                break
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C at a prompt ends the session cleanly
            # instead of surfacing a traceback.
            print()
            break
        print("\nThinking...\n")
        try:
            agent_response = ask_agent(anomaly_description=anomaly_input, knowledge=knowledge_input)
            print(f"Analyst Agent: {agent_response}\n")
        except Exception as e:
            # Keep the session alive when a single investigation fails.
            print(f"An error occurred: {e}\n")


Model Performance Analyst Agent Initialized (Today: Friday, May 16, 2025).
Type 'exit' to quit.


Thinking...



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `get_fred_macroeconomic_data` with `{'series_name_list': ['Unemployment Rate'], 'geo_location': 'Texas', 'start_date': '2023-01-01', 'end_date': '2025-05-16', 'frequency': 'M'}`
responded: To investigate the anomaly regarding the Texas unemployment rate compared to the U.S. unemployment rate, we need to gather the most recent unemployment data for both Texas and the U.S. 

Here are the steps we will take:

1. **Fetch the Texas Unemployment Rate**: We will retrieve the latest unemployment rate data specifically for Texas.
2. **Fetch the U.S. Unemployment Rate**: We will also retrieve the latest unemployment rate data for the entire United States.
3. **Compare the Data**: Once we have both data points, we can analyze and compare the unemployment rates to identify any anomalies.

Let's proceed to fetch the un

In [None]:
chat_history[0]

HumanMessage(content='Investigated: what is texas unemployment rate comparing with the US (Knowledge: )', additional_kwargs={}, response_metadata={})

In [37]:
chat_history[1]

AIMessage(content='Here are the unemployment rate data points for Texas and the U.S. from January 2023 to April 2025:\n\n### Texas Unemployment Rate\n- **January 2023**: 3.50%\n- **February 2023**: 3.60%\n- **March 2023**: 3.50%\n- **April 2023**: 3.40%\n- **May 2023**: 3.60%\n- **June 2023**: 3.60%\n- **July 2023**: 3.50%\n- **August 2023**: 3.70%\n- **September 2023**: 3.80%\n- **October 2023**: 3.90%\n- **November 2023**: 4.00%\n- **December 2023**: 4.10%\n- **January 2024**: 4.00%\n- **February 2024**: 4.10%\n- **March 2024**: 4.20%\n- **April 2024**: 4.20%\n- **May 2024**: 4.20%\n- **June 2024**: 4.10%\n- **July 2024**: 4.20%\n- **August 2024**: 4.20%\n- **September 2024**: 4.10%\n- **October 2024**: 4.10%\n- **November 2024**: 4.20%\n- **December 2024**: 4.10%\n- **January 2025**: 4.00%\n- **February 2025**: 4.10%\n- **March 2025**: 4.20%\n- **April 2025**: 4.20%\n\n### U.S. Unemployment Rate\n- **January 2023**: 3.50%\n- **February 2023**: 3.60%\n- **March 2023**: 3.50%\n- **Apr