In [37]:
# File: portfolio_manager.py
import os
import json
import yfinance as yf
import pandas as pd
from typing import TypedDict
from fredapi import Fred
from sec_edgar_downloader import Downloader
from langgraph.graph import StateGraph, END
from langchain_openai import AzureChatOpenAI
from neo4j import GraphDatabase
from langchain_neo4j import Neo4jGraph
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from langchain_groq import ChatGroq
from langchain_core.output_parsers import JsonOutputParser,StrOutputParser
from langchain_core.prompts import PromptTemplate

parser = JsonOutputParser()
strparser = StrOutputParser()


# ----------------- Configuration -----------------
FRED_API_KEY = os.getenv("FRED_API_KEY")
# AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")


from dotenv import load_dotenv
import os
load_dotenv()

os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")


# Predefined portfolio constituents
PORTFOLIO_SYMBOLS = [
    'AAPL', 'MSFT', 'GOOG', 'AMZN', 'TSLA',
    'JNJ', 'PG', 'V', 'MA', 'NVDA',          # 10 stocks
    'GLD',                                   # Gold ETF
    'TLT', 'BND'                             # Bond ETFs
]

SYMBOL_SECTORS = {
    # Stocks
    'AAPL': 'Technology',
    'MSFT': 'Technology',
    'GOOG': 'Technology',
    'AMZN': 'Consumer Discretionary',
    'TSLA': 'Consumer Discretionary',
    'JNJ': 'Healthcare',
    'PG': 'Consumer Staples',
    'V': 'Financials',
    'MA': 'Financials',
    'NVDA': 'Technology',
    
    # Alternative assets
    'GLD': 'Commodity',
    'TLT': 'Bonds',
    'BND': 'Bonds'
}

GOLD_SYMBOL = 'GLD'
BOND_SYMBOLS = ['TLT', 'BND']

# Initialize services
fred = Fred(api_key=FRED_API_KEY)

llm = ChatGroq(
    model="deepseek-r1-distill-qwen-32b",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)
# ----------------- Enhanced Data Ingestion -----------------
class DataIngestor:
    @staticmethod
    def get_market_data(period: str = "3y"):
        """Fetch historical prices for all portfolio assets"""
        return yf.download(PORTFOLIO_SYMBOLS, period=period,auto_adjust=False)['Adj Close']

    @staticmethod
    def get_economic_data():
        """Fetch key economic indicators from FRED"""
        return {
            'DGS10': fred.get_series('DGS10'),   # 10-Year Treasury Rate
            'CPI': fred.get_series('CPIAUCSL'),  # Consumer Price Index
            'UNRATE': fred.get_series('UNRATE'), # Unemployment Rate
            'GFDEBTN': fred.get_series('GFDEBTN') # Federal Debt
        }

# ----------------- Enhanced Knowledge Graph -----------------
class FinancialKG:
    def __init__(self):
        self.graph = Neo4jGraph(
            url=os.getenv("NEO4J_URI"),
            username=os.getenv("NEO4J_USERNAME"),
            password=os.getenv("NEO4J_PASSWORD"),
            database="neo4j",  # Default database
            # aura_instance_id=os.getenv("AURA_INSTANCEID"),
            # aura_instance_name=os.getenv("AURA_INSTANCENAME")
        )
        
        # Verify connection
        try:
            self.graph.query("RETURN 1 AS test")
            print("Connected to Neo4j AuraDB successfully")
        except Exception as e:
            print(f"Connection failed: {str(e)}")

    def create_asset_node(self, symbol: str):
        self.graph.query(
            """MERGE (a:Asset {symbol: $symbol})
            RETURN a""",
            params={"symbol": symbol}
        )

    def link_risk_factors(self, symbol: str, risk_data: dict):
        self.graph.query(
            """MATCH (a:Asset {symbol: $symbol})
            MERGE (r:Risk {name: $risk_name})
            MERGE (a)-[rel:EXPOSED_TO]->(r)
            SET rel.score = $score""",
            params={
                "symbol": symbol,
                "risk_name": risk_data['name'],
                "score": risk_data['score']
            }
        )

    def create_portfolio_structure(self):
        """Initialize portfolio nodes and relationships"""
        # Create asset classes
        self.graph.query("""
            MERGE (:AssetClass {name: 'Equities'})
            MERGE (:AssetClass {name: 'Fixed Income'})
            MERGE (:AssetClass {name: 'Commodities'})
        """)
        
        # Link assets to classes
        for symbol in PORTFOLIO_SYMBOLS:
            asset_class = 'Commodities' if symbol == GOLD_SYMBOL else \
                        'Fixed Income' if symbol in BOND_SYMBOLS else \
                        'Equities'
            self.graph.query(
                """
                MERGE (a:Asset {symbol: $symbol})
                MERGE (c:AssetClass {name: $class})
                MERGE (a)-[:BELONGS_TO]->(c)
                """,
                params={"symbol": symbol, "class": asset_class}
            )
# ----------------- Enhanced LangGraph Workflow -----------------
class PortfolioState(TypedDict):
    user_query: str
    symbols: list
    economic_data: dict
    market_data: pd.DataFrame
    risk_factors: dict
    constraints: dict
    weights: dict
    requirements: dict

def extract_requirements(state: PortfolioState):
    # Safely get query from state
    user_query = state.get("user_query", "")
    
    template = """Analyze portfolio request and extract:
    {query}

    Return JSON with:
    - risk_tolerance: low/medium/high
    - time_horizon: years
    - constraints: {{
        max_sector_allocation: {{sector: max_percent}},
        min_alternative_allocation: percentage
    }}"""

    prompt = PromptTemplate(template=template,
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()})
    

    chain = prompt | llm | parser
    response = chain.invoke({"query": user_query})

    print("output from extract_requirements:", response)
    
    # response = llm.invoke(prompt)
    return response


def analyze_risks(state: PortfolioState):
    template_risk = """Analyze market risks for a portfolio containing {symbols} given:
    Economic Indicators: {econ_data}
    1-Year Volatility: {volatility}
    
    Output JSON with:
    1. required_bond_allocation (0-1)
    2. required_gold_allocation (0-1) 
    3. sector_risk_adjustments (sector: max_allocation)
    4. risk_scores (1-5 scale)
    5. scenario_analysis (recession/rate_hike cases)

    format should be like this-

    dict : {{
            "required_bond_allocation": float (0-1),
            "required_gold_allocation": float (0-1),
            "sector_risk_adjustments": {{
                "Technology": float (0-1),
                "Healthcare": float (0-1),
                ...
            }},
            "risk_scores": {{
                "interest_rate_risk": int (1-5),
                "inflation_risk": int (1-5),
                "geopolitical_risk": int (1-5),
                "market_volatility": int (1-5)
            }},
            "scenario_analysis": {{
                "recession": {{
                    "expected_loss": float (0-1),
                    "recommended_actions": list[str]
                }},
                "rate_hike": {{
                    "expected_loss": float (0-1),
                    "recommended_actions": list[str]
                }}
            }}
        }}
    
    
    
    """


    prompt = PromptTemplate(
    template=template_risk,
    input_variables=["econ_data", "econ_data", "volatility"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    )
    
    econ_data = {k: v.iloc[-1] for k, v in state["economic_data"].items()}
    volatility = state["market_data"].pct_change().std().mean()

    chain = prompt | llm | parser
    
    response = chain.invoke({"symbols":PORTFOLIO_SYMBOLS,"econ_data": econ_data, "volatility": volatility})
    
    # response = llm.invoke(prompt.format(
    #     symbols=PORTFOLIO_SYMBOLS,
    #     econ_data=econ_data,
    #     volatility=round(volatility, 4)
    # ))

    print("output from analyze_risks:\n", response)
    return {"risk_factors": response}

def optimize_portfolio(state: PortfolioState):
    print("optimize portfolio function called",state["requirements"])
    prices = state["market_data"]
    returns = expected_returns.mean_historical_return(prices)
    cov_matrix = risk_models.exp_cov(prices)
    
    ef = EfficientFrontier(returns, cov_matrix)
    
    # Basic diversification constraint
    ef.add_constraint(lambda w: w <= 0.15)  # Max 15% per stock
    
    # Add bond allocation constraint
    bond_indices = [i for i, s in enumerate(PORTFOLIO_SYMBOLS) if s in BOND_SYMBOLS]
    if bond_indices:
        ef.add_constraint(lambda w: sum(w[i] for i in bond_indices) >= 
                         state["risk_factors"]["required_bond_allocation"])
    
    # Add gold allocation constraint
    if GOLD_SYMBOL in PORTFOLIO_SYMBOLS:
        gold_index = PORTFOLIO_SYMBOLS.index(GOLD_SYMBOL)
        ef.add_constraint(lambda w: w[gold_index] >= 
                         state["risk_factors"]["required_gold_allocation"])
    
    # Sector constraints from both user and risk analysis
    sector_map = [SYMBOL_SECTORS[s] for s in PORTFOLIO_SYMBOLS]
    for sector, max_alloc in {**state["requirements"]["constraints"]["max_sector_allocation"],
                              **state["risk_factors"]["sector_risk_adjustments"]}.items():
        sector_indices = [i for i, s in enumerate(sector_map) if s == sector]
        if sector_indices:
            ef.add_constraint(lambda w, si=sector_indices: sum(w[si]) <= max_alloc)
    
    # Optimize based on risk tolerance
    if state["requirements"]["risk_tolerance"] == 'low':
        ef.min_volatility()
    elif state["requirements"]["risk_tolerance"] == 'medium':
        ef.max_sharpe()
    else:
        target_return = returns.mean() * 1.2  # 20% higher than average
        ef.efficient_return(target_return)
    
    return {"weights": ef.clean_weights()}

def generate_report(state: PortfolioState):
    report_prompt = """Generate comprehensive portfolio analysis report with:
    - Current market risk assessment
    - Asset allocation rationale
    - Stress test scenarios
    - Rebalancing recommendations
    
    Portfolio Details:
    {weights}
    
    Risk Factors:
    {risk_factors}
    
    Economic Context:
    {econ_data}
    
    ouptput should be in JSON format like this-

    {{"report": str}}
    
    """

    prompt = PromptTemplate(
    template=report_prompt,
    input_variables=["weights", "risk_factors", "econ_data"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    )
    
    chain = prompt | llm | parser
    
    econ_data = "\n".join([f"{k}: {v.iloc[-1]:.2f}" 
                         for k, v in state["economic_data"].items()])
    
    # response = llm.invoke(prompt.format(
    #     weights=json.dumps(state["weights"], indent=2),
    #     risk_factors=json.dumps(state["risk_factors"], indent=2),
    #     econ_data=econ_data
    # ))

    response = chain.invoke({"weights":json.dumps(state["weights"], indent=2),"risk_factors": json.dumps(state["risk_factors"], indent=2), "econ_data": econ_data})


    return {**state,"generate_report":response}

# ----------------- Workflow Setup -----------------
workflow = StateGraph(PortfolioState)

workflow.add_node("ingest_data", lambda s: {
    "market_data": DataIngestor.get_market_data(),
    "economic_data": DataIngestor.get_economic_data()
})

workflow.add_node("analyze_risks", analyze_risks)
workflow.add_node("optimize", optimize_portfolio)
workflow.add_node("generate_report", generate_report)

workflow.set_entry_point("ingest_data")
workflow.add_edge("ingest_data", "analyze_risks")
workflow.add_edge("analyze_risks", "optimize")
workflow.add_edge("optimize", "generate_report")
workflow.add_edge("generate_report", END)

# ----------------- Execution -----------------
def run_portfolio_analysis(query: str):
    kg = FinancialKG()
    kg.create_portfolio_structure()
    
    # Initialize state with all required fields
    state = {
        "user_query": query,
        "symbols": PORTFOLIO_SYMBOLS,
        "economic_data": {},
        "market_data": pd.DataFrame(),
        "risk_factors": {},
        "constraints": {},
        "weights": {},
        "requirements": {}
    }
    
    # Add this verification step
    if "user_query" not in state:
        raise ValueError("State missing required 'user_query' field")
        
    # Extract and merge requirements
    requirements = extract_requirements(state)
    state.update({"requirements":requirements})
    
    # Add data ingestion to workflow
    # state.update({
    #     "market_data": DataIngestor.get_market_data(),
    #     "economic_data": DataIngestor.get_economic_data()
    # })
    
    # Run workflow
    app = workflow.compile()
    results = app.invoke(state)
    
    return results

# Example usage
if __name__ == "__main__":
    query = """Optimize portfolio for medium risk tolerance with:
    - Maximum 25% tech sector exposure
    - Minimum 20% allocation to safe-haven assets
    - 5-year investment horizon"""
    
    result = run_portfolio_analysis(query)
    print("\nPortfolio Recommendation:")
    print(result)

Connected to Neo4j AuraDB successfully


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


output from extract_requirements: {'risk_tolerance': 'medium', 'time_horizon': 5, 'constraints': {'max_sector_allocation': {'tech': 25}, 'min_alternative_allocation': 20}}


[*********************100%***********************]  13 of 13 completed
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


output from analyze_risks:
 {'required_bond_allocation': 0.25, 'required_gold_allocation': 0.15, 'sector_risk_adjustments': {'Technology': 0.25, 'Healthcare': 0.2, 'Consumer Goods': 0.15, 'Financials': 0.15, 'Others': 0.2}, 'risk_scores': {'interest_rate_risk': 4, 'inflation_risk': 4, 'geopolitical_risk': 3, 'market_volatility': 2}, 'scenario_analysis': {'recession': {'expected_loss': 0.12, 'recommended_actions': ['Increase allocation to short-term bonds', 'Reduce exposure to high-beta stocks', 'Increase defensive sector allocations']}, 'rate_hike': {'expected_loss': 0.18, 'recommended_actions': ['Reduce duration of bond holdings', 'Increase cash reserves', 'Shift towards dividend-paying stocks']}}}
optimize portfolio function called {'risk_tolerance': 'medium', 'time_horizon': 5, 'constraints': {'max_sector_allocation': {'tech': 25}, 'min_alternative_allocation': 20}}


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"



Portfolio Recommendation:
{'user_query': 'Optimize portfolio for medium risk tolerance with:\n    - Maximum 25% tech sector exposure\n    - Minimum 20% allocation to safe-haven assets\n    - 5-year investment horizon', 'symbols': ['AAPL', 'MSFT', 'GOOG', 'AMZN', 'TSLA', 'JNJ', 'PG', 'V', 'MA', 'NVDA', 'GLD', 'TLT', 'BND'], 'economic_data': {'DGS10': 1962-01-02    4.06
1962-01-03    4.03
1962-01-04    3.99
1962-01-05    4.02
1962-01-08    4.03
              ... 
2025-03-24    4.34
2025-03-25    4.31
2025-03-26    4.35
2025-03-27    4.38
2025-03-28    4.27
Length: 16499, dtype: float64, 'CPI': 1947-01-01     21.480
1947-02-01     21.620
1947-03-01     22.000
1947-04-01     22.000
1947-05-01     21.950
               ...   
2024-10-01    315.564
2024-11-01    316.449
2024-12-01    317.603
2025-01-01    319.086
2025-02-01    319.775
Length: 938, dtype: float64, 'UNRATE': 1948-01-01    3.4
1948-02-01    3.8
1948-03-01    4.0
1948-04-01    3.9
1948-05-01    3.5
             ... 
2024-10-01 

In [36]:
# File: portfolio_manager.py
import os
import json
import yfinance as yf
import pandas as pd
from typing import TypedDict, Dict, Any, List
from fredapi import Fred
from sec_edgar_downloader import Downloader
from langgraph.graph import StateGraph, END
from langchain_openai import AzureChatOpenAI
from neo4j import GraphDatabase
from langchain_neo4j import Neo4jGraph
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from langchain_groq import ChatGroq
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.prompts import PromptTemplate
from typing_extensions import TypedDict

parser = JsonOutputParser()
strparser = StrOutputParser()

# ----------------- Configuration -----------------
FRED_API_KEY = os.getenv("FRED_API_KEY")
NEO4J_URI = "bolt://localhost:7687"
NEO4J_AUTH = ("neo4j", "password")

from dotenv import load_dotenv
load_dotenv()

os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

# Predefined portfolio constituents
PORTFOLIO_SYMBOLS = [
    'AAPL', 'MSFT', 'GOOG', 'AMZN', 'TSLA',
    'JNJ', 'PG', 'V', 'MA', 'NVDA',          # 10 stocks
    'GLD',                                   # Gold ETF
    'TLT', 'BND'                             # Bond ETFs
]

SYMBOL_SECTORS = {
    # Stocks
    'AAPL': 'Technology',
    'MSFT': 'Technology',
    'GOOG': 'Technology',
    'AMZN': 'Consumer Discretionary',
    'TSLA': 'Consumer Discretionary',
    'JNJ': 'Healthcare',
    'PG': 'Consumer Staples',
    'V': 'Financials',
    'MA': 'Financials',
    'NVDA': 'Technology',
    
    # Alternative assets
    'GLD': 'Commodity',
    'TLT': 'Bonds',
    'BND': 'Bonds'
}

GOLD_SYMBOL = 'GLD'
BOND_SYMBOLS = ['TLT', 'BND']

# Initialize services
fred = Fred(api_key=FRED_API_KEY)

llm = ChatGroq(
    model="deepseek-r1-distill-qwen-32b",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# ----------------- Enhanced Data Ingestion -----------------
class DataIngestor:
    @staticmethod
    def get_market_data(period: str = "3y") -> pd.DataFrame:
        """Fetch historical prices for all portfolio assets"""
        return yf.download(PORTFOLIO_SYMBOLS, period=period, auto_adjust=False)['Adj Close']

    @staticmethod
    def get_economic_data() -> Dict[str, pd.Series]:
        """Fetch key economic indicators from FRED"""
        return {
            'DGS10': fred.get_series('DGS10'),   # 10-Year Treasury Rate
            'CPI': fred.get_series('CPIAUCSL'),  # Consumer Price Index
            'UNRATE': fred.get_series('UNRATE'), # Unemployment Rate
            'GFDEBTN': fred.get_series('GFDEBTN') # Federal Debt
        }

# ----------------- Enhanced Knowledge Graph -----------------
class FinancialKG:
    def __init__(self):
        self.graph = Neo4jGraph(
            url=os.getenv("NEO4J_URI"),
            username=os.getenv("NEO4J_USERNAME"),
            password=os.getenv("NEO4J_PASSWORD"),
            database="neo4j",
        )
        
        try:
            self.graph.query("RETURN 1 AS test")
            print("Connected to Neo4j successfully")
        except Exception as e:
            print(f"Connection failed: {str(e)}")

    def create_asset_node(self, symbol: str) -> None:
        self.graph.query(
            """MERGE (a:Asset {symbol: $symbol})
            RETURN a""",
            params={"symbol": symbol}
        )

    def link_risk_factors(self, symbol: str, risk_data: dict) -> None:
        self.graph.query(
            """MATCH (a:Asset {symbol: $symbol})
            MERGE (r:Risk {name: $risk_name})
            MERGE (a)-[rel:EXPOSED_TO]->(r)
            SET rel.score = $score""",
            params={
                "symbol": symbol,
                "risk_name": risk_data['name'],
                "score": risk_data['score']
            }
        )

    def create_portfolio_structure(self) -> None:
        """Initialize portfolio nodes and relationships"""
        self.graph.query("""
            MERGE (:AssetClass {name: 'Equities'})
            MERGE (:AssetClass {name: 'Fixed Income'})
            MERGE (:AssetClass {name: 'Commodities'})
        """)
        
        for symbol in PORTFOLIO_SYMBOLS:
            asset_class = 'Commodities' if symbol == GOLD_SYMBOL else \
                        'Fixed Income' if symbol in BOND_SYMBOLS else \
                        'Equities'
            self.graph.query(
                """
                MERGE (a:Asset {symbol: $symbol})
                MERGE (c:AssetClass {name: $class})
                MERGE (a)-[:BELONGS_TO]->(c)
                """,
                params={"symbol": symbol, "class": asset_class}
            )

# ----------------- State Definition -----------------
class PortfolioState(TypedDict):
    user_query: str
    symbols: List[str]
    economic_data: Dict[str, pd.Series]
    market_data: pd.DataFrame
    risk_factors: Dict[str, Any]
    constraints: Dict[str, Any]
    weights: Dict[str, float]
    requirements: Dict[str, Any]

# ----------------- Node Functions -----------------
def extract_requirements(state: PortfolioState) -> Dict[str, Any]:
    user_query = state.get("user_query", "")
    
    template = """Analyze portfolio request and extract:
    {query}

    Return JSON with:
    - risk_tolerance: low/medium/high
    - time_horizon: years
    - constraints: {{
        max_sector_allocation: {{sector: max_percent}},
        min_alternative_allocation: percentage
    }}"""

    prompt = PromptTemplate(
        template=template,
        input_variables=["query"],
        partial_variables={"format_instructions": parser.get_format_instructions()}
    )
    
    chain = prompt | llm | parser
    response = chain.invoke({"query": user_query})
    print("Output from extract_requirements:", response)
    return {**state,"requirements":response}

def analyze_risks(state: PortfolioState) -> Dict[str, Any]:
    template_risk = """Analyze market risks for a portfolio containing {symbols} given:
    Economic Indicators: {econ_data}
    1-Year Volatility: {volatility}
    
    Output JSON with:
    1. required_bond_allocation (0-1)
    2. required_gold_allocation (0-1) 
    3. sector_risk_adjustments (sector: max_allocation)
    4. risk_scores (1-5 scale)
    5. scenario_analysis (recession/rate_hike cases)"""

    prompt = PromptTemplate(
        template=template_risk,
        input_variables=["symbols", "econ_data", "volatility"],
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )
    
    econ_data = {k: v.iloc[-1] for k, v in state["economic_data"].items()}
    volatility = state["market_data"].pct_change().std().mean()

    chain = prompt | llm | parser
    response = chain.invoke({
        "symbols": PORTFOLIO_SYMBOLS,
        "econ_data": econ_data,
        "volatility": round(volatility, 4)
    })
    
    print("Output from analyze_risks:", response)
    return {**state,"risk_factors": response}

def optimize_portfolio(state: PortfolioState) -> Dict[str, Any]:
    print("optimize portfolio function called",state["requirements"])
    prices = state["market_data"]
    returns = expected_returns.mean_historical_return(prices)
    cov_matrix = risk_models.exp_cov(prices)
    
    ef = EfficientFrontier(returns, cov_matrix)
    ef.add_constraint(lambda w: w <= 0.15)  # Max 15% per stock
    
    # Bond allocation constraint
    bond_indices = [i for i, s in enumerate(PORTFOLIO_SYMBOLS) if s in BOND_SYMBOLS]
    if bond_indices:
        ef.add_constraint(lambda w: sum(w[i] for i in bond_indices) >= 
                         state["risk_factors"]["required_bond_allocation"])
    
    # Gold allocation constraint
    if GOLD_SYMBOL in PORTFOLIO_SYMBOLS:
        gold_index = PORTFOLIO_SYMBOLS.index(GOLD_SYMBOL)
        ef.add_constraint(lambda w: w[gold_index] >= 
                         state["risk_factors"]["required_gold_allocation"])
    
    # Sector constraints
    sector_map = [SYMBOL_SECTORS[s] for s in PORTFOLIO_SYMBOLS]
    for sector, max_alloc in {state["requirements"]["constraints"]["max_sector_allocation"],
                             state["risk_factors"]["sector_risk_adjustments"]}.items():
        sector_indices = [i for i, s in enumerate(sector_map) if s == sector]
        if sector_indices:
            ef.add_constraint(lambda w, si=sector_indices: sum(w[si]) <= max_alloc)
    
    # Optimize based on risk tolerance
    if state["requirements"]["risk_tolerance"] == 'low':
        ef.min_volatility()
    elif state["requirements"]["risk_tolerance"] == 'medium':
        ef.max_sharpe()
    else:
        target_return = returns.mean() * 1.2
        ef.efficient_return(target_return)
    
    # Clean weights and convert to proper format
    raw_weights = ef.clean_weights()
    cleaned_weights = {k: v for k, v in raw_weights.items() if v > 0.0001}  # Filter negligible weights
    
    print("Optimized weights:", cleaned_weights)
    return {**state,"weights": cleaned_weights}

def generate_report(state: PortfolioState) -> Dict[str, Any]:
    report_prompt = """Generate comprehensive portfolio analysis report with:
    - Current market risk assessment
    - Asset allocation rationale
    - Stress test scenarios
    - Rebalancing recommendations
    
    Portfolio Details:
    {weights}
    
    Risk Factors:
    {risk_factors}
    
    Economic Context:
    {econ_data}
    
    """
    
    prompt = PromptTemplate(
        template=report_prompt,
        input_variables=["weights", "risk_factors", "econ_data"],
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )
    
    chain = prompt | llm | strparser
    
    econ_data = "\n".join([f"{k}: {v.iloc[-1]:.2f}" 
                         for k, v in state["economic_data"].items()])
    
    response = chain.invoke({
        "weights": json.dumps(state["weights"], indent=2),
        "risk_factors": json.dumps(state["risk_factors"], indent=2),
        "econ_data": econ_data
    })
    
    print("Generated report:", response)
    return {**state,"report": response}


def ingest_data(state: PortfolioState):
    return {
        **state,
        "market_data": DataIngestor.get_market_data(),
        "economic_data": DataIngestor.get_economic_data()
    }


# ----------------- Workflow Setup -----------------
def initialize_workflow() -> StateGraph:
    workflow = StateGraph(PortfolioState)

    workflow.add_node("ingest_data", ingest_data)
    workflow.add_node("extract_requirements", extract_requirements)
    workflow.add_node("analyze_risks", analyze_risks)
    workflow.add_node("optimize", optimize_portfolio)
    workflow.add_node("generate_report", generate_report)

    workflow.set_entry_point("ingest_data")
    workflow.add_edge("ingest_data", "extract_requirements")
    workflow.add_edge("extract_requirements", "analyze_risks")
    workflow.add_edge("analyze_risks", "optimize")
    workflow.add_edge("optimize", "generate_report")
    workflow.add_edge("generate_report", END)

    return workflow

# ----------------- Execution -----------------
def run_portfolio_analysis(query: str) -> Dict[str, Any]:
    kg = FinancialKG()
    kg.create_portfolio_structure()
    
    # Initialize state with all required fields
    initial_state = PortfolioState(
        user_query=query,
        symbols=PORTFOLIO_SYMBOLS,
        economic_data={},
        market_data=pd.DataFrame(),
        risk_factors={},
        constraints={},
        weights={},
        requirements={}
    )
    
    # Create and run workflow
    app = initialize_workflow().compile()
    results = app.invoke(initial_state)
    
    return results

# Example usage
if __name__ == "__main__":
    query = """Optimize portfolio for medium risk tolerance with:
    - Maximum 25% tech sector exposure
    - Minimum 20% allocation to safe-haven assets
    - 5-year investment horizon"""
    
    result = run_portfolio_analysis(query)
    print("\nFinal Portfolio Recommendation:")
    # print(json.dumps(result, indent=2))

Connected to Neo4j successfully


[*********************100%***********************]  13 of 13 completed
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Output from extract_requirements: {'risk_tolerance': 'medium', 'time_horizon': 5, 'constraints': {'max_sector_allocation': {'tech': 25}, 'min_alternative_allocation': 20}}


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Output from analyze_risks: {'required_bond_allocation': 0.25, 'required_gold_allocation': 0.05, 'sector_risk_adjustments': {'Technology': 0.35, 'Healthcare': 0.2, 'Financials': 0.15, 'Consumer Goods': 0.1, 'Bonds': 0.25}, 'risk_scores': 3, 'scenario_analysis': {'recession': 'The portfolio is moderately resilient, with bonds and gold providing stability, while tech may underperform.', 'rate_hike': 'Tech stocks may hold up, but bonds could face headwinds, necessitating a balanced approach.'}}
optimize portfolio function called {'risk_tolerance': 'medium', 'time_horizon': 5, 'constraints': {'max_sector_allocation': {'tech': 25}, 'min_alternative_allocation': 20}}


TypeError: unhashable type: 'dict'

In [30]:
result

{'user_query': 'Optimize portfolio for medium risk tolerance with:\n    - Maximum 25% tech sector exposure\n    - Minimum 20% allocation to safe-haven assets\n    - 5-year investment horizon',
 'symbols': ['AAPL',
  'MSFT',
  'GOOG',
  'AMZN',
  'TSLA',
  'JNJ',
  'PG',
  'V',
  'MA',
  'NVDA',
  'GLD',
  'TLT',
  'BND'],
 'economic_data': {'DGS10': 1962-01-02    4.06
  1962-01-03    4.03
  1962-01-04    3.99
  1962-01-05    4.02
  1962-01-08    4.03
                ... 
  2025-03-24    4.34
  2025-03-25    4.31
  2025-03-26    4.35
  2025-03-27    4.38
  2025-03-28    4.27
  Length: 16499, dtype: float64,
  'CPI': 1947-01-01     21.480
  1947-02-01     21.620
  1947-03-01     22.000
  1947-04-01     22.000
  1947-05-01     21.950
                 ...   
  2024-10-01    315.564
  2024-11-01    316.449
  2024-12-01    317.603
  2025-01-01    319.086
  2025-02-01    319.775
  Length: 938, dtype: float64,
  'UNRATE': 1948-01-01    3.4
  1948-02-01    3.8
  1948-03-01    4.0
  1948-04-01  