# In [None]:  (Jupyter cell marker left over from the notebook export)
import openai
import json
import langgraph
from langchain.schema import HumanMessage
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph, START, END
from typing import TypedDict
import os
import random
import difflib
import pandas as pd
from dotenv import load_dotenv
import re

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Read the OpenAI key from the environment; os.getenv returns None when it is unset.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


# Absolute, machine-specific path to the JSON file holding the report summaries.
json_path = r"C:/Users/Owner/Desktop/invest portfolio/10K_reports_summary.json"

# Parse the whole file up front; organize_data_by_year iterates `data` and reads
# the "file_name" and "summary" keys of each entry.
with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

def organize_data_by_year(data):
    """Group report summaries by company name and report year.

    Args:
        data: Iterable of report dicts. Each must have a ``"file_name"``
            string and a ``"summary"`` that is either a dict or a
            JSON-encoded string.

    Returns:
        A dict mapping company name to ``{year: summary_dict}``. The year is
        ``"2023"`` when the file name contains "2023" and ``"2024"``
        otherwise. The company name is the file name with ``_2023``,
        ``_2024`` and ``.pdf`` removed. Any summary that cannot be turned
        into a dict is stored as an empty dict.

    Raises:
        KeyError: If a report lacks ``"file_name"`` or ``"summary"``.
    """
    company_data = {}
    for report in data:
        file_name = report["file_name"]
        year = "2023" if "2023" in file_name else "2024"
        company_name = (
            file_name.replace("_2023", "").replace("_2024", "").replace(".pdf", "")
        )

        summary = report["summary"]
        if isinstance(summary, str):
            try:
                summary = json.loads(summary)
            except json.JSONDecodeError:
                summary = {}

        # Valid JSON can still decode to a list/str/number/None, and the raw
        # value may not be a dict at all. Consumers call summary.get(...), so
        # anything that is not a dict is normalised to an empty dict.
        if not isinstance(summary, dict):
            summary = {}

        company_data.setdefault(company_name, {})[year] = summary

    return company_data

def translate_to_korean(text: str):
    """Translate the given text into Korean with the module-level LLM.

    The text is embedded in a Korean-language prompt that asks the model to
    translate an investment view while keeping financial terms close to
    their original meaning.

    Args:
        text: The text to translate.

    Returns:
        The model's reply content with surrounding whitespace stripped.
    """
    prompt = f"""
    다음 투자 견해를 한국어로 번역하세요.  
    금융 용어는 가능한 한 원래 의미를 유지해야 합니다.

    **번역할 내용:**  
    {text}

    **번역 결과:**  
    """

    message = HumanMessage(content=prompt)
    reply = llm.invoke([message])
    translated = reply.content
    return translated.strip()

# Nested mapping built from the loaded reports: company name -> {year: summary}.
company_data = organize_data_by_year(data)

# Absolute, machine-specific path to the news CSV; sentiment_analysis_agent reads
# its "ticker", "summary" and "datetime" columns.
file_path = "C:/Users/Owner/Desktop/invest portfolio/dow_30_news.csv"
news_df = pd.read_csv(file_path)

# OpenAI LLM configuration (do not change)
llm = ChatOpenAI(model_name="gpt-4o-mini", openai_api_key=OPENAI_API_KEY)

class InvestmentState(TypedDict):
    """State dictionary used as the schema of the LangGraph workflow."""

    views: str  # current investment views text
    prev_views: str  # investment views kept from the previous iteration
    agreement_score: int  # adjusted by update_agreement_score, which clamps it to 1..10
    iteration: int  # incremented by update_agreement_score on every update

def calculate_similarity(text1, text2):
    """Return the difflib similarity ratio of two investment-view strings.

    Args:
        text1: First string to compare.
        text2: Second string to compare.

    Returns:
        A float in [0.0, 1.0]; 1.0 means the strings are identical.
    """
    matcher = difflib.SequenceMatcher(a=text1, b=text2)
    return matcher.ratio()

def update_agreement_score(state: InvestmentState, new_views: str):
    """Build the next state after an agent has produced revised views.

    Called by both the sentiment and the financial analysis agents. The new
    views are compared with ``state["prev_views"]``: a similarity above 0.8
    raises the agreement score by one, below 0.5 lowers it by one, and
    anything in between leaves it unchanged. The score is kept within 1..10.

    Args:
        state: Current workflow state.
        new_views: Revised investment views text.

    Returns:
        A dict with the new ``views``, the former ``views`` stored as
        ``prev_views``, the clamped ``agreement_score`` and ``iteration``
        increased by one.
    """
    ratio = calculate_similarity(state["prev_views"], new_views)

    # +1 for high similarity, -1 for low similarity, 0 in between.
    delta = int(ratio > 0.8) - int(ratio < 0.5)

    # Clamp the adjusted score to the 1..10 range.
    candidate = state["agreement_score"] + delta
    clamped_score = min(10, max(1, candidate))

    next_state = {
        "views": new_views,
        # Keep the current views so the next call can compare against them.
        "prev_views": state["views"],
        "agreement_score": clamped_score,
        "iteration": state["iteration"] + 1,
    }
    return next_state


# Initial investment-view generation function (based on the JSON data)
def generate_initial_views(state: InvestmentState):
    """Generate the first set of investment views from the report summaries.

    Formats every company/year entry of the module-level ``company_data``
    into a text context, asks the LLM for up to ten viewpoints, prints the
    reply and returns it as the initial views.

    Args:
        state: Current workflow state (not read by this node).

    Returns:
        A partial state update with ``views`` set to the LLM reply and both
        ``agreement_score`` and ``iteration`` set to 0.
    """
    summary_fields = (
        "Business Overview",
        "Key Risk Factors",
        "Financial Summary",
        "Management Insights",
    )

    # Collect the pieces first and join once instead of repeated string +=.
    pieces = []
    for company, years in company_data.items():
        pieces.append(f"\n📌 {company}\n")
        for year, summary in years.items():
            pieces.append(f"🔹 {year}년 데이터:\n")
            for field in summary_fields:
                pieces.append(f"- **{field}**: {summary.get(field, 'N/A')}\n")
    context = "".join(pieces)

    prompt = f"""
    You are a financial analyst specializing in Black-Litterman model-based investment insights.
    Based on the summarized 10-K reports from 2023 and 2024, generate **max ten investment viewpoints** 
    using a comparative analysis of companies and their financial performance.

    **Company Data:**
    {context}

    **Instructions:**
    - Generate **max ten investment viewpoints** that must **always include specific companies**.
    - Each viewpoint **must contain an expected return percentage change (increase or decrease) and which company is**.
    - The expected return change **must be between 1% and 8%**.
    - Ensure that each viewpoint **directly compares two companies** (e.g., "Microsoft vs. Google") or **focuses on a single company's expected return change**.
    - Use financial trends, key risks, and management insights to justify each viewpoint.

    **Output Format:**
    - **Viewpoint 1**: ...
    - **Viewpoint 2**: ...
    """

    reply = llm.invoke([HumanMessage(content=prompt)])
    generated_views = reply.content
    print("\n🔵 [초기 투자 견해 생성 완료] 🔵\n", generated_views)

    # NOTE(review): this sets agreement_score to 0 although the graph is
    # invoked with 5 and update_agreement_score clamps to 1..10 - confirm intent.
    return {"views": generated_views, "agreement_score": 0, "iteration": 0}


def sentiment_analysis_agent(state: InvestmentState):
    """Revise the current investment views using news sentiment.

    Steps:
      1. Find which companies from a fixed name-to-ticker table appear in
         ``state["views"]``.
      2. Take the rows of the module-level ``news_df`` for those tickers that
         have a summary, and randomly sample a few summaries per ticker.
      3. Ask the LLM (in Korean) to classify the sentiment per ticker.
      4. Ask the LLM to rewrite the views in Korean with that sentiment
         taken into account.

    Args:
        state: Current workflow state; only ``views`` is read here.

    Returns:
        The unchanged ``state`` when no known company is mentioned or no
        usable news exists; otherwise the dict produced by
        ``update_agreement_score`` for the revised views.
    """
    dow30_ticker_mapping = {
        "Apple": "AAPL", "Microsoft": "MSFT", "NVIDIA": "NVDA", "Amazon": "AMZN",
        "JPMorgan Chase": "JPM", "Coca-Cola": "KO", "Tesla": "TSLA", "Walmart": "WMT",
    }

    # Plain substring match of each company name against the views text.
    mentioned_tickers = [
        ticker
        for company, ticker in dow30_ticker_mapping.items()
        if company in state["views"]
    ]
    if not mentioned_tickers:
        print("⚠️ 감성 분석할 종목이 없음.")
        return state

    # .copy() gives an independent frame, so the column assignment below does
    # not write into a slice of news_df (avoids SettingWithCopyWarning).
    filtered_news_df = (
        news_df[news_df["ticker"].isin(mentioned_tickers)]
        .dropna(subset=["summary"])
        .copy()
    )
    # Rows without a summary are already dropped; only the str cast is needed.
    filtered_news_df["summary"] = filtered_news_df["summary"].astype(str)

    if filtered_news_df.empty:
        print("⚠️ 관련 뉴스가 없음.")
        return state

    filtered_news_df = filtered_news_df.sort_values(by="datetime", ascending=False)

    summarized_news = {}
    for ticker in mentioned_tickers:
        subset = filtered_news_df[filtered_news_df["ticker"] == ticker]
        if subset.empty:
            continue
        available = len(subset)
        # Sample 3-5 articles, capped by how many exist. A fixed lower bound
        # of 3 makes randint raise ValueError for tickers with 1-2 articles.
        num_articles = random.randint(min(3, available), min(5, available))
        selected_articles = subset.sample(
            n=num_articles, random_state=random.randint(1, 1000)
        )["summary"].tolist()
        summarized_news[ticker] = " ".join(selected_articles)

    if not summarized_news:
        print("⚠️ 샘플링된 뉴스가 없음.")
        return state

    sentiment_prompt = f"""
    다음 뉴스 요약을 분석하여 감성을 판단하고, **한국어로만** 답변하세요.  
    각 종목의 뉴스 감성을 긍정(Positive), 부정(Negative), 중립(Neutral) 중 하나로 분류하세요.

    {summarized_news}
    """

    response = llm.invoke([HumanMessage(content=sentiment_prompt)])
    sentiment_results_korean = response.content.strip()  # reply is requested in Korean

    print("\n🟡 [감성 분석 결과] 🟡")
    print(sentiment_results_korean)

    adjustment_prompt = f"""
    다음 투자 견해를 감성 분석 결과를 반영하여 수정하세요.  
    **모든 답변을 한국어로 작성하세요.**

    **현재 투자 견해:**  
    {state["views"]}

    **감성 분석 결과:**  
    {sentiment_results_korean}
    """

    adjusted_response = llm.invoke([HumanMessage(content=adjustment_prompt)])
    adjusted_views_korean = adjusted_response.content.strip()

    print("\n🟡 [감성 분석 이후 수정된 투자 견해] 🟡")
    print(adjusted_views_korean)

    return update_agreement_score(state, adjusted_views_korean)


def _extract_expected_return(views_text):
    """Return the first expected-return figure in *views_text* as a fraction, or None."""
    # Prefer a number written as a percentage, so list numbering such as
    # "Viewpoint 1" is not mistaken for the expected return.
    percent_match = re.search(r"(\d+(?:\.\d+)?)\s*%", views_text)
    if percent_match:
        return float(percent_match.group(1)) / 100

    # Fallback: first bare number in the text, treated as a percentage.
    numbers = re.findall(r"\d+\.?\d*", views_text)
    return float(numbers[0]) / 100 if numbers else None


def financial_analysis_agent(state: InvestmentState):
    """Check the investor's expected return against financial ratios.

    Loads the financial-ratio CSV, identifies the company mentioned in
    ``state["views"]`` via ``extract_company_name``, extracts the expected
    return from the views text, and asks the LLM to judge and, if needed,
    adjust that return based on the company's ratios.

    Args:
        state: Current workflow state; only ``views`` is read here.

    Returns:
        The unchanged ``state`` (after printing the reason) when the company,
        its financial data or the expected return cannot be determined, so the
        node returns a state mapping on every path, as
        ``sentiment_analysis_agent`` does. Otherwise the dict produced by
        ``update_agreement_score`` for the LLM's adjusted views.
    """
    financial_data = pd.read_csv("/Users/wnsgud/workplace/DB보험금융공모전/financial_ratios/All_Financial_Ratios.csv")

    # 1. Identify the company named in the views text.
    available_companies = financial_data["종목명"].unique()
    company_name = extract_company_name(state["views"], available_companies)

    if not company_name:
        print("❌ The company name could not be identified from the investor's statement. Please check again.")
        return state

    # 2. Select that company's rows. A distinct local name avoids shadowing
    #    the module-level `company_data` mapping.
    company_rows = financial_data[financial_data["종목명"] == company_name]

    if company_rows.empty:
        print(f"❌ No financial data found for {company_name}.")
        return state

    # 3. Extract the investor's expected return as a fraction (e.g. 0.05).
    investor_expected_return = _extract_expected_return(state["views"])

    if investor_expected_return is None:
        print("❌ The investor's expected return is missing. Please check again.")
        return state

    # 4. Build the prompt that asks the LLM to evaluate and adjust the return.
    # NOTE(review): company_rows.get(column) interpolates the whole column for
    # this company (every matching row), not one "latest" value - confirm the
    # CSV has one row per company or select the intended row.
    financial_prompt = f"""
    You are a professional financial analyst. Evaluate whether the investor's expected return is realistic based on the provided financial data, and adjust it if necessary.

    **Investor’s Viewpoint**:
    "{state['views']}"

    **Latest Financial Data for {company_name}**:
    - Growth Factors: 
      - Total Asset Growth: {company_rows.get("총자산 증가율", "N/A")}
      - Tangible Asset Growth: {company_rows.get("유형자산 증가율", "N/A")}
      - Revenue Growth: {company_rows.get("매출액 증가율", "N/A")}

    - Profitability Factors:
      - Operating Profit Margin: {company_rows.get("매출액 영업이익률", "N/A")}
      - Net Profit Margin: {company_rows.get("매출액 순이익률", "N/A")}
      - Return on Assets (ROA): {company_rows.get("총자산 영업이익률", "N/A")}
      - Return on Equity (ROE): {company_rows.get("총자산 순이익률", "N/A")}
      - Interest Coverage Ratio: {company_rows.get("영업이익 이자보상비율", "N/A")}
      - EBITDA to Revenue: {company_rows.get("EBITDA대 매출액", "N/A")}

    - Liquidity Factors:
      - Current Ratio: {company_rows.get("유동비율", "N/A")}
      - Quick Ratio: {company_rows.get("당좌비율", "N/A")}
      - Receivables to Payables Ratio: {company_rows.get("매출채권/매입채무비율", "N/A")}

    - Stability Factors:
      - Debt-to-Equity Ratio: {company_rows.get("부채비율", "N/A")}
      - Current Liabilities Ratio: {company_rows.get("유동부채비율", "N/A")}
      - Debt Dependence: {company_rows.get("차입금의존도", "N/A")}

    - Efficiency Factors:
      - Asset Turnover Ratio: {company_rows.get("총자산회전율", "N/A")}
      - Inventory Turnover Ratio: {company_rows.get("재고자산회전율", "N/A")}
      - Accounts Receivable Turnover Ratio: {company_rows.get("매출채권회전율", "N/A")}

    🔢 **Expected Return Comparison**:
    - Investor’s Expected Return: {investor_expected_return * 100:.2f}%

    **Guidelines**:
    **Analyze all the financial data comprehensively**, including Growth, Profitability, Liquidity, Stability, and Efficiency, to determine if the investor's expected return is realistic.
    If the expected return is **too high or too low compared to the company’s financial performance**, adjust it accordingly.
    **Avoid using fixed numerical thresholds**. Instead, make a holistic judgment based on all financial indicators.
    **Output format**:
       - Final Expected Return: **X.XX%**
       - Adjustment Reason: **Explain whether the original expectation is realistic, too high, or too low, and why.**

    **Example Response (This is just an example; analyze the data before responding)**:
    - Final Expected Return: **8.50%**
    - Adjustment Reason: **The company's growth potential is somewhat limited, and its net profit margin is low. A 12% return expectation seems excessive, so it has been adjusted to a more realistic level.**
    """

    response = llm.invoke([HumanMessage(content=financial_prompt)])
    adjusted_views = response.content.strip()

    print("\n🔵 [Investment Perspective Adjusted Based on Financial Analysis] 🔵")
    print(adjusted_views)

    return update_agreement_score(state, adjusted_views)



# LangGraph workflow setup
workflow = StateGraph(InvestmentState)

# Entry point: the graph starts at the view-generation node.
workflow.add_node("generate_views", generate_initial_views)
workflow.add_edge(START, "generate_views")

workflow.add_node("sentiment_analysis", sentiment_analysis_agent)
workflow.add_node("financial_analysis", financial_analysis_agent)

# Fixed order: generate_views -> sentiment_analysis -> financial_analysis.
workflow.add_edge("generate_views", "sentiment_analysis")
workflow.add_edge("sentiment_analysis", "financial_analysis")
# check_convergence decides where to go after financial_analysis; it is not
# defined in the visible part of this file and must be in scope at this point.
workflow.add_conditional_edges("financial_analysis", check_convergence)

# Run the graph
app = workflow.compile()
result = app.invoke({"views": "", "prev_views": "", "agreement_score": 5, "iteration": 0})

print("\n✅ **최종 보완된 투자 견해:**\n")
# Original note: the views are expected to already be in Korean here.
# NOTE(review): financial_analysis_agent prompts in English - confirm the final language.
print(result["views"])