In [7]:
import openai
import json
import langgraph
from langchain.schema import HumanMessage
from langchain_openai import ChatOpenAI
from langgraph.graph import StateGraph, START, END
from typing import TypedDict
import os
import random
import pandas as pd
from dotenv import load_dotenv
import re

load_dotenv()

True

In [None]:
# API key for the OpenAI client, read from the environment
# (load_dotenv() was called above, so a local .env file is honoured).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")


# Absolute, machine-specific input locations.
# NOTE(review): these only resolve on the original author's Windows machine.
json_path = r"C:/Users/Owner/Desktop/invest portfolio/10K_reports_summary.json"
file_path = "C:/Users/Owner/Desktop/invest portfolio/dow_30_news.csv"
# News table; later code filters it on the "ticker", "summary" and
# "datetime" columns.
news_df = pd.read_csv(file_path)


In [None]:
# Load the 10-K report summaries. The result is iterated as a sequence of
# report dicts with "file_name" and "summary" keys by organize_data_by_year.
# Fails with FileNotFoundError when json_path does not exist on this machine.
with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

def organize_data_by_year(data):
    """Group 10-K report summaries by company and year.

    Args:
        data: Iterable of report dicts. Each must have a ``"file_name"``
            string (e.g. ``"Apple_2023.pdf"``) and a ``"summary"`` that is
            either a dict or a JSON-encoded string.

    Returns:
        ``{company_name: {year: summary_dict}}``. Every stored summary is a
        dict; anything that cannot be turned into one becomes ``{}`` so that
        callers can safely use ``summary.get(...)``.

    Raises:
        KeyError: If a report lacks ``"file_name"`` or ``"summary"``.
    """
    company_data = {}
    for report in data:
        file_name = report["file_name"]
        # Only two years are distinguished: any name without "2023" is
        # filed under "2024".
        year = "2023" if "2023" in file_name else "2024"
        company_name = file_name.replace("_2023", "").replace("_2024", "").replace(".pdf", "")

        summary = report["summary"]
        if isinstance(summary, str):
            try:
                summary = json.loads(summary)
            except json.JSONDecodeError:
                summary = {}

        # Valid JSON is not necessarily an object ("null", "[...]", a bare
        # string), and the raw value may be None; normalise all of those.
        if not isinstance(summary, dict):
            summary = {}

        company_data.setdefault(company_name, {})[year] = summary

    return company_data

def translate_to_korean(text: str):
    """Translate the given text into Korean using the module-level LLM.

    The text is embedded in a Korean-language instruction prompt that asks
    for financial terms to keep their original meaning. Returns the LLM's
    reply with surrounding whitespace stripped.
    """
    prompt = f"""
    다음 투자 견해를 한국어로 번역하세요.  
    금융 용어는 가능한 한 원래 의미를 유지해야 합니다.

    **번역할 내용:**  
    {text}

    **번역 결과:**  
    """

    message = HumanMessage(content=prompt)
    reply = llm.invoke([message])
    translated = reply.content
    return translated.strip()

# company -> year -> summary lookup consumed by generate_initial_views.
company_data = organize_data_by_year(data)

# NOTE(review): same path and load as in the earlier cell; looks redundant
# unless this cell is meant to be runnable on its own.
file_path = "C:/Users/Owner/Desktop/invest portfolio/dow_30_news.csv"
news_df = pd.read_csv(file_path)

# OpenAI LLM configuration (do not change)
llm = ChatOpenAI(model_name="gpt-4o-mini", openai_api_key=OPENAI_API_KEY)

class InvestmentState(TypedDict):
    """State dictionary passed between the nodes of the investment graph."""

    # Current investment viewpoints as free-form LLM text.
    views: str
    # Viewpoints as they were before the most recent agent revised them.
    prev_views: str
    # Declared because generate_initial_views returns this key and the
    # initial invoke() input supplies it; without the declaration the key is
    # not part of the graph's state schema.
    agreement_score: int
    # Incremented by one each time the sentiment or financial agent runs.
    iteration: int


# **📌 투자 견해 초기 생성 함수 (JSON 데이터 기반)**
def generate_initial_views(state: InvestmentState):
    """Produce the first set of investment viewpoints from the 10-K summaries.

    Flattens the module-level ``company_data`` (company -> year -> summary)
    into a text context, embeds it in the analyst prompt and sends that
    through ``llm``. The incoming ``state`` is not read.

    Returns:
        A state update with the raw LLM text under ``"views"`` and both
        ``"agreement_score"`` and ``"iteration"`` set to 0.
    """
    sections = (
        "Business Overview",
        "Key Risk Factors",
        "Financial Summary",
        "Management Insights",
    )

    # Collect the fragments and join once instead of growing a string.
    pieces = []
    for company, years in company_data.items():
        pieces.append(f"\n📌 {company}\n")
        for year, summary in years.items():
            pieces.append(f"🔹 {year}년 데이터:\n")
            pieces.extend(
                f"- **{section}**: {summary.get(section, 'N/A')}\n"
                for section in sections
            )
    context = "".join(pieces)

    prompt = f"""
    You are a financial analyst specializing in Black-Litterman model-based investment insights.
    Based on the summarized 10-K reports from 2023 and 2024, generate **max ten investment viewpoints** 
    using a comparative analysis of companies and their financial performance.

    **Company Data:**
    {context}

    **Instructions:**
    - Generate **max ten investment viewpoints** that must **always include specific companies**.
    - Each viewpoint **must contain an expected return percentage change (increase or decrease) and which company is**.
    - The expected return change **must be between 1% and 8%**.
    - Ensure that each viewpoint **directly compares two companies** (e.g., "Microsoft vs. Google") or **focuses on a single company's expected return change**.
    - Use financial trends, key risks, and management insights to justify each viewpoint.

    **Output Format:**
    - **Viewpoint 1**: ...
    - **Viewpoint 2**: ...
    """

    reply = llm.invoke([HumanMessage(content=prompt)])
    generated_views = reply.content
    print("\n🔵 [초기 투자 견해 생성 완료] 🔵\n", generated_views)
    return {"views": generated_views, "agreement_score": 0, "iteration": 0}


def sentiment_analysis_agent(state: InvestmentState):
    """Revise the current investment views using news sentiment.

    Steps:
      1. Find the Dow 30 companies whose name occurs in ``state["views"]``.
      2. For each matching ticker, randomly sample up to five news summaries
         from the module-level ``news_df`` (columns used: ``ticker``,
         ``summary``, ``datetime``).
      3. Ask the LLM to classify the sentiment of the sampled news, then ask
         it to rewrite the viewpoints in light of that sentiment.

    Args:
        state: Current graph state; ``views`` and ``iteration`` are read.

    Returns:
        The unchanged ``state`` when no company, news row or sample is
        available; otherwise an update with the revised ``views``, the
        previous text under ``prev_views`` and ``iteration`` incremented by 1.
    """

    dow30_ticker_mapping = {
        "Apple": "AAPL", "Amgen": "AMGN", "Amazon": "AMZN", "Cisco": "CSCO", "Microsoft": "MSFT", "NVIDIA": "NVDA",
        "American Express": "AXP", "Boeing": "BA", "Caterpillar": "CAT", "Salesforce": "CRM", "Chevron": "CVX",
        "Disney": "DIS", "Goldman Sachs": "GS", "Home Depot": "HD", "Honeywell": "HON", "IBM": "IBM",
        "Johnson & Johnson": "JNJ", "JPMorgan Chase": "JPM", "Coca-Cola": "KO", "McDonald's": "MCD", "3M": "MMM",
        "Merck": "MRK", "Nike": "NKE", "Procter & Gamble": "PG", "Sherwin-Williams": "SHW", "Travelers": "TRV",
        "UnitedHealth": "UNH", "Visa": "V", "Verizon": "VZ", "Walmart": "WMT"
    }

    # Plain substring match of company names against the viewpoint text.
    mentioned_tickers = [ticker for company, ticker in dow30_ticker_mapping.items() if company in state["views"]]
    if not mentioned_tickers:
        print("⚠️ No relevant stocks found for sentiment analysis.")
        return state

    # Work on an explicit copy so the column assignment below never targets
    # a slice of news_df. Rows without a summary are dropped first, so no
    # fillna is needed before the string cast.
    filtered_news_df = news_df[news_df["ticker"].isin(mentioned_tickers)].dropna(subset=["summary"]).copy()
    filtered_news_df["summary"] = filtered_news_df["summary"].astype(str)

    if filtered_news_df.empty:
        print("⚠️ No relevant news articles found.")
        return state

    filtered_news_df = filtered_news_df.sort_values(by="datetime", ascending=False)

    summarized_news = {}
    for ticker in mentioned_tickers:
        subset = filtered_news_df[filtered_news_df["ticker"] == ticker]
        if subset.empty:
            continue
        # Sample 3-5 articles, but never more than exist. With only one or
        # two articles the lower bound must shrink too, otherwise
        # random.randint(3, 2) raises ValueError.
        max_articles = min(5, len(subset))
        min_articles = min(3, max_articles)
        num_articles = random.randint(min_articles, max_articles)
        selected_articles = subset.sample(n=num_articles, random_state=random.randint(1, 1000))["summary"].tolist()
        summarized_news[ticker] = " ".join(selected_articles)

    if not summarized_news:
        print("⚠️ No sampled news articles found.")
        return state

    sentiment_prompt = f"""
    Analyze the sentiment of the following news summaries and classify them as Positive, Negative, or Neutral.

    **News Summaries:**
    {summarized_news}
    """

    response = llm.invoke([HumanMessage(content=sentiment_prompt)])
    sentiment_results = response.content.strip()

    print("\n🟡 [Sentiment Analysis Results] 🟡")
    print(sentiment_results)

    adjustment_prompt = f"""
    Modify the following investment viewpoints based on the sentiment analysis results.

    **Current Investment Viewpoints:**
    {state["views"]}

    **Sentiment Analysis Results:**
    {sentiment_results}

    **Instructions:**
    - Adjust expected return percentages based on sentiment.
    - Ensure that the updated viewpoints remain within the 1% to 8% range.
    """

    adjusted_response = llm.invoke([HumanMessage(content=adjustment_prompt)])
    adjusted_views = adjusted_response.content.strip()

    print("\n🟡 [Updated Investment Views after Sentiment Analysis] 🟡")
    print(adjusted_views)

    return {
        "views": adjusted_views,
        "prev_views": state["views"],
        "iteration": state["iteration"] + 1
    }




def financial_analysis_agent(state: InvestmentState):
    """Check each viewpoint's expected return against financial ratios.

    The views text is processed line by line. For every line that contains a
    bold span followed by a percentage, the company's latest-year financial
    ratios are sent to the LLM, which is asked for an adjusted expected
    return. The adjusted value is clamped to 1-8 and written back into the
    line only when it differs from the original by more than 2 points.

    Args:
        state: Current graph state; ``views`` and ``iteration`` are read.

    Returns:
        A state update with the revised ``views``, the previous text under
        ``prev_views`` and ``iteration`` incremented by 1.

    Notes:
        The ratios CSV path is absolute and machine-specific. The code
        assumes the CSV has a "연도" (year) column and a "종목명" column that
        holds ticker symbols - TODO confirm against the data file.
    """

    dow30_ticker_mapping = {
        "Apple": "AAPL", "Amgen": "AMGN", "Amazon": "AMZN", "Cisco": "CSCO", "Microsoft": "MSFT", "NVIDIA": "NVDA",
        "American Express": "AXP", "Boeing": "BA", "Caterpillar": "CAT", "Salesforce": "CRM", "Chevron": "CVX",
        "Disney": "DIS", "Goldman Sachs": "GS", "Home Depot": "HD", "Honeywell": "HON", "IBM": "IBM",
        "Johnson & Johnson": "JNJ", "JPMorgan Chase": "JPM", "Coca-Cola": "KO", "McDonald's": "MCD", "3M": "MMM",
        "Merck": "MRK", "Nike": "NKE", "Procter & Gamble": "PG", "Sherwin-Williams": "SHW", "Travelers": "TRV",
        "UnitedHealth": "UNH", "Visa": "V", "Verizon": "VZ", "Walmart": "WMT"
    }

    financial_df = pd.read_csv("/Users/wnsgud/workplace/DB보험금융공모전/financial_ratios/All_Financial_Ratios.csv")

    # Loop-invariant: the most recent year label in the ratios table.
    year_labels = financial_df["연도"].astype(str)
    latest_year = max(year_labels.unique()) if not year_labels.empty else None

    # Bold span, then the first percentage that follows it on the same line.
    viewpoint_pattern = re.compile(r'\*\*(.*?)\*\*.*?([+-]?\d+(?:\.\d+)?)%')
    # A number immediately followed by a percent sign, for parsing LLM replies.
    percent_pattern = re.compile(r'([+-]?\d+(?:\.\d+)?)\s*%')

    adjusted_viewpoints = []

    # Extract the company name and expected return from each viewpoint line.
    for viewpoint in state["views"].split("\n"):
        match = viewpoint_pattern.search(viewpoint)
        if not match:
            adjusted_viewpoints.append(viewpoint)
            continue

        company = match.group(1).strip()
        expected_return = float(match.group(2))
        ticker = dow30_ticker_mapping.get(company)

        if not ticker:
            # The first bold span is frequently a label such as
            # "Viewpoint 1" rather than a company. Fall back to the company
            # name that appears earliest in the line (same substring rule
            # the sentiment agent uses).
            mentions = [
                (viewpoint.find(name), name)
                for name in dow30_ticker_mapping
                if name in viewpoint
            ]
            if not mentions:
                adjusted_viewpoints.append(viewpoint)
                continue
            _, company = min(mentions)
            ticker = dow30_ticker_mapping[company]

        # Financial data for this ticker in the latest year.
        company_financials = financial_df[(financial_df["종목명"] == ticker) & (year_labels == latest_year)]

        if company_financials.empty:
            adjusted_viewpoints.append(viewpoint)
            continue

        # All financial indicators of the first matching row.
        financial_metrics = company_financials.iloc[0].to_dict()

        # Ask the LLM for an adjusted expected return.
        prompt = f"""
        Given the financial indicators of {company}, adjust the expected return accordingly.
        
        **Financial Indicators:**
        Growth: {financial_metrics.get("총자산 증가율", "N/A")}, {financial_metrics.get("유형자산 증가율", "N/A")}, {financial_metrics.get("매출액 증가율", "N/A")}
        Profitability: {financial_metrics.get("매출액 영업이익률", "N/A")}, {financial_metrics.get("매출액 순이익률", "N/A")}, {financial_metrics.get("총자산 영업이익률", "N/A")}, {financial_metrics.get("총자산 순이익률", "N/A")}, {financial_metrics.get("영업이익 이자보상비율", "N/A")}, {financial_metrics.get("EBITDA대 매출액", "N/A")}
        Liquidity: {financial_metrics.get("유동비율", "N/A")}, {financial_metrics.get("당좌비율", "N/A")}, {financial_metrics.get("매출채권/매입채무비율", "N/A")}
        Stability: {financial_metrics.get("부채비율", "N/A")}, {financial_metrics.get("유동부채비율", "N/A")}, {financial_metrics.get("차입금의존도", "N/A")}
        Efficiency: {financial_metrics.get("총자산회전율", "N/A")}, {financial_metrics.get("재고자산회전율", "N/A")}, {financial_metrics.get("매출채권회전율", "N/A")}
        
        Adjust the expected return ({expected_return}%) based on the above indicators.
        
        Return the adjusted expected return as a single numerical value followed by a percentage sign.
        """

        response = llm.invoke([HumanMessage(content=prompt)])
        reply_text = response.content.strip()

        # Parse the reply. A bare "5.5%" is handled directly; if the model
        # wrapped the figure in prose, take the last percent-suffixed number.
        try:
            parsed_return = float(reply_text.replace("%", ""))
        except ValueError:
            percent_values = percent_pattern.findall(reply_text)
            parsed_return = float(percent_values[-1]) if percent_values else None

        if parsed_return is None:
            # Nothing usable in the reply: keep the original figure.
            financial_expected_return = expected_return
        else:
            # Clamp the expected return to the 1%-8% range.
            financial_expected_return = max(1, min(8, parsed_return))

        if abs(expected_return - financial_expected_return) > 2:
            # Rewrite only the figure that was extracted above; other
            # percentages on the line (e.g. the second company in a
            # comparison) are left untouched.
            start, end = match.span(2)
            viewpoint = f"{viewpoint[:start]}{financial_expected_return:.1f}{viewpoint[end:]}"

        adjusted_viewpoints.append(viewpoint)

    updated_views = "\n".join(adjusted_viewpoints)

    print("\n🔵 [재무 데이터 검증 및 조정 후 투자 견해] 🔵")
    print(updated_views)

    return {
        "views": updated_views,
        "prev_views": state["views"],
        "iteration": state["iteration"] + 1
    }



def check_convergence(state: InvestmentState):
    """Decide where the graph goes after the financial-analysis step.

    Only the iteration counter is consulted: once ``state["iteration"]`` has
    reached 5 the graph ends, otherwise control returns to the
    ``"sentiment_analysis"`` node. No comparison between views is made here.
    """
    return END if state["iteration"] >= 5 else "sentiment_analysis"


# **📌 LangGraph 설정**
# LangGraph setup: a state graph whose nodes exchange an InvestmentState.
workflow = StateGraph(InvestmentState)

# Entry point: generate the initial viewpoints.
workflow.add_node("generate_views", generate_initial_views)
workflow.add_edge(START, "generate_views")

workflow.add_node("sentiment_analysis", sentiment_analysis_agent)
workflow.add_node("financial_analysis", financial_analysis_agent)

# generate_views -> sentiment_analysis -> financial_analysis, after which
# check_convergence either ends the run or loops back to sentiment_analysis.
workflow.add_edge("generate_views", "sentiment_analysis")
workflow.add_edge("sentiment_analysis", "financial_analysis")
workflow.add_conditional_edges("financial_analysis", check_convergence)

# Compile and run the graph.
app = workflow.compile()
# NOTE(review): "agreement_score" is supplied here but is not declared on
# InvestmentState - confirm whether it is meant to be part of the state.
result = app.invoke({"views": "", "prev_views": "", "agreement_score": 5, "iteration": 0})

print("\n✅ **최종 보완된 투자 견해:**\n")
print(result["views"]) 

FileNotFoundError: [Errno 2] No such file or directory: 'C:/Users/Owner/Desktop/invest portfolio/10K_reports_summary.json'