In [None]:
!pip install -q langchain_community langchain_groq langchain langchain-deepseek 

In [1]:
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
import pandas as pd 
import os, sys
# from google.colab import files
from dotenv import load_dotenv
from langchain_deepseek import ChatDeepSeek

In [4]:
load_dotenv()
DEEPSEEK_API_KEY = os.getenv('DEEPSEEK_API_KEY')
llm = ChatDeepSeek(
    # model="deepseek-reasoner",
    model="deepseek-chat",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    api_key = DEEPSEEK_API_KEY
)

In [38]:
def llm_labling(headline, description, ticker):

    prompt = PromptTemplate.from_template(
        """
        You are a professional assistant in financial risk management.
        You will be given a stock ticker, a headline of a news and its description.
        Analyze the risk of the news on that stock and rate each major risk category from -10 to +10.
        -10 means Severe negative risk
        0 means No material impact
        10 means Significant positive impact/opportunity.
        Return rates of each risks in json string format.
        Focus on material relevance. If a risk does not apply, rate it 0.
        Do not add any descriptions and reasons in your answer.
        \n\n{ticker}
        \n\n{headline}
        \n\n{description} 
        
        The risk tree below.
        Risk Categories
        Market Risk
        – Interest Rate, Currency, Commodity, Volatility, Liquidity
        Financial Risk
        – Equity Valuation, Leverage, Cash Flow, Capital Structure
        Credit Risk
        – Credit Downgrade, Concentration, Bankruptcy, Counterparty
        Regulatory & Compliance Risk
        – Legal, AML, Regulatory Breach, KYC
        Political Risk
        – Geopolitical, Trade Sanctions, Instability
        Technology Risk
        – Cybersecurity, Outage, Data Breach
        Environmental Risk
        – Climate, Carbon Regulation, Natural Disasters
        Operational Risk
        – Process Failure, Human Error, Vendor
        Strategic Risk
        – Industry, Competition, M&A, Innovation
        Reputational Risk
        – Data Breach, ESG, Media/PR Crisis
        """
    )
    labeling_chain = (
        prompt
        |llm
        |StrOutputParser()
    )

    result = labeling_chain.invoke({'headline': headline, 'description':description, 'ticker':ticker})
    # analysis = result.split('/think>\n')[-1]
    # analysis = analysis.replace('\n', ' ')

    return result

In [7]:
out_root = './data/DSRONlabeledV2'
sc_root = './data/'

os.makedirs(out_root, exist_ok=True)

In [11]:
CSV_list = []
for file in os.listdir(sc_root):
    if not file.endswith('.csv'):
        continue

    CSV_list.append(file)

In [13]:
df = pd.read_csv(os.path.join(sc_root, file))
df.head()

Unnamed: 0,Datetime,headline,description,article,link
0,2025-06-26 23:44:00,"If Taiwan Semiconductor Is Near A Ceiling, Her...",Is Taiwan Semiconductor ready to take a breath...,Is Taiwan Semiconductor ready to take a breath...,https://finance.yahoo.com/m/0199bd52-694d-3088...
1,2025-06-26 23:05:00,"Want $1 Million in Retirement? Invest $100,000...","These companies are the best at what they do, ...",Nvidia figures to be a primary beneficiary of ...,https://finance.yahoo.com/news/want-1-million-...
2,2025-06-26 19:31:00,Taiwan Semiconductor Manufacturing Company,Taiwan Semiconductor Manufacturing Company (TS...,Credit - Courtesy Taiwan Semiconductor Manufac...,https://finance.yahoo.com/news/taiwan-semicond...
3,2025-06-26 17:00:00,"Trending tickers: Nvidia, Shell, Micron, Bumbl...",The latest investor updates on stocks that are...,Shares in chipmaker Nvidia (NVDA) rose more th...,https://finance.yahoo.com/news/nvidia-shell-mi...
4,2025-06-26 17:45:00,TSMC Just Fired a $10 Billion Warning Shot at ...,The chip giant's boldest forex move yet signal...,TSMC (NYSE:TSM) just announced a bold $10 bill...,https://finance.yahoo.com/news/tsmc-just-fired...


In [42]:
news_id = 4
ticker_id = 5

ticker = CSV_list[ticker_id].split('.')[0].split('_news_')[-1]
df = pd.read_csv(os.path.join(sc_root, CSV_list[ticker_id]))
headline = df['headline'].iloc[news_id]
description = df['description'].iloc[news_id]
ticker, headline, description

('COIN',
 'Equity Markets Close Higher as Trump Weighs Early Fed Chair Change',
 'US benchmark equity indexes closed higher on Thursday, following media reports that President Donald')

In [41]:
response = llm_labling(headline, description, ticker)
response

'```json\n{\n  "Market Risk": 0,\n  "Financial Risk": 0,\n  "Credit Risk": 0,\n  "Regulatory & Compliance Risk": 0,\n  "Political Risk": -2,\n  "Technology Risk": 0,\n  "Environmental Risk": 0,\n  "Operational Risk": -5,\n  "Reputational Risk": -8\n}\n```'

In [43]:
response = llm_labling(headline, description, ticker)
response

'```json\n{\n  "Market Risk": {\n    "Interest Rate": -2,\n    "Currency": 0,\n    "Commodity": 0,\n    "Volatility": -1,\n    "Liquidity": 0\n  },\n  "Financial Risk": {\n    "Equity Valuation": 0,\n    "Leverage": 0,\n    "Cash Flow": 0,\n    "Capital Structure": 0\n  },\n  "Credit Risk": {\n    "Credit Downgrade": 0,\n    "Concentration": 0,\n    "Bankruptcy": 0,\n    "Counterparty": 0\n  },\n  "Regulatory & Compliance Risk": {\n    "Legal": 0,\n    "AML": 0,\n    "Regulatory Breach": 0,\n    "KYC": 0\n  },\n  "Political Risk": {\n    "Geopolitical": 0,\n    "Trade Sanctions": 0,\n    "Instability": 0\n  },\n  "Technology Risk": {\n    "Cybersecurity": 0,\n    "Outage": 0,\n    "Data Breach": 0\n  },\n  "Environmental Risk": {\n    "Climate": 0,\n    "Carbon Regulation": 0,\n    "Natural Disasters": 0\n  },\n  "Operational Risk": {\n    "Process Failure": 0,\n    "Human Error": 0,\n    "Vendor": 0\n  },\n  "Strategic Risk": {\n    "Industry": 0,\n    "Competition": 0,\n    "M&A": 0,

In [21]:
with open('result.txt', 'w') as f:
    f.write(response)

In [None]:
risk_cata = ["Market Risk, Financial Risk, Credit Risk, Regulatory & Compliance Risk, Political Risk, Technology Risk, Environmental Risk, Operational Risk, Strategic Risk, Reputational"]

In [26]:
"Market Risk, Financial Risk, Credit Risk, Regulatory & Compliance Risk, Political Risk, Technology Risk, Environmental Risk, Operational Risk, Strategic Risk, Reputational".replace(', ', ', "')

'Market Risk, "Financial Risk, "Credit Risk, "Regulatory & Compliance Risk, "Political Risk, "Technology Risk, "Environmental Risk, "Operational Risk, "Strategic Risk, "Reputational'

In [27]:
llm_ass = ChatDeepSeek(
    # model="deepseek-reasoner",
    model="deepseek-chat",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    api_key = DEEPSEEK_API_KEY
)

In [35]:
prompt_ass = PromptTemplate.from_template(
        """
        You are a professional assistant.
        You will be given a risk rating description.
        Return the rating in a json string format
        \n\n
        {description}
        """
    )

ass_chain = (
        prompt_ass
        |llm_ass
        |StrOutputParser()
    )

In [36]:
ass_result = ass_chain.invoke({'description':response})

In [37]:
ass_result

'Here is the provided risk rating description formatted as a JSON string:\n\n```json\n{\n    "Market Risk": {\n        "Interest Rate": 0,\n        "Currency": -6,\n        "Commodity": 0,\n        "Volatility": 0,\n        "Liquidity": 0\n    },\n    "Financial Risk": {\n        "Equity Valuation": 0,\n        "Leverage": 0,\n        "Cash Flow": 0,\n        "Capital Structure": 0\n    },\n    "Credit Risk": {\n        "Credit Downgrade": 0,\n        "Concentration": 0,\n        "Bankruptcy": 0,\n        "Counterparty": 0\n    },\n    "Regulatory & Compliance Risk": {\n        "Legal": 0,\n        "AML": 0,\n        "Regulatory Breach": 0,\n        "KYC": 0\n    },\n    "Political Risk": {\n        "Geopolitical": -8,\n        "Trade Sanctions": 0,\n        "Instability": 0\n    },\n    "Technology Risk": {\n        "Cybersecurity": 0,\n        "Outage": 0,\n        "Data Breach": 0\n    },\n    "Environmental Risk": {\n        "Climate": 0,\n        "Carbon Regulation": 0,\n        "