## Prerequisite Setup

In [19]:
# INSTALLING ALL LIBRARY DEPENDENCIES

!python -m pip install langchain langchain-core langchain-community langchain-experimental --quiet
!python -m pip install -U langchain-google-genai --quiet
!python -m pip install --upgrade --quiet  yfinance mlflow


[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: pip install --upgrade pip

[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: pip install --upgrade pip

[notice] A new release of pip is available: 24.0 -> 25.2
[notice] To update, run: pip install --upgrade pip


In [20]:
# IMPORTING RELEVANT LIBRARIES

from langchain_core.prompts import ChatPromptTemplate
from langchain.chat_models import init_chat_model
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from pydantic import BaseModel, Field
from typing import List
import mlflow
from langchain_community.tools.yahoo_finance_news import YahooFinanceNewsTool
import json

In [21]:
# CONFIGURING MLFLOW TRACKING

# Tracking server and experiment that the runs started in this notebook report to.
MLFLOW_TRACKING_URI = "http://20.75.92.162:5000/"
MLFLOW_EXPERIMENT_NAME = "Rohith - Stock Sentiment Analysis"

mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(MLFLOW_EXPERIMENT_NAME)

# Enable MLflow's LangChain autologging for the chains defined below.
mlflow.langchain.autolog()

## Pipeline Scripts

In [None]:
# STEP 1: GENERATING STOCK TICKER FOR A COMPANY


# Prompt that asks for the ticker of `{company_name}` and nothing else.
ticker_prompt = ChatPromptTemplate.from_template(
    " ".join(
        [
            "Generate the stock market ticker symbol for {company_name}.",
            "Search for it if you don't know. Respond with ONLY the ticker symbol.",
        ]
    )
)

# Chat model shared by the ticker chain here and the analysis chain in STEP 3.
model_name = "gemini-2.0-flash"
model = init_chat_model(model_name, model_provider="google_genai")

# Plain-string output parser for the ticker reply.
parser = StrOutputParser()

# prompt -> model -> string
ticker_chain = ticker_prompt | model | parser


def generate_ticker_with_tracing(input_data: dict) -> str:
    """Resolve the stock ticker for a company and record the step in MLflow.

    Opens a nested MLflow run named "Stock Ticker Extraction", logs the
    company name, the rendered prompt (``ticker_prompt.txt``) and the
    resulting ticker, and prints progress messages.

    Args:
        input_data: Mapping that must contain the key ``"company_name"``.

    Returns:
        The ticker produced by ``ticker_chain`` with surrounding whitespace
        removed.

    Raises:
        KeyError: If ``"company_name"`` is missing from ``input_data``.
    """
    with mlflow.start_run(run_name="Stock Ticker Extraction", nested = True):
        company_name = input_data["company_name"]
        mlflow.log_param("company_name", company_name)

        # Bind the template variable by name instead of passing a bare string,
        # so the call keeps working if the prompt ever gains a second variable.
        prompt_inputs = {"company_name": company_name}

        prompt_value = ticker_prompt.invoke(prompt_inputs)
        mlflow.log_text(prompt_value.to_string(), "ticker_prompt.txt")

        print("\nSTEP 1...")
        print(f"Generating stock code for {company_name}..")

        # The model reply may carry stray whitespace or a trailing newline;
        # strip it so the logged param and the downstream lookup get a clean ticker.
        stock_code = ticker_chain.invoke(prompt_inputs).strip()
        mlflow.log_param("stock_code", stock_code)
        print(f"Stock code for {company_name} is {stock_code}\n")

        return stock_code
    

In [23]:
# input = {"company_name":"Google"}
# input["stock_code"] = generate_ticker_with_tracing(input)

In [None]:
# STEP 2: GENERATING NEWS FOR THE COMPANY BASED ON STOCK CODE

# Yahoo Finance news tool instance; generate_news_with_tracing invokes it with a stock code.
news_tool = YahooFinanceNewsTool()


def generate_news_with_tracing(input_data:dict) -> str:
    """Fetch news for a ticker via ``news_tool`` and record the step in MLflow.

    Runs inside a nested MLflow run named "News Fetching". The company name
    and stock code are logged as params and the tool output is stored as the
    text artifact ``news_results.txt``.

    Args:
        input_data: Mapping with the keys ``"company_name"`` and ``"stock_code"``.

    Returns:
        Whatever ``news_tool.invoke`` returns for the stock code.
    """
    with mlflow.start_run(run_name="News Fetching", nested=True):
        company_name = input_data["company_name"]
        stock_code = input_data["stock_code"]

        run_params = {"company_name": company_name, "stock_code": stock_code}
        mlflow.log_params(run_params)

        print("\nSTEP 2...")
        print(f"Fetching news about {company_name} (Stock Code: {stock_code})...")

        latest_news = news_tool.invoke(stock_code)
        mlflow.log_text(latest_news, "news_results.txt")
        print(f"The latest news for {stock_code} is: \n {latest_news}\n")

        return latest_news

In [25]:
# input["newsdesc"] = generate_news_with_tracing(input)

In [None]:
# STEP 3: ANALYSING SENTIMENT BASED ON THE NEWS

# Structured sentiment profile the analysis step asks the model to produce.
# NOTE(review): documented with `#` comments on purpose - a class docstring would
# presumably be copied into the model's JSON schema and change the format
# instructions sent to the model; confirm before adding one.
class StockNewsAnalyser(BaseModel):
    # --- which company the profile is about ---
    company_name: str = Field(
        description="The name of the company analyzed.",
    )
    stock_code: str = Field(
        description="The stock ticker of the company.",
    )

    # --- summary and overall sentiment ---
    newsdesc: str = Field(
        description="A concise summary of the news provided.",
    )
    sentiment: str = Field(
        description="Overall sentiment (e.g., 'Positive', 'Negative', 'Neutral').",
    )

    # --- entities extracted from the news ---
    people_names: List[str] = Field(
        description="List of people's names mentioned in the news.",
    )
    places_names: List[str] = Field(
        description="List of places mentioned in the news.",
    )
    other_companies_referred: List[str] = Field(
        description="List of other companies mentioned.",
    )
    related_industries: List[str] = Field(
        description="List of industries related to the news.",
    )

    # --- interpretation ---
    market_implications: str = Field(
        description="Potential implications for the market or stock.",
    )
    confidence_score: float = Field(
        description="Confidence in the sentiment analysis, from 0.0 to 1.0.",
    )

# JSON output parser configured with the StockNewsAnalyser model.
analysis_parser = JsonOutputParser(pydantic_object=StockNewsAnalyser)

# System message: role, task and the placeholder for the parser's format instructions.
_ANALYSIS_SYSTEM_MESSAGE = (
    "You are an expert financial analyst. Your task is to analyze the provided news text "
    "about a company and generate a structured sentiment profile in JSON format. "
    "Extract named entities like people, places, and other companies. "
    "Also identify related industries, market implications, and provide a confidence score for your analysis. "
    "Follow these instructions:\n{format_instructions}"
)

# Human message: carries the company name, ticker and news text.
_ANALYSIS_HUMAN_MESSAGE = (
    "Here is the company information and recent news:\n"
    "Company: {company_name}\n"
    "Ticker: {stock_code}\n\n"
    "News Articles Snippets:\n{newsdesc}\n\n"
    "Please generate the detailed sentiment profile based on this information."
)

_analysis_messages = [
    ("system", _ANALYSIS_SYSTEM_MESSAGE),
    ("human", _ANALYSIS_HUMAN_MESSAGE),
]

# `format_instructions` is pre-filled here, so callers only supply
# company_name, stock_code and newsdesc.
analysis_prompt = ChatPromptTemplate.from_messages(_analysis_messages).partial(
    format_instructions=analysis_parser.get_format_instructions()
)

# prompt -> model -> parsed JSON
analysis_chain = analysis_prompt | model | analysis_parser


def analyse_news_with_tracing(input_data: dict) -> dict:
    """Run the sentiment-analysis chain on the fetched news and record it in MLflow.

    Runs inside a nested MLflow run named "Stock News Analysis". Logs the
    company name and stock code as params, the rendered prompt as
    ``analysis_prompt.txt`` and the parsed analysis as ``analysis_output.json``.

    Args:
        input_data: Mapping with ``"company_name"`` and ``"stock_code"`` (read
            directly here); the whole mapping is also passed to
            ``analysis_prompt`` and ``analysis_chain``, whose template
            additionally references ``"newsdesc"``.

    Returns:
        The parsed output of ``analysis_chain``.

    Raises:
        KeyError: If ``"company_name"`` or ``"stock_code"`` is missing.
    """
    with mlflow.start_run(run_name="Stock News Analysis", nested=True):
        company_name = input_data["company_name"]
        stock_code = input_data["stock_code"]

        print("\nSTEP 3...")
        print(f"Analysing latest stock news about {company_name} (Stock Code: {stock_code})...")

        mlflow.log_params({
            "company_name": company_name,
            "stock_code": stock_code
        })

        prompt_value = analysis_prompt.invoke(input_data)
        mlflow.log_text(prompt_value.to_string(), "analysis_prompt.txt")

        analysis = analysis_chain.invoke(input_data)

        # Upload the result directly as a JSON artifact instead of first writing
        # analysis_output.json into the working directory, where it would be left
        # behind and overwritten by every run.
        mlflow.log_dict(analysis, "analysis_output.json")

        return analysis




In [27]:

# analyse_news_with_tracing(input)

In [28]:
# STEP 4: COMBINING ALL THE ABOVE STEPS & WRAPPING IT INTO A SINGLE EXPERIMENT

def run_full_pipeline(company_name: str):
    """Run ticker lookup, news fetching and sentiment analysis under one MLflow run.

    Starts a parent MLflow run named "Sentiment Analysis for <company_name>",
    calls the three step functions in order (each opens its own nested run),
    prints the final profile as indented JSON and logs it as the artifact
    ``sentiment_profile.json`` on the parent run.

    Args:
        company_name: Name of the company to analyse.

    Returns:
        None. The profile is printed and logged rather than returned.
    """
    input_data = {"company_name": company_name}

    with mlflow.start_run(run_name = f"Sentiment Analysis for {company_name}") as run:
        print(f"Starting MLFlow run: {run.info.run_name}")

        # Each step adds its output to input_data so the next step can read it.
        input_data["stock_code"] = generate_ticker_with_tracing(input_data)
        input_data["newsdesc"] = generate_news_with_tracing(input_data)
        result = analyse_news_with_tracing(input_data)

        print("Pipeline finished. Final Sentiment Profile:")
        print(json.dumps(result, indent=2))

        # Log the final result as a JSON artifact in MLflow without leaving a
        # sentiment_profile.json file behind in the working directory.
        mlflow.log_dict(result, "sentiment_profile.json")

        print("Stock Analysis Completed!!")


## Run the end-to-end pipeline

In [None]:
# Set this to the company you want to analyse.
# Known issue: running the pipeline for Tesla currently raises an error
# (cause not yet diagnosed - TODO investigate), so pick another company for now.

COMPANY_NAME_TO_ANALYSE = "Goldman Sachs" 

run_full_pipeline(COMPANY_NAME_TO_ANALYSE)

Starting MLFlow run: Sentiment Analysis for Goldman Sachs
STEP 1...
Generating stock code for Goldman Sachs..
Stock code for Goldman Sachs is GS

🏃 View run Stock Ticker Extraction at: http://20.75.92.162:5000/#/experiments/327546165073043682/runs/2e7c18e9a2ea45bbae80eb59f847dde7
🧪 View experiment at: http://20.75.92.162:5000/#/experiments/327546165073043682
STEP 2...
Fetching news about Goldman Sachs (Stock Code: GS)...
The latest news for GS is: 
 Does Goldman Sachs' (GS) Latest Debt Issuance Reveal a Shift in Long-Term Capital Strategy?
In the past week, Goldman Sachs Group announced a broad series of fixed-income offerings, including new callable senior notes and unsecured corporate bonds with maturities ranging from 2029 to 2045 and coupon rates between 4.02% and 5.55%. This flurry of capital market activity coincided with investment conferences and comes during a period of heightened optimism, reflecting the firm's ongoing focus on expanding its debt structure to support growth, 

In [32]:
# Second run: the same end-to-end pipeline for a different company.
COMPANY_NAME_TO_ANALYSE = "JP Morgan"

run_full_pipeline(COMPANY_NAME_TO_ANALYSE)

Starting MLFlow run: Sentiment Analysis for JP Morgan
STEP 1...
Generating stock code for JP Morgan..
Stock code for JP Morgan is JPM

🏃 View run Stock Ticker Extraction at: http://20.75.92.162:5000/#/experiments/327546165073043682/runs/5370f90847d74a8ea56a609bdfb8dc70
🧪 View experiment at: http://20.75.92.162:5000/#/experiments/327546165073043682
STEP 2...
Fetching news about JP Morgan (Stock Code: JPM)...
The latest news for JPM is: 
 JPMorgan (JPM) Leads Global Payments Market, Says Morgan Stanley After Executive Meeting
JPMorgan Chase & Co. (NYSE:JPM) ranks among the best fundamental stocks to buy right now. Morgan Stanley reaffirmed its Equalweight rating and $298 price target for JPMorgan Chase & Co. (NYSE:JPM) on September 15, emphasizing the bank’s leading position in global payments. The affirmation comes after Morgan Stanley met with Umar Farooq, Co-Head of Global […]

Jim Cramer Thinks Tesla, Inc. (TSLA) Might See Better Numbers
We recently published 9 Stocks on Jim Cramer’s