Install all the dependencies

In [18]:
!pip install langchain google-cloud-aiplatform mlflow yfinance exa_py langchain-google-vertexai


Collecting httpx<1,>=0.23.0 (from openai<2.0,>=1.48->exa_py)
  Using cached httpx-0.28.1-py3-none-any.whl.metadata (7.1 kB)
Using cached httpx-0.28.1-py3-none-any.whl (73 kB)
Installing collected packages: httpx
  Attempting uninstall: httpx
    Found existing installation: httpx 0.25.2
    Uninstalling httpx-0.25.2:
      Successfully uninstalled httpx-0.25.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
brave-search 0.1.8 requires httpx<0.26.0,>=0.25.2, but you have httpx 0.28.1 which is incompatible.[0m[31m
[0mSuccessfully installed httpx-0.28.1


In [19]:
!pip install brave-search langchain-community

Collecting httpx<0.26.0,>=0.25.2 (from brave-search)
  Using cached httpx-0.25.2-py3-none-any.whl.metadata (6.9 kB)
Using cached httpx-0.25.2-py3-none-any.whl (74 kB)
Installing collected packages: httpx
  Attempting uninstall: httpx
    Found existing installation: httpx 0.28.1
    Uninstalling httpx-0.28.1:
      Successfully uninstalled httpx-0.28.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-google-vertexai 2.1.2 requires httpx<1,>=0.28, but you have httpx 0.25.2 which is incompatible.[0m[31m
[0mSuccessfully installed httpx-0.25.2


Import all the dependencies required

In [20]:
import os
import mlflow
import yfinance as yf
from langchain.prompts import PromptTemplate
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain_google_vertexai import VertexAI
from langchain_community.tools import BraveSearch

In [21]:
# VertexAI setup
llm = VertexAI(model="gemini-2.0-flash", temperature=0.2)

In [22]:
#MLFlow setup
mlflow.set_tracking_uri("http://20.75.92.162:5000/")
mlflow.set_experiment("market_sentiment_analyzer - Shivam")
mlflow.start_run(run_name="sentiment_pipeline")

<ActiveRun: >

In [23]:
# Update the company as needed, for ex Google
company_name = "Google"
mlflow.log_param("company_name", company_name)

'Google'

In [24]:
# Get the company details to check stock code

def get_stock_code(company_name: str) -> str:
    try:
        ticker = yf.Ticker(company_name)
        return ticker.ticker if hasattr(ticker, "ticker") else "N/A"
    except Exception:
        return "N/A"

stock_code = get_stock_code(company_name)
mlflow.log_param("stock_code", stock_code)

print(f"Stock Code for {company_name}: {stock_code}")


Stock Code for Google: GOOGLE


In [25]:
# Checking top 5 headlines

# news_tool = BraveSearch()
# results = news_tool.run(company_name)

# news_summaries = [r["title"] + " - " + r["snippet"] for r in results[:5]]
# mlflow.log_param("news_count", len(news_summaries))

# print("Top 5 News Headlines:")
# for i, news in enumerate(news_summaries, 1):
#     print(f"{i}. {news}")

ticker = yf.Ticker(stock_code)

# Fetch recent news
news_articles = ticker.news[:5]

news_summaries = [
    f"{article['title']} - {article.get('publisher', 'UnKnown')}"
    for article in news_articles
]

mlflow.log_param("news_count", len(news_summaries))

print("Top 5 News Headlines:")
for i, news in enumerate(news_summaries, 1):
    print(f"{i}. {news}")


Top 5 News Headlines:


In [26]:
# Define Output JSON Schema

response_schemas = [
    ResponseSchema(name="company_name", description="Name of the company"),
    ResponseSchema(name="stock_code", description="Stock ticker symbol"),
    ResponseSchema(name="newsdesc", description="Brief description of news"),
    ResponseSchema(name="sentiment", description="Positive/Negative/Neutral"),
    ResponseSchema(name="people_names", description="List of people mentioned"),
    ResponseSchema(name="places_names", description="List of places mentioned"),
    ResponseSchema(name="other_companies_referred", description="Other companies mentioned"),
    ResponseSchema(name="related_industries", description="Industries referenced"),
    ResponseSchema(name="market_implications", description="Implications for the market"),
    ResponseSchema(name="confidence_score", description="Float between 0 and 1"),
]

output_parser = StructuredOutputParser.from_response_schemas(response_schemas)
format_instructions = output_parser.get_format_instructions()


In [27]:
# Prompt Template for Gemini

template = """
Analyze the following recent news about {company_name} ({stock_code}) 
and generate a **strict JSON** sentiment profile with No extra text.

News:
{news_summaries}

{format_instructions}

**Important:** Output only the JSON object exactly as specified, nothing else
"""

prompt = PromptTemplate(
    template=template,
    input_variables=["company_name", "stock_code", "news_summaries"],
    partial_variables={"format_instructions": format_instructions}
)


In [28]:
prompt

PromptTemplate(input_variables=['company_name', 'news_summaries', 'stock_code'], input_types={}, partial_variables={'format_instructions': 'The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":\n\n```json\n{\n\t"company_name": string  // Name of the company\n\t"stock_code": string  // Stock ticker symbol\n\t"newsdesc": string  // Brief description of news\n\t"sentiment": string  // Positive/Negative/Neutral\n\t"people_names": string  // List of people mentioned\n\t"places_names": string  // List of places mentioned\n\t"other_companies_referred": string  // Other companies mentioned\n\t"related_industries": string  // Industries referenced\n\t"market_implications": string  // Implications for the market\n\t"confidence_score": string  // Float between 0 and 1\n}\n```'}, template='\nAnalyze the following recent news about {company_name} ({stock_code}) \nand generate a **strict JSON** sentiment profile with N

In [29]:
# Update and parse the output

_input = prompt.format(
    company_name=company_name,
    stock_code=stock_code,
    news_summaries="\n".join(news_summaries)
)

# Invoke Gemini
raw_output = llm.invoke(_input)

# Parse structured JSON
parsed_output = output_parser.parse(raw_output)

# Log in mlflow
mlflow.log_dict(parsed_output, "shivam_sentiment_output.json")

print("Structured Sentiment Output:")
parsed_output


Structured Sentiment Output:


{'company_name': 'Google',
 'stock_code': 'GOOGL',
 'newsdesc': 'Google is facing increased regulatory scrutiny and competition in the AI sector, potentially impacting its market share and future growth.',
 'sentiment': 'Negative',
 'people_names': 'None',
 'places_names': 'None',
 'other_companies_referred': 'None',
 'related_industries': 'Artificial Intelligence, Technology',
 'market_implications': "Potential decrease in Google's market share, increased competition in the AI sector, regulatory challenges.",
 'confidence_score': '0.8'}

In [30]:
# End mlflow run

mlflow.end_run()


🏃 View run sentiment_pipeline at: http://20.75.92.162:5000/#/experiments/938225766210056145/runs/50eee6bdbda4495f8d755446565c5eac
🧪 View experiment at: http://20.75.92.162:5000/#/experiments/938225766210056145


In [31]:
# Output with company name Google

"""
{'company_name': 'Google',
 'stock_code': 'GOOGL',
 'newsdesc': 'Google faces antitrust scrutiny over its dominance in online advertising and search, potentially leading to regulatory action and fines. The company is also investing heavily in AI and cloud computing to compete with rivals.',
 'sentiment': 'Negative',
 'people_names': 'None',
 'places_names': 'None',
 'other_companies_referred': 'None',
 'related_industries': 'Online Advertising, Search Engines, Artificial Intelligence, Cloud Computing',
 'market_implications': 'Increased regulatory risk, potential for market share shifts, increased competition in AI and cloud sectors.',
 'confidence_score': '0.8'}
 """

"\n{'company_name': 'Google',\n 'stock_code': 'GOOGL',\n 'newsdesc': 'Google faces antitrust scrutiny over its dominance in online advertising and search, potentially leading to regulatory action and fines. The company is also investing heavily in AI and cloud computing to compete with rivals.',\n 'sentiment': 'Negative',\n 'people_names': 'None',\n 'places_names': 'None',\n 'other_companies_referred': 'None',\n 'related_industries': 'Online Advertising, Search Engines, Artificial Intelligence, Cloud Computing',\n 'market_implications': 'Increased regulatory risk, potential for market share shifts, increased competition in AI and cloud sectors.',\n 'confidence_score': '0.8'}\n "