In [11]:
from utils import *

from pydantic import BaseModel, Field
from typing import Literal
from langchain_core.output_parsers import JsonOutputParser
from langchain.output_parsers import OutputFixingParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.chat import SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

In [13]:
# Load environment variables from a .env file before any client is created.
# NOTE(review): `load_dotenv`, `get_file_paths` and `pd` are not imported by name
# in this notebook; presumably they arrive through `from utils import *` - confirm.
load_dotenv()

# Collect the feed dumps to load ('output' location, '16.txt' file pattern).
file_list = get_file_paths('output', file_pattern='16.txt')
if not file_list:
    # pd.concat raises an opaque "No objects to concatenate" on an empty list,
    # so fail early with a message that points at the real problem.
    raise FileNotFoundError("No files matching '16.txt' were found under 'output'")

# Read every tab-separated file, then stack them into one frame with a fresh index.
# The list gets its own name so `dfs` only ever refers to the combined DataFrame.
frames = [pd.read_csv(file, sep='\t') for file in file_list]
dfs = pd.concat(frames, ignore_index=True)
# dfs['description'] = dfs['title'] + '. ' + dfs['description']


In [14]:
# Preview the first five rows of the combined feed table.
dfs.head(n=5)

Unnamed: 0,link,guid,type,id,sponsored,title,description,pubDate
0,https://www.cnbc.com/2025/09/15/stock-market-t...,108199010,live_story,108199010,False,S&P 500 slides from record as traders await bi...,The three major indexes are coming off a winni...,"Tue, 16 Sep 2025 18:31:18 GMT"
1,https://www.cnbc.com/2025/09/16/trumps-fed-pre...,108199222,cnbcnewsstory,108199222,False,Trump's Fed pressure campaign will lead to hig...,The survey found that 41% of respondents surve...,"Tue, 16 Sep 2025 16:53:18 GMT"
2,https://www.cnbc.com/2025/09/16/google-gemini-...,108199421,cnbcnewsstory,108199421,False,"Google's Gemini tops Apple's App Store, snaggi...",Google appears to be gaining traction in the c...,"Tue, 16 Sep 2025 18:45:20 GMT"
3,https://www.cnbc.com/2025/09/16/charlie-kirk-t...,108199400,cnbcnewsstory,108199400,False,Charlie Kirk shooting suspect charged with agg...,Kirk's death at a Utah university spurred bipa...,"Tue, 16 Sep 2025 19:07:01 GMT"
4,https://www.cnbc.com/2025/09/15/openai-hires-f...,108199098,cnbcnewsstory,108199098,False,OpenAI hires former xAI CFO as Altman-Musk riv...,OpenAI has hired former xAI finance chief Mike...,"Tue, 16 Sep 2025 18:23:29 GMT"


In [15]:
# Columns carried along as per-document metadata; 'description' supplies the text.
doc_metadata_columns = ['link', 'guid', 'type', 'id', 'sponsored', 'pubDate']
docs = df_to_docs(
    dfs,
    content_column='description',
    metadata_columns=doc_metadata_columns,
)

# Show only the first few documents as a sanity check.
docs[:6]

[Document(metadata={'link': 'https://www.cnbc.com/2025/09/15/stock-market-today-live-updates.html', 'guid': '108199010', 'type': 'live_story', 'id': '108199010', 'sponsored': 'False', 'pubDate': 'Tue, 16 Sep 2025 18:31:18 GMT'}, page_content='The three major indexes are coming off a winning session.'),
 Document(metadata={'link': 'https://www.cnbc.com/2025/09/16/trumps-fed-pressure-campaign-will-lead-to-higher-inflation-weaker-growth-according-to-cnbc-survey.html', 'guid': '108199222', 'type': 'cnbcnewsstory', 'id': '108199222', 'sponsored': 'False', 'pubDate': 'Tue, 16 Sep 2025 16:53:18 GMT'}, page_content="The survey found that 41% of respondents surveyed believe that the president's actions are directly aimed at eliminating the Fed's independence, with 41% saying they are designed to limit them."),
 Document(metadata={'link': 'https://www.cnbc.com/2025/09/16/google-gemini-tops-apples-app-store-snagging-lead-spot-from-chatgpt.html', 'guid': '108199421', 'type': 'cnbcnewsstory', 'id':

In [16]:
# Merge every article description into a single feed, one item per line.
# A newline is used rather than ". " because descriptions generally already end
# with a period (which would yield ".."), and a line break marks item boundaries
# unambiguously for the model.
all_text = "\n".join(doc.page_content for doc in docs)

# System message: the assistant persona plus the sections the report must cover.
system_message = SystemMessagePromptTemplate.from_template(
    """You are Ava, a sharp and insightful trader assistant of a hedge fund.
You provide clear, concise, and actionable insights based on news feeds focusing on sectors rotation.
In addition, for each sector provide outlook, primary catalyst, and trading insights.
In addition, Describe more pro-cyclical and geopolitical shift if any.
In addition, list bullish and bearish sectors/companies to look at into 2026.
In addition, provide TACO (Trump Always Chickens Out) trade if any.
Maintain a friendly, confident, and professional tone, making complex concepts accessible and useful."""
)

# Human message: the task instruction followed by the raw news feed.
# This chain asks for a free-text report; no output schema is requested here.
human_message = HumanMessagePromptTemplate.from_template(
    """Extract the information required from the news feed below.


News feed:
{news_feed}

"""
)

# Combine both messages into the chat prompt.
chat_prompt = ChatPromptTemplate.from_messages([system_message, human_message])

# prompt -> chat model -> plain string.
llm = ChatOpenAI(temperature=0)
chain = chat_prompt | llm | StrOutputParser()

# `result` holds the report text; it is echoed here and reused by the next cell.
result = chain.invoke({"news_feed": all_text})
result

"Based on the news feed provided, here are the key insights and trends:\n\n### Sectors Rotation:\n1. **Tech Sector**: Google's advancements in consumer AI and Meta's acquisition of Manus are driving innovation in the tech sector.\n   - **Outlook**: Continued growth and innovation expected.\n   - **Primary Catalyst**: Advancements in artificial intelligence and strategic acquisitions.\n   - **Trading Insights**: Consider investing in tech companies focusing on AI and digital platforms.\n\n2. **Defense Sector**: European defense companies are critical amid rising geopolitical tensions.\n   - **Outlook**: Increased demand for defense capabilities.\n   - **Primary Catalyst**: Geopolitical uncertainties and security concerns.\n   - **Trading Insights**: Monitor defense stocks for potential opportunities.\n\n3. **Energy Sector**: Iran's airspace closure and U.S. threats impact oil prices and global energy markets.\n   - **Outlook**: Volatility expected in energy markets.\n   - **Primary Cata

In [17]:
# The report is Markdown (headings, bold, lists), so render it through rich's
# Markdown renderable rather than printing the raw markup; this also puts the
# previously unused `rich` import to work.
import rich
from rich.markdown import Markdown

rich.print(Markdown(result))

Based on the news feed provided, here are the key insights and trends:

### Sectors Rotation:
1. **Tech Sector**: Google's advancements in consumer AI and Meta's acquisition of Manus are driving innovation in the tech sector.
   - **Outlook**: Continued growth and innovation expected.
   - **Primary Catalyst**: Advancements in artificial intelligence and strategic acquisitions.
   - **Trading Insights**: Consider investing in tech companies focusing on AI and digital platforms.

2. **Defense Sector**: European defense companies are critical amid rising geopolitical tensions.
   - **Outlook**: Increased demand for defense capabilities.
   - **Primary Catalyst**: Geopolitical uncertainties and security concerns.
   - **Trading Insights**: Monitor defense stocks for potential opportunities.

3. **Energy Sector**: Iran's airspace closure and U.S. threats impact oil prices and global energy markets.
   - **Outlook**: Volatility expected in energy markets.
   - **Primary Catalyst**: Geopolit

In [17]:
# Closed set of sector labels accepted by `Insights.sector`.
SectorLabel = Literal[
    "Commercial Services",
    "Communications",
    "Consumer Durables",
    "Consumer Non-Durables",
    "Consumer Services",
    "Distribution Services",
    "Electronic Technology",
    "Energy Minerals",
    "Finance",
    "Health Services",
    "Health Technology",
    "Industrial Services",
    "Non-Energy Minerals",
    "Process Industries",
    "Producer Manufacturing",
    "Retail Trade",
    "Technology Services",
    "Transportation",
    "Utilities",
]


# Pydantic schema for one trading insight extracted from a news item.
# Documented with comments rather than a class docstring so that the JSON schema
# generated from the model stays exactly as before.
class Insights(BaseModel):
    # Who the insight is about.
    company_or_institution: str = Field(
        ..., description="Name of the listed company or institution"
    )
    # One of the fixed sector labels above.
    sector: SectorLabel = Field(..., description="Name of the sector")
    # Free-text instrument type.
    asset_type: str = Field(..., description='Type of asset to trade')
    # Direction of the trade; only these two values are declared.
    trading_decision: Literal["Buy", "Sell"] = Field(
        ..., description="The trading decision signal"
    )
    # Short justification; the length limit is stated only in the description text.
    motivation: str = Field(
        ..., description="Reason for the trading decision, max 20 words"
    )
    # Very short topic tag; the length limit is stated only in the description text.
    news_topic: str = Field(
        ...,
        description="Topic of the news, e.g., balance sheet, market share, new appointments, or other. max 3 words",
    )

In [18]:

# Base parser: parses the model's reply as JSON and, because it is given the
# Insights model, can produce format instructions describing that schema.
# NOTE(review): outputs recorded in this notebook contain values outside the
# declared Literal sets (e.g. 'N/A', 'Monitor'), so the parsed dict does not appear
# to be validated against Insights; treat the schema as guidance to the model.
base_parser = JsonOutputParser(pydantic_object=Insights)

# Wrap the base parser so that replies it cannot parse are handed to an LLM for repair.
llm_for_fixing = ChatOpenAI(temperature=0)
fixing_parser = OutputFixingParser.from_llm(parser=base_parser, llm=llm_for_fixing)

# System message: the assistant persona.
system_message = SystemMessagePromptTemplate.from_template(
    """You are Ava, a sharp and insightful trader assistant with deep expertise in quantitative finance, advanced statistical models, and short selling techniques.
You provide clear, concise, and actionable investment insights based on news feeds.
Maintain a friendly, confident, and professional tone, making complex concepts accessible and useful."""
)

# Human message: task instruction, the JSON schema the reply must follow, and the
# news item. Without {format_instructions} the model is never shown the expected
# fields or allowed values.
human_message = HumanMessagePromptTemplate.from_template(
    """Extract the investment decision from the news feed below.

{format_instructions}

News feed:
{news_feed}

"""
)

# Build the chat prompt and pre-fill the schema text, so callers still supply only
# `news_feed` when invoking or batching the chain.
chat_prompt = ChatPromptTemplate.from_messages(
    [system_message, human_message]
).partial(format_instructions=base_parser.get_format_instructions())

# Compose the chain: prompt -> LLM -> fixing parser
llm = ChatOpenAI(temperature=0)
chain = chat_prompt | llm | fixing_parser

# Smoke-test the chain on the first document only.
news = docs[0].page_content

# Invoke the chain
result = chain.invoke({"news_feed": news})
print(result)

{'company_or_institution': 'Microsoft', 'sector': 'Technology Services', 'asset_type': 'Stock', 'trading_decision': 'Buy', 'motivation': 'Strong quarterly earnings and market cap surpassing $4 trillion', 'news_topic': 'Market Cap, Earnings, Growth'}


In [15]:
# One chain input per document: the article text followed by a "Metadata:" section
# listing each metadata entry as "key: value" (an absent/empty metadata mapping
# yields an empty section).
batch_inputs = [
    {
        "news_feed": "{}\n\nMetadata:\n{}".format(
            doc.page_content,
            "\n".join(f"{key}: {value}" for key, value in (doc.metadata or {}).items()),
        )
    }
    for doc in docs
]

batch_inputs

[{'news_feed': "Microsoft tops $4 trillion in market cap after hours, joining Nvidia in exclusive club. Based on its post-market trading, Microsoft has become the world's second $4 trillion company following quarterly earnings.  \n\nMetadata:\nlink: https://www.cnbc.com/2025/07/30/microsoft-market-cap-tops-4-trillion-after-hours-on-earnings-beat.html\nguid: 108179389\ntype: cnbcnewsstory\nid: 108179389\nsponsored: False\npubDate: Wed, 30 Jul 2025 22:27:00 GMT"},
 {'news_feed': 'Meta’s Reality Labs posts $4.53 billion loss in second quarter. Meta’s Reality Labs posts $4.53 billion loss in second quarter\n\nMetadata:\nlink: https://www.cnbc.com/2025/07/30/metas-reality-labs-second-quarter-2025.html\nguid: 108178021\ntype: cnbcnewsstory\nid: 108178021\nsponsored: False\npubDate: Wed, 30 Jul 2025 20:20:49 GMT'},
 {'news_feed': 'Trump has slapped steep tariffs on India. Here\'s why New Delhi did not rush into a deal with Washington. U.S. President Donald Trump on Wednesday announced 25% dut

In [19]:
# Run the extraction chain over the first five prepared inputs only.
first_inputs = batch_inputs[:5]
result = chain.batch(first_inputs)
result

[{'company_or_institution': 'Microsoft',
  'sector': 'Technology Services',
  'asset_type': 'Stock',
  'trading_decision': 'Buy',
  'motivation': 'Market Cap Over $4 Trillion, Strong Quarterly Earnings',
  'news_topic': 'Market Share Growth'},
 {'company_or_institution': "Meta's Reality Labs",
  'sector': 'Technology Services',
  'asset_type': 'Stock',
  'trading_decision': 'Sell',
  'motivation': 'significant financial setback',
  'news_topic': 'Financial Performance'},
 {'company_or_institution': 'N/A',
  'sector': 'N/A',
  'asset_type': 'N/A',
  'trading_decision': 'N/A',
  'motivation': 'Monitor impact of tariffs on imports from India',
  'news_topic': 'Tariff Impact on Imports'},
 {'company_or_institution': 'U.S.',
  'sector': 'Trade',
  'asset_type': 'Tariffs',
  'trading_decision': 'Monitor',
  'motivation': 'Potential impact on international trade and market dynamics',
  'news_topic': 'Tariff Deadline'},
 {'company_or_institution': 'Federal Reserve',
  'sector': 'Finance',
  'a