In [237]:
pip install -r requirement.txt



In [238]:
import feedparser
import requests
from bs4 import BeautifulSoup
from pydantic import BaseModel
from google.colab import userdata
from langchain_core.prompts import ChatPromptTemplate
from langchain_groq import ChatGroq
from typing import Annotated,List

In [239]:
# Choose a feed: the Economic Times "markets" RSS endpoint.
url = 'https://economictimes.indiatimes.com/markets/rssfeeds/1977021501.cms'

# Parse feed
feed = feedparser.parse(url)

# Fail fast: an empty result would otherwise only surface further down the
# notebook (e.g. as an IndexError when the first item is inspected).
# feedparser records fetch/parse problems on the result as `bozo_exception`
# when it has one, so include it in the message.
if not feed.entries:
    raise RuntimeError(
        f"No entries parsed from {url!r}: "
        f"{feed.get('bozo_exception', 'feed is empty')}"
    )

In [240]:
class FeedItem(BaseModel):
  """One RSS entry together with the text scraped and generated for it."""
  # Headline copied from the RSS entry.
  title: str
  # Text returned by extract_article_text for the entry's link (article body,
  # or that function's failure message) -- not the RSS-provided summary.
  summary: str
  # Publication timestamp, kept as the raw string taken from the feed entry.
  published: str
  # LLM-written abstract. It is filled in after construction, so it defaults
  # to an empty string instead of forcing callers to pass a placeholder.
  abstract: str = ""

# Model holding a list of summary strings.
# NOTE(review): not referenced anywhere in the visible part of the notebook --
# presumably intended as a structured-output schema; confirm or remove.
class Feed(BaseModel):
  # The string inside Annotated[...] is a descriptive label attached to the type.
  summary: Annotated[list[str],"List of summaries"]


In [241]:
# Collection of FeedItem objects for the parsed feed; populated in a later cell.
feeds: List[FeedItem] = []

In [242]:
def extract_article_text(url):
    """Download an article page and return the text of its main body.

    Args:
        url: Address of the article page to fetch.

    Returns:
        str: The text of the main article container, one line per text node.
        Failures are reported in-band rather than raised:
        "Main content not found" when neither known container is present, or
        "Error fetching article: <reason>" when the request (including a
        4xx/5xx reply) or the parsing fails.
    """
    try:
        response = requests.get(url, timeout=10)
        # Treat HTTP error replies as failures instead of parsing the error
        # page as if it were the article.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        # Remove unwanted tags (ads, scripts, etc.)
        for tag in soup(['script', 'style', 'aside', 'footer', 'header', 'noscript', 'nav']):
            tag.decompose()

        # Target the main article body, falling back to the alternate class.
        main_content = (
            soup.find("div", {"class": "article-content"})
            or soup.find("div", {"class": "artText"})
        )
        if not main_content:
            return "Main content not found"

        return main_content.get_text(separator="\n", strip=True)

    except Exception as e:
        # Deliberately best-effort: one bad article must not stop the batch.
        return f"Error fetching article: {e}"

In [243]:
# Rebuild the list from scratch so that re-running this cell does not append
# a second copy of every entry to the existing list.
feeds = [
    FeedItem(
        title=entry.title,
        # Full article text scraped from the entry's link.
        summary=extract_article_text(entry.link),
        # Not every RSS entry carries a publication date; fall back to an
        # empty string instead of raising AttributeError.
        published=entry.get("published", ""),
        abstract="",  # placeholder; filled in by the summarisation cell
    )
    for entry in feed.entries
]

In [244]:
# Read the Groq API key from Colab's user secrets, stored under the name 'groq_key'.
groq_api_key = userdata.get('groq_key')

In [245]:
# Chat model client used by the summarisation cells.
# NOTE(review): confirm that this model id is still served by Groq.
llm = ChatGroq(model_name="llama3-70b-8192", groq_api_key=groq_api_key)

In [246]:
# Prompt for the summariser. The fallback sentinel is spelled out as "exactly
# NA" because later cells filter abstracts by comparing against that string.
prompt = ChatPromptTemplate.from_template("""
You are a financial assistant.
Summarize this news article into a short abstract:

\"\"\"{article}\"\"\"

Return only the abstract, no other text. If you are unable to summarize the article, return exactly NA.
""")

In [247]:
# The pipeline is identical for every article, so build it once.
chain = prompt | llm

# Messages extract_article_text returns in place of article text when the
# scrape fails; there is nothing to summarise in those cases.
_FETCH_FAILURE_PREFIXES = ("Main content not found", "Error fetching article:")

# Only the first 50 items are summarised (presumably to bound LLM calls).
# The loop variable is `item`, not `feed`: rebinding `feed` would overwrite
# the parsed RSS result that the FeedItem-building cell reads.
for item in feeds[:50]:
    if item.summary.startswith(_FETCH_FAILURE_PREFIXES):
        item.abstract = "NA"
        continue
    response = chain.invoke({"article": item.summary})
    # Strip surrounding whitespace so an exact comparison with "NA" works.
    item.abstract = response.content.strip()

In [248]:
# Spot-check: display the abstract stored on the first item.
feeds[0].abstract

"Religare Enterprises' board has approved a plan to raise Rs 1,500 crore through the preferential allotment of warrants to support business expansion and strategic initiatives, with the Burman family contributing Rs 750 crore and other investors contributing the remaining amount."

In [251]:
# Print every usable abstract among the first 50 items, skipping those the
# model marked with the "NA" sentinel. The comparison ignores surrounding
# whitespace so a reply such as "NA\n" is still filtered out.
# The loop variable is `item`, not `feed`, so the parsed RSS result bound to
# `feed` is not overwritten.
for item in feeds[:50]:
    if item.abstract.strip() != "NA":
        print(item.abstract)

Religare Enterprises' board has approved a plan to raise Rs 1,500 crore through the preferential allotment of warrants to support business expansion and strategic initiatives, with the Burman family contributing Rs 750 crore and other investors contributing the remaining amount.
BEML's board to consider stock split on July 21, 2025, with no specified proportion; trading window to remain closed from July 1 till 48 hours after declaration of financial results.
Reliance Infrastructure's credit rating has been upgraded by three notches to 'IND B/Stable/IND A4' by India Ratings and Research, reflecting the company's significant deleveraging efforts and near-zero debt profile.
Gold prices surged by Rs 700 to Rs 99,370 per 10 grams in the national capital on Friday, driven by safe-haven demand amid escalating global trade tensions and tariff threats, which boosted investor appetite for precious metals.
Stocks fell on Friday as markets reacted to President Trump's latest tariff threats, with t