In [52]:
!pip install -q openai feedparser python-dotenv

import os
import textwrap
import feedparser
from dotenv import load_dotenv
from openai import OpenAI



In [53]:
import pandas as pd
from google import genai
from google.genai import types

In [54]:
# Read the Gemini API key from the environment (optionally populated from a
# local .env file) so this cell works on a freshly restarted kernel and the
# key is never stored in the notebook itself.
load_dotenv()
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
if not GEMINI_API_KEY:
    raise RuntimeError(
        "GEMINI_API_KEY is not set; export it or add it to a .env file."
    )

client = genai.Client(api_key=GEMINI_API_KEY)

# Report only the length of the key, never the key itself.
print("Loaded key length:", len(GEMINI_API_KEY))

Loaded key length: 39


In [55]:
import feedparser

# Section label -> RSS feed URL. fetch_news() reads every feed listed here
# unless it is given its own mapping.
RSS_FEEDS = {
    "Top Stories": "https://timesofindia.indiatimes.com/rssfeedstopstories.cms"
}

def fetch_news(feeds=None):
    """
    Download one or more RSS feeds and flatten their entries into a list.

    Args:
        feeds: Optional mapping of section label -> feed URL. When omitted,
            the module-level RSS_FEEDS mapping is used.

    Returns:
        A list of dicts, in feed order, each with the keys "section",
        "title", "link", "published" (None when the entry has none) and
        "summary" ("" when the entry has none). Entries lacking a title or
        a link are skipped, and feeds that could not be read contribute
        no articles.
    """
    if feeds is None:
        feeds = RSS_FEEDS

    all_articles = []
    for section, url in feeds.items():
        feed = feedparser.parse(url)
        entries = getattr(feed, "entries", None) or []

        # feedparser flags feeds it could not download or parse through
        # `bozo`; report the reason instead of silently yielding nothing.
        if getattr(feed, "bozo", False) and not entries:
            reason = getattr(feed, "bozo_exception", "unknown error")
            print(f"[Could not read feed for section '{section}': {reason}]")
            continue

        for entry in entries:
            title = getattr(entry, "title", None)
            link = getattr(entry, "link", None)
            # An entry without a title or link cannot be listed in the
            # digest, so skip it rather than raise AttributeError.
            if not title or not link:
                continue
            all_articles.append({
                "section": section,
                "title": title,
                "link": link,
                "published": getattr(entry, "published", None),
                "summary": getattr(entry, "summary", ""),
            })
    return all_articles

articles = fetch_news()

# Preview: the article count plus the first article (None when nothing was
# fetched, instead of raising IndexError on an empty list).
len(articles), (articles[0] if articles else None)


(46,
 {'section': 'Top Stories',
  'title': 'Last-minute air ticket cancellation? Up to 80% refund likely soon',
  'link': 'https://timesofindia.indiatimes.com/business/india-business/india-mulls-rolling-out-inbuilt-insurance-in-airfares-to-enable-last-minute-cancellation-refunds/articleshow/125509592.cms',
  'published': 'Sat, 22 Nov 2025 23:32:48 +0530',
  'summary': ''})

In [56]:
# Pin the column set so the frame has the expected schema even when `articles`
# is empty (a DataFrame built from an empty list has no "section" column,
# which would break the section filtering done later).
df_articles = pd.DataFrame(
    articles,
    columns=["section", "title", "link", "published", "summary"],
)
df_articles.head()


Unnamed: 0,section,title,link,published,summary
0,Top Stories,Last-minute air ticket cancellation? Up to 80%...,https://timesofindia.indiatimes.com/business/i...,"Sat, 22 Nov 2025 23:32:48 +0530",
1,Top Stories,'Won't shut down': Al-Falah University to worr...,https://timesofindia.indiatimes.com/city/farid...,"Sat, 22 Nov 2025 16:13:49 +0530",Al-Falah University assures parents it won't c...
2,Top Stories,"'Hand on shoulder, kiss on cheek': Gujarat cop...",https://timesofindia.indiatimes.com/city/ahmed...,"Sun, 23 Nov 2025 00:18:51 +0530",A 19-year-old BBA student reported sexual hara...
3,Top Stories,‘Don’t feel safe in Goa’: Russian DJ says 'rud...,https://timesofindia.indiatimes.com/city/goa/c...,"Sun, 23 Nov 2025 00:46:21 +0530","Two foreign women, a DJ and an actress, have a..."
4,Top Stories,Watch: PM Modi shares G20 highlights; calls di...,https://timesofindia.indiatimes.com/india/watc...,"Sun, 23 Nov 2025 10:27:32 +0530",Prime Minister Narendra Modi described his G20...


In [57]:
def build_section_text(df, section, top_n=5):
    """
    Format the first `top_n` articles of one section as a bullet list.

    Rows of `df` whose "section" column equals `section` are kept in their
    existing order, truncated to `top_n`, and rendered one per line as
    "- <title> (<link>)". The lines are joined with newlines; the result is
    an empty string when the section has no rows.
    """
    in_section = df["section"] == section
    top_rows = df[in_section].head(top_n)
    bullets = [
        f"- {headline} ({url})"
        for headline, url in zip(top_rows["title"], top_rows["link"])
    ]
    return "\n".join(bullets)


In [62]:
import re

def _extract_response_text(response) -> str:
    """Return the stripped text carried by a Gemini response, or "" if none."""
    direct = getattr(response, "text", None)
    if direct and direct.strip():
        return direct.strip()

    # `response.text` was missing or blank: collect the text parts of every
    # candidate by hand.
    texts = []
    for cand in getattr(response, "candidates", []) or []:
        content = getattr(cand, "content", None)
        if content is None:
            continue
        for part in getattr(content, "parts", []) or []:
            t = getattr(part, "text", None)
            if t:
                texts.append(t)
    return "\n".join(texts).strip()


def summarize_section(
    section_name: str,
    raw_items_text: str,
    model: str = "gemini-2.5-flash",
    max_output_tokens: int = 512,
) -> str:
    """
    Summarize one section's headlines with Gemini.

    Args:
        section_name: Label of the section; used in the prompt and in the
            placeholder messages.
        raw_items_text: Headline lines for the section. Anything that looks
            like an HTML tag is removed before the text is sent.
        model: Name of the Gemini model to call.
        max_output_tokens: Output token limit passed to the model.
            NOTE(review): a small limit may leave little room for the answer
            on models that also spend output tokens on reasoning - confirm
            against the Gemini documentation before lowering it.

    Returns:
        The summary text returned by the model, or a parenthesised
        placeholder string when there are no headlines, the prompt was
        blocked, or the response carried no text.
    """
    # Clean first and test afterwards, so input made up only of markup or
    # whitespace never triggers an API call with an empty headline list.
    clean_items = re.sub(r"<[^>]+>", "", raw_items_text or "").strip()
    if not clean_items:
        return f"(No headlines available for section: {section_name})"

    prompt = f"""
You are a news summarization assistant.

Here are some {section_name} headlines from Times of India.

Task:
- Write 5 concise bullet points summarizing the key news.
- Avoid repetition.
- Be neutral and factual.
- Do NOT add extra news, only summarize what is given.

Headlines:
{clean_items}
"""

    response = client.models.generate_content(
        model=model,
        contents=prompt,
        config=types.GenerateContentConfig(
            temperature=0.4,
            top_p=0.9,
            top_k=32,
            max_output_tokens=max_output_tokens,
        ),
    )

    # A populated block_reason means the prompt itself was rejected.
    pf = getattr(response, "prompt_feedback", None)
    if pf is not None and getattr(pf, "block_reason", None):
        print(f"[Gemini blocked content for section '{section_name}': {pf.block_reason}]")
        return f"(Content blocked by safety filters for section: {section_name})"

    full_text = _extract_response_text(response)
    if not full_text:
        print(f"[No text returned by Gemini for section '{section_name}']")
        return f"(No summary generated for section: {section_name})"

    return full_text


In [63]:
# Sanity check that `types` and `client` are defined in the running kernel.
# Only the last expression (`client`) is echoed as the cell output.
types
client

<google.genai.client.Client at 0x7925ec092870>

In [64]:
def build_daily_digest(df_articles, top_n=5):
    """
    Summarize every section with Gemini and combine the results into one
    text digest.

    Args:
        df_articles: DataFrame of articles with at least the columns
            "section", "title" and "link".
        top_n: Maximum number of headlines per section handed to the
            summarizer.

    Returns:
        The digest as a single string: a fixed intro followed by one
        "<section>\\n\\n<summary>\\n" block per section, with the pieces
        joined by blank lines. Sections without headlines are omitted. When
        the frame has no "section" column (for example because nothing was
        fetched), only the intro is returned.
    """
    intro = (
        "Times of India – Daily News Digest\n\n"
        "Automatically generated using an Agentic AI pipeline (Gemini) – "
        "this digest summarizes top headlines from Times of India RSS feeds. \n\n"
    )
    digest_parts = [intro]

    # A frame built from an empty article list has no "section" column;
    # return an intro-only digest instead of raising KeyError.
    if df_articles is None or "section" not in df_articles:
        return "\n\n".join(digest_parts)

    for section in df_articles["section"].unique():
        raw_text = build_section_text(df_articles, section, top_n=top_n)
        if not raw_text.strip():
            continue

        summary = summarize_section(section, raw_text)
        digest_parts.append(f"{section}\n\n{summary}\n")

    return "\n\n".join(digest_parts)



In [65]:
# Build the digest for the fetched articles (this sends the section headlines
# to Gemini) and print the resulting text.
digest_markdown = build_daily_digest(df_articles)
print(digest_markdown)

Times of India – Daily News Digest

Automatically generated using an Agentic AI pipeline (Gemini) – this digest summarizes top headlines from Times of India RSS feeds. 



Top Stories

Here are 5 concise bullet points summarizing the key news:

*   India is considering a policy to allow up to 80% refunds for last-minute air ticket cancellations.
*   Al-Falah University has assured parents it will not shut down, despite an ED probe and NAAC notice.
*   A Gujarat police officer has been named in a molestation complaint by a teenager.
*   A Russian DJ reported feeling unsafe in Goa, alleging abuse by a police officer.
*   PM Modi shared highlights from the G20 summit, calling discussions with leaders "productive."

