<a href="https://www.kaggle.com/code/mdshahnewazibrahim/ai-news-summarizer-notebook?scriptVersionId=278671772" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import os
import requests
from bs4 import BeautifulSoup
import google.generativeai as gen
from kaggle_secrets import UserSecretsClient

# 1) Fetch GOOGLE_API_KEY from Kaggle Secrets.
user_secrets = UserSecretsClient()
GOOGLE_API_KEY = user_secrets.get_secret("GOOGLE_API_KEY")

# Validate BEFORE exporting to the environment: os.environ only accepts str
# values, so a missing (None) secret would otherwise fail with a TypeError and
# the explanatory RuntimeError below would never be reached.
if not GOOGLE_API_KEY:
    raise RuntimeError("GOOGLE_API_KEY secret set ‡¶ï‡¶∞‡¶æ ‡¶®‡¶æ‡¶á ‚Äì Kaggle Secrets ‡¶è add ‡¶ï‡¶∞‡ßá ‡¶®‡¶æ‡¶ì‡•§")

os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

# 2) Configure the Gemini client with the key.
gen.configure(api_key=GOOGLE_API_KEY)

# 3) The model is hard-coded here (no auto-selection any more).
MODEL_NAME = "gemini-2.5-flash"      # to use the lite variant: "gemini-2.5-flash-lite"
model = gen.GenerativeModel(MODEL_NAME)

print("ü§ñ Using Gemini model:", MODEL_NAME)


ü§ñ Using Gemini model: gemini-2.5-flash


In [2]:
def fetch_article_from_url(url: str):
    """Download a page and extract its title and paragraph text.

    Simple scraper: the text of every sufficiently long ``<p>`` tag is
    joined to form the article body.

    Args:
        url: Address of the page to fetch.

    Returns:
        A ``(title, article_text)`` tuple. ``title`` falls back to
        ``"Unknown Title"`` when the page has no usable ``<title>``;
        ``article_text`` is the kept paragraphs joined by newlines and may
        be empty.

    Raises:
        requests.HTTPError: If the response carries an error status
            (raised by ``raise_for_status``). Other exceptions raised by
            ``requests.get`` propagate unchanged.
    """
    resp = requests.get(url, timeout=20)
    resp.raise_for_status()

    soup = BeautifulSoup(resp.text, "html.parser")

    # ``Tag.string`` is None when <title> is empty or contains nested markup,
    # so calling ``.strip()`` on it could raise AttributeError. ``get_text``
    # always returns a string.
    title_tag = soup.title
    title = title_tag.get_text(" ", strip=True) if title_tag is not None else ""
    if not title:
        title = "Unknown Title"

    # Keep only paragraphs longer than 40 characters to drop very short noise.
    paragraphs = [
        text
        for text in (p.get_text(" ", strip=True) for p in soup.find_all("p"))
        if len(text) > 40
    ]

    article_text = "\n".join(paragraphs)
    return title, article_text


In [3]:
def summarize_with_gemini(
    article_text: str,
    url: str = None,
    language: str = "bn",
    *,
    max_chars: int = 4000,
) -> str:
    """Summarize an article using the module-level Gemini ``model``.

    Args:
        article_text: Plain text of the article.
        url: Source URL, included in the prompt for context; ``"N/A"`` is
            used when it is falsy.
        language: ``"bn"`` selects the Bengali instruction; any other value
            selects the English instruction.
        max_chars: Maximum number of leading characters of ``article_text``
            sent to the model, to keep the input token count low.

    Returns:
        The generated summary text. If the stripped article is shorter than
        200 characters a fixed notice is returned instead, and if the Gemini
        call fails the result is ``"Gemini error: <details>"``.
    """
    if not article_text or len(article_text.strip()) < 200:
        return "Article text is too short or could not be extracted."

    # Trim the input to keep the token count low; slicing is a no-op when the
    # text is already within the limit.
    article_text = article_text[:max_chars]

    if language == "bn":
        instruction = (
            "‡¶§‡ßÅ‡¶Æ‡¶ø ‡¶è‡¶ï‡¶ú‡¶® AI News Summarizer ‡¶è‡¶ú‡ßá‡¶®‡ßç‡¶ü‡•§ "
            "‡¶®‡¶ø‡¶ö‡ßá‡¶∞ ‡¶®‡¶ø‡¶â‡¶ú ‡¶Ü‡¶∞‡ßç‡¶ü‡¶ø‡¶ï‡ßá‡¶≤‡¶ü‡¶ø ‡¶™‡ßú‡ßá ‡ß´‚Äì‡ß≠‡¶ü‡¶ø ‡¶™‡ßü‡ßá‡¶®‡ßç‡¶ü‡ßá ‡¶∏‡¶π‡¶ú ‡¶≠‡¶æ‡¶∑‡¶æ‡ßü ‡¶¨‡¶æ‡¶Ç‡¶≤‡¶æ ‡¶∏‡¶æ‡¶∞‡¶æ‡¶Ç‡¶∂ ‡¶≤‡¶ø‡¶ñ‡ßã‡•§ "
            "‡¶Æ‡ßÇ‡¶≤ ‡¶ò‡¶ü‡¶®‡¶æ, ‡¶∏‡¶Æ‡ßü, ‡¶∏‡ßç‡¶•‡¶æ‡¶®, ‡¶ï‡¶æ‡¶∞‡¶£ ‡¶Ü‡¶∞ ‡¶™‡ßç‡¶∞‡¶≠‡¶æ‡¶¨ ‡¶Ø‡ßá‡¶® ‡¶™‡¶∞‡¶ø‡¶∑‡ßç‡¶ï‡¶æ‡¶∞ ‡¶•‡¶æ‡¶ï‡ßá‡•§"
        )
    else:
        instruction = (
            "You are an AI news summarization agent. "
            "Read the article and summarize it in 5‚Äì7 clear bullet points in English, "
            "covering who, what, when, where, why, and key outcomes."
        )

    # Prompt layout: instruction, blank line, URL, blank line, then the
    # article wrapped in triple double-quotes.
    prompt = "\n".join(
        [
            instruction,
            "",
            f"URL: {url or 'N/A'}",
            "",
            "Article:",
            '"""',
            article_text,
            '"""',
        ]
    )

    try:
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        # Deliberate best-effort boundary: any failure while calling the model
        # or reading its text is reported to the caller as a string.
        return f"Gemini error: {e}"


In [4]:
def summarize_news_url(url: str, language: str = "bn"):
    """Fetch a news page and summarize it with Gemini, printing progress.

    Args:
        url: Address of the news article to fetch.
        language: Summary language code, forwarded to
            ``summarize_with_gemini``.

    Returns:
        A ``(title, summary)`` tuple: the title from
        ``fetch_article_from_url`` and the string returned by
        ``summarize_with_gemini``.
    """
    print("üåê Fetching article...")
    title, article_text = fetch_article_from_url(url)
    print("üìù Extracted article length:", len(article_text))

    # Report the configured model rather than a hard-coded name, so the
    # message stays accurate when MODEL_NAME is changed.
    print(f"ü§ñ Sending to Gemini model {MODEL_NAME}...")
    summary = summarize_with_gemini(article_text, url=url, language=language)

    return title, summary


In [5]:
# Put any news article link here.
url = "https://www.bbc.com/news/articles/c891jp9j79do"

# language="bn" requests the Bengali summary; pass "en" for an English one.
title, summary = summarize_news_url(url=url, language="bn")

# Each header and its value are emitted by a single print call; sep="\n"
# yields the same output as two consecutive prints.
print("\n===== üì∞ ARTICLE TITLE =====", title, sep="\n")
print("\n===== ‚ú® GENERATED SUMMARY =====\n", summary, sep="\n")


üåê Fetching article...
üìù Extracted article length: 5239
ü§ñ Sending to Gemini 2.5-flash...

===== üì∞ ARTICLE TITLE =====
Trump says he will sue BBC for at least $1bn over Panorama edit

===== ‚ú® GENERATED SUMMARY =====

‡¶è‡¶ï‡¶ú‡¶® AI News Summarizer ‡¶è‡¶ú‡ßá‡¶®‡ßç‡¶ü ‡¶π‡¶ø‡¶∏‡ßá‡¶¨‡ßá, ‡¶¨‡¶ø‡¶¨‡¶ø‡¶∏‡¶ø ‡¶®‡¶ø‡¶â‡¶ú‡ßá‡¶∞ ‡¶Ü‡¶∞‡ßç‡¶ü‡¶ø‡¶ï‡ßá‡¶≤‡¶ü‡¶ø‡¶∞ ‡¶∏‡¶æ‡¶∞‡¶æ‡¶Ç‡¶∂ ‡¶®‡¶ø‡¶ö‡ßá ‡¶¶‡ßá‡¶ì‡ßü‡¶æ ‡¶π‡¶≤‡ßã:

**‡¶°‡ßã‡¶®‡¶æ‡¶≤‡ßç‡¶° ‡¶ü‡ßç‡¶∞‡¶æ‡¶Æ‡ßç‡¶™‡ßá‡¶∞ ‡¶¨‡¶ø‡¶¨‡¶ø‡¶∏‡¶ø‡¶ï‡ßá ‡¶Æ‡¶æ‡¶Æ‡¶≤‡¶æ ‡¶ï‡¶∞‡¶æ‡¶∞ ‡¶π‡ßÅ‡¶Æ‡¶ï‡¶ø: ‡¶Æ‡ßÇ‡¶≤ ‡¶ò‡¶ü‡¶®‡¶æ, ‡¶ï‡¶æ‡¶∞‡¶£ ‡¶ì ‡¶™‡ßç‡¶∞‡¶≠‡¶æ‡¶¨**

‡ßß. **‡¶Æ‡ßÇ‡¶≤ ‡¶ò‡¶ü‡¶®‡¶æ ‡¶ì ‡¶ï‡¶æ‡¶∞‡¶£:** ‡¶Æ‡¶æ‡¶∞‡ßç‡¶ï‡¶ø‡¶® ‡¶Ø‡ßÅ‡¶ï‡ßç‡¶§‡¶∞‡¶æ‡¶∑‡ßç‡¶ü‡ßç‡¶∞‡ßá‡¶∞ ‡¶∏‡¶æ‡¶¨‡ßá‡¶ï ‡¶™‡ßç‡¶∞‡ßá‡¶∏‡¶ø‡¶°‡ßá‡¶®‡ßç‡¶ü ‡¶°‡ßã‡¶®‡¶æ‡¶≤‡ßç‡¶° ‡¶ü‡ßç‡¶∞‡¶æ‡¶Æ‡ßç‡¶™ ‡¶¨‡¶ø‡¶¨‡¶ø‡¶∏‡¶ø (BBC) ‡¶è‡¶∞ ‡¶¨‡¶ø‡¶∞‡ßÅ‡¶¶‡ßç‡¶ß‡ßá ‡ßß ‡¶•‡ßá‡¶ï‡ßá ‡ß´ ‡¶¨‡¶ø‡¶≤‡¶ø‡¶Ø‡¶º‡¶® ‡¶°‡¶≤‡¶æ‡¶∞ ‡¶ï‡ßç‡¶∑‡¶§‡¶ø‡¶™‡ßÇ‡¶∞‡¶£ ‡¶ö‡ßá‡¶Ø‡¶º‡ßá ‡¶Æ‡¶æ‡¶Æ‡¶≤‡¶æ 

In [6]:
# from urllib.parse import urlparse

# def validate_url(url: str):
#     url = (url or "").strip()
#     if not url:
#         return False, "URL ‡¶´‡¶æ‡¶Å‡¶ï‡¶æ ‡¶¶‡ßá‡¶ì‡ßü‡¶æ ‡¶π‡ßü‡ßá‡¶õ‡ßá‡•§"

#     if not (url.startswith("http://") or url.startswith("https://")):
#         return False, "URL ‡¶Ö‡¶¨‡¶∂‡ßç‡¶Ø‡¶á http:// ‡¶Ö‡¶•‡¶¨‡¶æ https:// ‡¶¶‡¶ø‡ßü‡ßá ‡¶∂‡ßÅ‡¶∞‡ßÅ ‡¶π‡¶§‡ßá ‡¶π‡¶¨‡ßá‡•§"

#     parsed = urlparse(url)
#     if not parsed.netloc or "." not in parsed.netloc:
#         return False, "URL-‡¶è‡¶∞ ‡¶°‡ßã‡¶Æ‡ßá‡¶á‡¶® ‡¶Ö‡¶Ç‡¶∂‡¶ü‡¶ø ‡¶∏‡¶†‡¶ø‡¶ï ‡¶Æ‡¶®‡ßá ‡¶π‡¶ö‡ßç‡¶õ‡ßá ‡¶®‡¶æ‡•§"

#     # Optionally add extra checks here (e.g. whether the site is a news site, etc.)
#     return True, ""


In [7]:
# def run_news_summarizer(language: str = "bn"):
#     # 1) User input the URL
#     url = input("üìù ‡¶è‡¶ï‡¶ü‡¶ø news URL ‡¶¶‡¶ø‡¶®: ").strip()

#     # 2) Validate the URL
#     is_ok, msg = validate_url(url)
#     if not is_ok:
#         print("‚ùå Invalid URL:", msg)
#         return

#     # 3) Try summarizing
#     try:
#         title, summary = summarize_news_url(url, language=language)

#         print("\n===== üì∞ ARTICLE TITLE =====")
#         print(title)

#         print("\n===== ‚ú® GENERATED SUMMARY =====\n")
#         print(summary)
#     except Exception as e:
#         print("‚ö†Ô∏è ‡¶ï‡ßã‡¶®‡ßã ‡¶è‡¶ï‡¶ü‡¶æ ‡¶∏‡¶Æ‡¶∏‡ßç‡¶Ø‡¶æ ‡¶π‡ßü‡ßá‡¶õ‡ßá:", e)

# # Now running just this line is enough:
# run_news_summarizer(language="bn")   # for an English summary use language="en"
