In [11]:
CATEGORY_URLS = {
    "india": "https://indianexpress.com/section/india/?ref=l1_section",
    "entertainment": "https://indianexpress.com/section/entertainment/?ref=l1_section",
    "politics": "https://indianexpress.com/politics/?ref=l1_section",
    "sports": "https://indianexpress.com/section/sports/?ref=l1_section",
    "world": "https://indianexpress.com/section/world/?ref=l1_section",
    "business": "https://indianexpress.com/section/business/?ref=l1_section",
    "technology": "https://indianexpress.com/section/technology/?ref=l1_section"
}
JSON_NAMES = {
    "india": "india_news.json",
    "entertainment": "entertainment_news.json",
    "politics": "politics_news.json",  # NEW JSON WILL BE CREATED
    "sports": "sports_news.json",
    "world": "world_news.json",
    "business": "business_news.json",
    "technology": "technology_news.json"
}

In [12]:
import json, csv, random, requests
from bs4 import BeautifulSoup
index = "india"
url = CATEGORY_URLS[index]
json_path = JSON_NAMES[index]


# Load all headers from CSV
header_list = []
with open("headers.csv", "r", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        header_list.append({
            "User-Agent": row["User-Agent"],
            "Accept-Language": row["Accept-Language"]
        })

def get_random_header():
    return random.choice(header_list)


response = requests.get(url, headers=get_random_header())
soup = BeautifulSoup(response.text, "html.parser")

final_output = []

for art in soup.find_all("div", class_="articles"):
    try:
        img_tag = art.find("div", class_="snaps").find("img") if art.find("div", class_="snaps") else None
        img_url = (img_tag.get("data-src") or img_tag.get("src")) if img_tag else "N/A"

        text = art.find("div", class_="img-context")
        if not text: continue

        a = text.find("h2", class_="title").find("a") if text.find("h2", class_="title") else None
        headline = a.text.strip() if a else "N/A"
        link = a["href"] if a else "N/A"
        date = text.find("div", class_="date").text.strip() if text.find("div", class_="date") else "N/A"

        final_output.append({
            "primary_article": {
                "headline": headline,
                "author": "N/A",
                "article_link": link,
                "featured_image": img_url,
                "source_logo": "N/A",
                "source_name": "IndianExpress",
                "publish_date": date,
                "summary": "N/A"
            },
            "related_articles": [],
            "total_related_articles": 0
        })

    except:
        continue


# -----------------------------
#  UPDATE india_news.json HERE
# -----------------------------



# Load old data (if file exists)
try:
    with open(json_path, "r", encoding="utf-8") as f:
        old_data = json.load(f)
except:
    old_data = []   # file didn't exist ‚Üí start fresh

# Append new articles
updated_data = old_data + final_output

# Remove duplicates by article_link
unique = []
links = set()

for article in updated_data:
    link = article["primary_article"]["article_link"]
    if link not in links:
        links.add(link)
        unique.append(article)

# Save back to the same file
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(unique, f, indent=4, ensure_ascii=False)

print("Updated json with", len(final_output), "new articles.")
final_output[0:3]

Updated json with 25 new articles.


[{'primary_article': {'headline': 'After important gains at COP30, India tells developed world: Don‚Äôt expect us to fill in for your failures',
   'author': 'N/A',
   'article_link': 'https://indianexpress.com/article/india/india-cop30-developed-world-climate-change-10381582/',
   'featured_image': 'https://images.indianexpress.com/2025/11/COP30.jpg?w=450',
   'source_logo': 'N/A',
   'source_name': 'IndianExpress',
   'publish_date': 'November 23, 2025 14:06 IST',
   'summary': 'N/A'},
  'related_articles': [],
  'total_related_articles': 0},
 {'primary_article': {'headline': 'BJP lashes out after Jamiat Ulama-i-Hind President Arshad Madani claims discrimination of Muslims in India',
   'author': 'N/A',
   'article_link': 'https://indianexpress.com/article/india/arshad-madani-muslims-discrimination-india-congress-bjp-react-10381448/',
   'featured_image': 'https://images.indianexpress.com/2025/11/Arshad-Madani-.jpg?w=450',
   'source_logo': 'N/A',
   'source_name': 'IndianExpress',
 

In [20]:
import json, csv, random, requests
from bs4 import BeautifulSoup

index = "entertainment"
url = CATEGORY_URLS[index]
json_path = JSON_NAMES[index]
print(url)
print(json_path)

# Load all headers from CSV
header_list = []
with open("headers.csv", "r", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        header_list.append({
            "User-Agent": row["User-Agent"],
            "Accept-Language": row["Accept-Language"]
        })

def get_random_header():
    return random.choice(header_list)

response = requests.get(url, headers=get_random_header())
soup = BeautifulSoup(response.text, "html.parser")

final_output = []

# NEW selector for entertainment
for art in soup.find_all("article", class_="myie-articles"):
    try:
        img_wrap = art.find("div", class_="myie-snaps")
        img_tag = img_wrap.find("img") if img_wrap else None
        img_url = img_tag.get("data-src") or img_tag.get("src") if img_tag else "N/A"

        title_div = art.find("h2", class_="myie-title")
        a = title_div.find("a") if title_div else None
        headline = a.text.strip() if a else "N/A"
        link = a["href"] if a else "N/A"

        time_div = art.find("div", class_="my-time")
        date = time_div.text.strip() if time_div else "N/A"

        final_output.append({
            "primary_article": {
                "headline": headline,
                "author": "N/A",
                "article_link": link,
                "featured_image": img_url,
                "source_logo": "N/A",
                "source_name": "IndianExpress",
                "publish_date": date,
                "summary": "N/A"
            },
            "related_articles": [],
            "total_related_articles": 0
        })

    except Exception as e:
        print("Error:", e)
        continue

# -----------------------------
# UPDATE entertainment JSON
# -----------------------------
try:
    with open(json_path, "r", encoding="utf-8") as f:
        old_data = json.load(f)
except:
    old_data = []

updated_data = old_data + final_output

unique = []
links = set()

for article in updated_data:
    link = article["primary_article"]["article_link"]
    if link not in links:
        links.add(link)
        unique.append(article)

with open(json_path, "w", encoding="utf-8") as f:
    json.dump(unique, f, indent=4, ensure_ascii=False)

print("Updated json with", len(final_output), "new articles.")
final_output[:3]

https://indianexpress.com/section/entertainment/?ref=l1_section
entertainment_news.json
Updated json with 25 new articles.


[{'primary_article': {'headline': 'Netra Mantena‚ÄìVamsi Gadiraju wedding photos out: Donald Trump Jr among guests; Jennifer Lopez seen rehearsing',
   'author': 'N/A',
   'article_link': 'https://indianexpress.com/article/entertainment/bollywood/netra-mantena-vamsi-gadiraju-wedding-photos-out-donald-trump-jr-among-guests-jennifer-lopez-seen-rehearsing-10381673/',
   'featured_image': 'https://images.indianexpress.com/2025/11/Netra-Mantena-and-Vamsi-Gadirajus-grand-wedding-ll.jpg?w=270',
   'source_logo': 'N/A',
   'source_name': 'IndianExpress',
   'publish_date': 'November 23, 2025  15:18 IST',
   'summary': 'N/A'},
  'related_articles': [],
  'total_related_articles': 0},
 {'primary_article': {'headline': 'Rajinikanth, Nani, Jackie Shroff, Brahmanandam attend event celebrating 50 years of Mohan Babu. See photos, videos',
   'author': 'N/A',
   'article_link': 'https://indianexpress.com/article/entertainment/telugu/rajinikanth-nani-jackie-shroff-brahmanandam-attend-event-celebrating-

In [27]:
import json, csv, random, requests
from bs4 import BeautifulSoup

index = "politics"
url = CATEGORY_URLS[index]
json_path = JSON_NAMES[index]
print(url)

# Load all headers from CSV
header_list = []
with open("headers.csv", "r", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        header_list.append({
            "User-Agent": row["User-Agent"],
            "Accept-Language": row["Accept-Language"]
        })

def get_random_header():
    return random.choice(header_list)

response = requests.get(url, headers=get_random_header())
soup = BeautifulSoup(response.text, "html.parser")

final_output = []

# CORRECTED SELECTOR: Find the main wrapper first
wrapper = soup.find("div", class_="opinion-more-wrapper")

if wrapper:
    # Find the listing container
    listing = wrapper.find("div", class_="opinion-more-listing flex")
    
    if listing:
        # Now find all articles within the listing
        for art in listing.find_all("div", class_="o-opin-article"):
            try:
                # Author + Date
                writer = art.find("div", class_="news-writer-name")
                author = "N/A"
                date = "N/A"
                
                if writer:
                    author_link = writer.find("a")
                    if author_link:
                        author = author_link.text.strip()
                    
                    date_span = writer.find("span", class_="opinion-date")
                    if date_span:
                        date = date_span.text.strip()
                
                # Headline + link (from h4 inside the article)
                title = art.find("h4", class_="o-opin-article_title")
                headline = "N/A"
                link = "N/A"
                
                if title:
                    a = title.find("a")
                    if a:
                        headline = a.text.strip()
                        link = a.get("href", "N/A")
                        # Make sure link is absolute
                        if link != "N/A" and not link.startswith("http"):
                            link = "https://indianexpress.com" + link
                
                # Summary text
                summary_div = art.find("div", class_="opinion-news-para")
                summary = summary_div.text.strip() if summary_div else "N/A"
                
                # Image
                img_div = art.find("div", class_="opinion-news-figure")
                img_url = "N/A"
                
                if img_div:
                    # First try to find 'a' tag with img inside
                    img_link = img_div.find("a")
                    if img_link:
                        img_tag = img_link.find("img")
                    else:
                        img_tag = img_div.find("img")
                    
                    if img_tag:
                        img_url = img_tag.get("data-src") or img_tag.get("src") or "N/A"
                
                # Only add if we have at least a headline
                if headline != "N/A":
                    final_output.append({
                        "primary_article": {
                            "headline": headline,
                            "author": author,
                            "article_link": link,
                            "featured_image": img_url,
                            "source_logo": "N/A",
                            "source_name": "IndianExpress",
                            "publish_date": date,
                            "summary": summary
                        },
                        "related_articles": [],
                        "total_related_articles": 0
                    })
                    
            except Exception as e:
                print("Error parsing article:", e)
                continue
    else:
        print("‚ùå Could not find opinion-more-listing flex")
else:
    print("‚ùå Could not find opinion-more-wrapper")

print(f"‚úÖ Found {len(final_output)} articles")

# -----------------------------
# UPDATE politics JSON
# -----------------------------
try:
    with open(json_path, "r", encoding="utf-8") as f:
        old_data = json.load(f)
except:
    old_data = []

updated_data = old_data + final_output

# Deduplicate by link
unique = []
links = set()
for article in updated_data:
    link = article["primary_article"]["article_link"]
    if link not in links and link != "N/A":
        links.add(link)
        unique.append(article)

with open(json_path, "w", encoding="utf-8") as f:
    json.dump(unique, f, indent=4, ensure_ascii=False)

print(f"‚úÖ Updated json with {len(final_output)} new politics articles.")
print(f"üìä Total unique articles: {len(unique)}")

# Display first 3 articles
final_output[:3]

https://indianexpress.com/politics/?ref=l1_section
‚ùå Could not find opinion-more-listing flex
‚úÖ Found 0 articles
‚úÖ Updated json with 0 new politics articles.
üìä Total unique articles: 0


[]

In [22]:
import json, csv, random, requests
from bs4 import BeautifulSoup
index = "sports"
url = CATEGORY_URLS[index]
json_path = JSON_NAMES[index]
# Load all headers from CSV
header_list = []
with open("headers.csv", "r", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        header_list.append({
            "User-Agent": row["User-Agent"],
            "Accept-Language": row["Accept-Language"]
        })

def get_random_header():
    return random.choice(header_list)


response = requests.get(url, headers=get_random_header())
soup = BeautifulSoup(response.text, "html.parser")

final_output = []

for art in soup.find_all("div", class_="articles"):
    try:
        img_tag = art.find("div", class_="snaps").find("img") if art.find("div", class_="snaps") else None
        img_url = (img_tag.get("data-src") or img_tag.get("src")) if img_tag else "N/A"

        text = art.find("div", class_="img-context")
        if not text: continue

        a = text.find("h2", class_="title").find("a") if text.find("h2", class_="title") else None
        headline = a.text.strip() if a else "N/A"
        link = a["href"] if a else "N/A"
        date = text.find("div", class_="date").text.strip() if text.find("div", class_="date") else "N/A"

        final_output.append({
            "primary_article": {
                "headline": headline,
                "author": "N/A",
                "article_link": link,
                "featured_image": img_url,
                "source_logo": "N/A",
                "source_name": "IndianExpress",
                "publish_date": date,
                "summary": "N/A"
            },
            "related_articles": [],
            "total_related_articles": 0
        })

    except:
        continue


# -----------------------------
#  UPDATE india_news.json HERE
# -----------------------------



# Load old data (if file exists)
try:
    with open(json_path, "r", encoding="utf-8") as f:
        old_data = json.load(f)
except:
    old_data = []   # file didn't exist ‚Üí start fresh

# Append new articles
updated_data = old_data + final_output

# Remove duplicates by article_link
unique = []
links = set()

for article in updated_data:
    link = article["primary_article"]["article_link"]
    if link not in links:
        links.add(link)
        unique.append(article)

# Save back to the same file
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(unique, f, indent=4, ensure_ascii=False)

print("Updated json with", len(final_output), "new articles.")
final_output[0:3]

https://indianexpress.com/section/sports/?ref=l1_section
Updated json with 25 new articles.


[{'primary_article': {'headline': 'Senuran Muthusamy ‚Äì ‚Äòorigins in India‚Äô, but ‚Äòfirmly South African‚Äô',
   'author': 'N/A',
   'article_link': 'https://indianexpress.com/article/sports/cricket/senuran-muthusamy-origins-in-india-but-firmly-south-african-10381724/',
   'featured_image': 'https://images.indianexpress.com/2025/11/Senuran-Muthusamy-AP-Photo-2.jpg?resize=450,253',
   'source_logo': 'N/A',
   'source_name': 'IndianExpress',
   'publish_date': 'November 23, 2025 16:10 IST',
   'summary': 'N/A'},
  'related_articles': [],
  'total_related_articles': 0},
 {'primary_article': {'headline': '‚ÄòThere is one area we need to improve, which is our catching‚Äô: Aakash Chopra',
   'author': 'N/A',
   'article_link': 'https://indianexpress.com/article/sports/cricket/there-is-one-area-we-need-to-improve-which-is-our-catching-aakash-chopra-10381622/',
   'featured_image': 'https://images.indianexpress.com/2025/11/rahul_92e8ba.jpg?resize=450,253',
   'source_logo': 'N/A',
   'sour

In [28]:
import json, csv, random, requests
from bs4 import BeautifulSoup
index = "world"
url = CATEGORY_URLS[index]
json_path = JSON_NAMES[index]
print(url)
# Load all headers from CSV
header_list = []
with open("headers.csv", "r", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        header_list.append({
            "User-Agent": row["User-Agent"],
            "Accept-Language": row["Accept-Language"]
        })

def get_random_header():
    return random.choice(header_list)


response = requests.get(url, headers=get_random_header())
soup = BeautifulSoup(response.text, "html.parser")

final_output = []

for art in soup.find_all("div", class_="articles"):
    try:
        img_tag = art.find("div", class_="snaps").find("img") if art.find("div", class_="snaps") else None
        img_url = (img_tag.get("data-src") or img_tag.get("src")) if img_tag else "N/A"

        text = art.find("div", class_="img-context")
        if not text: continue

        a = text.find("h2", class_="title").find("a") if text.find("h2", class_="title") else None
        headline = a.text.strip() if a else "N/A"
        link = a["href"] if a else "N/A"
        date = text.find("div", class_="date").text.strip() if text.find("div", class_="date") else "N/A"

        final_output.append({
            "primary_article": {
                "headline": headline,
                "author": "N/A",
                "article_link": link,
                "featured_image": img_url,
                "source_logo": "N/A",
                "source_name": "IndianExpress",
                "publish_date": date,
                "summary": "N/A"
            },
            "related_articles": [],
            "total_related_articles": 0
        })

    except:
        continue


# -----------------------------
#  UPDATE india_news.json HERE
# -----------------------------



# Load old data (if file exists)
try:
    with open(json_path, "r", encoding="utf-8") as f:
        old_data = json.load(f)
except:
    old_data = []   # file didn't exist ‚Üí start fresh

# Append new articles
updated_data = old_data + final_output

# Remove duplicates by article_link
unique = []
links = set()

for article in updated_data:
    link = article["primary_article"]["article_link"]
    if link not in links:
        links.add(link)
        unique.append(article)

# Save back to the same file
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(unique, f, indent=4, ensure_ascii=False)

print("Updated json with", len(final_output), "new articles.")
final_output[0:3]

https://indianexpress.com/section/world/?ref=l1_section
Updated json with 0 new articles.


[]

In [34]:
import json, csv, random, requests
from bs4 import BeautifulSoup

index = "business"
url = CATEGORY_URLS[index]
json_path = JSON_NAMES[index]

print(url)

# Load all headers from CSV
header_list = []
with open("headers.csv", "r", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        header_list.append({
            "User-Agent": row["User-Agent"],
            "Accept-Language": row["Accept-Language"]
        })

def get_random_header():
    return random.choice(header_list)

response = requests.get(url, headers=get_random_header())
soup = BeautifulSoup(response.text, "html.parser")

final_output = []

# BUSINESS: all articles inside <div class="o-opin-article">
for art in soup.find_all("div", class_="o-opin-article"):
    try:
        # AUTHOR + DATE
        writer = art.select_one("div.news-writer-name")
        author = writer.select_one("a").text.strip() if writer and writer.select_one("a") else "N/A"
        date = writer.select_one("span.opinion-date").text.strip() if writer and writer.select_one("span.opinion-date") else "N/A"

        # HEADLINE (CSS SELECTOR FIX)
        a = art.select_one("a.opinion-news-title")
        headline = a.text.strip() if a else "N/A"
        link = a["href"] if a else "N/A"

        # SUMMARY
        summary_block = art.select_one("div.opinion-news-para")
        summary = summary_block.text.strip() if summary_block else "N/A"

        # IMAGE
        img_tag = art.select_one("div.opinion-news-figure img")
        img_url = img_tag.get("data-src") or img_tag.get("src") if img_tag else "N/A"

        final_output.append({
            "primary_article": {
                "headline": headline,
                "author": author,
                "article_link": link,
                "featured_image": img_url,
                "source_logo": "N/A",
                "source_name": "IndianExpress",
                "publish_date": date,
                "summary": summary
            },
            "related_articles": [],
            "total_related_articles": 0
        })

    except Exception as e:
        print("Error:", e)
        continue


# -----------------------------
# UPDATE business JSON
# -----------------------------
try:
    with open(json_path, "r", encoding="utf-8") as f:
        old_data = json.load(f)
except:
    old_data = []

updated_data = old_data + final_output

# REMOVE DUPLICATES
unique = []
seen = set()

for article in updated_data:
    link = article["primary_article"]["article_link"]
    if link not in seen:
        seen.add(link)
        unique.append(article)

with open(json_path, "w", encoding="utf-8") as f:
    json.dump(unique, f, indent=4, ensure_ascii=False)

print("Updated json with", len(final_output), "new business articles.")
final_output[:3]

https://indianexpress.com/section/business/?ref=l1_section
Updated json with 25 new business articles.


[{'primary_article': {'headline': 'India Inc pins hopes on implementation of labour codes by states for effective transition',
   'author': 'Ravi Dutta Mishra',
   'article_link': 'https://indianexpress.com/article/business/india-inc-pins-hopes-on-implementation-of-labour-codes-by-states-for-effective-transition-10379196/',
   'featured_image': 'https://images.indianexpress.com/2025/11/EMPL14id.jpeg?w=300',
   'source_logo': 'N/A',
   'source_name': 'IndianExpress',
   'publish_date': 'Nov 21, 2025',
   'summary': "India's largest IT industry association Nasscom said the government‚Äôs notification bringing key provisions of the labour codes into effect marks a significant moment in India‚Äôs labour reform journey."},
  'related_articles': [],
  'total_related_articles': 0},
 {'primary_article': {'headline': 'Centre may widen ‚Äòobscene‚Äô content net to ‚Äòhalf truths,‚Äô criticism of social, public figures',
   'author': 'Soumyarendra Barik',
   'article_link': 'https://indianexpress

In [36]:
import json, csv, random, requests
from bs4 import BeautifulSoup

index = "technology"
url = CATEGORY_URLS[index]
json_path = JSON_NAMES[index]
print(url)

# Load headers
header_list = []
with open("headers.csv", "r", encoding="utf-8") as f:
    for row in csv.DictReader(f):
        header_list.append({
            "User-Agent": row["User-Agent"],
            "Accept-Language": row["Accept-Language"]
        })

def get_random_header():
    return random.choice(header_list)

response = requests.get(url, headers=get_random_header())
soup = BeautifulSoup(response.text, "html.parser")

final_output = []

# MAIN: all technology news rows
for art in soup.find_all("div", class_="area-row"):

    try:
        # -------------------------------
        # IMAGE
        # -------------------------------
        img_tag = art.select_one("figure.thumb-wrap img")
        img_url = (
            img_tag.get("data-src") or 
            img_tag.get("src") 
        ) if img_tag else "N/A"

        # -------------------------------
        # CATEGORY
        # -------------------------------
        category_tag = art.select_one("span.category")
        category = category_tag.text.strip() if category_tag else "N/A"

        # -------------------------------
        # DATE
        # -------------------------------
        date_tag = art.select_one("time.time-stamp")
        date = date_tag.text.strip() if date_tag else "N/A"

        # -------------------------------
        # HEADLINE
        # -------------------------------
        h = art.select_one("h3.list-heading")
        headline = h.text.strip() if h else "N/A"

        # -------------------------------
        # LINK
        # The <a> wrapping the whole content area
        # -------------------------------
        a_tag = art.find("a", href=True)
        link = a_tag["href"] if a_tag else "N/A"

        # -------------------------------
        # SUMMARY
        # Technology section DOES NOT have summary text in list view
        # -------------------------------
        summary = "N/A"

        final_output.append({
            "primary_article": {
                "headline": headline,
                "author": "N/A",
                "article_link": link,
                "featured_image": img_url,
                "source_logo": "N/A",
                "source_name": "IndianExpress",
                "publish_date": date,
                "summary": summary,
                "category": category
            },
            "related_articles": [],
            "total_related_articles": 0
        })

    except Exception as e:
        print("Error:", e)
        continue


# -------------------------------
# UPDATE JSON FILE
# -------------------------------
try:
    with open(json_path, "r", encoding="utf-8") as f:
        old_data = json.load(f)
except:
    old_data = []

updated_data = old_data + final_output

unique = []
seen = set()

for article in updated_data:
    link = article["primary_article"]["article_link"]
    if link not in seen:
        seen.add(link)
        unique.append(article)

with open(json_path, "w", encoding="utf-8") as f:
    json.dump(unique, f, indent=4, ensure_ascii=False)

print(f"Updated {json_path} with {len(final_output)} new articles.")
print(f"Total unique articles: {len(unique)}")

final_output[:3]

https://indianexpress.com/section/technology/?ref=l1_section
Updated technology_news.json with 25 new articles.
Total unique articles: 70


[{'primary_article': {'headline': 'Gemini 3 vs Gemini 3 Pro vs Gemini 3 DeepThink: A quick guide to Google‚Äôs latest AI models',
   'author': 'N/A',
   'article_link': 'https://indianexpress.com/section/technology/artificial-intelligence/',
   'featured_image': 'https://images.indianexpress.com/2025/11/Tech-feature-images159.jpg?w=320',
   'source_logo': 'N/A',
   'source_name': 'IndianExpress',
   'publish_date': 'Nov 23, 2025',
   'summary': 'N/A',
   'category': 'Artificial Intelligence'},
  'related_articles': [],
  'total_related_articles': 0},
 {'primary_article': {'headline': 'Google targets 1000x compute growth, aims to double capacity every 6 months: Report',
   'author': 'N/A',
   'article_link': 'https://indianexpress.com/section/technology/artificial-intelligence/',
   'featured_image': 'https://images.indianexpress.com/2025/05/Sundar-Pichai.jpg?w=320',
   'source_logo': 'N/A',
   'source_name': 'IndianExpress',
   'publish_date': 'Nov 23, 2025',
   'summary': 'N/A',
   'c