In [1]:
import requests
from bs4 import BeautifulSoup


# Same browser-like User-Agent that the listing-page cell sends.
_ARTICLE_REQUEST_HEADERS = {"User-Agent": "Mozilla/5.0"}


def _node_text(node):
    """Return the whitespace-normalised text of ``node``, or "" when it is None.

    ``get_text()`` is used instead of ``.string`` because ``.string`` is None
    for any element that has more than one child (e.g. text plus an inline
    link), which would otherwise drop the text or raise on ``.strip()``.
    """
    if node is None:
        return ""
    return " ".join(node.get_text().split())


def get_article(article_url, timeout=10):
    """Download one article page and extract its main fields.

    Args:
        article_url: Absolute URL of the article page.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block a worker forever.

    Returns:
        dict with keys ``title``, ``summary``, ``timestamp``, ``author``,
        ``img_url``, ``content`` and ``tags``. Text fields that are absent
        from the page are returned as ``""``, ``img_url`` as ``None`` and
        ``tags`` as ``[]``.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            HTTP status.
        ValueError: if the page has no ``article_title`` element, i.e. it
            does not look like an article page.
    """
    response = requests.get(
        article_url, headers=_ARTICLE_REQUEST_HEADERS, timeout=timeout)
    response.raise_for_status()
    page = response.content.decode("utf-8", "ignore")
    soup = BeautifulSoup(page, "html.parser")

    # The title is the one mandatory field: without it the page is not an article.
    title = _node_text(soup.find(attrs={"class": "article_title"}))
    if not title:
        raise ValueError(f"No article title found at {article_url}")

    # Concatenate the stripped text fragments without a separator so the
    # result keeps the "June 13, 2025/ 09:30 IST" shape.
    schedule = soup.find(attrs={"class": "article_schedule"})
    timestamp = "".join(schedule.stripped_strings) if schedule is not None else ""

    # Prefer the lazy-load attribute, fall back to a plain ``src``.
    image = soup.select_one(".article_image img")
    img_url = None
    if image is not None:
        img_url = image.get("data-src") or image.get("src")

    paragraphs = (_node_text(p) for p in soup.select(".content_wrapper > p"))
    tags = (_node_text(a).strip("#")
            for a in soup.select(".tags_first_line > a"))

    return {
        "title": title,
        "summary": _node_text(soup.find(attrs={"class": "article_desc"})),
        "timestamp": timestamp,
        "author": _node_text(soup.select_one(".content_block span")),
        "img_url": img_url,
        "content": " ".join(text for text in paragraphs if text),
        "tags": [tag for tag in tags if tag],
    }

In [2]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Crawl the first listing pages of the "stocks" section and collect every
# moneycontrol news link found inside an element with class "clearfix".

# Loop-invariant request settings and URL prefixes.
headers = {'User-Agent': 'Mozilla/5.0'}
target_prefix = "https://www.moneycontrol.com/news/"
markets_prefix = 'https://www.moneycontrol.com/news/business/markets/'

links = []
for i in range(10):
    # Target URL
    url = f"https://www.moneycontrol.com/news/business/stocks/page-{i}/"

    # Send GET request; the timeout keeps a stalled server from hanging the
    # cell, and a network error on one page must not abort the whole crawl.
    try:
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        print(f"Failed to fetch page {url}. Reason: {exc}")
        continue

    # Check if request was successful
    if response.status_code != 200:
        print(f"Failed to fetch page. Status code: {response.status_code}")
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all elements with class 'clearfix' (no filtering on id is done).
    for container in soup.find_all(class_='clearfix'):
        for a in container.find_all('a', href=True):
            # Resolve relative hrefs against the listing page URL.
            full_url = urljoin(url, a['href'])
            if full_url.startswith(target_prefix):
                links.append(full_url)

# Keep only de-duplicated "markets" article URLs, dropping the section index
# page itself. Sorting makes the order reproducible across runs.
updated_links = sorted(
    link for link in set(links)
    if markets_prefix in link and link != markets_prefix
)

In [4]:
import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, as_completed

# Scrape all collected links concurrently with a pool of 10 worker threads.
# Each finished download is reported as it completes; a failing link is
# logged and skipped, so one bad page does not stop the rest.
all_articles = []
with ThreadPoolExecutor(max_workers=10) as executor:
    # Remember which URL each future belongs to for the progress messages.
    future_to_url = {}
    for url in updated_links:
        future_to_url[executor.submit(get_article, url)] = url

    for done in as_completed(future_to_url):
        link = future_to_url[done]
        try:
            all_articles.append(done.result())
            print("✅ Completed:", link)
        except Exception as exc:
            print("❌ Failed:", link, "| Reason:", str(exc))

✅ Completed: https://www.moneycontrol.com/news/business/markets/ahmedabad-plane-crash-impact-hotel-online-ticket-booking-firms-stocks-fall-up-to-4-13113231.html
✅ Completed: https://www.moneycontrol.com/news/business/markets/nazara-tech-shares-surge-7-after-15-42-lakh-shares-change-hands-in-rs-190-crore-block-deal-13117666.html
✅ Completed: https://www.moneycontrol.com/news/business/markets/three-individuals-settle-case-with-sebi-in-brightcom-group-limited-case-13111434.html
✅ Completed: https://www.moneycontrol.com/news/business/markets/futile-to-predict-future-stick-to-asset-allocation-dharma-says-kotaks-nilesh-shah-amid-market-turmoil-12987853.html
✅ Completed: https://www.moneycontrol.com/news/business/markets/top-gainers-and-losers-today-stocks-that-moved-the-most-on-june-12-13113435.html
✅ Completed: https://www.moneycontrol.com/news/business/markets/sterlite-technologies-shares-rise-over-10-percent-to-hit-three-year-high-on-bsnl-s-bharatnet-order-13112262.html
✅ Completed: https

In [34]:
from datetime import datetime, timedelta
import re


# Function to parse the `timestamp` field into a datetime object
def parse_article_timestamp(timestamp):
    """Parse a timestamp such as ``"June 13, 2025/ 09:30 IST"``.

    The date is mandatory; the ``HH:MM`` time after the optional ``/`` is
    used when present and defaults to ``00:00`` otherwise. Anything after
    the time (e.g. the ``IST`` suffix) is ignored. Full and abbreviated
    English month names are both accepted.

    Args:
        timestamp: Timestamp text as scraped from the article page.

    Returns:
        A naive ``datetime`` (no tzinfo attached), or ``None`` when the text
        contains no recognisable date or cannot be parsed. Parse failures
        are also reported with ``print``.
    """
    try:
        # Capture the time in the same pass as the date. Stripping the
        # "/ HH:MM IST" suffix first would throw the time away and make
        # every article look like it was published at midnight.
        match = re.search(
            r'([A-Za-z]+\s+\d{1,2},\s+\d{4})\s*/?\s*(\d{1,2}:\d{2})?',
            timestamp.strip())
        if match is None:
            return None

        date_part = match.group(1)
        time_part = match.group(2) or "00:00"
        full_string = f"{date_part} {time_part}"

        # Try the full month name first, then the three-letter abbreviation.
        for fmt in ("%B %d, %Y %H:%M", "%b %d, %Y %H:%M"):
            try:
                return datetime.strptime(full_string, fmt)
            except ValueError:
                continue
        raise ValueError(f"unrecognised date format: {full_string!r}")
    except (AttributeError, TypeError, ValueError) as e:
        # AttributeError/TypeError cover None or other non-string input.
        print(f"Error parsing timestamp: {timestamp} | {e}")
        return None

# Filter articles
# Keep only the articles published inside the look-back window.
LOOKBACK_HOURS = 48

# The scraped timestamps are Indian Standard Time (UTC+05:30) and are parsed
# into naive datetimes, so the reference "now" is expressed as naive IST too.
# Otherwise the window shifts by the kernel's own UTC offset.
IST_UTC_OFFSET = timedelta(hours=5, minutes=30)
local_now = datetime.now().astimezone()
now = local_now.replace(tzinfo=None) - local_now.utcoffset() + IST_UTC_OFFSET

cutoff = now - timedelta(hours=LOOKBACK_HOURS)
past_24h = cutoff  # legacy name, kept in case a later cell still reads it

recent_articles = []
for article in all_articles:
    dt = parse_article_timestamp(article.get("timestamp", ""))
    # Articles whose timestamp could not be parsed (dt is None) are skipped.
    if dt is not None and dt >= cutoff:
        recent_articles.append(article)

# Output
print(f"Found {len(recent_articles)} articles in the last {LOOKBACK_HOURS} hours")
for article in recent_articles:
    print(article["title"], "|", article["timestamp"])


Found 19 articles in the last 24 hours
Sensex down 850 pts, Nifty below 24,650 as Israel strikes Iran; India VIX spikes 10% | June 13, 2025/ 09:30 IST
Sensex, Nifty set for a weak opening as Israel-Iran tension escalate; Key levels to track on June 13 | June 13, 2025/ 08:39 IST
First Tick: Here are the top global cues for today’s trade | June 13, 2025/ 08:07 IST
Sensex plunges 700 pts, Nifty below 24,700 amid Middle East tensions; bank, auto, energy stocks drag | June 13, 2025/ 11:43 IST
Crude sensitive stocks plunge as oil prices surge after Israel attacks Iran; HPCL, Asian Paints, others down up to 4% | June 13, 2025/ 16:19 IST
Trade Spotlight: How should you trade Ajanta Pharma, Hyundai Motor India, Max Healthcare Institute, Nelcast, Cera Sanitaryware and others on June 13? | June 13, 2025/ 06:26 IST
European airline shares slide on Israel-Iran tensions: Lufthansa, Air France KLM, Airbus shares plunge up to 4% | June 13, 2025/ 16:21 IST
IndiGo promoter Interglobe Enterprises may sel

In [35]:
# Bare expression: the notebook shows the filtered list of article dicts as
# this cell's output.
recent_articles

[{'title': 'Sensex down 850 pts, Nifty below 24,650 as Israel strikes Iran; India VIX spikes 10%',
  'summary': 'PSU Bank, Smallcap 100, and Nifty Auto led the declines, slipping 1.60 percent, 1.53 percent, and 1.49 percent, respectively.',
  'timestamp': 'June 13, 2025/ 09:30 IST',
  'author': 'Moneycontrol News',
  'img_url': 'https://images.moneycontrol.com/static-mcnews/2025/05/20250315034541_sensex_stocks_nifty.jpg?impolicy=website&width=770&height=431',
  'tags': []},
 {'title': 'Sensex, Nifty set for a weak opening as Israel-Iran tension escalate; Key levels to track on June 13',
  'summary': 'Foreign institutional investors (FIIs) and foreign portfolio investors (FPIs) offloaded shares worth Rs 3,831 crore, while domestic institutional investors (DIIs) made net purchases of Rs 9,393 crore on June 12.',
  'timestamp': 'June 13, 2025/ 08:39 IST',
  'author': 'Moneycontrol News',
  'img_url': 'https://images.moneycontrol.com/static-mcnews/2025/06/20250612013258_bse_nse_stockmarket