In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Example articles to scrape: each entry is a (date, article_url) tuple.
articles = [
    (
        "2018-12-14",
        "https://www.moneycontrol.com/news/business/markets/year-in-review-10-key-events-that-charted-market-direction-in-2018-3274721.html",
    ),
    (
        "2018-12-23",
        "https://www.businesstoday.in/markets/stocks/story/sensex-nifty-stock-market-nifty-50-rbi-yearend-2018-126099-2018-12-24",
    ),
]

def get_article_text(url, timeout=10):
    """Download *url* and return its article paragraphs as a single string.

    The page is parsed with BeautifulSoup's ``html.parser``. Paragraph
    (``<p>``) text is looked up inside the first of these containers that
    yields any non-blank text, tried in order:

    1. ``<article>``
    2. ``<div class="content">``
    3. ``<div class="article">``
    4. the whole document (fallback)

    Args:
        url: Address of the page to download.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.
            Without one, a stalled server would block the scrape forever.

    Returns:
        The paragraph texts joined by single spaces and stripped of
        leading/trailing whitespace, or ``""`` when the request fails or
        no paragraph text is found.
    """
    # Keep the try body to the network call so only fetch problems are
    # reported as fetch errors; the function stays best-effort for those.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching {url}: {e}")
        return ""

    soup = BeautifulSoup(response.content, 'html.parser')

    # Container patterns, most specific first; the right one depends on the
    # site's HTML structure.
    container_patterns = (
        ("article", {}),
        ("div", {"class_": "content"}),
        ("div", {"class_": "article"}),
    )
    for tag_name, filters in container_patterns:
        container = soup.find(tag_name, **filters)
        if container is None:
            continue
        article_text = ' '.join(
            p.get_text() for p in container.find_all("p")
        ).strip()
        # A container that exists but holds no paragraph text (e.g. an empty
        # <article> shell) must not short-circuit the remaining patterns.
        if article_text:
            return article_text

    # Fallback: every paragraph on the page.
    return ' '.join(p.get_text() for p in soup.find_all("p")).strip()

# Scrape each listed article and keep one record per URL
data = []
for date, url in articles:
    print(f"Scraping: {url}")
    row = dict(
        date=date,
        article_link=url,
        article_content=get_article_text(url),
    )
    data.append(row)

# Tabulate the records and show the first rows
df = pd.DataFrame(data)
preview = df.head()
print(preview)

# Persist the table to CSV without the index column
df.to_csv("articles_2018.csv", index=False)
