In [1]:
import requests
from bs4 import BeautifulSoup

def scrape_nyt_article(url, timeout=10):
    """Download a web page and return its HTML, reporting the outcome on stdout.

    Despite the name, nothing here is specific to one site: a plain GET is
    issued against whatever ``url`` is supplied, using a browser-like
    User-Agent header.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up; forwarded
            unchanged to ``requests.get``. Defaults to 10.

    Returns:
        The response body as text when the server answers with status 200,
        otherwise ``None`` (a message describing the reason is printed).
    """
    # Set custom headers including a User-Agent
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36'
    }

    # Send a GET request to the URL
    try:
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled server, which would hang the notebook cell. Timeout errors
        # are RequestException subclasses, so the handler below reports them.
        response = requests.get(url, headers=headers, timeout=timeout)

        # Check if the response indicates a login requirement
        if response.status_code == 401:  # Unauthorized
            print("Login is required to access this page.")
            return None
        elif response.status_code == 403:  # Forbidden
            print("Access to this page is forbidden. You may need to check for login requirements or scraping restrictions.")
            return None
        elif response.status_code == 200:  # Successful response
            print("Page accessed successfully.")
            return response.text
        else:
            print(f"Failed to access the page. Status code: {response.status_code}")
            return None

    except requests.exceptions.RequestException as e:
        # Covers connection failures, timeouts, invalid URLs, etc.
        print(f"An error occurred: {e}")
        return None

def parse_article(content):
    """Pull the headline and paragraph text out of an HTML document.

    Args:
        content: HTML markup to parse.

    Returns:
        A ``(title, article_text)`` tuple. ``title`` is the text of the first
        ``<h1>`` element, or ``"No Title Found"`` when there is none.
        ``article_text`` is the text of every ``<p>`` element in document
        order, joined with newlines.
    """
    document = BeautifulSoup(content, 'html.parser')

    # Headline: fall back to a placeholder when the page has no <h1>.
    heading = document.find('h1')
    if heading:
        title = heading.get_text()
    else:
        title = "No Title Found"

    # Body: collect the text of each <p>, skipping any falsy entry.
    pieces = []
    for node in document.find_all('p'):
        if node:
            pieces.append(node.get_text())

    return title, "\n".join(pieces)

# Address of the article to download (currently an investing.com press release).
url = "https://www.investing.com/news/press-releases/cureduchenne-partners-with-blizzard-entertainment-and-world-of-warcraft-for-limitedtime-charity-pet-program-93CH-3684052"

# Fetch the raw HTML; a falsy result means there is nothing to parse.
page_content = scrape_nyt_article(url)

if page_content:
    # Extract the headline and body, then print each section followed by a
    # blank line (the separator plus the trailing "\n" in each heading).
    title, article_text = parse_article(page_content)
    print(f"Title: {title}\n", "Article Content:\n", article_text, sep="\n")



Page accessed successfully.
Title: CureDuchenne Partners with Blizzard Entertainment and World of Warcraft for Limited-Time Charity Pet Program

Article Content:

Inspired by "The Remarkable Life of  Ibelin" Documentary, Players in World of Warcraft can Support Global Nonprofit and Fund Duchenne Muscular Dystrophy Research 
NEWPORT BEACH, Calif., Oct. 25, 2024 /PRNewswire/ -- Blizzard Entertainment and the massively multiplayer online role-playing game, World of Warcraft ®, are teaming up with CureDuchenne, a global nonprofit dedicated to funding and finding a cure for Duchenne muscular dystrophy, to launch an in-game promotion that will raise funds for the nonprofit and accelerate a cure for the rare neuromuscular disease.    The charity partnership is inspired by the incredible story of Mats Steen, which was captured in the newly released award-winning Netflix (NASDAQ:NFLX) documentary, The Remarkable Life of Ibelin. Mats, a Norwegian World of Warcraft player known in game as Ibelin,