# In [None]:
# Code for Multiple pages

from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

def check_response_status(url, timeout=30):
    """Fetch *url* with a GET request and print whether it succeeded.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. It is
            passed to ``requests.get``; without a timeout a stalled server
            would block the caller indefinitely.

    Returns:
        The response object, whatever its status code, or ``None`` when the
        request raised ``requests.RequestException`` (connection errors,
        timeouts, invalid URLs, ...).
    """
    # Keep the try body limited to the call that can actually raise.
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        print(f"An error occurred: {e}")
        return None

    if response.status_code == 200:
        print(f"Success: Status code {response.status_code}")
    else:
        print(f"Failed: Status code {response.status_code}")
    # Non-200 responses are still returned; callers check status_code themselves.
    return response

def get_news_release_links(base_url, pages=1):
    """Collect news-release URLs from paginated search results.

    Requests pages ``1..pages`` of *base_url* (adding a ``page`` query
    parameter), and gathers the ``href`` of every ``<a class="news-release">``
    element found on each page.

    Args:
        base_url: Search-results URL, with or without an existing query string.
        pages: Number of result pages to request, starting from page 1.

    Returns:
        A list of absolute article URLs in the order they were found.
        Pages that fail to load are reported on stdout and skipped.
    """
    site_root = "https://www.prnewswire.com"
    # Use '?' when base_url has no query string yet, so the page parameter
    # always yields a well-formed URL.
    separator = "&" if "?" in base_url else "?"

    links = []
    for page in range(1, pages + 1):
        url = f"{base_url}{separator}page={page}"
        response = check_response_status(url)

        if response is None or response.status_code != 200:
            print(f"Skipping page {page} due to error.")
            continue

        soup = BeautifulSoup(response.content, 'html.parser')
        # urljoin resolves site-relative hrefs against the site root and
        # leaves already-absolute hrefs untouched, unlike plain concatenation.
        links.extend(
            urljoin(site_root, anchor['href'])
            for anchor in soup.find_all('a', class_='news-release', href=True)
        )

    return links

def extract_news_content(url):
    """Download one news-release page and pull out its main fields.

    Args:
        url: Address of the article page.

    Returns:
        A dict with the keys ``url``, ``title``, ``date``,
        ``news_provided_by`` and ``body``, or ``None`` when the page could
        not be fetched with status 200. A field whose element is missing
        from the page holds a "No ... found" placeholder string.
    """
    response = check_response_status(url)

    if response is None or response.status_code != 200:
        print(f"Skipping URL due to error: {url}")
        return None

    soup = BeautifulSoup(response.content, 'html.parser')

    # NOTE(review): a multi-word class_ string is matched against the exact
    # class attribute value, so these selectors depend on the site keeping
    # the class names in this order — confirm against the live markup.

    # Title: the <h1> inside the headline row.
    title_text = "No title found"
    headline_section = soup.find('div', class_='row detail-headline')
    if headline_section is not None:
        title_element = headline_section.find('h1')
        if title_element is not None:
            title_text = title_element.get_text(strip=True)

    # Date: first <p class="mb-no"> on the page.
    date_element = soup.find('p', class_='mb-no')
    date_text = (
        date_element.get_text(strip=True)
        if date_element is not None
        else "No date found"
    )

    # "News provided by": the <strong> inside the provider column.
    news_provided_by = "No provider found"
    news_provider_section = soup.find(
        'div', class_='col-lg-8 col-md-8 col-sm-7 swaping-class-left'
    )
    if news_provider_section is not None:
        provider_element = news_provider_section.find('strong')
        if provider_element is not None:
            news_provided_by = provider_element.get_text(strip=True)

    # Body: text of up to two containers, in this order. Only non-empty
    # pieces are kept and then joined, so the result never starts or ends
    # with a stray newline when one of the containers is missing or empty.
    body_parts = []
    for css_class in ('col-lg-10 col-lg-offset-1', 'col-sm-10 col-sm-offset-1'):
        section = soup.find('div', class_=css_class)
        if section is None:
            continue
        section_text = section.get_text(separator='\n', strip=True)
        if section_text:
            body_parts.append(section_text)
    body_text = "\n".join(body_parts) or "No body content found"

    return {
        "url": url,
        "title": title_text,
        "date": date_text,
        "news_provided_by": news_provided_by,
        "body": body_text
    }


# --- Scrape the keyword search results and export them to Excel ---

# Search-results URL for the keyword, and the workbook the rows are written to.
base_url = "https://www.prnewswire.com/search/news/?keyword=MULTIPLE%20SCLEROSIS"
excel_file = "www.prnewswire.com_MULTIPLE SCLEROSIS.xlsx"

# Gather article links from the first 16 result pages.
news_release_links = get_news_release_links(base_url, pages=16)

# Fetch the articles one by one; a falsy result (failed fetch) is dropped.
news_contents = []
for link in news_release_links:
    content = extract_news_content(link)
    if not content:
        continue
    news_contents.append(content)

# One row per article, columns taken from the dict keys, no index column.
df = pd.DataFrame(news_contents)
df.to_excel(excel_file, index=False)

print(f"Data has been saved to {excel_file}.")
