# In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
import pandas as pd
from datetime import datetime
import os
import sys

# Function to scrape news headlines from the given website
def scrape_news(website):
    """Scrape news headlines from the given website with headless Firefox.

    Args:
        website: URL of the page to load.

    Returns:
        A tuple ``(titles, subtitles, links)`` of three parallel lists, one
        entry per news container that was read successfully. Containers that
        raise while being read are reported on stdout and skipped. If the
        scrape as a whole fails, the error is printed and three empty lists
        are returned.
    """
    # Kept outside the try so the finally clause can tell whether a browser
    # was actually started.
    driver = None
    try:
        # Set up headless Firefox WebDriver
        options = webdriver.FirefoxOptions()
        options.add_argument("--headless")
        driver = webdriver.Firefox(options=options)
        driver.maximize_window()

        # Open the website
        driver.get(website)

        # Find all news containers (teaser divs that directly contain a link)
        containers = driver.find_elements(
            By.XPATH, '//div[@class="teaser__copy-container"][./a]'
        )

        titles = []
        subtitles = []
        links = []

        # Iterate through each news container
        for container in containers:
            try:
                # Extract all three fields before appending any of them, so a
                # failure part-way through never leaves the lists misaligned.
                title = container.find_element(
                    By.CSS_SELECTOR, 'a > span'
                ).get_attribute("data-original-text")
                subtitle = container.find_element(
                    By.CSS_SELECTOR, 'a > h3'
                ).get_attribute("data-original-text")
                link = container.find_element(
                    By.CSS_SELECTOR, 'a'
                ).get_attribute("href")
            except Exception as e:
                # Handle any errors that occur during scraping individual news items
                print(f"Error while scraping news item: {e}")
            else:
                titles.append(title)
                subtitles.append(subtitle)
                links.append(link)

        return titles, subtitles, links
    except Exception as e:
        # Handle any errors that occur during the entire scraping process
        print(f"Error occurred during scraping: {e}")
        return [], [], []
    finally:
        # The driver is local to this function, so this is the only place it
        # can be shut down; without it every call leaves a browser running.
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                # Best effort: a failed shutdown must not discard the result.
                print(f"Error while closing the browser: {e}")

if __name__ == "__main__":
    # Script entry point: scrape today's headlines and save them as a CSV
    # named headlines-DDMMYYYY.csv.

    # Define the website URL
    website = "https://www.thesun.co.uk/sport/football/"

    # Get current date in the format DDMMYYYY
    day_month_year = datetime.now().strftime("%d%m%Y")

    # Scrape news headlines from the website
    titles, subtitles, links = scrape_news(website)

    # Create a DataFrame from the scraped data (one column per list)
    headlines = {'Titles': titles, 'Subtitles': subtitles, 'Links': links}
    headlines_df = pd.DataFrame(headlines)

    # Generate the output file name based on current date
    file_name = f'headlines-{day_month_year}.csv'

    # Get the directory where the script is located. __file__ is not defined
    # in a notebook/interactive session, so fall back to the working directory.
    try:
        script_dir = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        script_dir = os.getcwd()

    # Construct the full path for the output CSV file
    final_path = os.path.join(script_dir, file_name)

    # Save the DataFrame to a CSV file
    headlines_df.to_csv(final_path, index=False)

    # NOTE: no driver.quit() here. The WebDriver is a local variable of
    # scrape_news(), so `driver` is undefined in this scope and the call
    # raised NameError after the CSV had been written.
