# Selenium Modules

https://selenium-python.readthedocs.io

https://pypi.org/project/webdriver-manager/

# XPath tutorial

https://www.youtube.com/watch?v=NhG__BL8zFo&ab_channel=AutomationZone

PDF cheat sheet: https://www.red-gate.com/simple-talk/wp-content/uploads/imported/1269-Locators_groups_1_0_2.pdf?file=4938

In [None]:
# Chrome: webdriver-manager supplies the chromedriver path, Selenium starts it
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager

# install() returns the path of the driver binary, which is handed to the
# Service as its executable.
driver = webdriver.Chrome(
    service=ChromeService(executable_path=ChromeDriverManager().install()),
)

In [None]:
# Use with Chromium
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromiumService
from webdriver_manager.chrome import ChromeDriverManager

try:
    # webdriver-manager 4.x exposes ChromeType from core.os_manager.
    from webdriver_manager.core.os_manager import ChromeType
except ImportError:
    # Older webdriver-manager releases (3.x) keep it in core.utils.
    from webdriver_manager.core.utils import ChromeType

# Ask webdriver-manager for the driver that matches a Chromium install
# (rather than Google Chrome) and start the browser with it.
driver = webdriver.Chrome(
    service=ChromiumService(
        ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install()
    )
)

In [None]:
# Firefox: webdriver-manager supplies the geckodriver path, Selenium starts it
from selenium import webdriver
from selenium.webdriver.firefox.service import Service as FirefoxService
from webdriver_manager.firefox import GeckoDriverManager

# install() returns the path of the driver binary, which is handed to the
# Service as its executable.
driver = webdriver.Firefox(
    service=FirefoxService(executable_path=GeckoDriverManager().install()),
)

In [None]:
# Hidden (headless) mode: run Chrome without opening a visible window.
# The imports are repeated here so the cell runs on its own.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

options = Options()
# `options.headless = True` is deprecated and removed in newer Selenium 4
# releases; passing the browser flag directly is the supported way.
options.add_argument('--headless=new')

# Driver settings
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

In [None]:
def access_website():
    """Load the page stored in the module-level ``URL`` in the global ``driver``."""
    driver.get(url=URL)

In [None]:
# Column buffers filled by extract_information(); my_dict holds references to
# these same list objects, so appending to a list also updates the dict.
titles = []
subtitles = []
links = []
my_dict = dict(title=titles, subtitles=subtitles, link=links)

def extract_information():
    """Collect the title, subtitle and link of every teaser on the current page.

    Each value is appended to the module-level ``titles``, ``subtitles`` and
    ``links`` lists respectively; nothing is returned.
    """
    teasers = driver.find_elements(by='xpath', value='//div[@class="teaser__copy-container"]')

    for teaser in teasers:
        # Resolve all three fields before appending anything, so the three
        # lists stay the same length even if one lookup raises.
        row = (
            teaser.find_element(by='xpath', value='./a/h2').text,
            teaser.find_element(by='xpath', value='./a/p').text,
            teaser.find_element(by='xpath', value='./a').get_attribute('href'),
        )
        for column, cell in zip((titles, subtitles, links), row):
            column.append(cell)

In [None]:
from datetime import datetime

def day_mouth_year():
    """Return the current local date as an eight-digit ``DDMMYYYY`` string.

    The result has no separators (e.g. ``'05032024'`` for 5 March 2024).
    """
    return datetime.now().strftime('%d%m%Y')

In [None]:
import os
import sys

# Directory that contains the running interpreter executable; the CSV is
# written here. NOTE(review): presumably chosen so a packaged executable saves
# its output next to itself - confirm.
PATH = os.path.split(sys.executable)[0]

def filename():
    """Build the output CSV path: ``PATH`` joined with ``headlines-<date>.csv``.

    ``<date>`` is whatever ``day_mouth_year()`` returns at call time.
    """
    csv_name = f'headlines-{day_mouth_year()}.csv'
    return os.path.join(PATH, csv_name)

In [None]:
import pandas as pd

def export_to_csv():
    """Write the collected headlines to the CSV file named by ``filename()``.

    A DataFrame is built from ``my_dict`` (one column per key) and saved with
    pandas' default ``to_csv`` settings.
    """
    frame = pd.DataFrame(data=my_dict)
    frame.to_csv(path_or_buf=filename())

In [None]:
# Alerts
# Selenium 4 has no driver.switch_to_alert(); the open alert is reached
# through the `switch_to.alert` property instead.
# Each call acts on the alert that is open at that moment, so use whichever
# one the dialog needs.
driver.switch_to.alert.accept()   # confirm the dialog ("OK")
driver.switch_to.alert.dismiss()  # cancel the dialog ("Cancel")

In [None]:
# Accept the cookies / answer radio-button questions inside an iframe, then
# return to the parent document.
from selenium.common.exceptions import NoSuchElementException, NoSuchFrameException
from selenium.webdriver.common.keys import Keys

try:
    # Locate the iframe that hosts the dialog and move the driver into it.
    iframe = driver.find_element(by='xpath', value='//iframe[@title="xpath"]')
    driver.switch_to.frame(iframe)

    # '//' is a placeholder: replace it with the XPath of the element that
    # should receive the ENTER key press.
    driver.find_element(by='xpath', value='//').send_keys(Keys.ENTER)
except (NoSuchElementException, NoSuchFrameException):
    # The dialog is optional: when the iframe or its button is absent there
    # is nothing to accept, so carry on.
    pass
finally:
    # Always return to the top-level document, so later lookups are not
    # stuck inside the iframe if something failed after the switch.
    driver.switch_to.default_content()

In [None]:
# Disable notifications on the browser

from selenium.webdriver.chrome.options import Options

option = Options()
# The method is add_argument (underscore), not add.argument.
option.add_argument('--disable-notifications')
# Pass the `option` object configured above; passing a different options
# object would start the browser without the flag.
driver = webdriver.Chrome(service=service, options=option)

In [None]:
# Open a new browser tab through JavaScript
driver.execute_script(
    'window.open("URL");',
)

# Move the driver's focus to the second window handle (the new tab)
driver.switch_to.window(
    driver.window_handles[1],
)

# Automação de Notícias do site The Sun

In [None]:
# selenium 4
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
from datetime import datetime
import pandas as pd
import os
import sys

In [None]:
# Hidden (headless) mode: run Chrome without opening a visible window.
options = Options()
# `options.headless = True` is deprecated and removed in newer Selenium 4
# releases; passing the browser flag directly is the supported way.
options.add_argument('--headless=new')

# Driver settings: webdriver-manager supplies the chromedriver path.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

# Global variables
WEBSITE = 'https://www.thesun.co.uk/sport/football/'  # page that gets scraped
PATH = os.path.dirname(sys.executable)  # folder of the running executable; CSV goes here

# Column buffers filled by extract_information(); my_dict holds references to
# these same list objects, so appending to a list also updates the dict.
titles = []
subtitles = []
links = []
my_dict = dict(title=titles, subtitles=subtitles, link=links)


def day_mouth_year():
    """Return the current local date as an eight-digit ``DDMMYYYY`` string.

    The result has no separators (e.g. ``'05032024'`` for 5 March 2024).
    """
    return datetime.now().strftime('%d%m%Y')


def access_website():
    """Load the page stored in the module-level ``WEBSITE`` in the global ``driver``."""
    driver.get(url=WEBSITE)


def extract_information():
    """Collect the title, subtitle and link of every teaser on the current page.

    Each value is appended to the module-level ``titles``, ``subtitles`` and
    ``links`` lists respectively; nothing is returned.
    """
    teasers = driver.find_elements(by='xpath', value='//div[@class="teaser__copy-container"]')

    for teaser in teasers:
        # Resolve all three fields before appending anything, so the three
        # lists stay the same length even if one lookup raises.
        row = (
            teaser.find_element(by='xpath', value='./a/h2').text,
            teaser.find_element(by='xpath', value='./a/p').text,
            teaser.find_element(by='xpath', value='./a').get_attribute('href'),
        )
        for column, cell in zip((titles, subtitles, links), row):
            column.append(cell)


def filename():
    """Build the output CSV path: ``PATH`` joined with ``headlines-<date>.csv``.

    ``<date>`` is whatever ``day_mouth_year()`` returns at call time.
    """
    csv_name = f'headlines-{day_mouth_year()}.csv'
    return os.path.join(PATH, csv_name)


def export_to_csv():
    """Write the collected headlines to the CSV file named by ``filename()``.

    A DataFrame is built from ``my_dict`` (one column per key) and saved with
    pandas' default ``to_csv`` settings.
    """
    frame = pd.DataFrame(data=my_dict)
    frame.to_csv(path_or_buf=filename())


if __name__ == '__main__':
    # Pipeline: open the page, scrape the teasers, save them as CSV.
    try:
        access_website()
        extract_information()
        export_to_csv()
    finally:
        # Close the browser even when a step above raises; otherwise the
        # Chrome and chromedriver processes are left running.
        driver.quit()
