# Web Scraping with Selenium

Define the URL with the data you want to scrape (dynamic tables) and the path to the Chrome driver

In [None]:
# Placeholder settings: replace both strings before running the scraper.
# pathchrome -> filesystem path of the ChromeDriver executable
# url        -> address of the page that contains the dynamic table
pathchrome = 'Insert path to Chrome driver here'
url = "Insert URL here"

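Alternatively, read the URL and the Chrome driver path from a config file. Each entry in the file must have the form `key = value`, and the keys `pathchrome` and `url` are required.
Unless a different file path is set in `config_file_path`, the config file (`config.txt`) should be in the same folder as the script.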

In [None]:
def read_parameters(file_path):
    """Parse a ``key = value`` config file into a dictionary.

    Each non-blank line is split on the first occurrence of ``' = '``;
    the text before it becomes the key and the text after it the value.
    Leading and trailing whitespace of the line is removed first. Blank
    lines are ignored, so a trailing newline or spacing between entries
    does not break parsing.

    Args:
        file_path: Path of the config file to read.

    Returns:
        A dict mapping each key to its value (both strings). If a key is
        repeated, the last occurrence wins.

    Raises:
        ValueError: If a non-blank line does not contain ``' = '``.
        OSError: If the file cannot be opened.
    """
    parameters = {}
    with open(file_path, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            entry = line.strip()
            if not entry:
                continue  # tolerate empty lines instead of failing to unpack
            # partition() splits on the first separator only, so a value may
            # itself contain ' = ' (e.g. inside a URL query string).
            key, separator, value = entry.partition(' = ')
            if not separator:
                raise ValueError(
                    f"{file_path}, line {line_number}: "
                    f"expected 'key = value', got {entry!r}"
                )
            parameters[key] = value
    return parameters

# Load the scraper settings from the config file.
config_file_path = 'config.txt'
parameters = read_parameters(config_file_path)

# Pull out the two required settings; a missing key raises KeyError here.
pathchrome = parameters['pathchrome']
url = parameters['url']

# Echo both values, one per line, so they can be checked before scraping.
print(f"pathchrome: {pathchrome}", f"url: {url}", sep="\n")


Start the web driver and navigate to the webpage
Attention - The user has to set the filter manually.
You will have 30 seconds to do so before the script starts scraping the data.
The scraped data will be saved in an Excel file - change the file name in the code (`file_name.xlsx`) if you want to save it under a different name or in a different location.

In [None]:
import time

import openpyxl
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    WebDriverException,
)
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Settings for this scraping run. Adjust the class names / XPath to match the
# table you want to scrape - you can find them in the page's HTML using the
# browser's Inspect tool.
FILTER_WAIT_SECONDS = 30      # time the user gets to set the filter manually
ELEMENT_TIMEOUT_SECONDS = 10  # maximum wait for the first table row to appear
PAGE_LOAD_SECONDS = 3         # pause after clicking "next page"
ROW_CLASS = 'ui-grid-row'
CELL_CLASS = 'ui-grid-cell-contents'
NEXT_BUTTON_XPATH = '//button[@ng-click="vm.goToNextPage()"]'
OUTPUT_FILE = 'file_name.xlsx'

# set up the web driver
chrome_service = Service(executable_path=pathchrome)
driver = webdriver.Chrome(service=chrome_service)

all_data = []
try:
    # navigate to the webpage
    driver.get(url)

    # give the user time to set the filter manually
    time.sleep(FILTER_WAIT_SECONDS)

    # wait for the first table row to be present in the page
    wait = WebDriverWait(driver, ELEMENT_TIMEOUT_SECONDS)
    element = wait.until(
        EC.presence_of_element_located((By.CLASS_NAME, ROW_CLASS))
    )

    # scrape data from all pages
    while True:
        # collect the cell texts of every row on the current page
        rows = driver.find_elements(By.CLASS_NAME, ROW_CLASS)
        for row in rows:
            cells = row.find_elements(By.CLASS_NAME, CELL_CLASS)
            all_data.append([cell.text for cell in cells])

        # go to the next page if it exists
        try:
            next_button = driver.find_element(By.XPATH, NEXT_BUTTON_XPATH)
            if not next_button.is_enabled():
                break  # no more pages
            next_button.click()
        except NoSuchElementException:
            break  # no pagination button: single page or last page
        except WebDriverException as exc:
            # Keep the best-effort behaviour (save what was collected) but
            # report the reason instead of silently hiding it. Only Selenium
            # errors are caught, so Ctrl+C and programming errors propagate.
            print(f"Stopping pagination after a browser error: {exc}")
            break
        time.sleep(PAGE_LOAD_SECONDS)  # wait for the next page to load
finally:
    # always close the browser, even if navigation or scraping failed
    driver.quit()

# write the data to an Excel file, one worksheet row per scraped table row
wb = openpyxl.Workbook()
ws = wb.active
for row in all_data:
    ws.append(row)
wb.save(OUTPUT_FILE)
