In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait, Select

In [27]:
# Set up Chrome options.
# `chrome_options` is passed to webdriver.Chrome(...) when the driver is created
# in the following cell.
chrome_options = Options()
# 'normal' is Selenium's default page-load strategy; per the Selenium docs it makes
# driver.get() block until the page's load event has fired.
chrome_options.page_load_strategy = 'normal'

In [82]:
# Set up the Chrome driver, open the download page and dismiss the cookie banner.

PAGE_LOAD_TIMEOUT_SECONDS = 30   # upper bound for driver.get()
COOKIE_WAIT_SECONDS = 10         # upper bound for each cookie-banner button

page_url = 'https://www.compare-school-performance.service.gov.uk/download-data'

# Re-running this cell rebinds `driver`; without an explicit quit() the previous
# Chrome window/process would be orphaned on every re-run.
previous_driver = globals().get("driver")
if previous_driver is not None:
    try:
        previous_driver.quit()
    except Exception as e:
        # Best effort only: the old session may already be gone.
        print(f"Could not close the previous browser session: {e}")

service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)
driver.set_page_load_timeout(PAGE_LOAD_TIMEOUT_SECONDS)

try:
    driver.get(page_url)
except TimeoutException:
    # Execution deliberately continues: the explicit waits below still get a
    # chance to find the banner if the DOM is usable despite the timeout.
    print("The page took too long to load!")

# Step 1: Accept Cookies and Hide Message
# Both banner buttons are handled identically: wait until clickable, click, report.
cookie_banner_buttons = (
    ("acceptAnalyticsCookiesTrue", "Cookies accepted."),   # "Accept Cookies"
    ("confirm-cookie-settings", "Message hidden."),        # "Hide this message"
)
try:
    for button_id, done_message in cookie_banner_buttons:
        banner_button = WebDriverWait(driver, COOKIE_WAIT_SECONDS).until(
            EC.element_to_be_clickable((By.ID, button_id))
        )
        banner_button.click()
        print(done_message)
except TimeoutException:
    print("Cookie-related buttons took too long to appear or be clickable.")
except Exception as e:
    print(f"Error interacting with cookie-related buttons: {e}")

Cookies accepted.
Message hidden.


In [83]:
# Steps 2-9: walk through the "download data" wizard and fetch the CSV.
#
# Every step depends on the page reached by the previous one, so the sequence
# stops at the first failure instead of running the remaining steps against the
# wrong page (or silently downloading the default year when the year selection
# failed).

ELEMENT_WAIT_SECONDS = 10          # upper bound for each explicit wait
ACADEMIC_YEAR = "2022 to 2023"     # visible text of the dropdown option to select
CONTINUE_BUTTON = (By.CLASS_NAME, "button")


def select_year(year_label, wait_seconds=ELEMENT_WAIT_SECONDS):
    """Choose `year_label` in the year dropdown (Step 2).

    Waits up to `wait_seconds` for the element with class "form-control" to be
    present, then selects the option whose visible text equals `year_label`.

    Returns True on success. On failure prints a message and returns False
    (exceptions are not propagated).
    """
    try:
        dropdown_element = WebDriverWait(driver, wait_seconds).until(
            EC.presence_of_element_located((By.CLASS_NAME, "form-control"))
        )
        Select(dropdown_element).select_by_visible_text(year_label)
    except TimeoutException:
        print("Dropdown menu took too long to load!")
        return False
    except Exception as e:
        print(f"Error interacting with the dropdown: {e}")
        return False
    return True


def wait_and_click(locator, condition, success_message, timeout_message,
                   error_prefix, wait_seconds=ELEMENT_WAIT_SECONDS):
    """Wait for one element and click it.

    Parameters
    ----------
    locator : tuple
        Selenium ``(By.<strategy>, value)`` pair identifying the element.
    condition : callable
        Expected-condition factory applied to `locator`, e.g.
        ``EC.element_to_be_clickable`` or ``EC.presence_of_element_located``.
    success_message : str or None
        Printed after a successful click; nothing is printed when None.
    timeout_message : str
        Printed when the wait exceeds `wait_seconds`.
    error_prefix : str
        Prefix of the message printed for any other exception.
    wait_seconds : int
        Maximum time to wait for `condition`.

    Returns True when the click succeeded, False otherwise (exceptions are
    reported via print and not propagated).
    """
    try:
        element = WebDriverWait(driver, wait_seconds).until(condition(locator))
        element.click()
        if success_message:
            print(success_message)
    except TimeoutException:
        print(timeout_message)
        return False
    except Exception as e:
        print(f"{error_prefix}: {e}")
        return False
    return True


# Steps 3-9 in order. Each entry:
# (locator, wait condition, success message, timeout message, error prefix).
# NOTE(review): the radio, checkbox and download link keep the original
# presence-only wait. If those inputs are visually hidden by the page styling,
# element_to_be_clickable would time out on them - confirm before tightening.
CLICK_STEPS = [
    # Step 3: Click the "Continue" button
    (CONTINUE_BUTTON, EC.element_to_be_clickable,
     None,
     "Continue button took too long to become clickable!",
     "Error interacting with the button"),
    # Step 4: Select Radio Button and Drill Down
    ((By.ID, "AllEnglandRadio"), EC.presence_of_element_located,
     "Radio button selected.",
     "Radio button took too long to load!",
     "Error interacting with the radio button"),
    # Step 5: Click the "Continue" button again
    (CONTINUE_BUTTON, EC.element_to_be_clickable,
     "Continue button clicked.",
     "Continue button took too long to become clickable!",
     "Error interacting with the continue button"),
    # Step 6: Select Checkbox and Drill Down
    ((By.ID, "Datatype_gen_0"), EC.presence_of_element_located,
     "Checkbox selected.",
     "Checkbox took too long to load!",
     "Error interacting with the checkbox"),
    # Step 7: Click the "Continue" button again
    (CONTINUE_BUTTON, EC.element_to_be_clickable,
     "Continue button clicked.",
     "Continue button took too long to become clickable!",
     "Error interacting with the continue button"),
    # Step 8: Click the Download Link
    ((By.ID, "download-file-format-by-csv-link"), EC.presence_of_element_located,
     "CSV download link clicked.",
     "Download link took too long to load!",
     "Error interacting with the download link"),
    # Step 9: Click the "Back to download" Button
    ((By.LINK_TEXT, "Back to download"), EC.element_to_be_clickable,
     "Back to download button clicked.",
     "Back to download button took too long to become clickable!",
     "Error interacting with the back to download button"),
]

# `and` / all() short-circuit, so nothing after the first failed step is attempted.
flow_completed = select_year(ACADEMIC_YEAR) and all(
    wait_and_click(*step) for step in CLICK_STEPS
)
if not flow_completed:
    print("Download flow stopped early; later steps depend on the step reported above.")

Radio button selected.
Continue button clicked.
Checkbox selected.
Continue button clicked.
CSV download link clicked.


# Cleaning Data