In [19]:
from datetime import datetime as dt
import re
import os
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.chrome.options import Options


from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

## Overview - Using Selenium to scrape Florida election winners from 1996 to the present

# Setup
#### Setting up driver with download path/options

In [47]:
# Set up Chrome options for automated downloads
download_path = ""  # put what is relevant for you

# The same download_path is used both for Chrome's download preference and for
# locating the downloaded file afterwards (os.path.join(download_path, ...)).
# If it is left blank, fall back to the user's Downloads folder, and always
# resolve to an absolute path so both uses point at the same directory
# regardless of the notebook's working directory.
if not download_path:
    download_path = os.path.join(os.path.expanduser("~"), "Downloads")
download_path = os.path.abspath(download_path)
os.makedirs(download_path, exist_ok=True)

chrome_options = Options()
chrome_options.add_experimental_option("prefs", {
    "download.default_directory": download_path,
    "download.prompt_for_download": False,  # save without a "Save as" dialog
    "safebrowsing.enabled": True
})

driver = webdriver.Chrome(options=chrome_options)

#### Loading initial URL, switching to the correct frame

In [50]:
driver.get("https://results.elections.myflorida.com/")
# Wait (up to 10 s) for the "WinRaces" frame to be available, then switch into
# it. Passing the frame name as a plain string keeps the same name-or-id lookup
# that driver.switch_to.frame("WinRaces") performs.
WebDriverWait(driver, 10).until(
    EC.frame_to_be_available_and_switch_to_it("WinRaces")
)

#### Getting list of relevant elections from drop down element

In [36]:
# Wrap the election <select> element so its options can be read and chosen by text
dropdown_element = Select(driver.find_element(By.NAME, 'SelectElection'))

# Visible label of every option in the drop-down
list_of_elections = [opt.text for opt in dropdown_element.options]

# Skip the first entry and the last 31 entries (removes all elections before 1996)
list_of_elections2 = list_of_elections[1:-31]

# Labels containing "Pres" are presidential primaries; leave those out
relevant_elections = [label for label in list_of_elections2 if "Pres" not in label]

for label in relevant_elections:
    print(label)

2024 Primary Election
2022 General Election
2022 Primary Election
2020 General Election
2020 Primary Election
2018 General Election
2018 Primary Election
2016 General Election
2016 Primary Election
2014 General Election
2014 Primary Election
2012 General Election
2012 Primary Election
2010 General
2010 Primary
2008 General
2008 Primary
2006 General
2006 Primary
2004 General
2004 Primary
2002 General
2002 Primary
2000 General
2000 Runoff
2000 Primary
1998 General
1998 Runoff
1998 Primary
1996 General
1996 Runoff
1996 Primary


# Main Loop
#### Goes through each election option above by
1. Selecting next relevant election in drop down menu
2. Navigating to each election's relevant Download page
3. Downloading and renaming the file to be easier to read
4. Resetting the driver to the election selection page to prepare for the next loop

In [39]:
def _wait_for_download(path, timeout_s=30.0, poll_s=0.5):
    """Poll until ``path`` exists or ``timeout_s`` seconds have elapsed.

    Returns True if the file exists when polling stops, otherwise False.
    """
    deadline = time.monotonic() + timeout_s
    while not os.path.exists(path):
        if time.monotonic() >= deadline:
            return False
        time.sleep(poll_s)
    return True


for election in relevant_elections:

    dropdown_element.select_by_visible_text(election)

    # Reload the page for the selected election and re-find the elements,
    # since the previous element references belong to the old page
    driver.get(driver.current_url)
    time.sleep(2)
    driver.switch_to.frame("WinRaces")
    dropdown_element = Select(driver.find_element(By.NAME, 'SelectElection'))
    partial_link = driver.find_element(By.PARTIAL_LINK_TEXT, "Download")

    # Clicking "Download" goes to the election's download page in a second
    # window; point the driver at that window
    partial_link.click()
    time.sleep(2)
    driver.switch_to.window(driver.window_handles[1])

    # try/finally so the download window is always closed and the driver is
    # returned to the election selection page, even if this election is
    # skipped or an error is raised part-way through
    try:
        download_button = driver.find_element(By.CSS_SELECTOR, 'input[type="SUBMIT"][name="FormsButton2"][value="Download"]')

        # The website names each download after the election date,
        # ex: '11082022Election.txt', so the date is pulled from the URL
        election_date_url = driver.current_url
        match = re.search(r"ElectionDate=([^&]*)&DATA", election_date_url)
        if not match:
            # Without a date the downloaded file cannot be identified; skip
            # this election instead of reusing the date from a previous
            # iteration (which would rename the wrong file)
            print(f"No match found for {election}; skipping")
            continue
        election_date = match.group(1)

        # Reformat the date to MM/DD/YYYY with leading zeros (11/8/2022 -> 11/08/2022)
        date_obj = dt.strptime(election_date, "%m/%d/%Y")
        standardized_date = date_obj.strftime("%m/%d/%Y")
        og_file_name = standardized_date.replace("/","") + "Election.txt"
        new_file_name = election.replace(" ",'').replace("Election","") + ".txt"

        original_file = os.path.join(download_path, og_file_name)  # file as named by the website
        new_file = os.path.join(download_path, new_file_name)      # easier-to-read name

        # Download, then wait for the file to appear rather than assuming a
        # fixed 2 s is enough.
        # NOTE(review): this relies on the browser only giving the file its
        # final name once the transfer has finished -- confirm for your setup.
        download_button.click()
        if _wait_for_download(original_file):
            # os.replace overwrites an existing target, so re-running the
            # notebook does not fail on files renamed by a previous run
            os.replace(original_file, new_file)
        else:
            print("Download failed or file not found.")
    finally:
        # Close the download window and go back to the elections window
        driver.close()
        driver.switch_to.window(driver.window_handles[0])
        driver.switch_to.frame("WinRaces")
        dropdown_element = Select(driver.find_element(By.NAME,'SelectElection'))

In [52]:
# quit() ends the whole WebDriver session (all windows and the driver process);
# close() would only close the window that currently has focus.
driver.quit()

# Testing

#### Single test setting below
A series of tests used to build up the for-loop functionality, one step at a time, for a single election option.

In [13]:
# Choose the first relevant election in the drop-down
first_choice = relevant_elections[0]
dropdown_element.select_by_visible_text(first_choice)
time.sleep(1)

# Reload the current URL, then step back into the frame and re-wrap the <select>
page_url = driver.current_url
driver.get(page_url)
time.sleep(2)
driver.switch_to.frame("WinRaces")
select_box = driver.find_element(by=By.NAME, value='SelectElection')
dropdown_element = Select(select_box)

In [14]:
# Find the link whose text contains "Download" and follow it
partial_link = driver.find_element(
    by=By.PARTIAL_LINK_TEXT,
    value="Download",
)
partial_link.click()

In [17]:
# Move the driver to the second window handle
# (presumably the download page opened by the previous click -- verify)
second_window = driver.window_handles[1]
driver.switch_to.window(second_window)


In [19]:
# Locate the form's "Download" submit button and press it
download_button = driver.find_element(
    by=By.CSS_SELECTOR,
    value='input[type="SUBMIT"][name="FormsButton2"][value="Download"]',
)
download_button.click()

In [21]:
## extracting the date substring from current URL, renaming file
election_date_url = driver.current_url

# The date sits between "ElectionDate=" and "&DATA" in the URL
match = re.search(r"ElectionDate=([^&]*)&DATA", election_date_url)

if match:
    election_date = match.group(1)
    print("Original Date:",election_date)

    # Reformat to MM/DD/YYYY with leading zeros (e.g. 11/8/2022 -> 11/08/2022).
    # This only runs when a date was found, so a missing match can no longer
    # fall through to an undefined (or stale) election_date.
    date_obj = dt.strptime(election_date, "%m/%d/%Y")
    standardized_date = date_obj.strftime("%m/%d/%Y")
    print("Standardized Date: ", standardized_date)
else:
    print("No match found")

Original Date: 11/8/2022
Standardized Date:  11/08/2022


In [23]:
# Strip the slashes from the date and append the suffix the website uses
# for its downloads, e.g. 11/08/2022 -> 11082022Election.txt
compact_date = standardized_date.replace("/","")
og_file_name = f"{compact_date}Election.txt"
print(og_file_name)

11082022Election.txt


#### Functionality tests
1. changing drop down menu options and updating elements to correspond with the new URL
2. incorporating above features into loop
3. extracting URL date substrings in order to find the downloaded files and rename them 

In [None]:
## one by one selection
# First option: select it, pause, and show the URL next to the election name
first_choice = relevant_elections[0]
dropdown_element.select_by_visible_text(first_choice)
time.sleep(1)
print(driver.current_url, " ", first_choice)

# Reload, step back into the frame and re-wrap the <select> for the new page
page_url = driver.current_url
driver.get(page_url)
time.sleep(2)
driver.switch_to.frame("WinRaces")
select_box = driver.find_element(by=By.NAME, value='SelectElection')
dropdown_element = Select(select_box)

# Second option: select it and show the URL next to the election name
second_choice = relevant_elections[1]
dropdown_element.select_by_visible_text(second_choice)
print(driver.current_url, " ", second_choice)

In [None]:
##working for loop
# For each election: select it, reload the page, and re-acquire the drop-down
for election in relevant_elections:
    dropdown_element.select_by_visible_text(election)
    time.sleep(1)

    page_url = driver.current_url
    driver.get(page_url)
    time.sleep(1)

    driver.switch_to.frame("WinRaces")
    select_box = driver.find_element(by=By.NAME, value='SelectElection')
    dropdown_element = Select(select_box)

In [None]:
## extracting the date substring from current URL, renaming file
election_date_url = driver.current_url

# Search the URL captured above; without this line `match` would be whatever
# an earlier cell left behind and election_date_url would never be used
match = re.search(r"ElectionDate=([^&]*)&DATA", election_date_url)

if match:
    election_date = match.group(1)
    print("Original Date:",election_date)

    # Reformat to MM/DD/YYYY with leading zeros; only done when a date was
    # found so a missing match cannot fall through to a stale election_date
    date_obj = dt.strptime(election_date, "%m/%d/%Y")
    standardized_date = date_obj.strftime("%m/%d/%Y")
    print("Standardized Date: ", standardized_date)
else:
    print("No match found")

In [None]:
#file renaming logic

# The website names each download after the election date
# (ex: '11082022Election.txt'), which is what og_file_name holds.
original_file = os.path.join(download_path, og_file_name)  #original file that will be replaced

# Same naming rule as the main loop: drop spaces and the word "Election".
# NOTE(review): `election` is whichever election the preceding cells selected
# last -- confirm it is the one og_file_name was derived from.
download_name = election.replace(" ",'').replace("Election","") + ".txt"  #actual file name
new_file = os.path.join(download_path, download_name)

if os.path.exists(original_file):  ## replacing old file name with proper file name
    os.rename(original_file, new_file)
else:
    print("Download failed or file not found.")