# Save Data using Selenium

No public API could be found for the data at https://ocrdata.ed.gov/flex/Reports.aspx?type=school.
<br>Instead, the Python library Selenium is used to control a Chrome browser and automate the typing, clicking, and saving.

In [None]:
from pathlib import Path
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

## Download Discrimination Specific Data

In [None]:
def grab_data(city, state, new_path, download_dir='/Users/kevinwebb/Downloads/'):
    """Export one city's report from the CRDC school search and save it as CSV.

    Drives a Chrome browser through the search form at ocrdata.ed.gov
    (city + state), accepts the "I Accept" prompt if shown, opens the report
    link identified by ``report_link_id`` below, clicks Export, and moves the
    downloaded file to ``<new_path>/<city>.csv``. The whole sequence is tried
    up to four times, each time in a fresh browser; failures are printed and
    never raised to the caller.

    Args:
        city: City name typed into the search box; also the output file stem.
        state: State name exactly as shown in the state drop-down.
        new_path: Destination directory (``str`` or ``Path``). Created if
            missing.
        download_dir: Directory the browser saves downloads into. Defaults to
            the path this notebook was originally written against.

    Returns:
        None.
    """
    search_url = 'https://ocrdata.ed.gov/flex/Reports.aspx?type=school'
    report_link_id = "ctl00_div_ucReportsList_reportGroupRepeater_ctl02_reportListRepeater_ctl01_reportLink"
    download_path = Path(download_dir)
    max_attempts = 4
    export_timeout = 120    # seconds to keep retrying the Export click
    download_timeout = 120  # seconds to wait for the CSV to appear on disk

    def click_export(driver):
        # The Export button is present in the page before it is clickable, so
        # WebDriverWait polls this until the click goes through. The lookup is
        # deliberately outside the try: WebDriverWait already ignores
        # NoSuchElementException and other lookup errors should surface.
        export_button = driver.find_element(By.NAME, 'ctl00$div$ucReportViewer$btnExport')
        try:
            export_button.click()
        except Exception:
            # Not a bare except: Ctrl-C must still interrupt the wait.
            return False
        return True

    for attempt in range(max_attempts):
        # Stays None until Chrome actually starts, so cleanup never touches an
        # unbound name when the browser fails to launch.
        driver = None
        try:
            # Without the destination directory the final move would fail on
            # every attempt, after a full browser round trip each time.
            destination_dir = Path(new_path)
            destination_dir.mkdir(parents=True, exist_ok=True)

            driver = webdriver.Chrome()
            driver.get(search_url)

            # Fill in the city and pick the state from the drop-down.
            driver.find_element(By.ID, "ctl00_div_ucSearch_tbSearchCity").send_keys(city)
            state_select = driver.find_element(By.ID, 'ctl00_div_ucSearch_ddlSearchState')
            for option in state_select.find_elements(By.TAG_NAME, 'option'):
                if option.text == state:
                    option.click()
                    break

            driver.find_element(By.NAME, 'ctl00$div$ucSearch$btnSearchParams').click()

            # Click every element carrying the "I Accept" text, if any.
            for btn in driver.find_elements(By.XPATH, "//*[contains(text(), 'I Accept')]"):
                btn.click()

            # Wait for the report link to be present, then open the report.
            report_link = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.ID, report_link_id))
            )
            report_link.click()

            # Remember which CSVs were already in the download directory so a
            # stale file is never mistaken for this city's export.
            already_there = set(download_path.glob('*.csv'))
            WebDriverWait(driver, export_timeout).until(click_export)

            # Poll for the new file, sleeping between checks and giving up
            # after a deadline instead of spinning forever.
            deadline = time.monotonic() + download_timeout
            while True:
                fresh = sorted(set(download_path.glob('*.csv')) - already_there)
                if fresh:
                    break
                if time.monotonic() >= deadline:
                    raise TimeoutError(
                        "no CSV appeared in {} within {}s".format(download_path, download_timeout)
                    )
                time.sleep(0.5)

            # Move and rename the download; rename removes the source file.
            fresh[0].rename(destination_dir / (city + ".csv"))
            return

        except Exception as e:
            print("{} Attempt: {} failed: {!r}".format(city, attempt, e))
        finally:
            # Runs on success and on failure, so no browser is left behind.
            if driver is not None:
                driver.quit()

In [None]:
# Load the pipe-delimited table of US cities; the city names drive the
# per-city searches against the Civil Rights Data Collection site.
geo_df = pd.read_csv(
    'https://raw.githubusercontent.com/grammakov/USA-cities-and-states/master/us_cities_states_counties.csv',
    sep="|",
)

# This analysis examines Georgia: keep the distinct city names for state "GA".
ga_cities = geo_df.loc[geo_df["State short"] == "GA", "City"].unique()

In [None]:
# grab_data moves each downloaded CSV into "georgia/"; make sure the directory
# exists before the long-running crawl starts, otherwise every move fails.
Path("georgia/").mkdir(parents=True, exist_ok=True)

# Fetch the report for every Georgia city in turn.
for city in ga_cities:
    grab_data(city, "Georgia", "georgia/")

## Download Teacher Specific Data

In [None]:
def grab_teacher_data(city, state, new_path, download_dir='/Users/kevinwebb/Downloads/'):
    """Export one city's teacher report from the CRDC school search as CSV.

    Drives a Chrome browser through the search form at ocrdata.ed.gov
    (city + state), accepts the "I Accept" prompt if shown, opens the report
    link identified by ``report_link_id`` below, clicks Export, and moves the
    downloaded file to ``<new_path>/<city>.csv``. If the site reports that the
    search matched no entities, the city is skipped without retrying.
    Otherwise the sequence is tried up to four times, each time in a fresh
    browser; failures are printed and never raised to the caller.

    Args:
        city: City name typed into the search box; also the output file stem.
        state: State name exactly as shown in the state drop-down.
        new_path: Destination directory (``str`` or ``Path``). Created if
            missing.
        download_dir: Directory the browser saves downloads into. Defaults to
            the path this notebook was originally written against.

    Returns:
        None.
    """
    search_url = 'https://ocrdata.ed.gov/flex/Reports.aspx?type=school'
    report_link_id = "ctl00_div_ucReportsList_reportGroupRepeater_ctl11_reportListRepeater_ctl00_reportLink"
    no_results_xpath = "//*[contains(text(), 'No entities found. Please revise search criteria and try again.')]"
    download_path = Path(download_dir)
    max_attempts = 4
    export_timeout = 120    # seconds to keep retrying the Export click
    download_timeout = 120  # seconds to wait for the CSV to appear on disk

    def click_export(driver):
        # The Export button is present in the page before it is clickable, so
        # WebDriverWait polls this until the click goes through. The lookup is
        # deliberately outside the try: WebDriverWait already ignores
        # NoSuchElementException and other lookup errors should surface.
        export_button = driver.find_element(By.NAME, 'ctl00$div$ucReportViewer$btnExport')
        try:
            export_button.click()
        except Exception:
            # Not a bare except: Ctrl-C must still interrupt the wait.
            return False
        return True

    for attempt in range(max_attempts):
        # Stays None until Chrome actually starts, so cleanup never touches an
        # unbound name when the browser fails to launch.
        driver = None
        try:
            # Without the destination directory the final move would fail on
            # every attempt, after a full browser round trip each time.
            destination_dir = Path(new_path)
            destination_dir.mkdir(parents=True, exist_ok=True)

            driver = webdriver.Chrome()
            driver.get(search_url)

            # Fill in the city and pick the state from the drop-down.
            driver.find_element(By.ID, "ctl00_div_ucSearch_tbSearchCity").send_keys(city)
            state_select = driver.find_element(By.ID, 'ctl00_div_ucSearch_ddlSearchState')
            for option in state_select.find_elements(By.TAG_NAME, 'option'):
                if option.text == state:
                    option.click()
                    break

            driver.find_element(By.NAME, 'ctl00$div$ucSearch$btnSearchParams').click()

            # Click every element carrying the "I Accept" text, if any.
            for btn in driver.find_elements(By.XPATH, "//*[contains(text(), 'I Accept')]"):
                btn.click()

            # Wait for the report link. When it never shows up, tell apart
            # "the search matched nothing" (skip the city) from any other
            # failure (re-raise so the attempt is retried).
            try:
                report_link = WebDriverWait(driver, 20).until(
                    EC.presence_of_element_located((By.ID, report_link_id))
                )
            except Exception:
                if driver.find_elements(By.XPATH, no_results_xpath):
                    print("{}: no entities found, skipping".format(city))
                    return
                raise
            report_link.click()

            # Remember which CSVs were already in the download directory so a
            # stale file is never mistaken for this city's export.
            already_there = set(download_path.glob('*.csv'))
            WebDriverWait(driver, export_timeout).until(click_export)

            # Poll for the new file, sleeping between checks and giving up
            # after a deadline instead of spinning forever.
            deadline = time.monotonic() + download_timeout
            while True:
                fresh = sorted(set(download_path.glob('*.csv')) - already_there)
                if fresh:
                    break
                if time.monotonic() >= deadline:
                    raise TimeoutError(
                        "no CSV appeared in {} within {}s".format(download_path, download_timeout)
                    )
                time.sleep(0.5)

            # Move and rename the download; rename removes the source file.
            fresh[0].rename(destination_dir / (city + ".csv"))
            return

        except Exception as e:
            print("{} Attempt: {} failed: {!r}".format(city, attempt, e))
        finally:
            # Runs on success, on failure and on the "no entities" early
            # return, so no browser is left behind.
            if driver is not None:
                driver.quit()

In [None]:
# Read in all cities in the USA (pipe-delimited file).
# The city names are needed to query the Civil Rights Data Collection
# search form one city at a time.
geo_df = pd.read_csv('https://raw.githubusercontent.com/grammakov/USA-cities-and-states/master/us_cities_states_counties.csv',delimiter="|")

# We'll be examining Georgia in this analysis
ga_cities = geo_df[geo_df["State short"] == "GA"].City.unique()

# parents=True: "georgia/" itself may not exist yet, and a plain mkdir() would
# raise FileNotFoundError. exist_ok=True replaces the separate exists() check.
teacher_dir = Path("georgia/teacher/")
teacher_dir.mkdir(parents=True, exist_ok=True)

# Fetch the teacher report for every Georgia city in turn.
for city in ga_cities:
    grab_teacher_data(city,"Georgia",teacher_dir)
    