In [1]:
# Import packages for ...
## pulling data from XML and HTML files
from bs4 import BeautifulSoup 

## automating web browser interaction
from selenium import webdriver # module containing implementations of browser drivers
from webdriver_manager.chrome import ChromeDriverManager # Chrome driver 
from selenium.webdriver.support import expected_conditions as EC # method for writing code that waits until conditions are met 
from selenium.webdriver.support.ui import WebDriverWait # method for writing code that implements implicit or explicit waits
from selenium.webdriver.common.by import By # method for locating elements by their attributes
from selenium.webdriver import ActionChains # module for implementing browser interactions 

## data manipulation
import pandas as pd
from datetime import datetime
import numpy as np
import time
import os

## plotting
import matplotlib.pyplot as plt

In [2]:
# Open up a chrome page
# `driver` is the notebook-level browser session: one_checkbox reads it as a global
# and the helper calls further down receive it as an argument.
# The value returned by ChromeDriverManager().install() is handed straight to webdriver.Chrome.
driver = webdriver.Chrome(ChromeDriverManager().install())
url = "https://ida.ussc.gov/analytics/saw.dll?Dashboard"
# Load the dashboard landing page in the browser that was just opened.
driver.get(url)

[WDM] - Current google-chrome version is 84.0.4147
[WDM] - Get LATEST driver version for 84.0.4147


 


[WDM] - Driver [/Users/kate/.wdm/drivers/chromedriver/mac64/84.0.4147.30/chromedriver] found in cache


In [3]:
# Open a dropdown of a particular category, toggle a dropdown for a known value and update the dashboard
def one_checkbox(checkbox_value, check_status, category, val=1):
    """
    Open a filter dropdown, put one checkbox into the requested state, then
    click the dropdown arrow again.

    Reads two names from the notebook's global scope rather than taking them
    as arguments: ``driver`` (the Selenium browser session) and ``filters``
    (maps a category name to the position of its dropdown arrow on the page).

    checkbox_value: title of the option, e.g. "White", "Black", "Hispanic", "Other"
    check_status: desired state - "true" for ticked, "None" (or None) for unticked
    category: key into ``filters``, e.g. "Race" or "Crime Type"
    val: which match to use when checkbox_value is "Other" (0 or 1); ignored otherwise

    Raises KeyError if ``category`` is not present in ``filters``.
    """
    dropdown_xpath = "//img[@src='/bicustom/res/s_IDA/master/selectdropdown_ena.png']"
    option_xpath = "//div[@title='" + checkbox_value + "']"

    num = filters.get(category)
    if num is None:
        # Previously this surfaced as an opaque "list indices must be integers" TypeError.
        raise KeyError("Unknown filter category: {!r}".format(category))

    # Open the dropdown that belongs to this category.
    driver.find_elements(By.XPATH, dropdown_xpath)[num].click()
    time.sleep(2)  # fixed pause before looking for the option (presumably lets the dropdown render)

    if checkbox_value == "Other":
        # "Other" is selected by position among all elements with that title,
        # whatever the category (the original left parent_elem unset for any
        # category besides "Race" / "Crime Type").
        parent_elem = driver.find_elements(By.XPATH, option_xpath)[val]
    else:
        parent_elem = driver.find_element(By.XPATH, option_xpath)

    # The input sits two levels below the titled <div>: first descendant of its first descendant.
    checkbox = parent_elem.find_element(By.XPATH, ".//*").find_element(By.XPATH, ".//*")

    if checkbox.get_attribute("type") == 'checkbox':
        print("Element is a checkbox")

        # get_attribute("checked") yields "true" or the None object, never the
        # string "None", so translate the documented "None" spelling first.
        expected = None if check_status in (None, "None") else check_status

        if expected == checkbox.get_attribute("checked"):
            print("Checkbox status as expected: {}".format(checkbox_value))
        else:
            # Select the checkbox
            checkbox.click()
            # Click out so that the page can reload
            driver.find_element(By.XPATH, "//body").click()
            print("Checkbox updated: {}".format(checkbox_value))

    else:
        print("Element is not a checkbox")

    # Click the same dropdown arrow a second time, as the original did.
    driver.find_elements(By.XPATH, dropdown_xpath)[num].click()
    

## Import helper functions from sentencing_outcomes.py

In [6]:
from sentencing_outcomes import *

In [5]:
# Reload the script
# importlib.reload re-executes the module object held in sys.modules, but names that
# were already copied into this notebook by `from sentencing_outcomes import *` keep
# pointing at the old objects - re-run the star-import cell after this one to refresh them.
import sys, importlib
importlib.reload(sys.modules['sentencing_outcomes'])

<module 'sentencing_outcomes' from '/Users/kate/Documents/Data Science/dashboard-web-scraping/sentencing_outcomes.py'>

In [None]:
# use expand list when gathering data from tables
# NOTE(review): expand_list is not defined in this notebook; presumably it arrives via the
# sentencing_outcomes star import. It is called with no arguments here, while other
# helpers are passed `driver` - confirm the expected signature.
expand_list()

### Run the scrape - Sentence Length

In [7]:
# Navigate to Sentencing Outcomes, Sentence Length page
# Needs the browser session in `driver` to be open already (created in the Chrome cell).
nav_to_sentencingoutcomes(driver, "Sentence Length")

In [None]:
# Start from an empty frame; later cells add columns and append scraped rows to it.
# Re-running this cell discards anything collected so far.
sentence_length_all = pd.DataFrame() # Instantiate a df

In [8]:
# Pick the first race / crime type as a single test case.
# `race` and `crime_type` are not assigned before this cell in the notebook;
# presumably they come from the sentencing_outcomes star import - TODO confirm.
r = race[0] # spin through all
ct = crime_type[0] # spin through all
# NOTE(review): sentence_length_all has no rows at this point, so assigning a scalar
# only creates the (empty) columns - no value of r / ct is actually stored.
sentence_length_all['Race'] = r
sentence_length_all['Crime Type'] = ct

In [12]:
## Select the right filters
# Tick the Race checkbox for r. `val` is only consulted inside one_checkbox when the
# value is "Other", so val=0 has no effect for any other value.
one_checkbox(r, "true", "Race", val=0) # toggle

Element is a checkbox
Checkbox status as expected: White


In [11]:
# Tick the Crime Type checkbox for ct.
# NOTE(review): the saved output of this cell is an ElementNotInteractableException
# ("element has zero size"), so this step did not succeed in the recorded run.
one_checkbox(ct, "true", "Crime Type", val=0) # toggle

ElementNotInteractableException: Message: element not interactable: element has zero size
  (Session info: chrome=84.0.4147.125)


In [None]:
## Append the data to DF
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported replacement.
# assumes sentence_length(...) returns a DataFrame - TODO confirm against sentencing_outcomes.py
# Not idempotent: every re-run of this cell adds the scraped rows again.
sentence_length_all = pd.concat(
    [sentence_length_all, sentence_length(driver, r, ct)],
    ignore_index=True,
)

In [None]:
## Reset the filters - Race
# toggle_dropdown and unselect_all are not defined in this notebook
# (presumably from the sentencing_outcomes star import).
toggle_dropdown(driver, 'Race')
# Fixed 5-second pause between toggling the dropdown and unselecting.
time.sleep(5)
# Passes the whole `race` list, not just the single value r selected above.
unselect_all(driver, race)

In [None]:
## Reset the filters - Crime Type
# Same three steps as the Race reset, applied to the Crime Type dropdown.
toggle_dropdown(driver, 'Crime Type')
# Fixed 5-second pause between toggling the dropdown and unselecting.
time.sleep(5)
# Passes the whole `crime_type` list, not just the single value ct selected above.
unselect_all(driver, crime_type)

In [None]:
# Display everything collected so far (the full frame, not a preview).
sentence_length_all

# Beautiful Soup - Sentencing outcomes

## Plea Status

In [None]:
def all_plea_status(race, pause_seconds=5):
    """
    Collect the plea-status table for every entry in ``race`` and stack them.

    For each value: tick its Race checkbox, expand the table, scrape it with
    ``plea_status``, then untick the checkbox again, pausing between steps.

    ``tick_checkbox``, ``expand_list`` and ``plea_status`` are not defined in
    this notebook; they are presumably provided by the sentencing_outcomes
    star import.

    race: iterable of Race checkbox titles, e.g. ['White', 'Black']
    pause_seconds: seconds to sleep after each browser step (default 5, the
        value that used to be hard-coded)

    Returns one DataFrame with a fresh 0..n-1 index; an empty DataFrame when
    ``race`` is empty.
    """
    frames = []
    for i in race:
        tick_checkbox(i, "true", "Race")
        time.sleep(pause_seconds)
        expand_list()
        # Gather the pieces and concatenate once at the end: DataFrame.append
        # was removed in pandas 2.0 and re-copied the whole frame on every pass.
        # assumes plea_status returns a DataFrame - TODO confirm
        frames.append(plea_status(i))
        time.sleep(pause_seconds)
        tick_checkbox(i, "None", "Race")
        time.sleep(pause_seconds)

    if not frames:
        # pd.concat rejects an empty list; keep the original empty-frame result.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

In [None]:
# Rebinds the notebook-level name `race`, which earlier cells already read
# (race[0], unselect_all(driver, race)) before any assignment in this notebook.
race = ['White','Black','Hispanic', 'Other']
# Drives the browser once per race; each pass includes three 5-second sleeps.
plea_status_all = all_plea_status(race)
plea_status_all

In [None]:
# Write the combined table to the current working directory.
# With to_csv defaults the row index is written as an unnamed first column.
plea_status_all.to_csv('Plea Status All.csv')

## Sentence Type

In [None]:
def all_sentencing_type(race, pause_seconds=5):
    """
    Collect the sentence-type table for every entry in ``race`` and stack them.

    For each value: tick its Race checkbox, expand the table, scrape it with
    ``sentence_type``, then untick the checkbox again, pausing between steps.

    ``tick_checkbox``, ``expand_list`` and ``sentence_type`` are not defined in
    this notebook; they are presumably provided by the sentencing_outcomes
    star import.

    race: iterable of Race checkbox titles, e.g. ['White', 'Black']
    pause_seconds: seconds to sleep after each browser step (default 5, the
        value that used to be hard-coded)

    Returns one DataFrame with a fresh 0..n-1 index; an empty DataFrame when
    ``race`` is empty.
    """
    # The original initialised `plea_status_all` here but then read and returned
    # `sentencing_type_all`, so every call failed with UnboundLocalError.
    frames = []
    for i in race:
        tick_checkbox(i, "true", "Race")
        time.sleep(pause_seconds)
        expand_list()
        time.sleep(pause_seconds)
        # Concatenate once after the loop: DataFrame.append was removed in pandas 2.0.
        # assumes sentence_type returns a DataFrame - TODO confirm
        frames.append(sentence_type(i))
        time.sleep(pause_seconds)
        tick_checkbox(i, "None", "Race")
        time.sleep(pause_seconds)

    if not frames:
        # pd.concat rejects an empty list; return an empty frame instead.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


In [None]:
# Same race list as the Plea Status section, assigned again here.
race = ['White','Black','Hispanic', 'Other']
# Drives the browser once per race; each pass includes four 5-second sleeps.
sentencing_type_all = all_sentencing_type(race)
sentencing_type_all

In [None]:
# Add the "Other" rows scraped on their own.
# `sentence_type_all` was never assigned before being read here (NameError on a fresh
# kernel); build it from `sentencing_type_all`, the frame produced by the cell above.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
# NOTE(review): `race` above already contains 'Other', so this may duplicate those
# rows if the loop scraped them successfully - confirm intent.
# assumes sentence_type(...) returns a DataFrame - TODO confirm
sentence_type_all = pd.concat(
    [sentencing_type_all, sentence_type("Other")],
    ignore_index=True,
)

In [None]:
# Display the frame assigned in the previous cell (the full frame, not a preview).
sentence_type_all