In [1]:
# Import packages for ...
## pulling data from XML and HTML files
from bs4 import BeautifulSoup 

## automating web browser interaction
from selenium import webdriver # module containing implementations of browser drivers
from webdriver_manager.chrome import ChromeDriverManager # Chrome driver 
from selenium.webdriver.support import expected_conditions as EC # method for writing code that waits until conditions are met 
from selenium.webdriver.support.ui import WebDriverWait # method for writing code that implements implicit or explicit waits
from selenium.webdriver.common.by import By # method for locating elements by their attributes
from selenium.webdriver import ActionChains # module for implementing browser interactions 

## data manipulation
import pandas as pd
from datetime import datetime
import numpy as np
import time

## plotting
import matplotlib.pyplot as plt

In [103]:
# Start a Chrome session and load the dashboard page
url = "https://ida.ussc.gov/analytics/saw.dll?Dashboard"
chromedriver_path = ChromeDriverManager().install()  # result of install() is what Chrome() is given
driver = webdriver.Chrome(chromedriver_path)
driver.get(url)

[WDM] - Current google-chrome version is 84.0.4147
[WDM] - Get LATEST driver version for 84.0.4147
[WDM] - Driver [/Users/kate/.wdm/drivers/chromedriver/mac64/84.0.4147.30/chromedriver] found in cache


 


In [217]:
# Lookup tables used by the helper functions defined in this cell.
# `filters` maps a filter name to the index of its dropdown arrow in the list
# returned by the dropdown-image XPath query used by the helpers.
# "Ciruit" is the original (misspelled) key; it is kept as an alias of
# "Circuit" so that calls written against the old spelling still resolve.
filters = {"Fiscal Year":0,"Circuit":1,"Ciruit":1,"State":2,"District":3,"Race":4,
          "Gender":5,"Age":6,"Citizenship":7,"Education":8,"Crime Type":9,
          "Category":10}

# Option titles passed to select_all() for the "Crime Type" dropdown.
crime_type = ["Administration of Justice", "Antitrust","Arson","Assault","Bribery/Corruption",
              "Burglary/Trespass","Child Pornography","Commercialized Vice","Drug Possession",
              "Drug Trafficking","Environmental","Extortion/Racketeering","Firearms",
              "Food and Drug","Forgery/Counter/Copyright","Fraud/Theft/Embezzlement","Immigration",
              "Individual Rights","Kidnapping","Manslaughter","Money Laundering","Murder",
              "National Defense","Obscenity/Other Sex Offenses","Prison Offenses","Robbery","Sexual Abuse",
              "Stalking/Harassing","Tax"]
# NOTE(review): presumably the option titles of the "State" dropdown; not used in the visible cells - confirm.
state = ["Alabama","Alaska","Arizona","Arkansas","California","Colorado","Connecticut","Delaware",
         "District Of Columbia","Florida","Georgia","Guam","Hawaii","Idaho","Illinois","Indiana","Iowa",
         "Kansas","Kentucky","Louisiana","Maine","Mariana Islands","Maryland","Massachusetts","Michigan",
         "Minnesota","Mississippi","Missouri","Montana","Nebraska","Nevada","New Hampshire","New Jersey",
         "New Mexico","New York","North Carolina","North Dakota","Ohio","Oklahoma","Oregon","Pennsylvania",
         "Puerto Rico","Rhode Island","South Carolina","South Dakota","Tennessee","Texas","Utah","Vermont",
         "Virgin Islands","Virginia","Washington","West Virginia","Wisconsin","Wyoming"]


# Open a dropdown of a particular category, toggle a dropdown for a known value and update the dashboard
def tick_checkbox(checkbox_value, check_status, category):
    """
    Open one filter dropdown and put a single checkbox into the requested state.

    checkbox_value: title of the option to look up, e.g. "White", "Black",
        "Hispanic" or "Other" when category is "Race".
    check_status: wanted value of the checkbox's "checked" attribute:
        "true" for ticked, None for unticked. The string "None" is accepted
        as another spelling of None.
    category: a key of the module-level `filters` dict naming the dropdown.

    Raises KeyError when `category` is not a key of `filters`.
    Prints what was found and done; returns nothing.
    """
    if category not in filters:
        raise KeyError("Unknown filter category: {!r}".format(category))

    # get_attribute("checked") gives the None object (not the text "None") for
    # an unticked box, so map the "None" spelling onto the value it is compared with.
    wanted_status = None if check_status == "None" else check_status

    arrows = driver.find_elements_by_xpath("//img[@src='/bicustom/res/s_IDA/master/selectdropdown_ena.png']")
    arrows[filters[category]].click()
    time.sleep(1)  # fixed one-second pause before the option is looked up

    option_xpath = "//div[@title='" + checkbox_value + "']"
    if category == "Race":
        # several elements are queried for this title; the first one is used
        parent_elem = driver.find_elements_by_xpath(option_xpath)[0]
    elif category == "Crime Type":
        # for Crime Type the second match is used
        parent_elem = driver.find_elements_by_xpath(option_xpath)[1]
    else:
        parent_elem = driver.find_element_by_xpath(option_xpath)

    # Two levels down from the titled div is the element that gets inspected.
    checkbox = parent_elem.find_element_by_xpath(".//*").find_element_by_xpath(".//*")

    if checkbox.get_attribute("type") != 'checkbox':
        print("Element is not a checkbox")
        return

    print("Element is a checkbox")
    if wanted_status == checkbox.get_attribute("checked"):
        print("Checkbox status as expected: {}".format(checkbox_value))
    else:
        # Select the checkbox
        checkbox.click()
        # Click out so that the page can reload
        driver.find_element_by_xpath("//body").click()
        print("Checkbox updated: {}".format(checkbox_value))

# Select all or unselect elements in the dropdown
def select_all(checkbox_list, check_status):
    """
    Put every listed checkbox of the open dropdown into the same state.

    The dropdown has to be open before this is called; the function does not
    open it.

    checkbox_list: iterable of option titles (the `title` attribute of the
        div that wraps each checkbox).
    check_status: wanted value of the "checked" attribute: "true" for ticked,
        None for unticked. The string "None" is accepted as another spelling
        of None.

    Prints one line per option; returns nothing.
    """
    # get_attribute("checked") gives the None object for an unticked box, so
    # the text "None" would never compare equal to it without this mapping.
    wanted_status = None if check_status == "None" else check_status

    for title in checkbox_list:
        option = driver.find_element_by_xpath("//div[@title='" + title + "']")
        checkbox = option.find_element_by_xpath(".//*").find_element_by_xpath(".//*")

        if checkbox.get_attribute("type") != 'checkbox':
            print("Element is not a checkbox")
            continue

        if wanted_status == checkbox.get_attribute("checked"):
            print("Checkbox status as expected: {}".format(title))
        else:
            # Select the checkbox
            checkbox.click()
            print("Checkbox updated: {}".format(title))

    # Click out so that the page can reload
    driver.find_element_by_xpath("//body").click()

# Opens up the drop down list
def open_category(category):
    """
    Click the dropdown arrow that belongs to one filter.

    category: a key of the module-level `filters` dict; its value is the
        index of the arrow among the dropdown images found on the page.

    Raises KeyError when `category` is not a key of `filters`.
    """
    if category not in filters:
        raise KeyError("Unknown filter category: {!r}".format(category))

    arrows = driver.find_elements_by_xpath("//img[@src='/bicustom/res/s_IDA/master/selectdropdown_ena.png']")
    arrows[filters[category]].click()

# Expands the table so that all rows are visible
def expand_list():
    """
    Click the "show all rows" image of the pivot table when it is on the page.

    Does nothing when no matching image is found. Returns nothing.
    """
    show_all_xpath = "//img[contains(@src,'/analytics/res/v-*xNdJt5L9yA/s_blafp/viewui/pivot/showallrows_ena.png')]"
    try:
        # The plural lookup returns an empty list when nothing matches, so a
        # missing button is a quiet no-op instead of an uncaught Selenium error.
        buttons = driver.find_elements_by_xpath(show_all_xpath)
        if buttons:
            buttons[0].click()
    except (AttributeError, NameError):
        # Kept from the original best-effort handling: these two error types
        # are deliberately ignored.
        pass

In [216]:
# Open the "Race" dropdown and make sure the "Other" option is ticked
# ("true" is the value tick_checkbox compares the "checked" attribute against).
tick_checkbox("Other", "true", "Race")

Element is a checkbox
Checkbox updated: Other


In [159]:
# Open the "Crime Type" dropdown, pause, then make sure every title in `crime_type` is ticked.
open_category('Crime Type')
time.sleep(5)  # fixed five-second pause; presumably gives the dropdown time to render - TODO confirm
select_all(crime_type, "true")

Checkbox status as expected: Administration of Justice
Checkbox status as expected: Antitrust
Checkbox status as expected: Arson
Checkbox status as expected: Assault
Checkbox status as expected: Bribery/Corruption
Checkbox status as expected: Burglary/Trespass
Checkbox status as expected: Child Pornography
Checkbox status as expected: Commercialized Vice
Checkbox status as expected: Drug Possession
Checkbox status as expected: Drug Trafficking
Checkbox status as expected: Environmental
Checkbox status as expected: Extortion/Racketeering
Checkbox status as expected: Firearms
Checkbox status as expected: Food and Drug
Checkbox status as expected: Forgery/Counter/Copyright
Checkbox status as expected: Fraud/Theft/Embezzlement
Checkbox status as expected: Immigration
Checkbox status as expected: Individual Rights
Checkbox status as expected: Kidnapping
Checkbox status as expected: Manslaughter
Checkbox status as expected: Money Laundering
Checkbox status as expected: Murder
Checkbox status

In [218]:
# Click the "show all rows" control so the whole table is on the page before it is scraped.
expand_list()

# Beautiful Soup

## Sentencing Type

In [219]:
def sentence_type(race):
    """
    Read the pivot table currently shown in the browser into a DataFrame.

    race: value written into every row of the 'Race' column of the result.

    Returns a DataFrame with the fourteen sentence-type columns listed below,
    built from every second parsed row starting at row index 16.
    """
    column_names = ['Race', 'Crime', 'Total N', 'Total %', 'Fine Only N', 'Fine Only %',
         'Prison Only N', 'Prison Only %', 'Prison and Alternatives N',
         'Prison and Alternatives %', 'Probation Only N', 'Probation Only %',
         'Probation and Alternatives N', 'Probation and Alternatives %']

    # Parse the page exactly as the browser holds it right now
    soup = BeautifulSoup(driver.page_source, 'lxml')

    # Body of the table containing Sentence Type by Type of Crime
    pivot_body = soup.find('td', attrs={'class':'PTChildPivotTable'}).find('tbody')

    # One list of stripped cell texts per table row; empty values are kept
    parsed_rows = [
        [cell.text.strip() for cell in table_row.find_all('td')]
        for table_row in pivot_body.find_all('tr')
    ]

    result = pd.DataFrame(parsed_rows[16::2], columns=column_names)
    # The scraped first column is replaced by the caller-supplied label
    result['Race'] = race

    return result


In [220]:
# Add the rows scraped for the "Other" race filter to the running results table.
# pd.concat is used because DataFrame.append is deprecated and absent from pandas 2.x.
# NOTE(review): `sentence_type_all` must already exist in the kernel; no visible cell
# creates it, and re-running this cell adds the same rows again - confirm where it is initialised.
sentence_type_all = pd.concat([sentence_type_all, sentence_type("Other")], ignore_index=True)

In [221]:
# Show the accumulated table (a bare last expression is rendered by the notebook).
sentence_type_all

Unnamed: 0,Race,Crime,Total N,Total %,Fine Only N,Fine Only %,Prison Only N,Prison Only %,Prison and Alternatives N,Prison and Alternatives %,Probation Only N,Probation Only %,Probation and Alternatives N,Probation and Alternatives %
0,Black,Grand Total,15128,100.0%,53,0.4%,13416,88.7%,498,3.3%,800,5.3%,361,2.4%
1,Black,Administration of Justice,152,100.0%,-,,104,68.4%,11,7.2%,32,21.1%,5,3.3%
2,Black,Arson,23,100.0%,-,,21,91.3%,1,4.3%,1,4.3%,-,
3,Black,Assault,192,100.0%,1,0.5%,175,91.1%,2,1.0%,13,6.8%,1,0.5%
4,Black,Bribery/Corruption,103,100.0%,2,1.9%,72,69.9%,7,6.8%,16,15.5%,6,5.8%
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,Other,Robbery,53,100.0%,-,,50,94.3%,2,3.8%,-,,1,1.9%
107,Other,Sexual Abuse,180,100.0%,-,,165,91.7%,13,7.2%,1,0.6%,1,0.6%
108,Other,Stalking/Harassing,42,100.0%,-,,38,90.5%,4,9.5%,-,,-,
109,Other,Tax,46,100.0%,2,4.3%,27,58.7%,2,4.3%,10,21.7%,5,10.9%
