# Selenium to download CSV files

In [65]:

def request_me_death_records(start_date, end_date, download_dir, download_timeout=30):
    """Download deaths within provided date range from Medical Examiner

    Drives a headless Firefox session against the Tarrant County Medical
    Examiner public search page, submits the date range, clicks the
    download button, waits for the file to arrive in ``download_dir`` and
    renames it to ``me-deaths-<start>-to-<end>.csv`` (dates as %Y-%m-%d).

    Input:
        start_date: str (%m/%d/%Y)
        end_date: str (%m/%d/%Y)
        download_dir: str (created if it does not exist yet)
        download_timeout: int or float, seconds to wait for the downloaded
            file to show up in download_dir (default 30)

    Returns:
        -- (CSV file downloaded to specified directory)

    Raises:
        ValueError: a parameter is missing or malformed
        AssertionError: the page title is not the expected one, or the
            page reports "No results found."
        RuntimeError: no new file appeared in download_dir in time
    """
    from pathlib import Path
    import datetime
    import shutil
    import time
    import os

    # Validate input parameters
    if not start_date:
        raise ValueError("start_date is a required parameter")

    try:
        start_parsed = datetime.datetime.strptime(start_date, '%m/%d/%Y')
    except (ValueError, TypeError) as exc:
        raise ValueError("start_date must be a date with format (%m/%d/%Y)") from exc

    if not end_date:
        raise ValueError("end_date is a required parameter")

    try:
        end_parsed = datetime.datetime.strptime(end_date, '%m/%d/%Y')
    except (ValueError, TypeError) as exc:
        raise ValueError("end_date must be a date with format (%m/%d/%Y)") from exc

    if not download_dir:
        raise ValueError("download_dir is a required parameter")

    try:
        Path(download_dir)
    except TypeError as exc:
        raise ValueError("download_dir must be a filepath") from exc

    # Selenium is imported only after validation so that bad input fails
    # fast, before the browser dependency is touched.
    from selenium import webdriver
    from selenium.webdriver.common.by import By

    download_dir = os.fspath(download_dir)
    os.makedirs(download_dir, exist_ok=True)

    # Remember what is already there so that only a file created by THIS
    # call is renamed; picking "the newest file" blindly would rename an
    # earlier download whenever the new one fails to arrive.
    files_before = set(os.listdir(download_dir))

    # To prevent download dialog
    options = webdriver.FirefoxOptions()
    options.add_argument('-headless')
    options.set_preference('browser.download.folderList', 2) # custom location
    options.set_preference('browser.download.manager.showWhenStarting', False)
    options.set_preference('browser.download.dir', download_dir)
    options.set_preference('browser.helperApps.neverAsk.saveToDisk', 'text/csv')

    # Start browser
    driver = webdriver.Firefox(options=options)
    downloaded = []
    try:
        # find_element() polls up to 10 seconds for an element to appear
        driver.implicitly_wait(10)

        driver.get("https://mepublic.tarrantcounty.com/?linklocation=Iwantto&linkname=Case%20Findings/Press%20Releases%3C")
        assert "Tarrant County" in driver.title

        # Set the Start Date for search
        elem = driver.find_element(By.ID, "ucMainControl_txtFromDate")
        elem.clear()
        elem.send_keys(start_date)

        # Set End Date for search
        elem = driver.find_element(By.ID, "ucMainControl_txtToDate")
        elem.clear()
        elem.send_keys(end_date)

        # Submit search and download CSV file
        driver.find_element(By.ID, "ucMainControl_btnFind").click()
        driver.find_element(By.ID, "ucMainControl_btnDownloadTop").click()
        assert "No results found." not in driver.page_source

        # Keep the browser open until the download has landed. Firefox is
        # expected to keep a '.part' file around while a download is still
        # in progress, so wait until new files exist and none of them is
        # a '.part' file.
        deadline = time.monotonic() + download_timeout
        while True:
            new_names = set(os.listdir(download_dir)) - files_before
            if new_names and not any(name.endswith('.part') for name in new_names):
                downloaded = [os.path.join(download_dir, name) for name in new_names]
                break
            if time.monotonic() >= deadline:
                break
            time.sleep(0.25)
    finally:
        # Always end the browser session, even when a step above failed
        driver.quit()

    if not downloaded:
        raise RuntimeError(
            f"no file was downloaded to {download_dir} within {download_timeout} seconds")

    # Rename the downloaded data file inside the download directory
    start_dt = start_parsed.strftime('%Y-%m-%d')
    end_dt = end_parsed.strftime('%Y-%m-%d')

    filename = max(downloaded, key=os.path.getctime)
    shutil.move(filename,
                os.path.join(download_dir, f"me-deaths-{start_dt}-to-{end_dt}.csv"))

In [None]:
import pandas as pd
from pathlib import Path
import os
import time

# Compile input parameters: one (first day, last day) pair per calendar
# month between October 2006 and December 2023, formatted as %m/%d/%Y.
start_date_list = list(
    pd.date_range('2006-10-01', '2023-12-31', freq='MS').strftime("%m/%d/%Y")
)
end_date_list = list(
    pd.date_range('2006-10-01', '2023-12-31', freq='ME').strftime("%m/%d/%Y")
)
months_tuples = zip(start_date_list, end_date_list)

# Files are written to ./data under the current working directory
download_dir = str(Path.cwd() / 'data')

# Iterate and download, one request per month with a 2 second pause
# between requests
for start_date, end_date in months_tuples:
    request_me_death_records(start_date, end_date, download_dir)
    time.sleep(2)

# Concatenate Data

In [92]:
from pathlib import Path
import pandas as pd
import numpy as np
import re

def manual_separation(bad_line):
    """Function for pandas.read_csv() when a bad-line is read.

    Some fields in the Medical Examiner's CSV files contain
    commas, throwing off pandas.read_csv(). This function
    joins the fields of the bad line back together, rewrites the
    two known offenders without their comma, and splits the
    line on commas again:

    (1) EXAM_TYPE 'Incision(Head, Abdomen)' becomes
        'Incision(Head|Abdomen)'
    (2) POLICE_DEPT 'Durant,  OK Police Department' becomes
        'Durant OK Police Department'

    Input:
        bad_line: list of str, the fields of the offending row

    Returns:
        list of str, the re-split fields. Every field after the
        first starts with the space added by the ', ' join.
    """
    bad_string = ', '.join(bad_line)
    # Raw strings for both pattern and replacement: '\g<1>' in a normal
    # string literal is an invalid escape sequence.
    good_string = re.sub(r'Incision\(([A-Za-z]+),\s*([A-Za-z]+)\)',
                         r'Incision(\g<1>|\g<2>)',
                         bad_string)
    # Single oddity (two spaces after the comma once the fields are joined)
    good_string = good_string.replace('Durant,  OK Police Department',
                                      'Durant OK Police Department')

    return good_string.split(',')


# Iterate over downloaded files, combining them into a single dataframe.
# The paths are sorted so the row order of the combined frame is the same
# on every run (glob order is not guaranteed).
df_list = list()
for fil in sorted(Path('data').glob('me-deaths-*')):

    tmp_df = pd.read_csv(fil,
                         on_bad_lines=manual_separation,
                         engine="python")

    # NOTE(review): a non-numeric 'Case #' column presumably means the
    # first parse mis-aligned the columns; such files are read again with
    # index_col=False and the default parser -- confirm against the data.
    if tmp_df['Case #'].dtype == 'object':
        tmp_df = pd.read_csv(fil,
                             index_col=False)

    df_list.append(tmp_df)

# Combine list of dataframes
me = pd.concat(df_list)
me.reset_index(drop=True, inplace=True)

# Manual corrections (READY_FOR_TRANSPORT sometimes shifts a field in the CSV)
# Rows are treated as shifted when 'Report Completed' holds Yes/No but
# 'Ready for Transport' holds something other than Released/No. The mask is
# computed once because neither assignment below changes 'Report Completed',
# and 'Ready for Transport' is only overwritten by the last one.
# NOTE(review): rows where 'Ready for Transport' is missing also match this
# mask -- confirm that is intended.
shifted = (me['Report Completed'].str.strip().isin(['Yes', 'No']) &
           ~me['Ready for Transport'].str.strip().isin(['Released', 'No']))

me.loc[shifted, 'Type of Exam'] = me['Type of Exam'] + '|' + me['Ready for Transport']
me.loc[shifted, 'Ready for Transport'] = me['Unnamed: 23']

# Strip extra whitespace, collapse internal runs of whitespace to a single
# space, and turn empty strings into missing values
for col in me.columns:
    if me[col].dtype == 'object':
        me[col] = me[col].str.strip().replace(r'\s+', ' ', regex=True)
        # np.nan, not np.NaN: the NaN alias was removed in NumPy 2.0
        me[col] = me[col].replace({'': np.nan})

# Rename fields
# NOTE(review): 'EXAME_TYPE' looks like a typo for 'EXAM_TYPE'; it is kept
# because it is the column name written to the TSV file.
me.columns = ['CASE_NO', 'COUNTY', 'NAME', 'DATE_OF_BIRTH', 'AGE', 'DATE_OF_DEATH',
              'TIME_OF_DEATH', 'RACE', 'SEX', 'POLICE_DEPT', 'POLICE_SVC_NO',
              'DECEASED_ADDRESS', 'OCCURRED_LOCATION', 'PLACE_OF_DEATH',
              'PLACE_OF_DEATH_ADDRESS', 'CAUSE_OF_DEATH', 'MANNER_OF_DEATH',
              'PROSECTOR', 'EDR_NO', 'CERT_OF_DEATH_AMENDMENT',
              'REPORT_COMPLETED', 'EXAME_TYPE', 'TRANSPORT_READY',
              'Unnamed: 23']

# The overflow column is only needed for the correction above
me.drop(columns = 'Unnamed: 23', inplace=True)

# Write to disk
me.to_csv('data/medical-examiner-data-through-2023.tsv', sep='\t', index=False)

In [121]:
# Display the combined, cleaned dataframe
me

Unnamed: 0,CASE_NO,COUNTY,NAME,DATE_OF_BIRTH,AGE,DATE_OF_DEATH,TIME_OF_DEATH,RACE,SEX,POLICE_DEPT,...,PLACE_OF_DEATH,PLACE_OF_DEATH_ADDRESS,CAUSE_OF_DEATH,MANNER_OF_DEATH,PROSECTOR,EDR_NO,CERT_OF_DEATH_AMENDMENT,REPORT_COMPLETED,EXAME_TYPE,TRANSPORT_READY
0,611544,Tarrant,Cooper Shaunte M.,9/29/1970,36.0,10/1/2006,3:51 AM,Black,F,,...,Arlington Memorial Hospital,800 West Randol Mill Road Arlington Texas 76012,SICKLE CELL DISEASE,NATURAL,Carl Wigren M.D.,,Completed,Yes,Autopsy,Released
1,611549,Tarrant,Magaw Paul,9/23/1952,54.0,10/1/2006,1:38 PM,White,M,,...,North Hills Medical Center,4401 Booth Calloway Road North Richland Hills ...,HYPERTENSIVE ATHEROSCLEROTIC CARDIOVASCULAR DI...,NATURAL,Gary Sisler,,Completed,Yes,Incision(Head|Abdomen),Released
2,611550,Tarrant,Browner B H,6/26/1954,52.0,10/1/2006,12:48 PM,Black,M,Fort Worth Police Department,...,bedroom floor,3318 S. Jennings #A Fort Worth Texas 00000,HYPERTENSIVE CARDIOVASCULAR DISEASE,NATURAL,Lloyd White M.D.,,,Yes,Incision(Chest Only),Released
3,611551,Parker,Stuckey Adonna Sheryce,10/4/1953,52.0,10/1/2006,10:20 AM,White,F,Parker County Sheriff's Office,...,Private residence Decedent found in her bedroom.,615 Bennett Road Millsap Texas 76066,ACUTE MORPHINE INTOXICATION,ACCIDENT,Carl Wigren M.D.,,Completed,Yes,Autopsy,Released
4,611553,Tarrant,Epley Cyliss T,3/18/2005,1.0,10/1/2006,7:12 PM,White,M,Keller Police Department,...,ER,801 7th Ave Fort Worth Texas 76104,FRESH WATER DROWNING Due to:FELL INTO SWIMMING...,ACCIDENT,Lloyd White M.D.,,,Yes,Autopsy,Released
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51226,2323582,Tarrant,Dotson Deandre Deshon,11/10/1989,34.0,12/31/2023,1:06 PM,Black,M,Fort Worth Police Department,...,Trauma ICU,1575 S Main St Fort Worth Texas 76119,GUNSHOT WOUND OF THE HEAD,HOMICIDE,Michael Chaump.M.D,,,No,Autopsy,Released
51227,2323583,Tarrant,Evans Shawn Patrick,4/16/1982,41.0,12/31/2023,5:22 PM,White,M,Fort Worth Police Department,...,extended stay hotel,3261 NE Loop 820 123 Fort Worth Texas 76137,MIXED DRUG TOXICITY (FENTANYL METHAMPHETAMINE),ACCIDENT,Stacey Murthy M.D.,,Completed,Yes,External Exam,Released
51228,2323584,Parker,Holmes Robby Monroe,3/11/2000,23.0,12/31/2023,2:58 PM,White,M,Parker County Sheriff's Office,...,Private residence,4950 Lonestar Rd. Unincorporated Parker County...,PENDING,PENDING,David Rosenbaum M.D.,,,No,Autopsy,Released
51229,2323588,Parker,Owens Esther,4/28/1953,70.0,12/31/2023,6:29 PM,White,F,Parker County Sheriff's Office,...,Private residence,132 Apollo Drive Unincorporated Parker County ...,PENDING,PENDING,Michael Chaump.M.D,,,No,External Exam,Released
