### Gathering Data from NamUs

NamUs is the National Missing and Unidentified Persons System, which is financed by the United States Department of Justice. NamUs does not provide an API, however it does provide a searchable interface.

In this notebook, we will use Selenium with beautifulSoup to retrieve and save data from NamUs as CSV.

In [1]:
# Import libraries
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.select import Select
from webdriver_manager.chrome import ChromeDriverManager
import time
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
# initialize global driver
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--incognito')
options.add_argument('--headless')
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)




In [3]:
# constants
LOCATION = ['Maine']
COLUMNS = [
    'Case Number',
    'DLC',
    'Last Name',
    'First Name',
    'Missing Age',
    'City',
    'County',
    'State',
    'Sex',
    'Race',
    'Date Modified'
]

In [4]:
# find state filter
driver.get("https://www.namus.gov/MissingPersons/Search")
section_on_circumstances = driver.find_element_by_id('Circumstances')
labels_in_section = section_on_circumstances.find_elements_by_tag_name('label')

state_input_box = None

for label in labels_in_section:
    if (label.text == "State"):
        state_input_box = label.find_element_by_tag_name('input')
        # add state filter
        for state in LOCATION:
            state_input_box.send_keys(state)
            state_input_box.send_keys(Keys.ENTER)


In [5]:
# navigate to list view
buttons_action = driver.find_elements_by_class_name('button-box')[0].find_elements_by_tag_name('input')
buttons_action[1].click()

In [6]:
# show 100 results at a time
time.sleep(1.5)
dropdown_selection_results = driver.find_element_by_xpath("//label/span[contains(text(),'Results')]/following-sibling::select")
Select(dropdown_selection_results).select_by_value('100')
time.sleep(1.5)

In [7]:
# get row results and store in dataframes
df = pd.DataFrame(columns=COLUMNS)
soup = BeautifulSoup(driver.page_source, 'lxml')
rows = soup.find('div', class_='ui-grid-canvas').contents

for row in rows:
    if row != ' ':
        cells = row.find_all('div', class_='ui-grid-cell-contents')
        cells_text = map(lambda cell: cell.text.strip(), cells)
        new_df = pd.DataFrame([list(cells_text)], columns=COLUMNS)
new_df


Unnamed: 0,Case Number,DLC,Last Name,First Name,Missing Age,City,County,State,Sex,Race,Date Modified
0,MP3976,05/11/1986,Moreau,Kimberly,17 Years,Jay,Franklin,ME,Female,White / Caucasian,06/24/2022
