### Gathering Data from NamUs

NamUs is the National Missing and Unidentified Persons System, which is financed by the United States Department of Justice. NamUs does not provide an API, however it does provide a searchable interface.

In this notebook, we will use Selenium with beautifulSoup to retrieve and save data from NamUs as CSV.

In [1]:
# Import libraries
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.select import Select
from webdriver_manager.chrome import ChromeDriverManager
import time
from bs4 import BeautifulSoup
import pandas as pd
import re

In [2]:
# initialize global driver
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--incognito')
options.add_argument('--headless')
driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)




In [3]:
# Constants
CASE_NUMBER_KEY = 'Case Number'
LOCATION = ['Maine']

IMG_COLUMNS = [
    'Case Number',
    'Image Link'
]

INFO_COLUMNS = [
    'Case Number',
    'DLC',
    'Last Name',
    'First Name',
    'Missing Age',
    'City',
    'County',
    'State',
    'Sex',
    'Race',
    'Date Modified'
]

MAX_ROWS_PER_PAGE = 100

In [4]:
# find state filter
def apply_state_filters():
    section_on_circumstances = driver.find_element_by_id('Circumstances')
    labels_in_section = section_on_circumstances.find_elements_by_tag_name('label')

    state_input_box = None

    for label in labels_in_section:
        if (label.text == "State"):
            state_input_box = label.find_element_by_tag_name('input')
            # add state filter
            for state in LOCATION:
                state_input_box.send_keys(state)
                state_input_box.send_keys(Keys.ENTER)
    # navigate to list view
    buttons_action = driver.find_elements_by_class_name('button-box')[0].find_elements_by_tag_name('input')
    buttons_action[1].click()
    time.sleep(1.5)


In [5]:
# show 100 results at a time
def rows_to_show(num_rows):
    dropdown_selection_results = driver.find_element_by_xpath("//label/span[contains(text(),'Results')]/following-sibling::select")
    Select(dropdown_selection_results).select_by_value(f'{num_rows}')
    time.sleep(1.5)

In [6]:
def get_info_results():
    # navigate to list view get row results and store in dataframes
    driver.find_element_by_xpath("//i[@class=\"icon-list\"]").click()
    time.sleep(1.5)

    info_df = pd.DataFrame(columns=INFO_COLUMNS)
    soup = BeautifulSoup(driver.page_source, 'lxml')
    rows = soup.find('div', class_='ui-grid-canvas').contents
    
    for row in rows:
        if row != ' ':
            cells = row.find_all('div', class_='ui-grid-cell-contents')
            cells_text = map(lambda cell: cell.text.strip(), cells)
            new_info_df = pd.DataFrame([list(cells_text)], columns=INFO_COLUMNS)
            info_df = info_df.append(new_info_df, ignore_index=True)

    return info_df

In [7]:
def get_image_results():
    # navigate to grid view
    driver.find_element_by_xpath("//i[@class=\"icon-grid-six\"]").click()
    time.sleep(1.5)

    image_df = pd.DataFrame(columns=IMG_COLUMNS)
    soup = BeautifulSoup(driver.page_source, 'lxml')
    rows = soup.find('search-results-gallery').find_all('div', class_='card-stack')

    for row in rows:
        img_src = 'namus.gov' + row.find('img')['src']
        case_number_line = row.find('div', class_='top-row').find('span', class_='data-label').text.strip()
        case_number_start = re.search('#',case_number_line).span()[1]
        case_number = case_number_line[case_number_start:]
        new_img_df = pd.DataFrame([[case_number, img_src]], columns=IMG_COLUMNS)
        image_df = image_df.append(new_img_df, ignore_index=True)

    return image_df

In [8]:
print('Navigating to namus.gov...')
driver.get("https://www.namus.gov/MissingPersons/Search")

print('Adding selected states to filter...')
apply_state_filters()

print(f'Setting {MAX_ROWS_PER_PAGE} rows per page...')
rows_to_show(MAX_ROWS_PER_PAGE)

print('Gathering info...')
info_df = get_info_results()

print('Gathering images...')
image_df = get_image_results()

print('Merging Data...')
merged_df = pd.merge(left=info_df, right=image_df, left_on=CASE_NUMBER_KEY, right_on=CASE_NUMBER_KEY)


Navigating to namus.gov...
Adding selected states to filter...
Setting 100 rows per page...
Gathering info...


  info_df = info_df.append(new_info_df, ignore_index=True)
  info_df = info_df.append(new_info_df, ignore_index=True)
  info_df = info_df.append(new_info_df, ignore_index=True)
  info_df = info_df.append(new_info_df, ignore_index=True)
  info_df = info_df.append(new_info_df, ignore_index=True)
  info_df = info_df.append(new_info_df, ignore_index=True)
  info_df = info_df.append(new_info_df, ignore_index=True)
  info_df = info_df.append(new_info_df, ignore_index=True)
  info_df = info_df.append(new_info_df, ignore_index=True)
  info_df = info_df.append(new_info_df, ignore_index=True)
  info_df = info_df.append(new_info_df, ignore_index=True)
  info_df = info_df.append(new_info_df, ignore_index=True)
  info_df = info_df.append(new_info_df, ignore_index=True)
  info_df = info_df.append(new_info_df, ignore_index=True)
  info_df = info_df.append(new_info_df, ignore_index=True)
  info_df = info_df.append(new_info_df, ignore_index=True)
  info_df = info_df.append(new_info_df, ignore_index=Tru

Gathering images...


  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df,

Merging Data...


  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)
  image_df = image_df.append(new_img_df, ignore_index=True)


In [9]:
merged_df


Unnamed: 0,Case Number,DLC,Last Name,First Name,Missing Age,City,County,State,Sex,Race,Date Modified,Image Link
0,MP92565,06/09/2022,Conaway,James,66 Years,Big W Township,Somerset,ME,Male,White / Caucasian,06/24/2022,namus.gov/api/CaseSets/NamUs/MissingPersons/Ca...
1,MP93172,06/06/2022,Lacher,Graham,37 Years,Bangor,Penobscot,ME,Male,White / Caucasian,07/11/2022,namus.gov/api/CaseSets/NamUs/MissingPersons/Ca...
2,MP91650,04/13/2022,Carver,Randy,66 Years,Holden,Penobscot,ME,Male,White / Caucasian,07/12/2022,namus.gov/api/CaseSets/NamUs/MissingPersons/Ca...
3,MP90300,02/18/2022,Paradee,Jared,43 Years,Rockport,Knox,ME,Male,White / Caucasian,04/05/2022,namus.gov/api/CaseSets/NamUs/MissingPersons/Ca...
4,MP89506,02/11/2022,Lang,Nicholas,41 Years,Kittery,York,ME,Male,White / Caucasian,03/08/2022,namus.gov/api/CaseSets/NamUs/MissingPersons/Ca...
...,...,...,...,...,...,...,...,...,...,...,...,...
95,MP19763,08/24/1986,Meyer,Kaye,80 Years,Monson,Piscataquis,ME,Female,White / Caucasian,06/24/2022,namus.gov/api/CaseSets/NamUs/MissingPersons/Ca...
96,MP35491,08/11/1986,Simpson,Harold,28 Years,Livermore,Androscoggin,ME,Male,White / Caucasian,06/24/2022,namus.gov/api/CaseSets/NamUs/MissingPersons/Ca...
97,MP35636,06/06/1986,Thompson,Stephen,38 Years,Oakland,Kennebec,ME,Male,White / Caucasian,12/03/2021,namus.gov/api/CaseSets/NamUs/MissingPersons/Ca...
98,MP23215,06/02/1986,Letarte,Philip,64 Years,Woodland,Aroostook,ME,Male,White / Caucasian,12/03/2021,namus.gov/api/CaseSets/NamUs/MissingPersons/Ca...


In [10]:
driver.quit()