### Load all of the packages

In [258]:
from bs4 import BeautifulSoup
import requests
import sys
import pandas as pd
from geopy.geocoders import Nominatim

### Download the .HTML of a Certain Page and Name

In [140]:
def get_file(first_name ="George", last_name="Washington", page="1"):
    # define search url based on user input
    url = (
        f"https://www.findagrave.com/memorial/search?firstname={first_name}&"
        f"middlename=&lastname={last_name}&birthyear=&birthyearfilter=&"
        "deathyear=&deathyearfilter=&location=&locationId=&memorialid=&mcid=&"
        f"linkedToName=&datefilter=&orderby=b&plot=&page={page}#sr-1075")

    page_HTML = requests.get(url) # HTML from URL

    # successful url request code is 200
    if page_HTML.status_code != 200:
        sys.exit("Connection Failed.") # stop execution and return error

    return page_HTML


### Go Through All Entries for a Certain Name and Return a Dataframe

In [247]:
def get_info(first_name ="George", last_name="Washington"):
    #Define the empty dataframe
    data_table = pd.DataFrame(columns = ["Names", "Dates", "Location of Grave"])

    # get HTML of specified page number
    page = get_file(first_name, last_name, "1")

    # Parse HTML into Beautiful Soup format
    parsed = BeautifulSoup(page.content, "html.parser")

    #Find the div containing the maximum number of pages
    max_page = parsed.find(id = "gotoPage")

    #max_page functions as a dictionary where we can enter the
    #string of any element in the div to find its value
    max_page = max_page["max"]


    #iterate through each page
    for index in range(int(max_page)):

        # get HTML of specified page number
        page = get_file(first_name, last_name, str(int(index) + 1))

        # Parse HTML into Beautiful Soup format
        parsed = BeautifulSoup(page.content, "html.parser")

        #Create a list of all tag with an attribute class "memorial-item"
        #tags act like containers that contain attributes and text
        grave_infos = parsed.find_all(class_ = "memorial-item")

        #Go through each item in the list
        for grave_info in grave_infos:
            #First check name tag exists and contains text and includes first and last name
            if grave_info.find("i") is not None and grave_info.find("i").string is not None and first_name in grave_info.find("i").string and last_name in grave_info.find("i").string:
                name = grave_info.find("i").string
                #Check that birth and death date exist and does not contain unknown
                if grave_info.find(class_ = "birthDeathDates") is not None and "unknown" not in grave_info.find(class_ = "birthDeathDates").string:
                    #find the text with class = birthDeathDates
                    date = grave_info.find(class_ = "birthDeathDates").string
                else: date = "NA"

                #make sure that the grave data actually exists
                if grave_info.find("p", attrs = {'class':'addr-cemet'}) is not None:
                    #find the text in <p> with class = addr-cemet
                    grave_address = grave_info.find("p", attrs = {'class':'addr-cemet'}).string
                else: grave_address = "NA"

                #record the date in the Dates column
                data_table = data_table.append({"Names": name, "Dates": date, "Location of Grave": grave_address}, ignore_index= True)

    #split birthday and death date
    data_table[["Birth Date", "Death Date"]] = data_table["Dates"].str.split("–", expand = True, n=1)
    #get rid of any NA values
    data_table = data_table.dropna()

    return data_table

In [250]:
data = get_info()


In [256]:
data["Birth Date"] = pd.to_datetime(data["Birth Date"])
data

ParserError: Unknown string format: 7 ??? 1891 

In [None]:

data["Birth Date", "Death Date"] = pd.to_datetime(data["Birth Date", "Death Date"], infer_datetime_format=True, format="%Y")

#Save dataframe as a csv
data.to_csv (r'/home/phillip/github/deceased-george-washington-stats/DataFrames/export_dataframe.csv', index = False, header=True)

