In [1]:
import requests
import json
import redis
import pandas as pd
from urllib.parse import urlencode

In [2]:
# caching: redis connection used to store geocoding responses
_REDIS_SETTINGS = {"host": "localhost", "port": 6379, "db": 0}
redis_client = redis.Redis(**_REDIS_SETTINGS)

In [3]:
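# Define `api_key` (the Google Maps API key read by fetch_place) in this cell; the key itself is not included here.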

In [4]:
def extract_lat_lng(address, address_type = 'administrative_area_level_2', update = False):
    """
    Geocode an address and describe the county, state, and country it belongs to.

    Parameters
    ----------
    address : str
        Free-text address handed to ``fetch_place``.
    address_type : str
        Currently unused; kept so that existing callers keep working.
    update : bool
        Forwarded to ``fetch_place`` for both the address lookup and the
        follow-up county lookup.

    Returns
    -------
    list
        ``[address, place_type, lat, lng, county, county_lat, county_lng, state, country]``.
        When ``fetch_place`` returns nothing for the address, every field after
        ``address`` is None. Individual fields are None when the geocoding
        result does not provide them.
    """
    county, county_lat, county_lng, state, country = None, None, None, None, None

    places = fetch_place(address, update) # returns a list of places
    if not places:
        return [address] + [None] * 8

    # prioritizing the political category, e.g., Ireland as a country over a natural establishment
    political_places = [p for p in places if len(p['types']) > 1 and p['types'][1] == 'political']
    place = political_places[0] if political_places else places[0]

    # a result may carry an empty type list; report None instead of raising
    place_types = place.get('types') or []
    place_type = place_types[0] if place_types else None

    # extracting the latitude and longitude of the place
    location = place['geometry']['location']
    lat = location['lat']
    lng = location['lng']

    # extracting the county, state, and country the place belongs to, if any
    for level in place.get('address_components', []):
        level_types = level.get('types') or []
        if not level_types:
            continue  # untyped component: nothing to classify
        level_type = level_types[0]
        if level_type == 'administrative_area_level_2':
            county = level['long_name']
        elif level_type == 'administrative_area_level_1':
            state = level['short_name']
        elif level_type == 'country':
            country = level['short_name']

    # extracting the latitude and longitude of the county
    if county and place_type == 'administrative_area_level_2':
        # the place itself is the county, so reuse its coordinates
        county_lat = lat
        county_lng = lng
    elif county:
        # leave out missing parts so the query never contains the text "None"
        county_query = ", ".join(part for part in (county, state, country) if part)
        county_places = fetch_place(county_query, update)
        # the county lookup can come back empty; keep the county coordinates as None then
        if county_places:
            county_location = county_places[0]['geometry']['location']
            county_lat = county_location['lat']
            county_lng = county_location['lng']

    return [address, place_type, lat, lng, county, county_lat, county_lng, state, country]

In [5]:
def fetch_place(address, update:bool = False):
    """
    Return the geocoding results for an address, using redis as a cache.

    The cache is consulted first (unless ``update`` is true). On a miss the
    Google Maps Geocoding API is queried and the result list is stored in
    redis under ``"<address>_place"``.

    Parameters
    ----------
    address : str
        Free-text address. A few known-problematic spellings are replaced by
        a fixed alternative before the lookup.
    update : bool
        When true, skip the cache and query the API again.

    Returns
    -------
    list
        The ``results`` list of the geocoding response; an empty list when the
        request fails or nothing matches.

    Raises
    ------
    requests.RequestException
        If the HTTP request itself fails (connection error, timeout).

    Notes
    -----
    Only definitive answers (API status ``OK`` or ``ZERO_RESULTS``) are cached.
    HTTP failures and API-level errors such as quota exhaustion are not
    written to redis, so that a transient failure is retried on the next call
    instead of being served from the cache forever.
    """
    # spellings that need to be replaced by a fixed query before the lookup
    fixed_addresses = {
        "Richmond, Henrico County, Va": "Richmond, Va",
        "St. Louis, St. Louis County, Mo": "St. Louis, Mo",
        "New York City, New York County, N.Y": "New York City, N.Y",
        "Metropolitan City of Palermo, Sicily, IT": "Palermo, Sicily, Italy",
    }
    address = fixed_addresses.get(address, address)

    place_key = f"{address}_place"

    if not update:
        cached = redis_client.get(place_key)
        if cached:
            cached_place = json.loads(cached)
            # entries that are not a list were written by failed requests; refetch those
            if isinstance(cached_place, list):
                print('Found place in cache, serving from redis...')
                return cached_place

    print('Could not find place in cache. Retrieving from Google Maps API...')
    endpoint = "https://maps.googleapis.com/maps/api/geocode/json"
    params = {"address": address, "key": api_key}
    url = f"{endpoint}?{urlencode(params)}"
    # a timeout keeps one stalled request from blocking a long batch run indefinitely
    r = requests.get(url, timeout=30)
    if not 200 <= r.status_code < 300:
        return []

    payload = r.json()
    place = payload.get('results', [])
    status = payload.get('status')
    if status in ('OK', 'ZERO_RESULTS'):
        redis_client.set(place_key, json.dumps(place))
    else:
        print(f"Geocoding API returned status {status!r} for {address!r}; result not cached.")

    return place

In [6]:
# example: an address for which no place is found
example_address = "County Monaghan, Ireland"
fetch_place(example_address, update=False)

Found place in cache, serving from redis...


[]

In [7]:
# read the birthplaces/schools table from the results folder
birthplaces_csv = "results/bioguide_birth_places_schools.csv"
df = pd.read_csv(birthplaces_csv)
df.shape

(12967, 27)

In [8]:
# geocode every birthplace; rows without a birthplace string stay None
birthplaces = df["birthplace"]
row_count = len(birthplaces)
locations = [None] * row_count

for row in range(row_count):
    print(row, end=' ')
    candidate = birthplaces[row]
    if type(candidate) is str: # anything else is a missing value
        locations[row] = extract_lat_lng(candidate, update=False)

0 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3 Found place in cache, serving from redis...
4 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5 6 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11 Found place in cache, serving from redis...
Found place in cache, serving from redis...
12 Found place in cache, serving from redis...
13 

Found place in cache, serving from redis...
651 Found place in cache, serving from redis...
652 Found place in cache, serving from redis...
Found place in cache, serving from redis...
653 Found place in cache, serving from redis...
Found place in cache, serving from redis...
654 Found place in cache, serving from redis...
Found place in cache, serving from redis...
655 Found place in cache, serving from redis...
Found place in cache, serving from redis...
656 Found place in cache, serving from redis...
Found place in cache, serving from redis...
657 Found place in cache, serving from redis...
Found place in cache, serving from redis...
658 659 Found place in cache, serving from redis...
Found place in cache, serving from redis...
660 Found place in cache, serving from redis...
Found place in cache, serving from redis...
661 Found place in cache, serving from redis...
Found place in cache, serving from redis...
662 Found place in cache, serving from redis...
Found place in cache, servin

1089 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1090 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1091 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1092 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1093 Found place in cache, serving from redis...
1094 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1095 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1096 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1097 Found place in cache, serving from redis...
1098 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1099 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1100 Found place in cache, serving from redis...
Found place in c

Found place in cache, serving from redis...
1517 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1518 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1519 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1520 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1521 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1522 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1523 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1524 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1525 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1526 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1527 Found place in cache,

2125 Found place in cache, serving from redis...
2126 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2127 Found place in cache, serving from redis...
2128 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2129 Found place in cache, serving from redis...
2130 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2131 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2132 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2133 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2134 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2135 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2136 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2137 Found place

2624 Found place in cache, serving from redis...
2625 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2626 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2627 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2628 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2629 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2630 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2631 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2632 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2633 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2634 Found place in cache, serving from redis...
Found place in cache, serving from redis...
2635 Found place in c

3033 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3034 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3035 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3036 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3037 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3038 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3039 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3040 Found place in cache, serving from redis...
3041 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3042 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3043 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3044 Found place in c

3558 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3559 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3560 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3561 Found place in cache, serving from redis...
3562 Found place in cache, serving from redis...
3563 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3564 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3565 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3566 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3567 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3568 Found place in cache, serving from redis...
3569 Found place in cache, serving from redis...
Found place in cache, serving from redis...
3570 Found place

4068 Found place in cache, serving from redis...
Found place in cache, serving from redis...
4069 Found place in cache, serving from redis...
Found place in cache, serving from redis...
4070 Found place in cache, serving from redis...
Found place in cache, serving from redis...
4071 Found place in cache, serving from redis...
Found place in cache, serving from redis...
4072 Found place in cache, serving from redis...
4073 Found place in cache, serving from redis...
Found place in cache, serving from redis...
4074 Found place in cache, serving from redis...
4075 Found place in cache, serving from redis...
Found place in cache, serving from redis...
4076 Found place in cache, serving from redis...
Found place in cache, serving from redis...
4077 Found place in cache, serving from redis...
4078 Found place in cache, serving from redis...
Found place in cache, serving from redis...
4079 Found place in cache, serving from redis...
Found place in cache, serving from redis...
4080 Found place

4500 Found place in cache, serving from redis...
4501 Found place in cache, serving from redis...
Found place in cache, serving from redis...
4502 Found place in cache, serving from redis...
Found place in cache, serving from redis...
4503 Found place in cache, serving from redis...
Found place in cache, serving from redis...
4504 Found place in cache, serving from redis...
4505 Found place in cache, serving from redis...
Found place in cache, serving from redis...
4506 Found place in cache, serving from redis...
4507 Found place in cache, serving from redis...
Found place in cache, serving from redis...
4508 Found place in cache, serving from redis...
Found place in cache, serving from redis...
4509 Found place in cache, serving from redis...
Found place in cache, serving from redis...
4510 Found place in cache, serving from redis...
4511 Found place in cache, serving from redis...
Found place in cache, serving from redis...
4512 Found place in cache, serving from redis...
Found place

Found place in cache, serving from redis...
5093 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5094 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5095 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5096 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5097 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5098 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5099 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5100 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5101 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5102 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5103 Found place in cache,

5573 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5574 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5575 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5576 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5577 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5578 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5579 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5580 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5581 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5582 Found place in cache, serving from redis...
Found place in cache, serving from redis...
5583 Found place in cache, serving from redis...
Found place in cache,

Found place in cache, serving from redis...
6011 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6012 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6013 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6014 Found place in cache, serving from redis...
6015 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6016 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6017 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6018 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6019 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6020 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6021 Found place in cache, serving from redis...
Found place in cache,

Found place in cache, serving from redis...
6682 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6683 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6684 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6685 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6686 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6687 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6688 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6689 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6690 Found place in cache, serving from redis...
Found place in cache, serving from redis...
6691 Found place in cache, serving from redis...
6692 Found place in cache, serving from redis...
Found place in cache,

7190 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7191 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7192 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7193 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7194 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7195 Found place in cache, serving from redis...
7196 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7197 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7198 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7199 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7200 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7201 Found place in c

7624 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7625 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7626 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7627 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7628 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7629 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7630 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7631 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7632 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7633 Found place in cache, serving from redis...
Found place in cache, serving from redis...
7634 7635 Found place in cache, serving from redis...
Found place in c

8266 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8267 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8268 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8269 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8270 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8271 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8272 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8273 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8274 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8275 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8276 Found place in cache, serving from redis...
Found place in cache,

8770 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8771 Found place in cache, serving from redis...
8772 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8773 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8774 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8775 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8776 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8777 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8778 Found place in cache, serving from redis...
8779 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8780 Found place in cache, serving from redis...
8781 Found place in cache, serving from redis...
Found place in cache, serving from redis...
8782 Found place

9204 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9205 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9206 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9207 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9208 Found place in cache, serving from redis...
9209 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9210 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9211 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9212 Found place in cache, serving from redis...
9213 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9214 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9215 Found place in cache, serving from redis...
Found place in c

9845 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9846 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9847 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9848 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9849 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9850 Found place in cache, serving from redis...
9851 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9852 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9853 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9854 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9855 Found place in cache, serving from redis...
Found place in cache, serving from redis...
9856 Found place in c

10353 Found place in cache, serving from redis...
10354 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10355 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10356 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10357 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10358 Found place in cache, serving from redis...
10359 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10360 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10361 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10362 Found place in cache, serving from redis...
10363 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10364 Found place in cache, serving from redis...
Found place in cache, serving from redis...
1036

10774 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10775 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10776 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10777 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10778 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10779 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10780 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10781 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10782 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10783 Found place in cache, serving from redis...
Found place in cache, serving from redis...
10784 Found place in cache, serving from redis...
Found plac

Found place in cache, serving from redis...
11439 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11440 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11441 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11442 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11443 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11444 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11445 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11446 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11447 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11448 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11449 Found plac

Found place in cache, serving from redis...
11954 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11955 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11956 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11957 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11958 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11959 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11960 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11961 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11962 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11963 Found place in cache, serving from redis...
Found place in cache, serving from redis...
11964 Found plac

Found place in cache, serving from redis...
12383 Found place in cache, serving from redis...
Found place in cache, serving from redis...
12384 Found place in cache, serving from redis...
Found place in cache, serving from redis...
12385 Found place in cache, serving from redis...
Found place in cache, serving from redis...
12386 Found place in cache, serving from redis...
12387 Found place in cache, serving from redis...
12388 Found place in cache, serving from redis...
Found place in cache, serving from redis...
12389 Found place in cache, serving from redis...
Found place in cache, serving from redis...
12390 Found place in cache, serving from redis...
Found place in cache, serving from redis...
12391 Found place in cache, serving from redis...
Found place in cache, serving from redis...
12392 Found place in cache, serving from redis...
Found place in cache, serving from redis...
12393 Found place in cache, serving from redis...
Found place in cache, serving from redis...
12394 Foun

In [9]:
# number of places whose initial input unit is a locality
# (rows that were never geocoded are None and are skipped explicitly,
# instead of relying on a swallowed TypeError)
locality_num = sum(
    1
    for location in locations
    if location is not None and location[1] == 'locality'
)
locality_num

10182

In [10]:
location_columns = ["address", "place_type", "lat", "lng", "county", "county_lat", "county_lng", "state", "country"]

# rows that were never geocoded become all-None records so every row has one value per column
locations[:] = [
    record if record is not None else [None] * len(location_columns)
    for record in locations
]
df_locations = pd.DataFrame(locations, columns=location_columns)
df_locations

Unnamed: 0,address,place_type,lat,lng,county,county_lat,county_lng,state,country
0,"Fairfield, Jefferson County, Iowa",locality,41.007611,-91.963691,Jefferson County,41.023636,-91.909924,IA,US
1,"Selma, Dallas County, Ala",locality,32.407359,-87.021101,Dallas County,32.233214,-87.142289,AL,US
2,"Omaha, Douglas County, Nebr",locality,41.256537,-95.934503,Douglas County,41.314812,-96.195132,NE,US
3,"Todd County, Ky",administrative_area_level_2,36.833864,-87.142289,Todd County,36.833864,-87.142289,KY,US
4,"Johnstown, Dauphin County, Pa",locality,40.326741,-78.921970,Cambria County,40.489423,-78.747621,PA,US
...,...,...,...,...,...,...,...,...,...
12962,"Albemarle County, Va",administrative_area_level_2,38.056709,-78.611500,Albemarle County,38.056709,-78.611500,VA,US
12963,"New London, Conn",locality,41.355654,-72.099521,New London County,41.518519,-72.046816,CT,US
12964,"Brooklyn, N.Y",political,40.678178,-73.944158,Kings County,40.652876,-73.959494,NY,US
12965,"Columbia County, Ga",administrative_area_level_2,33.509851,-82.258330,Columbia County,33.509851,-82.258330,GA,US


In [11]:
# birthplaces for which no latitude was obtained
lat_missing = df_locations['lat'].isnull()
df.loc[lat_missing, 'birthplace'].to_numpy()

array([nan, nan, nan, nan, 'State', 'County Monaghan, Ireland', 'State',
       nan, nan, nan, nan, 'State', nan, nan, nan, nan, nan, nan,
       'Manat&iacute', nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
       'State', 'July', nan, nan, nan, nan, nan, nan, nan, 'State', nan,
       nan, 'State', nan, nan, nan, nan, 'State', nan, nan, nan, nan,
       'St. Lukes Parish, S.C', 'State', nan, 'State', nan, nan, nan,
       'State', nan, nan, nan, nan, nan, nan, nan, nan, nan], dtype=object)

In [12]:
# put the geocoding columns to the right of the original table
df_geocoded = pd.concat((df, df_locations), axis="columns")
df_geocoded.head(5)

Unnamed: 0.1,Unnamed: 0,usCongressBioId,familyName,givenName,middleName,honorificPrefix,unaccentedFamilyName,unaccentedGivenName,unaccentedMiddleName,birthDate,...,public school,address,place_type,lat,lng,county,county_lat,county_lng,state,country
0,0,W000374,White,Hays,Baxter,Mr.,White,Hays,Baxter,1855-09-21,...,0,"Fairfield, Jefferson County, Iowa",locality,41.007611,-91.963691,Jefferson County,41.023636,-91.909924,IA,US
1,1,L000226,Lehman,William,,,Lehman,William,,1913,...,0,"Selma, Dallas County, Ala",locality,32.407359,-87.021101,Dallas County,32.233214,-87.142289,AL,US
2,2,F000260,Ford,Gerald,Rudolph,Mr.,Ford,Gerald,Rudolph,1913-07-14,...,1,"Omaha, Douglas County, Nebr",locality,41.256537,-95.934503,Douglas County,41.314812,-96.195132,NE,US
3,3,M000777,Mills,Roger,Quarles,Mr.,Mills,Roger,Quarles,1832-03-30,...,0,"Todd County, Ky",administrative_area_level_2,36.833864,-87.142289,Todd County,36.833864,-87.142289,KY,US
4,4,W000724,Woods,John,,Mr.,Woods,John,,1794-10-18,...,0,"Johnstown, Dauphin County, Pa",locality,40.326741,-78.92197,Cambria County,40.489423,-78.747621,PA,US


In [13]:
# write the combined table to the results folder, without the index column
geocoded_csv = "results/geocoded_birthplaces.csv"
df_geocoded.to_csv(geocoded_csv, index=False)