In [1]:
import requests
import tempfile
import os

In [None]:
def get_census_geocode_batch_results(
        inputpath : str,
        outputpath : str,
        header : bool = True,
        chunksize : int = 5000,
        id_col_name : str = 'id',
        append : bool = False

) -> None:
    """Geocode a CSV of addresses with the Census batch geocoder, in chunks.

    The input file is read line by line, split into chunks of at most
    ``chunksize`` lines, and each chunk is POSTed to the Census
    ``geographies/addressbatch`` endpoint. Each raw response body is appended
    to ``outputpath`` and a progress line is printed per chunk.

    Parameters
    ----------
    inputpath : str
        Path to the address file; every line (after the optional header) is
        sent to the service unchanged.
        NOTE(review): presumably each line must follow the layout the Census
        batch geocoder expects (unique id, street, city, state, zip) --
        confirm against the Census Geocoder documentation.
    outputpath : str
        Path of the CSV that receives the results.
    header : bool, default True
        If true, the first line of ``inputpath`` is treated as a header and
        is not sent to the geocoder.
    chunksize : int, default 5000
        Maximum number of address lines per request. Must be >= 1.
        NOTE(review): the Census service documents a per-file record limit
        (10,000 at the time of writing) -- keep ``chunksize`` below it.
    id_col_name : str, default 'id'
        Title used for the first column of the header row written to
        ``outputpath``.
    append : bool, default False
        If false, ``outputpath`` is overwritten and starts with a header row;
        if true, results are appended to the existing file and no header row
        is written.

    Raises
    ------
    ValueError
        If ``chunksize`` is smaller than 1.
    requests.HTTPError
        If the geocoder answers a chunk with an HTTP error status. Results of
        chunks completed before the failure remain in ``outputpath``.
    """
    # A non-positive chunk size would never advance through the file.
    if chunksize < 1:
        raise ValueError(f'chunksize must be a positive integer, got {chunksize!r}')

    col_titles = [
        id_col_name,
        "address",
        "match",
        "matchtype",
        "parsed",
        "coordinate",
        "tigerlineid",
        "side",
        "statefp",
        "countyfp",
        "tract",
        "block",
    ]

    url = "https://geocoding.geo.census.gov/geocoder/geographies/addressbatch"
    data = {
        'benchmark': 'Public_AR_Current',
        'vintage': 'Current_Current',
    }

    with open(inputpath) as f:
        addresses = f.readlines()

    # Index of the first address line (skips the header when present) and
    # the number of address lines that will be sent.
    first_row = int(header)
    total_rows = max(len(addresses) - first_row, 0)

    if not append:
        with open(outputpath, "w") as f:
            f.write(','.join(col_titles) + '\n')

    with tempfile.TemporaryDirectory() as dirname:
        # Iterate over absolute line indices up to len(addresses) so the last
        # line is never skipped, whatever the header/chunksize combination.
        for start_row in range(first_row, len(addresses), chunksize):
            end_row = min(start_row + chunksize, len(addresses))
            chunk = addresses[start_row:end_row]

            filepath = os.path.join(dirname, f'file{start_row}.csv')
            with open(filepath, 'w') as f:
                f.writelines(chunk)

            # Close the upload handle as soon as the request finishes so the
            # temporary directory can always be removed.
            with open(filepath, 'rb') as upload:
                files = {
                    'addressFile': (filepath, upload, 'text/csv')
                }
                response = requests.post(url, data=data, files=files)

            # Fail loudly instead of appending an error page to the results.
            response.raise_for_status()

            content = response.content
            with open(outputpath, "ab") as f:
                f.write(content)
                # Keep chunks on separate rows even if the service omits the
                # final newline.
                if content and not content.endswith(b'\n'):
                    f.write(b'\n')

            print(f'{end_row - first_row} of {total_rows} complete')



In [15]:
# Geocode the test addresses, posting them in batches of 30 lines.
get_census_geocode_batch_results(
    inputpath='data/interim/test.csv',
    outputpath='data/interim/results.csv',
    chunksize=30,
)

30 of 99 complete
60 of 99 complete
90 of 99 complete
99 of 99 complete


In [16]:
import pandas as pd

In [17]:
# Load the geocoded results written to 'data/interim/results.csv' and display them.
pd.read_csv('data/interim/results.csv')

Unnamed: 0,id,address,match,matchtype,parsed,coordinate,tigerlineid,side,statefp,countyfp,tract,block
0,22,"4927 S MARTIN LUTHER KING DR, CHICAGO, IL, 60615",Match,Exact,"4927 S DR MARTIN LUTHER KING JR DR, CHICAGO, I...","-87.616142433768,41.805239999244",111770690.0,L,17.0,31.0,381900.0,2003.0
1,23,"4523 S EVANS AVE, CHICAGO, IL, 60653",Match,Exact,"4523 S EVANS AVE, CHICAGO, IL, 60653","-87.608185831112,41.812601953534",605499311.0,L,17.0,31.0,836000.0,3013.0
2,24,"4143 S WABASH AVE, CHICAGO, IL, 60653",Match,Exact,"4143 S WABASH AVE, CHICAGO, IL, 60653","-87.624647644609,41.819347919061",111770447.0,L,17.0,31.0,380600.0,3009.0
3,25,"535 W 62ND ST, CHICAGO, IL, 60621",Match,Exact,"535 W 62ND ST, CHICAGO, IL, 60621","-87.638420048578,41.781662839884",111830330.0,L,17.0,31.0,834600.0,1064.0
4,26,"641 E PERSHING RD, CHICAGO, IL, 60653",Match,Exact,"641 E PERSHING RD, CHICAGO, IL, 60653","-87.611472361508,41.823841240029",111770732.0,R,17.0,31.0,380100.0,1003.0
...,...,...,...,...,...,...,...,...,...,...,...,...
94,94,"3627 S COTTAGE GROVE AVE, CHICAGO, IL, 60653",Match,Exact,"3627 S COTTAGE GROVE AVE, CHICAGO, IL, 60653","-87.609270571227,41.828751568456",111770706.0,L,17.0,31.0,360200.0,1003.0
95,95,"443 W 17TH ST, CHICAGO, IL, 60616",Match,Exact,"443 W 17TH ST, CHICAGO, IL, 60616","-87.638317328683,41.858808854467",111827819.0,L,17.0,31.0,841900.0,2069.0
96,96,"1706 W JACKSON BLVD, CHICAGO, IL, 60612",Match,Exact,"1706 W JACKSON BLVD, CHICAGO, IL, 60612","-87.669581753635,41.877636094037",111766270.0,R,17.0,31.0,838100.0,2021.0
97,97,"606 E 50TH ST, CHICAGO, IL, 60615",Match,Exact,"606 E 50TH ST, CHICAGO, IL, 60615","-87.611055970049,41.804060060312",605499793.0,L,17.0,31.0,843600.0,2023.0
