In [None]:
import csv
import os
import tempfile

import requests

from src.batchcensusgeocode import get_census_geocode_batch_results

In [5]:
def get_single_address(address, timeout=60):
    """Geocode one free-form address with the Census "onelineaddress" endpoint.

    Parameters
    ----------
    address : str
        The full address on a single line, e.g.
        '906 S BISHOP ST, CHICAGO, IL, 60607'.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 60).
        Without a timeout a stalled connection would hang the caller forever.

    Returns
    -------
    list
        The ``result.addressMatches`` list from the JSON response. The list
        is empty when the service found no candidate for the address.

    Raises
    ------
    requests.HTTPError
        If the service answers with a 4xx/5xx status code.
    """
    url = 'https://geocoding.geo.census.gov/geocoder/geographies/onelineaddress'
    params = {
        'benchmark': 'Public_AR_Current',
        'vintage': 'Current_Current',
        'format': 'json',
        'address': address
    }

    response = requests.post(url, data=params, timeout=timeout)
    # Fail loudly on an error status instead of trying to parse an error page.
    response.raise_for_status()

    payload = response.json()

    return payload['result']['addressMatches']

def break_address_ties(outputpath):
    """Re-geocode unresolved rows of a batch result file, one address at a time.

    Reads the CSV at ``outputpath``. For every row whose third column is
    ``'No_Match'`` or ``'Tie'``, the address in the second column is looked up
    with ``get_single_address``. When that lookup returns at least one
    candidate, the first candidate overwrites columns 2-11 of the row
    (match, matchtype, parsed, coordinate, tigerlineid, side, statefp,
    countyfp, tract, block). Rows that still have no candidate are left
    untouched. The file is then rewritten in place.

    Parameters
    ----------
    outputpath : str
        Path of the CSV file to read and overwrite.
    """
    n_columns = 12

    # newline='' is what the csv module expects for both reading and writing.
    with open(outputpath, 'r', newline='') as f:
        all_lines = list(csv.reader(f))

    for line in all_lines:
        # Blank lines parse as [] and malformed rows may be short; skip them.
        if len(line) < 3 or line[2] not in ('No_Match', 'Tie'):
            continue

        res = get_single_address(line[1])
        if not res:
            continue

        best = res[0]

        # Unresolved rows may carry fewer fields than matched rows; pad them
        # so the positional assignments below cannot raise IndexError.
        line.extend([''] * (n_columns - len(line)))

        line[2] = 'Match'
        # NOTE(review): the match type is hard-coded; nothing in the
        # single-address result is consulted to decide 'Exact' vs. another type.
        line[3] = 'Exact'
        line[4] = best['matchedAddress']
        coords = best['coordinates']
        line[5] = f"{coords['x']},{coords['y']}"
        line[6] = best['tigerLine']['tigerLineId']
        line[7] = best['tigerLine']['side']

        # The block layer's key is matched by substring rather than by an
        # exact name; the first non-empty layer whose key matches is used.
        for layer_name, entries in best['geographies'].items():
            if 'Census Block' in layer_name and entries:
                block_obj = entries[0]
                line[8] = block_obj['STATE']
                line[9] = block_obj['COUNTY']
                line[10] = block_obj['TRACT']
                line[11] = block_obj['BLOCK']
                break

    with open(outputpath, 'w', newline='') as outfile:
        csv.writer(outfile).writerows(all_lines)

In [None]:
def get_census_geocode_batch_results(
        inputpath : str,
        outputpath : str,
        header : bool = True,
        chunksize : int = 5000,
        id_col_name : str = 'id',
        append : bool = False,
        breakties : bool = False

):
    """Geocode a CSV of addresses through the Census batch endpoint in chunks.

    The input file is split into chunks of ``chunksize`` lines. Each chunk is
    uploaded to the ``addressbatch`` endpoint and the raw response body is
    appended to ``outputpath``.

    Parameters
    ----------
    inputpath : str
        Path to the address file; every line is sent to the service as-is.
    outputpath : str
        Path of the result file.
    header : bool
        When True the first line of ``inputpath`` is skipped.
    chunksize : int
        Number of input lines per request; must be at least 1.
        NOTE(review): the service enforces its own per-file record limit;
        confirm the current value in the Census documentation.
    id_col_name : str
        Title used for the first column of the header written to the output.
    append : bool
        When False the output file is truncated and a header line is written
        first; when True results are appended to whatever is already there.
    breakties : bool
        When True, ``break_address_ties(outputpath)`` is run after all chunks
        have been written.

    Raises
    ------
    ValueError
        If ``chunksize`` is smaller than 1.
    requests.HTTPError
        If the service answers a chunk with a 4xx/5xx status code.
    """
    if chunksize < 1:
        # A non-positive step would never advance through the input.
        raise ValueError('chunksize must be a positive integer')

    col_titles = [
        id_col_name,
        "address",
        "match",
        "matchtype",
        "parsed",
        "coordinate",
        "tigerlineid",
        "side",
        "statefp",
        "countyfp",
        "tract",
        "block",
    ]

    url = "https://geocoding.geo.census.gov/geocoder/geographies/addressbatch"
    data = {
        'benchmark': 'Public_AR_Current',
        'vintage': 'Current_Current',
    }

    with open(inputpath) as f:
        addresses = f.readlines()

    first_row = int(header)
    total_rows = max(len(addresses) - first_row, 0)

    if not append:
        with open(outputpath, "w") as f:
            f.write(','.join(col_titles) + '\n')

    with tempfile.TemporaryDirectory() as dirname:
        # Iterate over line indexes of the whole file (not the data-row count)
        # so the final line is still sent when a header is being skipped.
        for start_row in range(first_row, len(addresses), chunksize):
            end_row = start_row + chunksize
            chunk = addresses[start_row:end_row]

            filepath = os.path.join(dirname, f'file{start_row}.csv')
            with open(filepath, 'w') as f:
                f.writelines(chunk)

            # Close the upload handle as soon as the request has been sent.
            with open(filepath, 'rb') as upload:
                files = {
                    'addressFile': (os.path.basename(filepath), upload, 'text/csv')
                }
                response = requests.post(url, data=data, files=files)

            # Do not append an error page to the result file.
            response.raise_for_status()

            content = response.content
            with open(outputpath, "ab") as f:
                f.write(content)
                # Keep chunks from running together on one line if a response
                # body does not end with a newline.
                if content and not content.endswith(b'\n'):
                    f.write(b'\n')

            done = min(end_row, len(addresses)) - first_row
            print(f'{done} of {total_rows} complete')

    if breakties:
        print('Breaking ties...')
        break_address_ties(outputpath)



In [9]:
# Geocode the test file 30 lines per request, then retry No_Match/Tie rows individually.
get_census_geocode_batch_results(
    inputpath='data/interim/test.csv',
    outputpath='data/interim/results2.csv',
    chunksize=30,
    breakties=True,
)

30 of 99 complete
60 of 99 complete
90 of 99 complete
99 of 99 complete
breaking ties


In [20]:
import pandas as pd

In [21]:
# Load the geocoded output for inspection.
# NOTE(review): this reads results.csv, while the batch call earlier in this
# notebook writes results2.csv — confirm this is the intended file.
pd.read_csv('data/interim/results.csv')

Unnamed: 0,id,address,match,matchtype,parsed,coordinate,tigerlineid,side,statefp,countyfp,tract,block
0,22,"4927 S MARTIN LUTHER KING DR, CHICAGO, IL, 60615",Match,Exact,"4927 S DR MARTIN LUTHER KING JR DR, CHICAGO, I...","-87.616142433768,41.805239999244",111770690.0,L,17.0,31.0,381900.0,2003.0
1,23,"4523 S EVANS AVE, CHICAGO, IL, 60653",Match,Exact,"4523 S EVANS AVE, CHICAGO, IL, 60653","-87.608185831112,41.812601953534",605499311.0,L,17.0,31.0,836000.0,3013.0
2,24,"4143 S WABASH AVE, CHICAGO, IL, 60653",Match,Exact,"4143 S WABASH AVE, CHICAGO, IL, 60653","-87.624647644609,41.819347919061",111770447.0,L,17.0,31.0,380600.0,3009.0
3,25,"535 W 62ND ST, CHICAGO, IL, 60621",Match,Exact,"535 W 62ND ST, CHICAGO, IL, 60621","-87.638420048578,41.781662839884",111830330.0,L,17.0,31.0,834600.0,1064.0
4,26,"641 E PERSHING RD, CHICAGO, IL, 60653",Match,Exact,"641 E PERSHING RD, CHICAGO, IL, 60653","-87.611472361508,41.823841240029",111770732.0,R,17.0,31.0,380100.0,1003.0
...,...,...,...,...,...,...,...,...,...,...,...,...
94,94,"3627 S COTTAGE GROVE AVE, CHICAGO, IL, 60653",Match,Exact,"3627 S COTTAGE GROVE AVE, CHICAGO, IL, 60653","-87.609270571227,41.828751568456",111770706.0,L,17.0,31.0,360200.0,1003.0
95,95,"443 W 17TH ST, CHICAGO, IL, 60616",Match,Exact,"443 W 17TH ST, CHICAGO, IL, 60616","-87.638317328683,41.858808854467",111827819.0,L,17.0,31.0,841900.0,2069.0
96,96,"1706 W JACKSON BLVD, CHICAGO, IL, 60612",Match,Exact,"1706 W JACKSON BLVD, CHICAGO, IL, 60612","-87.669581753635,41.877636094037",111766270.0,R,17.0,31.0,838100.0,2021.0
97,97,"606 E 50TH ST, CHICAGO, IL, 60615",Match,Exact,"606 E 50TH ST, CHICAGO, IL, 60615","-87.611055970049,41.804060060312",605499793.0,L,17.0,31.0,843600.0,2023.0


In [63]:
# Look up one address written as a house-number range to inspect the single-address result.
res1 = get_single_address('900-906 S BISHOP STREET, CHICAGO, IL, 60607')

In [64]:
# Display the list of candidate matches stored in res1.
res1

[{'tigerLine': {'side': 'R', 'tigerLineId': '641801079'},
  'geographies': {'States': [{'STATENS': '01779784',
     'GEOID': '17',
     'CENTLAT': '+40.1006068',
     'AREAWATER': '6215985268',
     'STATE': '17',
     'BASENAME': 'Illinois',
     'STUSAB': 'IL',
     'OID': '27490100360203',
     'LSADC': '00',
     'FUNCSTAT': 'A',
     'INTPTLAT': '+40.1028754',
     'DIVISION': '3',
     'NAME': 'Illinois',
     'REGION': '2',
     'OBJECTID': 35,
     'CENTLON': '-089.1500794',
     'AREALAND': '143778736090',
     'INTPTLON': '-089.1526108',
     'MTFCC': 'G4000'}],
   'Combined Statistical Areas': [{'POP100': '',
     'GEOID': '176',
     'CENTLAT': '+41.6973467',
     'AREAWATER': '6301083014',
     'BASENAME': 'Chicago-Naperville, IL-IN-WI',
     'OID': '2619034687907999',
     'LSADC': 'M0',
     'FUNCSTAT': 'S',
     'INTPTLAT': '+41.6992315',
     'NAME': 'Chicago-Naperville, IL-IN-WI CSA',
     'OBJECTID': 178,
     'CSA': '176',
     'CENTLON': '-088.0058328',
     'INTPT

In [None]:
# "x,y" coordinate string of the first candidate. The outer quotes differ from
# the inner ones because reusing the same quote type inside an f-string
# expression is a syntax error on Python versions before 3.12.
coords = f"{res1[0]['coordinates']['x']},{res1[0]['coordinates']['y']}"

'-87.662975665134,41.870441813596'

In [None]:
# Matched address string reported for the first candidate.
parsed = res1[0]['matchedAddress']

'906 S BISHOP ST, CHICAGO, IL, 60607'

In [75]:
# TIGER/Line id and street side reported for the first candidate.
tigerid = res1[0]['tigerLine']['tigerLineId']
side = res1[0]['tigerLine']['side']

In [81]:
# State identifier taken from the GEOID of the first 'States' entry
# ('17' in the res1 output shown earlier in this notebook).
statefp = res1[0]['geographies']['States'][0]['GEOID']

In [None]:
# Scan every geography layer of the first candidate; with no break, the last
# key containing 'Census Block' is the one left in block_obj.
for k in res1[0]['geographies']:
    if 'Census Block' not in k:
        continue
    block_obj = res1[0]['geographies'][k][0]
        


{'SUFFIX': '', 'GEOID': '170312831002001', 'CENTLAT': '+41.8707298', 'BLOCK': '2001', 'AREAWATER': '0', 'STATE': '17', 'BASENAME': '2001', 'OID': '210701006291813', 'LSADC': 'BK', 'FUNCSTAT': 'S', 'INTPTLAT': '+41.8707298', 'NAME': 'Block 2001', 'OBJECTID': 3993514, 'TRACT': '283100', 'CENTLON': '-087.6629047', 'BLKGRP': '2', 'AREALAND': '21661', 'INTPTLON': '-087.6629047', 'MTFCC': 'G5040', 'LWBLKTYP': 'L', 'UR': 'U', 'COUNTY': '031'}


In [None]:
# Probe the no-match case with an address that should not exist.
# NOTE(review): the saved output for this cell shows the whole response dict,
# whereas get_single_address as defined in this notebook returns only
# result['addressMatches'] — the output looks stale; re-run the cell.
get_single_address('123 Fake Street, Erlang, NC')

{'result': {'input': {'address': {'address': '123 Fake Street, Erlang, NC'},
   'vintage': {'isDefault': True,
    'id': '4',
    'vintageName': 'Current_Current',
    'vintageDescription': 'Current Vintage - Current Benchmark'},
   'benchmark': {'isDefault': True,
    'benchmarkDescription': 'Public Address Ranges - Current Benchmark',
    'id': '4',
    'benchmarkName': 'Public_AR_Current'}},
  'addressMatches': []}}

In [94]:
# 3x3 list of single-character strings used to check in-place row mutation.
test_arr = [list(letters) for letters in ('abc', 'def', 'ghi')]

In [98]:
# Change the middle row through the loop variable, then print the outer list
# to show that the change is visible there too (rows are not copied).
for i, arr in enumerate(test_arr):
    if i != 1:
        continue
    arr[2] = '4'
    print(arr)

print(test_arr)

['d', 'e', '4']
[['a', 'b', 'c'], ['d', 'e', '4'], ['g', 'h', 'i']]
