In [None]:
import pandas as pd
import requests

In [None]:
# read the inspection records and keep only the address component columns,
# renaming the building-number column to the shorter BLDG
addr = (
    pd.read_csv('../data/housing_inspections_an.csv', encoding='iso-8859-1')
    .loc[:, ['BLDGNO_confidential', 'STNAME', 'STTYPE', 'QUAD']]
    .rename(columns={'BLDGNO_confidential': 'BLDG'})
)
print(addr.shape)
addr.head()

In [None]:
# report per-column null counts, then discard incomplete and repeated rows
print(addr.isna().sum())
addr = addr.dropna().drop_duplicates()
addr.shape

In [None]:
# convert building number to an integer-formatted string, so a value that was
# read as 2722.0 becomes '2722'
# assign() builds a new frame instead of writing a column into the frame that
# came out of dropna()/drop_duplicates(), which can trigger pandas'
# SettingWithCopyWarning.
addr = addr.assign(BLDG=addr['BLDG'].astype(int).astype(str))
addr.head()

In [None]:
# review http://citizenatlas.dc.gov/newwebservices/locationverifier.asmx?op=findLocation2
host = 'http://citizenatlas.dc.gov'
url = '/newwebservices/locationverifier.asmx/findLocation2'
# Content-Length is intentionally not set here: requests computes it from the
# encoded form body, so a literal 'length' placeholder carries no information.
headers = {'Content-Type': 'application/x-www-form-urlencoded'}

# access, parse, and transform
def get_address_info(addr_str, timeout=30):
    """Look up one address string with the MAR findLocation2 web service.

    Sends a form-encoded POST to ``host + url`` asking for a JSON response and
    returns the first record of ``returnDataset -> Table1``.

    Parameters
    ----------
    addr_str : str
        Free-text address, e.g. ``'2722 olive st nw'``.
    timeout : float, optional
        Seconds to wait for the service before giving up (default 30), so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    dict
        The first matching record, or an empty dict when the response has no
        ``returnDataset`` / ``Table1`` entries. An empty dict merges cleanly
        into a row dict, so one unmatched address does not abort a batch run.

    Raises
    ------
    requests.HTTPError
        If the service answers with a 4xx/5xx status.
    """
    data = {'str': addr_str, 'f': 'json'}
    r = requests.post(url=host+url, data=data, headers=headers, timeout=timeout)
    # fail loudly on HTTP errors instead of trying to parse an error page
    r.raise_for_status()
    parsed = r.json()
    # either level may be missing/null/empty when nothing matched the address
    dataset = parsed.get('returnDataset') or {}
    matches = dataset.get('Table1') or []
    return matches[0] if matches else {}

In [None]:
# smoke-test the lookup with a single hand-written address
test_addr = '2722 olive st  nw'
print(get_address_info(addr_str=test_addr))

In [None]:
# automate info retrieval: build the single-line address string that is sent
# to the lookup function
# assign() returns a new frame instead of adding a column to a frame derived
# from row filtering, which can trigger pandas' SettingWithCopyWarning.
addr = addr.assign(
    ADDRESS=addr['BLDG'] + ' ' + addr['STNAME'] + ' ' + addr['STTYPE'] + ' ' + addr['QUAD']
)
addr.head()

In [None]:
# turn the frame into a list with one dict per row, keyed by column name
addr_dict = addr.to_dict('records')
addr_dict[:2]

In [None]:
# merge each address row with its lookup result; every row costs one HTTP
# request, so only the first 10 rows are processed here
addr_info = [
    {**record, **get_address_info(record['ADDRESS'])}
    for record in addr_dict[:10]
]
addr_info[:1]

In [None]:
# persist the enriched addresses as CSV (without the index) and preview them
addr_df = pd.DataFrame(data=addr_info)
addr_df.to_csv(path_or_buf='../data/illegal_constructions_addresses.csv', index=False)
addr_df.head()