From c648086da858e1b77610d5b89985b4fd1c2bd4c2 Mon Sep 17 00:00:00 2001 From: palewire Date: Fri, 14 Jul 2017 13:42:39 -0700 Subject: [PATCH] Docs --- README.md | 129 +++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 117 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index de5071b..bdef482 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ Importing the library >>> import censusbatchgeocoder ``` -According to the [official Census documentation](https://www.documentcloud.org/documents/3894452-Census-Geocoding-Services-API.html), the input file is expected to contain a comma-delimited list of addresses segmented into the following fields: +According to the [official Census documentation](https://www.documentcloud.org/documents/3894452-Census-Geocoding-Services-API.html), the input is expected to contain the following fields: * ``id``: Your unique identifier for the record * ``address``: Structure number and street name (required) @@ -33,6 +33,8 @@ According to the [official Census documentation](https://www.documentcloud.org/d * ``state``: State (optional) * ``zipcode``: ZIP Code (optional) +You can geocode a comma-delimited file from the filesystem. Results are returned as a list of dictionaries. + An example could look like this: ```text @@ -41,15 +43,33 @@ id,address,city,state,zipcode 2,202 W. 1st Street,Los Angeles,CA,90012 ``` -Geocoding a comma-delimited file from the filesystem. Results are returned as a list of dictionaries. +Which is then passed in like this: ```python ->>> censusbatchgeocoder.geocode("./my_file.csv") -[{'input_address': '202 W. 1st Street, Los Angeles, CA, 90012', +>>> results = censusbatchgeocoder.geocode("./my_file.csv") +``` + +The results are returned with the following columns from the Census: + +* ``id``: The unique id provided with the record. +* ``returned_address``: The address of the match returned by the geocoder. +* ``geocoded_address``: The address that was submitted to the geocoder. 
+* ``is_match``: Whether or not the geocoder found a match. +* ``is_exact``: The precision of the match. +* ``coordinates``: The longitude and latitude of the match. +* ``tiger_line``: The Census TIGER line of the match. +* ``side``: The side of the Census TIGER line of the match. +* ``state_fips``: The FIPS state code identifying the state of the match. +* ``county_fips``: The FIPS county code identifying the county of the match. +* ``tract``: The Census tract of the match. + +```python +>>> print results +[{'geocoded_address': '202 W. 1st Street, Los Angeles, CA, 90012', 'block': '1034', 'coordinates': '-118.24456,34.053005', 'county_fips': '037', - 'geocoded_address': '202 W 1ST ST, LOS ANGELES, CA, 90012', + 'returned_address': '202 W 1ST ST, LOS ANGELES, CA, 90012', 'id': '2', 'is_exact': 'Exact', 'is_match': 'Match', @@ -57,11 +77,11 @@ Geocoding a comma-delimited file from the filesystem. Results are returned as a 'state_fips': '06', 'tiger_line': '141618115', 'tract': '207400'}, - {'input_address': '1600 Pennsylvania Ave NW, Washington, DC, 20006', + {'geocoded_address': '1600 Pennsylvania Ave NW, Washington, DC, 20006', 'block': '1031', 'coordinates': '-77.03535,38.898754', 'county_fips': '001', - 'geocoded_address': '1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20502', + 'returned_address': '1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20502', 'id': '1', 'is_exact': 'Non_Exact', 'is_match': 'Match', @@ -71,18 +91,103 @@ Geocoding a comma-delimited file from the filesystem. Results are returned as a 'tract': '006202'}] ``` -You can also geocode an in-memory file object. +Any extra metadata fields included in the file are still present in the returned data. + +So the ``my_metadata`` column here... + +```text +id,address,city,state,zipcode,my_metadata +1,1600 Pennsylvania Ave NW,Washington,DC,20006,foo +2,202 W. 1st Street,Los Angeles,CA,90012,bar +``` + +... is still there after you geocode. 
+ +```python +>>> censusbatchgeocoder.geocode("./my_file.csv") +[{'address': '1600 Pennsylvania Ave NW', + 'block': '1031', + 'city': 'Washington', + 'coordinates': '-77.03535,38.898754', + 'county_fips': '001', + 'geocoded_address': '1600 Pennsylvania Ave NW, Washington, DC, 20006', + 'id': '1', + 'is_exact': 'Non_Exact', + 'is_match': 'Match', + 'my_metadata': 'foo', + 'returned_address': '1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20502', + 'side': 'L', + 'state': 'DC', + 'state_fips': '11', + 'tiger_line': '76225813', + 'tract': '006202', + 'zipcode': '20006'}, + {'address': '202 W. 1st Street', + 'block': '1034', + 'city': 'Los Angeles', + 'coordinates': '-118.24456,34.053005', + 'county_fips': '037', + 'geocoded_address': '202 W. 1st Street, Los Angeles, CA, 90012', + 'id': '2', + 'is_exact': 'Exact', + 'is_match': 'Match', + 'my_metadata': 'bar', + 'returned_address': '202 W 1ST ST, LOS ANGELES, CA, 90012', + 'side': 'L', + 'state': 'CA', + 'state_fips': '06', + 'tiger_line': '141618115', + 'tract': '207400', + 'zipcode': '90012'}] +``` + +#### Custom column names + +If your column headers do not exactly match those expected by the geocoder, you can override them. + +So a file like this: + +```text +foo,bar,baz,bada,boom +1,521 SWARTHMORE AVENUE,PACIFIC PALISADES,CA,90272-4350 +2,2015 W TEMPLE STREET,LOS ANGELES,CA,90026-4913 +``` + +Can be mapped like this: + +```python +>>> censusbatchgeocoder.geocode( + self.weird_path, + id="foo", + address="bar", + city="baz", + state="bada", + zipcode="boom" +) +``` + +#### Optional columns + +The state and ZIP Code columns are optional. If your data doesn't have them, pass ``None`` as keyword arguments. + +```python +>>> censusbatchgeocoder.geocode("./my_file.csv", state=None, zipcode=None) +``` + +#### File objects + +You can also geocode an in-memory file object of data in CSV format. ```python >>> my_data = """id,address,city,state,zipcode 1,1600 Pennsylvania Ave NW,Washington,DC,20006 2,202 W. 
1st Street,Los Angeles,CA,90012""" >>> censusbatchgeocoder.geocode(io.StringIO(my_data)) -[{'address': '202 W. 1st Street, Los Angeles, CA, 90012', +[{'geocoded_address': '202 W. 1st Street, Los Angeles, CA, 90012', 'block': '1034', 'coordinates': '-118.24456,34.053005', 'county_fips': '037', - 'geocoded_address': '202 W 1ST ST, LOS ANGELES, CA, 90012', + 'returned_address': '202 W 1ST ST, LOS ANGELES, CA, 90012', 'id': '2', 'is_exact': 'Exact', 'is_match': 'Match', @@ -90,11 +195,11 @@ You can also geocode an in-memory file object. 'state_fips': '06', 'tiger_line': '141618115', 'tract': '207400'}, - {'address': '1600 Pennsylvania Ave NW, Washington, DC, 20006', + {'geocoded_address': '1600 Pennsylvania Ave NW, Washington, DC, 20006', 'block': '1031', 'coordinates': '-77.03535,38.898754', 'county_fips': '001', - 'geocoded_address': '1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20502', + 'returned_address': '1600 PENNSYLVANIA AVE NW, WASHINGTON, DC, 20502', 'id': '1', 'is_exact': 'Non_Exact', 'is_match': 'Match',