In [4]:
# TRY TO GET ZIP CODES FROM THE CENSUS SITE AND CAPTURE ALL THE DATA YOU CAN TO BUILD A GEOCODE DATA SET

In [None]:
# explanation about how to build entity codes
# https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html

In [1]:
a = '''
Area Type                     Number of Digits    Example GEOID
State                         2                   48
County                        2+3=5               48201
County Subdivision            2+3+5=10            4820192975
Places                        2+5=7               4835000
Census Tract                  2+3+6=11            48201223100
Block Group                   2+3+6+1=12          4.82012E+11
Block                         2+3+6+4=15          4.82012E+14
Congressional District        2+2=4               902
State Legislative District    2+3=5               9033
ZCTA                          5                   20746
'''

In [2]:
import pandas as pd
from io import StringIO
import urllib.request, json 
import csv

In [3]:
# FUNCTIONS

In [47]:
def remove_qualifiers(place_name):
    s = place_name.replace(' town', '').replace(' Town', '').replace(' city', '').replace(' county', '').replace(' County', '').replace(' village', '').replace(' Village', '').replace(' CDP', ' Census Designated Place')
    s = s.replace("DeFuniak", "De Funiak")
    s = s.replace("AFB", "Air Force Base")
    return s

In [42]:
def get_type(place_name):
    if " town" in place_name or " Town" in place_name:
        return "town"
    elif " city" in place_name:
        return "city"
    elif " county" in place_name or " County" in place_name:
        return "county"
    elif " village" in place_name or " Village" in place_name:
        return "village"
    elif " CDP" in place_name:
        return "cdp"

In [6]:
# STATES

In [7]:
state_response = urllib.request.urlopen(f"https://cbb.census.gov/arcgis/rest/services/Census_EMS/Census/MapServer/11/query?f=json&resultOffset=0&resultRecordCount=100&where=1=1&outFields=NAME,STATE&returnGeometry=false")
state_json_string = state_response.read().decode('utf-8')
state_json_obj = json.loads(state_json_string)
state_json_features = state_json_obj['features']

state_df = pd.DataFrame(columns=['STATE_NAME', 'FIPS_CODE'])
for sf in state_json_features:
    new_row = {
       'STATE_NAME':sf["attributes"]["NAME"],
       'FIPS_CODE':sf["attributes"]["STATE"]
    }   
    state_df = pd.concat([state_df, pd.DataFrame(new_row, index=[0])], ignore_index=True)

print(state_df)

                                      STATE_NAME FIPS_CODE
0                                  West Virginia        54
1                                        Florida        12
2                                       Illinois        17
3                                      Minnesota        27
4                                       Maryland        24
5                                   Rhode Island        44
6                                          Idaho        16
7                                  New Hampshire        33
8                                 North Carolina        37
9                                        Vermont        50
10                                   Connecticut        09
11                                      Delaware        10
12                                    New Mexico        35
13                                    California        06
14                                    New Jersey        34
15                                     Wisconsin        

In [8]:
# STATE ABBREVIATIONS

In [9]:
abbrev_response = urllib.request.urlopen(f"https://www2.census.gov/geo/docs/reference/state.txt")
abbrev_text = abbrev_response.read().decode('utf-8').replace('|', ',').strip()

with open('geo_state_abbrev.csv', 'w') as f:
    f.write(abbrev_text)

abbrev_df = pd.read_csv("geo_state_abbrev.csv")
print(abbrev_df)

    STATE STUSAB                   STATE_NAME  STATENS
0       1     AL                      Alabama  1779775
1       2     AK                       Alaska  1785533
2       4     AZ                      Arizona  1779777
3       5     AR                     Arkansas    68085
4       6     CA                   California  1779778
5       8     CO                     Colorado  1779779
6       9     CT                  Connecticut  1779780
7      10     DE                     Delaware  1779781
8      11     DC         District of Columbia  1702382
9      12     FL                      Florida   294478
10     13     GA                      Georgia  1705317
11     15     HI                       Hawaii  1779782
12     16     ID                        Idaho  1779783
13     17     IL                     Illinois  1779784
14     18     IN                      Indiana   448508
15     19     IA                         Iowa  1779785
16     20     KS                       Kansas   481813
17     21 

In [10]:
merge_df = state_df.merge(abbrev_df, on="STATE_NAME")
merge_df[["STATE_NAME", "FIPS_CODE", "STUSAB"]]

Unnamed: 0,STATE_NAME,FIPS_CODE,STUSAB
0,West Virginia,54,WV
1,Florida,12,FL
2,Illinois,17,IL
3,Minnesota,27,MN
4,Maryland,24,MD
5,Rhode Island,44,RI
6,Idaho,16,ID
7,New Hampshire,33,NH
8,North Carolina,37,NC
9,Vermont,50,VT


In [11]:
fedcodes_df = pd.read_csv("fedcodes_data.csv")

In [5]:
state_codes = ["01","02","04","05","06","08","09","10","11","12","13","15","16","17","18","19","20","21","22","23","24","25","26","27","28","29","30","31","32","33","34","35","36","37","38","39","40","41","42","44","45","46","47","48","49","50","51","53","54","55","56"]

In [50]:
get_geometry = "true"
geo_df = pd.DataFrame()
    
for index, row in merge_df[1:2].iterrows():
    # todo get more fields or add additional requests for other types of entities
    fips_code = row["FIPS_CODE"]
    response = urllib.request.urlopen(f"https://cbb.census.gov/arcgis/rest/services/Census_EMS/Census/MapServer/4/query?f=json&resultOffset=0&resultRecordCount=1000&where=STATE='{fips_code}'&orderByFields=STATE,PLACE&outFields=GEOID,NAME,PLACE,STATE&returnGeometry={get_geometry}&spatialRel=esriSpatialRelIntersects")
    json_string = response.read().decode('utf-8')
    json_obj = json.loads(json_string)
    #print(type(json_obj))
    json_features = json_obj['features']
    for f in json_features:
        
        place_name = remove_qualifiers(f["attributes"]["NAME"])
        
        cond_1 = fedcodes_df.feature_name == remove_qualifiers(place_name)
        cond_2 = fedcodes_df.state_name == row["STATE_NAME"]
        #cond_3 = fedcodes_df.feature_class == "Populated Place"
        #cond_4 = fedcodes_df.feature_class == "Census"
        match = fedcodes_df[(cond_1) & (cond_2)]
        county_code = "0"#match.iloc[0]['county_numeric']
        if((match.shape[0] > 1) or (match.shape[0] < 1)):
            print(f"{match.shape} - {f['attributes']['NAME']} - {place_name} - {row['STATE_NAME']}")
        
        new_row = {
            'GEOID':f["attributes"]["GEOID"],
            'PLACE_NAME':place_name,
            'PLACE_ID':f["attributes"]["PLACE"],
            'FIPS_CODE':row["FIPS_CODE"],
            'TYPE':get_type(place_name),
            'STATE_NAME':row["STATE_NAME"],
            'STATE_ABBREVIATION':row["STUSAB"],
            'COUNTY_CODE':county_code
        }
        geo_df = pd.concat([geo_df, pd.DataFrame(new_row, index=[0])], ignore_index=True)

#print(geo_df)
#geo_df.to_csv("geo_data.csv")

(3, 22) - Bay Lake city - Bay Lake - Florida
(2, 22) - Englewood CDP - Englewood Census Designated Place - Florida
(2, 22) - Flagler Beach city - Flagler Beach - Florida
(4, 22) - Four Corners CDP - Four Corners Census Designated Place - Florida
(2, 22) - Goldenrod CDP - Goldenrod Census Designated Place - Florida
(0, 22) - Greenacres city - Greenacres - Florida
(0, 22) - Gulf Gate Estates CDP - Gulf Gate Estates Census Designated Place - Florida
(2, 22) - High Point CDP - High Point Census Designated Place - Florida
(0, 22) - Indiantown CDP - Indiantown Census Designated Place - Florida
(0, 22) - Islamorada, Village of Islands village - Islamorada, of Islands - Florida
(0, 22) - Jan Phyl Village CDP - Jan Phyl Census Designated Place - Florida
(0, 22) - Lake Worth city - Lake Worth - Florida
(2, 22) - Lighthouse Point city - Lighthouse Point - Florida
(2, 22) - Live Oak city - Live Oak - Florida
(2, 22) - Longboat Key town - Longboat Key - Florida
(2, 22) - Longwood city - Longwood - 