## Gazetteer county data

[National Counties Gazetteer File for U.S. county geographic data, 2017](https://www.census.gov/geographies/reference-files/time-series/geo/gazetteer-files.2017.html#list-tab-264479560)

In [1]:
from bs4 import BeautifulSoup
import requests 

import pandas as pd

In [2]:
# National Counties Gazetteer File: tab-delimited text whose first line is the header.

# Suffixes removed from the county name (the 4th field of each row).
# They are tested in this order, case-sensitively, and the checks cascade,
# so a name can lose more than one suffix.
COUNTY_NAME_SUFFIXES = (
    ' City and Borough',
    ' Borough',
    ' County',
    ' Census Area',
    ' city',
    ' Municipality',
    ' Municipio',
    ' Parish',
)


def _strip_county_suffixes(name):
    """Return `name` with every matching trailing entry of COUNTY_NAME_SUFFIXES removed.

    Each suffix is checked once, in tuple order, against the name as it
    stands after the previous removals.
    """
    for suffix in COUNTY_NAME_SUFFIXES:
        if name.endswith(suffix):
            name = name[:-len(suffix)]
    return name


table_rows = []
with open('../data/2017_Gaz_counties_national.txt') as file:
    for line in file:
        # Skip blank lines (e.g. a stray newline at end of file); they would
        # otherwise raise IndexError on the row[3] lookup below.
        if not line.strip():
            continue
        # Strip every field so the line terminator (and any padding around a
        # value) does not leak into the last column's header name or values.
        row = [field.strip() for field in line.split('\t')]
        row[3] = _strip_county_suffixes(row[3])
        table_rows.append(row)

In [5]:
# The first parsed row supplies the column names; the remaining rows are the records.
header_row = table_rows[0]
county_rows = table_rows[1:]
county_data = pd.DataFrame(data=county_rows, columns=header_row)

In [6]:
# Puerto Rico rows are removed because the Food Environment Atlas does not include PR counties.
is_puerto_rico = county_data['USPS'] == 'PR'
PR_indices = county_data[is_puerto_rico].index
county_data = county_data.drop(index=PR_indices)

In [7]:
# Rename columns for consistency. The last column is looked up by position,
# so it is renamed to 'INTPTLONG' whatever its name was when read from the file.
last_column = county_data.columns[-1]
column_renames = {
    'USPS': 'State',
    'GEOID': 'FIPS',
    'NAME': 'County',
    last_column: 'INTPTLONG',
}
county_data = county_data.rename(columns=column_renames)

In [8]:
# Convert each longitude entry to a float; float() ignores surrounding whitespace.
county_data['INTPTLONG'] = county_data['INTPTLONG'].map(float)

In [9]:
# Show the cleaned DataFrame as this cell's output.
county_data

Unnamed: 0,State,FIPS,ANSICODE,County,ALAND,AWATER,ALAND_SQMI,AWATER_SQMI,INTPTLAT,INTPTLONG
0,AL,01001,00161526,Autauga,1539614693,25744269,594.449,9.94,32.532237,-86.646440
1,AL,01003,00161527,Baldwin,4117605847,1133109409,1589.817,437.496,30.659218,-87.746067
2,AL,01005,00161528,Barbour,2292144656,50538698,885.002,19.513,31.870253,-85.405104
3,AL,01007,00161529,Bibb,1612165763,9603798,622.461,3.708,33.015893,-87.127148
4,AL,01009,00161530,Blount,1670079465,15039864,644.821,5.807,33.977358,-86.566440
...,...,...,...,...,...,...,...,...,...,...
3137,WY,56037,01609192,Sweetwater,27005738206,166246301,10426.974,64.188,41.660328,-108.875677
3138,WY,56039,01605083,Teton,10351785153,570864021,3996.847,220.412,44.048662,-110.426072
3139,WY,56041,01605084,Uinta,5391911952,16345637,2081.829,6.311,41.284726,-110.558947
3140,WY,56043,01605085,Washakie,5798120186,10455585,2238.667,4.037,43.878831,-107.669052


In [10]:
# Write the cleaned county table to CSV; index=False leaves the DataFrame index out of the file.
county_data.to_csv('../data/county_census_data.csv', index=False)