Permalink
Browse files

initial

  • Loading branch information...
neilkod committed Oct 7, 2012
1 parent dc23ec0 commit 207c39fc2a02885175d047a70816b0e06abaf8f0
Showing with 113,760 additions and 0 deletions.
  1. +58 −0 county_fips_codes_for_zip_codes.py
  2. +3,195 −0 data/FIPS_CountyName.txt
  3. +3,237 −0 data/app_a03.txt
  4. +534 −0 data/la_database.csv
  5. +58 −0 data/state.txt
  6. +42,523 −0 data/zip_code_database.csv
  7. +29,470 −0 data/zips.txt
  8. +34,685 −0 output/zip_code_to_fips_county_code.txt
@@ -0,0 +1,58 @@
+#!/bin/python
+# source files:
+# http://quickfacts.census.gov/qfd/download/FIPS_CountyName.txt
+# http://www.unitedstateszipcodes.org/zip_code_database.csv
+
+import sys, re
+counties = {}
+no_match = []
+zip_fips = {}
+fips_regex = re.compile('^[0-9]{5}')
+fips_data = open('data/FIPS_CountyName.txt').readlines()
+zip_data = open('data/zip_code_database.csv').readlines()
+output_file = open('output/zip_code_to_fips_county_code.txt','w')
+
+for line in fips_data:
+ data = line.strip()
+ fips_cd = data[0:5]
+ if fips_regex.match(fips_cd) and fips_cd[2:5] != '000':
+ county_name, state_cd = data[6:].split(',')
+ state_cd = state_cd.replace(' ','')
+ county_name = county_name.upper().replace('.','')
+ counties[(state_cd, county_name)] = fips_cd
+
+# now, read the zip code database file
+# warning, this is a super-quick hack and this is O(sucky). it could be
+# much more efficient.
+# we should really build an intermediate list of unique counties found in the
+# zip code file but a) i'm being lazy and b) this is small data. runtime is
+# quick.
+
+# in a few places, we need to clean text. we'll do the following
+# - convert it to upper case
+# - remove periods (i.e. ST.)
+# - remove the surrounding double-quotes
+
+for line in zip_data:
+ data = line.strip().split(',')
+ zip_code = data[0].replace('"','')
+ state_cd = data[5].replace('"','').upper()
+ if len(state_cd) != 2:
+ continue
+ county_name = data[6].replace('"','').upper().replace('.','')
+
+ try:
+ fips_cd_for_zip_code = counties[(state_cd, county_name)]
+ zip_fips[zip_code] = fips_cd_for_zip_code
+ output_file.write("%s\t%s\t%s\t%s\n" %
+ (zip_code, fips_cd_for_zip_code, county_name, state_cd))
+ #print zip_code, zip_fips
+ except KeyError:
+ no_match.append((state_cd, county_name))
+output_file.close()
+print "found county matches for %d zip codes" % len(zip_fips.items())
+print "-------"
+print "couldn't find a match for the following %d items." %len(set(no_match))
+print "investigate possible data issues."
+for itm in set(no_match):
+ print itm
Oops, something went wrong.

0 comments on commit 207c39f

Please sign in to comment.