In [1]:
# Directory holding one OSM/XML export per census tract.
CENSUS_TRACTS_DIR = "census_tracts"

# Side length of one grid square. All distances in this cell are in miles.
STATIC_SQUARE_SIZE = 0.5

# Miles covered by one degree of latitude / longitude.
# NOTE(review): 54.6 mi per degree of longitude appears to match a latitude of
# roughly 38 degrees; the tract coordinates printed by this notebook are near
# 30.25 degrees, where one degree of longitude is closer to ~60 miles. Confirm
# whether this constant is intentional before relying on squares being 0.5 mi wide.
LATITUDE_MILES, LONGITUDE_MILES = 69, 54.6

# Grid square size converted from miles to degrees along each axis.
LATITUDE_DELTA, LONGITUDE_DELTA = (
    STATIC_SQUARE_SIZE / LATITUDE_MILES,
    STATIC_SQUARE_SIZE / LONGITUDE_MILES,
)

In [2]:
import os
import xml.etree.ElementTree as ET
import collections
import json

In [3]:
# Read every census tract file and collect the coordinates of its <node> elements.
#   census_tracts_points:   tract name -> list of (lat, lon) tuples
#   census_tracts_bounding: tract name -> [(min_lat, min_lon), (max_lat, max_lon)]
# Entries in CENSUS_TRACTS_DIR that cannot be used are reported and skipped.
census_tracts_bounding = {}
census_tracts_points = {}
for fname in os.listdir(CENSUS_TRACTS_DIR):
    # Remove only the file extension. str.strip(".txt") strips any leading or
    # trailing '.', 't' and 'x' characters, which mangles names that begin or
    # end with one of those characters.
    tract_name = os.path.splitext(fname)[0]
    try:
        # Parse the XML file
        root = ET.parse(os.path.join(CENSUS_TRACTS_DIR, fname)).getroot()
        points = [
            (float(node.attrib["lat"]), float(node.attrib["lon"]))
            for node in root.findall("node")
        ]
        if not points:
            # Without nodes there is no bounding box; treat the file as unusable
            # instead of leaving a points entry with no matching bounding entry.
            raise ValueError("no <node> elements found")
    except Exception as err:
        # Deliberately best-effort: one bad file must not abort the whole scan.
        # Unlike a bare `except:`, this does not trap KeyboardInterrupt/SystemExit,
        # and the reason for skipping the file is shown next to its name.
        print("{} ({}: {})".format(fname, type(err).__name__, err))
        continue

    latitudes, longitudes = zip(*points)
    census_tracts_points[tract_name] = points
    census_tracts_bounding[tract_name] = [
        (min(latitudes), min(longitudes)),
        (max(latitudes), max(longitudes)),
    ]

15_01.txt
17_60.txt
17_81.txt
18_46.txt
18_58.txt
19_19.txt
23_04.txt
23_12.txt
24_30.txt
2_05.txt
3_07.txt
9800.txt
census_tracts
grid_info_multiple.json
grid_info_smaller.json
grid_info_smaller_2018_fips.json


In [4]:
# Show the grid step in degrees along each axis, followed by the bounding box
# computed for every tract (one value per output line).
print(LATITUDE_DELTA, LONGITUDE_DELTA, census_tracts_bounding, sep="\n")

0.007246376811594203
0.009157509157509158
{'10': [(30.2466966, -97.7372751), (30.2610402, -97.704501)], '11': [(30.2513725, -97.7525511), (30.2764108, -97.7325736)], '12': [(30.2646268, -97.7711274), (30.2837225, -97.7502073)], '13_03': [(30.2492114, -97.7727875), (30.2674419, -97.7547002)], '13_04': [(30.2375251, -97.781282), (30.2579639, -97.7647744)], '13_05': [(30.2388816, -97.7686167), (30.2647459, -97.7456538)], '13_07': [(30.2264478, -97.7763429), (30.2459388, -97.759622)], '13_08': [(30.2227335, -97.769551), (30.242111, -97.7535743)], '14_01': [(30.239774, -97.7523189), (30.2607712, -97.7406076)], '14_02': [(30.2338337, -97.747743), (30.253675, -97.7351365)], '14_03': [(30.2324581, -97.740971), (30.2478612, -97.7293873)], '15_03': [(30.3149621, -97.7280535), (30.3393452, -97.704392)], '15_04': [(30.3367125, -97.7372102), (30.3560073, -97.7132279)], '15_05': [(30.3261224, -97.7397113), (30.3454009, -97.7195964)], '16_02': [(30.2741228, -97.7858466), (30.2971899, -97.764007)], '1

In [8]:
def in_grid(grid_lat, grid_lon, lat, lon):
    """Return True when the point (lat, lon) lies inside a grid square.

    The square is anchored at (grid_lat, grid_lon) and extends LATITUDE_DELTA
    degrees towards increasing latitude and LONGITUDE_DELTA degrees towards
    increasing longitude. All four edges are inclusive.
    """
    # Reject on latitude first; longitude only matters if latitude matched.
    if not (grid_lat <= lat <= grid_lat + LATITUDE_DELTA):
        return False
    return grid_lon <= lon <= grid_lon + LONGITUDE_DELTA

In [12]:
# For every tract, walk its bounding box in LATITUDE_DELTA x LONGITUDE_DELTA steps
# (starting at the minimum corner) and keep each square that contains at least one
# of the tract's points. Squares are stored as [lat, lon] of their anchor corner.
census_tract_grids = collections.defaultdict(list)
for tract_name, (min_corner, max_corner) in census_tracts_bounding.items():
    min_lat, min_lon = min_corner
    max_lat, max_lon = max_corner
    tract_points = census_tracts_points[tract_name]

    cell_lat = min_lat
    while cell_lat < max_lat:
        # Each row of squares restarts at the minimum longitude of the box.
        cell_lon = min_lon
        while cell_lon < max_lon:
            # any() stops at the first point found inside the square.
            if any(in_grid(cell_lat, cell_lon, lat, lon) for lat, lon in tract_points):
                census_tract_grids[tract_name].append([cell_lat, cell_lon])
            cell_lon += LONGITUDE_DELTA
        cell_lat += LATITUDE_DELTA

In [13]:
# Total number of grid squares kept across all tracts.
print("Number of grids is: {}".format(sum(len(grids) for grids in census_tract_grids.values())))

Number of grids is: 5189


In [89]:
# Bundle the grid parameters and per-tract results into one JSON-serialisable dict.
output_dict = {
    "NUMBER_GRIDS": sum(len(grids) for grids in census_tract_grids.values()),
    "GRID_SIZE": STATIC_SQUARE_SIZE,
    "LATITUDE_DELTA": LATITUDE_DELTA,
    "LONGITUDE_DELTA": LONGITUDE_DELTA,
    "census_tracts_bounding": census_tracts_bounding,
    "census_tract_grids": census_tract_grids,
}

In [90]:
# Write the grid description to disk. The context manager closes the file as soon
# as the dump finishes, so the JSON is fully flushed even while the kernel keeps
# running (a bare open() handle is only closed when it is garbage collected).
with open("census_tracts_info_2.json", "w") as output_file:
    json.dump(output_dict, output_file)

In [None]:
# To search for the grid square containing a given point:
# 1. Find all possible tracts it can be in by searching through census_tracts_bounding.
# 2. Search through the grid squares of those possible tracts to see which square the
#    point falls under. Now we know which tract it is in and which grid it lies in,
#    so we can index into the time matrix.