In [1]:
%load_ext autoreload
%autoreload
import datetime as dt
start = dt.datetime.now()
print("Notebook Last Run Initiated: "+str(start))
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
display(HTML("""<style>
    div.output_area{
        max-height:10000px;
        overflow:scroll;
    }
</style>"""))

import uszipcode
import shapefile
from os.path import join
import numpy as np
import pygeohash as geohash
from timeUtils import clock, elapsed
from shapely.geometry.polygon import Polygon
from shapely.geometry import Point
from random import uniform
import folium

basedir="/Users/tgadf/Downloads"

Notebook Last Run Initiated: 2018-08-31 19:57:54.816569


In [2]:
def getBBox(shape):
    bbox = [float('%.6f' % coord) for coord in shape.bbox]
    return bbox

def getShapeGeos(poly, prec, geo, geos, ignores, depth, debug=False):
    if debug:
        print("getShapeGeos({0})".format(geo))
    if geo in ignores:
        return
    
    if depth > 150:
        return
    lat,long  = geohash.decode_exactly(geo)[:2]
    pnt  = Point(long,lat)
    if poly.contains(pnt) is False:
        if debug:
            print("    Adding {0} to list of ignores ({1})".format(geo, len(ignores)))
        ignores.add(geo)
        return
    else:
        geos.add(geo)
        ignores.add(geo)
        if debug:
            print("    Adding {0} to make {1} total geos".format(geo, len(geos)))
        dp = depth+1
        neighbors = set(geohash.neighbors(geo)).difference(ignores)
        #print(depth, len(geos), len(ignores), neighbors)
        for neighbor in list(neighbors):
            if debug:
                print("  Testing {0} from {1} at {2}".format(neighbor, geo, depth))
            getShapeGeos(poly=poly, prec=prec, geo=neighbor, geos=geos, ignores=ignores, depth=dp, debug=debug)
        return
    
def addGeos(geos, geo, init=False):
    nAdd  = 0
    nGeos = len(geos)
    geos.add(geo)
    neighbors = set(geohash.neighbors(geo)).difference(geos)
    for neighbor in list(neighbors):
        geos.add(neighbor)
        nneighbors = set(geohash.neighbors(neighbor)).difference(geos)
        for nneighbor in list(nneighbors):
            geos.add(nneighbor)
    dAdd = len(geos) - nGeos
    if dAdd > 0:
        return 0
    else:
        if init:
            return 0
        return 1
    
    
def addLinearGeos(irec, nshapes, shape, prec, maxmiss=None, debug=True):
    geos = set()
    for i,pnt in enumerate(shape.points):
        long,lat = pnt
        geo      = geohash.encode(latitude=lat, longitude=long, precision=prec)
        geos.add(geo)
    if debug:
        print("Added {0} geos (guess = N/A) from {1} points.".format(len(geos), len(shape.points)))
    return geos


def addShapeGeos(irec, nshapes, shape, prec, geos, maxmiss=None, debug=True):
    from random import random
    bbox = getBBox(shape)
    lng0      = min([bbox[0], bbox[2]])
    lngrange  = abs(bbox[0] - bbox[2])
    lat0      = min([bbox[1], bbox[3]])
    latrange  = abs(bbox[1] - bbox[3])
    dEW   = 111*latrange
    dNS   = 111*lngrange
    area  = dEW * dNS
    if prec == 5:
        geoarea = 4.9*4.9 / 2
    elif prec == 6:
        geoarea = 1.2*0.61
    elif prec == 7:
        geoarea = 0.152*0.152 / 2
    elif prec == 8:
        geoarea = 0.038*0.019 / 3
    else:
        raise ValueError("No idea about {0}".format(prec))
        geoarea = None
    from numpy import ceil
    ngeoguess = int(ceil(area / geoarea))
    minMR = 2500
    if maxmiss is None:
        maxmiss = min([max([ngeoguess,5]), minMR])

    nmiss=0
    nrounds   = 0
    maxR = 100000
    maxrounds = min([max([5*ngeoguess, 10]), maxR])
    if maxrounds == maxR and False:
        debug=True
    if debug:
        print("  addShapeGeos({0}/{1}\tKm EW = {2}, Km NS  = {3}, Sq Km = {4}, ngeos = {5})".format(irec, nshapes, round(dEW,1), round(dNS,1), round(area,1), ngeoguess), end="\t----> ")
    for i in range(maxrounds):
        nrounds += 1
        genlat = latrange*random() + lat0
        genlng = lngrange*random() + lng0
        geo    = geohash.encode(latitude=genlat, longitude=genlng, precision=prec)
        retval = addGeos(geos, geo)
        if retval == 0:
            nmiss = 0
        else:
            nmiss += 1
        if debug and False: print("\t",i,"\tMiss:\t",nmiss,"Ngeos:\t",len(geos))
        if nmiss > maxmiss:
            break
    if debug:
        print("Added {0} geos (guess = {1}) with {2} round and {3} misses in the end.".format(len(geos), ngeoguess, nrounds, nmiss))
    return 

In [3]:
import gzip
import pickle

def save_zipped_pickle(obj, filename, protocol=-1):
    with gzip.open(filename, 'wb') as f:
        pickle.dump(obj, f, protocol)
        
def load_zipped_pickle(filename):
    with gzip.open(filename, 'rb') as f:
        loaded_object = pickle.load(f)
        return loaded_object

def saveGeoData(shapeData, geoShapeMap, Nshapes, ngeos, prefix):
    print("\n")
    import pickle
    fname = "{0}-data.p".format(prefix)
    print("There are {0} entries in the saved file.".format(len(shapeData)))
    pickle.dump(shapeData, open(fname, "wb"))
    print("Saved shape data to {0}".format(fname))
    print("\n")

    fname = "{0}-geos.p".format(prefix)
    print("There are {0} entries in the saved file.".format(ngeos))
    pickle.dump(geoShapeMap, open(fname, "wb"))
    print("Saved shape data to {0}".format(fname))
    print("\n")

In [4]:
geoID = 4
latID = 13
lngID = 14
wtrID = 11
lndID = 12
ids   = [geoID, latID, lngID, lndID, wtrID]

def splitGeoID(geoID):
    stateID  = geoID[:2]
    shape1ID = geoID[2:5]
    shape2ID = geoID[5:11]
    shape3ID = geoID[11:15]
    return [stateID, shape1ID, shape2ID, shape3ID]

def getRecordInfo(record):
    info = [record[x] for x in ids]
    info[1] = float(info[1])
    info[2] = float(info[2])
    return info

# HERE

In [5]:
from pandas import DataFrame
import pickle
here_data = pickle.load(open("here/HEREdata.p", "rb"))
here_data['lat'] = here_data['lat']/1e5
here_data['lon'] = here_data['lon']/1e5
use_here_data = here_data[here_data['iso_country_code'] == "USA"]

In [22]:
cats = {}
key  = None
for i,row in enumerate(open("here_cat_id.dat").readlines()):
    if i % 2 == 0:
        key = row.replace("\n", "")
        continue
    else:
        value = row.replace("\n", "")
        cats[key] = value
        key = None
for k,v in cats.items():
    print(k,v)
import pickle
pickle.dump(cats, open("here/catIDs.p", "wb"))

2084 Winery
3578 ATM
4013 TrainStation
4100 CommuterRailStation
4170 BusStation
4482 FerryTerminal
4493 Marina
4580 PublicSportsAirport
4581 Airport
5000 BusinessFacility
5400 GroceryStore
5511 AutoDealerships
5512 AutoDealership-UsedCars
5540 PetrolandGasolineStation
5571 MotorcycleDealership
5800 Restaurant
5813 Nightlife
5999 HistoricalMonument
6000 Bank
6512 Shopping
7011 Hotel
7012 SkiResort
7013 OtherAccommodation
7014 SkiLift
7389 TouristInformation
7510 RentalCarAgency
7520 ParkingLot
7521 ParkingGarageandHouse
7522 ParkandRide
7538 AutoServiceandMaintenance
7832 Cinema
7897 RestArea
7929 PerformingArts
7933 BowlingCentre
7940 SportsComplex
7947 ParkandRecreationArea
7985 Casino
7990 ConventionandExhibitionCentre
7992 GolfCourse
7994 CivicandCommunityCentre
7996 AmusementPark
7997 SportsCentre
7998 IceSkatingRink
7999 TouristAttraction
8060 Hospital
8200 HigherEducation
8211 School
8231 Library
8410 Museum
8699 AutomobileClub
9050 BicycleSharingLocation
9051 BicycleParking
9052

In [26]:
shapeData   = {}
geoShapeMap = {}
ngeos = {}
shapeval = 'here'
Nshapes = 0
prec = 7
catIDs = pickle.load(open("here/catIDs.p", "rb"))
for row,rowdata in use_here_data.iterrows():
    if Nshapes % 250000 == 0 and Nshapes > 0:
        print(row,Nshapes)
    Nshapes += 1
    geo    = geohash.encode(latitude=rowdata['lat'], longitude=rowdata['lon'], precision=7)
    irec   = row
    catID  = str(int(rowdata['cat_id']))
    geoid  = catID
    name   = catIDs.get(catID)
    #geoid  = rowdata['poi_id']
    #htype  = rowdata['cat_id']
    
    if name is None:
        print(rowdata)
        1/0
    if shapeData.get(catID) is None:
        shapeData[catID] = {}
        shapeData[catID][geoid]   = {"Name": name}
        geoShapeMap[catID] = {}
        geoShapeMap[catID][geoid] = set()
    geoShapeMap[catID][geoid].add(geo)
    
for catID,catData in shapeData.items():
    shapeval = catIDs[catID]
    ngeos = len(geoShapeMap[catID][catID])
    print("Found {0} of type {1}".format(ngeos,shapeval))
    saveGeoData(shapeData[catID], geoShapeMap[catID], Nshapes, ngeos, "here/{0}-{1}".format(shapeval, prec))

402753 250000
805836 500000
1209512 750000
1613642 1000000
2017407 1250000
2420441 1500000
2824037 1750000
3227998 2000000
3631825 2250000
4034628 2500000
Found 235584 of type AutoServiceandMaintenance



There are 1 entries in the saved file.
Saved shape data to here/AutoServiceandMaintenance-7-data.p



There are 235584 entries in the saved file.
Saved shape data to here/AutoServiceandMaintenance-7-geos.p



Found 323799 of type Restaurant



There are 1 entries in the saved file.
Saved shape data to here/Restaurant-7-data.p



There are 323799 entries in the saved file.
Saved shape data to here/Restaurant-7-geos.p



Found 67882 of type GroceryStore



There are 1 entries in the saved file.
Saved shape data to here/GroceryStore-7-data.p



There are 67882 entries in the saved file.
Saved shape data to here/GroceryStore-7-geos.p



Found 117064 of type SpecialtyStore



There are 1 entries in the saved file.
Saved shape data to here/SpecialtyStore-7-data.p



There are 117064 entries

Saved shape data to here/Winery-7-data.p



There are 2797 entries in the saved file.
Saved shape data to here/Winery-7-geos.p



Found 6223 of type Cinema



There are 1 entries in the saved file.
Saved shape data to here/Cinema-7-data.p



There are 6223 entries in the saved file.
Saved shape data to here/Cinema-7-geos.p



Found 495 of type TaxiStand



There are 1 entries in the saved file.
Saved shape data to here/TaxiStand-7-data.p



There are 495 entries in the saved file.
Saved shape data to here/TaxiStand-7-geos.p



Found 1458 of type TruckStopandPlaza



There are 1 entries in the saved file.
Saved shape data to here/TruckStopandPlaza-7-data.p



There are 1458 entries in the saved file.
Saved shape data to here/TruckStopandPlaza-7-geos.p



Found 12789 of type BusinessFacility



There are 1 entries in the saved file.
Saved shape data to here/BusinessFacility-7-data.p



There are 12789 entries in the saved file.
Saved shape data to here/BusinessFacility-7-geos.p



Found 

In [27]:
import pickle
prec=7
from glob import glob
from os.path import basename,splitext
vals    = glob("here/*-{0}-geos.p".format(prec))
vals    = [splitext(basename(x))[0].split('-')[0] for x in vals]
geomap  = {}
records = {}
for mtype in vals:
    try:
        recdata = pickle.load(open("here/{0}-{1}-data.p".format(mtype, prec), "rb"))
        geodata = pickle.load(open("here/{0}-{1}-geos.p".format(mtype, prec), "rb"))
    except:
        continue
    ngeos = 0
    for geoid,geos in geodata.items():
        ngeos += len(geos)
        for geo in geos:
            if geomap.get(geo) is None:
                geomap[geo] = {}
            geomap[geo][mtype] = geoid

    for geoid,rec in recdata.items():
        records[geoid] = rec['Name']
        break
                
    print("{0}\t{1}\t---> {2} <---".format(mtype,ngeos,len(geomap)))
                
fname="geomap-{0}-HERE.p".format(prec)
print("Writing {0}".format(fname))
pickle.dump(geomap,  open(fname, "wb"))

fname="georec-{0}-HERE.p".format(prec)
print("Writing {0}".format(fname))
pickle.dump(records,  open(fname, "wb"))

Airport	4425	---> 4425 <---
AmusementPark	1193	---> 5617 <---
AnimalPark	585	---> 6197 <---
ATM	121532	---> 127591 <---
AutoDealerships	16548	---> 143006 <---
AutomobileClub	1217	---> 143879 <---
AutoServiceandMaintenance	235584	---> 354208 <---
Bank	99003	---> 363865 <---
BicycleParking	43	---> 363895 <---
BicycleService	1	---> 363896 <---
BicycleSharingLocation	221	---> 364049 <---
Bookstore	4399	---> 366820 <---
BorderCrossing	177	---> 366995 <---
BusinessFacility	12789	---> 376850 <---
BusStation	479	---> 377201 <---
Campground	5719	---> 382773 <---
CargoCentre	648	---> 383386 <---
Casino	846	---> 384095 <---
Cinema	6223	---> 388203 <---
CityHall	10011	---> 396559 <---
CivicandCommunityCentre	2115	---> 398392 <---
ClothingStore	64110	---> 440864 <---
CoffeeShop	33400	---> 456402 <---
CommuterRailStation	2511	---> 457967 <---
ConsumerElectronicsStore	50084	---> 480920 <---
ConvenienceStore	102068	---> 543551 <---
ConventionandExhibitionCentre	1075	---> 544359 <---
CourtHouse	4748	--

In [25]:
geomap

{'dpme5s6': {'Airport': '4581'},
 '9v1zzpn': {'Airport': '4581'},
 'bd4pmre': {'Airport': '4581'},
 'c87vnr6': {'Airport': '4581'},
 'dr5rzjx': {'Airport': '4581'},
 'dnt66hf': {'Airport': '4581'},
 'dnx07hw': {'Airport': '4581'},
 'djjutcy': {'Airport': '4581'},
 'dn3kfev': {'Airport': '4581'},
 'djbqxxy': {'Airport': '4581'},
 'dr4r72q': {'Airport': '4581'},
 'c22wnmu': {'Airport': '4581'},
 'djg1xcm': {'Airport': '4581'},
 'djdf8yj': {'Airport': '4581'},
 'dr4brjf': {'Airport': '4581', 'RentalCarAgency': '7510'},
 'c2shjmp': {'Airport': '4581'},
 'dr46zeq': {'Airport': '4581'},
 'djn1tzb': {'Airport': '4581'},
 'dr03v1f': {'Airport': '4581'},
 '9ufxs35': {'Airport': '4581'},
 'c8p2nt8': {'Airport': '4581'},
 '9rvrqjk': {'Airport': '4581'},
 'cb8yer8': {'Airport': '4581'},
 '9ppxvxd': {'Airport': '4581'},
 'b6x1nx9': {'Airport': '4581'},
 '9qs01t1': {'Airport': '4581'},
 'c27x9yp': {'Airport': '4581'},
 'dn6tgg6': {'Airport': '4581'},
 '9zr8yjv': {'Airport': '4581'},
 '9qc13t7': {'Ai

# Venues

In [66]:
from os.path import basename, dirname
shapeval = "Major_Sport_Venues"
try:
    sf = shapefile.Reader(join(basedir, shapeval, shapeval))
except:
    raise ValueError("No shapefile!")
fields      = sf.fields
shapeData   = {}
geoShapeMap = {}
Nshapes   = len(sf.shapes())
ngeos     = 0
totalgeos = 0
prec      = 7
show      = False

start,cmt = clock("Analyzing {0}\t{1}".format(shapeval, Nshapes))
if show:
    print("\n\nFields -> {0}".format(fields))


ivtype=None
for i,val in enumerate(fields):
    if val[0] == "NAICS_DESC":
        ivtype = i-1
        break

irec = -1
for shapeRec in sf.iterShapeRecords():
    irec += 1

    ## Record
    record = shapeRec.record
    if show: raise ValueError("Stopping here: {0}".format(record))
    geoid = record[1]
    name  = record[2]
    vtype = record[ivtype]
    shapeData[geoid] = {"Name": name, "Record": irec, "Type": vtype}

    geos  = getGeos(shapeRec.shape, irec, Nshapes, prec=7)
    
    if len(geos) > 0:
        newgeos = set()
        rad = 200
        long, lat = shapeRec.shape.points[0]
        nmiss = 0
        for i in range(10000):
            dLg = uniform(-rad, rad) / 111000
            dLa = uniform(-rad, rad) / 111000
            pnt = (lat + dLa, long + dLg)
            dist = haversine((lat, long), pnt)
            if dist < rad/1000:
                geo    = geohash.encode(latitude=pnt[0], longitude=pnt[1], precision=7)
                if geo not in newgeos:
                    newgeos.add(geo)
                    nmiss = 0
                else:
                    nmiss += 1

            if nmiss > 200:
                break

        geoShapeMap[geoid] = newgeos
        ngeos += len(newgeos)

print("Found {0} geos from {1}".format(ngeos, shapeval))
saveGeoData(shapeData, geoShapeMap, Nshapes, ngeos, "{0}-{1}".format(shapeval, prec))

Current Time is Mon Aug 13, 2018 11:40:23 for Analyzing Major_Sport_Venues	784
Found 8935 geos from Major_Sport_Venues



There are 784 entries in the saved file.
Saved shape data to Major_Sport_Venues-7-data.p



There are 8935 entries in the saved file.
Saved shape data to Major_Sport_Venues-7-geos.p





# Airports (US)

In [54]:
from os.path import basename, dirname
shapeval = "Airports"
try:
    sf = shapefile.Reader(join(basedir, shapeval, shapeval))
except:
    raise ValueError("No shapefile!")
fields      = sf.fields
shapeData   = {}
geoShapeMap = {}
Nshapes   = len(sf.shapes())
ngeos     = 0
totalgeos = 0
prec      = 8
show      = False

start,cmt = clock("Analyzing {0}\t{1}".format(shapeval, Nshapes))
if show:
    print("\n\nFields -> {0}".format(fields))


iname=None
for i,val in enumerate(fields):
    if val[0] == "FullName":
        iname = i-1
        break
itype=None
for i,val in enumerate(fields):
    if val[0] == "OwnerType":
        itype = i-1
        break
iacres=None
for i,val in enumerate(fields):
    if val[0] == "Acres":
        iacres = i-1
        break

irec = -1
from numpy import sqrt
from haversine import haversine
from random import uniform
for shapeRec in sf.iterShapeRecords():
    irec += 1
    if irec % 1000 == 0:
        print(irec)

    ## Record
    record = shapeRec.record
    if show: raise ValueError("Stopping here: {0}".format(record))
    geoid = record[1]
    name  = record[iname]
    size  = record[iacres]
    atype = record[itype]
    shapeData[geoid] = {"Name": name, "Record": irec, "Type": atype, "Size": size}
        
    geos  = getGeos(shapeRec.shape, irec, Nshapes, prec=7)

    if size is not None:
        newgeos = set()
        rad = 63.6167*sqrt(size)/2
        long, lat = shapeRec.shape.points[0]
        nmiss = 0
        for i in range(10000):
            dLg = uniform(-rad, rad) / 111000
            dLa = uniform(-rad, rad) / 111000
            pnt = (lat + dLa, long + dLg)
            dist = haversine((lat, long), pnt)
            if dist < rad/1000:
                geo    = geohash.encode(latitude=pnt[0], longitude=pnt[1], precision=7)
                if geo not in newgeos:
                    newgeos.add(geo)
                    nmiss = 0
                else:
                    nmiss += 1

            if nmiss > 200:
                break
            
        geoShapeMap[geoid] = newgeos
        ngeos += len(newgeos)
    else:
        geoShapeMap[geoid] = geos
        ngeos += len(geos)

print("Found {0} geos from {1}".format(ngeos, shapeval))
saveGeoData(shapeData, geoShapeMap, Nshapes, ngeos, "{0}-{1}".format(shapeval, 7))

Current Time is Mon Aug 13, 2018 11:12:34 for Analyzing Airports	19460
0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
Found 507046 geos from Airports



There are 19460 entries in the saved file.
Saved shape data to Airports-7-data.p



There are 507046 entries in the saved file.
Saved shape data to Airports-7-geos.p





In [None]:
# Metra Stations (Chicago)

# Metro Stations (DC)

In [21]:
savename = "Metro_Stations_Regional"
shapevals = ["Metro_Stations_Regional", "Kmlmetrastations"]
shapeData   = {}
geoShapeMap = {}
for shapeval in shapevals:
    from os.path import basename, dirname
    #shapeval = "Metro_Stations_Regional"
    try:
        sf = shapefile.Reader(join(basedir, shapeval, shapeval))
    except:
        raise ValueError("No shapefile!")
    fields      = sf.fields
    Nshapes   = len(sf.shapes())
    totalgeos = 0
    ngeos     = 0
    prec      = 7
    show      = False

    start,cmt = clock("Analyzing {0}\t{1}".format(shapeval, Nshapes))
    if show:
        print("\n\nFields -> {0}".format(fields))


    irec = -1
    for shapeRec in sf.iterShapeRecords():
        irec += 1

        ## Record
        record = shapeRec.record
        if show: raise ValueError("Stopping here: {0}".format(record))
        if shapeval == "Kmlmetrastations":
            name   = record[0]
            geoval = record[0]
        if shapeval == "Metro_Stations_Regional":
            name   = record[2]
            geoval = record[1]
        geoid = "{0}{1}".format(shapeval,geoval)
        shapeData[geoid] = {"Name": name, "Record": irec}

        if shapeval == "Metro_Stations_Regional":
            geos  = getGeos(shapeRec.shape, irec, Nshapes, prec)
        else:
            geos  = getGeos(shapeRec.shape, irec, Nshapes, prec)
        geoShapeMap[geoid] = geos
        ngeos += len(geos)
    print("Found {0} geos from {1}".format(ngeos, savename))
    
saveGeoData(shapeData, geoShapeMap, Nshapes, ngeos, "{0}-{1}".format(savename, prec))

Current Time is Tue Aug 21, 2018 11:12:46 for Analyzing Metro_Stations_Regional	91
Found 91 geos from Metro_Stations_Regional
Current Time is Tue Aug 21, 2018 11:12:46 for Analyzing Kmlmetrastations	247
Found 247 geos from Metro_Stations_Regional



There are 325 entries in the saved file.
Saved shape data to Metro_Stations_Regional-7-data.p



There are 247 entries in the saved file.
Saved shape data to Metro_Stations_Regional-7-geos.p





# Amtrak Stations Data

In [64]:
from os.path import basename, dirname
shapeval = "Amtrak_Stations"
try:
    sf = shapefile.Reader(join(basedir, shapeval, shapeval))
except:
    raise ValueError("No shapefile!")
fields      = sf.fields
shapeData   = {}
geoShapeMap = {}
Nshapes   = len(sf.shapes())
ngeos     = 0
totalgeos = 0
prec      = 7
show      = False

start,cmt = clock("Analyzing {0}\t{1}".format(shapeval, Nshapes))
if show:
    print("\n\nFields -> {0}".format(fields))


irec = -1
for shapeRec in sf.iterShapeRecords():
    irec += 1

    ## Record
    record = shapeRec.record
    if show: raise ValueError("Stopping here: {0}".format(record))
    geoid = record[1]
    name  = record[2]
    shapeData[geoid] = {"Name": name, "Record": irec}
        
    geos  = getGeos(shapeRec.shape, irec, Nshapes, prec=7)

    
    if len(geos) > 0:
        newgeos = set()
        rad = 100
        long, lat = shapeRec.shape.points[0]
        nmiss = 0
        for i in range(10000):
            dLg = uniform(-rad, rad) / 111000
            dLa = uniform(-rad, rad) / 111000
            pnt = (lat + dLa, long + dLg)
            dist = haversine((lat, long), pnt)
            if dist < rad/1000:
                geo    = geohash.encode(latitude=pnt[0], longitude=pnt[1], precision=7)
                if geo not in newgeos:
                    newgeos.add(geo)
                    nmiss = 0
                else:
                    nmiss += 1

            if nmiss > 200:
                break

        geoShapeMap[geoid] = newgeos
        ngeos += len(newgeos)

print("Found {0} geos from {1}".format(ngeos, shapeval))
saveGeoData(shapeData, geoShapeMap, Nshapes, ngeos, "{0}-{1}".format(shapeval, 7))

Current Time is Mon Aug 13, 2018 11:37:01 for Analyzing Amtrak_Stations	529
Found 2574 geos from Amtrak_Stations



There are 529 entries in the saved file.
Saved shape data to Amtrak_Stations-7-data.p



There are 2574 entries in the saved file.
Saved shape data to Amtrak_Stations-7-geos.p





# Combine Single Geo Data

In [23]:
import pickle
prec=7
vals    = ['Amtrak_Stations', 'Metro_Stations_Regional', 'Airports', 'Major_Sport_Venues'] #, 'alleys-and-parking']
geomap  = {}
records = {}
for mtype in vals:
    prec=7
    if mtype == "Airports":
        prec=7
    recdata = pickle.load(open("{0}-{1}-data.p".format(mtype, prec), "rb"))
    geodata = pickle.load(open("{0}-{1}-geos.p".format(mtype, prec), "rb"))
    ngeos = 0
    for geoid,geos in geodata.items():
        ngeos += len(geos)
        for geo in geos:
            if geomap.get(geo) is None:
                geomap[geo] = {}
            if geomap[geo].get(mtype) is None:
                geomap[geo][mtype] = set()
            geomap[geo][mtype].add(geoid)

    records[mtype] = {}
    for geoid,rec in recdata.items():
        records[mtype][geoid] = rec['Name']
                
    print("{0}\t{1}\t---> {2} <---".format(mtype,ngeos,len(geomap)))
                
fname="geomap-{0}-loc.p".format(prec)
print("Writing {0}".format(fname))
pickle.dump(geomap,  open(fname, "wb"))

fname="georec-{0}-loc.p".format(prec)
print("Writing {0}".format(fname))
pickle.dump(records,  open(fname, "wb"))

Amtrak_Stations	2574	---> 2568 <---
Metro_Stations_Regional	325	---> 2885 <---
Airports	507046	---> 509286 <---
Major_Sport_Venues	8935	---> 517863 <---
Writing geomap-7-loc.p
Writing georec-7-loc.p


In [69]:
records

{'NYP': 'Name',
 'WAS': 'Name',
 'PHL': 'Name',
 'CHI': 'Name',
 'BOS': 'Name',
 'LAX': 'Name',
 'SAC': 'Name',
 'BAL': 'Name',
 'ALB': 'Name',
 'PVD': 'Name',
 'SAN': 'Name',
 'WIL': 'Name',
 'BWI': 'Name',
 'NWK': 'Name',
 'SEA': 'Name',
 'NHV': 'Name',
 'BBY': 'Name',
 'MKE': 'Name',
 'PDX': 'Name',
 'EMY': 'Name',
 'LNC': 'Name',
 'HAR': 'Name',
 'BFD': 'Name',
 'RTE': 'Name',
 'TRE': 'Name',
 'BON': 'Name',
 'STM': 'Name',
 'SOL': 'Name',
 'DAV': 'Name',
 'MET': 'Name',
 'FNO': 'Name',
 'MTZ': 'Name',
 'IRV': 'Name',
 'RVR': 'Name',
 'OKJ': 'Name',
 'SBA': 'Name',
 'STL': 'Name',
 'OSD': 'Name',
 'FUL': 'Name',
 'SKN': 'Name',
 'RIC': 'Name',
 'OLT': 'Name',
 'ANA': 'Name',
 'LOR': 'Name',
 'SFA': 'Name',
 'BNL': 'Name',
 'SJC': 'Name',
 'SNC': 'Name',
 'HUD': 'Name',
 'RHI': 'Name',
 'PAO': 'Name',
 'HNF': 'Name',
 'ALX': 'Name',
 'NOL': 'Name',
 'CLT': 'Name',
 'NCR': 'Name',
 'SUI': 'Name',
 'POR': 'Name',
 'SPI': 'Name',
 'KIN': 'Name',
 'CHM': 'Name',
 'MKA': 'Name',
 'NLC': 

# Parking (DC)

In [28]:
from os.path import basename, dirname
shapeval = "alleys-and-parking"
try:
    sf = shapefile.Reader(join(basedir, shapeval, shapeval))
except:
    raise ValueError("No shapefile!")
fields      = sf.fields
shapeData   = {}
geoShapeMap = {}
Nshapes   = len(sf.shapes())
ngeos     = 0
totalgeos = 0
prec      = 7
show      = False

start,cmt = clock("Analyzing {0}\t{1}".format(shapeval, Nshapes))
if show:
    print("\n\nFields -> {0}".format(fields))


irec = -1
for shapeRec in sf.iterShapeRecords():
    irec += 1

    ## Record
    record = shapeRec.record
    if show: raise ValueError("Stopping here: {0}".format(record))
    geoid = "Parking{0}".format(record[1])
    name  = record[3]
    shapeData[geoid] = {"Name": name, "Record": irec}
    
    geos  = getGeos(shapeRec.shape, irec, Nshapes, prec)
    geoShapeMap[geoid] = geos
    ngeos += len(geos)

print("Found {0} geos from {1}".format(ngeos, shapeval))
saveGeoData(shapeData, geoShapeMap, Nshapes, ngeos, "{0}-{1}".format(shapeval, prec))

Current Time is Mon Aug 20, 2018 14:41:05 for Analyzing alleys-and-parking	9429
  addShapeGeos(0/9429	Km EW = 0.1, Km NS  = 0.1, Sq Km = 0.0, ngeos = 1)	----> Added 25 geos (guess = 1) with 10 round and 9 misses in the end.
  addShapeGeos(500/9429	Km EW = 0.0, Km NS  = 0.0, Sq Km = 0.0, ngeos = 1)	----> Added 25 geos (guess = 1) with 10 round and 9 misses in the end.
  addShapeGeos(1000/9429	Km EW = 0.1, Km NS  = 0.1, Sq Km = 0.0, ngeos = 2)	----> Added 25 geos (guess = 2) with 10 round and 9 misses in the end.
  addShapeGeos(1500/9429	Km EW = 0.0, Km NS  = 0.1, Sq Km = 0.0, ngeos = 1)	----> Added 25 geos (guess = 1) with 10 round and 9 misses in the end.
  addShapeGeos(2000/9429	Km EW = 0.0, Km NS  = 0.1, Sq Km = 0.0, ngeos = 1)	----> Added 25 geos (guess = 1) with 10 round and 9 misses in the end.
  addShapeGeos(2500/9429	Km EW = 0.1, Km NS  = 0.2, Sq Km = 0.0, ngeos = 1)	----> Added 25 geos (guess = 1) with 10 round and 9 misses in the end.
  addShapeGeos(3000/9429	Km EW = 0.0, Km N

# Building Data (from Microsoft)

In [3]:
import zipfile
from glob import glob
from os import mkdir
from os.path import splitext, basename, dirname, join, exists
zipfiles = glob(join(basedir, "buildings", "*.zip"))
for zipname in zipfiles:
    statedir = dirname(zipname)
    name     = splitext(basename(zipname))[0]
    dirval   = join(statedir, name)
    if exists(dirval):
        continue
    try:
        mkdir(dirval)
    except:
        pass
    print("Unzipping {0}".format(zipname))
    zip_ref = zipfile.ZipFile(zipname, 'r')
    zip_ref.extractall(dirval)
    zip_ref.close()

Unzipping /Users/tgadf/Downloads/buildings/DistrictofColumbia.zip
Unzipping /Users/tgadf/Downloads/buildings/Illinois.zip
Unzipping /Users/tgadf/Downloads/buildings/Maryland.zip
Unzipping /Users/tgadf/Downloads/buildings/Virginia.zip


In [15]:
from os import mkdir
from json import load
from os.path import splitext, basename, dirname, join, exists, isdir
from collections import Counter
from glob import glob
from shapely.geometry import Polygon
files = glob(join(basedir, "buildings", "*", "*.geojson"))
for ifile in files:
    print("Reading {0}".format(ifile))
    geodata = load(open(ifile))
    gtype = geodata['type']
    geomtypes = Counter() 
    for i,feature in enumerate(geodata['features']):
        geom = feature.get('geometry')
        if geom is not None:
            geomtype = geom.get('type')
            geomtypes[geomtype] += 1
            if geomtype is not None:
                if geomtype == "Polygon":
                    points = geom.get('coordinates')
                    properties = geom.get('properties')
                    polygon = Polygon(points[0])
                    
                    print(points,properties)
    print(geomtypes.most_common())
    break

Reading /Users/tgadf/Downloads/buildings/DistrictofColumbia/DistrictofColumbia.geojson
POLYGON ((-77.014904 38.816248, -77.014842 38.816395, -77.015056 38.816449, -77.015117 38.816302, -77.014904 38.816248))
3.47054999997643e-08


ZeroDivisionError: division by zero

In [5]:
x = [[[-77.014904, 38.816248], [-77.014842, 38.816395], [-77.015056, 38.816449], [-77.015117, 38.816302], [-77.014904, 38.816248]]]

In [10]:
import numpy as np
y = np.asarray(x[0])

In [11]:
y.shape

(5, 2)

# Load State-Level Data

In [4]:
import zipfile
from glob import glob
from os import mkdir
from os.path import splitext, basename, dirname, join, exists
zipfiles = glob(join(basedir, "stateshapes", "*Columbia*.zip"))
for zipname in zipfiles:
    statedir = dirname(zipname)
    name     = splitext(basename(zipname))[0]
    dirval   = join(statedir, name)
    if exists(dirval):
        continue
    try:
        mkdir(dirval)
    except:
        pass
    zip_ref = zipfile.ZipFile(zipname, 'r')
    zip_ref.extractall(dirval)
    zip_ref.close()

In [35]:
from collections import Counter
from os.path import basename, dirname, exists, splitext, isdir
from glob import glob
import zipfile
from os import mkdir
from shutil import rmtree

prec=7
shapevals = ["Trans_AirportPoint", "Trans_AirportRunway", "Trans_RailFeature", "Trans_RoadSegment2", "Trans_TrailSegment", "Trans_RoadSegment", "Trans_RoadSegment3"]
shapevals = ["Trans_AirportPoint", "Trans_AirportRunway"]

states = ["11", "17", "24", "51"]
states = ["10"]
states = [str(x) for x in range(20,80)]
basevals = ["TrailSegment", "RailFeature", "RoadSegment"]
show=False
for state in states:
    newdirname = None
    for baseval in basevals:
        shapevals     = ["{0}{1}".format(baseval, x) for x in range(20)]
        shapevals[0]  = baseval
        dirvals       = glob(join(basedir, "stateshapes", "TRAN_{0}_*_GU_STATEORTERRITORY".format(state), "Shape"))
        if len(dirvals) == 0:
            try:
                zipname = glob(join(basedir, "stateshapes", "TRAN_{0}_*_GU_STATEORTERRITORY.zip".format(state)))[0]
            except:
                continue
            if exists(zipname):
                print("Unzipping {0}".format(zipname))
                zip_ref = zipfile.ZipFile(zipname, 'r')
                dirval  = join(join(basedir, "stateshapes", splitext(basename(zipname))[0]))
                if exists(dirval):
                    pass
                else:
                    mkdir(dirval)
                newdirname = dirval
                print("    Unzipping to {0}".format(dirval))
                zip_ref.extractall(dirval)
                zip_ref.close()
                
        
        scntr = Counter()
        remainCntr = Counter()

        shapeData   = {}
        geoShapeMap = {}
        totalgeos   = 0
        keepFrac    = 0
        totalFrac   = 0

        for shapeval in shapevals:
            for dirval in dirvals:
                if newdirname is None:
                    newdirname = dirname(dirval)
                try:
                    sf = shapefile.Reader(join(dirval, "Trans_{0}".format(shapeval)))
                except:
                    continue
                fields = sf.fields
                shapes = sf.shapes()
                Nshapes = len(shapes)
                ngeos = 0

                start,cmt = clock("Analyzing {0}\t{1}\t{2}".format(state, shapeval, Nshapes))
                if show:
                    print("\n\nFields -> {0}".format(fields))

                maxmiss=None
                if shapeval.startswith("Airport"):
                    itype=None
                    for i,val in enumerate(fields):
                        if val[0] == "FAA_AIRPOR":
                            itype = i-1
                            break
                if shapeval.startswith("RailFeature"):
                    maxmiss=50 
                    itype=None
                    for i,val in enumerate(fields):
                        if val[0] == "LENGTHKM":
                            itype = i-1
                            break
                if shapeval.startswith("TrailSegment"):
                    itype=None
                    for i,val in enumerate(fields):
                        if val[0] == "SOURCE_D00":
                            itype = i-1
                            break
                if shapeval.startswith("RoadSegment"):
                    maxmiss=50
                    itype=None
                    for i,val in enumerate(fields):
                        if val[0] == "PERMANENT_":
                            itype = i-1
                            break
                    iint=None
                    for i,val in enumerate(fields):
                        if val[0] == "INTERSTATE":
                            iint = i-1
                            break



                irec = -1
                ngeos = 0
                for shapeRec in sf.iterShapeRecords():
                    totalFrac += 1
                    keep = True
                    irec += 1

                    if totalFrac % 50000 == 0 and totalFrac > 0:
                        print("\tKeeping {0} of {1} = {2}".format(keepFrac, totalFrac, round(100*keepFrac/totalFrac,1)))

                    if show:
                        print(len(shapeRec.shape.points), shapeRec.record)
                        #continue

                    ## Record
                    record = shapeRec.record
                    extra  = []
                    if shapeval.startswith("AirportPoint"):
                        geoid = record[itype]
                        name  = record[itype+1]
                        test  = record[itype+2]
                        try:
                            int(test)
                        except:
                            continue
                    elif shapeval.startswith("AirportRunway"):
                        geoid  = record[itype]
                        name   = record[itype+1]
                    elif shapeval.startswith("RailFeature"):
                        length = record[itype]
                        name   = record[itype+3]
                        geoid  = name
                    elif shapeval.startswith("TrailSegment"):
                        length = record[itype]
                        name   = record[itype+3]
                        geoid  = name
                    elif shapeval.startswith("RoadSegment"):
                        geoid  = record[itype]
                        name   = record[itype+21]
                        inter  = "-".join(record[iint:iint+4])
                        usrte  = "-".join(record[iint+4:iint+8])
                        strte  = "-".join(record[iint+8:iint+12])
                        ctrte  = record[iint+12]
                        street = record[iint+15]
                        scntr[street] += 1
                        extra  = [inter, usrte, strte, ctrte, street]

                        ## Interstate
                        isInterstate = False
                        if inter != '---':
                            isInterstate = True

                        ## USRte
                        isUSRte = False
                        if usrte != '---':
                            isUSRte = True

                        ## StateRte
                        isStateRte = False
                        if strte != '---':
                            isStateRte = True

                        ## Highway
                        isHighway = False
                        try:
                            if any([street.find(" {0}".format(x)) != -1 for x in ['Hw ', 'Hwy', 'Pkwy', 'Hwy', 'Fwy', 'Tollway', 'Expy']]):
                                isHighway = True
                        except:
                            pass

                        ## Major Road
                        isMajorRd = False
                        try:
                            if any([street.find(" {0}".format(x)) != -1 for x in ['Ave', 'Blvd']]):
                                isMajorRd = True
                        except:
                            pass

                        ## Connections
                        isConnection = False
                        try:
                            if any([street.find(" {0}".format(x)) != -1 for x in ['Bridge ', ' Bdg']]):
                                isConnection = True
                        except:
                            pass
                    else:
                        print(record)
                        1/0

                    
                    if shapeval.startswith("TrailSegment"):
                        geos = addLinearGeos(irec, Nshapes, shapeRec.shape, prec, debug=False)
                        ngeos += len(geos)
                        totalgeos += ngeos
                        catID = "Trail"
                        geoid = catID
                        name  = catID
                        if shapeData.get(catID) is None:
                            shapeData[catID] = {}
                            shapeData[catID][geoid]   = {"Name": name}
                            geoShapeMap[catID] = {}
                            geoShapeMap[catID][geoid] = set()
                        for geo in geos:
                            geoShapeMap[catID][geoid].add(geo)

                    
                    if shapeval.startswith("RailFeature"):
                        geos = addLinearGeos(irec, Nshapes, shapeRec.shape, prec, debug=False)
                        ngeos += len(geos)
                        totalgeos += ngeos
                        catID = "Rail"
                        geoid = catID
                        name  = catID
                        if shapeData.get(catID) is None:
                            shapeData[catID] = {}
                            shapeData[catID][geoid]   = {"Name": name}
                            geoShapeMap[catID] = {}
                            geoShapeMap[catID][geoid] = set()
                        for geo in geos:
                            geoShapeMap[catID][geoid].add(geo)


                    if shapeval.startswith("RoadSegment"):                
                        if not any([isInterstate, isUSRte, isStateRte, isHighway, isMajorRd, isConnection]):       
                            continue
                        else:
                            keepFrac += 1

                        geos = addLinearGeos(irec, Nshapes, shapeRec.shape, prec, debug=False)
                        ngeos += len(geos)
                        totalgeos += ngeos
                        if isInterstate:
                            catID = "Interstate"
                            geoid = catID
                            name  = catID
                            if shapeData.get(catID) is None:
                                shapeData[catID] = {}
                                shapeData[catID][geoid]   = {"Name": name}
                                geoShapeMap[catID] = {}
                                geoShapeMap[catID][geoid] = set()
                            for geo in geos:
                                geoShapeMap[catID][geoid].add(geo)
                        if isUSRte:
                            catID = "USRte"
                            geoid = catID
                            name  = catID
                            if shapeData.get(catID) is None:
                                shapeData[catID] = {}
                                shapeData[catID][geoid]   = {"Name": name}
                                geoShapeMap[catID] = {}
                                geoShapeMap[catID][geoid] = set()
                            for geo in geos:
                                geoShapeMap[catID][geoid].add(geo)
                        if isStateRte:
                            catID = "StateRte"
                            geoid = catID
                            name  = catID
                            if shapeData.get(catID) is None:
                                shapeData[catID] = {}
                                shapeData[catID][geoid]   = {"Name": name}
                                geoShapeMap[catID] = {}
                                geoShapeMap[catID][geoid] = set()
                            for geo in geos:
                                geoShapeMap[catID][geoid].add(geo)
                        if isHighway:
                            catID = "Highway"
                            geoid = catID
                            name  = catID
                            if shapeData.get(catID) is None:
                                shapeData[catID] = {}
                                shapeData[catID][geoid]   = {"Name": name}
                                geoShapeMap[catID] = {}
                                geoShapeMap[catID][geoid] = set()
                            for geo in geos:
                                geoShapeMap[catID][geoid].add(geo)
                        if isMajorRd:
                            catID = "MajorRd"
                            geoid = catID
                            name  = catID
                            if shapeData.get(catID) is None:
                                shapeData[catID] = {}
                                shapeData[catID][geoid]   = {"Name": name}
                                geoShapeMap[catID] = {}
                                geoShapeMap[catID][geoid] = set()
                            for geo in geos:
                                geoShapeMap[catID][geoid].add(geo)
                        if isConnection:
                            catID = "Connection"
                            geoid = catID
                            name  = catID
                            if shapeData.get(catID) is None:
                                shapeData[catID] = {}
                                shapeData[catID][geoid]   = {"Name": name}
                                geoShapeMap[catID] = {}
                                geoShapeMap[catID][geoid] = set()
                            for geo in geos:
                                geoShapeMap[catID][geoid].add(geo)

                print("Found {0} geos from {1}-{2}".format(ngeos, state, shapeval))


        for catID,catData in shapeData.items():
            shapeval = catID
            ngeos = len(geoShapeMap[catID][catID])
            print("Found {0} of type {1}/{2}".format(ngeos,baseval,shapeval))
            saveGeoData(shapeData[catID], geoShapeMap[catID], 0, ngeos, "roads/{0}-{1}-{2}-{3}".format(baseval, shapeval, state, prec))
            
    if newdirname is not None:
        if isdir(newdirname):
            print("--->>> Removing {0} directory <<<---".format(newdirname))
            rmtree(newdirname)

Unzipping /Users/tgadf/Downloads/stateshapes/TRAN_20_Kansas_GU_STATEORTERRITORY.zip
    Unzipping to /Users/tgadf/Downloads/stateshapes/TRAN_20_Kansas_GU_STATEORTERRITORY
Current Time is Fri Aug 31, 2018 21:30:01 for Analyzing 20	RailFeature	6276
Found 39609 geos from 20-RailFeature
Found 27299 of type RailFeature/Rail


There are 1 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-20-7-data.p


There are 27299 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-20-7-geos.p


Current Time is Fri Aug 31, 2018 21:30:07 for Analyzing 20	RoadSegment	250000
	Keeping 8067 of 50000 = 16.1
	Keeping 14341 of 100000 = 14.3
	Keeping 22407 of 150000 = 14.9
	Keeping 29447 of 200000 = 14.7
	Keeping 36399 of 250000 = 14.6
Found 106296 geos from 20-RoadSegment
Current Time is Fri Aug 31, 2018 21:30:36 for Analyzing 20	RoadSegment2	250000
	Keeping 44370 of 300000 = 14.8
	Keeping 50781 of 350000 = 14.5
	Keeping 58712 of 400000 = 14.7
	Keeping 65144 of 450000 = 14.5


Unzipping /Users/tgadf/Downloads/stateshapes/TRAN_23_Maine_GU_STATEORTERRITORY.zip
    Unzipping to /Users/tgadf/Downloads/stateshapes/TRAN_23_Maine_GU_STATEORTERRITORY
Current Time is Fri Aug 31, 2018 21:34:18 for Analyzing 23	RailFeature	1588
Found 15701 geos from 23-RailFeature
Found 12745 of type RailFeature/Rail


There are 1 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-23-7-data.p


There are 12745 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-23-7-geos.p


Current Time is Fri Aug 31, 2018 21:34:23 for Analyzing 23	RoadSegment	250000
	Keeping 8134 of 50000 = 16.3
	Keeping 16072 of 100000 = 16.1
	Keeping 24047 of 150000 = 16.0
	Keeping 32113 of 200000 = 16.1
	Keeping 40211 of 250000 = 16.1
Found 130988 geos from 23-RoadSegment
Current Time is Fri Aug 31, 2018 21:34:50 for Analyzing 23	RoadSegment2	19842
Found 10022 geos from 23-RoadSegment2
Found 17078 of type RoadSegment/USRte


There are 1 entries in the saved file.
Saved shape da

Current Time is Fri Aug 31, 2018 21:37:25 for Analyzing 26	RoadSegment	250000
	Keeping 8452 of 50000 = 16.9
	Keeping 16967 of 100000 = 17.0
	Keeping 25270 of 150000 = 16.8
	Keeping 33683 of 200000 = 16.8
	Keeping 42387 of 250000 = 17.0
Found 110038 geos from 26-RoadSegment
Current Time is Fri Aug 31, 2018 21:37:55 for Analyzing 26	RoadSegment2	250000
	Keeping 51274 of 300000 = 17.1
	Keeping 60021 of 350000 = 17.1
	Keeping 68745 of 400000 = 17.2
	Keeping 77569 of 450000 = 17.2
	Keeping 86268 of 500000 = 17.3
Found 113688 geos from 26-RoadSegment2
Current Time is Fri Aug 31, 2018 21:38:25 for Analyzing 26	RoadSegment3	250000
	Keeping 95124 of 550000 = 17.3
	Keeping 103687 of 600000 = 17.3
	Keeping 112458 of 650000 = 17.3
	Keeping 121245 of 700000 = 17.3
	Keeping 129992 of 750000 = 17.3
Found 112575 geos from 26-RoadSegment3
Current Time is Fri Aug 31, 2018 21:38:51 for Analyzing 26	RoadSegment4	87979
	Keeping 138641 of 800000 = 17.3
Found 37702 geos from 26-RoadSegment4
Found 48229 of ty

Unzipping /Users/tgadf/Downloads/stateshapes/TRAN_29_Missouri_GU_STATEORTERRITORY.zip
    Unzipping to /Users/tgadf/Downloads/stateshapes/TRAN_29_Missouri_GU_STATEORTERRITORY
Current Time is Fri Aug 31, 2018 21:42:51 for Analyzing 29	RailFeature	7015
Found 50266 geos from 29-RailFeature
Found 35484 of type RailFeature/Rail


There are 1 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-29-7-data.p


There are 35484 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-29-7-geos.p


Current Time is Fri Aug 31, 2018 21:42:57 for Analyzing 29	RoadSegment	250000
	Keeping 8924 of 50000 = 17.8
	Keeping 18581 of 100000 = 18.6
	Keeping 28137 of 150000 = 18.8
	Keeping 37625 of 200000 = 18.8
	Keeping 46355 of 250000 = 18.5
Found 146277 geos from 29-RoadSegment
Current Time is Fri Aug 31, 2018 21:43:29 for Analyzing 29	RoadSegment2	250000
	Keeping 55395 of 300000 = 18.5
	Keeping 65065 of 350000 = 18.6
	Keeping 74396 of 400000 = 18.6
	Keeping 83932 of 450000 = 1

Unzipping /Users/tgadf/Downloads/stateshapes/TRAN_32_Nevada_GU_STATEORTERRITORY.zip
    Unzipping to /Users/tgadf/Downloads/stateshapes/TRAN_32_Nevada_GU_STATEORTERRITORY
Current Time is Fri Aug 31, 2018 21:47:30 for Analyzing 32	RailFeature	1757
Found 16948 geos from 32-RailFeature
Found 12473 of type RailFeature/Rail


There are 1 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-32-7-data.p


There are 12473 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-32-7-geos.p


Current Time is Fri Aug 31, 2018 21:47:35 for Analyzing 32	RoadSegment	250000
	Keeping 6995 of 50000 = 14.0
	Keeping 13671 of 100000 = 13.7
	Keeping 22179 of 150000 = 14.8
	Keeping 29810 of 200000 = 14.9
	Keeping 36727 of 250000 = 14.7
Found 117018 geos from 32-RoadSegment
Current Time is Fri Aug 31, 2018 21:48:03 for Analyzing 32	RoadSegment2	68527
	Keeping 43673 of 300000 = 14.6
Found 31678 geos from 32-RoadSegment2
Found 34697 of type RoadSegment/Highway


There are 1 entri

Found 20214 geos from 35-RailFeature
Found 15219 of type RailFeature/Rail


There are 1 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-35-7-data.p


There are 15219 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-35-7-geos.p


Current Time is Fri Aug 31, 2018 21:51:17 for Analyzing 35	RoadSegment	250000
	Keeping 5778 of 50000 = 11.6
	Keeping 10976 of 100000 = 11.0
	Keeping 16327 of 150000 = 10.9
	Keeping 22251 of 200000 = 11.1
	Keeping 28170 of 250000 = 11.3
Found 97298 geos from 35-RoadSegment
Current Time is Fri Aug 31, 2018 21:51:47 for Analyzing 35	RoadSegment2	250000
	Keeping 32756 of 300000 = 10.9
	Keeping 39105 of 350000 = 11.2
	Keeping 44928 of 400000 = 11.2
	Keeping 50295 of 450000 = 11.2
	Keeping 55460 of 500000 = 11.1
Found 96572 geos from 35-RoadSegment2
Current Time is Fri Aug 31, 2018 21:52:12 for Analyzing 35	RoadSegment3	58103
	Keeping 61419 of 550000 = 11.2
Found 20679 geos from 35-RoadSegment3
Found 71094 of type RoadSegmen

Saved shape data to roads/RoadSegment-MajorRd-37-7-geos.p


--->>> Removing /Users/tgadf/Downloads/stateshapes/TRAN_37_North_Carolina_GU_STATEORTERRITORY directory <<<---
Unzipping /Users/tgadf/Downloads/stateshapes/TRAN_38_North_Dakota_GU_STATEORTERRITORY.zip
    Unzipping to /Users/tgadf/Downloads/stateshapes/TRAN_38_North_Dakota_GU_STATEORTERRITORY
Current Time is Fri Aug 31, 2018 21:57:10 for Analyzing 38	RailFeature	2469
Found 26187 geos from 38-RailFeature
Found 21173 of type RailFeature/Rail


There are 1 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-38-7-data.p


There are 21173 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-38-7-geos.p


Current Time is Fri Aug 31, 2018 21:57:15 for Analyzing 38	RoadSegment	250000
	Keeping 13448 of 50000 = 26.9
	Keeping 26920 of 100000 = 26.9
	Keeping 40352 of 150000 = 26.9
	Keeping 53995 of 200000 = 27.0
	Keeping 67574 of 250000 = 27.0
Found 266505 geos from 38-RoadSegment
Current Time is Fri Aug

Saved shape data to roads/RoadSegment-Connection-40-7-data.p


There are 218 entries in the saved file.
Saved shape data to roads/RoadSegment-Connection-40-7-geos.p


--->>> Removing /Users/tgadf/Downloads/stateshapes/TRAN_40_Oklahoma_GU_STATEORTERRITORY directory <<<---
Unzipping /Users/tgadf/Downloads/stateshapes/TRAN_41_Oregon_GU_STATEORTERRITORY.zip
    Unzipping to /Users/tgadf/Downloads/stateshapes/TRAN_41_Oregon_GU_STATEORTERRITORY
Current Time is Fri Aug 31, 2018 22:03:08 for Analyzing 41	RailFeature	5207
Found 35192 geos from 41-RailFeature
Found 23885 of type RailFeature/Rail


There are 1 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-41-7-data.p


There are 23885 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-41-7-geos.p


Current Time is Fri Aug 31, 2018 22:03:15 for Analyzing 41	RoadSegment	250000
	Keeping 5894 of 50000 = 11.8
	Keeping 12229 of 100000 = 12.2
	Keeping 19225 of 150000 = 12.8
	Keeping 24928 of 200000 = 12.5
	Keep

Current Time is Fri Aug 31, 2018 22:08:32 for Analyzing 45	RailFeature	3057
Found 34531 geos from 45-RailFeature
Found 28853 of type RailFeature/Rail


There are 1 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-45-7-data.p


There are 28853 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-45-7-geos.p


Current Time is Fri Aug 31, 2018 22:08:39 for Analyzing 45	RoadSegment	250000
	Keeping 15733 of 50000 = 31.5
	Keeping 30779 of 100000 = 30.8
	Keeping 46546 of 150000 = 31.0
	Keeping 61697 of 200000 = 30.8
	Keeping 77297 of 250000 = 30.9
Found 263622 geos from 45-RoadSegment
Current Time is Fri Aug 31, 2018 22:09:15 for Analyzing 45	RoadSegment2	250000
	Keeping 92240 of 300000 = 30.7
	Keeping 107559 of 350000 = 30.7
	Keeping 122766 of 400000 = 30.7
	Keeping 138015 of 450000 = 30.7
	Keeping 153360 of 500000 = 30.7
Found 259933 geos from 45-RoadSegment2
Current Time is Fri Aug 31, 2018 22:09:46 for Analyzing 45	RoadSegment3	41724
Found 42514 geos 

Current Time is Fri Aug 31, 2018 22:13:32 for Analyzing 48	RoadSegment	250000
	Keeping 6388 of 50000 = 12.8
	Keeping 12668 of 100000 = 12.7
	Keeping 19139 of 150000 = 12.8
	Keeping 24811 of 200000 = 12.4
	Keeping 31304 of 250000 = 12.5
Found 80457 geos from 48-RoadSegment
Current Time is Fri Aug 31, 2018 22:14:02 for Analyzing 48	RoadSegment2	250000
	Keeping 37406 of 300000 = 12.5
	Keeping 43909 of 350000 = 12.5
	Keeping 50092 of 400000 = 12.5
	Keeping 56391 of 450000 = 12.5
	Keeping 62748 of 500000 = 12.5
Found 81266 geos from 48-RoadSegment2
Current Time is Fri Aug 31, 2018 22:14:31 for Analyzing 48	RoadSegment3	250000
	Keeping 68382 of 550000 = 12.4
	Keeping 74993 of 600000 = 12.5
	Keeping 81153 of 650000 = 12.5
	Keeping 87433 of 700000 = 12.5
	Keeping 93914 of 750000 = 12.5
Found 80702 geos from 48-RoadSegment3
Current Time is Fri Aug 31, 2018 22:15:01 for Analyzing 48	RoadSegment4	250000
	Keeping 100111 of 800000 = 12.5
	Keeping 106553 of 850000 = 12.5
	Keeping 112406 of 900000 = 

Current Time is Fri Aug 31, 2018 22:19:30 for Analyzing 51	TrailSegment	438
Found 39352 geos from 51-TrailSegment
Found 38291 of type TrailSegment/Trail


There are 1 entries in the saved file.
Saved shape data to roads/TrailSegment-Trail-51-7-data.p


There are 38291 entries in the saved file.
Saved shape data to roads/TrailSegment-Trail-51-7-geos.p


Current Time is Fri Aug 31, 2018 22:19:33 for Analyzing 51	RailFeature	5099
Found 45710 geos from 51-RailFeature
Found 34925 of type RailFeature/Rail


There are 1 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-51-7-data.p


There are 34925 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-51-7-geos.p


Current Time is Fri Aug 31, 2018 22:19:40 for Analyzing 51	RoadSegment	250000
	Keeping 5845 of 50000 = 11.7
	Keeping 12305 of 100000 = 12.3
	Keeping 18857 of 150000 = 12.6
	Keeping 25105 of 200000 = 12.6
	Keeping 30972 of 250000 = 12.4
Found 84516 geos from 51-RoadSegment
Current Time is Fri Aug 

Unzipping /Users/tgadf/Downloads/stateshapes/TRAN_55_Wisconsin_GU_STATEORTERRITORY.zip
    Unzipping to /Users/tgadf/Downloads/stateshapes/TRAN_55_Wisconsin_GU_STATEORTERRITORY
Current Time is Fri Aug 31, 2018 22:24:44 for Analyzing 55	RailFeature	4639
Found 32915 geos from 55-RailFeature
Found 24529 of type RailFeature/Rail


There are 1 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-55-7-data.p


There are 24529 entries in the saved file.
Saved shape data to roads/RailFeature-Rail-55-7-geos.p


Current Time is Fri Aug 31, 2018 22:24:49 for Analyzing 55	RoadSegment	250000
	Keeping 11190 of 50000 = 22.4
	Keeping 21909 of 100000 = 21.9
	Keeping 32357 of 150000 = 21.6
	Keeping 43930 of 200000 = 22.0
	Keeping 54824 of 250000 = 21.9
Found 183356 geos from 55-RoadSegment
Current Time is Fri Aug 31, 2018 22:25:19 for Analyzing 55	RoadSegment2	250000
	Keeping 65419 of 300000 = 21.8
	Keeping 76218 of 350000 = 21.8
	Keeping 87601 of 400000 = 21.9
	Keeping 98522 of 450000 

In [None]:
if False:
    # Ave, Pkwy, Blvd, Hwy, Fwy
    cnts = Counter()
    for item in remainCntr.most_common():
        for val in item[0].split():
            cnts[val] += 1
        #print(item)

    for item in cnts.most_common(100):
        print(item)

# Create State Data

In [40]:
import pickle
prec=7
from glob import glob
from os.path import basename,splitext
vals     = glob("roads/*-{0}-geos.p".format(prec))
basevals = set([splitext(basename(x))[0].split('-')[0] for x in vals])
mtypes   = set([splitext(basename(x))[0].split('-')[1] for x in vals])
states   = set([splitext(basename(x))[0].split('-')[2] for x in vals])
geomap   = {}
records  = {}
ngeos    = 0
totalgeos = 0
for mtype in mtypes:
    for state in states:
        for baseval in basevals:
            try:
                recname = "roads/{0}-{1}-{2}-{3}-data.p".format(baseval, mtype, state, prec)
                recdata = pickle.load(open(recname, "rb"))
            except:
                print("Could not open {0}".format(recname))
                continue
            try:
                geoname = "roads/{0}-{1}-{2}-{3}-geos.p".format(baseval, mtype, state, prec)
                geodata = pickle.load(open(geoname, "rb"))
            except:
                print("Could not open {0}".format(geoname))
                continue
            ngeos = 0

            for geoid,geos in geodata.items():
                ngeos += len(geos)
                totalgeos += ngeos
                for geo in geos:
                    if geomap.get(geo) is None:
                        geomap[geo] = {}
                    geomap[geo][mtype] = geoid

            for geoid,rec in recdata.items():
                records[geoid] = rec['Name']
                break

    print("{0}\t{1}\t---> {2} <---".format(mtype,ngeos,len(geomap)))
         
print("Found {0} geos in total".format(totalgeos))
fname="geomap-{0}-Roads.p".format(prec)
print("Writing {0}".format(fname))
pickle.dump(geomap,  open(fname, "wb"))

fname="georec-{0}-Roads.p".format(prec)
print("Writing {0}".format(fname))
pickle.dump(records,  open(fname, "wb"))

Could not open roads/TrailSegment-MajorRd-33-7-data.p
Could not open roads/RailFeature-MajorRd-33-7-data.p
Could not open roads/TrailSegment-MajorRd-46-7-data.p
Could not open roads/RailFeature-MajorRd-46-7-data.p
Could not open roads/TrailSegment-MajorRd-19-7-data.p
Could not open roads/RailFeature-MajorRd-19-7-data.p
Could not open roads/TrailSegment-MajorRd-22-7-data.p
Could not open roads/RailFeature-MajorRd-22-7-data.p
Could not open roads/TrailSegment-MajorRd-72-7-data.p
Could not open roads/RailFeature-MajorRd-72-7-data.p
Could not open roads/TrailSegment-MajorRd-56-7-data.p
Could not open roads/RailFeature-MajorRd-56-7-data.p
Could not open roads/TrailSegment-MajorRd-35-7-data.p
Could not open roads/RailFeature-MajorRd-35-7-data.p
Could not open roads/TrailSegment-MajorRd-11-7-data.p
Could not open roads/RailFeature-MajorRd-11-7-data.p
Could not open roads/TrailSegment-MajorRd-25-7-data.p
Could not open roads/RailFeature-MajorRd-25-7-data.p
Could not open roads/TrailSegment-Maj

Could not open roads/RoadSegment-Rail-37-7-data.p
Could not open roads/TrailSegment-Rail-4-7-data.p
Could not open roads/RoadSegment-Rail-4-7-data.p
Could not open roads/TrailSegment-Rail-36-7-data.p
Could not open roads/RoadSegment-Rail-36-7-data.p
Could not open roads/TrailSegment-Rail-5-7-data.p
Could not open roads/RoadSegment-Rail-5-7-data.p
Could not open roads/TrailSegment-Rail-34-7-data.p
Could not open roads/RoadSegment-Rail-34-7-data.p
Could not open roads/TrailSegment-Rail-27-7-data.p
Could not open roads/RoadSegment-Rail-27-7-data.p
Could not open roads/TrailSegment-Rail-41-7-data.p
Could not open roads/RoadSegment-Rail-41-7-data.p
Could not open roads/TrailSegment-Rail-49-7-data.p
Could not open roads/RoadSegment-Rail-49-7-data.p
Could not open roads/TrailSegment-Rail-29-7-data.p
Could not open roads/RoadSegment-Rail-29-7-data.p
Could not open roads/TrailSegment-Rail-32-7-data.p
Could not open roads/RoadSegment-Rail-32-7-data.p
Could not open roads/TrailSegment-Rail-47-7-d

Could not open roads/TrailSegment-Highway-72-7-data.p
Could not open roads/RailFeature-Highway-72-7-data.p
Could not open roads/TrailSegment-Highway-56-7-data.p
Could not open roads/RailFeature-Highway-56-7-data.p
Could not open roads/TrailSegment-Highway-35-7-data.p
Could not open roads/RailFeature-Highway-35-7-data.p
Could not open roads/TrailSegment-Highway-11-7-data.p
Could not open roads/RailFeature-Highway-11-7-data.p
Could not open roads/TrailSegment-Highway-25-7-data.p
Could not open roads/RailFeature-Highway-25-7-data.p
Could not open roads/TrailSegment-Highway-31-7-data.p
Could not open roads/RailFeature-Highway-31-7-data.p
Could not open roads/TrailSegment-Highway-38-7-data.p
Could not open roads/RailFeature-Highway-38-7-data.p
Could not open roads/TrailSegment-Highway-17-7-data.p
Could not open roads/RailFeature-Highway-17-7-data.p
Could not open roads/TrailSegment-Highway-28-7-data.p
Could not open roads/RailFeature-Highway-28-7-data.p
Could not open roads/TrailSegment-Hig

Could not open roads/RailFeature-Trail-6-7-data.p
Could not open roads/RoadSegment-Trail-6-7-data.p
Could not open roads/RailFeature-Trail-51-7-data.p
Could not open roads/RoadSegment-Trail-51-7-data.p
Could not open roads/TrailSegment-Trail-15-7-data.p
Could not open roads/RailFeature-Trail-15-7-data.p
Could not open roads/RoadSegment-Trail-15-7-data.p
Could not open roads/TrailSegment-Trail-44-7-data.p
Could not open roads/RailFeature-Trail-44-7-data.p
Could not open roads/RoadSegment-Trail-44-7-data.p
Could not open roads/TrailSegment-Trail-50-7-data.p
Could not open roads/RailFeature-Trail-50-7-data.p
Could not open roads/RoadSegment-Trail-50-7-data.p
Could not open roads/TrailSegment-Trail-55-7-data.p
Could not open roads/RailFeature-Trail-55-7-data.p
Could not open roads/RoadSegment-Trail-55-7-data.p
Could not open roads/TrailSegment-Trail-39-7-data.p
Could not open roads/RailFeature-Trail-39-7-data.p
Could not open roads/RoadSegment-Trail-39-7-data.p
Could not open roads/TrailSe

Could not open roads/TrailSegment-Interstate-12-7-data.p
Could not open roads/RailFeature-Interstate-12-7-data.p
Could not open roads/TrailSegment-Interstate-42-7-data.p
Could not open roads/RailFeature-Interstate-42-7-data.p
Could not open roads/TrailSegment-Interstate-53-7-data.p
Could not open roads/RailFeature-Interstate-53-7-data.p
Could not open roads/TrailSegment-Interstate-2-7-data.p
Could not open roads/RailFeature-Interstate-2-7-data.p
Could not open roads/TrailSegment-Interstate-24-7-data.p
Could not open roads/RailFeature-Interstate-24-7-data.p
Could not open roads/TrailSegment-Interstate-21-7-data.p
Could not open roads/RailFeature-Interstate-21-7-data.p
Could not open roads/TrailSegment-Interstate-30-7-data.p
Could not open roads/RailFeature-Interstate-30-7-data.p
Could not open roads/TrailSegment-Interstate-8-7-data.p
Could not open roads/RailFeature-Interstate-8-7-data.p
Could not open roads/TrailSegment-Interstate-10-7-data.p
Could not open roads/RailFeature-Interstate

Could not open roads/TrailSegment-StateRte-38-7-data.p
Could not open roads/RailFeature-StateRte-38-7-data.p
Could not open roads/TrailSegment-StateRte-17-7-data.p
Could not open roads/RailFeature-StateRte-17-7-data.p
Could not open roads/TrailSegment-StateRte-28-7-data.p
Could not open roads/RailFeature-StateRte-28-7-data.p
Could not open roads/TrailSegment-StateRte-54-7-data.p
Could not open roads/RailFeature-StateRte-54-7-data.p
Could not open roads/TrailSegment-StateRte-26-7-data.p
Could not open roads/RailFeature-StateRte-26-7-data.p
Could not open roads/TrailSegment-StateRte-20-7-data.p
Could not open roads/RailFeature-StateRte-20-7-data.p
Could not open roads/TrailSegment-StateRte-48-7-data.p
Could not open roads/RailFeature-StateRte-48-7-data.p
Could not open roads/TrailSegment-StateRte-1-7-data.p
Could not open roads/RailFeature-StateRte-1-7-data.p
Could not open roads/TrailSegment-StateRte-45-7-data.p
Could not open roads/RailFeature-StateRte-45-7-data.p
Could not open roads/

In [46]:
geomap

{'dqcm0mn': {'StateRte': 'StateRte', 'Highway': 'Highway'},
 'dqcm1ze': {'StateRte': 'StateRte', 'Highway': 'Highway'},
 'dqchzff': {'StateRte': 'StateRte', 'Highway': 'Highway'},
 'dqcm1wv': {'StateRte': 'StateRte', 'MajorRd': 'MajorRd'},
 'dqcm0kg': {'StateRte': 'StateRte',
  'MajorRd': 'MajorRd',
  'Highway': 'Highway'},
 'dqchzg1': {'StateRte': 'StateRte',
  'MajorRd': 'MajorRd',
  'Highway': 'Highway'},
 'dqcm1zd': {'StateRte': 'StateRte', 'Highway': 'Highway'},
 'dqcm0kd': {'StateRte': 'StateRte',
  'MajorRd': 'MajorRd',
  'Highway': 'Highway'},
 'dqcm0k9': {'StateRte': 'StateRte',
  'MajorRd': 'MajorRd',
  'Highway': 'Highway'},
 'dqcm2bv': {'StateRte': 'StateRte', 'MajorRd': 'MajorRd'},
 'dqcm0tg': {'StateRte': 'StateRte',
  'MajorRd': 'MajorRd',
  'Highway': 'Highway'},
 'dqcm2b4': {'StateRte': 'StateRte', 'MajorRd': 'MajorRd'},
 'dqcm0hq': {'StateRte': 'StateRte', 'Highway': 'Highway'},
 'dqcjtug': {'StateRte': 'StateRte', 'MajorRd': 'MajorRd'},
 'dqcm0t0': {'StateRte': 'Stat

In [22]:
import pickle
prec=7

show=False
from os.path import basename, dirname
from glob import glob
mtype = "Road"
geofiles = glob(join(basedir, "Trans_RoadSegment*-*_Illinois-{0}-geos.p".format(prec)))
recfiles = glob(join(basedir, "Trans_RoadSegment*-*_Illinois-{0}-data.p".format(prec)))
print("Found {0} and {1} files".format(len(geofiles), len(recfiles)))

geomap  = {}
records = {"Interstate": {}, "StateRte": {}, "USRte": {}, "Highway": {}, "MajorRd": {}, "Connection": {}}

from collections import Counter
cnt = Counter()
for recfile in recfiles:
    recdata = pickle.load(open(recfile, "rb"))
    print(recfile)
    for geoid,recdata in recdata.items():
        if recdata[0]:
            records["Interstate"][geoid]  = "Interstate"
        if recdata[1]:
            records["USRte"][geoid]  = "USRte"
        if recdata[2]:
            records["StateRte"][geoid]  = "StateRte"
        if recdata[3]:
            records["Highway"][geoid]  = "Highway"
        if recdata[4]:
            records["MajorRd"][geoid]  = "MajorRd"
        if recdata[5]:
            records["Connection"][geoid]  = "Connection"
    print("Found {0} roads".format(len(records)))
     

ngeos = 0
for geofile in geofiles:
    geodata = pickle.load(open(geofile, "rb"))
    print(geofile)
    for geoid,geos in geodata.items():
        if not any([records["Interstate"].get(geoid), records["USRte"].get(geoid), records["StateRte"].get(geoid), records["CountyRd"].get(geoid), records["LocalRd"].get(geoid)]):
            continue
        ngeos += len(geos)
        for geo in geos:
            if geomap.get(geo) is None:
                geomap[geo] = {}
            for k in records.keys():
                if records[k].get(geoid) is not None:
                    geomap[geo][k] = geoid
    print("Found {0} roads".format(len(geomap)))


                
fname="geomap-{0}-{1}.p".format(prec, mtype)
print("Writing {0}".format(fname))
pickle.dump(geomap,  open(fname, "wb"))

fname="georec-{0}-{1}.p".format(prec, mtype)
print("Writing {0}".format(fname))
pickle.dump(records,  open(fname, "wb"))

Found 0 and 0 files
Writing geomap-7-Road.p
Writing georec-7-Road.p


# Create Individual Records

In [17]:
prec = 6
stdirs=[]
if True:
    #stdirs.append("County_2010Census_DP1")
    #stdirs.append("CBSA_2010Census_DP1")
    #stdirs.append("CSA_2010Census_DP1")
    #stdirs.append("Place_2010Census_DP1")
    stdirs.append("MetDiv_2010Census_DP1")
    #stdirs.append("CouSub_2010Census_DP1")
    stdirs.append("ZCTA_2010Census_DP1")
    #stdirs.append("Tract_2010Census_DP1")

In [31]:
%load_ext autoreload
%autoreload
from timeUtils import clock, elapsed
show=False


names=set()

#stdirs.append("CD111_2010Census_DP1")
#stdirs.append("SLDL_2010Census_DP1")
#stdirs.append("SLDU_2010Census_DP1")

#stdirs.append("ELSD_2010Census_DP1")
#stdirs.append("SCSD_2010Census_DP1")
#stdirs.append("UNSD_2010Census_DP1")
#stdirs.append("SubMCD_2010Census_DP1")

for stdir in stdirs:
    start,cmt = clock("Analyzing {0}".format(stdir))
    print("Loading shape file")
    sf = shapefile.Reader(join(basedir, stdir, stdir))
        
    mtype=stdir.split("_")[0]

    shapes = sf.shapes()
    Nshapes = len(shapes)
    modval  = 500
    if Nshapes > 10000:
        modval = 1000
    elif Nshapes > 5000:
        modval = 250
    elif Nshapes > 1000:
        modval = 100
    else:
        if Nshapes < 500:
            modval = 10
        else:
            modval = 50
    print("There are {0} shapes in this file.".format(len(shapes)))

    fields = sf.fields
    if show:
        print("Fields -> {0}".format(fields))

    ipop=None
    for i,val in enumerate(fields):
        if val[0] == "DP0010001":
            ipop = i-1
            break

    ihouse=None
    for i,val in enumerate(fields):
        if val[0] == "DP0130001":
            ihouse = i-1
            break

    ihouseocc=None
    for i,val in enumerate(fields):
        if val[0] == "DP0180001":
            ihouseocc = i-1
            break
            
    print("  Pop       -> {0}".format(ipop))
    print("  House     -> {0}".format(ihouse))
    print("  Occupancy -> {0}".format(ihouseocc))
    elapsed(start, cmt)
    
    geoShapeMap = {}
    shapeData   = {}
    import pickle
    from numpy import linspace
    start,_ = clock("Analyzing shape files")
    irec  = 0
    for shapeRec in sf.iterShapeRecords():
        ## Record
        record = shapeRec.record
        geoid = record[0]
        name  = record[1]
        memi  = record[2]
        area  = record[-1]
        pops  = record[ipop:ipop+19]
        house = record[ihouse:ihouse+15]
        occpy = record[ihouseocc:ihouseocc+9]
        
        #names.add(name)
        #continue
        
        if mtype == "CBSA":
            ptype = None
            if not isinstance(name,str):
                continue
            if name.find("Metro Area") != -1:
                memi = "Metro"
                name = name.replace("Metro Area", "").strip()
            elif name.find("Micro Area") != -1:
                memi = "Micro"
                name = name.replace("Micro Area", "").strip()
            else:
                print(name)
                1/0
        
        if mtype == "CSA":
            ptype = None
            if not isinstance(name,str):
                continue
            if name.find(" CSA") != -1:
                name = name.replace(" CSA", "").strip()
            else:
                print(name)
                1/0
        
        if mtype == "Place":
            ptype = None
            if not isinstance(name,str):
                continue
            name = name.replace("(balance)", "").strip()
            if name.endswith(" city"):
                memi = "City"
                name = name[:-5]
            elif name.endswith(" municipality"):
                memi = "Municipality"
                name = name[:-13]
            elif name.endswith(" borough"):
                memi = "Borough"
                name = name[:-8]
            elif name.endswith(" city and borough"):
                memi = "City/Borough"
                name = name[:-17]
            elif name.endswith(" town"):
                memi = "Town"
                name = name[:-5]
            elif name.endswith(" CDP"):
                memi = "CDP"
                name = name[:-4]
            elif name.endswith(" village"):
                memi = "Village"
                name = name[:-8]
            elif name.endswith(" unified government"):
                memi = "Unified Government"
                name = name[:-19]
            elif name.endswith(" consolidated government"):
                memi = "Consolidated Government"
                name = name[:-24]
            elif name.endswith(" metro government"):
                memi = "Metro Government"
                name = name[:-17]
            elif name.endswith(" metropolitan government"):
                memi = "Metropolitan Government"
                name = name[:-24]
            elif name.endswith(" urban county"):
                memi = "Urban County"
                name = name[:-13]
            elif name.endswith(" County"):
                memi = "County"
            else:
                continue
                
        
        if mtype == "MetDiv":
            ptype = None
            if not isinstance(name,str):
                continue
            if name.find("Metro Area") != -1:
                memi = "Metro"
                name = name.replace("Metro Area", "").strip()
            elif name.find("Micro Area") != -1:
                memi = "Micro"
                name = name.replace("Micro Area", "").strip()
            else:
                print(name)
                
        
        pop    = pops[0]
        try:
            young  = round(float(sum(pops[1:5]))/pop,2)
            adult  = round(float(sum(pops[5:12]))/pop,2)
            old    = round(float(sum(pops[12:]))/pop,2)
        except:
            young = 0.0
            adult = 0.0
            old   = 0.0
        
        nhouse  = house[0]
        try:
            ffamily = round(float(house[1])/nhouse,2)
        except:
            ffamily = 0.0
        nhouseunits = occpy[0]
        try:
            foccpy = round(float(occpy[1])/nhouseunits,2)
        except:
            foccpy = 0.0
        
        if show:
            print(record)
            print(pops)
            print(pop,young,adult,old)
            print(house)
            print(nhouse,ffamily)
            print(occpy)
            print(nhouseunits,foccpy)
            #print(pop)
            #print(young)
            #print(adult)
            #print(old)

        try:
            float(memi)
            shapeData[geoid] = {"Name": name, "Record": irec, "Area": area, "Type": None, "Housing": [nhouse,ffamily], "Occupancy": [nhouseunits, foccpy], "Pop": [pop,young,adult,old]}
        except:
            shapeData[geoid] = {"Name": name, "Record": irec, "Area": area, "Type": memi, "Housing": [nhouse,ffamily], "Occupancy": [nhouseunits, foccpy], "Pop": [pop,young,adult,old]}
            
            
        if show:
            print(shapeData[geoid])
            1/0
        Nsd = len(shapeData)
        if irec % modval == 0:
            elapsed(start, "{0}/{1}\t{2}".format(irec,Nshapes,name), showTime=False)


    if len(names) == 0:
        fname = "{0}-{1}-data.p".format(stdir, prec)
        elapsed(start, "Analyzed {0} shapes".format(Nshapes))
        print("There are {0} entries in the saved file.".format(len(shapeData)))
        pickle.dump(shapeData, open(fname, "wb"))
        print("Saved shape data to {0}".format(fname))
        print("\n\n")
    
for name in names:
    print(name)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


ZeroDivisionError: division by zero

# Create Individual Geos

In [11]:
for stdir in stdirs:
    start,cmt = clock("Analyzing {0}".format(stdir))
    print("Loading shape file")
    sf = shapefile.Reader(join(basedir, stdir, stdir))
    skips=set()
        
    mtype=stdir.split("_")[0]

    shapes = sf.shapes()
    Nshapes = len(shapes)
    modval  = 500
    if Nshapes > 10000:
        modval = 1000
    elif Nshapes > 5000:
        modval = 500
    elif Nshapes > 1000:
        modval = 200
    else:
        if Nshapes < 500:
            modval = 10
        else:
            modval = 50
    print("There are {0} shapes in this file.".format(len(shapes)))
    irec  = 0
    geoShapeMap={}
    ngeos = 0
    for shapeRec in sf.iterShapeRecords():
        ## Record
        record = shapeRec.record
        geoid = record[0]
        
        ## Shape
        shape   = shapeRec.shape
        poly    = Polygon(shape.points)
        center  = poly.centroid
        pnt     = center
        long0   = center.x
        lat0    = center.y
        geo     = geohash.encode(latitude=lat0, longitude=long0, precision=prec)
        geos    = set([geo])
        ignores = set()
        debug=False
        if irec % 100 == 0:
            debug=True
        addShapeGeos(irec, Nshapes, shape, prec, geos, debug=debug)
        nAddGeos = len(geos)
        irec += 1
        
        if irec % modval == 0:
            elapsed(start, "{0}/{1}\t{2} ({3})".format(irec,Nshapes,ngeos,len(skips)), showTime=False)

        new = False
        n   = 0
        if False:
            for geoval,gval in lookup.items():
                if poly.contains(Point(gval[1],gval[0])) is True:
                    dn = len(geos)
                    addGeos(geos, geo)
                    n += len(geos) - dn
                    new = True
                    skips.add(geo)
            if new:
                print("  Added {0} unknown geohashs to {1}".format(n, geoid))
            
        if False:
            if groupings.get(geo[:3]) is not None:
                if groupings[geo[:3]].get(geo[:4]) is not None:
                    print("  Checking {0}".format(geo[:4]))
                    for geoval in groupings[geo[:3]][geo[:4]]:
                        gval = lookup[geoval]
                        #print(gval[1],gval[0])
                        if poly.contains(Point(gval[1],gval[0])) is True:
                            dn = len(geos)
                            addGeos(geos, geo)
                            n += len(geos) - dn
                            new = True
                            skips.add(geo)
            if new:
                print("  Added {0} unknown geohashs to {1}".format(n, geoid))
                            
        geoShapeMap[geoid] = geos
        ngeos += len(geos)
        
    fname = "{0}-{1}-geos.p".format(stdir, prec)
    elapsed(start, "Analyzed {0} shapes".format(Nshapes))
    print("There are {0} entries in the saved file.".format(len(geoShapeMap)))
    print("There are {0} geos in the saved file.".format(ngeos))
    pickle.dump(geoShapeMap, open(fname, "wb"))
    print("Saved shape data to {0}".format(fname))
    print("\n\n")

NameError: name 'stdirs' is not defined

# Create Final Geo Record Data

In [3]:
geomap={}
from os.path import exists
import pickle
missings={}
for stdir in stdirs:
    print("---> {0}".format(stdir))
    fname = '{0}-{1}-data.p'.format(stdir, prec)
    print(exists(fname))
    data = {}
    statedata = {}
    try:
        stdata = pickle.load(open(fname, 'rb'))
    except:
        print("Could not open {0}".format(fname))
        continue
        
    mtype=stdir.split("_")[0]

    for geoid,geodata in stdata.items():
        pop     = geodata["Pop"]
        name    = geodata["Name"]
        area    = geodata["Area"]
        ptype   = geodata["Type"]
        housing = geodata["Housing"]
        occupy  = geodata["Occupancy"]
        
        data[geoid] = {"Name": name, "Area": area, "Type": ptype, "Housing": housing, "Occupancy": occupy, "Population": pop}
        if mtype == "County":
            statedata[geoid[:2]] = {"Name": name}
            
    print(len(data))
    fname="geo{0}-{1}.p".format(mtype,prec)
    print("Writing {0}".format(fname))
    #saveJoblib(data, fname)
    pickle.dump(data,  open(fname, "wb"))
    
    if len(statedata) > 0:
        print(len(statedata))
        fname="geo{0}-{1}.p".format("State",prec)
        print("Writing {0}".format(fname))
        #saveJoblib(data, fname)
        pickle.dump(statedata,  open(fname, "wb"))    

---> Tract_2010Census_DP1
True
74002
Writing geoTract-5.p
---> ZCTA_2010Census_DP1
True
33120
Writing geoZCTA-5.p
---> County_2010Census_DP1
True
3221
Writing geoCounty-5.p
52
Writing geoState-5.p
---> CBSA_2010Census_DP1
True
951
Writing geoCBSA-5.p
---> CSA_2010Census_DP1
True
127
Writing geoCSA-5.p
---> Place_2010Census_DP1
True
29246
Writing geoPlace-5.p
---> MetDiv_2010Census_DP1
True
29
Writing geoMetDiv-5.p
---> CouSub_2010Census_DP1
True
36642
Writing geoCouSub-5.p


# Create Final Merged GeoMap

In [56]:
geomap={}
for stdir in stdirs:
    fname = '{0}-{1}-geos.p'.format(stdir, prec)
    data = {}
    statedata = {}
    try:
        stdata = pickle.load(open(fname, 'rb'))
    except:
        print("Could not open {0}".format(fname))
        continue
        
    mtype=stdir.split("_")[0]

    print("{0}\t{1}".format(mtype,len(stdata)))
    ngeos = 0
    for geoid, geos in stdata.items():
        ngeos += len(geos)
        for geo in geos:
            if geomap.get(geo) is None:
                geomap[geo] = {}
            if geomap[geo].get(mtype) is None:
                geomap[geo][mtype] = set()
            if mtype == "County":
                geomap[geo]["State"] = set()
            geomap[geo][mtype].add(geoid)
            if mtype == "County":                
                geomap[geo]["State"].add(geoid[:2])

    print("{0}\t{1}\t---> {2} <---".format(mtype,ngeos,len(geomap)))
                
fname="geomap-{0}.p".format(prec)
print("Writing {0}".format(fname))
pickle.dump(geomap,  open(fname, "wb"))

County	3221
County	2417994	---> 1745966 <---
CBSA	955
CBSA	632680	---> 1749584 <---
CSA	128
CSA	184734	---> 1751589 <---
Place	29514
Place	824825	---> 1751713 <---
MetDiv	29
MetDiv	23493	---> 1751726 <---
CouSub	36642
CouSub	3442709	---> 1901975 <---
ZCTA	33120
ZCTA	2322527	---> 1902131 <---
Tract	74002
Tract	4394178	---> 1928486 <---
Writing geomap-5.p


# Rerun Geos for overlapping geohashs

In [5]:
fname="geomap-{0}.p".format(prec)
import pickle
geomap = pickle.load(open(fname, "rb"))
overlaps = {}
for geo,geodata in geomap.items():
    for mtype,geoids in geodata.items():
        if len(geoids) > 1:
            for geoid in geoids:
                if overlaps.get(mtype) is None:
                    overlaps[mtype] = set()
                overlaps[mtype].add(geoid)

174949


In [8]:
help(shapely)

NameError: name 'shapely' is not defined