# HERE POI Shapefile Decoding

In [1]:
## Basic stuff
#%load_ext autoreload
#%autoreload
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

# Stretch the notebook container to the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))
# Allow tall cell outputs (up to 10000px) before the output area scrolls.
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))


## Python Version
# Record which interpreter produced the outputs stored in this notebook.
import sys
print("Python: " + sys.version)


## Install
import shapefile
import geohash
from timeUtils import clock, elapsed
from shapely.geometry.polygon import Polygon
from shapely.geometry import Point
from random import uniform
from fsUtils import isFile
from ioUtils import showSize, saveJoblib
from geoUtils import *
from geospatialUtils import saveGeoData, getBB
import pickle
from glob import glob
from os.path import basename,splitext,join
from collections import Counter

# Timestamp the run so stale outputs are easy to recognise.
import datetime as dt
start = dt.datetime.now()
print("Notebook Last Run Initiated: {0}".format(start))

Python: 3.6.5 |Anaconda custom (x86_64)| (default, Apr 26 2018, 08:42:37) 
[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]
Notebook Last Run Initiated: 2018-11-15 10:51:34.824158


In [2]:
# Global Params
import os

# Directory holding the HERE input pickles and the per-category output files.
# The original machine-specific location remains the default; set the
# HERE_BASEDIR environment variable to run the notebook on another machine.
basedir = os.environ.get("HERE_BASEDIR", "/Users/tgadf/Downloads/here")
# Precision tag embedded in the per-category and combined output file names
# (the geohash encoding step uses the same value, 7).
prec = 7
# Label appended to the combined geomap/georec output file names.
mainkey = "HEREPOI"

# Load HERE POI Data

In [None]:
from pandas import DataFrame
from os.path import join
import pickle

# Locate the pickled HERE POI table and fail early if it is missing.
here_filename = join(basedir, "HEREdata.p")
if not isFile(here_filename):
    raise ValueError("HERE data {0} does not exist".format(here_filename))
print("Loading HERE POI data from {0}".format(here_filename))

# NOTE: pickle can execute arbitrary code while loading; only read trusted files.
# The context manager closes the handle even if unpickling fails.
with open(here_filename, "rb") as here_file:
    here_data = pickle.load(here_file)

# Rescale the coordinates by 1e5.
# NOTE(review): presumably the raw columns hold degrees * 1e5 -- confirm.
here_data['lat'] = here_data['lat']/1e5
here_data['lon'] = here_data['lon']/1e5

# Keep only rows whose ISO country code is "USA".
use_here_data = here_data[here_data['iso_country_code'] == "USA"]

In [None]:
# HERE category names grouped by the coarse class they are collapsed into.
# One list per class; the names must match the values stored in catIDs.

# Food and lodging
fastfood = [
    "ConvenienceStore",
    "CoffeeShop",
]
restaurant = ["Restaurant"]
homefood = [
    "GroceryStore",
    "Pharmacy",
]
lodging = [
    "Hotel",
    "SkiResort",
]

# Retail and financial
commercial = [
    "ClothingStore",
    "ConsumerElectronicsStore",
    "Bookstore",
    "DepartmentStore",
    "Shopping",
    "OfficeSupplyandServiceStore",
    "HomeSpecialtyStore",
    "SportingGoodStore",
    "SpecialtyStore",
    "HomeImprovementandHardwareStore",
    "Bank",
    "ATM",
]

# Vehicles
auto = [
    "AutoDealerships",
    "AutomobileClub",
    "AutoServiceandMaintenance",
    "RentalCarAgency",
    "TruckDealership",
    "MotorcycleDealership",
]
fuel = ["PetrolandGasolineStation"]
cycle = [
    "BicycleParking",
    "BicycleService",
    "BicycleSharingLocation",
]

# Civic, industrial and medical
municipal = [
    "Library",
    "PostOffice",
    "PoliceStation",
    "CityHall",
    "CivicandCommunityCentre",
    "ConventionandExhibitionCentre",
    "CourtHouse",
]
industrial = [
    "CargoCentre",
    "BusinessFacility",
]
building = [
    "Embassy",
    "MilitaryBase",
]
medical = ["Hospital"]

# Leisure
attraction = [
    "AmusementPark",
    "AnimalPark",
    "Museum",
    "TouristAttraction",
    "TouristInformation",
    "Winery",
    "PerformingArts",
    "HistoricalMonument",
]
sport = ["GolfCourse"]
entertainment = [
    "Cinema",
    "Casino",
]

# Transport
transit = [
    "CommuterRailStation",
    "BusStation",
    "FerryTerminal",
    "TransportationService",
    "TrainStation",
    "Airport",
    "TaxiStand",
    "HighwayExit",
    "WeighStation",
]
recreation = [
    "SportsComplex",
    "Trailhead",
    "Dock",
    "Marina",
    "Campground",
    "ParkandRecreationArea",
]

# Education
school = ["School"]
college = ["HigherEducation"]

# Parking and stopping places
parking = [
    "ParkandRide",
    "ParkingGarageandHouse",
    "ParkingLot",
    "RestArea",
    "TruckStopandPlaza",
    "TruckParking",
    "BorderCrossing",
    "DeliveryEntrance",
]

# Create Individual HERE Data

In [None]:
# Collapse each HERE POI into a coarse class and collect, per class, the set of
# geohash cells that contain at least one POI of that class.

# Stop after this many rows have been processed (same cap as before).
# NOTE(review): this truncates the input -- confirm the cap is intentional and
# not a leftover from debugging.
max_rows = 250000

# Category-name lists paired with the class label they collapse into, in
# priority order: if a name were listed twice, the first pairing wins.
class_groups = [
    (fastfood, "fastfood"),
    (restaurant, "restaurant"),
    (homefood, "grocery"),
    (lodging, "lodging"),
    (commercial, "commercial"),
    (auto, "auto"),
    (municipal, "municipal"),
    (industrial, "industrial"),
    (building, "building"),
    (medical, "medical"),
    (attraction, "attraction"),
    (sport, "sport"),
    (entertainment, "entertainment"),
    (recreation, "recreation"),
    (school, "school"),
    (college, "college"),
    (cycle, "cycling"),
    (transit, "transit"),
    (parking, "parking"),
    (fuel, "fuel"),
]
class_of = {}
for members, label in class_groups:
    for member in members:
        class_of.setdefault(member, label)

shapeData   = {}   # class -> {class: {"Name": class}}
geoShapeMap = {}   # class -> {class: set of geohash strings}
Nshapes = 0        # number of POI rows processed
catIDs_filename = join(basedir, "catIDs.p")
if not isFile(catIDs_filename):
    raise ValueError("Categery IDs data {0} does not exist".format(catIDs_filename))

# NOTE: pickle can execute arbitrary code while loading; only read trusted files.
with open(catIDs_filename, "rb") as catIDs_file:
    catIDs = pickle.load(catIDs_file)

for row,rowdata in use_here_data.iterrows():
    if Nshapes >= max_rows:
        print(row,Nshapes)
        break
    Nshapes += 1

    # Encode with the shared `prec` so the cells match the precision that is
    # written into the output file names below.
    geo = geohash.encode(latitude=rowdata['lat'], longitude=rowdata['lon'], precision=prec)

    # catIDs is keyed by the category id rendered as an integer string.
    catID = str(int(rowdata['cat_id']))
    name  = catIDs.get(catID)
    if name is None:
        # Show the offending row before failing so the bad id can be traced.
        print(rowdata)
        raise ValueError("Category ID {0} has no name in {1}".format(catID, catIDs_filename))

    fclass = class_of.get(name)
    if fclass is None:
        raise ValueError("fClass {0} was not found!".format(name))

    # Every class has a single record keyed by its own name.
    if fclass not in shapeData:
        shapeData[fclass]   = {fclass: {"Name": fclass}}
        geoShapeMap[fclass] = {fclass: set()}
    geoShapeMap[fclass][fclass].add(geo)

# Persist one pair of files per class.
# NOTE(review): saveGeoData presumably writes "<prefix>-data.p" and
# "<prefix>-geos.p" (those are the names read back later) -- confirm.
for catID,catData in shapeData.items():
    shapeval = catID
    ngeos = len(geoShapeMap[catID][catID])
    print("Found {0} of type {1}".format(ngeos,shapeval))
    saveGeoData(catData, geoShapeMap[catID], Nshapes, ngeos, join(basedir, "{0}-{1}".format(shapeval, prec)))

# Combine HERE Data Into Standard Format

In [3]:
# Merge the per-class files into two combined structures:
#   geomap : geohash -> {class: count}
#   records: record id -> record name

# Class names are recovered from the "<class>-<prec>-geos.p" file names; the
# list is sorted so the processing order does not depend on the file system.
vals    = sorted(glob(join(basedir, "*-{0}-geos.p".format(prec))))
vals    = [splitext(basename(x))[0].split('-')[0] for x in vals]
geomap  = {}
records = {}
for mtype in vals:
    data_filename = join(basedir, "{0}-{1}-data.p".format(mtype, prec))
    geos_filename = join(basedir, "{0}-{1}-geos.p".format(mtype, prec))
    try:
        # NOTE: pickle can execute arbitrary code; only read trusted files.
        with open(data_filename, "rb") as data_file:
            recdata = pickle.load(data_file)
        with open(geos_filename, "rb") as geos_file:
            geodata = pickle.load(geos_file)
    except Exception as err:
        # Best effort: skip classes whose files are missing or unreadable, but
        # say so instead of hiding the problem (and let Ctrl-C through).
        print("Skipping {0}: {1}".format(mtype, err))
        continue

    ngeos = 0
    for geoid,geos in geodata.items():
        ngeos += len(geos)
        for geo in geos:
            counts = geomap.get(geo)
            if counts is None:
                counts = geomap[geo] = Counter()
            counts[mtype] += 1

    # Only the first record of each class file is registered; the files written
    # by this notebook hold exactly one record per class.
    for geoid,rec in recdata.items():
        records[geoid] = rec['Name']
        break

    print("{0}\t{1}\t---> {2} <---".format(mtype,ngeos,len(geomap)))


# Store plain dicts rather than Counter objects.
geomap = {geo: dict(counts) for geo, counts in geomap.items()}

# Write both combined structures (into the current working directory, as before).
for payload, prefix in ((geomap, "geomap"), (records, "georec")):
    fname="{0}-{1}-{2}.p".format(prefix, prec, mainkey)
    print("Writing {0}".format(fname))
    saveJoblib(payload, fname)
    showSize(fname)

attraction	3788	---> 3788 <---
auto	27999	---> 31746 <---
building	185	---> 31927 <---
college	563	---> 32472 <---
commercial	56562	---> 86953 <---
cycling	20	---> 86969 <---
entertainment	729	---> 87538 <---
fastfood	13191	---> 98203 <---
fuel	11419	---> 101956 <---
grocery	11414	---> 110012 <---
industrial	1379	---> 111159 <---
lodging	7050	---> 117697 <---
medical	883	---> 118566 <---
municipal	6634	---> 124510 <---
parking	3080	---> 127028 <---
recreation	9517	---> 136103 <---
restaurant	43907	---> 168426 <---
school	11816	---> 179779 <---
sport	1463	---> 181179 <---
transit	5915	---> 186738 <---
Writing geomap-7-HEREPOI.p
  --> This file is 2.0MB.
  --> This file is 2.0MB.
Writing georec-7-HEREPOI.p
  --> This file is 238B.
  --> This file is 238B.


# Folium Plots (if needed)

In [None]:
import folium


def _draw_geohash_boxes(fmap, poi_class, line_color):
    """Draw one polyline per stored geohash of `poi_class` onto `fmap`.

    Reads "<poi_class>-<prec>-geos.p" from `basedir` and adds a PolyLine in
    `line_color` for every geohash found in it.
    """
    geos_filename = join(basedir, "{0}-{1}-geos.p".format(poi_class, prec))
    # NOTE: pickle can execute arbitrary code; only read trusted files.
    with open(geos_filename, "rb") as geos_file:
        class_geos = pickle.load(geos_file)
    for geos in class_geos.values():
        for geo in geos:
            # NOTE(review): getBB presumably returns the corner points of the
            # geohash cell's bounding box -- confirm against geospatialUtils.
            points = getBB(geo, istuple=True)
            folium.PolyLine(points, color=line_color, weight=2, opacity=1).add_to(fmap)


# Map centred on the continental US, with three classes overlaid in
# distinct colours.
m = folium.Map(location=[40, -95], zoom_start=5)
for poi_class, line_color in (("parking", "darkred"),
                              ("college", "darkblue"),
                              ("grocery", "darkgreen")):
    _draw_geohash_boxes(m, poi_class, line_color)

In [None]:
# Save the folium map as "map.html" inside basedir, then report the size of
# the written file.
map_filename = join(basedir, "map.html")
m.save(map_filename)
showSize(map_filename)