# OSM Shapefile Decoding

In [1]:
## Basic stuff
#%load_ext autoreload
#%autoreload
# Use the public IPython.display module: importing `display` from the internal
# IPython.core.display path is deprecated in newer IPython releases.
from IPython.display import display, HTML
# Widen the notebook container to the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))
# Let output areas grow up to 10000px before they start scrolling.
display(HTML("""<style>div.output_area{max-height:10000px;overflow:scroll;}</style>"""))


## Python Version
import sys
print("Python: {0}".format(sys.version))


## Imports (third-party, project-local helper modules, then stdlib)
import shapefile
import geohash
from timeUtils import clock, elapsed
from shapely.geometry.polygon import Polygon
from shapely.geometry import Point
from random import uniform
from fsUtils import isFile
from ioUtils import showSize, saveJoblib
# NOTE(review): the wildcard import hides which geoUtils names this notebook
# relies on; consider listing them explicitly.
from geoUtils import *
from geospatialUtils import saveGeoData, getBB, rmZipDir, getGeos
import pickle
from glob import glob
from os.path import basename,splitext,join
from collections import Counter

# Record when this run of the notebook started.
import datetime as dt
start = dt.datetime.now()
print("Notebook Last Run Initiated: "+str(start))

Python: 3.6.5 |Anaconda custom (x86_64)| (default, Apr 26 2018, 08:42:37) 
[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]
Notebook Last Run Initiated: 2018-11-19 14:35:19.177408


In [1]:
# Global Params
# Directory that holds the downloaded OSM *.zip archives; the parsed output
# files are written here as well.
# NOTE(review): absolute, user-specific path -- adjust before running elsewhere.
basedir="/Users/tgadf/Downloads/osm"
# Precision forwarded to getGeos (presumably the geohash length -- confirm).
prec=7
# Not referenced by any cell visible in this notebook.
mainkey="OSM"

# Parse OSM Data

In [10]:
from ioUtils import loadJoblib

# Building lookup tables consulted by parseOSM for the buildings layer,
# loaded from joblib files in the current working directory.
buildingLookup = loadJoblib("buildingLookup.p")
buildingMap = loadJoblib("buildingMap.p")
buildingConv = loadJoblib("buildingConv.p")

# Point-of-interest classes (underscores already stripped), grouped by the
# coarse category that parseOSM assigns to them.
fastfood = ["fastfood", "foodcourt", "convenience", "cafe"]
restaurant = ["restaurant"]
homefood = ["greengrocer", "pharmacy", "supermarket", "chemist"]
lodging = [
    "hotel", "motel", "hostel", "chalet", "bedandbreakfast", "guesthouse",
    "caravansite",
]
vending = ["vendingany", "vendingcigarette", "vendingmachine", "vendingparking"]
# Vending machines are treated as commercial, so they are appended last.
commercial = [
    "bookshop", "butcher", "hairdresser", "furnitureshop", "doityourself",
    "gardencentre", "newsagent", "toyshop", "mall", "laundry",
    "mobilephoneshop", "bank", "atm",
    "bicyclerental", "bicycleshop", "bakery", "beautyshop", "videoshop",
    "outdoorshop", "jeweller", "kiosk", "shoeshop", "sportsshop",
    "travelagent",
    "florist", "giftshop", "departmentstore", "computershop",
] + vending
auto = ["cardealership", "carrental", "carrepair", "carsharing", "carwash"]
municipal = [
    "firestation", "prison", "library", "postoffice", "postbox", "police",
    "townhall", "communitycentre", "artscentre", "publicbuilding",
    "courthouse",
]
industrial = [
    "wastewaterplant", "windmill", "watermill", "waterworks", "recycling",
    "recyclingclothes", "recyclingglass", "recyclingmetal", "recyclingpaper",
    "commstower", "watertower", "waterwell", "tower",
]
building = ["embassy"]
medical = ["hospital", "dentist", "doctors", "optician", "veterinary", "nursinghome"]
attraction = [
    "attraction", "fountain", "museum", "touristinfo", "viewpoint", "zoo",
    "themepark", "monument", "picnicsite", "fort", "castle", "battlefield",
    "archaeological", "memorial", "lighthouse", "observationtower", "artwork",
]
religious = ["graveyard", "waysidecross", "waysideshrine", "ruins"]
sport = ["golfcourse", "huntingstand", "icerink", "sportscentre", "track", "swimmingpool"]
entertainment = ["bar", "pub", "nightclub", "theatre", "cinema"]
recreation = ["stadium", "campsite", "biergarten", "dogpark", "park", "pitch", "playground"]
school = ["kindergarten", "school"]
college = ["college", "university"]
general = [
    "alpinehut", "bench", "beverages", "camerasurveillance", "clothes",
    "drinkingwater", "shelter", "stationery", "telephone", "toilet",
    "wastebasket",
]

# Not referenced by any cell visible in this notebook.
increase = {}

# Shapefile layer name -> rule family applied by parseOSM. Any layer name that
# is not listed here is reported as "Not recording ..." and skipped.
_OSM_LAYERS = {
    "gis_osm_buildings_a_free_1": "buildings",
    "gis_osm_pofw_a_free_1": "pofw",
    "gis_osm_pofw_free_1": "pofw",
    "gis_osm_traffic_a_free_1": "traffic",
    "gis_osm_traffic_free_1": "traffic",
    "gis_osm_transport_a_free_1": "transport",
    "gis_osm_transport_free_1": "transport",
    "gis_osm_pois_a_free_1": "pois",
    "gis_osm_pois_free_1": "pois",
    "gis_osm_water_a_free_1": "passthrough",
    "gis_osm_waterways_free_1": "passthrough",
    "gis_osm_places_a_free_1": "passthrough",
    "gis_osm_places_free_1": "passthrough",
    "gis_osm_natural_a_free_1": "passthrough",
    "gis_osm_natural_free_1": "passthrough",
    "gis_osm_landuse_a_free_1": "passthrough",
    "gis_osm_landuse_free_1": "passthrough",
}


def _classifyBuilding(record, nameCntr):
    """Return (geoid, fclass) for one record of the buildings layer.

    record[2], record[3] and record[4] are read as feature class, name and
    type. Names that cannot be classified are tallied in nameCntr.
    """
    fclass = record[2]
    name   = record[3]
    ftype  = record[4]
    geoid  = "building"

    if buildingLookup.get(name) is not None:
        # Exact name match in the lookup table wins.
        geoid = fclass = buildingLookup[name]
    elif len(ftype) > 0:
        # Otherwise trust the type column when it is filled in.
        geoid = fclass = ftype
    else:
        # Last resort: look for a buildingMap keyword in the name, either as a
        # whole word or next to a space.
        words = name.split()
        found = None
        for keyword, mapped in buildingMap.items():
            if (keyword in words
                    or "{0} ".format(keyword) in name
                    or " {0}".format(keyword) in name):
                found = geoid = fclass = mapped
                break
        if found is None:
            nameCntr[name] += 1

    # Final renaming pass through the conversion table.
    if buildingConv.get(geoid):
        geoid = buildingConv[geoid]
    if buildingConv.get(fclass):
        fclass = buildingConv[fclass]
    return geoid, fclass


def _normalisePofw(fclass):
    """Collapse denominational place-of-worship classes onto their religion."""
    for faith in ("christian", "muslim"):
        if "{0}_".format(faith) in fclass:
            return faith
    return fclass


def _normaliseTraffic(fclass):
    """Map a traffic feature class onto its short label (first rule wins)."""
    rules = (
        (("bicycle",), "bicycle"),
        (("parking_",), "parking"),
        (("mini_roundabout", "turning_circle"), "roundabout"),
        (("speed_camera",), "speedcamera"),
        (("motorway_junction",), "ramp"),
        (("lock_gate",), "gate"),
        (("traffic_signals",), "signal"),
        (("street_lamp",), "lamp"),
    )
    for needles, label in rules:
        if any(needle in fclass for needle in needles):
            return label
    return fclass


def _normaliseTransport(fclass):
    """Reduce a transport feature class to its mode; raise if it is unknown."""
    if fclass.find("railway_") != -1:
        fclass = fclass.replace("railway", "rail")
    fclass = fclass.replace("_", "")
    for mode in ("bus", "rail", "ferry", "tram", "taxi"):
        if fclass.startswith(mode):
            return mode
    raise ValueError("fClass {0} was not recognized!".format(fclass))


def _classifyPOI(fclass):
    """Map a point-of-interest class onto its coarse category; raise if unknown."""
    fclass = fclass.replace("_", "")
    # Order matters: the first category list containing the class wins.
    categories = (
        ("fastfood", fastfood),
        ("restaurant", restaurant),
        ("grocery", homefood),
        ("lodging", lodging),
        ("commercial", commercial),
        ("auto", auto),
        ("municipal", municipal),
        ("industrial", industrial),
        ("building", building),
        ("medical", medical),
        ("attraction", attraction),
        ("religious", religious),
        ("sport", sport),
        ("entertainment", entertainment),
        ("recreation", recreation),
        ("school", school),
        ("college", college),
        ("public", general),
    )
    for label, members in categories:
        if fclass in members:
            return label
    raise ValueError("fClass {0} was not found!".format(fclass))


def parseOSM(dirval, prec, key=""):
    """Turn the OSM shapefiles of one extracted archive into per-class geo lists.

    Every ``*<key>*.shp`` file in ``dirval`` is opened with ``shapefile.Reader``.
    Each record is reduced to a feature class / group id by layer-specific
    rules, ``getGeos`` is called on the record's shape, and the returned keys
    are collected per group id. When at least one record was kept, the result
    is handed to ``saveGeoData`` with the output prefix
    ``<basedir>/<state>-<key>-<prec>``.

    Parameters
    ----------
    dirval : str
        Directory containing the shapefiles. Its base name, minus the
        ``-latest-free.shp`` suffix, is used as the state name.
    prec : int
        Forwarded to ``getGeos`` as ``prec`` and embedded in the output name.
    key : str, optional
        Substring that shapefile names must contain; also embedded in the
        output name. The default ``""`` selects every ``*.shp`` file.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a shapefile cannot be opened (the underlying error is chained as
        ``__cause__``), or if a transport / POI feature class is unknown.
    """
    from os.path import basename, join, splitext
    from glob import glob
    from collections import Counter

    state   = basename(dirval).replace("-latest-free.shp", "")
    osmpath = dirval
    osms    = [splitext(basename(x))[0] for x in glob(join(osmpath, "*{0}*.shp".format(key)))]

    nameCntr    = Counter()   # unclassified building names
    # NOTE(review): nothing populates nameLookup -- unmatched building names are
    # only recorded when the type column is empty, so there is never a type to
    # store. The report below therefore always prints None for the value.
    nameLookup  = {}
    shapeData   = {}
    geoShapeMap = {}
    ngeos       = 0
    cntr        = Counter()   # records per group id, cumulative over all files

    print("Found the following {0} shape files: {1}".format(len(osms), ", ".join(osms)))
    for osm in osms:
        print(osm)
        shapeval = osm

        # NOTE(review): this pattern uses the layer name while the saved files
        # use `key`, so it is unlikely to ever match; informational only.
        files = glob(join(basedir, "{0}-{1}-{2}*".format(state, osm, prec)))
        if len(files) > 0:
            print("Already processed this one...")

        try:
            sf = shapefile.Reader(join(osmpath, osm))
        except Exception as err:
            # Keep the ValueError callers see, but preserve the real cause.
            raise ValueError("No shapefile!") from err
        Nshapes = len(sf.shapes())

        # Called for its console output; the return value is not needed.
        clock("\n\nAnalyzing {0}\t{1}".format(shapeval, Nshapes))

        layer = _OSM_LAYERS.get(osm)
        for irec, shapeRec in enumerate(sf.iterShapeRecords()):
            if irec % 2500 == 0 and irec > 0:
                print("Processed {0}/{1} records. Found {2} geos so far...".format(irec, Nshapes, ngeos))

            record = shapeRec.record
            if layer is None:
                print("Not recording {0}".format(osm))
                break

            if layer == "buildings":
                geoid, fclass = _classifyBuilding(record, nameCntr)
            else:
                fclass = record[2]
                if layer == "pofw":
                    fclass = _normalisePofw(fclass)
                elif layer == "traffic":
                    fclass = _normaliseTraffic(fclass)
                    # Only parking and fuel features are kept from this layer.
                    if fclass not in ("parking", "fuel"):
                        continue
                elif layer == "transport":
                    fclass = _normaliseTransport(fclass)
                elif layer == "pois":
                    fclass = _classifyPOI(fclass)
                # "passthrough" layers use the feature class unchanged.
                geoid = fclass

            # Later records overwrite earlier ones with the same group id.
            shapeData[geoid] = {"Name": fclass, "Record": irec}
            cntr[geoid] += 1

            geos = getGeos(shapeRec.shape, prec=prec, linear=False, returnKeys=True, debug=False)

            # A dict is used as an insertion-ordered set of geo keys.
            bucket = geoShapeMap.setdefault(geoid, {})
            for geo in geos:
                bucket[geo] = 1
            ngeos += len(geos)

        print("Finished: {0}".format(cntr.most_common(100)))
        print("\n")

    # Suggest buildingConv entries for group ids that look un-normalised.
    # k[:1] (not k[0]) so that an empty class name does not raise IndexError.
    for k, v in cntr.most_common():
        if k.find(" ") != -1 or k.find(";") != -1 or k[:1].isupper():
            print("buildingConv[\"{0}\"] = \"{1}\"".format(k, k))
    print("\n")
    for k, v in nameCntr.most_common(200):
        print("commerce[\"{0}\"] = \"{1}\"".format(k, nameLookup.get(k)))

    if len(shapeData) > 0:
        for geoid in geoShapeMap.keys():
            geoShapeMap[geoid] = list(geoShapeMap[geoid].keys())
        print("Found {0} geos from {1}".format(ngeos, shapeval))
        # NOTE(review): Nshapes is the record count of the last shapefile only,
        # while the other arguments cover every file processed above.
        saveGeoData(shapeData, geoShapeMap, Nshapes, ngeos, join(basedir, "{0}-{1}-{2}".format(state, key, prec)))

In [15]:
import zipfile
from glob import glob
from os import mkdir
from os.path import splitext, basename, dirname, join, exists

# Unpack every OSM archive found in basedir next to the zip file, parse its
# "place" layers, and remove the unpacked directory again afterwards.
zipfiles = glob(join(basedir, "*.zip"))
for zipname in zipfiles:
    statedir = dirname(zipname)
    name     = splitext(basename(zipname))[0]
    dirval   = join(statedir, name)
    if exists(dirval):
        # NOTE(review): a leftover directory is removed and the archive is
        # skipped for this run rather than re-processed -- confirm that this
        # is the intended behaviour.
        print("Directory {0} already exists".format(dirval))
        rmZipDir(dirval)
        continue
    try:
        mkdir(dirval)
    except FileExistsError:
        # Created between the exists() check and here; any other failure
        # (permissions, missing parent) is allowed to surface.
        pass

    try:
        # `with` closes the archive even when extraction fails.
        with zipfile.ZipFile(zipname, 'r') as zip_ref:
            print("Extract {0}".format(zipname))
            zip_ref.extractall(dirval)
        parseOSM(dirval, prec, 'place')
    finally:
        # Always clean up, so a failed run does not leave a directory behind
        # that would make the next run skip this archive.
        rmZipDir(dirval)

Extract /Users/tgadf/Downloads/osm/alabama-latest-free.shp.zip
Found the following 2 shape files: gis_osm_places_a_free_1, gis_osm_places_free_1
gis_osm_places_a_free_1
Current Time is Mon Nov 19, 2018 12:58:03 for 

Analyzing gis_osm_places_a_free_1	1182
Finished: [('city', 531), ('town', 381), ('island', 85), ('county', 66), ('village', 56), ('locality', 44), ('hamlet', 16), ('farm', 3)]


gis_osm_places_free_1
Current Time is Mon Nov 19, 2018 12:58:14 for 

Analyzing gis_osm_places_free_1	6154
Processed 2500/6154 records. Found 340090 geos so far...
Processed 5000/6154 records. Found 342590 geos so far...
Finished: [('hamlet', 5472), ('city', 543), ('town', 464), ('village', 387), ('island', 241), ('county', 133), ('locality', 70), ('suburb', 16), ('farm', 10)]




Found 343744 geos from gis_osm_places_free_1


There are 9 entries in the saved file.
Saved shape data to /Users/tgadf/Downloads/osm/alabama-place-7-data.p


There are 343744 entries in the saved file.
Saved shape data to

Found the following 2 shape files: gis_osm_places_a_free_1, gis_osm_places_free_1
gis_osm_places_a_free_1
Current Time is Mon Nov 19, 2018 12:59:46 for 

Analyzing gis_osm_places_a_free_1	20
Finished: [('suburb', 17), ('island', 2), ('town', 1)]


gis_osm_places_free_1
Current Time is Mon Nov 19, 2018 12:59:46 for 

Analyzing gis_osm_places_free_1	40
Finished: [('suburb', 49), ('island', 5), ('locality', 3), ('town', 1), ('national_capital', 1), ('farm', 1)]




Found 743 geos from gis_osm_places_free_1


There are 6 entries in the saved file.
Saved shape data to /Users/tgadf/Downloads/osm/district-of-columbia-place-7-data.p


There are 743 entries in the saved file.
Saved shape data to /Users/tgadf/Downloads/osm/district-of-columbia-place-7-geos.p


  ---->>> Removed /Users/tgadf/Downloads/osm/district-of-columbia-latest-free.shp
Extract /Users/tgadf/Downloads/osm/florida-latest-free.shp.zip
Found the following 2 shape files: gis_osm_places_a_free_1, gis_osm_places_free_1
gis_osm_plac

Found the following 2 shape files: gis_osm_places_a_free_1, gis_osm_places_free_1
gis_osm_places_a_free_1
Current Time is Mon Nov 19, 2018 13:01:09 for 

Analyzing gis_osm_places_a_free_1	826
Finished: [('city', 544), ('island', 186), ('town', 58), ('village', 29), ('locality', 6), ('hamlet', 2), ('farm', 1)]


gis_osm_places_free_1
Current Time is Mon Nov 19, 2018 13:01:11 for 

Analyzing gis_osm_places_free_1	1786
Finished: [('hamlet', 1029), ('city', 555), ('village', 496), ('town', 192), ('island', 191), ('county', 105), ('locality', 41), ('farm', 2), ('suburb', 1)]




Found 73399 geos from gis_osm_places_free_1


There are 9 entries in the saved file.
Saved shape data to /Users/tgadf/Downloads/osm/kansas-place-7-data.p


There are 73399 entries in the saved file.
Saved shape data to /Users/tgadf/Downloads/osm/kansas-place-7-geos.p


  ---->>> Removed /Users/tgadf/Downloads/osm/kansas-latest-free.shp
Extract /Users/tgadf/Downloads/osm/kentucky-latest-free.shp.zip
Found the followi

Found the following 2 shape files: gis_osm_places_a_free_1, gis_osm_places_free_1
gis_osm_places_a_free_1
Current Time is Mon Nov 19, 2018 13:02:24 for 

Analyzing gis_osm_places_a_free_1	391
Finished: [('town', 177), ('village', 90), ('city', 42), ('locality', 42), ('island', 27), ('county', 10), ('hamlet', 2), ('suburb', 1)]


gis_osm_places_free_1
Current Time is Mon Nov 19, 2018 13:02:27 for 

Analyzing gis_osm_places_free_1	3411
Processed 2500/3411 records. Found 71887 geos so far...
Finished: [('hamlet', 2884), ('village', 322), ('town', 218), ('island', 165), ('county', 92), ('locality', 63), ('city', 50), ('suburb', 8)]




Found 72798 geos from gis_osm_places_free_1


There are 8 entries in the saved file.
Saved shape data to /Users/tgadf/Downloads/osm/mississippi-place-7-data.p


There are 72798 entries in the saved file.
Saved shape data to /Users/tgadf/Downloads/osm/mississippi-place-7-geos.p


  ---->>> Removed /Users/tgadf/Downloads/osm/mississippi-latest-free.shp
Extract

Finished: [('village', 555), ('island', 476), ('locality', 408), ('town', 72), ('city', 38), ('farm', 13), ('hamlet', 10), ('suburb', 2), ('county', 1)]


gis_osm_places_free_1
Current Time is Mon Nov 19, 2018 13:03:19 for 

Analyzing gis_osm_places_free_1	6869
Processed 2500/6869 records. Found 155448 geos so far...
Processed 5000/6869 records. Found 157948 geos so far...
Finished: [('hamlet', 4962), ('village', 1492), ('island', 1079), ('locality', 521), ('town', 221), ('county', 57), ('city', 55), ('suburb', 29), ('farm', 28)]




Found 159817 geos from gis_osm_places_free_1


There are 9 entries in the saved file.
Saved shape data to /Users/tgadf/Downloads/osm/new-york-place-7-data.p


There are 159817 entries in the saved file.
Saved shape data to /Users/tgadf/Downloads/osm/new-york-place-7-geos.p


  ---->>> Removed /Users/tgadf/Downloads/osm/new-york-latest-free.shp
Extract /Users/tgadf/Downloads/osm/north-carolina-latest-free.shp.zip
Found the following 2 shape files: gis_osm_p

Found the following 2 shape files: gis_osm_places_a_free_1, gis_osm_places_free_1
gis_osm_places_a_free_1
Current Time is Mon Nov 19, 2018 13:04:33 for 

Analyzing gis_osm_places_a_free_1	72
Finished: [('island', 36), ('town', 15), ('locality', 12), ('city', 8), ('farm', 1)]


gis_osm_places_free_1
Current Time is Mon Nov 19, 2018 13:04:33 for 

Analyzing gis_osm_places_free_1	446
Finished: [('hamlet', 268), ('island', 94), ('village', 72), ('town', 46), ('city', 16), ('locality', 16), ('county', 5), ('farm', 1)]




Found 10285 geos from gis_osm_places_free_1


There are 8 entries in the saved file.
Saved shape data to /Users/tgadf/Downloads/osm/rhode-island-place-7-data.p


There are 10285 entries in the saved file.
Saved shape data to /Users/tgadf/Downloads/osm/rhode-island-place-7-geos.p


  ---->>> Removed /Users/tgadf/Downloads/osm/rhode-island-latest-free.shp
Extract /Users/tgadf/Downloads/osm/south-carolina-latest-free.shp.zip
Found the following 2 shape files: gis_osm_places_a

Found the following 2 shape files: gis_osm_places_a_free_1, gis_osm_places_free_1
gis_osm_places_a_free_1
Current Time is Mon Nov 19, 2018 13:05:54 for 

Analyzing gis_osm_places_a_free_1	907
Finished: [('island', 399), ('locality', 218), ('city', 181), ('town', 93), ('village', 9), ('farm', 4), ('hamlet', 3)]


gis_osm_places_free_1
Current Time is Mon Nov 19, 2018 13:06:00 for 

Analyzing gis_osm_places_free_1	2925
Processed 2500/2925 records. Found 125133 geos so far...
Finished: [('hamlet', 1891), ('locality', 575), ('island', 506), ('town', 272), ('village', 221), ('city', 201), ('suburb', 107), ('county', 39), ('farm', 20)]




Found 125558 geos from gis_osm_places_free_1


There are 9 entries in the saved file.
Saved shape data to /Users/tgadf/Downloads/osm/washington-place-7-data.p


There are 125558 entries in the saved file.
Saved shape data to /Users/tgadf/Downloads/osm/washington-place-7-geos.p


  ---->>> Removed /Users/tgadf/Downloads/osm/washington-latest-free.shp
Extrac

In [24]:
# gis_osm_buildings_a_free_1.shp	gis_osm_pofw_a_free_1.shp	gis_osm_traffic_a_free_1.shp
# gis_osm_landuse_a_free_1.shp	gis_osm_pofw_free_1.shp		gis_osm_traffic_free_1.shp
# gis_osm_natural_a_free_1.shp	gis_osm_pois_a_free_1.shp	gis_osm_transport_a_free_1.shp
# gis_osm_natural_free_1.shp	gis_osm_pois_free_1.shp		gis_osm_transport_free_1.shp
# gis_osm_places_a_free_1.shp	gis_osm_railways_free_1.shp	gis_osm_water_a_free_1.shp
# gis_osm_places_free_1.shp	gis_osm_roads_free_1.shp	gis_osm_waterways_free_1.shp