In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Extract data from mongoDB and generate geometries, write back

This notebook reads raw OSM data (ways and nodes) from MongoDB, builds a Polygon or LineString geometry for every way, and writes the result to a derived `ways` collection, together with each way's length and (for polygons) its area.  

**The derived `ways` collection is dropped and rebuilt from scratch every time this notebook is run!**

Created on:  2016-10-27  
Last update: 2016-12-09  
Contact: michael.szell@gmail.com (Michael Szell)

## Preliminaries

### Parameters

In [1]:
# City to process. The value is used as the prefix of the MongoDB database names
# (cityname + "_raw", cityname + "_derived") and as the output subfolder name.
cityname = "boston"

### Imports

In [3]:
# preliminaries
from __future__ import unicode_literals
import sys
import csv
import os
import math
import pprint
pp = pprint.PrettyPrinter(indent=4)
from collections import defaultdict
import time
import datetime
import numpy as np
from scipy import stats
import pyprind
import itertools
import logging
from ast import literal_eval as make_tuple
from collections import OrderedDict

import json
from shapely.geometry import mapping, shape, LineString, LinearRing, Polygon, MultiPolygon
import shapely
import shapely.ops as ops
from functools import partial
import pyproj
from scipy import spatial
from haversine import haversine

import pymongo
from pymongo import MongoClient

# plotting stuff
import matplotlib.pyplot as plt

### Create folder structure

In [4]:
# Working folders used by the project, plus the per-city output subfolders.
required_dirs = ["citydata", "logs", "output"]
required_dirs += ["output/" + cityname + "/" + sub
                  for sub in ("carin", "carout", "bikein", "bikeout")]

# Create every folder that does not exist yet (makedirs also creates missing parents).
for folder in required_dirs:
    if not os.path.exists(folder):
        os.makedirs(folder)

### DB Connection

In [None]:
# Connect to MongoDB using MongoClient's default connection settings.
client = MongoClient()

# Raw OSM input: grab both collections and count their documents.
# numways sizes the progress bar of the conversion loop further down.
db_raw = client[cityname+'_raw']
nodes_raw, ways_raw = db_raw['nodes'], db_raw['ways']
numnodes = nodes_raw.find({}).count()
cursor = ways_raw.find({})
numways = cursor.count()

# Derived output: always start from an empty 'ways' collection.
db_derived = client[cityname+'_derived']
db_derived.drop_collection('ways')
ways_derived = db_derived['ways']

## Polygons and Linestrings from raw to derived

In [None]:
# Build one GeoJSON-style Feature per raw way and insert it into the derived collection.
#
# - A way whose first and last node ids are equal is stored as "Polygon", everything
#   else as "LineString".
# - NOTE: coordinates are stored as a flat list of coordinate pairs for BOTH types
#   (strict GeoJSON would nest polygon rings one level deeper). The length/area cell
#   of this notebook reads exactly this flat layout, so it is kept as is.
# - Node references that cannot be resolved in nodes_raw are left out of the geometry
#   and reported at the end.
bar = pyprind.ProgBar(numways, bar_char='█', update_interval=1)
nodesinserted = 0   # node references that could be resolved to coordinates
nodestotal = 0      # node references seen in all processed ways
waysskipped = 0     # ways without any node reference (no geometry can be built)
for i, way in enumerate(ways_raw.find()):
    bar.update(item_id = i)

    nodeids = way["nodes"]
    if not nodeids:
        # Previously this raised an IndexError on way["nodes"][0]; skip instead.
        waysskipped += 1
        continue
    nodestotal += len(nodeids)

    # Fetch all nodes of this way in ONE query instead of one query per node.
    # _id is unique, so every id maps to at most one document.
    found = {}
    for n in nodes_raw.find({"_id": {"$in": nodeids}}):
        found[n["_id"]] = [n["loc"]["coordinates"][0], n["loc"]["coordinates"][1]]

    # Rebuild the coordinate list in the way's own node order. Repeated ids (e.g. the
    # closing node of a ring) appear once per reference; copy so entries are independent.
    coords = [list(found[nodeid]) for nodeid in nodeids if nodeid in found]
    nodesinserted += len(coords)

    tempgeojson = {}
    tempgeojson["_id"] = way["_id"]
    # Ways without tags get an empty properties dict.
    tempgeojson["properties"] = way.get("tags", {})
    tempgeojson["type"] = "Feature"
    tempgeojson["geometry"] = {
        "type": "Polygon" if nodeids[0] == nodeids[-1] else "LineString",
        "coordinates": coords,
    }
    ways_derived.insert_one(tempgeojson)

nodesmissing = nodestotal - nodesinserted
print("Done. Nodes missing: "+ str(nodesmissing) + ", out of " + str(nodestotal))
if waysskipped:
    print("Ways skipped because they reference no nodes: " + str(waysskipped))

## Calculate lengths and areas of ways and save back to mongoDB

In [None]:
# Re-open the handles to the derived database. Unlike the "DB Connection" cell,
# nothing is dropped here.
# NOTE(review): presumably this exists so the length/area section can be run on its
# own after the derived collection has been built -- confirm.
client = MongoClient()
db_derived = client[cityname+'_derived']
ways_derived = db_derived['ways']

In [None]:
def _way_length_m(coords):
    """Return the length of a way in meters.

    coords is the flat list of coordinate pairs stored under geometry.coordinates.
    Each pair is reversed before being passed to haversine(), and the result is
    multiplied by 1000 (haversine() returns kilometers by default).
    Ways with fewer than two points have length 0.
    """
    pts = np.asarray(coords)
    return sum(1000*haversine(pts[k][::-1], pts[k+1][::-1])
               for k in range(pts.shape[0]-1))


def _polygon_area_m2(coords):
    """Return the area (m^2) enclosed by the ring described by coords.

    The ring is projected from EPSG:4326 to an Albers equal-area projection whose
    standard parallels are the polygon's min/max second coordinate (bounds[1] and
    bounds[3]), then the planar area is taken.
    Approach from: http://gis.stackexchange.com/questions/127607/area-in-km-from-polygon-of-coordinates

    Raises whatever shapely/pyproj raise for degenerate input (e.g. fewer points
    than a LinearRing needs); the caller decides how to report it.
    """
    geom = Polygon(np.asarray(coords))
    projected = ops.transform(
        partial(
            pyproj.transform,
            pyproj.Proj(init='EPSG:4326'),
            pyproj.Proj(
                proj='aea',
                # PROJ's parameter names are lat_1/lat_2. The previous lat1/lat2
                # were not recognised, so these parallels were never applied.
                lat_1=geom.bounds[1],
                lat_2=geom.bounds[3])),
        geom)
    return projected.area


# NOTE(review): Cursor.count() is deprecated in newer PyMongo releases (removed in 4.x);
# switch to Collection.count_documents(query) when upgrading.

# --- LineStrings: length only ---
linestring_query = {"geometry.type": "LineString"}
cursor = ways_derived.find(linestring_query)
numLineStrings = cursor.count()
bar = pyprind.ProgBar(numLineStrings, bar_char='█', update_interval=1)
for i, way in enumerate(ways_derived.find(linestring_query)):
    bar.update(item_id = i)
    length_m = _way_length_m(way["geometry"]["coordinates"])
    ways_derived.update_one({'_id': way["_id"]}, {"$set": {"properties_derived.length": length_m}}, upsert=False)

# --- Polygons: perimeter length and area ---
polygon_query = {"geometry.type": "Polygon"}
cursor = ways_derived.find(polygon_query)
numPolygons = cursor.count()
bar = pyprind.ProgBar(numPolygons, bar_char='█', update_interval=1)
for i, way in enumerate(ways_derived.find(polygon_query)):
    bar.update(item_id = i)
    coords = way["geometry"]["coordinates"]
    derived = {"properties_derived.length": _way_length_m(coords)}
    try:
        # Fails for degenerate rings, e.g.
        # "Invalid number of points in LinearRing found 3 - must be 0 or >= 4".
        derived["properties_derived.area"] = _polygon_area_m2(coords)
    except Exception as err:
        # Best effort: keep the length, skip the area, but say which way failed and why.
        print("Something went wrong: " + str(i) + " (way " + str(way["_id"]) + "): " + repr(err))
    # Single write per polygon; the area is only included when it could be computed.
    ways_derived.update_one({'_id': way["_id"]}, {"$set": derived}, upsert=False)