# Load graph data

In [1]:
import networkx as nx
import json
import numpy as np
import gmplot #for plotting to google maps

#load the shapefile generated from an OSMnx pull (see v4.1 OSMnx to generate SHP.ipynb)
#g = nx.read_shp('data/from_osmnx/edges/edges.shp')
#g = nx.read_shp('data/from_osmnx_2017_05_11/edges/edges.shp')
#g = nx.read_shp('data/from_osmnx_2017_05_20/edges/edges.shp')
g = nx.read_shp('data/from_osmnx_2017_05_21_all_private/edges/edges.shp')

#returns array of coordinates for any edge in the graph
def get_path(n0, n1):
    """Return the road geometry stored on the edge between n0 and n1.

    The edge's 'Json' attribute is parsed as JSON and its 'coordinates'
    entry is returned as a NumPy array of points along the road.
    """
    geometry = json.loads(g[n0][n1]['Json'])
    return np.array(geometry['coordinates'])

#returns in KILOMETERS
EARTH_R = 6372.8
def geocalc(lat0, lon0, lat1, lon1):
    """Great-circle distance, in km, between (lat0, lon0) and (lat1, lon1).

    All four arguments are in degrees; scalars and NumPy arrays are both
    accepted (everything is computed with NumPy ufuncs). The central angle
    is obtained with arctan2 and scaled by EARTH_R.
    """
    phi0 = np.radians(lat0)
    lam0 = np.radians(lon0)
    phi1 = np.radians(lat1)
    lam1 = np.radians(lon1)
    delta_lam = lam0 - lam1

    sin0, cos0 = np.sin(phi0), np.cos(phi0)
    sin1, cos1 = np.sin(phi1), np.cos(phi1)
    sin_d, cos_d = np.sin(delta_lam), np.cos(delta_lam)

    # numerator and denominator of tan(central angle)
    cross = cos1 * sin_d
    along = cos0 * sin1 - sin0 * cos1 * cos_d
    numerator = np.sqrt(cross ** 2 + along ** 2)
    denominator = sin0 * sin1 + cos0 * cos1 * cos_d

    central_angle = np.arctan2(numerator, denominator)
    return EARTH_R * central_angle

#Now, we define a function computing a path's length.
#This function has been found in StackOverflow. http://stackoverflow.com/questions/8858838/need-help-calculating-geographical-distance

#order of lats, lons was incorrect in tutorial! Corrected here
def get_path_length(path):
    """Return the total length (km) of a path given as an array of points.

    `path` is indexed as path[:, 0] / path[:, 1]; column 1 is passed to
    geocalc as latitude and column 0 as longitude. The distances between
    consecutive points are summed.
    """
    starts = path[:-1]
    ends = path[1:]
    segment_km = geocalc(starts[:, 1], starts[:, 0], ends[:, 1], ends[:, 0])
    return np.sum(segment_km)

print('finished')

finished


## Load graph data > Add nodes and connect them

Load the data (a Shapefile dataset) with NetworkX. 
This dataset contains detailed information about all roads in San Francisco County. 
NetworkX's read_shp function returns a graph, where each node is a geographical position, 
and each edge contains information about the road linking the two nodes.

In lieu of Census data, SF Gov has a full, connected graph of streets that can be found here: https://data.sfgov.org/Geographic-Locations-and-Boundaries/Streets-of-San-Francisco/4ugn-hf48

In [2]:
"""
    Shapefile is incomplete, so here we manually create edges known to exist
"""
#given two nodes, build an edge between them
#nodes must be in (lon, lat) tuple
def create_edge(a,b):
    """Add an edge a-b to the global graph with a two-point LineString.

    The edge's 'Json' attribute is set to a GeoJSON-style string whose
    coordinates are just the two endpoints, so the new edge is a straight
    line from a to b.
    """
    start_repr = str(list(a))
    end_repr = str(list(b))

    g.add_edge(a, b)
    g[a][b]['Json'] = '{ "type": "LineString" ,"coordinates" : [' + start_repr + ', ' + end_repr + ']}'


#road has a gate at the end of public part
p1 = (-122.38385529999999, 37.7423869)
p2 = (-122.382517, 37.741975)
create_edge(p1, p2) #lon/lat

def extend_edge(my_edge, my_point):
    '''Extend an edge so that it reaches a new point on a nearby road.

    inputs: an edge (pair of existing nodes) and a tuple for a node that
            does not exist yet; the tuple is reversed before being used
            as a graph node
    outputs: none; the graph is modified in place. The edge closest to
             my_point is connected to the new node (see split_edge), and
             the endpoint of my_edge nearest the new node is linked to it.
    '''
    # connect the road nearest to the point to the new node
    split_edge(my_point)

    # link whichever endpoint of my_edge is nearer to the new node
    new_node = my_point[::-1]
    closest, _ = orient_edge_endpoints(new_node, my_edge)
    create_edge(closest, new_node)
    
def split_edge(my_point):
    '''Connect a new point to both endpoints of the edge nearest to it.

    inputs: the point that is being added to the graph (it is reversed
            before being used as a node)
    output: none; for the nearest edge A-B, edges A-C and C-B are added,
            where C is the new node. The original A-B edge is not removed.
    '''
    end_a, end_b = find_closest_edge(my_point)
    new_node = my_point[::-1]
    for endpoint in (end_a, end_b):
        create_edge(endpoint, new_node)
    
#given a point, return the closest edge; optionally return its distance
def find_closest_edge(my_point, method='all', return_distance=False):
    """Return the graph edge closest to my_point.

    Parameters
    ----------
    my_point : 2-tuple in the reverse order of the graph's node tuples
        (nodes are reversed before being compared against it).
    method : 'perp' uses only the point-to-segment distance between the
        edge's end nodes, 'coord' uses only the distance to the nearest
        stored coordinate of the edge, anything else uses the smaller
        of the two.
    return_distance : when True, also return the winning distance.

    Returns
    -------
    [n0, n1], or ([n0, n1], distance) when return_distance is True.
    If the graph has no usable edge, the edge is None and the distance
    is infinity (previously this case raised UnboundLocalError).
    """
    # start from infinity rather than an arbitrary sentinel so that some
    # edge is always selected when at least one exists
    min_distance_to_coord_edge = float('inf')
    closest_edge = None

    for n0, n1 in g.edges_iter():
        # self-loops have no direction to measure against
        if n0 == n1:
            continue

        # only compute the metric(s) the caller asked for
        if method == 'perp':
            my_distance = distance_from_line(n0[::-1], n1[::-1], my_point)
        elif method == 'coord':
            my_distance = distance_from_edge_coord(n0, n1, my_point)
        else:
            # the straight segment misses curving roads, the coordinate
            # list misses long straight ones, so take the better of both
            my_distance = min(
                distance_from_line(n0[::-1], n1[::-1], my_point),
                distance_from_edge_coord(n0, n1, my_point),
            )

        if my_distance < min_distance_to_coord_edge:
            min_distance_to_coord_edge = my_distance
            closest_edge = [n0, n1]

    if return_distance:
        return closest_edge, min_distance_to_coord_edge

    return closest_edge

#for a given point, find the edge it is closest to
#first, find its closest point on an infinite line defined by two nodes
#use https://en.wikipedia.org/wiki/Distance_from_a_point_to_a_line#Line_defined_by_two_points
#if that point IS NOT actually between the nodes, take the closest node

def distance_from_line(p,q,x):
    """Return the distance from point x to the line segment p-q.

    p, q and x are 2-element sequences in the same coordinate order.
    x is projected perpendicularly onto the infinite line through p and q;
    if the foot of that perpendicular falls outside the segment, the
    distance to the nearer endpoint is returned instead.

    The previous implementation tested whether the *vertical* projection
    of x lay on the segment, which rejected almost every point next to a
    vertical segment and misclassified points near diagonal ones; it also
    divided by zero when p == q.
    """
    seg_dx = q[0] - p[0]
    seg_dy = q[1] - p[1]
    seg_len_sq = seg_dx ** 2 + seg_dy ** 2

    # degenerate segment: p and q coincide, so measure to that single point
    if seg_len_sq == 0:
        return np.sqrt((x[0] - p[0]) ** 2 + (x[1] - p[1]) ** 2)

    # t is the position of the perpendicular foot along p->q
    # (0 at p, 1 at q); clamping it keeps the foot on the segment
    t = ((x[0] - p[0]) * seg_dx + (x[1] - p[1]) * seg_dy) / seg_len_sq
    t = min(1.0, max(0.0, t))

    foot_0 = p[0] + t * seg_dx
    foot_1 = p[1] + t * seg_dy
    return np.sqrt((x[0] - foot_0) ** 2 + (x[1] - foot_1) ** 2)

#given two 2-d points, return their distance
def distance_points(a,b):
    """Euclidean distance between two 2-D points (indexable as [0], [1])."""
    d0 = a[0] - b[0]
    d1 = a[1] - b[1]
    return np.sqrt(d0**2 + d1**2)

def distance_from_edge_coord(a,b,my_point):
    """Distance from my_point to the nearest stored coordinate of edge a-b.

    The edge's coordinate columns are reversed before the comparison, so
    my_point is expected in the opposite order to the stored coordinates.
    """
    reversed_path = get_path(a,b)[:, ::-1]
    squared = np.sum((reversed_path - my_point)**2, axis=1)
    return min(squared**0.5)

def orient_edge_endpoints(my_node, my_edge):
    '''Order an edge's endpoints by their distance to a node.

    inputs: a node and an edge, in any orientation
    outputs: two nodes (the edge's endpoints), closest first and farthest
             second; on a tie the edge's first endpoint is treated as closest
    '''
    first, second = my_edge[0], my_edge[1]
    to_first = distance_points(my_node, first)
    to_second = distance_points(my_node, second)

    if to_first <= to_second:
        return first, second
    if to_second < to_first:
        return second, first
    # falls through (returning None) only when the two distances cannot be
    # ordered, e.g. if either one is NaN
    
this_point = (37.7999762, -122.413069)
this_edge = [(-122.4124276, 37.7997612), (-122.41301, 37.799689)]
#not needed with openstreetmap edits
#extend_edge(this_edge, this_point)

#a directed graph can only look up an edge in its stored direction,
#so switch to an undirected graph
g = g.to_undirected()

#split the graph into its connected components (one subgraph each)
sg_all = list(nx.connected_component_subgraphs(g.to_undirected()))

#grab the biggest subgraph (most nodes); on a tie the first one found wins
max_len = -1
sg_final = sg_all[0]
for sg in sg_all:
    x = len(sg)
    if x > max_len:
        max_len = x
        sg_final = sg
        
# establish all the nodes of the largest component as an array
nodes = np.array(sg_final.nodes())

#update graph by computing the distance between any two connected nodes
#(get_path_length returns km) and storing it on the edge as 'distance'
for n0, n1 in sg_final.edges_iter():
    path = get_path(n0, n1)
    distance = get_path_length(path)
    sg_final.edge[n0][n1]['distance'] = distance
    
print('finished')    

finished


## Load graph data > Visually see graph on a map

https://github.com/vgm64/gmplot
Library for plotting onto Google Maps.

In [8]:
"""
    Plotting the full graph
"""

#for an edge, pull all coordinates
def pull_coords(my_edge):
    """Return the coordinate array stored on an edge.

    my_edge is a pair (n0, n1) of connected nodes. The edge's 'Json'
    attribute is parsed and its 'coordinates' entry returned as a NumPy
    array.

    The previous version ignored my_edge and read the global loop
    variable `e`, so it only worked when called from a loop that happened
    to use that name.
    """
    n0, n1 = my_edge[0], my_edge[1]
    return np.array(json.loads(g[n0][n1]['Json'])['coordinates'])

#plot the coordinates of every edge in the graph
a_map = gmplot.GoogleMapPlotter.from_geocode("San Francisco")

for e in g.edges_iter():
    #parse the edge's geometry once instead of once per column
    edge_coords = pull_coords(e)
    #column 1 is plotted as latitude, column 0 as longitude
    a_map.plot(edge_coords[:,1], edge_coords[:,0], 'red', edge_width=2, alpha=0.5)

a_map.draw('test plots/v5 using OSMnx all.html')
print('finished')

finished


# Load run history

## Load run history > map a KML

# Plot runs

In [4]:
import glob
from datetime import datetime #when reading TCX, need to convert dates

#look for all these files, compare it to the KML dates and add it to the map

def pull_from_tcxs(file_pattern, dates, a_map, color):
    """Plot every TCX file matching file_pattern whose date is not in dates.

    Parameters
    ----------
    file_pattern : glob pattern for the TCX files. Each matched path must
        contain 'tapiriik' followed by one separator character and then a
        'YYYY-MM-DD' date (ten characters).
    dates : collection of datetime objects already plotted; files whose
        date is in it are skipped.
    a_map : plotter object exposing .plot(lats, lons, color, ...).
    color : line color passed through to a_map.plot.

    Returns None; each new file's date is printed and its track is added
    to a_map. BeautifulSoup must already be imported in the notebook.
    """
    for file in glob.glob(file_pattern):

        # the date is read from the characters right after 'tapiriik' in the path
        file_date = file.split('tapiriik')[1][1:11]
        file_date = datetime.strptime(file_date, '%Y-%m-%d')
        if file_date in dates:
            continue
        print(file_date)

        # parse inside a with-block so the file handle is always closed
        with open(file) as tcx_file:
            soup = BeautifulSoup(tcx_file, "html.parser")

        lats = []
        lons = []

        # not all trackpoints carry a location, so walk the 'position' tags only
        for pos in soup.find_all('position'):
            # lat/lon need to be floats for plotting
            lats.append(float(pos.latitudedegrees.string))
            lons.append(float(pos.longitudedegrees.string))

        # add this route to the plot
        a_map.plot(lats, lons, color, edge_width=4, alpha=0.3)

#given a google map, KML file and color: add to the map for every route manually and record the dates

def pull_from_kml(a_map, file, color):
    """Plot every route in a KML file and return the routes' dates.

    Parameters
    ----------
    a_map : plotter object exposing .plot(lats, lons, color, ...).
    file : path of the KML file.
    color : line color passed through to a_map.plot.

    Returns
    -------
    list of datetime, one per plotted placemark, parsed from the
    placemark's name with the format '%m/%d/%y'. Placemarks named
    'BLOCKED' are skipped entirely.

    BeautifulSoup must already be imported in the notebook.
    """
    dates = []

    # parse inside a with-block so the file handle is always closed
    with open(file) as kml_file:
        soup = BeautifulSoup(kml_file, "html.parser")

    # each route is a placemark tag
    for p in soup.find_all('placemark'):
        # the placemark's name holds the date of the run
        this_date = p.find('name').text
        if this_date == 'BLOCKED':
            continue
        dates.append(datetime.strptime(this_date, '%m/%d/%y'))

        # coordinates are whitespace-separated 'lon,lat,...' strings
        lats = []
        lons = []
        for c in p.coordinates.text.split():
            this_coord = c.split(',')
            lats.append(float(this_coord[1]))
            lons.append(float(this_coord[0]))

        a_map.plot(lats, lons, color, edge_width=4, alpha=0.3)

    return dates

#pull from manually built KML
file = 'Running SF.kml'

#define the map area
kml_map = gmplot.GoogleMapPlotter.from_geocode("San Francisco")

#generate plot of all the KML based runs; keep their dates so the same
#runs are not plotted again from the TCX files
kml_dates = pull_from_kml(kml_map, file, 'cornflowerblue')

#tcx files are synced from garmin via https://tapiriik.com/
files = '../../../Apps/tapiriik/*.tcx'
pull_from_tcxs(files, kml_dates, kml_map, 'crimson')

#write the combined map (KML runs + TCX runs) to an HTML file
kml_map.draw('runs 2017 06 18.html')
print('all runs mapped')

2014-01-13 00:00:00
2014-01-19 00:00:00
2014-07-15 00:00:00
2014-08-28 00:00:00
2014-12-16 00:00:00
2015-01-10 00:00:00
2015-03-28 00:00:00
2015-07-08 00:00:00
2016-01-06 00:00:00
2016-07-11 00:00:00
2016-08-15 00:00:00
2016-08-15 00:00:00
2016-11-15 00:00:00
2016-11-16 00:00:00
2016-11-24 00:00:00
2016-11-27 00:00:00
2016-11-29 00:00:00
2016-11-30 00:00:00
2016-12-15 00:00:00
2017-01-03 00:00:00
2017-01-08 00:00:00
2017-01-09 00:00:00
2017-01-30 00:00:00
2017-03-05 00:00:00
2017-03-07 00:00:00
2017-03-11 00:00:00
2017-03-11 00:00:00
2017-03-16 00:00:00
2017-03-20 00:00:00
2017-03-22 00:00:00
2017-03-23 00:00:00
2017-03-25 00:00:00
2017-03-26 00:00:00
2017-03-26 00:00:00
2017-03-28 00:00:00
2017-03-31 00:00:00
2017-04-03 00:00:00
2017-04-05 00:00:00
2017-04-08 00:00:00
2017-04-11 00:00:00
2017-04-12 00:00:00
2017-04-13 00:00:00
2017-04-16 00:00:00
2017-04-18 00:00:00
2017-04-20 00:00:00
2017-04-24 00:00:00
2017-04-25 00:00:00
2017-04-26 00:00:00
2017-05-01 00:00:00
2017-05-02 00:00:00


# scratch

In [9]:
import random

In [11]:
random.randrange(0,446)

130

In [17]:
this_coord = coord_to_node(this_point)
find_closest_edge(this_coord)

[(-122.455143, 37.7926388), (-122.4555011, 37.7933413)]

In [36]:
coords = [c[::-1] for c in json.loads(g[(-122.455143, 37.7926388)][(-122.4555011, 37.7933413)]['Json'])['coordinates']]

## scratch -> read from GraphML

In [31]:
for t in test_kmls:
    snap_kml(t)

1/29/13
plotted to test plots/v5 test_markers - 18.html
2/28/13
not in SF
plotted to test plots/v5 test_markers - 30.html
4/17/13
not in SF
plotted to test plots/v5 test_markers - 43.html
5/31/13
plotted to test plots/v5 test_markers - 59.html
6/13/13
plotted to test plots/v5 test_markers - 60.html
6/17/13
plotted to test plots/v5 test_markers - 61.html
10/9/13
plotted to test plots/v5 test_markers - 100.html
10/17/13
plotted to test plots/v5 test_markers - 103.html
12/10/13
plotted to test plots/v5 test_markers - 123.html
6/15/14
plotted to test plots/v5 test_markers - 164.html
7/24/14
plotted to test plots/v5 test_markers - 178.html
9/22/14
plotted to test plots/v5 test_markers - 200.html
10/09/14
plotted to test plots/v5 test_markers - 205.html
12/15/14
plotted to test plots/v5 test_markers - 231.html
2/9/15
plotted to test plots/v5 test_markers - 257.html
9/15/15
plotted to test plots/v5 test_markers - 300.html
11/25/15
plotted to test plots/v5 test_markers - 317.html
1/13/16
plott

In [33]:
#indices of the KML placemarks (runs) used as test cases for snap_kml
test_kmls = [18, 30, 43]
test_kmls.extend(range(59, 62))
test_kmls.extend([100, 103, 123, 130, 164, 178, 200, 205, 231, 257, 300, 317, 331, 343, 387])
test_kmls.extend(range(399, 401))

print('finished')

finished


In [1]:
import networkx as nx
from shapely.geometry import LineString
from bs4 import BeautifulSoup #library for tcx reading
import gmplot #for plotting to google maps
import numpy as np

#g = nx.read_gpickle("2017_05_29 all_private cleaned_coords.gpickle")
#every edge has a 'length' attribute which is in meters

g = nx.read_gpickle("2017_06_08 all_private cleaned_coords real_multidi.gpickle")
g = g.to_undirected()

#grab kml file; parse it inside a with-block so the handle is closed
file = 'Running SF.kml'
with open(file) as kml_file:
    soup = BeautifulSoup(kml_file, "html.parser")

#split the graph into its connected components (one subgraph each)
sg_all = list(nx.connected_component_subgraphs(g.to_undirected()))

#grab the biggest subgraph (most nodes); on a tie the first one found wins
max_len = -1
sg_final = sg_all[0]
for sg in sg_all:
    x = len(sg)
    if x > max_len:
        max_len = x
        sg_final = sg

# establish all the nodes of the largest component as an array
nodes = np.array(sg_final.nodes())

print('finished')

finished


In [2]:
#given two 2-d points, return their distance
def distance_points(a,b):
    """Euclidean distance between two 2-D points (indexable as [0], [1])."""
    d0 = a[0] - b[0]
    d1 = a[1] - b[1]
    return np.sqrt(d0**2 + d1**2)

In [3]:
def snap_kml(t):
    """Plot the t-th placemark of the parsed KML and save it as HTML.

    Prints the placemark's name, hands its whitespace-separated coordinate
    strings to plot_kml, writes the resulting map to
    'test plots/v5 test_markers - <t>.html' and prints that location.
    """
    placemark = soup.find_all('placemark')[t]
    print(placemark.find('name').get_text())

    # one 'lon,lat,...' string per recorded point of the run
    run_coords = placemark.coordinates.text.split()

    run_map = plot_kml(run_coords)
    location = 'test plots/v5 test_markers - ' + str(t) + '.html'
    run_map.draw(location)

    print('plotted to ' + location)

In [30]:
#given a KML run, plot it on google maps
#inferring closest nodes, edges
def plot_kml(my_coords):
    """Snap a run onto the street graph and plot it on a Google map.

    my_coords is a list of 'lon,lat,...' strings (one per recorded point).
    Each point is matched to its closest graph node when that node is
    within `threshold`, otherwise to its closest edge. Consecutive matches
    are joined through the graph (connect_nodes / connect_edge) and the
    resulting node-to-node segments are drawn with plot_edge_gmap.
    Points matched to a node are drawn blue, points matched to an edge red.

    If any point is farther than `sf_limit` from every node, 'not in SF'
    is printed, a fresh map centred on that point is created with all raw
    points scattered in black, and matching stops; segments gathered up
    to that point are still drawn on the new map.

    Returns the map object (not yet written to disk).
    """
#a_run = soup.find_all('placemark')[130]
#a_run_coords = a_run.coordinates.text.split()
#my_coords = a_run_coords

    # max point-to-node distance for snapping straight to a node; compared
    # against distance_points on raw coordinates (presumably degrees -- TODO confirm)
    threshold = 25 / 100000
    # parallel lists describing the walked route; from_nodes carries one
    # extra leading placeholder (-1) so that from_nodes[1:-1] lines up with
    # to_nodes[1:] as consecutive (from, to) pairs
    from_nodes = []
    to_nodes = []
    keys = []
    colors = []

    # beyond this distance from every node the run is treated as outside the graph
    sf_limit = 1.0

    a_map = gmplot.GoogleMapPlotter.from_geocode("San Francisco")

    lats = []
    lons = []

    #take apart the individual points
    for c in my_coords:
        this_coord = c.split(',')
        lats.append(float(this_coord[1]))
        lons.append(float(this_coord[0]))

    for lat, lon in zip(lats, lons):
        this_point = (lat, lon)

    #if distance is < threshold, then just use the closest node.  Else, find the closest edge.  And plot it 

    # need to incorporate coordinates within the edge?
        closest_node, dist = find_closest_node(this_point, return_distance=True)

    #is the run in SF?
        if dist > sf_limit:
            print('not in SF')
            # replace the map with one centred on the offending point and
            # show only the raw points
            a_map = gmplot.GoogleMapPlotter(lat, lon, 13)
            #a_map = gmplot.GoogleMapPlotter.from_geocode("San Francisco")
            a_map.scatter(lats, lons, size=15, color='black', marker=False)
            #return a_map
            break

        # edge key for the last segment added by this point; 0 unless an
        # edge match below supplies one
        c_key = 0

        if dist < threshold:
            my_color = 'blue'
            if from_nodes:
    #connect the last point to this node
                cn = connect_nodes(to_nodes[-1], closest_node)
            else: #starting condition
                from_nodes.extend([-1, closest_node]) #we'll throw this out later
                to_nodes.extend([closest_node])
                keys.extend([0])
                colors.extend([my_color])
                # nothing to connect yet, so skip the bookkeeping below
                continue


        else:
#            print('guessing edge for ' + str(this_point) + ', distance: ' + str(dist))
            ce, c_key = find_closest_edge(this_point, 'all')
            if from_nodes:
                #order matters here, we want to connect nodes that are next to each other, rather than make a long journey
                cn = connect_edge(to_nodes[-1], ce)


            else: #starting condition
                cn = ce #just use the edge
                from_nodes.extend([-1])
            my_color = 'red'

    #take the added nodes and pop them on
        for cni in cn:
            from_nodes.extend([cni])
            to_nodes.extend([cni])
            keys.extend([0])
            colors.extend([my_color])
        #correct in case the final edge was added: only the last segment
        #gets the matched edge's key, the connecting segments keep key 0
        keys[-1] = c_key

        a_map.scatter([lat], [lon], size=15, color=my_color, marker=False)

    #pair each node with its successor, then remove self-connections
    segments = pd.DataFrame(data={'from': from_nodes[1:-1], 'to': to_nodes[1:], 'key': keys[1:], 'color': colors[1:]})
    segments = segments[segments['from'] != segments['to']]

    # draw every remaining segment onto the map
    segments.apply(lambda s: plot_edge_gmap(s['from'], s['to'], a_map, s['color'], key=s['key']), axis=1)

    return a_map

#a_map.draw('test plots/v5.2 multigraph kml.html')
print('finished')

finished


In [6]:
def find_closest_node(my_point, return_distance=False):
    """Return the graph node whose coordinate is closest to my_point.

    Each node's stored coordinate is reversed before the comparison, so
    my_point is expected in the opposite order to the stored coordinates.

    Returns the node, or (node, distance) when return_distance is True.
    If the graph has no nodes the node is None and the distance is
    infinity (previously this case, and any point farther than the old
    sentinel of 100 from every node, raised UnboundLocalError).
    """
    min_d = float('inf')
    closest_n = None

    for n in g.nodes_iter():
        c = pull_node_coordinate(n)[::-1]
        d = distance_points(c, my_point)
        if d < min_d:
            min_d = d
            closest_n = n

    if return_distance:
        return closest_n, min_d
    return closest_n

In [8]:
#given a node (int) return the coordinate (lon,lat)
def pull_node_coordinate(my_node):
    """Look up the 'coordinate' attribute stored on a graph node."""
    node_attrs = g.node[my_node]
    return node_attrs['coordinate']

In [10]:
#given a point, return the closest edge with key; optionally return its distance

def find_closest_edge(my_point, method='all', return_distance=False):
    """Return the edge (and its key) of the multigraph closest to my_point.

    Parameters
    ----------
    my_point : 2-tuple in the reverse order of the stored edge
        coordinates (it is reversed before every comparison).
    method : 'perp' uses only the point-to-segment distance along each
        edge's coordinate list, 'coord' uses only the distance to the
        nearest stored coordinate, anything else uses whichever is smaller.
    return_distance : when True, also return the winning distance.

    Returns
    -------
    ([n0, n1], key), or ([n0, n1], key, distance) when return_distance is
    True. If no edge qualifies, the edge and key are None and the distance
    is infinity (previously an UnboundLocalError).

    Fixes over the earlier version: 'coord' is now honoured (the old
    condition fell back to the perpendicular distance whenever that was
    smaller), segment distances are collected per key instead of being
    accumulated across keys, and keys whose coordinates form no usable
    segment are skipped instead of crashing on min([]).
    """
    query = my_point[::-1]

    min_distance_to_coord_edge = float('inf')
    closest_edge = None
    closest_edge_key = None

    #iterate through edges and record distance to closest point on edge
    for n0, n1 in g.edges_iter():

        # skip edges whose end nodes sit on the same coordinate
        if g.node[n0]['coordinate'] == g.node[n1]['coordinate']:
            continue

        #(1) closest stored coordinate over all keys of this node pair
        my_distance_coord, my_key_coord = distance_from_edge_coord(n0, n1, query)

        #(2) closest segment between consecutive coordinates, per key
        my_distance_perp = float('inf')
        my_key_perp = -1
        for k in g[n0][n1]:
            cs = g[n0][n1][k]['coordinates']
            segment_distances = [
                distance_from_line(cs[i], cs[i+1], query)
                for i in range(len(cs)-1)
                if cs[i] != cs[i+1]  # zero-length segments carry no direction
            ]
            if not segment_distances:
                continue
            d = min(segment_distances)
            if d < my_distance_perp:
                my_distance_perp = d
                my_key_perp = k

        # pick the metric requested by the caller
        if method == 'perp':
            my_distance, my_key = my_distance_perp, my_key_perp
        elif method == 'coord':
            my_distance, my_key = my_distance_coord, my_key_coord
        elif my_distance_perp < my_distance_coord:
            my_distance, my_key = my_distance_perp, my_key_perp
        else:
            my_distance, my_key = my_distance_coord, my_key_coord

        if my_distance < min_distance_to_coord_edge:
            min_distance_to_coord_edge = my_distance
            closest_edge = [n0, n1]
            closest_edge_key = my_key

    if return_distance:
        return closest_edge, closest_edge_key, min_distance_to_coord_edge

    return closest_edge, closest_edge_key

print('finished')

finished


In [13]:
#given two edge endnodes and a point, return the distance of the closest edge and that edge's key
def distance_from_edge_coord(a,b,my_point):
    """Find which parallel edge between a and b has a coordinate nearest my_point.

    Every key of the a-b edge is examined; for each one the smallest
    distance between my_point and any stored coordinate is taken.
    Returns (distance, key) for the best key, or (1000, -1) if no key
    gets below the starting value of 1000.
    """
    best_distance, best_key = 1000, -1

    for key in g[a][b]:
        nearest = min(distance_points(vertex, my_point) for vertex in get_path(a, b, key))
        if nearest < best_distance:
            best_distance, best_key = nearest, key

    return best_distance, best_key

In [15]:
#returns array of coordinates for any edge in the graph
def get_path(n0, n1, k=0):
    """Return the 'coordinates' stored on edge n0-n1 under key k.

    n0 and n1 must be connected in the graph; k selects one of the
    parallel edges between them and defaults to 0.
    """
    edge_data = g[n0][n1][k]
    return edge_data['coordinates']

In [17]:
#for a given point, find the edge it is closest to
#first, find its closest point on an infinite line defined by two nodes
#use https://en.wikipedia.org/wiki/Distance_from_a_point_to_a_line#Line_defined_by_two_points
#if that point IS NOT actually between the nodes, take the closest node

def distance_from_line(p,q,x):
    """Return the distance from point x to the line segment p-q.

    p, q and x are 2-element sequences in the same coordinate order.
    x is projected perpendicularly onto the infinite line through p and q;
    if the foot of that perpendicular falls outside the segment, the
    distance to the nearer endpoint is returned instead.

    The previous implementation tested whether the *vertical* projection
    of x lay on the segment, which rejected almost every point next to a
    vertical segment and misclassified points near diagonal ones; it also
    divided by zero when p == q.
    """
    seg_dx = q[0] - p[0]
    seg_dy = q[1] - p[1]
    seg_len_sq = seg_dx ** 2 + seg_dy ** 2

    # degenerate segment: p and q coincide, so measure to that single point
    if seg_len_sq == 0:
        return np.sqrt((x[0] - p[0]) ** 2 + (x[1] - p[1]) ** 2)

    # t is the position of the perpendicular foot along p->q
    # (0 at p, 1 at q); clamping it keeps the foot on the segment
    t = ((x[0] - p[0]) * seg_dx + (x[1] - p[1]) * seg_dy) / seg_len_sq
    t = min(1.0, max(0.0, t))

    foot_0 = p[0] + t * seg_dx
    foot_1 = p[1] + t * seg_dy
    return np.sqrt((x[0] - foot_0) ** 2 + (x[1] - foot_1) ** 2)

print('finished')

finished


In [19]:
#given two nodes anywhere on the graph, return all necessary intermediary nodes
def connect_nodes(n0, n1):
    """Return the shortest path from n0 to n1 in sg_final, weighted by 'length'.

    The result is whatever nx.shortest_path returns for the two nodes.
    """
    route = nx.shortest_path(sg_final, source=n0, target=n1, weight='length')
    return route

In [21]:
#take a node and an edge, orient the edge's endpoints toward the node and return the path connecting the two
def connect_edge(my_node, my_edge):
    """Return the nodes to visit to travel from my_node across my_edge.

    The edge's endpoints are ordered by distance to my_node. The result is
    the route from my_node to the nearer endpoint (without my_node itself)
    followed by the farther endpoint; if my_node already is the nearer
    endpoint, only the farther endpoint is returned.
    """
    near_end, far_end = orient_edge_endpoints(my_node, my_edge)

    if near_end != my_node:
        # walk to the near end first, then cross the edge to the far end
        return connect_nodes(my_node, near_end)[1:] + [far_end]

    # already standing on the edge: just cross it
    return [far_end]

In [23]:
def orient_edge_endpoints(my_node, my_edge):
    '''Order an edge's endpoints by their distance to a node.

    inputs: a node and an edge, in any orientation (all given as node ids;
            their coordinates are looked up with pull_node_coordinate)
    outputs: two nodes (the edge's endpoints), closest first and farthest
             second; on a tie the edge's first endpoint is treated as closest
    '''
    first, second = my_edge[0], my_edge[1]
    origin = pull_node_coordinate(my_node)

    to_first = distance_points(origin, pull_node_coordinate(first))
    to_second = distance_points(origin, pull_node_coordinate(second))

    if to_first <= to_second:
        return first, second
    if to_second < to_first:
        return second, first
    # falls through (returning None) only when the two distances cannot be
    # ordered, e.g. if either one is NaN

print('finished')

finished


In [25]:
import pandas as pd

In [27]:
#given two nodes, add their (default primary) edge to the map
def plot_edge_gmap(n0, n1, my_map, color='purple', nodes=False, key=0):
    """Draw the edge n0-n1 (parallel edge `key`) on my_map.

    Nothing is drawn when n0 and n1 are the same node. Otherwise the
    edge's stored coordinates are plotted as a line in `color`; element 1
    of each coordinate is used as latitude and element 0 as longitude.
    With nodes=True the individual coordinates are also scattered as dots.
    """
    # a node paired with itself has no edge to draw
    if n0 == n1:
        return

    edge_path = get_path(n0, n1, key)
    my_lats = [point[1] for point in edge_path]
    my_lons = [point[0] for point in edge_path]
    my_map.plot(my_lats, my_lons, color, edge_width=5, alpha=0.4)

    if nodes:
        my_map.scatter(my_lats, my_lons, size=5, color=color, marker=False)

In [28]:
#130
snap_kml(130)

1/18/14
guessing edge for (37.790574, -122.42795000000001), distance: 0.000320410190228
guessing edge for (37.791354, -122.453839), distance: 0.000455783040488
guessing edge for (37.792473, -122.454128), distance: 0.000606439815646
guessing edge for (37.792567, -122.45431100000002), distance: 0.000693332568106
guessing edge for (37.792516, -122.45470800000001), distance: 0.000452000929199
guessing edge for (37.792355, -122.454772), distance: 0.000467101102546
guessing edge for (37.79255, -122.45489000000002), distance: 0.000268131385692
guessing edge for (37.792651, -122.45553399999999), distance: 0.000391190286157
guessing edge for (37.793007, -122.455813), distance: 0.000457206845967
guessing edge for (37.798933, -122.45908500000002), distance: 0.000256414118188
guessing edge for (37.795788, -122.453356), distance: 0.000371052556929
guessing edge for (37.793558, -122.451704), distance: 0.000329138284016
guessing edge for (37.793372, -122.451071), distance: 0.000415465774767
guessing 

In [37]:
coords

[[37.7926388, -122.455143],
 [37.7926453, -122.4551384],
 [37.7926867, -122.455095],
 [37.7927255, -122.4550743],
 [37.7928151, -122.4550864],
 [37.7929038, -122.4551098],
 [37.7929888, -122.4551324],
 [37.7930681, -122.4551647],
 [37.7931372, -122.4552294],
 [37.7931912, -122.4552956],
 [37.7932957, -122.4554444],
 [37.7933413, -122.4555011]]

In [180]:
a_map = gmplot.GoogleMapPlotter.from_geocode("San Francisco")
my_color = 'red'
plot_edge_gmap(65309308, 65309306, a_map, my_color, key=0)
a_map.draw('test plots/v5.2 multigraph kml.html')

In [169]:
segments.iloc[71]

from    65309308
key            0
to      65309306
Name: 99, dtype: int64

In [176]:
my_path = get_path(n0,n1,key)
my_lats = [c[1] for c in my_path]
my_lons = [c[0] for c in my_path]
my_map.plot(my_lats, my_lons, color, edge_width=5, alpha=0.4)
my_map.draw('test plots/v5.2 multigraph kml.html')

In [177]:
my_path

[(-122.4566826, 37.8017177),
 (-122.4566187, 37.8017135),
 (-122.4565576, 37.8017037),
 (-122.4564954, 37.8016883),
 (-122.4564359, 37.801669),
 (-122.4563565, 37.8016398),
 (-122.4561881, 37.8015711)]

In [153]:
get_path(n0,n1,key)

KeyError: 65308182

In [165]:
segments = pd.DataFrame(data={'from': from_nodes[1:-1], 'to': to_nodes[1:], 'key': keys[1:]})
segments = segments[segments['from'] != segments['to']]

In [14]:
a_run = soup.find_all('placemark')[130]
a_run_coords = a_run.coordinates.text.split()

In [15]:
#input a messy coord 'lon, lat, 0.0' and return the networkx node (lat, lon)
def coord_to_node(c):
    """Convert a 'lon,lat,...' string into a (lat, lon) tuple of floats.

    Only the first two comma-separated fields are used; anything after
    them (e.g. an altitude) is ignored.
    """
    fields = c.split(',')
    return (float(fields[1]), float(fields[0]))

In [16]:
this_point = a_run_coords[20]
plot_closest_edge(this_point)

NameError: name 'plot_closest_edge' is not defined

In [43]:
#scratch version of a would-be plot_closest_edge(my_point, color='purple'):
#find the edge closest to one recorded point of the run and map both
#def plot_closest_edge(my_point, color='purple'):
this_point = a_run_coords[69]
my_point = this_point
color = 'blue'

#convert the raw 'lon,lat,...' string into a (lat, lon) tuple, then look up
#the nearest edge and which of its parallel edges (key) matched
my_coord = coord_to_node(my_point)
my_edge, my_edge_key = find_closest_edge(my_coord, 'all')

#centre a zoomed-in map on the point and draw the matched edge
my_map = gmplot.GoogleMapPlotter(my_coord[0], my_coord[1], 18)
plot_edge_gmap(my_edge[0], my_edge[1], my_map, color, key = my_edge_key)

#mark the query point itself in red
my_map.scatter([my_coord[0]], [my_coord[1]], color='red', marker=False, size=5)

my_map_file = 'test plots/v5 - closest edge.html'
my_map.draw(my_map_file)
print('mapped to ' + my_map_file)


mapped to test plots/v5 - closest edge.html


In [49]:
#https://www.darrinward.com/lat-long/
for v in g[65283442][65283457].values():
    print(v['length'])
    for c in v['coordinates']:
        print(c[1],',', c[0])
    

41.6258733755
37.7936516 , -122.4503161
37.7935466 , -122.450228
37.7935164 , -122.4502137
37.7934851 , -122.4502101
37.7934564 , -122.4502183
37.7934317 , -122.4502341
37.7933396 , -122.4503458
85.3602145105
37.7933396 , -122.4503458
37.7932633 , -122.4504527
37.7932457 , -122.4505056
37.7932436 , -122.450549
37.793254 , -122.450585
37.7932743 , -122.4506267
37.793297 , -122.450646
37.7933336 , -122.4506655
37.7933726 , -122.4506693
37.7934014 , -122.4506574
37.7934316 , -122.4506296
37.7936516 , -122.4503161


In [107]:
#map every node lying within `thresh` of my_coord (set in an earlier cell)
my_map = gmplot.GoogleMapPlotter(my_coord[0], my_coord[1], 18)

#search radius, in the same units as the stored coordinates
thresh = 0.001
for n in g.nodes_iter():
    #stored coordinates are reversed to match my_coord's order
    c = pull_node_coordinate(n)[::-1]
    d = distance_points(c, my_coord)
    if d < thresh:
        #marker size grows with distance, so nearer nodes get smaller dots
        my_map.scatter([c[0]], [c[1]], color='red', marker=False, size=d*20000)
        print(d, n)
        
my_map_file = 'test plots/v5.2 - closest nodes.html'
my_map.draw(my_map_file)

0.000581416374042 65309510
0.000285295986646 65283442
0.00043811319313 65283457
0.000810914181662 1769694191
0.000534820063204 65316958


In [119]:
"""
    Plotting the full graph
"""

#plot said coordinates from ALL edges in Shapefile
a_map = gmplot.GoogleMapPlotter.from_geocode("San Francisco")

plot_edge_gmap(65283442, 65283457, a_map, 'blue')
    
a_map.draw('test plots/v5 missing edge.html')
print('finished')

finished


In [123]:
#northeast 
g[65283457][65283442][0]['coordinates'] 

True

In [66]:
g.node[n1]

{'coordinate': (-122.4503161, 37.7936516),
 'osmid': 65283457,
 'x': -122.4503161,
 'y': 37.7936516}

In [72]:
#do any edges have multiple keys (i.e. parallel edges between the same two nodes)?
#NOTE(review): in the recorded output every pair appears twice -- presumably
#edges_iter yields one entry per parallel edge; confirm before relying on counts
for a,b in g.edges_iter():
    l = len(g[a][b].keys())
    if l > 1:
        print(a,b)

65295762 65295797
65295762 65295797
65307109 2609017309
65307109 2609017309
4294017522 4294017571
4294017522 4294017571
302481936 302481931
302481936 302481931
1804730958 1804730952
1804730958 1804730952
1601995449 1601995499
1601995449 1601995499
890045139 890045140
890045139 890045140
1601995492 1602010328
1601995492 1602010328
1785845249 1366261132
1785845249 1366261132
65350487 65350510
65350487 65350510
4238705695 4238705696
4238705695 4238705696
315360356 315360357
315360356 315360357
2678063292 2678050676
2678063292 2678050676
1167787178 65375338
1167787178 65375338
65373362 300759134
65373362 300759134
3711403248 3454091822
3711403248 3454091822
3717633583 3718959234
3717633583 3718959234
4295558436 2875566805
4295558436 2875566805
4766860604 4766860616
4766860604 4766860616
2715003792 2715003792
2715003792 2715003792
65350551 1743527630
65350551 1743527630
830572057 65280690
830572057 65280690
866289290 386899314
866289290 386899314
1041008495 1041008463
1041008495 1041008463


In [73]:
g[315360356][315360357]

{0: {'coordinates': [(-122.4818602, 37.7883204), (-122.4805385, 37.7882526)],
  'highway': 'service',
  'length': 116.38921357890044,
  'oneway': False,
  'osmid': 28690801,
  'service': 'parking_aisle'},
 1: {'geometry': <shapely.geometry.linestring.LineString at 0x166ca0c57f0>,
  'highway': 'service',
  'length': 153.46396511852544,
  'oneway': False,
  'osmid': 28690803,
  'service': 'parking_aisle'}}