In [1]:
import osmnx as ox
import networkx as nx
from sklearn.neighbors import KDTree
import matplotlib.pyplot as plt
import numpy as np
import json
import hashlib 


In [11]:
# distance from station for which adjacent nodes should be retrieved
distance=100

In [24]:
def get_central_node(graph,lat, long,distance=100):
    """given a geocoded loction and a graph return 
        find the node closests to the location """
    graph_df,_ = ox.graph_to_gdfs(graph)
    graph_tree = KDTree(graph_df[['y', 'x']], metric='euclidean')
    adress_idx = graph_tree.query([(lat,long)], k=1, return_distance=False)[0]
    closest_node = graph_df.iloc[adress_idx].index.values[0]
    return closest_node

    

In [25]:
def open_json(file_path):
    with open(file_path) as file:
        data_dict = json.loads(file.read())
    return data_dict
    

In [26]:
def get_neighbourhood(graph):
    '''extract the lat long locations'''
    lats = [float(y) for _, y in graph.nodes(data='y')]
    lngs = [float(x) for _, x in graph.nodes(data='x')]
    return zip(lats, lngs)

In [27]:
def process_graph(graph, data_dict):
    """process a single graph, getting all adjacent nodes and 
    and  add them to a the dictionary mapping the stations"""
    
#     getting all information from the graph object
    zipper = get_neighbourhood(graph)
    nodes = list(graph.nodes)
    lat_n_longs = list(zipper)
    
#     mapping adjacent streets
    
    adj_list = list(graph.adjacency())
    for i, node in enumerate(nodes):
        data_dict['stations'][str(node)]={}
        data_dict['stations'][str(node)]['type'] = 'street'
        data_dict['stations'][str(node)]['lat'] = lat_n_longs[i][0]
        data_dict['stations'][str(node)]['lng'] = lat_n_longs[i][1]
        data_dict['stations'][str(node)]['next_stops'] = list(adj_list[i][1].keys())
    
    return

In [28]:
def process_stations(file_path, new_file,distance=100):
    """processing all stations from the input file"""
#     opening the json and converting to dict
    data_dict = open_json(file_path)
    temp_dict = {'stations':{}}
    for key in data_dict['stations'].keys():
#         creating the graph and finding the closes node to the station and appending it
        
        lat=data_dict['stations'][key]['lat']
        long=data_dict['stations'][key]['lng']
        graph=ox.core.graph_from_point((lat,long), distance=distance)
        central_node=get_central_node(graph, lat, long)
        data_dict['stations'][key]['next_stops'].append(central_node)
        
        process_graph(graph,temp_dict)    
        temp_dict['stations'][str(central_node)]['next_stops'].append(key)
        return temp_dict,central_node, graph
        
    data_dict['stations'].update(temp_dict['stations'])
    
    with open(new_file,'w') as file:
        data_dict_as_json=json.dump(data_dict,file,default=str)
            

In [76]:
def process_stations_debug(file_path, new_file,distance=100):
    """processing all stations from the input file"""
#     opening the json and converting to dict
    data_dict = open_json(file_path)
    temp_dict = {'stations':{}}
    for key in data_dict['stations'].keys():
#         creating the graph and finding the closes node to the station and appending it
        print(key)
        lat=data_dict['stations'][key]['lat']
        long=data_dict['stations'][key]['lng']
        graph=ox.core.graph_from_point((lat,long), distance=distance)
        central_node=get_central_node(graph, lat, long)
        data_dict['stations'][key]['next_stops'].append(central_node)
        
        process_graph(graph,temp_dict)    
        temp_dict['stations'][str(central_node)]['next_stops'].append(key)
        
        return temp_dict,central_node, graph
        
    data_dict['stations'].update(temp_dict['stations'])
    
    with open(new_file,'w') as file:
        data_dict_as_json=json.dump(data_dict,file,default=str)
            

In [77]:
data,node,g=process_stations_debug('data/test.json','data/test_out.json')

2094266776


In [4]:
with open ('stations-large.json') as f:
    data=f.read()

In [9]:
data=json.loads(data)

In [125]:
hops=np.random.randint(3,10)
hop=0
path=[]
station=str(2094266776)
start=node
current_node=start
while hop <= hops: 
    print(current_node)
    next_node = np.random.choice(data['stations'][str(current_node)]['next_stops'])
    while next_node == station:
        if len(data['stations'][str(current_node)]['next_stops']):
            break
        else:
            np.random.choice(data['stations'][str(current_node)]['next_stops'])
    path.append(current_node)
    current_node = next_node
    hop = hop+1

3810179831
2777153497
2777153451
2777153445
2777153497
2777153508
2777153497
3810179831


In [165]:
def prune_nodes(central_node,nodes, adj_list,lat_n_longs,n_paths=20, max_hops=20):
    start = central_node
    paths = []
    for i in range(n_paths):
        hop =0
        hops = max_hops
        current_node =start
        while hop <=hops:
            node_index = nodes.index(current_node)
            next_node = np.random.choice(list(adj_list[node_index][1].keys()))
            paths.append(current_node)
            current_node = next_node
            hop = hop+1
    set_path=set(paths)
    
    nodes_to_keep = [node for node in nodes if node in set_path]
    nodes_to_keep_indexes = [nodes.index(node) for node in nodes_to_keep]
    print(nodes_to_keep)
    pruned_adj_list = [adj_list[index] for index in nodes_to_keep_indexes]
    pruned_lat_n_longs = [lat_n_longs[index] for index in nodes_to_keep_indexes]
    
    return nodes_to_keep,pruned_adj_list,pruned_lat_n_longs
    

In [183]:
def process_graph_test(graph, data_dict, central_node, max_hops=10,n_paths=2):
    """process a single graph, getting all adjacent nodes and 
    and  add them to a the dictionary mapping the stations"""
    
#     getting all information from the graph object
    zipper = get_neighbourhood(graph)
    nodes = list(graph.nodes)
    lat_n_longs = list(zipper)
    
#     mapping adjacent streets
    
    adj_list = list(graph.adjacency())
    
    
#     pruning the nodes
    
    nodes,adj_list,lat_n_longs=prune_nodes(central_node,nodes,adj_list,lat_n_longs,n_paths,max_hops)
    
    
    for i, node in enumerate(nodes):
        data_dict['stations'][str(node)]={}
        data_dict['stations'][str(node)]['type'] = 'street'
        data_dict['stations'][str(node)]['lat'] = lat_n_longs[i][0]
        data_dict['stations'][str(node)]['lng'] = lat_n_longs[i][1]
#         data_dict['stations'][str(node)]['next_stops'] = list(adj_list[i][1].keys())
        data_dict['stations'][str(node)]['next_stops'] = [element for element in list(adj_list[i][1].keys()) if element in nodes]
        
    
    
    
    return 

In [184]:
test_data={'stations':{'2094266776': {'type': 'station',
  'next_stops': ['55189765',
   '2094266781',
   '2094266782',
   '4655287361',
   '3824719830',
   '735967926'],
  'lat': 52.5216433,
  'lng': 13.4140891}}}

In [46]:
g=ox.core.graph_from_point((52.5216433,13.4140891), distance=100)

In [47]:
central_node=get_central_node(g,52.5216433,13.4140891)

In [138]:
central_node

3810179831

In [187]:
process_graph_test(g,test_data,central_node)


[4934187009, 2777153538, 4397315896, 2095738481, 2777153441, 2777153451, 2777025998, 2777153497, 3810179814, 3810179816, 2777026033, 3810179831, 3810179832, 4934187007]


In [188]:
central_node

3810179831

In [189]:
test_data    

{'stations': {'2094266776': {'type': 'station',
   'next_stops': ['55189765',
    '2094266781',
    '2094266782',
    '4655287361',
    '3824719830',
    '735967926'],
   'lat': 52.5216433,
   'lng': 13.4140891},
  '4934187009': {'type': 'street',
   'lat': 52.5217286,
   'lng': 13.4140343,
   'next_stops': [3810179832, 3810179831, 2095738481]},
  '4917707293': {'type': 'street',
   'lat': 52.521624,
   'lng': 13.4142698,
   'next_stops': [2095738480, 5867575839]},
  '5867575839': {'type': 'street',
   'lat': 52.5215679,
   'lng': 13.4141504,
   'next_stops': [4917707293]},
  '3527851368': {'type': 'street',
   'lat': 52.520808,
   'lng': 13.4147409,
   'next_stops': [3527851370]},
  '3527851370': {'type': 'street',
   'lat': 52.5207676,
   'lng': 13.4146979,
   'next_stops': [3527851368, 3527851375]},
  '3527851375': {'type': 'street',
   'lat': 52.5209506,
   'lng': 13.4136317,
   'next_stops': [2777153445, 3527851370]},
  '2095738480': {'type': 'street',
   'lat': 52.5216604,
   'ln

In [77]:
[1,2].index(1)

0

In [78]:
del[1,2][1]

In [94]:
def prune_nodes(central_node,nodes, adj_list,lat_n_longs,n_paths=20, max_hops=20):
    start = central_node
    nodes_on_paths = []
    
    for i in range(n_paths):
        current_paths=[]
        hop =0
        hops = max_hops
        current_node = start
        while hop <= hops:
            node_index = nodes.index(current_node)
            try:
                next_node = np.random.choice(list(adj_list[node_index][1].keys()))
                current_paths.append(current_node)
                current_node = next_node
                hop = hop+1
            except:
                ValueError
            finally:
                break
        print(current_paths)
        nodes_on_paths.append(current_paths)
    
    set_path=set(inner for outer in nodes_on_paths for inner in outer)
    
    nodes_to_keep = [node for node in nodes if node in set_path]
    nodes_to_keep_indexes = [nodes.index(node) for node in nodes_to_keep]
    pruned_adj_list = [adj_list[index] for index in nodes_to_keep_indexes]
    pruned_lat_n_longs = [lat_n_longs[index] for index in nodes_to_keep_indexes]
    
    return nodes_to_keep,pruned_adj_list,pruned_lat_n_longs
    


def process_graph(graph, data_dict,central_node,n_paths=2,max_hops=20):
    """process a single graph, getting all adjacent nodes and
    and add them to a the dictionary mapping the stations"""

    #     getting all information from the graph object
    zipper = get_neighbourhood(graph)
    nodes = list(graph.nodes)
    lat_n_longs = list(zipper)
    adj_list = list(graph.adjacency())


    nodes,adj_list,lat_n_longs=prune_nodes(central_node,nodes,adj_list,lat_n_longs,n_paths,max_hops)

    #     mapping adjacent streets
    for i, node in enumerate(nodes):
        data_dict["stations"][str(node)] = {}
        data_dict["stations"][str(node)]["type"] = "street"
        data_dict["stations"][str(node)]["lat"] = lat_n_longs[i][0]
        data_dict["stations"][str(node)]["lng"] = lat_n_longs[i][1]
        next_stops = list(map(str, adj_list[i][1].keys()))
        # data_dict["stations"][str(node)]["next_stops"] = next_stops
        data_dict['stations'][str(node)]['next_stops'] = [stop for stop in next_stops if stop in nodes]x

[1, 2]


IndexError: list index out of range

In [193]:
t=[(735967926, {2938442114: {0: {'osmid': 19801554, 'oneway': True, 'lanes': '2', 'name': 'Alexanderstraße', 'highway': 'secondary', 'maxspeed': '50', 'length': 25.962}}, 768038173: {0: {'osmid': [309155392, 316092515, 718140761, 710761402, 30826268], 'oneway': True, 'lanes': ['2', '3'], 'ref': 'B 1', 'name': 'Grunerstraße', 'highway': 'primary', 'maxspeed': '50', 'length': 125.453}}})]

In [198]:
t[0][1].keys()

dict_keys([2938442114, 768038173])