In [18]:
import numpy as np
import networkx as nx
import os

# Call this function to get the data 
def get_data(city='Detroit', mode='bike', root_path='../data/'):
    """
    Load the network graph of one city from a GraphML file.

    Looks inside the folder ``root_path/city`` for a file whose name contains
    ``mode`` and reads it with ``nx.read_graphml``.

    Parameters
    ----------
    city : str
        Name of the sub-folder of ``root_path`` holding the city's files.
    mode : str
        Substring that the file name must contain (e.g. 'bike').
    root_path : str
        Directory containing one folder per city. A trailing slash is optional.

    Returns
    -------
    The graph object returned by ``nx.read_graphml``.

    Raises
    ------
    FileNotFoundError
        If there is no folder for ``city`` or no file in it matching ``mode``.
    """
    city_dir = os.path.join(root_path, city)
    if not os.path.isdir(city_dir):
        raise FileNotFoundError(
            "No data folder for city %r under %r" % (city, root_path))

    # Sort so that the chosen file does not depend on the arbitrary order
    # in which os.listdir returns directory entries.
    matches = sorted(name for name in os.listdir(city_dir) if mode in name)
    if not matches:
        raise FileNotFoundError(
            "No file containing %r found in %r" % (mode, city_dir))

    return nx.read_graphml(os.path.join(city_dir, matches[0]))

# Load the Amsterdam network (get_data defaults to mode='bike') and print the
# type of the object returned, as a quick check of what kind of graph was read.
G = get_data(city='Amsterdam')
print(type(G))

<class 'networkx.classes.multidigraph.MultiDiGraph'>


#### Define helper functions to compute metrics

In [9]:
def convert_lengths_to_floats(G):
    """
    Rewrite, in place, the 'length' attribute of every edge of graph G
    as a float (it may have been stored as a string).

    Nothing is returned; the edge attribute dictionaries of G are modified.
    """
    # Take a snapshot of the edges before touching their attribute dicts.
    edge_records = list(G.edges(data=True))
    for _, _, attrs in edge_records:
        attrs['length'] = float(attrs['length'])

def get_edge_length(G, u, v):
    """
    Straight-line ("as the crow flies") distance between nodes u and v of G.

    The 'x' and 'y' node attributes are converted to float before use, so
    they may be stored as numbers or as numeric strings. The result is in
    the same units as those coordinates.
    """
    # Per-axis coordinate differences, x first and then y.
    deltas = [float(G.nodes[u][axis]) - float(G.nodes[v][axis])
              for axis in ('x', 'y')]
    return (deltas[0]**2 + deltas[1]**2)**0.5

def make_new_random_edge(G, u=None):
    """
    Return a tuple of nodes (u, v) representing a random edge not yet in G.

    Parameters
    ----------
    G : graph
        Graph to draw nodes from.
    u : node, optional
        Start node of the new edge. When None, a node of G is drawn
        uniformly at random. Falsy node ids such as 0 are valid values.

    Returns
    -------
    (u, v) where v is drawn uniformly at random from the nodes that are
    neither u itself nor already adjacent to u (i.e. not in ``G[u]``).

    Raises
    ------
    ValueError
        If G has no nodes, or if u is already connected to every other node.
    """
    nodes = list(G.nodes())
    if not nodes:
        raise ValueError("cannot create an edge in a graph without nodes")

    # Compare against None explicitly so that node ids like 0 or '' are kept.
    # Nodes are picked by index rather than with np.random.choice(nodes) so
    # that node ids keep their original Python type.
    if u is None:
        u = nodes[np.random.randint(len(nodes))]

    neighbours = G[u]
    # Exclude u itself so a self-loop is never proposed as the "new" edge.
    available = [node for node in nodes
                 if node != u and node not in neighbours]
    if not available:
        raise ValueError(
            "node %r is already connected to every other node" % (u,))

    v = available[np.random.randint(len(available))]
    return (u, v)

    
def calc_directness(G, u, v):
    """
    Calculate the directness of the route from node u to node v.

    Directness is the straight-line distance between the two nodes
    (see get_edge_length) divided by the length of the shortest path
    from u to v, where path length is the sum of the 'length' attribute
    of the edges along the path.
    """
    # Shortest-path length first: if v cannot be reached from u this raises
    # before any coordinate lookup happens.
    network_dist = nx.shortest_path_length(G, u, v, weight='length')
    crow_dist = get_edge_length(G, u, v)
    return crow_dist / network_dist

def calc_avg_directness(G, n):
    """
    Estimate the average directness of a weakly connected directed
    graph component G from random node pairs.

    Up to n times, a start node is drawn uniformly at random and an end node
    is drawn uniformly from the nodes reachable from it. Draws whose start
    node reaches no other node are skipped and do NOT count towards the
    average, so the result is the mean over the pairs actually evaluated.

    Parameters
    ----------
    G : graph
        Directed graph component with 'length' edge attributes and
        'x'/'y' node attributes (as required by calc_directness).
    n : int
        Number of random draws to attempt.

    Returns
    -------
    float
        Mean directness over the evaluated pairs, or 0.0 when no pair could
        be evaluated (n <= 0, empty graph, or no node has a descendant).
    """
    nodes = list(G.nodes())
    if not nodes:
        return 0.0

    total = 0.0
    evaluated = 0
    for _ in range(n):
        # Pick by index so node ids keep their original Python type.
        start = nodes[np.random.randint(len(nodes))]
        reachable = list(nx.descendants(G, start))
        if not reachable:
            continue
        end = reachable[np.random.randint(len(reachable))]

        total += calc_directness(G, start, end)
        evaluated += 1

    # Divide by the number of pairs really used; dividing by n would treat
    # every skipped draw as a directness of 0 and bias the average downwards.
    return total / evaluated if evaluated else 0.0

def euclidean_dist_vec(y1, x1, y2, x2):
    '''
    Euclidean distance between the points (x1, y1) and (x2, y2).

    Note the argument order: each point is passed as y first, then x.
    '''
    dx = x1 - x2
    dy = y1 - y2
    return (dx ** 2 + dy ** 2) ** 0.5


#### Define Methods for connecting the Graphs

In [10]:
def L2S(wcc):
    '''
    Find the closest pair of nodes between the first two connected
    components of the list.
    ---
    wcc: list connected components; only wcc[0] and wcc[1] are used.
         Each node needs 'x' and 'y' attributes, given as numbers or as
         numeric strings (they are converted with float(), as in
         get_edge_length).

    returns: dict with keys
        'i'    - node of wcc[0]
        'j'    - node of wcc[1]
        'dist' - straight-line distance between them
        If either component has no nodes, the initial value
        {'i': 0, 'j': 0, 'dist': inf} is returned.
    '''
    closest_pair = {'i': 0, 'j': 0, 'dist': np.inf}

    # Read the coordinates of the second component once, as arrays, so the
    # distance from one node of wcc[0] to all of them is a single numpy
    # expression instead of a Python-level inner loop.
    targets = list(wcc[1].nodes(data=True))
    if not targets:
        return closest_pair
    target_ids = [node for node, _ in targets]
    target_x = np.array([float(attrs['x']) for _, attrs in targets])
    target_y = np.array([float(attrs['y']) for _, attrs in targets])

    for node, attrs in wcc[0].nodes(data=True):
        x = float(attrs['x'])
        y = float(attrs['y'])
        dists = np.sqrt((x - target_x) ** 2 + (y - target_y) ** 2)
        nearest = int(np.argmin(dists))
        # Strict '<' keeps the first pair found when several are tied.
        if dists[nearest] < closest_pair['dist']:
            closest_pair['i'] = node
            closest_pair['j'] = target_ids[nearest]
            closest_pair['dist'] = float(dists[nearest])
    return closest_pair

def L2C(wcc):
    '''
    Placeholder for another component-connecting method; not implemented yet.
    ---
    wcc: list connected components (currently ignored)

    returns: None. new_city reads the keys 'i', 'j' and 'dist' from the
             result of its algorithm, so this stub cannot be used there
             until it returns such a dict.
    '''

def R2C(wcc):
    '''
    Placeholder for another component-connecting method; not implemented yet.
    ---
    wcc: list connected components (currently ignored)

    returns: None. new_city reads the keys 'i', 'j' and 'dist' from the
             result of its algorithm, so this stub cannot be used there
             until it returns such a dict.
    '''

def Cloest(wcc):
    '''
    Placeholder for another component-connecting method; not implemented yet.
    NOTE(review): the name looks like a typo of "Closest" - confirm before
    renaming, since callers may already use this spelling.
    ---
    wcc: list connected components (currently ignored)

    returns: None. new_city reads the keys 'i', 'j' and 'dist' from the
             result of its algorithm, so this stub cannot be used there
             until it returns such a dict.
    '''

In [20]:
# Edge lengths must be floats before they can be used as path weights.
convert_lengths_to_floats(G)
# Separate the graph into components, largest first.
# nx.weakly_connected_component_subgraphs was removed in networkx 2.4;
# G.subgraph(nodes).copy() is the documented replacement and yields the same
# independent sub-graphs.
wcc = [G.subgraph(nodes).copy() for nodes in nx.weakly_connected_components(G)]
wcc.sort(key=len, reverse=True)

# Average directness per component. The number of sampled node pairs grows
# sub-linearly with the component size (number of edges to the power 0.7).
ds = []
for component in wcc:
    num_edges = len(component.edges())
    sample_size = num_edges**0.7
    print(sample_size)
    ds.append(calc_avg_directness(component, int(sample_size)))
ds

1542.9889316429894
537.1604048840761
407.45050160792084
82.20897432444137
55.706800523621254
51.64289998966078
44.450423945260695
42.223429631135296
31.471349101673205
18.57972790794881
26.851891249034473
26.851891249034473
27.192717810640893
25.818044053663876
24.766135972560896
19.372887813476257
15.026881656708994
23.332955706220968
14.363119032269166
22.232868548954176
21.486367070547505
13.685935953338415
16.106200780469578
18.379173679952558
12.046132367247342
16.73908130811767
10.304113218507691
15.892870350080608
14.363119032269166
13.91322853856461
14.807048556237302
15.462474735549584
14.58580765539925
13.685935953338415
14.58580765539925
13.685935953338415
13.685935953338415
9.25013070082624
8.141810630738087
9.25013070082624
7.85466234994081
9.783209271758404
12.286035066475314
11.804164196559913
11.804164196559913
10.560356962676234
11.31370849898476
7.56294171712541
10.304113218507691
6.656775051475125
10.813962975130146
10.560356962676234
10.304113218507691
10.3041132185

[0.6664898420626921,
 0.6970381129651153,
 0.5945987602077466,
 0.7966443182097342,
 0.913935680936523,
 0.8426513330202036,
 0.7271056269791917,
 0.9303508477220322,
 0.9174241313751663,
 0.6425619532590084,
 0.8707012645948882,
 0.8911927674047524,
 0.6879188816747258,
 0.8623737597117307,
 0.8600089118225679,
 0.8288333160947053,
 0.9784919116847259,
 0.8720204477088336,
 0.37727437557361176,
 0.8423140427595599,
 0.9654973163968246,
 0.9214759322086172,
 0.8969676562278696,
 0.9877107863404373,
 0.8860750785707527,
 0.7861441407780793,
 0.5403492053454014,
 0.9958611415435674,
 0.9856087672471072,
 0.9420853611398186,
 0.8263460176978227,
 0.8712042138645373,
 0.9582628062560652,
 0.8247859363860923,
 0.9310518322688068,
 0.8879244149178426,
 0.9230586312009708,
 0.9251662827481771,
 0.8522993284991589,
 0.5438303710765897,
 0.8362033544558698,
 0.7723212650656464,
 0.8831116053572506,
 0.9915166348552603,
 0.9798828064335937,
 0.9076709350984559,
 0.949175775033624,
 0.99438149174

In [None]:
def new_city(wcc, algy):
    """
    Collect the edges proposed by a path-adding algorithm while walking
    through the list of components.

    wcc = Sorted list of weakly connected subgraphs
    algy = desired path-adding algorithm; called as algy(wcc[k:]) for
           k = 0 .. len(wcc) - 2 and expected to return a dict with the
           keys 'i', 'j' and 'dist'

    Returns (added_edges, tot_length): the list of (i, j, dist) tuples in the
    order they were produced, and the sum of their dist values (0 when no
    edge was added).
    """
    added_edges = []
    for start in range(len(wcc) - 1):
        # The algorithm only sees the components from position `start` on.
        link = algy(wcc[start:])
        added_edges.append((link['i'], link['j'], link['dist']))

    tot_length = 0
    for _, _, dist in added_edges:
        tot_length = tot_length + dist
    return added_edges, tot_length