In [1]:
import numpy as np
import networkx as nx
import os

# Call this function to get the data 
def get_data(city='Detroit', mode='bike', root_path='../data/'):
    folders = os.listdir(root_path)
    for folder in folders:
        if folder == city:
            files = os.listdir(root_path+folder)
            bike_data = [file for file in files if mode in file][0]
            path = root_path + folder+ '/'+ bike_data;
    return nx.read_graphml(path)

G = get_data()
print(type(G))

<class 'networkx.classes.digraph.DiGraph'>


#### Define helper functions to compute metrics

In [2]:
def convert_lengths_to_floats(G):
    """
    Changes the attributes of the edges of graph G such that the 'length' field
    is a float rather than a string
    """
    for u, v, d in list(G.edges(data=True)):
        d['length'] = float(d['length'])

def get_edge_length(G, u, v):
    """
    given two nodes in graph G, return the straight line distance 
    (as the crow flies) between them
    """
    xdist = float(G.nodes[u]['x']) - float(G.nodes[v]['x'])
    ydist = float(G.nodes[u]['y']) - float(G.nodes[v]['y'])
    length = (xdist**2 + ydist**2)**0.5
    return length

def make_new_random_edge(G, u=None):
    """
    Return a tuple of nodes representing a random new and unique edge
    """
    if not u:
        u = np.random.choice(G.nodes(), 1)
        u = u[0]
    
    available = [node for node in G.nodes() if node not in G[u]]
    
    # choose v if not specified
    v = np.random.choice(available, 1)
    v = v[0]
    return (u, v)

    
def calc_directness(G, u, v):
    """
        Calculate the directness between two nodes u and v
        Directness is the ratio between:
            - the shortest node path length (number of edges times the length of each edge)
            - the straight line distance between those two nodes
        If v cannot be reached from u, then directness is zero
        
        G: An nx graph
        u: the starting node
        v: the ending node
        
    """
    try:
        path_length = nx.shortest_path_length(G, source=u, target=v, weight='length')
        direct_length = new_edge_length(G, u, v)
        ret = direct_length/path_length
    except:
        return 0

    return ret


def calc_avg_directness(G, pairs):
    """
    Calculates the average directness of n pairs of nodes. All of these nodes are already connected.
    G: An nx graph
    n: The number of pairs to use
    """
    d = 0
    n = len(pairs)
    for pair in pairs:
        d += calc_directness(G, pair[0], pair[1]) / n

    return d
        

def get_components(G):
    '''
    Get the connected components of G
    G: An nx graph
    
    Outputs
    wcc: A list of the components sorted by size
    '''
    wcc = [cc for cc in nx.weakly_connected_component_subgraphs(G)]
    wcc.sort(key=len, reverse=True)
    return wcc

def calc_lcc(G):
    '''
    Returns the number of nodes in the largest component of graph G
    G: An nx graph
    '''
    wcc = get_components(G)
    return len(wcc[0])

def euclidean_dist_vec(y1, x1, y2, x2):
    '''
    Calculate the euclidean distance between two points.
    '''
    distance = ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5
    return distance

def connectedness(G):
    N = len(G)
    wcc = get_components(G)
    largest = wcc[0]
    N_ = len(largest)
    return N_/N

def make_random_pairs(G_bike, G_car, n = 250):
    '''
    Creates n pairs of random nodes from graph G
    G: an nx graph
    n: the number of pairs to return
    '''
    bike_pairs = []
    car_pairs = []
    for _ in range(n):
        bike_pair = np.random.choice(G_bike.nodes(), 2)
        b1 = G_bike.nodes[bike_pair[0]]
        b2 = G_bike.nodes[bike_pair[1]]
        bike_pairs.append(bike_pair)
                

        u = ox.get_nearest_node(G_car, (b1['x'], b1['y']))
        v = ox.get_nearest_node(G_car, (b2['x'], b2['y']))
        car_pairs.append((u,v))
    return bike_pairs, car_pairs

#### Define Methods for connecting the Graphs

In [3]:
def L2S(wcc):
    '''
    Find the closest pair of nodes between two different connected components.
    ---
    wcc: list connected components

    returns: dict nodes i and j and distance
    '''
    closest_pair = {'i': 0, 'j': 0, 'dist': np.inf}
    for i in wcc[0].nodes(data=True):
        i_coord = (i[1]['y'], i[1]['x'])
        for j in wcc[1].nodes(data=True):
            j_coord = (j[1]['y'], j[1]['x'])
            dist = euclidean_dist_vec(i_coord[0], i_coord[1], j_coord[0], j_coord[1])
            if dist < closest_pair['dist']:
                closest_pair['i'] = i[0]
                closest_pair['j'] = j[0]
                closest_pair['dist'] = dist
    return closest_pair

def L2C(wcc):
    closest_pair = {'i': 0, 'j': 0, 'dist': np.inf}
    for i in wcc[0].nodes(data=True):
        i_coord = (i[1]['y'], i[1]['x'])
        for j in wcc[1:]:
            for k in j.nodes(data=True):
                j_coord = (k[1]['y'], k[1]['x'])
                dist = euclidean_dist_vec(float(i_coord[0]), float(i_coord[1]), float(j_coord[0]), float(j_coord[1]))
                if dist < closest_pair['dist']:
                    closest_pair['i'] = i[0]
                    closest_pair['j'] = k[0]
                    closest_pair['dist'] = dist
    return closest_pair

def R2C(wcc):
    '''
    Find the closest pair of nodes between two different connected components.
    ---
    wcc: list connected components

    returns: dict nodes i and j and distance
    '''
    closest_pair = {'i': 0, 'j': 0, 'dist': np.inf}
    num_clusters = len(wcc)
    cluster = np.random.choice(len(wcc))
    for i in wcc[cluster].nodes(data=True):
        i_coord = (i[1]['y'], i[1]['x'])
        for w,j in enumerate(wcc[1:]):
            if w == cluster - 1:
                break
            for k in j.nodes(data=True):
                j_coord = (k[1]['y'], k[1]['x'])
                dist = euclidean_dist_vec(float(i_coord[0]), float(i_coord[1]), float(j_coord[0]), float(j_coord[1]))
                if dist < closest_pair['dist']:
                    closest_pair['i'] = i[0]
                    closest_pair['j'] = k[0]
                    closest_pair['dist'] = dist
    return closest_pair

def Closest(wcc):
    closest_pair = {'i': 0, 'j': 0, 'dist': np.inf}
    for v,i in enumerate(wcc[0:]):
            for u in i.nodes(data=True):
                i_coord = (u[1]['y'], u[1]['x'])
                for w,j in enumerate(wcc[0:]):
                    if w == v:
                        break
                    for k in j.nodes(data=True):
                        j_coord = (k[1]['y'], k[1]['x'])
                        dist = euclidean_dist_vec(float(i_coord[0]), float(i_coord[1]), float(j_coord[0]), float(j_coord[1]))
                        if dist < closest_pair['dist']:
                            closest_pair['i'] = u[0]
                            closest_pair['j'] = k[0]
                            closest_pair['dist'] = dist
    return closest_pair


In [4]:
def new_city(G, algy, length, random_pairs):
    """
    wcc = Sorted list of weakly connected subgraphs
    algy = desired path-adding algorithm
    
    """
    added_edges = []
    tot_length = [0]
    directness_random = []
    lccs = []
    connectedness_rank  =[]
    i = 0
    while sum(tot_length) < length:
        wcc = get_components(G)
        print(sum(tot_length))
        lccs.append(len(wcc[0]))
        directness_random.append(calc_avg_directness(G, random_pairs))
        
        connected_rank = connectedness_rank.append(connectedness(G))
        added_edge = algy(wcc)
        edge = (added_edge['i'],added_edge['j'],added_edge['dist'])
        G.add_edge(edge[0], edge[1], length = edge[2])
        added_edges.append(edge)
        tot_length.append(edge[2])
        
        
        i = i+1
    wcc = get_components(G)
    lccs.append(len(wcc[0]))
        directness_random.append(calc_avg_directness(G, random_pairs))

    connected_rank = connectedness_rank.append(connectedness(G))
    return G, added_edges, tot_length, directness_random, lccs, connectedness_rank

IndentationError: unexpected indent (<ipython-input-4-6ba3198e81f0>, line 30)

In [None]:
name = 'Detroit'
G_bike = get_data(name)
G_car = get_data(name, mode='drive')

len(G)
bike_pairs, car_pairs = make_random_pairs(G_bikes, G_cars, n=500)

G_new, edges, lengths, directness, lccs, connectedness_rank = new_city(G, L2S, 35000, bike_pairs)