In [1]:
import numpy as np
import networkx as nx
import os

# Call this function to get the data 
def get_data(city='Detroit', mode='bike', root_path='../data/'):
    folders = os.listdir(root_path)
    for folder in folders:
        if folder == city:
            files = os.listdir(root_path+folder)
            bike_data = [file for file in files if mode in file][0]
            path = root_path + folder+ '/'+ bike_data;
    return nx.read_graphml(path)

G = get_data()
print(type(G))

<class 'networkx.classes.digraph.DiGraph'>


In [21]:
# for n, d in list(G.nodes(data=True)):
#     print(d)
len(G.nodes())

3663

In [20]:
# print(G.edges(('6316199', '1360288038'), data=True))

for u, v, d in list(G.edges(data=True)):
    d['length'] = float(d['length'])

# G.edges(('6316199', '1360288038'), data=True)

In [9]:
def pick_random(G, num=2):
    nodes = np.random.choice(G.nodes(), num)
    return nodes

def compute_path_lengths(num=1000):
    failed = 0
    path_length = 0
    for i in range(num): 
        u,v = pick_random(G)
        try: 
            path_length += nx.shortest_path_length(G, source=u, target=v, weight='length')
        except:
            failed += 1
    return failed/num, path_length
        

In [10]:
num = len(G.nodes()) // 10
failed, path_length = compute_path_lengths(num)
print(failed, path_length/(num*(1-failed)))


ZeroDivisionError: float division by zero

In [21]:
def new_edge_length(G, u=None, v=None):
    """
        given two nodes in graph G, return the straight line distance (as the crow flies) between them
    """
    if not u and not v:
        # randomly choose u and v
        pass
        u, v = np.random.choice(G.nodes(), 2)
        while v in G[u]:
            u,v = np.random.choice(G.nodes(), 2)
            
    elif not u and v:
        # randomly choose u
        pass
        u = pick_random(G, 1)
        while v in G[u]:
            u = np.random.choice(G.nodes(), 1)
            
    elif u and not v:
        pass
        # randomly choose v
        v = pick_random(G, 1)
        while v in G[u]:
            u = np.random.choice(G.nodes(), 1)
    
    xdist = float(G.nodes[u]['x']) - float(G.nodes[v]['x'])
    ydist = float(G.nodes[u]['y']) - float(G.nodes[v]['y'])
    length = (xdist**2 + ydist**2)**0.5
    return length

def calc_directness(G, u, v):
    """
        Calculate the directness between two nodes u and v
        Directness is the ratio between:
            - the shortest node path length (number of edges times the length of each edge)
            - the straight line distance between those two nodes
        If v cannot be reached from u, then directness is zero
        
        G: An nx graph
        u: the starting node
        v: the ending node
        
    """
    try:
        path_length = nx.shortest_path_length(G, source=u, target=v, weight='length')
        direct_length = new_edge_length(G, u, v)
        ret = direct_length/path_length
        return ret
    except:
        return 0

    


In [24]:
wcc = [cc for cc in nx.weakly_connected_component_subgraphs(G)]
wcc.sort(key=len, reverse=True)
G2 = wcc[0]
d = []
n = 100
# pick 1000 random edges from the largest component and calculate the average directness d (NEEDS WORK)

def calc_avg_directness_connected(G, n):
    """
    Calculates the average directness of n pairs of nodes. All of these nodes are already connected.
    G: An nx graph
    n: The number of pairs to use
    """
    d = 0
    for _ in range(n):
        start = np.random.choice(G.nodes(), 1)
        start = start[0]
        reachable = nx.descendants(G, start)
        if not reachable:
            continue
        end = np.random.choice(list(reachable), 1)
        end = end[0]
        
        direct = calc_directness(G, start, end)
        d += (direct / n);
    return d

def calc_avg_directness_random(G, n):
    """
    Calculates the average directness of n pairs of nodes. These nodes are chosen at random. 
    If they are not connected, the directness is zero
    
    G: An nx graph
    n: the number of pairs to use
    """
    d = 0
    for _ in range(n):
        pairs = np.random.choice(G.nodes(), 2)
        d += calc_directness(G, pairs[0], pairs[1]) / n
    return d
        

def get_components(G):
    '''
    Get the connected components of G
    G: An nx graph
    
    Outputs
    wcc: A list of the components sorted by size
    '''
    wcc = [cc for cc in nx.weakly_connected_component_subgraphs(G)]
    wcc.sort(key=len, reverse=True)
    return wcc

def calc_lcc(G):
    '''
    Returns the number of nodes in the largest component of graph G
    G: An nx graph
    '''
    wcc = get_components(G)
    return len(wcc[0])

In [26]:
calc_avg_directness_random(G, 100)

0.0823985730572991

In [17]:
def L2C(wcc):
    '''
    Find the closest pair of nodes between two different connected components.
    ---
    wcc: list connected components

    returns: dict nodes i and j and distance
    '''
    closest_pair = {'i': 0, 'j': 0, 'dist': np.inf}
    for i in wcc[0].nodes(data=True):
        i_coord = (i[1]['y'], i[1]['x'])
        for j in wcc[1:]:
            for k in j.nodes(data=True):
                j_coord = (k[1]['y'], k[1]['x'])
                dist = euclidean_dist_vec(float(i_coord[0]), float(i_coord[1]), float(j_coord[0]), float(j_coord[1]))
                if dist < closest_pair['dist']:
                    closest_pair['i'] = i[0]
                    closest_pair['j'] = k[0]
                    closest_pair['dist'] = dist
    return closest_pair


In [18]:
def new_city(G, algy):
    """
    wcc = Sorted list of weakly connected subgraphs
    algy = desired path-adding algorithm
    
    """
    wcc = [cc for cc in nx.weakly_connected_component_subgraphs(G)]
    wcc.sort(key=len, reverse=True)
    runs = len(wcc) - 1
    added_edges = []
    tot_length = []
    for i in range(runs):
        added_edge = algy(wcc[i:])
        edge = (added_edge['i'],added_edge['j'],added_edge['dist'])
        added_edges.append(edge)
        tot_length.append(edge[2])
        wcc = [cc for cc in nx.weakly_connected_component_subgraphs(G)]
        wcc.sort(key=len, reverse=True)
        
    return added_edges, tot_length   

In [19]:
new_city(G, L2C)

NameError: name 'euclidean_dist_vec' is not defined