#### Introduction: purpose of this notebook
This notebook works with the `redmond.geojson` file to:  
1. Find any ways that are not connected to any other way (isolated roads)
2. Check whether all the sidewalks are connected (i.e. whether the graph has more than one component)
3. Find the cardinality of the graph

In [1]:
import numpy as np
import os
import pandas as pd
import json
from glob import glob
import networkx as nx

#### Supporting functions for the main method

##### Get all the ways from the given input file
The input to this function is the list of features from the input file.  
Only features tagged as sidewalks (`footway == 'sidewalk'`) are kept.

In [2]:
def get_ways(features_list):
    '''
    Collect the coordinate lists of every sidewalk feature.

    Args:
        features_list: iterable of feature dicts. A feature is kept when its
            'properties' dict has 'footway' equal to 'sidewalk'; its
            elem['geometry']['coordinates'] value is then collected.
    Returns:
        List of list of coordinates, in input order.
        Outer list is the element (one entry per sidewalk).
        Inner list is the coordinates within that element.
    '''
    coord_list = []
    for elem in features_list:
        # Features without a 'footway' tag (or with missing/null properties)
        # are not sidewalks: skip them instead of raising KeyError.
        properties = elem.get('properties') or {}
        if properties.get('footway') == 'sidewalk':
            coord_list.append(elem['geometry']['coordinates'])
    return coord_list

##### Make a dictionary mapping each coordinate to the ways it belongs to
Helpful later for drawing the graph and to do analysis on top of it.

In [3]:
def get_coord_dict(coord_list):
    '''
    Build a lookup from each coordinate to the ways that contain it.

    keys   : str() form of every distinct coordinate
    values : list of way indices (positions in coord_list) that contain the
             coordinate, with one entry per occurrence
    '''
    ways_by_coord = {}
    for way_idx, way in enumerate(coord_list):
        for coord in way:
            # Create the bucket on first sight, then record this way in it
            ways_by_coord.setdefault(str(coord), []).append(way_idx)
    return ways_by_coord

In [4]:
def get_coord_df(coord_list):
    '''
    Build an edge list with one row per way.

    Args:
        coord_list: list of ways, each a non-empty list of coordinates.
    Returns:
        DataFrame with two columns, 'origin' and 'dest', holding the str()
        form of the first and last coordinate of each way. Rows follow the
        order of coord_list and are indexed 0..n-1.
        This frame is the edge list used for connected components in networkx.
    '''
    # DataFrame.append was removed in pandas 2.0, and growing a frame one row
    # at a time is quadratic. Collect all rows first and build the frame once.
    records = [
        {'origin': str(elem[0]), 'dest': str(elem[-1])}
        for elem in coord_list
    ]
    # Explicit columns keep the schema stable even when coord_list is empty
    return pd.DataFrame(records, columns=['origin', 'dest'])

##### Get discarded ways - ways that are not connected to any other way
The input to this function is a list of coordinates and the dictionary of coordinates.
The idea is to check  
- for a node in the list, check the length of the "value" of that _node_ as "key" in the dictionary
- if the length > 1, it means the node is present in at least 2 ways and this way is safe
- only if all the nodes in a way have length 1 in the dictionary, then it is not connected to any other way

In [5]:
def get_disc_way_ids(coord_list, coord_dict):
    '''
    Get way IDs to be discarded (ways that share no coordinate with any other way).

    Args:
        coord_list: list of ways, each a list of coordinates.
        coord_dict: mapping from str(coordinate) to the list of way indices
            (positions in coord_list) that contain that coordinate.
    Returns:
        List of indices into coord_list, in ascending order, of the ways whose
        coordinates all belong to that way only.
    '''
    disc_way_ids = []
    for way_id, elem in enumerate(coord_list):
        # A way is connected only if one of its points also belongs to a
        # DIFFERENT way. Checking the length of the dictionary entry is not
        # enough: a way that revisits its own coordinate (e.g. a closed loop)
        # is listed twice under that coordinate without touching anything else.
        shares_point = any(
            other_id != way_id
            for point in elem
            for other_id in coord_dict[str(point)]
        )
        if not shares_point:
            disc_way_ids.append(way_id)
    return disc_way_ids

##### Get way IDs from a subgraph
The parameters for this function are a _subgraph_ and the _DataFrame of way endpoints_. Given a subgraph, we need to trace back the ways it is made of. 

In [6]:
def get_way_from_subgraph(sgraph, df):
    '''
    Infer the way ids that make up a subgraph returned by networkx.

    Args:
        sgraph : object exposing an iterable `edges` of node pairs
        df     : DataFrame with 'origin' and 'dest' columns, one row per way
    Returns:
        list of the df index labels whose (origin, dest) pair matches an edge
        of the subgraph in either direction, without duplicates
    '''
    origins = df['origin']
    dests = df['dest']
    matched_ways = set()
    for edge in sgraph.edges:
        head, tail = edge[0], edge[-1]
        # An undirected edge may be stored in either orientation relative to
        # the way it came from, so look it up both ways round.
        reversed_mask = (origins == tail) & (dests == head)
        forward_mask = (origins == head) & (dests == tail)
        hits = df.index[reversed_mask.values].tolist()
        hits += df.index[forward_mask.values].tolist()
        matched_ways.update(hits)
    return list(matched_ways)

#### Main Function Steps
1. Get all the ways in the file
2. Check which ways are alone (Not connected to any other way) and separate them out
3. Of the remaining ways (those connected to at least one other way), draw a graph
4. Check how many subgraphs (Components) are there in the whole network
5. Get the cardinality of the components (# of ways in those components and the way id)

In [7]:
if __name__ == '__main__':
    # Build the data directory portably: a literal "OSW\TestData" only works on
    # Windows and contains the invalid escape sequence "\T". The directory is
    # passed to glob instead of os.chdir() so the working directory is left
    # untouched and this cell can be re-run safely.
    path = os.path.join("OSW", "TestData")
    json_files = glob(os.path.join(path, "redmond.geojson"))
    print("Number of json files :", len(json_files))

    # Ways, coord_dict, df and disc_way_ids are read by later cells; with more
    # than one matching file they hold the values for the last file processed.
    for file in json_files:
        print('-'*10)
        print('Processing File : {}'.format(os.path.basename(file)))
        # GeoJSON is UTF-8 by specification; do not rely on the platform default
        with open(file, encoding='utf-8') as data_json:
            data_dict = json.load(data_json)

        # Coordinate lists of the sidewalk features only
        Ways = get_ways(data_dict['features'])
        print("Number of sidewalks in this file : ", len(Ways))

        # coordinate -> indices of the ways containing it
        coord_dict = get_coord_dict(Ways)

        # one (origin, dest) row per way
        df = get_coord_df(Ways)

        # indices of the ways that share no coordinate with any other way
        disc_way_ids = get_disc_way_ids(Ways, coord_dict)

Number of json files : 1
----------
Processing File : redmond.geojson
Number of ways in this file :  2462


In [8]:
# Report how many sidewalks were flagged as not connected to any other sidewalk
print("Number of isolated sidewalks: ", len(disc_way_ids))

Number of isolated sidewalks:  73


In [10]:
# Keep every sidewalk whose index was not flagged as isolated. Filtering into a
# new list avoids repeated `del` calls (each one shifts the tail of the list)
# and does not depend on deleting indices in descending order.
isolated_ids = set(disc_way_ids)
Connected_Walks = [way for idx, way in enumerate(Ways) if idx not in isolated_ids]
assert len(Connected_Walks) == len(Ways) - len(isolated_ids)

# Report the size of the full, isolated and connected sets
print("Number of sidewalks in the file : ", len(Ways))
print("Number of isolated sidewalks: ", len(disc_way_ids))
print("Number of Connected sidewalks in the file : ", len(Connected_Walks))

Number of sidewalks in the file :  2462
Number of isolated sidewalks:  73
Number of Connected sidewalks in the file :  2389


`Connected_Walks` now holds all the ways that are connected to at least one other way

In [11]:
# Edge list (first and last coordinate of each connected way) -> undirected graph
Connected_df = get_coord_df(Connected_Walks)
Connected_FG = nx.from_pandas_edgelist(Connected_df, 'origin', 'dest')

# NOTE(review): the graph only links the endpoints of each way, whereas the
# isolation check looks at every coordinate of a way. Ways that meet at an
# interior coordinate may therefore land in different components - confirm
# this is intended.
print(
    "Is the entire graph connected : ",
    nx.is_connected(Connected_FG),
)
print(
    "Number of subgraphs : ",
    nx.number_connected_components(Connected_FG),
)

Is the entire graph connected :  False
Number of subgraphs :  324


We can get all the subgraphs and their information into a list

In [12]:
# One independent graph copy per connected component of Connected_FG
S = []
for component_nodes in nx.connected_components(Connected_FG):
    S.append(Connected_FG.subgraph(component_nodes).copy())

Given a subgraph, get the ways (IDs) that are part of it

In [13]:
# Example lookup: recover the way ids (row labels of Connected_df) behind the
# component stored at index 1 of S. Despite its name, way_set is a list.
way_set = get_way_from_subgraph(S[1], Connected_df)
print(way_set)

[1, 2083, 2053, 2054, 2224, 2032, 2227, 340, 2043, 2044, 2045, 2046]


#### Analysis data

##### Number of coordinates in ways

In [None]:
# Number of coordinates in each way, in the order of Ways
Coordinates_in_ways = [len(way) for way in Ways]

# Histogram: coordinate count -> number of ways with that many coordinates
freq = {}
for n_coords in Coordinates_in_ways:
    freq[n_coords] = freq.get(n_coords, 0) + 1

# Printed in order of first appearance
for n_coords, n_ways in freq.items():
    print ("% d : % d"%(n_coords, n_ways))

##### Number of ways at a coordinate

In [None]:
# Number of way entries recorded for each distinct coordinate
ways_at_coordinates = [len(way_ids) for way_ids in coord_dict.values()]

# Histogram: entries per coordinate -> number of coordinates with that many
freq = {}
for n_ways in ways_at_coordinates:
    freq[n_ways] = freq.get(n_ways, 0) + 1

# Printed in order of first appearance
for n_ways, n_points in freq.items():
    print ("% d : % d"%(n_ways, n_points))

##### Discard this - not needed.
This encodes the nodes as numeric values for drawing the graph with NetworkX.  
It was created to work around a simple error while drawing the graph with NetworkX.  
`edge_attr=True` does not need to be present. 

In [None]:
# Give every distinct endpoint a numeric id (in order of first appearance) and
# build one row per way holding both the string and the numeric endpoints.
Encoding = {}
records = []
for way in Ways:
    origin, dest = str(way[0]), str(way[-1])
    # setdefault returns the existing id, or stores the next free one: the
    # current dictionary size is evaluated before the key is inserted.
    src_val = Encoding.setdefault(origin, len(Encoding))
    dest_val = Encoding.setdefault(dest, len(Encoding))
    records.append({'origin': origin, 'dest': dest, 'source': src_val, 'destination': dest_val})

# DataFrame.append was removed in pandas 2.0 and is quadratic when called in a
# loop, so the frame is built once from the collected rows.
ddf = pd.DataFrame(records, columns=['origin', 'dest', 'source', 'destination'])
ddf

In [None]:
import networkx as nx

# Graph over the numerically encoded endpoints. With edge_attr=True the
# remaining ddf columns are stored as attributes on each edge.
FG = nx.from_pandas_edgelist(
    ddf,
    source='source',
    target='destination',
    edge_attr=True,
)
nx.draw_networkx(FG, with_labels=True)

In [None]:
# Connectivity summary for the encoded graph FG
print(nx.is_connected(FG))
print(nx.number_connected_components(FG))
# cc is assigned here but only read by the commented-out loop below
cc = nx.connected_components(FG)
# Uncomment to inspect each component's node set and its size:
#for x in cc:
    #print(x)
    #print(len(x))