In [46]:
import pandas as pd
import numpy as np
from collections import deque
import json

Load and preprocess the data table.

In [47]:
# Read the raw table and keep only the rows where every column used
# downstream (coordinates, object id, both zip codes) is present.
location=pd.read_csv('data/EDUCATIONAL_LOCATIONS.csv').dropna(
    subset=['POINT_X','POINT_Y','OBJECTID','Site_Zipcode','Mail_Zipcode']
)

In [48]:
# Pull the three columns the graph is built from out as plain Python int lists.
idList,siteList,mailList=(
    location[column].astype(int).tolist()
    for column in ('OBJECTID','Site_Zipcode','Mail_Zipcode')
)
# Containers filled in by the graph-building cell:
#   graphList  : node -> set of adjacent nodes
#   zipcodeSet : every zip code seen so far
#   locSet     : every location id seen so far
graphList=dict()
zipcodeSet,locSet=set(),set()

Set up the graph.

In [49]:
# Each location is linked to its site zip code and its mail zip code, and each
# zip code is linked back to every location that uses it.
# NOTE(review): zip codes and location ids share the same key space in
# graphList, so an OBJECTID equal to a zip code would overwrite that zip
# code's entry -- confirm the two value ranges never overlap.
for loc_id,site_zip,mail_zip in zip(idList,siteList,mailList):
    for zip_code in (site_zip,mail_zip):
        # First time this zip code appears: register it with an empty neighbour set
        if zip_code not in zipcodeSet:
            graphList[zip_code]=set()
            zipcodeSet.add(zip_code)
        graphList[zip_code].add(loc_id)
    graphList[loc_id]={site_zip,mail_zip}
    locSet.add(loc_id)

Export the graph to a file.

In [50]:
# JSON has no set type, so every set is written out as a list.
# Note that json.dump also writes the integer node keys of 'graph' as strings.
graphAll={
    'graph':{node:list(neighbors) for node,neighbors in graphList.items()},
    'zipcode':list(zipcodeSet),
    'location':list(locSet),
}
with open('data/cache_graph.json','w') as f:
    json.dump(graphAll,f)

Function: load the graph from a file.

In [51]:
def loadGraph(path):
    '''
    load the graph from a json file

    The file is expected to hold an object with three entries:
    'graph' (node -> list of adjacent nodes), 'zipcode' (list of zip codes)
    and 'location' (list of location ids).

    Parameters
    ----------
    path : str
        the file path

    Returns
    ----------
    graphSet : dict
        the graph content, node -> set of adjacent nodes; keys that look
        like integers are converted back to int so they compare equal to
        the (integer) neighbours stored in the sets
    graphKey : set
        the set of zipcode
    graphLoc : set
        the set of location id

    '''
    def _restore_key(key):
        # JSON object keys are always strings, so integer node ids come back
        # as e.g. '10001' while the neighbour lists still contain 10001.
        # Convert them back; leave genuinely non-numeric keys untouched.
        try:
            return int(key)
        except (TypeError,ValueError):
            return key

    with open(path) as f:
        graph=json.load(f)
    graphVal=graph['graph']
    graphKey=set(graph['zipcode'])
    graphLoc=set(graph['location'])
    graphSet={_restore_key(key):set(value) for key,value in graphVal.items()}
    return graphSet,graphKey,graphLoc

In [52]:
def BFS(graph, A,locs):
    '''
    use BFS to get the distance from A to all points

    Parameters
    ----------
    graph : dict
        the dictionary of nodes and their adjacent nodes
    A : int
        the starter
    locs : set
        a set of all nodes currently in graph (any iterable of nodes,
        e.g. a dict keyed by node, is accepted as well)

    Returns
    ----------
    dist : dict
        the dictionary of distance (number of edges) from starter A to
        other nodes; nodes that cannot be reached from A keep the value -1

    Raises
    ----------
    KeyError
        if A, or a node reached during the search, has no entry in graph

    '''
    # Iterate locs directly rather than calling locs.keys(): locs is a set,
    # which has no .keys(); plain iteration works for sets and dicts alike.
    dist={node:-1 for node in locs}
    dist[A]=0
    visited={A}
    queue=deque([A])
    while queue:
        curr=queue.popleft()
        for ele in graph[curr]:
            if ele not in visited:
                # first time ele is reached, so this is its shortest distance
                dist[ele]=dist[curr]+1
                queue.append(ele)
                visited.add(ele)

    return dist