In [1]:
%load_ext autoreload
%autoreload

import networkx as nx
import networkx.algorithms as algos
import pandas as pd
from haversine import haversine
from networkx.algorithms import approximation

from geocluster import geoClusters
from geoclusterUtils import genCenters, genCluster, genClusters, genTripsBetweenClusters
from geoUtils import convertMetersToLat, convertLatToMeters, convertMetersToLong, convertLongToMeters
from ioUtils import loadJoblib
from networkTrips import organizeTrips
from timeUtils import clock, elapsed, getDateTime

In [135]:
class network():
    """Convenience wrapper around a networkx graph with ranked vertex/edge lookups.

    Vertices and edges can be fetched by name or by rank. The rankings and plain
    dict snapshots of the node/edge attributes are cached on the instance:

      orderedVertices / nodeDict  -- filled by orderVertices() / setNodeDict()
      orderedEdges    / edgeDict  -- filled by orderEdges()    / setEdgeDict()

    The caches are only refreshed by updateVertexAttrs(), updateEdgeAttrs() or an
    explicit order*() call; addVertex()/addEdge() do not invalidate them.
    """
    def __init__(self, directed=True, debug=False):
        """Create an empty graph.

        directed: a DiGraph is created only when this is exactly True; any other
                  value gives an undirected Graph.
        debug:    when truthy, addVertex()/addEdge() print what they added.
        """
        self.debug = debug
        self.directed = directed

        self.orderedEdges    = None
        self.edgeDict        = None
        self.orderedVertices = None
        self.nodeDict        = None

        self.g = nx.DiGraph() if self.directed is True else nx.Graph()

    def setDebug(self, debug):
        """Switch the debug printing of addVertex()/addEdge() on or off."""
        self.debug = debug

    def getNetwork(self):
        """Return the underlying networkx graph object."""
        return self.g


    ################################################################################################
    # Display
    ################################################################################################
    def showVertices(self):
        """Print every node name followed by its attribute dict."""
        # nodes(data=True) is used instead of nodes_iter(), which networkx 2.0 removed.
        for nodename, node in self.g.nodes(data=True):
            print(nodename,'\t',node)

    def showEdges(self):
        """Print every node name followed by its adjacency dict."""
        for edgename, edge in self.g.adj.items():
            print(edgename,'\t',edge)


    ################################################################################################
    # Vertices / Nodes / Location
    ################################################################################################
    def addVertex(self, name, attrs=None):
        """Add a node called `name`.

        attrs: optional dict of node attributes. Keys must be strings because they
               are forwarded to add_node() as keyword arguments. None (the default)
               adds the node without attributes.
        """
        nodeAttrs = {} if attrs is None else attrs
        # The node is passed positionally: its parameter name differs between
        # networkx releases, and it has never been `u`.
        self.g.add_node(name, **nodeAttrs)
        if self.debug:
            print("  Added node: [{0}]".format(name))

    def updateVertexAttrs(self, attrs):
        """Attach `attrs` to the nodes and refresh the vertex ordering.

        attrs: dict keyed by node name. Anything that is not a dict is rejected with
               a printed message and the graph is left untouched.
        """
        if not isinstance(attrs, dict):
            print("Cannot add vertex attrs because the input is not a dict")
            return
        nx.set_node_attributes(G=self.g, values=attrs, name=None)
        self.orderVertices()

    def setNodeDict(self):
        """Snapshot node attributes into self.nodeDict as {node name: attribute dict}.

        When a node's attributes are nested under the key None (which is how
        updateVertexAttrs() can end up storing them), the nested value is used;
        otherwise the node's attribute dict is used as is.
        """
        self.nodeDict = {
            nodename: (data[None] if None in data else data)
            for nodename, data in self.g.nodes(data=True)
        }

    def orderVertices(self, metric='Centrality'):
        """Rank the vertices, highest first, and cache the result in self.orderedVertices.

        metric: 'Centrality' ranks by degree centrality of the graph;
                'Counts' ranks by the 'N' entry of each node's attributes.

        Raises ValueError for any other metric and KeyError when 'Counts' is
        requested but a node has no 'N' attribute.
        """
        self.setNodeDict()
        if metric == 'Centrality':
            scores = nx.degree_centrality(self.g)
        elif metric == 'Counts':
            # Read from nodeDict so attributes nested under None are already unwrapped.
            scores = {nodename: data['N'] for nodename, data in self.nodeDict.items()}
        else:
            raise ValueError("Metric {0} is not used for vertex ordering".format(metric))
        self.orderedVertices = sorted(scores, key=scores.get, reverse=True)

    def getVertices(self, ordered=True):
        """Return the cached vertex ranking, computing it on first use.

        ordered: accepted for interface compatibility; the ranked list is always returned.
        """
        if self.orderedVertices is None:
            self.orderVertices()
        return self.orderedVertices

    def getVertexNumByName(self, name, debug=True):
        """Return the rank (list position) of vertex `name`, or None when it is unknown."""
        vertexList = self.getVertices()
        try:
            return vertexList.index(name)
        except ValueError:
            if debug:
                print("Could not get vertex number for name {0}".format(name))
            return None

    def getVertexData(self, vertexNum, debug=True):
        """Return the attribute dict of the vertex at rank `vertexNum`, or None."""
        vertexList = self.getVertices()
        if vertexNum >= len(vertexList):
            if debug:
                print("Vertex num {0} is greater than Vertex list length {1}".format(vertexNum, len(vertexList)))
            return None
        try:
            vertexName = vertexList[vertexNum]
        except (IndexError, TypeError):
            if debug:
                print("Could not get Vertex name from Vertex list for num {0}".format(vertexNum))
            vertexName = None
        return self.getVertexDataByName(vertexName, debug=debug)

    def getVertexDataByName(self, name, debug=True):
        """Return the cached attribute dict for vertex `name`, or None when unavailable."""
        try:
            return self.nodeDict[name]
        except (KeyError, TypeError):
            # TypeError covers a nodeDict that has not been built yet (still None).
            if debug:
                print("Could not get Vertex data for Vertex name {0}".format(name))
            return None


    ################################################################################################
    # Edges / Trips
    ################################################################################################
    def addEdge(self, names, attrs=None, sort=False):
        """Add an edge between the two vertices listed in `names`.

        names: tuple/list/set with exactly two entries; each entry is converted to str.
        attrs: optional dict of edge attributes. Keys must be strings because they
               are forwarded to add_edge() as keyword arguments.
        sort:  when exactly True, the two names are sorted before the edge is added.

        Invalid `names` are reported with a printed message and nothing is added.
        """
        if not isinstance(names, (tuple,list,set)):
            print("Cannot add edge {0} because the names need to come in a tuple/list/set.".format(names))
            return
        if len(names) != 2:
            print("Cannot add edge {0} because we need two entries in the tuple/list/set.".format(names))
            return

        names = [str(x) for x in names]
        if sort is True:
            names = sorted(names)

        edgeAttrs = {} if attrs is None else attrs
        # Endpoints go in positionally and attributes as keywords so the call does not
        # depend on the parameter names of a particular networkx release.
        self.g.add_edge(names[0], names[1], **edgeAttrs)
        if self.debug:
            print("  Added edge: [{0}]".format(", ".join(names)))

    def updateEdgeAttrs(self, attrs):
        """Attach `attrs` to the edges and refresh the edge ordering.

        attrs: dict of edge attributes. Anything that is not a dict is rejected with
               a printed message and the graph is left untouched.
        """
        if not isinstance(attrs, dict):
            print("Cannot add edge attrs because the input is not a dict")
            return
        nx.set_edge_attributes(G=self.g, values=attrs)
        self.orderEdges()

    def setEdgeDict(self):
        """Snapshot edge attributes into self.edgeDict as {(u, v): attribute dict}."""
        self.edgeDict = {(u, v): d for (u,v,d) in self.g.edges(data=True)}

    def orderEdges(self, metric='Weight'):
        """Rank the edges by attribute `metric`, highest first, into self.orderedEdges.

        Raises KeyError when an edge has no `metric` attribute.
        """
        self.setEdgeDict()
        scores = {(u,v): d[metric] for (u,v,d) in self.g.edges(data=True)}
        self.orderedEdges = sorted(scores, key=scores.get, reverse=True)

    def getEdges(self, ordered=True):
        """Return the cached edge ranking, computing it on first use.

        ordered: accepted for interface compatibility; the ranked list is always returned.
        """
        if self.orderedEdges is None:
            self.orderEdges()
        return self.orderedEdges

    def getEdgeNumByName(self, name, debug=True):
        """Return the rank (list position) of edge `name`, or None when it is unknown."""
        edgeList = self.getEdges()
        try:
            return edgeList.index(name)
        except ValueError:
            if debug:
                print("Could not get edge number for name {0}".format(name))
            return None

    def getEdgeData(self, edgeNum, debug=True):
        """Return the attribute dict of the edge at rank `edgeNum`, or None."""
        edgeList = self.getEdges()
        if edgeNum >= len(edgeList):
            if debug:
                print("Edge num {0} is greater than edge list length {1}".format(edgeNum, len(edgeList)))
            return None
        try:
            edgeName = edgeList[edgeNum]
        except (IndexError, TypeError):
            if debug:
                print("Could not get edge name from edge list for num {0}".format(edgeNum))
            edgeName = None
        return self.getEdgeDataByName(edgeName, debug=debug)

    def getEdgeDataByName(self, name, debug=True):
        """Return the cached attribute dict for edge `name`, or None when unavailable."""
        try:
            return self.edgeDict[name]
        except (KeyError, TypeError):
            # TypeError covers an edgeDict that has not been built yet (still None).
            if debug:
                print("Could not get edge data for edge name {0}".format(name))
                if self.edgeDict is not None:
                    print(self.edgeDict.keys())
            return None

In [136]:
class driverNetwork(network):
    """Undirected network built from the organized trip metrics of one device.

    The constructor only stores the metrics; call create() to populate the graph.
    """
    def __init__(self, trips):
        """Validate and store the trip metrics.

        trips: dict with the keys
                 'edgeMetrics'   -- required, mapping of edge name -> edge attributes
                 'vertexMetrics' -- required, mapping of vertex name -> vertex attributes
                 'homeMetrics'   -- optional
                 'device'        -- optional, stored as self.name

        Raises ValueError when trips is None, is not a dict, or lacks one of the
        required entries.
        """
        network.__init__(self, directed=False, debug=False)
        if trips is None:
            raise ValueError("Input trips is None!")
        if not isinstance(trips, dict):
            raise ValueError("Input trips must be a dictionary of edgeMetrics, vertexMetrics, and homeMetrics (optional)")

        edgeMetrics   = trips.get('edgeMetrics')
        vertexMetrics = trips.get('vertexMetrics')
        # Fail here with a clear message instead of an AttributeError/TypeError on None below.
        if edgeMetrics is None or vertexMetrics is None:
            raise ValueError("Input trips must contain both edgeMetrics and vertexMetrics")

        self.name          = trips.get('device')
        self.edgeMetrics   = edgeMetrics
        # Vertex names are stringified to match addEdge(), which converts edge endpoints to str.
        self.vertexMetrics = {str(k): v for k,v in vertexMetrics.items()}
        self.homeMetrics   = trips.get('homeMetrics')
        print("Creating a driver network with {0} vertices and {1} edges.".format(len(self.vertexMetrics), len(self.edgeMetrics)))

    def create(self):
        """Add one edge per edgeMetrics entry, then attach the vertex metrics to the nodes."""
        for edgename, edgedata in self.edgeMetrics.items():
            self.addEdge(edgename, edgedata)
        self.updateVertexAttrs(self.vertexMetrics)

In [137]:
# Build the driver network from the organized trips and keep a handle on the graph.
# NOTE(review): in this view `trips` is only assigned by the organizeTrips(...) call in
# a cell further down, so this cell would fail on a fresh top-to-bottom run -- confirm
# the intended cell order.
dn = driverNetwork(trips)
dn.create()
g = dn.getNetwork()

Creating a driver network with 20 vertices and 185 edges.


In [141]:
#dn.nodeDict[dn.getVertices()[0]]
#dn.getVertexData(0)
#

# Load Data

In [15]:
#######################################################################################
# Generate Clustered Data
#######################################################################################
# With genData=True the trips frame `df` is synthesized through the geocluster helpers;
# otherwise it is loaded from "trips.p".
# genMax is passed to genClusters as maxrad; distMax is passed to geoClusters as distMax.
genData = True
if genData:
    genMax  = 75
    distMax = 500
    raw  = genClusters(20, 250, latRange=[29.8, 30.2], lngRange=[49.8, 50.2], dist="gauss", maxrad=genMax)
    gc   = geoClusters(key="dummy", points=raw, distMax=distMax, debug=False)
    gc.findClusters(seedMin=2, debug=False)
    df   = genTripsBetweenClusters(n=1000, gc=gc, returnDF=True)
else:
    df   = loadJoblib("trips.p")

Selected 1000 randomized trips
Found Start/End for the 1000 randomized trips
Converting (1000, 2, 2) trips to a DataFrame


In [16]:
# Preview the first rows of the trips frame (if needed)
df.head()

Unnamed: 0,lat0,long0,lat1,long1,total_miles,duration,start,end
0,29.899831,50.016455,30.082264,49.829376,27.13146,542.6292,2018-11-06 09:48:58.411831,2018-11-06 09:58:01.041031
1,29.901109,50.089031,30.059112,49.94289,22.512274,450.24547,2018-11-06 09:48:58.411831,2018-11-06 09:56:28.657301
2,30.059212,49.942921,29.947437,50.083943,18.408803,368.176064,2018-11-06 09:48:58.411831,2018-11-06 09:55:06.587895
3,30.058988,49.942687,29.813068,50.04505,29.069615,581.39229,2018-11-06 09:48:58.411831,2018-11-06 09:58:39.804121
4,29.899925,50.089815,29.838258,49.943459,15.690012,313.800248,2018-11-06 09:48:58.411831,2018-11-06 09:54:12.212079


In [17]:
#######################################################################################
# Cluster Geo Data (Lat, Long)
#######################################################################################
# Stack the start (lat0, long0) and end (lat1, long1) coordinates of every trip into one
# two-column (lat, long) frame. pd.concat is used because DataFrame.append was removed
# in pandas 2.0; like append, it keeps each frame's original index labels.
points = df[["lat0", "long0"]].rename(columns={"lat0": "lat", "long0": "long"})
pnts   = df[["lat1", "long1"]].rename(columns={"lat1": "lat", "long1": "long"})
points = pd.concat([points, pnts])



#######################################################################################
# Create Clusters
#######################################################################################
debug=True
gc   = geoClusters(key="dummy", points=points, distMax=300, debug=debug)
gc.findClusters(seedMin=2, debug=debug)
if debug:
    print("Found {0} clusters using {1} cells and {2} counts".format(gc.getNClusters(), gc.getNCells(), gc.getNCounts()))



#######################################################################################
# Set Nearest Clusters
#######################################################################################
# For each trip endpoint, keep the first element of what gc.getNearestClusters returns.
if debug:
    start, cmt = clock("Finding Nearest Clusters for Start of Trips")
geoResults = df[['lat0', 'long0']].apply(gc.getNearestClusters, axis=1).values
df["geo0"] = [x[0] for x in geoResults]
if debug:
    elapsed(start, cmt)
    start, cmt = clock("Finding Nearest Clusters for End of Trips")
geoResults = df[['lat1', 'long1']].apply(gc.getNearestClusters, axis=1).values
df["geo1"] = [x[0] for x in geoResults]
if debug:
    elapsed(start, cmt)



#######################################################################################
# Organize Trips for Network
#######################################################################################
trips = organizeTrips(df=df, gc=gc, debug=False, requireGood=False)

Current Time is Tue Nov 06, 2018 09:49:17 for Converting 2000 Points To Correct Format
Data has correct format with a (2000, 2) shape.
Current Time is Tue Nov 06, 2018 09:49:17 for Done with Converting 2000 Points To Correct Format
Process [Done with Converting 2000 Points To Correct Format] took 0 seconds.
Current Time is Tue Nov 06, 2018 09:49:17 for Finding Geohash (BitLen=8) Values from 2000 Points
Current Time is Tue Nov 06, 2018 09:49:17 for Done with Finding Geohash (BitLen=8) Values from 2000 Points
Process [Done with Finding Geohash (BitLen=8) Values from 2000 Points] took 0 seconds.
Current Time is Tue Nov 06, 2018 09:49:17 for Finding Geohash (BitLen=8) Frequency Values from Geohash DataFrame
Current Time is Tue Nov 06, 2018 09:49:17 for Done with Finding Geohash (BitLen=8) Frequency Values from Geohash DataFrame
Process [Done with Finding Geohash (BitLen=8) Frequency Values from Geohash DataFrame] took 0 seconds.
Current Time is Tue Nov 06, 2018 09:49:17 for Finding Cluster

In [None]:
# Run the 'start' and 'end' columns through castDateTime, then preview the frame.
# NOTE(review): presumably this converts the timestamp strings to datetimes -- confirm
# against pandasUtils.castDateTime.
from pandasUtils import castDateTime
df['start'] = castDateTime(df['start'])
df['end'] = castDateTime(df['end'])
df.head()

# Run the Network

In [None]:
%load_ext autoreload
%autoreload

from networkTrips import organizeTrips

In [None]:
# Scratch: small dict with integer keys, used below to try out the str-key conversion.
x = {1: "hoi", 2: "test"}

In [None]:
# Display the dict with its original integer keys.
x

In [None]:
# Rebuild the dict with every key converted to str (same expression driverNetwork
# applies to vertexMetrics). Rebinds x, so re-running this cell is harmless but the
# integer-keyed original is gone.
x = {str(k): v for k,v in x.items()}

In [None]:
# Display the dict again, now with string keys.
x