In [1]:
%load_ext autoreload
%autoreload

import networkx as nx
import networkx.algorithms as algos
from networkx.algorithms import approximation
from networkTrips import organizeTrips
from networkAlgos import networkAlgos
from timeUtils import clock, elapsed, getDateTime
from collections import Counter
from haversine import haversine
from ioUtils import loadJoblib
from pandasUtils import getRowData, getColData, dropColumns
from networkAlgos import networkAlgos
from edgeInfo import edgeInfo
from vertexInfo import vertexInfo
from networkCategories import categories
from place import getPlaceData
from cbsa import getCBSAData
from csa import getCSAData
from metdiv import getMetDivData
from county import getCountyData
from state import getStateData
from geocluster import geoClusters
from geoUtils import convertMetersToLat, convertLatToMeters, convertMetersToLong, convertLongToMeters
from geoclusterUtils import genCenters, genCluster, genClusters, genTripsBetweenClusters

_, _ = clock("Last Run")

Current Time is Wed Nov 28, 2018 16:30:30 for Last Run


In [6]:
#######################################################################################
# Generate Clusted Data
#######################################################################################
genData = False
if genData:
    cls     = 20
    total   = 500
    genMax  = 75
    distMax = 500
    raw  = genClusters(cls, 250, latRange=[29.8, 30.2], lngRange=[49.8, 50.2], dist="gauss", maxrad=genMax)
    gc   = geoClusters(key="dummy", points=raw, distMax=distMax, debug=False)
    gc.findClusters(seedMin=2, debug=False)
    df   = genTripsBetweenClusters(n=total, gc=gc, returnDF=True)
    df["device"] = "dummy"    
    
    tmpdf = loadJoblib("/Users/tgadfort/Downloads/r4hIDs.p").sample(n=total, replace=True)
    tojoin = tmpdf.sample(cls)
    tojoin["cl"] = ["cl{0}".format(x) for x in range(cls)]

    df['cl'] = df['cl0']
    drops = [x for x in tojoin.columns if x.startswith("Geo1")]
    tojoinCL0 = dropColumns(tojoin, columns=drops, inplace=False)
    test = df.merge(tojoinCL0, on='cl')

    test['cl'] = test['cl1']
    drops = [x for x in tojoin.columns if x.startswith("Geo0")]
    tojoinCL1 = dropColumns(tojoin, columns=drops, inplace=False)
    test = test.merge(tojoinCL1, on='cl')

    gpsdata = test
    dropColumns(gpsdata, columns=["cl", "cl0", "cl1"])
    gpsdata.replace('nan', 0, inplace=True)
else:
    fname = "/Users/tgadf/Downloads/gpsTripsOakRidge.p"
    print("Loading {0}".format(fname))
    gpsdata = loadJoblib(fname)    

_, _ = clock("Last Run")

Loading /Users/tgadf/Downloads/gpsTripsOakRidge.p
Current Time is Wed Nov 28, 2018 16:33:36 for Last Run


## Show Data (if needed)

In [7]:
gpsdata.head()

Unnamed: 0,device,Start,End,total_miles,heading0,lat0,long0,heading1,lat1,long1,...,Geo0CENSUSCousub,Geo1CENSUSCousub,Geo0CENSUSPlace,Geo1CENSUSPlace,Geo0CENSUSMetdiv,Geo1CENSUSMetdiv,Geo0CENSUSCsa,Geo1CENSUSCsa,Geo0CENSUSCbsa,Geo1CENSUSCbsa
0,353162075845793,2018-01-05 16:34:48,2018-01-05 16:52:44,4.5,210,29.305593,-94.814782,240,29.270787,-94.828297,...,4816791445,4816791445,4828068,4828068,0,0,288,288,26420,26420
1,352252066676025,2018-04-11 12:03:33,2018-04-11 12:38:29,28.2,258,39.556667,-94.328497,276,39.24359,-94.450912,...,2904940844,2904742050,2940826,2938000,0,0,312,312,28140,28140
2,352252069073014,2018-10-19 18:36:45,2018-10-19 18:44:17,1.5,96,27.638193,-80.438187,258,27.640823,-80.45353,...,1206193510,1206193510,1274150,1276937,0,0,442,442,42680,42680
3,352252069073014,2018-10-26 18:35:46,2018-10-26 18:46:45,3.6,330,27.638383,-80.399235,276,27.640792,-80.453505,...,1206193510,1206193510,1274150,1276937,0,0,442,442,42680,42680
4,352252068844514,2017-04-13 20:10:24,2017-04-13 20:27:32,8.4,156,30.33451,-87.137922,252,30.387688,-87.064705,...,1203392691,1211392218,0,1271842,0,0,0,0,37860,37860


## Subselect (if needed)

In [8]:
device  = '352252060173789'
gpsdata = gpsdata[gpsdata['device'] == device]
print("Keeping {0} rows".format(gpsdata.shape[0]))

Keeping 3066 rows


# Cluster and Sort Trips

In [10]:
i  = 0
nd = gpsdata['device'].nunique() 
for device, df in gpsdata.groupby('device'):
    print('Key = {0}'.format(device),'\tRun = {0}/{1}'.format(i,nd),'\tTrips = {0}'.format(df.shape[0]))
    i += 1

    #######################################################################################
    # Cluster Geo Data (Lat, Long)
    #######################################################################################
    points         = df[["lat0", "long0"]]
    points.columns = ["lat", "long"]
    pnts           = df[["lat1", "long1"]]
    pnts.columns   = ["lat", "long"]    
    points         = points.append(pnts)



    #######################################################################################
    # Create Clusters
    #######################################################################################
    debug=True
    gc   = geoClusters(key="dummy", points=points, distMax=300, debug=debug)
    gc.findClusters(seedMin=4, debug=debug)
    if debug:
        print("Found {0} clusters using {1} cells and {2} counts".format(gc.getNClusters(), gc.getNCells(), gc.getNCounts()))



    #######################################################################################
    # Set Nearest Clusters
    #######################################################################################
    if debug:
        start, cmt = clock("Finding Nearest Clusters for Start of Trips")
    geoResults = df[['lat0', 'long0']].apply(gc.getNearestClusters, axis=1).values
    df["geo0"] = [x[0] for x in geoResults]
    if debug:
        elapsed(start, cmt)
        start, cmt = clock("Finding Nearest Clusters for End of Trips")
    geoResults = df[['lat1', 'long1']].apply(gc.getNearestClusters, axis=1).values
    df["geo1"] = [x[0] for x in geoResults]    
    if debug:
        elapsed(start, cmt)



    #######################################################################################
    # Organize Trips for Network
    #######################################################################################
    trips = organizeTrips(df=df, gc=gc, debug=False, requireGood=False)

Key = 352252060173789 	Run = 0/1 	Trips = 3066
Current Time is Wed Nov 28, 2018 16:34:27 for Converting 6132 Points To Correct Format
Data has correct format with a (6132, 2) shape.
Current Time is Wed Nov 28, 2018 16:34:27 for Done with Converting 6132 Points To Correct Format
Process [Done with Converting 6132 Points To Correct Format] took 0 seconds.
Current Time is Wed Nov 28, 2018 16:34:27 for Finding Geohash (BitLen=8) Values from 6132 Points
Current Time is Wed Nov 28, 2018 16:34:27 for Done with Finding Geohash (BitLen=8) Values from 6132 Points
Process [Done with Finding Geohash (BitLen=8) Values from 6132 Points] took 0 seconds.
Current Time is Wed Nov 28, 2018 16:34:27 for Finding Geohash (BitLen=8) Frequency Values from Geohash DataFrame
Current Time is Wed Nov 28, 2018 16:34:27 for Done with Finding Geohash (BitLen=8) Frequency Values from Geohash DataFrame
Process [Done with Finding Geohash (BitLen=8) Frequency Values from Geohash DataFrame] took 0 seconds.
Current Time i

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Current Time is Wed Nov 28, 2018 16:34:32 for Done with Finding Nearest Clusters for Start of Trips
Process [Done with Finding Nearest Clusters for Start of Trips] took 3 seconds.
Current Time is Wed Nov 28, 2018 16:34:32 for Finding Nearest Clusters for End of Trips


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  trips['start'] = castDateTime(trips['Start'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  trips['end']   = castDateTime(trips['End'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http:/

Current Time is Wed Nov 28, 2018 16:34:35 for Done with Finding Nearest Clusters for End of Trips
Process [Done with Finding Nearest Clusters for End of Trips] took 3 seconds.


In [11]:
# Show data if needed
df.head()

Unnamed: 0,device,Start,End,total_miles,heading0,lat0,long0,heading1,lat1,long1,...,Geo1CENSUSMetdiv,Geo0CENSUSCsa,Geo1CENSUSCsa,Geo0CENSUSCbsa,Geo1CENSUSCbsa,geo0,geo1,start,end,date
946,352252060173789,2018-02-01 15:56:11,2018-02-01 17:08:03,1.5,60,35.894323,-84.172757,66,35.900737,-84.151165,...,0,314,314,28940,28940,cl33,cl35,2018-02-01 15:56:11,2018-02-01 17:08:03,2018-02-01
953,352252060173789,2018-03-17 20:06:12,2018-03-17 20:08:00,0.1,66,35.900952,-84.149263,72,35.90074,-84.151158,...,0,314,314,28940,28940,cl35,cl35,2018-03-17 20:06:12,2018-03-17 20:08:00,2018-03-17
4374,352252060173789,2017-11-01 11:26:21,2017-11-01 11:32:45,1.8,348,35.71999,-84.342105,138,35.718357,-84.367467,...,0,314,314,28940,28940,cl24,cl6,2017-11-01 11:26:21,2017-11-01 11:32:45,2017-11-01
4376,352252060173789,2017-09-08 15:12:41,2017-09-08 15:43:06,10.3,48,35.782707,-84.279475,252,35.718437,-84.367578,...,0,314,314,28940,28940,cl59,cl6,2017-09-08 15:12:41,2017-09-08 15:43:06,2017-09-08
4378,352252060173789,2018-05-09 13:22:14,2018-05-09 13:46:36,4.3,288,35.725037,-84.343327,72,35.718428,-84.367603,...,0,314,314,28940,28940,cl0,cl6,2018-05-09 13:22:14,2018-05-09 13:46:36,2018-05-09


In [12]:
class network():
    def __init__(self, directed=True, debug=False):
        self.debug = debug
        self.directed = directed
        
        self.orderedEdges    = None
        self.edgeDict        = None
        self.orderedVertices = None
        self.nodeDict        = None
        
        if self.directed is True:
            self.g = nx.DiGraph()
        else:
            self.g = nx.Graph()

        self.eInfo            = edgeInfo(self.g, self.debug)
        self.getEdges         = self.eInfo.getEdges
        self.getEdge          = self.eInfo.getEdgeData
        self.getEdgeAttrs     = self.eInfo.getAttrGroups
        self.setEdgeFeature   = self.eInfo.setEdgeFeature        
        self.getEdgeNum       = self.eInfo.getEdgeNumByName
        
        self.vInfo            = vertexInfo(self.g, self.debug)
        self.getVertices      = self.vInfo.getVertices
        self.getVertex        = self.vInfo.getVertexData
        self.getVertexByName  = self.vInfo.getVertexDataByName
        self.getVertexAttrs   = self.vInfo.getAttrGroups
        self.setVertexFeature = self.vInfo.setVertexFeature        
        self.getVertexNum     = self.vInfo.getVertexNumByName
            
    def setDebug(self, debug):
        self.debug = debug
        
    def getNetwork(self):
        return self.g
    
    
    def update(self):
        self.eInfo.orderEdges()
        self.vInfo.orderVertices()
        
            
    def flattenAttrs(self):
        self.eInfo.flattenEdgeAttrs()
        self.vInfo.flattenVertexAttrs()
        
    
    def collectAttrs(self):
        self.eInfo.collectEdgeAttrs()
        self.vInfo.collectVertexAttrs()
    
    
    ################################################################################################
    # Show Network Data
    ################################################################################################    
    def showVertices(self):
        for nodename,node in self.g.nodes_iter(data=True):
            print(nodename,'\t',node)
                
    def showEdges(self):
        for edgename,edge in self.g.adj.items():
            print(edgename,'\t',edge)
                
                

        
    ################################################################################################
    # Vertices / Nodes / Location (Initial Functions)
    ################################################################################################    
    def addVertex(self, name, attrs={}):
        self.g.add_node(u=name, attr_dict=attrs)
        if self.debug:
            print("  Added node: [{0}]".format(", ".join(names)))
                    
    def updateVertexAttrs(self, attrs):
        if not isinstance(attrs, dict):
            print("Cannot add vertex attrs because the input is not a dict")
            return
        nx.set_node_attributes(G=self.g, values=attrs, name=None)
            
        
        
    ################################################################################################
    # Edges / Trips (Initial Functions)
    ################################################################################################    
    def addEdge(self, names, attrs={}, sort=False):
        if not isinstance(names, (tuple,list,set)):
            print("Cannot add edge {0} because the names need to come in a tuple/list/set.".format(names))
            return
        if len(names) == 2:
            if sort is True:
                names = sorted([str(x) for x in names])
            else:
                names = [str(x) for x in names]
        else:
            print("Cannot add edge {0} because we need two entries in the tuple/list/set.".format(names))
            return
        
        self.g.add_edge(names[0], names[1], attr_dict=attrs)
        if self.debug:
            print("  Added edge: [{0}]".format(", ".join(names)))
            
    def updateEdgeAttrs(self, attrs):
        if not isinstance(attrs, dict):
            print("Cannot add edge attrs because the input is not a dict")
            return
        nx.set_edge_attributes(G=self.g, values=attrs)

In [13]:
class driverNetwork(network):
    def __init__(self, trips):
        network.__init__(self, directed=False, debug=False)
        
        self.categories        = categories(debug)
        self.getCategories     = self.categories.getCategories
        self.getCategory       = self.categories.getCategory
        self.getPermCategories = self.categories.getPermCategories
        self.getPermCategory   = self.categories.getPermCategory  
        self.getHomeRatioCategory = self.categories.getHomeRatioCategory
        self.getIntervalCategory = self.categories.getIntervalCategory
        
        
        if trips is not None:
            if isinstance(trips, dict):
                self.name          = trips.get('device')
                self.edgeMetrics   = trips.get('edgeMetrics')
                self.vertexMetrics = trips.get('vertexMetrics')
                self.vertexMetrics = {str(k): v for k,v in self.vertexMetrics.items()}
                self.homeMetrics   = trips.get('homeMetrics')
                print("Creating a driver network with {0} vertices and {1} edges.".format(len(self.vertexMetrics), len(self.edgeMetrics)))
            else:
                raise ValueError("Input trips must be a dictionary of edgeMetrics, vertexMetrics, and homeMetrics (optional)")
        else:
            raise ValueError("Input trips is None!")

            
    ####################################################################################
    # Create Network
    ####################################################################################
    def create(self, debug=False):
        for edgename,edgedata in self.edgeMetrics.items():
            self.addEdge(edgename, edgedata)
        self.updateVertexAttrs(self.vertexMetrics)
        self.update()
        self.flattenAttrs()
        self.collectAttrs()

        
    ####################################################################################
    # Compute Network Attributes
    ####################################################################################
    def computeNetworkAttrs(self, debug=False):
        self.netAlgos = networkAlgos()
        results = self.netAlgos.compute(self.g)
        self.nodeAttrs = results['Nodes']
        self.edgeAttrs = results['Edges']
        self.edgeAttrs['edge_weight'] = self.eInfo.getEdgeWeights().values() # add weights
        self.netAttrs  = results['Net']
        if debug:
            print("  Created {0} attributes for {1} vertices".format(self.nodeAttrs.shape[1], self.nodeAttrs.shape[0]))
            print("  Created {0} attributes for {1} edges".format(self.edgeAttrs.shape[1], self.edgeAttrs.shape[0]))
            print("  Created {0} attributes for the network".format(len(self.netAttrs)))


    ####################################################################################
    # Perform Lookup for Census Data
    ####################################################################################
    def fillCensusData(self, debug=False):
        if debug:
            print("Filling Vertex Census Data")
        verydebug=False

        censusKeys = [k for k,v in self.getVertexAttrs().items() if v == "Census"]
        getCensusData = {"CensusCbsa": getCBSAData, "CensusCsa": getCSAData, "CensusCounty": getCountyData, "CensusMetdiv": getMetDivData, "CensusPlace": getPlaceData, "CensusState": getStateData}
        for key in censusKeys:
            if getCensusData.get(key) is None:
                continue
            for vertexName in self.getVertices():
                vertex = self.getVertexByName(vertexName, 'attr')
                if verydebug:
                    print("  --> Vertex Number {0} and ID {1}".format(vertexNum, vertexID))

                value   = vertex[key]
                
                
                if isinstance(value, list):
                    try:
                        #mc    = value.most_common(1)
                        value = value[0][0]
                    except:
                        print("There was an error getting most common {0}".format(key))
                        value = None        
                else:
                    print("Input {0} is type {1}".format(value, type(value)))
                    
                try:
                    lookup       = getCensusData[key](str(value))
                    features     = self.categories.getFeatures(key, lookup, debug)
                except:
                    raise ValueError("Something went wrong with census lookup for {0} and value {1}".format(key, value))

                for lookupName,lookupValue in features.items():
                    featureName = "".join([key,lookupName])
                    self.setVertexFeature(vertexName, featureName, lookupValue)
                
                if verydebug is True:
                    print("\t: {0}, {1} == {2} ({3})".format(key, value, lookup, features))
                    
        if verydebug:
            raise ValueError("Stoppping after verydebug is True")
            

    def fillGeospatialData(self, debug=False):
        if debug:
            print("Filling Vertex Geospatial Data")
        verydebug=False

        groupings = ["HEREPOI", "OSM", "Road", "Terminal"]
        for grouping in groupings:
            keys = [k for k,v in self.getVertexAttrs().items() if v == grouping]            
            for vertexName in self.getVertices():
                vertex = self.getVertexByName(vertexName, 'attr')
                if verydebug:
                    print("  --> Vertex Number {0} and ID {1}".format(vertexNum, vertexID))

                for key in keys:
                    value   = vertex[key]

                    result = None
                    if isinstance(value, list):
                        try:
                            test = value[0][0]
                            if test is None:
                                result = 'N'
                            else:
                                if test == 1.0:
                                    result = 'Y'
                                else:
                                    result = 'N'
                        except:
                            result = 'N'
                    else:
                        print("Input {0} is type {1}".format(value, type(value)))

                    self.setVertexFeature(vertexName, key, result)
                    if verydebug is True:
                        print("\t: {0}, {1} == {2}".format(key, value, result))
                    
        if verydebug:
            raise ValueError("Stoppping after verydebug is True")                    
            

    def fillInternalData(self, debug=False):
        if debug:
            print("Filling Vertex Internal Data")
        verydebug=False

        keys = [k for k,v in self.getVertexAttrs().items() if v == "General"]
        for vertexName in self.getVertices():
            vertex = self.getVertexByName(vertexName, 'attr')
            if verydebug:
                print("  --> Vertex Number {0} and ID {1}".format(vertexNum, vertexID))

            for key in keys:
                value   = vertex[key]
                feature = self.categories.getFeatures(key, value, debug)
                if isinstance(feature, dict):
                    feature = feature.get('Name')
                self.setVertexFeature(vertexName, key, feature)
                if verydebug is True:
                    print("\t: {0}, {1} == {2}".format(key, value, feature))
                    
        if verydebug:
            raise ValueError("Stoppping after verydebug is True")                             
            

    def fillEdgeData(self, debug=False):
        if debug:
            print("Filling Edge Data")
        verydebug=False

        for edgeName in self.getEdges():
            features = [self.getVertexByName(x, 'feat') for x in tuple(edgeName)]
            for key,feat1 in features[0].items():
                feat2 = features[1][key]
                value = [str(feat1), str(feat2)]
                self.setEdgeFeature(edgeName, key, value)
                if verydebug is True:
                    print("\t: {0}, {1} == {2}".format(edgeName, key, value))
                    
        if verydebug:
            raise ValueError("Stoppping after verydebug is True")

In [None]:
%load_ext autoreload
%autoreload

from edgeInfo import edgeInfo
from vertexInfo import vertexInfo
from networkCategories import categories

dn = driverNetwork(trips)
dn.create()
dn.computeNetworkAttrs(debug=True)
dn.fillCensusData(debug=True)
dn.fillGeospatialData(debug=True)
dn.fillInternalData(debug=True)
dn.fillEdgeData(debug=True)
g = dn.getNetwork()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Creating a driver network with 88 vertices and 575 edges.


In [None]:
tmp2 = tmp[tmp['Geo1CensusCsaID'] == 'nan']
tmp2

In [None]:
#df.fillna(0, inplace=True)
#df.replace('nan', None, inplace=True)
#tmp = list(df['Geo1CensusCsaID']) + list(df['Geo0CensusCsaID'])
#tmp
#df[df == 'nan']
tmp = df[df['Geo1CensusCsaID'] == 'nan']
tmp["Geo1CensusCsaID"] == 'nan'
tmp.replace('nan', 0, inplace=True)
tmp["Geo1CensusCsaID"]
#dn.getVertex(0, 'feat')

In [14]:
class networkFeatures():
    def __init__(self, dn):
        self.dn = dn
        self.features = {}
        
        self.categories        = categories(debug)
        self.getCategories     = self.categories.getCategories
        self.getCategory       = self.categories.getCategory
        self.getPermCategories = self.categories.getPermCategories
        self.getPermCategory   = self.categories.getPermCategory  
        self.getHomeRatioCategory = self.categories.getHomeRatioCategory
        self.getIntervalCategory = self.categories.getIntervalCategory
        
        

    #################################################################################################################
    # Vertex/Edge Counts
    #################################################################################################################
    def fillVertexCounts(self, debug=False):
        if debug:
            print("Filling Vertex Counts")
            
        featureNames = None
            
        from collections import Counter
        vertexCounts = {"N": {}, 3: {}, 10: {}, 25: {}}

        for vertexNum,vertexName in enumerate(self.dn.getVertices()):
            vertex = dn.getVertexByName(vertexName, 'feat')
            if featureNames is None:
                featureNames = list(vertex.keys())
                for featureName in featureNames:
                    featCats = self.getCategories(featureName)
                    if featCats is not None:
                        for cat in featCats:
                            key = "".join([featureName,cat])
                            for cutoff in ["N",3,10,25]:
                                vertexCounts[cutoff][key]  = 0
                    else:
                        for cutoff in ["N",3,10,25]:
                            vertexCounts[cutoff][featureName]  = 0
                        
            
            for featureName in featureNames:                
                value = vertex[featureName]
                featCats = self.getCategories(featureName)
                if featCats is not None and value in featCats:
                    key = "".join([featureName,value])
                    vertexCounts["N"][key] += 1
                    for cutoff in [3,10,25]:
                        if vertexNum < cutoff:
                            vertexCounts[cutoff][key] += 1
                            
                            
        retval = {}
        for cutoff,cutoffData in vertexCounts.items():
            for key,value in cutoffData.items():
                if retval.get(key) is None:
                    retval[key] = {}
                if isinstance(cutoff, int):
                    retval[key]["".join(["Top", str(cutoff)])] = value
                else:
                    retval[key][cutoff] = value
                                
        self.features["Vertex_Counts"] = retval
        
            

    #################################################################################################################
    # Vertex/Edge Properties
    #################################################################################################################
    def fillObjectProperties(self, objectData, debug=False):
        try:
            diffVtx0Vtx1  = float(objectData[0] - objectData[1])
        except:
            diffVtx0Vtx1  = None

        try:
            diffVtx1Vtx2  = float(objectData[1] - objectData[2])
        except:
            diffVtx1Vtx2  = None

        try:
            diffVtx0Vtx12 = float(objectData[0] - objectData[1] - objectData[2])
        except:
            diffVtx0Vtx12 = None

        try:
            qvals = list(objectData.quantile(q=[0.05,0.25,0.5,0.75,0.95]))
        except:
            qvals = [None, None, None, None, None]

        retval = {"Diff_First_Second":  diffVtx0Vtx1,
                  "Diff_Second_Third":  diffVtx1Vtx2,
                  "Diff_Top3":         diffVtx0Vtx12,
                  "Very_Low_Quantile":  qvals[0],
                  "Low_Quantile":      qvals[1],
                  "Mid_Quantile":      qvals[2],
                  "High_Quantile":     qvals[3],
                  "Very_High_Quantile": qvals[4]}
        return retval
        

    def fillVertexProperties(self, debug=False):
        if debug:
            print("Filling Vertex Properties")

        retval = {}
        vertexAttrs = self.dn.nodeAttrs
        for attribute in vertexAttrs.columns:
            vertexData = getColData(vertexAttrs, colnames=attribute)            
            retval[attribute] = self.fillObjectProperties(vertexData)

        self.features["Vertex_Properties"] = retval
        

    def fillEdgeProperties(self, debug=False):
        if debug:
            print("Filling Edge Properties")

        retval = {}
        edgeAttrs = self.dn.edgeAttrs
        for attribute in edgeAttrs.columns:
            edgeData = getColData(edgeAttrs, colnames=attribute)
            retval[attribute] = self.fillObjectProperties(edgeData)

        self.features["Edge_Properties"] = retval

        
        
    #################################################################################################################
    # Top Vertex/Edge Features
    #################################################################################################################
    def fillIndividualVertexFeatures(self, debug=False):
        key = "Vertex_Top5"
        retval = {}
        
        for vertexNum in range(5):
            vertex = dn.getVertex(vertexNum, 'feat')
            retval["{0}".format(vertexNum)] = self.fillIndividualObjectFeatures(vertexNum, vertex, debug=debug)
            
        self.features[key] = retval
        
        
    def fillIndividualEdgeFeatures(self, debug=False):
        key = "Edge_Top5"
        retval = {}
        
        for edgeNum in range(5):
            edge = dn.getEdge(edgeNum, 'feat')
            retval["{0}".format(edgeNum)] = self.fillIndividualObjectFeatures(edgeNum, edge, debug=debug)
            
        self.features[key] = retval
        
        
    def fillIndividualObjectFeatures(self, objectNum, objectData, debug=False):
        retval = {}
        retval['Rank'] = objectNum
        for featureName, featureValue in objectData.items():
            retval[featureName] = featureValue
        return retval
        
        
    def fillNetworkFeatures(self, debug=False):
        key = "Network"
        retval = {}
        
        netAttrs = self.dn.netAttrs
        for featureName, featureValue in netAttrs.items():
            retval[featureName] = featureValue
            
        self.features[key] = retval


    def fillHomeFeatures(self, debug=False):
        key = "Home"
        retval = {}
        
        vertexName = str(dn.homeMetrics['Vtx'])
        vertexData = dn.getVertexByName(vertexName, 'feat')
        vertexNum  = dn.getVertexNum(vertexName)
        retval["Rank"] = vertexNum
        ratio = dn.homeMetrics['Ratio']
        ratio_significance = self.getHomeRatioCategory(ratio, debug)
        retval["Ratio"]    = ratio_significance
        retval["Days"]     = dn.homeMetrics['Days']
        retval["Days"], _  = self.getIntervalCategory(retval["Days"], debug)
        for featureName, featureValue in vertexData.items():
            retval[featureName] = featureValue

        self.features[key] = retval
        
                
        
    #################################################################################################################
    # Feature Correlations
    #################################################################################################################
    def fillFeatureCorrelations(self, debug=False):
        key = "Vertex_Corr"
        retval = {}
        
        vertexAttrs = self.dn.nodeAttrs
        for i,attribute1 in enumerate(vertexAttrs.columns):
            vertexData1 = getColData(vertexAttrs, colnames=attribute1)
            for j,attribute2 in enumerate(vertexAttrs.columns):
                if j <= i:
                    continue
                    
                vertexData2 = getColData(vertexAttrs, colnames=attribute2)               
                try:
                    corr = vertexData1.corr(vertexData2)
                except:
                    corr = None
                retval["_".join([attribute1, attribute2])] = corr

        self.features[key] = retval
        
        key = "Edge_Corr"
        retval = {}
        
        edgeAttrs = self.dn.edgeAttrs
        for i,attribute1 in enumerate(edgeAttrs.columns):
            edgeData1 = getColData(edgeAttrs, colnames=attribute1)
            for j,attribute2 in enumerate(edgeAttrs.columns):
                if j <= i:
                    continue
                    
                edgeData2 = getColData(edgeAttrs, colnames=attribute2)               
                try:
                    corr = edgeData1.corr(edgeData2)
                except:
                    corr = None
                retval["_".join([attribute1, attribute2])] = corr

        self.features[key] = retval
        
        


    

    #######################################################################################################################
    #
    # Create DataFrame
    #
    #######################################################################################################################
    def fixType(self, value):
        import numpy as np
        if isinstance(value, tuple):
            value = str(value)
        elif isinstance(value, np.int64):
            value = int(value)
        elif isinstance(value, np.float64):
            value = float(value)
        elif isinstance(value, str):
            value = str(value)
        elif isinstance(value, float):
            value = float(value)
        elif isinstance(value, int):
            value = int(value)
        elif isinstance(value, type(None)):
            value = None
        else:
            raise ValueError("Unknown Type: {0} --> {1}".format(type(value), value))
        return value
                        
    def getFeatureDataFrame(self, debug=False):
        from pandas import DataFrame
        from collections import Counter
        features = {}
        cntr = Counter()
        for category, categorydata in self.features.items():
            for feature, featuredata in categorydata.items():
                if isinstance(featuredata, dict):
                    for subfeature, subfeaturedata in featuredata.items():
                        key = "_".join([category,feature,subfeature])
                        key = "".join([s.title() for s in key.split("_")])
                        value = self.fixType(subfeaturedata)
                        features[key] = value
                else:
                    key = "_".join([category,feature])
                    key = "".join([s.title() for s in key.split("_")])
                    value = self.fixType(featuredata)
                    features[key] = value
        
        if debug:
            print("Created Data Frame with {0} features".format(len(features)))

        if False:
            features['Device'] = self.device
            if self.expectedFeatures is not None:
                if len(features) != self.expectedFeatures:
                    print("\nThere are only {0}/{1} features for {2}!!!\n".format(len(features), self.expectedFeatures, self.device))
                    self.printFeatures()
                    raise ValueError("\nThere are only {0}/{1} features for {2}!!!\n".format(len(features), self.expectedFeatures, self.device))

        df = DataFrame(features, index=[0])
        return df

In [15]:
nf = networkFeatures(dn)
nf.fillEdgeProperties()
nf.fillVertexCounts()
nf.fillVertexProperties()
nf.fillIndividualVertexFeatures()
nf.fillIndividualEdgeFeatures()
nf.fillNetworkFeatures()
nf.fillHomeFeatures()
nf.fillFeatureCorrelations()

In [16]:
nf.features

{'Edge_Properties': {'edge_betweenness_centrality': {'Diff_First_Second': -0.0071301247771836,
   'Diff_Second_Third': -0.0011388393741334924,
   'Diff_Top3': -0.015399088928500693,
   'Very_Low_Quantile': 0.005882352941176473,
   'Low_Quantile': 0.007596978185213479,
   'Mid_Quantile': 0.00825057295645531,
   'High_Quantile': 0.009355459355459356,
   'Very_High_Quantile': 0.01061855718718464},
  'edge_weight': {'Diff_First_Second': 12.0,
   'Diff_Second_Third': -5.0,
   'Diff_Top3': -10.0,
   'Very_Low_Quantile': 1.0,
   'Low_Quantile': 1.0,
   'Mid_Quantile': 2.0,
   'High_Quantile': 5.0,
   'Very_High_Quantile': 8.099999999999994}},
 'Vertex_Counts': {'CensusCbsaName': {'N': 0,
   'Top3': 0,
   'Top10': 0,
   'Top25': 0},
  'CensusCbsaType': {'N': 0, 'Top3': 0, 'Top10': 0, 'Top25': 0},
  'CensusCbsaPop': {'N': 0, 'Top3': 0, 'Top10': 0, 'Top25': 0},
  'CensusCbsaHousing': {'N': 0, 'Top3': 0, 'Top10': 0, 'Top25': 0},
  'CensusCbsaArea': {'N': 0, 'Top3': 0, 'Top10': 0, 'Top25': 0},
  '

In [None]:
nf.getFeatureDataFrame().T

In [None]:
[v,x for v,x in trips["vertexMetrics"].items()]

# Load Data

# Run the Network

In [None]:
dn.getVertexByName(str(dn.homeMetrics['Vtx']))

In [None]:
dn.homeMetrics
dn.

In [None]:
[x[1]['CoM'] for x in g.nodes(data=True)]

In [None]:
results = runNetworkAlgorithms(g)

In [None]:
results["Net"]

In [None]:
#set(a.keys()).intersection(set(b.keys()))
dn.vertexAttrs

In [None]:
#idx.reindex = list(g.edges())
#df = DataFrame(x['Edges'])
#df.columns.droplevel()
#df.reindex = list(g.edges())
df.reset_index(drop=True)

In [None]:
nodeAlgos = []
edgeAlgos = []
netAlgos  = []
noneAlgos = []
for k,v in x.items():
    if v is None:
        noneAlgos.append(k)
        continue
    if isinstance(v, dict):
        if len(v) == 20:
            nodeAlgos.append(k)
        elif len(v) == 186:
            edgeAlgos.append(k)
        else:
            netAlgos.append(k)
    else:
        netAlgos.append(k)

In [None]:
#nodeAlgos
#edgeAlgos
import json
json.dumps(edgeAlgos)
#netAlgos
#runAlgos(algosToRun, g)

In [None]:
x

In [None]:

runAlgos(algosToRun, g)

In [None]:

#runAlgos(algosToRun, g)

In [None]:

runAlgos(algosToRun, g)

In [None]:

runAlgos(algosToRun, g)

In [None]:

runAlgos(algosToRun, g)

In [None]:

#algosToRun.append(algos.eulerian_circuit)
runAlgos(algosToRun, g)

In [None]:
#runAlgos(algosToRun, g)

In [None]:
#runAlgos(algosToRun, g)

In [None]:
runAlgos(algosToRun, g)

In [None]:
runAlgos(algosToRun, g)

In [None]:
algosToRun = []
#algosToRun.append(algos.node_classification.harmonic_function)
#algosToRun.append(algos.node_classification.local_and_global_consistency)
#runAlgos(algosToRun, g)

In [None]:
runAlgos(algosToRun, g)

In [None]:
runAlgos(algosToRun, g)

In [None]:
#runAlgos(algosToRun, g)

In [None]:
algosToRun = []
#algosToRun.append(algos.sigma)
#algosToRun.append(algos.omega)
#runAlgos(algosToRun, g)

In [None]:
runAlgos(algosToRun, g)

In [None]:
#runAlgos(algosToRun, g)

In [None]:

runAlgos(algosToRun, g)

In [None]:
runAlgos(algosToRun, g)

In [None]:

runAlgos(algosToRun, g)

In [None]:
runAlgos(algosToRun, g)

In [None]:
runAlgos(algosToRun, g)

In [None]:
# Get Sparse Matrix
algosToRun = []
algosToRun.append(linalg.attr_sparse_matrix)
#algosToRun.append(convert_matrix.from_scipy_sparse_matrix)
algosToRun.append(convert_matrix.to_pandas_adjacency)
#runAlgos(algosToRun, g)

In [None]:
convert_matrix.to_pandas_edgelist(g)

In [None]:
G = nx.complete_graph(5)
A = nx.nx_agraph.to_agraph(G)
H = nx.nx_agraph.from_agraph(A)

In [None]:
import pygraphviz as pgv
print(A)

In [None]:
print(nx.draw(g))

In [None]:
import holoviews as hv
hv.extension('bokeh')

In [None]:
import copy
import networkx
import matplotlib.pyplot as plt

# Generate a graph.
# Here I chose an ER graph.
g = nx.erdos_renyi_graph(20, 0.3)

# Get positions.
# Here I use the spectral layout and add a little bit of noise.
pos = nx.layout.spectral_layout(g)
pos = nx.spring_layout(g, pos=pos, iterations=50)

# Create position copies for shadows, and shift shadows
pos_shadow = copy.deepcopy(pos)
shift_amount = 0.006
for idx in pos_shadow:
    pos_shadow[idx][0] += shift_amount
    pos_shadow[idx][1] -= shift_amount

#~~~~~~~~~~~~
# Draw graph
#~~~~~~~~~~~~
fig = plt.figure(frameon=False)
ax = fig.add_axes([0, 0, 1, 1])
ax.axis('off')

nx.draw_networkx_nodes(g, pos_shadow, node_color='k', alpha=0.5)
nx.draw_networkx_nodes(g, pos, node_color="#3182bd", linewidths=1)
nx.draw_networkx_edges(g, pos, width=1)

In [None]:
import warnings
warnings.filterwarnings('ignore')

G = nx.Graph(day="Stackoverflow")
df_nodes = pd.read_csv('../input/stack_network_nodes.csv')
df_edges = pd.read_csv('../input/stack_network_links.csv')

for index, row in df_nodes.iterrows():
    G.add_node(row['name'], group=row['group'], nodesize=row['nodesize'])
    
for index, row in df_edges.iterrows():
    G.add_weighted_edges_from([(row['source'], row['target'], row['value'])])
    
color_map = {1:'#f09494', 2:'#eebcbc', 3:'#72bbd0', 4:'#91f0a1', 5:'#629fff', 6:'#bcc2f2',  
             7:'#eebcbc', 8:'#f1f0c0', 9:'#d2ffe7', 10:'#caf3a6', 11:'#ffdf55', 12:'#ef77aa', 
             13:'#d6dcff', 14:'#d2f5f0'} 

plt.figure(figsize=(25,25))
options = {
    'edge_color': '#FFDEA2',
    'width': 1,
    'with_labels': True,
    'font_weight': 'regular',
}
colors = [color_map[G.node[node]['group']] for node in G]
sizes = [G.node[node]['nodesize']*10 for node in G]

"""
Using the spring layout : 
- k controls the distance between the nodes and varies between 0 and 1
- iterations is the number of times simulated annealing is run
default k=0.1 and iterations=50
"""
nx.draw(G, node_color=colors, node_size=sizes, pos=nx.spring_layout(G, k=0.25, iterations=50), **options)
ax = plt.gca()
ax.collections[0].set_edgecolor("#555555") 
plt.show()

In [None]:
dn.edgeDict[('0', '1')].values()

In [None]:

    minmaxWeight = [0.0, 2.5]
    print("Number of Edges: {0}".format(nEdges))
    nRange=5
    if nEdges > 100000:
        minmaxWeight[1] = 2
        nRange=6
        weightSize = [power(x,11) for x in linspace(minmaxWeight[0], minmaxWeight[1], nRange)]
    elif nEdges > 50000:
        minmaxWeight[1] = 2
        weightSize = [power(x,9) for x in linspace(minmaxWeight[0], minmaxWeight[1], 5)]
    elif nEdges > 25000:
        weightSize = [power(x,8) for x in linspace(minmaxWeight[0], minmaxWeight[1], 5)]
    elif nEdges > 10000:
        weightSize = [power(x,7) for x in linspace(minmaxWeight[0], minmaxWeight[1], 5)]
    elif nEdges > 2000:
        weightSize = [power(x,6) for x in linspace(minmaxWeight[0], minmaxWeight[1], 5)]
    elif nEdges > 1000:
        weightSize = [power(x,5) for x in linspace(minmaxWeight[0], minmaxWeight[1], 5)]
    elif nEdges > 500:
        weightSize = [power(x,4) for x in linspace(minmaxWeight[0], minmaxWeight[1], 5)]
    elif nEdges > 100:
        weightSize = [power(x,3) for x in linspace(minmaxWeight[0], minmaxWeight[1], 5)]
    else:
        weightSize = [power(x,2) for x in linspace(minmaxWeight[0], minmaxWeight[1], 5)]
    scale = 2.5/amax(weightSize)
    weightSize = [x*scale for x in weightSize]

In [None]:
for k,k2,v in g.edges(data=True):
    print(v)
    break
#self.nodeDict = {u: d for (u,d) in self.g.nodes(data=True)}


In [None]:
from collections import Counter
x = Counter()
x[3] += 1
x

In [None]:
x.get(4)

In [None]:
from pandas import Series
tmp = Series([1, 3, 45,6 ,8, 34])

In [None]:
list(tmp.quantile(q=[0.05,0.95]))

In [None]:
g.edges(data=True)

In [None]:
#test