# Headers

In [2]:

import pandas as pd 
from collections import defaultdict
# import igraph as ig



# Class for Session Node to store time of visit for User -> POI 


In [3]:

class SessionNode:
    """Session node holding the time slot of a user's visit.

    Attributes:
        hour: the hour slot handed to the constructor, stored unchanged.
    """

    def __init__(self, hourSlot):
        # No validation or conversion: the slot is kept exactly as supplied.
        self.hour = hourSlot
        


# Class for User Node to store user info


In [4]:

class UserNode:
    """User node carrying the identifier of a single user.

    Attributes:
        userId: the identifier handed to the constructor, stored unchanged.
    """

    def __init__(self, userId):
        # The id is stored as-is; no lookup or normalisation happens here.
        self.userId = userId



# Class for Point of Interest (POI) Node to store various POIs like malls, parks, restaurants, visiting places, etc.


In [5]:

class POINode:
    """Point-of-interest (POI) node describing one venue.

    Attributes:
        venueId: identifier of the venue (required).
        venueTypeId: optional venue type identifier, None when not given.
        venueName: optional venue name, None when not given.
        location: (latitude, longitude) tuple; each element is None when
            the corresponding argument was not given.
    """

    def __init__(self, venueId, venueTypeId=None, venueName=None, latitude=None, longitude=None):
        # Latitude and longitude are bundled into a single tuple attribute.
        coordinates = (latitude, longitude)
        self.location = coordinates

        # Identification fields are stored exactly as supplied.
        self.venueName = venueName
        self.venueTypeId = venueTypeId
        self.venueId = venueId
        


# Class for graph that represents POIs, user info and time of visits


In [53]:

class Graph:
    """Weighted two-level graph: user -> session (hour slot) -> POI.

    Attributes:
        userToSessionLinks: defaultdict mapping a UserNode to a list of
            [SessionNode, weight] pairs. Pairs are lists (not tuples)
            because the weight is incremented in place.
        sessionToPOILinks: defaultdict mapping a SessionNode to a list of
            [POINode, weight] pairs.
        userIdToUserNodes: dict userID -> UserNode, so that repeated ids in
            the dataset share one node object.
        venueIdToPOINOdes: dict venueID -> POINode, same purpose for venues.
    """

    def __init__(self):
        self.userToSessionLinks = defaultdict(list)  # userNode    -> [[sessionNode, weight], ...]
        self.sessionToPOILinks = defaultdict(list)   # sessionNode -> [[POINode, weight], ...]

        # auxiliary lookups for handling redundant data from dataset
        self.userIdToUserNodes = {}
        self.venueIdToPOINOdes = {}

    def loadGraph(self, dataset, maxRows=20):
        """Read a CSV of visits and add them to the graph.

        Each row must provide the columns 'userID', 'venueID' and 'dayHour'.
        For every row the user's link to the matching hour slot and that
        session's link to the venue each gain one unit of weight. Calling
        this method again adds to the existing graph; nothing is reset.

        Args:
            dataset: anything accepted by pandas.read_csv (e.g. a file path).
            maxRows: number of leading rows to process. Defaults to 20, the
                limit that used to be hard-coded; pass None to process the
                whole file.

        Raises:
            KeyError: if a processed row lacks one of the required columns.
        """
        data = pd.read_csv(dataset)
        if maxRows is not None:
            data = data.head(maxRows)

        for _, row in data.iterrows():
            userNode = self._getOrCreateUserNode(row['userID'])
            poiNode = self._getOrCreatePOINode(row['venueID'])
            sessionNode = self._recordSession(userNode, row['dayHour'])
            self._recordVisit(sessionNode, poiNode)

    def _getOrCreateUserNode(self, userId):
        """Return the shared UserNode for userId, creating it on first use."""
        userNode = self.userIdToUserNodes.get(userId)
        if userNode is None:
            userNode = UserNode(userId)
            self.userIdToUserNodes[userId] = userNode
        return userNode

    def _getOrCreatePOINode(self, venueId):
        """Return the shared POINode for venueId, creating it on first use."""
        poiNode = self.venueIdToPOINOdes.get(venueId)
        if poiNode is None:
            poiNode = POINode(venueId)
            self.venueIdToPOINOdes[venueId] = poiNode
        return poiNode

    def _recordSession(self, userNode, hourSlot):
        """Bump (or create with weight 1) the user's link for hourSlot; return its SessionNode."""
        # Indexing the defaultdict creates the empty list for a new user.
        links = self.userToSessionLinks[userNode]
        for link in links:
            if link[0].hour == hourSlot:
                link[1] += 1  # incrementing weight for the hour slot
                return link[0]
        sessionNode = SessionNode(hourSlot)
        links.append([sessionNode, 1])
        return sessionNode

    def _recordVisit(self, sessionNode, poiNode):
        """Bump (or create with weight 1) the session's link to poiNode."""
        links = self.sessionToPOILinks[sessionNode]
        for link in links:
            if link[0] == poiNode:
                link[1] += 1
                return
        links.append([poiNode, 1])

    def lookupFromGraph(self, userId):
        """Placeholder: not implemented yet; does nothing and returns None."""
        pass
        

        
# Build a fresh graph and populate it from export.csv (relative to the
# notebook's working directory).
graph = Graph()
graph.loadGraph("export.csv")
    

In [60]:
# Rebuild the graph from scratch so this cell does not depend on earlier runs.
graph = Graph()
graph.loadGraph("export.csv")

# Dump the id -> node lookups and both link tables, separated by a rule.
print(graph.userIdToUserNodes)
for mapping in (graph.userToSessionLinks, graph.venueIdToPOINOdes, graph.sessionToPOILinks):
    print('----------------------')
    print(mapping)

{7509: <__main__.UserNode instance at 0x7f0440f20170>, 9317: <__main__.UserNode instance at 0x7f0440f32bd8>}
[(7509, 7509), (9317, 9317)]
----------------------
defaultdict(<type 'list'>, {<__main__.UserNode instance at 0x7f0440f32bd8>: [[<__main__.SessionNode instance at 0x7f0440f322d8>, 1], [<__main__.SessionNode instance at 0x7f0440f32488>, 2], [<__main__.SessionNode instance at 0x7f0440f32290>, 4], [<__main__.SessionNode instance at 0x7f0440f32f80>, 1], [<__main__.SessionNode instance at 0x7f0440f32320>, 2], [<__main__.SessionNode instance at 0x7f0440f325a8>, 1], [<__main__.SessionNode instance at 0x7f0440f32248>, 1]], <__main__.UserNode instance at 0x7f0440f20170>: [[<__main__.SessionNode instance at 0x7f0440f20908>, 1], [<__main__.SessionNode instance at 0x7f0440f203f8>, 2], [<__main__.SessionNode instance at 0x7f0440f20cf8>, 1], [<__main__.SessionNode instance at 0x7f0440f32cf8>, 1], [<__main__.SessionNode instance at 0x7f0440f32518>, 2], [<__main__.SessionNode instance at 0x7f0

In [13]:
## using graph viz for visualizing graph
## Bad library. Instead of this, using D3JS in below cell for visualizing graph
# NOTE(review): Digraph is not imported in any visible cell - presumably
# `from graphviz import Digraph`; confirm it is in scope before running.
graph = Graph()
graph.loadGraph("export.csv")

# Select the last user id yielded by the lookup dict.
userval = None
user = graph.userIdToUserNodes
for item in user:
    userval = item
print(userval)

# The Digraph must exist before any node is added; it was previously created
# after the first dot.node(...) call, which raised NameError on a fresh kernel.
dot = Digraph(comment='Sample graph for user: ' + str(userval))
edges = []

# Node names are single characters ('a', 'b', ...) so that an edge can be
# written as the two-character string tail+head passed to dot.edges().
counter = 'a'
userNode_counter = counter
# Label the root with the selected user id (was str(user): the whole dict).
dot.node(counter, str(userval))
counter = chr(ord(counter) + 1)
sessions = graph.userToSessionLinks

for sessionnode in sessions[user[userval]]:
    # sessionnode is a [SessionNode, weight] pair; only the node is drawn.
    dot.node(counter, str(sessionnode[0]))
    edges.append(userNode_counter + counter)
    ##session - to - POI nodes
    sessionNode_counter = counter
    counter = chr(ord(counter) + 1)
    for poiNode in graph.sessionToPOILinks[sessionnode[0]]:
        # poiNode is a [POINode, weight] pair.
        dot.node(counter, str(poiNode[0]))
        edges.append(sessionNode_counter + counter)
        counter = chr(ord(counter) + 1)
print(edges)
dot.edges(edges)
dot.render('graph-viz.gv', view=True)

In [57]:
## Using D3JS for visualizing graph
## Read D3-Graph-Visualization/readme.txt on how to visualize the graph in D3JS

import json
from collections import OrderedDict

def generate_d3js_json(graph_data=None):
    """Print the graph as a D3JS-style {"nodes": [...], "links": [...]} JSON document.

    Nodes are emitted in registration order: every user from
    userIdToUserNodes first, then session nodes, then POI nodes. Each node
    is {"name": label} with label "U <userId>", "H <hour>" or
    "POI <venueId>". Each link refers to its endpoints by position in the
    "nodes" list.

    Args:
        graph_data: object exposing userIdToUserNodes, userToSessionLinks
            and sessionToPOILinks. When omitted, the notebook-level
            variable `graph` is used, as before.

    Returns:
        None. The JSON text (indent=2) is written to stdout.
    """
    if graph_data is None:
        # Backward-compatible default: fall back to the global graph.
        graph_data = graph

    # Fixed link weights; the stored visit counts are intentionally not used.
    user_session_weight = 30  # defaulting to a value
    session_poi_weight = 1    # defaulting to a value

    node_positions = {}  # node object -> index in nodes_list
    nodes_list = []
    links = []

    def index_of(node, label):
        # Key on the node object itself rather than str(node): the default
        # repr is only unique because it embeds a memory address and would
        # merge distinct nodes if a class ever defined __str__/__repr__.
        if node not in node_positions:
            node_positions[node] = len(nodes_list)
            nodes_list.append({"name": str(label)})
        return node_positions[node]

    # Register every known user first so users occupy the lowest indexes.
    for user_id in graph_data.userIdToUserNodes:
        usernode = graph_data.userIdToUserNodes[user_id]
        index_of(usernode, "U " + str(usernode.userId))

    for usernode in graph_data.userToSessionLinks:
        for sessionnode, _ in graph_data.userToSessionLinks[usernode]:
            target = index_of(sessionnode, "H " + str(sessionnode.hour))
            ## creating node index mappings for d3js
            links.append({
                "source": index_of(usernode, "U " + str(usernode.userId)),
                "target": target,
                "weight": user_session_weight,
            })

    for sessionnode in graph_data.sessionToPOILinks:
        for poinode, _ in graph_data.sessionToPOILinks[sessionnode]:
            target = index_of(poinode, "POI " + str(poinode.venueId))
            ## creating node index mappings for d3js
            links.append({
                "source": index_of(sessionnode, "H " + str(sessionnode.hour)),
                "target": target,
                "weight": session_poi_weight,
            })

    d3json_dict = {}
    d3json_dict["nodes"] = nodes_list
    d3json_dict["links"] = links

    print(json.dumps(d3json_dict, indent=2))

# Emit the D3JS JSON for the current notebook-level `graph` to the cell output.
generate_d3js_json()

{
  "nodes": [
    {
      "name": "U 7509"
    }, 
    {
      "name": "U 9317"
    }, 
    {
      "name": "H 15"
    }, 
    {
      "name": "H 17"
    }, 
    {
      "name": "H 13"
    }, 
    {
      "name": "H 0"
    }, 
    {
      "name": "H 10"
    }, 
    {
      "name": "H 14"
    }, 
    {
      "name": "H 12"
    }, 
    {
      "name": "H 23"
    }, 
    {
      "name": "H 17"
    }, 
    {
      "name": "H 19"
    }, 
    {
      "name": "H 21"
    }, 
    {
      "name": "H 1"
    }, 
    {
      "name": "H 20"
    }, 
    {
      "name": "POI 11492"
    }, 
    {
      "name": "POI 1956"
    }, 
    {
      "name": "POI 5222"
    }, 
    {
      "name": "POI 2297"
    }, 
    {
      "name": "POI 47021"
    }, 
    {
      "name": "POI 28838"
    }, 
    {
      "name": "POI 221221"
    }, 
    {
      "name": "POI 64"
    }, 
    {
      "name": "POI 7489"
    }, 
    {
      "name": "POI 34034"
    }, 
    {
      "name": "POI 16436"
    }, 
    {
      "name": "POI