In [None]:
from pygraphblas import *
#from _pygraphblas import lib
import pygraphblas.descriptor
import csv
import sys
import logging
import glob
import operator
import sys
sys.path.append("..")
from loader.data_loader import DataLoader
from algorithms.search import naive_bfs_levels



In [None]:
# Setup logger
# A dedicated, non-propagating logger: messages are emitted only through the
# stream handler attached below, formatted as "<time> <level> <message>".
log = logging.getLogger(__name__)
log.propagate = False
log.setLevel(logging.INFO)

# Re-running this cell must not stack up handlers (every extra handler would
# print each message one more time), so detach whatever is already attached.
for stale_handler in list(log.handlers):
    log.removeHandler(stale_handler)

handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)-5s %(message)s'))
log.addHandler(handler)

### Load data

In [None]:
# Location and on-disk format of the input data set
data_dir = '../../csvs/o1k/'
data_format = 'csv'
loader = DataLoader(data_dir, data_format)

#vertices, mappings, matrices = loader.load_all_csvs()

# Vertex sets used by the queries below
person = loader.load_vertex('person')
comment = loader.load_vertex('comment')

# Edge matrices between the loaded vertex sets
replyOf = loader.load_edge('replyOf', comment, comment)
knows = loader.load_edge('knows', person, person)
hasCreator = loader.load_edge('hasCreator', comment, person)

# Transposes are computed once here; the query functions read them as notebook globals
replyOfTransposed = replyOf.transpose()
hasCreatorTransposed = hasCreator.transpose()



### Queries

In [None]:
# Query 1
def shortest_distance_over_frequent_communication_paths(person, replyOf, knows, hasCreator, num_of_interactions, person1_id, person2_id):
    """Hop count between two persons over "frequent communication" KNOWS edges.

    An overlay graph is built in which two persons are adjacent when they know
    each other and *each* of them wrote more than ``num_of_interactions``
    comments replying to comments of the other. This is the same mutual
    criterion that ``step_frontier`` applies. A BFS from person1 over that
    overlay yields the result. A negative ``num_of_interactions`` (the checks
    use -1) disables the filter and the plain ``knows`` graph is searched.

    Args:
        person: vertex set exposing ``id2index`` (external id -> matrix index).
        replyOf: comment-to-comment reply matrix.
        knows: person-to-person matrix, also used as mask of the overlay.
        hasCreator: comment-to-person matrix.
        num_of_interactions: strict lower bound on replies in each direction;
            negative means "no filtering".
        person1_id: external id of the start person.
        person2_id: external id of the target person.

    Returns:
        ``levels[person2] - 1`` where ``levels`` is what ``naive_bfs_levels``
        returns for a search started at person1. The notebook checks expect -1
        for unreachable pairs; that depends on how ``naive_bfs_levels`` reports
        unvisited vertices, which is not visible here (TODO confirm).

    Raises:
        KeyError: if either id is missing from ``person.id2index``.
    """
    person1_id_remapped = person.id2index[person1_id]
    person2_id_remapped = person.id2index[person2_id]

    if num_of_interactions < 0:
        # No filtering requested: skip the (expensive) overlay construction.
        overlay_graph = knows
    else:
        # Derive the transpose from the argument instead of relying on a
        # notebook-level global that may belong to a different data set.
        hasCreatorT = hasCreator.transpose()

        # pattern: a hacky way to cast to UINT64 because count is required instead of existence
        personA_to_comment2 = hasCreatorT.pattern(UINT64) @ replyOf

        # person_to_person[a, b] = number of comments by a replying to comments by b,
        # kept only for pairs that know each other.
        person_to_person = personA_to_comment2.mxm(hasCreator, mask=knows)
        person_to_person_filtered = person_to_person.select(lib.GxB_GT_THUNK, num_of_interactions)

        # Frequent communication must hold in both directions: keep (a, b) only
        # if (b, a) survived the filter as well (element-wise product = intersection).
        mutual = person_to_person_filtered * person_to_person_filtered.transpose()
        overlay_graph = mutual.pattern()

    levels = naive_bfs_levels(overlay_graph, person1_id_remapped)

    result = levels[person2_id_remapped] - 1 # Get hop count

    return result
    

In [None]:
# Each case: (num_of_interactions, person1 id, person2 id, expected hop count)
query1_cases = [
    (1, 786, 799, 4),
    (1, 422, 736, -1),
    (1, 858, 587, 4),
    (-1, 266, 106, 3),
    (0, 58, 402, 3),
    (105, 608, 3, -1),
]

# Print every result together with whether it matches the expectation
for min_interactions, source_id, target_id, expected_hops in query1_cases:
    x = shortest_distance_over_frequent_communication_paths(
        person, replyOf, knows, hasCreator, min_interactions, source_id, target_id
    )
    print(f'RESULT: {x}', x == expected_hops)

In [None]:
# Optimized version: do not create overlay graph but investigate KNOWS edges on-the-fly.
def step_frontier(frontier, seen, num_of_interactions, numpersons, hasCreator, replyOf, knows):
    """Advance a BFS frontier of persons by one hop.

    With a negative ``num_of_interactions`` the frontier is simply pushed
    through ``knows``. Otherwise only KNOWS edges are followed whose endpoints
    replied to each other's comments more than ``num_of_interactions`` times in
    both directions; the counts are computed for the frontier rows only.

    NOTE: the transposed matrices are read from the notebook-level globals
    ``hasCreatorTransposed`` and ``replyOfTransposed``; they must correspond to
    the ``hasCreator`` / ``replyOf`` arguments.

    Args:
        frontier: vector over persons holding the current frontier.
        seen: vector of already visited persons (currently unused; already
            visited persons are NOT removed from the result).
        num_of_interactions: strict lower bound on replies in each direction;
            negative disables filtering.
        numpersons: dimension of the person x person selector matrix.
        hasCreator: comment-to-person matrix.
        replyOf: comment-to-comment reply matrix.
        knows: person-to-person matrix, used as mask / adjacency.

    Returns:
        Vector over persons containing the next frontier.
    """
    if num_of_interactions < 0:
        # Unfiltered search: no selector matrix is needed at all.
        return frontier.vxm(knows)

    frontierPersonIndices = frontier.to_lists()[0]

    # Diagonal selector: multiplying by it keeps only the rows of frontier persons.
    sel = Matrix.from_lists(frontierPersonIndices, frontierPersonIndices, [1]*len(frontierPersonIndices), numpersons, numpersons)

    # Rows: frontier persons, columns: their comments (shared by both directions).
    frontier_comments = sel.mxm(hasCreatorTransposed)

    # Replies written by frontier persons to their acquaintances ...
    FreqComm1 = frontier_comments.mxm(replyOf).mxm(hasCreator, mask=knows).select(lib.GxB_GT_THUNK, num_of_interactions)
    # ... and replies they received from them.
    FreqComm2 = frontier_comments.mxm(replyOfTransposed).mxm(hasCreator, mask=knows).select(lib.GxB_GT_THUNK, num_of_interactions)

    # Element-wise product keeps pairs passing the threshold in both directions;
    # transpose so that the reduction below is indexed by the neighbour.
    FreqComm = (FreqComm1*FreqComm2).transpose()
    next_frontier = FreqComm.reduce_vector().pattern()

    return next_frontier

def shortest_distance_over_frequent_communication_paths_opt(person, replyOf, knows, hasCreator, num_of_interactions, person1_id, person2_id):
    """Query 1 via bidirectional BFS, expanding frequent-communication edges on the fly.

    Two frontiers are advanced alternately with ``step_frontier``: one from
    person1 and one from person2. The distance is reported as soon as the
    freshly expanded frontier of person1 meets what person2's side has reached
    (odd distance) or the two fresh frontiers meet (even distance).

    Args:
        person: vertex set exposing ``id2index`` (external id -> matrix index).
        replyOf, knows, hasCreator: matrices forwarded to ``step_frontier``.
        num_of_interactions: reply threshold forwarded to ``step_frontier``.
        person1_id: external id of the first person.
        person2_id: external id of the second person.

    Returns:
        Number of hops between the two persons, 0 if they are the same person,
        or -1 if they are not connected.

    Raises:
        KeyError: if either id is missing from ``person.id2index``.
    """
    person1_id_remapped = person.id2index[person1_id]
    person2_id_remapped = person.id2index[person2_id]

    # A person is at distance 0 from itself; the level loop below only tests
    # distances >= 1 and would otherwise answer 2 (or -1).
    if person1_id_remapped == person2_id_remapped:
        return 0

    numpersons = len(person.id2index)
    frontier1 = Vector.from_lists([person1_id_remapped], [True], numpersons)
    frontier2 = Vector.from_lists([person2_id_remapped], [True], numpersons)
    seen1 = frontier1
    seen2 = frontier2

    # Each completed level adds at least one person to both seen sets (otherwise
    # we return), so numpersons levels are always enough.
    for level in range(1, numpersons + 1):
        # frontier 1
        next1 = step_frontier(frontier1, seen1, num_of_interactions, numpersons, hasCreator, replyOf, knows)

        # emptied the component of person1
        if next1.nvals == 0:
            return -1
        # has frontier1 intersected frontier2's previous state?
        if (next1 * seen2).nvals > 0:
            return level*2-1
        # step_frontier does not remove visited persons, so the frontier never
        # becomes empty inside a component; exhaustion shows up as a seen set
        # that no longer grows.
        grown1 = seen1 + next1
        if grown1.nvals == seen1.nvals:
            return -1

        # frontier 2
        next2 = step_frontier(frontier2, seen2, num_of_interactions, numpersons, hasCreator, replyOf, knows)

        # emptied the component of person2
        if next2.nvals == 0:
            return -1
        # do frontier1 and frontier2's current states intersect?
        if (next1 * next2).nvals > 0:
            return level*2
        grown2 = seen2 + next2
        if grown2.nvals == seen2.nvals:
            return -1

        # step the frontiers
        seen1, frontier1 = grown1, next1
        seen2, frontier2 = grown2, next2

    # Not reachable in practice (see loop bound), but never fall through with None.
    return -1

In [None]:
# Each case: (num_of_interactions, person1 id, person2 id, expected hop count)
query1_opt_cases = [
    (1, 786, 799, 4),
    (1, 422, 736, -1),
    (1, 858, 587, 4),
    (-1, 266, 106, 3),
    (0, 58, 402, 3),
    (105, 608, 3, -1),
]

# Print every result together with whether it matches the expectation
for min_interactions, source_id, target_id, expected_hops in query1_opt_cases:
    x = shortest_distance_over_frequent_communication_paths_opt(
        person, replyOf, knows, hasCreator, min_interactions, source_id, target_id
    )
    print(f'RESULT: {x}', x == expected_hops)
