In [None]:
from pygraphblas import *
from _pygraphblas import lib
import pygraphblas.descriptor
import csv
import sys
import logging
import glob
import operator
import sys
sys.path.append("..")
from loader.data_loader import DataLoader



In [None]:
# Setup logger
# logging.getLogger(__name__) hands back the *same* logger object every time
# this cell is executed, so handlers attached by a previous run are still
# present. Drop them first; otherwise each re-run of the cell adds one more
# StreamHandler and every message is printed once per run.
log = logging.getLogger(__name__)
while log.handlers:
    log.removeHandler(log.handlers[0])

# Single stream handler with timestamp, padded level name and message.
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)-5s %(message)s'))
log.addHandler(handler)

# Do not forward records to ancestor loggers (avoids duplicate output when the
# root logger also has a handler configured).
log.propagate = False
log.setLevel(logging.INFO)

### Load data

In [None]:
# Location and on-disk format of the input data set; the path is relative to
# the directory the notebook is run from.
data_dir = '../../csvs/o1k/'
data_format = 'csv'
loader = DataLoader(data_dir, data_format)

# Alternative (disabled): load everything in one call instead of the
# per-entity loads below.
#vertices, mappings, matrices = loader.load_all_csvs()

# Vertex sets used by the queries below.
person = loader.load_vertex('person')
comment = loader.load_vertex('comment')

# Edge sets; each call names the edge type followed by the two vertex sets it
# connects. NOTE(review): presumably the first vertex set indexes the rows and
# the second the columns of the resulting matrix -- confirm against DataLoader.
replyOf = loader.load_edge('replyOf', comment, comment)
knows = loader.load_edge('knows', person, person)
hasCreator = loader.load_edge('hasCreator', comment, person)



### Queries

In [None]:
def pattern_vec(vec):
    '''
    Build a boolean vector from `vec` by applying BOOL.ONE to it.

    Input:
        vec: vector to take the pattern of
    Returns:
        a new BOOL vector with the same size as `vec`, written by
        vec.apply(BOOL.ONE, ...)
    '''
    size = vec.size
    bool_pattern = Vector.from_type(BOOL, size)
    vec.apply(BOOL.ONE, out=bool_pattern)
    return bool_pattern
    

def naive_bfs_levels(matrix, source):
    '''
    Level-synchronous breadth-first search from `source`.

    Input:
        matrix: adjacency matrix describing the graph
        source: source node index
    Returns:
        result_vector: UINT64 vector of BFS levels. Levels are 1-based: the
            source itself is stored with level 1, its neighbours with level 2,
            and so on, so the hop count to a node is `level - 1`.
            Only nodes that were assigned a level during the search get a
            value written here.
    '''
    result_vector = Vector.from_type(UINT64, matrix.nrows)
    # Frontier of the current level; starts with just the source node.
    known_nodes_vector = Vector.from_type(BOOL, matrix.nrows)
    known_nodes_vector[source] = True

    not_done = True
    level = 1

    # At most `nrows` levels are possible, which also bounds the loop.
    while not_done and level <= matrix.nrows:
        # Stamp the current level onto every node in the frontier.
        result_vector[:, known_nodes_vector] = level
        # Expand one step along the edges, masked by the nodes that already
        # have a level.
        # NOTE(review): descriptor.ooco is presumably the "complement mask"
        # descriptor, so already-labelled nodes are excluded -- confirm
        # against the pygraphblas version in use.
        known_nodes_vector = result_vector.vxm(matrix, mask=result_vector,
                                               desc=descriptor.ooco)
        # Stop as soon as the new frontier is empty.
        not_done = known_nodes_vector.reduce_bool()
        level += 1
    return result_vector

# This is not needed for Q1, but keeping it in case we'd need it
def naive_bfs_parents(matrix, source):
    '''
    Breadth-first search from `source` that tracks parent vertices instead of
    levels.

    Input:
        matrix: adjacency matrix describing the graph
        source: source node index
    Returns:
        parent_vertices_vector: parent vertices vector, i.e. the result of the
            last MIN_FIRST vector-matrix product computed in the loop.

    Side effects: prints the source index and logs the level counter at INFO
    on every iteration.

    NOTE(review): parent_vertices_vector is rebound to a fresh vxm result on
    every iteration (no `out=`/accumulator is passed), so it looks like parents
    found in earlier iterations are not carried over into the returned
    vector -- confirm before relying on this function.
    '''
    # Current frontier; each frontier entry stores a vertex index.
    wavefront_vector = Vector.from_type(INT64, matrix.nrows)
    # Vector holding 0..nrows-1, i.e. every vertex's own index.
    vertex_index_vector = Vector.from_list([x for x in range(matrix.nrows)])
    parent_vertices_vector = Vector.from_type(INT64, matrix.nrows)
    # Seed the frontier with the source, storing its own index as value.
    wavefront_vector[source] = source
    level = 1
    not_done = True
    print(F'SOURCE: {source}')
    # At most `nrows` iterations; stops early once the frontier is empty.
    while not_done and level <= matrix.nrows:
        # Boolean pattern of the parent vector as it is before this step.
        parent_vertices_vector_boolean_1 = pattern_vec(parent_vertices_vector)
        # Push the frontier's stored indices one step along the edges using
        # the MIN_FIRST semiring, masked by the pattern computed above.
        # NOTE(review): descriptor.ooco is presumably the "complement mask"
        # descriptor -- confirm.
        with semiring.MIN_FIRST:
            parent_vertices_vector = wavefront_vector.vxm(matrix, mask=parent_vertices_vector_boolean_1, desc=descriptor.ooco)
        # Boolean pattern of the vertices that just received a parent.
        parent_vertices_vector_boolean_2 = pattern_vec(parent_vertices_vector)
        # New frontier: own index of every vertex in that pattern.
        # NOTE(review): the mask here is passed as `.vector[0]` (an attribute
        # of the wrapper object) whereas the vxm call above passes the Vector
        # itself -- confirm this is intentional.
        wavefront_vector = vertex_index_vector.emult(parent_vertices_vector_boolean_2, mask=parent_vertices_vector_boolean_1.vector[0], desc=descriptor.ooco)
        level += 1
        # Logs the already-incremented level counter.
        log.info(level)
        not_done = wavefront_vector.reduce_bool()
    return parent_vertices_vector


In [None]:
# Query 1
def shortest_distance_over_frequent_communication_paths(person, replyOf, knows, hasCreator, num_of_interactions, person1_id, person2_id):
    '''
    Query 1: hop count between two persons over an overlay graph.

    Input:
        person: person vertex set; its `index2id` mapping is used to translate
            `person1_id` / `person2_id` into the indices handed to the BFS.
            NOTE(review): the attribute name suggests an index -> id mapping
            while it is used here as id -> index -- confirm against DataLoader.
        replyOf: comment/comment edge matrix
        knows: person/person edge matrix; used as the multiplication mask and,
            when `num_of_interactions == -1`, as the overlay graph itself
        hasCreator: comment/person edge matrix
        num_of_interactions: threshold; only person pairs whose computed
            interaction value is strictly greater than this are kept in the
            overlay graph. The special value -1 skips the filtering and
            searches the plain `knows` graph.
        person1_id: start person
        person2_id: target person
    Returns:
        BFS level of person2 minus one (naive_bfs_levels stores the source
        with level 1, so this is the hop count).
        NOTE(review): when person2 has no level (not reachable), the lookup
        on the level vector is not guarded -- confirm the desired behaviour.
    '''
    person1_id_remapped = person.index2id[person1_id]
    person2_id_remapped = person.index2id[person2_id]

    if num_of_interactions == -1:
        # No interaction filter requested: search the friendship graph
        # directly and skip the (otherwise discarded) overlay computation.
        overlay_graph = knows
    else:
        hasCreatorTransposed = hasCreator.transpose()

        # (hasCreator^T x replyOf) x hasCreator, restricted to the pattern of
        # `knows`: relates a person to the creator of the comment replied to.
        personA_to_comment2 = hasCreatorTransposed @ replyOf
        person_to_person = personA_to_comment2.mxm(hasCreator, mask=knows)

        # Keep only entries strictly greater than the threshold, then reduce
        # the result to its structure.
        person_to_person_filtered = person_to_person.select(lib.GxB_GT_THUNK, num_of_interactions)
        overlay_graph = person_to_person_filtered.pattern()

    levels = naive_bfs_levels(overlay_graph, person1_id_remapped)

    result = levels[person2_id_remapped] - 1 # Get hop count

    return result
    

In [None]:
# Example run of Query 1; the query parameters are passed by keyword so each
# number's meaning is visible at the call site.
x = shortest_distance_over_frequent_communication_paths(
    person,
    replyOf,
    knows,
    hasCreator,
    num_of_interactions=0,
    person1_id=58,
    person2_id=402,
)
print(f'RESULT: {x}')