In [None]:
from pygraphblas import *
from _pygraphblas import lib
import pygraphblas.descriptor
import csv
import sys
import logging
import glob
import operator
from data_loader import DataLoader



In [None]:
# Setup logger
# A fresh handler is built on every run of this cell, so any handler left on
# the logger by a previous run is removed first. Without this, re-executing
# the cell in a live kernel stacks handlers and every message is printed
# once per execution of the cell.
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)-5s %(message)s'))
log = logging.getLogger(__name__)
# Do not forward records to ancestor loggers; this handler is the only sink.
log.propagate = False
for stale_handler in list(log.handlers):
    log.removeHandler(stale_handler)
log.addHandler(handler)
log.setLevel(logging.INFO)

### Load data

In [None]:
# Location of the converted CSV data set, given as a relative path
# (presumably resolved against the notebook's working directory -- confirm
# against DataLoader).
data_dir = 'sf1k-converted/'
loader = DataLoader(data_dir)

# load_all_csvs() returns three objects that the rest of the notebook treats
# as globals:
#   mappings - indexed below as mappings['person'][<original person id>] to
#              obtain the index used inside the matrices
#   matrices - indexed below by relation name ('hasCreator', 'replyOf', 'knows')
#   vertices - not referenced by the cells visible in this notebook section
vertices, mappings, matrices = loader.load_all_csvs()

### Queries

In [None]:
def pattern_vec(vec):
    '''
    Input:
        vec: vector to derive a boolean companion vector from
    Returns:
        a new BOOL vector with the same size as vec, filled by applying the
        BOOL.ONE unary operator to vec (vec itself is not modified here)
    '''
    size = vec.size
    structure = Vector.from_type(BOOL, size)
    vec.apply(BOOL.ONE, out=structure)
    return structure
    

def naive_bfs_levels(matrix, source):
    '''
    Breadth-first search that labels vertices with the iteration in which
    they were reached.

    Input:
        matrix: adjacency matrix describing the graph
        source: source node index
    Returns:
        result_vector: vector of 1-based BFS levels. The source is stored
        with level 1 and each later iteration stores the next integer, so a
        caller wanting the hop count subtracts 1. Only vertices that were
        assigned a level are written to the vector.
    '''
    result_vector = Vector.from_type(UINT64, matrix.nrows)
    known_nodes_vector = Vector.from_type(BOOL, matrix.nrows)

    # The first frontier consists of the source only.
    known_nodes_vector[source] = True
    not_done = True
    level = 1

    # level is capped at matrix.nrows so the loop always terminates.
    while not_done and level <= matrix.nrows:
        # Write the current level into result_vector, masked by the frontier.
        result_vector[:,known_nodes_vector] = level
        # Next frontier: product of the labelled vertices with the adjacency
        # matrix, masked by result_vector under descriptor.ooco (presumably
        # the complemented mask, i.e. only not-yet-labelled vertices are
        # kept -- confirm against the pygraphblas descriptor naming).
        known_nodes_vector = result_vector.vxm(matrix, mask=result_vector,
                  desc=descriptor.ooco)
        not_done = known_nodes_vector.reduce_bool()
        level += 1
        # Per-iteration dump kept for debugging only; it is formatted lazily
        # and stays silent unless the notebook logger is set to DEBUG.
        log.debug('BFS levels so far: %s', result_vector)
    return result_vector

# This is not needed for Q1, but keeping it in case we'd need it
def naive_bfs_parents(matrix, source):
    '''
    Wavefront-style BFS intended to record, for each discovered vertex, the
    vertex it was reached from. Not used by Query 1.

    Input:
        matrix: adjacency matrix describing the graph
        source: source node index
    Returns:
        parent_vertices_vector: the vector produced by the last vxm executed
        in the loop

    NOTE(review): parent_vertices_vector is rebound to the vxm result on
    every iteration (no out= argument is passed), so it looks like parents
    found in earlier iterations are not accumulated -- confirm before
    relying on this function.
    '''
    wavefront_vector = Vector.from_type(INT64, matrix.nrows)
    # Vector holding 0 .. nrows-1; used below to build the next wavefront
    # from the newly discovered vertices.
    vertex_index_vector = Vector.from_list([x for x in range(matrix.nrows)])
    parent_vertices_vector = Vector.from_type(INT64, matrix.nrows)
    # The wavefront stores each vertex's own index as its value, starting
    # with the source.
    wavefront_vector[source] = source
    level = 1
    not_done = True
    print(F'SOURCE: {source}')
    # level is capped at matrix.nrows so the loop always terminates.
    while not_done and level <= matrix.nrows:
        # Boolean companion of the parent vector as it was before this step.
        parent_vertices_vector_boolean_1 = pattern_vec(parent_vertices_vector) #p
        # Multiply the wavefront by the adjacency matrix under MIN_FIRST,
        # masked by the previous parents with descriptor.ooco (presumably a
        # complemented mask -- confirm against pygraphblas).
        with semiring.MIN_FIRST:
            parent_vertices_vector = wavefront_vector.vxm(matrix, mask=parent_vertices_vector_boolean_1, desc=descriptor.ooco)
        # Boolean companion of the freshly computed parent vector.
        parent_vertices_vector_boolean_2 = pattern_vec(parent_vertices_vector) #p
        # Next wavefront: vertex indices combined element-wise with the new
        # boolean vector.
        # NOTE(review): the mask here is passed as the raw `.vector[0]`
        # handle, whereas the vxm call above passes the Vector object itself
        # -- confirm which form this pygraphblas version expects.
        wavefront_vector = vertex_index_vector.emult(parent_vertices_vector_boolean_2, mask=parent_vertices_vector_boolean_1.vector[0], desc=descriptor.ooco)
        level += 1
        log.info(level)
        # NOTE(review): the wavefront holds vertex indices, so a wavefront
        # containing only index 0 may reduce to False and stop the search
        # early -- confirm.
        not_done = wavefront_vector.reduce_bool()
    return parent_vertices_vector


In [None]:
# Query 1
def shortest_distance_over_frequent_communication_paths(matrices, num_of_interactions, person1_id, person2_id):
    '''
    Query 1: hop count between two persons in the overlay graph of people
    who know each other and interact frequently enough.

    Input:
        matrices: dict of relation matrices. 'knows' is always read;
            'hasCreator' and 'replyOf' are read unless
            num_of_interactions == -1
        num_of_interactions: an edge is kept only where the computed
            interaction value is strictly greater than this threshold.
            The value -1 disables the filter and the 'knows' matrix is used
            as the overlay graph directly
        person1_id: original ID of the start person
        person2_id: original ID of the target person
    Returns:
        number of hops from person1 to person2 in the overlay graph, or -1
        when person2 was not reached by the BFS

    Both IDs are translated through the notebook-level mappings['person'].
    '''
    # Imported locally so this cell does not depend on what the star import
    # at the top of the notebook happens to export. NoValue is the exception
    # pygraphblas raises when an element with no stored value is read.
    from pygraphblas.base import NoValue

    person1_id_remapped = mappings['person'][person1_id]
    person2_id_remapped = mappings['person'][person2_id]

    log.info(f'person1 ID {person1_id} remapped to {person1_id_remapped}')    
    log.info(f'person2 ID {person2_id} remapped to {person2_id_remapped}')    

    if num_of_interactions == -1:
        # No interaction filter requested: skip the matrix products entirely
        # instead of computing them and throwing the result away.
        overlay_graph = matrices['knows']
    else:
        hasCreatorTransposed = matrices['hasCreator'].transpose()

        personA_to_comment2 = hasCreatorTransposed @ matrices['replyOf']

        # Restrict the person-to-person product to pairs present in 'knows'.
        person_to_person = personA_to_comment2.mxm(matrices['hasCreator'], mask=matrices['knows'])

        # NOTE(review): the threshold is applied to person_to_person as
        # computed, i.e. in one direction only; an earlier draft sketched
        # adding the transpose to make it mutual. Confirm whether the query
        # requires the threshold to hold in both directions.
        person_to_person_filtered = person_to_person.select(lib.GxB_GT_THUNK, num_of_interactions)
        print(f'Person to person filtered: {person_to_person_filtered}')

        overlay_graph = person_to_person_filtered.pattern()

    print(f'Overlay graph is: {overlay_graph}')
    levels = naive_bfs_levels(overlay_graph, person1_id_remapped)

    # naive_bfs_levels stores 1-based levels and writes nothing for vertices
    # it never reached, so a missing entry means there is no path.
    try:
        person2_level = levels[person2_id_remapped]
    except NoValue:
        return -1

    return person2_level - 1 # Get hop count
    

In [None]:
# Example run of Query 1: threshold 0 between the persons with original
# IDs 58 and 402.
x = shortest_distance_over_frequent_communication_paths(
    matrices,
    num_of_interactions=0,
    person1_id=58,
    person2_id=402,
)
print(f'RESULT: {x}')