In [None]:
from pygraphblas import *
from _pygraphblas import lib
import pygraphblas.descriptor
import csv
import sys
import logging
import glob
import operator
from data_loader import DataLoader



In [None]:
# Setup logger
# Module-level logger that writes timestamped records to a stream handler and
# does not forward them to the root logger.
log = logging.getLogger(__name__)
log.propagate = False
log.setLevel(logging.INFO)

# Re-running this cell must not stack handlers on the same logger object,
# otherwise every message is emitted once per re-run.
for stale_handler in list(log.handlers):
    log.removeHandler(stale_handler)

handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)-5s %(message)s'))
log.addHandler(handler)

### Load data

In [None]:
# Location of the input data set, passed as-is to the project's DataLoader.
data_dir = 'sf1k-converted/'
loader = DataLoader(data_dir)

# load_all_csvs() yields three objects that the query cells consume.
# NOTE(review): `vertices` is later indexed by matrix index and `matrices` by
# relation name (e.g. 'knows'); the exact container types are defined in
# data_loader - confirm there.
(vertices, mappings, matrices) = loader.load_all_csvs()

### Queries

In [None]:
def pattern_vec(vec):
    '''
    Build a boolean vector from the entries of another vector.

    Input:
        vec: vector whose stored entries are mapped through BOOL.ONE
    Returns:
        pattern: BOOL vector of size vec.size receiving the result of
            applying BOOL.ONE to vec
    '''
    pattern = Vector.from_type(BOOL, vec.size)
    vec.apply(BOOL.ONE, out=pattern)
    return pattern
    

# This is not needed for Q1, but keeping it in case we'd need it
def naive_bfs_levels(matrix, source):
    '''
    Level-by-level breadth-first search starting at `source`.

    Input:
        matrix: adjacency matrix describing the graph
        source: source node index
    Returns:
        levels: UINT64 vector of BFS levels. Numbering starts at 1, which is
            the value written for the initial frontier (the source), so a
            stored level is the hop count plus one.
    '''
    levels = Vector.from_type(UINT64, matrix.nrows)
    frontier = Vector.from_type(BOOL, matrix.nrows)
    frontier[source] = True

    # At most one round per vertex; stop early once the frontier reduces to False.
    for level in range(1, matrix.nrows + 1):
        # Stamp the current level onto every vertex selected by the frontier.
        levels[:,frontier] = level
        # Expand along the edges; the complemented mask excludes vertices
        # that already carry a level.
        frontier = levels.vxm(matrix, mask=levels, desc=descriptor.ooco)
        if not frontier.reduce_bool():
            break
    return levels

def naive_bfs_parents(matrix, source):
    '''
    Breadth-first search from `source` that records one parent per reached
    vertex.

    Input:
        matrix: adjacency matrix describing the graph
        source: source node index
    Returns:
        parent_vertices_vector: INT64 vector holding, for every reached
            vertex, the index of the vertex it was discovered from. The
            source is stored as its own parent; vertices that were never
            reached have no entry.
    '''
    vertex_count = matrix.nrows
    vertex_index_vector = Vector.from_list(list(range(vertex_count)))

    # The source is its own parent. Storing it up front marks it as visited,
    # so it cannot be rediscovered through a cycle.
    parent_vertices_vector = Vector.from_type(INT64, vertex_count)
    parent_vertices_vector[source] = source

    # Every wavefront entry stores its own vertex index. Under MIN_FIRST the
    # product with the adjacency matrix then hands each neighbour the
    # smallest index among the wavefront vertices pointing at it.
    wavefront_vector = Vector.from_type(INT64, vertex_count)
    wavefront_vector[source] = source

    level = 1
    # Termination is decided on the number of stored entries, not on
    # reduce_bool(): the wavefront stores vertex indices, and a wavefront
    # that contains only vertex 0 stores the value 0, which a boolean
    # reduction would presumably report as "empty".
    while wavefront_vector.nvals > 0 and level <= vertex_count:
        # Boolean pattern of everything discovered so far; used complemented
        # so only vertices without a parent can receive one.
        visited_mask = pattern_vec(parent_vertices_vector)
        with semiring.MIN_FIRST:
            discovered_parents = wavefront_vector.vxm(matrix, mask=visited_mask, desc=descriptor.ooco)

        # Keep the parents found on earlier levels. The two operands never
        # share an index (the mask excluded all visited vertices), so the
        # element-wise union only copies entries and never combines two.
        parent_vertices_vector = parent_vertices_vector.eadd(discovered_parents)

        # Next wavefront: the newly discovered vertices, each carrying its
        # own index. Multiplying the index vector with a vector of ones keeps
        # exactly the indices present in discovered_parents.
        # NOTE(review): relies on emult defaulting to multiplication - confirm
        # for the installed pygraphblas version.
        discovered_ones = Vector.from_type(INT64, vertex_count)
        discovered_parents.apply(INT64.ONE, out=discovered_ones)
        wavefront_vector = vertex_index_vector.emult(discovered_ones)

        level += 1
    return parent_vertices_vector


In [None]:
# Query 1
def shortest_distance_over_frequent_communication_paths(matrices, num_of_interactions, person1_id, person2_id, vertices):
    '''
    Shortest path between two persons over "frequent communication" edges.

    Input:
        matrices: mapping from relation name to matrix; the entries
            'hasCreator', 'replyOf' and 'knows' are read
        num_of_interactions: threshold; only person pairs whose interaction
            value is strictly greater than this are kept as edges
        person1_id: matrix index of the start person (BFS source)
        person2_id: matrix index of the target person
        vertices: lookup from matrix index to original id
    Returns:
        list of original ids along the path, ordered from person2 back to
        person1; an empty list when person2 cannot be reached from person1
    '''
    has_creator = matrices['hasCreator']

    # hasCreator^T * replyOf * hasCreator, restricted to pairs present in 'knows'.
    # NOTE(review): presumably hasCreator is message x person and replyOf is
    # message x message, which makes the product a person x person reply
    # count - confirm against the data loader.
    person_to_replied_message = has_creator.transpose() @ matrices['replyOf']
    person_to_person = person_to_replied_message.mxm(has_creator, mask=matrices['knows'])

    # Make the relation symmetric by adding both directions.
    # NOTE(review): this thresholds the sum of both directions; if each
    # direction has to exceed the threshold on its own, filter before
    # combining - confirm against the query specification.
    person_to_person_mutual = person_to_person + person_to_person.transpose()

    # "Frequent" means more interactions than the threshold, so keep the
    # entries above it (the previous '<' kept the infrequent pairs instead).
    frequent_pairs = person_to_person_mutual.select('>', num_of_interactions).pattern()

    parents_vec = naive_bfs_parents(frequent_pairs, person1_id)

    # Copy the stored parents into a dict so that a vertex without an entry
    # can be detected instead of indexing a missing element of the vector.
    reached_ids, parent_ids = parents_vec.to_lists()
    parent_of = dict(zip(reached_ids, parent_ids))

    # Walk from the target back towards the source.
    shortest_path = [person2_id]
    current = person2_id
    while current != person1_id:
        current = parent_of.get(current)
        if current is None:
            # The chain broke before reaching person1: there is no path.
            return []
        shortest_path.append(current)

    original_ids_shortest_path = [vertices[x] for x in shortest_path]
    return original_ids_shortest_path
    

In [None]:
# Example run of Query 1.
# NOTE(review): 4 and 5 are used directly as matrix indices by the query,
# not as original person ids - confirm that this is intended.
p1_id, p2_id = 4, 5
num_of_interactions = 5

shortest_distance_over_frequent_communication_paths(
    matrices=matrices,
    num_of_interactions=num_of_interactions,
    person1_id=p1_id,
    person2_id=p2_id,
    vertices=vertices,
)

In [None]:
#Testing around vec.emult()
# Small fixtures: a and b overlap at indices 0 and 1, c and d only have an
# entry at index 1.
a = Vector.from_type(UINT64, 3)
b = Vector.from_type(UINT64, 3)
c = Vector.from_type(UINT64, 3)
d = Vector.from_type(BOOL, 3)
a[0] = 2
a[1] = 2
b[0] = 3
b[1] = 6
c[1] = 12
d[1] = True

# Exactly one experiment has to stay enabled, otherwise `x` is undefined and
# the print below fails with a NameError on a fresh kernel.
# Enabled: same call shape as in naive_bfs_parents (mask d with the
# descriptor used for the complemented mask there), so index 1 should be
# masked out - presumably leaving only index 0.
x = a.emult(b, mask=d, desc=descriptor.ooco)
#x = a.emult(b, mask=c)
#x = a.emult(d, mask=a)
print(x.to_string())