In [None]:
from pygraphblas import *
from _pygraphblas import lib
import pygraphblas.descriptor
import csv
import sys
import logging
import glob
import operator
from data_loader import DataLoader
from timeit import default_timer as timer

In [None]:
# Setup logger
# Records are written by a dedicated stream handler with a timestamped format.
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)-5s %(message)s'))
log = logging.getLogger(__name__)
# Keep records away from ancestor loggers so each message is printed only once.
log.propagate = False
# logging.getLogger() hands back the same logger object for the same name, so
# re-running this cell would otherwise stack one more handler per execution
# and duplicate every log line. Detach whatever an earlier run attached.
for _stale_handler in list(log.handlers):
    log.removeHandler(_stale_handler)
log.addHandler(handler)
log.setLevel(logging.INFO)

In [None]:
# Directory with the converted CSV input. load_extra_columns() builds paths as
# data_dir + filename, so the value has to end with a path separator.
data_dir = 'sf1k-converted/'
loader = DataLoader(data_dir)

# load_all_csvs() returns three objects that the rest of the notebook uses as:
#   vertices[kind][index] -> original id   (e.g. vertices['person'], vertices['tag'])
#   mappings[kind][id]    -> matrix index  (e.g. mappings['tag'])
#   matrices[name]        -> GraphBLAS matrix (e.g. 'hasTag', 'hasMember', 'knows')
# NOTE(review): these meanings are inferred from how the names are used below;
# confirm against data_loader.DataLoader.
vertices, mappings, matrices = loader.load_all_csvs()

In [None]:
def MemberFriends(t):
    '''
    Build the 'knows' sub-graph restricted to members of forums carrying a tag.

    Input:
        t: tag id, translated to a matrix index through mappings['tag']
    Returns:
        resultMatrix: BOOL matrix extracted from matrices['knows'] using the
            selected person indices for both rows and columns
        idList: list of [local index, original index] pairs, where the local
            index is the row/column position inside resultMatrix and the
            original index is the person's position in the full matrices
    '''
    tagID = mappings['tag'][t]

    # One-hot vector selecting the requested tag.
    # presumably hasTag is forum x tag and hasMember is forum x person
    # (the vector length is hasTag.ncols and hasTag is transposed before the
    # product) -- TODO confirm against the data loader.
    tagVector = Vector.from_type(BOOL, matrices['hasTag'].ncols)
    tagVector[tagID] = True
    relevantForumsVector = tagVector.vxm(matrices['hasTag'].transpose())
    relevantPeopleVector = relevantForumsVector.vxm(matrices['hasMember'])

    memberCount = relevantPeopleVector.nvals
    resultMatrix = Matrix.from_type(BOOL, memberCount, memberCount)

    # Pull the stored indices out of the vector exactly once; the same list
    # drives both the sub-matrix extraction and the index translation table.
    personIndices = relevantPeopleVector.to_lists()[0]
    matrices['knows'].extract_matrix(personIndices, personIndices, out=resultMatrix)

    idList = [[localIndex, personIndex]
              for localIndex, personIndex in enumerate(personIndices)]

    return resultMatrix, idList

In [None]:
def naive_bfs_levels(matrix, source):
    '''
    Level-synchronous breadth-first search from one source node.

    Input:
        matrix: adjacency matrix describing the graph
        source: source node index
    Returns:
        UINT64 vector of length matrix.nrows holding the 1-based BFS level of
        every node that was reached: the source is stored as 1, so the hop
        count of a node is its stored value minus one. Nodes that are never
        assigned a level keep no entry.
    '''
    levels = Vector.from_type(UINT64, matrix.nrows)
    frontier = Vector.from_type(BOOL, matrix.nrows)
    frontier[source] = True

    # Run at most matrix.nrows rounds, stopping early once a round discovers
    # nothing new.
    for depth in range(1, matrix.nrows + 1):
        # Stamp the current depth on every node of the frontier.
        levels[:, frontier] = depth
        # Expand along the edges; `levels` doubles as the mask.
        # NOTE(review): descriptor.ooco presumably means replace-output plus
        # complemented mask, i.e. only nodes without a level survive -- confirm
        # against the pygraphblas descriptor documentation.
        frontier = levels.vxm(matrix, mask=levels,
                              desc=descriptor.ooco)
        if not frontier.reduce_bool():
            break
    return levels

In [None]:
def query4(k,t):
    '''
    Print the k highest-scoring members of the forums tagged with t, running
    one single-source BFS per member.

    Input:
        k: number of result rows to print
        t: tag id, forwarded to MemberFriends
    Returns:
        None; each of the top k rows is printed as [person id, score].

    For every member p the score is (r-1)^2 / ((n-1) * s), where
        n = number of members in the induced sub-graph,
        r = number of nodes the BFS from p assigned a level to (p included),
        s = sum of hop distances from p to those nodes.
    Rows are ordered by descending score, ties broken by ascending person id.
    '''
    matrix, idList = MemberFriends(t)
    n = len(idList)  # loop-invariant: size of the induced sub-graph

    resultList = []
    for localIndex, personIndex in idList:
        bfsResultVector = naive_bfs_levels(matrix, localIndex)
        r = bfsResultVector.nvals
        # naive_bfs_levels stores 1-based levels, so hops = level - 1.
        s = sum(level - 1 for _, level in bfsResultVector)

        # A member that reaches nobody (s == 0) or a single-member graph
        # (n == 1) makes the denominator zero; score such members 0 instead of
        # raising ZeroDivisionError.
        # NOTE(review): 0 is the convention recalled from the SIGMOD 2014
        # contest Query 4 definition -- confirm against the task description.
        denominator = (n - 1) * s
        score = ((r - 1) * (r - 1)) / denominator if denominator else 0.0
        resultList.append([vertices['person'][personIndex], score])

    # Highest score first; equal scores fall back to ascending person id.
    resultList.sort(key=lambda row: (-row[1], row[0]))

    for value in resultList[:k]:
        print(value)

In [None]:
def load_extra_columns(filename, column_names):
    '''
    Read selected columns from a '|'-delimited CSV file located in data_dir.

    Input:
        filename: file name, appended to data_dir
        column_names: short column names; each is matched against the part of
            a header before its first ':' (headers carry type info after ':')
    Returns:
        For exactly one requested column: a flat list with one value per row.
        Otherwise: a list of rows, each a list of values in the order of
        column_names.
    Raises:
        ValueError if a short name does not match exactly one header.
    '''
    csv_path = data_dir + filename
    with open(csv_path, newline='') as csv_file:
        reader = csv.DictReader(csv_file, delimiter='|', quotechar='"')

        headers = reader.fieldnames
        print(headers)

        def resolve(short_name):
            # Single-element unpacking enforces "exactly one matching header".
            (header,) = [candidate for candidate in headers
                         if candidate.split(':')[0] == short_name]
            return header

        selected = [resolve(short_name) for short_name in column_names]

        if len(selected) == 1:
            only_header = selected[0]
            return [row[only_header] for row in reader]

        return [[row[header] for header in selected] for row in reader]
        
# Flat list holding the 'name' column of tag.csv, one entry per data row.
tagNames = load_extra_columns(filename='tag.csv', column_names=['name'])

In [None]:
# Time one end-to-end run of the single-source-BFS variant, including the tag lookup.
start = timer()

# The tag named 'Bill_Clinton' has id 2779.

# TODO: harmonize query input format, use tag.name instead of id
# Translate tag name -> row position in tag.csv -> tag id.
# NOTE(review): this assumes vertices['tag'] follows the row order of tag.csv -- confirm.
tagIndex = tagNames.index('Bill_Clinton')
tagId = vertices['tag'][tagIndex]

query4(3,tagId)
end = timer()
# Elapsed wall-clock time in seconds.
print(end - start)
# Expected result (person ids, then their centrality values):
# 385 492 819 % centrality values 0.5290135396518375 0.5259615384615384 0.5249520153550864

In [None]:
def MSBFS_levels(matrix,sourceVertices):
    '''
    Multi-source breadth-first search: one traversal per row of sourceVertices.

    Input:
        matrix: adjacency matrix describing the graph
        sourceVertices: matrix whose stored entries mark the starting nodes of
            each traversal (row = traversal, column = node)
    Returns:
        UINT64 matrix with the shape of sourceVertices holding 0-based levels:
        starting nodes are stored as 0, nodes found in round i as i. Cells that
        are never assigned keep no entry.
    '''
    levels = Matrix.from_type(UINT64, sourceVertices.nrows, sourceVertices.ncols)
    wavefront = sourceVertices

    # At most matrix.nrows rounds; leave early when a round finds nothing new.
    for depth in range(matrix.nrows):
        levels.assign_scalar(depth, mask=wavefront)
        with semiring.LOR_LAND_BOOL:
            # pattern() is used as the mask so that stored level-0 entries
            # count as visited too.
            # NOTE(review): descriptor.ooco presumably means replace-output
            # plus complemented mask -- confirm against the pygraphblas docs.
            wavefront = wavefront.mxm(matrix, mask=levels.pattern(), desc=descriptor.ooco)
        if not wavefront.reduce_bool():
            break
    return levels

In [None]:
def query4_msbfs(k,t):
    '''
    Print the k highest-scoring members of the forums tagged with t, using a
    single multi-source BFS started from every member at once.

    Input:
        k: number of result rows to print
        t: tag id, forwarded to MemberFriends
    Returns:
        None; each of the top k rows is printed as [person id, score].

    For every member p the score is (r-1)^2 / ((n-1) * s), where
        n = number of rows of the level matrix,
        r = number of stored levels in p's row (p included),
        s = sum of the levels in p's row (levels are 0-based hop counts).
    Rows are ordered by descending score, ties broken by ascending person id.
    '''
    matrix, idList = MemberFriends(t)

    # Diagonal source matrix: traversal i starts at node i.
    i = matrix.nrows
    sources = Matrix.from_lists(range(i), range(i), [True]*i)
    resultMatrix = MSBFS_levels(matrix, sources)
    n = resultMatrix.nrows

    resultList = []
    for localIndex, personIndex in idList:
        vec = resultMatrix.extract_row(localIndex)
        r = vec.nvals
        s = vec.reduce_int()

        # A member that reaches nobody (s == 0) or a single-member graph
        # (n == 1) makes the denominator zero; score such members 0 instead of
        # raising ZeroDivisionError.
        # NOTE(review): 0 is the convention recalled from the SIGMOD 2014
        # contest Query 4 definition -- confirm against the task description.
        denominator = (n - 1) * s
        score = ((r - 1) * (r - 1)) / denominator if denominator else 0.0
        resultList.append([vertices['person'][personIndex], score])

    # Highest score first; equal scores fall back to ascending person id.
    resultList.sort(key=lambda row: (-row[1], row[0]))

    for value in resultList[:k]:
        print(value)

In [None]:
# Time one end-to-end run of the multi-source-BFS variant, including the tag lookup.
start = timer()

# Translate tag name -> row position in tag.csv -> tag id.
# NOTE(review): this assumes vertices['tag'] follows the row order of tag.csv -- confirm.
tagIndex = tagNames.index('Bill_Clinton')
tagId = vertices['tag'][tagIndex]

query4_msbfs(3,tagId)
end = timer()
# Elapsed wall-clock time in seconds.
print(end - start)