In [None]:
from pygraphblas import *
from _pygraphblas import lib
import pygraphblas.descriptor
import csv
import sys
import logging
import glob
import operator
from data_loader import DataLoader
from timeit import default_timer as timer

In [None]:
# Setup logger
log = logging.getLogger(__name__)
log.propagate = False
log.setLevel(logging.INFO)

# logging.getLogger returns the same logger object every time this cell runs,
# so handlers attached by earlier executions are still on it. Remove them
# first; otherwise each re-run would stack one more StreamHandler and every
# message would be printed once per accumulated handler.
for stale_handler in list(log.handlers):
    log.removeHandler(stale_handler)

handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)-5s %(message)s'))
log.addHandler(handler)

In [None]:
# Location and format of the input data set; both are passed to DataLoader.
data_dir = 'csvs/o1k/'
data_format = 'csv'
loader = DataLoader(data_dir, data_format)

# Vertex sets. The query functions later use `person.id2index` when they
# build their result rows.
person = loader.load_vertex('person')
forum = loader.load_vertex('forum')
tag = loader.load_vertex('tag')
# 'name' column of the tag vertices. MemberFriends turns a tag name into a
# vector index with tagNames.index(name), so the position in this sequence is
# presumably the tag's index in the hasTag matrix -- verify in DataLoader.
tagNames = loader.load_extra_columns('tag', ['name'])

# Edge matrices. Judging by how MemberFriends multiplies them, rows seem to
# follow the first vertex type and columns the second (e.g. hasTag would be
# forum x tag) -- TODO confirm against DataLoader.load_edge.
knows = loader.load_edge('knows', person, person)
hasTag = loader.load_edge('hasTag', forum, tag)
hasMember = loader.load_edge('hasMember', forum, person)


# NOTE(review): disabled call left over from an earlier version; remove it or
# document when it should be used instead of the individual loads above.
#vertices, mappings, matrices = loader.load_all_csvs()

In [None]:
def MemberFriends(t):
    '''
    Select the people reachable through forums carrying tag `t` and cut the
    `knows` matrix down to those people.

    Input:
        t: tag name, looked up with tagNames.index(t) (for a plain list this
           raises ValueError when the name is missing)
    Returns:
        resultMatrix: BOOL matrix extracted from `knows`; row/column i
                      corresponds to the i-th selected person index
        idList: list of [i, person_index] pairs mapping each row/column of
                resultMatrix back to the index it had in the people vector
    '''
    tagIndex = tagNames.index(t)
    tagVector = Vector.from_type(BOOL, hasTag.ncols)
    tagVector[tagIndex] = True

    # tag -> forums carrying the tag -> members of those forums
    relevantForumsVector = tagVector.vxm(hasTag.transpose())
    relevantPeopleVector = relevantForumsVector.vxm(hasMember)

    resultMatrix = Matrix.from_type(BOOL, relevantPeopleVector.nvals, relevantPeopleVector.nvals)

    # Pull the selected indices out of the vector exactly once. The same list
    # drives both the sub-matrix extraction and the id mapping, so the two can
    # never disagree on ordering and the tuples are not extracted repeatedly.
    personIndices = relevantPeopleVector.to_lists()[0]
    knows.extract_matrix(personIndices, personIndices, out=resultMatrix)

    idList = [[localIndex, personIndex] for localIndex, personIndex in enumerate(personIndices)]

    return resultMatrix, idList

In [None]:
def naive_bfs_levels(matrix, source):
    '''
    Level-synchronous breadth-first search from a single vertex.

    Input:
        matrix: adjacency matrix describing the graph
        source: index of the start vertex
    Returns:
        levels: UINT64 vector with one entry per vertex reached from `source`.
                The source gets 1, its neighbours 2, and so on (hop count + 1);
                vertices that are never reached get no entry.
    '''
    levels = Vector.from_type(UINT64, matrix.nrows)
    frontier = Vector.from_type(BOOL, matrix.nrows)
    frontier[source] = True

    depth = 1
    # At most one round per vertex.
    while depth <= matrix.nrows:
        # Stamp the current depth on everything in the frontier.
        levels[:, frontier] = depth
        # Expand from all vertices that already have a level. `levels` is also
        # the mask; descriptor.ooco is presumably the complemented-mask
        # descriptor, so only vertices without a level survive -- TODO confirm.
        # Numbering from 1 keeps every stored level non-zero.
        frontier = levels.vxm(matrix, mask=levels, desc=descriptor.ooco)
        depth += 1
        if not frontier.reduce_bool():
            break
    return levels

In [None]:
def query4(k,t):
    '''
    Score every person selected by MemberFriends(t) with one naive_bfs_levels
    run per person and return the k best.

    Input:
        k: number of result rows to return
        t: tag name, forwarded to MemberFriends
    Returns:
        list of at most k [person.id2index[...], score] rows, ordered by
        score descending and, for equal scores, by the first column ascending.
        score = (r-1)^2 / ((n-1) * s) where n is the number of selected
        people, r the number of vertices the BFS reached (source included) and
        s the summed hop distance to them; it is 0 when s or n-1 is 0.
    '''
    matrix, idList = MemberFriends(t)
    n = len(idList)

    resultList = []
    for localIndex, personKey in idList:
        levels = naive_bfs_levels(matrix, localIndex)
        r = levels.nvals
        # BFS levels start at 1 for the source, hence the -1 per entry.
        s = sum(level - 1 for _, level in levels)
        if s == 0 or n - 1 == 0:
            score = 0
        else:
            score = ((r - 1) * (r - 1)) / ((n - 1) * s)
        # NOTE(review): personKey is the index MemberFriends reports for this
        # person, yet it is used as a key of `id2index` -- confirm the mapping
        # direction in DataLoader.
        resultList.append([person.id2index[personKey], score])

    # Highest score first; ties broken by ascending first column.
    resultList.sort(key=lambda row: (-row[1], row[0]))
    return resultList[:k]

In [None]:
# Time one end-to-end run of query4 (one naive_bfs_levels call per person).
start = timer()
print(query4(3,'Bill_Clinton'))
end = timer()
# Elapsed wall-clock seconds; the measured window includes printing the result.
print(end - start)
# Expected top 3 for 'Bill_Clinton':
# 385 492 819 with centrality values 0.5290135396518375 0.5259615384615384 0.5249520153550864

In [None]:
def push_pull_bfs_levels(matrix,source):
    '''
    Single-source BFS that switches between a "push" step (frontier.vxm(matrix))
    and a "pull" step (matrix.mxv(frontier)) depending on frontier density.

    Input:
        matrix: adjacency matrix describing the graph
        source: index of the start vertex
    Returns:
        levels: UINT64 vector with one entry per reached vertex; the source
                gets 1, its neighbours 2, and so on. Unreached vertices get
                no entry.
    '''
    # Frontier density (frontier size / vertex count) around which the
    # direction may change.
    switch_ratio = 0.2

    levels = Vector.from_type(UINT64, matrix.nrows)
    frontier = Vector.from_type(BOOL, matrix.nrows)
    frontier[source] = True

    previous_density = frontier.nvals / matrix.nrows
    use_push = True
    depth = 1

    # At most one round per vertex.
    while depth <= matrix.nrows:
        levels[:, frontier] = depth

        # Both directions run under ANY_PAIR and use `levels` as the mask, so
        # vertices that already have a level are filtered out (descriptor.ooco
        # is presumably the complemented-mask descriptor -- TODO confirm).
        with semiring.ANY_PAIR:
            if use_push:
                frontier = frontier.vxm(matrix, mask=levels, desc=descriptor.ooco)
            else:
                # NOTE(review): mxv walks the edges in the opposite direction;
                # this matches the push step only for a symmetric matrix --
                # confirm the graph is undirected.
                frontier = matrix.mxv(frontier, mask=levels, desc=descriptor.ooco)

        density = frontier.nvals / matrix.nrows
        # Growing and dense frontier -> pull; shrinking and sparse -> push.
        # Otherwise keep the current direction.
        if density > previous_density and density > switch_ratio:
            use_push = False
        elif density < previous_density and density < switch_ratio:
            use_push = True
        previous_density = density

        depth += 1
        if not frontier.reduce_bool():
            break

    return levels
    

In [None]:
def query4_push_pull_bfs(k,t):
    '''
    Same ranking as query4, but each per-person traversal is done with
    push_pull_bfs_levels.

    Input:
        k: number of result rows to return
        t: tag name, forwarded to MemberFriends
    Returns:
        list of at most k [person.id2index[...], score] rows, ordered by
        score descending and, for equal scores, by the first column ascending.
        score = (r-1)^2 / ((n-1) * s) where n is the number of selected
        people, r the number of vertices the BFS reached (source included) and
        s the summed hop distance to them; it is 0 when s or n-1 is 0.
    '''
    matrix, idList = MemberFriends(t)
    n = len(idList)

    resultList = []
    for localIndex, personKey in idList:
        levels = push_pull_bfs_levels(matrix, localIndex)
        r = levels.nvals
        # BFS levels start at 1 for the source, hence the -1 per entry.
        s = sum(level - 1 for _, level in levels)
        if s == 0 or n - 1 == 0:
            score = 0
        else:
            score = ((r - 1) * (r - 1)) / ((n - 1) * s)
        # NOTE(review): personKey is the index MemberFriends reports for this
        # person, yet it is used as a key of `id2index` -- confirm the mapping
        # direction in DataLoader.
        resultList.append([person.id2index[personKey], score])

    # Highest score first; ties broken by ascending first column.
    resultList.sort(key=lambda row: (-row[1], row[0]))
    return resultList[:k]

In [None]:
# Time one end-to-end run of the push/pull BFS variant of query 4.
start = timer()
print(query4_push_pull_bfs(3,'Bill_Clinton'))
end = timer()
# Elapsed wall-clock seconds; the measured window includes printing the result.
print(end - start)
# Expected top 3 for 'Bill_Clinton':
# 385 492 819 with centrality values 0.5290135396518375 0.5259615384615384 0.5249520153550864

In [None]:
def MSBFS_levels(matrix,sourceVertices):
    '''
    Multi-source BFS: every row of `sourceVertices` is an independent
    traversal, and all of them advance one level per matrix product.

    Input:
        matrix: adjacency matrix describing the graph
        sourceVertices: matrix whose entries mark the start vertices of each
                        row's traversal; it is used as the first frontier
    Returns:
        levelMatrix: UINT64 matrix with the shape of `sourceVertices`. Entry
                     (i, j) is the level at which traversal i reached vertex
                     j, counted from 0 for the start vertices; vertices that
                     are never reached get no entry.
    '''
    levelMatrix = Matrix.from_type(UINT64, sourceVertices.nrows, sourceVertices.ncols)
    frontier = sourceVertices

    depth = 0
    # At most one round per vertex.
    while depth < matrix.nrows:
        # Record the current depth for everything in the frontier.
        levelMatrix.assign_scalar(depth, mask=frontier)
        # Mask with the *pattern* of levelMatrix: level 0 is a stored value of
        # zero, so the mask has to look at which entries exist rather than at
        # their values. descriptor.ooco is presumably the complemented-mask
        # descriptor, i.e. only not-yet-visited positions survive -- TODO confirm.
        with semiring.LOR_LAND_BOOL:
            frontier = frontier.mxm(matrix, mask=levelMatrix.pattern(), desc=descriptor.ooco)
        depth += 1
        if not frontier.reduce_bool():
            break
    return levelMatrix

In [None]:
def query4_msbfs(k,t):
    '''
    Same ranking as query4, computed with a single MSBFS_levels call that
    starts one traversal from every selected person.

    Input:
        k: number of result rows to return
        t: tag name, forwarded to MemberFriends
    Returns:
        list of at most k [person.id2index[...], score] rows, ordered by
        score descending and, for equal scores, by the first column ascending.
        score = (r-1)^2 / ((n-1) * s) where n is the number of rows of the
        level matrix, r the number of entries in the person's row (source
        included) and s the sum of the levels in that row; it is 0 when s or
        n-1 is 0.
    '''
    matrix, idList = MemberFriends(t)

    # Diagonal BOOL matrix: traversal i starts at vertex i.
    size = matrix.nrows
    sources = Matrix.from_lists(range(size), range(size), [True] * size)
    levelMatrix = MSBFS_levels(matrix, sources)
    n = levelMatrix.nrows

    resultList = []
    for localIndex, personKey in idList:
        row = levelMatrix.extract_row(localIndex)
        r = row.nvals
        # MSBFS levels start at 0 for the source, so the plain sum is already
        # the total hop distance.
        s = row.reduce_int()
        if s == 0 or n - 1 == 0:
            score = 0
        else:
            score = ((r - 1) * (r - 1)) / ((n - 1) * s)
        # NOTE(review): personKey is the index MemberFriends reports for this
        # person, yet it is used as a key of `id2index` -- confirm the mapping
        # direction in DataLoader.
        resultList.append([person.id2index[personKey], score])

    # Highest score first; ties broken by ascending first column.
    resultList.sort(key=lambda entry: (-entry[1], entry[0]))
    return resultList[:k]

In [None]:
start = timer()

# (k, tag name) pairs to evaluate. The loop variables are deliberately not
# called `input`: at notebook top level that name would replace the built-in
# input() for every cell executed afterwards in the same kernel.
queries = [
    (3, 'Bill_Clinton'),
    (4, 'You_Make_Me_Wanna...'),
    (3, 'The_Diary_of_Horace_Wimp'),
    (5, 'Brian_Dabul'),
    (6, 'Muhammad_Ali_Jinnah'),
    (3, 'Alexandra_of_Denmark'),
]
for top_k, tag_name in queries:
    print(query4_msbfs(top_k, tag_name))

end = timer()
# Total wall-clock seconds for all queries; printing is part of the measured window.
print(end - start)