In [None]:
from pygraphblas import *
from _pygraphblas import lib
import pygraphblas.descriptor
import csv
import sys
import logging
import glob
import os
import operator
from data_loader import DataLoader
from timeit import default_timer as timer

In [None]:
# Setup logger
# Notebook-local logger that writes timestamped messages through its own
# stream handler. Propagation is switched off so records are not passed on
# to ancestor loggers (which could print them a second time).
log = logging.getLogger(__name__)
log.propagate = False
log.setLevel(logging.INFO)

handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)-5s %(message)s'))

# Re-running this cell must not stack another handler on the same logger
# (each message would then be emitted once per run), so handlers left over
# from an earlier run are detached before the fresh one is attached.
for stale_handler in list(log.handlers):
    log.removeHandler(stale_handler)
log.addHandler(handler)

In [None]:
data_dir = 'csvs/o1k/'
data_format = 'csv'

# Several CSVs share the edge name 'isLocatedIn'. Prefix the edge name with its
# source node type (e.g. person_isLocatedIn_place -> person_personIsLocatedIn_place)
# so that each file carries a distinct edge name.
files  = glob.glob(f'{data_dir}*isLocatedIn*.csv')
for file in files:
    # os.path.split copes with data_dir containing any number of path components;
    # splitting on '/' and unpacking two names fails for 'csvs/o1k/<file>'.
    directory,fileName = os.path.split(file)
    # maxsplit=2 keeps any further underscores (and the extension) in to_node.
    from_node, edge_name, to_node = fileName.split('_', 2)
    if edge_name.lower().startswith(from_node.lower()):
        # Already prefixed by an earlier run (reachable where glob matching is
        # case-insensitive); renaming again would double the prefix.
        continue
    edge_name = edge_name[0].upper() + (edge_name[1:])
    edge_name = "{}{}".format(from_node,edge_name)
    os.rename(file,os.path.join(directory,"{}_{}_{}".format(from_node,edge_name,to_node)))

In [None]:
# Load the graph from the CSV directory configured above.
loader = DataLoader(data_dir, data_format)

# Vertex sets. The queries below read `person.id2index` / `person.index2id`
# from the object returned for 'person'.
person = loader.load_vertex('person')
place = loader.load_vertex('place')
organisation = loader.load_vertex('organisation')
tag = loader.load_vertex('tag')

# 'name' column of the places; the queries call placeNames.index(name) on it
# to turn a place name into a vector position.
placeNames = loader.load_extra_columns('place',['name'])

# Edge matrices, each loaded with its (source, target) vertex sets.
# NOTE(review): presumably rows are sources and columns are targets -- confirm against DataLoader.
isPartOf = loader.load_edge('isPartOf', place, place)
personIsLocatedIn = loader.load_edge('personIsLocatedIn', person, place)
organisationIsLocatedIn = loader.load_edge('organisationIsLocatedIn', organisation, place)
workAt = loader.load_edge('workAt', person, organisation)
studyAt = loader.load_edge('studyAt', person, organisation)
knows = loader.load_edge('knows', person, person)
hasInterest = loader.load_edge('hasInterest', person, tag)

# Unused alternative: load everything in one call.
#vertices, mappings, matrices = loader.load_all_csvs()

In [None]:
#Query 3
def RelevantPeopleInPlace(placeName):
    """Return the vector of people connected to a place or to places inside it.

    The place is looked up by name, then expanded along the transposed
    `isPartOf` matrix for up to two steps. A person is part of the result when
    they are located in one of those places, or work/study at an organisation
    located in one of them.

    Args:
        placeName: name of the place; resolved with `placeNames.index`, so an
            unknown name raises whatever that lookup raises.

    Returns:
        The element-wise sum (under `binaryop.PLUS_BOOL`) of the
        "works at", "studies at" and "is located in" person vectors.
    """
    placeID = placeNames.index(placeName)

    #Relevant places: the place itself plus what is reachable in one and two
    #steps along the transposed isPartOf matrix
    isPartOfTransposed = isPartOf.transpose()
    placeVector = Vector.from_type(BOOL,isPartOfTransposed.nrows)
    placeVector[placeID] = True
    #The one-step vector is computed once and reused for the second step
    oneStepPlacesVector = placeVector.vxm(isPartOfTransposed)
    twoStepPlacesVector = oneStepPlacesVector.vxm(isPartOfTransposed)
    relevantPlacesVector = placeVector + oneStepPlacesVector + twoStepPlacesVector

    #People located in the given place
    peopleInThePlaceVector = personIsLocatedIn.mxv(relevantPlacesVector)

    #People working at a Company or studying at a University located in the given place
    organisationsVector = organisationIsLocatedIn.mxv(relevantPlacesVector)
    with semiring.LOR_LAND_BOOL:
        peopleWorkAtVector = workAt.mxv(organisationsVector)
        peopleStudyAtVector = studyAt.mxv(organisationsVector)

    #All the relevant people in the given place
    with binaryop.PLUS_BOOL:
        relevantPeopleVector = peopleWorkAtVector + peopleStudyAtVector + peopleInThePlaceVector
    return relevantPeopleVector

In [None]:
def HHopKnows(h,vec):
    """Return everything reachable from `vec` in 1..h steps along `knows`.

    The previous implementation counted `h` down inside its loop and then
    recursed with the already-decremented value, so for h >= 3 it combined
    only the h-step and the 1-step neighbours and dropped the steps in
    between. This version expands a frontier one step at a time and
    accumulates every intermediate frontier.

    Args:
        h: maximum number of steps; values below 2 give a single step,
            as before.
        vec: start vector (it is multiplied with `knows` via `vxm`).

    Returns:
        A new vector: the `+` combination of the 1-step ... h-step frontiers.
    """
    frontier = vec.vxm(knows)
    reached = frontier
    # range(h - 1) is empty for h < 2, leaving just the single step.
    for _ in range(h - 1):
        frontier = frontier.vxm(knows)
        reached = frontier + reached
    return reached

In [None]:
def query3(k,h,p):
    """Print the k pairs of relevant people sharing the most interest tags.

    For every person returned by `RelevantPeopleInPlace(p)` the function
    counts, per other relevant person within `h` steps of `knows`
    (see `HHopKnows`), how many tags both are interested in. Pairs with a
    non-zero count are printed as `first|second|count`, ordered by count
    (descending), then by the first and the second printed value (ascending).
    Each unordered pair is reported once.

    Args:
        k: maximum number of lines to print.
        h: maximum number of `knows` steps between the two people.
        p: place name, passed to `RelevantPeopleInPlace`.

    NOTE(review): as used in this file, `person.id2index[...]` is indexed with
    vector positions and yields the value that is printed, while
    `person.index2id[...]` goes the other way -- the names look swapped;
    confirm against DataLoader.
    """
    relevantPeopleVector = RelevantPeopleInPlace(p)

    #Visit the people in ascending order of their printed value: map the
    #vector positions, sort, then map back to positions
    relevantPositions = relevantPeopleVector.to_lists()[0]
    sortedPrintedIds = sorted(person.id2index[position] for position in relevantPositions)
    orderedPositions = [person.index2id[printedId] for printedId in sortedPrintedIds]

    #Loop-invariant, so transposed once instead of once per tag per person
    hasInterestTransposed = hasInterest.transpose()

    resultList = []
    #(first, second) pairs already in resultList; gives O(1) detection of the
    #mirrored pair instead of scanning the whole list
    reportedPairs = set()

    for personPosition in orderedPositions:
        personVector = Vector.from_type(BOOL,relevantPeopleVector.size)
        personVector[personPosition] = True

        personKnowsHHopVector = HHopKnows(h,personVector)
        personKnowsHHopVector *= relevantPeopleVector     #only the relevant people are needed

        personHasInterestVector = personVector.vxm(hasInterest)
        numTagsVector = Vector.from_type(INT64,relevantPeopleVector.size)

        #Add up, tag by tag, who else is interested in each of this person's tags
        for tagPosition, _ in personHasInterestVector:
            tagVector = Vector.from_type(BOOL,personHasInterestVector.size)
            tagVector[tagPosition] = True
            numTagsVector += tagVector.vxm(hasInterestTransposed)

        numTagsVector[personPosition] = 0    #p1|p1 should be 0
        numTagsVector *= personKnowsHHopVector

        personID = person.id2index[personPosition]
        for otherPosition, commonTags in numTagsVector:
            otherID = person.id2index[otherPosition]
            if commonTags == 0:
                continue
            if (otherID, personID) in reportedPairs:
                continue    #mirrored pair was already recorded
            reportedPairs.add((personID, otherID))
            resultList.append([personID,otherID,commonTags])

    #Count descending, then first and second value ascending
    resultList.sort(key = lambda entry: (-entry[2], entry[0], entry[1]))

    for element in resultList[:k]:
        print('{}|{}|{}'.format(element[0],element[1],element[2]))

In [None]:
# Run query3 for k=3, h=2 on the place named 'India' and print the elapsed
# wall-clock time in seconds (timeit.default_timer).
#India.id = 38
start = timer()
query3(3,2,'India')
end = timer()
print(end -start)

In [None]:
#Query 3
def RelevantPeopleInPlaceMatrix(placeName):
    """Return the pattern of `knows` restricted to the relevant people of a place.

    The relevant people are obtained from `RelevantPeopleInPlace(placeName)`
    (this function previously carried a verbatim copy of that computation).
    They are placed on the diagonal of a matrix with the dimensions of
    `knows`, and `knows` is multiplied by it from both sides so that only
    rows and columns of relevant people remain.

    Args:
        placeName: name of the place, forwarded to `RelevantPeopleInPlace`.

    Returns:
        `diag.mxm(knows).mxm(diag).pattern()`.
    """
    relevantPeopleVector = RelevantPeopleInPlace(placeName)

    #Creating a diagonal matrix from the people ids (one conversion instead of three)
    relevantPeopleLists = relevantPeopleVector.to_lists()
    personPositions = relevantPeopleLists[0]
    personValues = relevantPeopleLists[1]
    diagMtx = Matrix.from_lists(personPositions,personPositions,personValues, knows.nrows, knows.ncols)

    #Selecting the relevant rows and columns by multiplying the 'knows' matrix from the right then from the left with the diagonal matrix
    returnMtx = diagMtx.mxm(knows).mxm(diagMtx).pattern()

    return returnMtx

In [None]:
def powerMatrix(p,mtx):
    """Return `mtx` raised to the power `p` using repeated `mxm`.

    Iterative, so large exponents do not hit the recursion limit, and an
    exponent below 1 is rejected instead of recursing without end.

    Args:
        p: exponent, an integer >= 1.
        mtx: matrix-like object providing `mxm`.

    Returns:
        `mtx` itself for p == 1, otherwise mtx.mxm(mtx.mxm(...)) with p factors.

    Raises:
        ValueError: if p < 1.
    """
    if p < 1:
        raise ValueError('powerMatrix needs an exponent >= 1, got {}'.format(p))
    result = mtx
    for _ in range(p - 1):
        # Left-multiply, matching the original mtx.mxm(<lower power>) order.
        result = mtx.mxm(result)
    return result

In [None]:
def sortTriples(triple):
    """Sort key for a (row, column, value) triple.

    Both positions are translated through `person.id2index`; the key is the
    negated value followed by the two translated entries, smaller one first.
    """
    firstId = person.id2index[triple[0]]
    secondId = person.id2index[triple[1]]
    if firstId > secondId:
        firstId, secondId = secondId, firstId
    return -triple[2], firstId, secondId

In [None]:
def query3_opt(k,h,p):
    """Matrix-based variant of query 3: print the top-k pairs by shared tags.

    Builds a mask of all pairs of relevant people joined by 1..h steps in
    the restricted `knows` pattern, computes hasInterest x hasInterest^T
    under that mask, keeps the strict upper triangle, and prints the k best
    entries as `first|second|count` (ordering defined by `sortTriples`).

    The previous loop `range(h, h+1)` added only the h-th power to the mask,
    so for h >= 3 pairs that are 2..h-1 steps apart were missing. Every power
    from 1 to h is now accumulated; for h == 2 the same operations are
    performed as before.

    Args:
        k: maximum number of lines to print.
        h: maximum number of steps between the two people; values below 2
            leave just the one-step pattern.
        p: place name, passed to `RelevantPeopleInPlaceMatrix`.
    """
    import heapq

    oneHopMtx = RelevantPeopleInPlaceMatrix(p)

    #Union of the 1..h step patterns; each power is derived from the previous
    #one rather than recomputed from scratch
    maskMtx = oneHopMtx
    hopPowerMtx = oneHopMtx
    for _ in range(h - 1):
        hopPowerMtx = hopPowerMtx.mxm(oneHopMtx)
        maskMtx = maskMtx.eadd(hopPowerMtx)

    resultMatrix = hasInterest.mxm(hasInterest.transpose(),mask=maskMtx)
    #Strict upper triangle: each unordered pair once, no self pairs
    resultMatrix = resultMatrix.triu().offdiag()

    result = heapq.nsmallest(k,resultMatrix,key=sortTriples)
    for res in result:
        #sortTriples already yields the two translated ids, smaller one first
        _, firstId, secondId = sortTriples(res)
        print('{}|{}|{}'.format(firstId, secondId,res[2]))

In [None]:
# Run query3_opt with the same arguments as the query3 timing cell and print
# the elapsed wall-clock time in seconds.
start = timer()
query3_opt(3,2,'India')
end = timer()
print(end -start)