In [None]:
from pygraphblas import *
from _pygraphblas import lib
import pygraphblas.descriptor
import csv
import sys
import logging
import glob
import os
import operator
import sys
sys.path.append("..")
from loader.data_loader import DataLoader
from timeit import default_timer as timer

In [None]:
# Set up a notebook-local logger that writes timestamped records through its
# own StreamHandler.
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)-5s %(message)s'))
log = logging.getLogger(__name__)
# Records are not forwarded to ancestor loggers, so only `handler` emits them.
log.propagate = False
# logging.getLogger returns the same object on every call, so re-running this
# cell would otherwise stack one more handler per run and duplicate every
# message. Detach whatever is already attached before adding the fresh handler.
for stale_handler in list(log.handlers):
    log.removeHandler(stale_handler)
log.addHandler(handler)
log.setLevel(logging.INFO)

In [None]:
# Input location and format; both are passed to DataLoader in the next cell.
data_dir = '../../csvs/o1k/'
data_format = 'csv'

In [None]:
# Build the loader from the configured directory and format.
loader = DataLoader(data_dir, data_format)

# Vertex collections. They are handed to load_edge below, and `person` is also
# used by the queries to translate between vector positions and identifiers.
person = loader.load_vertex('person')
place = loader.load_vertex('place')
organisation = loader.load_vertex('organisation')
tag = loader.load_vertex('tag')

# 'name' column of 'place'; the queries look a place up by name in it.
placeNames = loader.load_extra_columns('place',['name'])

# Edge data, one load per (edge name, source vertices, target vertices).
# NOTE(review): the queries call mxm/mxv/transpose on these, so they are
# presumably pygraphblas matrices — confirm against DataLoader.load_edge.
isPartOf = loader.load_edge('isPartOf', place, place)
personIsLocatedIn = loader.load_edge('isLocatedIn', person, place)
organisationIsLocatedIn = loader.load_edge('isLocatedIn', organisation, place)
workAt = loader.load_edge('workAt', person, organisation)
studyAt = loader.load_edge('studyAt', person, organisation)
knows = loader.load_edge('knows', person, person)
hasInterest = loader.load_edge('hasInterest', person, tag)

# Unused alternative kept from the original notebook:
#vertices, mappings, matrices = loader.load_all_csvs()

In [None]:
def RelevantPeopleInPlace(placeName):
    """Return a vector marking the people associated with the named place.

    A person is relevant when they are located in a relevant place, or work or
    study at an organisation located in a relevant place. The relevant places
    are the named place itself plus everything reachable from it through one
    or two transposed ``isPartOf`` edges.

    Parameters:
        placeName: name looked up in the module-level ``placeNames``; its
            position there is used as the place's vector index.

    Returns:
        The combined vector of relevant people (``peopleWorkAt + peopleStudyAt
        + peopleInThePlace`` under ``binaryop.PLUS_BOOL``).

    Raises:
        Whatever ``placeNames.index`` raises for an unknown name (``ValueError``
        if ``placeNames`` is a plain list — TODO confirm its type).
    """
    placeID = placeNames.index(placeName)

    # Relevant places: the start place, its one-hop and its two-hop expansion.
    isPartOfTransposed = isPartOf.transpose()
    placeVector = Vector.from_type(BOOL, isPartOfTransposed.nrows)
    placeVector[placeID] = True
    # The one-hop expansion is computed once and reused for the second hop
    # instead of repeating the same vxm.
    oneHopPlaces = placeVector.vxm(isPartOfTransposed)
    twoHopPlaces = oneHopPlaces.vxm(isPartOfTransposed)
    relevantPlacesVector = placeVector + oneHopPlaces + twoHopPlaces

    # People located in a relevant place.
    peopleInThePlaceVector = personIsLocatedIn.mxv(relevantPlacesVector)

    # People working or studying at an organisation located in a relevant place.
    organisationsVector = organisationIsLocatedIn.mxv(relevantPlacesVector)
    with semiring.LOR_LAND_BOOL:
        peopleWorkAtVector = workAt.mxv(organisationsVector)
        peopleStudyAtVector = studyAt.mxv(organisationsVector)

    # Union of the three groups.
    with binaryop.PLUS_BOOL:
        relevantPeopleVector = peopleWorkAtVector + peopleStudyAtVector + peopleInThePlaceVector
    return relevantPeopleVector

In [None]:
def HHopKnows(h,vec):
    """Return the combination of everything reached from ``vec`` in 1..h hops.

    The previous version multiplied ``knows`` up to the h-th power and then
    recursed with a hop count of zero, so it returned only the h-hop and the
    1-hop results; for ``h >= 3`` the hop counts in between were dropped. This
    version expands the frontier one ``knows`` hop at a time and accumulates
    every intermediate result.

    Parameters:
        h: maximum number of hops. Values below 2 yield the single-hop result,
            as before.
        vec: start vector; only ``vec.vxm(knows)`` is called on it.

    Returns:
        ``vec.vxm(knows)`` combined with ``+`` with each further hop's result.
    """
    frontier = vec.vxm(knows)
    reachable = frontier
    for _ in range(2, h + 1):
        # Advance by one more hop and fold that hop's result into the total.
        frontier = frontier.vxm(knows)
        reachable = reachable + frontier
    return reachable

In [None]:
def query3(k,h,p):
    """Print the top ``k`` person pairs of place ``p`` by shared interest count.

    For every relevant person (see ``RelevantPeopleInPlace``) the function
    counts, per other relevant person returned by ``HHopKnows``, how many
    interest tags the two share. Pairs with a non-zero count are collected once
    (a pair already stored in the opposite direction is skipped), ordered by
    count descending and then by the two identifiers ascending, and the first
    ``k`` are printed as ``first|second|count``.

    Parameters:
        k: number of result lines to print.
        h: hop bound forwarded to ``HHopKnows``.
        p: place name forwarded to ``RelevantPeopleInPlace``.

    Returns:
        None; results are written with ``print``.

    NOTE(review): ``person.id2index`` is indexed with vector positions here and
    ``person.index2id`` with the values it returns; the attribute names suggest
    the opposite direction — confirm against DataLoader.
    """
    relevantPeopleVector = RelevantPeopleInPlace(p)

    # Visit the relevant people in ascending order of their mapped identifier:
    # map positions through id2index, sort, then map back through index2id.
    relevantPositions = relevantPeopleVector.to_lists()[0]
    sortedMappedIds = sorted(person.id2index[position] for position in relevantPositions)
    orderedPositions = [person.index2id[mappedId] for mappedId in sortedMappedIds]

    # Loop-invariant, so transposed once instead of once per tag per person.
    hasInterestTransposed = hasInterest.transpose()

    resultList = []
    # (first, second) identifier pairs already stored, for O(1) reverse lookup.
    seenPairs = set()

    for personPosition in orderedPositions:
        personVector = Vector.from_type(BOOL, relevantPeopleVector.size)
        personVector[personPosition] = True

        personKnowsHHopVector = HHopKnows(h, personVector)
        personKnowsHHopVector *= relevantPeopleVector     # only the relevant people are needed

        personHasInterestVector = personVector.vxm(hasInterest)
        numTagsVector = Vector.from_type(INT64, relevantPeopleVector.size)

        # Accumulate, tag by tag, everyone interested in one of this person's tags.
        for tagEntry in personHasInterestVector:
            tagVector = Vector.from_type(BOOL, personHasInterestVector.size)
            tagVector[tagEntry[0]] = True
            peopleInterestedInTagVector = tagVector.vxm(hasInterestTransposed)
            numTagsVector += peopleInterestedInTagVector

        numTagsVector[personPosition] = 0    # p1|p1 should be 0
        numTagsVector *= personKnowsHHopVector

        personID = person.id2index[personPosition]
        for entry in numTagsVector:
            valueID = person.id2index[entry[0]]
            # Skip zero counts and pairs already stored as (valueID, personID).
            if entry[1] != 0 and (valueID, personID) not in seenPairs:
                seenPairs.add((personID, valueID))
                resultList.append([personID, valueID, entry[1]])

    # Count descending, then first id ascending, then second id ascending.
    # This single key is equivalent to the former three chained stable sorts.
    resultList.sort(key=lambda row: (-row[2], row[0], row[1]))

    for element in resultList[:k]:
        print('{}|{}|{}'.format(element[0],element[1],element[2]))

In [None]:
# Wall-clock timing of the baseline query3 run (original note: India.id = 38).
start = timer()
query3(k=3, h=2, p='India')
end = timer()
print(end - start)

In [None]:
def RelevantPeopleInPlaceMatrix(placeName):
    """Return the relevant people of a place as a diagonal selector matrix.

    The relevant-people vector comes from ``RelevantPeopleInPlace`` (this
    function used to carry a verbatim copy of that computation). Each stored
    entry ``i`` of the vector becomes entry ``(i, i)`` of the result, which has
    the same dimensions as ``knows``.

    Parameters:
        placeName: place name forwarded to ``RelevantPeopleInPlace``.

    Returns:
        Matrix built with ``Matrix.from_lists`` from the vector's indices (used
        as both row and column indices) and values.
    """
    relevantPeopleVector = RelevantPeopleInPlace(placeName)

    # Extract the index/value lists once instead of calling to_lists() three times.
    vectorLists = relevantPeopleVector.to_lists()
    indices = vectorLists[0]
    values = vectorLists[1]

    # Diagonal matrix: row index == column index for every relevant person.
    diagMtx = Matrix.from_lists(indices, indices, values, knows.nrows, knows.ncols)
    return diagMtx

In [None]:
def powerMatrix(p,mtx):
    """Return ``mtx`` raised to the ``p``-th power using ``mtx.mxm``.

    Implemented as a loop rather than recursion, so large exponents do not
    consume call-stack depth, and an exponent below 1 is rejected up front
    instead of recursing until ``RecursionError``.

    Parameters:
        p: exponent, a whole number >= 1.
        mtx: object exposing ``mxm(other)``.

    Returns:
        ``mtx`` itself when ``p == 1``; otherwise ``mtx`` left-multiplied onto
        the running product ``p - 1`` times.

    Raises:
        ValueError: if ``p`` is smaller than 1 or not a whole number.
    """
    if p < 1 or p != int(p):
        raise ValueError('powerMatrix requires a whole-number exponent >= 1, got {!r}'.format(p))
    product = mtx
    for _ in range(int(p) - 1):
        # Same operand order as the recursive form: mtx * (mtx ** (n - 1)).
        product = mtx.mxm(product)
    return product

In [None]:
def sortTriples(triple):
    """Sort key for an ``(i, j, value)`` triple.

    Returns ``(-value, low, high)`` where ``low``/``high`` are the two
    ``person.id2index`` lookups of ``i`` and ``j`` in ascending order, so that
    an ascending sort puts larger values first and breaks ties on the pair.
    """
    negatedValue = -triple[2]
    firstId = person.id2index[triple[0]]
    secondId = person.id2index[triple[1]]
    if firstId > secondId:
        firstId, secondId = secondId, firstId
    return negatedValue, firstId, secondId

In [None]:
def query3_opt(k,h,p):
    """Matrix-based variant of query 3: print the top ``k`` pairs for place ``p``.

    Steps:
      1. Build a mask of person pairs connected by 1..h ``knows`` hops,
         restricted on both sides to the relevant people of ``p``.
      2. Compute ``hasInterest * hasInterest^T`` under that mask and keep the
         strict upper triangle.
      3. Print the ``k`` smallest entries under ``sortTriples``.
      4. If fewer than ``k`` pairs were printed, fill the remaining lines with
         masked pairs that have no entry in the result, printed with count 0.

    Fixes relative to the previous version:
      * The mask loop ran exactly once (``range(h, h+1)``), giving
        ``knows + knows^h``; every hop count 1..h is now accumulated.
      * The zero-fill loop popped from a list that could run empty and raise
        ``IndexError`` when fewer than ``k`` candidate pairs exist; it now
        stops when the candidates are exhausted.

    Parameters:
        k: number of result lines wanted.
        h: maximum number of ``knows`` hops between the two people of a pair.
        p: place name forwarded to ``RelevantPeopleInPlaceMatrix``.

    Returns:
        None; results are written with ``print`` as ``first|second|count``.
    """
    import heapq

    def formatPair(rowIndex, colIndex, count):
        # Output line with the two person.id2index lookups in ascending order.
        firstId = person.id2index[rowIndex]
        secondId = person.id2index[colIndex]
        if firstId > secondId:
            firstId, secondId = secondId, firstId
        return '{}|{}|{}'.format(firstId, secondId, count)

    diagMtx = RelevantPeopleInPlaceMatrix(p)

    # Union of knows^1 .. knows^h, built one hop at a time.
    hopPower = knows
    maskMtx = knows
    for _ in range(2, h + 1):
        hopPower = hopPower.mxm(knows)
        maskMtx = maskMtx.eadd(hopPower)
    # Select the relevant rows and columns by multiplying with the diagonal
    # matrix from the left and from the right, then keep only the structure.
    maskMtx = diagMtx.mxm(maskMtx).mxm(diagMtx).pattern()

    resultMatrix = hasInterest.mxm(hasInterest.transpose(),mask=maskMtx)
    resultMatrix = resultMatrix.triu().offdiag()

    result = heapq.nsmallest(k,resultMatrix,key=sortTriples)
    for res in result:
        print(formatPair(res[0], res[1], res[2]))

    remaining = k - len(result)
    if remaining > 0:
        # Candidates for zero-count lines: masked pairs in sortTriples order.
        zeroCandidates = heapq.nsmallest(k,maskMtx.triu().offdiag(),key=sortTriples)
        for res in zeroCandidates:
            if remaining == 0:
                break
            # Only pairs without a stored count are reported with 0.
            if resultMatrix.get(res[0],res[1]) is None:
                print(formatPair(res[0], res[1], 0))
                remaining -= 1

In [None]:
# Wall-clock timing of the matrix-based query3_opt run for the same inputs.
start = timer()
query3_opt(k=3, h=2, p='India')
end = timer()
print(end - start)

In [None]:
# Sample runs of query3_opt as (k, h, place name). The comment on each case is
# carried over from the original notebook: the listed pairs followed by their
# common interest counts (presumably the expected output — confirm).
QUERY3_OPT_CASES = [
    (4, 3, 'Brazil'),     # 29|31 29|38 29|39 29|59 % common interest counts 1 1 1 1
    (9, 8, 'Vietnam'),    # 404|978 404|979 404|980 404|983 404|984 404|985 404|987 404|990 404|992 % common interest counts 1 1 1 1 1 1 1 1 1
    (5, 4, 'Australia'),  # 8|16 8|17 8|18 8|19 8|163 % common interest counts 0 0 0 0 0
    (3, 2, 'Asia'),       # 361|812 174|280 280|812 % common interest counts 4 3 3
    (4, 3, 'Indonesia'),  # 396|398 363|367 363|368 363|372 % common interest counts 2 1 1 1
    (3, 2, 'Egypt'),      # 110|116 106|110 106|112 % common interest counts 1 0 0
    (3, 2, 'Italy'),      # 420|825 421|424 10|414 % common interest counts 1 1 0
    (5, 4, 'Chengdu'),    # 590|650 590|658 590|614 590|629 590|638 % common interest counts 1 1 0 0 0
    (3, 2, 'Peru'),       # 65|766 65|767 65|863 % common interest counts 0 0 0
    (3, 2, 'Democratic_Republic_of_the_Congo'),  # 99|100 99|101 99|102 % common interest counts 0 0 0
    (7, 6, 'Ankara'),     # 891|898 890|891 890|895 890|898 890|902 891|895 891|902 % common interest counts 1 0 0 0 0 0 0
    (3, 2, 'Luoyang'),    # 565|625 653|726 565|653 % common interest counts 2 1 0
    (4, 3, 'Taiwan'),     # 795|798 797|798 567|795 567|796 % common interest counts 1 1 0 0
]

for top_k, max_hops, place_name in QUERY3_OPT_CASES:
    # Two blank lines and the place name as a header, then the query output.
    print('\n\n' + place_name)
    query3_opt(top_k, max_hops, place_name)