In [1]:
import rdflib
import re
import numpy as np
from SPARQLWrapper import SPARQLWrapper, JSON
from pyjarowinkler import distance

# Load Word Embeddings

In [2]:
import logging
from six import iteritems

In [3]:
%%time
# Path of the GloVe text file; opened relative to the notebook's working directory.
glove_vectors_file = "glove.6B.300d.txt"
# Maps each token (first field of a line) to its embedding as a NumPy array.
glove_wordmap = {}
with open(glove_vectors_file, "r", encoding="utf-8") as glove:
    for line in glove:
        # Split only at the first space: token on the left, the numbers on the right.
        name, vector = tuple(line.split(" ", 1))
        # Text-mode parse of the space-separated numbers that follow the token.
        glove_wordmap[name] = np.fromstring(vector, sep=" ")

Wall time: 50.5 s


In [4]:
# Alternative endpoint, kept commented out for reference:
#sparql = SPARQLWrapper("http://134.117.101.79:8890/sparql/")
# Module-level SPARQL client; the query helpers below read this global directly.
sparql = SPARQLWrapper("http://dbpedia.org/sparql")

In [5]:
def evaluate_similarity_score(wemb, ar):
    """Score every word pair in ``ar`` by the cosine similarity of its embeddings.

    Parameters:
        wemb: mapping from word to embedding vector.
        ar: iterable of indexable pairs; element 0 and element 1 are the words.

    Returns:
        A list with one entry per pair, in input order: the cosine similarity
        when both words are keys of ``wemb``, otherwise the sentinel ``-1``.
    """
    def _cosine(first, second):
        return np.dot(first,second)/(np.linalg.norm(first)*(np.linalg.norm(second)))

    scores = []
    for pair in ar:
        # Sentinel for pairs with at least one out-of-vocabulary word.
        if pair[0] not in wemb or pair[1] not in wemb:
            scores.append(-1)
            continue
        scores.append(_cosine(wemb[pair[0]], wemb[pair[1]]))
    return scores

# isEntityInDatabase Method

In [6]:
def isEntityInDatabase(s):
    """Return True when the string ``s`` begins with ``"http"``.

    Callers use this to tell URI-valued nodes apart from plain literal values.
    """
    hasHttpPrefix = s.startswith("http")
    return hasHttpPrefix

In [7]:
def countOutgoingEdges(obj):
    """Count the distinct subjects ``?s`` that appear in a triple ``?s ?p <obj>``.

    NOTE: despite the function name, the query pattern matches triples whose
    *object* is ``obj``, i.e. it counts nodes pointing at ``obj``.

    Parameters:
        obj: URI of the node, without the surrounding angle brackets.

    Returns:
        The count as an int; 0 when the endpoint returns no bindings (the
        original fell off the end and returned None, which made the caller's
        ``count < th`` comparison raise TypeError).
    """
    queryString = "SELECT count(DISTINCT ?s) as ?c WHERE { ?s ?p <" + obj +">}"
    # Uses the module-level ``sparql`` client.
    sparql.setQuery(queryString)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    bindings = results["results"]["bindings"]
    if not bindings:
        return 0
    # Only the first row is read.
    return int(bindings[0]['c']['value'])
# Smoke test: issues a live query against the configured endpoint.
countOutgoingEdges("http://dbpedia.org/resource/Viking_Press")

1019

# MakeQueryString Method

In [8]:
def makeQueryString(obj, filterStringsPredicates, filterStringSubjects, literals):
    """Build the SPARQL query that fetches the neighbourhood of ``obj``.

    Parameters:
        obj: either a literal already in SPARQL syntax (when it is a member of
            ``literals``) or a bare URI (wrapped in ``<...>`` here).
        filterStringsPredicates: substrings; predicates matching any of them
            (case-insensitive regex) are excluded.
        filterStringSubjects: substrings; neighbouring nodes matching any of
            them are excluded.
        literals: collection of literal terms used to decide which query shape
            applies.

    Returns:
        For a literal: a query selecting ``?s ?p`` with ``?s ?p obj``.
        Otherwise: a UNION of incoming (``?s ?p <obj>``) and outgoing
        (``<obj> ?j ?k``) edges selecting ``?s ?p ?j ?k``.

    With both filter lists empty the FILTER clause is omitted (the previous
    implementation raised ValueError from ``rindex("&&")`` in that case).
    The filter strings are interpolated verbatim into the regex literal.
    """
    def _filterClause(predicateVar, nodeVar):
        # One negated regex per excluded substring: predicates first, then nodes.
        conditions = ["!regex(str(" + predicateVar + "), '" + s + "' , 'i')"
                      for s in filterStringsPredicates]
        conditions += ["!regex(str(" + nodeVar + "), '" + s + "' , 'i')"
                       for s in filterStringSubjects]
        if not conditions:
            return ""
        # The space before ")" reproduces the text the original emitted.
        return "FILTER (" + " && ".join(conditions) + " )"

    if obj in literals:
        return ("SELECT distinct ?s ?p WHERE { " + "{?s ?p " + obj + " . " + "} "
                + _filterClause("?p", "?s") + "}")

    incomingQuery = "{?s ?p <" + obj + "> . " + _filterClause("?p", "?s") + "}"
    outgoingQuery = "{<" + obj + "> ?j ?k. " + _filterClause("?j", "?k") + "}"
    return ("SELECT distinct ?s ?p ?j ?k WHERE { "
            + incomingQuery + " UNION " + outgoingQuery + "}")

In [9]:
def makeQueryStringWithThreshold(obj, filterStringsPredicates, filterStringSubjects, literals, threshold=10000):
    """Build the neighbourhood query for ``obj``, skipping incoming edges of hub nodes.

    Parameters:
        obj: either a literal already in SPARQL syntax (when it is a member of
            ``literals``) or a bare URI (wrapped in ``<...>`` here).
        filterStringsPredicates: substrings; predicates matching any of them
            (case-insensitive regex) are excluded.
        filterStringSubjects: substrings; neighbouring nodes matching any of
            them are excluded.
        literals: collection of literal terms used to decide which query shape
            applies.
        threshold: when ``countOutgoingEdges(obj)`` is at least this value,
            only the outgoing pattern ``<obj> ?j ?k`` is queried. Defaults to
            the previously hard-coded 10000.

    Returns:
        The SPARQL query string. For literals no edge count is requested.

    With both filter lists empty the FILTER clause is omitted (the previous
    implementation raised ValueError from ``rindex("&&")`` in that case).
    """
    def _filterClause(predicateVar, nodeVar):
        # One negated regex per excluded substring: predicates first, then nodes.
        conditions = ["!regex(str(" + predicateVar + "), '" + s + "' , 'i')"
                      for s in filterStringsPredicates]
        conditions += ["!regex(str(" + nodeVar + "), '" + s + "' , 'i')"
                       for s in filterStringSubjects]
        if not conditions:
            return ""
        # The space before ")" reproduces the text the original emitted.
        return "FILTER (" + " && ".join(conditions) + " )"

    if obj in literals:
        return ("SELECT distinct ?s ?p WHERE { " + "{?s ?p " + obj + " . " + "} "
                + _filterClause("?p", "?s") + "}")

    outgoingQuery = "{<" + obj + "> ?j ?k. " + _filterClause("?j", "?k") + "}"

    if countOutgoingEdges(obj) < threshold:
        incomingQuery = "{?s ?p <" + obj + "> . " + _filterClause("?p", "?s") + "}"
        return ("SELECT distinct ?s ?p ?j ?k WHERE { "
                + incomingQuery + " UNION " + outgoingQuery + "}")

    # Hub node: query outgoing edges only (the incoming pattern is not built).
    return "SELECT distinct ?j ?k WHERE { " + " " + outgoingQuery + "}"

# Triple Class 

In [10]:
class Triple:
    """A (subject, predicate, object) edge found during graph expansion.

    Besides the three terms, a triple carries a matching ``cost``, a link to
    the triple it was expanded from, and the sorted list of seed keywords
    through which it was reached.
    """

    def __init__(self):
        self.subject = None
        self.object = None
        self.predicate = None
        self.cost = None
        self.previousTriple = None
        # the list of connecting seeds (kept sorted, no duplicates)
        self.seeds = []

    def __str__(self):
        """Render the triple, its seeds and the absolute cost with two decimals.

        Returns "Not Well Defined Triple" when any of the three terms is unset.
        An empty seed list renders as ``[]`` and an unset cost as ``n/a``
        (both used to raise).
        """
        if self.subject is None or self.predicate is None or self.object is None:
            return "Not Well Defined Triple"
        connectSeeds = "[" + ", ".join(self.seeds) + "]"
        if self.cost is None:
            costText = "n/a"
        else:
            costText = "{0:.2f}".format(abs(self.cost))
        return "{0}  --  {1}  --  {2}  -- {3}  ({4})".format(
            self.subject, self.predicate, self.object, connectSeeds, costText)

    def __eq__(self, other):
        """Two triples are equal when subject, predicate and object all match.

        Cost, seeds and previous triple are ignored. Comparing against a
        non-Triple (e.g. ``None``) yields NotImplemented so Python falls back
        to its default handling instead of raising AttributeError.
        """
        if not isinstance(other, Triple):
            return NotImplemented
        return (self.subject == other.getSubject()
                and self.object == other.getObject()
                and self.predicate == other.getPredicate())

    def __gt__(self, other):
        """Order triples by cost."""
        return self.cost > other.getCost()

    def __lt__(self, other):
        """Order triples by cost; lets ``list.sort`` / ``sorted`` use ``<`` directly."""
        return self.cost < other.getCost()

    def setSeeds(self, fs):
        """Add a single seed ``fs`` if absent, keeping the seed list sorted."""
        if fs not in self.seeds:
            self.seeds.append(fs)
            self.seeds.sort()

    def getSeeds(self):
        """Return the internal seed list (not a copy)."""
        return self.seeds

    def setPreviousTriple(self, pt):
        self.previousTriple = pt

    def getPreviousTriple(self):
        return self.previousTriple

    def updateSeeds(self, newSeeds):
        """Merge every seed of ``newSeeds`` into this triple's seed list."""
        for seed in newSeeds:
            self.setSeeds(seed)

    def setSubject(self, sub):
        self.subject = sub

    def setObject(self, obj):
        self.object = obj

    def setPredicate(self, pre):
        self.predicate = pre

    def setCost(self, c):
        self.cost = c

    def getCost(self):
        return self.cost

    def getSubject(self):
        return self.subject

    def getObject(self):
        return self.object

    def getPredicate(self):
        return self.predicate

# Read most frequent predicates

In [11]:
%%time
# Maps a predicate's local name (text after the last '/') to its frequency.
frequentPredicates = {}
with open("FreqP.txt", "r") as freqP:
    for line in freqP:
        # Tab-separated fields: field 0 is the predicate, field 1 the count.
        li = line.split("\t")
        name = li[0][li[0].rindex('/')+1:]
        frequency = int(li[1])
        # The second int() is redundant; frequency is already an int.
        frequentPredicates[name] = int(frequency)

Wall time: 125 ms


# Expand Method

In [12]:
def expandWithThreshold(queryTriples, eg, di, predicatesToMatch, wemb):
    """Expand every query triple by one hop and score the discovered edges.

    For each triple in ``queryTriples`` the neighbourhood of its object is
    fetched via ``makeQueryStringWithThreshold`` and one new ``Triple`` is
    created per result row (incoming rows bind ``?s``/``?p``, outgoing rows
    bind ``?j``/``?k``).

    Parameters:
        queryTriples: triples whose objects are expanded.
        eg: the expanded graph; new triples are appended here in place.
            (The original appended to the global ``expandedGraph`` and ignored
            this parameter for that purpose.)
        di: list of duplicated items, updated through ``addDuplicatedItems``.
        predicatesToMatch: keywords the predicates are scored against.
        wemb: word-embedding mapping handed to ``computeCosts``.

    Returns:
        ``(newQueryTriples, matchingTriples)``: the triples to expand on the
        next level, and the triples left after ``computeCosts`` pruning.

    Reads the module-level names ``filterStringPredicates``,
    ``filterStringSubjects``, ``literalsToConnect`` and ``sparql``.
    """
    newQueryTriples = []
    matchingTriples = []

    # Objects already queued for the next level during this call.
    seenObjects = set()
    print("queryTriple size : " + str(len(queryTriples)))
    for queryObject in queryTriples:
        queryString = makeQueryStringWithThreshold(queryObject.getObject(), filterStringPredicates,filterStringSubjects, literalsToConnect)
        print("----- Current Query ------")
        print(queryString)
        print("--------------------------")

        sparql.setQuery(queryString)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        for result in results["results"]["bindings"]:
            # Incoming rows carry ?s/?p, outgoing rows carry ?j/?k.
            if 's' in result:
                predicateKey, nodeKey = "p", "s"
            else:
                predicateKey, nodeKey = "j", "k"

            newTriple = Triple()
            newTriple.setSubject(queryObject.getObject())
            newTriple.setPredicate(result[predicateKey]["value"])
            newTriple.setObject(result[nodeKey]["value"])
            newTriple.setPreviousTriple(queryObject)
            # Inherit the seeds of the triple this one was expanded from.
            newTriple.updateSeeds(queryObject.getSeeds())

            if newTriple not in eg:
                eg.append(newTriple)

            if newTriple not in matchingTriples:
                matchingTriples.append(newTriple)

            neighbour = newTriple.getObject()
            if isEntityInDatabase(neighbour):
                if neighbour in seenObjects:
                    # Reached a second time: record the duplicate instead of re-queuing.
                    addDuplicatedItems(newTriple, eg, di)
                else:
                    seenObjects.add(neighbour)
                    newQueryTriples.append(newTriple)

        # Runs after every query, as before: sets costs and prunes
        # over-threshold triples from matchingTriples.
        #computeCostsBaseline(predicatesToMatch, matchingTriples, wemb)
        computeCosts(predicatesToMatch, matchingTriples, wemb)
    return newQueryTriples, matchingTriples

In [13]:
def expand(queryTriples, eg, di, predicatesToMatch, wemb):
    """Expand every query triple by one hop and score edges with the baseline.

    For each triple in ``queryTriples`` the neighbourhood of its object is
    fetched via ``makeQueryString`` and one new ``Triple`` is created per
    result row (incoming rows bind ``?s``/``?p``, outgoing rows bind
    ``?j``/``?k``).

    Parameters:
        queryTriples: triples whose objects are expanded.
        eg: the expanded graph; new triples are appended here in place.
            (The original appended to the global ``expandedGraph`` and ignored
            this parameter for that purpose.)
        di: list of duplicated items, updated through ``addDuplicatedItems``.
        predicatesToMatch: keywords the predicates are scored against.
        wemb: word-embedding mapping, passed through to
            ``computeCostsBaseline``.

    Returns:
        ``(newQueryTriples, matchingTriples)``: the triples to expand on the
        next level, and the triples left after ``computeCostsBaseline``
        pruning.

    Reads the module-level names ``filterStringPredicates``,
    ``filterStringSubjects``, ``literalsToConnect`` and ``sparql``.
    """
    newQueryTriples = []
    matchingTriples = []

    # Objects already queued for the next level during this call.
    seenObjects = set()
    print("queryTriple size : " + str(len(queryTriples)))
    for queryObject in queryTriples:
        queryString = makeQueryString(queryObject.getObject(), filterStringPredicates,filterStringSubjects, literalsToConnect)
        print("----- Current Query ------")
        print(queryString)
        print("--------------------------")

        sparql.setQuery(queryString)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        for result in results["results"]["bindings"]:
            # Incoming rows carry ?s/?p, outgoing rows carry ?j/?k.
            if 's' in result:
                predicateKey, nodeKey = "p", "s"
            else:
                predicateKey, nodeKey = "j", "k"

            newTriple = Triple()
            newTriple.setSubject(queryObject.getObject())
            newTriple.setPredicate(result[predicateKey]["value"])
            newTriple.setObject(result[nodeKey]["value"])
            newTriple.setPreviousTriple(queryObject)
            # Inherit the seeds of the triple this one was expanded from.
            newTriple.updateSeeds(queryObject.getSeeds())

            if newTriple not in eg:
                eg.append(newTriple)

            if newTriple not in matchingTriples:
                matchingTriples.append(newTriple)

            neighbour = newTriple.getObject()
            if isEntityInDatabase(neighbour):
                if neighbour in seenObjects:
                    # Reached a second time: record the duplicate instead of re-queuing.
                    addDuplicatedItems(newTriple, eg, di)
                else:
                    seenObjects.add(neighbour)
                    newQueryTriples.append(newTriple)

        # Runs after every query, as before: sets costs and prunes
        # over-threshold triples from matchingTriples.
        computeCostsBaseline(predicatesToMatch, matchingTriples, wemb)
    return newQueryTriples, matchingTriples

# addDuplicatedItems Method

In [14]:
def addDuplicatedItems(ntp, eg, di):
    """Record in ``di`` every triple of ``eg`` that shares its object with ``ntp``.

    Each newly recorded triple whose seed list differs from ``ntp``'s triggers
    ``updateSeedsInExpandedGraph`` so the seed sets get merged. ``ntp`` itself
    is not appended. Always returns the string "Dup".
    """
    sharedObject = ntp.getObject
    for candidate in eg:
        if not (candidate.getObject() == sharedObject() and candidate not in di):
            continue
        di.append(candidate)
        seedsDiffer = candidate.getSeeds() != ntp.getSeeds()
        if seedsDiffer:
            updateSeedsInExpandedGraph(candidate, ntp, eg)
    #di.append(ntp)
    return "Dup"

In [15]:
def updateSeedsInExpandedGraph(tp,ntp,eg):
    """Merge the seeds of ``tp`` into ``ntp`` and propagate the merged set.

    Every triple in ``eg`` whose seed list equals the pre-merge seeds of
    either ``tp`` or ``ntp`` receives the merged seeds as well.
    """
    # Snapshot both seed lists first: the merge below mutates ntp's list.
    seedsBeforeMerge = (tp.getSeeds()[:], ntp.getSeeds()[:])
    ntp.updateSeeds(tp.getSeeds())
    mergedSeeds = ntp.getSeeds()

    for member in eg:
        if member.getSeeds() in seedsBeforeMerge:
            member.updateSeeds(mergedSeeds)

# Reduction Tests

In [16]:
def reductionTestsDegreeOne(eg):
    """Remove, in place, the triples of ``eg`` that dangle from the graph.

    A triple is removed when no triple in ``eg`` has it as previous triple and
    no triple with a different subject shares its object. The number of
    removed triples is printed.
    """
    removable = []

    for candidate in eg:
        hasChild = False
        sharedObject = False
        for other in eg:
            if other.getPreviousTriple() == candidate:
                hasChild = True
            sameObject = candidate.getObject() == other.getObject()
            if sameObject and not candidate.getSubject() == other.getSubject():
                # Reached from another subject too: keep it, stop scanning.
                sharedObject = True
                break
        if not (sharedObject or hasChild):
            removable.append(candidate)

    print("Degree One Nodes size: " + str(len(removable)))
    for candidate in removable:
        eg.remove(candidate)

def keepMinEdge(eg):
    """Keep only the cheapest predicate between each (subject, object) pair.

    ``eg`` is modified in place. Among triples that share subject and object
    but differ in predicate, the one with the lowest cost survives; on a cost
    tie the one that appears first in ``eg`` wins.

    Fixes over the previous version: the object comparison used the bound
    method ``tp2.getObject`` (missing call parentheses), so the condition was
    never true and nothing was ever removed; and the pairwise scheme would
    have dropped *both* triples on a cost tie.
    """
    # First pass: the cheapest triple per (subject, object) pair.
    cheapest = {}
    for tp in eg:
        key = (tp.getSubject(), tp.getObject())
        best = cheapest.get(key)
        if best is None or tp.getCost() < best.getCost():
            cheapest[key] = tp

    # Second pass: drop the more expensive alternatives. A triple with the
    # same predicate as the winner is left alone, as before.
    survivors = []
    for tp in eg:
        best = cheapest[(tp.getSubject(), tp.getObject())]
        if best is tp or tp.getPredicate() == best.getPredicate():
            survivors.append(tp)
    eg[:] = survivors
    

# checkConnection Method

In [17]:
def checkConnection(ltc, di):
    """Tell whether some duplicated triple is reached from every literal.

    Parameters:
        ltc: the literals to connect.
        di: duplicated triples; each exposes its sorted seeds via ``getSeeds``.

    Returns:
        True when a triple in ``di`` has a seed list equal to the sorted
        ``ltc``; False otherwise, and always False for fewer than two literals
        because there is nothing to connect.

    ``ltc`` is no longer sorted in place, so the caller's list keeps its order.
    """
    if len(ltc) < 2:
        return False
    wantedSeeds = sorted(ltc)
    return any(tp.getSeeds() == wantedSeeds for tp in di)

# ComputeCosts Method

In [18]:
def computeCosts(predicatesToMatch, matchingTriples, wemb):
    """Assign each matching triple its best cost and prune poor matches.

    The cost of a (keyword, predicate) pair is ``(1 - similarity) * 100``.
    Similarity is the embedding cosine from ``evaluate_similarity_score``;
    when that is unavailable (sentinel -1) or yields a cost above the
    threshold, the Jaro-Winkler similarity ``dw`` is used instead. A triple
    keeps the lowest cost seen over all keywords.

    Parameters:
        predicatesToMatch: keywords to compare predicates against.
        matchingTriples: triples to score; modified in place. Triples whose
            cost exceeds the threshold, or that never received a cost (e.g.
            ``predicatesToMatch`` is empty), are removed.
        wemb: word-embedding mapping.

    Returns:
        None.

    Robustness fixes: predicates without '/' are used whole instead of raising
    ValueError; empty names are scored 0 rather than passed to ``dw`` (same
    guard as ``computeCostsBaseline``); unset costs no longer raise TypeError
    in the pruning step.
    """
    th = 50
    # Local name of every predicate: the text after the last '/'.
    predicateList = [tp.getPredicate().rsplit('/', 1)[-1] for tp in matchingTriples]

    for y in predicatesToMatch:
        # Stop-word stripping of predicates was disabled in the original and stays off.
        ar = [[y, p] for p in predicateList]
        result = evaluate_similarity_score(wemb, ar)

        for x in range(len(result)):
            similarity = result[x]
            #TODO: What if only one of the two words is in the word embedding?
            if similarity == -1 or (1 - similarity) * 100 > th:
                # No usable embedding score: fall back to Jaro-Winkler.
                if len(y) == 0 or len(predicateList[x]) == 0:
                    similarity = 0
                else:
                    similarity = dw(y, predicateList[x])

            cost = (1 - similarity) * 100
            current = matchingTriples[x].getCost()
            if current is None or current > cost:
                matchingTriples[x].setCost(cost)
                print(str(matchingTriples[x]) + ' set by ' + y)

    # In-place prune; ``not (cost > th)`` mirrors the original comparison.
    matchingTriples[:] = [tp for tp in matchingTriples
                          if tp.getCost() is not None and not (tp.getCost() > th)]

# Baseline 1: Using JW distance only

In [19]:
def computeCostsBaseline(predicatesToMatch, matchingTriples, wemb):
    """Baseline scorer: cost from Jaro-Winkler similarity only.

    The cost of a (keyword, predicate) pair is ``(1 - dw(...)) * 100``; a pair
    with an empty string gets similarity 0. A triple keeps the lowest cost
    seen over all keywords.

    Parameters:
        predicatesToMatch: keywords to compare predicates against.
        matchingTriples: triples to score; modified in place. Triples whose
            cost exceeds the threshold, or that never received a cost (e.g.
            ``predicatesToMatch`` is empty), are removed.
        wemb: unused; kept so the signature matches ``computeCosts``.

    Returns:
        None.

    Robustness fixes: predicates without '/' are used whole instead of raising
    ValueError; unset costs no longer raise TypeError in the pruning step.
    """
    th = 40
    # Local name of every predicate: the text after the last '/'.
    predicateList = [tp.getPredicate().rsplit('/', 1)[-1] for tp in matchingTriples]

    for y in predicatesToMatch:
        # Stop-word stripping of predicates was disabled in the original and stays off.
        for x, p in enumerate(predicateList):
            if len(y) == 0 or len(p) == 0:
                result = 0
            else:
                result = dw(y, p)

            cost = (1 - result) * 100
            current = matchingTriples[x].getCost()
            if current is None or current > cost:
                matchingTriples[x].setCost(cost)

    # In-place prune; ``not (cost > th)`` mirrors the original comparison.
    matchingTriples[:] = [tp for tp in matchingTriples
                          if tp.getCost() is not None and not (tp.getCost() > th)]

# Identify Entities and predicates in keywords
###### 1. Check whether the lowercased literal is a property/ontology term in the dataset. If so, it is a predicate.
###### 2. Check whether the literal converted with convertFirstToCapital is a property in the dataset. If so, it is a predicate.
###### 3. Check whether the literal converted with convertFirstToCapital is a resource in the dataset. If so, it is an entity.
###### 4. Otherwise, treat the literal as a predicate.

In [20]:
def isResourceInDataset3(literal):
    """Decide whether ``literal`` names a resource (entity) rather than a property.

    Two label look-ups are made against the endpoint:

    1. the lowercased literal: if a subject whose URI contains 'property' has
       a local name close to ``literal`` (Jaro-Winkler above 0.7), the literal
       is treated as a property and False is returned;
    2. the literal as converted by ``convertFirstToCapital``: True is returned
       only when a close 'resource' subject exists and no close 'property'
       subject does.

    Returns:
        True for an entity, False otherwise (including when a subject's local
        name cannot be compared, which is treated as a property).

    The bare ``except:`` was narrowed to ``except Exception`` so that
    KeyboardInterrupt / SystemExit are no longer swallowed. The literal is
    interpolated into the query verbatim.
    """
    jw_th = 0.7

    def _subjectsLabelled(label):
        # All distinct subjects carrying ``label`` as an English literal.
        queryString = 'select distinct ?s where {?s ?p "'+ label + '"@en}'
        sparql.setQuery(queryString)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()
        return [result["s"]["value"] for result in results["results"]["bindings"]]

    for sub in _subjectsLabelled(literal.lower()):
        if 'property' in sub:
            if dw(sub[sub.rindex('/')+1:],literal) > jw_th:
                return False

    literal = convertFirstToCapital(literal)

    propertyInSub = False
    resourceInSub = False
    for sub in _subjectsLabelled(literal):
        try:
            p = dw(sub[sub.rindex('/') +1:],literal)
        except Exception:
            print("Substring not found, treat as a property")
            return False

        if 'property' in sub:
            if p > jw_th:
                propertyInSub = True
        elif 'resource' in sub:
            if p > jw_th:
                resourceInSub = True

    # A close property match outranks a close resource match.
    return resourceInSub and not propertyInSub

# Test version
## Using only resource as entity

In [21]:
def isResourceInDataset2(literal):
    """Return True when some 'resource' subject is labelled like ``literal``.

    Three label look-ups are tried in order: the literal as given, its
    lowercased form, and the form produced by ``convertFirstToCapital``. A
    subject matches when its URI contains 'resource' and the Jaro-Winkler
    similarity between its local name (text after the last '/') and the
    reference string exceeds 0.7. As before, the first two look-ups compare
    against the literal as given and the third against the capitalized form.

    Fix: when the local name could not be compared, the old code printed a
    message and then still evaluated ``p > jw_th`` with ``p`` undefined
    (NameError) or left over from the previous row. Such rows are now skipped.
    The bare ``except:`` became ``except Exception``.
    """
    jw_th = 0.7

    def _hasCloseResource(label, reference, failureMessage):
        # One look-up: query subjects labelled ``label`` and compare each
        # local name against ``reference``.
        queryString = 'select distinct ?s where {?s ?p "'+ label + '"@en}'
        sparql.setQuery(queryString)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()

        for result in results["results"]["bindings"]:
            sub = result["s"]["value"]
            try:
                p = dw(sub[sub.rindex('/') +1:], reference)
            except Exception:
                print(failureMessage)
                continue

            if 'resource' in sub and p > jw_th:
                return True
        return False

    #Original keyword:
    if _hasCloseResource(literal, literal, "Find a record with no / in lowercased"):
        return True

    #Lowercase:
    if _hasCloseResource(literal.lower(), literal, "Find a record with no / in lowercased"):
        return True

    #First letter capitalized
    literal = convertFirstToCapital(literal)
    return _hasCloseResource(literal, literal, "Find a record with no / in CFTC")

# Test Version, Using Count(*)
## Assumption: if a keyword is a property, then it is not an entity

In [22]:
def isPredicateInDataset(literal):
    """Classify ``literal`` by how often things labelled with it are used.

    Returns:
        1  when subjects labelled ``literal`` occur in predicate position more
           than 100 times (treated as a property);
        0  when they occur in subject/object position more than 100 times
           (treated as an entity);
        -1 when neither count exceeds the threshold.

    Both counts are printed. The property test runs first and short-circuits
    the entity test.
    """
    count_th = 100

    def _countExceeds(queryString, suffix):
        # Run one count query; print every returned count followed by ``suffix``.
        sparql.setQuery(queryString)
        sparql.setReturnFormat(JSON)
        results = sparql.query().convert()
        for result in results["results"]["bindings"]:
            res = int(result["c"]["value"])
            print(str(res) + suffix)
            if res > count_th:
                return True
        return False

    print("Current literal: " + literal)

    #Test for property
    propertyQuery = 'select distinct Count(?ss) as ?c where {?s ?p "'+ literal + '"@en. ?ss ?s ?j}'
    if _countExceeds(propertyQuery, " in property"):
        return 1

    #Test for entity
    entityQuery = 'select distinct Count(?ss) as ?c where {{?s ?p "'+ literal + '"@en. ?ss ?j ?s} UNION {?s ?p "'+ literal + '"@en. ?s ?j ?ss}}'
    if _countExceeds(entityQuery, " in sub/obj"):
        return 0

    return -1
    

In [23]:
def indentifyEntitiesAndPredicates(keywords):
    """Split a comma-separated keyword string into entity literals and predicates.

    Every stripped keyword is recorded as a predicate. In addition, its three
    spellings (as given, lowercased, ``convertFirstToCapital``) are classified
    with ``isPredicateInDataset``: the spellings classified 0 become entities,
    unless some spelling is classified 1, in which case the keyword
    contributes no entity at all.

    Parameters:
        keywords: comma-separated keywords.

    Returns:
        ``(entity, pre)``: ``entity`` holds SPARQL literals of the form
        ``"<text>"@en``; ``pre`` holds the stripped keywords in input order.

    Fix: the spellings were kept in a ``set``, so the order in which they were
    queried, printed and returned changed from run to run. They are now
    de-duplicated in a fixed order. The reuse of ``x`` as loop variable at
    three nesting levels is gone as well.
    """
    entity = []
    pre = []

    for rawKeyword in keywords.split(','):
        keyword = rawKeyword.strip()
        pre.append(keyword)

        # Ordered de-duplication of the three spellings.
        toIdentify = list(dict.fromkeys(
            [keyword, keyword.lower(), convertFirstToCapital(keyword)]))

        print("to identify-------------")
        for variant in toIdentify:
            print(variant)
        print("to identify-------------")

        possibleEnt = []
        for variant in toIdentify:
            res = isPredicateInDataset(variant)
            if res == 1:
                # A property spelling rules the keyword out as an entity.
                possibleEnt.clear()
                break
            elif res == 0:
                possibleEnt.append(variant)

        entity.extend(possibleEnt)

    entity = ['"' + name + '"@en' for name in entity]
    return entity,pre

In [24]:
def indentifyEntitiesAndPredicates2(keywords):
    """Variant classifier that stops at the first conclusive spelling.

    For each stripped keyword the spellings are tried in order (as given,
    lowercased, ``convertFirstToCapital``). The first spelling that
    ``isPredicateInDataset`` classifies as 1 is recorded as a predicate only;
    the first classified as 0 is recorded as both entity and predicate. When
    every spelling is inconclusive (-1), the keyword as given is recorded as a
    predicate.

    Parameters:
        keywords: comma-separated keywords.

    Returns:
        ``(entity, pre)``: ``entity`` holds SPARQL literals of the form
        ``"<text>"@en``; ``pre`` holds the predicate spellings.

    Fix: ``isPredicateInDataset`` returns 1 / 0 / -1, but the old chain tested
    its truthiness. That treated -1 like 1, made every branch after the first
    two unreachable (so lowercased and capitalized spellings were never
    tried), and queried the endpoint twice per keyword.
    """
    entity = []
    pre = []

    for rawKeyword in keywords.split(','):
        keyword = rawKeyword.strip()
        spellings = (keyword, keyword.lower(), convertFirstToCapital(keyword))

        for spelling in spellings:
            verdict = isPredicateInDataset(spelling)
            if verdict == 1:
                pre.append(spelling)
                break
            if verdict == 0:
                entity.append(spelling)
                pre.append(spelling)
                break
        else:
            # No spelling was conclusive: we treat every keyword as a predicate.
            pre.append(keyword)

    entity = ['"' + name + '"@en' for name in entity]
    return entity,pre


# Jaro_Winkler Distance

In [25]:
def dw(string1, string2):
    """Return the pyjarowinkler score for the two strings.

    Delegates to ``distance.get_jaro_distance`` with the Winkler adjustment
    switched on and a scaling factor of 0.1.
    """
    jaroWinklerOptions = {"winkler": True, "scaling": 0.1}
    return distance.get_jaro_distance(string1, string2, **jaroWinklerOptions)

In [26]:
"""
def dw(string1, string2):
    jaro = jaro_distance(string1, string2)
    prefix = 0
    if len(string1) > len(string2):
        string2, string1 = string1, string2
        
    for index, char in enumerate(string1[:4]):
        #print(string1 + " === " + string2)
        if char == string2[index]:
            #print(char)
            prefix = prefix + 1
        else:
            break

    if (jaro > 0.7):
        return jaro + ((prefix * 0.1) * (1 - jaro))
    else:
        return jaro

def jaro_distance(string1, string2):

    if len(string1) < len(string2):
        longerString = string2
        shorterString = string1
    else:
        longerString = string1
        shorterString = string2
    
    # Should be rounded down
    allowedRange = (len(longerString) // 2) - 1
    mappingIndices = [-1] * len(shorterString)
    shortMached = []
    longMatched = []
    matches = 0

    for index, char in enumerate(shorterString):
        for secondIndex in range(max(0, index - allowedRange), min(len(longerString), index + allowedRange + 1)):
            if char == longerString[secondIndex]:
                matches = matches + 1
                mappingIndices[index] = secondIndex
                shortMached.append(char)
                longMatched.insert(secondIndex, char)
                break

    halfTranspositions = 0
    for naturalIndex in range(0, len(shortMached)):
        if (mappingIndices[naturalIndex] != naturalIndex)  & (shortMached[naturalIndex] != longMatched[naturalIndex]):
            halfTranspositions = halfTranspositions + 1
    
    if matches == 0:
        return 0
    
    return ((matches / len(longerString)) + (matches / len(shorterString)) + ((matches - (halfTranspositions // 2))/matches)) / 3
"""

'\ndef dw(string1, string2):\n    jaro = jaro_distance(string1, string2)\n    prefix = 0\n    if len(string1) > len(string2):\n        string2, string1 = string1, string2\n        \n    for index, char in enumerate(string1[:4]):\n        #print(string1 + " === " + string2)\n        if char == string2[index]:\n            #print(char)\n            prefix = prefix + 1\n        else:\n            break\n\n    if (jaro > 0.7):\n        return jaro + ((prefix * 0.1) * (1 - jaro))\n    else:\n        return jaro\n\ndef jaro_distance(string1, string2):\n\n    if len(string1) < len(string2):\n        longerString = string2\n        shorterString = string1\n    else:\n        longerString = string1\n        shorterString = string2\n    \n    # Should be rounded down\n    allowedRange = (len(longerString) // 2) - 1\n    mappingIndices = [-1] * len(shorterString)\n    shortMached = []\n    longMatched = []\n    matches = 0\n\n    for index, char in enumerate(shorterString):\n        for secondInd

In [27]:
"""
def make1shorter(word1, word2):
    if len(word1) > len(word2):
        # Make sure word1 is shorter
        temp = word1
        word1 = word2
        word2 = temp

    return word1, word2

def dj(word1, word2):
    if len(word1) == 0 or len(word2) == 0:
        raise Exception("Not words, mate")

    word1, word2 = make1shorter(word1, word2)

    word2chars = list(word2)
    m = 0
    for char in word1:
        if char in word2chars:
            m += 1
            word2chars.pop(word2chars.index(char))

    t = 0
    for i in range(len(word1)):
        if word1[i] != word2[i]:
            t += 1

    if m == 0:
        return 0
    #return 1/3*(m/len(word1) + m/len(word2) + (m - t / 2)/m)
    return 1/3*(m/len(word1) + m/len(word2) + (m - t)/m)

def dw(word1, word2, p=0.1, lmax=4):
    word1, word2 = make1shorter(word1, word2)

    dj_ = dj(word1, word2)

    l = 0
    for i in range(min(len(word1), lmax)):
        if word1[i] == word2[i]:
            l += 1
        else:
            break

    return dj_ + l * p * (1 - dj_)
"""

'\ndef make1shorter(word1, word2):\n    if len(word1) > len(word2):\n        # Make sure word1 is shorter\n        temp = word1\n        word1 = word2\n        word2 = temp\n\n    return word1, word2\n\ndef dj(word1, word2):\n    if len(word1) == 0 or len(word2) == 0:\n        raise Exception("Not words, mate")\n\n    word1, word2 = make1shorter(word1, word2)\n\n    word2chars = list(word2)\n    m = 0\n    for char in word1:\n        if char in word2chars:\n            m += 1\n            word2chars.pop(word2chars.index(char))\n\n    t = 0\n    for i in range(len(word1)):\n        if word1[i] != word2[i]:\n            t += 1\n\n    if m == 0:\n        return 0\n    #return 1/3*(m/len(word1) + m/len(word2) + (m - t / 2)/m)\n    return 1/3*(m/len(word1) + m/len(word2) + (m - t)/m)\n\ndef dw(word1, word2, p=0.1, lmax=4):\n    word1, word2 = make1shorter(word1, word2)\n\n    dj_ = dj(word1, word2)\n\n    l = 0\n    for i in range(min(len(word1), lmax)):\n        if word1[i] == word2[i]:\n 

# Capitalize the first letter of each word in a literal and lowercase the rest

In [28]:
def convertFirstToCapital(s):
    """Return ``s`` with every whitespace-separated word capitalised.

    Each word has its first character upper-cased and the remaining
    characters lower-cased. Words are re-joined with single spaces, so
    leading/trailing whitespace is dropped and runs of whitespace collapse.
    """
    return " ".join(word[0].upper() + word[1:].lower() for word in s.split())

# Main Program

In [None]:
%%time

# Load the stop-word list (one word per line, surrounding whitespace stripped).
# `swl` is not used again in this cell; presumably helpers defined in other
# cells read it as a global -- TODO confirm.
with open('StopWords.txt','r') as sw:
    swlines = sw.readlines()
swl = [x.strip() for x in swlines]

# Read the questions to process, one question per line.
with open('qald5-3.txt','r',encoding='utf-8') as f:
    lines = f.readlines()

with open('test_res3.txt','w',encoding='utf-8') as fw:
    for line in lines:
        # Per-question state. The two filter lists are not referenced directly
        # below; presumably the query-building helpers read them as globals
        # -- TODO confirm before removing.
        filterStringPredicates = ["wikiPageWikiLink","wikiPageRedirects","wikiPageDisambiguates", "Thing","wikiPageUsesTemplate","rdf-syntax-ns#type"]
        filterStringSubjects = ["entity", "Category", "wikidata","owl#Thing", "http://wikidata.dbpedia.org/resource/Q"]
        expandedGraph = []
        duplicatedItems = []

        literalsToConnect, predicates = indentifyEntitiesAndPredicates(line.strip())
        fw.write("-----Question------\n")
        fw.write(line + '\n')
        fw.write("-------------------\n")

        # Seed one query triple per identified literal.
        qtps = []
        for literal in literalsToConnect:
            triple = Triple()
            triple.setObject(literal)
            triple.setSeeds(literal)
            qtps.append(triple)

        fw.write('--entities--\n')
        print('---- entities ----')
        for tp in qtps:
            fw.write(str(tp.getObject()) + '\n')
            print(tp.getObject())
        print(predicates)

        # Reset the matching triples for every question. Without this, `mtps`
        # is unbound (NameError) when the loop below never runs for the first
        # question, and for later questions it would silently carry over the
        # previous question's results.
        mtps = []
        levelOfExpansion = 1
        while(levelOfExpansion < 3 and not checkConnection(literalsToConnect, duplicatedItems)):
            # Each level overwrites `mtps` with that level's result. The
            # original note here read "add mtps to a new list", so
            # accumulating across levels may have been intended -- TODO confirm.
            qtps, mtps = expand(qtps, expandedGraph, duplicatedItems, predicates, glove_wordmap)
            levelOfExpansion += 1

        print("Matching Triples: -----   size: " + str(len(mtps)))
        mtps.sort()
        for tp in mtps:
            fw.write(str(tp) + '\n')
            print(tp)

        # Only reduce and dump the expanded graph once the literals are connected.
        if checkConnection(literalsToConnect, duplicatedItems):
            reductionTestsDegreeOne(expandedGraph)
            fw.write("Size of expanded graph: "+ str(len(expandedGraph)) + '\n')
            for tp in expandedGraph:
                fw.write(str(tp) + '\n')

In [None]:
# Debug aid: print the triples left in `expandedGraph` (as set by the most
# recently processed question) whose predicate contains 'capital'.
for x in expandedGraph:
    if 'capital' not in x.getPredicate():
        continue
    print(x)

# Baseline using exact match for predicates

In [38]:
%%time

# Load the stop-word list (one word per line, surrounding whitespace stripped).
# `swl` is not used again in this cell; presumably helpers defined in other
# cells read it as a global -- TODO confirm.
with open('StopWords.txt','r') as sw:
    swlines = sw.readlines()
swl = [x.strip() for x in swlines]

# Read the questions to process, one question per line.
with open('./Test2/st2.txt','r',encoding='utf-8') as f:
    lines = f.readlines()

with open('./Test2/stree2_res.txt','w',encoding='utf-8') as fw:
    for line in lines:
        # Per-question state. The two filter lists are not referenced directly
        # below; presumably the query-building helpers read them as globals
        # -- TODO confirm before removing.
        filterStringPredicates = ["wikiPageWikiLink","wikiPageRedirects","wikiPageDisambiguates", "Thing","wikiPageUsesTemplate","rdf-syntax-ns#type"]
        filterStringSubjects = ["entity", "Category", "wikidata","owl#Thing", "http://wikidata.dbpedia.org/resource/Q"]
        expandedGraph = []
        duplicatedItems = []

        literalsToConnect, predicates = indentifyEntitiesAndPredicates(line.strip())
        fw.write("-----Question------\n")
        fw.write(line + '\n')
        fw.write("-------------------\n")

        # Seed one query triple per identified literal.
        qtps = []
        for literal in literalsToConnect:
            triple = Triple()
            triple.setObject(literal)
            triple.setSeeds(literal)
            qtps.append(triple)

        fw.write('--entities--\n')
        print('---- entities ----')
        for tp in qtps:
            fw.write(str(tp.getObject()) + '\n')
            print(tp.getObject())
        print(predicates)

        # Reset the matching triples for every question. Without this, `mtps`
        # is unbound (NameError) when the loop below never runs for the first
        # question, and for later questions it would silently carry over the
        # previous question's results.
        mtps = []
        levelOfExpansion = 1
        while(levelOfExpansion < 4 and not checkConnection(literalsToConnect, duplicatedItems)):
            # Each level overwrites `mtps` with that level's result. The
            # original note here read "add mtps to a new list", so
            # accumulating across levels may have been intended -- TODO confirm.
            qtps, mtps = expandWithThreshold(qtps, expandedGraph, duplicatedItems, predicates, glove_wordmap)
            levelOfExpansion += 1

        print("Matching Triples: -----   size: " + str(len(mtps)))
        mtps.sort()
        for tp in mtps:
            fw.write(str(tp) + '\n')
            print(tp)

        # Only reduce and dump the expanded graph once the literals are connected.
        if checkConnection(literalsToConnect, duplicatedItems):
            reductionTestsDegreeOne(expandedGraph)
            fw.write("Size of expanded graph: "+ str(len(expandedGraph)) + '\n')
            for tp in expandedGraph:
                fw.write(str(tp) + '\n')

to identify-------------
politicians
Politicians
to identify-------------
Current literal: politicians
0 in property
0 in sub/obj
Current literal: Politicians
0 in property
910 in sub/obj
to identify-------------
columbia university
Columbia University
to identify-------------
Current literal: columbia university
0 in property
0 in sub/obj
Current literal: Columbia University
0 in property
62027 in sub/obj
---- entities ----
"Politicians"@en
"Columbia University"@en
['Politicians', 'Columbia University']
queryTriple size : 2
----- Current Query ------
SELECT distinct ?s ?p WHERE { {?s ?p "Politicians"@en . } FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i')

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Dewey_Jackson_Short> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Dewey_Jackson_Short> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Sambrial> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Sambrial> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k), 'Category' , 

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Jathekey> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Jathekey> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k), 'Category' , 

----- Current Query ------
SELECT distinct ?j ?k WHERE {  {<http://dbpedia.org/resource/Columbia_University> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k), 'Category' , 'i') && !regex(str(?k), 'wikidata' , 'i') && !regex(str(?k), 'owl#Thing' , 'i') && !regex(str(?k), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )}}
--------------------------
http://dbpedia.org/resource/Columbia_University  --  http://www.w3.org/2000/01/rdf-schema#label  --  Columbia University  -- ["Columbia University"@en]  (56.00) set by Politicians
http://dbpedia.org/resource/Columbia_University  --  http://www.w3.org/2000/01/rdf-schema#label  --  جامعة كولومبيا  -- ["Columbia University"@en]  (56.00) set by Politicians
http://d

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Columbia_University_College_of_Physicians_and_Surgeons> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Columbia_University_College_of_Physicians_and_Surgeons> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rd

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Pupin_Hall> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Pupin_Hall> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k), 'Category

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Alfred_Lerner_Hall> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Alfred_Lerner_Hall> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(st

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Columbia_University_School_of_General_Studies> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Columbia_University_School_of_General_Studies> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' 

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Columbia_University_College_of_Dental_Medicine> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Columbia_University_College_of_Dental_Medicine> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Columbia_University_School_of_Professional_Studies> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Columbia_University_School_of_Professional_Studies> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Columbia_University_Graduate_School_of_Journalism> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Columbia_University_Graduate_School_of_Journalism> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-n

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/University_of_Portland> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/University_of_Portland> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://en.wikipedia.org/wiki/Politicians> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://en.wikipedia.org/wiki/Politicians> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k), 'Cate

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/P._F._Dahler> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/P._F._Dahler> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k), 'Cate

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Thalekunnil_Basheer> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Thalekunnil_Basheer> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/List_of_Delegates_to_the_United_States_House_of_Representatives_from_American_Samoa> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/List_of_Delegates_to_the_United_States_House_of_Representatives_from_American_Samoa> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Prokop_Murra> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Prokop_Murra> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k), 'Cate

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/James_Farley> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/James_Farley> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k), 'Cate

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Yun_Geun-su> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Yun_Geun-su> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k), 'Catego

http://dbpedia.org/resource/Kurt_Vieweg  --  http://xmlns.com/foaf/0.1/primaryTopic  --  http://en.wikipedia.org/wiki/Kurt_Vieweg  -- ["Politicians"@en]  (43.00) set by Politicians
http://dbpedia.org/resource/Kurt_Vieweg  --  http://www.w3.org/2000/01/rdf-schema#label  --  Kurt Vieweg  -- ["Politicians"@en]  (56.00) set by Politicians
http://dbpedia.org/resource/Kurt_Vieweg  --  http://www.w3.org/2000/01/rdf-schema#comment  --  Kurt Vieweg (* 29. Oktober 1911 in Göttingen; † 2. Dezember 1976 in Greifswald) war einer der führenden Landwirtschaftspolitiker in den frühen Jahren der DDR. Er war zeitweise Generalsekretär der VdgB, Abgeordneter der Volkskammer und Mitglied des ZK der SED.  -- ["Politicians"@en]  (57.00) set by Politicians
http://dbpedia.org/resource/Kurt_Vieweg  --  http://www.w3.org/2000/01/rdf-schema#comment  --  Kurt Vieweg (born 29 October 1911 in Göttingen, died 2 December 1976 in Greifswald) was one of the leading agricultural politicians in the early years of the GDR.

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Healyite_Nationalist> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Healyite_Nationalist> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !rege

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/María_Elena_Barrera> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/María_Elena_Barrera> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Idham_Chalid> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Idham_Chalid> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k), 'Cate

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Support_for_military_action_against_Iran> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Support_for_military_action_against_Iran> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && 

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Alfred_Walton> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Alfred_Walton> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k), 'Ca

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Ryu_Sun-jeong> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Ryu_Sun-jeong> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k), 'Ca

http://dbpedia.org/resource/Politicians_of_The_Wire  --  http://xmlns.com/foaf/0.1/primaryTopic  --  http://en.wikipedia.org/wiki/Politicians_of_The_Wire  -- ["Politicians"@en]  (43.00) set by Politicians
http://dbpedia.org/resource/Politicians_of_The_Wire  --  http://dbpedia.org/ontology/spouse  --  http://dbpedia.org/resource/Cedric_Daniels  -- ["Politicians"@en]  (49.00) set by Politicians
http://dbpedia.org/resource/Politicians_of_The_Wire  --  http://www.w3.org/2000/01/rdf-schema#label  --  Politicians of The Wire  -- ["Politicians"@en]  (56.00) set by Politicians
http://dbpedia.org/resource/Politicians_of_The_Wire  --  http://www.w3.org/2000/01/rdf-schema#comment  --  The following are politicians, family members, and assistants administrating the politics of Baltimore on The Wire.  -- ["Politicians"@en]  (57.00) set by Politicians
http://dbpedia.org/resource/Politicians_of_The_Wire  --  http://dbpedia.org/ontology/wikiPageID  --  9076052  -- ["Politicians"@en]  (48.00) set by Po

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Anatolii_Hrytsenko> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Anatolii_Hrytsenko> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(st

http://dbpedia.org/resource/George_H._Roderick  --  http://xmlns.com/foaf/0.1/primaryTopic  --  http://en.wikipedia.org/wiki/George_H._Roderick  -- ["Politicians"@en]  (43.00) set by Politicians
http://dbpedia.org/resource/George_H._Roderick  --  http://dbpedia.org/ontology/predecessor  --  http://dbpedia.org/resource/Dewey_Jackson_Short  -- ["Politicians"@en]  (48.00) set by Politicians
http://dbpedia.org/resource/George_H._Roderick  --  http://dbpedia.org/property/after  --  http://dbpedia.org/resource/Chester_R._Davis  -- ["Politicians"@en]  (57.00) set by Politicians
http://dbpedia.org/resource/George_H._Roderick  --  http://dbpedia.org/property/before  --  http://dbpedia.org/resource/Dewey_Jackson_Short  -- ["Politicians"@en]  (58.00) set by Politicians
http://dbpedia.org/resource/George_H._Roderick  --  http://dbpedia.org/property/before  --  http://dbpedia.org/resource/William_F._Schaub  -- ["Politicians"@en]  (58.00) set by Politicians
http://dbpedia.org/resource/George_H._Rode

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://de.dbpedia.org/resource/Dewey_Jackson_Short> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://de.dbpedia.org/resource/Dewey_Jackson_Short> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Washington_DC> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Washington_DC> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k), 'Ca

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Galena,_Missouri> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Galena,_Missouri> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://commons.wikimedia.org/wiki/Special:FilePath/DeweyJacksonShort.jpg> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://commons.wikimedia.org/wiki/Special:FilePath/DeweyJacksonShort.jpg> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Dwight_D._Eisenhower> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Dwight_D._Eisenhower> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !rege

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Missouri's_14th_congressional_district> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Missouri's_14th_congressional_district> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !reg

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Missouri's_7th_congressional_district> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Missouri's_7th_congressional_district> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Carl_Vinson> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Carl_Vinson> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k), 'Catego

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Dewey_Jackson_Short__1> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Dewey_Jackson_Short__1> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Joseph_William_Martin,_Jr.> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Joseph_William_Martin,_Jr.> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , '

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/James_F._Fulbright> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/James_F._Fulbright> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(st

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Charles_H._Brown> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Charles_H._Brown> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k

----- Current Query ------
SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Representative> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Representative> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k), '

KeyboardInterrupt: 

In [None]:
# Dump every entry currently held in expandedGraph, one per output line
# (print() shows each entry via its str() form).
for x in expandedGraph:
    print(x)

# Test Above

In [None]:
%%time
# Expand the query triples level by level, stopping as soon as the literals
# are connected or the depth cap is reached.
levelOfExpansion = 1
# Initialise so the report below works (and shows nothing stale from an
# earlier run) when the loop body never executes.
mtps = []
# The condition must be `and`: with `or` the loop kept issuing queries for as
# long as the literals stayed unconnected, with no upper bound on the depth.
# Starting at 1 with a cap of 3 allows at most two expansion rounds.
while levelOfExpansion < 3 and not checkConnection(literalsToConnect, duplicatedItems):
    #add mtps to a new list
    # expand() hands back the triples to query next and the triples matched in
    # this round (reading based on the variable names -- confirm in expand()).
    qtps, mtps = expand(qtps, expandedGraph, duplicatedItems, predicates, we)
    levelOfExpansion += 1

print("Matching Triples: -----   size: " + str(len(mtps)))
for tp in mtps:
    print(tp)

In [None]:
# Report how many entries expandedGraph holds, then list them one per line.
print("Size of expanded graph: ", len(expandedGraph), sep="")
for tp in expandedGraph:
    print(tp)

In [None]:
# Report how many entries duplicatedItems holds, then list them one per line.
print("Size of duplicatedItems: ", len(duplicatedItems), sep="")
for tp in duplicatedItems:
    print(tp)

In [None]:
# Run the degree-one reduction on the graph. Its return value is not used, so
# it presumably edits expandedGraph in place (confirm in its definition).
reductionTestsDegreeOne(expandedGraph)

# Show what is left after the reduction.
print("Size of expanded graph: ", len(expandedGraph), sep="")
for tp in expandedGraph:
    print(tp)

In [None]:
# Run keepMinEdge on the graph. Its return value is not used, so it presumably
# edits expandedGraph in place (confirm in its definition).
keepMinEdge(expandedGraph)

# Show what is left afterwards.
print("Size of expanded graph: ", len(expandedGraph), sep="")
for tp in expandedGraph:
    print(tp)

# Test Purpose -- Main

In [59]:
# Sanity-check the GloVe similarity on two word pairs.
# evaluate_similarity_score returns one cosine similarity per pair, or -1 for
# a pair in which either word is missing from the embedding map.
k = [['mayor', 'leader'],['wife','spouse']]
result = evaluate_similarity_score(glove_wordmap, k)
print(result)

[0.2869392010279788, 0.5253383210285519]


In [56]:
# Spot-check dw() on an entity name against a predicate name.
# dw is defined earlier in the notebook; presumably a string-similarity score -- confirm there.
dw('Tom Cruise','starring')

0.56

In [None]:
# Substrings used to exclude noisy predicates / subjects; the generated queries
# shown in this notebook's output carry them as case-insensitive !regex(...) filters.
filterStringPredicates = ["wikiPageWikiLink","wikiPageRedirects","wikiPageDisambiguates", "Thing","wikiPageUsesTemplate","rdf-syntax-ns#type"]
filterStringSubjects = ["entity", "Category", "wikidata","owl#Thing", "http://wikidata.dbpedia.org/resource/Q"]
# Start every run from an empty graph and an empty duplicate list.
expandedGraph = []
duplicatedItems = []
# Hard-coded test input for this cell.
line = 'Alberta, admit, province'
# Split the input into the literals to connect and the predicates to look for.
literalsToConnect, predicates = indentifyEntitiesAndPredicates(line.strip())
# Build one seed Triple per literal; the literal is stored both as the triple's
# object and as its seed.
qtps = []
for literal in literalsToConnect:
    triple = Triple()
    triple.setObject(literal)
    triple.setSeeds(literal)
    qtps.append(triple)

# Echo the object of every seed triple.
for tp in qtps:
    print(tp.getObject())

# Test Purpose

# Expand the seed triples level by level, stopping as soon as the literals are
# connected or the depth cap is reached.
levelOfExpansion = 1
# Initialise so the report below works (and shows nothing stale from an
# earlier run) when the loop body never executes, e.g. if checkConnection()
# is already true on entry.
mtps = []
# Starting at 1 with a cap of 3 allows at most two expansion rounds.
while levelOfExpansion < 3 and not checkConnection(literalsToConnect, duplicatedItems):
    #add mtps to a new list
    # expand() hands back the triples to query next and the triples matched in
    # this round (reading based on the variable names -- confirm in expand()).
    qtps, mtps = expand(qtps, expandedGraph, duplicatedItems, predicates, we)
    levelOfExpansion += 1

print("Matching Triples: -----   size: " + str(len(mtps)))
for tp in mtps:
    print(tp)

In [None]:
# Probe isPredicateInDataset (defined earlier in the notebook) with the word 'height'.
print(isPredicateInDataset('height'))

In [None]:
# Ad-hoc query against the configured SPARQL endpoint.
# NOTE: `literal` is assigned here but not used anywhere in this cell.
literal = 'film'
# Fetch every outgoing edge of the World_of_Warcraft resource. In this query
# ?p is the predicate and ?s is bound to the *object* position, despite its name.
queryString = 'select ?s ?p where {<http://dbpedia.org/resource/World_of_Warcraft> ?p ?s}'
# Alternative (disabled): the full filtered UNION query for the Canada resource.
#queryString = "SELECT distinct ?s ?p ?j ?k WHERE { {?s ?p <http://dbpedia.org/resource/Canada> . FILTER (!regex(str(?p), 'wikiPageWikiLink' , 'i') && !regex(str(?p), 'wikiPageRedirects' , 'i') && !regex(str(?p), 'wikiPageDisambiguates' , 'i') && !regex(str(?p), 'Thing' , 'i') && !regex(str(?p), 'wikiPageUsesTemplate' , 'i') && !regex(str(?p), 'rdf-syntax-ns#type' , 'i') && !regex(str(?s), 'entity' , 'i') && !regex(str(?s), 'Category' , 'i') && !regex(str(?s), 'wikidata' , 'i') && !regex(str(?s), 'owl#Thing' , 'i') && !regex(str(?s), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )} UNION {<http://dbpedia.org/resource/Canada> ?j ?k. FILTER (!regex(str(?j), 'wikiPageWikiLink' , 'i') && !regex(str(?j), 'wikiPageRedirects' , 'i') && !regex(str(?j), 'wikiPageDisambiguates' , 'i') && !regex(str(?j), 'Thing' , 'i') && !regex(str(?j), 'wikiPageUsesTemplate' , 'i') && !regex(str(?j), 'rdf-syntax-ns#type' , 'i') && !regex(str(?k), 'entity' , 'i') && !regex(str(?k), 'Category' , 'i') && !regex(str(?k), 'wikidata' , 'i') && !regex(str(?k), 'owl#Thing' , 'i') && !regex(str(?k), 'http://wikidata.dbpedia.org/resource/Q' , 'i') )}}"
sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
# Run the query and decode the JSON response into Python dicts/lists.
results = sparql.query().convert()

# Print each binding as "<predicate>  ---  <value bound to ?s>".
for result in results["results"]["bindings"]:
    print(result["p"]["value"] + "  ---  " + result["s"]["value"])
#print(results["results"]["bindings"])

In [None]:
# Spot-check dw() on a natural-language phrase against a camelCase predicate name.
# dw is defined earlier in the notebook.
print(dw('official color','officialSchoolColor'))

In [76]:
# Probe indentifyEntitiesAndPredicates with a single adjective; the cell prints
# the (literals, predicates) pair it returns.
print(indentifyEntitiesAndPredicates("tall"))

Current literal: tall
0 in property
507 in sub/obj
Current literal: tall
0 in property
507 in sub/obj
Current literal: Tall
0 in property
1790 in sub/obj
(['"tall"@en'], ['tall'])


# Identify Entities and Predicates (E-P) on QALD-5

In [61]:
%%time

# Stop-word list: one whitespace-trimmed entry per line of StopWords.txt.
swl = []
with open('StopWords.txt','r') as sw:
    swlines = sw.readlines()
for x in swlines:
    swl.append(x.strip())

# Read every question up front so the input file is closed before processing.
with open('ttt.txt','r',encoding='utf-8') as f:
    lines = f.readlines()

# For each question: identify its entities/predicates, build the seed triples,
# and record the entities both in the result file and in the cell output.
with open('BaselineJWresult.txt','w',encoding='utf-8') as fw:
    for line in lines:
        # Per-question state is rebuilt from scratch on every iteration.
        filterStringPredicates = [
            "wikiPageWikiLink",
            "wikiPageRedirects",
            "wikiPageDisambiguates",
            "Thing",
            "wikiPageUsesTemplate",
            "rdf-syntax-ns#type",
        ]
        filterStringSubjects = [
            "entity",
            "Category",
            "wikidata",
            "owl#Thing",
            "http://wikidata.dbpedia.org/resource/Q",
        ]
        expandedGraph = []
        duplicatedItems = []

        literalsToConnect, predicates = indentifyEntitiesAndPredicates(line.strip())

        # Question header: the raw line is written as read, followed by '\n'.
        fw.writelines([
            "-----Question------\n",
            line + '\n',
            "-------------------\n",
        ])

        # One seed Triple per literal; the literal is both its object and its seed.
        qtps = []
        for literal in literalsToConnect:
            triple = Triple()
            triple.setObject(literal)
            triple.setSeeds(literal)
            qtps.append(triple)

        fw.write('--entities--\n')
        print('---- entities ----')
        for tp in qtps:
            print(str(tp.getObject()), file=fw)
            print(tp.getObject())
        print(predicates)

to identify-------------
Timezone
timezone
to identify-------------
Current literal: Timezone
0 in property
167 in sub/obj
Current literal: timezone
0 in property
701 in sub/obj
to identify-------------
San Pedro De Atacama
San Pedro de Atacama
san pedro de atacama
to identify-------------
Current literal: San Pedro De Atacama
0 in property
0 in sub/obj
Current literal: San Pedro de Atacama
0 in property
967 in sub/obj
Current literal: san pedro de atacama
0 in property
0 in sub/obj
---- entities ----
"Timezone"@en
"timezone"@en
"San Pedro de Atacama"@en
['timezone', 'San Pedro de Atacama']
to identify-------------
Salt Lake City
salt lake city
to identify-------------
Current literal: Salt Lake City
0 in property
11678 in sub/obj
Current literal: salt lake city
0 in property
0 in sub/obj
to identify-------------
time zone
Time Zone
to identify-------------
Current literal: time zone
757765 in property
---- entities ----
"Salt Lake City"@en
['Salt Lake City', 'time zone']
Wall time: 3.

# Entity Similarity

In [3]:
# Load the label file and strip the wrapper characters from every line.
with open("refinedLabels.dat",'r',encoding = "utf-8") as rl:
    rlLines = rl.readlines()

refinedLabels = []
for x in rlLines:
    newWords = x.strip()
    # Lines starting with a single quote lose 2 leading and 5 trailing
    # characters; all other lines lose 1 leading and 4 trailing characters.
    # (Presumably this removes quoting plus a language tag such as "@en" --
    # confirm against the file format.)
    refinedLabels.append(newWords[2:-5] if newWords.startswith("'") else newWords[1:-4])

print(len(refinedLabels))

14966461


In [36]:
%%time
# Linear scan over all labels for the one scoring highest against testWord.
# Ties keep the earliest label, because only a strictly higher score replaces
# the current best; with no score above 0 the placeholder "Null" is printed.
testWord = "Jack Kerouac"
bestMatch = {"score":0, "word":"Null"}
for x in refinedLabels:
    currentScore = dw(x, testWord)
    if currentScore > bestMatch["score"]:
        bestMatch.update(score=currentScore, word=x)
print(bestMatch["word"])

Jack Kerouac
Wall time: 2min 1s


In [44]:
# Spot-check dw() on two unrelated words; dw is defined earlier in the notebook.
print(dw('wife','starring'))

0.0
