In [None]:
import rdflib
import html
# Load the facts and the type statements into a single graph.
g = rdflib.Graph().parse('./fb15k-wikitop2021-yago-facts.nt', format='ntriples')
g += rdflib.Graph().parse('./fb15k-wikitop2021-yago-full-types.nt', format='ntriples')

from rdflib.namespace import RDF
# Tag every entity listed in the TSV with the popularEntity type.
# The with-block closes the file handle, and iterating the file avoids
# loading every line into memory at once.
with open('fb15k-wikitop2021.tsv') as tsv:
    next(tsv, None)  # skip the first line (presumably a header row)
    for line in tsv:
        # Column 6 holds the URI; [1:-1] drops its first and last character
        # (presumably the surrounding angle brackets - TODO confirm).
        subject = rdflib.URIRef( line.split('\t')[6][1:-1] )
        g.add( (subject, RDF.type, rdflib.URIRef('http://example.com/popularEntity')) )

# Drop image and url triples before writing the graph out as Turtle.
g.remove((None, rdflib.URIRef('http://schema.org/image') , None))
g.remove((None, rdflib.URIRef('http://schema.org/url') , None))
g.serialize(destination='20q-updated.ttl', format='turtle')

In [None]:
# List any schema.org/url triples attached to popular entities in the
# in-memory graph, one result row per printed line.
url_check_query = """
select *  where 
{
    ?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.com/popularEntity> .
    ?s <http://schema.org/url> ?o .
} 
"""
r = g.query(url_check_query)
for b in r.bindings:
    print(*(f'{value}' for value in b.values()))

## Code

In [26]:
%load_ext ipython_sparql_pandas

The ipython_sparql_pandas extension is already loaded. To reload it, use:
  %reload_ext ipython_sparql_pandas


In [35]:
from SPARQLWrapper import SPARQLWrapper, JSON
# Shared SPARQL client used by numberleft, popentities, game_reset and the game cells.
# NOTE(review): the endpoint is hard-coded to a machine-specific host name
# (DESKTOP-CELL0BF); it has to be edited before running on another machine.
sparql = SPARQLWrapper('http://DESKTOP-CELL0BF:7200/repositories/20qbig')
# Ask the endpoint for JSON so results can be read as nested dicts.
sparql.setReturnFormat(JSON)

In [36]:
# Module-level game state, read and mutated by generateQuestion:
#   PosAttr     - SPARQL triple patterns the answer must match (attributes answered "yes")
#   NegAttr     - FILTER NOT EXISTS clauses (attributes answered "no")
#   AttrHistory - "<predicate> <object>" pairs already asked, so they are not asked twice
PosAttr=['?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.com/popularEntity> .']
NegAttr=[]
AttrHistory = ['<http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.com/popularEntity>']
def extractCountAndAttr(result):
    """Split a SPARQL JSON result into parallel object and predicate lists.

    Parameters
    ----------
    result : dict
        Converted SPARQL JSON response; every row under
        result["results"]["bindings"] must carry an 'o' and a 'p' binding.

    Returns
    -------
    tuple
        (objects, predicates): two lists holding the "value" of each row's
        'o' and 'p' binding, in result order.
    """
    pairs = [
        (row['o']["value"], row['p']["value"])
        for row in result["results"]["bindings"]
    ]
    objects = [obj for obj, _ in pairs]
    predicates = [pred for _, pred in pairs]
    return (objects, predicates)

def generateQuestion(listOfPred, listOfObj, index):
    """Ask the user about the next unasked attribute and update the filters.

    Starting at ``index``, candidates that are already in AttrHistory are
    skipped. The first new candidate is put to the user:

    * an answer starting with "y" adds a triple pattern to PosAttr,
    * an answer starting with "n" adds a FILTER NOT EXISTS clause to NegAttr,
    * any other answer moves on to the next candidate.

    Answers are stripped and lower-cased first, so "Yes" and " no" count.
    Every candidate that is asked is recorded in AttrHistory.

    Parameters
    ----------
    listOfPred, listOfObj : list
        Parallel lists of predicate and object values.
    index : int
        Position of the first candidate to consider.

    Returns
    -------
    tuple
        (PosFilters, NegFilters): PosAttr and NegAttr, each joined with
        newlines. If the candidates run out before the user answers yes or
        no, the current filters are returned unchanged instead of raising
        IndexError.

    Notes
    -----
    Objects are always written as IRIs (``<...>``).
    NOTE(review): a literal-valued object would give a pattern that does not
    match the intended triple - confirm whether literals can reach here.
    """
    # Bound the scan by the shorter list so a candidate is never indexed
    # past the end of either one.
    limit = min(len(listOfPred), len(listOfObj))

    while index < limit:
        pred = str(listOfPred[index])
        obj = str(listOfObj[index])
        index += 1

        attribute = '<' + pred + '> <' + obj + '>'
        if attribute in AttrHistory:
            continue
        AttrHistory.append(attribute)

        userAnswer = input(f'Does the thing you are looking for have the attribute: {pred.split("/")[-1]} {obj.split("/")[-1]}?' )
        answer = userAnswer.strip().lower()

        if answer.startswith('y'):
            PosAttr.append('?s ' + attribute + '.')
            break
        if answer.startswith('n'):
            # This is a plain string, so the doubled braces are literal;
            # SPARQL accepts the resulting nested group pattern.
            NegAttr.append('FILTER NOT EXISTS {{ \n ?s ' + attribute + '. }}')
            break
        # Any other answer: leave the filters alone and try the next candidate.

    NegFilters = "\n".join(NegAttr)
    PosFilters = "\n".join(PosAttr)
    return (PosFilters, NegFilters)


  
def updateQuery(left, PosFilters, NegFilters):
    """Build the SPARQL query that proposes the next attributes to ask about.

    The query counts, for every (?p, ?o) pair found on subjects matching the
    filters, how many triples carry that pair, and returns the 10 pairs whose
    count is closest to ``left``.

    Parameters
    ----------
    left : number
        Target count; the game cells pass half the number of remaining
        entities.
    PosFilters : str
        Triple patterns the subject must match.
    NegFilters : str
        FILTER NOT EXISTS clauses the subject must satisfy.

    Returns
    -------
    str
        The SPARQL query text.
    """
    # The blank-node FILTER has to sit inside the WHERE group; placed after
    # the closing brace it is a SPARQL syntax error.
    FilterQuestion =  (f"""
            select (count(*) as ?count) ?p ?o  where 
            {{
            
            {PosFilters}
             ?s ?p ?o .
            {NegFilters}
            FILTER (!isBlank(?o)) 
            }} 
            group by ?p ?o 
            ORDER BY ABS( {left} - ?count )
            Limit 10
            
            """) 
        
    return FilterQuestion

In [37]:

def numberleft(PosFilters,NegFilters): 
    """Return how many popular entities still satisfy the given filters.

    Parameters
    ----------
    PosFilters : str
        Triple patterns the entity must match.
    NegFilters : str
        FILTER NOT EXISTS clauses the entity must satisfy.

    Returns
    -------
    int
        Number of result rows returned by the endpoint.
    """
    # No FILTER on ?o here: this query never binds ?o, and a filter over an
    # unbound variable evaluates to an error, which removes every solution.
    query =  f"""
            select ?s where 
            {{
            
            {PosFilters}
            ?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.com/popularEntity> .
            {NegFilters} 
            }}
            
            """ 
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    qres = sparql.query().convert() 
    return len(qres["results"]["bindings"])

In [38]:
# Gives back the popular entities left after querying
def popentities(PosFilters,NegFilters):
    """Return the popular entities that still satisfy the given filters.

    Parameters
    ----------
    PosFilters : str
        Triple patterns the entity must match.
    NegFilters : str
        FILTER NOT EXISTS clauses the entity must satisfy.

    Returns
    -------
    list
        The SPARQL JSON result rows; each row carries the entity under the
        's' binding.
    """
    # No FILTER on ?o here: this query never binds ?o, and a filter over an
    # unbound variable evaluates to an error, which removes every solution.
    query =  f"""
            select ?s where 
            {{
            
            {PosFilters}
            ?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.com/popularEntity> .
            {NegFilters} 
            }}
            
            """ 
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    qres = sparql.query().convert() 
    return qres["results"]["bindings"]

In [39]:
# Resets all game variables
def game_reset():
    """Create fresh game state and run the opening attribute query.

    Returns
    -------
    tuple
        (left, qres, PosAttr, NegAttr, AttrHistory) where
        left        - number of popular entities, as reported by numberleft,
        qres        - JSON result of the opening query: the 10 (?p, ?o) pairs
                      whose count is closest to left / 2,
        PosAttr     - new list holding only the popularEntity type pattern,
        NegAttr     - new list holding one empty string,
        AttrHistory - new list holding only the popularEntity attribute.

    The lists are local to this function; the caller has to rebind the
    module-level names for generateQuestion to use them.
    """
    PosAttr= ['?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.com/popularEntity> .']
    NegAttr= ['']
    AttrHistory = ['<http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.com/popularEntity>']
    left = numberleft("".join(PosAttr), "".join(NegAttr))
    # Aim for the attribute that splits the remaining entities in half.
    FilterQuestion =  f"""
        select (count(*) as ?count) ?p ?o  where 
        {{

        ?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://example.com/popularEntity> .
        ?s ?p ?o .
        FILTER (!isBlank(?o)) 
        }} 
        group by ?p ?o 
        ORDER BY ABS( {left/2} - ?count )
        limit 10
        """
    sparql.setQuery(FilterQuestion)
    sparql.setReturnFormat(JSON)
    qres = sparql.query().convert()
    return (left, qres, PosAttr, NegAttr, AttrHistory)

In [41]:
#without printing results
import random

# Resets all variables. Rebinding PosAttr / NegAttr / AttrHistory at cell level
# is what makes generateQuestion work on the fresh lists.
left,qres,PosAttr,NegAttr,AttrHistory = game_reset()
# Start from the reset filters so the answer section below still has valid
# filters when the loop exits before the first question is asked.
PosFilters, NegFilters = "\n".join(PosAttr), "\n".join(NegAttr)
i=0
outofguess = []

while i < 20:
   
    # list of Pred and Obj for questions
    listOfObj ,listOfPred = extractCountAndAttr(qres)
    
    # stop asking once at most 1 popular entity is left
    if left<=1:
        break
    
    # ask question and add filters to query
    print('Question: %s' %(i+1))
    PosFilters, NegFilters = generateQuestion(listOfPred, listOfObj, 0)
    
    # number of popular entities left
    left = numberleft(PosFilters, NegFilters)
    
    # run query for the next round of candidate attributes
    query = updateQuery(left/2,PosFilters, NegFilters)
    sparql.setQuery(query)
    qres = sparql.queryAndConvert()
    
    i+=1

# Fetch the remaining entities once; every outcome below works from this list.
candidates = popentities(PosFilters, NegFilters)

if not candidates:
    # Nothing matches the filters; random.choice would fail on an empty list.
    print('No matching entity is left')

elif i != 20:
    # The loop stopped early, so the remaining entity is the answer.
    for attr in candidates:
        print(f'Your Answer is: {attr["s"]["value"].split("/")[-1]}')

else:
    # Ran out of questions: guess one of the remaining entities at random.
    print('Final Question:')
    for j in candidates:
        outofguess.append(f'{j["s"]["value"].split("/")[-1][:40]:40s}')
    guess = random.choice(outofguess)
    print(guess)
    answer = (input('Is this correct?').startswith('y'))
    if answer is True:
        print('Win')
    else:
        print('game over')

SyntaxError: invalid syntax (<ipython-input-41-0691293bdce2>, line 38)

In [None]:
# with count print
import random

# Resets all variables. Rebinding PosAttr / NegAttr / AttrHistory at cell level
# is what makes generateQuestion work on the fresh lists.
left,qres,PosAttr,NegAttr,AttrHistory = game_reset()
# Start from the reset filters so the answer section below still has valid
# filters when the loop exits before the first question is asked.
PosFilters, NegFilters = "\n".join(PosAttr), "\n".join(NegAttr)
i=0
outofguess = []

while i < 20:
    
    # list of Pred and Obj for questions
    listOfObj ,listOfPred = extractCountAndAttr(qres)
    
    # stop asking once at most 1 popular entity is left
    if left<=1:
        break
     
    # ask question and add filters to query
    print('Question: %s' %(i+1))
    PosFilters, NegFilters = generateQuestion(listOfPred, listOfObj, 0)
    
    # number of popular entities left
    left = numberleft(PosFilters, NegFilters)
    
    # run query for the next round of candidate attributes
    query = updateQuery(left/2,PosFilters, NegFilters)
    sparql.setQuery(query)
    qres = sparql.queryAndConvert()
    
    # prints the remaining count and the splits for the top Pred and Objs
    print(left)
    for r in qres["results"]["bindings"]:
        print(*[f'{x.split("/")[-1][:40]:40s}' for x in [r['count']["value"],r['p']["value"],r['o']["value"]]])
        
    i+=1

# Fetch the remaining entities once; every outcome below works from this list.
candidates = popentities(PosFilters, NegFilters)

if not candidates:
    # Nothing matches the filters; random.choice would fail on an empty list.
    print('No matching entity is left')

elif i != 20:
    # The loop stopped early, so the remaining entity is the answer.
    for attr in candidates:
        print(f'Your Answer is: {attr["s"]["value"].split("/")[-1]}')
        
else:
    # Ran out of questions: guess one of the remaining entities at random.
    print('Final Question:')
    for j in candidates:
        # Each row is a dict keyed by variable name, so read the 's' binding.
        outofguess.append(f'{j["s"]["value"].split("/")[-1][:40]:40s}')
    guess = random.choice(outofguess)
    print(guess)
    answer = (input('Is this correct?').startswith('y'))
    if answer is True:
        print('Win')
    else:
        print('game over')

In [None]:
# with guess print
import random

# Resets all variables. Rebinding PosAttr / NegAttr / AttrHistory at cell level
# is what makes generateQuestion work on the fresh lists.
left, qres, PosAttr, NegAttr, AttrHistory = game_reset()
# Start from the reset filters so the answer section below still has valid
# filters when the loop exits before the first question is asked.
PosFilters, NegFilters = "\n".join(PosAttr), "\n".join(NegAttr)
i=0
outofguess = []

while i < 20:
   
    # list of Pred and Obj for questions
    listOfObj ,listOfPred = extractCountAndAttr(qres)
    
    # stop asking once at most 1 popular entity is left
    if left<=1:
        break
        
    # ask question and add filters to query
    print('Question: %s' %(i+1))
    PosFilters, NegFilters = generateQuestion(listOfPred, listOfObj, 0)
    
    # number of popular entities left
    left = numberleft(PosFilters, NegFilters)
    
    # run query for the next round of candidate attributes
    query = updateQuery(left/2, PosFilters, NegFilters)
    sparql.setQuery(query)
    qres = sparql.queryAndConvert()
    
    # prints possible entities left
    print('Possible Guesses:')
    for subject in popentities(PosFilters, NegFilters):
        print(subject['s']['value'].split("/")[-1])
        
    print()
    i+=1

# Fetch the remaining entities once; every outcome below works from this list.
candidates = popentities(PosFilters, NegFilters)

if not candidates:
    # Nothing matches the filters; random.choice would fail on an empty list.
    print('No matching entity is left')

elif i != 20:
    # The loop stopped early, so the remaining entity is the answer.
    for attr in candidates:
        print(f'Your Answer is: {attr["s"]["value"].split("/")[-1]}')
        
else:
    # Ran out of questions: guess one of the remaining entities at random.
    print('Final Question:')
    for j in candidates:
        # Each row is a dict keyed by variable name, so read the 's' binding.
        outofguess.append(f'{j["s"]["value"].split("/")[-1][:40]:40s}')
    guess = random.choice(outofguess)
    print(guess)
    answer = (input('Is this correct?').startswith('y'))
    if answer is True:
        print('Win')
    else:
        print('game over')

### Notes
- football teams 
    - some football teams do not have the type football team

- ask "does it have one of the following attributes: type human, ..., ...?"
    - to deal with splits that are too small
    
- try to include numerical data, e.g. "older than 1950?"

- make attributes of the form "does it have relation r to any object o?"
- look at incoming relations, not just outgoing ones
