In [140]:
# ! pip install sparqlwrapper
import pandas as pd
import numpy as np
from SPARQLWrapper import SPARQLWrapper, JSON

sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

In [141]:
sparql.setQuery("""
SELECT ?item ?itemLabel ?country ?countryLabel ?participants ?participantsLabel ?winner ?winnerLabel

WHERE {
  ?item wdt:P3450 wd:Q19317;
      wdt:P17 ?country.
  ?item wdt:P3450 wd:Q19317;
      wdt:P1132 ?participants.
  ?item wdt:P3450 wd:Q19317;
      wdt:P1346 ?winner.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
""")

sparql.setReturnFormat(JSON)
results = sparql.query().convert()

In [142]:
results_df = pd.io.json.json_normalize(results['results']['bindings'])
results_df.iloc[1]

  results_df = pd.io.json.json_normalize(results['results']['bindings'])


item.type                                                       uri
item.value                    http://www.wikidata.org/entity/Q46934
winner.type                                                     uri
winner.value                 http://www.wikidata.org/entity/Q676899
participants.datatype      http://www.w3.org/2001/XMLSchema#decimal
participants.type                                           literal
participants.value                                               24
country.type                                                    uri
country.value                    http://www.wikidata.org/entity/Q96
itemLabel.xml:lang                                               en
itemLabel.type                                              literal
itemLabel.value                                 1982 FIFA World Cup
countryLabel.xml:lang                                            en
countryLabel.type                                           literal
countryLabel.value                              

In [143]:
results_df= results_df[['item.value', 'itemLabel.value', 'winnerLabel.value',\
            'participantsLabel.value', 'countryLabel.value']].sort_values(by = "itemLabel.value")
results_df

Unnamed: 0,item.value,itemLabel.value,winnerLabel.value,participantsLabel.value,countryLabel.value
6,http://www.wikidata.org/entity/Q48432,1930 FIFA World Cup,Uruguay national football team,13,Uruguay
22,http://www.wikidata.org/entity/Q1477177,1934 FIFA World Cup,Italy national association football team,16,Italy
11,http://www.wikidata.org/entity/Q131591,1938 FIFA World Cup,Italy national association football team,15,France
12,http://www.wikidata.org/entity/Q132515,1950 FIFA World Cup,Uruguay national football team,13,Brazil
10,http://www.wikidata.org/entity/Q131075,1954 FIFA World Cup,Germany national association football team,16,Switzerland
14,http://www.wikidata.org/entity/Q132533,1958 FIFA World Cup,Brazil national football team,16,Sweden
17,http://www.wikidata.org/entity/Q160813,1962 FIFA World Cup,Brazil national football team,16,Chile
16,http://www.wikidata.org/entity/Q134202,1966 FIFA World Cup,England national association football team,16,United Kingdom
15,http://www.wikidata.org/entity/Q132664,1970 FIFA World Cup,Brazil national football team,16,Mexico
18,http://www.wikidata.org/entity/Q166121,1974 FIFA World Cup,Germany national association football team,16,Germany


# Post processing

In [144]:
# Change the teams name to keep the country name only
results_df['winnerLabel.value'] = [x.split(' ')[0] for x in results_df['winnerLabel.value']]

# 2002 in both South Korea and Japan
results_df = results_df[results_df['itemLabel.value'] != '2002 FIFA World Cup']
results_df = results_df.append({'item.value': 'http://www.wikidata.org/entity/Q19317', 'itemLabel.value': '2002 FIFA World Cup', 'winnerLabel.value': 'Brazil', 'participantsLabel.value': '32', 'countryLabel.value': 'South Korea & Japan'}, ignore_index=True)

# reset the index correctly
results_df.sort_values(by = "itemLabel.value", inplace = True)
results_df = results_df.reset_index(drop=True)

results_df

  results_df = results_df.append({'item.value': 'http://www.wikidata.org/entity/Q19317', 'itemLabel.value': '2002 FIFA World Cup', 'winnerLabel.value': 'Brazil', 'participantsLabel.value': '32', 'countryLabel.value': 'South Korea & Japan'}, ignore_index=True)


Unnamed: 0,item.value,itemLabel.value,winnerLabel.value,participantsLabel.value,countryLabel.value
0,http://www.wikidata.org/entity/Q48432,1930 FIFA World Cup,Uruguay,13,Uruguay
1,http://www.wikidata.org/entity/Q1477177,1934 FIFA World Cup,Italy,16,Italy
2,http://www.wikidata.org/entity/Q131591,1938 FIFA World Cup,Italy,15,France
3,http://www.wikidata.org/entity/Q132515,1950 FIFA World Cup,Uruguay,13,Brazil
4,http://www.wikidata.org/entity/Q131075,1954 FIFA World Cup,Germany,16,Switzerland
5,http://www.wikidata.org/entity/Q132533,1958 FIFA World Cup,Brazil,16,Sweden
6,http://www.wikidata.org/entity/Q160813,1962 FIFA World Cup,Brazil,16,Chile
7,http://www.wikidata.org/entity/Q134202,1966 FIFA World Cup,England,16,United Kingdom
8,http://www.wikidata.org/entity/Q132664,1970 FIFA World Cup,Brazil,16,Mexico
9,http://www.wikidata.org/entity/Q166121,1974 FIFA World Cup,Germany,16,Germany


# Create Quiz Bank

In [145]:
import random
def quiz_bank(df):
    quiz_bank = {}
    for ind,val in df.iterrows(): #['itemLabel.value'].unique():
        df1 = df.drop([ind], axis=0)
        r = [random.randint(0,len(df1)-1) for _ in range(3)]
        
        question_1 = "Quel est le pays organisateur ?"
        r_answer_1 = val['countryLabel.value']
        answers_1 = df1.reset_index().loc[r,'countryLabel.value']
        
        question_2 = "Quel est le pays vainqueur ?"
        r_answer_2 = val['winnerLabel.value']
        answers_2 = df1.reset_index().loc[r,'winnerLabel.value']
        
        question_3 = "Quel est le nombre de pays participants ?"
        r_answer_3 = val['participantsLabel.value']
        answers_3 = df1.reset_index().loc[r,'participantsLabel.value']
        
        questions = [question_1, question_2, question_3]
        answers = [r_answer_1,r_answer_2,r_answer_3]
        alt_answers = [answers_1,answers_2,answers_3]
        
        quiz_bank[val['itemLabel.value']] = {"Question": questions, "R_Answers": answers, "Answers": alt_answers}
    return quiz_bank

In [147]:
quiz = quiz_bank(results_df)
quiz['2006 FIFA World Cup']

{'Question': ['Quel est le pays organisateur ?',
  'Quel est le pays vainqueur ?',
  'Quel est le nombre de pays participants ?'],
 'R_Answers': ['Germany', 'Italy', '32'],
 'Answers': [16    South Korea & Japan
  8                  Mexico
  15                 France
  Name: countryLabel.value, dtype: object,
  16    Brazil
  8     Brazil
  15    France
  Name: winnerLabel.value, dtype: object,
  16    32
  8     16
  15    32
  Name: participantsLabel.value, dtype: object]}