In [10]:
# ! pip install sparqlwrapper
import pandas as pd
import numpy as np
from SPARQLWrapper import SPARQLWrapper, JSON

sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

In [11]:
# Q19317 : FIFA World Cup
# P3450 : sports season of league or competition
# P17 : country
# P1132 : number of participants
# P1346 : winner
# P3279 : statistical leader | meilleur buteur

sparql.setQuery("""
SELECT ?item ?itemLabel ?country ?countryLabel ?participants ?participantsLabel ?winner ?winnerLabel ?leader ?leaderLabel

WHERE {
  ?item wdt:P3450 wd:Q19317;
      wdt:P17 ?country.
  ?item wdt:P3450 wd:Q19317;
      wdt:P1132 ?participants.
  ?item wdt:P3450 wd:Q19317;
      wdt:P1346 ?winner.
  ?item wdt:P3450 wd:Q19317;
      wdt:P3279 ?leader.

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
""")

sparql.setReturnFormat(JSON)
results = sparql.query().convert()

In [12]:
# sparql.setQuery("""

# SELECT ?item ?itemLabel ?mascot ?mascotLabel

# WHERE {
#   ?item wdt:P3450 wd:Q19317;
#       wdt:P822 ?mascot.

#   SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en".
# }

# """)

# sparql.setReturnFormat(JSON)
# mascots = sparql.query().convert()

# mascots_df = pd.io.json.json_normalize(mascots['results']['bindings'])
# mascots

In [13]:
results

{'head': {'vars': ['item',
   'itemLabel',
   'country',
   'countryLabel',
   'participants',
   'participantsLabel',
   'winner',
   'winnerLabel',
   'leader',
   'leaderLabel']},
 'results': {'bindings': [{'item': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q37285'},
    'leader': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q80471'},
    'winner': {'type': 'uri',
     'value': 'http://www.wikidata.org/entity/Q676899'},
    'participants': {'datatype': 'http://www.w3.org/2001/XMLSchema#decimal',
     'type': 'literal',
     'value': '32'},
    'country': {'type': 'uri', 'value': 'http://www.wikidata.org/entity/Q183'},
    'itemLabel': {'xml:lang': 'en',
     'type': 'literal',
     'value': '2006 FIFA World Cup'},
    'countryLabel': {'xml:lang': 'en', 'type': 'literal', 'value': 'Germany'},
    'participantsLabel': {'type': 'literal', 'value': '32'},
    'winnerLabel': {'xml:lang': 'en',
     'type': 'literal',
     'value': 'Italy national assoc

In [14]:
results_df = pd.io.json.json_normalize(results['results']['bindings'])
results_df.iloc[1]

  results_df = pd.io.json.json_normalize(results['results']['bindings'])


item.type                                                       uri
item.value                    http://www.wikidata.org/entity/Q46934
leader.type                                                     uri
leader.value                 http://www.wikidata.org/entity/Q178628
winner.type                                                     uri
winner.value                 http://www.wikidata.org/entity/Q676899
participants.datatype      http://www.w3.org/2001/XMLSchema#decimal
participants.type                                           literal
participants.value                                               24
country.type                                                    uri
country.value                    http://www.wikidata.org/entity/Q96
itemLabel.xml:lang                                               en
itemLabel.type                                              literal
itemLabel.value                                 1982 FIFA World Cup
countryLabel.xml:lang                           

In [15]:
results_df= results_df[['item.value', 'itemLabel.value', 'winnerLabel.value',\
            'participantsLabel.value', 'countryLabel.value', 'leaderLabel.value']].sort_values(by = "itemLabel.value")
results_df

Unnamed: 0,item.value,itemLabel.value,winnerLabel.value,participantsLabel.value,countryLabel.value,leaderLabel.value
6,http://www.wikidata.org/entity/Q48432,1930 FIFA World Cup,Uruguay national football team,13,Uruguay,Guillermo Stábile
34,http://www.wikidata.org/entity/Q1477177,1934 FIFA World Cup,Italy national association football team,16,Italy,Oldřich Nejedlý
12,http://www.wikidata.org/entity/Q131591,1938 FIFA World Cup,Italy national association football team,15,France,Leônidas
13,http://www.wikidata.org/entity/Q132515,1950 FIFA World Cup,Uruguay national football team,13,Brazil,Ademir de Menezes
11,http://www.wikidata.org/entity/Q131075,1954 FIFA World Cup,Germany national association football team,16,Switzerland,Sándor Kocsis
15,http://www.wikidata.org/entity/Q132533,1958 FIFA World Cup,Brazil national football team,16,Sweden,Just Fontaine
23,http://www.wikidata.org/entity/Q160813,1962 FIFA World Cup,Brazil national football team,16,Chile,Dražan Jerković
22,http://www.wikidata.org/entity/Q160813,1962 FIFA World Cup,Brazil national football team,16,Chile,Leonel Sánchez
21,http://www.wikidata.org/entity/Q160813,1962 FIFA World Cup,Brazil national football team,16,Chile,Valentin Ivanov
20,http://www.wikidata.org/entity/Q160813,1962 FIFA World Cup,Brazil national football team,16,Chile,Vavá


# Post processing

In [16]:
# Change the teams name to keep the country name only
results_df['winnerLabel.value'] = [x.split(' ')[0] for x in results_df['winnerLabel.value']]

# 2002 in both South Korea and Japan
results_df = results_df[results_df['itemLabel.value'] != '2002 FIFA World Cup']
results_df = results_df.append({'item.value': 'http://www.wikidata.org/entity/Q19317', 'itemLabel.value': '2002 FIFA World Cup',
'winnerLabel.value': 'Brazil', 'participantsLabel.value': '32',
'countryLabel.value': 'South Korea & Japan',
'leaderLabel.value': 'José Luis Chilavert'}, ignore_index=True)

# leaderLabel.value : cast to string
results_df['leaderLabel.value'] = results_df['leaderLabel.value'].astype(str)
# concatenate the leaders for each year
results_df = results_df.groupby(
    ['item.value', 'itemLabel.value', 'winnerLabel.value', 'participantsLabel.value', 'countryLabel.value']
        ).agg({'leaderLabel.value': ', '.join}).reset_index()
# keep an only leader for each year
results_df.loc[results_df['itemLabel.value'] == '1962 FIFA World Cup', 'leaderLabel.value'] = 'Garrincha'
results_df.loc[results_df['itemLabel.value'] == '1994 FIFA World Cup', 'leaderLabel.value'] = 'Oleg Salenko'
results_df.loc[results_df['itemLabel.value'] == '2010 FIFA World Cup', 'leaderLabel.value'] = 'Thomas Müller'
results_df.loc[results_df['itemLabel.value'] == '2022 FIFA World Cup', 'leaderLabel.value'] = 'Kylian Mbappé'

# reset the index correctly
results_df.sort_values(by = "itemLabel.value", inplace = True)
results_df = results_df.reset_index(drop=True)

results_df

  results_df = results_df.append({'item.value': 'http://www.wikidata.org/entity/Q19317', 'itemLabel.value': '2002 FIFA World Cup',


Unnamed: 0,item.value,itemLabel.value,winnerLabel.value,participantsLabel.value,countryLabel.value,leaderLabel.value
0,http://www.wikidata.org/entity/Q48432,1930 FIFA World Cup,Uruguay,13,Uruguay,Guillermo Stábile
1,http://www.wikidata.org/entity/Q1477177,1934 FIFA World Cup,Italy,16,Italy,Oldřich Nejedlý
2,http://www.wikidata.org/entity/Q131591,1938 FIFA World Cup,Italy,15,France,Leônidas
3,http://www.wikidata.org/entity/Q132515,1950 FIFA World Cup,Uruguay,13,Brazil,Ademir de Menezes
4,http://www.wikidata.org/entity/Q131075,1954 FIFA World Cup,Germany,16,Switzerland,Sándor Kocsis
5,http://www.wikidata.org/entity/Q132533,1958 FIFA World Cup,Brazil,16,Sweden,Just Fontaine
6,http://www.wikidata.org/entity/Q160813,1962 FIFA World Cup,Brazil,16,Chile,Garrincha
7,http://www.wikidata.org/entity/Q134202,1966 FIFA World Cup,England,16,United Kingdom,Eusébio
8,http://www.wikidata.org/entity/Q132664,1970 FIFA World Cup,Brazil,16,Mexico,Gerd Müller
9,http://www.wikidata.org/entity/Q166121,1974 FIFA World Cup,Germany,16,Germany,Grzegorz Lato


# Create Quiz Bank

In [17]:
import random
def quiz_bank(df):
    quiz_bank = {}
    for ind,val in df.iterrows(): #['itemLabel.value'].unique():
        df1 = df.drop([ind], axis=0)
        r = [random.randint(0,len(df1)-1) for _ in range(3)]
        
        question_1 = "Quel est le pays organisateur ?"
        r_answer_1 = val['countryLabel.value']
        answers_1 = df1.reset_index().loc[r,'countryLabel.value']
        
        question_2 = "Quel est le pays vainqueur ?"
        r_answer_2 = val['winnerLabel.value']
        answers_2 = df1.reset_index().loc[r,'winnerLabel.value']
        
        question_3 = "Quel est le nombre de pays participants ?"
        r_answer_3 = val['participantsLabel.value']
        answers_3 = df1.reset_index().loc[r,'participantsLabel.value']

        question_4 = "Qui est le meilleur buteur ?"
        r_answer_4 = val['leaderLabel.value']
        answers_4 = df1.reset_index().loc[r,'leaderLabel.value']
        
        questions = [question_1, question_2, question_3, question_4]
        answers = [r_answer_1,r_answer_2,r_answer_3, r_answer_4]
        alt_answers = [answers_1,answers_2,answers_3, answers_4]
        
        quiz_bank[val['itemLabel.value']] = {"Question": questions, "R_Answers": answers, "Answers": alt_answers}
    return quiz_bank

In [18]:
quiz = quiz_bank(results_df)
quiz['2006 FIFA World Cup']

{'Question': ['Quel est le pays organisateur ?',
  'Quel est le pays vainqueur ?',
  'Quel est le nombre de pays participants ?',
  'Qui est le meilleur buteur ?'],
 'R_Answers': ['Germany', 'Italy', '32', 'Miroslav Klose'],
 'Answers': [3    Brazil
  6     Chile
  3    Brazil
  Name: countryLabel.value, dtype: object,
  3    Uruguay
  6     Brazil
  3    Uruguay
  Name: winnerLabel.value, dtype: object,
  3    13
  6    16
  3    13
  Name: participantsLabel.value, dtype: object,
  3    Ademir de Menezes
  6            Garrincha
  3    Ademir de Menezes
  Name: leaderLabel.value, dtype: object]}