In [200]:
# ! pip install sparqlwrapper
import pandas as pd
import numpy as np
from SPARQLWrapper import SPARQLWrapper, JSON

sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

In [201]:
# Q19317 : FIFA World Cup
# P3450 : sports season of league or competition
# P17 : country
# P1132 : number of participants
# P1346 : winner
# P3279 : statistical leader | meilleur buteur

sparql.setQuery("""
SELECT ?item ?itemLabel ?country ?countryLabel ?participants ?participantsLabel ?winner ?winnerLabel ?leader ?leaderLabel

WHERE {
  ?item wdt:P3450 wd:Q19317;
      wdt:P17 ?country;
      wdt:P1132 ?participants;
      wdt:P1346 ?winner;
      wdt:P3279 ?leader.

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
""")

sparql.setReturnFormat(JSON)
results = sparql.query().convert()

In [202]:
sparql.setQuery("""
SELECT ?item ?itemLabel ?mascot ?mascotLabel

WHERE {
  ?item wdt:P3450 wd:Q19317;
      wdt:P822 ?mascot.

  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
""")

sparql.setReturnFormat(JSON)
mascots = sparql.query().convert()

In [203]:
# results

In [204]:
results_df = pd.io.json.json_normalize(results['results']['bindings'])
results_df.iloc[1]

  results_df = pd.io.json.json_normalize(results['results']['bindings'])


item.type                                                       uri
item.value                    http://www.wikidata.org/entity/Q46934
leader.type                                                     uri
leader.value                 http://www.wikidata.org/entity/Q178628
winner.type                                                     uri
winner.value                 http://www.wikidata.org/entity/Q676899
participants.datatype      http://www.w3.org/2001/XMLSchema#decimal
participants.type                                           literal
participants.value                                               24
country.type                                                    uri
country.value                    http://www.wikidata.org/entity/Q96
itemLabel.xml:lang                                               en
itemLabel.type                                              literal
itemLabel.value                                 1982 FIFA World Cup
countryLabel.xml:lang                           

In [205]:
results_df = results_df[['item.value', 'itemLabel.value', 'winnerLabel.value',\
            'participantsLabel.value', 'countryLabel.value', 'leaderLabel.value']].sort_values(by = "itemLabel.value")
results_df

Unnamed: 0,item.value,itemLabel.value,winnerLabel.value,participantsLabel.value,countryLabel.value,leaderLabel.value
6,http://www.wikidata.org/entity/Q48432,1930 FIFA World Cup,Uruguay national football team,13,Uruguay,Guillermo Stábile
34,http://www.wikidata.org/entity/Q1477177,1934 FIFA World Cup,Italy national association football team,16,Italy,Oldřich Nejedlý
12,http://www.wikidata.org/entity/Q131591,1938 FIFA World Cup,Italy national association football team,15,France,Leônidas
13,http://www.wikidata.org/entity/Q132515,1950 FIFA World Cup,Uruguay national football team,13,Brazil,Ademir de Menezes
11,http://www.wikidata.org/entity/Q131075,1954 FIFA World Cup,Germany national association football team,16,Switzerland,Sándor Kocsis
15,http://www.wikidata.org/entity/Q132533,1958 FIFA World Cup,Brazil national football team,16,Sweden,Just Fontaine
23,http://www.wikidata.org/entity/Q160813,1962 FIFA World Cup,Brazil national football team,16,Chile,Dražan Jerković
22,http://www.wikidata.org/entity/Q160813,1962 FIFA World Cup,Brazil national football team,16,Chile,Leonel Sánchez
21,http://www.wikidata.org/entity/Q160813,1962 FIFA World Cup,Brazil national football team,16,Chile,Valentin Ivanov
20,http://www.wikidata.org/entity/Q160813,1962 FIFA World Cup,Brazil national football team,16,Chile,Vavá


In [206]:
mascots_df = pd.io.json.json_normalize(mascots['results']['bindings'])
mascots_df.iloc[1]

  mascots_df = pd.io.json.json_normalize(mascots['results']['bindings'])


item.type                                                   uri
item.value                http://www.wikidata.org/entity/Q46934
mascot.type                                                 uri
mascot.value            http://www.wikidata.org/entity/Q2693211
itemLabel.xml:lang                                           en
itemLabel.type                                          literal
itemLabel.value                             1982 FIFA World Cup
mascotLabel.xml:lang                                         en
mascotLabel.type                                        literal
mascotLabel.value                                     Naranjito
Name: 1, dtype: object

In [207]:
mascots_df = mascots_df[['itemLabel.value', 'mascotLabel.value']].sort_values(by = "itemLabel.value")
mascots_df

Unnamed: 0,itemLabel.value,mascotLabel.value
10,1966 FIFA World Cup,World Cup Willie
9,1970 FIFA World Cup,Menganito
11,1974 FIFA World Cup,Tip and Tap
3,1978 FIFA World Cup,Gauchito
1,1982 FIFA World Cup,Naranjito
2,1986 FIFA World Cup,Gauchito
8,1990 FIFA World Cup,Ciao
7,1994 FIFA World Cup,Striker
6,1998 FIFA World Cup,Footix
4,2002 FIFA World Cup,"Ato, Kaz and Nik"


In [208]:
results_df = results_df.merge(mascots_df, how='left', on='itemLabel.value')
results_df

Unnamed: 0,item.value,itemLabel.value,winnerLabel.value,participantsLabel.value,countryLabel.value,leaderLabel.value,mascotLabel.value
0,http://www.wikidata.org/entity/Q48432,1930 FIFA World Cup,Uruguay national football team,13,Uruguay,Guillermo Stábile,
1,http://www.wikidata.org/entity/Q1477177,1934 FIFA World Cup,Italy national association football team,16,Italy,Oldřich Nejedlý,
2,http://www.wikidata.org/entity/Q131591,1938 FIFA World Cup,Italy national association football team,15,France,Leônidas,
3,http://www.wikidata.org/entity/Q132515,1950 FIFA World Cup,Uruguay national football team,13,Brazil,Ademir de Menezes,
4,http://www.wikidata.org/entity/Q131075,1954 FIFA World Cup,Germany national association football team,16,Switzerland,Sándor Kocsis,
5,http://www.wikidata.org/entity/Q132533,1958 FIFA World Cup,Brazil national football team,16,Sweden,Just Fontaine,
6,http://www.wikidata.org/entity/Q160813,1962 FIFA World Cup,Brazil national football team,16,Chile,Dražan Jerković,
7,http://www.wikidata.org/entity/Q160813,1962 FIFA World Cup,Brazil national football team,16,Chile,Leonel Sánchez,
8,http://www.wikidata.org/entity/Q160813,1962 FIFA World Cup,Brazil national football team,16,Chile,Valentin Ivanov,
9,http://www.wikidata.org/entity/Q160813,1962 FIFA World Cup,Brazil national football team,16,Chile,Vavá,


# Post processing

In [209]:
# Change the teams name to keep the country name only
results_df['winnerLabel.value'] = [x.split(' ')[0] for x in results_df['winnerLabel.value']]

# 2002 in both South Korea and Japan
results_df = results_df[results_df['itemLabel.value'] != '2002 FIFA World Cup']
results_df = results_df.append({'item.value': 'http://www.wikidata.org/entity/Q19317', 'itemLabel.value': '2002 FIFA World Cup',
'winnerLabel.value': 'Brazil', 'participantsLabel.value': '32',
'countryLabel.value': 'South Korea & Japan',
'leaderLabel.value': 'Ronaldo', 'mascotLabel.value': 'Ato, Kaz and Nik'}, ignore_index=True)

# leaderLabel.value : cast to string
results_df['leaderLabel.value'] = results_df['leaderLabel.value'].astype(str)
# concatenate the leaders for each year
results_df = results_df.groupby(
    ['item.value', 'itemLabel.value', 'winnerLabel.value', 'participantsLabel.value', 'countryLabel.value', 'mascotLabel.value'], dropna=False
        ).agg({'leaderLabel.value': ', '.join}).reset_index()
# keep an only leader for each year
results_df.loc[results_df['itemLabel.value'] == '1962 FIFA World Cup', 'leaderLabel.value'] = 'Garrincha'
results_df.loc[results_df['itemLabel.value'] == '1994 FIFA World Cup', 'leaderLabel.value'] = 'Oleg Salenko'
results_df.loc[results_df['itemLabel.value'] == '2010 FIFA World Cup', 'leaderLabel.value'] = 'Thomas Müller'
results_df.loc[results_df['itemLabel.value'] == '2022 FIFA World Cup', 'leaderLabel.value'] = 'Kylian Mbappé'

# reset the index correctly
results_df.sort_values(by = "itemLabel.value", inplace = True)
results_df = results_df.reset_index(drop=True)

results_df

  results_df = results_df.append({'item.value': 'http://www.wikidata.org/entity/Q19317', 'itemLabel.value': '2002 FIFA World Cup',


Unnamed: 0,item.value,itemLabel.value,winnerLabel.value,participantsLabel.value,countryLabel.value,mascotLabel.value,leaderLabel.value
0,http://www.wikidata.org/entity/Q48432,1930 FIFA World Cup,Uruguay,13,Uruguay,,Guillermo Stábile
1,http://www.wikidata.org/entity/Q1477177,1934 FIFA World Cup,Italy,16,Italy,,Oldřich Nejedlý
2,http://www.wikidata.org/entity/Q131591,1938 FIFA World Cup,Italy,15,France,,Leônidas
3,http://www.wikidata.org/entity/Q132515,1950 FIFA World Cup,Uruguay,13,Brazil,,Ademir de Menezes
4,http://www.wikidata.org/entity/Q131075,1954 FIFA World Cup,Germany,16,Switzerland,,Sándor Kocsis
5,http://www.wikidata.org/entity/Q132533,1958 FIFA World Cup,Brazil,16,Sweden,,Just Fontaine
6,http://www.wikidata.org/entity/Q160813,1962 FIFA World Cup,Brazil,16,Chile,,Garrincha
7,http://www.wikidata.org/entity/Q134202,1966 FIFA World Cup,England,16,United Kingdom,World Cup Willie,Eusébio
8,http://www.wikidata.org/entity/Q132664,1970 FIFA World Cup,Brazil,16,Mexico,Menganito,Gerd Müller
9,http://www.wikidata.org/entity/Q166121,1974 FIFA World Cup,Germany,16,Germany,Tip and Tap,Grzegorz Lato


# Create Quiz Bank

In [210]:
import random
def quiz_bank(df):
    quiz_bank = {}
    for ind,val in df.iterrows(): #['itemLabel.value'].unique():
        df1 = df.drop([ind], axis=0)
        r = [random.randint(0,len(df1)-1) for _ in range(3)]
        
        question_1 = "Quel est le pays organisateur ?"
        r_answer_1 = val['countryLabel.value']
        answers_1 = df1.reset_index().loc[r,'countryLabel.value']
        
        question_2 = "Quel est le pays vainqueur ?"
        r_answer_2 = val['winnerLabel.value']
        answers_2 = df1.reset_index().loc[r,'winnerLabel.value']
        
        question_3 = "Quel est le nombre de pays participants ?"
        r_answer_3 = val['participantsLabel.value']
        answers_3 = df1.reset_index().loc[r,'participantsLabel.value']

        question_4 = "Qui est le meilleur buteur ?"
        r_answer_4 = val['leaderLabel.value']
        answers_4 = df1.reset_index().loc[r,'leaderLabel.value']

        question_5 = "Quel est la mascotte ?"
        r_answer_5 = val['mascotLabel.value']
        answers_5 = df1.reset_index().loc[r,'mascotLabel.value']
        
        questions = [question_1, question_2, question_3, question_4, question_5]
        answers = [r_answer_1,r_answer_2,r_answer_3, r_answer_4, r_answer_5]
        alt_answers = [answers_1,answers_2,answers_3, answers_4, answers_5]
        
        quiz_bank[val['itemLabel.value']] = {"Question": questions, "R_Answers": answers, "Answers": alt_answers}
    return quiz_bank

In [211]:
quiz = quiz_bank(results_df)
quiz['2006 FIFA World Cup']

{'Question': ['Quel est le pays organisateur ?',
  'Quel est le pays vainqueur ?',
  'Quel est le nombre de pays participants ?',
  'Qui est le meilleur buteur ?',
  'Quel est la mascotte ?'],
 'R_Answers': ['Germany', 'Italy', '32', 'Miroslav Klose', 'Goleo VI'],
 'Answers': [0     Uruguay
  2      France
  13      Italy
  Name: countryLabel.value, dtype: object,
  0     Uruguay
  2       Italy
  13    Germany
  Name: winnerLabel.value, dtype: object,
  0     13
  2     15
  13    24
  Name: participantsLabel.value, dtype: object,
  0       Guillermo Stábile
  2                Leônidas
  13    Salvatore Schillaci
  Name: leaderLabel.value, dtype: object,
  0      NaN
  2      NaN
  13    Ciao
  Name: mascotLabel.value, dtype: object]}