In [2]:
import numpy as np
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON
from apyori import apriori
from mlxtend.preprocessing import TransactionEncoder
from tqdm import tqdm

In [3]:
# Connectivity probe: send a catch-all triple pattern to the local SPARQL
# endpoint so that an unreachable server is noticed in this cell.
queryString = "SELECT * WHERE { ?s ?p ?o. }"
sparql = SPARQLWrapper("http://localhost:3030/memory/sparql")
sparql.setQuery(queryString)

try:
    ret = sparql.query()
    # ret is a stream with the results in XML, see <http://www.w3.org/TR/rdf-sparql-XMLres/>
except Exception as exc:
    # Stay best-effort (the notebook keeps running), but report the failure
    # instead of hiding it, and leave `ret` defined so later code can test it.
    # Catching Exception (not a bare except) lets KeyboardInterrupt propagate.
    ret = None
    print('Local SPARQL endpoint probe failed: %r' % (exc,))

In [4]:
# Request JSON so that .convert() below returns plain Python dicts/lists.
sparql.setReturnFormat(JSON)

# For every ?country that is an instance (wdt:P31) of wd:Q3624078, count how
# many distinct predicates it has; the inner SELECT DISTINCT removes repeated
# (country, predicate) pairs before the outer COUNT. Most-described first.
sparql.setQuery("""
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT ?country (COUNT(?prop) AS ?total) {
  
  SELECT DISTINCT ?country ?prop
    WHERE {
    ?country wdt:P31 wd:Q3624078 .
    ?country ?prop ?value .
} 

} GROUP BY ?country
ORDER BY DESC(?total) 
""")

results = sparql.query().convert()

In [5]:
# Collect the entity identifier (last path segment of the URI) of every
# country returned by the count query, printing each country with its total.
res = []
# Use a dedicated loop variable: iterating with the name `results` would
# overwrite the query response and make this cell fail when re-run.
for binding in results["results"]["bindings"]:
    country_uri = binding["country"]["value"]
    print('%s: %s' % (country_uri, binding["total"]["value"]))
    res.append(country_uri.split('/')[-1])
print('---------------------------')

http://www.wikidata.org/entity/Q142: 126
http://www.wikidata.org/entity/Q30: 125
http://www.wikidata.org/entity/Q801: 124
http://www.wikidata.org/entity/Q183: 121
http://www.wikidata.org/entity/Q213: 119
http://www.wikidata.org/entity/Q408: 118
http://www.wikidata.org/entity/Q16: 116
http://www.wikidata.org/entity/Q159: 115
http://www.wikidata.org/entity/Q668: 115
http://www.wikidata.org/entity/Q17: 113
http://www.wikidata.org/entity/Q29: 113
http://www.wikidata.org/entity/Q31: 113
http://www.wikidata.org/entity/Q33: 113
http://www.wikidata.org/entity/Q851: 112
http://www.wikidata.org/entity/Q155: 111
http://www.wikidata.org/entity/Q884: 109
http://www.wikidata.org/entity/Q145: 108
http://www.wikidata.org/entity/Q212: 108
http://www.wikidata.org/entity/Q41: 108
http://www.wikidata.org/entity/Q148: 106
http://www.wikidata.org/entity/Q20: 106
http://www.wikidata.org/entity/Q34: 106
http://www.wikidata.org/entity/Q43: 106
http://www.wikidata.org/entity/Q252: 105
http://www.wikidata.org/en

In [6]:
# Build the transaction database: one list of predicate URIs per country,
# in the same order as the identifiers in `res`.
db = []

# The return format does not change between requests, so set it once.
sparql.setReturnFormat(JSON)

for entity_id in res:
    query_string = """
    PREFIX wd: <http://www.wikidata.org/entity/>
    SELECT DISTINCT ?country ?prop {
    VALUES ?country {wd:""" + entity_id + """}
    ?country ?prop ?value .
    }
    """

    sparql.setQuery(query_string)
    results_entity = sparql.query().convert()
    # Each binding carries one distinct predicate used by this country.
    db.append([row["prop"]["value"] for row in results_entity["results"]["bindings"]])

In [7]:
# One-hot encode the per-country predicate lists: one row per entry of `db`,
# one column per distinct predicate URI learned during fit.
te = TransactionEncoder()
te.fit(db)
te_ary = te.transform(db)
df = pd.DataFrame(data=te_ary, columns=te.columns_)
df

Unnamed: 0,http://www.wikidata.org/prop/direct/P10,http://www.wikidata.org/prop/direct/P1001,http://www.wikidata.org/prop/direct/P103,http://www.wikidata.org/prop/direct/P1081,http://www.wikidata.org/prop/direct/P1082,http://www.wikidata.org/prop/direct/P112,http://www.wikidata.org/prop/direct/P1120,http://www.wikidata.org/prop/direct/P1125,http://www.wikidata.org/prop/direct/P1151,http://www.wikidata.org/prop/direct/P1174,...,http://www.wikidata.org/prop/direct/P9318,http://www.wikidata.org/prop/direct/P9322,http://www.wikidata.org/prop/direct/P9346,http://www.wikidata.org/prop/direct/P9348,http://www.wikidata.org/prop/direct/P9349,http://www.wikidata.org/prop/direct/P935,http://www.wikidata.org/prop/direct/P94,http://www.wikidata.org/prop/direct/P948,http://www.wikidata.org/prop/direct/P973,http://www.wikidata.org/prop/direct/P989
0,False,False,False,True,True,False,True,True,True,False,...,False,False,True,False,True,True,True,True,False,True
1,False,False,False,True,True,False,False,True,True,False,...,True,False,False,False,True,True,True,True,False,True
2,True,False,False,True,True,True,False,True,True,False,...,False,False,False,False,True,True,True,True,False,False
3,False,False,False,True,True,False,False,False,True,False,...,False,False,False,False,True,True,True,True,False,False
4,False,False,False,True,True,False,False,False,True,False,...,False,False,False,False,False,True,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192,False,False,False,False,True,False,False,False,True,False,...,False,False,False,False,False,False,True,False,False,True
193,False,False,False,True,True,False,False,False,True,False,...,False,False,False,False,False,True,True,True,False,False
194,False,False,False,False,True,False,False,False,True,False,...,False,False,False,False,False,True,True,True,False,False
195,False,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,True


In [8]:
# Separate client for the public Wikidata query service; the `sparql` client
# created earlier keeps pointing at the local endpoint.
wikidata = SPARQLWrapper(endpoint="https://query.wikidata.org/sparql")

In [9]:
# Reduce every column header to the text after its last '/'.
propList = [column_uri.split('/')[-1] for column_uri in df.columns.tolist()]

In [10]:
# Look up the English label of every property id in `propList`.
# `propLabel` must end up with exactly one entry per property, in the same
# order, because it is later assigned positionally as the column headers.
propLabel = []

# The return format does not change between requests, so set it once.
wikidata.setReturnFormat(JSON)

for prop_id in tqdm(propList):
    query_string = """
    SELECT DISTINCT ?propLabel {
      VALUES ?p {wdt:""" + prop_id + """}
      SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } 
      ?prop wikibase:directClaim ?p .
    }
    """

    wikidata.setQuery(query_string)
    results_prop = wikidata.query().convert()
    bindings = results_prop["results"]["bindings"]
    # Append once per property: the first label if any binding came back,
    # otherwise the id itself. Appending once per binding would shift every
    # later label whenever a property returned zero or several rows.
    propLabel.append(bindings[0]["propLabel"]["value"] if bindings else prop_id)

100%|████████████████████████████████████████████████████████████████████████████████| 242/242 [05:03<00:00,  1.25s/it]


In [15]:
# Swap the column headers for the labels in `propLabel`, matched by position.
df.columns = pd.Index(propLabel)

In [16]:
# Show the encoded frame again, now with the relabelled column headers.
df

Unnamed: 0,video,applies to jurisdiction,native language,Human Development Index,population,founded by,number of deaths,Gini coefficient,topic's main Wikimedia portal,visitors per year,...,Ávvir topic ID,SVKKL authority ID,France24 topic ID (French),France24 topic ID (Spanish),France24 topic ID (Arabic),Commons gallery,coat of arms image,page banner,described at URL,spoken text audio
0,False,False,False,True,True,False,True,True,True,False,...,False,False,True,False,True,True,True,True,False,True
1,False,False,False,True,True,False,False,True,True,False,...,True,False,False,False,True,True,True,True,False,True
2,True,False,False,True,True,True,False,True,True,False,...,False,False,False,False,True,True,True,True,False,False
3,False,False,False,True,True,False,False,False,True,False,...,False,False,False,False,True,True,True,True,False,False
4,False,False,False,True,True,False,False,False,True,False,...,False,False,False,False,False,True,True,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192,False,False,False,False,True,False,False,False,True,False,...,False,False,False,False,False,False,True,False,False,True
193,False,False,False,True,True,False,False,False,True,False,...,False,False,False,False,False,True,True,True,False,False
194,False,False,False,False,True,False,False,False,True,False,...,False,False,False,False,False,True,True,True,False,False
195,False,False,False,False,True,False,False,False,False,False,...,False,False,False,False,False,False,True,False,False,True
