In [12]:
import numpy as np
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON
from apyori import apriori
from mlxtend.preprocessing import TransactionEncoder
from tqdm import tqdm

In [13]:
queryString = "SELECT * WHERE { ?s ?p ?o. }"
sparql = SPARQLWrapper("http://localhost:3030/Physical_Object")
sparql.setQuery(queryString)

try :
   ret = sparql.query()
   # ret is a stream with the results in XML, see <http://www.w3.org/TR/rdf-sparql-XMLres/>
except :
   pass

In [14]:
sparql.setQuery("""
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT ?country (COUNT(?prop) AS ?total) {
  
  SELECT DISTINCT ?country ?prop
    WHERE {
    ?country wdt:P31 wd:Q223557 .
    ?country ?prop ?value .
} 

} GROUP BY ?country
ORDER BY DESC(?total)
""")
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

In [15]:
res = []
for results in results["results"]["bindings"]:
    print('%s: %s' % (results["country"]["value"], results["total"]["value"]))
    entity = str(results["country"]["value"]).split('/')
    res.append(entity[-1])
print('---------------------------')

http://www.wikidata.org/entity/Q86870962: 27
http://www.wikidata.org/entity/Q98833586: 26
http://www.wikidata.org/entity/Q29321377: 24
http://www.wikidata.org/entity/Q20877089: 23
http://www.wikidata.org/entity/Q29318871: 18
http://www.wikidata.org/entity/Q96573964: 16
http://www.wikidata.org/entity/Q96587104: 16
http://www.wikidata.org/entity/Q722218: 14
http://www.wikidata.org/entity/Q81973728: 14
http://www.wikidata.org/entity/Q394352: 13
http://www.wikidata.org/entity/Q60740239: 13
http://www.wikidata.org/entity/Q63554971: 13
http://www.wikidata.org/entity/Q60024227: 12
http://www.wikidata.org/entity/Q60740792: 12
http://www.wikidata.org/entity/Q60751750: 12
http://www.wikidata.org/entity/Q60759687: 12
http://www.wikidata.org/entity/Q60763364: 12
http://www.wikidata.org/entity/Q60777976: 12
http://www.wikidata.org/entity/Q65944235: 12
http://www.wikidata.org/entity/Q65944236: 12
http://www.wikidata.org/entity/Q65944238: 12
http://www.wikidata.org/entity/Q65944239: 12
http://www.wik

In [16]:
db = []

for i in range(len(res)):
    query_string = """
    PREFIX wd: <http://www.wikidata.org/entity/>
    SELECT DISTINCT ?country ?prop {
    VALUES ?country {wd:""" + res[i] + """}
    ?country ?prop ?value .
    }
    """

    sparql.setQuery(query_string)
    sparql.setReturnFormat(JSON)
    results_entity = sparql.query().convert()
    propLabel = []
    for results in results_entity["results"]["bindings"]:
#         print('%s: %s' % (results["country"]["value"], results["propLabel"]["value"]))
        propLabel.append(results["prop"]["value"])
#     print('---------------------------')
    db.append(propLabel)

In [17]:
te = TransactionEncoder()
te_ary = te.fit(db).transform(db)
df = pd.DataFrame(te_ary, columns=te.columns_)
df

Unnamed: 0,http://www.wikidata.org/prop/P1071,http://www.wikidata.org/prop/P1114,http://www.wikidata.org/prop/P1269,http://www.wikidata.org/prop/P127,http://www.wikidata.org/prop/P131,http://www.wikidata.org/prop/P1343,http://www.wikidata.org/prop/P135,http://www.wikidata.org/prop/P136,http://www.wikidata.org/prop/P1402,http://www.wikidata.org/prop/P1424,...,http://www.wikidata.org/prop/direct/P5008,http://www.wikidata.org/prop/direct/P527,http://www.wikidata.org/prop/direct/P547,http://www.wikidata.org/prop/direct/P61,http://www.wikidata.org/prop/direct/P6216,http://www.wikidata.org/prop/direct/P65,http://www.wikidata.org/prop/direct/P737,http://www.wikidata.org/prop/direct/P793,http://www.wikidata.org/prop/direct/P910,http://www.wikidata.org/prop/direct/P921
0,False,False,False,False,True,False,False,True,False,False,...,False,True,False,False,False,False,False,False,False,True
1,False,False,False,False,True,False,True,True,False,False,...,False,False,False,False,False,False,True,False,False,True
2,False,False,False,False,True,False,False,True,False,False,...,False,False,False,False,False,False,False,False,False,True
3,False,True,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
4,False,False,False,False,True,False,False,True,False,False,...,False,True,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
101,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
102,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
103,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [18]:
wikidata = SPARQLWrapper("https://query.wikidata.org/sparql")

In [19]:
propList = df.columns.tolist()
for i in range(len(propList)):
    propList[i] = propList[i].split('/')[-1]

In [20]:
propLabel = []

for i in tqdm(range(len(propList))):
    query_string = """
    SELECT DISTINCT ?propLabel {
      VALUES ?p {wdt:""" + propList[i] + """}
      SERVICE wikibase:label { bd:serviceParam wikibase:language "en". } 
      ?prop wikibase:directClaim ?p .
    }
    """

    wikidata.setQuery(query_string)
    wikidata.setReturnFormat(JSON)
    results_prop = wikidata.query().convert()
    for results in results_prop["results"]["bindings"]:
#         print('%s: %s' % (results["country"]["value"], results["propLabel"]["value"]))
        propLabel.append(results["propLabel"]["value"])
#     print('---------------------------')

100%|████████████████████████████████████████████████████████████████████████████████| 107/107 [00:56<00:00,  1.90it/s]


In [21]:
df.columns = propLabel

In [22]:
df

Unnamed: 0,location of creation,quantity,facet of,owned by,located in the administrative territorial entity,described by source,movement,genre,Foundational Model of Anatomy ID,topic's main template,...,on focus list of Wikimedia project,has part,commemorates,discoverer or inventor,copyright status,site of astronomical discovery,influenced by,significant event,topic's main category,main subject
0,False,False,False,False,True,False,False,True,False,False,...,False,True,False,False,False,False,False,False,False,True
1,False,False,False,False,True,False,True,True,False,False,...,False,False,False,False,False,False,True,False,False,True
2,False,False,False,False,True,False,False,True,False,False,...,False,False,False,False,False,False,False,False,False,True
3,False,True,False,False,False,False,False,False,False,False,...,False,False,True,False,False,False,False,False,False,False
4,False,False,False,False,True,False,False,True,False,False,...,False,True,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
101,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
102,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
103,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [23]:
from mlxtend.frequent_patterns import association_rules, fpmax, fpgrowth

In [24]:
frequent_itemsets = fpgrowth(df, min_support=0.5, use_colnames=True)

In [25]:
frequent_itemsets

Unnamed: 0,support,itemsets
0,1.000000,(instance of)
1,1.000000,(instance of)
2,0.600000,(made from material)
3,0.600000,(made from material)
4,0.695238,(inventory number)
...,...,...
262,0.504762,"(instance of, owned by, described at URL, made..."
263,0.504762,"(instance of, owned by, inventory number, made..."
264,0.504762,"(instance of, owned by, inventory number, made..."
265,0.504762,"(instance of, owned by, inventory number, made..."


In [27]:
res = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.7)
res

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(made from material),(instance of),0.600000,1.000000,0.600000,1.000000,1.000000,0.000000,inf
1,(made from material),(inventory number),0.600000,0.695238,0.523810,0.873016,1.255708,0.106667,2.400000
2,(inventory number),(made from material),0.695238,0.600000,0.523810,0.753425,1.255708,0.106667,1.622222
3,(made from material),(described at URL),0.600000,0.657143,0.523810,0.873016,1.328502,0.129524,2.700000
4,(described at URL),(made from material),0.657143,0.600000,0.523810,0.797101,1.328502,0.129524,1.971429
...,...,...,...,...,...,...,...,...,...
161,"(made from material, described at URL)","(instance of, owned by, inventory number)",0.523810,0.514286,0.504762,0.963636,1.873737,0.235374,13.357143
162,(owned by),"(instance of, inventory number, described at U...",0.523810,0.514286,0.504762,0.963636,1.873737,0.235374,13.357143
163,(inventory number),"(instance of, owned by, described at URL, made...",0.695238,0.504762,0.504762,0.726027,1.438356,0.153832,1.807619
164,(made from material),"(instance of, owned by, inventory number, desc...",0.600000,0.504762,0.504762,0.841270,1.666667,0.201905,3.120000
