In [38]:
import numpy as np
import pandas as pd
from SPARQLWrapper import SPARQLWrapper, JSON
from apyori import apriori
from mlxtend.preprocessing import TransactionEncoder
from tqdm import tqdm

In [39]:
# Connectivity check against the local SPARQL endpoint: run a catch-all
# triple pattern and keep the raw response in `ret`.
queryString = "SELECT * WHERE { ?s ?p ?o. }"
sparql = SPARQLWrapper("http://localhost:3030/brand")
sparql.setQuery(queryString)

# Bind `ret` up front so it exists even when the request fails.
ret = None
try:
    ret = sparql.query()
    # ret is a stream with the results in XML, see <http://www.w3.org/TR/rdf-sparql-XMLres/>
except Exception as exc:
    # Still best-effort (the notebook keeps running), but the failure is
    # reported instead of being silently discarded, and KeyboardInterrupt /
    # SystemExit are no longer swallowed.
    print("SPARQL endpoint check failed: %r" % (exc,))

In [40]:
# For every entity that is an instance of wd:Q431289, count how many distinct
# predicates it has, ordered from the most to the fewest.
property_count_query = """
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT ?s (COUNT(?prop) AS ?total) {
  
  SELECT DISTINCT ?s ?prop
    WHERE {
    ?s wdt:P31 wd:Q431289 .
    ?s ?prop ?value .
} 

} GROUP BY ?s
ORDER BY DESC(?total) 
"""

# Ask for JSON so the response converts to a plain dict.
sparql.setReturnFormat(JSON)
sparql.setQuery(property_count_query)
results = sparql.query().convert()

In [41]:
# Print each entity URI with its property count and collect the bare entity
# IDs (the last path segment of the URI) into `res`.
res = []
# Use a separate loop variable: iterating with the name `results` would
# overwrite the query response and make this cell fail when run a second time.
for binding in results["results"]["bindings"]:
    entity_uri = str(binding["s"]["value"])
    print('%s: %s' % (entity_uri, binding["total"]["value"]))
    res.append(entity_uri.split('/')[-1])
print('---------------------------')

http://www.wikidata.org/entity/Q312: 89
http://www.wikidata.org/entity/Q1418: 64
http://www.wikidata.org/entity/Q478214: 62
http://www.wikidata.org/entity/Q20165: 53
http://www.wikidata.org/entity/Q483915: 50
http://www.wikidata.org/entity/Q159433: 49
http://www.wikidata.org/entity/Q207922: 41
http://www.wikidata.org/entity/Q162345: 40
http://www.wikidata.org/entity/Q170484: 39
http://www.wikidata.org/entity/Q58898260: 39
http://www.wikidata.org/entity/Q462: 38
http://www.wikidata.org/entity/Q106367983: 36
http://www.wikidata.org/entity/Q218115: 34
http://www.wikidata.org/entity/Q152864: 33
http://www.wikidata.org/entity/Q503592: 32
http://www.wikidata.org/entity/Q5881409: 32
http://www.wikidata.org/entity/Q2813: 31
http://www.wikidata.org/entity/Q650467: 31
http://www.wikidata.org/entity/Q83382: 31
http://www.wikidata.org/entity/Q106463610: 29
http://www.wikidata.org/entity/Q1120617: 29
http://www.wikidata.org/entity/Q136469: 29
http://www.wikidata.org/entity/Q1420671: 29
http://www.w

In [42]:
# Build the transaction database: one list per entity in `res`, holding the
# distinct predicate URIs that entity uses.
db = []

# The return format does not change between requests, so set it once.
sparql.setReturnFormat(JSON)

# One request per entity; the progress bar makes the long loop observable.
for entity_id in tqdm(res):
    query_string = """
    PREFIX wd: <http://www.wikidata.org/entity/>
    SELECT DISTINCT ?s ?prop {
    VALUES ?s {wd:""" + entity_id + """}
    ?s ?prop ?value .
    }
    """

    sparql.setQuery(query_string)
    results_entity = sparql.query().convert()
    # A dedicated comprehension variable keeps `results` (the response of the
    # earlier count query) intact.
    db.append([row["prop"]["value"] for row in results_entity["results"]["bindings"]])

In [43]:
# One-hot encode the transactions: one row per entry of `db`, one boolean
# column per distinct predicate URI.
te = TransactionEncoder()
te.fit(db)
te_ary = te.transform(db)
df = pd.DataFrame(data=te_ary, columns=te.columns_)
df

Unnamed: 0,http://www.wikidata.org/prop/P10,http://www.wikidata.org/prop/P101,http://www.wikidata.org/prop/P1019,http://www.wikidata.org/prop/P1037,http://www.wikidata.org/prop/P1041,http://www.wikidata.org/prop/P1056,http://www.wikidata.org/prop/P106,http://www.wikidata.org/prop/P1064,http://www.wikidata.org/prop/P1068,http://www.wikidata.org/prop/P1071,...,http://www.wikidata.org/prop/direct/P8345,http://www.wikidata.org/prop/direct/P8571,http://www.wikidata.org/prop/direct/P86,http://www.wikidata.org/prop/direct/P8693,http://www.wikidata.org/prop/direct/P880,http://www.wikidata.org/prop/direct/P910,http://www.wikidata.org/prop/direct/P92,http://www.wikidata.org/prop/direct/P921,http://www.wikidata.org/prop/direct/P941,http://www.wikidata.org/prop/direct/P972
0,False,False,False,False,False,True,False,False,False,False,...,False,True,False,False,False,True,False,False,False,False
1,False,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,True,False,False,False,False
2,False,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,True,False,False,False,False
3,False,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,True,False,False,False,False
4,False,False,False,False,False,True,False,False,False,False,...,False,False,False,False,False,True,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2894,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2895,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2896,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2897,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [44]:
# Client for the public Wikidata query service, used to resolve property labels.
WIKIDATA_ENDPOINT = "https://query.wikidata.org/sparql"
wikidata = SPARQLWrapper(WIKIDATA_ENDPOINT)

In [45]:
# Reduce every column URI to its last path segment (e.g. ".../P31" -> "P31").
# NOTE(review): the frame has columns under both ".../prop/" and
# ".../prop/direct/"; both collapse to the same ID here — confirm that the
# resulting duplicates are intended.
propList = [column_uri.split('/')[-1] for column_uri in df.columns.tolist()]

In [46]:
# Resolve an English label for every property ID in `propList`.
# `propLabel` ends up with exactly one entry per property, in the same order,
# so it can be assigned positionally as the new column names.
propLabel = []

# The return format does not change between requests, so set it once.
wikidata.setReturnFormat(JSON)

for prop_id in tqdm(propList):
    # The wdt:, wikibase: and bd: prefixes are not declared here; this relies
    # on the Wikidata query service predefining them.
    query_string = """
    SELECT DISTINCT ?propLabel {
      VALUES ?p {wdt:""" + prop_id + """}
      SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
      ?prop wikibase:directClaim ?p .
    }
    """

    wikidata.setQuery(query_string)
    results_prop = wikidata.query().convert()
    label_rows = results_prop["results"]["bindings"]
    # Appending every returned row (possibly none, possibly several) would let
    # the list drift out of alignment with the columns. Keep the first label
    # and fall back to the raw ID when the service returns nothing.
    if label_rows:
        propLabel.append(label_rows[0]["propLabel"]["value"])
    else:
        propLabel.append(prop_id)

  0%|          | 0/445 [00:00<?, ?it/s]


URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1108)>

In [47]:
# Replace the property-URI column names with their human-readable labels.
# The assignment is positional, so check the label count first and fail with
# an actionable message (same exception type pandas would raise) when the
# label lookup did not complete.
if len(propLabel) != len(df.columns):
    raise ValueError(
        "Length mismatch: Expected axis has %d elements, new values have %d elements "
        "(the property-label lookup must finish successfully before renaming columns)"
        % (len(df.columns), len(propLabel))
    )
df.columns = propLabel
df

ValueError: Length mismatch: Expected axis has 445 elements, new values have 0 elements

In [48]:
from mlxtend.frequent_patterns import association_rules, fpmax, fpgrowth

In [49]:
# Mine frequent property sets with FP-Growth; an itemset is kept when it occurs
# in at least MIN_SUPPORT of the rows. Itemsets are reported by column name.
MIN_SUPPORT = 0.5
frequent_itemsets = fpgrowth(
    df,
    min_support=MIN_SUPPORT,
    use_colnames=True,
)

In [50]:
# Display the mined frequent itemsets as this cell's output.
frequent_itemsets

Unnamed: 0,support,itemsets
0,1.0,(http://www.wikidata.org/prop/direct/P31)
1,1.0,(http://www.wikidata.org/prop/P31)
2,1.0,"(http://www.wikidata.org/prop/P31, http://www...."


In [51]:
# Derive association rules from the frequent itemsets, keeping only rules whose
# confidence reaches MIN_CONFIDENCE.
# NOTE(review): this rebinds `res`, which an earlier cell filled with entity
# IDs; re-running that earlier per-entity loop after this cell would iterate
# over the rules table instead.
MIN_CONFIDENCE = 0.7
res = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=MIN_CONFIDENCE,
)
res

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(http://www.wikidata.org/prop/P31),(http://www.wikidata.org/prop/direct/P31),1.0,1.0,1.0,1.0,1.0,0.0,inf
1,(http://www.wikidata.org/prop/direct/P31),(http://www.wikidata.org/prop/P31),1.0,1.0,1.0,1.0,1.0,0.0,inf
