In [2]:
#THIS NOTEBOOK USES AN INTERNET API TO BUILD A KNOWLEDGE GRAPH FOR A GIVEN ENTITY
#IT NEEDS TO CONNECT TO THE DB TO FETCH THE EXISTING TAG LIST
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/

import sys
from neo4j import GraphDatabase, basic_auth
from neo4j.exceptions import Neo4jError
import neo4j.time
import csv
import json
import time
import os
from flask_restful_swagger_2 import Api, swagger, Schema


def cprint(content,module='DEBUG',*args):
    '''
    Print a colourised, timestamped log line.
    Args:
        content: string, message text (joined with '+', so it must be a str)
        module: string, tag shown in square brackets before the message
        *args: optional extra values; when any are given, the str() of the
               args tuple is appended to the message in a second colour
    '''
    line = '\033[1;32;43m [' + module + '] \033[0m ' + content
    if args:
        # Extra positional values are rendered as the tuple's own str().
        line = line + '\033[1;35m' + str(args) + ' \033[0m'
    stamp = time.strftime(" |%Y-%m-%d %H:%M:%S|", time.localtime())
    print(line + stamp)

        
# Connection settings. Each DATABASE_* value can be overridden through an
# environment variable of the same name; the literals are only fallbacks so the
# notebook keeps running unchanged on the original development machine.
# NOTE(review): the fallback password is still stored in the notebook -- prefer
# exporting DATABASE_PASSWORD and rotating this value before sharing the file.
DATABASE_USERNAME = os.environ.get("DATABASE_USERNAME", "neo4j")
DATABASE_PASSWORD = os.environ.get("DATABASE_PASSWORD", "spade-discounts-switch")
DATABASE_URL = os.environ.get("DATABASE_URL", "bolt://localhost:7687")
DATA_FILE_PATH = 'dev-to-articles.csv'

def db_fetch_all_tags(session):
    '''
    Read every node labelled 'Tag' from the database.
    Args:
        session: db session, as returned by driver.session()
    Return:
        list of Cypher records; each record binds one 'Tag' node to `n`
    '''
    query = '''
        MATCH (n:Tag) RETURN n
        '''

    def _read_tags(tx):
        # Materialise the result into a list inside the transaction function.
        return list(tx.run(query))

    records = session.read_transaction(_read_tags)
    cprint(str(len(records))+'record fetched','DB')
    return records
def serialize_tag(tag):
    '''
    Copy the 'name' and 'keywords_for_search' fields of a tag into a plain dict.
    Args:
        tag: object supporting tag['name'] and tag['keywords_for_search']
    Return:
        dict with exactly the keys 'name' and 'keywords_for_search'
    '''
    wanted = ('name', 'keywords_for_search')
    return {field: tag[field] for field in wanted}


def db_create_subsume_relation(session,parent_tag,child_tag):
    '''
    Create relation: (parent_tag)-[SUBSUME]->(child_tag)
    Both Tag nodes and the relation are MERGEd, so existing ones are reused.
    Args:
        session: db session,driver.session()
        parent_tag: string, name of tag
        child_tag: string, name of tag
    Return:
        list of Cypher records, each holding the relation as `subsume`
    '''
    def _cypher(tx,parent_tag,child_tag):
        # The parameter dict must be handed to tx.run itself; otherwise the
        # $parent_tag / $child_tag placeholders are never bound.
        return list(tx.run(
        '''
        MERGE (n:Tag {name:$parent_tag})
        MERGE (m:Tag {name:$child_tag})
        MERGE (n)-[subsume:SUBSUME]-> (m)
        RETURN subsume
        ''',
        {'parent_tag': parent_tag , 'child_tag': child_tag}
        ))
    # write_transaction forwards the extra positional arguments to _cypher.
    result = session.write_transaction(_cypher, parent_tag, child_tag)
    return result
    
    
# Open a driver and fetch the existing tags once. The `with` block closes the
# session on exit, so no explicit session.close() is needed.
driver = GraphDatabase.driver(DATABASE_URL, auth=basic_auth(DATABASE_USERNAME, str(DATABASE_PASSWORD)))
with driver.session() as session:
    tag_list = db_fetch_all_tags(session)
    
#EXPORT VAR : tag_list
# for tag in tag_list:
#     print(serialize_tag(tag['n'])['name'])
#     break

[1;32;43m [DB] [0m 496record fetched |2021-04-15 21:36:46|


In [56]:
#QUERY FOR A SINGLE TAG
#THIS CAN RUN SEPARATELY
from SPARQLWrapper import SPARQLWrapper, JSON
def get_query(tag):
    '''
    Build the Wikidata SPARQL query text for one entity label.

    The query matches items that carry `tag` as an English-tagged literal,
    keeps only items that are the subject of an English Wikipedia article,
    and follows the listed wdt: properties for up to three hops
    (?P1, then optional ?P2 and ?P3), with optional zh-cn labels.

    Args:
        tag: string, name of entity

    Return:
        string, the SPARQL query (query text, not a URL)
    '''
    cprint('Generating query with:'+tag,'WIKIDATA')
    # The tag is spliced into a double-quoted SPARQL literal, so escape the
    # characters that would otherwise terminate or break that literal.
    # Backslash must be handled first so later escapes are not doubled.
    literal = tag.replace('\\', '\\\\')
    literal = literal.replace('"', '\\"')
    literal = literal.replace('\n', '\\n').replace('\r', '\\r')
    query = '''SELECT ?item ?itemLabel ?P1 ?P1Label ?P2 ?P2Label ?P3 ?P3Label ?item_zh ?P1Label_zh ?P2Label_zh ?P3Label_zh 
       WHERE {?item ?label "'''+literal+'''"@en. 
       ?article schema:about ?item .?article schema:inLanguage "en" .
       ?article schema:isPartOf <https://en.wikipedia.org/>. 
       ?item (wdt:P279|wdt:P361|wdt:P101|wdt:P425|wdt:P31|wdt:P277) ?P1.
       OPTIONAL { ?P1 (wdt:P279|wdt:P361|wdt:P101|wdt:P425|wdt:P31|wdt:P277) ?P2. }
       OPTIONAL { ?P2 (wdt:P279|wdt:P361|wdt:P101|wdt:P425|wdt:P31|wdt:P277) ?P3. }
       OPTIONAL {?item rdfs:label ?item_zh filter (lang(?item_zh) = "zh-cn")}.
       OPTIONAL {?P1 rdfs:label ?P1Label_zh filter (lang(?P1Label_zh) = "zh-cn")}.
       OPTIONAL {?P2 rdfs:label ?P2Label_zh filter (lang(?P2Label_zh) = "zh-cn")}.
       OPTIONAL {?P3 rdfs:label ?P3Label_zh filter (lang(?P3Label_zh) = "zh-cn")}.
       SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
      }'''
    return query

def get_results(query, endpoint_url="https://query.wikidata.org/sparql"):
    '''
    Run a SPARQL query against an endpoint and return the converted JSON response.
    Args:
        query: query script
        endpoint_url : SPARQL endpoint, default "https://query.wikidata.org/sparql"
    Return:
        The converted response. The rows are under ['results']['bindings'];
        this notebook reads a field of a row as row[<variable>]['value'],
        for example row['itemLabel']['value'] or row['P1Label']['value'].
        Variables that are OPTIONAL in the query may be missing from a row.
    '''
    #adjust user agent; see https://w.wiki/CX6
    agent_name = "WDQS-example Python/%s.%s" % tuple(sys.version_info[:2])
    client = SPARQLWrapper(endpoint_url, agent=agent_name)
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query().convert()
    row_count = len(response['results']['bindings'])
    cprint(str(row_count)+' records fetched','WIKIDATA')
    return response




# for result in results["results"]["bindings"]:
#     print(result)

#EXPORT VAR : wiki_result

In [57]:
# Query Wikidata for the first tag only (single-tag trial run).
# Fail loudly on an empty tag list instead of leaving wiki_results undefined
# (or silently stale from an earlier run of this cell).
if not tag_list:
    raise ValueError('tag_list is empty: no tag available to query Wikidata with')
first_tag_name = serialize_tag(tag_list[0]['n'])['name']
wiki_results = get_results(get_query(first_tag_name))
    

[1;32;43m [WIKIDATA] [0m Generating query with:tutorial |2021-04-15 22:44:47|
[1;32;43m [WIKIDATA] [0m 26 records fetched |2021-04-15 22:44:53|


In [61]:
#THE MOST IMPORTANT PART: TRANSFORM THE GIVEN JSON LIST INTO A GRAPH
# len(wiki_results['results']['bindings'])

# Accumulators for the graph conversion; nothing in this cell fills them yet.
item = []
P1 = []
P2 = []

for res in wiki_results['results']['bindings']:
    print(res['itemLabel']['value'])
    print(res['P1Label']['value'])
    # P1Label_zh is OPTIONAL in the query, so a row may not carry it. Reset it
    # for every row so a row without a zh-cn label does not inherit the label
    # of an earlier row.
    # NOTE(review): the original repeated this P1Label_zh check twice; the
    # second copy may have been meant for P2Label_zh -- confirm.
    P1Label_zh = res['P1Label_zh']['value'] if 'P1Label_zh' in res else None

tutorial
teaching method
tutorial
method
方法
tutorial
method
方法
tutorial
level
tutorial
level
tutorial
teaching method
tutorial
level
tutorial
level
Wikipedia:TemplateData/Tutorial
Wikimedia project page
tutorial
literary genre
文类
tutorial
level
tutorial
level
tutorial
level
tutorial
method
方法
tutorial
level
tutorial
method
方法
Wikipedia:TemplateData/Tutorial
Wikimedia project page
Wikipedia:TemplateData/Tutorial
Wikimedia project page
Wikipedia:TemplateData/Tutorial
Wikimedia project page
tutorial
level
tutorial
level
tutorial
level
tutorial
method
方法
tutorial
method
方法
tutorial
literary genre
文类
tutorial
teaching method
