# Welcome to Jupyter!

This repo contains an introduction to [Jupyter](https://jupyter.org) and [IPython](https://ipython.org).

Outline of some basics:

* [Notebook Basics](../examples/Notebook/Notebook%20Basics.ipynb)
* [IPython - beyond plain python](../examples/IPython%20Kernel/Beyond%20Plain%20Python.ipynb)
* [Markdown Cells](../examples/Notebook/Working%20With%20Markdown%20Cells.ipynb)
* [Rich Display System](../examples/IPython%20Kernel/Rich%20Output.ipynb)
* [Custom Display logic](../examples/IPython%20Kernel/Custom%20Display%20Logic.ipynb)
* [Running a Secure Public Notebook Server](../examples/Notebook/Running%20the%20Notebook%20Server.ipynb#Securing-the-notebook-server)
* [How Jupyter works](../examples/Notebook/Multiple%20Languages%2C%20Frontends.ipynb) to run code in different languages.

You can also get this tutorial and run it on your laptop:

    git clone https://github.com/ipython/ipython-in-depth

Install IPython and Jupyter:

with [conda](https://www.anaconda.com/download):

    conda install ipython jupyter

with pip:

    # first, always upgrade pip!
    pip install --upgrade pip
    pip install --upgrade ipython jupyter

Start the notebook in the tutorial directory:

    cd ipython-in-depth
    jupyter notebook

In [51]:
import sys
#!{sys.executable} -m pip install SPARQLWrapper
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
import numpy as np
import datetime
import func_lib

endpoint_url = "https://query.wikidata.org/sparql"
item = "item"


class Relation:
    """
    The class returned when createRelation is called.
    It contains string field with query.
    We call Relation.query when we need to do the query.
    """

    def __init__(self, entity_id: str, property_id: str, isSubject: bool, rowVerbose: bool,
                 colVerbose: bool, time_property: str, time: str, name: str, label: bool, limit=10000):
        self.entity_id = entity_id
        self.query_str = ""
        self.dic = {}
        self.result_dic = {"Entity ID": []}
        self.df = pd.DataFrame()
        self.count = 0
        self.time_property = time_property
        self.time = time
        self.limit = limit
        self.focus = "Entity ID"
        if property_id:
            self.extend(property_id, isSubject, name, rowVerbose, colVerbose, limit, time_property, time, label)

    def generate_html(self, name: str):
        html = (self.df).to_html()
        text_file = open(name, "w", encoding='utf-8')
        text_file.write(html)
        text_file.close()

    def query(self, require=None):
        if self.query_str == "":
            self.result_dic = {"Entity ID": ['http://www.wikidata.org/entity/' + str(self.entity_id)]}
            return self.result_dic
        results = get_results(endpoint_url, self.query_str)
        result_dict = {"Entity ID": ['http://www.wikidata.org/entity/' + str(self.entity_id)]}
        for i in range(1, self.count + 1):
            result_dict[self.dic[i]["name"] + '_' + self.dic[i]['property_id']] = []
            if self.dic[i]["colVerbose"]:
                result_dict[self.dic[i]["name"] + '_rank_' + self.dic[i]['property_id'] + '_rank'] = []
                for key, value in self.dic[i]["property_name_dic"].items():
                    result_dict[
                        self.dic[i]["name"] + "_" + value + '_' + self.dic[i]['property_id'] + '_' + str(key)] = []
                for key, value in self.dic[i]["ref_dic"].items():
                    result_dict[self.dic[i]["name"] + "_ref_" + self.dic[i]['property_id'] + '_' + str(key)] = []

            if self.dic[i]["label"]:
                result_dict[self.dic[i]["name"] + '_' + self.dic[i]['property_id'] + 'Label'] = []

        for result in results['results']['bindings']:
            for key, value in result_dict.items():
                if key in result.keys():
                    result_dict[key].append(result[key]['value'])
                else:
                    result_dict[key].append('NA')
        result_dict["Entity ID"] = ['http://www.wikidata.org/entity/' + str(self.entity_id)] * len(
            result_dict[self.dic[self.count]["name"] + '_' + self.dic[self.count]["property_id"]])
        self.result_dic = result_dict
        self.df = pd.DataFrame.from_dict(self.result_dic)
        for i in range(1, self.count + 1):
            if self.dic[i]["colVerbose"] and not self.dic[i]["rowVerbose"]:
                col = self.dic[i]['name'] + '_rank_' + self.dic[i]['property_id'] + '_rank'
                if any(self.df[col] == 'http://wikiba.se/ontology#PreferredRank'):
                    self.df = self.df.loc[self.df[col] == 'http://wikiba.se/ontology#PreferredRank']
                else:
                    self.df = self.df.loc[self.df[col] == 'http://wikiba.se/ontology#NormalRank']
#         if require is not None:
#             for r in require:
#                 self.df = self.df.loc[self.df[r] != 'NA']
        self.df = pd.DataFrame(data=self.df)
#         if self.df.shape[0] >= 10000:
#             print("Warning: Your query leads to too many results. Only 10,000 returned.")
        return self.df

    def extend(self, property_id: str, isSubject: bool, name: str, rowVerbose=False, colVerbose=False, limit=None,
               time_property=None, time=None, search=None, label=False):
        self.count += 1
        self.dic[self.count] = {}
        self.dic[self.count]["name"] = name
        self.dic[self.count]["focus"] = self.focus
        self.dic[self.count]["property_id"] = property_id
        self.dic[self.count]["isSubject"] = isSubject
        self.dic[self.count]["limit"] = limit
        self.dic[self.count]["rowVerbose"] = rowVerbose
        self.dic[self.count]["colVerbose"] = colVerbose
        self.dic[self.count]['time_property'] = time_property
        self.dic[self.count]['time'] = time
        self.dic[self.count]['search'] = search
        self.dic[self.count]['label'] = label
        if rowVerbose or colVerbose:
            self.dic[self.count]["property_name_dic"], self.dic[self.count][
                "ref_dic"] = self.search_property_for_verbose()
        if time_property and time:
            self.time_property = time_property
            self.time = time
        if limit:
            self.limit = limit
        self.query_str = self.define_query_relation()

    def changeFocus(self, name="Entity ID"):
        self.focus = name
    
    def extendWithFunction(self, columns, func, name, param=None, dim=-1):
        if dim == 1:
            if isinstance(columns, list):
                if param is not None:
                    if isinstance(param, list):
                        self.df[name] = self.df[columns].apply(lambda x: func(*x, *param), axis=1)
                    else:
                        self.df[name] = self.df[columns].apply(lambda x: func(*x, param), axis=1)
                else:
                    self.df[name] = self.df[columns].apply(lambda x: func(*x), axis=1)
            else:
                if param is not None:
                    if isinstance(param, list):
                        self.df[name] = self.df[columns].apply(lambda x: func(x, *param), axis=1)
                    else:
                        self.df[name] = self.df[columns].apply(lambda x: func(x, param), axis=1)
                else:
                    self.df[name] = self.df[columns].apply(func, axis=1)
        elif dim == 0:
            if isinstance(columns, list):
                if param is not None:
                    if isinstance(param, list):
                        func(self.df[columns], axis=0)
                    else:
                        func(self.df[columns], axis=0)
                else:
                    func(self.df[columns], axis=0)
            else:
                if param is not None:
                    if isinstance(param, list):
                        func(self.df[columns], axis=0)
                    else:
                        func(self.df[columns], axis=0)
                else:
                    func(self.df[columns], axis=0)
                
        else:
            if isinstance(columns, list):
                if param is not None:
                    if isinstance(param, list):
                        self.df = func(self.df, name, *columns, *param)
                    else:
                        self.df = func(self.df, name, *columns, param)
                else:
                    self.df = func(self.df, name, *columns)
            else:
                if param is not None:
                    if isinstance(param, list):
                        self.df = func(self.df, name, columns, *param)
                    else:
                        self.df = func(self.df, name, columns, param)
                else:
                    self.df = func(self.df, name, columns)
        
#         if not entity:
#             if isinstance(columns, list):
#                 if param is not None:
#                     if isinstance(param, list):
#                         self.df = func(self.df, name, *columns, *param)
#                     else:
#                         self.df = func(self.df, name, *columns, param)
#                 else:
#                     self.df = func(self.df, name, *columns)
#             else:
#                 if param is not None:
#                     if isinstance(param, list):
#                         self.df = func(self.df, name, columns, *param)
#                     else:
#                         self.df = func(self.df, name, columns, param)
#                 else:
#                     self.df = func(self.df, name, columns)
#         else:
#             if isinstance(columns, list):
#                 if param is not None:
#                     if isinstance(param, list):
#                         self.df[name] = self.df[columns].apply(lambda x: func(*x, *param), axis=1)
#                     else:
#                         self.df[name] = self.df[columns].apply(lambda x: func(*x, param), axis=1)
#                 else:
#                     self.df[name] = self.df[columns].apply(lambda x: func(*x), axis=1)
#             else:
#                 if param is not None:
#                     if isinstance(param, list):
#                         self.df[name] = self.df[columns].apply(lambda x: func(x, *param), axis=1)
#                     else:
#                         self.df[name] = self.df[columns].apply(lambda x: func(x, param), axis=1)
#                 else:
#                     self.df[name] = self.df[columns].apply(func, axis=1)
#         else:
#             if type(func) == str:
#                 if func.startswith('F'):
#                     try:
#                         func_id = int(func[1:])
#                         if func_id >= func_lib.func_num():
#                             print("Not available.")
#                         else:
#                             if isinstance(objcolumn, list):
#                                 self.df[name] = self.df[columns].apply(lambda x: func_lib.func_list[func_id](*x), axis=1)
#                             else:
#                                 self.df[name] = self.df[columns].apply(func_lib.func_list[func_id])
#                     except:
#                         raise Exception("Not a valid function id, a valid function id should be 'Fn', n is an integer.")
#                 else:
#                     raise Exception("Not a valid function id, a valid function id should be 'Fn', n is an integer.")
#             else:
#                 if isinstance(columns, list):
#                     self.df[name] = self.df[columns].apply(lambda x: func(*x), axis=1)
#                 else:
#                     self.df[name] = self.df[columns].apply(func)

    def define_query_relation(self):
        rdf_triple, time_filter, limit_statement = """""", """""", """"""
        if self.count < 1:
            return None
        focusChanges = 0
        for i in range(1, self.count + 1):
            if self.dic[i]["rowVerbose"] or self.dic[i]["colVerbose"]:
                if self.dic[i]["search"] is None and not self.dic[i]["isSubject"]:
                        rdf_triple += """OPTIONAL {"""
                if self.dic[i]["focus"] == "Entity ID":
#                     if self.dic[i]["search"] is None:
#                         rdf_triple += """OPTIONAL {"""
                    rdf_triple += """wd:""" + self.entity_id + """ p:""" + self.dic[i][
                        'property_id'] + """ ?statement_""" + str(i) + """. """ \
                                  + """?statement_""" + str(i) + """ ps:""" + self.dic[i][
                                      'property_id'] + """ ?""" + \
                                  self.dic[i]['name'] \
                                  + """_""" + self.dic[i]['property_id'] + """. """
                else:
                    rdf_triple += """?""" + self.dic[i]["focus"] + """ p:""" + self.dic[i][
                        'property_id'] + """ ?statement_""" + str(i) + """. """ \
                                  + """?statement_""" + str(i) + """ ps:""" + self.dic[i][
                                      'property_id'] + """ ?""" + \
                                  self.dic[i]['name'] \
                                  + """_""" + self.dic[i]['property_id'] + """. """
                for key, value in self.dic[i]["property_name_dic"].items():
                    rdf_triple += """OPTIONAL { """ + """?statement_""" + str(i) + """ pq:""" + str(key) \
                                  + """ ?""" + self.dic[i]['name'] + """_""" + value + """_""" + self.dic[i][
                                      'property_id'] + """_""" + str(key) + """.} """
                for key, value in self.dic[i]["ref_dic"].items():
                    rdf_triple += """OPTIONAL { ?statement_""" + str(
                        i) + """ prov:wasDerivedFrom ?refnode_""" + str(
                        i) + """. ?refnode_""" + str(i) \
                                  + """ pr:""" + str(key) + """ ?""" + self.dic[i]['name'] + """_ref_""" + \
                                  self.dic[i][
                                      'property_id'] + """_""" + str(key) + """.} """
                rdf_triple += """OPTIONAL { ?statement_""" + str(i) + """ wikibase:rank ?""" + self.dic[i][
                    'name'] + """_rank_""" + self.dic[i]['property_id'] + """_rank. } """
            # none-verbose version
            else:
                if self.dic[i]["focus"] == "Entity ID":
                    if self.dic[i]["isSubject"]:
#                         if self.dic[i]["search"] is None:
#                             rdf_triple += """OPTIONAL {"""
                        rdf_triple += """?""" + self.dic[i]["name"] + """_""" + self.dic[i][
                            'property_id'] + """ wdt:""" + self.dic[i][
                                          "property_id"] + """ wd:""" + self.entity_id + """. """
                    else:
                        if self.dic[i]["search"] is None:
                            rdf_triple += """OPTIONAL {"""
                        rdf_triple += """wd:""" + self.entity_id + """ wdt:""" + self.dic[i][
                            "property_id"] + """ ?""" + \
                                      self.dic[i]["name"] + """_""" + self.dic[i]['property_id'] + """. """
                else:
                    if self.dic[i]["isSubject"]:
#                         if self.dic[i]["search"] is None:
#                             rdf_triple += """OPTIONAL {"""
                        rdf_triple += """?""" + self.dic[i]["name"] + """_""" + self.dic[i][
                            'property_id'] + """ wdt:""" + self.dic[i]["property_id"] + """ ?""" + self.dic[i][
                                          'focus'] + """. """
                    else:
                        if self.dic[i]["search"] is None:
                            rdf_triple += """OPTIONAL {"""
                        rdf_triple += """?""" + self.dic[i]['focus'] + """ wdt:""" + self.dic[i][
                            "property_id"] + """ ?""" + self.dic[i]["name"] + """_""" + self.dic[i][
                                          'property_id'] + """. """
            if not self.dic[i]["isSubject"]:
                if i < self.count and self.dic[i]["focus"] != self.dic[i + 1]["focus"] and self.dic[i]["search"] is None:
                    focusChanges += 1
                elif self.dic[i]["search"] is None:
                    rdf_triple += """} """
        for i in range(focusChanges):
            rdf_triple += """} """
        for i in range(1, self.count + 1):
            if self.dic[i]['search'] is not None and self.dic[i]["search"] != '!NA':
                if isinstance(self.dic[i]['search'], tuple):
                    if isinstance(self.dic[i]['search'][0], str):
                        rdf_triple += """FILTER (YEAR(?""" + self.dic[i]['name'] + """_""" + self.dic[i][
                            'property_id'] + """) >= """ + \
                                      self.dic[i]['search'][0] + """ && YEAR(?""" + self.dic[i]['name'] + \
                                      """_""" + self.dic[i]['property_id'] + """) <= """ + self.dic[i]['search'][
                                          1] + """) """
                    else:
                        rdf_triple += """FILTER (?""" + self.dic[i]['name'] + """_""" + self.dic[i]['property_id'] + \
                                      """ >= """ + str(self.dic[i]['search'][0]) + """ && ?""" + self.dic[i]['name'] + \
                                      """_""" + self.dic[i]['property_id'] + """ <= """ + str(
                            self.dic[i]['search'][1]) + """) """
                else:
                    rdf_triple += """FILTER (?""" + self.dic[i]['name'] + """_""" + self.dic[i][
                        'property_id'] + """ = """ + \
                                  """wd:""" + self.dic[i]['search'] + """) """
        if self.time_property is not None:
            time_filter = """?""" + self.dic[1]["name"] + """ p:""" + self.time_property + """ ?pubdateStatement.	
                          ?pubdateStatement ps:""" + self.time_property + """ ?date	
                          FILTER (YEAR(?date) = """ + self.time + """)"""
        if self.limit is not None:
            limit_statement = """LIMIT """ + str(self.limit)
        label_statement = """Service wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }"""
        query = """SELECT DISTINCT"""
        for i in range(1, self.count + 1):
            if self.dic[i]["rowVerbose"] or self.dic[i]["colVerbose"]:
                query += """ ?""" + self.dic[i]["name"] + """_""" + self.dic[i]['property_id']
                if self.dic[i]["label"]:
                    query += """ ?""" + self.dic[i]["name"] + """_""" + self.dic[i]['property_id'] + """Label"""
                for key, value in self.dic[i]["property_name_dic"].items():
                    query += """ ?""" + self.dic[i]["name"] + """_""" + value + """_""" + self.dic[i][
                        'property_id'] + """_""" + str(key)
                for key, value in self.dic[i]["ref_dic"].items():
                    query += """ ?""" + self.dic[i]["name"] + """_ref_""" + self.dic[i]['property_id'] + """_""" + str(
                        key)
                query += """ ?""" + self.dic[i]["name"] + """_rank_""" + self.dic[i]['property_id'] + """_rank"""
            else:
                query += """ ?""" + self.dic[i]["name"] + """_""" + self.dic[i]['property_id']
                if self.dic[i]["label"]:
                    query += """ ?""" + self.dic[i]["name"] + """_""" + self.dic[i]['property_id'] + """Label"""
        query += """ WHERE {""" + rdf_triple + time_filter + label_statement + """} """ + limit_statement
        return query

    def search_property_for_verbose(self):
        property_to_name = {}
        ref_to_name = {}
        rdf_triple, time_filter, limit_statement = """""", """""", """"""
        if self.dic[self.count]["rowVerbose"] or self.dic[self.count]["colVerbose"]:
            for i in range(1, self.count):
                if self.dic[i]["focus"] == "Entity ID":
                    if self.dic[i]["isSubject"]:
                        rdf_triple += """?""" + self.dic[i]["name"] + """ wdt:""" + self.dic[i][
                            "property_id"] + """ wd:""" + self.entity_id + """ ."""
                    else:
                        rdf_triple += """wd:""" + self.entity_id + """ wdt:""" + self.dic[i]["property_id"] + """ ?""" + \
                                      self.dic[i]["name"] + """ ."""
                else:
                    last = self.dic[i]["focus"].rfind('_')
                    focus = self.dic[i]["focus"][:last]
                    if self.dic[i]["isSubject"]:
                        rdf_triple += """?""" + self.dic[i]["name"] + """ wdt:""" + self.dic[i][
                            "property_id"] + """ ?""" + focus + """ ."""
                    else:
                        rdf_triple += """?""" + focus + """ wdt:""" + self.dic[i][
                            "property_id"] + """ ?""" + self.dic[i]["name"] + """ ."""
            if self.dic[self.count]["focus"] == "Entity ID":
                rdf_triple += """wd:""" + self.entity_id + """ p:""" + self.dic[self.count][
                    'property_id'] + """ ?statement.""" + \
                              """?statement """ + """ps:""" + self.dic[self.count]['property_id'] + """ ?item.""" + \
                              """?statement """ + """?pq """ + """?obj.""" + \
                              """?qual wikibase:qualifier ?pq.""" + \
                              """OPTIONAL{ ?statement prov:wasDerivedFrom ?refnode. ?refnode ?pr ?r.}"""
            else:
                last = self.dic[self.count]["focus"].rfind('_')
                focus = self.dic[self.count]["focus"][:last]
                rdf_triple += """?""" + focus + """ p:""" + self.dic[self.count][
                    'property_id'] + """ ?statement.""" + \
                              """?statement """ + """ps:""" + self.dic[self.count]['property_id'] + """ ?item.""" + \
                              """?statement """ + """?pq """ + """?obj.""" + \
                              """?qual wikibase:qualifier ?pq.""" + \
                              """OPTIONAL{ ?statement prov:wasDerivedFrom ?refnode. ?refnode ?pr ?r.}"""
        if self.time_property is not None:
            time_filter = """?""" + self.dic[1]["name"] + """ p:""" + self.time_property + """ ?pubdateStatement.	
                                  ?pubdateStatement ps:""" + self.time_property + """ ?date	
                                  FILTER (YEAR(?date) = """ + self.time + """)"""
        if self.limit is not None:
            limit_statement = """LIMIT """ + str(self.limit)
        label_statement = """Service wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }"""
        query = """SELECT DISTINCT """
        if self.dic[self.count]["rowVerbose"] or self.dic[self.count]["colVerbose"]:
            query += """?item""" + """ ?qual""" + """ ?qualLabel""" + """ ?obj """ + """?pr ?prLabel"""
            query += """ WHERE {""" + rdf_triple + time_filter + label_statement + """} """ + limit_statement
            query_result = get_results(endpoint_url, query)
            for result in query_result['results']['bindings']:
                if 'qual' in result:
                    property_to_name[result['qual']['value'].split('/')[-1]] = result['qualLabel']['value'].replace(' ',
                                                                                                                    '_')
                if 'pr' in result:
                    ref_to_name[result['pr']['value'].split('/')[-1]] = result['prLabel']['value'].replace(' ', '_')
        else:
            query += """?""" + self.dic[self.count]["name"] + """ """
        return property_to_name, ref_to_name

    def __str__(self):
        return str(self.df)

    def __getattr__(self, col_name):
        if col_name in self.df.columns:
            return self.df[col_name]
        else:
            print(col_name + " has not been found.")
            return None


def createRelation(entity_id: str, property_id=None, isSubject=None, rowVerbose=None, colVerbose=None,
                   time_property=None, time=None, name=None, label=False, limit=None):
    if property_id and not name:
        print("Please specify the name of the first column")
        return None
    return Relation(entity_id, property_id, isSubject, rowVerbose, colVerbose, time_property, time, name, label, limit)

def get_Firstname(name: str):
    return name.split(' ')[0]

def get_Lastname(name: str):
    return name.split(' ')[-1]

def remove_prefix(text, prefix):
    if text.startswith(prefix):
        return text[len(prefix):]
    return text


def get_results(endpoint_url, query):
    user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
    # TODO adjust user agent; see https://w.wiki/CX6
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()


def get_name(id: str):
    query = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 	
                PREFIX wd: <http://www.wikidata.org/entity/> 	
                select  *	
                where {	
                wd:""" + id + """ rdfs:label ?label .	
                FILTER (langMatches( lang(?label), "EN" ) )	
                } 	
                LIMIT 1"""
    results = get_results(endpoint_url, query)
    result = ''
    for res in results["results"]["bindings"]:
        result = res['label']['value']
    return result


In [89]:
import sys
#!{sys.executable} -m pip install SPARQLWrapper
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
import numpy as np
import datetime
import func_lib

endpoint_url = "https://query.wikidata.org/sparql"
item = "item"


class Relation:
    """
    The class returned when createRelation is called.
    It contains string field with query.
    We call Relation.query when we need to do the query.
    """

    def __init__(self, entity_id: str, property_id: str, linkDirection: str, rowVerbose: bool,
                 colVerbose: bool, time_property: str, time: str, name: str, label: bool, limit=10000, subclass=False, showid=False):
        self.entity_id = entity_id
        self.query_str = ""
        self.dic = {}
        self.result_dic = {"Entity ID": []}
        self.df = pd.DataFrame()
        self.count = 0
        self.time_property = time_property
        self.time = time
        self.limit = limit
        self.subclass = subclass
        self.focus = "Entity ID"
        if property_id:
            self.extend(property_id, linkDirection, name, rowVerbose, colVerbose, limit, time_property, time, label, subclass, showid)

    def generate_html(self, name: str):
        html = (self.df).to_html()
        text_file = open(name, "w", encoding='utf-8')
        text_file.write(html)
        text_file.close()

    def query(self):
        if self.query_str == "":
            self.result_dic = {"Entity ID": ['http://www.wikidata.org/entity/' + str(self.entity_id)]}
            return self.result_dic
        results = get_results(endpoint_url, self.query_str)
        result_dict = {"Entity ID": ['http://www.wikidata.org/entity/' + str(self.entity_id)]}
        for i in range(1, self.count + 1):
            result_dict[self.dic[i]["name"] + '_' + self.dic[i]['property_id']] = []
            if self.dic[i]["colVerbose"]:
                result_dict[self.dic[i]["name"] + '_rank_' + self.dic[i]['property_id'] + '_rank'] = []
                for key, value in self.dic[i]["property_name_dic"].items():
                    result_dict[
                        self.dic[i]["name"] + "_" + value + '_' + self.dic[i]['property_id'] + '_' + str(key)] = []
                for key, value in self.dic[i]["ref_dic"].items():
                    result_dict[self.dic[i]["name"] + "_ref_" + self.dic[i]['property_id'] + '_' + str(key)] = []

            if self.dic[i]["label"]:
                result_dict[self.dic[i]["name"] + '_' + self.dic[i]['property_id'] + 'Label'] = []

        for result in results['results']['bindings']:
            for key, value in result_dict.items():
                if key in result.keys():
                    result_dict[key].append(result[key]['value'])
                else:
                    result_dict[key].append('NA')
        result_dict["Entity ID"] = ['http://www.wikidata.org/entity/' + str(self.entity_id)] * len(
            result_dict[self.dic[self.count]["name"] + '_' + self.dic[self.count]["property_id"]])
        result_dict["Basic ID"] = [str(self.entity_id)] * len(
            result_dict[self.dic[self.count]["name"] + '_' + self.dic[self.count]["property_id"]])
        self.result_dic = result_dict
        self.df = pd.DataFrame.from_dict(self.result_dic)
        for i in range(1, self.count + 1):
            if self.dic[i]["colVerbose"] and not self.dic[i]["rowVerbose"]:
                col = self.dic[i]['name'] + '_rank_' + self.dic[i]['property_id'] + '_rank'
                if any(self.df[col] == 'http://wikiba.se/ontology#PreferredRank'):
                    self.df = self.df.loc[self.df[col] == 'http://wikiba.se/ontology#PreferredRank']
                else:
                    self.df = self.df.loc[self.df[col] == 'http://wikiba.se/ontology#NormalRank']
        for i in range(1, self.count + 1):
            if self.dic[i]["showid"]:
                self.df[self.dic[i]["name"] + '_' + self.dic[i]['property_id'] + '_basic'] = self.df[self.dic[i]["name"] + '_' + self.dic[i]['property_id']].apply(lambda x: x.split('/')[-1])
#         if require is not None:
#             for r in require:
#                 self.df = self.df.loc[self.df[r] != 'NA']
        self.df = pd.DataFrame(data=self.df)
#         if self.df.shape[0] >= 10000:
#             print("Warning: Your query leads to too many results. Only 10,000 returned.")
        return self.df

    def extend(self, property_id: str, linkDirection: str, name: str, rowVerbose=False, colVerbose=False, limit=None,
               time_property=None, time=None, search=None, label=False, subclass=False, showid=False):
        self.count += 1
        self.dic[self.count] = {}
        self.dic[self.count]["name"] = name
        self.dic[self.count]["focus"] = self.focus
        self.dic[self.count]["property_id"] = property_id
        if linkDirection == 'forward':
            self.dic[self.count]["isSubject"] = False
        elif linkDirection == 'backward':
            self.dic[self.count]["isSubject"] = True
        else:
            self.count -= 1
            raise Exception("Fail to extend! LinkDirection should be either forward or backward.")
        self.dic[self.count]["limit"] = limit
        self.dic[self.count]["rowVerbose"] = rowVerbose
        self.dic[self.count]["colVerbose"] = colVerbose
        self.dic[self.count]['time_property'] = time_property
        self.dic[self.count]['time'] = time
        self.dic[self.count]['search'] = search
        self.dic[self.count]['label'] = label
        self.dic[self.count]['subclass'] = subclass
        self.dic[self.count]['showid'] = showid
        # subclass is specific to P31, when subclass is true, we get intance of all subclasses of the entity
        if rowVerbose or colVerbose:
            self.dic[self.count]["property_name_dic"], self.dic[self.count][
                "ref_dic"] = self.search_property_for_verbose()
        if time_property and time:
            self.time_property = time_property
            self.time = time
        if limit:
            self.limit = limit
        self.query_str = self.define_query_relation()

    def changeFocus(self, name="Entity ID"):
        self.focus = name
    
    def extendWithFunctionHelper(self, columns, func, name, param, dim):
        if dim == 1:
            if isinstance(columns, list):
                if param is not None:
                    if isinstance(param, list):
                        self.df[name] = self.df[columns].apply(lambda x: func(*x, *param), axis=1)
                    else:
                        self.df[name] = self.df[columns].apply(lambda x: func(*x, param), axis=1)
                else:
                    self.df[name] = self.df[columns].apply(lambda x: func(*x), axis=1)
            else:
                if param is not None:
                    if isinstance(param, list):
                        self.df[name] = self.df[columns].apply(lambda x: func(x, *param), axis=1)
                    else:
                        self.df[name] = self.df[columns].apply(lambda x: func(x, param), axis=1)
                else:
                    self.df[name] = self.df[columns].apply(func, axis=1)
        elif dim == 0:
            if not isinstance(columns, list):
                columns = [columns]
            if param is not None:
                if isinstance(param, list):
                    return self.df[columns].apply(lambda x: func(x, *param), axis=0)
                        #return func(self.df[columns], *param, axis=0)
                else:
                    return self.df[columns].apply(lambda x: func(x, param), axis=0)
                        #return func(self.df[columns], param, axis=0)
            else:
                return self.df[columns].apply(lambda x: func(x), axis=0)
                    #return func(self.df[columns], axis=0)
#             else:
#                 if param is not None:
#                     if isinstance(param, list):
                        
#                         #return func(self.df[columns], axis=0)
#                     else:
#                         return func(self.df[columns], axis=0)
#                 else:
#                     return func(self.df[columns], axis=0)
                
        else:
            if isinstance(columns, list):
                if param is not None:
                    if isinstance(param, list):
                        self.df = func(self.df, name, *columns, *param)
                    else:
                        self.df = func(self.df, name, *columns, param)
                else:
                    self.df = func(self.df, name, *columns)
            else:
                if param is not None:
                    if isinstance(param, list):
                        self.df = func(self.df, name, columns, *param)
                    else:
                        self.df = func(self.df, name, columns, param)
                else:
                    self.df = func(self.df, name, columns)
    
    def extendWithFunction(self, columns, func, name=None, param=None, dim=-1):
        if type(func) == str:
            if func.startswith('F'):
                try:
                    func_id = int(func[1:])
                    if func_id >= func_lib.func_num():
                        print("Not available.")
                    else:
                        return self.extendWithFunctionHelper(columns, func, name, param, dim)
                except:
                    raise Exception("Not a valid function id, a valid function id should be 'Fn', n is an integer.")
            else:
                raise Exception("Not a valid function id, a valid function id should be 'Fn', n is an integer.")
        else:
            return self.extendWithFunctionHelper(columns, func, name, param, dim)

    def define_query_relation(self):
        rdf_triple, time_filter, limit_statement = """""", """""", """"""
        if self.count < 1:
            return None
        focusChanges = 0
        for i in range(1, self.count + 1):
            if self.dic[i]["rowVerbose"] or self.dic[i]["colVerbose"]:
                if self.dic[i]["search"] is None and not self.dic[i]["isSubject"]:
                        rdf_triple += """OPTIONAL {"""
                if self.dic[i]["focus"] == "Entity ID":
#                     if self.dic[i]["search"] is None:
#                         rdf_triple += """OPTIONAL {"""
                    rdf_triple += """wd:""" + self.entity_id + """ p:""" + self.dic[i][
                        'property_id'] + """ ?statement_""" + str(i) + """. """ \
                                  + """?statement_""" + str(i) + """ ps:""" + self.dic[i][
                                      'property_id'] + """ ?""" + \
                                  self.dic[i]['name'] \
                                  + """_""" + self.dic[i]['property_id'] + """. """
                else:
                    rdf_triple += """?""" + self.dic[i]["focus"] + """ p:""" + self.dic[i][
                        'property_id'] + """ ?statement_""" + str(i) + """. """ \
                                  + """?statement_""" + str(i) + """ ps:""" + self.dic[i][
                                      'property_id'] + """ ?""" + \
                                  self.dic[i]['name'] \
                                  + """_""" + self.dic[i]['property_id'] + """. """
                for key, value in self.dic[i]["property_name_dic"].items():
                    rdf_triple += """OPTIONAL { """ + """?statement_""" + str(i) + """ pq:""" + str(key) \
                                  + """ ?""" + self.dic[i]['name'] + """_""" + value + """_""" + self.dic[i][
                                      'property_id'] + """_""" + str(key) + """.} """
                for key, value in self.dic[i]["ref_dic"].items():
                    rdf_triple += """OPTIONAL { ?statement_""" + str(
                        i) + """ prov:wasDerivedFrom ?refnode_""" + str(
                        i) + """. ?refnode_""" + str(i) \
                                  + """ pr:""" + str(key) + """ ?""" + self.dic[i]['name'] + """_ref_""" + \
                                  self.dic[i][
                                      'property_id'] + """_""" + str(key) + """.} """
                rdf_triple += """OPTIONAL { ?statement_""" + str(i) + """ wikibase:rank ?""" + self.dic[i][
                    'name'] + """_rank_""" + self.dic[i]['property_id'] + """_rank. } """
            # none-verbose version
            else:
                if self.dic[i]["focus"] == "Entity ID":
                    if self.dic[i]["isSubject"]:
                        if not self.dic[i]['subclass']:
                            rdf_triple += """?""" + self.dic[i]["name"] + """_""" + self.dic[i][
                                'property_id'] + """ wdt:""" + self.dic[i][
                                              "property_id"] + """ wd:""" + self.entity_id + """. """
                        else:
                            if self.dic[i]['property_id'] == "P31":
                                rdf_triple += """?""" + self.dic[i]["name"] + """_""" + self.dic[i][
                                    'property_id'] + """ wdt:""" + self.dic[i][
                                                  "property_id"] + """ ?subclasses. """
                                rdf_triple += """?subclasses wdt:P279+ wd:""" + self.entity_id + """. """
                    else:
                        if self.dic[i]["search"] is None:
                            rdf_triple += """OPTIONAL {"""
                        rdf_triple += """wd:""" + self.entity_id + """ wdt:""" + self.dic[i][
                            "property_id"] + """ ?""" + \
                                      self.dic[i]["name"] + """_""" + self.dic[i]['property_id'] + """. """
                else:
                    if self.dic[i]["isSubject"]:
                        if not self.dic[i]['subclass']:
                            rdf_triple += """?""" + self.dic[i]["name"] + """_""" + self.dic[i][
                                'property_id'] + """ wdt:""" + self.dic[i]["property_id"] + """ ?""" + self.dic[i][
                                              'focus'] + """. """
                        else:
                            if self.dic[i]['property_id'] == "P31":
                                rdf_triple += """?""" + self.dic[i]["name"] + """_""" + self.dic[i][
                                'property_id'] + """ wdt:""" + self.dic[i]["property_id"] + """ ?subclasses. """
                                rdf_triple += """?subclasses wdt:P279+ ?""" + self.dic[i]['focus'] + """. """
                    else:
                        if self.dic[i]["search"] is None:
                            rdf_triple += """OPTIONAL {"""
                        rdf_triple += """?""" + self.dic[i]['focus'] + """ wdt:""" + self.dic[i][
                            "property_id"] + """ ?""" + self.dic[i]["name"] + """_""" + self.dic[i][
                                          'property_id'] + """. """
            if not self.dic[i]["isSubject"]:
                if i < self.count and self.dic[i]["focus"] != self.dic[i + 1]["focus"] and self.dic[i]["search"] is None:
                    focusChanges += 1
                elif self.dic[i]["search"] is None:
                    rdf_triple += """} """
                
        for i in range(focusChanges):
            rdf_triple += """} """
        for i in range(1, self.count + 1):
            if self.dic[i]['search'] is not None and self.dic[i]["search"] != '!NA':
                if isinstance(self.dic[i]['search'], tuple):
                    if isinstance(self.dic[i]['search'][0], str):
                        rdf_triple += """FILTER (YEAR(?""" + self.dic[i]['name'] + """_""" + self.dic[i][
                            'property_id'] + """) >= """ + \
                                      self.dic[i]['search'][0] + """ && YEAR(?""" + self.dic[i]['name'] + \
                                      """_""" + self.dic[i]['property_id'] + """) <= """ + self.dic[i]['search'][
                                          1] + """) """
                    else:
                        rdf_triple += """FILTER (?""" + self.dic[i]['name'] + """_""" + self.dic[i]['property_id'] + \
                                      """ >= """ + str(self.dic[i]['search'][0]) + """ && ?""" + self.dic[i]['name'] + \
                                      """_""" + self.dic[i]['property_id'] + """ <= """ + str(
                            self.dic[i]['search'][1]) + """) """
                else:
                    rdf_triple += """FILTER (?""" + self.dic[i]['name'] + """_""" + self.dic[i][
                        'property_id'] + """ = """ + \
                                  """wd:""" + self.dic[i]['search'] + """) """
        if self.time_property is not None:
            time_filter = """?""" + self.dic[1]["name"] + """ p:""" + self.time_property + """ ?pubdateStatement.	
                          ?pubdateStatement ps:""" + self.time_property + """ ?date	
                          FILTER (YEAR(?date) = """ + self.time + """)"""
        if self.limit is not None:
            limit_statement = """LIMIT """ + str(self.limit)
        label_statement = """Service wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }"""
        query = """SELECT DISTINCT"""
        for i in range(1, self.count + 1):
            if self.dic[i]["rowVerbose"] or self.dic[i]["colVerbose"]:
                query += """ ?""" + self.dic[i]["name"] + """_""" + self.dic[i]['property_id']
                if self.dic[i]["label"]:
                    query += """ ?""" + self.dic[i]["name"] + """_""" + self.dic[i]['property_id'] + """Label"""
                for key, value in self.dic[i]["property_name_dic"].items():
                    query += """ ?""" + self.dic[i]["name"] + """_""" + value + """_""" + self.dic[i][
                        'property_id'] + """_""" + str(key)
                for key, value in self.dic[i]["ref_dic"].items():
                    query += """ ?""" + self.dic[i]["name"] + """_ref_""" + self.dic[i]['property_id'] + """_""" + str(
                        key)
                query += """ ?""" + self.dic[i]["name"] + """_rank_""" + self.dic[i]['property_id'] + """_rank"""
            else:
                query += """ ?""" + self.dic[i]["name"] + """_""" + self.dic[i]['property_id']
                if self.dic[i]["label"]:
                    query += """ ?""" + self.dic[i]["name"] + """_""" + self.dic[i]['property_id'] + """Label"""
        query += """ WHERE {""" + rdf_triple + time_filter + label_statement + """} """ + limit_statement
        return query

    def search_property_for_verbose(self):
        property_to_name = {}
        ref_to_name = {}
        rdf_triple, time_filter, limit_statement = """""", """""", """"""
        if self.dic[self.count]["rowVerbose"] or self.dic[self.count]["colVerbose"]:
            for i in range(1, self.count):
                if self.dic[i]["focus"] == "Entity ID":
                    if self.dic[i]["isSubject"]:
                        rdf_triple += """?""" + self.dic[i]["name"] + """ wdt:""" + self.dic[i][
                            "property_id"] + """ wd:""" + self.entity_id + """ ."""
                    else:
                        rdf_triple += """wd:""" + self.entity_id + """ wdt:""" + self.dic[i]["property_id"] + """ ?""" + \
                                      self.dic[i]["name"] + """ ."""
                else:
                    last = self.dic[i]["focus"].rfind('_')
                    focus = self.dic[i]["focus"][:last]
                    if self.dic[i]["isSubject"]:
                        rdf_triple += """?""" + self.dic[i]["name"] + """ wdt:""" + self.dic[i][
                            "property_id"] + """ ?""" + focus + """ ."""
                    else:
                        rdf_triple += """?""" + focus + """ wdt:""" + self.dic[i][
                            "property_id"] + """ ?""" + self.dic[i]["name"] + """ ."""
            if self.dic[self.count]["focus"] == "Entity ID":
                rdf_triple += """wd:""" + self.entity_id + """ p:""" + self.dic[self.count][
                    'property_id'] + """ ?statement.""" + \
                              """?statement """ + """ps:""" + self.dic[self.count]['property_id'] + """ ?item.""" + \
                              """?statement """ + """?pq """ + """?obj.""" + \
                              """?qual wikibase:qualifier ?pq.""" + \
                              """OPTIONAL{ ?statement prov:wasDerivedFrom ?refnode. ?refnode ?pr ?r.}"""
            else:
                last = self.dic[self.count]["focus"].rfind('_')
                focus = self.dic[self.count]["focus"][:last]
                rdf_triple += """?""" + focus + """ p:""" + self.dic[self.count][
                    'property_id'] + """ ?statement.""" + \
                              """?statement """ + """ps:""" + self.dic[self.count]['property_id'] + """ ?item.""" + \
                              """?statement """ + """?pq """ + """?obj.""" + \
                              """?qual wikibase:qualifier ?pq.""" + \
                              """OPTIONAL{ ?statement prov:wasDerivedFrom ?refnode. ?refnode ?pr ?r.}"""
        if self.time_property is not None:
            time_filter = """?""" + self.dic[1]["name"] + """ p:""" + self.time_property + """ ?pubdateStatement.	
                                  ?pubdateStatement ps:""" + self.time_property + """ ?date	
                                  FILTER (YEAR(?date) = """ + self.time + """)"""
        if self.limit is not None:
            limit_statement = """LIMIT """ + str(self.limit)
        label_statement = """Service wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }"""
        query = """SELECT DISTINCT """
        if self.dic[self.count]["rowVerbose"] or self.dic[self.count]["colVerbose"]:
            query += """?item""" + """ ?qual""" + """ ?qualLabel""" + """ ?obj """ + """?pr ?prLabel"""
            query += """ WHERE {""" + rdf_triple + time_filter + label_statement + """} """ + limit_statement
            query_result = get_results(endpoint_url, query)
            for result in query_result['results']['bindings']:
                if 'qual' in result:
                    property_to_name[result['qual']['value'].split('/')[-1]] = result['qualLabel']['value'].replace(' ',
                                                                                                                    '_')
                if 'pr' in result:
                    ref_to_name[result['pr']['value'].split('/')[-1]] = result['prLabel']['value'].replace(' ', '_')
        else:
            query += """?""" + self.dic[self.count]["name"] + """ """
        return property_to_name, ref_to_name

    def __str__(self):
        return str(self.df)

    def __getattr__(self, col_name):
        if col_name in self.df.columns:
            return self.df[col_name]
        else:
            print(col_name + " has not been found.")
            return None


def createRelation(entity_id: str, property_id=None, isSubject=None, rowVerbose=None, colVerbose=None,
                   time_property=None, time=None, name=None, label=False, limit=None, subclass=False):
    if property_id and not name:
        print("Please specify the name of the first column")
        return None
    return Relation(entity_id, property_id, isSubject, rowVerbose, colVerbose, time_property, time, name, label, limit, subclass)

def get_Firstname(name: str):
    return name.split(' ')[0]

def get_Lastname(name: str):
    return name.split(' ')[-1]

def remove_prefix(text, prefix):
    if text.startswith(prefix):
        return text[len(prefix):]
    return text


def get_results(endpoint_url, query):
    user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
    # TODO adjust user agent; see https://w.wiki/CX6
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()


def get_name(id: str):
    query = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 	
                PREFIX wd: <http://www.wikidata.org/entity/> 	
                select  *	
                where {	
                wd:""" + id + """ rdfs:label ?label .	
                FILTER (langMatches( lang(?label), "EN" ) )	
                } 	
                LIMIT 1"""
    results = get_results(endpoint_url, query)
    result = ''
    for res in results["results"]["bindings"]:
        result = res['label']['value']
    return result

In [4]:
func_lib.func_num()

4

In [5]:
def get_entity_id(url):
    return url.split('/')[-1]

func_lib.add_func(get_entity_id)

func_lib.func_num()

5

In [105]:
import requests
from bs4 import BeautifulSoup
print("100 Singers")
r = createRelation("Q177220")
r.extend("P106", 'backward', "Singer", limit=100, label=True)
r.changeFocus("Singer_P106")
r.extend("P569", 'forward', "Date_of_Birth")
r.extend("P570", 'forward', "Date_of_Death")
r.query()

def age_cal(birth, death):
    if birth == 'NA':
        return 'NA'
    if death == 'NA':  
        date_time_obj = datetime.datetime.strptime(birth[:4], '%Y')
        return datetime.datetime.now().year - date_time_obj.year
    else:
        return datetime.datetime.strptime(death[:4], '%Y').year - datetime.datetime.strptime(birth[:4], '%Y').year

r.extendWithFunction(['Date_of_Birth_P569', 'Date_of_Death_P570'], age_cal, 'Age', dim=1)
#r.extendWithFunction('Singer_P106Label',textual_summary,'summary')
# for index, row in r.df.iterrows():
#     print(row['summary'])
r.df

100 Singers


Unnamed: 0,Entity ID,Singer_P106,Singer_P106Label,Date_of_Birth_P569,Date_of_Death_P570,Basic ID,Age
0,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q2680,Bruce Willis,1955-03-19T00:00:00Z,,Q177220,65
1,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q8741,Rüdiger Skoczowsky,1986-01-01T00:00:00Z,,Q177220,34
2,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q3008,Mike Oldfield,1953-05-15T00:00:00Z,,Q177220,67
3,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q10681,Alan Stivell,1944-01-06T00:00:00Z,,Q177220,76
4,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q5950,James Brown,1933-05-03T00:00:00Z,2006-12-25T00:00:00Z,Q177220,73
5,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q1453,Alexandra Stan,1989-06-10T00:00:00Z,,Q177220,31
6,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q1225,Bruce Springsteen,1949-09-23T00:00:00Z,,Q177220,71
7,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q10695,Dan Ar Braz,1949-01-15T00:00:00Z,,Q177220,71
8,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q392,Bob Dylan,1941-05-24T00:00:00Z,,Q177220,79
9,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q4826,Rosa Valetti,1876-01-25T00:00:00Z,1937-12-10T00:00:00Z,Q177220,61


In [6]:
func_lib.func_num()

7

In [95]:
import requests, json

film_gross = pd.read_csv('boxoffice.csv')

def map(x):
    if x == 'NA':
        return 'NA'
    info = requests.get('http://ipinfo.io/json').json()
    url = 'https://www.google.com/maps/dir/'
    url += info['city'] + '/'
    url += x
    url.replace(' ', '+')
    url.replace('&', '%26')
    r = requests.get(url)
    return r.url


def share(df, name, film, director, film_gross):
    #film_gross = pd.read_csv('boxoffice.csv')
    film_gross.columns = ['rank', film, 'studio', 'gross', 'year']
    film_gross[film] = film_gross[film].astype(str)
    df[film] = df[film].astype(str)
    new_df = film_gross.merge(df, on=film)  # merge with external data
    new_df.drop_duplicates(subset=['rank'], inplace=True)
    sum_df = new_df.groupby([director]).sum()
    sum_df.columns = ['rk', 'total', 'y']
    final_df = new_df.merge(sum_df, on=director)
    final_df[name] = final_df.apply(lambda x: '%.2f%%' % (x['gross'] / x['total'] * 100), axis=1)
    sel = [str(x) for x in df.columns]
    sel.append(name)
    return final_df[sel]

In [96]:
r3 = createRelation('Q2526255')
r3.extend('P106', 'backward', 'Directors', label=True)
r3.changeFocus('Directors_P106')
r3.extend('P57', 'backward', 'Films', label=True)
r3.changeFocus('Films_P57')
r3.extend('P577', 'forward', 'Time', search=('2015', '2020'))
r3.query()
r3.extendWithFunction(columns=['Films_P57Label', 'Directors_P106Label'],
                      func=share, name='fraction', param=film_gross)
r3.df

Unnamed: 0,Entity ID,Directors_P106,Directors_P106Label,Films_P57,Films_P57Label,Time_P577,Basic ID,fraction
0,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q7383978,Ryan Coogler,http://www.wikidata.org/entity/Q23780734,Black Panther,2018-02-15T00:00:00Z,Q2526255,86.45%
1,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q7383978,Ryan Coogler,http://www.wikidata.org/entity/Q18822138,Creed,2015-11-25T00:00:00Z,Q2526255,13.55%
2,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q18018415,Anthony Russo,http://www.wikidata.org/entity/Q23780914,Avengers: Infinity War,2018-04-25T00:00:00Z,Q2526255,62.45%
3,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q18018415,Anthony Russo,http://www.wikidata.org/entity/Q18407657,Captain America: Civil War,2016-04-27T00:00:00Z,Q2526255,37.55%
4,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q5145625,Colin Trevorrow,http://www.wikidata.org/entity/Q3512046,Jurassic World,2015-05-29T00:00:00Z,Q2526255,99.31%
5,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q5145625,Colin Trevorrow,http://www.wikidata.org/entity/Q21098696,The Book of Henry,2017-09-21T00:00:00Z,Q2526255,0.69%
6,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q310960,Brad Bird,http://www.wikidata.org/entity/Q24832112,Incredibles 2,2018-06-05T00:00:00Z,Q2526255,86.47%
7,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q310960,Brad Bird,http://www.wikidata.org/entity/Q7820305,Tomorrowland,2015-05-21T00:00:00Z,Q2526255,13.53%
8,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q328723,Andrew Stanton,http://www.wikidata.org/entity/Q9321426,Finding Dory,2016-06-16T00:00:00Z,Q2526255,100.00%
9,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q298025,Joss Whedon,http://www.wikidata.org/entity/Q14171368,Avengers: Age of Ultron,2015-04-22T00:00:00Z,Q2526255,100.00%


In [5]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 100)
r3.df

In [97]:
r4 = createRelation('Q33999')
r4.extend('P106', 'backward', 'Actors', label=True)
r4.changeFocus('Actors_P106')
r4.extend('P161', 'backward', 'Films', label=True)
r4.changeFocus('Films_P161')
r4.extend('P577', 'forward', 'Time', search=('2010', '2020'), limit=2000)
r4.query()
r4.extendWithFunction(columns=['Films_P161Label', 'Actors_P106Label'], func=share, name='fraction', param=film_gross)
r4.df

Unnamed: 0,Entity ID,Actors_P106,Actors_P106Label,Films_P161,Films_P161Label,Time_P577,Basic ID,fraction
0,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q41396,Josh Brolin,http://www.wikidata.org/entity/Q23780914,Avengers: Infinity War,2018-04-25T00:00:00Z,Q33999,35.63%
1,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q41396,Josh Brolin,http://www.wikidata.org/entity/Q14171368,Avengers: Age of Ultron,2015-04-22T00:00:00Z,Q33999,24.10%
2,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q41396,Josh Brolin,http://www.wikidata.org/entity/Q5887360,Guardians of the Galaxy,2014-08-28T00:00:00Z,Q33999,17.49%
3,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q41396,Josh Brolin,http://www.wikidata.org/entity/Q326114,True Grit,2010-01-01T00:00:00Z,Q33999,8.99%
4,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q41396,Josh Brolin,http://www.wikidata.org/entity/Q326526,Wall Street: Money Never Sleeps,2010-05-14T00:00:00Z,Q33999,2.75%
5,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q41396,Josh Brolin,http://www.wikidata.org/entity/Q27921157,Sicario: Day of the Soldado,2018-01-01T00:00:00Z,Q33999,2.62%
6,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q41396,Josh Brolin,http://www.wikidata.org/entity/Q17337292,Sicario,2015-09-18T00:00:00Z,Q33999,2.46%
7,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q41396,Josh Brolin,http://www.wikidata.org/entity/Q27751,Gangster Squad,2013-01-01T00:00:00Z,Q33999,2.42%
8,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q41396,Josh Brolin,http://www.wikidata.org/entity/Q18516636,"Hail, Caesar!",2016-02-01T00:00:00Z,Q33999,1.60%
9,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q41396,Josh Brolin,http://www.wikidata.org/entity/Q4026942,Labor Day,2013-01-01T00:00:00Z,Q33999,0.70%


In [22]:
r6 = createRelation("Q11696")
r6.extend("P39", True, "Presidents")
r6.changeFocus("Presidents_P39")
r6.extend("P19", False, "place_president", label=True)
r6.query()
r6.extend("P26", False, "Spouse")
r6.changeFocus("Spouse_P26")
r6.extend("P19", False, "place_spouse", label=True)
r6.query()
r6.extendWithFunction("place_president_P19Label", map, "to_president")
r6.extendWithFunction("place_spouse_P19Label", map, "to_spouse")
r6.df

Unnamed: 0,Entity ID,Presidents_P39,place_president_P19,place_president_P19Label,Spouse_P26,place_spouse_P19,place_spouse_P19Label,to_president,to_spouse
0,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q608408,,,,,,,
1,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q1052076,,,,,,,
2,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q2719854,,,,,,,
3,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q3438922,,,,,,,
4,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q3545001,,,,,,,
...,...,...,...,...,...,...,...,...,...
65,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q11813,http://www.wikidata.org/entity/Q1375967,Port Conway,http://www.wikidata.org/entity/Q233638,http://www.wikidata.org/entity/Q502257,Guilford County,https://www.google.com/maps/dir/Ann%20Arbor/Po...,https://www.google.com/maps/dir/Ann%20Arbor/Gu...
66,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q11869,http://www.wikidata.org/entity/Q337348,Charles City County,http://www.wikidata.org/entity/Q255190,http://www.wikidata.org/entity/Q962499,Morristown,https://www.google.com/maps/dir/Ann%20Arbor/Ch...,https://www.google.com/maps/dir/Ann%20Arbor/Mo...
67,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q9640,http://www.wikidata.org/entity/Q975048,Stonewall,http://www.wikidata.org/entity/Q233648,http://www.wikidata.org/entity/Q3476511,Karnack,https://www.google.com/maps/dir/Ann%20Arbor/St...,https://www.google.com/maps/dir/Ann%20Arbor/Ka...
68,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q35498,http://www.wikidata.org/entity/Q1393235,Fairfield,http://www.wikidata.org/entity/Q2306099,http://www.wikidata.org/entity/Q990985,Culpeper,https://www.google.com/maps/dir/Ann%20Arbor/Fa...,https://www.google.com/maps/dir/Ann%20Arbor/Cu...


In [115]:
def growth_rate(df, name, gdp, time):
    df.sort_values(by=[time], ascending=False, inplace=True)
    df.reset_index(drop=True, inplace=True)
    df[name] = df[gdp]
    for i in range(len(df)-1):
        df.loc[i, name] = '%.2f%%' % ((float(df.loc[i, gdp]) - float(df.loc[i+1, gdp])) / float(df.loc[i+1, gdp]) * 100)
    df.loc[len(df)-1, name] = 'NA'
    return df

r = createRelation("Q458")
r.extend("P2131", 'forward', "GDP", rowVerbose=True, colVerbose=True)
r.query()
r.extendWithFunction(['GDP_P2131', 'GDP_point_in_time_P2131_P585'], growth_rate, 'growth_rate')

In [116]:
r.df

Unnamed: 0,Entity ID,GDP_P2131,GDP_rank_P2131_rank,GDP_point_in_time_P2131_P585,GDP_ref_P2131_P248,GDP_ref_P2131_P813,GDP_ref_P2131_P275,GDP_ref_P2131_P854,Basic ID,growth_rate
0,http://www.wikidata.org/entity/Q458,17277697660475.0,http://wikiba.se/ontology#PreferredRank,2017-01-01T00:00:00Z,http://www.wikidata.org/entity/Q21540096,2018-10-23T00:00:00Z,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=EU,Q458,4.77%
1,http://www.wikidata.org/entity/Q458,16491855791194.9,http://wikiba.se/ontology#NormalRank,2016-01-01T00:00:00Z,http://www.wikidata.org/entity/Q21540096,2018-10-23T00:00:00Z,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=EU,Q458,0.46%
2,http://www.wikidata.org/entity/Q458,16416670356766.4,http://wikiba.se/ontology#NormalRank,2015-01-01T00:00:00Z,http://www.wikidata.org/entity/Q21540096,2018-10-23T00:00:00Z,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=EU,Q458,-11.91%
3,http://www.wikidata.org/entity/Q458,18635535561984.7,http://wikiba.se/ontology#NormalRank,2014-01-01T00:00:00Z,http://www.wikidata.org/entity/Q21540096,2018-10-23T00:00:00Z,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=EU,Q458,3.36%
4,http://www.wikidata.org/entity/Q458,18029679886231.6,http://wikiba.se/ontology#NormalRank,2013-01-01T00:00:00Z,http://www.wikidata.org/entity/Q21540096,2018-10-23T00:00:00Z,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=EU,Q458,4.26%
5,http://www.wikidata.org/entity/Q458,17292774157162.6,http://wikiba.se/ontology#NormalRank,2012-01-01T00:00:00Z,http://www.wikidata.org/entity/Q21540096,2018-10-23T00:00:00Z,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=EU,Q458,-5.76%
6,http://www.wikidata.org/entity/Q458,18350556736022.0,http://wikiba.se/ontology#NormalRank,2011-01-01T00:00:00Z,http://www.wikidata.org/entity/Q21540096,2018-10-23T00:00:00Z,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=EU,Q458,8.02%
7,http://www.wikidata.org/entity/Q458,16987391593239.7,http://wikiba.se/ontology#NormalRank,2010-01-01T00:00:00Z,http://www.wikidata.org/entity/Q21540096,2018-10-23T00:00:00Z,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=EU,Q458,-0.67%
8,http://www.wikidata.org/entity/Q458,17102490228101.1,http://wikiba.se/ontology#NormalRank,2009-01-01T00:00:00Z,http://www.wikidata.org/entity/Q21540096,2018-10-23T00:00:00Z,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=EU,Q458,-10.63%
9,http://www.wikidata.org/entity/Q458,19137013228474.8,http://wikiba.se/ontology#NormalRank,2008-01-01T00:00:00Z,http://www.wikidata.org/entity/Q21540096,2018-10-23T00:00:00Z,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=EU,Q458,7.55%


In [117]:
r2 = createRelation('Q30')
r2.extend('P6', 'forward', 'president', rowVerbose=True, colVerbose=True, label=True)
r2.extend('P2131', 'forward', 'gdp', rowVerbose=True, colVerbose=True)
r2.query()
r2.df

Unnamed: 0,Entity ID,president_P6,president_rank_P6_rank,president_end_time_P6_P582,president_start_time_P6_P580,gdp_P2131,gdp_rank_P2131_rank,gdp_point_in_time_P2131_P585,gdp_ref_P2131_P813,gdp_ref_P2131_P248,gdp_ref_P2131_P275,gdp_ref_P2131_P854,Basic ID
0,http://www.wikidata.org/entity/Q30,http://www.wikidata.org/entity/Q12312,http://wikiba.se/ontology#NormalRank,1857-03-04T00:00:00Z,1853-03-04T00:00:00Z,6539299000000,http://wikiba.se/ontology#NormalRank,1992-01-01T00:00:00Z,2018-10-18T00:00:00Z,http://www.wikidata.org/entity/Q21540096,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=US,Q30
1,http://www.wikidata.org/entity/Q30,http://www.wikidata.org/entity/Q12312,http://wikiba.se/ontology#NormalRank,1857-03-04T00:00:00Z,1853-03-04T00:00:00Z,12274928000000,http://wikiba.se/ontology#NormalRank,2004-01-01T00:00:00Z,2018-10-18T00:00:00Z,http://www.wikidata.org/entity/Q21540096,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=US,Q30
2,http://www.wikidata.org/entity/Q30,http://www.wikidata.org/entity/Q12312,http://wikiba.se/ontology#NormalRank,1857-03-04T00:00:00Z,1853-03-04T00:00:00Z,5252629000000,http://wikiba.se/ontology#NormalRank,1988-01-01T00:00:00Z,2018-10-18T00:00:00Z,http://www.wikidata.org/entity/Q21540096,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=US,Q30
3,http://www.wikidata.org/entity/Q30,http://www.wikidata.org/entity/Q34836,http://wikiba.se/ontology#NormalRank,1877-03-04T00:00:00Z,1869-03-04T00:00:00Z,18624475000000,http://wikiba.se/ontology#NormalRank,2016-01-01T00:00:00Z,2018-10-18T00:00:00Z,http://www.wikidata.org/entity/Q21540096,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=US,Q30
4,http://www.wikidata.org/entity/Q30,http://www.wikidata.org/entity/Q12312,http://wikiba.se/ontology#NormalRank,1857-03-04T00:00:00Z,1853-03-04T00:00:00Z,743700000000,http://wikiba.se/ontology#NormalRank,1965-01-01T00:00:00Z,2018-10-18T00:00:00Z,http://www.wikidata.org/entity/Q21540096,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=US,Q30
5,http://www.wikidata.org/entity/Q30,http://www.wikidata.org/entity/Q12312,http://wikiba.se/ontology#NormalRank,1857-03-04T00:00:00Z,1853-03-04T00:00:00Z,8608515000000,http://wikiba.se/ontology#NormalRank,1997-01-01T00:00:00Z,2018-10-18T00:00:00Z,http://www.wikidata.org/entity/Q21540096,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=US,Q30
6,http://www.wikidata.org/entity/Q30,http://www.wikidata.org/entity/Q12312,http://wikiba.se/ontology#NormalRank,1857-03-04T00:00:00Z,1853-03-04T00:00:00Z,3344991000000,http://wikiba.se/ontology#NormalRank,1982-01-01T00:00:00Z,2018-10-18T00:00:00Z,http://www.wikidata.org/entity/Q21540096,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=US,Q30
7,http://www.wikidata.org/entity/Q30,http://www.wikidata.org/entity/Q34836,http://wikiba.se/ontology#NormalRank,1877-03-04T00:00:00Z,1869-03-04T00:00:00Z,11510670000000,http://wikiba.se/ontology#NormalRank,2003-01-01T00:00:00Z,2018-10-18T00:00:00Z,http://www.wikidata.org/entity/Q21540096,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=US,Q30
8,http://www.wikidata.org/entity/Q30,http://www.wikidata.org/entity/Q12312,http://wikiba.se/ontology#NormalRank,1857-03-04T00:00:00Z,1853-03-04T00:00:00Z,942500000000,http://wikiba.se/ontology#NormalRank,1968-01-01T00:00:00Z,2018-10-18T00:00:00Z,http://www.wikidata.org/entity/Q21540096,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=US,Q30
9,http://www.wikidata.org/entity/Q30,http://www.wikidata.org/entity/Q34836,http://wikiba.se/ontology#NormalRank,1877-03-04T00:00:00Z,1869-03-04T00:00:00Z,1548825000000,http://wikiba.se/ontology#NormalRank,1974-01-01T00:00:00Z,2018-10-18T00:00:00Z,http://www.wikidata.org/entity/Q21540096,http://www.wikidata.org/entity/Q20007257,https://data.worldbank.org/indicator/NY.GDP.MKTP.CD?locations=US,Q30


In [131]:
# def match(df, name, gtime, start, end):
#     df = df.query(str(str(gtime) >= str(start)))
#     return df

r2.df['gdp_point_in_time_P2131_P585'] = r2.df['gdp_point_in_time_P2131_P585'][:4]
r2.df['president_start_time_P6_P580'] = r2.df['president_start_time_P6_P580'][:4]
r2.df['president_end_time_P6_P582'] = r2.df['president_end_time_P6_P582'][:4]
#r2.extendWithFunction(['gdp_point_in_time_P2131_P585', 'president_start_time_P6_P580', 'president_end_time_P6_P582'], match, 'm')


def growth(df, name, pres, gdp):
    df = df[[pres, gdp]]
