# Welcome to Jupyter!

This repo contains an introduction to [Jupyter](https://jupyter.org) and [IPython](https://ipython.org).

Outline of some basics:

* [Notebook Basics](../examples/Notebook/Notebook%20Basics.ipynb)
* [IPython - beyond plain python](../examples/IPython%20Kernel/Beyond%20Plain%20Python.ipynb)
* [Markdown Cells](../examples/Notebook/Working%20With%20Markdown%20Cells.ipynb)
* [Rich Display System](../examples/IPython%20Kernel/Rich%20Output.ipynb)
* [Custom Display logic](../examples/IPython%20Kernel/Custom%20Display%20Logic.ipynb)
* [Running a Secure Public Notebook Server](../examples/Notebook/Running%20the%20Notebook%20Server.ipynb#Securing-the-notebook-server)
* [How Jupyter works](../examples/Notebook/Multiple%20Languages%2C%20Frontends.ipynb) to run code in different languages.

You can also get this tutorial and run it on your laptop:

    git clone https://github.com/ipython/ipython-in-depth

Install IPython and Jupyter:

with [conda](https://www.anaconda.com/download):

    conda install ipython jupyter

with pip:

    # first, always upgrade pip!
    pip install --upgrade pip
    pip install --upgrade ipython jupyter

Start the notebook in the tutorial directory:

    cd ipython-in-depth
    jupyter notebook

In [11]:
import sys
#!{sys.executable} -m pip install SPARQLWrapper
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd
import numpy as np
import datetime
import func_lib

endpoint_url = "https://query.wikidata.org/sparql"
item = "item"


class Relation:
    """
    The class returned when createRelation is called.
    It contains string field with query.
    We call Relation.query when we need to do the query.
    """

    def __init__(self, entity_id: str, property_id: str, isSubject: bool, rowVerbose: bool,
                 colVerbose: bool, time_property: str, time: str, name: str, label: bool, limit=10000):
        self.entity_id = entity_id
        self.query_str = ""
        self.dic = {}
        self.result_dic = {"Entity ID": []}
        self.df = pd.DataFrame()
        self.count = 0
        self.time_property = time_property
        self.time = time
        self.limit = limit
        self.focus = "Entity ID"
        if property_id:
            self.extend(property_id, isSubject, name, rowVerbose, colVerbose, limit, time_property, time, label)

    def generate_html(self, name: str):
        html = (self.df).to_html()
        text_file = open(name, "w", encoding='utf-8')
        text_file.write(html)
        text_file.close()

    def query(self, require=None):
        if self.query_str == "":
            self.result_dic = {"Entity ID": ['http://www.wikidata.org/entity/' + str(self.entity_id)]}
            return self.result_dic
        results = get_results(endpoint_url, self.query_str)
        result_dict = {"Entity ID": ['http://www.wikidata.org/entity/' + str(self.entity_id)]}
        for i in range(1, self.count + 1):
            result_dict[self.dic[i]["name"] + '_' + self.dic[i]['property_id']] = []
            if self.dic[i]["colVerbose"]:
                result_dict[self.dic[i]["name"] + '_rank_' + self.dic[i]['property_id'] + '_rank'] = []
                for key, value in self.dic[i]["property_name_dic"].items():
                    result_dict[
                        self.dic[i]["name"] + "_" + value + '_' + self.dic[i]['property_id'] + '_' + str(key)] = []
                for key, value in self.dic[i]["ref_dic"].items():
                    result_dict[self.dic[i]["name"] + "_ref_" + self.dic[i]['property_id'] + '_' + str(key)] = []

            if self.dic[i]["label"]:
                result_dict[self.dic[i]["name"] + '_' + self.dic[i]['property_id'] + 'Label'] = []

        for result in results['results']['bindings']:
            for key, value in result_dict.items():
                if key in result.keys():
                    result_dict[key].append(result[key]['value'])
                else:
                    result_dict[key].append('NA')
        result_dict["Entity ID"] = ['http://www.wikidata.org/entity/' + str(self.entity_id)] * len(
            result_dict[self.dic[self.count]["name"] + '_' + self.dic[self.count]["property_id"]])
        self.result_dic = result_dict
        self.df = pd.DataFrame.from_dict(self.result_dic)
        for i in range(1, self.count + 1):
            if self.dic[i]["colVerbose"] and not self.dic[i]["rowVerbose"]:
                col = self.dic[i]['name'] + '_rank_' + self.dic[i]['property_id'] + '_rank'
                if any(self.df[col] == 'http://wikiba.se/ontology#PreferredRank'):
                    self.df = self.df.loc[self.df[col] == 'http://wikiba.se/ontology#PreferredRank']
                else:
                    self.df = self.df.loc[self.df[col] == 'http://wikiba.se/ontology#NormalRank']
#         if require is not None:
#             for r in require:
#                 self.df = self.df.loc[self.df[r] != 'NA']
        self.df = pd.DataFrame(data=self.df)
#         if self.df.shape[0] >= 10000:
#             print("Warning: Your query leads to too many results. Only 10,000 returned.")
        return self.df

    def extend(self, property_id: str, isSubject: bool, name: str, rowVerbose=False, colVerbose=False, limit=None,
               time_property=None, time=None, search=None, label=False):
        self.count += 1
        self.dic[self.count] = {}
        self.dic[self.count]["name"] = name
        self.dic[self.count]["focus"] = self.focus
        self.dic[self.count]["property_id"] = property_id
        self.dic[self.count]["isSubject"] = isSubject
        self.dic[self.count]["limit"] = limit
        self.dic[self.count]["rowVerbose"] = rowVerbose
        self.dic[self.count]["colVerbose"] = colVerbose
        self.dic[self.count]['time_property'] = time_property
        self.dic[self.count]['time'] = time
        self.dic[self.count]['search'] = search
        self.dic[self.count]['label'] = label
        if rowVerbose or colVerbose:
            self.dic[self.count]["property_name_dic"], self.dic[self.count][
                "ref_dic"] = self.search_property_for_verbose()
        if time_property and time:
            self.time_property = time_property
            self.time = time
        if limit:
            self.limit = limit
        self.query_str = self.define_query_relation()

    def changeFocus(self, name="Entity ID"):
        self.focus = name
    
    def extendWithFunction(self, objcolumn, func, name, schema=False):
        if schema:
            if isinstance(objcolumn, list):
                self.df = func(self.df, name, *objcolumn)
            else:
                self.df = func(self.df, name, objcolumn)
        else:
            if type(func) == str:
                if func.startswith('F'):
                    try:
                        func_id = int(func[1:])
                        if func_id >= func_lib.func_num():
                            print("Not available.")
                        else:
                            if isinstance(objcolumn, list):
                                self.df[name] = self.df[objcolumn].apply(lambda x: func_lib.func_list[func_id](*x), axis=1)
                            else:
                                self.df[name] = self.df[objcolumn].apply(func_lib.func_list[func_id])
                    except:
                        raise Exception("Not a valid function id, a valid function id should be 'Fn', n is an integer.")
                else:
                    raise Exception("Not a valid function id, a valid function id should be 'Fn', n is an integer.")
            else:
                if isinstance(objcolumn, list):
                    self.df[name] = self.df[objcolumn].apply(lambda x: func(*x), axis=1)
                else:
                    self.df[name] = self.df[objcolumn].apply(func)

    def define_query_relation(self):
        rdf_triple, time_filter, limit_statement = """""", """""", """"""
        if self.count < 1:
            return None
        focusChanges = 0
        for i in range(1, self.count + 1):
            if self.dic[i]["rowVerbose"] or self.dic[i]["colVerbose"]:
                if self.dic[i]["search"] is None and not self.dic[i]["isSubject"]:
                        rdf_triple += """OPTIONAL {"""
                if self.dic[i]["focus"] == "Entity ID":
#                     if self.dic[i]["search"] is None:
#                         rdf_triple += """OPTIONAL {"""
                    rdf_triple += """wd:""" + self.entity_id + """ p:""" + self.dic[i][
                        'property_id'] + """ ?statement_""" + str(i) + """. """ \
                                  + """?statement_""" + str(i) + """ ps:""" + self.dic[i][
                                      'property_id'] + """ ?""" + \
                                  self.dic[i]['name'] \
                                  + """_""" + self.dic[i]['property_id'] + """. """
                else:
                    rdf_triple += """?""" + self.dic[i]["focus"] + """ p:""" + self.dic[i][
                        'property_id'] + """ ?statement_""" + str(i) + """. """ \
                                  + """?statement_""" + str(i) + """ ps:""" + self.dic[i][
                                      'property_id'] + """ ?""" + \
                                  self.dic[i]['name'] \
                                  + """_""" + self.dic[i]['property_id'] + """. """
                for key, value in self.dic[i]["property_name_dic"].items():
                    rdf_triple += """OPTIONAL { """ + """?statement_""" + str(i) + """ pq:""" + str(key) \
                                  + """ ?""" + self.dic[i]['name'] + """_""" + value + """_""" + self.dic[i][
                                      'property_id'] + """_""" + str(key) + """.} """
                for key, value in self.dic[i]["ref_dic"].items():
                    rdf_triple += """OPTIONAL { ?statement_""" + str(
                        i) + """ prov:wasDerivedFrom ?refnode_""" + str(
                        i) + """. ?refnode_""" + str(i) \
                                  + """ pr:""" + str(key) + """ ?""" + self.dic[i]['name'] + """_ref_""" + \
                                  self.dic[i][
                                      'property_id'] + """_""" + str(key) + """.} """
                rdf_triple += """OPTIONAL { ?statement_""" + str(i) + """ wikibase:rank ?""" + self.dic[i][
                    'name'] + """_rank_""" + self.dic[i]['property_id'] + """_rank. } """
            # none-verbose version
            else:
                if self.dic[i]["focus"] == "Entity ID":
                    if self.dic[i]["isSubject"]:
#                         if self.dic[i]["search"] is None:
#                             rdf_triple += """OPTIONAL {"""
                        rdf_triple += """?""" + self.dic[i]["name"] + """_""" + self.dic[i][
                            'property_id'] + """ wdt:""" + self.dic[i][
                                          "property_id"] + """ wd:""" + self.entity_id + """. """
                    else:
                        if self.dic[i]["search"] is None:
                            rdf_triple += """OPTIONAL {"""
                        rdf_triple += """wd:""" + self.entity_id + """ wdt:""" + self.dic[i][
                            "property_id"] + """ ?""" + \
                                      self.dic[i]["name"] + """_""" + self.dic[i]['property_id'] + """. """
                else:
                    if self.dic[i]["isSubject"]:
#                         if self.dic[i]["search"] is None:
#                             rdf_triple += """OPTIONAL {"""
                        rdf_triple += """?""" + self.dic[i]["name"] + """_""" + self.dic[i][
                            'property_id'] + """ wdt:""" + self.dic[i]["property_id"] + """ ?""" + self.dic[i][
                                          'focus'] + """. """
                    else:
                        if self.dic[i]["search"] is None:
                            rdf_triple += """OPTIONAL {"""
                        rdf_triple += """?""" + self.dic[i]['focus'] + """ wdt:""" + self.dic[i][
                            "property_id"] + """ ?""" + self.dic[i]["name"] + """_""" + self.dic[i][
                                          'property_id'] + """. """
            if not self.dic[i]["isSubject"]:
                if i < self.count and self.dic[i]["focus"] != self.dic[i + 1]["focus"] and self.dic[i]["search"] is None:
                    focusChanges += 1
                elif self.dic[i]["search"] is None:
                    rdf_triple += """} """
        for i in range(focusChanges):
            rdf_triple += """} """
        for i in range(1, self.count + 1):
            if self.dic[i]['search'] is not None and self.dic[i]["search"] != '!NA':
                if isinstance(self.dic[i]['search'], tuple):
                    if isinstance(self.dic[i]['search'][0], str):
                        rdf_triple += """FILTER (YEAR(?""" + self.dic[i]['name'] + """_""" + self.dic[i][
                            'property_id'] + """) >= """ + \
                                      self.dic[i]['search'][0] + """ && YEAR(?""" + self.dic[i]['name'] + \
                                      """_""" + self.dic[i]['property_id'] + """) <= """ + self.dic[i]['search'][
                                          1] + """) """
                    else:
                        rdf_triple += """FILTER (?""" + self.dic[i]['name'] + """_""" + self.dic[i]['property_id'] + \
                                      """ >= """ + str(self.dic[i]['search'][0]) + """ && ?""" + self.dic[i]['name'] + \
                                      """_""" + self.dic[i]['property_id'] + """ <= """ + str(
                            self.dic[i]['search'][1]) + """) """
                else:
                    rdf_triple += """FILTER (?""" + self.dic[i]['name'] + """_""" + self.dic[i][
                        'property_id'] + """ = """ + \
                                  """wd:""" + self.dic[i]['search'] + """) """
        if self.time_property is not None:
            time_filter = """?""" + self.dic[1]["name"] + """ p:""" + self.time_property + """ ?pubdateStatement.	
                          ?pubdateStatement ps:""" + self.time_property + """ ?date	
                          FILTER (YEAR(?date) = """ + self.time + """)"""
        if self.limit is not None:
            limit_statement = """LIMIT """ + str(self.limit)
        label_statement = """Service wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }"""
        query = """SELECT DISTINCT"""
        for i in range(1, self.count + 1):
            if self.dic[i]["rowVerbose"] or self.dic[i]["colVerbose"]:
                query += """ ?""" + self.dic[i]["name"] + """_""" + self.dic[i]['property_id']
                if self.dic[i]["label"]:
                    query += """ ?""" + self.dic[i]["name"] + """_""" + self.dic[i]['property_id'] + """Label"""
                for key, value in self.dic[i]["property_name_dic"].items():
                    query += """ ?""" + self.dic[i]["name"] + """_""" + value + """_""" + self.dic[i][
                        'property_id'] + """_""" + str(key)
                for key, value in self.dic[i]["ref_dic"].items():
                    query += """ ?""" + self.dic[i]["name"] + """_ref_""" + self.dic[i]['property_id'] + """_""" + str(
                        key)
                query += """ ?""" + self.dic[i]["name"] + """_rank_""" + self.dic[i]['property_id'] + """_rank"""
            else:
                query += """ ?""" + self.dic[i]["name"] + """_""" + self.dic[i]['property_id']
                if self.dic[i]["label"]:
                    query += """ ?""" + self.dic[i]["name"] + """_""" + self.dic[i]['property_id'] + """Label"""
        query += """ WHERE {""" + rdf_triple + time_filter + label_statement + """} """ + limit_statement
        return query

    def search_property_for_verbose(self):
        property_to_name = {}
        ref_to_name = {}
        rdf_triple, time_filter, limit_statement = """""", """""", """"""
        if self.dic[self.count]["rowVerbose"] or self.dic[self.count]["colVerbose"]:
            for i in range(1, self.count):
                if self.dic[i]["focus"] == "Entity ID":
                    if self.dic[i]["isSubject"]:
                        rdf_triple += """?""" + self.dic[i]["name"] + """ wdt:""" + self.dic[i][
                            "property_id"] + """ wd:""" + self.entity_id + """ ."""
                    else:
                        rdf_triple += """wd:""" + self.entity_id + """ wdt:""" + self.dic[i]["property_id"] + """ ?""" + \
                                      self.dic[i]["name"] + """ ."""
                else:
                    last = self.dic[i]["focus"].rfind('_')
                    focus = self.dic[i]["focus"][:last]
                    if self.dic[i]["isSubject"]:
                        rdf_triple += """?""" + self.dic[i]["name"] + """ wdt:""" + self.dic[i][
                            "property_id"] + """ ?""" + focus + """ ."""
                    else:
                        rdf_triple += """?""" + focus + """ wdt:""" + self.dic[i][
                            "property_id"] + """ ?""" + self.dic[i]["name"] + """ ."""
            if self.dic[self.count]["focus"] == "Entity ID":
                rdf_triple += """wd:""" + self.entity_id + """ p:""" + self.dic[self.count][
                    'property_id'] + """ ?statement.""" + \
                              """?statement """ + """ps:""" + self.dic[self.count]['property_id'] + """ ?item.""" + \
                              """?statement """ + """?pq """ + """?obj.""" + \
                              """?qual wikibase:qualifier ?pq.""" + \
                              """OPTIONAL{ ?statement prov:wasDerivedFrom ?refnode. ?refnode ?pr ?r.}"""
            else:
                last = self.dic[self.count]["focus"].rfind('_')
                focus = self.dic[self.count]["focus"][:last]
                rdf_triple += """?""" + focus + """ p:""" + self.dic[self.count][
                    'property_id'] + """ ?statement.""" + \
                              """?statement """ + """ps:""" + self.dic[self.count]['property_id'] + """ ?item.""" + \
                              """?statement """ + """?pq """ + """?obj.""" + \
                              """?qual wikibase:qualifier ?pq.""" + \
                              """OPTIONAL{ ?statement prov:wasDerivedFrom ?refnode. ?refnode ?pr ?r.}"""
        if self.time_property is not None:
            time_filter = """?""" + self.dic[1]["name"] + """ p:""" + self.time_property + """ ?pubdateStatement.	
                                  ?pubdateStatement ps:""" + self.time_property + """ ?date	
                                  FILTER (YEAR(?date) = """ + self.time + """)"""
        if self.limit is not None:
            limit_statement = """LIMIT """ + str(self.limit)
        label_statement = """Service wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }"""
        query = """SELECT DISTINCT """
        if self.dic[self.count]["rowVerbose"] or self.dic[self.count]["colVerbose"]:
            query += """?item""" + """ ?qual""" + """ ?qualLabel""" + """ ?obj """ + """?pr ?prLabel"""
            query += """ WHERE {""" + rdf_triple + time_filter + label_statement + """} """ + limit_statement
            query_result = get_results(endpoint_url, query)
            for result in query_result['results']['bindings']:
                if 'qual' in result:
                    property_to_name[result['qual']['value'].split('/')[-1]] = result['qualLabel']['value'].replace(' ',
                                                                                                                    '_')
                if 'pr' in result:
                    ref_to_name[result['pr']['value'].split('/')[-1]] = result['prLabel']['value'].replace(' ', '_')
        else:
            query += """?""" + self.dic[self.count]["name"] + """ """
        return property_to_name, ref_to_name

    def __str__(self):
        return str(self.df)

    def __getattr__(self, col_name):
        if col_name in self.df.columns:
            return self.df[col_name]
        else:
            print(col_name + " has not been found.")
            return None


def createRelation(entity_id: str, property_id=None, isSubject=None, rowVerbose=None, colVerbose=None,
                   time_property=None, time=None, name=None, label=False, limit=None):
    if property_id and not name:
        print("Please specify the name of the first column")
        return None
    return Relation(entity_id, property_id, isSubject, rowVerbose, colVerbose, time_property, time, name, label, limit)

def get_Firstname(name: str):
    return name.split(' ')[0]

def get_Lastname(name: str):
    return name.split(' ')[-1]

def remove_prefix(text, prefix):
    if text.startswith(prefix):
        return text[len(prefix):]
    return text


def get_results(endpoint_url, query):
    user_agent = "WDQS-example Python/%s.%s" % (sys.version_info[0], sys.version_info[1])
    # TODO adjust user agent; see https://w.wiki/CX6
    sparql = SPARQLWrapper(endpoint_url, agent=user_agent)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()


def get_name(id: str):
    query = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> 	
                PREFIX wd: <http://www.wikidata.org/entity/> 	
                select  *	
                where {	
                wd:""" + id + """ rdfs:label ?label .	
                FILTER (langMatches( lang(?label), "EN" ) )	
                } 	
                LIMIT 1"""
    results = get_results(endpoint_url, query)
    result = ''
    for res in results["results"]["bindings"]:
        result = res['label']['value']
    return result


In [8]:
print("20 Singers")
r = createRelation("Q177220")
r.extend("P106", True, "Singer", limit=20)
r.changeFocus("Singer_P106")
r.extend("P569", False, "Date_of_Birth")
r.query()
r.df

20 Singers


Unnamed: 0,Entity ID,Singer_P106,Date_of_Birth_P569
0,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q12012,1980-09-07T00:00:00Z
1,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q12010,1989-06-04T00:00:00Z
2,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q12007,1990-10-29T00:00:00Z
3,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q1779,1901-08-04T00:00:00Z
4,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q4826,1878-03-17T00:00:00Z
5,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q4826,1876-01-25T00:00:00Z
6,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q7241,1861-05-07T00:00:00Z
7,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q5170,1155-01-01T00:00:00Z
8,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q8876,1970-05-29T00:00:00Z
9,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q11901,1970-07-08T00:00:00Z


In [9]:
def age_cal(birth):
    if birth == 'NA':
        return 'NA'
    date_time_obj = datetime.datetime.strptime(birth[:4], '%Y')
    return datetime.datetime.now().year - date_time_obj.year

r.extendWithFunction('Date_of_Birth_P569',age_cal,'Age')

r.df

Unnamed: 0,Entity ID,Singer_P106,Date_of_Birth_P569,Age
0,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q12012,1980-09-07T00:00:00Z,40
1,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q12010,1989-06-04T00:00:00Z,31
2,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q12007,1990-10-29T00:00:00Z,30
3,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q1779,1901-08-04T00:00:00Z,119
4,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q4826,1878-03-17T00:00:00Z,142
5,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q4826,1876-01-25T00:00:00Z,144
6,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q7241,1861-05-07T00:00:00Z,159
7,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q5170,1155-01-01T00:00:00Z,865
8,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q8876,1970-05-29T00:00:00Z,50
9,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q11901,1970-07-08T00:00:00Z,50


In [10]:
print("20 Singers")
r = createRelation("Q177220")
r.extend("P106", True, "Singer", limit=20, label=True)
r.changeFocus("Singer_P106")
r.extend("P569", False, "Date_of_Birth")
r.query()
r.df

20 Singers


Unnamed: 0,Entity ID,Singer_P106,Singer_P106Label,Date_of_Birth_P569
0,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q2680,Bruce Willis,1955-03-19T00:00:00Z
1,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q8741,Rüdiger Skoczowsky,1986-01-01T00:00:00Z
2,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q3008,Mike Oldfield,1953-05-15T00:00:00Z
3,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q1698,Serge Gainsbourg,1928-04-02T00:00:00Z
4,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q10681,Alan Stivell,1944-01-06T00:00:00Z
5,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q1203,John Lennon,1940-10-09T00:00:00Z
6,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q1453,Alexandra Stan,1989-06-10T00:00:00Z
7,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q4826,Rosa Valetti,1878-03-17T00:00:00Z
8,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q1225,Bruce Springsteen,1949-09-23T00:00:00Z
9,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q10695,Dan Ar Braz,1949-01-15T00:00:00Z


In [11]:
r.extendWithFunction('Singer_P106Label','F1','First Name')
r.extendWithFunction('Singer_P106Label','F2','Last Name')

r.df

Unnamed: 0,Entity ID,Singer_P106,Singer_P106Label,Date_of_Birth_P569,First Name,Last Name
0,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q2680,Bruce Willis,1955-03-19T00:00:00Z,Bruce,Willis
1,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q8741,Rüdiger Skoczowsky,1986-01-01T00:00:00Z,Rüdiger,Skoczowsky
2,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q3008,Mike Oldfield,1953-05-15T00:00:00Z,Mike,Oldfield
3,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q1698,Serge Gainsbourg,1928-04-02T00:00:00Z,Serge,Gainsbourg
4,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q10681,Alan Stivell,1944-01-06T00:00:00Z,Alan,Stivell
5,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q1203,John Lennon,1940-10-09T00:00:00Z,John,Lennon
6,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q1453,Alexandra Stan,1989-06-10T00:00:00Z,Alexandra,Stan
7,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q4826,Rosa Valetti,1878-03-17T00:00:00Z,Rosa,Valetti
8,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q1225,Bruce Springsteen,1949-09-23T00:00:00Z,Bruce,Springsteen
9,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q10695,Dan Ar Braz,1949-01-15T00:00:00Z,Dan,Braz


In [4]:
func_lib.func_num()

4

In [5]:
def get_entity_id(url):
    return url.split('/')[-1]

func_lib.add_func(get_entity_id)

func_lib.func_num()

5

In [7]:
r.extendWithFunction('Entity ID','F4','id')

In [8]:
r.df

Unnamed: 0,Entity ID,Singer_P106,Singer_P106Label,Date_of_Birth_P569,First Name,Last Name,id
0,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q2680,Bruce Willis,1955-03-19T00:00:00Z,Bruce,Willis,Q177220
1,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q8741,Rüdiger Skoczowsky,1986-01-01T00:00:00Z,Rüdiger,Skoczowsky,Q177220
2,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q3008,Mike Oldfield,1953-05-15T00:00:00Z,Mike,Oldfield,Q177220
3,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q1698,Serge Gainsbourg,1928-04-02T00:00:00Z,Serge,Gainsbourg,Q177220
4,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q10681,Alan Stivell,1944-01-06T00:00:00Z,Alan,Stivell,Q177220
5,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q1203,John Lennon,1940-10-09T00:00:00Z,John,Lennon,Q177220
6,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q1453,Alexandra Stan,1989-06-10T00:00:00Z,Alexandra,Stan,Q177220
7,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q4826,Rosa Valetti,1878-03-17T00:00:00Z,Rosa,Valetti,Q177220
8,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q1225,Bruce Springsteen,1949-09-23T00:00:00Z,Bruce,Springsteen,Q177220
9,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q10695,Dan Ar Braz,1949-01-15T00:00:00Z,Dan,Braz,Q177220


In [11]:
r = createRelation("Q1137809")
r.extend('P31',True, 'Courthouse', limit=10, label=True)
r.changeFocus('Courthouse_P31')
r.extend('P17',False, 'Country')
r.query()
r.df

Unnamed: 0,Entity ID,Courthouse_P31,Courthouse_P31Label,Country_P17
0,http://www.wikidata.org/entity/Q1137809,http://www.wikidata.org/entity/Q7863296,U.S. Post Office and Courthouse,http://www.wikidata.org/entity/Q30
1,http://www.wikidata.org/entity/Q1137809,http://www.wikidata.org/entity/Q7863297,U.S. Post Office and Courthouse,http://www.wikidata.org/entity/Q30
2,http://www.wikidata.org/entity/Q1137809,http://www.wikidata.org/entity/Q7889666,United States Court House,http://www.wikidata.org/entity/Q30
3,http://www.wikidata.org/entity/Q1137809,http://www.wikidata.org/entity/Q7889667,United States Court House,http://www.wikidata.org/entity/Q30
4,http://www.wikidata.org/entity/Q1137809,http://www.wikidata.org/entity/Q7889682,United States Courthouse,http://www.wikidata.org/entity/Q30
5,http://www.wikidata.org/entity/Q1137809,http://www.wikidata.org/entity/Q7889683,United States Courthouse,http://www.wikidata.org/entity/Q30
6,http://www.wikidata.org/entity/Q1137809,http://www.wikidata.org/entity/Q7889685,United States Courthouse,http://www.wikidata.org/entity/Q30
7,http://www.wikidata.org/entity/Q1137809,http://www.wikidata.org/entity/Q7889703,United States Customhouse,http://www.wikidata.org/entity/Q30
8,http://www.wikidata.org/entity/Q1137809,http://www.wikidata.org/entity/Q7889714,United States Customhouse,http://www.wikidata.org/entity/Q30
9,http://www.wikidata.org/entity/Q1137809,http://www.wikidata.org/entity/Q7891082,United States Post Office and Courthouse,http://www.wikidata.org/entity/Q30


In [6]:
import requests
from bs4 import BeautifulSoup
print("20 Singers")
r = createRelation("Q177220")
r.extend("P106", True, "Singer", limit=20,label=True)
r.changeFocus("Singer_P106")
r.extend("P569", False, "Date_of_Birth")
r.extend("P570", False, "Date_of_Death")
r.query()

def age_cal(birth, death):
    if birth == 'NA':
        return 'NA'
    if death == 'NA':  
        date_time_obj = datetime.datetime.strptime(birth[:4], '%Y')
        return datetime.datetime.now().year - date_time_obj.year
    else:
        return datetime.datetime.strptime(death[:4], '%Y').year - datetime.datetime.strptime(birth[:4], '%Y').year

def textual_summary(x):
    url = 'https://en.wikipedia.org/wiki/'
    for word in x.split(' '):
        url += (word + '_')
    url = url[:-1]
    s = requests.session()
    r = s.get(url)
    soup = BeautifulSoup(r.content, 'html.parser')
    results = soup.find(id='bodyContent')
    results = results.find(id='mw-content-text')
    results = results.find_all('p')
    if len(results) > 1:
        return results[1].text
    return results[0].text
    
r.extendWithFunction(['Date_of_Birth_P569','Date_of_Death_P570'], 'F3','Age')
r.extendWithFunction('Singer_P106Label',textual_summary,'summary')
for index, row in r.df.iterrows():
    print(row['summary'])
r.df

20 Singers
Other reasons this message may be displayed:

Artist was born in Buffalo, New York to Darrell and Judith Artist, and grew up in Williamsville, New York.[3][4] His father is African American, while his mother is of Polish descent.[5] He has a younger sister named Jenna. He took lessons through the Community Music School in Buffalo, New York.[3]

Rainhard Fendrich, called "Raini" by his friends, attended a Catholic boarding school. By his own admission he was a lazy pupil, and shy. He got his first guitar on his 15th birthday, taught himself how to play and started writing songs.[citation needed] He began to study law, but gave it up to embark on a career as actor and singer.

Netsky may refer to

James Douglas Morrison (December 8, 1943 – July 3, 1971) was an American singer, songwriter and poet, who served as the lead vocalist of the rock band The Doors. Due to his wild personality, poetic lyrics, his widely recognized voice, unpredictable and erratic performances, and the d

Unnamed: 0,Entity ID,Singer_P106,Singer_P106Label,Date_of_Birth_P569,Date_of_Death_P570,Age,summary
0,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q45570,Fredl Fesl,1947-07-07T00:00:00Z,,73,Other reasons this message may be displayed:\n
1,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q43950,Jacob Artist,1992-10-17T00:00:00Z,,28,"Artist was born in Buffalo, New York to Darrel..."
2,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q45402,Rainhard Fendrich,1955-02-27T00:00:00Z,,65,"Rainhard Fendrich, called ""Raini"" by his frien..."
3,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q44399,Netsky,1989-03-22T00:00:00Z,,31,Netsky may refer to\n
4,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q44301,Jim Morrison,1943-12-08T00:00:00Z,1971-07-03T00:00:00Z,28,"James Douglas Morrison (December 8, 1943 – Jul..."
5,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q44909,Candye Kane,1961-11-13T00:00:00Z,2016-05-06T00:00:00Z,55,"Kane was born Candice Caleb in Ventura, Califo..."
6,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q44909,Candye Kane,1961-11-13T00:00:00Z,2016-05-09T00:00:00Z,55,"Kane was born Candice Caleb in Ventura, Califo..."
7,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q43901,Jakob Geis,1840-12-27T00:00:00Z,1908-03-03T00:00:00Z,68,Originally he wanted to become a Catholic prie...
8,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q46053,Clive Dunn,1920-01-09T00:00:00Z,2012-11-06T00:00:00Z,92,Clive Robert Benjamin Dunn OBE (9 January 192...
9,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q46036,Lucrecia,1967-03-15T00:00:00Z,,53,Lucrezia or Lucrecia may refer to:\n


In [11]:
pip install beautifulsoup4

You should consider upgrading via the '/Users/jack/opt/anaconda3/bin/python -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [39]:
def textual_summary(x):
    url = 'https://en.wikipedia.org/wiki/'
    for word in x.split(' '):
        url += (word + '_')
    url = url[:-1]
    s = requests.session()
    r = s.get(url)
    soup = BeautifulSoup(r.content, 'html.parser')
    results = soup.find(id='bodyContent')
    results = results.find(id='mw-content-text')
    results = results.find_all('p')
    return results[1].text

textual_summary('Mark Hoppus')

"Mark Allan Hoppus (born March 15, 1972)[1] is an American musician, singer, songwriter, record producer, and former television personality best known as the bassist and co-lead vocalist of the rock band Blink-182, as well as part of pop rock duo Simple Creatures with All Time Low's Alex Gaskarth.\n"

In [2]:
r = createRelation("Q177220")
r.extend("P106", True, "Singer", limit=20,label=True)
r.changeFocus("Singer_P106")
r.extend("P569", False, "Date_of_Birth")
r.extend("P570", False, "Date_of_Death")
r.query()

r.extendWithFunction(['Date_of_Birth_P569','Date_of_Death_P570'], 'F3','Age')
r.extendWithFunction('Singer_P106Label','F4','textual_summary')
for index, row in r.df.iterrows():
    print(row['textual_summary'])
r.df

Other reasons this message may be displayed:

Artist was born in Buffalo, New York to Darrell and Judith Artist, and grew up in Williamsville, New York
Rainhard Fendrich, called "Raini" by his friends, attended a Catholic boarding school
Netsky may refer to

James Douglas Morrison (December 8, 1943 – July 3, 1971) was an American singer, songwriter and poet, who served as the lead vocalist of the rock band The Doors
Kane was born Candice Caleb in Ventura, California
Kane was born Candice Caleb in Ventura, California
Originally he wanted to become a Catholic priest
 Clive Robert Benjamin Dunn OBE (9 January 1920 – 6 November 2012) was an English actor, comedian, artist, author, and singer
Lucrezia or Lucrecia  may refer to:

Sigmund Esco Jackson was born on his mother Katherine's 21st birthday in 1951
In 2008, Cale, along with Clapton, received a Grammy Award for their album The Road to Escondido
Christopher Keith Irvine (born November 9, 1970), better known by his ring name Chris Jeric

Unnamed: 0,Entity ID,Singer_P106,Singer_P106Label,Date_of_Birth_P569,Date_of_Death_P570,Age,textual_summary
0,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q45570,Fredl Fesl,1947-07-07T00:00:00Z,,73,Other reasons this message may be displayed:\n
1,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q43950,Jacob Artist,1992-10-17T00:00:00Z,,28,"Artist was born in Buffalo, New York to Darrel..."
2,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q45402,Rainhard Fendrich,1955-02-27T00:00:00Z,,65,"Rainhard Fendrich, called ""Raini"" by his frien..."
3,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q44399,Netsky,1989-03-22T00:00:00Z,,31,Netsky may refer to\n
4,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q44301,Jim Morrison,1943-12-08T00:00:00Z,1971-07-03T00:00:00Z,28,"James Douglas Morrison (December 8, 1943 – Jul..."
5,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q44909,Candye Kane,1961-11-13T00:00:00Z,2016-05-06T00:00:00Z,55,"Kane was born Candice Caleb in Ventura, Califo..."
6,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q44909,Candye Kane,1961-11-13T00:00:00Z,2016-05-09T00:00:00Z,55,"Kane was born Candice Caleb in Ventura, Califo..."
7,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q43901,Jakob Geis,1840-12-27T00:00:00Z,1908-03-03T00:00:00Z,68,Originally he wanted to become a Catholic priest
8,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q46053,Clive Dunn,1920-01-09T00:00:00Z,2012-11-06T00:00:00Z,92,Clive Robert Benjamin Dunn OBE (9 January 192...
9,http://www.wikidata.org/entity/Q177220,http://www.wikidata.org/entity/Q46036,Lucrecia,1967-03-15T00:00:00Z,,53,Lucrezia or Lucrecia may refer to:\n


In [4]:
r = createRelation("Q177220")
r.extend("P106", True, "Singer", limit=20,label=True)
r.changeFocus("Singer_P106")
r.extend('P18',False,'image')
r.query()
r.df
for index, row in r.df.iterrows():
    print(row['image_P18'])

http://commons.wikimedia.org/wiki/Special:FilePath/Daniel%20Balavoine.jpg
http://commons.wikimedia.org/wiki/Special:FilePath/CEELOGREENGBB.jpg
http://commons.wikimedia.org/wiki/Special:FilePath/Woody%20Guthrie.jpg
http://commons.wikimedia.org/wiki/Special:FilePath/Tupac%20graffiti%20New%20York.jpg
http://commons.wikimedia.org/wiki/Special:FilePath/Festival%20de%20Cornouaille%202016%20-%20Alan%20Stivell%20-%2001.jpg
http://commons.wikimedia.org/wiki/Special:FilePath/George%20Harrison%201974.jpg
http://commons.wikimedia.org/wiki/Special:FilePath/James%20Brown%202001.jpg
http://commons.wikimedia.org/wiki/Special:FilePath/Marilyn%20Monroe%20in%201952%20TFA.jpg
http://commons.wikimedia.org/wiki/Special:FilePath/Louis%20Armstrong%20restored.jpg
http://commons.wikimedia.org/wiki/Special:FilePath/Paul%20McCartney%20in%20October%202018.jpg
http://commons.wikimedia.org/wiki/Special:FilePath/Rabindranath%20Tagore%20in%201909.jpg
http://commons.wikimedia.org/wiki/Special:FilePath/Heino%20FFM13%200

In [8]:
import requests

def pob(x):
    if x == 'NA':
        return 'NA'
    info = requests.get('http://ipinfo.io/json').json()
    url = 'https://www.google.com/maps/dir/'
    url += info['city'] + '/'
    url += x
    url.replace(' ', '+')
    url.replace('&', '%26')
    r = requests.get(url)
    return r.url
   
pd.set_option('display.max_colwidth', 1000)
r = createRelation("Q11696")
r.extend('P39',True, 'President', limit=20,label=True)
r.changeFocus('President_P39')
r.extend('P19',False, 'Place_of_birth',label=True)
r.query()
r.extendWithFunction('Place_of_birth_P19Label', pob, 'url')
r.df
# for index, row in r.df.iterrows():
#     print(row['url'])

Unnamed: 0,Entity ID,President_P39,President_P39Label,Place_of_birth_P19,Place_of_birth_P19Label,url
0,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q3545001,Gonzo the Mechanical Bastard,,,
1,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q11806,John Adams,http://www.wikidata.org/entity/Q16101,Braintree,https://www.google.com/maps/dir/Ann%20Arbor/Braintree
2,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q608408,Caroline Reynolds,,,
3,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q7242478,Prez,,,
4,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q1052076,Sylar,,,
5,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q22686,Donald Trump,http://www.wikidata.org/entity/Q23497866,Jamaica Hospital,https://www.google.com/maps/dir/Ann%20Arbor/Jamaica%20Hospital
6,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q35498,Chester A. Arthur,http://www.wikidata.org/entity/Q1393235,Fairfield,https://www.google.com/maps/dir/Ann%20Arbor/Fairfield
7,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q35236,Herbert Hoover,http://www.wikidata.org/entity/Q1798762,West Branch,https://www.google.com/maps/dir/Ann%20Arbor/West%20Branch
8,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q9916,Dwight D. Eisenhower,http://www.wikidata.org/entity/Q534669,Denison,https://www.google.com/maps/dir/Ann%20Arbor/Denison
9,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q11881,John Tyler,http://www.wikidata.org/entity/Q337348,Charles City County,https://www.google.com/maps/dir/Ann%20Arbor/Charles%20City%20County


In [None]:
pd.set_option('display.max_colwidth', 1000)
r = createRelation("Q177220")
r.extend("P106", True, "Singer", limit=20,label=True)
r.changeFocus("Singer_P106")
r.extend("P569", False, "Date_of_Birth")
r.extend("P570", False, "Date_of_Death")
r.query()
r.extendWithFunction(['Date_of_Birth_P569','Date_of_Death_P570'], 'F3','Age')
# r.extendWithFunction('Singer_P106Label','F4','textual_summary')
r.extendWithFunction('Age','F6','Age_1')
r.df

In [3]:
r = createRelation("Q11696")
r.extend('P39',True, 'President', limit=20,label=True)
r.changeFocus('President_P39')
r.extend('P19',False, 'Place_of_birth',label=True)
r.query()
r.extendWithFunction('Place_of_birth_P19Label', 'F5', 'url')
r.df

Unnamed: 0,Entity ID,President_P39,President_P39Label,Place_of_birth_P19,Place_of_birth_P19Label,url
0,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q3545001,Gonzo the Mechanical Bastard,,,
1,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q11806,John Adams,http://www.wikidata.org/entity/Q16101,Braintree,https://www.google.com/maps/place/Braintree
2,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q608408,Caroline Reynolds,,,
3,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q7242478,Prez,,,
4,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q1052076,Sylar,,,
5,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q22686,Donald Trump,http://www.wikidata.org/entity/Q23497866,Jamaica Hospital,https://www.google.com/maps/place/Jamaica%20Hospital
6,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q35498,Chester A. Arthur,http://www.wikidata.org/entity/Q1393235,Fairfield,https://www.google.com/maps/place/Fairfield
7,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q35236,Herbert Hoover,http://www.wikidata.org/entity/Q1798762,West Branch,https://www.google.com/maps/place/West%20Branch
8,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q9916,Dwight D. Eisenhower,http://www.wikidata.org/entity/Q534669,Denison,https://www.google.com/maps/place/Denison
9,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q11881,John Tyler,http://www.wikidata.org/entity/Q337348,Charles City County,https://www.google.com/maps/place/Charles%20City%20County


In [4]:
def add_one(x):
    return x+1

func_lib.add_func(add_one)

In [6]:
func_lib.func_num()

7

In [21]:
import requests, json
def map(x):
    if x == 'NA':
        return 'NA'
    info = requests.get('http://ipinfo.io/json').json()
    url = 'https://www.google.com/maps/dir/'
    url += info['city'] + '/'
    url += x
    url.replace(' ', '+')
    url.replace('&', '%26')
    r = requests.get(url)
    return r.url


def share(df, name, film, director):
    film_gross = pd.read_csv('boxoffice.csv')
    film_gross.columns = ['rank', film, 'studio', 'gross', 'year']
    film_gross[film] = film_gross[film].astype(str)
    df[film] = df[film].astype(str)
    new_df = film_gross.merge(df, on=film)  # merge with external data
    new_df.drop_duplicates(subset=['rank'], inplace=True)
    sum_df = new_df.groupby([director]).sum()
    sum_df.columns = ['rk', 'total', 'y']
    final_df = new_df.merge(sum_df, on=director)
    final_df[name] = final_df.apply(lambda x: '%.2f%%' % (x['gross'] / x['total'] * 100), axis=1)
    sel = [str(x) for x in df.columns]
    sel.append(name)
    return final_df[sel]

In [46]:
r3 = createRelation('Q2526255')
r3.extend('P106', True, 'Directors', label=True)
r3.changeFocus('Directors_P106')
r3.extend('P57', True, 'Films', label=True)
r3.changeFocus('Films_P57')
r3.extend('P577', False, 'Time', search=('2015', '2020'))
r3.query()
r3.extendWithFunction(['Films_P57Label', 'Directors_P106Label'], share, 'fraction', schema=True)
r3.df

Unnamed: 0,Entity ID,Directors_P106,Directors_P106Label,Films_P57,Films_P57Label,Time_P577,fraction
0,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q188137,J. J. Abrams,http://www.wikidata.org/entity/Q6074,Star Wars: The Force Awakens,2015-12-16T00:00:00Z,100.00%
1,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q7383978,Ryan Coogler,http://www.wikidata.org/entity/Q23780734,Black Panther,2018-02-15T00:00:00Z,86.45%
2,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q7383978,Ryan Coogler,http://www.wikidata.org/entity/Q18822138,Creed,2015-11-25T00:00:00Z,13.55%
3,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q20675767,Joe Russo,http://www.wikidata.org/entity/Q23780914,Avengers: Infinity War,2018-04-25T00:00:00Z,62.45%
4,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q20675767,Joe Russo,http://www.wikidata.org/entity/Q18407657,Captain America: Civil War,2016-04-27T00:00:00Z,37.55%
...,...,...,...,...,...,...,...
1994,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q171363,Viggo Mortensen,http://www.wikidata.org/entity/Q65084732,Falling,2020-01-31T00:00:00Z,100.00%
1995,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q95902267,Ryon Baxter,http://www.wikidata.org/entity/Q45827787,Green is Gold,2016-01-01T00:00:00Z,100.00%
1996,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q7150070,Paul Currie,http://www.wikidata.org/entity/Q4633009,2:22,2017-01-01T00:00:00Z,100.00%
1997,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q26896792,Adam Penny,http://www.wikidata.org/entity/Q26896751,Hannah: Buddhism's Untold Journey,2016-10-20T00:00:00Z,100.00%


In [47]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 100)
r3.df

Unnamed: 0,Entity ID,Directors_P106,Directors_P106Label,Films_P57,Films_P57Label,Time_P577,fraction
0,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q188137,J. J. Abrams,http://www.wikidata.org/entity/Q6074,Star Wars: The Force Awakens,2015-12-16T00:00:00Z,100.00%
1,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q7383978,Ryan Coogler,http://www.wikidata.org/entity/Q23780734,Black Panther,2018-02-15T00:00:00Z,86.45%
2,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q7383978,Ryan Coogler,http://www.wikidata.org/entity/Q18822138,Creed,2015-11-25T00:00:00Z,13.55%
3,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q20675767,Joe Russo,http://www.wikidata.org/entity/Q23780914,Avengers: Infinity War,2018-04-25T00:00:00Z,62.45%
4,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q20675767,Joe Russo,http://www.wikidata.org/entity/Q18407657,Captain America: Civil War,2016-04-27T00:00:00Z,37.55%
5,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q5145625,Colin Trevorrow,http://www.wikidata.org/entity/Q3512046,Jurassic World,2015-05-29T00:00:00Z,99.31%
6,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q5145625,Colin Trevorrow,http://www.wikidata.org/entity/Q21098696,The Book of Henry,2017-09-21T00:00:00Z,0.69%
7,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q621818,Rian Johnson,http://www.wikidata.org/entity/Q18486021,Star Wars: The Last Jedi,2017-12-09T00:00:00Z,100.00%
8,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q310960,Brad Bird,http://www.wikidata.org/entity/Q24832112,Incredibles 2,2018-06-05T00:00:00Z,86.47%
9,http://www.wikidata.org/entity/Q2526255,http://www.wikidata.org/entity/Q310960,Brad Bird,http://www.wikidata.org/entity/Q7820305,Tomorrowland,2015-05-21T00:00:00Z,13.53%


In [53]:
r4 = createRelation('Q33999')
r4.extend('P106', True, 'Actors', label=True)
r4.changeFocus('Actors_P106')
r4.extend('P161', True, 'Films', label=True)
r4.changeFocus('Films_P161')
r4.extend('P577', False, 'Time', search=('2010', '2020'), limit=2000)
#print(r4.query_str)
r4.query()
r4.extendWithFunction(['Films_P161Label', 'Actors_P106Label'], share, 'fraction', schema=True)
r4.df

Unnamed: 0,Entity ID,Actors_P106,Actors_P106Label,Films_P161,Films_P161Label,Time_P577,fraction
0,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q14537,Bill Hader,http://www.wikidata.org/entity/Q6074,Star Wars: The Force Awakens,2015-12-16T00:00:00Z,57.82%
1,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q14537,Bill Hader,http://www.wikidata.org/entity/Q171711,Star Trek Into Darkness,2013-05-09T00:00:00Z,14.12%
2,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q14537,Bill Hader,http://www.wikidata.org/entity/Q15040917,22 Jump Street,2014-06-04T00:00:00Z,11.83%
3,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q14537,Bill Hader,http://www.wikidata.org/entity/Q17062774,Trainwreck,2015-01-01T00:00:00Z,6.80%
4,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q14537,Bill Hader,http://www.wikidata.org/entity/Q19689203,The BFG,2016-07-01T00:00:00Z,3.43%
5,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q14537,Bill Hader,http://www.wikidata.org/entity/Q1164753,Paul,2011-04-14T00:00:00Z,2.31%
6,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q14537,Bill Hader,http://www.wikidata.org/entity/Q753899,Scott Pilgrim vs. the World,2010-07-22T00:00:00Z,1.95%
7,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q14537,Bill Hader,http://www.wikidata.org/entity/Q2528865,Hoodwinked Too! Hood vs. Evil,2011-07-21T00:00:00Z,0.63%
8,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q14537,Bill Hader,http://www.wikidata.org/entity/Q1579371,MacGruber,2010-01-01T00:00:00Z,0.53%
9,http://www.wikidata.org/entity/Q33999,http://www.wikidata.org/entity/Q14537,Bill Hader,http://www.wikidata.org/entity/Q7764599,The Skeleton Twins,2014-01-01T00:00:00Z,0.33%


In [22]:
r6 = createRelation("Q11696")
r6.extend("P39", True, "Presidents")
r6.changeFocus("Presidents_P39")
r6.extend("P19", False, "place_president", label=True)
r6.query()
r6.extend("P26", False, "Spouse")
r6.changeFocus("Spouse_P26")
r6.extend("P19", False, "place_spouse", label=True)
r6.query()
r6.extendWithFunction("place_president_P19Label", map, "to_president")
r6.extendWithFunction("place_spouse_P19Label", map, "to_spouse")
r6.df

Unnamed: 0,Entity ID,Presidents_P39,place_president_P19,place_president_P19Label,Spouse_P26,place_spouse_P19,place_spouse_P19Label,to_president,to_spouse
0,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q608408,,,,,,,
1,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q1052076,,,,,,,
2,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q2719854,,,,,,,
3,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q3438922,,,,,,,
4,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q3545001,,,,,,,
...,...,...,...,...,...,...,...,...,...
65,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q11813,http://www.wikidata.org/entity/Q1375967,Port Conway,http://www.wikidata.org/entity/Q233638,http://www.wikidata.org/entity/Q502257,Guilford County,https://www.google.com/maps/dir/Ann%20Arbor/Po...,https://www.google.com/maps/dir/Ann%20Arbor/Gu...
66,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q11869,http://www.wikidata.org/entity/Q337348,Charles City County,http://www.wikidata.org/entity/Q255190,http://www.wikidata.org/entity/Q962499,Morristown,https://www.google.com/maps/dir/Ann%20Arbor/Ch...,https://www.google.com/maps/dir/Ann%20Arbor/Mo...
67,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q9640,http://www.wikidata.org/entity/Q975048,Stonewall,http://www.wikidata.org/entity/Q233648,http://www.wikidata.org/entity/Q3476511,Karnack,https://www.google.com/maps/dir/Ann%20Arbor/St...,https://www.google.com/maps/dir/Ann%20Arbor/Ka...
68,http://www.wikidata.org/entity/Q11696,http://www.wikidata.org/entity/Q35498,http://www.wikidata.org/entity/Q1393235,Fairfield,http://www.wikidata.org/entity/Q2306099,http://www.wikidata.org/entity/Q990985,Culpeper,https://www.google.com/maps/dir/Ann%20Arbor/Fa...,https://www.google.com/maps/dir/Ann%20Arbor/Cu...
