In [13]:
import sys
from qwikidata.sparql import return_sparql_query_results
from qwikidata.entity import WikidataItem, WikidataLexeme, WikidataProperty
from qwikidata.linked_data_interface import get_entity_dict_from_api
from SPARQLWrapper import SPARQLWrapper, JSON
import json
from tqdm import tqdm
import re
import requests as r
import wptools
import pandas as pd
from collections import defaultdict
from libindic import inexactsearch
# People_translator is imported from '../method2'; the relative entry is added to
# sys.path first so the import below can find it.
sys.path.append('../method2')
from People_translator import Translation_Api
# Shared translator instance used by baseline_infobox and method2_infobox.
translator = Translation_Api()

## Method 1 / Baseline

In [19]:
# Infobox field name -> Wikidata property id. The infobox builders query each
# pair as `wd:<item> wdt:<property>`.
# NOTE(review): "name" maps to P251 -- confirm this is the intended property id.
biography = {
    
    "name": "P251",
    "image": "P18",
    "gender":"P21",
    "residence":"P551",
    "place_of_birth":"P19",
    "date_of_birth":"P569",
    "profession": "P106",
    "notable_works": "P800",
    "education": "P69",
    "positions":"P39",
    "awards": "P166",
    "spouse": "P26",
    "nationality": "P27",
}

# Field names that baseline_infobox transliterates instead of translating.
# NOTE(review): 'motto' is not a key of `biography`, so with `biography` as the
# field map this list never matches -- confirm whether that is intended.
to_transliteration = ['motto']

def get_results(endpoint_url, query):
    """Run ``query`` against a SPARQL endpoint and return the converted JSON result.

    Args:
        endpoint_url: URL of the SPARQL endpoint.
        query: SPARQL query text.

    Returns:
        The value produced by ``SPARQLWrapper.query().convert()`` with the
        return format set to ``JSON``.
    """
    major, minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(
        endpoint_url,
        agent="WDQS-example Python/%s.%s" % (major, minor),
    )
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()

# SPARQL endpoint that the infobox builders pass to get_results.
endpoint_url = "https://query.wikidata.org/sparql"

def getEntityInfo(eid):
    """Return the entity dictionary for ``eid`` from ``get_entity_dict_from_api``."""
    entity_dict = get_entity_dict_from_api(eid)
    return entity_dict

# Extract the English ('en') label of the entity or the property value.
def extractName(info):
    """Return ``info['labels']['en']['value']``, or "" if any level is missing."""
    labels = info.get('labels', {})
    english = labels.get('en', {})
    return english.get('value', "")

# Extract the English ('en') description of the entity or the property value.
def extractDescription(info):
    """Return ``info['descriptions']['en']['value']``, or "" if any level is missing."""
    descriptions = info.get('descriptions', {})
    english = descriptions.get('en', {})
    return english.get('value', "")
    
def baseline_infobox(wd, bio):
    """Build an infobox from English Wikidata values passed through the translator.

    The entity's English label is transliterated and its English description
    translated. Then, for every ``field -> property id`` pair in ``bio``, the
    English labels of the property's values are fetched with SPARQL, joined
    with commas, and translated (or transliterated when the field is listed
    in ``to_transliteration``).

    Args:
        wd: Wikidata item id (for example "Q213854").
        bio: Mapping of output field name to Wikidata property id.

    Returns:
        dict mapping field name to the converted string. A field from ``bio``
        is present only when the query yielded at least one usable value.
    """
    result = {}
    # Get the entity from the API.
    entity_info = getEntityInfo(wd)
    result['name'] = translator.get_transliteration(extractName(entity_info))
    result['description'] = translator.get_translation(extractDescription(entity_info))
    # Explicitly query using SPARQL to get the main biography data.
    for field, wdt in bio.items():
        query = f"SELECT ?entity ?entityLabel ?entityDescription WHERE {{ wd:{wd} wdt:{wdt} ?entity; SERVICE wikibase:label {{ bd:serviceParam wikibase:language \"en\". }} }}"
        res = get_results(endpoint_url, query)
        values = []
        for binding in res['results']['bindings']:
            # A binding without a label is treated as empty instead of raising.
            value = binding.get('entityLabel', {}).get('value', "")
            # Drop only bare item ids ("Q12345"), which the label service
            # returns when no label exists. A plain 'Q' substring test would
            # also discard real labels that contain a capital Q.
            if value != '' and not re.fullmatch(r'Q\d+', value):
                values.append(value)
        if values:
            # Joining avoids the trailing comma the concatenation left behind.
            joined = ','.join(values)
            if field in to_transliteration:
                result[field] = translator.get_transliteration(joined)
            else:
                result[field] = translator.get_translation(joined)
    return result

def getEntityInfo(eid):
    """Look up ``eid`` with ``get_entity_dict_from_api`` and return the result.

    NOTE(review): this repeats the ``getEntityInfo`` definition that appears
    earlier in the notebook; the later definition is the one in effect.
    """
    info = get_entity_dict_from_api(eid)
    return info

# Extract the Hindi ('hi') label of the entity or the property value.
def extractNameBaseline(info):
    """Return ``info['labels']['hi']['value']``, or "" if any level is missing."""
    labels = info.get('labels', {})
    hindi = labels.get('hi', {})
    return hindi.get('value', "")

# Extract the Hindi ('hi') description of the entity or the property value.
def extractDescriptionBaseline(info):
    """Return ``info['descriptions']['hi']['value']``, or "" if any level is missing."""
    descriptions = info.get('descriptions', {})
    hindi = descriptions.get('hi', {})
    return hindi.get('value', "")

# Print the name and the description, each prefixed with its label from `trans`.
def printNameAndDescriptionBaseline(info, trans):
    """Print the entity's name and description lines, skipping empty ones.

    Args:
        info: Entity dictionary handed to ``extractName`` / ``extractDescription``.
        trans: Mapping that supplies the 'name' and 'description' line labels.

    NOTE(review): this uses the English extractors (``extractName`` and
    ``extractDescription``), not the ``*Baseline`` variants -- confirm intended.
    """
    def _emit(label_key, text):
        # Only non-empty values are printed, as "<label>: <text>".
        if text != "":
            print(trans[label_key] + ":", text)

    _emit('name', extractName(info))
    _emit('description', extractDescription(info))
    
def method1_infobox(wd, bio):
    """Build an infobox directly from the Hindi ('hi') values stored in Wikidata.

    The entity's Hindi label and description are read from the entity
    dictionary. Then, for every ``field -> property id`` pair in ``bio``, the
    Hindi labels of the property's values are fetched with SPARQL and joined
    with commas.

    Args:
        wd: Wikidata item id (for example "Q213854").
        bio: Mapping of output field name to Wikidata property id.

    Returns:
        dict mapping field name to string. A field from ``bio`` is present
        only when the query yielded at least one usable value.
    """
    result = {}
    # Get the entity from the API.
    entity_info = getEntityInfo(wd)
    result['name'] = extractNameBaseline(entity_info)
    result['description'] = extractDescriptionBaseline(entity_info)
    # Explicitly query using SPARQL to get the main biography data.
    for field, wdt in bio.items():
        query = f"SELECT ?entity ?entityLabel ?entityDescription WHERE {{ wd:{wd} wdt:{wdt} ?entity; SERVICE wikibase:label {{ bd:serviceParam wikibase:language \"hi\". }} }}"
        res = get_results(endpoint_url, query)
        values = []
        for binding in res['results']['bindings']:
            # A binding without a label is treated as empty instead of raising.
            value = binding.get('entityLabel', {}).get('value', "")
            # Drop only bare item ids ("Q12345"), which the label service
            # returns when no label exists. A plain 'Q' substring test would
            # also discard real values that contain a capital Q.
            if value != '' and not re.fullmatch(r'Q\d+', value):
                values.append(value)
        if values:
            # Joining avoids the trailing comma the concatenation left behind.
            result[field] = ','.join(values)

    return result

## Method 2

In [20]:
def update(infobox, translator, max_fields=15):
    """Convert a parsed infobox into a translated or transliterated infobox.

    A fixed set of known fields is handled first: each is copied verbatim,
    translated, or transliterated, and kept only when non-empty. If that
    leaves fewer than ``max_fields`` entries, the remaining source fields are
    transliterated in source order until ``max_fields`` entries exist.

    Unlike the earlier implementation, empty fields are skipped outright:
    they are not sent to the translator and do not use up a slot of the
    ``max_fields`` cap, so later non-empty fields are no longer crowded out.

    Args:
        infobox: Mapping of source field name to string; ``None`` is treated
            as an empty infobox.
        translator: Object providing ``get_translation(text)`` and
            ``get_transliteration(text)``.
        max_fields: Entry count at which filling from the remaining source
            fields stops. The fixed fields themselves are not capped.

    Returns:
        dict of output field name to non-empty converted value, with the fixed
        fields first (in the order listed below) followed by the fill fields.
    """
    # Work on a plain copy so lookups of absent keys never add entries.
    source = dict(infobox or {})

    def keep(text):
        return text

    translate = translator.get_translation
    transliterate = translator.get_transliteration

    # (output key, candidate source keys in priority order, converter)
    plan = [
        ('name', ('name',), transliterate),
        ('native_name', ('native_name',), keep),
        ('category', ('settlement_type',), translate),
        # Prefer the skyline image and fall back to the plain image field.
        ('image', ('image_skyline', 'image'), keep),
        ('image_caption', ('image_caption',), translate),
        ('flag', ('flag',), keep),
        ('map', ('map',), keep),
        ('map_caption', ('map_caption',), translate),
        ('motto', ('motto',), transliterate),
        ('timezone', ('timezone',), keep),
        ('country', ('country',), translate),
        ('state', ('state',), translate),
        ('region', ('region',), translate),
        ('district', ('district',), translate),
        ('municipality', ('municipality',), translate),
        ('location', ('location',), translate),
        ('area', ('area_km2',), keep),
        ('length', ('length_km',), keep),
        ('width', ('width_km',), keep),
        ('population', ('population',), translate),
        ('elevation', ('elevation_m',), keep),
        ('animal', ('animal',), translate),
        ('plant', ('plant',), translate),
        ('geology', ('geology',), translate),
    ]

    updated_infobox = {}
    for out_key, source_keys, convert in plan:
        raw = ''
        for source_key in source_keys:
            raw = source.get(source_key, '')
            if raw != '':
                break
        if raw == '':
            continue
        converted = convert(raw)
        if converted != '':
            updated_infobox[out_key] = converted

    # Top up with the remaining source fields, counting only non-empty results.
    for key, val in source.items():
        if len(updated_infobox) >= max_fields:
            break
        if key in updated_infobox or val == '':
            continue
        converted = transliterate(val)
        if converted != '':
            updated_infobox[key] = converted
    return updated_infobox

def method2_infobox(name):
    """Parse the page titled ``name`` with wptools and convert its infobox.

    Args:
        name: Page title handed to ``wptools.page``.

    Returns:
        The dict produced by ``update`` for the page's infobox; an empty dict
        when the parsed page carries no infobox data.
    """
    page = wptools.page(name).get_parse()
    # A missing or None 'infobox' entry would make update() fail, so a page
    # without an infobox is treated as an empty one.
    raw_infobox = page.data.get('infobox') or {}
    return update(raw_infobox, translator)

In [21]:
def change_format(infobox):
    """Print ``infobox`` as an ``{{Infobox person}}`` wikitext template.

    Each entry is written on its own line as ``|key = value`` between the
    opening ``{{Infobox person`` line and the closing ``}}`` line.

    Args:
        infobox: Mapping of field name to string value.
    """
    rows = []
    for key, val in infobox.items():
        rows.append("|" + key + " = " + val)
    body = "\n".join(rows)
    print("{{Infobox person", body, "}}", sep="\n")

In [11]:
# Q213854 is the Wikidata id that the wptools output further down reports for
# the "Virat Kohli" page.
m1_infobox = method1_infobox("Q213854" , biography)

In [14]:
# Builds the Method 2 infobox from the parsed "Virat Kohli" page; the text that
# follows this cell is progress output printed during the wptools request.
m2_infobox = method2_infobox("Virat Kohli")

en.wikipedia.org (parse) Virat Kohli
en.wikipedia.org (imageinfo) File:The President, Shri Pranab Mukh...
Virat Kohli (en) data
{
  image: <list(1)> {'kind': 'parse-image', 'file': 'File:The Presi...
  infobox: <dict(97)> name, image, caption, birth_date, birth_plac...
  iwlinks: <list(2)> https://commons.wikimedia.org/wiki/Category:V...
  pageid: 16017429
  parsetree: <str(264279)> <root><template><title>short descriptio...
  requests: <list(2)> parse, imageinfo
  title: Virat Kohli
  wikibase: Q213854
  wikidata_url: https://www.wikidata.org/wiki/Q213854
  wikitext: <str(222289)> {{short description|Indian international...
}


In [15]:
# NOTE(review): this assignment rebinds the name `baseline_infobox` from the
# function to its result dict. Running this cell a second time fails unless the
# cell that defines the function is re-run first. A distinct result name would
# avoid this, but every later use of the name would have to change with it.
baseline_infobox = baseline_infobox("Q213854" , biography)

### Method 1 Infobox

In [16]:
change_format(m1_infobox)

{{Infobox person
|name = विराट कोहली
|description = भारतीय क्रिकेट कप्तान
|image = http://commons.wikimedia.org/wiki/Special:FilePath/Virat%20Kohli%20in%20New%20Delhi%20on%20December%202018.jpg,http://commons.wikimedia.org/wiki/Special:FilePath/Virat%20Kohli%20portrait.jpg,http://commons.wikimedia.org/wiki/Special:FilePath/The%20President%2C%20Shri%20Pranab%20Mukherjee%20presenting%20the%20Padma%20Shri%20Award%20to%20Shri%20Virat%20Kohli%2C%20at%20a%20Civil%20Investiture%20Ceremony%2C%20at%20Rashtrapati%20Bhavan%2C%20in%20New%20Delhi%20on%20March%2030%2C%202017%20%28cropped%29.jpg,
|gender = पुरुष,
|place_of_birth = दिल्ली,
|date_of_birth = 1988-11-05T00:00:00Z,
|profession = क्रिकेटर,
|awards = अर्जुन पुरस्कार,राजीव गांधी खेल रत्न,
|spouse = अनुष्का शर्मा,
|nationality = भारत,
}}


### Method 2 Infobox

In [17]:
change_format(m2_infobox)

{{Infobox person
|name = विराट कोहली
|image = The President, Shri Pranab Mukherjee presenting the Padma Shri Award to Shri Virat Kohli, at a Civil Investiture Ceremony, at Rashtrapati Bhavan, in New Delhi on March 30, 2017 (cropped).jpg
|country = भारत
|caption = कोहली इन 2017
|birth_date = {{birth date and age|1988|11|5|df|=|y}}
|birth_place = [[न्यू डेल्ही]], इंडिया
|nickname = चीकू ऑर चीकू
|family = {{marriage|[[Anushka Sharma]] (wife)|2017}}
|height = 1.75 म
|batting = राइट-हैंडेड
|bowling = राइट-अर्म [[फास्ट बॉलिंग|मीडियम]]
|role = [[बैटिंग ऑडर (क्रिकेट)#टॉप ऑडर|टॉप-ऑडर बैटसमैन]]
|international = ट्रू
|internationalspan = 2008–प्रेज़ेंट
|testdebutdate = 20 जून
}}


### Baseline Infobox

In [18]:
change_format(baseline_infobox)

{{Infobox person
|name = विराट कोहली
|description = भारतीय क्रिकेट खिलाड़ी
|image = http://commons.wikimedia.org/wiki/Special:FilePath/Virat%20Kohli%20in%20New%20Delhi%20on%20December%202018.jpg,http://commons.wikimedia.org/wiki/Special:FilePath/Virat%20Kohli%20portrait.jpg,http://commons.wikimedia.org/wiki/Special:FilePath/The%20President%2C%20Shri%20Pranab%20Mukherjee%20presenting%20the%20Padma%20Shri%20Award%20to%20Shri%20Virat%20Kohli%2C%20at%20a%20Civil%20Investiture%20Ceremony%2C%20at%20Rashtrapati%20Bhavan%2C%20in%20New%20Delhi%20on%20March%2030%2C%202017%20%28cropped%29.jpg,
|gender = पुरुष
|place_of_birth = दिल्ली
|date_of_birth = 1988-11-05T00:00:00Z,
|profession = क्रिकेट खिलाड़ी
|awards = अर्जुन पुरस्कार,राजीव गांधी खेल रत्न, खेल में पद्मश्री
|spouse = अनुष्का शर्मा,
|nationality = भारत
}}
