### Equivalence Relations
Since wikidata obviously doesn't support equivalent relations, we have to get them somehow else. For that we just use the aliases of all general properties that we have in the relation_subset. 

In [93]:
import pandas as pd
from qwikidata.entity import WikidataItem, WikidataLexeme, WikidataProperty
from qwikidata.linked_data_interface import get_entity_dict_from_api
from wikidata.client import Client
from googletrans import Translator
from tqdm import tqdm
from collections import defaultdict
import time

In [94]:
from pymongo import MongoClient
client = MongoClient()
wikiclient = Client()

In [95]:
translator = Translator()

In [147]:
equivalence_relations = pd.read_csv('../data/reasoning/equivalence/relations_zh_ja.csv')
equivalence_relations

Unnamed: 0,id,zh,ja
0,P1034,主要食物來源,主な食料源
1,P457,成立文书,設立文
2,P532,船籍港,船籍港
3,P740,成立地點,結成場所
4,P1817,收件人,宛名
...,...,...,...
509,P31,隶属于,分類
510,P1441,登场作品,登場する作品
511,P828,起因,原因
512,P1542,導致,以下の原因


In [148]:
# GENERATE ALIASES WITH TRANSLATIONS AS ALTERNATIVES
target_languages = ['zh', 'ja']

for target_lang in target_languages:
    alias_target = []
    
    for index, relation in tqdm(equivalence_relations.iterrows(), total=equivalence_relations.shape[0]):

        alias = None
        
        p_dict = get_entity_dict_from_api(relation['id'])

        try:
            i = 0
            alias = p_dict['aliases'][target_lang][i]['value']

            while alias == label:
                i += 1
                alias = p_dict['aliases'][target_lang][i]['value']
        except KeyError:
            for source_lang in target_languages:
                if source_lang == target_lang:
                    continue
                
                if source_lang == 'zh':
                    translation = translator.translate(relation[source_lang], src='zh-tw', dest=target_lang).text
                elif target_lang == 'zh':
                    translation = translator.translate(relation[source_lang], src=source_lang, dest='zh-tw').text
                else:
                    translation = translator.translate(relation[source_lang], src=source_lang, dest=target_lang).text

                if translation != relation[target_lang] and translation.lower() != relation[target_lang].lower():
                    alias = translation
                    break
                    
                
        alias_target.append(alias)

    equivalence_relations[target_lang + '_alias'] = alias_target

equivalence_relations

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 514/514 [03:13<00:00,  2.65it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 514/514 [03:08<00:00,  2.72it/s]


Unnamed: 0,id,zh,ja,zh_alias,ja_alias
0,P1034,主要食物來源,主な食料源,主要食物来源,主食
1,P457,成立文书,設立文,成立聲明,設立書類
2,P532,船籍港,船籍港,船舶注册港,ホームポート
3,P740,成立地點,結成場所,形成地點,創設場所
4,P1817,收件人,宛名,地址,宛名書き
...,...,...,...,...,...
509,P31,隶属于,分類,是一個,種類
510,P1441,登场作品,登場する作品,出场作品,デビュー作
511,P828,起因,原因,原因,
512,P1542,導致,以下の原因,结果,結果


In [149]:
# Add english labels
for target in ['en']:

    label_target = []

    for index, relation in equivalence_relations.iterrows():

        p_dict = get_entity_dict_from_api(relation['id'])

        try:
            label = p_dict['labels'][target]['value']
        except:
            label = None

        label_target.append(label)
        
    equivalence_relations[target] = label_target

equivalence_relations

Unnamed: 0,id,zh,ja,zh_alias,ja_alias,en
0,P1034,主要食物來源,主な食料源,主要食物来源,主食,main food source
1,P457,成立文书,設立文,成立聲明,設立書類,foundational text
2,P532,船籍港,船籍港,船舶注册港,ホームポート,port of registry
3,P740,成立地點,結成場所,形成地點,創設場所,location of formation
4,P1817,收件人,宛名,地址,宛名書き,addressee
...,...,...,...,...,...,...
509,P31,隶属于,分類,是一個,種類,instance of
510,P1441,登场作品,登場する作品,出场作品,デビュー作,present in work
511,P828,起因,原因,原因,,has cause
512,P1542,導致,以下の原因,结果,結果,has effect


In [89]:
# Generate Alias
for target in target_languages:

    label_target = []

    for index, relation in equivalence_relations.iterrows():

        p_dict = get_entity_dict_from_api(relation['id'])

        try:
            label = p_dict['labels'][target]['value']
        except:
            label = None

        label_target.append(label)
        
    equivalence_relations[target] = label_target

equivalence_relations

Unnamed: 0,id,en,ja
0,P1034,main food source,主な食料源
1,P457,foundational text,設立文
2,P8004,ordered by,指示者
3,P532,port of registry,船籍港
4,P740,location of formation,結成場所
...,...,...,...
745,P31,instance of,分類
746,P1441,present in work,登場する作品
747,P828,has cause,原因
748,P1542,has effect,以下の原因


In [99]:
# Generate Translations
target_languages = ['en', 'ru']

translate_dict = defaultdict(list)

for target_lang in target_languages:

    # Iterate over relations
    for idx, relation in tqdm(equivalence_relations.iterrows(), total=equivalence_relations.shape[0]):

        translations = []
        for source_lang in ['en', 'ja']:
            if source_lang == target_lang:
                continue
                
            translation = translator.translate(relation[source_lang], src=source_lang, dest=target_lang).text

            if translation != relation[target_lang] and translation.lower() != relation[target_lang].lower():
                translations.append(translation)

        time.sleep(0.5)

        # Remove duplicates
        translations = list(set(translations))

        # If empty, use None instead
        if not translations:
            translations = None
        else:
            translations = translations[0]

        # Add translations for this language for that 
        translate_dict[target_lang].append(translations)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 750/750 [07:46<00:00,  1.61it/s]
100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 750/750 [07:32<00:00,  1.66it/s]


In [101]:
equivalence_relations

Unnamed: 0,id,en,ja,en_alias,ja_alias
0,P1034,main food source,主な食料源,Main food sources,主食源
1,P457,foundational text,設立文,Founding statement,基本的なテキスト
2,P8004,ordered by,指示者,Instructor,によって順序付け
3,P532,port of registry,船籍港,Ship registration port,レジストリのポート
4,P740,location of formation,結成場所,Formation place,形成場所
...,...,...,...,...,...
745,P31,instance of,分類,Classification,のインスタンス
746,P1441,present in work,登場する作品,Works that appear,仕事に存在
747,P828,has cause,原因,Cause,原因があります
748,P1542,has effect,以下の原因,The following causes,効果があります


In [142]:
# Delete some rows
equivalence_relations = equivalence_relations.drop(equivalence_relations[equivalence_relations['id'] == 'P8929'].index)


In [144]:
# Rows with None
equivalence_relations[equivalence_relations['en_alias'].isnull()]

KeyError: 'en_alias'

In [150]:
# Entity Candidates to Dataframe
df = pd.DataFrame(equivalence_relations)
df

Unnamed: 0,id,zh,ja,zh_alias,ja_alias,en
0,P1034,主要食物來源,主な食料源,主要食物来源,主食,main food source
1,P457,成立文书,設立文,成立聲明,設立書類,foundational text
2,P532,船籍港,船籍港,船舶注册港,ホームポート,port of registry
3,P740,成立地點,結成場所,形成地點,創設場所,location of formation
4,P1817,收件人,宛名,地址,宛名書き,addressee
...,...,...,...,...,...,...
509,P31,隶属于,分類,是一個,種類,instance of
510,P1441,登场作品,登場する作品,出场作品,デビュー作,present in work
511,P828,起因,原因,原因,,has cause
512,P1542,導致,以下の原因,结果,結果,has effect


In [151]:
# Save as CSV for manual cleanup
df.to_csv('../data/reasoning/equivalence_zh_ja.csv', index=False)