In [44]:
import sys
import json
from tqdm import tqdm
import re
import pandas as pd
from collections import defaultdict
from libindic import inexactsearch
import pandas as pd
import pickle as pkl
from editdistance import eval as ed
from fuzzywuzzy import fuzz
import numpy as np
from random import shuffle
import string

In [45]:
def get_score(translated_infobox , actual_infobox , key_threshold=80 , value_threshold=0.6):
    """Score a translated infobox against a reference infobox.

    Keys are stripped of surrounding whitespace and then paired whenever
    ``fuzz.token_sort_ratio`` of the two keys is at least ``key_threshold``.
    Each translated key is marked at most once, against the first reference
    key that yields a verdict:

    * ``'C'`` -- ``inst.compare`` of the two values is >= ``value_threshold``.
    * ``'S'`` -- the score is below the threshold, the comparison raised, or
      the translated value is empty (recorded with score 0).

    Reference keys with no matching translated key go to ``'D'`` and
    translated keys with no matching reference key go to ``'I'``.

    Parameters
    ----------
    translated_infobox : dict
        Mapping of key -> value produced by the method under evaluation.
    actual_infobox : dict
        Mapping of key -> value of the reference infobox.
    key_threshold : int, optional
        Minimum ``token_sort_ratio`` for two keys to be considered the same.
    value_threshold : float, optional
        Minimum ``compare`` score for a value to count as correct.
        NOTE(review): presumably ``compare`` returns a similarity in [0, 1];
        confirm against the libindic documentation.

    Returns
    -------
    tuple
        ``([precision, recall], markings)`` where ``markings`` is a
        ``defaultdict(list)``; ``'C'``/``'S'`` hold ``{key: score}`` dicts and
        ``'D'``/``'I'`` hold bare key strings.
        precision = C / (C + S), recall = C / (C + S + D); both are 0 when
        their denominator is 0.
    """
    inst = inexactsearch.InexactSearch()
    markings = defaultdict(list)
    translated_infobox = {key.strip(): val for key, val in translated_infobox.items()}
    actual_infobox = {key.strip(): val for key, val in actual_infobox.items()}

    def same_key(key, key2):
        return fuzz.token_sort_ratio(key, key2) >= key_threshold

    for key, translated_val in translated_infobox.items():
        for key2, actual_val in actual_infobox.items():
            if not same_key(key, key2):
                continue
            if not translated_val:
                # Record the miss in the same {key: score} shape as every other
                # C/S entry, and stop so the key is counted only once even when
                # several reference keys match it.
                markings['S'].append({key: 0})
                break
            if not actual_val:
                continue
            try:
                # Reference values containing ASCII letters are not scored;
                # the pair is skipped and the next reference key is tried.
                skip = any(let in string.ascii_letters for let in actual_val)
                val = 0 if skip else inst.compare(translated_val, actual_val)
            except Exception:
                # Best effort: a value that cannot be inspected or compared
                # counts as a mismatch instead of aborting the evaluation.
                skip = False
                val = 0
            if skip:
                continue
            label = 'C' if val >= value_threshold else 'S'
            markings[label].append({key: val})
            break

    # Reference keys that no translated key matches.
    for key2 in actual_infobox:
        if not any(same_key(key, key2) for key in translated_infobox):
            markings['D'].append(key2)

    # Translated keys that no reference key matches.
    for key in translated_infobox:
        if not any(same_key(key, key2) for key2 in actual_infobox):
            markings['I'].append(key)

    correct = len(markings['C'])
    attempted = correct + len(markings['S'])
    expected = attempted + len(markings['D'])
    precision = correct / attempted if attempted else 0
    recall = correct / expected if expected else 0
    return [precision , recall] , markings

In [46]:
def get_results(data):
    """Aggregate ``get_score`` results over all evaluation records.

    Parameters
    ----------
    data : list of dict
        Each record maps a name to a sequence ``val``. ``val[-1]`` is the
        reference infobox, and ``val[2]``, ``val[1]``, ``val[0]`` are scored
        against it in that order, filling the M1, M2 and Baseline columns.
        Values whose reference infobox is empty are ignored.
        NOTE(review): each record is assumed to contribute exactly three
        scores (one scored value per record); otherwise the six-column
        DataFrame construction fails -- confirm against the data files.

    Returns
    -------
    tuple
        ``(df, graph_data)``. ``df`` has one row per scored record with the
        precision/recall of each method. ``graph_data`` is a dict with:

        * ``'list_d'`` / ``'list_i'`` -- per method, the 20 most frequent keys
          marked ``'D'`` / ``'I'`` with their counts, in decreasing order.
        * ``'keys_match'`` -- ``[0]`` counts per key of C/S entries with score
          >= 0.7, ``[1]`` the same with score >= 0.3 (summed over methods).
        * ``'keys_score'`` -- per method, per key: the C/S/D/I counts plus the
          derived ``'precision'`` and ``'recall'``.
    """
    n_methods = 3
    overall_score = []
    list_d = [defaultdict(int) for _ in range(n_methods)]
    list_i = [defaultdict(int) for _ in range(n_methods)]
    keys_score = [defaultdict(lambda : defaultdict(int)) for _ in range(n_methods)]
    keys_match = [defaultdict(int) for _ in range(2)]

    for entry in data:
        cur_score = []
        for val in entry.values():
            if not val[-1]:
                # Nothing to score against without a reference infobox.
                continue
            for i in range(n_methods):
                score , markings = get_score(val[2 - i] , val[-1])
                cur_score.append(score)
                for missing_key in markings['D']:
                    list_d[i][missing_key] += 1
                    keys_score[i][missing_key]['D'] += 1
                for extra_key in markings['I']:
                    list_i[i][extra_key] += 1
                    keys_score[i][extra_key]['I'] += 1
                for label in ('C' , 'S'):
                    for item in markings[label]:
                        if isinstance(item, str):
                            # Tolerate a bare key (an entry recorded without a
                            # score) by treating it as score 0.
                            item = {item: 0}
                        for key2 , val2 in item.items():
                            # Attribute the count to the key that was actually
                            # scored, not to a leftover loop variable.
                            keys_score[i][key2][label] += 1
                            if val2 >= 0.7:
                                keys_match[0][key2] += 1
                            if val2 >= 0.3:
                                keys_match[1][key2] += 1
        if not cur_score:
            continue
        # Flatten [[p, r], [p, r], [p, r]] into one row of six numbers.
        overall_score.append(np.array(sum(cur_score , [])))

    def top_20(counts):
        ranked = sorted(counts.items() , key=lambda item : item[1] , reverse=True)
        return dict(ranked[:20])

    list_d = [top_20(counts) for counts in list_d]
    list_i = [top_20(counts) for counts in list_i]
    df = pd.DataFrame(overall_score , columns=['M1-Precision','M1-Recall','M2-Precision','M2-Recall','Baseline-Precision','Baseline-Recall'])

    for method in keys_score:
        for counts in method.values():
            den = counts['C'] + counts['S']
            # max(1, den) keeps keys that were never scored at 0 instead of
            # dividing by zero.
            counts['precision'] = counts['C'] / max(1 , den)
            den += counts['D']
            counts['recall'] = counts['C'] / max(1 , den)

    graph_data = {
        'list_d' : list_d,
        'list_i' : list_i,
        'keys_match' : keys_match,
        'keys_score' : keys_score
    }
    return df , graph_data

In [49]:
# Load the people evaluation records (one JSON object per line) and score them.
# The encoding is pinned so the read does not depend on the platform default.
with open('people_eval_records.jsonl' , encoding='utf-8') as f:
    # Skip blank lines (e.g. a trailing newline), which json.loads would reject.
    data = [json.loads(line) for line in f if line.strip()]
print("Number of records for people :" , len(data))
df , graph_data = get_results(data)
display(df.mean())

Number of records for people : 700


M1-Precision          0.482690
M1-Recall             0.074289
M2-Precision          0.418671
M2-Recall             0.183139
Baseline-Precision    0.390357
Baseline-Recall       0.065681
dtype: float64

In [50]:
# Load the places evaluation records (one JSON object per line) and score them.
# The encoding is pinned so the read does not depend on the platform default.
with open('places_eval_records.jsonl' , encoding='utf-8') as f:
    # Skip blank lines (e.g. a trailing newline), which json.loads would reject.
    data = [json.loads(line) for line in f if line.strip()]
print("Number of records for places :" , len(data))
df , graph_data = get_results(data)
display(df.mean())

Number of records for places : 400


M1-Precision          0.497500
M1-Recall             0.018789
M2-Precision          0.246628
M2-Recall             0.034243
Baseline-Precision    0.358750
Baseline-Recall       0.012597
dtype: float64

In [134]:
# graph_data = dict with keys [list_d , list_i , keys_match , keys_score ]


# For list_d / list_i / keys_score[i]:
#     [0] = Method1
#     [1] = Method2
#     [2] = Baseline
#     list_d[i] = dictionary {key : count of records where the key is in the actual infobox but not in our infobox}, top 20 keys in decreasing order of count
#     list_i[i] = dictionary {key : count of records where the key is in our infobox but not in the actual infobox}, top 20 keys in decreasing order of count
#     keys_score[i] = dictionary with {precision , recall , C , S , I , D} for each key

# For keys_match :
#     [0] = match >= 70%
#     [1] = match >= 30%