#### This code tests the following configuration:

Entity Representation: Centroid of Context words + Words in Entity

Type Representation: Type representation learned on training corpus

Ranking Score: Group Average

Word Representation: Word vectors trained on the corpus, initialized with pretrained GloVe embeddings

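Note: Gives the same results as cosine similarity. With a group of only two unit vectors (one entity vector and one type vector), the group-average score reduces to their dot product, i.e. their cosine.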

In [8]:
import os, pickle
from operator import itemgetter
import numpy as np

In [9]:
# Which entity representation to evaluate. The loading cell reads the
# sentence-level pickle when `sent_level` is set and the document-level pickle
# when `doc_level` is set; if both are set the document-level one wins.
sent_level = True
doc_level = False

# Directory layout: everything lives under the shared output folder.
entity_ranking_op_folder = '../../Data/output'
entity_rep_test_folder = 'EntityRep/test'
type_rep_folder = 'TypeRep/type_word/train'

# Pickle file names inside those folders.
entity_rep_file_name = 'doc_role_entity_context_word_centroid.p'
entity_rep_doc_level_file_name = 'doc_role_entity_doc_level_context_word_centroid.p'
type_rep_file_name = 'tag_vec_dict.p'

# Full paths. Both entity files sit in the same test folder, so build them together.
entity_rep_file, entity_rep_doc_level_file = (
    os.path.join(entity_ranking_op_folder, entity_rep_test_folder, file_name)
    for file_name in (entity_rep_file_name, entity_rep_doc_level_file_name)
)
type_rep_file = os.path.join(entity_ranking_op_folder, type_rep_folder, type_rep_file_name)

In [10]:
# Role tags grouped by entity type (location, organisation, person).
loc_tag_list = ['LOC_Event', 'LOC_Accused', 'LOC_Victim', 'LOC_Others']
org_tag_list = ['ORG_Accused', 'ORG_Victim', 'ORG_Others']
per_tag_list = ['PER_Victim', 'PER_Others', 'PER_Accused']

# Every role tag, in the order the final per-tag report is printed:
# locations first, then organisations, then persons.
tag_list = loc_tag_list + org_tag_list + per_tag_list

In [11]:
def _load_pickle(path):
    """Return the object unpickled from `path`, closing the file afterwards."""
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only point these paths at pickles produced by this project.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# Entity representations to evaluate. The document-level file takes precedence
# when both flags are set, which matches the previous load-then-overwrite order
# without reading a file that is immediately discarded.
if doc_level:
    entity_rep_doc_role_context_dict = _load_pickle(entity_rep_doc_level_file)
elif sent_level:
    entity_rep_doc_role_context_dict = _load_pickle(entity_rep_file)
else:
    # Fail here with a clear message instead of a NameError in a later cell.
    raise ValueError('Set sent_level or doc_level to choose an entity representation')

# Role tag -> learned type vector.
type_rep_dict = _load_pickle(type_rep_file)

In [12]:
def group_average(entity, role):
    """Return the group-average similarity of an entity vector and a role vector.

    Both inputs are L2-normalised and treated as a group of two unit vectors.
    The score is ``(s . s - n) / (n * (n - 1))`` where ``s`` is the sum of the
    unit vectors and ``n = 2`` is the group size. For two vectors this equals
    their dot product after normalisation, i.e. their cosine similarity.

    Args:
        entity: 1-D sequence or array of numbers (entity representation).
        role: 1-D sequence or array of numbers with the same length as `entity`.

    Returns:
        The similarity as a Python float. A zero-norm input yields ``nan``,
        because the normalisation divides by the norm.

    Raises:
        ValueError: if the two vectors have incompatible shapes.
    """
    # Work on float arrays so plain lists and any embedding dimension are
    # accepted, rather than only 300-dimensional numpy arrays.
    entity_unit = np.asarray(entity, dtype=float)
    role_unit = np.asarray(role, dtype=float)
    entity_unit = entity_unit / np.linalg.norm(entity_unit)
    role_unit = role_unit / np.linalg.norm(role_unit)

    group_size = 2  # one entity vector + one role vector
    group_sum = entity_unit + role_unit

    # s . s counts each unit vector's self-similarity (1.0 each); subtracting
    # the group size leaves only the cross terms, averaged over ordered pairs.
    summed_similarity = np.dot(group_sum, group_sum)
    return float(summed_similarity - group_size) / (group_size * (group_size - 1))

In [13]:
# Per-tag running totals, all starting at zero:
#   precision1        - sum over documents of precision@K for the tag
#   AveP              - sum over documents of average precision for the tag
#   doc_count_for_tag - number of documents that contain the tag
precision1 = dict.fromkeys(tag_list, 0)
AveP = dict.fromkeys(tag_list, 0)
doc_count_for_tag = dict.fromkeys(tag_list, 0)

# Precision at K: only the K best-ranked entities are scored per role.
K = 5

def _score_role(role, candidates, role_vector, k):
    """Rank `candidates` against one role and score the top `k`.

    Args:
        role: role tag being evaluated.
        candidates: list of ``(entity, gold_role)`` pairs, where ``entity[0]``
            is the entity name and ``entity[1]`` its vector.
        role_vector: type vector of `role`.
        k: number of top-ranked candidates to score.

    Returns:
        ``(hits, precision_sum, retrieved, names)``: the number of top-k
        candidates whose gold role is `role`, the sum of precision values at
        each hit rank, how many candidates were scored, and the ranked names
        joined with a leading space each.
    """
    ranked = [(pair, group_average(pair[0][1], role_vector)) for pair in candidates]
    ranked.sort(key=itemgetter(1), reverse=True)
    top = ranked[:k]

    hits = 0
    precision_sum = 0.0  # fresh for every role, so no carry-over between roles
    for rank, ((_entity, gold_role), _sim) in enumerate(top, start=1):
        if gold_role == role:
            hits += 1
            precision_sum += float(hits) / rank

    names = ''.join(' ' + pair[0][0] for pair, _sim in top)
    return hits, precision_sum, len(top), names


for doc_id in entity_rep_doc_role_context_dict:
    print(doc_id)
    doc_role_dict = entity_rep_doc_role_context_dict[doc_id]

    # Number of gold entities per role in this document.
    num_actual_entities_with_role = {
        role: len(entities) for role, entities in doc_role_dict.items()
    }

    # Entities compete only with entities of the same type (person, location,
    # organisation), so each type group is ranked separately.
    for group_tags in (per_tag_list, loc_tag_list, org_tag_list):
        candidates = [
            (entity, role)
            for role in doc_role_dict
            if role in group_tags
            for entity in doc_role_dict[role]
        ]

        for role in group_tags:
            # Roles that do not occur in this document (absent or empty) are
            # skipped and do not count towards the tag's document total.
            if num_actual_entities_with_role.get(role, 0) == 0:
                continue

            doc_count_for_tag[role] += 1
            hits, precision_sum, retrieved, names = _score_role(
                role, candidates, type_rep_dict[role], K
            )

            # `retrieved` is min(K, group size) and at least 1 here, because
            # the role itself contributes an entity to the group.
            precision1[role] += float(hits) / retrieved
            if hits:
                AveP[role] += precision_sum / hits
            print(role, names)
    print('\n')

# Aggregate the per-tag totals into percentages.
#   avg_pre      - sum over tags of the mean precision@K (kept for inspection)
#   mean_avg_pre - sum over tags of the mean average precision
avg_pre = 0
mean_avg_pre = 0
for tag in tag_list:
    docs_with_tag = doc_count_for_tag[tag]
    if docs_with_tag > 0:
        tag_precision = float(precision1[tag] * 100) / docs_with_tag
        tag_avg_precision = float(AveP[tag] * 100) / docs_with_tag
        avg_pre = avg_pre + tag_precision
        mean_avg_pre = mean_avg_pre + tag_avg_precision
        print(tag, tag_avg_precision, '%')

# Average over every tag in tag_list (tags never seen contribute 0), using the
# list length rather than a hard-coded count so the report follows tag_list.
print('Mean Average Precision', mean_avg_pre/len(tag_list), '%')

ev_060_st_003.txt
PER_Others  Manmohan_Singh Mufti_Mohammad_Sayeed Javid_Makhdoomi
LOC_Event  Lazbal Anantnag Pulwama Nishat Srinagar
LOC_Others  Lazbal Anantnag Church_road Pulwama Nishat
ORG_Accused  Hizbul_Mujahideen Hizbul_Mujahideen Burnhall_school
ORG_Others  Hizbul_Mujahideen Hizbul_Mujahideen Burnhall_school


ev_080_st_008.txt
PER_Others  M._C._Misra Misra
LOC_Others  NEW_DELHI Mehrauli
ORG_Others  AIIMS All-India_Institute_of_Medical_Sciences AIIMS_Trauma_Centre


ev_091_st_010.txt
PER_Others  Singh Rajanth_Singh RPN_Singh Jagadish_Shettar R_Ashoka
LOC_Event  Tamil_Nadu Kolkata Malleshwaram Malleshwaram Malleshwaram
LOC_Others  Tamil_Nadu Kolkata Malleshwaram Malleshwaram Malleshwaram
ORG_Others  Karnataka_Police Karnataka_Police KC_Hospital KC_Hospital Centre_and_the_government_of_Karnataka


ev_043_st_001.txt
LOC_Event  Bishnupur Bishnupur Imphal_West Imphal_West gate_of_6/8
LOC_Others  Bishnupur Bishnupur Imphal_West Imphal_West gate_of_6/8
ORG_Accused  Revolutionary_Peopl

LOC_Event  Chhattisgarh_, Darbha Andhra_Pradesh Padia Padia
LOC_Others  Darbha Malkangiri Malkangiri Malkangiri Malkangiri
ORG_Accused  Kalimela_squad Chhattisgarh_Congress Dandakaranya_Special_Zonal_Committee Odisha_police Odisha_police
ORG_Victim  Maoists Maoists Maoists Maoists Maoists
ORG_Others  Dandakaranya_Special_Zonal_Committee District_Voluntary_Force Maoists Maoists Maoists


2012_9_20_st-361.txt
PER_Others  Syedi_Mufaddal_Bhaisaheb_Saifuddin Syedna_Mohammad_Burhanuddin
LOC_Event  Peshawar Karachi Karachi Karachi India
LOC_Others  Peshawar Karachi Karachi Karachi India
ORG_Victim  Dawoodi_Bohra Pakistan_Air_Force
ORG_Others  Dawoodi_Bohra Pakistan_Air_Force


ev_075_st_001.txt
PER_Others  Manmohan_Singh Shivraj_Patil Pratibha_Patil Sonia_Gandhi Sonia_Gandhi
LOC_Event  Ajmer NEW_DELHI South_Delhi Walled_City Jaipur
LOC_Others  NEW_DELHI Ajmer South_Delhi Jaipur Jaipur
ORG_Accused  Congress HuJI National_Security_Guard Union_government Central_Industrial_Security_Force
ORG_Oth

ORG_Others  Nova_College_of_Engineering The_Hindu government Irrigation_Department


ev_050_st_002.txt
PER_Others  MS.Margaret_Alva MS.Margaret_Alva
LOC_Event  NEW_DELHI NEW_DELHI Srinagar Srinagar Jammu_and_Kashmir
LOC_Victim  Jammu_and_Kashmir Jammu_and_Kashmir Srinagar Srinagar NEW_DELHI
LOC_Others  NEW_DELHI NEW_DELHI Srinagar Srinagar Jammu_and_Kashmir
ORG_Others  Journalist Government Government Government Government


ev_083_st_005.txt
PER_Victim  Rajen_Singh Baidyanath_Prasad Raju Yumnam_Joykumar_Singh K._Mangle_Singh
PER_Others  Rajen_Singh Raju K._Mangle_Singh Baidyanath_Prasad Yumnam_Joykumar_Singh
LOC_Others  Imphal Imphal Manipur Ragailong
ORG_Accused  Peoples_Revolutionary_Party_of_Kangleipak Manipur_police_commando RIMS RIMS Regional_Institute_of_Medical_Sciences
ORG_Others  Peoples_Revolutionary_Party_of_Kangleipak Regional_Institute_of_Medical_Sciences Jawaharlal_Nehru_Hospital Manipur_police_commando RIMS


ev_084_st_027.txt
PER_Others  D._Raja
LOC_Event  Pune Maharas

LOC_Victim  Sudans Khartoum Doha Pune Pune
LOC_Others  Doha Sudans Khartoum Pune Pune
ORG_Others  Inlaks_Budhrani_Hospital Jehangir_Hospital Symbiosis_College Symbiosis_College Poona_College


ev_082_st_014.txt
PER_Others  Pranay_Sahay
LOC_Event  Motor_Stand Radhanagar Radhanagar Radhanagar Bangalore
LOC_Accused  UP north-eastern Agartala Agartala Tripura
LOC_Others  north-eastern UP Bangladesh Bangladesh Radhanagar
ORG_Accused  Indian_Mujahideen SIMI_splinter_group central_intelligence_agencies HuJI HuJI
ORG_Others  Indian_Mujahideen Agartala_Government_Medical_College_hospital SIMI_splinter_group Border_Security_Force state_police


ev_081_st_007.txt
PER_Others  Pragya_Singh_Thakur Shivnarayan_Singh Shamlal_Bhavar_Sahu Dharmendra_Bajrangi Dilip_Nehar
PER_Accused  Pragya_Singh_Thakur Dharmendra_Bajrangi Shivnarayan_Singh Shamlal_Bhavar_Sahu Dilip_Nehar
LOC_Event  Malegaon Malegaon Tukogunj Madhya_Pradesh Bhikku_Chowk
LOC_Accused  MUMBAI/NASHIK Malegaon Malegaon Indore Indore
LOC_Other

PER_Others  Ashok_Chavan Chavan Sharad_Pawar Ramesh_Bagwe Rajendra_Sonawane
PER_Accused  Chavan Ashok_Chavan David_Headley Ramesh_Bagwe Sharad_Pawar
LOC_Event  German_Bakery_on_North_Main_Avenue Punes_Koregaon_Park Osho_Ashram Osho_Ashram Mumbai
LOC_Others  Osho_Ashram Osho_Ashram German_Bakery_on_North_Main_Avenue Punes_Koregaon_Park Mumbai
ORG_Accused  forensic Anti-Terrorism_Squad ATS National_Investigation_Agency NIA
ORG_Others  forensic National_Investigation_Agency ATS Anti-Terrorism_Squad NIA


2013_5_3_st-465.txt
PER_Victim  Rafis Md._._Ameeruddin Rafiuddin Rafiuddin Rafiuddin
PER_Others  Rafis Aleemuddin Aleemuddin Ameeruddins Shamsuddin
LOC_Event  Dilsukhnagar Dilsukhnagar Dilsukhnagar Dilsukhnagar Chandrayangutta
LOC_Others  Koti Chandrayangutta Hafiz_Baba_Nagar Hafiz_Baba_Nagar Dilsukhnagar
ORG_Others  ENT_Hospital


2013_4_9_st-103.txt
PER_Others  Daljeet_Singh Nayyar_Hasnain_Khan Saaj_Infracon
LOC_Others  Kutumba_police_station_area Sewra Aurangabad_district Sahebganj_pol

In [14]:
from operator import itemgetter
# Sanity check: ordering (label, score) pairs by score, highest first.
data = [('abc', 1.21), ('abc', 2.31), ('abc', 1.48), ('abc', 2.21)]
data.sort(key=itemgetter(1), reverse=True)
data[0:4]

[('abc', 2.31), ('abc', 2.21), ('abc', 1.48), ('abc', 1.21)]