# Similarity-Based Prediction

In [47]:
import json
import torch
import numpy as np
import evaluation as eval
from collections import Counter
from transformers import BertModel, BertTokenizer

# Inference

In [48]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
# NOTE(review): no cell visible in this notebook reads DEVICE — confirm it is still needed.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [324]:
# Read the annotated articles. The encoding is pinned to UTF-8 (the standard JSON
# interchange encoding) instead of relying on the platform default, which is not
# UTF-8 on every OS and would corrupt or reject non-ASCII text.
with open("./MEN-Dataset_Results/only_descriptions_and_hypernym.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# MORE THAN EQUAL 5 DOCRED ONLY
# NOTE(review): presumably "relation labels with at least 5 instances, DocRED only" — confirm.
# These are the candidate labels the similarity scorer ranks for every entity pair.
unseen_relation_labels = ['given name', 'capital of', 'operator', 'director / manager', 'country of citizenship']

In [325]:
# Inspect the first loaded record to check its structure before processing.
data[0]

{'id': 'article_1',
 'entities': [{'id': 1,
   'label': 'TITLE',
   'position': {'start_offset': 893, 'end_offset': 895}},
  {'id': 2,
   'label': 'PERSON',
   'position': {'start_offset': 116, 'end_offset': 139}},
  {'id': 3,
   'label': 'LOCATION',
   'position': {'start_offset': 0, 'end_offset': 12}},
  {'id': 4,
   'label': 'LOCATION',
   'position': {'start_offset': 14, 'end_offset': 22}},
  {'id': 5,
   'label': 'EVENT',
   'position': {'start_offset': 625, 'end_offset': 633}},
  {'id': 6,
   'label': 'PERSON',
   'position': {'start_offset': 108, 'end_offset': 139}},
  {'id': 7,
   'label': 'PERSON',
   'position': {'start_offset': 91, 'end_offset': 139}},
  {'id': 8,
   'label': 'PERSON',
   'position': {'start_offset': 119, 'end_offset': 139}},
  {'id': 9,
   'label': 'PERSON',
   'position': {'start_offset': 651, 'end_offset': 665}},
  {'id': 10,
   'label': 'PERSON',
   'position': {'start_offset': 654, 'end_offset': 665}},
  {'id': 11,
   'label': 'PERSON',
   'position': {

In [327]:
# Echo the candidate relation labels that are passed to the predictor further down.
unseen_relation_labels

['given name',
 'capital of',
 'operator',
 'director / manager',
 'country of citizenship']

In [328]:
def _first_match(candidates, predicate, what):
    """Return the first element of `candidates` for which `predicate` is true.

    Raises:
        IndexError: when nothing matches. The exception type is the one a
            `[...][0]` lookup would raise, but the message names `what` so the
            offending record can be located.
    """
    for candidate in candidates:
        if predicate(candidate):
            return candidate
    raise IndexError(f"no match found for {what}")


def _mention_text(text, entity):
    """Return the stripped span of `text` selected by `entity`'s position offsets."""
    position = entity["position"]
    return text[position['start_offset']:position['end_offset']].strip()


data_to_test = []                  # one test instance per relation whose label is in unseen_relation_labels
rel_count = 0                      # every relation visited, whether its label is unseen or not
unseen_relation_count_dict = {}    # unseen label -> number of collected instances

for item in data:
    for rel in item['relations']:
        rel_count += 1
        if rel['relation'] not in unseen_relation_labels:
            continue

        # Resolve each endpoint: entity id -> entity -> surface mention -> side information.
        side_information = {}
        for role in ("head", "tail"):
            entity_id = rel[role]
            entity = _first_match(
                item['entities'],
                lambda ent: ent["id"] == entity_id,
                f"{role} entity with id {entity_id!r} in item {item.get('id')!r}",
            )
            mention = _mention_text(item['text'], entity)
            mention_key = mention.lower()
            # Side information is matched on the mention text, ignoring case.
            side_information[role] = _first_match(
                item['unique_entity_side_information'],
                lambda ent_s: ent_s['entity_mention'].lower() == mention_key,
                f"side information for {role} mention {mention!r} in item {item.get('id')!r}",
            )

        data_to_test.append(
            {
                "text": item["text"],
                "head_side_information": side_information["head"],
                "tail_side_information": side_information["tail"],
                "relation": rel["relation"]
            }
        )
        unseen_relation_count_dict[rel["relation"]] = unseen_relation_count_dict.get(rel["relation"], 0) + 1

In [329]:
# Total number of relation instances visited across all articles (unseen labels or not).
print(rel_count)

7776


In [335]:
# Load the pre-trained BERT tokenizer and encoder. Both are created from the same
# checkpoint name, stated once, so they cannot drift apart.
BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)
bert_model = BertModel.from_pretrained(BERT_CHECKPOINT)



In [336]:
def generate_embedding(text, model, tokenizer):
    """Embed `text` as the attention-masked mean of the model's last hidden states.

    Args:
        text: a string (or a list of strings) to encode.
        model: callable taking the tokenizer output as keyword arguments and
            returning an object with a `last_hidden_state` tensor of shape
            (batch, tokens, hidden).
        tokenizer: callable returning a mapping of tensors that includes
            `attention_mask` (1 for real tokens, 0 for padding).

    Returns:
        The pooled tensor with singleton dimensions squeezed out: a 1-D vector
        of size `hidden` when the batch holds one sequence.
    """
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    hidden_states = outputs.last_hidden_state

    # Average over real tokens only. A plain mean over the token axis would let
    # padded positions dilute the embedding whenever the input is padded.
    # With an all-ones mask this is exactly the ordinary mean.
    mask = inputs["attention_mask"].unsqueeze(-1).to(hidden_states.dtype)
    token_sum = (hidden_states * mask).sum(dim=1)
    token_count = mask.sum(dim=1).clamp(min=1)  # guard against an all-padding row
    embeddings = (token_sum / token_count).squeeze()
    return embeddings

def cosine_similarity(embedding_a, embedding_b):
    """Return the cosine of the angle between two 1-D vectors.

    Args:
        embedding_a, embedding_b: array-likes of equal length.

    Returns:
        dot(a, b) / (|a| * |b|), or 0.0 when either vector has zero norm.
        Without the guard a zero vector would produce a division by zero and a
        NaN that silently propagates into every downstream score and sort.
    """
    denominator = np.linalg.norm(embedding_a) * np.linalg.norm(embedding_b)
    if denominator == 0:
        return 0.0
    return np.dot(embedding_a, embedding_b) / denominator

In [None]:
def calculate_confidence(similarities):
    """
    Score how much a set of similarity values can be trusted.

    The result is the average of two terms: the mean similarity, and an
    agreement term 1 / (1 + std) that equals 1 when all values are identical
    and shrinks as they spread out.
    """
    scores = np.array(similarities)
    spread = np.std(scores)

    # Low spread -> agreement close to 1; high spread -> agreement towards 0.
    agreement = 1 / (1 + spread)
    return (np.mean(scores) + agreement) / 2

In [338]:
def predict_unseen_relation(head, tail, model, tokenizer, unseen_labels):
    """
    Rank candidate relation labels for a (head, tail) entity pair by embedding similarity.

    Args:
        head, tail: side-information dicts; the keys 'description',
            'hypernym_llm' and 'entity_type' are read from each.
        model, tokenizer: forwarded unchanged to `generate_embedding`.
        unseen_labels: iterable of candidate relation label strings.

    Returns:
        A list of (label, score) tuples sorted by score, highest first.
        Each score is a weighted sum of cosine similarities (0.4 description,
        scaled by a context weight; 0.1 for each hypernym and each entity type;
        0.2 for the averaged role similarity), multiplied by
        `calculate_confidence` of the seven individual similarities.
    """

    def embed(text):
        # Embed `text` with the shared model/tokenizer and expose it as a NumPy vector.
        return generate_embedding(text, model, tokenizer).numpy()

    pair_description = head['description'] + " " + tail['description']
    head_hypernym, tail_hypernym = head['hypernym_llm'], tail['hypernym_llm']
    head_type, tail_type = head['entity_type'], tail['entity_type']

    # Label-independent vectors: built once and reused for every candidate label.
    description_vec = embed(pair_description)
    head_hypernym_vec = embed(head_hypernym)
    tail_hypernym_vec = embed(tail_hypernym)
    head_type_vec = embed(head_type)
    tail_type_vec = embed(tail_type)
    head_role_vec = embed(f"subject of {head_type} that {head_hypernym}")
    tail_role_vec = embed(f"object of {tail_type} that {tail_hypernym}")
    pair_context_vec = embed(f"relationship between {head_type} and {tail_type}")

    ranked = []
    for label in unseen_labels:
        label_vec = embed(label)
        label_in_context_vec = embed(f"relation {label} between {head_type} and {tail_type}")

        # Bare label against each piece of side information.
        sim_description = cosine_similarity(label_vec, description_vec)
        sim_head_hypernym = cosine_similarity(label_vec, head_hypernym_vec)
        sim_tail_hypernym = cosine_similarity(label_vec, tail_hypernym_vec)
        sim_head_type = cosine_similarity(label_vec, head_type_vec)
        sim_tail_type = cosine_similarity(label_vec, tail_type_vec)

        # Label phrased with the entity types against the role and context phrases.
        sim_head_role = cosine_similarity(label_in_context_vec, head_role_vec)
        sim_tail_role = cosine_similarity(label_in_context_vec, tail_role_vec)
        sim_context = cosine_similarity(label_in_context_vec, pair_context_vec)

        # Maps a cosine in [-1, 1] onto [0, 1]; it only stays within 0.5-1.0
        # when the context similarity is non-negative.
        context_weight = (1 + sim_context) / 2
        role_score = (sim_head_role + sim_tail_role) / 2

        confidence = calculate_confidence([
            sim_description,
            sim_head_hypernym,
            sim_tail_hypernym,
            sim_head_type,
            sim_tail_type,
            sim_head_role,
            sim_tail_role
        ])

        weighted_sum = (
            (0.4 * context_weight * sim_description) +
            (0.1 * sim_head_hypernym) +
            (0.1 * sim_tail_hypernym) +
            (0.1 * sim_head_type) +
            (0.1 * sim_tail_type) +
            (0.2 * role_score)
        )
        ranked.append((label, weighted_sum * confidence))

    # Best-scoring label first; ties keep the order of `unseen_labels`.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked

### Test on unseen relation labels only

In [339]:
%%time
# Run the predictor over every collected instance and split the results by
# whether the top-ranked label equals the gold relation.
matching, not_matching = [], []
the_predicted, the_actual = [], []
print(len(data_to_test))
for idx, item in enumerate(data_to_test):
    head = item['head_side_information']
    tail = item['tail_side_information']

    # Full ranking of (label, score) pairs, best first.
    unseen_relation = predict_unseen_relation(
        head, tail, bert_model, tokenizer, unseen_relation_labels
    )
    record = {"data": item, "unseen_relation_with_score": unseen_relation}

    if unseen_relation[0][0] == item['relation']:
        print(f"Matching: {idx}")
        matching.append(record)
    else:
        not_matching.append(record)

    # the_predicted keeps the top (label, score) pair, not just the label.
    the_predicted.append(unseen_relation[0])
    the_actual.append(item['relation'])

1040
Matching: 55
Matching: 65
Matching: 95
Matching: 97
Matching: 100
Matching: 107
Matching: 120
Matching: 133
Matching: 157
Matching: 158
Matching: 160
Matching: 162
Matching: 163
Matching: 164
Matching: 171
Matching: 190
Matching: 191
Matching: 206
Matching: 207
Matching: 208
Matching: 213
Matching: 215
Matching: 222
Matching: 224
Matching: 225
Matching: 233
Matching: 238
Matching: 241
Matching: 277
Matching: 278
Matching: 293
Matching: 294
Matching: 314
Matching: 352
Matching: 367
Matching: 368
Matching: 369
Matching: 371
Matching: 388
Matching: 402
Matching: 403
Matching: 457
Matching: 458
Matching: 459
Matching: 464
Matching: 481
Matching: 482
Matching: 486
Matching: 487
Matching: 490
Matching: 491
Matching: 503
Matching: 504
Matching: 507
Matching: 508
Matching: 509
Matching: 513
Matching: 521
Matching: 523
Matching: 524
Matching: 525
Matching: 526
Matching: 527
Matching: 530
Matching: 531
Matching: 534
Matching: 548
Matching: 566
Matching: 584
Matching: 589
Matching: 590
Match

# Evaluation

In [351]:
# Map each candidate label to its position in unseen_relation_labels (its integer class id).
unseen_relation_labels_to_int = {label: index for index, label in enumerate(unseen_relation_labels)}

In [352]:
# Entries of the_predicted are (label, score) pairs, so only the label is looked up;
# entries of the_actual are plain label strings.
the_predicted_mapped_to_int = [unseen_relation_labels_to_int[label] for label, _score in the_predicted]
the_actual_mapped_to_int = [unseen_relation_labels_to_int[gold_label] for gold_label in the_actual]

In [360]:
# Entries of the_predicted are (label, score) pairs; extract the labels once.
predicted_labels = [item[0] for item in the_predicted]
predicted_label_set = set(predicted_labels)

# These two lines report the set of labels the model ever predicted, not the set
# of correctly matched labels, so they are captioned "Predicted".
print("Predicted Unseen Relation Label Count:", len(predicted_label_set))
print("All Unseen Relation Label Count:",len(set(the_actual)))
print("All Unseen Relation Label:", unseen_relation_labels)
print("Predicted Unseen Relation Label:", predicted_label_set)
print("All Unseen Relation Label with Count:", Counter(the_actual))
print("Predicted Unseen Relation Label with Count:", Counter(predicted_labels))
print("Total Instance that Has Unseen Label", len(data_to_test))
print("Total Instance that Matching", len(matching))

# Each call gets freshly built arrays, so neither call can be affected by
# whatever the other does to its arguments.
micro_precision,micro_recall,micro_f1_score = eval.compute_micro_PRF(
    np.asarray(the_predicted_mapped_to_int, dtype=np.float32),
    np.asarray(the_actual_mapped_to_int, dtype=np.float32),
)
macro_precision,macro_recall,macro_f1_score = eval.compute_macro_PRF(
    np.asarray(the_predicted_mapped_to_int, dtype=np.float32),
    np.asarray(the_actual_mapped_to_int, dtype=np.float32),
)


print("macro_f1_score", macro_f1_score)
print("micro_f1_score", micro_f1_score)
print("macro_precision", macro_precision)
print("micro_precision", micro_precision)
print("macro_recall", macro_recall)
print("micro_recall", micro_recall)

Matched Unseen Relation Label Count: 3
All Unseen Relation Label Count: 5
All Unseen Relation Label: ['given name', 'capital of', 'operator', 'director / manager', 'country of citizenship']
Predicted Unseen Relation Label: {'director / manager', 'country of citizenship', 'capital of'}
All Unseen Relation Label with Count: Counter({'given name': 761, 'director / manager': 128, 'capital of': 102, 'country of citizenship': 41, 'operator': 8})
Predicted Unseen Relation Label with Count: Counter({'country of citizenship': 549, 'director / manager': 490, 'capital of': 1})
Total Instance that Has Unseen Label 1040
Total Instance that Matching 118
macro_f1_score 0.38794702306426976
micro_f1_score 0.11346153846153845
weighted_f1_score 0.039772249718357994
macro_precision 0.41051262034868596
micro_precision 0.11346153846153846
weighted_precision 0.12154551732420585
macro_recall 0.3677329925872788
micro_recall 0.11346153846153846
weighted_recall 0.11346153846153846
acc 0.11346153846153846


In [135]:
# Reduce predictions to plain label strings. When the_predicted holds
# (label, score) pairs, set()/Counter() on the raw entries would count distinct
# pairs rather than distinct labels.
predicted_labels = [pred[0] if isinstance(pred, tuple) else pred for pred in the_predicted]

print(len(set(predicted_labels)))
print(len(set(the_actual)))
print(unseen_relation_labels)
print(set(predicted_labels))
print(Counter(the_actual))
print(Counter(predicted_labels))

# Accuracy and support-weighted precision / recall / F1 are computed here so the
# cell does not depend on names left over from an earlier kernel session.
# "Weighted" means each gold label's score is weighted by its share of the gold instances.
predicted_arr = np.asarray(predicted_labels)
actual_arr = np.asarray(the_actual)
total = len(the_actual)
acc = float(np.mean(predicted_arr == actual_arr)) if total else 0.0
weighted_precision = weighted_recall = weighted_f1_score = 0.0
for label, support in Counter(the_actual).items():
    is_predicted = predicted_arr == label
    true_positives = int(np.sum(is_predicted & (actual_arr == label)))
    predicted_count = int(np.sum(is_predicted))
    # A label that was never predicted gets precision 0 instead of dividing by zero.
    precision = true_positives / predicted_count if predicted_count else 0.0
    recall = true_positives / support
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    weight = support / total
    weighted_precision += weight * precision
    weighted_recall += weight * recall
    weighted_f1_score += weight * f1

print("macro_f1_score", macro_f1_score)
print("micro_f1_score", micro_f1_score)
print("weighted_f1_score", weighted_f1_score)
print("macro_precision", macro_precision)
print("micro_precision", micro_precision)
print("weighted_precision", weighted_precision)
print("macro_recall", macro_recall)
print("micro_recall", micro_recall)
print("weighted_recall", weighted_recall)
print("acc", acc)

4
15
['investor', 'ethnic group', 'league', 'mother', 'diplomatic relation', 'part of', 'organizer', 'operator', 'spouse', 'child', 'coach of sports team', 'product or material produced', 'founded by', 'manufacturer', 'sibling']
{'coach of sports team', 'product or material produced', 'diplomatic relation', 'organizer'}
Counter({'ethnic group': 19, 'organizer': 19, 'coach of sports team': 19, 'part of': 19, 'child': 19, 'manufacturer': 18, 'diplomatic relation': 17, 'spouse': 12, 'league': 12, 'mother': 11, 'founded by': 10, 'sibling': 9, 'product or material produced': 8, 'investor': 8, 'operator': 8})
Counter({'coach of sports team': 154, 'product or material produced': 32, 'diplomatic relation': 21, 'organizer': 1})
macro_f1_score 0.08394179089341852
micro_f1_score 0.18269230769230768
weighted_f1_score 0.09949261075097704
macro_precision 0.12876082251082252
micro_precision 0.18269230769230768
weighted_precision 0.164601023976024
macro_recall 0.15399896800825594
micro_recall 0.182692

In [136]:
# NOTE(review): `classification_report` is not imported in the import cell visible
# in this notebook. Presumably it is sklearn.metrics.classification_report — confirm
# and add the import so this cell survives Restart & Run All.
# Arguments are passed as (gold labels, predicted labels).
print(classification_report(the_actual_mapped_to_int, the_predicted_mapped_to_int))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.00      0.00      0.00        19
           2       0.00      0.00      0.00        12
           3       0.00      0.00      0.00        11
           4       0.71      0.88      0.79        17
           5       0.00      0.00      0.00        19
           6       1.00      0.05      0.10        19
           7       0.00      0.00      0.00         8
           8       0.00      0.00      0.00        12
           9       0.00      0.00      0.00        19
          10       0.12      1.00      0.22        19
          11       0.09      0.38      0.15         8
          12       0.00      0.00      0.00        10
          13       0.00      0.00      0.00        18
          14       0.00      0.00      0.00         9

    accuracy                           0.18       208
   macro avg       0.13      0.15      0.08       208
weighted avg       0.16   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [137]:
# Macro-averaged metrics on the integer-encoded labels; arguments are (predicted, actual).
eval.compute_macro_PRF(
    np.asarray(the_predicted_mapped_to_int, dtype=np.float32),
    np.asarray(the_actual_mapped_to_int, dtype=np.float32),
)

(0.48285308441558444, 0.16499889429455994, 0.2459519385706299)

In [138]:
# Micro-averaged metrics on the integer-encoded labels; arguments are (predicted, actual).
eval.compute_micro_PRF(
    np.asarray(the_predicted_mapped_to_int, dtype=np.float32),
    np.asarray(the_actual_mapped_to_int, dtype=np.float32),
)

(0.18269230769230768, 0.18269230769230768, 0.18269230769230765)