# Similarity based Prediction

In [1]:
import time
import json
import torch
import random
import numpy as np
import evaluation as eval
from neo4j import GraphDatabase
from collections import Counter
from pykeen.models import TransR  # Assuming TransR is used, adjust if needed
from typing import List, Tuple, Union
from pykeen.datasets import get_dataset
from pykeen.triples import TriplesFactory
from transformers import BertModel, BertTokenizer
from sklearn.metrics.pairwise import cosine_similarity
from entity_pair_refinement import EntityPairRefinement
from pykeen.predict import predict_triples, predict_target, predict_all
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, auc, roc_curve, roc_auc_score, classification_report, confusion_matrix

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


# Inference

In [2]:
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [44]:
with open("./DocRED_Results/only_descriptions_and_hypernyms.json", "r") as f:
    data = json.load(f)

In [45]:
unseen_relation_labels = ['member of sports team', 'date of birth', 'publication date', 'headquarters location', 'continent', 'unemployment rate', 'subsidiary', 'residence', 'mouth of the watercourse', 'child', 'participant of', 'languages spoken, written or signed', 'spouse', 'member of', 'developer']

In [46]:
len(data)

3051

In [47]:
data[0]

{'id': 1,
 'tokenized_sent': [['The',
   'short',
   '-',
   'beaked',
   'common',
   'dolphin',
   '(',
   'Delphinus',
   'delphis',
   ')',
   'is',
   'a',
   'species',
   'of',
   'common',
   'dolphin',
   '.'],
  ['It',
   'has',
   'a',
   'larger',
   'range',
   'than',
   'the',
   'long',
   '-',
   'beaked',
   'common',
   'dolphin',
   '(',
   'D.',
   'capensis',
   ')',
   ',',
   'occurring',
   'throughout',
   'warm',
   '-',
   'temperate',
   'and',
   'tropical',
   'oceans',
   ',',
   'including',
   'the',
   'Indian',
   'Ocean',
   'although',
   'in',
   'smaller',
   'quantities',
   'than',
   'other',
   'places',
   'they',
   'are',
   'found',
   '.'],
  ['There',
   'are',
   'more',
   'short',
   '-',
   'beaked',
   'common',
   'dolphins',
   'than',
   'any',
   'other',
   'dolphin',
   'species',
   'in',
   'the',
   'warm',
   '-',
   'temperate',
   'portions',
   'of',
   'the',
   'Atlantic',
   'and',
   'Pacific',
   'Oceans',
   '.']

In [None]:
unseen_relation_labels

In [None]:
data[0].keys()

In [None]:
def find_entity_sentence(entity, tokenized_document):
    # Convert entity to lowercase for case-insensitive search
    entity_lower = entity.lower()

    # Loop through the sentences to check for the entity
    for idx, sentence in enumerate(tokenized_document):
        if any(entity_lower in word.lower() for word in sentence):  # Check if entity exists in sentence
            return idx + 1  # Return the sentence number and break the loop

    return 0  # Return None if entity is not found in any sentence

In [None]:
data_to_test = []
rel_count = 0
for item in data:
    if item['relation_triplet']:
        for rel in item['relation_triplet']:
            rel_count=rel_count+1
            if rel["relation"].lower() in unseen_relation_labels and rel['relation'].lower()!="no_relation":
                try:
                    head_mention = rel["head"]
                    tail_mention = rel["tail"]
                    head_side_information = [ent_s for ent_s in item['unique_entity_side_information'] if ent_s['entity_mention'].lower()==head_mention.lower()][0]
                    tail_side_information = [ent_s for ent_s in item['unique_entity_side_information'] if ent_s['entity_mention'].lower()==tail_mention.lower()][0]
                    loc_head_mention = find_entity_sentence(head_mention, item['tokenized_sent'])
                    loc_tail_mention = find_entity_sentence(tail_mention, item['tokenized_sent'])
                    data_to_test.append(
                        {
                            "head_side_information": head_side_information,
                            "tail_side_information": tail_side_information,
                            "relation": rel["relation"],
                            "in_which_sentence_and_total_sentence": [loc_head_mention, loc_tail_mention, len(item['tokenized_sent'])]
                        }
                    )
                except:
                    print(f"Issue in ID: {item['id']}")

In [None]:
# Load the pre-trained BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertModel.from_pretrained('bert-base-uncased')

In [41]:
def generate_embedding(text, model, tokenizer):
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    embeddings = outputs.last_hidden_state.mean(dim=1).squeeze()
    return embeddings

def cosine_similarity(embedding_a, embedding_b):
    return np.dot(embedding_a, embedding_b) / (np.linalg.norm(embedding_a) * np.linalg.norm(embedding_b))

In [None]:
def calculate_confidence(similarities):
    """
    Calculate confidence score based on consistency of different similarity measures
    """
    similarities = np.array(similarities)
    
    # Calculate mean and standard deviation
    mean_sim = np.mean(similarities)
    std_sim = np.std(similarities)
    
    # Higher confidence if similarities are consistent (low std dev) and high mean
    consistency = 1 / (1 + std_sim)  # Inverse of standard deviation
    confidence = (mean_sim + consistency) / 2
    
    return confidence

In [43]:
def predict_unseen_relation(head, tail, model, tokenizer, unseen_labels):
    """
    Enhanced unseen relation prediction with dynamic weighting and semantic role analysis
    """

    combined_description = head['description'] + "<SEP>" + tail['description']
    head_hypernym = head['hypernym_llm']
    tail_hypernym = tail['hypernym_llm']
    head_type = head['entity_type']
    tail_type = tail['entity_type']
    
    unseen_scores = []
    
    # Generate base embeddings
    description_embedding = generate_embedding(combined_description, model, tokenizer)
    head_hypernym_embedding = generate_embedding(head_hypernym, model, tokenizer)
    tail_hypernym_embedding = generate_embedding(tail_hypernym, model, tokenizer)
    head_type_embedding = generate_embedding(head_type, model, tokenizer)
    tail_type_embedding = generate_embedding(tail_type, model, tokenizer)
    
    # Generate semantic role embeddings for head and tail
    head_role = f"{head_type} acting as a subject, described as {head_hypernym}"
    tail_role = f"{tail_type} acting as a subject, described as {tail_hypernym}"
    head_role_embedding = generate_embedding(head_role, model, tokenizer)
    tail_role_embedding = generate_embedding(tail_role, model, tokenizer)
    
    # Create context vector from description
    context_embedding = generate_embedding(f"Relation between {head_type} and {tail_type}", model, tokenizer)
    
    # Compare with each unseen relation label
    for label in unseen_labels:
        # Generate embeddings for label and its variations
        label_embedding = generate_embedding(label, model, tokenizer)
        label_context = generate_embedding(f"relation {label} between {head_type} and {tail_type}", model, tokenizer)
        
        # Calculate base similarities
        description_similarity = cosine_similarity(label_embedding.numpy(), description_embedding.numpy())
        head_hypernym_similarity = cosine_similarity(label_embedding.numpy(), head_hypernym_embedding.numpy())
        tail_hypernym_similarity = cosine_similarity(label_embedding.numpy(), tail_hypernym_embedding.numpy())
        head_type_similarity = cosine_similarity(label_embedding.numpy(), head_type_embedding.numpy())
        tail_type_similarity = cosine_similarity(label_embedding.numpy(), tail_type_embedding.numpy())
        
        # Calculate semantic role similarities
        head_role_similarity = cosine_similarity(label_context.numpy(), head_role_embedding.numpy())
        tail_role_similarity = cosine_similarity(label_context.numpy(), tail_role_embedding.numpy())
        
        # Calculate context similarity
        context_similarity = cosine_similarity(label_embedding.numpy(), context_embedding.numpy())
        
        # Dynamic weight calculation based on context similarity
        context_weight = (1 + context_similarity) / 2  # Normalize to 0.5-1.0 range
        
        # Calculate semantic compatibility
        semantic_score = (head_role_similarity + tail_role_similarity) / 2  #Because both Head Role and Tail Role Contribute (On telling which Sub or Obj)
        
        # Calculate confidence based on consistency
        confidence = calculate_confidence([
            description_similarity,
            head_hypernym_similarity,
            tail_hypernym_similarity,
            head_type_similarity,
            tail_type_similarity,
            head_role_similarity,
            tail_role_similarity
        ])
        
        # Final weighted score with dynamic weighting
        total_score = (
            (0.4 * description_similarity) +
            (0.1 * head_hypernym_similarity) +
            (0.1 * tail_hypernym_similarity) +
            (0.1 * head_type_similarity) +
            (0.1 * tail_type_similarity) +
            (0.1 * semantic_score) +
            (0.1 * context_similarity)
        ) * confidence
        
        unseen_scores.append((label, total_score))
    
    # Sort by total score and return top predictions with confidence
    sorted_predictions = sorted(unseen_scores, key=lambda x: x[1], reverse=True)
    return sorted_predictions

### Run Data to Identify Unseen Label

In [38]:
%%time

matching = []
not_matching = []
the_predicted = []
the_actual = []
print(len(data_to_test))
start_time = time.time()
for idx in range(len(data_to_test)):
    try:
        item = data_to_test[idx]
        head = item['head_side_information']
        tail = item['tail_side_information']
    
        unseen_relation = predict_unseen_relation(
            head, tail, bert_model, tokenizer, unseen_relation_labels
        )
        
        # print(f"Unseen relation predicted for {head} -> {tail}: {unseen_relation[0]} with score {unseen_relation[1]}")
        if unseen_relation[0][0] == item['relation']:
            print(f"Matching: {idx}")
            matching.append({"data": item, "unseen_relation_with_score": unseen_relation})
        else:
            not_matching.append({"data": item, "unseen_relation_with_score": unseen_relation})
        the_predicted.append(unseen_relation[0])
        the_actual.append(item['relation'])
    except:
        print(f"Failure: {idx}")

1918
Matching: 10
Matching: 13
Matching: 41
Matching: 59
Matching: 72
Matching: 84
Matching: 85
Matching: 86
Matching: 87
Matching: 88
Matching: 89
Matching: 97
Matching: 98
Matching: 145
Matching: 151
Matching: 177
Matching: 178
Matching: 191
Matching: 194
Matching: 204
Matching: 205
Matching: 214
Matching: 215
Matching: 216
Matching: 217
Matching: 218
Matching: 219
Matching: 220
Matching: 221
Matching: 222
Matching: 223
Matching: 224
Matching: 225
Matching: 226
Matching: 227
Matching: 228
Matching: 229
Matching: 230
Matching: 253
Matching: 261
Matching: 262
Matching: 284
Matching: 285
Matching: 289
Matching: 292
Matching: 293
Matching: 294
Matching: 295
Matching: 296
Matching: 297
Matching: 300
Matching: 304
Matching: 306
Matching: 309
Matching: 313
Matching: 318
Matching: 321
Matching: 323
Matching: 324
Matching: 325
Matching: 327
Matching: 328
Matching: 353
Matching: 380
Matching: 381
Matching: 397
Matching: 411
Matching: 412
Matching: 414
Matching: 421
Matching: 422
Matching: 423


# Evaluation

In [494]:
unseen_relation_labels_to_int = {unseen_relation_labels[index]:index for index in range(len(unseen_relation_labels))}

In [495]:
the_predicted_mapped_to_int = [unseen_relation_labels_to_int[item[0]] for item in the_predicted]
the_actual_mapped_to_int = [unseen_relation_labels_to_int[item] for item in the_actual]

In [503]:
print("Matched Unseen Relation Label Count:", len(set([item[0] for item in the_predicted])))
print("All Unseen Relation Label Count:",len(set(the_actual)))
print("All Unseen Relation Label:", unseen_relation_labels)
print("Matched Unseen Relation Label:", set([item[0] for item in the_predicted]))
print("All Unseen Relation Label with Count:", Counter(the_actual))
print("Predicted Unseen Relation Label with Count:", Counter([item[0] for item in the_predicted]))
print("Total Instance that Has Unseen Label", len(data_to_test))
print("Total Instance that Matching", len(matching))

micro_precision,micro_recall,micro_f1_score = eval.compute_micro_PRF(np.asarray(the_predicted_mapped_to_int, dtype=np.float32), np.asarray(the_actual_mapped_to_int, dtype=np.float32))
macro_precision,macro_recall,macro_f1_score = eval.compute_macro_PRF(np.asarray(the_predicted_mapped_to_int, dtype=np.float32), np.asarray(the_actual_mapped_to_int, dtype=np.float32))


print("macro_f1_score", macro_f1_score)
print("micro_f1_score", micro_f1_score)
print("weighted_f1_score", weighted_f1_score)
print("macro_precision", macro_precision)
print("micro_precision", micro_precision)
print("weighted_precision", weighted_precision)
print("macro_recall", macro_recall)
print("micro_recall", micro_recall)
print("weighted_recall", weighted_recall)
print("acc", acc)

Matched Unseen Relation Label Count: 5
All Unseen Relation Label Count: 5
All Unseen Relation Label: ['narrative location', 'continent', 'capital', 'position held', 'part of']
Matched Unseen Relation Label: {'narrative location', 'capital', 'position held', 'continent', 'part of'}
All Unseen Relation Label with Count: Counter({'part of': 456, 'continent': 189, 'capital': 31, 'narrative location': 20, 'position held': 11})
Predicted Unseen Relation Label with Count: Counter({'narrative location': 309, 'part of': 165, 'continent': 112, 'position held': 79, 'capital': 42})
Total Instance that Has Unseen Label 707
Total Instance that Matching 253
macro_f1_score 0.43098852287939277
micro_f1_score 0.3578500707213578
weighted_f1_score 0.4401075278862871
macro_precision 0.4042699599978081
micro_precision 0.3578500707213578
weighted_precision 0.7200766059587451
macro_recall 0.46148872507106803
micro_recall 0.3578500707213578
weighted_recall 0.3578500707213578
acc 0.3578500707213578


In [135]:
print(len(set(the_predicted)))
print(len(set(the_actual)))
print(unseen_relation_labels)
print(set(the_predicted))
print(Counter(the_actual))
print(Counter(the_predicted))

print("macro_f1_score", macro_f1_score)
print("micro_f1_score", micro_f1_score)
print("weighted_f1_score", weighted_f1_score)
print("macro_precision", macro_precision)
print("micro_precision", micro_precision)
print("weighted_precision", weighted_precision)
print("macro_recall", macro_recall)
print("micro_recall", micro_recall)
print("weighted_recall", weighted_recall)
print("acc", acc)

4
15
['investor', 'ethnic group', 'league', 'mother', 'diplomatic relation', 'part of', 'organizer', 'operator', 'spouse', 'child', 'coach of sports team', 'product or material produced', 'founded by', 'manufacturer', 'sibling']
{'coach of sports team', 'product or material produced', 'diplomatic relation', 'organizer'}
Counter({'ethnic group': 19, 'organizer': 19, 'coach of sports team': 19, 'part of': 19, 'child': 19, 'manufacturer': 18, 'diplomatic relation': 17, 'spouse': 12, 'league': 12, 'mother': 11, 'founded by': 10, 'sibling': 9, 'product or material produced': 8, 'investor': 8, 'operator': 8})
Counter({'coach of sports team': 154, 'product or material produced': 32, 'diplomatic relation': 21, 'organizer': 1})
macro_f1_score 0.08394179089341852
micro_f1_score 0.18269230769230768
weighted_f1_score 0.09949261075097704
macro_precision 0.12876082251082252
micro_precision 0.18269230769230768
weighted_precision 0.164601023976024
macro_recall 0.15399896800825594
micro_recall 0.182692

In [136]:
print(classification_report(the_actual_mapped_to_int, the_predicted_mapped_to_int))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.00      0.00      0.00        19
           2       0.00      0.00      0.00        12
           3       0.00      0.00      0.00        11
           4       0.71      0.88      0.79        17
           5       0.00      0.00      0.00        19
           6       1.00      0.05      0.10        19
           7       0.00      0.00      0.00         8
           8       0.00      0.00      0.00        12
           9       0.00      0.00      0.00        19
          10       0.12      1.00      0.22        19
          11       0.09      0.38      0.15         8
          12       0.00      0.00      0.00        10
          13       0.00      0.00      0.00        18
          14       0.00      0.00      0.00         9

    accuracy                           0.18       208
   macro avg       0.13      0.15      0.08       208
weighted avg       0.16   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [137]:
eval.compute_macro_PRF(np.asarray(the_predicted_mapped_to_int, dtype=np.float32), np.asarray(the_actual_mapped_to_int, dtype=np.float32))

(0.48285308441558444, 0.16499889429455994, 0.2459519385706299)

In [138]:
eval.compute_micro_PRF(np.asarray(the_predicted_mapped_to_int, dtype=np.float32), np.asarray(the_actual_mapped_to_int, dtype=np.float32))

(0.18269230769230768, 0.18269230769230768, 0.18269230769230765)