# Summary evaluation using knowledge graphs

In [None]:
# !pip install sentence_transformers
import json
from sentence_transformers import SentenceTransformer, util


## Load the triplets 

In [3]:

# The reference (context) triplets are parsed as JSON even though the file has a .txt extension.
with open("../../data/reference_output/context_triplets.txt", "r") as ref_file:
    ref_triplets = json.load(ref_file)

In [5]:
# Peek at the first two reference triplets to check the loaded structure.
ref_triplets[:2]

[{'label': 'Launched', 'from': 'City council', 'to': 'Green initiative'},
 {'label': 'Aimed at',
  'from': 'Green initiative',
  'to': 'Reducing air pollution'}]

In [6]:
# The summary triplets are parsed as JSON even though the file has a .txt extension.
with open("../../data/summary_output/summary_triplets2.txt", "r") as summary_file:
    summary_triplets = json.load(summary_file)

In [7]:
# Peek at the first two summary triplets to check the loaded structure.
summary_triplets[:2]

[{'label': 'Announced', 'from': 'City council', 'to': 'Green initiative'},
 {'label': 'Designed to tackle',
  'from': 'Green initiative',
  'to': 'Air pollution'}]

### Get tuples from json

In [18]:
def json_unravel(triplets_json):
    """Flatten triplet dicts into ``[from, label, to]`` lists.

    Args:
        triplets_json: Iterable of dicts, each providing the keys
            ``'from'``, ``'label'`` and ``'to'``.

    Returns:
        A list with one three-element list per input dict, ordered as
        ``[from, label, to]`` and in the same order as the input.
    """
    return [
        [triplet['from'], triplet['label'], triplet['to']]
        for triplet in triplets_json
    ]
    

In [19]:
# Flatten both triplet collections into [from, label, to] lists.
ref_triplet_tuple, summary_triplet_tuple = (
    json_unravel(triplets) for triplets in (ref_triplets, summary_triplets)
)

In [20]:
# First two reference triplets after flattening to [from, label, to].
ref_triplet_tuple[:2]

[['City council', 'Launched', 'Green initiative'],
 ['Green initiative', 'Aimed at', 'Reducing air pollution']]

In [21]:
# First two summary triplets after flattening to [from, label, to].
summary_triplet_tuple[:2]

[['City council', 'Announced', 'Green initiative'],
 ['Green initiative', 'Designed to tackle', 'Air pollution']]

###### NOTE
1. The entities may not match exactly; they may be synonyms of each other
2. The entities may not appear in the same order

## Compute Similarity

##### There are different ways to compute the similarity between the triplets or evaluate the summary against the reference:
1. Take the triplet and join it with a delimiter like space or newline and compare the resulting two sentences
2. Compare each entity of the triplet and decide accordingly

**We will focus on the first approach here**

In [11]:
# Sentence-embedding model used below to score how similar two triplets are.
SENTENCE_SIM_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
sentence_sim_model = SentenceTransformer(SENTENCE_SIM_MODEL_NAME)

  from tqdm.autonotebook import tqdm, trange


In [33]:
def context_summary_similarity(summary, context, model):
    """Find the best-matching context triplet for every summary triplet.

    Each triplet is joined with single spaces into one sentence, embedded with
    ``model.encode(..., convert_to_tensor=True)`` and scored against every
    context triplet with ``util.pytorch_cos_sim``. The context triplet with
    the highest score wins; on ties the earliest one is kept.

    Args:
        summary: Iterable of summary triplets, each a sequence of strings.
        context: Sequence of context (reference) triplets, each a sequence of
            strings. It is iterated once per summary triplet and indexed by
            position.
        model: Object exposing ``encode(text, convert_to_tensor=True)``.

    Returns:
        A list with one dict per summary triplet, in input order, with keys:
        ``"context"`` (the best-matching context triplet), ``"summary"`` (the
        summary triplet itself) and ``"similarity"`` (the winning score exactly
        as returned by ``util.pytorch_cos_sim``).

    Raises:
        ValueError: If ``summary`` is non-empty but ``context`` is empty, since
            there is nothing to match against.
    """
    # Cache of context embeddings keyed by position. An explicit membership
    # test is required: ``dict.get(key, model.encode(...))`` would evaluate the
    # encode call on every lookup and defeat the cache.
    context_embeddings = {}
    best_matches = []

    for summary_sent in summary:
        summary_embedding = model.encode(" ".join(summary_sent), convert_to_tensor=True)

        # Start from "no match yet" rather than 0.0 so that a best score that
        # is zero or negative is still selected instead of leaving the index
        # unset (or stale from the previous summary triplet).
        best_score = None
        best_index = None

        for context_index, context_sent in enumerate(context):
            if context_index not in context_embeddings:
                context_embeddings[context_index] = model.encode(
                    " ".join(context_sent), convert_to_tensor=True
                )
            score = util.pytorch_cos_sim(summary_embedding, context_embeddings[context_index])

            # Strict comparison keeps the first of several equally good matches.
            if best_score is None or best_score < score:
                best_score = score
                best_index = context_index

        if best_index is None:
            raise ValueError("context must contain at least one triplet to match against")

        best_matches.append({"context": context[best_index],
                             "summary": summary_sent,
                             "similarity": best_score})
    return best_matches

In [34]:
# This call can be slow: every summary triplet is scored against every
# reference triplet (m x n similarity computations).
summary_triplet_best_matches = context_summary_similarity(
    summary=summary_triplet_tuple,
    context=ref_triplet_tuple,
    model=sentence_sim_model,
)

In [35]:
# Inspect the first five summary triplets with their best-matching reference triplet.
summary_triplet_best_matches[:5]

[{'context': ['City council', 'Launched', 'Green initiative'],
  'summary': ['City council', 'Announced', 'Green initiative'],
  'similarity': tensor([[0.9711]], device='mps:0')},
 {'context': ['Green initiative', 'Aimed at', 'Reducing air pollution'],
  'summary': ['Green initiative', 'Designed to tackle', 'Air pollution'],
  'similarity': tensor([[0.9728]], device='mps:0')},
 {'context': ['Green initiative',
   'Aimed at',
   'Promoting environmental sustainability'],
  'summary': ['Green initiative',
   'Designed to enhance',
   'Environmental sustainability'],
  'similarity': tensor([[0.9680]], device='mps:0')},
 {'context': ['Initiative', 'Includes', 'Creating bike lanes'],
  'summary': ['Plan', 'Involves', 'Expanding bike lanes'],
  'similarity': tensor([[0.7210]], device='mps:0')},
 {'context': ['Initiative', 'Includes', 'Increasing charging stations'],
  'summary': ['Plan',
   'Involves',
   'Boosting availability of electric vehicle charging stations'],
  'similarity': tensor(

In [45]:
# Print only the matches flagged as hallucinated, i.e. whose summary triplet
# has at least one element containing the substring 'Controversial'.
for triplet_matched in summary_triplet_best_matches:
    hallucinated = any(
        'Controversial' in element for element in triplet_matched['summary']
    )
    if not hallucinated:
        continue

    # The similarity is indexed down to a plain Python number for printing.
    matching_score = triplet_matched['similarity'][0].tolist()[0]
    print("Matched triplets: ")
    print("Summary triplet: {}".format(triplet_matched['summary']))
    print("Reference triplet: {}".format(triplet_matched['context']))
    print("Matching score: {}".format(matching_score))
    print()
    
    

Matched triplets: 
Summary triplet: ['Initiative', 'Includes', 'Controversial plan to reduce public transportation services']
Reference triplet: ['Initiative', 'Includes', 'Increasing charging stations']
Matching score: 0.5326859951019287

Matched triplets: 
Summary triplet: ['Initiative', 'Includes', 'Controversial plan']
Reference triplet: ['Initiative', 'Includes', 'Planting trees']
Matching score: 0.5433995723724365

Matched triplets: 
Summary triplet: ['Controversial plan to reduce public transportation services', 'Raised concerns among', 'Residents']
Reference triplet: ['City council', 'Launched', 'Green initiative']
Matching score: 0.391315758228302



In [46]:
# Print only the matches NOT flagged as hallucinated, i.e. whose summary
# triplet has no element containing the substring 'Controversial'.
for triplet_matched in summary_triplet_best_matches:
    hallucinated = any(
        'Controversial' in element for element in triplet_matched['summary']
    )
    if hallucinated:
        continue

    # The similarity is indexed down to a plain Python number for printing.
    matching_score = triplet_matched['similarity'][0].tolist()[0]
    print("Matched triplets: ")
    print("Summary triplet: {}".format(triplet_matched['summary']))
    print("Reference triplet: {}".format(triplet_matched['context']))
    print("Matching score: {}".format(matching_score))
    print()
    
    

Matched triplets: 
Summary triplet: ['City council', 'Announced', 'Green initiative']
Reference triplet: ['City council', 'Launched', 'Green initiative']
Matching score: 0.971133828163147

Matched triplets: 
Summary triplet: ['Green initiative', 'Designed to tackle', 'Air pollution']
Reference triplet: ['Green initiative', 'Aimed at', 'Reducing air pollution']
Matching score: 0.9727627038955688

Matched triplets: 
Summary triplet: ['Green initiative', 'Designed to enhance', 'Environmental sustainability']
Reference triplet: ['Green initiative', 'Aimed at', 'Promoting environmental sustainability']
Matching score: 0.9679796099662781

Matched triplets: 
Summary triplet: ['Plan', 'Involves', 'Expanding bike lanes']
Reference triplet: ['Initiative', 'Includes', 'Creating bike lanes']
Matching score: 0.7209548950195312

Matched triplets: 
Summary triplet: ['Plan', 'Involves', 'Boosting availability of electric vehicle charging stations']
Reference triplet: ['Initiative', 'Includes', 'Increa

##### We showed above a very crude way of comparing two triplets.
This is definitely neither the best nor the only way to compare two triplets.
Ideally, one should do an entity-to-entity match, but this example also highlights the challenges.