In [2]:
!pip install bert_score
!pip install rouge-score
!pip install evaluate
!pip install sentence_transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 KB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers>=3.0.0
  Downloading transformers-4.26.1-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m54.6 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.2-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.2/199.2 KB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m93.4 MB/s[0m eta [36m0:00:0

In [3]:
import pandas as pd
from evaluate import load
from sentence_transformers import SentenceTransformer, util

# Sentence-embedding model used by the similarity helper further down.
model_sentence_transformers = SentenceTransformer(model_name_or_path='all-MiniLM-L6-v2')

# Metric modules fetched through `evaluate.load`.
rouge = load(path='rouge')
bertscore = load(path="bertscore")

Downloading (…)e9125/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)7e55de9125/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)55de9125/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)125/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading (…)e9125/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

Downloading (…)9125/train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

Downloading (…)7e55de9125/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)5de9125/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.95k [00:00<?, ?B/s]

## Load the validation set (dialogues and reference section texts)

In [34]:
# Read the validation CSV once, then pull the two text columns out as plain lists.
# The dialogues are lower-cased here; the section texts are kept as-is.
test = pd.read_csv('TaskA-ValidationSet.csv')
gt, dia = (
    test['section_text'].tolist(),
    test['dialogue'].str.lower().tolist(),
)

# Read the Task-A summaries generated by the different models

In [5]:
import pickle


def _load_pickle(path):
  """Unpickle the object stored at ``path`` and close the file afterwards."""
  # NOTE(security): pickle.load can execute arbitrary code while loading;
  # only use it on prediction files produced by a trusted source.
  with open(path, 'rb') as handle:
    return pickle.load(handle)


pred1 = _load_pickle('val_section_prediction_m3.pkl')    # gpt3 summary
pred2 = _load_pickle('val_section_prediction_m6_1.pkl')  # bart-large summary
pred3 = _load_pickle('val_section_prediction_m6_2.pkl')  # bart-large summary
pred4 = _load_pickle('val_section_prediction_m4.pkl')    # t5-base summary

# Ensemble Task-A summaries

In [31]:
import numpy as np

def similarity(s1, s2):
  """Return the cosine similarity between the embeddings of two texts.

  Both inputs are encoded with the notebook-level
  ``model_sentence_transformers`` model and compared with ``util.cos_sim``;
  the first entry of the resulting score matrix is returned as a float.
  """
  first_vec, second_vec = (
      model_sentence_transformers.encode(text, convert_to_tensor=True)
      for text in (s1, s2)
  )
  score_matrix = util.cos_sim(first_vec, second_vec)
  return float(score_matrix[0][0])

def select_one(all_preds):
  """Score each candidate by its mean similarity to the other candidates.

  Returns a dict mapping every prediction text to the numpy mean of its
  ``similarity`` scores against all other positions in ``all_preds``.
  Identical texts share a single key, so a later duplicate overwrites the
  entry written for an earlier one.
  """
  agreement = {}
  for idx, candidate in enumerate(all_preds):
    # Compare against every other position, skipping the candidate itself.
    peer_scores = [
        similarity(candidate, other)
        for other_idx, other in enumerate(all_preds)
        if other_idx != idx
    ]
    agreement[candidate] = np.mean(peer_scores)
  return agreement

def jaccard_sim(list1, list2):
    """Return the Jaccard similarity of the distinct items in two sequences.

    The score is ``|A & B| / |A | B|`` where ``A`` and ``B`` are the sets of
    items in ``list1`` and ``list2``; duplicates inside a sequence are ignored.

    Returns:
        A float in ``[0.0, 1.0]``. When both sequences are empty the union is
        empty as well; ``0.0`` is returned in that case instead of dividing by
        zero, so an empty text never counts as overlapping with anything.
    """
    set1, set2 = set(list1), set(list2)
    union_size = len(set1 | set2)
    if union_size == 0:
        # Nothing to compare: avoid ZeroDivisionError on two empty inputs.
        return 0.0
    return len(set1 & set2) / union_size

def select_two(all_preds, m):
  """Score each candidate by its word overlap with the source text ``m``.

  Every prediction and ``m`` are lower-cased and split on whitespace, and the
  two token lists are compared with ``jaccard_sim``. Lower-casing ``m`` here
  keeps the comparison case-insensitive even when the caller passes
  mixed-case text.

  Args:
    all_preds: iterable of candidate summary strings.
    m: the text the candidates are compared against.

  Returns:
    A dict mapping each prediction text to its Jaccard score. Identical
    texts share a single key.
  """
  # Tokenise the reference text once instead of once per candidate.
  reference_tokens = m.lower().split()
  return {
      pred: jaccard_sim(pred.lower().split(), reference_tokens)
      for pred in all_preds
  }

def blend_scores(score1, score2, weight=0.5):
  """Combine two score dicts into one ranking, best candidate first.

  Args:
    score1: dict mapping candidate -> first score; its keys define the
      candidates that are ranked.
    score2: dict mapping candidate -> second score; must contain every key
      of ``score1``.
    weight: share given to ``score1``; ``score2`` receives ``1 - weight``.
      The default of 0.5 weighs both scores equally.

  Returns:
    A list of ``(candidate, blended_score)`` tuples sorted by descending
    score. Candidates with equal scores keep the order they have in
    ``score1``.

  Raises:
    KeyError: if a key of ``score1`` is missing from ``score2``.
  """
  final_scores = {
      key: weight * value + (1.0 - weight) * score2[key]
      for key, value in score1.items()
  }
  return sorted(final_scores.items(), key=lambda item: item[1], reverse=True)

def ensemble(pred1, pred2, pred3, pred4, dia):
  """Pick one summary per example out of four candidate prediction lists.

  For every position the four candidates are scored twice: by their mean
  similarity to the other candidates (``select_one``) and by their word
  overlap with the lower-cased dialogue (``select_two``). The two scores are
  merged with ``blend_scores`` and the top-ranked candidate is kept.

  Args:
    pred1, pred2, pred3, pred4: iterables of candidate summaries, aligned
      position by position.
    dia: iterable of dialogue strings aligned with the predictions.

  Returns:
    A list holding the selected summary for each position.

  Raises:
    ValueError: if the five inputs do not all have the same length. A plain
      ``zip`` would silently drop the trailing examples instead.
  """
  from itertools import zip_longest

  missing = object()  # sentinel marking an input that ran out early
  prediction = []
  rows = zip_longest(pred1, pred2, pred3, pred4, dia, fillvalue=missing)
  for position, (*candidates, dialogue) in enumerate(rows):
    if dialogue is missing or any(c is missing for c in candidates):
      raise ValueError(
          'pred1, pred2, pred3, pred4 and dia must have the same length; '
          'an input ended at position %d' % position)
    all_preds = list(candidates)
    score1 = select_one(all_preds)
    score2 = select_two(all_preds, dialogue.lower())
    final_scores = blend_scores(score1, score2)
    # blend_scores sorts best-first, so the first entry is the winner.
    prediction.append(final_scores[0][0])
  return prediction

# Select one summary per validation dialogue from the four candidate lists.
final_summary = ensemble(pred1=pred1, pred2=pred2, pred3=pred3, pred4=pred4, dia=dia)

In [38]:
# Pair every row ID from the loaded CSV with the summary selected for that row.
test_predictions_with_ids = [
    [row_id, summary]
    for row_id, summary in zip(test['ID'].tolist(), final_summary)
]

In [39]:
# Turn the [ID, summary] pairs into a two-column frame and write it to CSV
# without the index column.
test_df = pd.DataFrame(
    data=test_predictions_with_ids,
    columns=['ID', 'SystemOutput2'],
)
test_df.to_csv('final_taskA_summarization_output.csv', index=False)