In [None]:
# Allowed to make changes.
!pip install rank_bm25 -q
!pip install beir -q
!pip install tensorflow_text -q
!pip install optimum[intel] -q
!pip install wget -q
!pip install evaluate -q
!python -m pip install optimum[neural-compressor] -q
!python -m pip install optimum[openvino,nncf] -q

import numpy as np
import logging
import pathlib, os
import torch
import pickle
import gc
import sys
import warnings
import wget

from beir import util, LoggingHandler
from beir.retrieval import models
from beir.datasets.data_loader import GenericDataLoader
from beir.retrieval.evaluation import EvaluateRetrieval
from beir.retrieval.search.dense import DenseRetrievalExactSearch as DRES

from beir.retrieval import models
from beir.retrieval.evaluation import EvaluateRetrieval
from beir.retrieval.search.dense import DenseRetrievalExactSearch as DRES
warnings.simplefilter(action='ignore', category=FutureWarning)

from rank_bm25 import BM25Okapi
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForQuestionAnswering
import collections
import json
import pandas as pd
import re
import string
import timeit
from ast import literal_eval

In [None]:
# Allowed to make changes.

# Pre-processing cell. You can use this cell to pre-process input data or load
# your models.
#
# This cell builds every notebook-level object that the prediction function
# reads later: `global_model` (reader pipeline), `paragraph_ds` (paragraph
# table), `corpus` / `paragraphs_by_theme` / `bm25_theme` (per-theme lookup
# structures) and `retriever` (dense retriever wrapper).

# Load the domain-adapted reader checkpoint and its tokenizer, both resolved
# from the same checkpoint name.

name = 'devpranjal/deberta-v3-base-domain-adapted'
model = AutoModelForQuestionAnswering.from_pretrained(name)
tokenizer = AutoTokenizer.from_pretrained(name)

# Question-answering pipeline wrapping the reader; handle_impossible_answer=True
# is passed so the pipeline is allowed to report "no answer" for a context.
global_model = pipeline("question-answering", model=model, tokenizer=tokenizer, handle_impossible_answer=True)
# Download the paragraph table, then read it from the working directory.
# NOTE(review): the download is not given an explicit output name, so the
# read below presumably relies on the server naming the file 'paragraphs.csv'
# -- confirm.
download = wget.download('https://drive.google.com/uc?export=download&id=1GIK2DnacjH5Ddj2QEaCLApvxXayfp9IG')
paragraph_ds = pd.read_csv('paragraphs.csv')

# Per-theme corpus in the BEIR layout: corpus[theme][paragraph id] -> {'title', 'text'}
corpus = {}

# Per-theme list of paragraph texts, in the row order of `paragraph_ds`
paragraphs_by_theme = {}

# Per-theme BM25 index built over `paragraphs_by_theme[theme]`
bm25_theme = {}


# Columns read from `paragraph_ds` in this loop: 'theme', 'paragraph', 'id'.
for theme in paragraph_ds['theme'].unique():
    paragraphs_by_theme[theme] = paragraph_ds[paragraph_ds['theme']==theme]['paragraph'].tolist()
    # Same theme filter again, this time keeping all columns for the corpus rows.
    temp = paragraph_ds[paragraph_ds['theme']==theme]
    d = {}
    for i in temp.index:
        # Key is the row's 'id' value; the theme name doubles as the title.
        d[temp.loc[i,'id']] = {
                'title' : temp.loc[i,'theme'],
                'text' : temp.loc[i,'paragraph']
            }
    corpus[theme] = d

    paragraphs = paragraphs_by_theme[theme]
    # Tokenisation is a plain split on single spaces (case and punctuation are
    # kept); queries must be tokenised the same way to match.
    tokenized_corpus = [doc.split(" ") for doc in paragraphs]
    bm25 = BM25Okapi(tokenized_corpus)

    bm25_theme[theme] = bm25

# Dense retriever model.
# NOTE: this rebinds the name `model`, which until here referred to the reader
# model; `global_model` was already built from the reader above.
model = DRES(models.SentenceBERT("multi-qa-mpnet-base-dot-v1"), batch_size=16)
retriever = EvaluateRetrieval(model, score_function="dot")

# Two further downloads; `download` is overwritten each time and only holds the
# return value of the last call.
# NOTE(review): presumably these fetch the sample question / theme-interval CSV
# files read by the prediction cell -- confirm the resulting file names.
download = wget.download('https://drive.google.com/uc?export=download&id=1R2OLKRqkgD5TgkCZwuXnCe-jfrhXlTFi')
download = wget.download('https://drive.google.com/u/0/uc?id=1aYgNN3u2Vw0q14YcbPuGfYcG0jK-xoKB&export=download')

In [None]:
# Allowed to make changes.
def get_theme_model(theme):
  """Return the reader used for `theme`.

  A single shared reader (`global_model`) serves every theme, so the
  `theme` argument is accepted but not used.
  """
  return global_model

In [None]:
# Allowed to make changes.
def pred_theme_ans(questions, theme_model, pred_out, min_score=0.01):
  """Answer every question of one theme and append the predictions to `pred_out`.

  Args:
    questions: list of dicts with the keys 'id', 'theme' and 'question'. All
      entries are treated as belonging to the theme of the first entry.
    theme_model: callable invoked as `theme_model(question=..., context=...)`
      that returns a dict with the keys 'score' and 'answer'.
    pred_out: list that receives one dict per question with the keys
      'question_id', 'paragraph_id' and 'answers'. It is mutated in place.
    min_score: reader scores below this value are reported as "no answer".

  Returns:
    None. A question is reported with paragraph_id -1 and an empty answer when
    the retriever yields no paragraph for it, when the reader score is below
    `min_score`, or when the reader returns an empty answer.

  Reads the notebook-level objects `corpus`, `bm25_theme` and `retriever`.
  """
  # Nothing to do (and no theme to look up) for an empty batch.
  if not questions:
    return

  theme = questions[0]["theme"]
  # Theme-wise corpus in BEIR format: paragraph id -> {'title', 'text'}.
  theme_corpus = corpus[theme]
  bm25 = bm25_theme[theme]

  queries = {}
  bm25_results = {}

  # BM25 scores of the 5 best-scoring documents for each question. The keys
  # are stringified positions in the score list returned by BM25, not
  # paragraph ids.
  for q in questions:
    doc_scores = bm25.get_scores(q['question'].split(" "))
    top_positions = sorted(range(len(doc_scores)), key=lambda i: doc_scores[i])[-5:]
    queries[q['id']] = q['question']
    bm25_results[q['id']] = {str(pos): doc_scores[pos] for pos in top_positions}

  # NOTE(review): `bm25_result` and `k_values` are forwarded as keyword
  # arguments; whether the installed BEIR build actually combines the BM25
  # scores with the dense scores (or just ignores unknown kwargs) should be
  # confirmed.
  results = retriever.retrieve(theme_corpus, queries, bm25_result = bm25_results, k_values = [5])

  for q in questions:
    qid = q['id']
    # Default record: "no paragraph can answer this question".
    ans = {'question_id': qid, 'paragraph_id': -1, 'answers': ""}

    doc_scores = results.get(qid)
    if doc_scores:
      # Highest score wins; ties fall back to comparing the paragraph ids.
      best_id = max(zip(doc_scores.values(), doc_scores.keys()))[1]

      # Pass the question and the retrieved context to the reader model.
      res = theme_model(question=q['question'], context=theme_corpus[best_id]['text'])

      if not (res['score'] < min_score or res['answer'] == ""):
        # The corpus key already is the paragraph id, so no lookup by
        # paragraph text is needed (that lookup was a full-table scan and
        # picked the wrong id when two paragraphs share the same text).
        ans['paragraph_id'] = best_id
        ans['answers'] = res['answer']

    pred_out.append(ans)

In [None]:
# NOT allowed to make changes. 

# All theme prediction.
# Runs `pred_theme_ans` once per theme interval, times each run, and writes all
# predictions to 'sample_output_prediction.csv'.

# Each CSV is round-tripped through JSON to obtain a list of plain record dicts.
questions = json.loads(pd.read_csv("sample_input_question_1.csv").to_json(orient="records"))
theme_intervals = json.loads(pd.read_csv("sample_theme_interval_1.csv").to_json(orient="records"))
pred_out = []
theme_inf_time = {}
for theme_interval in theme_intervals:
  # "start"/"end" are used as 1-based, inclusive positions into `questions`.
  theme_ques = questions[int(theme_interval["start"]) - 1: int(theme_interval["end"])]
  theme = theme_ques[0]["theme"]
  # Load model fine-tuned for this theme.
  theme_model = get_theme_model(theme)
  # Single timed run (number=1); pred_theme_ans appends its results to pred_out.
  execution_time = timeit.timeit(lambda: pred_theme_ans(theme_ques, theme_model, pred_out), number=1)
  # Keyed by the interval's own "theme" field, not by the theme read from the questions.
  theme_inf_time[theme_interval["theme"]] = execution_time * 1000 # in milliseconds.
  print (theme_inf_time)
pred_df = pd.DataFrame.from_records(pred_out)
# Missing values become empty strings before the file is written.
pred_df.fillna(value='', inplace=True)
# Write prediction to a CSV file. Teams are required to submit this csv file.
pred_df.to_csv('sample_output_prediction.csv', index=False)

In [None]:
# NOT allowed to make changes. 

def normalize_answer(s):
  """Canonicalise an answer string for token-level comparison.

  Applied in this order: lowercase the text, drop every character found in
  `string.punctuation`, replace the standalone words "a", "an" and "the"
  with a space, then collapse whitespace runs to single spaces and trim.
  """
  lowered = s.lower()
  punctuation = set(string.punctuation)
  without_punc = ''.join(ch for ch in lowered if ch not in punctuation)
  without_articles = re.sub(r'\b(a|an|the)\b', ' ', without_punc, flags=re.UNICODE)
  return ' '.join(without_articles.split())

def get_tokens(s):
  """Return the whitespace-separated tokens of the normalised answer `s`.

  Any falsy input (empty string, None, ...) yields an empty list without
  being normalised.
  """
  return normalize_answer(s).split() if s else []

def calc_f1(a_gold, a_pred):
  """Token-level F1 between a gold answer and a predicted answer.

  Both strings are tokenised with `get_tokens`. If either side has no
  tokens the result is 1 when both are empty and 0 otherwise. With no shared
  tokens the result is 0; otherwise it is the harmonic mean of precision and
  recall over the multiset token overlap.
  """
  reference = get_tokens(a_gold)
  candidate = get_tokens(a_pred)

  # No-answer case: the two sides only agree if both are empty.
  if not reference or not candidate:
    return int(reference == candidate)

  shared = collections.Counter(reference) & collections.Counter(candidate)
  overlap = sum(shared.values())
  if overlap == 0:
    return 0

  precision = 1.0 * overlap / len(candidate)
  recall = 1.0 * overlap / len(reference)
  return (2 * precision * recall) / (precision + recall)

def calc_max_f1(predicted, ground_truths):
  """Best F1 of `predicted` against any of the `ground_truths`.

  With no ground truths the result is a boolean: True when `predicted` is
  empty as well, False otherwise. Otherwise the result is the largest
  `calc_f1` score over all ground truths, never below 0.
  """
  if len(ground_truths) == 0:
    return len(predicted) == 0
  # The leading 0 is the floor the running maximum starts from.
  return max([0] + [calc_f1(predicted, truth) for truth in ground_truths])

In [None]:
# NOT allowed to make changes. 

# Evaluation methodology.
# Compares the written predictions with the ground truth and accumulates, per
# theme, paragraph-level hit counts and the sum of answer F1 scores in `metrics`.
metrics = {}
pred = pd.read_csv("sample_output_prediction.csv")
pred.fillna(value='', inplace=True)
truth = pd.read_csv("sample_ground_truth.csv")
truth.fillna(value='', inplace=True)
# Both columns are stored as Python-literal strings and parsed back into
# objects (presumably lists -- they are iterated / compared to [] below).
truth.paragraph_id = truth.paragraph_id.apply(literal_eval)
truth.answers = truth.answers.apply(literal_eval)
# NOTE: rebinds `questions` (a list of dicts in the prediction cell) to a DataFrame.
questions = pd.read_csv("sample_input_question_1.csv")
for idx in pred.index:
  q_id = pred["question_id"][idx]
  # If several rows share the question id, the last one is used.
  q_rows = questions.loc[questions['id'] == q_id].iloc[-1]
  theme = q_rows["theme"]
  predicted_paragraph = pred["paragraph_id"][idx]
  predicted_ans = pred["answers"][idx]
  
  if theme not in metrics.keys():
    metrics[theme] = {"true_positive": 0, "true_negative": 0, "total_predictions": 0, "f1_sum": 0}

  # Again the last matching row wins.
  truth_row = truth.loc[truth['question_id'] == q_id].iloc[-1]
  truth_paragraph_id = [ int(i) for i in truth_row["paragraph_id"] ]
  if predicted_paragraph in truth_paragraph_id:
    # Increase TP for that theme.
    metrics[theme]["true_positive"] = metrics[theme]["true_positive"] + 1
  # -1 prediction in case there is no paragraph which can answer the query.
  if predicted_paragraph == -1 and truth_row["paragraph_id"] == []:
    # Increase TN.
    metrics[theme]["true_negative"] = metrics[theme]["true_negative"] + 1
  # Increase total predictions for that theme.
  metrics[theme]["total_predictions"] = metrics[theme]["total_predictions"] + 1
  # Best F1 of the predicted answer over all accepted ground-truth answers.
  f1 = calc_max_f1(predicted_ans, truth_row["answers"])
  metrics[theme]["f1_sum"] = metrics[theme]["f1_sum"] + f1

In [None]:
# NOT allowed to make changes.

# Final score.
# Combines the per-theme metrics into two weighted totals (paragraph score and
# QA score), scaling a theme down when its average inference time per
# question exceeds the threshold.
inf_time_threshold = 1000.0 # milliseconds.
final_para_score = 0.0
final_qa_score = 0.0
# Weight would stay hidden from teams.
# Here every theme found in `paragraph_ds` gets the same weight.
theme_weights = {}
for theme in paragraph_ds['theme'].unique():
  theme_weights[theme] = 1/len(paragraph_ds['theme'].unique())
for theme in metrics:
  inf_time_score = 1.0
  metric = metrics[theme]
  # Share of questions whose paragraph prediction was a true positive or true negative.
  para_score = (metric["true_positive"] + metric["true_negative"]) / metric["total_predictions"] 
  # Mean F1 over the theme's questions.
  qa_score = metric["f1_sum"] / metric["total_predictions"]
  # `theme_inf_time` holds the total milliseconds per theme from the prediction cell.
  avg_inf_time = theme_inf_time[theme] / metric["total_predictions"]
  # Penalty factor threshold/average applies only above the threshold; otherwise 1.0.
  if avg_inf_time > inf_time_threshold:
    inf_time_score = inf_time_threshold / avg_inf_time
  final_qa_score += theme_weights[theme] * inf_time_score * qa_score
  final_para_score += theme_weights[theme] * inf_time_score * para_score
print (final_para_score)
print (final_qa_score)
