## Implementation

### Load Data

Load validation data for testing, based on data missing from the training data of the SQuAD 2.0 dataset. Round 1 data contains themes that are not present in the training data, whereas round 2 data contains themes that are present in the training data.

In [None]:
!pip install --upgrade --no-cache-dir gdown

In [None]:
import gdown

def download_test_data(round = 1):
    """Download the test data (4 csv files) for the requested round.

    ``round`` must be 1 or 2; it selects one of two fixed lists of Google
    Drive file ids, and each id is turned into a download URL that is handed
    to ``gdown.download`` with ``quiet=True``.
    """
    drive_ids_per_round = (
        (
            "15WPYOD3ZLShFq_NRtiBHbpz3RTvc8ZWR",
            "15yxIF27NvEa3l12yNy6F5h8lGCJ2n7rf",
            "1Ilpxyj_0T-1KzQMdVSEbSmc1ybxOv69G",
            "1nkEDQZJY6_cAEVw3JlaKCgz0C6mDSYiv",
        ),
        (
            "1-3fMldkBVsTAX3W5JewdAdlUG_agexG0",
            "1-59pQe8TH7UaORF1RSqzFWybMJShdf1U",
            "1-AbnJRRHQiTU5zyUdDC2gUwbIGkEF5l6",
            "1-Px6FFj043L7lbAEBOAMSy2bdoPiVNhy",
        ),
    )
    assert round in [1,2], "round can be 1 or 2"
    # Rounds are 1-based, the tuple above is 0-based.
    selected_ids = drive_ids_per_round[round - 1]
    for file_id in selected_ids:
        download_url = "https://drive.google.com/u/1/uc?id=" + file_id + "&export=download"
        gdown.download(download_url, quiet=True)

### Generate Embeddings

For a given theme, break its paragraphs into sentences and store their paragraph id. Load sentence encoder and calculate embeddings for the sentences from paragraphs and the queries.

In [None]:
!pip install -U sentence-transformers

In [None]:
import nltk
nltk.download('punkt')

def para_to_sentences(para):
    """Return the sentences of ``para`` as produced by ``nltk.sent_tokenize``.

    Newlines, tabs and NUL characters are each replaced by a single space
    before tokenization.
    """
    flattened = para
    for unwanted in ('\n', '\t', '\x00'):
        flattened = flattened.replace(unwanted, ' ')
    return nltk.sent_tokenize(flattened)

def load_sents_from_para(paras):
    """Split a list of paragraph records into sentences.

    Each record's ``'paragraph'`` text is split with ``para_to_sentences``.
    Returns ``(sentences, owners)`` where ``owners[i]`` is the position in
    ``paras`` of the paragraph that sentence ``i`` came from.
    """
    all_sents, owners = [], []
    for position, record in enumerate(paras):
        pieces = para_to_sentences(record['paragraph'])
        all_sents.extend(pieces)
        owners.extend([position] * len(pieces))
    return all_sents, owners

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
from sentence_transformers import SentenceTransformer

def load_encoder(encoder="universal-sentence-encoder-qa-v3"):
    """Load the sentence encoder identified by ``encoder``.

    Args:
        encoder: One of ``"universal-sentence-encoder-qa-v3"`` (loaded from
            TF Hub), ``"mpnet-base-v2"``, ``"distilroberta-v1"`` or
            ``"minilm-l12-v2"`` (loaded through ``SentenceTransformer``).

    Returns:
        The loaded model object.

    Raises:
        ValueError: If ``encoder`` is not one of the supported names.
    """
    sentence_transformer_checkpoints = {
        "mpnet-base-v2": 'sentence-transformers/all-mpnet-base-v2',
        "distilroberta-v1": 'sentence-transformers/all-distilroberta-v1',
        "minilm-l12-v2": 'sentence-transformers/all-MiniLM-L12-v2',
    }
    if encoder == "universal-sentence-encoder-qa-v3":
        module_url = "https://tfhub.dev/google/universal-sentence-encoder-qa/3"
        return hub.load(module_url)
    if encoder in sentence_transformer_checkpoints:
        return SentenceTransformer(sentence_transformer_checkpoints[encoder])
    # Raising a plain string is itself a TypeError in Python 3; raise a real
    # exception so the caller sees the actual problem.
    raise ValueError(f"Unknown sentence encoder: {encoder!r}")

def get_embeddings_guse(sents, paras, para_id, model, sents_type="Context"):
    """Embed ``sents`` with the encoder signature matching ``sents_type``.

    For ``sents_type == "Question"`` the model's ``'question_encoder'``
    signature is used on the sentences alone. For any other value the
    ``'response_encoder'`` signature is used, and each sentence is paired
    with the full text of its own paragraph (looked up through ``para_id``)
    as context. In both cases the ``'outputs'`` entry of the result is
    returned.
    """
    if sents_type == "Question":
        encode_question = model.signatures['question_encoder']
        return encode_question(tf.constant(sents))['outputs']

    # One context string per sentence: the paragraph the sentence belongs to.
    # This choice of context is a tunable design decision.
    surrounding = [
        paras[para_id[position]]['paragraph'] for position, _ in enumerate(sents)
    ]
    encode_response = model.signatures['response_encoder']
    encoded = encode_response(
        input=tf.constant(sents),
        context=tf.constant(surrounding),
    )
    return encoded['outputs']

def get_embeddings_st(sents, model):
    """Return ``model.encode(sents)`` for a sentence-transformers style model."""
    embeddings = model.encode(sents)
    return embeddings

def get_embeddings(encoder_name, sents, paras, para_id, model, sents_type="Context"):
    """Embed ``sents`` with the backend that matches ``encoder_name``.

    Args:
        encoder_name: Name of the encoder ``model`` was loaded as.
        sents: Sentences (or questions) to embed.
        paras, para_id: Paragraph records and sentence-to-paragraph mapping;
            only forwarded to the Universal Sentence Encoder backend.
        model: The loaded encoder.
        sents_type: ``"Question"`` or ``"Context"``; only forwarded to the
            Universal Sentence Encoder backend.

    Returns:
        Whatever the selected backend returns for ``sents``.

    Raises:
        ValueError: If ``encoder_name`` is not a supported encoder.
    """
    if encoder_name == "universal-sentence-encoder-qa-v3":
        return get_embeddings_guse(sents, paras, para_id, model, sents_type)
    if encoder_name in ("mpnet-base-v2", "distilroberta-v1", "minilm-l12-v2"):
        return get_embeddings_st(sents, model)
    # Raising a plain string is itself a TypeError in Python 3.
    raise ValueError(f"Unknown sentence encoder: {encoder_name!r}")

### Nearest Neighbour Search

The calculated sentence embeddings are indexed by L2 distance, and a nearest-neighbour search is then applied to get the top-k closest sentences for each query.

In [None]:
!pip install -U  faiss-gpu

In [None]:
from sentence_transformers import CrossEncoder

def load_cross_encoder():
    """Instantiate the ``cross-encoder/ms-marco-MiniLM-L-12-v2`` CrossEncoder
    with ``max_length=512`` and return it."""
    return CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2', max_length=512)

In [None]:
import faiss
import numpy as np

def get_k_nearest_neighbours(source_embeds, target_embeds, k = 10):
    """Search ``source_embeds`` for the ``k`` nearest neighbours of each row
    of ``target_embeds``.

    A flat L2 FAISS index is built over ``source_embeds`` and the result of
    ``index.search`` is returned unchanged (the caller unpacks it as a pair).
    """
    embedding_dim = source_embeds.shape[1]
    l2_index = faiss.IndexFlatL2(embedding_dim)
    l2_index.add(np.array(source_embeds))
    query_matrix = np.array(target_embeds)
    return l2_index.search(query_matrix, k)

def rerank(model, queries, sents, nearest_neighbours):
    """Re-order each query's candidate sentences by cross-encoder relevance.

    Args:
        model: Object exposing ``predict(list_of_(query, sentence)_pairs)``
            that returns one score per pair.
        queries: Query strings; ``queries[i]`` belongs to
            ``nearest_neighbours[i]``.
        sents: All candidate sentences, indexed by the ids in
            ``nearest_neighbours``.
        nearest_neighbours: For each query, a sequence of sentence indices.

    Returns:
        A list with one list per query holding the same sentence indices,
        ordered from highest to lowest score. Candidates with equal scores
        keep their incoming relative order.
    """
    reranked = []
    for q_idx, candidates in enumerate(nearest_neighbours):
        if len(candidates) == 0:
            # Nothing to score; avoid calling the model on an empty batch.
            reranked.append([])
            continue
        query = queries[q_idx]
        pairs = [(query, sents[s_idx]) for s_idx in candidates]
        scores = model.predict(pairs)
        # Best-first, matching the best-first order of the incoming
        # neighbour lists (the previous ascending sort put the worst first).
        order = sorted(
            range(len(candidates)), key=lambda pos: scores[pos], reverse=True
        )
        reranked.append([candidates[pos] for pos in order])
    return reranked

### Check previously answered queries

### Context Generation

Generates a context for a given query and its nearest neighbours. Also provides a method to get the paragraph id given the start idx of the answer.

In [None]:
def get_context(query_id, query, sents, paras, para_ids, nearest_neighbours, option=1, m=1):
    """Build the QA context for one query from its nearest sentences.

    Args:
        query_id: Unused; kept for interface compatibility.
        query: Unused; kept for interface compatibility.
        sents: All sentences of the theme.
        paras: Paragraph records; ``paras[i]['id']`` is the paragraph id.
        para_ids: ``para_ids[s]`` is the position in ``paras`` of the
            paragraph that sentence ``s`` belongs to.
        nearest_neighbours: Sentence indices selected for this query.
        option: ``1`` uses only the selected sentences; ``2`` also includes
            up to ``m`` sentences before and after each one, restricted to
            the same paragraph.
        m: Window half-width for ``option == 2`` (ignored for option 1).

    Returns:
        ``(context, context_para_ids, sent_length)`` where ``context`` is the
        chosen sentences joined by single spaces, ``context_para_ids[j]`` is
        the paragraph id of the j-th chosen sentence, and ``sent_length[j]``
        is the index in ``context`` just past the end of that sentence (the
        position of the separating space, or ``len(context)`` for the last).

    Raises:
        ValueError: If ``option`` is not 1 or 2.
    """
    if option not in (1, 2):
        # Previously this fell through and crashed on an unbound local.
        raise ValueError(
            f"Unsupported context option {option!r}: only options 1 and 2 are implemented"
        )
    window = 0 if option == 1 else m

    pieces, context_para_ids = [], []
    for sent_id in nearest_neighbours:
        if sent_id < 0:
            # A negative index is not a real sentence (nearest-neighbour
            # search may pad with -1); do not let it wrap around.
            continue
        for cur_id in range(sent_id - window, sent_id + window + 1):
            if 0 <= cur_id < len(para_ids) and para_ids[sent_id] == para_ids[cur_id]:
                pieces.append(sents[cur_id])
                context_para_ids.append(paras[para_ids[cur_id]]['id'])

    # The offsets below assume one separator character between sentences, so
    # the context must actually be joined with one.
    context = " ".join(pieces)
    sent_length = []
    boundary = -1
    for piece in pieces:
        boundary += len(piece) + 1
        sent_length.append(boundary)
    return context, context_para_ids, sent_length

def para_id_retriever(start_idx, sent_length, context_para_ids):
    """Map an answer's start offset to the id of the paragraph containing it.

    Returns -1 when ``start_idx`` is -1. Otherwise returns the paragraph id
    of the first sentence whose boundary in ``sent_length`` is at or beyond
    ``start_idx``, falling back to the last paragraph id when no boundary
    qualifies.
    """
    if start_idx == -1:
        return -1
    for position, boundary in enumerate(sent_length):
        if boundary >= start_idx:
            return context_para_ids[position]
    return context_para_ids[-1]

### Load QA model

Given a theme, download the corresponding fine-tuned QA model and load the QA pipeline 

In [None]:
!pip install adapter-transformers

In [None]:
# You will need to restart the runtime here, due to import conflicts

from transformers import AutoModelWithHeads
from transformers import AutoTokenizer
from transformers import pipeline
from transformers.adapters import AutoAdapterModel
from transformers.adapters import AdapterConfig

def load_adapter_model_pipeline(adapter):
    """Build a question-answering pipeline from a base model plus an adapter.

    Args:
        adapter: One of ``"roberta-base-pf"``, ``"roberta-base-pf-ukp"``,
            ``"bart-large"``, ``"bert-pf-ukp"``, ``"roberta-base-hl"`` or
            ``"bert-hl-ukp"``.

    Returns:
        A ``question-answering`` pipeline created with
        ``handle_impossible_answer=True``, using the base model with the
        loaded adapter activated and the base model's tokenizer.

    Raises:
        ValueError: If ``adapter`` is not one of the supported names.
    """
    # name -> (base checkpoint, adapter id, AdapterConfig name, extra config kwargs)
    hub_adapters = {
        'roberta-base-pf-ukp': ("roberta-base", "qa/squad2@ukp", "pfeiffer", {}),
        'bart-large': (
            "facebook/bart-large",
            "qa/squad2@lohfink-rossi",
            "lohfink-rossi-leaveout",
            {"non_linearity": "relu", "reduction_factor": 16},
        ),
        'bert-pf-ukp': ("bert-base-uncased", "qa/squad2@ukp", "pfeiffer", {}),
        'roberta-base-hl': ("roberta-base", "qa/squad2@ukp", "houlsby", {}),
        'bert-hl-ukp': ("bert-base-uncased", "qa/squad2@ukp", "houlsby", {}),
    }

    if adapter == "roberta-base-pf":
        # This adapter is loaded with source='hf' and needs the heads model.
        base_checkpoint = 'roberta-base'
        model = AutoModelWithHeads.from_pretrained(base_checkpoint)
        adapter_name = model.load_adapter('AdapterHub/roberta-base-pf-squad_v2', source='hf')
    elif adapter in hub_adapters:
        base_checkpoint, adapter_id, config_name, config_kwargs = hub_adapters[adapter]
        model = AutoAdapterModel.from_pretrained(base_checkpoint)
        config = AdapterConfig.load(config_name, **config_kwargs)
        adapter_name = model.load_adapter(adapter_id, config=config)
    else:
        # Previously an unknown name fell through to an unbound `model`.
        raise ValueError(f"Unknown adapter: {adapter!r}")

    model.active_adapters = adapter_name
    # The 'bert-pf-ukp' branch used `==` here, so the tokenizer was never
    # assigned; it is now assigned once for every adapter.
    tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)

    qa = pipeline(
        task='question-answering',
        model=model,
        tokenizer=tokenizer,
        handle_impossible_answer=True,
    )
    return qa


In [None]:
import gdown
import json
from zipfile import ZipFile

def load_model_links(json_link):
    """Download the JSON of model links and return its parsed content.

    ``json_link`` is fetched with ``gdown``; the parsed content of
    ``theme_wise_models.json`` in the working directory is then returned
    (presumably the file that download produces — verify the file name).
    """
    gdown.download(json_link, quiet=True)
    with open('theme_wise_models.json') as links_file:
        return json.load(links_file)

def load_theme_model_pipeline(theme, tokenizer_link, model_links, model_id=""):
    """Load the ONNX Runtime QA pipeline for a theme.

    Args:
        theme: Theme name; key into ``model_links``.
        tokenizer_link: Download URL of the zipped tokenizer.
        model_links: Mapping ``theme -> {'link': url, 'path': dir}`` giving
            the zipped model's URL and the directory it extracts to.
        model_id: If non-empty, the downloads are skipped and this model id
            is loaded and exported (``from_transformers=True``) instead.

    Returns:
        A ``question-answering`` pipeline with
        ``handle_impossible_answer=True``.

    NOTE(review): ``ORTModelForQuestionAnswering`` is not imported in the
    visible part of this file; it must be in scope before this is called
    (presumably ``from optimum.onnxruntime import ...`` — confirm).
    """
    task = "question-answering"
    if not model_id:
        gdown.download(tokenizer_link, "tokenizer.zip", quiet=True)
        gdown.download(model_links[theme]['link'], "model.zip", quiet=True)
        for archive_name in ("tokenizer.zip", "model.zip"):
            with ZipFile(archive_name, 'r') as archive:
                archive.extractall()
        tokenizer_path = "tokenizer"
        model_path = model_links[theme]['path']
        # Only the optimized + quantized model is loaded. The earlier extra
        # load of the vanilla model was discarded immediately and is removed.
        # Drop `provider` to run on the default execution provider.
        model = ORTModelForQuestionAnswering.from_pretrained(
            model_path, file_name="model_optimized_quantized.onnx", provider="CUDAExecutionProvider"
        )
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
    else:
        model = ORTModelForQuestionAnswering.from_pretrained(
            model_id, from_transformers=True, provider="CUDAExecutionProvider"
        )
        tokenizer = AutoTokenizer.from_pretrained(model_id)
    optimum_qa = pipeline(
        task, model=model, tokenizer=tokenizer, handle_impossible_answer=True
    )
    return optimum_qa

### Run QA pipeline

Predicts the answer given query and context in the required format

In [None]:
def predict(query_id, query, context, qa_model, pred_paras, sent_length, context_para_ids):
    """Run the QA model once on ``context`` and format the answer record.

    The record holds the question id, the answer text wrapped in a list, the
    paragraph id (-1 when the answer is empty, otherwise resolved from the
    answer's start offset) and the context itself as extra information.
    ``pred_paras`` is accepted but not used.
    """
    result = qa_model(question=query, context=context)
    answer_text = result['answer']
    paragraph_id = -1
    if answer_text != "":
        paragraph_id = para_id_retriever(
            result['start'], sent_length, context_para_ids
        )
    return {
        "question_id": query_id,
        "answers": [answer_text],
        "paragraph_id": paragraph_id,
        "context": context,
    }

In [None]:
def divide_context(passes, context, sent_length):
    """Split ``context`` into at most ``passes`` chunks on sentence boundaries.

    Args:
        passes: Desired number of chunks; capped at the number of sentences.
        context: The full context string.
        sent_length: ``sent_length[j]`` is the index in ``context`` just past
            the end of the j-th sentence (one separator character is assumed
            between consecutive sentences).

    Returns:
        A list of chunk strings that together cover every sentence. The
        single separator character between consecutive chunks is dropped.
        Returns ``[]`` when there are no sentences or ``passes`` is not
        positive.
    """
    context_list = []
    rem_sents = len(sent_length)
    passes_left = min(passes, rem_sents)
    last_sent, end = -1, -1
    while passes_left > 0:
        # Integer arithmetic throughout: the old float bookkeeping made the
        # remaining-sentence count fractional and lost trailing sentences.
        take = rem_sents // passes_left
        last_sent += take
        context_list.append(context[end + 1:sent_length[last_sent]])
        end = sent_length[last_sent]
        rem_sents -= take
        passes_left -= 1
    return context_list

def get_best_prediction(query, context_list, qa_model):
    """Run the QA model on every chunk and keep the best non-empty answer.

    Args:
        query: The question text.
        context_list: Consecutive chunks of one context, each separated from
            the next by exactly one dropped character (as produced by
            ``divide_context``).
        qa_model: Callable ``qa_model(question=..., context=...)`` returning
            a dict with at least ``'answer'``, ``'score'`` and ``'start'``.

    Returns:
        The highest-scoring prediction with a non-empty answer, with
        ``'start'`` (and ``'end'`` when present) shifted to be relative to
        the full context; ``{'answer': ""}`` when no chunk gives an answer.
    """
    best_prediction = {'answer': ""}
    chunk_offset = 0
    for chunk in context_list:
        prediction = qa_model(question=query, context=chunk)
        is_answer = prediction['answer'] != ""
        if is_answer and (
            best_prediction['answer'] == ""
            or prediction['score'] > best_prediction['score']
        ):
            shifted = dict(prediction)
            shifted['start'] = prediction['start'] + chunk_offset
            if 'end' in prediction:
                shifted['end'] = prediction['end'] + chunk_offset
            best_prediction = shifted
        # +1 for the separator character dropped between chunks; without it
        # every later chunk's offsets were one too small per preceding chunk.
        chunk_offset += len(chunk) + 1
    return best_prediction

def multiple_pass_prediction(passes, query_id, query, context, qa_model, pred_paras, sent_length, context_para_ids):
    """Answer a query by running the QA model over ``passes`` context chunks.

    The context is split with ``divide_context``, the best chunk-level answer
    is chosen with ``get_best_prediction``, and the result is formatted as a
    record with the question id, the answer in a list, the paragraph id
    (-1 for an empty answer) and the full context. ``pred_paras`` is accepted
    but not used.
    """
    chunks = divide_context(passes, context, sent_length)
    best = get_best_prediction(query, chunks, qa_model)
    answer_text = best['answer']
    paragraph_id = -1
    if answer_text != "":
        paragraph_id = para_id_retriever(
            best['start'], sent_length, context_para_ids
        )
    return {
        "question_id": query_id,
        "answers": [answer_text],
        "paragraph_id": paragraph_id,
        "context": context,
    }

In [None]:
import time
from tqdm import tqdm

def predict_theme_wise(paras, ques, pred_out, encoder_name, sents_encoder, qa_pipeline, ctx_option, k, m, qa_passes):
    """Predict the answers for all queries of a particular theme.

    Args:
        paras: Paragraph records of the theme.
        ques: Question records of the theme (``'id'``, ``'question'``,
            ``'theme'``).
        pred_out: List that the answer records are appended to (mutated).
        encoder_name: Name of the sentence encoder backend.
        sents_encoder: The loaded sentence encoder.
        qa_pipeline: The QA pipeline used for answer extraction.
        ctx_option: Context-generation option forwarded to ``get_context``.
        k: Number of nearest sentences to retrieve per question.
        m: Window size forwarded to ``get_context``.
        qa_passes: Number of chunks the context is split into for QA.

    Returns:
        ``(ann_inference_time, qna_inference_time)`` in milliseconds, summed
        over the theme's questions. ``(0.0, 0.0)`` when ``ques`` is empty.
    """
    if not ques:
        # Nothing to predict; avoids indexing ques[0] and dividing by zero.
        return (0., 0.)

    ann_inference_time, qna_inference_time = 0., 0.
    theme = ques[0]["theme"]
    print(f'Theme: {theme}')

    # Preprocessing of contexts
    sents, para_id = load_sents_from_para(paras)
    sents_embed = get_embeddings(
        encoder_name, sents, paras, para_id, sents_encoder, sents_type="Context"
    )
    if sents:
        # Asking FAISS for more neighbours than indexed vectors pads the
        # result with -1, which would silently index the last sentence.
        k = min(k, len(sents))

    # Nearest Neighbour Search
    start_time = time.perf_counter()
    ques_list = [q['question'] for q in ques]
    ques_embed = get_embeddings(
        encoder_name, ques_list, None, None, sents_encoder, sents_type="Question"
    )
    D, I = get_k_nearest_neighbours(sents_embed, ques_embed, k)
    ann_inference_time = (time.perf_counter() - start_time)*1000.

    # Paragraph ids of the retrieved sentences; passed on to the prediction
    # step, which currently does not use them.
    pred_paras = [
        [paras[para_id[sent_idx]]['id'] for sent_idx in neighbours]
        for neighbours in I
    ]

    start_time = time.perf_counter()
    for i, q in enumerate(tqdm(ques)):
        # Context Generation
        context, context_para_ids, sent_length = get_context(
            q["id"], q['question'], sents, paras, para_id, I[i], ctx_option, m
        )

        # Answer Prediction and Paragraph Retrieval
        ans = multiple_pass_prediction(
            qa_passes, q["id"], q['question'], context, qa_pipeline,
            pred_paras[i], sent_length, context_para_ids
        )
        pred_out.append(ans)

    # Print Inference Time
    qna_inference_time = (time.perf_counter() - start_time)*1000.
    print(
        f'Avg. ANN IT = {round(ann_inference_time/len(ques), 2)} ms, ' +
        f'Avg. QnA IT = {round(qna_inference_time/len(ques),2)} ms\n'
    )
    return (ann_inference_time, qna_inference_time)

In [None]:
import pandas as pd

def predict_multiple_themes(params):
    """Predict answers for the queries of the first ``num_themes`` themes.

    Reads ``input_paragraph.csv``, ``input_question.csv`` and
    ``theme_interval.csv``, runs ``predict_theme_wise`` per theme, writes all
    answer records to ``output_prediction.csv`` and returns a dict mapping
    each theme to the inference-time pair reported for it.

    ``params['num_themes']`` is overwritten in place with the number of
    available themes when it is -1 or larger than that number.
    """
    def read_records(csv_name):
        # CSV -> list of plain dict records (via pandas' JSON export).
        return json.loads(pd.read_csv(csv_name).to_json(orient="records"))

    # Load paras and queries
    paragraphs = read_records("input_paragraph.csv")
    questions = read_records("input_question.csv")
    theme_intervals = read_records("theme_interval.csv")
    pred_out, theme_inf_time = [], {}

    # Number of themes for prediction
    available = len(theme_intervals)
    if params['num_themes'] == -1 or params['num_themes'] > available:
        params['num_themes'] = available

    # A pretrained model is shared across themes, so load it only once.
    if params['use_pretrained']:
        qa_pipeline = load_adapter_model_pipeline(params['model_id'])

    # Predict for each theme
    for interval in theme_intervals[:params['num_themes']]:
        theme = interval["theme"]
        if not params['use_pretrained']:
            # Fine-tuned models are theme specific.
            qa_pipeline = load_theme_model_pipeline(
                theme, params['tok_link'], params['qam_links'],
            )
        first, last = int(interval["start"]) - 1, int(interval["end"])
        theme_ques = questions[first:last]
        theme_paras = [para for para in paragraphs if para["theme"] == theme]
        theme_inf_time[theme] = predict_theme_wise(
            theme_paras, theme_ques, pred_out, params['encoder_name'], params['encoder'],
            qa_pipeline, params['ctx_option'], params['k'], params['m'], params['qa_passes']
        )

    # Export predictions
    pd.DataFrame.from_records(pred_out).to_csv('output_prediction.csv', index=False)

    return theme_inf_time

### Evaluation

Evaluates and prints statistics of the predictions by the given pipeline. Metrics include the F1 Score, Paragraph Accuracy, Mean Rank of the gold paragraph, performance on true positives and negatives, inference times, etc. 

In [None]:
import string, re
from collections import Counter

def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace."""
    lowered = s.lower()
    # Drop every ASCII punctuation character.
    without_punc = lowered.translate(str.maketrans('', '', string.punctuation))
    # Replace the standalone words a / an / the with a space.
    without_articles = re.sub(r'\b(a|an|the)\b', ' ', without_punc, flags=re.UNICODE)
    # Collapse whitespace runs and trim both ends.
    return ' '.join(without_articles.split())


def get_tokens(s):
    """Return the whitespace-separated tokens of the normalized answer, or
    an empty list when ``s`` is falsy."""
    return normalize_answer(s).split() if s else []


def calc_f1(a_gold, a_pred):
    """Calculate the token-level F1 score of a prediction against one gold
    answer."""
    gold_toks, pred_toks = get_tokens(a_gold), get_tokens(a_pred)

    if not gold_toks or not pred_toks:
        # If either is no-answer, then F1 is 1 if they agree, 0 otherwise
        return int(gold_toks == pred_toks)

    # Multiset intersection counts each shared token at most as often as it
    # appears on both sides.
    shared = Counter(gold_toks) & Counter(pred_toks)
    num_same = sum(shared.values())
    if num_same == 0:
        return 0

    precision = 1.0 * num_same / len(pred_toks)
    recall = 1.0 * num_same / len(gold_toks)
    return (2 * precision * recall) / (precision + recall)


def calc_max_f1(predicted, ground_truths):
    """Calculate the best F1 score of the prediction over all gold answers.

    Both sides are converted with ``str`` before scoring. Returns 0 when
    ``ground_truths`` is empty.
    """
    prediction_text = str(predicted)
    candidate_scores = [
        calc_f1(prediction_text, str(gold)) for gold in ground_truths
    ]
    # The leading 0 is the floor the running maximum starts from.
    return max([0, *candidate_scores])

In [None]:
from ast import literal_eval
import pandas as pd

def evaluate_metrics():
    """Calculate metrics using the predictions and the ground truths.

    Reads ``input_question.csv``, ``output_prediction.csv`` and
    ``ground_truth.csv`` from the working directory.

    Returns:
        A dict keyed by theme. Each value is a dict of counters:
        ``total_positive`` / ``total_negative`` (questions with / without a
        gold paragraph), ``true_positive`` (predicted paragraph is one of the
        gold paragraphs), ``true_negative`` (predicted -1 and no gold
        paragraph), ``ansInCtx`` (answerable questions where some gold answer
        is a substring of the generated context), ``total_predictions`` and
        ``f1_sum`` (sum of the per-question max F1).
    """
    metrics = {}
    # Load questions, prediction and ground_truth csv
    questions = pd.read_csv("input_question.csv")
    pred = pd.read_csv("output_prediction.csv")
    truth = pd.read_csv("ground_truth.csv")

    # String to list and numbers conversion: these columns hold Python list
    # literals serialized as text.
    truth.paragraph_id = truth.paragraph_id.apply(literal_eval)
    truth.answers = truth.answers.apply(literal_eval)
    pred.answers = pred.answers.apply(literal_eval)

    # Go through each prediction and update the metrics
    for idx in pred.index:
        q_id = pred["question_id"][idx]
        # If several question rows share the id, the last one is used.
        q_rows = questions.loc[questions['id'] == q_id].iloc[-1]
        theme = q_rows["theme"]
        predicted_paragraph = pred["paragraph_id"][idx]
        # Only the first predicted answer is scored.
        predicted_ans = pred["answers"][idx][0]

        # Lazily create the counters the first time a theme is seen.
        if theme not in metrics.keys():
            metrics[theme] = {
                "total_positive": 0,
                "total_negative": 0,
                "true_positive": 0,
                "true_negative": 0,
                'ansInCtx': 0,
                "total_predictions": 0,
                "f1_sum": 0
            }

        # If several ground-truth rows share the id, the last one is used.
        truth_row = truth.loc[truth['question_id'] == q_id].iloc[-1]
        truth_paragraph_id = [ int(i) for i in truth_row["paragraph_id"] ]

        # An empty gold paragraph list marks an unanswerable question.
        if truth_paragraph_id == []:
            metrics[theme]["total_negative"] += 1
        else:
            metrics[theme]["total_positive"] += 1
            # Count the question once if any gold answer appears verbatim in
            # the context that was given to the QA model.
            # NOTE(review): assumes pred['context'][idx] is a string; an
            # empty context may be read back from CSV as NaN — confirm.
            for ans in truth_row["answers"]:
                if ans in pred['context'][idx]:
                    metrics[theme]["ansInCtx"] += 1
                    break

        if predicted_paragraph in truth_paragraph_id:
            # Increase TP for that theme.
            metrics[theme]["true_positive"] = metrics[theme]["true_positive"] + 1

        # -1 prediction in case there is no paragraph which can answer the query.
        if predicted_paragraph == -1 and truth_paragraph_id == []:
            # Increase TN.
            metrics[theme]["true_negative"] = metrics[theme]["true_negative"] + 1

        # Increase total predictions for that theme.
        metrics[theme]["total_predictions"] = metrics[theme]["total_predictions"] + 1
        # Score unanswerable questions against the empty string.
        # NOTE(review): this assigns into the extracted row, presumably not
        # into the `truth` frame itself — confirm.
        if truth_row["answers"] == []:
            truth_row["answers"] = [""]
        f1 = calc_max_f1(predicted_ans, truth_row["answers"])
        metrics[theme]["f1_sum"] = metrics[theme]["f1_sum"] + f1

    return metrics

In [None]:
def show(val, sz, dec = 0):
    """Return ``val`` rounded to ``dec`` decimals as text, left-padded with
    spaces to at least ``sz`` characters (longer text is not truncated)."""
    return str(round(val, dec)).rjust(sz)


def calculate_score(theme_inf_time, inf_time_threshold = 1000.0):
    """Calculate and print theme-wise as well as aggregated metric scores.

    Args:
        theme_inf_time: Mapping ``theme -> (ann_time_ms, qna_time_ms)`` with
            the total inference times of each theme.
        inf_time_threshold: Average per-query time (ms) above which a theme's
            scores are scaled down by ``threshold / average_time``.

    Prints one table row per theme plus a grand-total row. Percentages whose
    denominator is zero (e.g. a theme with no answerable questions) are
    printed as 0 instead of raising ``ZeroDivisionError``.
    """
    metrics = evaluate_metrics()
    final_para_score = 0.0
    final_qa_score = 0.0
    q, aic, tait, tqit, totp, totn, tp, tn, tf1 = 0, 0, 0., 0., 0, 0, 0, 0, 0.

    print('Theme             | Queries |  AIT: (ANN) + (QnA) = Total | ansInCtx'
        ' % | TP % (TotP) | TN % (TotN) | Para Acc | Final PA | F1 Score | '
        'Final F1'
    )
    print('------------------|---------|-----------------------------|-------'
        '-----|-------------|-------------|----------|----------|----------|'
        '---------'
    )

    # Print theme wise metrics score
    for theme, metric in metrics.items():
        inf_time_score = 1.0
        para_score = (metric["true_positive"] + metric["true_negative"]) / metric["total_predictions"]
        qa_score = metric["f1_sum"] / metric["total_predictions"]
        avg_ann_inf_time = theme_inf_time[theme][0] / metric["total_predictions"]
        avg_qna_inf_time = theme_inf_time[theme][1] / metric["total_predictions"]

        # Themes slower than the threshold are penalised proportionally.
        avg_inf_time = avg_ann_inf_time + avg_qna_inf_time
        if avg_inf_time > inf_time_threshold:
            inf_time_score = inf_time_threshold / avg_inf_time

        q += metric["total_predictions"]
        aic += metric["ansInCtx"]
        tait += theme_inf_time[theme][0]
        tqit += theme_inf_time[theme][1]
        totp += metric["total_positive"]
        totn += metric["total_negative"]
        tp += metric["true_positive"]
        tn += metric["true_negative"]
        tf1 += metric["f1_sum"]
        final_qa_score += inf_time_score * qa_score
        final_para_score += inf_time_score * para_score

        # max(1, ...) guards every percentage against an empty denominator,
        # now including the ansInCtx column.
        print(f'{(theme + " "*17)[:17]} | '
            f'{show(metric["total_predictions"],7)} | '
            f'{show(avg_ann_inf_time,6,2)} + {show(avg_qna_inf_time,6,2)} = '
            f'{show(avg_inf_time,6,2)} ms | '
            f'{show(metric["ansInCtx"]*100./max(1,metric["total_positive"]),8,2)} '
            f'% | {show(int(metric["true_positive"]*100./max(1,metric["total_positive"])),3)}% '
            f'({show(metric["total_positive"],4)}) | '
            f'{show(int(metric["true_negative"]*100./max(1,metric["total_negative"])),3)}% '
            f'({show(metric["total_negative"],4)}) | '
            f'{show(para_score,8,4)} | {show(inf_time_score*para_score,8,5)} | '
            f'{show(qa_score,8,4)} | {show(inf_time_score*qa_score,8,5)}')

    # Final scores are averaged over themes; guard the no-theme case.
    num_themes = max(1, len(metrics))
    total_q = max(1, q)
    final_qa_score /= num_themes
    final_para_score /= num_themes
    # Print Aggregated Metrics Score
    print(f'------------------|---------|-----------------------------|'
        f'------------|-------------|-------------|----------|----------'
        f'|----------|---------')
    print(f'Grand Total       | {show(q,7)} | {show(tait/total_q,6,2)} + '
        f'{show(tqit/total_q,6,2)} = {show((tait+tqit)/total_q,6,2)} ms |'
        f'{show(aic*100./max(1,totp),9,2)} % | {show(int(tp*100./max(1,totp)),3)}% '
        f'({show(totp,4)}) | {show(int(tn*100./max(1,totn)),3)}% '
        f'({show(totn,4)}) | {show((tp+tn)/total_q,8,4)} | {show(final_para_score,8,5)} | '
        f'{show(tf1/total_q,8,4)} | {show(final_qa_score,8,5)}')


## Execution

In [None]:
# Validation round to download: round 1 contains queries for new themes
# (not in the training data), round 2 contains queries for old themes.
validation_data = 1 #@param ["1", "2"] {type:"raw"}
# Number of themes to evaluate; choose -1 to test on all themes
num_themes_to_test = 10 #@param {type:"integer"}

sentence_encoder = "mpnet-base-v2" #@param ["universal-sentence-encoder-qa-v3", "mpnet-base-v2", "distilroberta-v1", "minilm-l12-v2"]
# NOTE(review): indexing_library and search_previously_answered_queries are
# not referenced by any code visible in this file.
indexing_library = "faiss" #@param ["faiss"]
search_previously_answered_queries = False #@param {type:"boolean"}
context_generation = "top-k nearest sentences" #@param ["top-k nearest sentences", "top-k nearest sentences with window of m sentences", "paragraphs of top k sentences"]
# Translate the form label into the numeric option expected by get_context.
# NOTE(review): get_context only has a code path for options 1 and 2.
context_option = 1
if context_generation == "top-k nearest sentences with window of m sentences":
    context_option = 2
elif context_generation == "paragraphs of top k sentences":
    context_option = 3
# k: nearest sentences retrieved per query; m: window size for option 2.
k = 7 #@param {type:"slider", min:1, max:15, step:1}
m = 1 #@param {type:"slider", min:1, max:3, step:1}
# NOTE(review): context_similarity_threshold is not referenced by any code
# visible in this file.
context_similarity_threshold = 2 #@param {type:"number"}

use_pretrained_model_for_QA = True #@param {type:"boolean"}
model_id = "roberta-base-pf-ukp" #@param ["roberta-base-pf", "roberta-base-pf-ukp", "bart-large", "bert-pf-ukp", "roberta-base-hl", "bert-hl-ukp"]
# NOTE(review): use_onnx / optimize_model / quantize_model are stored in
# params below but only read by commented-out code in predict_multiple_themes.
use_onnx = True #@param {type:"boolean"}
optimize_model = True #@param {type:"boolean"}
quantize_model = False #@param {type:"boolean"}
tokenizer_link = "https://drive.google.com/u/1/uc?id=1Rq9kXnOpbY1FsDBjHtlx4i7scrnk_0A9&export=download" #@param {type:"string"}
model_links_json = "https://drive.google.com/u/1/uc?id=1usU8GcPTzIakelkJd7ChvQGxqwlEJxoz&export=download" #@param {type:"string"}
num_qa_passes = 1 #@param {type:"slider", min:1, max:5, step:1}

# download validation data
download_test_data(round = validation_data)

# load the sentence encoder selected above
sents_encoder = load_encoder(sentence_encoder)

# load QA models link (used when fine-tuned, theme-wise models are selected)
qa_model_links = load_model_links(model_links_json)

# Everything the prediction functions need, bundled into one dict.
params = {
    'encoder_name': sentence_encoder,
    'encoder': sents_encoder,
    'qam_links': qa_model_links,
    'tok_link': tokenizer_link,
    'ctx_option': context_option,
    'k': k,
    'm': m,
    'use_pretrained': use_pretrained_model_for_QA,
    'model_id': model_id,
    'use_onnx': use_onnx,
    'optimize': optimize_model,
    'quantize': quantize_model,
    'num_themes': num_themes_to_test,
    'qa_passes': num_qa_passes
}

In [None]:
# Run prediction for the configured themes; the result maps each theme to
# its (ANN, QnA) inference-time pair and is consumed by calculate_score.
theme_inf_time = predict_multiple_themes(params)

In [None]:
# Print the theme-wise and aggregated score table.
calculate_score(theme_inf_time, inf_time_threshold = 1000.0) # previously run with mpnet and bert-base-uncased ukp / bert-base_qa_squad2_houlsby