### 1. IMPORT

In [1]:
import json
import torch
import numpy as np
import random
import pandas as pd
from transformers import (
    DPRContextEncoder,
    DPRQuestionEncoder,
    DPRContextEncoderTokenizer,
    DPRQuestionEncoderTokenizer,
)
import torch.nn.functional as F
import math
from rank_bm25 import BM25Okapi

### 2. CHECK CUDA

In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

### 3. LOAD TEST DATA

In [3]:
# Load the evaluation queries. Later cells read the keys "label", "description",
# "pos" and "neg" from every element of this list.
# JSON is UTF-8 by specification, so pin the encoding instead of relying on the
# platform default (which is not UTF-8 on every OS).
with open("./data/test_data.json", "r", encoding="utf-8") as f:
    test_data = json.load(f)

# Starts empty; each model section adds one column of per-label hit counts.
comparison_data = pd.DataFrame()

### 4. DEFINE FUNCTIONS TO CALCULATE AP, RR, NDCG

#### 4.1 AVERAGE PRECISION(AP)

In [4]:
def calculate_average_precision(relevance_list):
    """
    Average Precision (AP) of one ranked result list.

    Precision is sampled at every rank that holds a relevant item and the
    samples are averaged over the number of relevant items found in the list.

    :param relevance_list: Ranked relevance flags (1 = relevant, 0 = not relevant).
    :return: AP for the query, or 0 when the list contains no relevant item.
    """
    hits = 0
    precision_total = 0
    for rank, flag in enumerate(relevance_list, start=1):
        if flag != 1:
            continue
        hits += 1
        # Precision at this rank = relevant items seen so far / items seen so far.
        precision_total += hits / rank

    # Nothing relevant was retrieved, so there is no precision sample to average.
    return precision_total / hits if hits else 0

#### 4.2 AVERAGE PRECISION MODIFIED VERSION(AP-modified)

In [5]:
def calculate_modified_average_precision(relevance_list):
    """
    Modified Average Precision for one ranked result list.

    Unlike plain AP, precision is sampled at *every* rank (not only at ranks
    holding a relevant item) and averaged over the full list length, so
    irrelevant results are penalised as well.

    :param relevance_list: Ranked relevance flags (1 = relevant, 0 = not relevant).
    :return: Modified AP for the query, or 0 when no item is relevant.
    """
    relevant_so_far = 0
    precision_total = 0
    for rank, flag in enumerate(relevance_list, start=1):
        if flag == 1:
            relevant_so_far += 1
        # Every position contributes its running precision, relevant or not.
        precision_total += relevant_so_far / rank

    if not relevant_so_far:
        return 0  # no relevant item anywhere in the list
    return precision_total / len(relevance_list)

#### 4.3 RECIPROCAL RANK(RR)

In [6]:
def calculate_reciprocal_rank(relevance_list):
    """
    Reciprocal Rank (RR) of one ranked result list.

    Only the position of the first relevant item matters: the score is
    1 / rank of that item (ranks start at 1).

    :param relevance_list: Ranked relevance flags (1 = relevant, 0 = not relevant).
    :return: 1 / rank of the first relevant item, or 0 when none is relevant.
    """
    for rank, flag in enumerate(relevance_list, start=1):
        if flag == 1:
            # First hit found; later items cannot change the score.
            return 1/rank

    return 0
    

#### 4.4 NORMALIZED DISCOUNTED CUMULATIVE GAIN(NDCG)

In [7]:
def calculate_normalized_DCG_k(relevance_list,k):
    """
    Normalized Discounted Cumulative Gain at cut-off 'k' for one ranked list.

    Each position i (0-based) is discounted by 1 / log2(i + 2), so hits near
    the top weigh more. The ideal DCG used for normalisation assumes that every
    one of the evaluated positions is relevant.

    :param relevance_list: Ranked relevance flags (1 = relevant, 0 = not relevant).
    :param k: Cut-off rank; at most len(relevance_list) positions are evaluated.
    :return: DCG@k / ideal DCG@k, or 0 when no evaluated position is relevant.
    """
    gained = 0
    ideal = 0

    # Slicing the range caps the walk at the list length when k is larger.
    for pos in range(k)[:len(relevance_list)]:
        discount = 1/(math.log(pos+2,2))
        ideal += discount
        if relevance_list[pos] == 1:
            gained += discount

    # Also covers k <= 0 and the empty list, where ideal would be 0 as well.
    return gained/ideal if gained != 0 else 0

#### 4.5 SAMPLE RESULTS FOR ABOVE FUNCTIONS

In [8]:
# Two hand-made rankings used as a sanity check for the four metric functions:
# the first places its three relevant items at ranks 3-5, the second has a
# single relevant item at rank 2.
relevance_list = [0,0,1,1,1]
relevance_list1 = [0,1,0,0,0]

# Metrics for the first ranking, rounded to 4 decimals; NDCG uses k = list length.
print('AP relevance_list', round(calculate_average_precision(relevance_list), 4))
print('AP-modified relevance_list', round(calculate_modified_average_precision(relevance_list), 4))
print('RR relevance_list',round(calculate_reciprocal_rank(relevance_list), 4))
print('NDCG relevance_list', round(calculate_normalized_DCG_k(relevance_list,len(relevance_list)),4))

# Blank line between the two groups of results.
print()

# Same four metrics for the second ranking.
print('AP relevance_list1', round(calculate_average_precision(relevance_list1), 4))
print('AP-modified relevance_list1', round(calculate_modified_average_precision(relevance_list1), 4))
print('RR relevance_list1',round(calculate_reciprocal_rank(relevance_list1), 4))
print('NDCG relevance_list1', round(calculate_normalized_DCG_k(relevance_list1,len(relevance_list1)),4))

AP relevance_list 0.4778
AP-modified relevance_list 0.2867
RR relevance_list 0.3333
NDCG relevance_list 0.4469

AP relevance_list1 0.5
AP-modified relevance_list1 0.2567
RR relevance_list1 0.5
NDCG relevance_list1 0.214


### 5. MODELS COMPARISON

#### 5.1 BM25

##### 5.1.1 INITIALIZATIONS

In [9]:
# Accumulators for the BM25 run.
# Hit counters: relevant documents found / documents retrieved, over all queries.
correct_predictions = positve_samples = 0
# Per-query label and hit count; these become columns of comparison_data.
labels_list, accuracy_list = [], []

# Running sums of the per-query ranking metrics (divided by the query count later).
total_average_precision = total_modified_average_precision = 0
total_reciprocal_rank = total_normalized_DCG_k = 0

##### 5.1.2 CALCULATE METRICS

In [10]:
# Evaluate BM25: for every job description, rank its candidate resumes by lexical
# similarity and score the top-k results against the known positives.
TOP_K = 5  # number of documents retrieved per query

# Re-initialise the accumulators here as well, so that re-running this cell on its
# own does not double count or append duplicate rows.
correct_predictions = 0
positve_samples = 0
labels_list = []
accuracy_list = []
total_average_precision = 0
total_modified_average_precision = 0
total_reciprocal_rank = 0
total_normalized_DCG_k = 0

for sample in test_data:
    label = sample["label"]
    job_description = sample["description"]
    pos_samples = sample["pos"]
    neg_samples = sample["neg"]
    # pos + neg builds a new list, so shuffling it leaves the sample untouched.
    # The shuffle removes the positives-first ordering (presumably so that tied
    # scores do not favour the positives -- TODO confirm intent).
    all_samples = pos_samples + neg_samples
    random.shuffle(all_samples)

    # Tokenize documents and prepare BM25
    tokenized_docs = [doc.split(" ") for doc in all_samples]
    bm25 = BM25Okapi(tokenized_docs)

    # Tokenize query and calculate similarity scores
    tokenized_query = job_description.split(" ")
    similarity_scores = bm25.get_scores(tokenized_query)

    # Retrieve the top-k documents; topk raises if k exceeds the number of
    # candidates, so cap k at the size of the candidate pool.
    k = min(TOP_K, len(all_samples))
    _, indices = torch.topk(torch.from_numpy(similarity_scores), k)

    # Index the Python list directly instead of building a numpy string array.
    relevant_passages = [all_samples[i] for i in indices.tolist()]
    # 1 when the retrieved document is one of the query's positives, else 0.
    relevance_of_indices_retrieved = [1 if p in pos_samples else 0 for p in relevant_passages]
    num_correct = sum(relevance_of_indices_retrieved)

    correct_predictions += num_correct
    positve_samples += k

    average_precision = calculate_average_precision(relevance_of_indices_retrieved)
    modified_average_precision = calculate_modified_average_precision(relevance_of_indices_retrieved)
    reciprocal_rank = calculate_reciprocal_rank(relevance_of_indices_retrieved)
    normalized_DCG_k = calculate_normalized_DCG_k(relevance_of_indices_retrieved, len(relevance_of_indices_retrieved))
    total_average_precision += average_precision
    total_modified_average_precision += modified_average_precision
    total_reciprocal_rank += reciprocal_rank
    total_normalized_DCG_k += normalized_DCG_k
    labels_list.append(label)
    accuracy_list.append(num_correct)

    print('relevance_of_indices_retrieved',relevance_of_indices_retrieved)
    print('Average Precision',round(average_precision, 4))
    print('Modified Average Precision', round(modified_average_precision, 4))
    print('Reciprocal Rank', round(reciprocal_rank, 4))
    print('Normalized DCG_k', round(normalized_DCG_k, 4))
    print(f"Accuracy ({label}): {num_correct}/{len(relevant_passages)}")
    print()

# Final "Total" row, then publish the label and BM25 columns of the comparison table.
labels_list.append("Total")
accuracy_list.append(correct_predictions)
comparison_data["Label"] = labels_list
comparison_data["BM25"] = accuracy_list

# Divide the unrounded sums: rounding a sum to 2 decimals first would make the
# 4-decimal means below inaccurate.
num_queries = len(test_data)
print(f"Mean Average Precision: {round(total_average_precision/num_queries, 4)}")
print(f"Modified Mean Average Precision: {round(total_modified_average_precision/num_queries, 4)}")
print(f"Mean Reciprocal Rank: {round(total_reciprocal_rank/num_queries, 4)}")
print(f"Mean Normalized DCG_k: {round(total_normalized_DCG_k/num_queries, 4)}")
print(f"Total accuracy: {correct_predictions}/{positve_samples}")


relevance_of_indices_retrieved [1, 1, 1, 0, 0]
Average Precision 1.0
Modified Average Precision 0.87
Reciprocal Rank 1.0
Normalized DCG_k 0.7227
Accuracy (Security_Analyst): 3/5

relevance_of_indices_retrieved [1, 1, 1, 0, 0]
Average Precision 1.0
Modified Average Precision 0.87
Reciprocal Rank 1.0
Normalized DCG_k 0.7227
Accuracy (Systems_Administrator): 3/5

relevance_of_indices_retrieved [1, 0, 0, 0, 1]
Average Precision 0.7
Modified Average Precision 0.4967
Reciprocal Rank 1.0
Normalized DCG_k 0.4704
Accuracy (Project_manager): 2/5

relevance_of_indices_retrieved [1, 0, 0, 0, 1]
Average Precision 0.7
Modified Average Precision 0.4967
Reciprocal Rank 1.0
Normalized DCG_k 0.4704
Accuracy (Database_Administrator): 2/5

relevance_of_indices_retrieved [1, 1, 0, 1, 1]
Average Precision 0.8875
Modified Average Precision 0.8433
Reciprocal Rank 1.0
Normalized DCG_k 0.8304
Accuracy (Software_Developer): 4/5

relevance_of_indices_retrieved [1, 0, 1, 0, 0]
Average Precision 0.8333
Modified Ave

#### 5.2 DPR BASE

##### 5.2.1 INITIALIZATIONS

In [11]:
# Pretrained DPR checkpoints: one for the question (job description) side and one
# for the context (resume) side.
question_checkpoint = "facebook/dpr-question_encoder-single-nq-base"
context_checkpoint = "facebook/dpr-ctx_encoder-single-nq-base"

# Question side: the encoder is moved to the selected device, the tokenizer is not.
question_encoder = DPRQuestionEncoder.from_pretrained(question_checkpoint)
question_encoder = question_encoder.to(device)
question_tokenizer = DPRQuestionEncoderTokenizer.from_pretrained(question_checkpoint)

# Context side, loaded the same way.
context_encoder = DPRContextEncoder.from_pretrained(context_checkpoint)
context_encoder = context_encoder.to(device)
context_tokenizer = DPRContextEncoderTokenizer.from_pretrained(context_checkpoint)

def encode(tokenizer, encoder, text):
    """
    Embed one text or a list of texts with a DPR encoder.

    :param tokenizer: Tokenizer matching ``encoder``.
    :param encoder: DPR question or context encoder; it is fed tensors placed on
        the notebook-level ``device``.
    :param text: A string or a list of strings. Every input is padded or
        truncated to exactly 512 tokens.
    :return: The encoder's ``pooler_output`` for the inputs, detached from autograd.
    """
    tokenized_output = tokenizer(
        text, return_tensors="pt", padding="max_length", max_length=512, truncation=True
    )
    input_ids = tokenized_output["input_ids"].to(device)
    attention_mask = tokenized_output["attention_mask"].to(device)

    # Evaluation only: without no_grad the forward pass keeps activations for a
    # backward pass that never happens, which wastes memory on a whole batch of
    # 512-token documents.
    with torch.no_grad():
        return encoder(input_ids, attention_mask).pooler_output

# Accumulators for the DPR base run.
# Hit counters: relevant documents found / documents retrieved, over all queries.
correct_predictions = positve_samples = 0
# Per-query hit counts; becomes this model's column of comparison_data.
accuracy_list = []

# Running sums of the per-query ranking metrics.
total_average_precision = total_modified_average_precision = 0
total_reciprocal_rank = total_normalized_DCG_k = 0

Some weights of the model checkpoint at facebook/dpr-question_encoder-single-nq-base were not used when initializing DPRQuestionEncoder: ['question_encoder.bert_model.pooler.dense.bias', 'question_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRQuestionEncoder from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DPRQuestionEncoder from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at facebook/dpr-ctx_encoder-single-nq-base were not used when initializing DPRContextEncoder: ['ctx_encoder.bert_model.pooler.dense.bias', 'ctx_encoder.bert_model.pooler.dense.weight']
- This IS expected if you are initializing DPRContextEncoder from the

##### 5.2.2 CALCULATE METRICS

In [12]:
# Evaluate the pretrained DPR encoders: embed the job description and its
# candidate resumes, rank by cosine similarity and score the top-k results.
TOP_K = 5  # number of documents retrieved per query

# Re-initialise the accumulators here as well, so that re-running this cell on its
# own does not double count.
correct_predictions = 0
positve_samples = 0
accuracy_list = []
total_average_precision = 0
total_modified_average_precision = 0
total_reciprocal_rank = 0
total_normalized_DCG_k = 0

for sample in test_data:
    label = sample["label"]
    job_description = sample["description"]
    pos_samples = sample["pos"]
    neg_samples = sample["neg"]
    # pos + neg builds a new list, so shuffling it leaves the sample untouched.
    all_samples = pos_samples + neg_samples
    random.shuffle(all_samples)

    # Encode the question and the context
    question_output = encode(question_tokenizer, question_encoder, job_description)
    context_output = encode(context_tokenizer, context_encoder, all_samples)

    # Find the cosine similarities of above encodings
    scores = F.cosine_similarity(question_output, context_output)

    # topk raises if k exceeds the number of candidates, so cap k at the pool size.
    k = min(TOP_K, len(all_samples))
    _, indices = torch.topk(scores, k)

    # Index the Python list directly instead of building a numpy string array.
    relevant_passages = [all_samples[i] for i in indices.tolist()]
    # 1 when the retrieved document is one of the query's positives, else 0.
    relevance_of_indices_retrieved = [1 if p in pos_samples else 0 for p in relevant_passages]
    num_correct = sum(relevance_of_indices_retrieved)

    correct_predictions += num_correct
    positve_samples += k

    average_precision = calculate_average_precision(relevance_of_indices_retrieved)
    modified_average_precision = calculate_modified_average_precision(relevance_of_indices_retrieved)
    reciprocal_rank = calculate_reciprocal_rank(relevance_of_indices_retrieved)
    normalized_DCG_k = calculate_normalized_DCG_k(relevance_of_indices_retrieved, len(relevance_of_indices_retrieved))
    total_average_precision += average_precision
    total_modified_average_precision += modified_average_precision
    total_reciprocal_rank += reciprocal_rank
    total_normalized_DCG_k += normalized_DCG_k
    accuracy_list.append(num_correct)

    print('relevance_of_indices_retrieved',relevance_of_indices_retrieved)
    print('Average Precision',round(average_precision, 4))
    print('Modified Average Precision', round(modified_average_precision, 4))
    print('Reciprocal Rank', round(reciprocal_rank, 4))
    print('Normalized DCG_k', round(normalized_DCG_k, 4))
    print(f"Accuracy ({label}): {num_correct}/{len(relevant_passages)}")
    print()

# Final total row, then publish this model's column of the comparison table.
accuracy_list.append(correct_predictions)
comparison_data["DPR Base"] = accuracy_list

# Divide the unrounded sums: rounding a sum to 2 decimals first would make the
# 4-decimal means below inaccurate.
num_queries = len(test_data)
print(f"Mean Average Precision: {round(total_average_precision/num_queries, 4)}")
print(f"Modified Mean Average Precision: {round(total_modified_average_precision/num_queries, 4)}")
print(f"Mean Reciprocal Rank: {round(total_reciprocal_rank/num_queries, 4)}")
print(f"Mean Normalized DCG_k: {round(total_normalized_DCG_k/num_queries, 4)}")
print(f"Total accuracy: {correct_predictions}/{positve_samples}")

relevance_of_indices_retrieved [0, 0, 1, 0, 1]
Average Precision 0.3667
Modified Average Precision 0.1967
Reciprocal Rank 0.3333
Normalized DCG_k 0.3008
Accuracy (Security_Analyst): 2/5

relevance_of_indices_retrieved [1, 1, 0, 1, 0]
Average Precision 0.9167
Modified Average Precision 0.8033
Reciprocal Rank 1.0
Normalized DCG_k 0.6992
Accuracy (Systems_Administrator): 3/5

relevance_of_indices_retrieved [1, 1, 0, 1, 0]
Average Precision 0.9167
Modified Average Precision 0.8033
Reciprocal Rank 1.0
Normalized DCG_k 0.6992
Accuracy (Project_manager): 3/5

relevance_of_indices_retrieved [1, 1, 1, 0, 0]
Average Precision 1.0
Modified Average Precision 0.87
Reciprocal Rank 1.0
Normalized DCG_k 0.7227
Accuracy (Database_Administrator): 3/5

relevance_of_indices_retrieved [1, 1, 0, 0, 1]
Average Precision 0.8667
Modified Average Precision 0.7533
Reciprocal Rank 1.0
Normalized DCG_k 0.6844
Accuracy (Software_Developer): 3/5

relevance_of_indices_retrieved [0, 1, 0, 0, 1]
Average Precision 0.45


#### 5.3 DPR FT2 (BATCH SIZE 16)

##### 5.3.1 INITIALIZATIONS

In [13]:
# Replace both encoders with the fine-tuned checkpoints stored under ./models_16.
# The tokenizers are not reloaded in this cell; the evaluation loop keeps using
# the ones created in section 5.2.1.
question_encoder = DPRQuestionEncoder.from_pretrained("./models_16/finetune_question_encoder")
question_encoder = question_encoder.to(device)
context_encoder = DPRContextEncoder.from_pretrained("./models_16/finetune_context_encoder")
context_encoder = context_encoder.to(device)

# Accumulators for this run.
# Hit counters: relevant documents found / documents retrieved, over all queries.
correct_predictions = positve_samples = 0
# Per-query hit counts; becomes this model's column of comparison_data.
accuracy_list = []

# Running sums of the per-query ranking metrics.
total_average_precision = total_modified_average_precision = 0
total_reciprocal_rank = total_normalized_DCG_k = 0

##### 5.3.2 CALCULATE METRICS

In [14]:
# Evaluate the fine-tuned DPR encoders: embed the job description and its
# candidate resumes, rank by cosine similarity and score the top-k results.
TOP_K = 5  # number of documents retrieved per query

# Re-initialise the accumulators here as well, so that re-running this cell on its
# own does not double count.
correct_predictions = 0
positve_samples = 0
accuracy_list = []
total_average_precision = 0
total_modified_average_precision = 0
total_reciprocal_rank = 0
total_normalized_DCG_k = 0

for sample in test_data:
    label = sample["label"]
    job_description = sample["description"]
    pos_samples = sample["pos"]
    neg_samples = sample["neg"]
    # pos + neg builds a new list, so shuffling it leaves the sample untouched.
    all_samples = pos_samples + neg_samples
    random.shuffle(all_samples)

    # Encode the job description and the resumes
    question_output = encode(question_tokenizer, question_encoder, job_description)
    context_output = encode(context_tokenizer, context_encoder, all_samples)

    # Compute the cosine similarities of the above encodings
    scores = F.cosine_similarity(question_output, context_output)

    # topk raises if k exceeds the number of candidates, so cap k at the pool size.
    k = min(TOP_K, len(all_samples))
    _, indices = torch.topk(scores, k)

    # Index the Python list directly instead of building a numpy string array.
    relevant_passages = [all_samples[i] for i in indices.tolist()]
    # 1 when the retrieved document is one of the query's positives, else 0.
    relevance_of_indices_retrieved = [1 if p in pos_samples else 0 for p in relevant_passages]
    num_correct = sum(relevance_of_indices_retrieved)

    correct_predictions += num_correct
    positve_samples += k

    average_precision = calculate_average_precision(relevance_of_indices_retrieved)
    modified_average_precision = calculate_modified_average_precision(relevance_of_indices_retrieved)
    reciprocal_rank = calculate_reciprocal_rank(relevance_of_indices_retrieved)
    normalized_DCG_k = calculate_normalized_DCG_k(relevance_of_indices_retrieved, len(relevance_of_indices_retrieved))
    total_average_precision += average_precision
    total_modified_average_precision += modified_average_precision
    total_reciprocal_rank += reciprocal_rank
    total_normalized_DCG_k += normalized_DCG_k

    accuracy_list.append(num_correct)
    print('relevance_of_indices_retrieved',relevance_of_indices_retrieved)
    print('Average Precision',round(average_precision, 4))
    print('Modified Average Precision', round(modified_average_precision, 4))
    print('Reciprocal Rank',round(reciprocal_rank,4))
    print('Normalized DCG_k',round(normalized_DCG_k,4))
    print(f"Accuracy ({label}): {num_correct}/{len(relevant_passages)}")
    print()

# Final total row, then publish this model's column of the comparison table.
accuracy_list.append(correct_predictions)
comparison_data["DPR FT2(16)"] = accuracy_list

# Divide the unrounded sums: rounding a sum to 2 decimals first would make the
# 4-decimal means below inaccurate.
num_queries = len(test_data)
print(f"Mean Average Precision: {round(total_average_precision/num_queries, 4)}")
print(f"Modified Mean Average Precision: {round(total_modified_average_precision/num_queries, 4)}")
print(f"Mean Reciprocal Rank: {round(total_reciprocal_rank/num_queries, 4)}")
# Label fixed from "DGC_k" to match the other sections' output.
print(f"Mean Normalized DCG_k: {round(total_normalized_DCG_k/num_queries, 4)}")
print(f"Total accuracy: {correct_predictions}/{positve_samples}")

relevance_of_indices_retrieved [0, 1, 1, 0, 1]
Average Precision 0.5889
Modified Average Precision 0.4533
Reciprocal Rank 0.5
Normalized DCG_k 0.5148
Accuracy (Security_Analyst): 3/5

relevance_of_indices_retrieved [1, 1, 1, 0, 0]
Average Precision 1.0
Modified Average Precision 0.87
Reciprocal Rank 1.0
Normalized DCG_k 0.7227
Accuracy (Systems_Administrator): 3/5

relevance_of_indices_retrieved [1, 1, 1, 0, 1]
Average Precision 0.95
Modified Average Precision 0.91
Reciprocal Rank 1.0
Normalized DCG_k 0.8539
Accuracy (Project_manager): 4/5

relevance_of_indices_retrieved [1, 1, 1, 1, 1]
Average Precision 1.0
Modified Average Precision 1.0
Reciprocal Rank 1.0
Normalized DCG_k 1.0
Accuracy (Database_Administrator): 5/5

relevance_of_indices_retrieved [1, 1, 1, 1, 0]
Average Precision 1.0
Modified Average Precision 0.96
Reciprocal Rank 1.0
Normalized DCG_k 0.8688
Accuracy (Software_Developer): 4/5

relevance_of_indices_retrieved [1, 1, 0, 0, 0]
Average Precision 1.0
Modified Average Preci

#### 5.4 BM25 + DPR FT2(BATCH SIZE 16)

##### 5.4.1 INITIALIZATIONS

In [15]:
# Weight applied to the DPR score before it is added to the BM25 score.
lambda_val = 1.1

# Load the fine-tuned checkpoints stored under ./models_16 again, so this section
# does not depend on the encoders left behind by an earlier cell.
question_encoder = DPRQuestionEncoder.from_pretrained("./models_16/finetune_question_encoder")
question_encoder = question_encoder.to(device)
context_encoder = DPRContextEncoder.from_pretrained("./models_16/finetune_context_encoder")
context_encoder = context_encoder.to(device)

# Accumulators for this run.
# Hit counters: relevant documents found / documents retrieved, over all queries.
correct_predictions = positve_samples = 0
# Per-query hit counts; becomes this model's column of comparison_data.
accuracy_list = []

# Running sums of the per-query ranking metrics.
total_average_precision = total_modified_average_precision = 0
total_reciprocal_rank = total_normalized_DCG_k = 0

##### 5.4.2 CALCULATE METRICS

In [16]:
# Evaluate the hybrid ranker: max-normalised DPR cosine similarity (weighted by
# lambda_val) plus max-normalised BM25 score, scored on the top-k results.
TOP_K = 5  # number of documents retrieved per query


def _scale_by_max(values):
    """Divide scores by their maximum; return them unchanged if the maximum is not positive."""
    peak = np.max(values)
    # Dividing by 0 would give NaN and dividing by a negative maximum would
    # invert the ranking, so only rescale when the maximum is positive.
    return values / peak if peak > 0 else values


# Re-initialise the accumulators here as well, so that re-running this cell on its
# own does not double count.
correct_predictions = 0
positve_samples = 0
accuracy_list = []
total_average_precision = 0
total_modified_average_precision = 0
total_reciprocal_rank = 0
total_normalized_DCG_k = 0

for sample in test_data:
    label = sample["label"]
    job_description = sample["description"]
    pos_samples = sample["pos"]
    neg_samples = sample["neg"]
    # Unlike the other sections, the candidates are not shuffled here.
    all_samples = pos_samples + neg_samples

    # Dense score: cosine similarity between the description and each resume.
    question_output = encode(question_tokenizer, question_encoder, job_description)
    context_output = encode(context_tokenizer, context_encoder, all_samples)
    scores = F.cosine_similarity(question_output, context_output).detach().cpu().numpy()
    scores = _scale_by_max(scores)

    # Lexical score: BM25 over whitespace-split tokens.
    tokenized_all = [doc.split(" ") for doc in all_samples]
    bm25 = BM25Okapi(tokenized_all)

    tokenized_query = job_description.split(" ")
    scores_bm25 = bm25.get_scores(tokenized_query)
    scores_bm25 = _scale_by_max(scores_bm25)

    # Weighted sum of the two normalised scores.
    consolidated_scores = torch.from_numpy(lambda_val*scores + scores_bm25)

    # topk raises if k exceeds the number of candidates, so cap k at the pool size.
    k = min(TOP_K, len(all_samples))
    _, indices = torch.topk(consolidated_scores, k)

    # Index the Python list directly instead of building a numpy string array.
    relevant_passages = [all_samples[i] for i in indices.tolist()]
    # 1 when the retrieved document is one of the query's positives, else 0.
    relevance_of_indices_retrieved = [1 if p in pos_samples else 0 for p in relevant_passages]
    num_correct = sum(relevance_of_indices_retrieved)

    correct_predictions += num_correct
    positve_samples += k

    average_precision = calculate_average_precision(relevance_of_indices_retrieved)
    modified_average_precision = calculate_modified_average_precision(relevance_of_indices_retrieved)
    reciprocal_rank = calculate_reciprocal_rank(relevance_of_indices_retrieved)
    normalized_DCG_k = calculate_normalized_DCG_k(relevance_of_indices_retrieved, len(relevance_of_indices_retrieved))
    total_average_precision += average_precision
    total_modified_average_precision += modified_average_precision
    total_reciprocal_rank += reciprocal_rank
    total_normalized_DCG_k += normalized_DCG_k

    accuracy_list.append(num_correct)
    print('relevance_of_indices_retrieved',relevance_of_indices_retrieved)
    print('Average Precision',round(average_precision, 4))
    print('Modified Average Precision', round(modified_average_precision, 4))
    print('Reciprocal Rank', round(reciprocal_rank, 4))
    print('Normalized DCG_k', round(normalized_DCG_k, 4))
    print(f"Accuracy ({label}): {num_correct}/{len(relevant_passages)}")
    print()


# Final total row, then publish this model's column of the comparison table.
accuracy_list.append(correct_predictions)
comparison_data["BM25 + DPR FT2(16)"] = accuracy_list

# Divide the unrounded sums: rounding a sum to 2 decimals first would make the
# 4-decimal means below inaccurate.
num_queries = len(test_data)
print(f"Mean Average Precision: {round(total_average_precision/num_queries, 4)}")
print(f"Modified Mean Average Precision: {round(total_modified_average_precision/num_queries, 4)}")
print(f"Mean Reciprocal Rank: {round(total_reciprocal_rank/num_queries, 4)}")
print(f"Mean Normalized DCG_k: {round(total_normalized_DCG_k/num_queries, 4)}")
print(f"Total accuracy: {correct_predictions}/{positve_samples}")

relevance_of_indices_retrieved [1, 1, 1, 0, 0]
Average Precision 1.0
Modified Average Precision 0.87
Reciprocal Rank 1.0
Normalized DCG_k 0.7227
Accuracy (Security_Analyst): 3/5

relevance_of_indices_retrieved [1, 1, 0, 1, 0]
Average Precision 0.9167
Modified Average Precision 0.8033
Reciprocal Rank 1.0
Normalized DCG_k 0.6992
Accuracy (Systems_Administrator): 3/5

relevance_of_indices_retrieved [1, 1, 0, 1, 0]
Average Precision 0.9167
Modified Average Precision 0.8033
Reciprocal Rank 1.0
Normalized DCG_k 0.6992
Accuracy (Project_manager): 3/5

relevance_of_indices_retrieved [1, 1, 0, 0, 1]
Average Precision 0.8667
Modified Average Precision 0.7533
Reciprocal Rank 1.0
Normalized DCG_k 0.6844
Accuracy (Database_Administrator): 3/5

relevance_of_indices_retrieved [1, 1, 1, 1, 0]
Average Precision 1.0
Modified Average Precision 0.96
Reciprocal Rank 1.0
Normalized DCG_k 0.8688
Accuracy (Software_Developer): 4/5

relevance_of_indices_retrieved [1, 0, 1, 0, 0]
Average Precision 0.8333
Modifi

### 6. RESULTS

In [17]:
# Only the accuracy (number of relevant documents among those retrieved) is
# tabulated here for comparison.
# The per-query AP, RR and NDCG values of each model are printed in the cell
# outputs of section 5.
# Lift the row limit so the whole table is displayed.
pd.set_option("display.max_rows", None)
comparison_data

Unnamed: 0,Label,BM25,DPR Base,DPR FT2(16),BM25 + DPR FT2(16)
0,Security_Analyst,3,2,3,3
1,Systems_Administrator,3,3,3,3
2,Project_manager,2,3,4,3
3,Database_Administrator,2,3,5,3
4,Software_Developer,4,3,4,4
5,Front_End_Developer,2,2,2,2
6,Web_Developer,4,0,1,3
7,Java_Developer,4,3,3,4
8,Network_Administrator,3,3,3,4
9,Python_Developer,4,2,2,4
