SciFact is a dataset of 1.4K expert-written scientific claims, each paired with evidence-containing abstracts and annotated with labels and rationales.

In [None]:
import faiss
import pickle
import numpy as np
from elasticsearch import Elasticsearch
from datasets import load_dataset

### Import data

In [None]:
# Specify the file path where your embeddings were saved
# NOTE: the name `claim` is re-bound by later `for claim in ...` loops, so it
# only holds this path until those cells run.
claim = "scifact_claim_embeddings.pkl"
evidence = "scifact_evidence_embeddings.pkl"

# Step 1: Load the pickled embeddings for evidence and claim
# NOTE(review): unpickling can execute arbitrary code; only load files that
# were produced locally or come from a trusted source.
with open(evidence, "rb") as f:
    evidence_embeddings = pickle.load(f)

with open(claim, "rb") as f:
    claim_embeddings = pickle.load(f)

In [None]:
# Split each {key: embedding} mapping into a list of keys and a numpy matrix
# whose i-th row is the embedding stored under the i-th key.
claim_doc = list(claim_embeddings)
claim_mat = np.array(list(claim_embeddings.values()))

evidence_doc = list(evidence_embeddings)
evidence_mat = np.array(list(evidence_embeddings.values()))

In [None]:
# Check on embedding dimensions
# `d` is kept as the evidence dimension because the FAISS index is built over
# the evidence vectors; the claim dimension gets its own name so it cannot
# silently overwrite `d`.
n_evi, d = np.shape(evidence_mat)
print(f"Evidence embedding dimension: {(n_evi, d)}")
n_claim, d_claim = np.shape(claim_mat)
print(f"Claim embedding dimension: {(n_claim, d_claim)}")

# Claims are used as queries against the evidence index, so both sets of
# embeddings must live in the same vector space.
if d_claim != d:
    raise ValueError(
        f"Claim embedding size {d_claim} does not match evidence embedding size {d}"
    )


In [None]:
# Get relevant documents index for claims
# Both calls load from "allenai/scifact": the first with "corpus" (evidence
# abstracts), the second with "claims". Later cells read the "train" split of
# each returned object.
scifact_evidence = load_dataset("allenai/scifact", "corpus")
scifact_claims = load_dataset("allenai/scifact", "claims")

In [None]:
# Build `golden_evidence_id`: one row per claim, in the SAME order as
# `claim_doc` (and therefore as the rows of `claim_mat`), holding the corpus
# position of the claim's gold evidence document, or -1 when there is none.
query_id = [doc[0] for doc in claim_doc]
evidence_doc_id = [str(i) for i in scifact_evidence["train"]['doc_id']]

# doc_id -> first position in the corpus (same result as list.index, but O(1)).
# NOTE(review): this assumes the rows of `evidence_mat` follow the same order
# as the corpus "train" split - confirm against how the embeddings were saved.
evidence_position = {}
for position, doc_id in enumerate(evidence_doc_id):
    evidence_position.setdefault(doc_id, position)

# claim id -> evidence_doc_id of the FIRST dataset row carrying that id.
# A claim id may appear on several rows; only the first one is used.
first_evidence = {}
for row in scifact_claims['train']:
    first_evidence.setdefault(row["id"], row["evidence_doc_id"])

golden_evidence_id = []
for claim_id in query_id:
    # Claims absent from the split are treated like claims without evidence so
    # that the result stays row-aligned with `claim_mat`.
    gold_doc_id = first_evidence.get(claim_id, "")
    if gold_doc_id == "":
        golden_evidence_id.append([-1])
    else:
        # Raises KeyError if the gold document is missing from the corpus.
        golden_evidence_id.append([evidence_position[gold_doc_id]])
golden_evidence_id = np.array(golden_evidence_id)

In [None]:
# Mean Reciprocal Rank @ K (MRR@K)
def mrr_at_k(actual, predicted, k):
    """
    Calculate Mean Reciprocal Rank at K (MRR@K).

    A query whose relevant items contain the sentinel -1 ("no relevant
    document") always scores 0.0, so a -1 padding id in the predictions can
    never be counted as a hit. This matches average_precision_at_k.

    Args:
    actual: List of lists containing actual relevant items for each query.
    predicted: List of lists containing predicted items for each query.
    k: The number of top predictions to consider.

    Returns:
    float: The MRR@K score (0.0 when there are no queries).
    """
    reciprocal_ranks = []

    for act, pred in zip(actual, predicted):
        relevant = set(act)
        if -1 in relevant:
            reciprocal_ranks.append(0.0)
            continue

        # Reciprocal rank of the first relevant item within the top K, else 0.
        first_hit = next(
            (1.0 / rank for rank, item in enumerate(pred[:k], 1) if item in relevant),
            0.0,
        )
        reciprocal_ranks.append(first_hit)

    # np.mean of an empty list is NaN and emits a RuntimeWarning.
    if not reciprocal_ranks:
        return 0.0
    return float(np.mean(reciprocal_ranks))

def average_precision_at_k(relevant_docs, retrieved_docs, k):
    """
    Calculate Average Precision at K for a single query

    Args:
    relevant_docs (list): Indices of relevant documents; the sentinel -1 means
        the query has no relevant document.
    retrieved_docs (list): Indices of retrieved documents, in order of retrieval
    k (int): Number of top results to consider

    Returns:
    float: Average Precision at K (0.0 when there is nothing relevant to find
        or when k is not positive)
    """
    if -1 in relevant_docs:
        return 0.0

    relevant_docs = set(relevant_docs)

    # Normalise by the best achievable number of hits within the top K.
    denominator = min(len(relevant_docs), k)
    if denominator <= 0:
        # No relevant documents or a non-positive cutoff: avoid dividing by zero.
        return 0.0

    precision_sum = 0.0
    num_relevant = 0

    for rank, doc in enumerate(retrieved_docs[:k], 1):
        if doc in relevant_docs:
            num_relevant += 1
            # Precision measured at the rank of each relevant hit.
            precision_sum += num_relevant / rank

    return precision_sum / denominator


def mean_average_precision_at_k(queries_relevant_docs, queries_retrieved_docs, k):
    """
    Calculate Mean Average Precision at K (MAP@K) for multiple queries

    Args:
    queries_relevant_docs (list of lists): List of relevant document indices for each query
    queries_retrieved_docs (list of lists): List of retrieved document indices for each query
    k (int): Number of top results to consider

    Returns:
    float: MAP@K score (0.0 when there are no queries)
    """
    ap_scores = [
        average_precision_at_k(relevant, retrieved, k)
        for relevant, retrieved in zip(queries_relevant_docs, queries_retrieved_docs)
    ]
    # np.mean of an empty list is NaN and emits a RuntimeWarning.
    if not ap_scores:
        return 0.0
    return float(np.mean(ap_scores))

## 5.2 Nearest Neighbor with FAISS
reference: https://github.com/facebookresearch/faiss/wiki/Getting-started

### Build index with Evidence Embeddings

In [None]:
# FAISS works on float32, C-contiguous matrices; convert once up front
# (this is a no-op when the embeddings already have that layout).
evidence_vectors = np.ascontiguousarray(evidence_mat, dtype=np.float32)

# Take the dimensionality from the vectors being indexed rather than from a
# variable set in another cell.
index = faiss.IndexFlatL2(evidence_vectors.shape[1])   # build the index
print(index.is_trained)
index.add(evidence_vectors)  # add vectors to the index
print(index.ntotal)

### Search KNN

In [None]:
# Number of nearest evidence vectors to retrieve per claim.
K = 50
# FAISS expects float32, C-contiguous query vectors (no-op if already so).
# D holds the distances and I the positions of the K nearest indexed vectors,
# one row per claim.
D, I = index.search(np.ascontiguousarray(claim_mat, dtype=np.float32), K)     # actual search

### Evaluation of MAP and MRR

In [None]:
# MAP at cutoffs 1, 10 and 50 for the retrieved indices in `I`.
map_at_1, map_at_10, map_at_50 = (
    mean_average_precision_at_k(golden_evidence_id, I, cutoff)
    for cutoff in (1, 10, 50)
)
print(
    f"MAP@1: {map_at_1:.4f}",
    f"MAP@10: {map_at_10:.4f}",
    f"MAP@50: {map_at_50:.4f}",
    sep="\n",
)

In [None]:
# MRR at cutoffs 1, 10 and 50 for the retrieved indices in `I`.
mrr_at_1, mrr_at_10, mrr_at_50 = (
    mrr_at_k(actual=golden_evidence_id, predicted=I, k=cutoff)
    for cutoff in (1, 10, 50)
)
print(f"MRR@1:{mrr_at_1}, MRR@10: {mrr_at_10}, MRR@50: {mrr_at_50}")

## 5.3 ElasticSearch

In [None]:
# Connect to Elasticsearch instance
es = Elasticsearch("http://localhost:9200")

# Check if Elasticsearch is running
if not es.ping():
    raise ValueError("Connection failed")

### Build Index with Evidence Text

In [None]:
import json
# Index documents in Elasticsearch
# Each evidence entry is stored under its position in `evidence_doc`, so the
# `_id` returned by a search is directly comparable to that position.
for i, doc in enumerate(evidence_doc):
    es.index(index = "evidence-index", id = i, body ={"text": doc[1]})

# Newly indexed documents only become searchable after a refresh; force one so
# that the search cell sees the whole corpus even when run immediately after.
es.indices.refresh(index="evidence-index")

In [None]:
# For every claim, run a full-text `match` query against the evidence index
# and keep the ids of the (up to) 50 returned hits, in the order returned.
I = []
for claim in claim_doc:
    match_request = {"query": {"match": {"text": claim[1]}}, "size": 50}
    hits = es.search(index="evidence-index", body=match_request)["hits"]["hits"]
    I.append([int(hit['_id']) for hit in hits])
I = np.array(I)


In [None]:
# MRR at cutoffs 1, 10 and 50 for the Elasticsearch results in `I`.
mrr_at_1, mrr_at_10, mrr_at_50 = (
    mrr_at_k(actual=golden_evidence_id, predicted=I, k=cutoff)
    for cutoff in (1, 10, 50)
)
print(f"MRR@1:{mrr_at_1}, MRR@10: {mrr_at_10}, MRR@50: {mrr_at_50}")

In [None]:
# MAP at cutoffs 1, 10 and 50 for the Elasticsearch results in `I`.
map_at_1, map_at_10, map_at_50 = (
    mean_average_precision_at_k(golden_evidence_id, I, cutoff)
    for cutoff in (1, 10, 50)
)
print(
    f"MAP@1: {map_at_1:.4f}",
    f"MAP@10: {map_at_10:.4f}",
    f"MAP@50: {map_at_50:.4f}",
    sep="\n",
)