In [1]:
import os
import re
import json
import random
from datetime import datetime
from collections import defaultdict
from typing import List, Tuple, Callable
from pydantic import BaseModel, computed_field

from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from sentence_transformers import SentenceTransformer, util

_use_sample = True
dataset_name = "contractnli"
vectorstore_path = "./vectorstore/faiss_store_sample_minilm"
directory_path = f"../data/{'sample_' if _use_sample else ''}corpus/{dataset_name}"
test_file = f"../data/{'sample_' if _use_sample else ''}benchmarks/{dataset_name}.json"
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2") 

# sentence-transformers/all-MiniLM-L6-v2
# Linq-AI-Research/Linq-Embed-Mistral
# thenlper/gte-base

  from .autonotebook import tqdm as notebook_tqdm
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


# Build Vector Store

In [5]:
def load_documents_with_spans(directory: str, chunk_size: int = 1000, chunk_overlap: int = 0):
    """
    Loads .txt files from a directory, splits each document's text into chunks using
    RecursiveCharacterTextSplitter, computes the span (start, end) for each chunk, and
    returns a list of Document objects with metadata (including filename, source, and span).
    """
    documents = []
    # Initialize the splitter with the desired separators and parameters.
    splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", "!", "?", ".", ":", ";", ",", " ", ""],
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        is_separator_regex=False,
        strip_whitespace=False,
    )
    
    # Process each .txt file in the directory.
    for filename in os.listdir(directory):
        if filename.endswith(".txt"):
            filepath = os.path.join(directory, filename)
            with open(filepath, "r", encoding="utf-8") as f:
                text = f.read()
            
            # Split text into chunks.
            text_splits = splitter.split_text(text)
            
            # Verify that the chunks concatenate to the original text.
            assert "".join(text_splits) == text, "Concatenated splits do not match the original text."
            
            # Compute spans and create Document objects.
            prev_index = 0
            for i, chunk_text in enumerate(text_splits):
                span = (prev_index, prev_index + len(chunk_text))
                prev_index += len(chunk_text)
                doc = Document(
                    page_content=chunk_text,
                    metadata={
                        "filename": filename,
                        "filepath": f"{dataset_name}/{filename}",
                        "span": span,  # Stores the (start, end) positions of the chunk.
                        "id": f"{filename}_chunk_{i}"
                    }
                )
                documents.append(doc)
    return documents

# Load the documents, splitting each into chunks with span metadata.
documents = load_documents_with_spans(directory_path, chunk_size=500, chunk_overlap=0)
print(f"Loaded {len(documents)} document chunks with spans.")

# Build the FAISS vector store using the list of Document objects.
vectorstore = FAISS.from_documents(documents, embeddings)

# Save the FAISS vector store locally for later retrieval.
vectorstore.save_local(vectorstore_path)
print(f"FAISS vector store saved locally at {vectorstore_path}.")

Loaded 2912 document chunks with spans.
FAISS vector store saved locally at ./vectorstore/faiss_store_sample_minilm.


In [None]:
# import shutil

# # Check if the directory exists
# if os.path.exists(vectorstore_path):
#     shutil.rmtree(vectorstore_path)
#     print(f"Deleted the FAISS vector store at: {vectorstore_path}")
# else:
#     print(f"No FAISS vector store found at: {vectorstore_path}")


Deleted the FAISS vector store at: ./vectorstore/faiss_store_gte_base


# Evaluation

In [20]:
#############################
# Define Data Models
#############################

class QASnippet(BaseModel):
    file_path: str
    span: Tuple[int, int]
    answer: str

class QAGroundTruth(BaseModel):
    query: str
    snippets: List[QASnippet]

class RetrievedSnippet(BaseModel):
    file_path: str
    span: Tuple[int, int]
    text: str      # Retrieved text content from the FAISS vectorstore
    score: float   # Relevance score returned by similarity search

class QAResult(BaseModel):
    qa_gt: QAGroundTruth
    retrieved_snippets: List[RetrievedSnippet]

    @computed_field
    @property
    def precision(self) -> float:
        total_retrieved_len = 0
        relevant_retrieved_len = 0
        for snippet in self.retrieved_snippets:
            total_retrieved_len += snippet.span[1] - snippet.span[0]
            # Compare with each ground-truth snippet (they are guaranteed not to overlap)
            for gt_snippet in self.qa_gt.snippets:
                if snippet.file_path == gt_snippet.file_path:
                    common_min = max(snippet.span[0], gt_snippet.span[0])
                    common_max = min(snippet.span[1], gt_snippet.span[1])
                    if common_max > common_min:
                        relevant_retrieved_len += common_max - common_min
        if total_retrieved_len == 0:
            return 0
        return relevant_retrieved_len / total_retrieved_len

    @computed_field
    @property
    def recall(self) -> float:
        total_relevant_len = 0
        relevant_retrieved_len = 0
        for gt_snippet in self.qa_gt.snippets:
            total_relevant_len += gt_snippet.span[1] - gt_snippet.span[0]
            for snippet in self.retrieved_snippets:
                if snippet.file_path == gt_snippet.file_path:
                    common_min = max(snippet.span[0], gt_snippet.span[0])
                    common_max = min(snippet.span[1], gt_snippet.span[1])
                    if common_max > common_min:
                        relevant_retrieved_len += common_max - common_min
        if total_relevant_len == 0:
            return 0
        return relevant_retrieved_len / total_relevant_len

#############################
# Helper Functions
#############################

def load_groundtruth(json_file_path: str) -> List[QAGroundTruth]:
    """
    Loads the QA ground-truth data from a JSON file.
    Expected JSON format:
    {
        "tests": [
            {
                "query": "Your query...",
                "snippets": [
                    {
                        "file_path": "path/to/file.txt",
                        "span": [start, end],
                        "answer": "The answer text..."
                    },
                    ...
                ]
            },
            ...
        ]
    }
    """
    with open(json_file_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    groundtruth_tests = []
    for test in data.get("tests", []):
        snippets = [QASnippet(**snippet) for snippet in test["snippets"]]
        groundtruth_tests.append(QAGroundTruth(query=test["query"], snippets=snippets))
    return groundtruth_tests

# def perform_retrieval(vectorstore: FAISS, query: str, k: int = 5) -> List[RetrievedSnippet]:
#     """
#     Uses the FAISS vector store to perform a similarity search on the given query using
#     similarity_search_with_relevance_score. Converts the returned Document objects into 
#     RetrievedSnippet instances using the metadata, and also stores the relevance score.
#     """
#     # Retrieve a list of tuples: (Document, relevance_score)
#     docs_and_scores: List[Tuple[Document, float]] = vectorstore.similarity_search_with_relevance_scores(query, k=k)
#     retrieved = []
#     for doc, score in docs_and_scores:
#         # Retrieve file path and span from metadata.
#         file_path = doc.metadata.get("filepath")
#         span = doc.metadata.get("span", (0, len(doc.page_content)))
#         retrieved.append(RetrievedSnippet(file_path=file_path, span=span, text=doc.page_content, score=score))
#     return retrieved

def find_best_corpus_embeddings(tgt_corpus: str, corpus_files: List[str],
                                model: SentenceTransformer) -> Tuple[str, float]:
    """
    Embeds the target corpus description and each file name using a sentence transformer,
    then computes cosine similarities to find the best matching file.
    """
    # Embed the target description.
    tgt_embedding = model.encode(tgt_corpus, convert_to_tensor=True)
    # Embed all candidate file names.
    file_embeddings = model.encode(corpus_files, convert_to_tensor=True)
    # Compute cosine similarities.
    cosine_scores = util.cos_sim(tgt_embedding, file_embeddings)[0]
    # Get the index of the best matching file.
    best_idx = int(cosine_scores.argmax())
    best_score = float(cosine_scores[best_idx])
    return corpus_files[best_idx], best_score

# Helper function to extract the target corpus description.
def extract_tgt_corpus(query: str) -> str:
    """
    Extracts the target corpus description from the query and preprocesses it
    by removing the term "Non-Disclosure Agreement" and common English stopwords.
    For example, given:
      "Consider the Non-Disclosure Agreement between Artop and Inno; Does the document permit..."
    it might return:
      "between Artop Inno"
    """
    pattern = r"^Consider (.*?);"
    match = re.match(pattern, query)
    if not match:
        return ""
    
    tgt = match.group(1).strip()
    # Remove the term "Non-Disclosure Agreement" (case-insensitive)
    tgt = re.sub(r"(?i)Non-Disclosure Agreement", "", tgt).strip()
    
    # (Optional: you can add stopword removal here if needed.)
    return tgt

# Helper function to extract the question part (after the semicolon)
def extract_question(query: str) -> str:
    """
    Extracts the question part of the query (i.e. everything after the first semicolon).
    If no semicolon exists, returns the full query.
    """
    parts = query.split(";", 1)
    return parts[1].strip() if len(parts) == 2 else query.strip()

# Updated perform_retrieval function
def perform_retrieval(vectorstore,
                      query: str,
                      k: int = 5,
                      threshold: float = 0.5,
                      match_fn: Callable[[str, List[str]], Tuple[str, float]] = None,
                      candidate_files: List[str] = None,
                      initial_k: int = 20,
                      max_k: int = 1000) -> List["RetrievedSnippet"]:
    """
    Uses the FAISS vector store to perform a similarity search on the question part of the query.
    
    If candidate_files and a matching function (match_fn) are provided, the function will:
      - Extract the target corpus description from the query.
      - Compute the best matching file and its similarity score.
      - If the similarity score is above (or equal to) the threshold,
        it will iteratively retrieve documents (increasing k) until at least k documents from that file are found.
      - Otherwise, a normal similarity search is performed.
    
    The query for the vector store is taken as the text after the first semicolon.
    
    Returns a list of RetrievedSnippet objects.
    """
    # Extract the question part (after the semicolon).
    question_text = extract_question(query)
    
    # If candidate files and match_fn are provided, try to restrict search.
    if candidate_files is not None and match_fn is not None:
        tgt_corpus = extract_tgt_corpus(query)
        best_file, similarity = match_fn(tgt_corpus, candidate_files)
        #print(f"Extracted tgt_corpus: '{tgt_corpus}' | Best matching file: '{best_file}' with similarity: {similarity:.2f}")
        if similarity >= threshold:
            # Iteratively retrieve more documents until we have at least k from best_file or hit max_k.
            current_k = initial_k
            filtered = []
            while current_k <= max_k:
                docs_and_scores = vectorstore.similarity_search_with_relevance_scores(question_text, k=current_k)
                filtered = [(doc, score) for doc, score in docs_and_scores if doc.metadata.get("filepath") == best_file]
                if len(filtered) >= k:
                    break
                current_k *= 2
            # Sort filtered results by descending relevance.
            filtered.sort(key=lambda x: x[1], reverse=True)
            docs_and_scores = filtered[:k]
        else:
            # If similarity is low, perform a normal retrieval.
            docs_and_scores = vectorstore.similarity_search_with_relevance_scores(question_text, k=k)
    else:
        docs_and_scores = vectorstore.similarity_search_with_relevance_scores(question_text, k=k)
    
    # Convert the results into RetrievedSnippet objects.
    retrieved = []
    for doc, score in docs_and_scores:
        file_path = doc.metadata.get("filepath")
        span = doc.metadata.get("span", (0, len(doc.page_content)))
        retrieved.append(RetrievedSnippet(file_path=file_path, span=span, text=doc.page_content, score=score))
    
    return retrieved


#############################
# Main Execution
#############################

# 1. Load ground-truth data.
groundtruth_tests = load_groundtruth(test_file)

# 2. Load the FAISS vector store that was previously created.
vectorstore = FAISS.load_local(vectorstore_path, embeddings, allow_dangerous_deserialization=True)
candidate_files = [os.path.join(f"{dataset_name}", filename) for filename in os.listdir(directory_path) if filename.endswith(".txt")]
model = SentenceTransformer("all-MiniLM-L6-v2")
match_fn_embeddings = lambda tgt, files: find_best_corpus_embeddings(tgt, files, model)

# 3. Evaluate retrieval performance for different k values.
k_values = [1, 3, 5, 10]
all_results = []

for gt in groundtruth_tests:
    for k in k_values:
        retrieved_snippets = perform_retrieval(vectorstore, gt.query, k=k, threshold=0.3,
                                match_fn=match_fn_embeddings,
                                candidate_files=candidate_files)
        qa_result = QAResult(qa_gt=gt, retrieved_snippets=retrieved_snippets)
        # Create a dictionary of results for this query and k.
        result_dict = {
            "query": gt.query,
            "k": k,
            "precision": qa_result.precision,
            "recall": qa_result.recall,
            "ground_truth": [gt_snippet.dict() for gt_snippet in gt.snippets],
            "retrieved": [snippet.dict() for snippet in retrieved_snippets]
        }
        all_results.append(result_dict)

# 4. Save the results as JSON.
curr_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
result_file = f"../data/results/qa_results_{curr_timestamp}.json"
with open(result_file, "w", encoding="utf-8") as f:
    json.dump(all_results, f, indent=2)

print(f"QA results saved to {result_file}.")

/var/folders/hk/j9r7jggx4dxgt8gmzj_c2z080000gn/T/ipykernel_15282/2226371063.py:245: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  "ground_truth": [gt_snippet.dict() for gt_snippet in gt.snippets],
/var/folders/hk/j9r7jggx4dxgt8gmzj_c2z080000gn/T/ipykernel_15282/2226371063.py:246: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  "retrieved": [snippet.dict() for snippet in retrieved_snippets]


QA results saved to ../data/results/qa_results_20250216_223201.json.


In [21]:
with open(result_file, "r", encoding="utf-8") as f:
    results = json.load(f)

# Dictionary to collect precision and recall values per K.
# The keys will be the K value and the values a list of (precision, recall) tuples.
metrics_by_k = defaultdict(list)

for item in results:
    k = item.get("k")
    precision = item.get("precision", 0)
    recall = item.get("recall", 0)
    metrics_by_k[k].append((precision, recall))

# Compute the average precision and recall for each K.
avg_metrics = {}
for k, metrics in metrics_by_k.items():
    if metrics:
        total_precision = sum(m[0] for m in metrics)
        total_recall = sum(m[1] for m in metrics)
        count = len(metrics)
        avg_precision = total_precision / count
        avg_recall = total_recall / count
    else:
        avg_precision = 0
        avg_recall = 0
    avg_metrics[k] = {"avg_precision": avg_precision, "avg_recall": avg_recall}

# Print the results.
print("Average Precision and Recall for each K:")
for k in sorted(avg_metrics.keys()):
    metrics = avg_metrics[k]
    print(f"K = {k}: Average Precision = {metrics['avg_precision']:.4f}, Average Recall = {metrics['avg_recall']:.4f}")

Average Precision and Recall for each K:
K = 1: Average Precision = 0.1934, Average Recall = 0.1780
K = 3: Average Precision = 0.1597, Average Recall = 0.3753
K = 5: Average Precision = 0.1192, Average Recall = 0.4306
K = 10: Average Precision = 0.0983, Average Recall = 0.5281


In [None]:
# with open(result_file, "r", encoding="utf-8") as f:
#     results = json.load(f)

# # Dictionary to collect precision and recall values per K.
# # The keys will be the K value and the values a list of (precision, recall) tuples.
# metrics_by_k = defaultdict(list)

# for item in results:
#     k = item.get("k")
#     precision = item.get("precision", 0)
#     recall = item.get("recall", 0)
#     metrics_by_k[k].append((precision, recall))

# # Compute the average precision and recall for each K.
# avg_metrics = {}
# for k, metrics in metrics_by_k.items():
#     if metrics:
#         total_precision = sum(m[0] for m in metrics)
#         total_recall = sum(m[1] for m in metrics)
#         count = len(metrics)
#         avg_precision = total_precision / count
#         avg_recall = total_recall / count
#     else:
#         avg_precision = 0
#         avg_recall = 0
#     avg_metrics[k] = {"avg_precision": avg_precision, "avg_recall": avg_recall}

# # Print the results.
# print("Average Precision and Recall for each K:")
# for k in sorted(avg_metrics.keys()):
#     metrics = avg_metrics[k]
#     print(f"K = {k}: Average Precision = {metrics['avg_precision']:.4f}, Average Recall = {metrics['avg_recall']:.4f}")

Average Precision and Recall for each K:
K = 1: Average Precision = 0.0192, Average Recall = 0.0165
K = 3: Average Precision = 0.0122, Average Recall = 0.0434
K = 5: Average Precision = 0.0109, Average Recall = 0.0594
K = 10: Average Precision = 0.0098, Average Recall = 0.0832


In [6]:
qidx = random_number = random.randint(1, len(results))
results[qidx]

{'query': 'Consider the Mutual Non-Disclosure Agreement between The Knights of Unity; Does the document state that Confidential Information shall only include technical information?',
 'k': 1,
 'precision': 0.0,
 'recall': 0.0,
 'ground_truth': [{'file_path': 'contractnli/MutualNDA_The_Knights_of_Unity.txt',
   'span': [1200, 1901],
   'answer': '1. References to "Confidential Information" mean all knowledge, information or materials whether of a technical or financial nature or otherwise relating to the business or affairs of the Parties (including without limitation any subsidiary or affiliated entity thereof), including all memoranda, notes, analyses, compilations, studies and other materials prepared by or for the receiving party which contain or reflect such knowledge, information or materials, which is provided or disclosed by the disclosing party to the receiving party in connection with the Opportunity and identified at the time of such disclosure as being confidential; provide

In [21]:
qidx = random_number = random.randint(1, len(results))
results[qidx]

{'query': "Consider SWA's Non-Disclosure Agreement; Does the document mention that some obligations of the Agreement may survive the termination of the Agreement?",
 'k': 10,
 'precision': 0.0,
 'recall': 0.0,
 'ground_truth': [{'file_path': 'contractnli/Model%20NDA%20(recommended%20by%20SWA).txt',
   'span': [10497, 10690],
   'answer': '19. All obligations respecting the Confidential information already provided hereunder shall survive in perpetuity after the date that the specific Confidential information was first disclosed.'}],
 'retrieved': [{'file_path': 'contractnli/JB-Machine-LLC-NDA-1.txt',
   'span': [14659, 14903],
   'text': '. This Agreement may be terminated by means of written notice of the Owner or by end of the contractual relationship to which this Non-disclosure agreement undoubtedly ties to the execution of a contract related to this Non-disclosure agreement',
   'score': 0.5935623454754999},
  {'file_path': 'contractnli/wayne-fueling-systems-mutual-non-disclosure-

# Query Segmentation

In [43]:
groundtruth_tests = load_groundtruth(test_file)
test_queries = [gt.query for gt in groundtruth_tests]
list_corpus = [os.path.join(f"{dataset_name}", filename) for filename in os.listdir(directory_path) if filename.endswith(".txt")]

In [80]:
import re
import difflib
from collections import Counter
from rapidfuzz import fuzz
from typing import List, Tuple, Callable
from sentence_transformers import SentenceTransformer, util
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords', quiet=True)

def extract_tgt_corpus(query: str) -> str:
    """
    Extracts the target corpus description from the query and preprocesses it
    by removing the term "Non-Disclosure Agreement" (case-insensitive) and common English stopwords.
    
    For example:
      "Consider the Non-Disclosure Agreement between Artop and Inno; Does the document permit..."
    returns (after preprocessing):
      "between Artop Inno"
    """
    # Extract text between "Consider the " and the first semicolon
    pattern = r"^Consider (.*?);"
    match = re.match(pattern, query)
    if not match:
        return ""
    
    tgt = match.group(1).strip()
    
    # Remove the term "Non-Disclosure Agreement" (case-insensitive)
    tgt = re.sub(r"(?i)Non-Disclosure Agreement", "", tgt).strip()
    
    # Load common English stopwords from NLTK
    stop_words = set(stopwords.words("english"))
    
    # Tokenize the text (here using simple whitespace splitting)
    tokens = tgt.split()
    
    # Filter out stopwords
    filtered_tokens = [token for token in tokens if token.lower() not in stop_words]
    
    # Join tokens back into a string
    processed_tgt = " ".join(filtered_tokens)
    return processed_tgt

def find_best_corpus(tgt_corpus: str, corpus_files: List[str]) -> Tuple[str, float]:
    """
    Given a target corpus description and a list of corpus file names,
    returns the file name with the highest similarity score and that score.
    """
    best_match = None
    best_ratio = 0.0
    for file in corpus_files:
        ratio = difflib.SequenceMatcher(None, tgt_corpus.lower(), file.lower()).ratio()
        if ratio > best_ratio:
            best_ratio = ratio
            best_match = file
    return best_match, best_ratio

def find_best_corpus_rapid(tgt_corpus: str, corpus_files: List[str]) -> Tuple[str, float]:
    """
    Uses RapidFuzz's token_set_ratio to compute a similarity score between the target corpus and each file name.
    Returns the best matching file and its score (normalized between 0 and 1).
    """
    best_match = None
    best_score = 0.0
    for file in corpus_files:
        # token_set_ratio handles unordered tokens and common token removal well.
        score = fuzz.token_set_ratio(tgt_corpus, file)
        if score > best_score:
            best_score = score
            best_match = file
    # Normalize the score to [0, 1] (RapidFuzz returns a value in [0,100])
    return best_match, best_score / 100.0

def find_best_corpus_embeddings(tgt_corpus: str, corpus_files: List[str],
                                model: SentenceTransformer) -> Tuple[str, float]:
    """
    Embeds the target corpus description and each file name using a sentence transformer,
    then computes cosine similarities to find the best matching file.
    """
    # Embed the target description.
    tgt_embedding = model.encode(tgt_corpus, convert_to_tensor=True)
    # Embed all candidate file names.
    file_embeddings = model.encode(corpus_files, convert_to_tensor=True)
    # Compute cosine similarities.
    cosine_scores = util.cos_sim(tgt_embedding, file_embeddings)[0]
    # Get the index of the best matching file.
    best_idx = int(cosine_scores.argmax())
    best_score = float(cosine_scores[best_idx])
    return corpus_files[best_idx], best_score


def evaluate_corpus_matching(ground_truths: List[QAGroundTruth],
                             candidate_files: List[str],
                             threshold: float,
                             match_fn: Callable[[str, List[str]], Tuple[str, float]]
                             ) -> List[int]:
    """
    For each QAGroundTruth:
      - Extract the target corpus from the query.
      - Find the best matching file using the provided match_fn and its similarity score.
      - If the similarity score is below the threshold, assign a score of 0.
      - If above the threshold:
          * Assign 1 if the best matching file is among the actual file paths.
          * Assign -1 if it does not match.
    Returns a list of scores.
    """
    scores = []
    for gt in ground_truths:
        tgt_corpus = extract_tgt_corpus(gt.query)
        best_file, similarity = match_fn(tgt_corpus, candidate_files)
        # Get the set of actual file paths from the ground truth snippets.
        actual_files = {snippet.file_path for snippet in gt.snippets}
        
        if similarity >= threshold:
            score = 1 if best_file in actual_files else -1
        else:
            score = 0

        scores.append(score)
        print("Query:")
        print(gt.query)
        print("Extracted tgt_corpus:", tgt_corpus)
        print(f"Best matching file: {best_file} (similarity: {similarity:.2f})")
        print("Actual file(s):", actual_files)
        print("Assigned score:", score)
        print("-" * 60)
    return scores


In [59]:
threshold = 0.5
scores = evaluate_corpus_matching(groundtruth_tests, list_corpus, threshold, find_best_corpus)

print("Final evaluation scores:", scores)
counts = Counter(scores)
print(counts)

Query:
Consider the Mutual Non-Disclosure Agreement between Khronos and Khronos; Does the document allow the Receiving Party to acquire information similar to the Confidential Information from a third party?
Extracted tgt_corpus: the Mutual Non-Disclosure Agreement between Khronos and Khronos
Best matching file: contractnli/amc-general-mutual-non-disclosure-agreement-en-gb.txt (similarity: 0.52)
Actual file(s): {'contractnli/khronos-mutual-nda.txt'}
Assigned score: -1
------------------------------------------------------------
Query:
Consider the Non-Disclosure Agreement between ON Semiconductor and Industry Analysts; Does the document restrict the use of Confidential Information to the purposes stated in the Agreement?
Extracted tgt_corpus: the Non-Disclosure Agreement between ON Semiconductor and Industry Analysts
Best matching file: contractnli/Mutual-Non-Disclosure-Agreement-Inventor-Product-Development-Experts-Inc..txt (similarity: 0.46)
Actual file(s): {'contractnli/NDA-ONSemi_I

In [61]:
threshold = 0.4
scores = evaluate_corpus_matching(groundtruth_tests, list_corpus, threshold, find_best_corpus_rapid)

print("Final evaluation scores:", scores)
counts = Counter(scores)
print(counts)

Query:
Consider the Mutual Non-Disclosure Agreement between Khronos and Khronos; Does the document allow the Receiving Party to acquire information similar to the Confidential Information from a third party?
Extracted tgt_corpus: the Mutual Non-Disclosure Agreement between Khronos and Khronos
Best matching file: contractnli/amc-general-mutual-non-disclosure-agreement-en-gb.txt (similarity: 0.48)
Actual file(s): {'contractnli/khronos-mutual-nda.txt'}
Assigned score: -1
------------------------------------------------------------
Query:
Consider the Non-Disclosure Agreement between ON Semiconductor and Industry Analysts; Does the document restrict the use of Confidential Information to the purposes stated in the Agreement?
Extracted tgt_corpus: the Non-Disclosure Agreement between ON Semiconductor and Industry Analysts
Best matching file: contractnli/Data Use Agreement New York City.txt (similarity: 0.46)
Actual file(s): {'contractnli/NDA-ONSemi_IndustryAnalystConf-2011.txt'}
Assigned sc

In [82]:
threshold = 0.3
model = SentenceTransformer("all-MiniLM-L6-v2")
match_fn_embeddings = lambda tgt, files: find_best_corpus_embeddings(tgt, files, model)

scores = evaluate_corpus_matching(groundtruth_tests, list_corpus, threshold, match_fn_embeddings)

print("Final evaluation scores:", scores)
counts = Counter(scores)
print(counts)

Query:
Consider the Mutual Non-Disclosure Agreement between Khronos and Khronos; Does the document allow the Receiving Party to acquire information similar to the Confidential Information from a third party?
Extracted tgt_corpus: Mutual Khronos Khronos
Best matching file: contractnli/khronos-mutual-nda.txt (similarity: 0.49)
Actual file(s): {'contractnli/khronos-mutual-nda.txt'}
Assigned score: 1
------------------------------------------------------------
Query:
Consider the Non-Disclosure Agreement between ON Semiconductor and Industry Analysts; Does the document restrict the use of Confidential Information to the purposes stated in the Agreement?
Extracted tgt_corpus: Semiconductor Industry Analysts
Best matching file: contractnli/Mutual-Non-Disclosure-Agreement-Inventor-Product-Development-Experts-Inc..txt (similarity: 0.37)
Actual file(s): {'contractnli/NDA-ONSemi_IndustryAnalystConf-2011.txt'}
Assigned score: -1
------------------------------------------------------------
Query:
