## Import libraries

In [1]:
import faiss
import numpy as np
import json
from transformers import AutoTokenizer, AutoModel
import torch

In [2]:
# Locations of the precomputed artifacts. All three are relative paths, so they
# resolve against the notebook's current working directory.
embeddings_path = "embeddings.npy"  # NOTE(review): not referenced by the visible cells — confirm it is still needed
metadata_path = "metadata_new.json"
index_path = "legal_cases_index.faiss"

# Tokenizer and encoder used to embed incoming queries.
# NOTE(review): presumably the same checkpoint that produced the vectors stored
# in the FAISS index — confirm, otherwise query and index vectors are not comparable.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

2024-12-06 18:12:05.988585: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Load metadata and FAISS index

In [3]:
# Read the per-chunk metadata that accompanies the index. The file is decoded
# explicitly as UTF-8: the stored case text contains non-ASCII characters
# (e.g. typographic quotes), and relying on the platform's default encoding
# would make the load locale-dependent.
with open(metadata_path, "r", encoding="utf-8") as f:
    metadata = json.load(f)

# Load the prebuilt FAISS index from disk.
# NOTE(review): positions returned by index.search are used directly as keys
# into `metadata`, so the two files are assumed to be aligned — confirm.
index = faiss.read_index(index_path)

## Embed query text

In [4]:
def embed_text(text):
    """Generate sentence embeddings for a text (or a list of texts).

    The input is tokenized (truncated to 512 tokens), passed through the
    module-level ``model``, and the token embeddings of the last hidden layer
    are mean-pooled into one vector per input.

    Pooling is weighted by the attention mask so that padding tokens do not
    contribute to the average. For a single string no padding is added, so the
    result matches a plain mean over tokens; for a batch of texts with
    different lengths this prevents padded positions from skewing the vectors.

    Args:
        text: A string, or a list of strings, to embed.

    Returns:
        A NumPy array with one row per input text.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        token_embeddings = model(**inputs).last_hidden_state
        # Broadcast the mask over the hidden dimension: 1 for real tokens, 0 for padding.
        mask = inputs["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
        summed = (token_embeddings * mask).sum(dim=1)
        # Clamp guards against division by zero for a row with no real tokens.
        token_counts = mask.sum(dim=1).clamp(min=1e-9)
        embeddings = (summed / token_counts).cpu().numpy()
    return embeddings

## Query the FAISS index

In [5]:
def query_index(user_query, index, metadata, k=5):
    """Perform a similarity search for the user query.

    The query is embedded with ``embed_text`` and searched against ``index``.
    Each hit is looked up in ``metadata`` by the position the index returns.

    Args:
        user_query: Free-text query to search for.
        index: Object exposing ``search(vectors, k)`` that returns a
            ``(distances, indices)`` pair, as a FAISS index does.
        metadata: Container indexed by the positions returned from the index;
            each entry must provide ``"file"`` and ``"text"`` keys.
        k: Maximum number of results to return (default 5).

    Returns:
        A list of dicts with keys ``"rank"`` (1-based), ``"file"``, ``"text"``
        and ``"distance"`` (a Python float), in the order returned by the
        index. The list may contain fewer than ``k`` entries when the index
        cannot supply that many neighbours.

    Raises:
        ValueError: If ``k`` is not positive.
    """
    if k <= 0:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # FAISS expects a C-contiguous float32 matrix of shape (n_queries, dim).
    query_embedding = np.ascontiguousarray(embed_text(user_query), dtype=np.float32)
    distances, indices = index.search(query_embedding, k)

    results = []
    for idx, distance in zip(indices[0], distances[0]):
        # FAISS pads missing neighbours with label -1; indexing metadata with
        # it would silently return the *last* record, so skip those slots.
        if idx < 0:
            continue
        entry = metadata[idx]
        results.append({
            "rank": len(results) + 1,
            "file": entry["file"],
            "text": entry["text"],
            # Plain float keeps the result JSON-serializable.
            "distance": float(distance),
        })
    return results

In [6]:
# Prompt for a free-text query and run the similarity search against the index.
user_query = input("Enter your query: ")
results = query_index(user_query, index, metadata)

# Emit one four-line report per hit. The newline embedded in the last field,
# together with print's own line ending, leaves a blank line between hits.
for result in results:
    print(
        f"Rank {result['rank']}:",
        f"File: {result['file']}",
        f"Text: {result['text']}",
        f"Distance: {result['distance']:.4f}\n",
        sep="\n",
    )

Enter your query:  Limitations of freedom of speech


Rank 1:
File: 0339-01.json
Text: y v. Fisher, 13 Wall. 335, 351, 20 L. Ed. 646.
From these authorities three principles may be extracted: (i) In reference to judges of limited and inferior authority, it has been held that they are protected only when they act within their jurisdiction. (2) Judges of courts of superior or general ju
Distance: 39.0884

Rank 2:
File: 0431-01.json
Text:  as follows:
“That the law of limitations of a foreign country cannot of itself be pleaded, as a bar to an action in this commonwealth seems conceded, and is, indeed, too well settled to be drawn in question. Byrne v. Crowninshield, 17 Mass. 55. The authorities, both from the civil and common law, c
Distance: 39.2956

Rank 3:
File: 0180-01.json
Text:  upon the grounds: (1) That prosecutions for the crime alleged to have been committed by the defendants cannot be begun or instituted by information; that the same must be on presentment or indictment by a grand jury; that such information is contrary to the fi