# Setup the notebook

In [1]:
#!pip install faiss-cpu numpy transformers

In [2]:
import faiss
import json
import numpy as np
from transformers import AutoTokenizer, AutoModel
import torch


In [3]:
# Load FAISS index
index = faiss.read_index("legal_cases_index.faiss")

# Load metadata
with open("metadata_new.json", "r") as f:
    metadata = json.load(f)

# Embedding function

In [4]:
def embed_text(text):
    """Generate embeddings for a given text."""
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)

    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        embeddings = model(**inputs).last_hidden_state.mean(dim=1).cpu().numpy()
    return embeddings


# Query function

In [5]:
def query_index(user_query, index, metadata):
    """Perform a query on the FAISS index and return top results."""
    query_embedding = embed_text(user_query)

    k = 5  # Number of top results to return
    distances, indices = index.search(query_embedding, k)

    results = []
    for i, idx in enumerate(indices[0]):
        results.append({
            "rank": i + 1,
            "file": metadata[idx]["file"],
            "text": metadata[idx]["text"],
            "distance": distances[0][i],
        })

    return results

# Define interactive query system

In [6]:
def query_system(save_results=True):
    """Interactive query system with an option to save results."""
    print("Select a query type:")
    print("1. Search by Name")
    print("2. Search by Abbreviation")
    print("3. Search by Decision Date")
    print("4. Search by Jurisdiction")
    print("5. Custom Legal Query")

    choice = input("Enter choice (1-5): ").strip()
    query = ""

    if choice == "1":
        query = input("Enter case name: ").strip()
    elif choice == "2":
        query = input("Enter case abbreviation: ").strip()
    elif choice == "3":
        query = input("Enter decision date (YYYY-MM-DD): ").strip()
    elif choice == "4":
        query = input("Enter jurisdiction: ").strip()
    elif choice == "5":
        query = input("Enter your custom query: ").strip()
    else:
        print("Invalid choice. Exiting.")
        return

    # Perform the query using precomputed embeddings and metadata
    results = query_index(query, index, metadata)

    # Display results
    print("\nQuery Results:")
    for result in results:
        print(f"Rank: {result['rank']}")
        print(f"File: {result['file']}")
        print(f"Text Snippet: {result['text'][:200]}...")
        print(f"Distance: {result['distance']:.4f}")
        print("\n")

    # Save results to a file if the option is enabled
    if save_results:
        output_file = f"query_results_{choice}.json"
        with open(output_file, "w") as f:
            json.dump(results, f, indent=4)
        print(f"Results saved to {output_file}")


# Interactive query system

In [7]:
query_system()

Select a query type:
1. Search by Name
2. Search by Abbreviation
3. Search by Decision Date
4. Search by Jurisdiction
5. Custom Legal Query


Enter choice (1-5):  1
Enter case name:  morgan


2024-12-07 16:33:31.449840: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.



Query Results:
Rank: 1
File: 0353-01.json
Text Snippet:  they performed their services depending upon any lien upon the vessel, or that they had any contract of that kind. The C. Vanderbilt (D. C.) 86 Fed. 785.
In the case of The Mary Morgan (D. C.) 28 Fed...
Distance: 51.2585


Rank: 2
File: 0353-01.json
Text Snippet: sent, at their home and at the home port of the vessel, an implied lien in admiralty will not be presumed. Unless a contract lien is shown, and unless a lien is specially given by the legal owner for ...
Distance: 52.3619


Rank: 3
File: 0121-01.json
Text Snippet: f, and on the affidavit of R. W. Jennings, hereunto attached.
“A. K. Delaney,
“R. W. Jennings,
“Attorneys for Defendants.”
The affidavit referred to recites the particulars of the failure of the jury ...
Distance: 52.5945


Rank: 4
File: 0431-01.json
Text Snippet:  bar was fully discussed, with the same conclusion as that reached by Chief Justice Kent in the case heretofore referred to. The same question was a

TypeError: Object of type float32 is not JSON serializable