# Set up the notebook

In [1]:
# %pip install faiss-cpu numpy transformers torch

In [2]:
import faiss
import json
import numpy as np
from transformers import AutoTokenizer, AutoModel
import torch


In [3]:
# Paths of the prebuilt artifacts, resolved relative to the working directory.
INDEX_PATH = "legal_cases_index.faiss"
METADATA_PATH = "metadata_new.json"

# Load FAISS index
index = faiss.read_index(INDEX_PATH)

# Load metadata. JSON is read explicitly as UTF-8 so that non-ASCII characters
# in the case text decode the same way on every platform instead of depending
# on the locale's default encoding.
# NOTE(review): presumably metadata[i] describes the vector stored at position
# i of the index -- confirm against the code that built both files.
with open(METADATA_PATH, "r", encoding="utf-8") as f:
    metadata = json.load(f)

# Embedding function

In [4]:
# Loaded (tokenizer, model) pairs keyed by model name, so repeated queries
# reuse the same objects instead of re-instantiating them on every call.
_EMBEDDER_CACHE = {}


def _load_embedder(model_name):
    """Return the (tokenizer, model) pair for ``model_name``, loading it on first use."""
    if model_name not in _EMBEDDER_CACHE:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModel.from_pretrained(model_name)
        _EMBEDDER_CACHE[model_name] = (tokenizer, model)
    return _EMBEDDER_CACHE[model_name]


def embed_text(text, model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Generate embeddings for a given text.

    Args:
        text: Input passed straight to the tokenizer (a single string in this
            notebook's usage).
        model_name: Hugging Face model identifier used for both the tokenizer
            and the encoder. The default matches the previously hard-coded
            model.

    Returns:
        A NumPy array holding the model's ``last_hidden_state`` averaged over
        dimension 1 -- presumably one row per input text; confirm against the
        dimensionality of the FAISS index.
    """
    tokenizer, model = _load_embedder(model_name)

    # Inputs longer than 512 tokens are truncated by the tokenizer.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
    with torch.no_grad():
        # Plain (unmasked) mean over dimension 1. NOTE(review): when several
        # texts of different lengths are passed at once, padding positions are
        # included in this mean; kept as-is so query vectors stay comparable
        # with however the existing index was built.
        embeddings = model(**inputs).last_hidden_state.mean(dim=1).cpu().numpy()
    return embeddings


# Query function

In [5]:
def query_index(user_query, index, metadata, k=5):
    """Perform a query on the FAISS index and return top results.

    Args:
        user_query: Text to embed with ``embed_text`` and search for.
        index: Object exposing ``search(vectors, k)`` that returns a
            ``(distances, indices)`` pair, as a FAISS index does.
        metadata: Sequence indexed by the positions returned from the search;
            each entry must provide the ``"file"`` and ``"text"`` keys.
        k: Maximum number of results to return (defaults to 5, the previously
            hard-coded value).

    Returns:
        A list of dicts with ``rank`` (1-based), ``file``, ``text`` and
        ``distance`` (as a Python float), in the order the index returned
        them. The list is shorter than ``k`` when the index holds fewer than
        ``k`` vectors.
    """
    query_embedding = embed_text(user_query)
    distances, indices = index.search(query_embedding, k)

    results = []
    # Only one query vector is searched, so row 0 holds all of its neighbours.
    for idx, distance in zip(indices[0], distances[0]):
        # FAISS fills missing neighbours with label -1. Indexing metadata with
        # -1 would silently return its last entry as a bogus hit, so skip them.
        if idx < 0:
            continue
        entry = metadata[int(idx)]
        results.append({
            "rank": len(results) + 1,
            "file": entry["file"],
            "text": entry["text"],
            "distance": float(distance),
        })

    return results

# Define interactive query system

In [6]:
def query_system(save_results=True):
    """Interactive query system with an option to save results.

    Repeatedly shows the menu, reads a query type and a query string from
    ``input()``, runs the query through ``query_index`` against the
    module-level ``index`` and ``metadata``, and prints the results. Entering
    ``exit`` (any letter case) at either prompt ends the loop.

    Every menu option runs the same search over the query text; the chosen
    option only selects the prompt wording and the name of the output file.

    Args:
        save_results: When true, the results of each query are written to
            ``query_results_<choice>.json`` in the working directory. The file
            is opened in write mode, so a later query made with the same menu
            choice replaces the earlier file.
    """
    # Prompt shown for each valid menu choice.
    prompts = {
        "1": "Enter case name: ",
        "2": "Enter case abbreviation: ",
        "3": "Enter decision date (YYYY-MM-DD): ",
        "4": "Enter jurisdiction: ",
        "5": "Enter your custom query: ",
    }

    while True:
        print("\nWelcome to the Legal Case Retrieval System!")
        print("Type 'exit' at any point to quit.")
        print("Select a query type:")
        print("1. Search by Name")
        print("2. Search by Abbreviation")
        print("3. Search by Decision Date")
        print("4. Search by Jurisdiction")
        print("5. Custom Legal Query")

        choice = input("Enter choice (1-5): ").strip()
        if choice.lower() == "exit":
            print("Exiting the system. Goodbye!")
            break

        prompt = prompts.get(choice)
        if prompt is None:
            print("Invalid choice. Please try again.")
            continue

        query = input(prompt).strip()
        if query.lower() == "exit":
            print("Exiting the system. Goodbye!")
            break

        # A blank query carries nothing to search for; embedding it would only
        # return arbitrary neighbours, so ask again instead.
        if not query:
            print("Empty query. Please try again.")
            continue

        # Perform the query using precomputed embeddings and metadata
        results = query_index(query, index, metadata)

        # Display results; the text is cut to its first 200 characters.
        print("\nQuery Results:")
        for result in results:
            print(f"Rank: {result['rank']}")
            print(f"File: {result['file']}")
            print(f"Text Snippet: {result['text'][:200]}...")
            print(f"Distance: {result['distance']:.4f}")
            print("\n")

        if save_results:
            output_file = f"query_results_{choice}.json"
            with open(output_file, "w", encoding="utf-8") as f:
                json.dump(results, f, indent=4)
            print(f"Results saved to {output_file}")

        print("\nWould you like to perform another query?")


# Run the interactive query system

In [None]:
# Start the interactive loop. It blocks on input() until 'exit' is entered and,
# with the default save_results=True, writes each result set to
# query_results_<choice>.json in the working directory.
query_system()


Welcome to the Legal Case Retrieval System!
Type 'exit' at any point to quit.
Select a query type:
1. Search by Name
2. Search by Abbreviation
3. Search by Decision Date
4. Search by Jurisdiction
5. Custom Legal Query


Enter choice (1-5):  3
Enter decision date (YYYY-MM-DD):  1901-11-16


2024-12-09 04:28:36.168080: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.



Query Results:
Rank: 1
File: 0570-01.json
Text Snippet: uary, 1901, but made no further examination to ascertain whether or not these people had any valid claim to the property, because of his opinion that the title of Pitcher, through whom they obtained t...
Distance: 29.6576


Rank: 2
File: 0575-01.json
Text Snippet: s now pending on appeal. This point is based upon the contents of an order made and entered on August 12, 1901, as follows:
“It is hereby ordered in the above-entitled cause that all proceedings there...
Distance: 29.8681


Rank: 3
File: 0204-01.json
Text Snippet: s case it appears that, after the court made its order and injunction of November 28, 1900, and also its order of August 7, 1901, the defendants in this proceeding, Price and Tremper, went upon the pr...
Distance: 30.7524


Rank: 4
File: 0439-01.json
Text Snippet: date of bringing this action (December 24, 1900). The plaintiff further contends that, under the statute passed on June 6, 1900, it would have one y

Enter choice (1-5):  1
Enter case name:  Colbert v Broadway





Query Results:
Rank: 1
File: 0407-01.json
Text Snippet:  Ed. 59; Railroad Co. v. Converse, 139 U. S. 469, 11 Sup. Ct. 569, 35 L. Ed. 213; Gowen v. Harley, 6 C. C. A. 190, 56 Fed. 973, 12 U. S. App. 574; Chicago G. W. Ry. Co. v. Price, 38 C. C. A. 239, 97 F...
Distance: 61.0792


Rank: 2
File: 0570-01.json
Text Snippet: court, and the defendants will not now be-heard to question the technical form of the action.
Upon the facts, I am compelled to render judgment in. favor of the plaintiffs. At the time of the defendan...
Distance: 61.1685


Rank: 3
File: 0561-01.json
Text Snippet:  jury as offered, and, after a careful consideration of the authorities, is satisfied that no error was committed in so doing. Boyce v. California Stage Co., 25 Cal. 460; Roberts v. Graham, 6 Wall. 57...
Distance: 61.5271


Rank: 4
File: 0081-01.json
Text Snippet: ever been chance of doubt since the Magna Charta. Chalker v. Dickenson, 1 Conn. 382, 6 Am. Dec. 250; Collins v. Benbury, 25 N. C. 277, 38 Am. Dec. 7

Enter choice (1-5):  1
Enter case name:  Colbert



Query Results:
Rank: 1
File: 0630-01.json
Text Snippet: ted States, which reads as follows:
“The Congress shall have power to lay and collect taxes, duties, imposts and excises, to pay the debts and provide for the common defense and general welfare of the...
Distance: 63.1039


Rank: 2
File: 0114-01.json
Text Snippet: ted States, 42 C. C. A. 452, 102 Fed. 473; McAllister v. United States, 141 U. S. 174, 11 Sup. Ct. 949, 35 L. Ed. 693; Ex parte Cooper, 143 U. S. 472, 12 Sup. Ct. 453, 36 L. Ed. 232. In legislating fo...
Distance: 63.1185


Rank: 3
File: 0111-01.json
Text Snippet: BROWN, District Judge.
The petition of Samuel Burton states that he is a native of British Columbia, now and for many years a resident of Alaska, and he prays to be admitted to citizenship in the Unit...
Distance: 63.2825


Rank: 4
File: 0658-01.json
Text Snippet: an v. M. E. Handy et al. Notice of Appeal. To John E. Dillon, Attorney for Plaintiff Herein: You will please take notice that the defendants in the 

Enter choice (1-5):  5
Enter your custom query:  Colbert



Query Results:
Rank: 1
File: 0630-01.json
Text Snippet: ted States, which reads as follows:
“The Congress shall have power to lay and collect taxes, duties, imposts and excises, to pay the debts and provide for the common defense and general welfare of the...
Distance: 63.1039


Rank: 2
File: 0114-01.json
Text Snippet: ted States, 42 C. C. A. 452, 102 Fed. 473; McAllister v. United States, 141 U. S. 174, 11 Sup. Ct. 949, 35 L. Ed. 693; Ex parte Cooper, 143 U. S. 472, 12 Sup. Ct. 453, 36 L. Ed. 232. In legislating fo...
Distance: 63.1185


Rank: 3
File: 0111-01.json
Text Snippet: BROWN, District Judge.
The petition of Samuel Burton states that he is a native of British Columbia, now and for many years a resident of Alaska, and he prays to be admitted to citizenship in the Unit...
Distance: 63.2825


Rank: 4
File: 0658-01.json
Text Snippet: an v. M. E. Handy et al. Notice of Appeal. To John E. Dillon, Attorney for Plaintiff Herein: You will please take notice that the defendants in the 

Enter choice (1-5):  3
Enter decision date (YYYY-MM-DD):  1901-11-16



Query Results:
Rank: 1
File: 0570-01.json
Text Snippet: uary, 1901, but made no further examination to ascertain whether or not these people had any valid claim to the property, because of his opinion that the title of Pitcher, through whom they obtained t...
Distance: 29.6576


Rank: 2
File: 0575-01.json
Text Snippet: s now pending on appeal. This point is based upon the contents of an order made and entered on August 12, 1901, as follows:
“It is hereby ordered in the above-entitled cause that all proceedings there...
Distance: 29.8681


Rank: 3
File: 0204-01.json
Text Snippet: s case it appears that, after the court made its order and injunction of November 28, 1900, and also its order of August 7, 1901, the defendants in this proceeding, Price and Tremper, went upon the pr...
Distance: 30.7524


Rank: 4
File: 0439-01.json
Text Snippet: date of bringing this action (December 24, 1900). The plaintiff further contends that, under the statute passed on June 6, 1900, it would have one y

Enter choice (1-5):  5
Enter your custom query:  What about 1901-11-16?



Query Results:
Rank: 1
File: 0439-01.json
Text Snippet: date of bringing this action (December 24, 1900). The plaintiff further contends that, under the statute passed on June 6, 1900, it would have one year in which to bring its action after June 6, 1900....
Distance: 23.8545


Rank: 2
File: 0575-01.json
Text Snippet: s now pending on appeal. This point is based upon the contents of an order made and entered on August 12, 1901, as follows:
“It is hereby ordered in the above-entitled cause that all proceedings there...
Distance: 24.0466


Rank: 3
File: 0286-01.json
Text Snippet: n contravention of the established customs of the mining region. Therefore it was modified by the act of May 10, 1872, so as to provide for exceptional cases where reason and common sense required a d...
Distance: 24.5631


Rank: 4
File: 0439-01.json
Text Snippet: refore, that the proviso attached to section 4 of the act of June 6, 1900, was a mere reiteration and adoption of the old Oregon statute, and it can