In [4]:
from sentence_transformers import SentenceTransformer
import faiss
import os, json

# Load embedding model
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Folder of judgment files (already split into 4 sections)
folder = r"C:\Users\nihca\OneDrive\Documents\vscode\ml project\summary"

# One metadata record per embedded section; docs[i] describes row i of the
# FAISS index, so the two must always be built in the same order.
docs = []

# Each file contains Case facts, Issues, Arguments, Decision.
# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the index (and therefore the saved metadata) reproducible across runs.
for file in sorted(os.listdir(folder)):
    if not file.endswith(".txt"):
        continue
    with open(os.path.join(folder, file), "r", encoding="utf-8") as f:
        content = f.read()

    # Simple parser: blank-line-separated blocks of the form "Header: body".
    # NOTE(review): a block without any colon is dropped, and a body paragraph
    # that itself contains a colon is treated as a new section - confirm this
    # matches the layout of the summary files.
    for part in content.split("\n\n"):
        section, sep, text = part.partition(":")
        section = section.strip()
        text = text.strip()
        # Skip blocks with no header separator or with nothing to embed.
        if not sep or not text:
            continue
        docs.append({"case_id": file, "section": section, "text": text})

if not docs:
    # Fail early with a clear message instead of an IndexError on shape[1].
    raise ValueError(f"No 'Section: text' blocks found in .txt files under {folder!r}")

# Convert to FAISS index
import numpy as np

# Encode all sections in one batched call rather than one model call per text.
embeddings = embedder.encode([d["text"] for d in docs], convert_to_numpy=True)
# FAISS expects a C-contiguous float32 matrix of shape (n_sections, dim).
embeddings = np.ascontiguousarray(embeddings, dtype="float32")
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

# Save docs + index
faiss.write_index(index, "judgements.index")
with open("judgements_meta.json", "w", encoding="utf-8") as f:
    # ensure_ascii=False keeps non-ASCII text readable in the UTF-8 file.
    json.dump(docs, f, indent=2, ensure_ascii=False)

print("Embeddings saved!")

# ---------- QUERY ----------
def search(query, top_k=3):
    """Return the metadata records of the sections closest to *query*.

    The query is embedded with the module-level ``embedder`` and matched
    against the module-level FAISS ``index``; the matching records are read
    from ``judgements_meta.json``, whose order mirrors the index rows.

    Args:
        query: Free-text query string.
        top_k: Maximum number of results to return.

    Returns:
        A list of at most ``top_k`` dicts with the keys ``case_id``,
        ``section`` and ``text``, nearest first. Empty when ``top_k`` is not
        positive or the index holds no vectors.
    """
    # Never ask FAISS for more neighbours than it holds: it pads the result
    # with label -1, which would silently index the *last* document.
    k = min(top_k, index.ntotal)
    if k <= 0:
        return []

    q_emb = embedder.encode(query).astype("float32").reshape(1, -1)
    _distances, labels = index.search(q_emb, k)

    with open("judgements_meta.json", "r", encoding="utf-8") as f:
        meta = json.load(f)

    # Defensive filter: drop any padding labels and rows missing from the file.
    return [meta[i] for i in labels[0] if 0 <= i < len(meta)]

# Example query: fetch the single closest section and print a short preview.
results = search("Attacking a police officer", top_k=1)
for hit in results:
    # Only the first 200 characters of the section text are shown.
    preview = hit["text"][:200]
    print(f"[{hit['section']} - {hit['case_id']}] {preview}...")


Embeddings saved!
[Arguments/Reasoning - 7.txt] Prosecution relied on oral testimony of 3 witnesses; medical evidence and ballistic expert’s evidence were considered unsatisfactory or inconclusive.
Doubts existed regarding the weapon used, nature o...
