In [1]:
!pip install faiss-gpu-cu12 sentence-transformers rank_bm25 transformers

Collecting faiss-gpu-cu12
  Downloading faiss_gpu_cu12-1.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting rank_bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Downloading faiss_gpu_cu12-1.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (47.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.9/47.9 MB[0m [31m31.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank_bm25, faiss-gpu-cu12
Successfully installed faiss-gpu-cu12-1.10.0 rank_bm25-0.2.2


In [2]:
import os

# Export the PyTorch CUDA allocator option in an early cell, ahead of the cell
# that imports the model stack.
# NOTE(review): presumably intended to reduce GPU memory fragmentation; the
# embedding cell below runs on CPU, so confirm this setting is still needed.
os.environ.update(PYTORCH_CUDA_ALLOC_CONF="expandable_segments:True")

In [3]:
import json
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

# 🔹 Configuration
device = "cpu"  # all embedding work in this cell runs on the CPU
# Forwarded straight to model.encode(). The previous outer loop sliced the
# corpus into 64-item pieces, but encode() re-batched each slice at its own
# default of 32, so 32 is the batch size the model actually ran with.
batch_size = 32

# 🔹 Load embedding model directly on the target device (avoids loading it on
# the default device first and then moving ~3 GB of weights).
# NOTE(review): trust_remote_code=True executes Python shipped in the model
# repository; consider pinning `revision=` to a reviewed commit.
model = SentenceTransformer(
    "infly/inf-retriever-v1-1.5b",
    trust_remote_code=True,
    device=device,
)

# 🔹 Load legal data
with open("/kaggle/input/content/legal_chunks.json", "r", encoding="utf-8") as f:
    data = json.load(f)

all_chunks = data["BNS"]  # assumes a list of text chunks — TODO confirm schema
if not all_chunks:
    raise ValueError('No chunks found under the "BNS" key; nothing to index.')

# 🔹 Generate embeddings for the corpus
# The chunks are *documents*, so they are encoded WITHOUT the "query" prompt:
# that instruction prompt is meant for search queries only. At search time,
# encode the user query with prompt_name="query".
# encode() handles batching and progress reporting itself.
embeddings = model.encode(
    all_chunks,
    batch_size=batch_size,
    convert_to_numpy=True,
    show_progress_bar=True,
)

# 🔹 Make the matrix FAISS-ready (float32, C-contiguous) and L2-normalize rows
embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)
row_norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
embeddings /= np.clip(row_norms, 1e-12, None)  # guard against zero-norm rows

# 🔹 Use FAISS on CPU
# With unit-length vectors, ranking by L2 distance is equivalent to ranking by
# cosine similarity (smaller distance == more similar).
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)

# 🔹 Add embeddings to FAISS index and verify nothing was dropped
index.add(embeddings)
assert index.ntotal == len(all_chunks), "index size does not match chunk count"

# 🔹 Save FAISS index
faiss.write_index(index, "legal_faiss_cpu_1.index")

print(f"✅ FAISS index saved with {len(all_chunks)} chunks using CPU!")

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/284 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/19.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/55.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/918 [00:00<?, ?B/s]

modeling_qwen.py:   0%|          | 0.00/65.2k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/infly/inf-retriever-v1-1.5b:
- modeling_qwen.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.33k [00:00<?, ?B/s]

tokenization_qwen.py:   0%|          | 0.00/10.8k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/infly/inf-retriever-v1-1.5b:
- tokenization_qwen.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/80.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/370 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

Batches:   0%|          | 0/2 [00:00<?, ?it/s]

✅ FAISS index saved with 488 chunks using CPU!
