In [None]:
import os
import chromadb
import pickle

# ==============================
# PATHS
# ==============================

# NOTE(review): machine-specific absolute paths; anyone else running this
# notebook has to edit them before the first run.
PKL_PATH = r"C:\Users\kriti\OneDrive\Desktop\Infosys\4embedding..4\all_visa_chunks_E5_embeddings.pkl"
CHROMA_DB_PATH = r"C:\Users\kriti\OneDrive\Desktop\Infosys\5vectordb..5\chroma_db"
COLLECTION_NAME = "visa_rules"

# ==============================
# ENSURE DIRECTORY EXISTS
# ==============================

os.makedirs(CHROMA_DB_PATH, exist_ok=True)

# ==============================
# LOAD EMBEDDED CHUNKS
# ==============================

# SECURITY: pickle.load can execute arbitrary code while deserialising.
# Only load a pickle that this project produced itself.
with open(PKL_PATH, "rb") as f:
    chunks = pickle.load(f)

# Fail before touching the database: adding an empty batch would raise only
# after the previous collection had already been deleted.
if not chunks:
    raise ValueError(f"No embedded chunks found in {PKL_PATH}")

print(f"Loaded {len(chunks)} embedded chunks")

# ==============================
# PREPARE DATA
# ==============================

# Built BEFORE the old collection is dropped, so a chunk with a missing key
# raises KeyError while the existing index is still intact.
ids = [c["id"] for c in chunks]
documents = [c["text"] for c in chunks]
embeddings = [c["embedding"] for c in chunks]
metadatas = [
    {"visa_type": c["visa_type"], "chunk_index": c["chunk_index"]}
    for c in chunks
]

if len(set(ids)) != len(ids):
    raise ValueError("Chunk ids must be unique before they are added to Chroma")

# ==============================
# CREATE CHROMA CLIENT
# ==============================

client = chromadb.Client(
    chromadb.config.Settings(
        persist_directory=CHROMA_DB_PATH,
        anonymized_telemetry=False,
        is_persistent=True
    )
)

# ⚠️ IMPORTANT: delete old collection if exists
# The exception class raised for a missing collection differs between Chroma
# releases, so catch Exception (never a bare except, which would also swallow
# KeyboardInterrupt) and report what happened instead of hiding it.
try:
    client.delete_collection(COLLECTION_NAME)
    print("Old collection deleted")
except Exception as exc:
    print(f"No existing collection removed: {exc}")

collection = client.get_or_create_collection(COLLECTION_NAME)

# ==============================
# ADD TO CHROMA
# ==============================

collection.add(
    ids=ids,
    documents=documents,
    embeddings=embeddings,
    metadatas=metadatas
)

print("✅ Data added to Chroma")

# ==============================
# SANITY CHECK
# ==============================

stored_count = collection.count()
print("Collection count:", stored_count)
print("DB contents:", os.listdir(CHROMA_DB_PATH))

# A mismatch means the delete above failed and stale rows survived, or some
# chunks were not stored.
assert stored_count == len(ids), (
    f"Expected {len(ids)} chunks in the collection, found {stored_count}"
)


Loaded 36 embedded chunks
✅ Data added to Chroma
Collection count: 36
DB contents: ['chroma.sqlite3', 'dfb197c2-7ca3-4395-94f2-41e7d03e9779']


In [1]:
import chromadb
from sentence_transformers import SentenceTransformer

# ==============================
# PATH
# ==============================

# NOTE(review): machine-specific absolute path; must point at the directory
# the indexing cell wrote to.
CHROMA_DB_PATH = r"C:\Users\kriti\OneDrive\Desktop\Infosys\5vectordb..5\chroma_db"

# Tunables for this retrieval run.
EMBEDDING_MODEL_NAME = "intfloat/e5-large-v2"
TOP_K = 3
VISA_TYPE_FILTER = "SkilledWorker"

# ==============================
# LOAD EMBEDDING MODEL
# ==============================

model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# ==============================
# LOAD CHROMA DB
# ==============================

client = chromadb.Client(
    chromadb.config.Settings(
        persist_directory=CHROMA_DB_PATH,
        anonymized_telemetry=False,
        is_persistent=True
    )
)

# Read path: get_collection raises if the index is missing, instead of
# silently creating an empty "visa_rules" collection (e.g. on a wrong path)
# and returning no results.
collection = client.get_collection("visa_rules")

# ==============================
# USER QUERY
# ==============================

query = "What is the minimum salary for Skilled Worker visa?"

# E5 models expect the "query: " prefix on search queries.
# NOTE(review): presumably the stored chunks were embedded with the matching
# "passage: " prefix and the same model — confirm in the embedding notebook.
query_embedding = model.encode(
    "query: " + query,
    normalize_embeddings=True
)

# ==============================
# RETRIEVE TOP-K
# ==============================

results = collection.query(
    query_embeddings=[query_embedding.tolist()],
    n_results=TOP_K,
    where={"visa_type": VISA_TYPE_FILTER}
)

# ==============================
# SHOW RESULTS
# ==============================

# One query embedding was sent, so the hits for it sit at index 0.
hit_documents = results["documents"][0]
hit_metadatas = results["metadatas"][0]

if not hit_documents:
    print(f"No chunks matched visa_type={VISA_TYPE_FILTER!r}")

for rank, (doc, metadata) in enumerate(zip(hit_documents, hit_metadatas), start=1):
    print(f"\nResult {rank}")
    print("Text:", doc)
    print("Metadata:", metadata)


  from .autonotebook import tqdm as notebook_tqdm




Result 1
Text: Skilled Worker visa Contents Overview— Your job (/skilled-worker-visa/your-job) — When you can be paid less (/skilled-worker-visa/when-you-can-be-paid-less) — If you work in healthcare or education (/skilled-worker-visa/if-you-work-in-healthcare-or-education) — Knowledge of English (/skilled-worker-visa/knowledge-of-english) — How much it costs (/skilled-worker-visa/how-much-it-costs) — Documents you'll need to apply (/skilled-worker-visa/documents-you-must-provide) — Apply from outside the UK (/skilled-worker-visa/apply-from-outside-the-uk) — Your partner and children (/skilled-worker-visa/your-partner-and-children) — Extend your visa (/skilled-worker-visa/extend-your-visa) — If you got your first certificate of sponsorship before 4 April 2024 (/skilled-worker-visa/certificate-of-sponsorship-before-4-april-2024) — Update your visa if you change job or employer (/skilled-worker-visa/update-your-visa-if-you-change-job-or-employer) — Switch to this visa (/skilled-worker-