In [1]:
import pandas as pd
import chromadb
from sentence_transformers import SentenceTransformer

# Load the IPC sections CSV
df = pd.read_csv(r"Final_IPC_Sections.csv", encoding="ISO-8859-1")

# Ensure necessary columns exist
if 'Section' not in df.columns or 'Description' not in df.columns:
    raise ValueError("CSV must have 'Section' and 'Description' columns")

# Initialize ChromaDB and create a persistent database
chroma_path = "./new_vector_db"  # Directory to store vector DB
chroma_client = chromadb.PersistentClient(path=chroma_path)
collection = chroma_client.get_or_create_collection(name="ipc_sections")

# Load embedding model
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")  # Lightweight and fast

# Remove NaN (missing values) in the 'Description' column
df = df.dropna(subset=["Description"])  # Option 1: Drop rows where 'Description' is missing
# OR
df["Description"] = df["Description"].fillna("")  # Option 2: Replace NaN with an empty string


# Add IPC sections to vector DB
for index, row in df.iterrows():
    section_id = str(row["Section"])
    description = row["Description"]
    embedding = embedding_model.encode(description).tolist()
    
    collection.add(
        ids=[section_id],
        embeddings=[embedding],
        metadatas=[{"section": section_id, "description": description}]
    )

print("✅ IPC sections stored in ChromaDB at", chroma_path)


  from .autonotebook import tqdm as notebook_tqdm
⚠️ It looks like you upgraded from a version below 0.5.6 and could benefit from vacuuming your database. Run chromadb utils vacuum --help for more information.
Add of existing embedding ID: IPC_140
Add of existing embedding ID: IPC_140
Insert of existing embedding ID: IPC_140
Add of existing embedding ID: IPC_140
Insert of existing embedding ID: IPC_140
Add of existing embedding ID: IPC_127
Insert of existing embedding ID: IPC_127
Add of existing embedding ID: IPC_128
Insert of existing embedding ID: IPC_128
Add of existing embedding ID: IPC_129
Insert of existing embedding ID: IPC_129
Add of existing embedding ID: IPC_130
Insert of existing embedding ID: IPC_130
Add of existing embedding ID: IPC_131
Insert of existing embedding ID: IPC_131
Add of existing embedding ID: IPC_132
Insert of existing embedding ID: IPC_132
Add of existing embedding ID: IPC_133
Insert of existing embedding ID: IPC_133
Add of existing embedding ID: IPC_134
Ins

✅ IPC sections stored in ChromaDB at ./new_vector_db
