In [7]:
import os
import json
import chromadb
from sentence_transformers import SentenceTransformer
from pathlib import Path
# === Configuration ===
# Paths default to the original development machine's layout. Set the
# DESIGNKID_SCRIPTS_DIR environment variable to point the notebook at another
# directory without editing this cell.
knowledge = "honda_knowledge_store"
scripts_dir = os.environ.get(
    "DESIGNKID_SCRIPTS_DIR",
    "/Users/lewisblackwell/Documents/DESIGN_KID/designkid/scripts",
)
# Directory handed to the persistent ChromaDB client.
persist_path = f"{scripts_dir}/{knowledge}"
# JSON file handed to doc_loader.
json_path = f"{scripts_dir}/honda.json"

In [8]:
def doc_loader(json_path):
    """Load the documents stored in a JSON file.

    Parameters:
        json_path: path of the JSON file to read.

    Returns:
        The parsed JSON content, or None (after printing the reason) when the
        file cannot be opened, decoded or parsed.
    """
    try:
        # JSON is UTF-8 by specification, so do not rely on the platform's
        # default text encoding.
        with open(json_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # OSError covers a missing or unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        print(f"Unable to load {json_path}: {e}")
        return None

In [13]:
def _index_documents(knowledge_db, model, my_docs):
    """Embed and add every entry of my_docs that has a "text" field; return how many were added."""
    indexed = 0
    for i, doc in enumerate(my_docs):
        text = doc.get("text")
        if text is None:
            # Entries without text have nothing to embed; skip them.
            continue
        doc_id = doc.get("id")
        # The position prefix keeps ids distinct even when "id" is missing.
        my_id = f"{i}_{doc_id}" if doc_id is not None else f"{i}"
        knowledge_db.add(
            documents=[text],
            ids=[f"chromadb_doc_{my_id}"],
            embeddings=[model.encode(text).tolist()],
        )
        indexed += 1
    return indexed


def index_chromadb(persist_path, my_docs):
    """Open the persistent knowledge collection, indexing my_docs if it is empty.

    The collection itself decides whether indexing is needed: if it already
    holds documents it is returned as is, otherwise every entry of my_docs
    with a "text" field is embedded and added. Checking the collection rather
    than the directory means a previous run that failed before indexing does
    not block later runs.

    Parameters:
        persist_path: directory passed to chromadb.PersistentClient.
        my_docs: iterable of dict-like documents with "text" and optional
            "id" keys. Only consulted when the collection is empty; may be
            None when the collection is already populated.

    Returns:
        (collection, client) on success, (None, None) on any failure. Every
        path returns a 2-tuple so callers can always unpack the result.
    """
    model_name = "all-MiniLM-L6-v2"
    collection_name = "knowledge_vector_db"

    try:
        client = chromadb.PersistentClient(path=persist_path)
    except Exception as e:
        print(f"‚ùå Unable to initialize ChromaDB or model: {e}")
        return None, None

    try:
        knowledge_db = client.get_or_create_collection(name=collection_name)
        if knowledge_db.count() > 0:
            print("‚úÖ Vector store already exists. Loading from disk...")
            return knowledge_db, client

        print("üì¶ No vector store found. Indexing from honda.json...")
        if my_docs is None:
            raise ValueError("no documents were supplied (my_docs is None)")

        # The embedding model is only needed when there is something to
        # index, so it is not loaded on the "already exists" path above.
        try:
            model = SentenceTransformer(model_name)
        except Exception as e:
            print(f"‚ùå Unable to initialize ChromaDB or model: {e}")
            return None, None

        indexed = _index_documents(knowledge_db, model, my_docs)
        # Report what was actually added, not len(my_docs): entries without
        # text are skipped.
        print(f"‚úÖ Indexed {indexed} documents.")
        return knowledge_db, client

    except Exception as e:
        print(f"‚ùå Error during collection setup or indexing: {e}")
        return None, None


# Embedding models already constructed, keyed by model name, so repeated
# queries reuse one instance instead of reloading it each time.
_EMBEDDING_MODELS = {}


def _get_embedding_model(model_name):
    """Return the SentenceTransformer for model_name, constructing it on first use only."""
    model = _EMBEDDING_MODELS.get(model_name)
    if model is None:
        model = SentenceTransformer(model_name)
        _EMBEDDING_MODELS[model_name] = model
    return model


def query_knowledge_db(knowledge_db, query, model_name="all-MiniLM-L6-v2", top_k=5):
    """
    Query the ChromaDB knowledge collection and return top matching documents.

    Parameters:
        knowledge_db: ChromaDB collection object
        query: str, user question
        model_name: str, embedding model name
        top_k: int, number of results to return

    Returns:
        The dictionary produced by knowledge_db.query. Callers read it as
        results["documents"][0] / results["ids"][0], i.e. one inner list for
        the single query sent here. On failure the error is printed and a
        dictionary with the same nesting but empty inner lists is returned,
        so that indexing with [0] keeps working.
    """
    try:
        model = _get_embedding_model(model_name)
        query_embedding = model.encode(query).tolist()

        # "ids" is deliberately not listed in include: it is not an accepted
        # value there, and the ids are part of the result regardless.
        results = knowledge_db.query(
            query_embeddings=[query_embedding],
            n_results=top_k,
            include=["documents", "metadatas"],
        )
        return results

    except Exception as e:
        print(f"‚ùå Query failed: {e}")
        # Same nesting as a real single-query result, with nothing in it.
        return {"ids": [[]], "documents": [[]], "metadatas": [[]]}


# Smoke test: load (or build) the knowledge base and run one sample query.
if __name__ == "__main__":
    my_docs = doc_loader(json_path)

    # index_chromadb signals failure either with a bare None or with
    # (None, None); normalise both so the unpacking below cannot raise.
    index_result = index_chromadb(persist_path, my_docs)
    knowledge_db, client = index_result if index_result is not None else (None, None)

    query = "How do I change the oil on a 2010 Honda Accord?"

    # Defaults used when the knowledge base is unavailable or the query fails.
    results = {"documents": [], "ids": []}
    documents, ids = [], []

    if knowledge_db is None:
        print("Knowledge base unavailable; skipping sample query.")
    else:
        results = query_knowledge_db(knowledge_db, query, model_name="all-MiniLM-L6-v2", top_k=5)

        # Results are nested per query; element 0 belongs to the single query
        # sent above. The outer list can be empty when the query failed, so
        # check before indexing. IDs are returned even though they are not
        # requested through the include parameter.
        if results.get("documents"):
            documents = results["documents"][0]
        if results.get("ids"):
            ids = results["ids"][0]

    for i, (doc_id, chunk) in enumerate(zip(ids, documents)):
        print(f"üîß Chunk {i+1} (ID: {doc_id}):\n{chunk}\n")

‚úÖ Vector store already exists. Loading from disk...
üîß Chunk 1 (ID: chromadb_doc_9_10):
To change the oil on a 2014 Honda Ridgeline, remove the drain plug, let the oil drain completely, replace the oil filter, and refill with 5W-20 synthetic oil.

üîß Chunk 2 (ID: chromadb_doc_70_71):
Honda Accord models with the 3.5L V6 engine may develop oil leaks from the rear main seal after 100,000 miles. This requires transmission removal for repair.

üîß Chunk 3 (ID: chromadb_doc_0_1):
To replace the air filter on a 2010 Honda Accord, open the hood, locate the air filter housing near the engine, unclip the cover, and swap out the old filter with a new one.

üîß Chunk 4 (ID: chromadb_doc_44_45):
To replace the fuel injectors on a 2009 Honda Accord, depressurize the fuel system, disconnect the fuel rail, remove the injector clips, and pull out the old injectors.

üîß Chunk 5 (ID: chromadb_doc_27_28):
To reset the maintenance light on a 2014 Honda Civic, turn the ignition to ON position, pr