# Config-Assistant RAG Pipeline

**1. Initialising Tracers and LLM**

In [41]:
from langchain_core.documents import Document
from langchain_together import ChatTogether, TogetherEmbeddings
from langchain_core.prompts import  PromptTemplate
from langchain.chains import LLMChain
import chromadb
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain_community.vectorstores import Chroma
from dotenv import load_dotenv # type: ignore
from pathlib import Path
import json 
import os
from tqdm import tqdm

# Load variables from a local .env file into the process environment so the
# os.getenv() reads below can see them.
load_dotenv()

# Together AI credential used by the chat model.
together_api_key = os.getenv("TOGETHER_API_KEY")

# LangChain / LangSmith settings. They are only read into variables here;
# none of them is referenced again in the cells visible in this notebook.
# NOTE(review): LangSmith's documented tracing switch is LANGCHAIN_TRACING_V2;
# confirm that LANGCHAIN_TRACKING is the name actually used in .env.
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
langchain_endpoint = os.getenv("LANGCHAIN_ENDPOINT")
langchain_tracking = os.getenv("LANGCHAIN_TRACKING")
langsmith_project = os.getenv("LANGSMITH_PROJECT")

# temperature=0 keeps generations as deterministic as the backend allows.
llm = ChatTogether(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    together_api_key=together_api_key,  # type: ignore
    temperature=0,
)

**2. Declaring the function to vectorise dotfiles**

In [42]:
def _build_config_documents(path: Path) -> list:
    """Load every ``*.json`` file under ``path`` and turn its blocks into Documents.

    Each file is expected to be a JSON object with an optional ``"structure"``
    mapping; every value of that mapping becomes one ``Document`` whose text is
    the block's ``purpose`` and ``content`` separated by a blank line.

    Args:
        path: Directory that is searched recursively for JSON files.

    Returns:
        A list of ``Document`` objects, one per block, in sorted file order.
    """
    # Only scalar metadata is kept. ``None`` is excluded as well: some Chroma
    # releases reject it, and LangChain's filter_complex_metadata defaults
    # exclude it too.
    # NOTE(review): a list-valued "dependencies" field is dropped by this
    # filter; join it into a string first if it should be searchable.
    allowed_types = (str, int, float, bool)
    documents = []

    # Sorted so the documents are built in a reproducible order across runs.
    json_files = sorted(path.rglob("*.json"))

    for file in tqdm(json_files, desc="Processing JSON files"):
        with file.open("r", encoding="utf-8") as f:
            data = json.load(f)

        # ``or {}`` also covers an explicit ``"structure": null`` in the file.
        for block in (data.get("structure") or {}).values():
            chunk_text = f"{block.get('purpose', '')}\n\n{block.get('content', '')}"

            raw_metadata = {
                "repo_name": data.get("repo_name"),
                "path": block.get("path"),
                "language": block.get("language"),
                "dependencies": block.get("dependencies"),
            }
            safe_metadata = {
                key: value
                for key, value in raw_metadata.items()
                if isinstance(value, allowed_types)
            }

            documents.append(
                Document(page_content=chunk_text, metadata=safe_metadata)
            )

    return documents


def vectorise_configs(
    path: Path,
    persist_dir: str = "/Users/krishiv/Desktop/Projects/config-assistant/data/vector_store",
):
    """Embed the annotated JSON configs under ``path`` into a Chroma store.

    Args:
        path: Directory that is searched recursively for ``*.json`` files.
        persist_dir: Directory handed to Chroma as ``persist_directory``.
            Defaults to the location this notebook has always used, so
            existing calls keep working unchanged.

    Returns:
        The Chroma vector store built from the loaded documents.
    """
    documents = _build_config_documents(path)

    embeddings = TogetherEmbeddings(
        model="togethercomputer/m2-bert-80M-32k-retrieval",
        api_key=os.getenv("TOGETHER_API_KEY"),
    )

    # str() lets callers pass either a plain string or a pathlib.Path.
    persist_dir = str(persist_dir)

    vectorstore = Chroma.from_documents(
        documents,
        embeddings,
        persist_directory=persist_dir,
    )

    print(f"[+] Stored {len(documents)} documents in vectorstore at: {persist_dir}")

    return vectorstore

In [43]:
# Directory holding the annotated JSON configs to index.
PATH = Path("/Users/krishiv/Desktop/Projects/config-assistant/data/configs/annotated_json")

# Builds the Chroma store; the store is created with a persist directory, so
# it is written to disk as part of this call.
# NOTE(review): "no such table: tenants" usually points at a stale or
# version-incompatible Chroma database in the persist directory; confirm by
# restarting the kernel and rebuilding into an empty directory.
vectorstore = vectorise_configs(PATH)

# No explicit vectorstore.persist() here: the method is deprecated in
# LangChain's Chroma wrapper and is a no-op on Chroma >= 0.4, which persists
# automatically.
retriever = vectorstore.as_retriever()

Processing JSON files: 100%|██████████| 16/16 [00:00<00:00, 2167.11it/s]


InternalError: Database error: error returned from database: (code: 1) no such table: tenants