<a href="https://colab.research.google.com/github/rsrini7/Colabs/blob/main/llamaindex_langchain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
!pip install llama-index chromadb llama-index-vector-stores-chroma llama-index-embeddings-huggingface sentence-transformers llama-index-llms-openai litellm --quiet


In [8]:
# llamaindex_rag_openrouter_colab_litellm.py
from google.colab import userdata
import os
import logging
import sys
from typing import Optional, Any, AsyncGenerator, Generator

# --- LlamaIndex Imports ---
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    Settings,
    Document
)
from llama_index.core.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata
from llama_index.core.callbacks import CallbackManager
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# --- Other Necessary Imports ---
import chromadb
import litellm

# --- Configuration & Constants ---
EMBED_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
DATA_DIR = "./data"
SAMPLE_FILE_NAME = "sample.txt"
OPENROUTER_LITELLM_MODEL_STRING = "openrouter/openai/gpt-3.5-turbo" # Or your preferred OpenRouter model
DB_PATH = './db_chroma_llamaindex_openrouter_litellm'
COLLECTION_NAME = "llamaindex_rag_openrouter_colab_litellm"

# --- Helper: Setup Logging (Optional) ---
# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# logging.getLogger('litellm').setLevel(logging.INFO) # To see LiteLLM logs

# --- Custom LLM Class using LiteLLM ---
class LiteLLMCustom(CustomLLM):
    model_string_for_litellm: str = OPENROUTER_LITELLM_MODEL_STRING
    num_output: int = 512

    _model_name_internal: str
    _actual_context_window: int

    def __init__(self,
                 model_string_for_litellm: Optional[str] = None,
                 num_output: Optional[int] = None,
                 callback_manager: Optional[CallbackManager] = None,
                 **kwargs: Any):
        init_data = {}
        if model_string_for_litellm is not None:
            init_data["model_string_for_litellm"] = model_string_for_litellm
        if num_output is not None:
            init_data["num_output"] = num_output
        if callback_manager is not None:
            init_data["callback_manager"] = callback_manager
        init_data.update(kwargs)
        super().__init__(**init_data)
        self._model_name_internal = self.model_string_for_litellm
        self._actual_context_window = self._get_model_info(self.model_string_for_litellm)

    def _get_model_info(self, model_name_param: str) -> int:
        try:
            info = litellm.get_model_info(model_name_param)
            if info and 'max_input_tokens' in info and info['max_input_tokens'] is not None:
                return int(info['max_input_tokens'])
        except Exception as e:
            print(f"Warning: Could not get model info for {model_name_param} from LiteLLM: {e}. Using fallback 4096.")
        return 4096

    @property
    def metadata(self) -> LLMMetadata:
        return LLMMetadata(
            context_window=self._actual_context_window,
            num_output=self.num_output,
            model_name=self._model_name_internal,
        )

    def _prepare_litellm_kwargs(self, **kwargs) -> dict:
        allowed_litellm_keys = {"temperature", "max_tokens", "top_p", "stop", "presence_penalty", "frequency_penalty", "seed"}
        return {key: value for key, value in kwargs.items() if key in allowed_litellm_keys}

    def complete(self, prompt: str, formatted: bool = False, **kwargs) -> CompletionResponse:
        messages = [{"role": "user", "content": prompt}]
        litellm_call_kwargs = self._prepare_litellm_kwargs(**kwargs)
        response = litellm.completion(
            model=self.model_string_for_litellm, messages=messages,
            api_key=os.getenv("OPENROUTER_API_KEY"), **litellm_call_kwargs
        )
        text_response = response.choices[0].message.content or ""
        return CompletionResponse(text=text_response, raw=response.model_dump()) # UPDATED

    async def acomplete(self, prompt: str, formatted: bool = False, **kwargs) -> CompletionResponse:
        messages = [{"role": "user", "content": prompt}]
        litellm_call_kwargs = self._prepare_litellm_kwargs(**kwargs)
        response = await litellm.acompletion(
            model=self.model_string_for_litellm, messages=messages,
            api_key=os.getenv("OPENROUTER_API_KEY"), **litellm_call_kwargs
        )
        text_response = response.choices[0].message.content or ""
        return CompletionResponse(text=text_response, raw=response.model_dump()) # UPDATED

    def stream_complete(self, prompt: str, formatted: bool = False, **kwargs) -> Generator[CompletionResponse, None, None]:
        messages = [{"role": "user", "content": prompt}]
        litellm_call_kwargs = self._prepare_litellm_kwargs(**kwargs)
        response_stream = litellm.completion(
            model=self.model_string_for_litellm, messages=messages, stream=True,
            api_key=os.getenv("OPENROUTER_API_KEY"), **litellm_call_kwargs
        )
        content_so_far = ""
        for chunk in response_stream:
            delta = ""
            if chunk.choices and chunk.choices[0].delta:
                delta = chunk.choices[0].delta.content or ""
            if delta:
                content_so_far += delta
                yield CompletionResponse(text=content_so_far, delta=delta, raw=chunk.model_dump()) # UPDATED

    async def astream_complete(self, prompt: str, formatted: bool = False, **kwargs) -> AsyncGenerator[CompletionResponse, None]:
        messages = [{"role": "user", "content": prompt}]
        litellm_call_kwargs = self._prepare_litellm_kwargs(**kwargs)
        response_stream = await litellm.acompletion(
            model=self.model_string_for_litellm, messages=messages, stream=True,
            api_key=os.getenv("OPENROUTER_API_KEY"), **litellm_call_kwargs
        )
        content_so_far = ""
        async for chunk in response_stream:
            delta = ""
            if chunk.choices and chunk.choices[0].delta:
                delta = chunk.choices[0].delta.content or ""
            if delta:
                content_so_far += delta
                yield CompletionResponse(text=content_so_far, delta=delta, raw=chunk.model_dump()) # UPDATED

# --- Main Script Logic ---
def main():
    print("--- Starting LlamaIndex RAG with OpenRouter via LiteLLM ---")
    try:
        openrouter_api_key = userdata.get('OPENROUTER_API_KEY')
        os.environ["OPENROUTER_API_KEY"] = openrouter_api_key
        print("OpenRouter API Key loaded from Colab Secrets.")
    except userdata.SecretNotFoundError:
        print("ERROR: OPENROUTER_API_KEY not found in Colab Secrets. Please add it.")
        sys.exit(1)
    except Exception as e:
        print(f"ERROR: Could not load OpenRouter API Key: {e}")
        sys.exit(1)

    sample_file_path = os.path.join(DATA_DIR, SAMPLE_FILE_NAME)
    if not os.path.exists(DATA_DIR): os.makedirs(DATA_DIR)
    if not os.path.exists(sample_file_path):
        with open(sample_file_path, "w") as f:
            f.write("""The history of AI began in antiquity, with myths and stories.
Modern AI started in the 1950s with Alan Turing.
Key developments include machine learning and deep learning.
Large Language Models (LLMs) like GPT-4 are a significant advancement.
Frameworks like LlamaIndex help build LLM applications.
OpenRouter provides access to many different LLMs.
Vector databases are essential for semantic search in RAG.
""")
        print(f"Created dummy sample file: '{sample_file_path}'")

    print(f"\nConfiguring LLM: Custom LiteLLM Wrapper with model '{OPENROUTER_LITELLM_MODEL_STRING}'")
    Settings.llm = LiteLLMCustom(model_string_for_litellm=OPENROUTER_LITELLM_MODEL_STRING)
    print(f"LLM configured. Context window: {Settings.llm.metadata.context_window}, Output size: {Settings.llm.metadata.num_output}")

    print(f"\nConfiguring Embedding Model: '{EMBED_MODEL_NAME}'")
    try:
        Settings.embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)
        print("Embedding model configured successfully.")
    except Exception as e:
        print(f"ERROR: Could not load HuggingFace embedding model '{EMBED_MODEL_NAME}': {e}")
        sys.exit(1)

    print("\n--- 1. Ingesting Data ---")
    try:
        documents = SimpleDirectoryReader(DATA_DIR).load_data()
        if not documents:
            print(f"Warning: No documents loaded from '{DATA_DIR}'.")
            sys.exit(1)
        print(f"Loaded {len(documents)} document(s) from '{DATA_DIR}'.")
    except Exception as e:
        print(f"ERROR during document loading: {e}")
        sys.exit(1)

    print("\n--- 2. Storing in Vector Database (ChromaDB) ---")
    try:
        chroma_client = chromadb.PersistentClient(path=DB_PATH)
        chroma_collection = chroma_client.get_or_create_collection(COLLECTION_NAME)
        vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        print(f"ChromaDB setup: collection '{COLLECTION_NAME}' at '{DB_PATH}'. Initial count: {chroma_collection.count()}")
        print("Building or loading index...")
        index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
        print(f"Index built/loaded. Documents in Chroma collection now: {chroma_collection.count()}.")
    except Exception as e:
        print(f"ERROR during vector store or indexing setup: {e}")
        sys.exit(1)

    print("\n--- 3. Performing Explicit Search ---")
    query = "What are key developments in AI?"
    try:
        retriever = index.as_retriever(similarity_top_k=2)
        retrieved_nodes = retriever.retrieve(query)
        print(f"Search query: '{query}'")
        print(f"Found {len(retrieved_nodes)} relevant node(s):")
        for i, node_with_score in enumerate(retrieved_nodes):
            print(f"  Result {i+1} (Score: {node_with_score.score:.4f}): {node_with_score.node.get_content()[:100].strip()}...")
    except Exception as e:
        print(f"ERROR during retrieval: {e}")
        sys.exit(1)

    print("\n--- 4. Generating Answer with LLM ---")
    try:
        query_engine = index.as_query_engine(similarity_top_k=2)
        print(f"Querying LLM with: '{query}'")
        response = query_engine.query(query)
        print(f"\nLLM Answer for '{query}':")
        print(f"Answer: {response.response}")
    except Exception as e:
        print(f"ERROR during query engine execution or LLM call: {e}")

    print("\n--- LlamaIndex RAG with OpenRouter via LiteLLM Finished ---")

if __name__ == "__main__":
    main()

--- Starting LlamaIndex RAG with OpenRouter via LiteLLM ---
OpenRouter API Key loaded from Colab Secrets.

Configuring LLM: Custom LiteLLM Wrapper with model 'openrouter/openai/gpt-3.5-turbo'
LLM configured. Context window: 4096, Output size: 512

Configuring Embedding Model: 'sentence-transformers/all-MiniLM-L6-v2'


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Embedding model configured successfully.

--- 1. Ingesting Data ---
Loaded 1 document(s) from './data'.

--- 2. Storing in Vector Database (ChromaDB) ---
ChromaDB setup: collection 'llamaindex_rag_openrouter_colab_litellm' at './db_chroma_llamaindex_openrouter_litellm'. Initial count: 3
Building or loading index...
Index built/loaded. Documents in Chroma collection now: 4.

--- 3. Performing Explicit Search ---
Search query: 'What are key developments in AI?'
Found 1 relevant node(s):
  Result 1 (Score: 0.3383): # sample.txt
The history of AI began in antiquity, with myths, stories and rumors of artificial bein...

--- 4. Generating Answer with LLM ---
Querying LLM with: 'What are key developments in AI?'

LLM Answer for 'What are key developments in AI?':
Answer: Key developments in AI include the rise of machine learning in the 2000s and deep learning in the 2010s.

--- LlamaIndex RAG with OpenRouter via LiteLLM Finished ---


In [9]:
!pip install langchain langchain-community langchain-core sentence-transformers chromadb litellm --quiet

In [10]:
# langchain_rag_openrouter_litellm.py
from google.colab import userdata
import os
import logging
import sys
from typing import Any, List, Mapping, Optional, Dict, Union, cast, AsyncIterator, Iterator

# --- Langchain Imports ---
from langchain_core.language_models.llms import LLM
from langchain_core.callbacks.manager import CallbackManagerForLLMRun, AsyncCallbackManagerForLLMRun
from langchain_core.outputs import GenerationChunk, Generation
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# --- Other Necessary Imports ---
import litellm

# --- Configuration & Constants ---
EMBED_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
DATA_DIR = "./data_langchain" # Use a different data directory to avoid conflicts
SAMPLE_FILE_NAME = "sample_langchain.txt"
OPENROUTER_LITELLM_MODEL_STRING = "openrouter/openai/gpt-3.5-turbo" # Or your preferred OpenRouter model
DB_PATH_LANGCHAIN = './db_chroma_langchain_openrouter_litellm'
# COLLECTION_NAME_LANGCHAIN = "langchain_rag_openrouter_litellm" # Chroma handles this internally based on persist_directory

# --- Helper: Setup Logging (Optional) ---
# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# logging.getLogger('litellm').setLevel(logging.INFO) # To see LiteLLM logs

# --- Custom Langchain LLM Class using LiteLLM ---
class LiteLLMWrapperForLangchain(LLM):
    """
    Custom Langchain LLM Wrapper for LiteLLM.
    """
    model_name: str = OPENROUTER_LITELLM_MODEL_STRING
    """The model name to pass to litellm.completion."""

    temperature: float = 0.0
    """The temperature to use for the completion."""

    max_tokens: Optional[int] = 512 # Max tokens for the *output*
    """The maximum number of tokens to generate."""

    top_p: float = 1.0
    """The top-p value to use for the completion."""

    litellm_kwargs: Optional[Dict[str, Any]] = None
    """Additional keyword arguments to pass to litellm.completion."""

    streaming: bool = False
    """Whether to stream the output."""

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "litellm_langchain_wrapper"

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {
            "model_name": self.model_name,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
            "top_p": self.top_p,
            "streaming": self.streaming,
            **(self.litellm_kwargs or {}),
        }

    def _prepare_litellm_call_kwargs(self, stop: Optional[List[str]] = None) -> Dict[str, Any]:
        kwargs = self.litellm_kwargs or {}
        kwargs["model"] = self.model_name
        kwargs["temperature"] = self.temperature
        if self.max_tokens is not None: # LiteLLM uses max_tokens for output tokens
             kwargs["max_tokens"] = self.max_tokens
        kwargs["top_p"] = self.top_p
        if stop:
            kwargs["stop"] = stop
        kwargs["api_key"] = os.getenv("OPENROUTER_API_KEY") # LiteLLM can pick this up
        return kwargs

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Call out to LiteLLM's completion endpoint."""
        if self.streaming: # Langchain handles streaming via _stream or _astream
            # This _call method is for non-streaming. If streaming is true,
            # it implies the user might have set it expecting streaming from _generate.
            # For simplicity here, we'll just make a non-streaming call if _call is invoked.
            # A more robust implementation would raise an error or adapt.
            pass

        call_kwargs = self._prepare_litellm_call_kwargs(stop=stop)
        call_kwargs.update(kwargs) # Allow overriding with call-specific kwargs

        messages = [{"role": "user", "content": prompt}]
        response = litellm.completion(messages=messages, **call_kwargs)
        return response.choices[0].message.content or ""

    async def _acall(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Async call out to LiteLLM's completion endpoint."""
        call_kwargs = self._prepare_litellm_call_kwargs(stop=stop)
        call_kwargs.update(kwargs)

        messages = [{"role": "user", "content": prompt}]
        response = await litellm.acompletion(messages=messages, **call_kwargs)
        return response.choices[0].message.content or ""

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Stream responses from LiteLLM."""
        call_kwargs = self._prepare_litellm_call_kwargs(stop=stop)
        call_kwargs.update(kwargs)
        call_kwargs["stream"] = True

        messages = [{"role": "user", "content": prompt}]
        for chunk in litellm.completion(messages=messages, **call_kwargs):
            if chunk.choices and chunk.choices[0].delta:
                delta_content = chunk.choices[0].delta.content
                if delta_content:
                    yield GenerationChunk(text=delta_content)
                    if run_manager:
                        run_manager.on_llm_new_token(delta_content)

    async def _astream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> AsyncIterator[GenerationChunk]:
        """Async stream responses from LiteLLM."""
        call_kwargs = self._prepare_litellm_call_kwargs(stop=stop)
        call_kwargs.update(kwargs)
        call_kwargs["stream"] = True

        messages = [{"role": "user", "content": prompt}]
        async for chunk in await litellm.acompletion(messages=messages, **call_kwargs):
            if chunk.choices and chunk.choices[0].delta:
                delta_content = chunk.choices[0].delta.content
                if delta_content:
                    yield GenerationChunk(text=delta_content)
                    if run_manager:
                        await run_manager.on_llm_new_token(delta_content)

# --- Main Script Logic ---
def main():
    print("--- Starting Langchain RAG with OpenRouter via LiteLLM ---")

    # 0. Setup: API Keys and Sample Data
    # litellm.set_verbose = True # Uncomment for verbose LiteLLM logs

    try:
        openrouter_api_key = userdata.get('OPENROUTER_API_KEY')
        os.environ["OPENROUTER_API_KEY"] = openrouter_api_key # For LiteLLM
        print("OpenRouter API Key loaded from Colab Secrets.")
    except userdata.SecretNotFoundError:
        print("ERROR: OPENROUTER_API_KEY not found in Colab Secrets. Please add it.")
        sys.exit(1)
    except Exception as e:
        print(f"ERROR: Could not load OpenRouter API Key: {e}")
        sys.exit(1)

    sample_file_path = os.path.join(DATA_DIR, SAMPLE_FILE_NAME)
    if not os.path.exists(DATA_DIR):
        os.makedirs(DATA_DIR)
    if not os.path.exists(sample_file_path):
        with open(sample_file_path, "w") as f:
            f.write("""Langchain is a framework for developing applications powered by language models.
It provides modular components for building complex chains and agents.
Key features include document loaders, text splitters, vector stores, and LLM wrappers.
This example uses Langchain with OpenRouter via LiteLLM for RAG.
Retrieval Augmented Generation enhances LLM responses with external data.
""")
        print(f"Created dummy sample file: '{sample_file_path}'")

    # Configure Langchain Components
    print(f"\nConfiguring LLM: Langchain LiteLLM Wrapper with model '{OPENROUTER_LITELLM_MODEL_STRING}'")
    llm = LiteLLMWrapperForLangchain(
        model_name=OPENROUTER_LITELLM_MODEL_STRING,
        temperature=0.0,
        max_tokens=256 # Max output tokens for the LLM response
    )
    print("LLM configured.")

    print(f"\nConfiguring Embedding Model: '{EMBED_MODEL_NAME}'")
    try:
        embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL_NAME)
        print("Embedding model configured successfully.")
    except Exception as e:
        print(f"ERROR: Could not load HuggingFace embedding model '{EMBED_MODEL_NAME}': {e}")
        print("Ensure 'pip install sentence-transformers' has been run.")
        sys.exit(1)

    # 1. Ingest Text File
    print("\n--- 1. Ingesting Data ---")
    try:
        # Using DirectoryLoader to load all .txt files in the directory
        loader = DirectoryLoader(DATA_DIR, glob=f"**/{SAMPLE_FILE_NAME}", loader_cls=TextLoader, show_progress=True)
        documents = loader.load()
        if not documents:
            print(f"Warning: No documents loaded from '{DATA_DIR}'. Ensure '{SAMPLE_FILE_NAME}' exists.")
            sys.exit(1)
        print(f"Loaded {len(documents)} document(s). Total characters: {sum(len(doc.page_content) for doc in documents)}")
    except Exception as e:
        print(f"ERROR during document loading: {e}")
        sys.exit(1)

    # Split documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = text_splitter.split_documents(documents)
    print(f"Split into {len(texts)} chunks.")

    # 2. Store Contents in a Vector Database (ChromaDB)
    print("\n--- 2. Storing in Vector Database (ChromaDB) ---")
    try:
        print(f"Initializing Chroma vector store at '{DB_PATH_LANGCHAIN}'...")
        # If the directory exists and has data, Chroma will load it.
        # For a fresh run, you might want to delete the DB_PATH_LANGCHAIN directory.
        vectorstore = Chroma.from_documents(
            documents=texts,
            embedding=embeddings,
            persist_directory=DB_PATH_LANGCHAIN
        )
        vectorstore.persist() # Ensure persistence
        print(f"Vector store created/loaded. Collection count (approx): {vectorstore._collection.count()}")
    except Exception as e:
        print(f"ERROR during vector store setup: {e}")
        # If error is "Invalid dimension" check embedding model output vs Chroma expectations.
        sys.exit(1)

    # 3. Perform a Search Operation (via Retriever)
    print("\n--- 3. Performing Explicit Search (Retriever) ---")
    query = "What is Langchain?"
    try:
        # Load from disk if needed (e.g., in a separate run after ingestion)
        # vectorstore = Chroma(persist_directory=DB_PATH_LANGCHAIN, embedding_function=embeddings)
        retriever = vectorstore.as_retriever(search_kwargs={"k": 2}) # Get top 2 results
        retrieved_docs = retriever.invoke(query) # Langchain uses 'invoke'

        print(f"Search query: '{query}'")
        print(f"Found {len(retrieved_docs)} relevant document chunk(s):")
        for i, doc in enumerate(retrieved_docs):
            print(f"  Result {i+1} (Metadata: {doc.metadata}): {doc.page_content[:150].strip()}...")
    except Exception as e:
        print(f"ERROR during retrieval: {e}")
        sys.exit(1)

    # 4. Pass Search Results to LLM for Generating Answers (RetrievalQA Chain)
    print("\n--- 4. Generating Answer with LLM using RetrievalQA Chain ---")
    try:
        # Define a prompt template (optional, but good practice)
        prompt_template_str = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Keep the answer concise and based *only* on the provided context.

Context:
{context}

Question: {question}
Helpful Answer:"""
        QA_PROMPT = PromptTemplate(
            template=prompt_template_str, input_variables=["context", "question"]
        )

        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff", # "stuff" puts all context into the prompt
            retriever=retriever,
            return_source_documents=True, # Optionally return source documents
            chain_type_kwargs={"prompt": QA_PROMPT}
        )

        print(f"Querying LLM with (via chain): '{query}'")
        result = qa_chain.invoke({"query": query}) # Langchain chains use 'invoke'

        print(f"\nLLM Answer for '{query}':")
        print(f"Answer: {result['result']}")

        print("\nSource Documents considered by LLM:")
        for i, doc in enumerate(result["source_documents"]):
            print(f"  Source {i+1} (Metadata: {doc.metadata}): {doc.page_content[:100].strip()}...")

    except Exception as e:
        print(f"ERROR during RetrievalQA chain execution or LLM call: {e}")

    print("\n--- Langchain RAG with OpenRouter via LiteLLM Finished ---")

if __name__ == "__main__":
    main()

--- Starting Langchain RAG with OpenRouter via LiteLLM ---
OpenRouter API Key loaded from Colab Secrets.

Configuring LLM: Langchain LiteLLM Wrapper with model 'openrouter/openai/gpt-3.5-turbo'
LLM configured.

Configuring Embedding Model: 'sentence-transformers/all-MiniLM-L6-v2'


  embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL_NAME)


Embedding model configured successfully.

--- 1. Ingesting Data ---


100%|██████████| 1/1 [00:00<00:00, 422.81it/s]

Loaded 1 document(s). Total characters: 379
Split into 1 chunks.

--- 2. Storing in Vector Database (ChromaDB) ---
Initializing Chroma vector store at './db_chroma_langchain_openrouter_litellm'...



  vectorstore.persist() # Ensure persistence


Vector store created/loaded. Collection count (approx): 2

--- 3. Performing Explicit Search (Retriever) ---
Search query: 'What is Langchain?'
Found 2 relevant document chunk(s):
  Result 1 (Metadata: {'source': 'data_langchain/sample_langchain.txt'}): Langchain is a framework for developing applications powered by language models.
It provides modular components for building complex chains and agents...
  Result 2 (Metadata: {'source': 'data_langchain/sample_langchain.txt'}): Langchain is a framework for developing applications powered by language models.
It provides modular components for building complex chains and agents...

--- 4. Generating Answer with LLM using RetrievalQA Chain ---
Querying LLM with (via chain): 'What is Langchain?'

LLM Answer for 'What is Langchain?':
Answer: Langchain is a framework for developing applications powered by language models.

Source Documents considered by LLM:
  Source 1 (Metadata: {'source': 'data_langchain/sample_langchain.txt'}): Langchain i