# Setup

In [4]:
# %%capture
# !pip -q install git+https://github.com/huggingface/transformers
# !pip install -q datasets loralib sentencepiece
# !pip -q install bitsandbytes accelerate
# !pip -q install langchain
# !pip install einops
# !pip install faiss-gpu
# !pip install langchain_community
# !pip install --upgrade --quiet chromadb bs4 qdrant-client
# !pip install langchainhub
# !pip install -U langchain-huggingface
# !pip install -U langchain-cohere
# !pip install --upgrade --quiet  wikipedia
# !pip install --upgrade --quiet  arxiv
# !pip install --upgrade --quiet  pymupdf

# !pip install xmltodict

# !pip install cohere

In [5]:
# import importlib
# from subprocess import run

# def install_if_missing(package, import_name=None, extra_args=""):
#     try:
#         importlib.import_module(import_name or package)
#         print(f"{package} is already installed. Skipping installation.")
#     except ImportError:
#         print(f"{package} not found. Installing now.")
#         run(f"pip install {extra_args} {package}", shell=True)

# install_if_missing("git+https://github.com/huggingface/transformers", "transformers", "-q")
# install_if_missing("datasets", extra_args="-q")
# install_if_missing("loralib", extra_args="-q")
# install_if_missing("sentencepiece", extra_args="-q")
# install_if_missing("bitsandbytes", extra_args="-q")
# install_if_missing("accelerate", extra_args="-q")
# install_if_missing("langchain", extra_args="-q")
# install_if_missing("einops")
# install_if_missing("faiss-gpu")
# install_if_missing("langchain_community")
# install_if_missing("chromadb", extra_args="--upgrade --quiet")
# install_if_missing("bs4", extra_args="--upgrade --quiet")
# install_if_missing("qdrant-client", "qdrant_client", extra_args="--upgrade --quiet")
# install_if_missing("langchainhub")
# install_if_missing("langchain-huggingface", "langchain_huggingface", extra_args="-U")
# install_if_missing("langchain-cohere", "langchain_cohere", extra_args="-U")
# install_if_missing("wikipedia", extra_args="--upgrade --quiet")
# install_if_missing("arxiv", extra_args="--upgrade --quiet")
# install_if_missing("pymupdf", "fitz", extra_args="--upgrade --quiet")
# install_if_missing("xmltodict")
# install_if_missing("cohere")
# install_if_missing("loguru")

In [6]:
# %%capture
# !pip -q install loguru

# Hydrate Vector DB

In [7]:
import os
import bs4
import enum
from loguru import logger
from typing import List, Tuple, Any, Dict, Optional
from dataclasses import dataclass

from langchain_text_splitters import RecursiveCharacterTextSplitter
# from langchain_community.embeddings import HuggingFaceEmbeddings # deprecated
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Qdrant
from langchain_community.document_loaders import (
    PyMuPDFLoader,
    WikipediaLoader,
    WebBaseLoader,
)
from langchain_core.documents import Document


class SupportedEmbeddingModels(enum.Enum):
    MpNetBaseV2 = "all-mpnet-base-v2"
    MiniLmL6V2 = "all-MiniLM-L6-v2"
    DistilRobertaV1 = "all-distilroberta-v1"
    MultiQaMpNetBasedCosV1 = "multi-qa-mpnet-base-cos-v1"
    MultiQaMpNetBasedDotV1 = "multi-qa-mpnet-base-dot-v1"


class SourceType(enum.Enum):
    ARXIV = "arxiv"
    WIKIPEDIA = "wikipedia"
    WEBSITE = "website"


@dataclass
class DataSource:
    identifier: str
    source_type: SourceType

    # Optional metadata that can be added to all documents from this source
    additional_metadata: Optional[Dict[str, Any]] = None


def get_embedding_model(model: str) -> HuggingFaceEmbeddings:
    """
    Creates and returns a HuggingFaceEmbeddings model for text embeddings.

    Args:
        model (str): The name of the Hugging Face model to use for embeddings.
                    Should be one of the values in SupportedEmbeddingModels.

    Returns:
        HuggingFaceEmbeddings: The initialized embedding model.

    Raises:
        ValueError: If the model name is empty, invalid, or not supported.
        ImportError: If the required dependencies are not installed.
    """
    if not model or not isinstance(model, str):
        raise ValueError("Model name must be a non-empty string")

    # Check if model is supported
    supported_models = [m.value for m in SupportedEmbeddingModels]
    if model not in supported_models:
        raise ValueError(
            f"Unsupported model: {model}. Must be one of: {supported_models}"
        )

    try:
        return HuggingFaceEmbeddings(model_name=model)
    except ImportError as e:
        raise ImportError(f"Missing dependencies for HuggingFaceEmbeddings: {str(e)}")
    except Exception as e:
        raise ValueError(f"Failed to load embedding model '{model}': {str(e)}")


def initialize_vector_db(
    embedding_model_name: str = SupportedEmbeddingModels.MultiQaMpNetBasedCosV1,
    collection_name: str = "rag267",
    in_memory: bool = True,
    force_recreate: bool = True,
) -> Qdrant:
    """
    Initialize a Qdrant vector database with specified embedding model.

    Args:
        embedding_model_name: Name of the Hugging Face embedding model to use
        collection_name: Name for the Qdrant collection
        in_memory: Whether to use in-memory storage (True) or disk storage (False)
        force_recreate: Whether to recreate the collection if it exists

    Returns:
        Initialized Qdrant vectorstore object
    """
    # Initialize embeddings
    embeddings = get_embedding_model(embedding_model_name)

    # Location for Qdrant storage
    db_location = (
        ":memory:" if in_memory else "http://localhost:6333"
    )  # TODO make this more robust

    # Create an empty Qdrant vectorstore
    vectorstore = Qdrant.from_documents(
        documents=[
            Document(page_content="Initialization document", metadata={})
        ],  # Just to initialize
        embedding=embeddings,
        location=db_location,
        collection_name=collection_name,
        force_recreate=force_recreate,
    )

    return vectorstore


def load_arxiv_documents(
    arxiv_id: str, doc_id: int, additional_metadata: Optional[Dict[str, Any]] = None
) -> List[Document]:
    """
    Load documents from an ArXiv paper.

    Args:
        arxiv_id: ArXiv ID of the paper
        doc_id: Document ID to assign
        additional_metadata: Additional metadata to add to documents

    Returns:
        List of Document objects
    """
    # Construct URL using the arXiv unique identifier
    arx_url = f"https://arxiv.org/pdf/{arxiv_id}.pdf"

    try:
        # Extract pages from the document
        arx_loader = PyMuPDFLoader(arx_url)
        arx_pages = arx_loader.load()

        # Add metadata to each page
        for page_num in range(len(arx_pages)):
            page = arx_pages[page_num]
            page.metadata["page_num"] = page_num
            page.metadata["doc_num"] = doc_id
            page.metadata["doc_source"] = "ArXiv"
            page.metadata["source_id"] = arxiv_id

            # Add any additional metadata
            if additional_metadata:
                for key, value in additional_metadata.items():
                    page.metadata[key] = value

        return arx_pages
    except Exception as e:
        logger.exception(f"Error loading ArXiv document {arxiv_id}: {e}")
        return []


def load_wikipedia_documents(
    query: str,
    doc_id: int,
    max_docs: int = 4,
    additional_metadata: Optional[Dict[str, Any]] = None,
) -> List[Document]:
    """
    Load documents from Wikipedia.

    Args:
        query: Search query for Wikipedia
        doc_id: Document ID to assign
        max_docs: Maximum number of documents to load
        additional_metadata: Additional metadata to add to documents

    Returns:
        List of Document objects
    """
    try:
        wiki_docs = WikipediaLoader(query=query, load_max_docs=max_docs).load()

        # Add metadata to each document
        for doc in wiki_docs:
            doc.metadata["doc_num"] = doc_id
            doc.metadata["doc_source"] = "Wikipedia"
            doc.metadata["source_id"] = query

            # Add any additional metadata
            if additional_metadata:
                for key, value in additional_metadata.items():
                    doc.metadata[key] = value

        return wiki_docs
    except Exception as e:
        logger.exception(f"Error loading Wikipedia documents for query {query}: {e}")
        return []


def load_website_documents(
    url: str, doc_id: int, additional_metadata: Optional[Dict[str, Any]] = None
) -> List[Document]:
    """
    Load documents from a website.

    Args:
        url: URL of the website
        doc_id: Document ID to assign
        additional_metadata: Additional metadata to add to documents

    Returns:
        List of Document objects
    """
    try:
        # Set up the web loader with BeautifulSoup settings to extract relevant content
        web_loader = WebBaseLoader(
            web_paths=(url,),
            bs_kwargs=dict(
                parse_only=bs4.SoupStrainer(
                    class_=(
                        "post-content",
                        "post-title",
                        "post-header",
                        "article",
                        "content",
                        "main",
                    )
                )
            ),
        )

        web_documents = web_loader.load()

        # Add metadata to each document
        for doc in web_documents:
            doc.metadata["doc_num"] = doc_id
            doc.metadata["doc_source"] = "Website"
            doc.metadata["source_id"] = url

            # Add any additional metadata
            if additional_metadata:
                for key, value in additional_metadata.items():
                    doc.metadata[key] = value

        return web_documents
    except Exception as e:
        logger.exception(f"Error loading website document from {url}: {e}")
        return []


def split_and_add_documents(
    vectorstore: Qdrant,
    documents: List[Document],
    chunk_size: int = 128,
    chunk_overlap: int = 0,
) -> None:
    """
    Split documents into chunks and add them to the vectorstore.

    Args:
        vectorstore: Qdrant vectorstore object
        documents: List of Document objects
        chunk_size: Size of each chunk
        chunk_overlap: Overlap between chunks
    """
    if not documents:
        return

    # Create text splitter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )

    # Split documents
    splits = text_splitter.split_documents(documents)

    # Add split_id to metadata
    for idx, split in enumerate(splits):
        split.metadata["split_id"] = idx

    # Add to vectorstore
    vectorstore.add_documents(documents=splits)

    logger.info(f"Added {len(splits)} chunks to the vectorstore")


def hydrate_vector_db(
    vectorstore: Qdrant,
    data_sources: List[DataSource],
    chunk_size: int = 128,
    chunk_overlap: int = 0,
) -> None:
    """
    Hydrate the vector database with documents from various sources.

    Args:
        vectorstore: Qdrant vectorstore object
        data_sources: List of DataSource objects
        chunk_size: Size of each chunk
        chunk_overlap: Overlap between chunks
    """
    doc_id = 1

    for data_source in data_sources:
        logger.info(
            f"Processing source: {data_source.identifier} ({data_source.source_type.value})"
        )

        documents = []

        # Route to the appropriate loading function based on source type
        if data_source.source_type == SourceType.ARXIV:
            documents = load_arxiv_documents(
                data_source.identifier, doc_id, data_source.additional_metadata
            )
        elif data_source.source_type == SourceType.WIKIPEDIA:
            documents = load_wikipedia_documents(
                data_source.identifier,
                doc_id,
                additional_metadata=data_source.additional_metadata,
            )
        elif data_source.source_type == SourceType.WEBSITE:
            documents = load_website_documents(
                data_source.identifier, doc_id, data_source.additional_metadata
            )
        else:
            logger.warning(f"Unknown source type: {data_source.source_type}")
            continue

        # Split and add documents to vectorstore
        split_and_add_documents(vectorstore, documents, chunk_size, chunk_overlap)

        doc_id += 1

    logger.info(f"Finished hydrating vector database with {doc_id - 1} documents")


USER_AGENT environment variable not set, consider setting it to identify your requests.


In [8]:
class VectorDatabaseManager:
    def __init__(
        self,
        embedding_model_name: str = "multi-qa-mpnet-base-dot-v1",
        collection_name: str = "myrag",
        chunk_size: int = 128,
        chunk_overlap: int = 0,
        in_memory: bool = True,
        force_recreate: bool = True,
    ):
        self.embedding_model_name = embedding_model_name
        self.collection_name = collection_name
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.in_memory = in_memory
        self.force_recreate = force_recreate
        
        # *** VECTORSTORE ***
        self.vectorstore = initialize_vector_db(
            embedding_model_name=embedding_model_name,
            collection_name=collection_name,
            in_memory=in_memory,
            force_recreate=force_recreate,
        )

    def hydrate(self, data_sources: List[DataSource]) -> None:
        hydrate_vector_db(
            self.vectorstore,
            data_sources,
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
        )

    def get_config(self) -> dict:
        return {
            'embedding_model': self.embedding_model_name,
            'collection_name': self.collection_name,
            'in_memory': self.in_memory,
            'force_recreate': self.force_recreate,
            'chunking_strategy': {
                'chunk_size': self.chunk_size,
                'chunk_overlap': self.chunk_overlap,
                'splitter_type': "RecursiveCharacterTextSplitter"
            }
        }

class VectorDatabaseRunner:
    def __init__(self):
        self.instances = {}

    def add_instance(self, name: str, manager: VectorDatabaseManager):
        self.instances[name] = manager

    def get_instance(self, name: str) -> VectorDatabaseManager:
        return self.instances.get(name)

    def list_instances(self) -> List[str]:
        return list(self.instances.keys())

# Example usage:
# vdb_runner = VectorDatabaseRunner()

# # Create and add multiple instances
# vdb_runner.add_instance(
#     "default",
#     VectorDatabaseManager(),
# )

# vdb_runner.add_instance(
#     "custom_chunk",
#     VectorDatabaseManager(chunk_size=256, chunk_overlap=50),
# )

# print(vdb_runner)


In [9]:
vdm = VectorDatabaseManager(
    embedding_model_name="multi-qa-mpnet-base-dot-v1",
    collection_name="myrag",
    chunk_size=128,
    chunk_overlap=0,
    in_memory=True,
    force_recreate=True,
)

In [10]:
vdm.get_config()

{'embedding_model': 'multi-qa-mpnet-base-dot-v1',
 'collection_name': 'myrag',
 'in_memory': True,
 'force_recreate': True,
 'chunking_strategy': {'chunk_size': 128,
  'chunk_overlap': 0,
  'splitter_type': 'RecursiveCharacterTextSplitter'}}

In [11]:
vdr = VectorDatabaseRunner()

In [12]:
vdr.add_instance('default', vdm)

vdr.list_instances()

['default']

In [13]:
# Define data sources
data_sources = [
    # ArXiv papers
    DataSource(identifier="2005.11401", source_type=SourceType.ARXIV),
    DataSource(identifier="2104.07567", source_type=SourceType.ARXIV),
    # Wikipedia articles
    DataSource(
        identifier="Generative Artificial Intelligence",
        source_type=SourceType.WIKIPEDIA,
    ),
    DataSource(
        identifier="Large Language Models",
        source_type=SourceType.WIKIPEDIA,
        additional_metadata={"category": "AI Models"},
    ),
    # Websites
    DataSource(
        identifier="https://lilianweng.github.io/posts/2023-06-23-agent/",
        source_type=SourceType.WEBSITE,
    ),
    DataSource(
        identifier="https://lilianweng.github.io/posts/2020-10-29-odqa/",
        source_type=SourceType.WEBSITE,
        additional_metadata={"author": "Lilian Weng"},
    ),
]

In [14]:
# Example usage

# Initialize vector database
# vectorstore = initialize_vector_db(
#     embedding_model_name="multi-qa-mpnet-base-dot-v1",
#     collection_name="myrag",
#     in_memory=True,
#     force_recreate=True,
# )

# Hydrate vector database
# hydrate_vector_db(
#     vectorstore=vectorstore,
#     data_sources=data_sources,
#     chunk_size=128,
#     chunk_overlap=0,
# )

vdm.hydrate(data_sources=data_sources)

[32m2025-03-23 15:33:23.157[0m | [1mINFO    [0m | [36m__main__[0m:[36mhydrate_vector_db[0m:[36m300[0m - [1mProcessing source: 2005.11401 (arxiv)[0m
[32m2025-03-23 15:33:25.412[0m | [1mINFO    [0m | [36m__main__[0m:[36msplit_and_add_documents[0m:[36m279[0m - [1mAdded 731 chunks to the vectorstore[0m
[32m2025-03-23 15:33:25.413[0m | [1mINFO    [0m | [36m__main__[0m:[36mhydrate_vector_db[0m:[36m300[0m - [1mProcessing source: 2104.07567 (arxiv)[0m
[32m2025-03-23 15:33:28.078[0m | [1mINFO    [0m | [36m__main__[0m:[36msplit_and_add_documents[0m:[36m279[0m - [1mAdded 838 chunks to the vectorstore[0m
[32m2025-03-23 15:33:28.079[0m | [1mINFO    [0m | [36m__main__[0m:[36mhydrate_vector_db[0m:[36m300[0m - [1mProcessing source: Generative Artificial Intelligence (wikipedia)[0m
[32m2025-03-23 15:33:29.596[0m | [1mINFO    [0m | [36m__main__[0m:[36msplit_and_add_documents[0m:[36m279[0m - [1mAdded 157 chunks to the vectorstore[0m


# RAG System Implementation

In [30]:
import os
from typing import Dict, List, Optional, Any, Callable
import enum
from dotenv import load_dotenv
from loguru import logger
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    pipeline,
    BitsAndBytesConfig,
)
from langchain_huggingface import HuggingFacePipeline
from langchain_cohere import ChatCohere
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.vectorstores import Qdrant


class SupportedGeneratorModels(enum.Enum):
    MistralInstructV2 = "mistralai/Mistral-7B-Instruct-v0.2"

class ModelType(enum.Enum):
    Mistral = "mistral"
    Cohere = "cohere"

class Team(enum.Enum):
    Engineering = "engineering"
    Marketing = "marketing"

class RAGSystem:
    def __init__(
        self,
        vector_db_manager: VectorDatabaseManager,
        engineering_template_path: str = "templates/engineering_template.txt",
        marketing_template_path: str = "templates/marketing_template.txt",
        cohere_api_key: Optional[str] = None,
        use_cohere: bool = True,
        use_mistral: bool = False,
        mistral_model_name: SupportedGeneratorModels = SupportedGeneratorModels.MistralInstructV2,
        top_k: int = 4,
    ):

        if use_cohere == True and use_mistral == True:
            raise ValueError("use_cohere and use_mistral cannot both be True. Choose one LLM.")

        # load_dotenv()

        self.config = {
            'llm': {},
            'top_k': top_k,
            'engineering_template_path': engineering_template_path,
            'marketing_template_path': marketing_template_path,
        }

        # self.hf_token = os.getenv('HF_TOKEN')

        self.vdm = vector_db_manager
        # self.vectorstore = vdm.vectorstore
        self.retriever = vdm.vectorstore.as_retriever(search_kwargs={"k": top_k})

        if use_mistral:
            self.llm = self._init_mistral(mistral_model_name)

        if use_cohere:
            if not cohere_api_key:
                raise ValueError("Cohere API key is required when use_cohere=True")
            self.llm = self._init_cohere(cohere_api_key)

        self.engineering_template = self._load_template(engineering_template_path)
        self.marketing_template = self._load_template(marketing_template_path)

        self.engineering_prompt = ChatPromptTemplate.from_template(
            self.engineering_template
        )
        self.marketing_prompt = ChatPromptTemplate.from_template(
            self.marketing_template
        )

        self.output_parser = StrOutputParser()

    def get_config(self) -> dict:
        summary = {
            **self.config,
            'vectorstore': self.vdm.get_config()
        }
        return summary

    def _load_template(self, template_path: str) -> str:
        try:
            with open(template_path, "r") as f:
                template = f.read()
            return template
        except FileNotFoundError as e:
            logger.exception(e)
            logger.warning(
                f"Template file {template_path} not found. Using default template."
            )
            return e

    def _init_mistral(self, model_name: SupportedGeneratorModels):
        logger.info(f"Initializing Mistral model: {model_name}")

        quant_cfg = BitsAndBytesConfig(load_in_4bit=True)

        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float32,
            quantization_config=quant_cfg,
        )

        tokenizer = AutoTokenizer.from_pretrained(model_name)

        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=1000,
            temperature=0.6,
            top_p=0.95,
            do_sample=True,
            repetition_penalty=1.2,
        )

        pipe.model.config.pad_token_id = pipe.model.config.eos_token_id

        self.config['llm'] = {
            'family': ModelType.Mistral,
            'model_name': model_name,
            'quantization': {'load_in_4bit': True},
            'tokenizer': tokenizer.name_or_path,
            'temperature': 0.6,
            'top_p': 0.95,
            'max_new_tokens': 1000,
            'repetition_penalty': 1.2,
        }

        return HuggingFacePipeline(pipeline=pipe)

    def _init_cohere(self, api_key: str):
        logger.info("Initializing Cohere model")
        model_name = "command-r"
        self.config['llm'] = {
            'family': ModelType.Cohere,
            'model_name': model_name,
            'api_key_provided': bool(api_key)
        }
        return ChatCohere(cohere_api_key=api_key, model=model_name)

    def format_docs(self, docs):
        """Format a list of documents into a string."""
        return "\n\n".join(
            f"Document {i + 1}:\n{doc.page_content}" for i, doc in enumerate(docs)
        )

    def query_vectorstore(self, query: str) -> List[Any]:
        """Get retrieved documents for a query."""
        return self.retriever.invoke(query)

    def get_retrieval_metadata(self, query: str) -> List[Dict[str, Any]]:
        """Get metadata about the retrieved documents."""
        docs = self.query_vectorstore(query)
        return [doc.metadata for doc in docs]

    def invoke(self, team: Team, query: str) -> str:
        """Generate an answer for the engineering team."""

        if not isinstance(team, Team):
            raise ValueError(f"Invalid team: {team}")

        if not self.llm:
            raise ValueError("LLM not initialized")

        prompt = self.engineering_prompt if team == Team.Engineering else self.marketing_prompt

        # Create chain
        chain = (
            {
                "context": self.retriever | self.format_docs,
                "question": RunnablePassthrough(),
            }
            | prompt
            | self.llm
            | self.output_parser
        )

        # Run chain
        return chain.invoke(query)

    def generate_responses(self, query: str) -> Dict[str, str]:
        """Generate responses for both engineering and marketing teams."""
        engineering_response = self.invoke(Team.Engineering, query)
        marketing_response = self.invoke(Team.Marketing, query)

        return {
            "engineering": engineering_response,
            "marketing": marketing_response
        }

    def get_document_sources(self, query: str) -> List[str]:
        docs = self.query_vectorstore(query)
        sources = []

        for doc in docs:
            source_info = ""
            if "doc_source" in doc.metadata:
                source_info += doc.metadata["doc_source"]
            if "source_id" in doc.metadata:
                source_info += f": {doc.metadata['source_id']}"
            sources.append(source_info)

        return sources


# Initialize and test the RAG System

In [16]:
# Uncomment for Google Colab:
# from google.colab import userdata
# cohere_api_key = userdata.get("COHERE_API_KEY")

from dotenv import load_dotenv
load_dotenv()
cohere_api_key = os.getenv('COHERE_API_KEY_PROD')

In [17]:
from pathlib import Path

engineering_template = Path.cwd() / "../templates/engineering_template.txt"
marketing_template = Path.cwd() / "../templates/marketing_template.txt"

In [18]:
# from huggingface_hub import login
# login()

In [31]:
rag_system_cohere = RAGSystem(
    vector_db_manager=vdm,
    engineering_template_path=engineering_template,
    marketing_template_path=marketing_template,
    cohere_api_key=cohere_api_key,
    use_mistral=False,
    use_cohere=True,
    mistral_model_name=SupportedGeneratorModels.MistralInstructV2.value,
    top_k=4,
)
rag_system_mistral = RAGSystem(
    vector_db_manager=vdm,
    engineering_template_path=engineering_template,
    marketing_template_path=marketing_template,
    cohere_api_key=cohere_api_key,
    use_mistral=True,
    use_cohere=False,
    mistral_model_name=SupportedGeneratorModels.MistralInstructV2.value,
    top_k=4,
)

[32m2025-03-23 15:45:24.792[0m | [1mINFO    [0m | [36m__main__[0m:[36m_init_cohere[0m:[36m142[0m - [1mInitializing Cohere model[0m
[32m2025-03-23 15:45:24.855[0m | [1mINFO    [0m | [36m__main__[0m:[36m_init_mistral[0m:[36m103[0m - [1mInitializing Mistral model: mistralai/Mistral-7B-Instruct-v0.2[0m
`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Device set to use cuda:0


In [32]:
rag_system = rag_system_mistral
# rag_system = rag_system_cohere

In [33]:
answer_research = rag_system.invoke(Team.Engineering, "What defines a large language model in the context of natural language processing tasks?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [36]:
answer_marketing = rag_system.invoke(Team.Marketing, "What defines a large language model in the context of natural language processing tasks?")

In [34]:
gold_answer_research = "A large language model in the context of natural language processing tasks is characterized by its ability to achieve general-purpose language generation and other NLP tasks through self-supervised and semi-supervised training on large datasets. These models typically utilize feedforward neural networks and transformers, surpassing earlier models like recurrent neural networks and word n-gram language models. They are often pre-trained on vast amounts of text data from the internet and can be fine-tuned for specific downstream tasks such as summarization, machine reading comprehension, and natural language to SQL translation."

gold_answer_marketing = "A large language model (LLM) is a language model notable for its ability to achieve general-purpose language generation and other natural language processing tasks such as classification. LLMs acquire these abilities by learning statistical relationships from text documents during a computationally intensive self-supervised and semi-supervised training process."

In [35]:
print(f"Length of research answer: {len(answer_research)}")
print(f"Length of research gold answer: {len(gold_answer_research)}")

Length of research answer: 3969
Length of research gold answer: 633


In [40]:
print(f"Legnth of marketing answer: {len(answer_marketing)}")
print(f"Length of marketing gold answer: {len(gold_answer_marketing)}")

Legnth of marketing answer: 611
Length of marketing gold answer: 363


## Cosine similarity

In [None]:
# cosine similarity
from sklearn.metrics.pairwise import cosine_similarity

base_embeddings = HuggingFaceEmbeddings(model_name="multi-qa-mpnet-base-dot-v1")
answer_research_embedding = base_embeddings.embed_query(answer_research)
answer_marketing_embedding = base_embeddings.embed_query(answer_marketing)

gold_answer_research_embedding = base_embeddings.embed_query(gold_answer_research)
gold_answer_marketing_embedding = base_embeddings.embed_query(gold_answer_marketing)

print(f"Research answer cosine similarity: {cosine_similarity([answer_research_embedding], [gold_answer_research_embedding])}")
print(f"Marketing answer cosine similarity: {cosine_similarity([answer_marketing_embedding], [gold_answer_marketing_embedding])}")

Research answer cosine similarity: [[0.84549987]]
Marketing answer cosine similarity: [[0.89585698]]


## BLEU

In [None]:
# BLEU
from nltk.translate.bleu_score import sentence_bleu
from nltk.tokenize import word_tokenize

import nltk
nltk.download('punkt_tab')

def compute_bleu(reference: str, candidate: str):
    reference_tokens: list[str] = word_tokenize(reference)
    candidate_tokens: list[str] = word_tokenize(candidate)

    # Calculate BLEU score
    bleu_score = sentence_bleu(reference_tokens, candidate_tokens)

    # For example, using only unigrams and bigrams with equal weights
    # weights = (0.5, 0.5, 0, 0)
    # bleu_score = sentence_bleu(reference_tokens, candidate_tokens, weights=weights)

    return bleu_score

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [None]:
bleu_score_research = compute_bleu(gold_answer_research, answer_research)
bleu_score_marketing = compute_bleu(gold_answer_marketing, answer_marketing)

print(f"Research answer BLEU score: {bleu_score_research}")
print(f"Marketing answer BLEU score: {bleu_score_marketing}")

Research answer BLEU score: 6.190746313491463e-232
Marketing answer BLEU score: 8.551484609999234e-232


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


## A Full Pass With Basic Evaluations

In [49]:
import json

with open("../data/validation_question_answers.json", "r") as f:
    validation_question_answers = json.load(f)

In [52]:
import time
import numpy as np
from tqdm import tqdm
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
from sentence_transformers import SentenceTransformer, util

# Load sentence embedding model for cosine similarity
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# Initialize ROUGE scorer
rouge = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

start_time = time.time()

# Store results with evaluation metrics
answers_with_metrics = []

for key, data in tqdm(validation_question_answers.items(), desc="Processing Questions"):
    question = data["question"]

    # Invoke RAG system with error handling and retry logic
    def robust_invoke(team, question, retries=3, delay=10):
        for attempt in range(retries):
            try:
                return rag_system.invoke(team, question)
            except Exception as e:
                print(f"Error invoking Cohere API: {e}, attempt {attempt + 1}/{retries}")
                if attempt < retries - 1:
                    time.sleep(delay)
                else:
                    return "[API Error: No response]"

    answer_research = robust_invoke(Team.Engineering, question)
    answer_marketing = robust_invoke(Team.Marketing, question)

    # Gold answers
    gold_answer_research = data["gold_answer_research"]
    gold_answer_marketing = data["gold_answer_marketing"]

    # Calculate BLEU
    smoothie = SmoothingFunction().method4
    bleu_research = sentence_bleu(
        [gold_answer_research.split()], answer_research.split(), smoothing_function=smoothie
    )
    bleu_marketing = sentence_bleu(
        [gold_answer_marketing.split()], answer_marketing.split(), smoothing_function=smoothie
    )

    # Calculate ROUGE
    rouge_research = rouge.score(gold_answer_research, answer_research)
    rouge_marketing = rouge.score(gold_answer_marketing, answer_marketing)

    # Cosine similarity of embeddings
    embed_gold_research = embedding_model.encode(gold_answer_research)
    embed_answer_research = embedding_model.encode(answer_research)
    cosine_research = util.cos_sim(embed_gold_research, embed_answer_research).item()

    embed_gold_marketing = embedding_model.encode(gold_answer_marketing)
    embed_answer_marketing = embedding_model.encode(answer_marketing)
    cosine_marketing = util.cos_sim(embed_gold_marketing, embed_answer_marketing).item()

    # Length difference
    len_diff_research = abs(len(answer_research.split()) - len(gold_answer_research.split()))
    len_diff_marketing = abs(len(answer_marketing.split()) - len(gold_answer_marketing.split()))

    answers_with_metrics.append({
        "question": question,
        "answer_research": answer_research,
        "answer_marketing": answer_marketing,
        "metrics_research": {
            "bleu": bleu_research,
            "rouge": rouge_research,
            "cosine_similarity": cosine_research,
            "length_difference": len_diff_research
        },
        "metrics_marketing": {
            "bleu": bleu_marketing,
            "rouge": rouge_marketing,
            "cosine_similarity": cosine_marketing,
            "length_difference": len_diff_marketing
        }
    })

end_time = time.time()

# Display total elapsed time
elapsed_time = end_time - start_time
print(f"Test questions answered and evaluated in {elapsed_time:.2f} seconds.")

Processing Questions:  47%|████▋     | 37/78 [04:15<04:48,  7.03s/it]

Error invoking Cohere API: status_code: 504, body: stream timeout, attempt 1/3


Processing Questions: 100%|██████████| 78/78 [14:21<00:00, 11.04s/it]   

Test questions answered and evaluated in 861.10 seconds.





In [36]:
rag_system.get_config()

{'llm': {'family': <ModelType.Mistral: 'mistral'>,
  'model_name': 'mistralai/Mistral-7B-Instruct-v0.2',
  'quantization': {'load_in_4bit': True},
  'tokenizer': 'mistralai/Mistral-7B-Instruct-v0.2',
  'temperature': 0.6,
  'top_p': 0.95,
  'max_new_tokens': 1000,
  'repetition_penalty': 1.2},
 'top_k': 4,
 'engineering_template_path': PosixPath('/home/ubuntu/w267-final-project-rag-pipeline/notebook/../templates/engineering_template.txt'),
 'marketing_template_path': PosixPath('/home/ubuntu/w267-final-project-rag-pipeline/notebook/../templates/marketing_template.txt'),
 'vectorstore': {'embedding_model': 'multi-qa-mpnet-base-dot-v1',
  'collection_name': 'myrag',
  'in_memory': True,
  'force_recreate': True,
  'chunking_strategy': {'chunk_size': 128,
   'chunk_overlap': 0,
   'splitter_type': 'RecursiveCharacterTextSplitter'}}}

# LLM-Driven Evaluations

In [16]:
import os

# These should already be loaded from the dotenv file
# If not, set them here:

# os.environ["LANGSMITH_TRACING"] = "true"
# os.environ["LANGSMITH_API_KEY"] = "YOUR LANGSMITH API KEY"
# os.environ["OPENAI_API_KEY"] = "YOUR OPENAI API KEY"

In [37]:
import json

with open("../data/validation_question_answers.json", "r") as f:
    validation_question_answers = json.load(f)
    
for i, sample in validation_question_answers.items():
    print(i, sample)

0 {'question': 'What defines a large language model in the context of natural language processing tasks?', 'gold_answer_research': 'A large language model in the context of natural language processing tasks is characterized by its ability to achieve general-purpose language generation and other NLP tasks through self-supervised and semi-supervised training on large datasets. These models typically utilize feedforward neural networks and transformers, surpassing earlier models like recurrent neural networks and word n-gram language models. They are often pre-trained on vast amounts of text data from the internet and can be fine-tuned for specific downstream tasks such as summarization, machine reading comprehension, and natural language to SQL translation.', 'gold_answer_marketing': 'A large language model (LLM) is a language model notable for its ability to achieve general-purpose language generation and other natural language processing tasks such as classification. LLMs acquire these

In [38]:
from langsmith import Client

client = Client()

This code will only work the first time you run it. Subsequent runs will throw a 409, indicating that the datasets have already been created on LangSmith, in which case you can either (A) rip and replace them, or (B) load them from LangSmith

In [40]:
# First, transform the existing dataset into LangSmith compatible examples:
examples_engineering = []
examples_marketing = []

for sample in validation_question_answers.values():
    examples_engineering.append({
        "inputs": {"question": sample["question"]},
        "outputs": {"answer": sample["gold_answer_research"]}
    })

    examples_marketing.append({
        "inputs": {"question": sample["question"]},
        "outputs": {"answer": sample["gold_answer_marketing"]}
    })

# Create dataset for Engineering
engineering_dataset_name = "w267-rag-validation-engineering"
engineering_dataset = client.create_dataset(dataset_name=engineering_dataset_name)
client.create_examples(
    dataset_id=engineering_dataset.id,
    examples=examples_engineering
)

# Create dataset for Marketing
marketing_dataset_name = "w267-rag-validation-marketing"
marketing_dataset = client.create_dataset(dataset_name=marketing_dataset_name)
client.create_examples(
    dataset_id=marketing_dataset.id,
    examples=examples_marketing
)

LangSmithConflictError: Conflict for /datasets. HTTPError('409 Client Error: Conflict for url: https://api.smith.langchain.com/datasets', '{"detail":"Dataset with this name already exists."}')

Here's how we can gracefully load them from LangSmith if they are already created:

In [41]:
def get_or_create_dataset(client, dataset_name, examples):
    if client.has_dataset(dataset_name=dataset_name):
        print(f"Dataset '{dataset_name}' already exists, loading existing dataset.")
        dataset = client.read_dataset(dataset_name=dataset_name)
    else:
        print(f"Dataset '{dataset_name}' does not exist, creating it now.")
        dataset = client.create_dataset(dataset_name=dataset_name)
        client.create_examples(dataset_id=dataset.id, examples=examples)
    return dataset

# Engineering dataset
engineering_dataset_name = "w267-rag-validation-engineering"
engineering_dataset = get_or_create_dataset(client, engineering_dataset_name, examples_engineering)

# Marketing dataset
marketing_dataset_name = "w267-rag-validation-marketing"
marketing_dataset = get_or_create_dataset(client, marketing_dataset_name, examples_marketing)

Dataset 'w267-rag-validation-engineering' already exists, loading existing dataset.
Dataset 'w267-rag-validation-marketing' already exists, loading existing dataset.


In [42]:
from langchain_openai import ChatOpenAI

In [43]:
from typing_extensions import Annotated, TypedDict

# Grade output schema
class CorrectnessGrade(TypedDict):
    # Note that the order in the fields are defined is the order in which the model will generate them.
    # It is useful to put explanations before responses because it forces the model to think through
    # its final response before generating it:
    explanation: Annotated[str, ..., "Explain your reasoning for the score"]
    correct: Annotated[bool, ..., "True if the answer is correct, False otherwise."]

# Grade prompt
correctness_instructions = """You are a teacher grading a quiz. 

You will be given a QUESTION, the GROUND TRUTH (correct) ANSWER, and the STUDENT ANSWER. 

Here is the grade criteria to follow:
(1) Grade the student answers based ONLY on their factual accuracy relative to the ground truth answer. 
(2) Ensure that the student answer does not contain any conflicting statements.
(3) It is OK if the student answer contains more information than the ground truth answer, as long as it is factually accurate relative to the  ground truth answer.

Correctness:
A correctness value of True means that the student's answer meets all of the criteria.
A correctness value of False means that the student's answer does not meet all of the criteria.

Explain your reasoning in a step-by-step manner to ensure your reasoning and conclusion are correct. 

Avoid simply stating the correct answer at the outset."""

# Grader LLM
grader_llm = ChatOpenAI(model="gpt-4o", temperature=0).with_structured_output(CorrectnessGrade, method="json_schema", strict=True)

def correctness(inputs: dict, outputs: dict, reference_outputs: dict) -> bool:
    """An evaluator for RAG answer accuracy"""
    answers = f"""\
QUESTION: {inputs['question']}
GROUND TRUTH ANSWER: {reference_outputs['answer']}
STUDENT ANSWER: {outputs['answer']}"""

    # Run evaluator
    grade = grader_llm.invoke([
        {"role": "system", "content": correctness_instructions}, 
        {"role": "user", "content": answers}
    ])
    return grade["correct"]

In [44]:
# Grade output schema
class RelevanceGrade(TypedDict):
    explanation: Annotated[str, ..., "Explain your reasoning for the score"]
    relevant: Annotated[bool, ..., "Provide the score on whether the answer addresses the question"]

# Grade prompt
relevance_instructions="""You are a teacher grading a quiz. 

You will be given a QUESTION and a STUDENT ANSWER. 

Here is the grade criteria to follow:
(1) Ensure the STUDENT ANSWER is concise and relevant to the QUESTION
(2) Ensure the STUDENT ANSWER helps to answer the QUESTION

Relevance:
A relevance value of True means that the student's answer meets all of the criteria.
A relevance value of False means that the student's answer does not meet all of the criteria.

Explain your reasoning in a step-by-step manner to ensure your reasoning and conclusion are correct. 

Avoid simply stating the correct answer at the outset."""

# Grader LLM
relevance_llm = ChatOpenAI(model="gpt-4o", temperature=0).with_structured_output(RelevanceGrade, method="json_schema", strict=True)

# Evaluator
def relevance(inputs: dict, outputs: dict) -> bool:
    """A simple evaluator for RAG answer helpfulness."""
    answer = f"QUESTION: {inputs['question']}\nSTUDENT ANSWER: {outputs['answer']}"
    grade = relevance_llm.invoke([
        {"role": "system", "content": relevance_instructions}, 
        {"role": "user", "content": answer}
    ])
    return grade["relevant"]

In [45]:
# Grade output schema
class GroundedGrade(TypedDict):
    explanation: Annotated[str, ..., "Explain your reasoning for the score"]
    grounded: Annotated[bool, ..., "Provide the score on if the answer hallucinates from the documents"]

# Grade prompt
grounded_instructions = """You are a teacher grading a quiz. 

You will be given FACTS and a STUDENT ANSWER. 

Here is the grade criteria to follow:
(1) Ensure the STUDENT ANSWER is grounded in the FACTS. 
(2) Ensure the STUDENT ANSWER does not contain "hallucinated" information outside the scope of the FACTS.

Grounded:
A grounded value of True means that the student's answer meets all of the criteria.
A grounded value of False means that the student's answer does not meet all of the criteria.

Explain your reasoning in a step-by-step manner to ensure your reasoning and conclusion are correct. 

Avoid simply stating the correct answer at the outset."""

# Grader LLM 
grounded_llm = ChatOpenAI(model="gpt-4o", temperature=0).with_structured_output(GroundedGrade, method="json_schema", strict=True)

# Evaluator
def groundedness(inputs: dict, outputs: dict) -> bool:
    """A simple evaluator for RAG answer groundedness."""
    doc_string = "\n\n".join(doc.page_content for doc in outputs["documents"])
    answer = f"FACTS: {doc_string}\nSTUDENT ANSWER: {outputs['answer']}"
    grade = grounded_llm.invoke([{"role": "system", "content": grounded_instructions}, {"role": "user", "content": answer}])
    return grade["grounded"]

In [46]:
# Grade output schema
class RetrievalRelevanceGrade(TypedDict):
    explanation: Annotated[str, ..., "Explain your reasoning for the score"]
    relevant: Annotated[bool, ..., "True if the retrieved documents are relevant to the question, False otherwise"]

# Grade prompt
retrieval_relevance_instructions = """You are a teacher grading a quiz. 

You will be given a QUESTION and a set of FACTS provided by the student. 

Here is the grade criteria to follow:
(1) You goal is to identify FACTS that are completely unrelated to the QUESTION
(2) If the facts contain ANY keywords or semantic meaning related to the question, consider them relevant
(3) It is OK if the facts have SOME information that is unrelated to the question as long as (2) is met

Relevance:
A relevance value of True means that the FACTS contain ANY keywords or semantic meaning related to the QUESTION and are therefore relevant.
A relevance value of False means that the FACTS are completely unrelated to the QUESTION.

Explain your reasoning in a step-by-step manner to ensure your reasoning and conclusion are correct. 

Avoid simply stating the correct answer at the outset."""

# Grader LLM
retrieval_relevance_llm = ChatOpenAI(model="gpt-4o", temperature=0).with_structured_output(RetrievalRelevanceGrade, method="json_schema", strict=True)

def retrieval_relevance(inputs: dict, outputs: dict) -> bool:
    """An evaluator for document relevance"""
    doc_string = "\n\n".join(doc.page_content for doc in outputs["documents"])
    answer = f"FACTS: {doc_string}\nQUESTION: {inputs['question']}"

    # Run evaluator
    grade = retrieval_relevance_llm.invoke([
        {"role": "system", "content": retrieval_relevance_instructions}, 
        {"role": "user", "content": answer}
    ])
    return grade["relevant"]

In [47]:
# These should have already been initialized way up above - but in case not here are the RAG initializations:

# rag_system_cohere = RAGSystem(
#     vector_db_manager=vdm,
#     engineering_template_path=engineering_template,
#     marketing_template_path=marketing_template,
#     cohere_api_key=cohere_api_key,
#     use_mistral=False,
#     use_cohere=True,
#     mistral_model_name=SupportedGeneratorModels.MistralInstructV2,
#     top_k=4,
# )
# rag_system_mistral = RAGSystem(
#     vector_db_manager=vdm,
#     engineering_template_path=engineering_template,
#     marketing_template_path=marketing_template,
#     cohere_api_key=cohere_api_key,
#     use_mistral=True,
#     use_cohere=False,
#     mistral_model_name=SupportedGeneratorModels.MistralInstructV2,
#     top_k=4,
# )

In [48]:
# rag_system = rag_system_cohere
rag_system = rag_system_mistral

In [49]:
def engineering_target(inputs: dict) -> dict:
    question = inputs["question"]
    answer = rag_system.invoke(Team.Engineering, question)
    retrieved_docs = rag_system.query_vectorstore(question)
    return {
        "answer": answer,
        "documents": retrieved_docs
    }

def marketing_target(inputs: dict) -> dict:
    question = inputs["question"]
    answer = rag_system.invoke(Team.Marketing, question)
    retrieved_docs = rag_system.query_vectorstore(question)
    return {
        "answer": answer,
        "documents": retrieved_docs
    }

In [50]:
# Engineering evaluation
engineering_experiment_results = client.evaluate(
    engineering_target,
    data=engineering_dataset_name,
    evaluators=[correctness, groundedness, relevance, retrieval_relevance],
    experiment_prefix="rag-mistral-eval-engineering",
    metadata=rag_system.get_config(),
)

# Marketing evaluation
marketing_experiment_results = client.evaluate(
    marketing_target,
    data=marketing_dataset_name,
    evaluators=[correctness, groundedness, relevance, retrieval_relevance],
    experiment_prefix="rag-mistral-eval-marketing",
    metadata=rag_system.get_config(),
)

View the evaluation results for experiment: 'rag-mistral-eval-engineering-c771cc65' at:
https://smith.langchain.com/o/b90a129f-4968-4c94-8e16-5eba475d05e8/datasets/b1cd9c51-0a5b-40f9-9bc9-d26f8ad660be/compare?selectedSessions=2199a4db-d1df-4a7f-8c96-6e014c27396e




0it [00:00, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting

View the evaluation results for experiment: 'rag-mistral-eval-marketing-d0bbca6c' at:
https://smith.langchain.com/o/b90a129f-4968-4c94-8e16-5eba475d05e8/datasets/fffe62a0-c961-4cfc-8600-436128b8dbbb/compare?selectedSessions=0d115463-1e3d-4663-a5af-c0ee202db797




0it [00:00, ?it/s]

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:2 for o

In [None]:
# Explore results locally as a dataframe if you have pandas installed
experiment_results.to_pandas()

In [97]:
engineering_dataset

Dataset(name='w267-rag-validation-engineering', description=None, data_type=<DataType.kv: 'kv'>, id=UUID('b1cd9c51-0a5b-40f9-9bc9-d26f8ad660be'), created_at=datetime.datetime(2025, 3, 23, 14, 44, 43, 573827, tzinfo=datetime.timezone.utc), modified_at=datetime.datetime(2025, 3, 23, 14, 44, 43, 573827, tzinfo=datetime.timezone.utc), example_count=0, session_count=0, last_session_start_time=None, inputs_schema=None, outputs_schema=None, transformations=None)

In [None]:
client.list_examples()

AttributeError: 'generator' object has no attribute 'count'