True

In [None]:
import os
from dotenv import load_dotenv
load_dotenv(".env")
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_API_KEY"] = "ls_..."  # Your API key

from langchain_benchmarks import clone_public_dataset, registry

registry = registry.filter(Type="RetrievalTask")
registry
langchain_docs = registry["LangChain Docs Q&A"]
langchain_docs
docs = list(langchain_docs.get_docs())
len(docs)

In [None]:
# from langchain.embeddings import HuggingFaceEmbeddings
# from langchain.vectorstores.chroma import Chroma

# embeddings = HuggingFaceEmbeddings(
#     model_name="thenlper/gte-base",
#     # model_kwargs={"device": 0},  # Comment out to use CPU
# )

# vectorstore = Chroma(
#     collection_name="lcbm-b-huggingface-gte-base",
#     embedding_function=embeddings,
#     persist_directory="./chromadb",
# )

# vectorstore.add_documents(docs)
# retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

In [4]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores.chroma import Chroma

embeddings = HuggingFaceEmbeddings(
    model_name="thenlper/gte-base",
    # model_kwargs={"device": 0},  # Comment out to use CPU
)

vectorstore = Chroma(
    collection_name="lcbm-b-huggingface-gte-base",
    embedding_function=embeddings,
    persist_directory="./chromadb",
)

# vectorstore.add_documents(docs)
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

In [7]:
retriever.invoke("How to load an existed Chroma DB store?")

 Document(page_content='Chroma | 🦜️🔗 Langchain\n\n[Skip to main content](#docusaurus_skipToContent_fallback)# Chroma\n\n[Chroma](https://docs.trychroma.com/getting-started) is a database for building AI applications with embeddings.\n\n## Installation and Setup\u200b\n\n```bash\npip install chromadb\n```\n\n## VectorStore\u200b\n\nThere exists a wrapper around Chroma vector databases, allowing you to use it as a vectorstore,\nwhether for semantic search or example selection.\n\n```python\nfrom langchain.vectorstores import Chroma\n```\n\nFor a more detailed walkthrough of the Chroma wrapper, see [this notebook](/docs/integrations/vectorstores/chroma)\n\n## Retriever\u200b\n\nSee a [usage example](/docs/modules/data_connection/retrievers/how_to/self_query/chroma_self_query).\n\n```python\nfrom langchain.retrievers import SelfQueryRetriever\n```\n\n- [Installation and Setup](#installation-and-setup)\n\n- [VectorStore](#vectorstore)\n\n- [Retriever](#retriever)', metadata={'changefreq': '

In [11]:
from operator import itemgetter
from typing import Sequence

from langchain.chat_models import ChatAnthropic, ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.document import Document
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable.passthrough import RunnableAssign


# After the retriever fetches documents, this
# function formats them in a string to present for the LLM
def format_docs(docs: Sequence[Document]) -> str:
    formatted_docs = []
    for i, doc in enumerate(docs):
        doc_string = (
            f"<document index='{i}'>\n"
            f"<source>{doc.metadata.get('source')}</source>\n"
            f"<doc_content>{doc.page_content}</doc_content>\n"
            "</document>"
        )
        formatted_docs.append(doc_string)
    formatted_str = "\n".join(formatted_docs)
    return f"<documents>\n{formatted_str}\n</documents>"


prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You will be provided with documents to expand your knowledge about the topic:"
            "\n{context}\n"
            "Carefully respond to the user query.",
        ),
        ("human", "{question}"),
    ]
)
llm = ChatOpenAI(temperature=0.6, model="gpt-3.5-turbo-1106")

response_generator = (prompt | llm | StrOutputParser()).with_config(
    run_name="GenerateResponse",
)

# This is the final response chain.
# It fetches the "question" key from the input dict,
# passes it to the retriever, then formats as a string.

chain = (
    RunnableAssign(
        {
            "context": (itemgetter("question") | retriever | format_docs).with_config(
                run_name="FormatDocs"
            )
        }
    )
    # The "RunnableAssign" above returns a dict with keys
    # question (from the original input) and
    # context: the string-formatted docs.
    # This is passed to the response_generator above
    | response_generator
)

In [15]:
Chroma(client=2)

AttributeError: 'int' object has no attribute 'get_or_create_collection'

In [19]:
q = '''What is RunnableAssign?
'''
r = chain.invoke({"question": q})
print(r)

The `RunnableAssign` class is not mentioned in the provided documents. It's possible that it may be a custom or specific class within a particular codebase or framework that is not covered in the provided documentation.

If you have specific details or context about the `RunnableAssign` class, please provide more information so that I can assist you further. Alternatively, if it is a specific class within a framework or library, you may want to refer to the official documentation or resources related to that framework for more detailed information.


In [20]:
q = '''What is Runnable Assign?
'''
retriever.invoke(q)

[Document(page_content="langchain.schema.runnable.base.RunnableBinding — 🦜🔗 LangChain 0.0.337\n\nAPI\n\nExperimental\n\nPython Docs\n\nToggle Menu\n\nPrevUp\nNext\n\nLangChain 0.0.337\n\nlangchain.schema.runnable.base.RunnableBinding\n\nlangchain.schema.runnable.base.RunnableBinding¶\n\nclass langchain.schema.runnable.base.RunnableBinding[source]¶\nBases: RunnableBindingBase[Input, Output]\nA runnable that delegates calls to another runnable with a set of kwargs.\nCreate a new model by parsing and validating input data from keyword arguments.\nRaises ValidationError if the input data cannot be parsed to form a valid model.\n\nparam bound: langchain.schema.runnable.base.Runnable[langchain.schema.runnable.utils.Input, langchain.schema.runnable.utils.Output] [Required]¶\n\nparam config: langchain.schema.runnable.config.RunnableConfig [Optional]¶\n\nparam config_factories: List[Callable[[langchain.schema.runnable.config.RunnableConfig], langchain.schema.runnable.config.RunnableConfig]] [Op

In [38]:
from langsmith.client import Client
from langchain_benchmarks.rag import get_eval_config

In [56]:
client = Client()
RAG_EVALUATION = get_eval_config()

In [57]:
import uuid

# Generate a unique run ID for this experiment
run_uid = uuid.uuid4().hex[:6]

test_run = client.run_on_dataset(
    dataset_name=langchain_docs.name,
    llm_or_chain_factory=chain,
    evaluation=RAG_EVALUATION,
    project_name=f"claude-2 qa-chain simple-index {run_uid}",
    project_metadata={
        "index_method": "basic",
    },
    verbose=True,
)

LangSmithAuthError: Authentication failed for https://api.smith.langchain.com/datasets. HTTPError('401 Client Error: Unauthorized for url: https://api.smith.langchain.com/datasets?limit=1&name=LangChain+Docs+Q%26A', '{"detail":"Invalid API key"}')