# LangSmith and Evaluation Overview with AI Makerspace


# Task 1: Dependencies and OpenAI API Key


In [6]:
!pip install langchain_core langchain_openai langchain_community langchain-qdrant qdrant-client langsmith openai tiktoken cohere lxml -qU


In [2]:
import getpass
import os

# Ask for the key interactively (getpass does not echo the input) and store it
# in the OPENAI_API_KEY environment variable for the rest of the session.
openai_key_prompt = "Enter your OpenAI API Key:"
os.environ["OPENAI_API_KEY"] = getpass.getpass(openai_key_prompt)

# Task 2: Basic RAG Chain


## OpenAI Model


In [3]:
from langchain_openai import ChatOpenAI

# Chat model used for generation in the base RAG chain; the "base_llm" tag is
# attached to the model's runs so they can be identified later.
base_llm = ChatOpenAI(model="gpt-3.5-turbo", tags=["base_llm"])

## Asyncio Bug Handling


In [4]:
# Patch asyncio so an already-running event loop can be re-entered.
# Presumably needed because the notebook kernel already runs a loop when the
# sitemap loader below fetches pages — TODO confirm.
import nest_asyncio
nest_asyncio.apply()

## SiteMap Loader


In [5]:
# SitemapLoader lives in langchain_community (installed above); the old
# `langchain.document_loaders` path is a deprecated re-export that newer
# LangChain releases no longer provide.
from langchain_community.document_loaders import SitemapLoader

# Fetch every page listed in the LangChain blog sitemap and load each one as a Document.
documents = SitemapLoader(web_path="https://blog.langchain.dev/sitemap-posts.xml").load()

Fetching pages: 100%|##########| 225/225 [00:32<00:00,  7.01it/s]


In [6]:
# Spot-check the load: show the "source" value recorded in the first document's metadata.
documents[0].metadata["source"]


'https://blog.langchain.dev/customers-wordsmith/'

## RecursiveCharacterTextSplitter


In [7]:

# The splitter ships in the standalone `langchain_text_splitters` package;
# `langchain.text_splitter` is only a deprecated alias for it.
from langchain_text_splitters import RecursiveCharacterTextSplitter

# from_tiktoken_encoder measures chunk_size / chunk_overlap in tiktoken tokens
# rather than characters.
split_documents = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500,
    chunk_overlap=20,
).split_documents(documents)

In [8]:
# Number of chunks produced by the splitter.
len(split_documents)


1568

## Embeddings

In [9]:
from langchain_openai import OpenAIEmbeddings

# Embedding model used to vectorise the document chunks for the vector store.
base_embeddings_model = OpenAIEmbeddings(
    model="text-embedding-3-small",
)

## Qdrant VectorStore Retriever


In [10]:
# `Qdrant` is the deprecated legacy wrapper; `QdrantVectorStore` is its
# replacement in langchain-qdrant. The legacy name stays imported in case
# later cells still reference it.
from langchain_qdrant import Qdrant, QdrantVectorStore

# Embed every chunk with base_embeddings_model and index the vectors in an
# in-memory Qdrant collection (location=":memory:").
vectorstore = QdrantVectorStore.from_documents(
    split_documents,
    base_embeddings_model,
    location=":memory:",
    collection_name="langchainblogs",
)

In [11]:
# Wrap the vector store as a retriever; no arguments are passed, so the
# library's default search settings apply.
base_retriever = vectorstore.as_retriever()


## Prompt Template


In [12]:
# ChatPromptTemplate is defined in langchain_core (installed above);
# `langchain.prompts` is a deprecated re-export of it.
from langchain_core.prompts import ChatPromptTemplate

# Template with two placeholders, {context} and {question}, filled in by the chain.
# The leading backslash suppresses the newline right after the opening quotes.
base_rag_prompt_template = """\
Using the provided context, please answer the user's question. If you don't know the answer based on the context, say you don't know.

Context:
{context}

Question:
{question}
"""

base_rag_prompt = ChatPromptTemplate.from_template(base_rag_prompt_template)

## LCEL Chain


In [13]:
from operator import itemgetter
# StrOutputParser is defined in langchain_core; `langchain.schema` is a deprecated re-export.
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel

base_rag_chain = (
    # INVOKE CHAIN WITH: {"question" : "<>"}
    # Step 1 - build a dict with two keys from the input:
    #   "context"  : the value of the "question" key, piped into base_retriever
    #   "question" : the value of the "question" key, passed along unchanged
    {"context": itemgetter("question") | base_retriever, "question": itemgetter("question")}
    # Step 2 - RunnablePassthrough.assign forwards the whole dict and sets "context"
    #          to the value it already has, so the data is unchanged here.
    #          The step is kept so the chain (and its trace) keeps the same shape.
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # Step 3 - build the final output dict:
    #   "response" : "context" and "question" format the prompt, which is piped into
    #                the LLM and then parsed to a plain string
    #   "context"  : the "context" value from the previous step, returned alongside the answer
    | {"response": base_rag_prompt | base_llm | StrOutputParser(), "context": itemgetter("context")}
)

In [14]:
# Smoke-test the chain end to end and display only the generated answer
# (the output dict also carries the retrieved "context").
base_rag_chain.invoke({"question" : "What is a good way to evaluate agents?"})["response"]


"A good way to evaluate agents is by testing their capabilities in various tasks such as planning, task decomposition, function calling, and the ability to override pre-trained biases when needed. Creating test environments specifically designed to measure an agent's effectiveness in using tools to accomplish tasks can help in evaluating their performance and agentic behavior."

# Task 3: Setting Up LangSmith


In [15]:
from uuid import uuid4

# First 8 hex characters of a random UUID, used to give this session its own project name.
unique_id = uuid4().hex[:8]

# Configure tracing and the project name through environment variables.
os.environ.update(
    {
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_PROJECT": f"LangSmith - {unique_id}",
    }
)

## LangSmith API


In [16]:
# Ask for the LangSmith key interactively (getpass does not echo the input)
# and store it in the LANGCHAIN_API_KEY environment variable.
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass('Enter your LangSmith API key: ')


In [17]:
# Run the chain once more, this time passing a run config whose "Demo Run" tag
# labels the invocation, and display only the answer text.
base_rag_chain.invoke(
    {"question" : "What is LangSmith?"},
    config={"tags" : ["Demo Run"]},
)['response']


'LangSmith is a unified platform for debugging, testing, evaluating, and monitoring LLM (large language model) applications. It provides tools for improving collaboration, organization, and iteration speed for users working with LLM applications.'

# Task 4: Examining the Trace in LangSmith!


## 🏗️ Activity #1:
Include a screenshot of your trace and explain what it means.

![LangSmith trace screenshot](./week-5-day-2.png)


# Task 5: Create Testing Dataset
