The following code is taken entirely from: https://www.youtube.com/watch?v=sVcwVQRHIc8 and our slides reflect the material taught in the video as well. This code snippet is used for demo purposes only.

In [1]:
!pip install -U langchain  langchain_community tiktoken langchain-openai langchainhub chromadb 



<center><b><h2>Part 1: Overview</h2></b></center>

In [2]:
import os

from dotenv import load_dotenv

load_dotenv()
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
os.environ['LANGCHAIN_API_KEY'] = os.environ.get('LANGCHAIN_API_KEY')
os.environ['OPENAI_API_KEY'] = os.environ.get('OPENAI_API_KEY')
os.environ["USER_AGENT"] = "MyCustomUserAgent/1.0"

In [3]:
import bs4
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Step 1: Load a Web Document
document_url = "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/"
user_agent = {"User-Agent": "Mozilla/5.0 (compatible; LangChainBot/1.0; +http://example.com/bot)"}

loader = WebBaseLoader(
    web_paths=(document_url,),
    bs_kwargs={"parse_only": bs4.SoupStrainer(class_="post-content")},
    requests_kwargs={"headers": user_agent}
)

documents = loader.load()

# Step 2: Split Text into Chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
chunks = splitter.split_documents(documents)

# Step 3: Embed and Store in Vector Database
embedding_model = OpenAIEmbeddings()
vector_db = Chroma.from_documents(documents=chunks, embedding=embedding_model)
retriever = vector_db.as_retriever()

# Step 4: Setup Retrieval-Augmented Generation Chain
prompt_template = hub.pull("rlm/rag-prompt")
language_model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

def combine_documents(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_pipeline = (
    {"context": retriever | combine_documents, "question": RunnablePassthrough()}
    | prompt_template
    | language_model
    | StrOutputParser()
)

# Step 5: Ask a Question
question = "What is Prompt Engineering?"
response = rag_pipeline.invoke(question)

# Output the Response
print(response)


Prompt Engineering, also known as In-Context Prompting, involves methods to communicate with LLM to guide its behavior without updating model weights. It focuses on alignment and model steerability for autoregressive language models. Prompt engineering requires heavy experimentation and heuristics due to varying effects among models.


<center><b><h2>Part 2: Indexing</b></center>

In [4]:
# Documents
question = "What kinds of food do I like?"
document = "My favorite cuisine is Chinese food."

In [5]:
import tiktoken

def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.get_encoding(encoding_name)
    num_tokens = len(encoding.encode(string))
    return num_tokens

num_tokens_from_string(question, "cl100k_base")

8

In [6]:
from langchain_openai import OpenAIEmbeddings
embd = OpenAIEmbeddings()
query_result = embd.embed_query(question)
document_result = embd.embed_query(document)
len(query_result)

1536

In [7]:
import numpy as np

def cosine_similarity(vec1, vec2):
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    return dot_product / (norm_vec1 * norm_vec2)

similarity = cosine_similarity(query_result, document_result)
print("Cosine Similarity:", similarity)


Cosine Similarity: 0.8403835197922264


In [8]:
#### INDEXING ####

# Load blog
import bs4
from langchain_community.document_loaders import WebBaseLoader
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-03-15-prompt-engineering",),
    bs_kwargs={"parse_only": bs4.SoupStrainer(class_="post-content")},
    requests_kwargs={"headers": user_agent}
)
blog_docs = loader.load()

In [9]:
# Split
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=300, 
    chunk_overlap=50)

# Make splits
splits = text_splitter.split_documents(blog_docs)

In [10]:
# Index
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
vectorstore = Chroma.from_documents(documents=splits, 
                                    embedding=OpenAIEmbeddings())

retriever = vectorstore.as_retriever()

retriever

VectorStoreRetriever(tags=['Chroma', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x11ad90b30>, search_kwargs={})

<center><b><h2>Part 3: Retrieval</h2></b></center>

In [11]:
# Index
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
vectorstore = Chroma.from_documents(documents=splits, 
                                    embedding=OpenAIEmbeddings())


retriever = vectorstore.as_retriever(search_kwargs={"k": 1})

In [12]:
docs = retriever.get_relevant_documents("What is Prompt Engineering?")
len(docs)

  docs = retriever.get_relevant_documents("What is Prompt Engineering?")


1

<center><b><h2>Part 4: Generation</h2></b></center>

In [13]:
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate

# Prompt
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='Answer the question based only on the following context:\n{context}\n\nQuestion: {question}\n'), additional_kwargs={})])

In [14]:
# LLM
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

# Chain
chain = prompt | llm

# Run
chain.invoke({"context":docs,"question":"What is Prompt Engineering?"})

AIMessage(content='Prompt Engineering, also known as In-Context Prompting, refers to methods for how to communicate with LLM to steer its behavior for desired outcomes without updating the model weights.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 36, 'prompt_tokens': 177, 'total_tokens': 213, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'id': 'chatcmpl-BPPvkJMHbKnzz4RPSpKTuFqrFGWXo', 'finish_reason': 'stop', 'logprobs': None}, id='run-70ab34c5-7a45-4633-9d21-67d817262e16-0', usage_metadata={'input_tokens': 177, 'output_tokens': 36, 'total_tokens': 213, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [15]:
from langchain import hub
prompt_hub_rag = hub.pull("rlm/rag-prompt")

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain.invoke("What is Prompt Engineering?")

'Prompt Engineering, also known as In-Context Prompting, refers to methods for how to communicate with LLM to steer its behavior for desired outcomes without updating the model weights.'