In [1]:
import os
from langchain_community.document_loaders import WebBaseLoader
import bs4
import openai
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
import chromadb
from langchain_community.vectorstores import Chroma
from langchain_experimental.text_splitter import SemanticChunker
from pprint import pprint

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# How to simplify warnings

import warnings

def simple_warning_format(message, category, filename, lineno, file=None, line=None):
    """Render a warning as one short ``Category: message`` line.

    Args:
        message: The warning text (or warning instance) to show.
        category: The warning class; only its ``__name__`` is used.
        filename: Accepted for signature compatibility; ignored.
        lineno: Accepted for signature compatibility; ignored.
        file: Accepted for signature compatibility; ignored.
        line: Accepted for signature compatibility; ignored.

    Returns:
        The formatted warning, terminated by a newline.
    """
    label = category.__name__
    return "{}: {}\n".format(label, message)

# Show every UserWarning each time it is raised, and route all warning text
# through the compact one-line formatter defined above.
warnings.simplefilter(action="always", category=UserWarning)
warnings.formatwarning = simple_warning_format

In [3]:
# Read the OpenAI key from the environment so it never appears in the notebook.
# A missing or empty value is rejected right here with an actionable message
# (still a KeyError, as a plain os.environ lookup would raise).
if not os.environ.get("OPENAI_API_KEY"):
    raise KeyError(
        "OPENAI_API_KEY is not set (or is empty); export it before running this notebook."
    )
openai.api_key = os.environ["OPENAI_API_KEY"]

In [4]:
# Web loading and crawling
# Load a single page, restricting HTML parsing to elements whose CSS class is
# post-content, post-title or post-header.

loader = WebBaseLoader(
    web_paths=("https://kbourne.github.io/chapter1.html",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    },
)

docs = loader.load()

In [5]:
# Splitting
# Chunk the loaded documents with SemanticChunker, which is handed an OpenAI
# embedding model to work with.

text_splitter = SemanticChunker(embeddings=OpenAIEmbeddings())
splits = text_splitter.split_documents(docs)

In [None]:
# Print the first chunk as a quick look at what the splitter produced.
first_split = splits[0]
print(first_split)

In [7]:
# Embedding and indexing the chunks
# Build a Chroma vector store from the chunks using OpenAI embeddings, then
# expose that store through its retriever interface.

vectorstore = Chroma.from_documents(splits, embedding=OpenAIEmbeddings())

# NOTE: the spelling `retreiver` (sic) is kept because later cells refer to
# the retriever by this exact name.
retreiver = vectorstore.as_retriever()

In [None]:
# Exercise the retriever on its own before it is wired into the chain.
query = "How does RAG compare with fine-tuning?"
# Retrievers are Runnables: `invoke` is the supported entry point and replaces
# the deprecated `get_relevant_documents`.
relevant_docs = retreiver.invoke(query)
print(relevant_docs)

In [10]:
# Prompt templates from the LangChain Hub

prompt = hub.pull("jclemens24/rag-prompt")

# Show the template text of the first message and the variables that must be
# supplied when the prompt is filled in.
print("Prompt Templates:")
print(prompt.messages[0].prompt.template)
print("\nInput Variables:", prompt.input_variables)



Promt Templates:
You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
Question: {question} 
Context: {context} 
Answer:

Input Variables: ['context', 'question']


In [11]:
# Formatting a function so that it matches the next step’s input

# The purpose of this function is to format the output of the retriever into the string format that it will 
# need to be in for the next step in the chain, after the retriever step. This function is necessary because
# the retriever step outputs a list of documents, but the next step in the chain expects a string.

def format_docs(docs):
    """Join the ``page_content`` of each document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The page contents in order, separated by a blank line; an empty
        string when ``docs`` yields nothing.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

In [12]:
# Defining your LLM
# Chat model used for the generation step of the chain.

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

In [13]:
# Setting up a LangChain chain using LCEL
# The first stage maps the incoming question to the prompt's two inputs:
# "context" is the retriever output joined into one string by format_docs, and
# "question" is the input passed through unchanged. The remaining stages fill
# the prompt, call the model and parse the reply into a plain string.

prompt_inputs = {
    "context": retreiver | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = prompt_inputs | prompt | llm | StrOutputParser()

In [14]:
# Submitting a question for RAG
# The chain's return value is the cell's last expression, so it is displayed.

question = "What are the advantages of using RAG?"
rag_chain.invoke(question)

"The advantages of using Retrieval-Augmented Generation (RAG) include:\n\n1. **Improved Accuracy and Relevance**: RAG enhances the accuracy and relevance of responses generated by large language models (LLMs) by incorporating specific, real-time information from databases or datasets.\n\n2. **Customization and Flexibility**: RAG allows for tailored responses based on a company's specific needs by integrating internal databases, creating personalized experiences and outputs that meet unique business requirements.\n\n3. **Expanding Model Knowledge Beyond Training Data**: RAG enables models to access and utilize information that was not included in their initial training sets, effectively broadening the model's knowledge base without the need for retraining.\n\nThese advantages make RAG a powerful tool for organizations looking to leverage their internal data and improve the effectiveness of AI applications."