In [None]:
%pip install --upgrade pip

# Uninstall conflicting packages
%pip uninstall -y langchain-core langchain-openai langchain-experimental beautifulsoup4 langchain-community langchain chromadb beautifulsoup4

# Install compatible versions of langchain-core and langchain-openai
%pip install langchain-core==0.3.6
%pip install langchain-openai==0.2.1
%pip install langchain-experimental==0.3.2
%pip install langchain-community==0.3.1
%pip install langchain==0.3.1

# Install remaining packages
%pip install chromadb==0.5.11
%pip install beautifulsoup4==4.12.3

In [None]:
!pip install langchain-google-genai

In [None]:
import os
import bs4
import openai
import chromadb


from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma

from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

from langchain_experimental.text_splitter import SemanticChunker

from langchain_openai import ChatOpenAI #, OpenAIEmbeddings
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from langchain import hub
from google.colab import userdata



In [None]:
# Read the API keys from Colab's `userdata` store and expose them both as
# environment variables and on the `openai` module.
os.environ.update(OPENAI_API_KEY=userdata.get('OPENAI_API_KEY'))
openai.api_key = userdata.get('OPENAI_API_KEY')
os.environ.update(GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY'))

# NOTE(review): USER_AGENT is assigned after the langchain_community imports
# have already run — confirm the web loader reads it lazily, not at import time.
os.environ.update(USER_AGENT='RAGUserAgent')

In [None]:
# Embedding model used by both the semantic splitter and the Chroma vector store.
gemini_embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)

### Load documents

In [None]:
# Source page for the corpus. Fetched over HTTPS rather than plain HTTP so the
# content that ends up in the vector store is not retrieved over an
# unencrypted connection.
path = 'https://kbourne.github.io/chapter1.html'

# `parse_only` limits BeautifulSoup to elements carrying these CSS classes,
# i.e. only the post's content, title and header are kept.
loader = WebBaseLoader(
    web_paths=(path,),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=('post-content', 'post-title', 'post-header')
        )
    ),
)

# Download and parse the page into LangChain documents.
docs = loader.load()

### Split documents

In [None]:
# Build a semantic chunker on top of the Gemini embeddings and use it to break
# the loaded documents into chunks.
text_splitter = SemanticChunker(embeddings=gemini_embeddings)
splits = text_splitter.split_documents(documents=docs)

In [None]:
# Embed the chunks with the Gemini embeddings and index them in a Chroma store.
vector_store = Chroma.from_documents(
    documents=splits,
    embedding=gemini_embeddings,
)

# The retriever wraps the vector store behind a simple interface: given a
# query, it runs the similarity search and returns the matching documents.
retriever = vector_store.as_retriever()

In [None]:
# Fetch the RAG prompt template from the LangChain hub.
prompt = hub.pull(
    owner_repo_commit='jclemens24/rag-prompt',
)

In [29]:
# Display the pulled prompt template to inspect its input variables and message text.
prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'jclemens24', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '1a1f3ccb9a5a92363310e3b130843dfb2540239366ebe712ddd94982acc06734'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [None]:
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` of every document, in order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

In [None]:
# Chat model that produces the final answer at the end of the RAG chain.
llm = ChatOpenAI(
    model_name='gpt-4o-mini',
    temperature=0,
)

In [30]:
# Assemble the RAG pipeline:
#   1. build the prompt inputs — 'context' comes from the retriever's results
#      joined by format_docs, 'question' is the input passed through as-is;
#   2. fill the prompt template;
#   3. send it to the chat model;
#   4. parse the model's reply into a plain string.
rag_chain = (
    {
        'context': retriever | format_docs,
        'question': RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [31]:
# Run the full chain on a sample question; the returned string is the cell output.
rag_chain.invoke(
    input='What are the advantages of RAG',
)

"The advantages of RAG (Retrieval Augmented Generation) include:\n\n1. **Improved Accuracy and Relevance**: RAG enhances the accuracy and relevance of responses generated by large language models (LLMs) by incorporating specific, real-time information from databases or datasets.\n\n2. **Customization and Flexibility**: RAG allows for tailored responses based on a company's specific needs by integrating internal databases, creating personalized experiences and detailed outputs.\n\n3. **Expanding Model Knowledge Beyond Training Data**: RAG enables models to access and utilize information not included in their initial training sets, effectively broadening the model's knowledge base without the need for retraining. \n\nThese advantages make RAG a powerful tool for leveraging internal data and improving the effectiveness of generative AI applications in various business contexts."