In [None]:
%pip install -qU langchain-google-genai langchain Chromadb sentence_transformers

In [None]:
%pip install --upgrade --quiet  langchain langchain-community langchainhub langchain-openai langchain-chroma bs4

In [None]:
import getpass
import os

# Keep your API key secret: getpass reads it without echoing it to the screen.
# Prompt only when the key is missing, so re-running this cell (or starting the
# kernel with GOOGLE_API_KEY already exported) does not ask again or overwrite it.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API key: ")

In [None]:
from langchain_google_genai import GoogleGenerativeAI

# Model identifier passed to the GoogleGenerativeAI wrapper.
# Change this constant to try a model other than gemini-pro.
GEMINI_MODEL_NAME = "models/gemini-pro"
llm = GoogleGenerativeAI(model=GEMINI_MODEL_NAME)

In [None]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [None]:
# Fetch the web page and keep whatever the loader returns as our documents.
ARTICLE_URL = "https://medium.com/stackademic/mojo-90-000-times-faster-than-python-finally-open-sourced-777bdd9a1896"
loader = WebBaseLoader(ARTICLE_URL)
docs = loader.load()

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# In this example, we'll use HuggingFaceEmbeddings to embed our chunks.
# The model is named explicitly: relying on the implicit default is deprecated in
# recent LangChain releases, and pinning the name keeps the vectors reproducible.
# This is the model the class currently falls back to, so results are unchanged.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"

# Split the loaded documents into chunks of up to 1000 characters with a
# 200-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

# Embed every chunk and index it in a Chroma vector store.
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

In [None]:
# Expose the vector store as a retriever; the RAG chain uses it to look up the
# snippets of the blog relevant to a question.
# "similarity" is already the default search type; it is spelled out for clarity.
retriever = vectorstore.as_retriever(search_type="similarity")

In [None]:
# Pull a ready-made RAG prompt from the LangChain hub.
# Point RAG_PROMPT_ID at another hub entry to use a different prompt.
RAG_PROMPT_ID = "rlm/rag-prompt"
prompt = hub.pull(RAG_PROMPT_ID)

In [None]:
# Helper that merges the text of the retrieved docs into one context string.
def format_docs(docs):
    """Join the ``page_content`` of each document into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The contents in their original order, separated by a blank line.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

In [None]:
# Assemble the pipeline stage by stage.
# 1) Build the prompt inputs: the retrieved-and-formatted context, and the
#    user's question passed through unchanged.
rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
# 2) Fill the prompt, call the LLM, and parse the result into a plain string.
rag_chain = rag_inputs | prompt | llm | StrOutputParser()

In [None]:
# Try the RAG chain; the call is the last expression so its answer is displayed.
question = "What is the Mojo?"
rag_chain.invoke(question)

In [None]:
# Clean up: delete the collection behind `vectorstore` now that the demo is done.
# NOTE(review): the retriever / rag_chain built on this vectorstore presumably
# stop working after this call — re-run the indexing cell before querying again.
vectorstore.delete_collection()