<a href="https://colab.research.google.com/github/navneet-g/google_collab_langchain_session/blob/main/LangChain_RAG_GoogleAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Install dependencies

In [None]:
!pip install --q -U langchain langchain_community \
google-generativeai  langchain-google-genai selenium unstructured \
langchain-text-splitters unstructured faiss-cpu langchain-google-vertexai

Import packages

In [None]:
from langchain_community.document_loaders import SeleniumURLLoader  # loading documents
from langchain.text_splitter import CharacterTextSplitter  # splitting text
from langchain_community.vectorstores import (
    FAISS,
)  # creating vector store from embeddings; can use chromadb instead as well
from langchain.chains import RetrievalQA  # creating qa system
from google.colab import userdata
from langchain_google_genai import GoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings


Initialize LLM

In [None]:
# Fetch the API key through google.colab's userdata helper so it is never
# hardcoded in the notebook source.
GOOGLE_API_KEY = userdata.get("GOOGLE_API_KEY")

# Model identifier kept in one place so it is easy to swap.
# NOTE(review): "models/text-bison-001" looks like a legacy PaLM-era model
# name - confirm it is still served, or switch to a currently supported model.
LLM_MODEL = "models/text-bison-001"

# docs https://python.langchain.com/docs/integrations/llms/google_ai/
llm = GoogleGenerativeAI(model=LLM_MODEL, google_api_key=GOOGLE_API_KEY)

# Optional smoke test - uncomment to check that the key and model respond:
# print(
#     llm.invoke(
#         "What are some of the pros and cons of Python as a programming language?"
#     )
# )

Load custom data: fetch the Wikipedia page and split it into overlapping text chunks

In [None]:

# Pages to ingest; add more URLs to this list to index additional sources.
urls = [
    "https://en.wikipedia.org/wiki/96th_Academy_Awards",
]

# Load the pages with SeleniumURLLoader.
loader = SeleniumURLLoader(urls=urls)
data = loader.load()

# Fail fast with a clear message. The loader can hand back an empty list when
# a page cannot be fetched (it skips failed URLs by default), and an empty
# document set would otherwise only fail later, while building the vector store.
if not data:
    raise RuntimeError(f"No documents could be loaded from: {urls}")

# Split on newlines into chunks (chunk_size=1000, chunk_overlap=200); the
# overlap lets neighbouring chunks share some context.
print("Splitting document by character...")
text_splitter = CharacterTextSplitter(
    separator="\n", chunk_size=1000, chunk_overlap=200
)

# Apply the splitter to the loaded pages; `docs` is what gets embedded later.
print("Splitting into multiple documents...")
docs = text_splitter.split_documents(data)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


Splitting document by character...
Splitting into multiple documents...


Create vector store: embed the chunks and index them with FAISS

In [None]:
print("Creating vector store...")
# Embedding client used to turn each chunk into a vector.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=GOOGLE_API_KEY,
)
# Embed every chunk in `docs` and index the resulting vectors in FAISS.
db = FAISS.from_documents(docs, embeddings)


Creating vector store...


In [None]:

# Build the question-answering chain on top of the FAISS store and the Google
# LLM created earlier. The retriever is asked for k=10 chunks per query, and the
# "stuff" chain type (per LangChain) places the retrieved chunks into one prompt.
print("Creating retriever...")
retriever = db.as_retriever(search_kwargs=dict(k=10))
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

Creating retriever...


In [None]:
def ask_question(question):
    """Print `question`, run it through the `qa` chain, and print the response.

    Args:
        question: The question text; it is concatenated onto a string prefix,
            so it must be a `str`.

    Returns:
        None. The response from `qa.invoke` is printed, not returned.
    """
    header = "Asking question: " + question
    print(header)
    # Uses the notebook-level `qa` chain built in an earlier cell.
    response = qa.invoke(question)
    print(response)


In [None]:
# Run the sample questions in order; each call prints the question and the
# chain's response.
for sample_question in (
    "Who were the academy awards nominees?",
    "List the names of all academy awards nominees",
    "What date did the academy awards happen?",
    "What date did the 96th academy awards happen?",
):
    ask_question(sample_question)

Asking question: Who were the academy awards nominees?
{'query': 'Who were the academy awards nominees?', 'result': 'The Academy Awards nominees are listed in the article.'}
Asking question: List the names of all academy awards nominees
{'query': 'List the names of all academy awards nominees', 'result': 'The following is a list of nominees for the 96th Academy Awards.'}
Asking question: What date did the academy awards happen?




{'query': 'What date did the academy awards happen?', 'result': 'March 10, 2024'}
Asking question: What date did the 96th academy awards happen?
{'query': 'What date did the 96th academy awards happen?', 'result': 'March 10, 2024'}
