In [14]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import WebBaseLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.chat_models import init_chat_model
from tqdm import tqdm

In [15]:
import os
# Browser-like User-Agent string exposed to libraries via the environment.
# NOTE(review): confirm whether langchain_community reads USER_AGENT lazily;
# if it is read at import time, this cell must run before the imports cell.
os.environ["USER_AGENT"] = " ".join(
    [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "AppleWebKit/537.36 (KHTML, like Gecko)",
        "Chrome/120.0.0.0 Safari/537.36",
    ]
)

In [16]:
# Fetch the page and load it as LangChain documents.
#
# The User-Agent is handed to the loader explicitly instead of relying on the
# USER_AGENT environment variable alone: WebBaseLoader was imported before the
# variable was set, and langchain_community picks up its default User-Agent
# at import time, so on a fresh "Restart & Run All" the env value would not
# otherwise reach the HTTP session.
# Indexing os.environ (rather than .get) fails loudly if the config cell was
# skipped.
loader = WebBaseLoader(
    "https://www.andrewng.org/about",
    header_template={"User-Agent": os.environ["USER_AGENT"]},
)
docs = loader.load()

In [17]:
# Splitter configuration: chunk_size=500 with a 50-unit overlap between
# neighbouring chunks (units are whatever the splitter's length function counts).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

In [18]:
# Break the loaded documents into chunks and report how many were produced.
chunks = text_splitter.split_documents(documents=docs)
print("Number of chunks created: ", len(chunks))

Number of chunks created:  5


In [19]:
# Embedding model used to vectorise the chunks (and, via the retriever, queries).
# No credentials are passed here — presumably they come from the environment.
embeddings_model = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

In [20]:
# Embed every chunk and build a FAISS vector store from the result.
db = FAISS.from_documents(
    documents=chunks,
    embedding=embeddings_model,
)
print("Created FAISS index")

Created FAISS index


In [21]:
# Expose the FAISS store as a retriever; as_retriever() is called with no
# arguments, so the library's default search settings apply.
retriever = db.as_retriever()

In [22]:
# User query: used both to retrieve documents and inside the final LLM prompt.
question = "What did Andrew Ng do in Baidu?"

In [23]:
# Run the question through the retriever and report how many documents came back.
results = retriever.invoke(input=question)
print("Number of documents retrieved: ", len(results))

Number of documents retrieved:  4


In [24]:
# Concatenate the text of every retrieved document, one per line break,
# into a single context string for the prompt.
context = "\n".join(hit.page_content for hit in results)

In [25]:
# Assemble the final prompt: instruction line, retrieved context, then the
# question. The leading and trailing newlines are kept on purpose so the text
# sent to the model is unchanged.
prompt = (
    "\n"
    "Give me answer to my question based on the context.\n"
    f"{context}\n"
    f"Question: {question}\n"
)

In [26]:
# Instantiate the Gemini chat model through LangChain's provider-agnostic factory.
llm = init_chat_model(
    model="gemini-2.5-flash",
    model_provider="google_genai",
)

# Send the assembled prompt and print only the text of the model's reply.
result = llm.invoke(prompt)
print(result.content)

Based on the context, Andrew Ng was the Chief Scientist at Baidu, where he led the company's ~1300 person AI Group and was responsible for driving the company's global AI strategy and infrastructure.
