In [2]:
import os
from dotenv import load_dotenv
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import WebBaseLoader
from langchain_google_genai import ChatGoogleGenerativeAI


Load the Gemini LLM and fetch the Wikipedia article with a web document loader

In [7]:
# Read variables from a local .env file into the process environment so the
# API key does not have to be written into the notebook.
load_dotenv()
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

# Gemini chat model client.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    api_key=GOOGLE_API_KEY,
)

# Fetch the Wikipedia article and load it as LangChain documents.
url = "https://en.wikipedia.org/wiki/Artificial_intelligence"
loader = WebBaseLoader(url)
documents = loader.load()

Split the large Wikipedia document into overlapping chunks

In [13]:
# Chunking parameters passed to the splitter.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 100  # overlap kept between neighbouring chunks

text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
docs = text_splitter.split_documents(documents)

Use a local Hugging Face embedding model (the Gemini chat model itself does not produce embeddings)

In [15]:
from langchain.embeddings import HuggingFaceEmbeddings

# all-MiniLM-L6-v2 is a general sentence-transformers model, so it is loaded
# with the generic HuggingFaceEmbeddings wrapper. The BGE-specific wrapper
# prepends a BGE retrieval instruction to every query; this model is not a BGE
# model, so that prefix would skew the query embeddings used for search.
embedding_model = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2'
)

Store Embeddings in a vector store (FAISS)

In [21]:
from langchain.vectorstores import FAISS

# Embed every chunk and build the FAISS index in one call.
vectorstore = FAISS.from_documents(documents=docs, embedding=embedding_model)

# Optional: persist the index to disk so it can be reused later.
INDEX_DIR = "faiss_index"
vectorstore.save_local(INDEX_DIR)

Search (Semantic Search)

In [24]:
query = "Who is considered the father of AI?"
results = vectorstore.similarity_search(query, k=3)

# Print each retrieved chunk's text under a separator header.
for match in results:
    print("------ MATCHED CHUNK ------", match.page_content, sep="\n")

------ MATCHED CHUNK ------
^ Colton, Emma (7 May 2023). "'Father of AI' says tech fears misplaced: 'You cannot stop it'". Fox News. Archived from the original on 26 May 2023. Retrieved 26 May 2023.

^ Jones, Hessie (23 May 2023). "Juergen Schmidhuber, Renowned 'Father Of Modern AI,' Says His Life's Work Won't Lead To Dystopia". Forbes. Archived from the original on 26 May 2023. Retrieved 26 May 2023.
------ MATCHED CHUNK ------
Halpern, Sue, "The Coming Tech Autocracy" (review of Verity Harding, AI Needs You: How We Can Change AI's Future and Save Our Own, Princeton University Press, 274 pp.; Gary Marcus, Taming Silicon Valley: How We Can Ensure That AI Works for Us, MIT Press, 235 pp.; Daniela Rus and Gregory Mone, The Mind's Mirror: Risk and Reward in the Age of AI, Norton, 280 pp.; Madhumita Murgia, Code Dependent: Living in the Shadow of AI, Henry Holt, 311 pp.), The New York Review of Books, vol. LXXI, no. 17 (7
------ MATCHED CHUNK ------
McCarthy, John (2007), "From Here to Hum