In [2]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
import requests
from bs4 import BeautifulSoup

In [4]:
COMPANY_URL="https://blog.google/technology/ai/google-gemini-ai/"
response=requests.get(COMPANY_URL)
soup=BeautifulSoup(response.text,"html.parser")

paragraph=soup.find_all('p')

In [5]:
scraped_data="\n\n".join([
    p.get_text(strip=True) for p in paragraph if
    len(p.get_text(strip=True))>30
])

In [6]:
print(scraped_data[:800])

Making AI more helpful for everyone

A note from Google and Alphabet CEO Sundar Pichai:

Every technology shift is an opportunity to advance scientific discovery, accelerate human progress, and improve lives. I believe the transition we are seeing right now with AI will be the most profound in our lifetimes, far bigger than the shift to mobile or to the web before it. AI has the potential to create opportunities — from the everyday to the extraordinary — for people everywhere. It will bring new waves of innovation and economic progress and drive knowledge, learning, creativity and productivity on a scale we haven’t seen before.

That’s what excites me: the chance to make AI helpful for everyone, everywhere in the world.

Nearly eight years into our journey as an AI-first company, the pace 


In [7]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [8]:
text_spliter=RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=50)
docs=text_spliter.create_documents([scraped_data])

In [9]:
docs

[Document(metadata={}, page_content='Making AI more helpful for everyone\n\nA note from Google and Alphabet CEO Sundar Pichai:'),
 Document(metadata={}, page_content='Every technology shift is an opportunity to advance scientific discovery, accelerate human progress, and improve lives. I believe the transition we are seeing right now with AI will be the most profound in our lifetimes, far bigger than the shift to mobile or to the web before it. AI has the potential to create opportunities — from the everyday to the extraordinary — for people everywhere. It will bring new waves of innovation and economic progress and drive knowledge, learning, creativity and'),
 Document(metadata={}, page_content='and drive knowledge, learning, creativity and productivity on a scale we haven’t seen before.'),
 Document(metadata={}, page_content='That’s what excites me: the chance to make AI helpful for everyone, everywhere in the world.'),
 Document(metadata={}, page_content='Nearly eight years into our

In [10]:
len(docs)

48

In [11]:
for i,d in enumerate(docs[:3]):
    print(f"-----chunk{i+1}---\n{d.page_content}\n")

-----chunk1---
Making AI more helpful for everyone

A note from Google and Alphabet CEO Sundar Pichai:

-----chunk2---
Every technology shift is an opportunity to advance scientific discovery, accelerate human progress, and improve lives. I believe the transition we are seeing right now with AI will be the most profound in our lifetimes, far bigger than the shift to mobile or to the web before it. AI has the potential to create opportunities — from the everyday to the extraordinary — for people everywhere. It will bring new waves of innovation and economic progress and drive knowledge, learning, creativity and

-----chunk3---
and drive knowledge, learning, creativity and productivity on a scale we haven’t seen before.



In [12]:
openai_api_key=os.getenv("OPENAI_API_KEY")

In [13]:
##embeddings 
from langchain.embeddings import OpenAIEmbeddings

In [14]:
embeddings=OpenAIEmbeddings(openai_api_key=openai_api_key)
doc_embeddings=embeddings.embed_documents([d.page_content for d in docs])

  embeddings=OpenAIEmbeddings(openai_api_key=openai_api_key)


In [15]:
len(doc_embeddings)

48

In [16]:
print(doc_embeddings[0][:5])

[-0.006556369754933286, 0.0045434492120532315, 0.02226053339114034, -0.01818732896506731, 0.013403682816203584]


In [17]:
from langchain.vectorstores import Chroma

In [19]:
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    persist_directory="chroma_store"  # recreated here
)
vectordb.persist()


  vectordb.persist()


In [20]:
retriever = vectordb.as_retriever()

In [21]:
from langchain.prompts import PromptTemplate

In [22]:
prompt_template=PromptTemplate(
    input_variables=["context","question"],
    template="""
    you are a helpful assistant. use the following context to answer the question
    context:
    {context}

    question:
    {question}
    answer in a clear and detailed manner
    """
)

In [23]:
from langchain_openai import ChatOpenAI



In [24]:
llm=ChatOpenAI(
    temperature=0,
    model_name="gpt-4o",
    openai_api_key=openai_api_key
)

In [25]:
from langchain_core.output_parsers import StrOutputParser

In [26]:
# Reuse what you already have
parser = StrOutputParser()

In [27]:
# Simple chain using pipe operator
chain = prompt_template | llm | parser

In [28]:
# Step 5: Simple invoke example
question = "What is Gemini and who made it?"
docs = retriever.get_relevant_documents(question)
context = "\n\n".join([doc.page_content for doc in docs])

response = chain.invoke({
    "context": context,
    "question": question
})

print(" Question:", question)
print("\n Answer:\n", response)

  docs = retriever.get_relevant_documents(question)


 Question: What is Gemini and who made it?

 Answer:
 Gemini is a sophisticated foundation model developed by Google, designed to understand, explain, and generate high-quality code in popular programming languages such as Python, Java, C++, and Go. It is notable for its ability to work across multiple languages and reason about complex information, making it one of the leading models for coding in the world.

Gemini was created through large-scale collaborative efforts by teams across Google, including contributions from Google Research. It was built from the ground up to be a multimodal model, meaning it can generalize and seamlessly understand, operate across, and combine different types of information, including text, code, audio, image, and video.

The first version, Gemini 1.0, is being rolled out across a range of products and platforms, bringing its capabilities to billions of people through various Google products. Its training allows it to recognize and understand text, image