In [234]:

!pip install langchain
!pip install pinecone
!pip install pinecone-client
!pip install google-generativeai
!pip install openai
!pip install tqdm
!pip install langchain pinecone-client google-generativeai openai tqdm

Collecting pinecone-plugin-inference<4.0.0,>=2.0.0 (from pinecone)
  Using cached pinecone_plugin_inference-3.1.0-py3-none-any.whl.metadata (2.2 kB)
Using cached pinecone_plugin_inference-3.1.0-py3-none-any.whl (87 kB)
Installing collected packages: pinecone-plugin-inference
  Attempting uninstall: pinecone-plugin-inference
    Found existing installation: pinecone-plugin-inference 1.1.0
    Uninstalling pinecone-plugin-inference-1.1.0:
      Successfully uninstalled pinecone-plugin-inference-1.1.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pinecone-client 5.0.1 requires pinecone-plugin-inference<2.0.0,>=1.0.3, but you have pinecone-plugin-inference 3.1.0 which is incompatible.[0m[31m
[0mSuccessfully installed pinecone-plugin-inference-3.1.0
Collecting pinecone-plugin-inference<2.0.0,>=1.0.3 (from pinecone-client)
  Using cached pinecone_plugin_inf

In [235]:
from google.colab import userdata
import os
PINECONE_API_KEY = userdata.get('PINECONE_API_KEY')
os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY

GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
os.environ['GOOGLE_API_KEY'] = GOOGLE_API_KEY
PINECONE_ENVIRONMENT = 'us-east-1'

In [236]:
!pip install python-dotenv




In [237]:

import os
from dotenv import load_dotenv

load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")


In [238]:
import os
from pinecone import Pinecone, ServerlessSpec


pc = Pinecone(
    api_key=PINECONE_API_KEY
)

# Check if the index exists; if not, create it
index_name = "rag-gemini"
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=768,
        metric="cosine",  # Choose the metric: cosine, euclidean, or dotproduct
        spec=ServerlessSpec(
            cloud="aws",
            region=PINECONE_ENVIRONMENT  # Use your environment's region
        )
    )

# # Connect to the index
index = pc.Index(name=index_name)
print(f"Successfully connected to index: {index_name}")

Successfully connected to index: rag-gemini


In [None]:
!pip install langchain-google-genai


In [240]:

from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings

os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY # Assuming PINECONE_API_KEY is already defined


embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",  # Specify the desired embedding model
    api_key=GOOGLE_API_KEY
)


In [241]:
vector = embeddings.embed_query("hello, world!")
vector[:10]

[0.05168594419956207,
 -0.030764883384108543,
 -0.03062233328819275,
 -0.02802734263241291,
 0.01813093200325966,
 -0.0018945280462503433,
 0.028477225452661514,
 -0.007562300190329552,
 0.011064725928008556,
 -0.005353901535272598]

In [242]:
vectors = embeddings.embed_documents(
    [
        "Today is Monday",
        "Today is Tuesday",
        "Today is April Fools day",
    ]
)
len(vectors), len(vectors[0])

(3, 768)

In [243]:
!pip install pypdf
!pip install langchain-community




In [244]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load PDF document
loader = PyPDFLoader("/content/document.pdf")  # Replace with your PDF file path
documents = loader.load()

# Split documents into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = text_splitter.split_documents(documents)

# Optionally, print out the first chunk to check
print(docs[0].page_content)


Lecture 1: Motivation and AI History
Viliam Lis´ y & Branislav Boˇ sansk´ y
Artiﬁcial Intelligence Center
Department of Computer Science, Faculty of Electrical Eng.
Czech Technical University in Prague
viliam.lisy@fel.cvut.cz
February, 2021
Viliam Lis´ y & Branislav Boˇ sansk´ y Introduction to Artiﬁcial Intelligence 1 / 33


In [245]:

from tqdm.notebook import tqdm

# Create embeddings and upload to Pinecone
for doc in tqdm(docs):
    vector = embeddings.embed_query(doc.page_content)

    # Modify the upsert to use a dictionary for metadata
    index.upsert([{
        "id": doc.metadata["source"],  # Use "id" instead of the first tuple element
        "values": vector,  # Use "values" for the vector
        "metadata": {
            "text": doc.page_content,  # Add the text as part of metadata
            "source": doc.metadata["source"]  # Include the source
        }
    }])

  0%|          | 0/43 [00:00<?, ?it/s]

In [246]:
from langchain.vectorstores import Pinecone

# Use from_existing_index to load from an existing index
retriever = Pinecone.from_existing_index(index_name=index_name, embedding=embeddings, text_key="text")

In [247]:
!pip install langchain-google-genai



In [248]:
from langchain_google_genai import ChatGoogleGenerativeAI

gemini_model = ChatGoogleGenerativeAI(api_key=GOOGLE_API_KEY,model="gemini-2.0-flash-exp", temperature=0.7)

In [249]:

!pip install langchain-pinecone



In [250]:

from langchain_pinecone import PineconeVectorStore
from langchain.chains import RetrievalQA

# Create a vector store using the Pinecone index
vectorstore = PineconeVectorStore(
    index_name=index_name,
    embedding=embeddings
)

# Create the retriever
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})  # Retrieve top 4 most similar documents

# Create the QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=gemini_model,
    chain_type="stuff",
    retriever=retriever,  # Pass the retriever here
    return_source_documents=True  # Optional: to get the source documents used in the response
)

In [251]:
query = "Whos is villians?"
response = qa_chain.invoke(query)

# The response will include the generated answer and possibly the source documents used to generate the answer
print(response)


{'query': 'Whos is villians?', 'result': 'Based on the provided text, the names are Viliam Lis´ y and Branislav Boˇ sansk´ y. It seems like you might have misread the names. They are the authors of the course material.\n', 'source_documents': [Document(id='/content/document.pdf', metadata={'source': '/content/document.pdf'}, page_content='Course literature\nSlides are not study materials!\n1 Take notes.\n2 Artiﬁcial Intelligence: A Modern\nApproach (AIMA) by Stuart J. Russell\nand Peter Norvig (however, it is not free)\n3 Reinforcement Learning: An Introduction\nby Richard S. Sutton and Andrew G.\nBarto (PDF available online)\n4 Links on the courseware page\n5 Wikipedia\nViliam Lis´ y & Branislav Boˇ sansk´ y Introduction to Artiﬁcial Intelligence 33 / 33')]}


In [268]:
print("Welcome To Engineer's AI World")
print('='*100)

query = "What is AIMA?"
print('Human Message:',query)
response = qa_chain.invoke(query)

# Print the answer
print("Agent Message:", response['result'])

# Print the source documents
print("\n According To Given Information:")
for doc in response['source_documents']:
    print(f"- {doc.page_content[:100]}...")  # Print first 2000 characters of each source document

Welcome To Engineer's AI World
Human Message: What is AIMA?
Agent Message: AIMA is the book "Artificial Intelligence: A Modern Approach" by Stuart J. Russell and Peter Norvig.


 According To Given Information:
- Course literature
Slides are not study materials!
1 Take notes.
2 Artiﬁcial Intelligence: A Modern
A...
