In [None]:
!python -m pip install -U pip
%pip install llama-index llama-index-llms-gemini llama-index-embeddings-gemini python-dotenv google-generativeai llama-index-vector-stores-pinecone pinecone-client

In [1]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [2]:
import os
from dotenv import load_dotenv
from llama_index.core import Settings
from llama_index.llms.gemini import Gemini
from llama_index.embeddings.gemini import GeminiEmbedding

load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

llm = Gemini(model_name="models/gemini-1.5-flash-latest")
embed_model = GeminiEmbedding(model_name="models/embedding-001")

Settings.llm = llm
Settings.embed_model = embed_model

In [3]:
from pinecone import Pinecone, ServerlessSpec
from llama_index.vector_stores.pinecone import PineconeVectorStore

# pc = Pinecone(api_key=PINECONE_API_KEY)

# pc.create_index(
#     name="quickstart",
#     dimension=768, # Replace with your model dimensions
#     metric="cosine", # Replace with your model metric
#     spec=ServerlessSpec(
#         cloud="aws",
#         region="us-east-1"
#     ) 
# )

# pinecone_index = pc.Index(name="quickstart")
pinecone_vector_store = PineconeVectorStore(index_name="quickstart", api_key=PINECONE_API_KEY)

In [4]:
from llama_index.core import SimpleDirectoryReader

docs = SimpleDirectoryReader(input_files=["./data/tiger.pdf"]).load_data()

In [14]:
from llama_index.core import VectorStoreIndex

vector_index = VectorStoreIndex.from_vector_store(vector_store=pinecone_vector_store)
vector_engine = vector_index.as_query_engine()
vector_retriever = vector_index.as_retriever()

In [6]:
# vector_index.insert_nodes(nodes=docs)

Upserted vectors:   0%|          | 0/41 [00:00<?, ?it/s]

In [7]:
from llama_index.core import SummaryIndex

index = SummaryIndex.from_documents(documents=docs, verbose=True)
query_engine = index.as_query_engine()

In [9]:
response = query_engine.query("Tell me some fun facts about tigers")

In [10]:
print(response)

Tigers are powerful swimmers and can easily cross rivers as wide as 5 miles. They are also known to climb trees, though not as well as other cat species.  Tigers are normally silent but can produce numerous vocalisations. They roar to signal their presence to other individuals over long distances. This vocalisation can be heard 2 miles away. 



In [11]:
response = vector_engine.query("Tell me some fun facts about tigers")

In [12]:
print(response)

The provided text focuses on the conflict between tigers and humans, as well as the welfare of tigers in captivity. It does not contain any fun facts about tigers. 



In [15]:
vector_retriever.retrieve("Tell me some fun facts about tigers")

[NodeWithScore(node=TextNode(id_='b7d6c774-663f-4d89-be72-46d43095926a', embedding=[0.0285537075, -0.0437133349, -0.0434626974, -0.0860822052, 0.0482412204, 0.0388832688, -0.00872905087, 0.0188693125, 0.0279571339, 0.0438863933, 0.0290747173, -0.000873269397, 0.020822648, -0.00886980817, -0.0309516303, 0.0014678227, 0.0458419062, 0.0216991697, -0.0323811732, -0.00327994581, -0.012014077, 0.00881716609, -0.0133045651, -0.0404749699, 0.0183345824, -0.0489261784, 0.024788307, -0.0376167074, 0.00404116092, 0.013079064, -0.0475063846, -0.00836981926, -0.0141741559, 0.0134059899, 0.0156168798, -0.0339690484, -0.0113076605, 0.0689148605, 0.0369964354, 0.0791261941, -0.0128435744, -0.00882627629, -0.0302967373, 0.0123689463, -0.00294084754, -0.0127184363, 0.0145954341, 0.0280048698, 0.028141696, -0.017387975, -0.0169656798, -0.00526311854, 0.0693389624, 0.0225035, -0.032655891, -0.0820328593, -0.0176667515, -0.0442097224, -0.0816712305, 0.054982394, 0.00243820809, -0.0105892755, 0.00463881483,