In [1]:
import os
from pinecone import Pinecone
from llama_index.llms import Gemini
from llama_index.vector_stores import PineconeVectorStore
from llama_index.storage.storage_context import StorageContext
from llama_index.embeddings import GeminiEmbedding
from llama_index import ServiceContext, VectorStoreIndex, download_loader, set_global_service_context
from settings import GOOGLE_API_KEY, PINECONE_API_KEY

import pandas as pd  # For working with DataFrames


os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY
index_name = "adam"
DATA_URL = "https://www.gettingstarted.ai/how-to-use-gemini-pro-api-llamaindex-pinecone-index-to-build-rag-app/"

# set llm as Gemini Pro
llm = Gemini()

# create pinecone client
pinecone_client = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# list pinecone indexes
for index in pinecone_client.list_indexes():
    print(index['name'])

# select pinecone index 'posts'
pinecone_index = pinecone_client.Index(index_name)
# index_description = pinecone_client.describe_index("posts")

# load page using llamaindex
BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")

loader = BeautifulSoupWebReader()
documents = loader.load_data(urls=[DATA_URL])

# grab embeddings from gemini embeddings model
gemini_embed_model = GeminiEmbedding(model_name="models/embedding-001")
service_context = ServiceContext.from_defaults(llm=llm, embed_model=gemini_embed_model)

set_global_service_context(service_context)

# store embeddings in pinecone index
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

storage_context = StorageContext.from_defaults(
    vector_store=vector_store
)

# # create Pinecone index programmatically 
# if input("re") == "y":
#     index = VectorStoreIndex.from_documents(
#         documents, 
#         storage_context=storage_context
#     )

# query pinecone index for similar embeddings
query_engine = index.as_query_engine()
gemini_response = query_engine.query("""What is the summary of the context provided
                                     """)

# print response
import textwrap
print(textwrap.fill(str(gemini_response), width=100))

  from tqdm.autonotebook import tqdm


ImportError: cannot import name 'Gemini' from 'llama_index.llms' (unknown location)

In [5]:
from llama_index.llms import Openai

ImportError: cannot import name 'Openai' from 'llama_index.llms' (unknown location)

In [6]:
pip install --upgrade llama_index

Collecting llama_index
  Downloading llama_index-0.10.4-py3-none-any.whl (5.6 kB)
Collecting llama-index-agent-openai<0.2.0,>=0.1.0 (from llama_index)
  Downloading llama_index_agent_openai-0.1.1-py3-none-any.whl (12 kB)
Collecting llama-index-core<0.11.0,>=0.10.0 (from llama_index)
  Downloading llama_index_core-0.10.3-py3-none-any.whl (630 kB)
                                              0.0/630.7 kB ? eta -:--:--
     ------------------------------------- 630.7/630.7 kB 20.0 MB/s eta 0:00:00
Collecting llama-index-embeddings-openai<0.2.0,>=0.1.0 (from llama_index)
  Downloading llama_index_embeddings_openai-0.1.1-py3-none-any.whl (6.1 kB)
Collecting llama-index-legacy<0.10.0,>=0.9.48 (from llama_index)
  Downloading llama_index_legacy-0.9.48-py3-none-any.whl (2.0 MB)
                                              0.0/2.0 MB ? eta -:--:--
     -----------------------------            1.5/2.0 MB 31.2 MB/s eta 0:00:01
     ---------------------------------------- 2.0/2.0 MB 31.3 MB/s e


[notice] A new release of pip is available: 23.1.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.


ERROR: llama-index-integrations/llms/llama-index-llms-ollama is not a valid editable requirement. It should either be a path to a local project or a VCS URL (beginning with bzr+http, bzr+https, bzr+ssh, bzr+sftp, bzr+ftp, bzr+lp, bzr+file, git+http, git+https, git+ssh, git+git, git+file, hg+file, hg+http, hg+https, hg+ssh, hg+static-http, svn+ssh, svn+http, svn+https, svn+svn, svn+file).

[notice] A new release of pip is available: 23.1.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
# from llama_index.core import SimpleDirectoryReader
from llama_index.core import download_loader

from llama_index.readers.database import DatabaseReader
from llama_index.core import Document

from llama_index.core import VectorStoreIndex
from llama_index.core import Settings

class RAG():
    def __init__(self, index_name):
        self.index_name = index_name
        self.pinecone_client = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
        self.pinecone_index = self.pinecone_client.Index(self.index_name)
        self.vector_store = PineconeVectorStore(pinecone_index=self.pinecone_index)
        self.index = VectorStoreIndex.from_vector_store(vector_store=self.vector_store)
        # self.query_engine = self.index.as_query_engine()
        # self.gemini_response = self.query_engine.query("""What is the summary of the context providedd
        #                              """)

    def get_response(self):
        return self.gemini_response
    
    # Data ingestion methods
    # def ingest_directory(self, 
    #                      directory_path="./database/adam_RAG"):
    #     documents = SimpleDirectoryReader(directory_path).load_data()
    #     return documents 
    
    def ingest_database(self,
                        query = "SELECT * FROM hadiths"):
        
            reader = DatabaseReader(
                scheme=os.getenv("DB_SCHEME"),
                host=os.getenv("DB_HOST"),
                port=os.getenv("DB_PORT"),
                user=os.getenv("DB_USER"),
                password=os.getenv("DB_PASS"),
                dbname=os.getenv("DB_NAME"),
            )

            query = "SELECT * FROM users"
            documents = reader.load_data(query=query)
            return documents
    
    def ingest_web(self, url):
        BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
        loader = BeautifulSoupWebReader()
        documents = loader.load_data(urls=[url])
        return documents
    
    def ingest_text(self, text):
        return Document(text=text)
    

    def high_level_transformation(self, 
                                  documents, 
                                  prompt="hey i feel like taking a trip that follows the exact footsteps of Moses,starting from egypt. so give me a itenerary for a 14 day trip. "):
        vector_index = VectorStoreIndex.from_documents(documents)
        vector_index.as_query_engine()

        response = query_engine.query(prompt)
        print(response)
        # text_splitter = SentenceSplitter(chunk_size=512, chunk_overlap=10)
        # Settings.text_splitter = text_splitter

        # # per-index
        # index = VectorStoreIndex.from_documents(
        #     documents, transformations=[text_splitter]
        # )
    

ImportError: cannot import name 'download_loader' from 'llama_index.core' (unknown location)

In [10]:
# query pinecone index for similar embeddings
query_engine = index.as_query_engine()
gemini_response = query_engine.query("""According to the context provided,
                                    What happened after Heraclius read the letter?
                                     """)

# print response
import textwrap
print(textwrap.fill(str(gemini_response), width=100))

There was a great hue and cry in the Royal Court.


In [7]:
# how to view the index itself
# i want to see how the embeddings look like
chat_engine = index.as_chat_engine()
gemini_response = chat_engine.query("""
                                    According to the context provided,
                                    How did Heraclius feel about the Prophet?
                                    """)

# print response
import textwrap
print(textwrap.fill(str(gemini_response), width=100))

Sorry, I cannot answer your query.


In [41]:
pinecone_index

<pinecone.data.index.Index at 0x148f83db850>

In [4]:
1_298_000/603

2152.57048092869

In [3]:
2_399_999/3132

766.2832056194126