#  RAG 10k Query

## Basic Setup

In [None]:
## Report whether PyTorch can see a CUDA-capable GPU
import os
import torch

## Uncomment the line below to hide every GPU and experiment on CPU only.
## When a GPU is available you normally want to leave this commented out.
# os.environ["CUDA_VISIBLE_DEVICES"]=""

print ("using CUDA/GPU: ", torch.cuda.is_available())

## List each visible CUDA device by index and name
gpu_count = torch.cuda.device_count()
for gpu_id in range(gpu_count):
    gpu_name = torch.cuda.get_device_properties(gpu_id).name
    print("device ", gpu_id , gpu_name)

In [None]:
## Set up logging.  To see more logging, set the level to DEBUG

import sys
import logging

## basicConfig() itself attaches a stdout StreamHandler to the root logger,
## so no additional handler is added here -- a second stdout handler would
## print every log record twice.
## force=True replaces any handlers already on the root logger, so re-running
## this cell (or changing the level) takes effect instead of being ignored.
# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG, force=True)
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

## Step-1: Load Settings

In [1]:
import os,sys
## Load settings from the .env file
from dotenv import find_dotenv, dotenv_values

## NOTE: dotenv_values() only returns the parsed file as a dict; unlike
## load_dotenv() it does not export anything into os.environ.
# _ = load_dotenv(find_dotenv()) # read local .env file
config = dotenv_values(find_dotenv())

# debug
# print (config)

## Prefer the value from .env, fall back to the process environment
ATLAS_URI = config.get('ATLAS_URI') or os.environ.get('ATLAS_URI')

if not ATLAS_URI:
    raise Exception ("'ATLAS_URI' is not set.  Please set it in the .env file to continue...")

## Needed by the OpenAI LLM used for answering queries
## (and by OpenAI embeddings, if those are selected)
OPENAI_API_KEY = config.get("OPENAI_API_KEY") or os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise Exception ("'OPENAI_API_KEY' is not set.  Please set it in the .env file to continue...")

## The OpenAI client created later is not handed the key explicitly, so make
## sure it is visible in the environment.  setdefault() leaves an already
## exported key untouched.
os.environ.setdefault("OPENAI_API_KEY", OPENAI_API_KEY)

In [2]:
## MongoDB Atlas locations used by this notebook
DB_NAME = "rag1"                # database name
COLLECTION_NAME = "10k"         # collection name
INDEX_NAME = "idx_embedding"    # index name

In [3]:
import os

## LlamaIndex downloads embedding models on demand.
## Point its cache at ./cache (relative to the current working directory)
## instead of the system tmp default, so downloaded artifacts are easy to inspect.
cache_dir = os.path.abspath('cache')
os.environ['LLAMA_INDEX_CACHE_DIR'] = cache_dir

In [4]:
from pymongo import MongoClient

## Create the client for the Atlas cluster identified by ATLAS_URI
mongodb_client = MongoClient(host=ATLAS_URI)

print("Atlas client initialized")

Atlas client initialized


## Setup Embeddings

The default embedding is OpenAI.  We can always plug in custom embeddings.

### OpenAI Embeddings

This uses the OpenAI embedding model.
You will need an API key (defined in the environment variable `OPENAI_API_KEY`).

In [5]:
# from llama_index import  OpenAIEmbedding
# embed_model = OpenAIEmbedding()

### Using Custom Embeddings

Remember: this embedding model must be the same one that was used in the `populate` step.

In [6]:
from llama_index.embeddings import HuggingFaceEmbedding

## Hugging Face model id of the embedding model
embed_model_name = "BAAI/bge-small-en-v1.5"
embed_model = HuggingFaceEmbedding(model_name=embed_model_name)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
from llama_index import  ServiceContext
from llama_index.llms import OpenAI

## The LLM turns retrieved context into a natural-language answer.
## When no LLM is given, the default is gpt-3.5-turbo from OpenAI;
## when no OpenAI key is set, the default is llama2-chat-13B from Llama.cpp.

## Available OpenAI models: https://platform.openai.com/docs/models
##  gpt-3.5-turbo  |   gpt-4  | gpt-4-turbo

#  | model         | context window |
#  |---------------|----------------|
#  | gpt-3.5-turbo | 4,096          |
#  | gpt-4         | 8,192          |
#  | gpt-4-turbo   | 128,000        |


## temperature=0 keeps the results predictable
llm = OpenAI(temperature=0, model="gpt-3.5-turbo")

## Bundle the LLM together with the embedding model
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
)

## Connect LlamaIndex and MongoDB Atlas

Let's define MongoDB Atlas as our vector store. This is where the indexed data is stored and later queried.

In [8]:
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.storage.storage_context import StorageContext
from llama_index.indices.vector_store.base import VectorStoreIndex


## Vector store on top of the Atlas collection.
## The index name comes from the INDEX_NAME setting rather than a repeated
## string literal, so changing the setting actually takes effect here.
vector_store = MongoDBAtlasVectorSearch(mongodb_client = mongodb_client,
                                 db_name = DB_NAME, collection_name = COLLECTION_NAME,
                                 index_name  = INDEX_NAME,
                                 ## the following field names are left at their default values
                                 # embedding_key = 'embedding', text_key = 'text', metadata_key = 'metadata',
                                 )
## Not passed to from_vector_store() below; kept in case later cells use it.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

## Build the index object directly from the vector store
index = VectorStoreIndex.from_vector_store(vector_store=vector_store, service_context=service_context)

## Query Data / Ask Questions

Now that we have everything set up, let's ask some questions.

In [9]:
from IPython.display import Markdown

## Build a query engine from the index and ask the question
query_engine = index.as_query_engine()
question = "What was Uber's revenue?"
response = query_engine.query(question)
print(response)
# display(Markdown(f"<b>{response}</b>"))

Uber's revenue for the years ended December 31, 2019, 2020, and 2021 was $13,000 million, $11,139 million, and $17,455 million, respectively.


In [10]:
## Ask about Lyft's 2020 figures
query_engine = index.as_query_engine()
question = "How much money Lyft made in 2020?"
response = query_engine.query(question)
print(response)

Lyft made $2,364,681 in revenue in 2020.


In [11]:
## This question cannot be answered from the Lyft_10k filing --
## let's see what comes back.
query_engine = index.as_query_engine()
question = "How much money Lyft made in 2018?"
response = query_engine.query(question)
print(response)

I'm sorry, but I cannot provide the answer to your query.


In [12]:
## Ask about Uber's IPO date
query_engine = index.as_query_engine()
question = "When did Uber do IPO?"
response = query_engine.query(question)
print(response)

Uber did its initial public offering (IPO) on May 14, 2019.
