In [49]:
import os, sys
import openai
import pinecone
import pandas as pd
from dotenv import dotenv_values
from llama_hub.confluence.base import ConfluenceReader
from langchain.chat_models import ChatOpenAI
from llama_index import LLMPredictor, ServiceContext, GPTVectorStoreIndex
from llama_index.storage.storage_context import StorageContext
from llama_index.vector_stores import PineconeVectorStore
from llama_index import set_global_service_context

# Load key/value settings from the local ".env" file.
config = dotenv_values(".env")

# Hand the organization id and API key from that file to the OpenAI client.
openai.organization = config["ORG_ID"]
openai.api_key = config["OPENAI_API_KEY"]

# Context window size given to the ServiceContext. This is normally taken from
# the model metadata; setting it here overrides that value explicitly.
context_window = 4096

# Number of output tokens to budget for. In the ServiceContext this controls how
# much room is kept free for the answer when retrieved node text is packed into
# the context window. The same value is also passed to ChatOpenAI as max_tokens.
num_output = 512

# Wrap the LangChain chat model in an LLMPredictor so LlamaIndex can use it.
llm_predictor = LLMPredictor(
    llm=ChatOpenAI(
        temperature=0.2,
        model_name="gpt-3.5-turbo",
        max_tokens=num_output,
    )
)

# Bundle the predictor and the sizing parameters into a ServiceContext and
# register it as the global default.
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    context_window=context_window,
    num_output=num_output,
)
set_global_service_context(service_context)

# Initialise the Pinecone client with the API key from the .env file.
pinecone.init(
    api_key=config["PINECONE_API_KEY"],
    environment="asia-southeast1-gcp-free",
)

def data_ingestion_indexing(
    base_url: str = "https://navihq.atlassian.net/wiki/",
    space_key: str = "IN",
    index_name: str = "confluence-index",
):
    """Load the pages of a Confluence space and index them in Pinecone.

    Documents are read with ConfluenceReader and then passed to
    GPTVectorStoreIndex.from_documents together with a StorageContext whose
    vector store is a PineconeVectorStore over the named Pinecone index.

    Args:
        base_url: Base URL of the Confluence wiki to read from.
        space_key: Key of the Confluence space whose pages are loaded.
        index_name: Name passed to pinecone.Index to select the target index.

    Returns:
        The index object returned by GPTVectorStoreIndex.from_documents.

    The defaults reproduce the values that were previously hard-coded, so a
    call with no arguments behaves as before.
    """
    # Load every page of the space; attachments are skipped.
    loader = ConfluenceReader(base_url=base_url)
    documents = loader.load_data(
        space_key=space_key,
        page_ids=[],
        include_attachments=False,
    )

    # Uses the module-level pinecone client, which this file initialises with
    # pinecone.init before the function is called.
    pinecone_index = pinecone.Index(index_name)

    # Route vector storage to Pinecone, then build the index over the documents.
    vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    return GPTVectorStoreIndex.from_documents(documents, storage_context=storage_context)

# Run the ingestion and keep the result in the module-level `index`, which
# data_querying() reads as a global.
# NOTE(review): the output recorded for this cell is a RetryError wrapping a
# RateLimitError, so this call apparently did not complete in the saved run.
index = data_ingestion_indexing()

RetryError: RetryError[<Future at 0x2803f04f0 state=finished raised RateLimitError>]

In [48]:
# Instructions appended verbatim to the end of every question.
query_suffix = (
    ".Use bullet points wherever applicable. "
    "Please always cite sources along with your answer."
)


def data_querying(input_text):
    """Run a question against the module-level `index` and return the response.

    A query engine is created from `index` with the module-level
    `service_context`, and `query_suffix` is appended to `input_text` before
    the query is issued. The value returned by the engine's `query` call is
    passed back unchanged.
    """
    engine = index.as_query_engine(service_context=service_context)
    prompt = input_text + query_suffix
    return engine.query(prompt)


# Example question sent through data_querying(); the text is kept exactly as
# originally asked (including the spelling "throtled").
sample_question = (
    "service logs are being throtled. "
    "However we are emitting similar log count as before. "
    "Is the log ingestion limit reduced?"
)
print(data_querying(sample_question))


It is possible that the log ingestion limit has been reduced if the service logs are being throttled despite emitting a similar log count as before. This is because the threshold for throttling is based on the log ingestion rate, which is determined by the Prometheus query mentioned in the context information. If the ingestion rate exceeds the threshold, logs will be dropped and the threshold may be reduced over time if the service continues to exceed it. It is important for teams to take appropriate action to control log ingestion when they receive alerts about throttling. 

Sources: 
- Context information: "Any log ingested over the throttling threshold will be dropped and not be available at any point in time. Teams already receive alerts when their service 's logs are throttled and appropriate action is required to control the ingestion."
- Context information: "If the ingestion rate fails to come down below the original throttling limit, the threshold will keep getting reduced eve