In [28]:
import os, sys
import openai
import pinecone
import pandas as pd
from dotenv import dotenv_values
from llama_hub.confluence.base import ConfluenceReader
from langchain.chat_models import ChatOpenAI
from llama_index import LLMPredictor, ServiceContext, GPTVectorStoreIndex
from llama_index.storage.storage_context import StorageContext
from llama_index.vector_stores import PineconeVectorStore
from llama_index import set_global_service_context

# Read secrets and settings from the local .env file.
config = dotenv_values(".env")

# Point the OpenAI client at the organisation and API key found in .env.
openai.organization = config["ORG_ID"]
openai.api_key = config["OPENAI_API_KEY"]

# The Confluence credentials are not exported from .env here; the two
# assignments below are left commented out.
# NOTE(review): presumably ConfluenceReader picks its credentials up from the
# process environment, so they must already be set outside this notebook - confirm.
# os.environ["CONFLUENCE_API_TOKEN"] = config["CONFLUENCE_API_TOKEN"]
# os.environ["CONFLUENCE_USERNAME"] = config["CONFLUENCE_USERNAME"]

# Size of the LLM context window. Normally this comes from the model metadata;
# setting it explicitly gives additional control.
context_window = 4096

# Number of output tokens to reserve when working out how much room is left in
# the context window for text from retrieved Nodes. On its own this setting
# does not cap the model output, but the same value is also passed as
# `max_tokens` to ChatOpenAI below.
num_output = 512

# LLMPredictor is a wrapper class around LangChain's LLMChain that allows easy
# integration into LlamaIndex.
llm_predictor = LLMPredictor(
    llm=ChatOpenAI(
        temperature=0.2,
        model_name="gpt-3.5-turbo",
        max_tokens=num_output,
    )
)

# Bundle the predictor and the window sizes into a service context ...
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    context_window=context_window,
    num_output=num_output,
)

# ... and register it as the global default.
set_global_service_context(service_context)

# Initialise the Pinecone client.
pinecone.init(
    api_key=config["PINECONE_API_KEY"],
    environment="asia-southeast1-gcp-free",
)

def data_ingestion_indexing(base_url="https://navihq.atlassian.net/wiki/",
                            space_key="IN",
                            index_name="confluence-index"):
    """Load a Confluence space and index its documents into Pinecone.

    Relies on the module-level ``pinecone.init(...)`` call having run first.

    Args:
        base_url: Base URL of the Confluence wiki to read from.
        space_key: Key of the Confluence space whose pages are loaded.
        index_name: Name of the Pinecone index that stores the vectors.

    Returns:
        The ``GPTVectorStoreIndex`` built from the loaded documents and backed
        by the Pinecone vector store.
    """
    # Construct the ConfluenceReader and load the space (attachments excluded).
    # Only `space_key` is passed as the selector: an explicit empty `page_ids`
    # list is a second selector, which newer llama_hub releases appear to
    # reject - TODO confirm against the installed version.
    loader = ConfluenceReader(base_url=base_url)
    documents = loader.load_data(space_key=space_key, include_attachments=False)

    # Wrap the Pinecone index in a vector store and use it as the storage backend.
    pinecone_index = pinecone.Index(index_name)
    vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    # Build the index from the documents using that storage context.
    return GPTVectorStoreIndex.from_documents(documents, storage_context=storage_context)

# Build the index when this cell runs; data_querying() reads this module-level `index`.
index = data_ingestion_indexing()

Upserted vectors: 100%|██████████| 274/274 [00:03<00:00, 83.76it/s]


In [32]:

def data_querying(input_text):
    """Answer a question against the module-level vector index.

    Args:
        input_text: The question to ask. It is handed to the query engine
            as-is; no manual embedding step is done here.

    Returns:
        Whatever the query engine's ``query`` call returns; printing it shows
        the answer text.

    Raises:
        ValueError: If ``input_text`` is a string that is empty or contains
            only whitespace.
    """
    # Reject blank questions up front instead of spending a remote call on them.
    if isinstance(input_text, str) and not input_text.strip():
        raise ValueError("input_text must be a non-empty question")

    # `index` and `service_context` are the module-level objects built earlier
    # in the notebook.
    query_engine = index.as_query_engine(service_context=service_context)
    return query_engine.query(input_text)

# The ticket text is two sentences. Adjacent literals are joined with an
# explicit newline; a backslash continuation inside a single literal would
# glue "portal" and "This" together with no separator.
print(data_querying(
    "I’m not able to see any role on consoleMe, I’ve checked with security team, roles have been assigned to me, but not visible on the portal\n"
    "This is urgent , please check"
))


You may need to refresh the ConsoleMe portal a few times to see the roles that have been assigned to you. If the issue persists, you should reach out to the Infra/Security team for further assistance.
