In [1]:
import os
import json
import sys

## Read and set API keys

In [2]:
# Load the notebook configuration (JSON) from the working directory.
with open('config.json', mode='r') as config_file:
    config_data = json.load(config_file)

# Pull the API key out of the parsed config and publish it through the
# OPENAI_API_KEY environment variable in one step.
api_key = os.environ['OPENAI_API_KEY'] = config_data['api_key']

## Logging

In [3]:
# Set up logging
import logging

# basicConfig() installs a single StreamHandler on the root logger that writes
# to stdout (it is a no-op if the root logger already has handlers, so
# re-running this cell does not stack handlers).
# No second StreamHandler is attached: an additional stdout handler on the root
# logger caused every log record to be printed twice.
# Available levels: DEBUG, INFO, WARNING, ERROR, CRITICAL
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [4]:
import tiktoken
from llama_index.callbacks import CallbackManager, TokenCountingHandler
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext

# The tokenizer can be supplied explicitly (as done here) or left to default to
# the tokenizer that was previously used for token counting.
# NOTE: it must be a callable that takes text and returns a list of tokens.
gpt4_encoding = tiktoken.encoding_for_model("gpt-4")

token_counter = TokenCountingHandler(
    tokenizer=gpt4_encoding.encode,
    verbose=True,  # print usage to the console
)

# Callback manager carrying the token counter as its only handler.
callback_manager = CallbackManager([token_counter])

INFO:numexpr.utils:Note: NumExpr detected 10 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
Note: NumExpr detected 10 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.
NumExpr defaulting to 8 threads.


## Load the index

In [5]:
from llama_index import StorageContext, load_index_from_storage

# Directory the index was persisted to.
PERSIST_DIR = "./storage"

# Recreate the storage context from the persisted files, then load the index from it.
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context=storage_context)

DEBUG:llama_index.storage.kvstore.simple_kvstore:Loading llama_index.storage.kvstore.simple_kvstore from ./storage/docstore.json.
Loading llama_index.storage.kvstore.simple_kvstore from ./storage/docstore.json.
DEBUG:fsspec.local:open file: /Users/simonvirgo/PycharmProjects/Leia/storage/docstore.json
open file: /Users/simonvirgo/PycharmProjects/Leia/storage/docstore.json
DEBUG:llama_index.storage.kvstore.simple_kvstore:Loading llama_index.storage.kvstore.simple_kvstore from ./storage/index_store.json.
Loading llama_index.storage.kvstore.simple_kvstore from ./storage/index_store.json.
DEBUG:fsspec.local:open file: /Users/simonvirgo/PycharmProjects/Leia/storage/index_store.json
open file: /Users/simonvirgo/PycharmProjects/Leia/storage/index_store.json
DEBUG:llama_index.vector_stores.simple:Loading llama_index.vector_stores.simple from ./storage/vector_store.json.
Loading llama_index.vector_stores.simple from ./storage/vector_store.json.
DEBUG:fsspec.local:open file: /Users/simonvirgo/Pyc

## GPT-4

In [6]:
from llama_index.llms import OpenAI
from llama_index import ServiceContext

# GPT-4 LLM at temperature 0.
gpt4 = OpenAI(model="gpt-4", temperature=0)

# Service context that routes LLM calls through GPT-4 and reports to the
# shared callback manager.
service_context_gpt4 = ServiceContext.from_defaults(
    llm=gpt4,
    callback_manager=callback_manager,
)

### Create the prompt template

In [7]:
from llama_index import Prompt
# Custom question-answering prompt, assembled from its parts in order.
# `{context_str}` and `{query_str}` are the template placeholders.
TEMPLATE_STR = "".join([
    # Persona of the assistant.
    "You are Leia, the LiquidEarth Intelligent Assistant. You are helping a user with a question about LiquidEarth. You are very smart and friendly and always in a great mood.\n",
    # Domain notes and how to treat the 'Control' metadata field.
    "In LiquidEarth, a 'Space' and a 'Project are the same thing. We have provided Documentation on the software and further information below. In some cases the metadata includes a 'Control' Field that points to a UI element in the app associated to the described functionality. this is only for internal use. when describing controls to the user, use descriptions and names from the text, not the 'control' values. \n",
    # Context, fenced by separator lines.
    "---------------------\n",
    "{context_str}",
    "\n---------------------\n",
    # Answering instructions, output format for controls, and the user question.
    "Answer the question for a human to understand. Additionally, return the 'Control' properties in the order of operations in the following form at the end of your response: [[control1], [control2], ...]. Append the list to your response without further comment. If no controls are found, do not comment it. Never include any controls that are not specified in the Metadata Field in the provided documentation. Do not interpret any controls from the text body. If the answer requires multiple steps, describe each step in detail. Given this information, please answer the question: {query_str}\n",
])
QA_TEMPLATE = Prompt(TEMPLATE_STR)

In [8]:
# Build the query engine from the loaded index: GPT-4 service context, the
# custom QA prompt, embedding-based retrieval and the shared callback manager.
# The retrieval depth is set here through the public `similarity_top_k`
# keyword (6 nodes), so the engine is fully configured at construction time and
# does not depend on mutating the retriever's private `_similarity_top_k`
# attribute afterwards.
query_engine = index.as_query_engine(
    service_context=service_context_gpt4,
    text_qa_template=QA_TEMPLATE,
    retriever_mode="embedding",
    similarity_top_k=6,
    callback_manager=callback_manager,
)

In [9]:
#dirty hack: trying to increase the context size
# Overwrites the private `_similarity_top_k` attribute of the query engine's
# retriever with 6.
# NOTE(review): presumably this is the number of nodes retrieved per query.
# The attribute is private (leading underscore), so this may break on a
# library upgrade; consider passing `similarity_top_k=6` to
# `index.as_query_engine(...)` instead. TODO confirm against the installed
# llama_index version.
query_engine.retriever._similarity_top_k = 6

In [10]:
# Send a sample user question through the query engine and keep the response.
user_question = "hello,how can i create a project and add some data?"
response = query_engine.query(user_question)

DEBUG:openai:message='Request to OpenAI API' method=post path=https://api.openai.com/v1/embeddings
message='Request to OpenAI API' method=post path=https://api.openai.com/v1/embeddings
DEBUG:openai:api_version=None data='{"input": ["hello,how can i create a project and add some data?"], "model": "text-embedding-ada-002", "encoding_format": "base64"}' message='Post details'
api_version=None data='{"input": ["hello,how can i create a project and add some data?"], "model": "text-embedding-ada-002", "encoding_format": "base64"}' message='Post details'
DEBUG:urllib3.util.retry:Converted retries value: 2 -> Retry(total=2, connect=None, read=None, redirect=None, status=None)
Converted retries value: 2 -> Retry(total=2, connect=None, read=None, redirect=None, status=None)
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): api.openai.com:443
Starting new HTTPS connection (1): api.openai.com:443
DEBUG:urllib3.connectionpool:https://api.openai.com:443 "POST /v1/embeddings HTTP/1.1" 2

In [11]:
# Show the response in its string form.
print(str(response))

Hello! Here's how you can create a project and add data in LiquidEarth:

**Creating a New Project:**

1. Open the server explorer on the left side of your screen.
2. Click the Plus button at the bottom of your server explorer list to create a new project.
3. (Optional) You can adapt basic information and project metadata such as the project name and owner by using the Edit Data option in the inspector. Once you've set all required fields correctly, click Save to accept the information.
4. Use the Load Project option in the inspector to load your project into your local workspace (3D Workspace). Your project will appear in your 3D workspace and will be listed in the Local Explorer. If you're working online, the project will also be automatically uploaded to the cloud or the remote server you're connected to.

**Adding Data to Your Project:**

1. Select the target space (your newly created project) and click on the Import Data button in the bottom left of the Local Explorer.
2. This will

## Chat with a prompt template (ToDo)

In [None]:
# The chat engine class is not imported by any earlier cell; import it here so
# this cell does not fail with a NameError.
from llama_index.chat_engine.condense_question import CondenseQuestionChatEngine

# Prompt that rewrites a follow-up message into a standalone question, using
# the `{chat_history}` and `{question}` placeholders.
custom_prompt = Prompt("""\
Given a conversation (between Human and Assistant) and a follow up message from Human, \
rewrite the message to be a standalone question that captures all relevant context \
from the conversation.

<Chat History>
{chat_history}

<Follow Up Message>
{question}

<Standalone question>
""")

# list of (human_message, ai_message) tuples
# NOTE(review): placeholder history; presumably to be replaced with a
# LiquidEarth-specific conversation once this ToDo section is finished.
custom_chat_history = [
    (
        'Hello assistant, we are having a insightful discussion about Paul Graham today.',
        'Okay, sounds good.'
    )
]

# Use a dedicated name for the default-configured engine so the GPT-4
# `query_engine` built earlier in the notebook is not overwritten when this
# cell runs.
chat_query_engine = index.as_query_engine()
chat_engine = CondenseQuestionChatEngine.from_defaults(
    query_engine=chat_query_engine,
    condense_question_prompt=custom_prompt,
    chat_history=custom_chat_history,
    verbose=True
)

## Print token usage

In [12]:
# Collect the counters recorded by the token counting handler, label first.
usage_rows = [
    ('Embedding Tokens: ', token_counter.total_embedding_token_count),
    ('LLM Prompt Tokens: ', token_counter.prompt_llm_token_count),
    ('LLM Completion Tokens: ', token_counter.completion_llm_token_count),
    ('Total LLM Token Count: ', token_counter.total_llm_token_count),
]

# Joining with ' \n ' reproduces the single spaces print() places between
# separate positional arguments, so the output layout is unchanged.
print(' \n '.join(f'{label} {count!s}' for label, count in usage_rows))

Embedding Tokens:  0 
 LLM Prompt Tokens:  1393 
 LLM Completion Tokens:  318 
 Total LLM Token Count:  1711
