# Code

In [3]:
# Imports and setup model
import os
import openai
import json
import configobj
import tiktoken
from tqdm.auto import tqdm
import pinecone
from time import sleep

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Setup env vars and constants
# Credentials are read from a local `.env` file rather than being written into the notebook.
config = configobj.ConfigObj('.env')
openai.api_key = config["OPENAI_API_KEY"]
pinecone_key = config["PINECONE_API_KEY"]
pinecone_env = config["PINECONE_ENVIRONMENT"]

# Model used to embed both the documents and the user queries.
embedding_model = "text-embedding-ada-002" 
# Chat model used for the completions; also selects the tokenizer used for token counting.
api_model = "gpt-4"
# Dimension the Pinecone index is created with.
# NOTE(review): presumably the vector size produced by `embedding_model` - confirm before changing the model.
dimensions = 1536
# Directory holding the documentation files. The trailing slash matters: other cells
# build paths from it by plain string concatenation.
docs_dir = 'docs/'
# NOTE(review): not referenced by any cell visible here - TODO confirm whether it is still needed.
typed_keys = ['arcs', 'geos', 'nodes', 'stats']

In [5]:
#Tokenize docs
# `encoding_for_model` already returns the Encoding object for the model, so it is
# used directly instead of being looked up a second time by its name.
tokenizer = tiktoken.encoding_for_model(api_model)

# create the length function
def tiktoken_len(text: str) -> int:
    """Return the number of tokens `text` encodes to with the notebook's `tokenizer`.

    Args:
        text: The string to measure.

    Returns:
        The length of the token list produced by `tokenizer.encode`.

    `disallowed_special=()` switches off tiktoken's check for special-token text,
    so such text is encoded like ordinary text instead of raising an error.
    """
    return len(tokenizer.encode(text, disallowed_special=()))


In [6]:
# load docs
texts = []         # document contents, in directory-listing order
docs_by_name = {}  # file name without its extension -> document contents

# NOTE: a document's position in `texts` doubles as its vector id in the index
# (ids are written from `enumerate(texts)` and read back with `texts[int(id)]`),
# so the listing order has to match the order used when the vectors were uploaded.
for doc in os.listdir(docs_dir):
    doc_path = os.path.join(docs_dir, doc)
    if not os.path.isfile(doc_path):
        # A sub-directory cannot be opened as a document; skip it instead of crashing.
        continue
    with open(doc_path, 'r') as f:
        text = f.read()
    texts.append(text)
    # splitext drops the extension whatever its length (the old slice assumed 3 characters).
    docs_by_name[os.path.splitext(doc)[0]] = text

print(texts[1])

### `maps`
This key group allows designers to specify information about the starting state of the map, what information is contained and how it is grouped in the legend, and what viewports can be easily jumped to by the user.

## Common keys
- [`allowModification`](../common_keys/common_keys.md#allowModification)
- [`data`](../common_keys/common_keys.md#data)
- [`icon`](../common_keys/common_keys.md#icon)
- [`name`](../common_keys/common_keys.md#name)
- [`order`](../common_keys/common_keys.md#order)
- [`sendToApi`](../common_keys/common_keys.md#sendToApi)
- [`sendToClient`](../common_keys/common_keys.md#sendToClient)

## Special and custom keys
Key | Default | Description
--- | ------- | -----------
<a name="defaultViewport">`customMapKey.defaultViewport`</a> | | A dictionary object containing geo properties that set the map's default field of view. Also used by the "home" viewport button in the app.
`customMapKey.defaultViewport.bearing` | `0` | The initial bearing (rotation) of the m

In [7]:
#initialize pinecone index
pinecone.init(api_key=pinecone_key, environment=pinecone_env)
pinecone.whoami()

index_name = 'gpt-4-embedded-docs'
# List the indexes once and reuse the result for both the printout and the check.
existing_indexes = pinecone.list_indexes()
print(existing_indexes)
# check if index already exists (it shouldn't if this is first time)
if index_name not in existing_indexes:
    # if does not exist, create index
    pinecone.create_index(
        index_name,
        dimension=dimensions,
        metric='cosine'
    )
    # Wait until the service reports the index as ready. A fixed pause can be too
    # short (connecting then fails) or needlessly long.
    while not pinecone.describe_index(index_name).status['ready']:
        sleep(1)

# connect to index
index = pinecone.GRPCIndex(index_name)
# view index stats (last expression, so the notebook displays it)
index.describe_index_stats()


['gpt-4-embedded-docs']


{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 12}},
 'total_vector_count': 12}

In [8]:
# Flip to True to (re)compute the embeddings and write them to the index.
upload=False
if upload:
    max_retries = 10        # give up on a document after this many rate-limited attempts
    retry_delay_seconds = 5

    # get embeddings
    for idx, doc in enumerate(tqdm(texts)):
        # Retry only on rate limiting. Any other failure (bad key, invalid request,
        # manual interrupt) is raised immediately instead of being retried forever.
        for attempt in range(max_retries):
            try:
                res = openai.Embedding.create(input=doc, engine=embedding_model)
                break
            except openai.error.RateLimitError:
                sleep(retry_delay_seconds)
        else:
            raise RuntimeError(
                f'Embedding request for document {idx} was still rate limited '
                f'after {max_retries} attempts'
            )

        embeds = [record['embedding'] for record in res['data']]
        # The vector id is the document's position in `texts`, stored as a string.
        to_upsert = list(zip([f'{idx}'], embeds))
        index.upsert(vectors=to_upsert)

In [9]:
# Example retrieval
query = "Deactivate the warehouse in the netherlands"

# Embed the query with the same model setting used for the documents.
res = openai.Embedding.create(input=[query], engine=embedding_model)

# One input was sent, so the first (only) record carries the query vector.
xq = res['data'][0]['embedding']

# Ask Pinecone for the three closest stored vectors, metadata included.
output = index.query(xq, top_k=3, include_metadata=True)

In [10]:
# The id of a match is the document's position in `texts`, stored as a string.
best_match = output['matches'][0]
print(texts[int(best_match['id'])])

### `maps`
This key group allows designers to specify information about the starting state of the map, what information is contained and how it is grouped in the legend, and what viewports can be easily jumped to by the user.

## Common keys
- [`allowModification`](../common_keys/common_keys.md#allowModification)
- [`data`](../common_keys/common_keys.md#data)
- [`icon`](../common_keys/common_keys.md#icon)
- [`name`](../common_keys/common_keys.md#name)
- [`order`](../common_keys/common_keys.md#order)
- [`sendToApi`](../common_keys/common_keys.md#sendToApi)
- [`sendToClient`](../common_keys/common_keys.md#sendToClient)

## Special and custom keys
Key | Default | Description
--- | ------- | -----------
<a name="defaultViewport">`customMapKey.defaultViewport`</a> | | A dictionary object containing geo properties that set the map's default field of view. Also used by the "home" viewport button in the app.
`customMapKey.defaultViewport.bearing` | `0` | The initial bearing (rotation) of the m

In [11]:
# Read the current API state as raw JSON text; it is left unparsed here.
state = ""
with open('state.json') as state_file:
    state = state_file.read()

In [34]:
# Read the overview document that describes the top-level keys.
topLevel = ""
with open(f'{docs_dir}topLevelInfo.md') as top_level_file:
    topLevel = top_level_file.read()

# Show how many tokens the overview takes up in a prompt.
print(tiktoken_len(topLevel))

916


In [31]:
# Natural-language request that the following cells turn into an API state change.
# This reassigns `query`, replacing the example retrieval query set in an earlier cell.
query = "Color districts by population"

# Run the query through GPT-4

Note: `gpt-4` is used for now because `gpt-4-32k` has not been publicly released yet and `gpt-3.5-turbo-16k` does not perform well enough on this task.

In [32]:
# Ask the model which top-level key of the state has to be edited for this request.
completion = openai.ChatCompletion.create(
  model=api_model,
  temperature=0.1,
  messages=[
    {"role": "system", "content": "Using the given documentation delimited by triple quotes, respond to user requests with nothing but the name of the top-level key that must be edited in order to fufill the request surrounded by single quotes"},
    {"role": "user", "content": '"""' + topLevel + '""" \n\n' + 'request: ' + query},
  ]
)
# The model is told to wrap the key in single quotes. Remove them, and also trim any
# whitespace or newline around the answer, because the result is used as a dict key.
current_key = completion.choices[0].message.content.replace("'", '').strip()

print(current_key)

geos


In [36]:
# Split the state stored under `current_key` into JSON strings that can be sent
# to the model one at a time.

# Token budget per chunk. Only the state JSON is measured against it; the
# documentation text and the request added to the prompt later are not counted.
# NOTE(review): presumably chosen to stay inside the chat model's context window - confirm.
max_chunk_tokens = 7000

key_dict = json.loads(state)[current_key]  # parse the state once and reuse it below
encoded_data = json.dumps(key_dict)
max_size = tiktoken_len(encoded_data) > max_chunk_tokens

if max_size:
    # Every chunk repeats all fields except 'data', so each chunk carries the
    # same surrounding fields together with its own slice of the data.
    shared_fields = {name: key_dict[name] for name in key_dict if name != 'data'}
    chunks = [{**shared_fields, 'data': {}}]
else:
    # Small enough: the whole key is sent as a single chunk.
    chunks = [encoded_data]
print(tiktoken_len(json.dumps(chunks)))

if max_size:
    for key, value in key_dict['data'].items():
        current_chunk = chunks[-1]
        over_budget = tiktoken_len(
            json.dumps(current_chunk) + json.dumps({key: value})
        ) > max_chunk_tokens
        # Start a new chunk only when the current one already holds data. An entry
        # that is too large on its own goes into the still-empty chunk, so no
        # chunk with empty 'data' is left behind to be sent for nothing.
        if over_budget and current_chunk['data']:
            chunks.append({**shared_fields, 'data': {key: value}})
        else:
            current_chunk['data'][key] = value
    # Serialise every chunk so that `chunks` holds JSON strings in both branches.
    chunks = [json.dumps(chunk) for chunk in chunks]

# Report the token count of every chunk, then the number of chunks.
for data in chunks:
    print(tiktoken_len(data))

print(len(chunks))

935
6999
6979
6986
6902
6872
6879
6884
6982
6991
6818
6993
6809
6806
6800
6795
6831
6984
6989
6987
6796
6816
6896
6908
6934
6969
6950
6931
6982
6945
6924
6962
6952
6923
6949
6942
6940
6953
6959
6938
6936
6934
6926
6952
6919
6950
6947
6921
6875
6868
6884
6881
6884
6909
6915
6955
6948
6997
6992
6975
6803
6953
6791
6982
6976
6986
4076
66


In [37]:
# Instructions sent with every request; only the state chunk changes between calls.
system_prompt = "Using the given documentation delimited by triple quotes and current api state encoded in JSON, respond to user requests with the path(s) and value(s) you would modify in the api to achieve the users desired action in the format \n```{\"path\": path, \"value\": value}```. If the user request isn't possible with the given data respond with 'Not possible'"

# Try the chunks in order and stop at the first one the model can act on.
for data in chunks:
    user_prompt = '"""' + docs_by_name[current_key] + '""" \n\n' + data + '\n\nrequest: ' + query
    completion = openai.ChatCompletion.create(
        model=api_model,
        temperature=0.1,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )

    mutate_command = completion.choices[0].message.content
    print(mutate_command)
    if 'Not possible' in mutate_command:
        # This chunk does not contain what the request needs; move on to the next one.
        continue
    print(mutate_command)
    break

{"path": "types.DHLAppDistrict.colorBy", "value": "population"}
{"path": "types.DHLAppDistrict.colorBy", "value": "population"}
