# Code

In [7]:
# Imports and setup model
import os
import openai
import json
import configobj
import numpy
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import tiktoken
from typing_extensions import Concatenate
from uuid import uuid4
from tqdm.auto import tqdm
import pinecone
from time import sleep
from IPython.display import Markdown

In [55]:
# Notebook-wide constants
embedding_model = "text-embedding-ada-002"  # model used for every Embedding.create call
api_model = "gpt-4"                         # chat model; also selects the tokenizer
dimensions = 1536                           # vector size passed when creating the Pinecone index
docs_dir = 'docs/'                          # directory holding the markdown documentation

# Credentials are read from a local .env file so they never appear in the notebook
config = configobj.ConfigObj('.env')
openai.api_key = config["OPENAI_API_KEY"]
pinecone_key = config["PINECONE_API_KEY"]
pinecone_env = config["PINECONE_ENVIRONMENT"]

In [9]:
# Load every documentation file found in docs_dir
texts = []         # raw file contents, in directory-listing order
docs_by_name = {}  # file name without its extension -> file contents

# NOTE(review): os.listdir returns entries in arbitrary order, and the Pinecone
# vector ids used further down are positions in `texts`. The listing order is
# kept unchanged here so ids already stored in the index stay valid; storing
# the document name as the vector id would remove this coupling.
for doc in os.listdir(docs_dir):
    doc_path = os.path.join(docs_dir, doc)
    # Skip sub-directories instead of crashing inside open()
    if not os.path.isfile(doc_path):
        continue
    # Explicit encoding so the result does not depend on the platform default
    with open(doc_path, 'r', encoding='utf-8') as f:
        text = f.read()
    texts.append(text)
    # splitext drops the extension whatever its length (the original sliced off 3 chars)
    docs_by_name[os.path.splitext(doc)[0]] = text

print(texts[1])

### `maps`
This key group allows designers to specify information about the starting state of the map, what information is contained and how it is grouped in the legend, and what viewports can be easily jumped to by the user.

## Common keys
- [`allowModification`](../common_keys/common_keys.md#allowModification)
- [`data`](../common_keys/common_keys.md#data)
- [`icon`](../common_keys/common_keys.md#icon)
- [`name`](../common_keys/common_keys.md#name)
- [`order`](../common_keys/common_keys.md#order)
- [`sendToApi`](../common_keys/common_keys.md#sendToApi)
- [`sendToClient`](../common_keys/common_keys.md#sendToClient)

## Special and custom keys
Key | Default | Description
--- | ------- | -----------
<a name="defaultViewport">`customMapKey.defaultViewport`</a> | | A dictionary object containing geo properties that set the map's default field of view. Also used by the "home" viewport button in the app.
`customMapKey.defaultViewport.bearing` | `0` | The initial bearing (rotation) of the m

In [57]:
# Build the tokenizer used to measure how many tokens each text will cost.
# Despite its name, `tokenizer_name` is the object returned by
# encoding_for_model; only its `.name` attribute is the encoding name.
tokenizer_name = tiktoken.encoding_for_model(api_model)
tokenizer = tiktoken.get_encoding(tokenizer_name.name)


def tiktoken_len(text):
    """Return the number of tokens the module-level `tokenizer` yields for `text`.

    `disallowed_special=()` is passed through to `tokenizer.encode`
    (presumably so special-token strings inside the text are encoded as
    ordinary text rather than raising - confirm against the tiktoken docs).
    """
    return len(tokenizer.encode(text, disallowed_special=()))


# Token count of every loaded document, in the same order as `texts`
print(list(map(tiktoken_len, texts)))

[151, 1603, 771, 963, 977, 875, 2260, 663, 1417, 745, 890, 820]


In [11]:
# Peek at one raw document; as the last expression of the cell, its repr is displayed
texts[5]

'# `categories`\nBoth designers and users often need to work with different levels of data aggregation. the `categories` top level key allows for easy and arbitrary aggregation/filtering of data in the UI. They are a core aspect for how chart groupings, chart subgroupgings and general filtering works.\n\n## Common keys\n- [`allowModification`](../common_keys/common_keys.md#allowModification)\n- [`data`](../common_keys/common_keys.md#data)\n- [`name`](../common_keys/common_keys.md#name)\n- [`order`](../common_keys/common_keys.md#order)\n- [`sendToApi`](../common_keys/common_keys.md#sendToApi)\n- [`sendToClient`](../common_keys/common_keys.md#sendToClient)\n\n## Special and custom keys\nKey | Default | Description\n--- | ------- | -----------\n<a name="customCategory">`customCategory*`</a> | Required | A custom key for categorical data. Each `customCategory*` key encloses a well-defined structure. This represents a higher level structure for filtering and aggregation purposes. A simple e

In [12]:
# Initialize the Pinecone client and make sure the index exists
pinecone.init(api_key=pinecone_key, environment=pinecone_env)
pinecone.whoami()  # result is discarded; presumably a credentials sanity check - confirm

index_name = 'gpt-4-embedded-docs'

# List the indexes once and reuse the result for both the printout and the check
existing_indexes = pinecone.list_indexes()
print(existing_indexes)

# Create the index on first run only (it shouldn't exist the first time)
if index_name not in existing_indexes:
    pinecone.create_index(
        index_name,
        dimension=dimensions,
        metric='cosine'
    )
    # Poll until the index reports ready instead of sleeping a fixed 10 seconds,
    # which can be too short (or needlessly long) depending on the environment
    while not pinecone.describe_index(index_name).status['ready']:
        sleep(1)

# Connect to the index and display its stats as the cell output
index = pinecone.GRPCIndex(index_name)
index.describe_index_stats()


['gpt-4-embedded-docs']


{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 12}},
 'total_vector_count': 12}

In [13]:
def embed_with_retry(text, max_retries=10, wait_seconds=5):
    """Request an embedding for `text`, retrying when the API rate limit is hit.

    Args:
        text: the text passed as `input` to `openai.Embedding.create`.
        max_retries: how many times to retry after the first failed attempt.
        wait_seconds: pause between attempts, in seconds.

    Returns:
        The response object returned by `openai.Embedding.create`.

    Raises:
        openai.error.RateLimitError: if the rate limit is still being hit
            after `max_retries` retries. Any other error propagates at once
            instead of being silently retried forever.
    """
    for attempt in range(max_retries + 1):
        try:
            return openai.Embedding.create(input=text, engine=embedding_model)
        except openai.error.RateLimitError:
            if attempt == max_retries:
                raise
            sleep(wait_seconds)


# Flip to True to (re)compute the embeddings and upsert them into the index
upload = False
if upload:
    for idx, doc in enumerate(tqdm(texts)):
        res = embed_with_retry(doc)
        embeds = [record['embedding'] for record in res['data']]
        # The vector id is the document's position in `texts`, stored as a string
        to_upsert = list(zip([f'{idx}'], embeds))
        index.upsert(vectors=to_upsert)

In [14]:
# Example retrieval: embed a sample request and look up the closest documents
query = "Set node type A to size by numeric prop example b"

# Embed the request with the same model that was used for the documents
res = openai.Embedding.create(input=[query], engine=embedding_model)
xq = res['data'][0]['embedding']

# Ask Pinecone for the three nearest vectors to the query embedding
output = index.query(vector=xq, top_k=3, include_metadata=True)

In [15]:
# Show the document behind the best match; the match id is converted to an
# int and used as a position in `texts`
top_match = output['matches'][0]
print(texts[int(top_match['id'])])

# `nodes`
The `nodes` group contains data that is typically used to visualize single geographic locations in the "**Map**" view.

## Common keys
- [`allowModification`](../common_keys/common_keys.md#allowModification)
- [`category`](../common_keys/common_keys.md#category)
- [`colorByOptions`](../common_keys/common_keys.md#colorByOptions)
- [`column`](../common_keys/common_keys.md#column)
- [`data`](../common_keys/common_keys.md#data)
- [`enabled`](../common_keys/common_keys.md#enabled)
- [`endGradientColor`](../common_keys/common_keys.md#endGradientColor)
- [`endSize`](../common_keys/common_keys.md#endSize)
- [`help`](../common_keys/props.md#help)
- [`icon`](../common_keys/common_keys.md#icon)
- [`name`](../common_keys/common_keys.md#name)
- [`numberFormat`](../common_keys/common_keys.md#number-format)
- [`prop > type`](../common_keys/props.md#prop-type)
- [`props`](../common_keys/common_keys.md#props-short)
- [`sendToApi`](../common_keys/common_keys.md#sendToApi)
- [`sendToClient`](..

In [16]:
# Read the current API state as raw JSON text (it is parsed where it is used).
# The encoding is explicit so the read does not depend on the platform default.
with open('state.json', encoding='utf-8') as f:
    state = f.read()

print(state)

{"arcs":{"types":{"T1":{"name":"Flow Type 1","colorByOptions":{"selectorPropForColor":{"a":"rgb(128,255,255)","b":"rgb(0,153,51)","c":"rgb(0,0,128)","d":"rgb(204,0,0)","e":"rgb(153,77,0)","f":"rgb(255,25,255)"},"numericPropExampleA":{"min":0,"max":50,"startGradientColor":{"dark":"rgb(233, 0, 0)","light":"rgb(52, 52, 236)"},"endGradientColor":{"dark":"rgb(96, 2, 2)","light":"rgb(23, 23, 126)"}},"numericPropExampleB":{"min":0,"max":40,"startGradientColor":{"dark":"rgb(233, 0, 0)","light":"rgb(52, 52, 236)"},"endGradientColor":{"dark":"rgb(96, 2, 2)","light":"rgb(23, 23, 126)"}}},"lineBy":"solid","sizeByOptions":{"numericPropExampleA":{"min":0,"max":50},"numericPropExampleB":{"min":0,"max":40}},"startSize":"15px","endSize":"30px","props":{"numericPropExampleA":{"name":"Numeric Prop Example A","type":"num","enabled":true,"help":"Help for numeric prop example A","numberFormat":{"unit":"A units"},"legendOverride":{"useScientificFormat":false,"minLabel":"small"}},"numericPropExampleB":{"name"

# Run query through GPT-4

Note: This currently uses `gpt-4` because `gpt-4-32k` hasn't been publicly released yet and `gpt-3.5-turbo-16k` doesn't produce good enough results.

In [35]:
# Note: Consider if it's worth vectorizing this... or anything tbh
# Read the top-level overview document that is sent with the first prompt.
# os.path.join works with or without a trailing slash on docs_dir, and the
# explicit encoding avoids depending on the platform default.
with open(os.path.join(docs_dir, 'topLevelInfo.md'), encoding='utf-8') as f:
    topLevel = f.read()

# Token cost of including the overview in a prompt
print(tiktoken_len(topLevel))

900


In [53]:
# Step 1: ask the model which top-level key of the state the request touches
query = "Hide node type a on the map"

# NOTE(review): "fufill" is a typo for "fulfill"; the prompt text is left
# byte-identical here because changing it may change the model's answers.
key_system_prompt = (
    "Using the given documentation delimited by triple quotes, respond to user requests with "
    "nothing but the name of the top-level key that must be edited in order to fufill the "
    "request surrounded by single quotes"
)
key_user_message = "".join(['"""', topLevel, '""" \n\nrequest: ', query])

completion = openai.ChatCompletion.create(
    model=api_model,
    temperature=0.1,
    messages=[
        {"role": "system", "content": key_system_prompt},
        {"role": "user", "content": key_user_message},
    ]
)

# The model is asked to wrap the key in single quotes: drop them, and also
# strip surrounding whitespace/newlines so the answer can be used as a dict key
current_key = completion.choices[0].message.content.replace("'", '').strip()

print(current_key)

maps


In [58]:
# Extract the part of the state under the selected top-level key and
# re-serialize it so it can be embedded in the next prompt
state_section = json.loads(state)[current_key]
encoded_key = json.dumps(state_section)

# Token cost of the state section, then of the matching documentation page
print(tiktoken_len(encoded_key))
key_docs = docs_by_name[current_key]
print(tiktoken_len(key_docs))
print(encoded_key)

889
1603
{"data": {"map1": {"defaultViewport": {"longitude": -75.44766721108091, "latitude": 40.34530681636297, "zoom": 4.657916626867326, "pitch": 0, "bearing": 0, "height": 1287, "altitude": 1.5, "maxZoom": 12, "minZoom": 2}, "optionalViewports": {"ov0": {"icon": "FaGlobeAsia", "name": "Asia", "zoom": 4, "order": 1, "pitch": 0, "bearing": 0, "maxZoom": 12, "minZoom": 2, "latitude": 30, "longitude": 121}, "ov1": {"icon": "FaGlobeEurope", "name": "EMEA", "zoom": 4, "order": 1, "pitch": 0, "bearing": 0, "maxZoom": 12, "minZoom": 2, "latitude": 47, "longitude": 14}}, "legendGroups": {"lga": {"name": "Legend Group A", "nodes": {"nodeTypeA": {"value": true, "sizeBy": "numericPropExampleA", "colorBy": "booleanPropExample"}}, "arcs": {"T1": {"colorBy": "numericPropExampleA", "sizeBy": "numericPropExampleB", "value": true}}, "order": 1}, "lgb": {"name": "Legend Group B", "nodes": {"nodeTypeB": {"value": true, "sizeBy": "numericPropExampleB", "colorBy": "booleanPropExample"}}, "arcs": {"T2": {

In [54]:
# Step 2: ask the model for the path/value change that fulfils the request,
# given the documentation page and the current state for the selected key
mutation_system_prompt = (
    "Using the given documentation delimited by triple quotes and current api state encoded in JSON, "
    "respond to user requests with the path(s) and value(s) you would modify in the api to achieve "
    "the users desired action in the format \n```{\"path\": path, \"value\": value}```. "
    "If the user request isn't possible with the given data respond with 'Not possible'"
)
mutation_user_message = "".join([
    '"""', docs_by_name[current_key], '""" \n\n',
    encoded_key,
    '\n\nrequest: ', query,
])

completion = openai.ChatCompletion.create(
    model=api_model,
    temperature=0.1,
    messages=[
        {"role": "system", "content": mutation_system_prompt},
        {"role": "user", "content": mutation_user_message},
    ]
)

# Keep the raw answer for later use and show it
mutate_command = completion.choices[0].message.content
print(mutate_command)

{"path": "data.map1.legendGroups.lga.nodes.nodeTypeA.value", "value": false}
