In [3]:
import os

# Set the OpenAI API key
os.environ["OPENAI_API_KEY"] = "your key here"


In [14]:
import numpy as np
import faiss
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.docstore.in_memory import InMemoryDocstore
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain import LLMChain
from langchain_openai import ChatOpenAI
from langchain.schema import HumanMessage


In [15]:
# Predefined actions, protocols, and chains
predefined_actions = ["swap", "stake", "bridge"]
predefined_protocols = ["uniswap", "lido", "across"]
predefined_chains = ["ethereum", "polygon", "arbitrum"]

# Create OpenAI embedding model
embedding_model = OpenAIEmbeddings()

# Embed predefined lists and convert to NumPy arrays
action_embeddings = np.array(embedding_model.embed_documents(predefined_actions))
protocol_embeddings = np.array(embedding_model.embed_documents(predefined_protocols))
chain_embeddings = np.array(embedding_model.embed_documents(predefined_chains))


In [16]:
# Create FAISS Index
action_faiss_index = faiss.IndexFlatL2(action_embeddings.shape[1])  # Initialize FAISS index
protocol_faiss_index = faiss.IndexFlatL2(protocol_embeddings.shape[1])
chain_faiss_index = faiss.IndexFlatL2(chain_embeddings.shape[1])

# Add vectors to the FAISS index
action_faiss_index.add(action_embeddings)
protocol_faiss_index.add(protocol_embeddings)
chain_faiss_index.add(chain_embeddings)


In [17]:
# Create InMemoryDocstore for each (with Document objects)
action_docstore = InMemoryDocstore({
    i: Document(page_content=predefined_actions[i]) for i in range(len(predefined_actions))
})

protocol_docstore = InMemoryDocstore({
    i: Document(page_content=predefined_protocols[i]) for i in range(len(predefined_protocols))
})

chain_docstore = InMemoryDocstore({
    i: Document(page_content=predefined_chains[i]) for i in range(len(predefined_chains))
})


In [18]:
# Map indices to Docstore IDs
action_index_to_docstore_id = {i: i for i in range(len(predefined_actions))}
protocol_index_to_docstore_id = {i: i for i in range(len(predefined_protocols))}
chain_index_to_docstore_id = {i: i for i in range(len(predefined_chains))}


In [19]:
# Create FAISS vector stores with the required arguments
action_store = FAISS(
    index=action_faiss_index, 
    docstore=action_docstore, 
    index_to_docstore_id=action_index_to_docstore_id, 
    embedding_function=embedding_model
)

protocol_store = FAISS(
    index=protocol_faiss_index, 
    docstore=protocol_docstore, 
    index_to_docstore_id=protocol_index_to_docstore_id, 
    embedding_function=embedding_model
)

chain_store = FAISS(
    index=chain_faiss_index, 
    docstore=chain_docstore, 
    index_to_docstore_id=chain_index_to_docstore_id, 
    embedding_function=embedding_model
)


In [20]:
def process_command_with_embeddings(command: str):
    # Embed the user command
    command_embedding = embedding_model.embed_query(command)

    # Search for the closest matching action, protocol, and chain
    action_result = action_store.similarity_search_by_vector(command_embedding, k=1)[0]
    protocol_result = protocol_store.similarity_search_by_vector(command_embedding, k=1)[0]
    chain_result = chain_store.similarity_search_by_vector(command_embedding, k=1)[0]

    # Create prompt for LLM to extract parameters and convert to DSL
    dsl_prompt = f"""
    Convert the following natural language command into a structured JSON output based on the following schema:
    {{
      "action": "<action>",
      "protocol": "<protocol>",
      "chain": "<chain>",
      "params": {{
        "token_in": "<token_in>",
        "token_out": "<token_out>",
        "amount_in": "<amount_in>"
      }}
    }}

    Command: {command}

    Output:
    """

    # Initialize OpenAI Chat-based model (GPT-4 or GPT-3.5-turbo)
    openai_llm = ChatOpenAI(temperature=0, model="gpt-4")  # or "gpt-3.5-turbo"

    # Call the LLM with the prompt
    messages = [HumanMessage(content=dsl_prompt)]
    response = openai_llm(messages)

    # Return the generated DSL as a structured JSON output
    return response.content


In [23]:
# Test case
command = "Swap 200 btc for ETH on Uniswap on Ethereum"
structured_output = process_command_with_embeddings(command)

# Print the structured DSL output
print(structured_output)


{
  "action": "Swap",
  "protocol": "Uniswap",
  "chain": "Ethereum",
  "params": {
    "token_in": "btc",
    "token_out": "ETH",
    "amount_in": "200"
  }
}


# Explanation

To help you visualize the documents used in your system, let's walk through how the documents are structured in the `InMemoryDocstore`. These documents represent the actions, protocols, and chains you defined earlier, and they are linked to the FAISS index for similarity search.

Each document corresponds to an action, protocol, or chain, and they are stored in the `InMemoryDocstore` as `Document` objects. These documents have the following structure:

- **Document Object**: Each document contains:
  - `page_content`: The content of the document, which is the actual text (like "swap", "uniswap", "ethereum").
  - `metadata` (optional): Additional information about the document (like the document's source or type).

Here’s a visualization of how these documents are organized and stored.

### Document Structure Visualization

Imagine you have the following predefined lists:

```python
predefined_actions = ["swap", "stake", "bridge"]
predefined_protocols = ["uniswap", "lido", "across"]
predefined_chains = ["ethereum", "polygon", "arbitrum"]
```

For each predefined item, we create a `Document` object and store it in the `InMemoryDocstore`. The documents will look like this:

#### Actions:

| Document ID | Page Content |
|-------------|--------------|
| 0           | "swap"       |
| 1           | "stake"      |
| 2           | "bridge"     |

#### Protocols:

| Document ID | Page Content |
|-------------|--------------|
| 0           | "uniswap"    |
| 1           | "lido"       |
| 2           | "across"     |

#### Chains:

| Document ID | Page Content |
|-------------|--------------|
| 0           | "ethereum"   |
| 1           | "polygon"    |
| 2           | "arbitrum"   |

### How They Are Stored in `InMemoryDocstore`

Each of these tables is stored as a dictionary in the `InMemoryDocstore`. Let’s visualize the storage for each type (actions, protocols, and chains) in Python dictionary format.

#### Actions `InMemoryDocstore`:

```python
{
    0: Document(page_content="swap"),
    1: Document(page_content="stake"),
    2: Document(page_content="bridge")
}
```

#### Protocols `InMemoryDocstore`:

```python
{
    0: Document(page_content="uniswap"),
    1: Document(page_content="lido"),
    2: Document(page_content="across")
}
```

#### Chains `InMemoryDocstore`:

```python
{
    0: Document(page_content="ethereum"),
    1: Document(page_content="polygon"),
    2: Document(page_content="arbitrum")
}
```

### How the Documents Are Retrieved During FAISS Search

When you perform similarity search using FAISS (based on the embedding of the user input), the closest match (according to the embedding vector) is found in the FAISS index. The corresponding document ID is retrieved from the `InMemoryDocstore`.

- For example, if the user command is "Swap 100 USDT for ETH on Uniswap on Ethereum", the closest match for action could be the document with `page_content="swap"` in the `InMemoryDocstore`.
- The same logic applies for protocols and chains.

### Code to Visualize Stored Documents

If you'd like to see the stored documents in code, you can print them out as follows:

#### Print Stored Actions:

```python
for doc_id, doc in action_docstore._docs.items():
    print(f"Document ID: {doc_id}, Content: {doc.page_content}")
```

#### Print Stored Protocols:

```python
for doc_id, doc in protocol_docstore._docs.items():
    print(f"Document ID: {doc_id}, Content: {doc.page_content}")
```

#### Print Stored Chains:

```python
for doc_id, doc in chain_docstore._docs.items():
    print(f"Document ID: {doc_id}, Content: {doc.page_content}")
```

This code will output something like:

```
Document ID: 0, Content: swap
Document ID: 1, Content: stake
Document ID: 2, Content: bridge
```

### Visualizing How Everything Works Together

Here’s a visual flow of how the system works:

1. **Input**: User provides a natural language command (e.g., "Swap 100 USDT for ETH on Uniswap on Ethereum").
   
2. **Embeddings**: The input is embedded as a vector using OpenAI embeddings.

3. **Similarity Search with FAISS**:
   - FAISS uses the embeddings to search for the closest matching action, protocol, and chain in the FAISS index.
   
4. **Document Retrieval**:
   - Based on the FAISS search, the corresponding documents (e.g., `swap`, `uniswap`, `ethereum`) are retrieved from the `InMemoryDocstore`.

5. **LLM Usage**:
   - The LLM is asked to format the output in the desired DSL format, filling in the action, protocol, chain, and parameters.

6. **Output**: The final output is a structured JSON format (as described in your DSL schema).

### Example Visualization of a Full Process

1. **User Input**:  
   `"Swap 100 USDT for ETH on Uniswap on Ethereum"`

2. **Embeddings and Search Results**:  
   - Action: `"swap"`
   - Protocol: `"uniswap"`
   - Chain: `"ethereum"`

3. **Final Structured Output** (Generated by the LLM):
   ```json
   {
     "action": "swap",
     "protocol": "uniswap",
     "chain": "ethereum",
     "params": {
       "token_in": "USDT",
       "token_out": "ETH",
       "amount_in": "100"
     }
   }
   ```

### Summary

- **Documents**: Each document (action, protocol, and chain) is stored in the `InMemoryDocstore` as a `Document` object with `page_content`.
- **FAISS Search**: The embeddings allow for similarity search in the FAISS index to find the closest matching action, protocol, and chain.
- **Retrieval**: Based on the FAISS search, the document ID is used to retrieve the relevant document from the docstore.
- **LLM Formatting**: Finally, the LLM is used to format the information into a structured JSON format based on the DSL schema.

