# This notebook demonstrates (and tests) scenarios for generating requests to the AICI server (evolving API)

In [1]:
import sys

# Directory added to the module search path so that `promptlib` can be imported
# without installing it. The location is specific to this checkout; adjust
# PROMPTLIB_PATH if the repository lives elsewhere.
PROMPTLIB_PATH = '/workspaces/aici/promptlib'

# Guard the insert so that re-running this cell does not stack duplicate
# entries at the front of sys.path.
if PROMPTLIB_PATH not in sys.path:
    sys.path.insert(0, PROMPTLIB_PATH)


In [2]:
from promptlib import PromptNode
from promptlib import set_model, append, gen, choose, begin, end
from promptlib.models import LLM, TransformersLLM
from promptlib.endpoints import AICI

  from .autonotebook import tqdm as notebook_tqdm


# Here are examples of plans generated by promptlib

First, we show building a single path through a prompt tree.

In [3]:
# Model wrapper used for this plan.
llm = TransformersLLM("gpt2")

# One linear path: set the model, then two fixed prompts, each followed by a
# generation step capped at 10 tokens.
pn = (
    PromptNode()
    .set_model(llm)
    .append("Here is a generated prompt:")
    .gen(max_tokens=10)
    .append("Here is another generated prompt. ")
    .gen(max_tokens=10)
)

# Serialize the path ending at `pn` into a plan and show it.
aici_steps = pn.build_linear_plan()
print(aici_steps)

{'steps': [{'Model': {'model': 'gpt2'}}, {'Fixed': {'text': 'Here is a generated prompt:'}}, {'Gen': {'max_tokens': 10, 'rx': '.+', 'genargs': {}}}, {'Fixed': {'text': 'Here is another generated prompt. '}}, {'Gen': {'max_tokens': 10, 'rx': '.+', 'genargs': {}}}]}


In [4]:
llm = TransformersLLM("gpt2")

# Keep a handle on the root node: the tree plan below is built from the root,
# not from the tail of the chain.
pn0 = PromptNode()

# Two prompt/generation pairs are wrapped in a block with id "block1";
# a final prompt/generation pair follows after the block is closed.
pn = (
    pn0.set_model(llm)
    .begin(id="block1")
    .append("Here is a generated prompt. Resp:")
    .gen(max_tokens=10)
    .append("Here is another generated prompt. Resp:")
    .gen(max_tokens=10)
    .end()
    .append("Here is a final prompt. Resp:")
    .gen(max_tokens=10)
)

aici_steps = pn0.build_tree_plan()
print(aici_steps)

{'steps': [{'Model': {'model': 'gpt2'}}, {'block': {'steps': [{'Fixed': {'text': 'Here is a generated prompt. Resp:'}}, {'Gen': {'max_tokens': 10, 'rx': '.+', 'genargs': {}}}, {'Fixed': {'text': 'Here is another generated prompt. Resp:'}}, {'Gen': {'max_tokens': 10, 'rx': '.+', 'genargs': {}}}], 'id': 'block1'}}, {'Fixed': {'text': 'Here is a final prompt. Resp:'}}, {'Gen': {'max_tokens': 10, 'rx': '.+', 'genargs': {}}}]}


The constraints imposed here each have a Python implementation and can also return a regex that expresses the same constraint.

The general idea is that a constraint written in Python has to correspond to an implementation of that constraint in the Rust/WASM AST-Runner. If no such implementation exists, we need to upload imperative code (in WASM) to the server to implement the constraint.

In [5]:
from promptlib.constraints import DummyCharacterConstraint, OddEvenDigitConstraint

llm = TransformersLLM("gpt2")

# Root node; the tree plan is built from here rather than from the chain's tail.
pn0 = PromptNode()

# Both constraints are attached inside "block1", before its append/gen steps.
# NOTE(review): the recorded plan shows DummyCharacterConstraint emitting the
# regex '[h|e|l|o]+'. Inside a character class '|' is a literal, so that
# pattern would also admit '|' -- confirm against promptlib.constraints.
pn = (
    pn0.set_model(llm)
    .begin(id="block1")
    .constrain(DummyCharacterConstraint(['h','e','l','o']))
    .constrain(OddEvenDigitConstraint())
    .append("Here is a generated prompt. Resp:")
    .gen(max_tokens=10)
    .append("Here is another generated prompt. Resp:")
    .gen(max_tokens=10)
    .end()
    .append("Here is a final prompt. Resp:")
    .gen(max_tokens=10)
)

aici_steps = pn0.build_tree_plan()
print(aici_steps)

{'steps': [{'Model': {'model': 'gpt2'}}, {'block': {'steps': [{'constraint': {'type': 'regex', 'rx': '[h|e|l|o]+'}}, {'constraint': {'type': 'regex', 'rx': '^(?:[13579][02468])*[13579]|[02468]([13579]?|(?:[13579][02468])*)$'}}, {'Fixed': {'text': 'Here is a generated prompt. Resp:'}}, {'Gen': {'max_tokens': 10, 'rx': '.+', 'genargs': {}}}, {'Fixed': {'text': 'Here is another generated prompt. Resp:'}}, {'Gen': {'max_tokens': 10, 'rx': '.+', 'genargs': {}}}], 'id': 'block1'}}, {'Fixed': {'text': 'Here is a final prompt. Resp:'}}, {'Gen': {'max_tokens': 10, 'rx': '.+', 'genargs': {}}}]}


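Here's an example of actually uploading a program and plan to the server. This cell needs an AICI server reachable at the configured `base_url`; the output recorded below is from a run in which the module upload failed with a 500 error.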

In [5]:
# Client for an AICI server on localhost, configured with the path to the
# AST-runner WASM file.
ep = AICI(
    base_url="http://127.0.0.1:8080/v1/",
    wasm_runner_path="/workspaces/aici/aici_ast_runner/target/strip.wasm",
)

# Root node bound to the endpoint; it is also the object used to run the plan.
endpoint = PromptNode().set_endpoint(ep)

# Three prompts, each followed by a generation step with its own token cap.
pn = (
    endpoint
    .append("Please answer the following questions:\n Q: Who is the president of the USA?\n A:")
    .gen(max_tokens=10)
    .append("\nQ: And who is the vice president?\n A:")
    .gen(max_tokens=20)
    .append("\nPlease give a url with evidence\n http://")
    .gen(max_tokens=20)
)

# Left as the last expression so its result is displayed as the cell output.
endpoint.run(pn)

upload module... 

RuntimeError: bad response to model upload: 500 Internal Server Error: Internal Server Error