### Idea of this notebook

- Start the app from this notebook
- Use model predictions to control the app(s)

### Workflow

- Use a Predictor class instance (wrapping a language model) for a test run
- Create an app here and reset it
- Keep a running total of the reward
- Use the initial state and subsequent states to control the app

In [None]:
from naturalnets.environments.i_environment import get_environment_class
from naturalnets.tools.utils import rescale_values

from predictors import DummyAppPredictor, GUIAppPredictor

In [None]:
# Settings passed to the DummyApp environment (screen size, button grid, seeding).
env_config = {
    "type": "DummyApp",
    "number_time_steps": 100,
    "screen_width": 400,
    "screen_height": 400,
    "number_button_columns": 5,
    "number_button_rows": 5,
    "button_width": 50,
    "button_height": 30,
    "fixed_env_seed": False,
    "force_consecutive_click_order": False
}

# Actions the predictor may choose from. Derived from the configured button grid
# (columns * rows) instead of a hard-coded 25, so it stays in sync when the grid
# size above is changed.
allowed_actions = list(
    range(env_config["number_button_columns"] * env_config["number_button_rows"])
)

In [None]:
# Rebinds env_config to the GUIApp settings, replacing any configuration
# assigned to that name by an earlier cell.
env_config = dict(
    type="GUIApp",
    number_time_steps=200,
    include_fake_bug=False,
)

In [None]:
# Look up the environment class registered under the configured type and
# instantiate it with the same configuration dict.
env_class = get_environment_class(env_config["type"])
app = env_class(env_config)

# GUIApp gets its dedicated predictor; every other type uses the DummyApp predictor.
predictor_class = (
    GUIAppPredictor if env_config["type"] == "GUIApp" else DummyAppPredictor
)

### Open Source Model

In [None]:
# Checkpoint used for the open-source predictor. Other candidates that can be
# swapped in by editing the assignment below:
#   "EleutherAI/gpt-j-6B"         - General CausalLM
#   "EleutherAI/gpt-neo-1.3B"     - General CausalLM
#   "allenai/tk-instruct-3b-def"  - seq2seq instructional LM
model_name = "google/flan-t5-base"  # seq2seq instructional LM

# NOTE(review): with do_sample=False, Hugging Face generation usually ignores
# `temperature`, and requesting several return sequences generally requires
# sampling or beam search - confirm how the predictor forwards these settings.
predictor = predictor_class(
    # Which model to load and for which environment
    model_name=model_name,
    env_config=env_config,
    use_openai=False,
    # Generation settings
    do_sample=False,
    temperature=0.5,
    num_return_sequences=5,
    max_new_tokens=3,
)

### OpenAI model

In [None]:
# Rebinds `predictor` to an OpenAI-backed instance; any predictor created by an
# earlier cell is replaced.
# NOTE(review): "text-davinci-003" appears to have been retired by OpenAI -
# confirm the model is still available or pick a supported replacement.
predictor = predictor_class(
    # Which model to use and for which environment
    model_name="text-davinci-003",
    env_config=env_config,
    use_openai=True,
    # Generation settings
    do_sample=False,
    temperature=0.0,
    num_return_sequences=1,
    max_new_tokens=3,
)

In [None]:
# Choose the prompt template the predictor will use
# (presumably template number 1 - confirm the numbering in the predictors module).
predictor.set_prompt_template(1)

In [None]:
# Bare expression: the notebook shows the predictor's current prompt template
# as this cell's output, which makes it easy to check the selection above.
predictor.current_prompt_template

In [None]:
def one_interaction(_app, _predictor, meta_info_id: int, print_prompt=False, allowed=None):
    """Run one predict-and-click step of the app, driven by the predictor.

    The current observation is converted into a prompt, the predictor proposes
    candidate buttons, and the first candidate that is both allowed and not yet
    pressed is forwarded to the app via ``step_widget``.

    Args:
        _app: Environment exposing ``get_observation_dict()`` and
            ``step_widget(button)``.
        _predictor: Object exposing ``convert_to_prompt(observation)`` and
            ``predict(prompt)``.
        meta_info_id: Currently unused; kept so existing calls keep working.
        print_prompt: When true, print the generated prompt before predicting.
        allowed: Optional collection of permitted actions. When omitted, the
            notebook-level ``allowed_actions`` is used (the original behavior).

    Returns:
        The button that was clicked, or ``None`` when no candidate qualified.
    """
    if allowed is None:
        # Backward-compatible default: fall back to the notebook-level list.
        allowed = allowed_actions

    ob = _app.get_observation_dict()
    # TODO fix this for GUIApp, see also allowed_actions
    # ob = _app.app_controller.get_states_info()

    prompt = _predictor.convert_to_prompt(ob)

    if print_prompt:
        print(prompt)
        print("\n")

    possible_buttons = _predictor.predict(prompt)

    # An observation without a "pressed_buttons" entry is treated as
    # "nothing pressed yet" instead of raising KeyError.
    already_pressed = ob.get("pressed_buttons", ())

    # First candidate that is allowed and still unpressed; None if there is none.
    # `or ()` keeps an empty/None prediction from breaking the iteration.
    selected = next(
        (
            button
            for button in (possible_buttons or ())
            if button in allowed and button not in already_pressed
        ),
        None,
    )

    # Compare against None explicitly: button 0 is a valid selection.
    if selected is not None:
        _app.step_widget(selected)
        selected_text = f"{selected}"
    else:
        # Make a skipped step visible rather than ending the line with nothing.
        selected_text = "none"

    print(f"Possible buttons: {possible_buttons}. Selected: {selected_text}")
    return selected

In [None]:
# Start from a fresh app state, then let the predictor drive five interactions.
app.reset()

for step in range(5):
    # Only the very first interaction echoes its prompt.
    one_interaction(app, predictor, meta_info_id=1, print_prompt=(step == 0))