In [1]:
# Load the OpenAI API key once per kernel: skip the lookup when `key` already
# exists so re-running this cell does not invoke the `op` command again.
if 'key' not in locals():
    # IPython `!` capture: the command's output lines are returned as a list.
    # (`op read` is presumably the 1Password CLI — confirm.)
    key = !op read "op://private/sfm openai api key/password"
    # Exactly one output line is expected: the secret itself.
    assert len(key) == 1
    key = key[0]
# Sanity check: the key is expected to be exactly 51 characters long.
assert len(key) == 51

In [2]:
import openai
import os
import json
from pprint import pprint
from pydantic import BaseModel

# Module-level credential: the ChatCompletion call later in the notebook does
# not pass a key explicitly and relies on this assignment.
openai.api_key = key

In [3]:
# Arguments of the `run_test` function offered to the model. The JSON schema of
# this class is sent as the function's `parameters`, and the model's reply is
# parsed back into it.
# NOTE: documented with `#` comments on purpose — pydantic copies a class
# docstring into the schema "description", which would change the API payload.
class RunTest(BaseModel):
    # Whether the model requests a visual observation. Not read anywhere in the
    # visible notebook code; only `agent_test_code` is used.
    include_visual_observation: bool
    # Java source to insert between the agent-code markers of the test file.
    agent_test_code: str

In [4]:
def read_test(path: str = r"D:\Repos\Minecraft\Forge\SuperFactoryManager\src\gametest\java\ca\teamdman\sfm\ai\OpenDoorTest.java") -> str:
    """Return the full text of the Java game test source file.

    Args:
        path: File to read. Defaults to the OpenDoorTest.java location used by
            this notebook, so existing zero-argument calls keep working.

    Returns:
        The file content as a string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8: without `encoding`, Python uses the platform
    # default (cp1252 on many Windows setups), which garbles non-ASCII source.
    # NOTE(review): assumes the Java sources are stored as UTF-8 — confirm.
    with open(path, "r", encoding="utf-8") as f:
        return f.read()

In [5]:
def write_test(content: str, path: str = r"D:\Repos\Minecraft\Forge\SuperFactoryManager\src\gametest\java\ca\teamdman\sfm\ai\OpenDoorTest.java") -> None:
    """Overwrite the Java game test source file with `content`.

    Args:
        content: Complete new text of the file.
        path: File to write. Defaults to the OpenDoorTest.java location used by
            this notebook, so existing one-argument calls keep working.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    # Encode explicitly as UTF-8: model-generated code may contain characters
    # the platform default codec (cp1252 on many Windows setups) cannot encode,
    # which would raise mid-write and leave a truncated source file.
    # NOTE(review): assumes the Java build reads sources as UTF-8 — confirm.
    with open(path, "w", encoding="utf-8") as f:
        f.write(content)

In [6]:
def indent_text(text, spaces=4):
    """Prefix every line of `text` with `spaces` space characters.

    Lines are delimited by "\\n" only; empty lines receive the prefix as well.
    """
    pad = " " * spaces
    # Pad the first line, then re-insert the pad after every newline.
    return pad + text.replace("\n", "\n" + pad)
print(indent_text("a\nb\nc",8))

        a
        b
        c


In [7]:
def with_agent_content(test: str, new_content: str) -> str:
    """
    Return `test` with the agent-code region replaced by `new_content`.

    We want to remove any previous attempts at the test, e.g. given

        // begin agent code
        item.setPos(Vec3.atCenterOf(helper.absolutePos(pressurePlatePos).offset(0,3,0)));
        // end agent code

    the content between the two comments is removed and `new_content` is put
    in its place. Both marker lines are kept. An empty `new_content` leaves
    the two markers on consecutive lines.

    Args:
        test: Full source text containing both markers (8-space indented).
        new_content: Replacement text, already indented by the caller.

    Returns:
        The rewritten source text.

    Raises:
        ValueError: If either marker is missing, or the end marker does not
            appear after the begin marker.
    """
    region_start = "        // begin agent code"
    region_end = "        // end agent code"

    start = test.find(region_start)
    if start == -1:
        # Previously find() returned -1 here and the slices below silently
        # produced a corrupted file.
        raise ValueError(f"marker not found: {region_start.strip()!r}")
    body_start = start + len(region_start)

    # Only accept an end marker that follows the begin marker.
    end = test.find(region_end, body_start)
    if end == -1:
        raise ValueError(f"marker not found after begin marker: {region_end.strip()!r}")

    # Non-empty content gets its own trailing newline so the end marker stays
    # on a line of its own.
    trailing_newline = "\n" if new_content != "" else ""
    return test[:body_start] + "\n" + new_content + trailing_newline + test[end:]

In [8]:
def get_agent_content(test: str) -> str:
    """Return the text currently sitting between the agent-code markers.

    The newline that terminates the begin-marker line is skipped. Everything
    up to (not including) the end-marker line is returned, so a non-empty
    region keeps its trailing newline and an empty region yields "".

    Args:
        test: Full source text containing both markers (8-space indented).

    Raises:
        ValueError: If either marker is missing, or the end marker does not
            appear after the begin marker.
    """
    region_start = "        // begin agent code"
    region_end = "        // end agent code"

    start = test.find(region_start)
    if start == -1:
        # Previously find() returned -1 here and an arbitrary slice of the
        # file was returned instead.
        raise ValueError(f"marker not found: {region_start.strip()!r}")
    body_start = start + len(region_start)

    # Only accept an end marker that follows the begin marker.
    end = test.find(region_end, body_start)
    if end == -1:
        raise ValueError(f"marker not found after begin marker: {region_end.strip()!r}")

    # +1 skips the line break right after the begin marker.
    return test[body_start + 1:end]

# begin hotkey_test

In [9]:
import pyautogui
import pygetwindow as gw
from time import sleep
import pyperclip

In [10]:
def focus_intellij():
    """Best-effort: activate the first window whose title contains "TestChambers.java".

    Failures are reported with a printed message and never raised, so callers
    continue either way. Returns None.

    NOTE(review): the notebook edits OpenDoorTest.java while this looks for a
    window titled with TestChambers.java — confirm the filter is still current.
    """
    wanted = "TestChambers.java"
    try:
        # A default of None avoids StopIteration, whose empty message used to
        # produce the uninformative "Error focusing window: " line.
        title = next((t for t in gw.getAllTitles() if wanted in t), None)
        if title is None:
            print(f"Error focusing window: no open window title contains {wanted!r}")
            return
        windows = gw.getWindowsWithTitle(title)
        if not windows:
            # Explicit check instead of `assert`, which is stripped under -O.
            print(f"Error focusing window: Window not found: {title}")
            return
        windows[0].activate()
    except Exception as e:
        # Include the exception type: some exceptions carry no message.
        print(f"Error focusing window: {type(e).__name__}: {e}")

In [11]:
def is_building():
    """Return True when the screen pixel at (1947, 622) is RGB (95, 173, 101).

    hot_reload() polls this to decide whether a build is still in progress.
    The coordinates and colour are specific to the author's screen layout.
    """
    indicator_x, indicator_y = 1947, 622
    building_rgb = (95, 173, 101)
    observed_rgb = pyautogui.pixel(indicator_x, indicator_y)
    return observed_rgb == building_rgb

In [12]:
def get_build_output() -> str:
    """Copy the text of the on-screen build output via mouse/keyboard automation.

    Drags a few pixels at a fixed screen position, sends Ctrl+A and Ctrl+C,
    moves the cursor back to where it was, and returns the clipboard text
    with carriage returns removed.

    Side effects: moves the mouse, sends keystrokes to the focused window and
    overwrites the clipboard. The coordinates are specific to the author's
    screen layout.
    """
    # store cursor position
    x, y = pyautogui.position()
    # click and drag from 2890, 638 to 2895, 638 to select text without activating links
    pyautogui.moveTo(2890, 638)
    pyautogui.mouseDown()
    pyautogui.moveTo(2895, 638)
    pyautogui.mouseUp()
    # select everything in the now-focused pane and copy it to the clipboard
    pyautogui.hotkey('ctrl', 'a')
    pyautogui.hotkey('ctrl', 'c')
    # restore cursor position
    pyautogui.moveTo(x, y)
    # drop '\r' so the text uses '\n' line endings only
    return pyperclip.paste().replace('\r', '')

In [13]:
def is_happy_build_output(output):
    """Return True when `output` is exactly the reference log below.

    Leading and trailing whitespace is ignored on both sides; any other
    difference (extra lines, warnings, errors) yields False.
    """
    reference_log = """
Executing pre-compile tasks…
Running 'before' tasks
Checking sources
Running 'after' tasks
Finished, saving caches…
Executing post-compile tasks…
Finished, saving caches…
    """
    expected = reference_log.strip()
    return output.strip() == expected

In [14]:
def hot_reload():
    """Trigger a hot reload by hotkey and report how the build went.

    Steps: focus the IDE window, press Ctrl+Alt+Num0, poll is_building()
    until it reports False, wait another half second, then copy the build
    output and compare it with the known-good log.

    Returns:
        (success, output): `success` is the result of
        is_happy_build_output(output); `output` is the copied build text.
    """
    print("[Hot Reload] Focusing IntelliJ")
    focus_intellij()

    print("[Hot Reload] Trigger hot reload")
    pyautogui.hotkey('ctrl', 'alt', 'num0')

    print("[Hot Reload] Wait for build to finish")
    # Poll every 100 ms for as long as the build indicator is showing.
    while True:
        if not is_building():
            break
        sleep(0.1)
    # Extra settle time once the indicator has cleared.
    sleep(0.5)

    print("[Hot Reload] Copy build output")
    build_log = get_build_output()
    build_ok = is_happy_build_output(build_log)
    print(f"[Hot Reload] Build output succeeded: {build_ok}")
    return build_ok, build_log

# end hotkey_test

# begin runner_comms

In [15]:
from pathlib import Path
# Directory in which run_test() creates, polls, archives and deletes run.txt.
msgdir = Path("./messages")
# Create it up front; no error if it already exists.
msgdir.mkdir(parents=True, exist_ok=True)

In [16]:
def run_test():
    """Request a test run through messages/run.txt and return the reported results.

    Creates an empty run.txt in `msgdir`, blocks until that file becomes
    non-empty, reads it, archives the current agent code together with the
    results as run_<timestamp>.txt, deletes run.txt and returns the results.

    Returns:
        The text found in run.txt.

    NOTE(review): there is no timeout, and the file is read as soon as it is
    non-empty, so a slow writer could be observed mid-write.
    """
    # Local imports keep this cell runnable regardless of earlier import cells.
    from datetime import datetime
    from time import sleep

    print("[Run Test] Running test, ensure you ran `/sfm_ai listen` in Minecraft")
    request_file = msgdir / "run.txt"

    # Create the request file empty. Unlike touch(), this also truncates a
    # run.txt left over from an interrupted run, which would otherwise end the
    # wait loop at once and return stale results for new code.
    request_file.write_text("")

    # wait for the file to have test results appended
    while request_file.stat().st_size == 0:
        sleep(0.1)

    # read the file
    with open(request_file, "r") as f:
        results = f.read()

    archive_content = get_agent_content(read_test()) + "\n\n" + results

    # clean up the file by renaming it with the time
    now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    # UTF-8 so agent code with non-ASCII characters cannot fail the archive write.
    (msgdir / f"run_{now}.txt").write_text(archive_content, encoding="utf-8")
    request_file.unlink()

    return results

# end runner_comms

In [17]:
def save_convo(messages, path="convo.md"):
    """Write the conversation to a text file for inspection.

    Each message becomes a "# ~=~ <role>" header followed by its content. A
    message that has a "function_call" entry gets an extra "---" separator
    and the function call rendered with str().

    Args:
        messages: Iterable of mapping-like messages with "role" and "content"
            keys and an optional "function_call" key.
        path: Output file, overwritten on every call. Defaults to "convo.md".
    """
    parts = []
    for msg in messages:
        parts.append(f"# ~=~ {msg['role']}\n{msg['content']}\n")
        if "function_call" in msg:
            parts.append(f"---\n{msg['function_call']}\n")
    # UTF-8 explicitly: model output often has characters the platform default
    # codec cannot encode, which would abort the write.
    with open(path, "w", encoding="utf-8") as f:
        f.write("".join(parts))

In [18]:
# Conversation seed: a system prompt plus one user message carrying the current
# test source with the agent-code region emptied (with_agent_content(..., '')).
# The loop cell always sends these first two entries and appends later turns.
messages = [
    {"role": "system", "content": "The assistant is tasked with solving puzzles in a Minecraft game test environment, similar to the video game _Portal_. The assistant is presented a game test with some code at the beginning and end of the test that the agent can not change. The agent is responsible for replacing the code in the middle of the test to cause the test to succeed."},
    {"role": "user", "content": f"Current test content:\n```java\n{with_agent_content(read_test(), '')}```"},
]

In [41]:
# Agent loop: ask the model for replacement test code, write it into the test
# file, hot-reload, run the test when the build is clean, feed the outcome
# back, and repeat until the test output contains "passed".
# NOTE(review): there is no cap on the number of tries (each one is an API call).
from time import sleep
tries = 0
while True:
    print("Getting the AI to propose a new solution")
    # Always send the two seed messages plus at most the latest
    # assistant/function pair. Taking the tail from messages[2:] keeps the
    # first iteration (only the two seeds exist) from sending the seeds twice,
    # which `messages[:2] + messages[-2:]` did.
    recent_turns = messages[2:][-2:]
    response0 = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages[:2] + recent_turns,
        functions=[
            {
            "name": "run_test",
            "description": "Run the test with the agent code block substituted for the provided content. Requesting a visual observation will slow down the process through a dependency on humans.",
            "parameters": RunTest.model_json_schema()
            },
        ],
        # Force the model to answer with a run_test function call.
        function_call={"name": "run_test"}
    )
    tries += 1

    print("Performing build")
    run = RunTest(**json.loads(response0.choices[0]["message"]["function_call"]["arguments"]))
    # Agent code is indented by 12 spaces before being placed between the markers.
    write_test(with_agent_content(read_test(), indent_text(run.agent_test_code, 12)))
    build_success, build_output = hot_reload()
    if build_success:
        print("Build succeeded! Running test", build_output)
        test_output = run_test()
    else:
        print("Build failed!", build_output)

    # update conversation history
    # The function result is the test output for a clean build, else the build log.
    fn0 = {
        "role": "function",
        "name": "run_test",
        "content": test_output if build_success else build_output,
    }
    messages.append(response0.choices[0]["message"])
    messages.append(fn0)
    save_convo(messages)

    if build_success and "passed" in test_output:
        print(f"Test passed after {tries} tries!", test_output)
        break
    else:
        print(f"Test failed! {tries} tries so far, test output: {test_output if build_success else None}")
    print("sleeping just in case")
    sleep(3)
    

Getting the AI to propose a new solution


Performing build
[Hot Reload] Focusing IntelliJ
Error focusing window: 
[Hot Reload] Trigger hot reload
[Hot Reload] Wait for build to finish
[Hot Reload] Copy build output
[Hot Reload] Build output succeeded: True
Build succeeded! Running test Executing pre-compile tasks…
Running 'before' tasks
Checking sources
Running 'after' tasks
Finished, saving caches…
Executing post-compile tasks…
Finished, saving caches…
[Run Test] Running test, ensure you ran `/sfm_ai listen` in Minecraft
Test failed! 1 tries so far, test output: open_door failed! Expected property open to be true, was false at 2,-58,4 (relative: 2,2,1) (t=60)
sleeping just in case
Getting the AI to propose a new solution
Performing build
[Hot Reload] Focusing IntelliJ
Error focusing window: 
[Hot Reload] Trigger hot reload
[Hot Reload] Wait for build to finish
[Hot Reload] Copy build output
[Hot Reload] Build output succeeded: True
Build succeeded! Running test Executing pre-compile tasks…
Running 'before' tasks
Checking sour