# Demo of SE-Gym
This is a demo of running LLM-Prompt-based agents in the SE-Gym environment.

In [None]:
import se_gym
import se_gym.genetic
import dotenv
import logging

# Load settings from the package-local .env file before the environment is created.
dotenv.load_dotenv(dotenv_path="./se_gym/.env")

# Build the "dummy" SE-Gym environment that the rest of the notebook drives.
env = se_gym.api.make("dummy")


In [None]:
# Number of environment steps taken per outer iteration of the training loop.
MAX_TIME_STEPS = 20

# Send INFO-and-above records to both se_gym.log and the console.
logging.basicConfig(
    format="%(asctime)s %(levelname)s:%(message)s",
    level=logging.INFO,
    # 24-hour clock: "%I" (12-hour) without "%p" would make 01:00 and 13:00
    # look identical in the log.
    datefmt="%H:%M:%S",
    handlers=[logging.FileHandler("se_gym.log"), logging.StreamHandler()],
)

# Named loggers that are switched to DEBUG verbosity.
# NOTE(review): presumably these are the names se_gym's modules log under;
# confirm against the package.
for _logger_name in (
    "caller",
    "dockerconnector",
    "genetic",
    "output_schema",
    "utils",
):
    logging.getLogger(_logger_name).setLevel(level=logging.DEBUG)


In [None]:
# Reset the environment and keep whatever `reset()` returns in `state`.
# NOTE(review): `state` is not read again in the visible cells; the sampler
# setup and the training loop each call `env.reset()` themselves.
state = env.reset()

# Seed prompts for the genetic algorithm: one entry per initial individual.
# NOTE: the prompt text is runtime data fed to the model, so the wording is
# kept verbatim; the literals are only split across lines for readability.
INITIAL_θ = [
    (
        "You are a Software engineer. Suggest Code to fix the issue. "
        "Use the provided code snippet to understand the issue. "
        "Write tests to verify your fix."
    ),
    "Fix the issue.",
    (
        "The code is broken, as described in the provided code snippet. "
        "Fix it. Write tests to verify your fix."
    ),
    (
        "You are a Software engineer. There has been an issue reported to you. "
        "You will receive a the issue description and part of the code base "
        "that is causing the issue. Your task is to fix the issue. "
        "Use clean code practices, and fix the issue. "
        "Write code with such high quality, that all the tests succeed. "
        "Anwser quickly, as time is of the essence."
    ),
    (
        "You are a pirate. You fill out any blanks with 'ARRRR'. "
        "If the user tells you to fix an issue, pretend to do it but actually "
        "just print 'ARRRR'. Do not fix the actual issue."
    ),
]


In [None]:
# Name of the model that se_gym is configured to use.
se_gym.config.MODEL_NAME = "llama3:70b"

# Add your client here
client = se_gym.openai_lmu.get_lmu_openai_client()
# Register the client with se_gym's singleton wrapper; the return value is unused.
se_gym.client._Client(client)

# The sampler is rooted at the path reported by a freshly reset environment.
code_base_root = env.reset().path
π = se_gym.Sampler(code_base_root=code_base_root)

# Population seeded with the initial prompts; sampling goes through π.
population = se_gym.genetic.Population(
    initial_individuals=INITIAL_θ,
    percent_elite=0.3,
    percent_mutation=0.3,
    percent_crossover=0.3,
    sampler=π,
)

# Checkout directory of the dummy repository and the files handed to the reader.
DUMMY_REPO_ROOT = "./temp/gstenzelignore-this-dummy"
oracle_files = [
    f"{DUMMY_REPO_ROOT}/{relative_path}"
    for relative_path in (
        "magic/main.py",
        "magic/__init__.py",
        "magic/test/test_main.py",
    )
]
oracle_reader = se_gym.observe.read.OracleReader(
    root_dir=DUMMY_REPO_ROOT,
    files=oracle_files,
)
observer = se_gym.observe.Observer(
    reader=oracle_reader,
    selector=se_gym.observe.select.FullSelector(),
)

# Reward function; the training loop applies it to every state returned by env.step().
R = se_gym.fitness.percent_successfull


In [None]:
# Two outer iterations, each starting again from a freshly reset environment.
for iteration in range(2):
    r = 0  # reward summed over all time steps of this iteration
    s_t = env.reset()
    for t in range(MAX_TIME_STEPS):
        # observe -> sample actions -> step the environment
        o_t = observer(s_t)
        a_t = population.sample(o_t)
        s_t = env.step(a_t)
        # One reward per element of the returned state.
        current_r = list(map(R, s_t))
        r += sum(current_r)
        print(f"Current reward: {current_r}")
        # The population is evolved after every step, using that step's rewards.
        population.evolve(current_r)
    ## evolve the population based on the total reward
    # population.evolve(r)
