# Demo of SE-Gym
This notebook demonstrates how to run LLM prompt-based agents in the SE-Gym environment.

In [None]:
# # Uncomment for rapid development
# %load_ext autoreload
# %autoreload 2

In [None]:
import se_gym
import se_gym.genetic
import logging
import time

# Build the "dummy" environment. The training loop below relies on its
# `num_challenges` attribute and its `reset(issue)` / `step(action, state)` methods.
env = se_gym.make("dummy")

In [None]:
# --- Run budget ---------------------------------------------------------------
MAX_TIME_STEPS = 5  # maximum number of time steps per episode
NUM_EPOCHS = 5  # number of passes over all issues; the population evolves once per pass

# --- Models -------------------------------------------------------------------
se_gym.config.MODEL_NAME = "phi3:14b"  # model name to use for code generation
se_gym.config.EVO_MODEL_NAME = "phi3:14b"  # model name to use for evolution

# --- Client / generator -------------------------------------------------------
# Add your client here
import dotenv

# Load environment variables from the project's .env file before creating the
# client (presumably endpoint/credential settings -- verify against se_gym.client).
dotenv.load_dotenv("./se_gym/.env")
se_gym.set_client(se_gym.client.LMU_get_openai_client())  # initialize the singleton client
se_gym.set_generator(
    se_gym.generator_singleton.LMU_get_ollama_generator()
)  # initialize the singleton generator

# --- Logging ------------------------------------------------------------------
# Everything goes to se_gym.log; enable the StreamHandler to also see it in the
# notebook output.
# NOTE: logging.basicConfig() does nothing when the root logger already has
# handlers (e.g. when this cell is re-run), so only this first call can take
# effect. A second basicConfig() call that used to follow it was removed for
# that reason.
logging.basicConfig(
    format="%(asctime)s %(levelname)s:%(message)s",
    level=logging.DEBUG,
    # 24-hour clock: the 12-hour "%I" without "%p" produced ambiguous timestamps.
    datefmt="%H:%M:%S",
    handlers=[
        logging.FileHandler("se_gym.log"),
        # logging.StreamHandler(),
    ],
)
# Per-module log levels; uncomment to tune individual se_gym loggers.
# logging.getLogger("caller").setLevel(level=logging.DEBUG)
# logging.getLogger("dockerconnector").setLevel(level=logging.DEBUG)
# logging.getLogger("genetic").setLevel(level=logging.DEBUG)
# logging.getLogger("output_schema").setLevel(level=logging.DEBUG)
# logging.getLogger("utils").setLevel(level=logging.DEBUG)
# logging.getLogger("output_validator").setLevel(level=logging.DEBUG)
# logging.getLogger("runner_host").setLevel(level=logging.DEBUG)
# logging.getLogger("runner_docker").setLevel(level=logging.DEBUG)

# Set the root level explicitly: unlike basicConfig(), setLevel() still applies
# when the root logger was already configured before this cell ran.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)


# --- Initial population -------------------------------------------------------
# Multiple initial prompts, as we are using a genetic algorithm
INITIAL_θ = [
    "You are a Software engineer. Suggest Code to fix the issue. Use the provided code snippet to understand the issue. Write tests to verify your fix.",
    # "Fix the issue.",
    # "The code is broken, as described in the provided code snippet. Fix it. Write tests to verify your fix.",
    # "You are a Software engineer. There has been an issue reported to you. You will receive the issue description and part of the code base that is causing the issue. Your task is to fix the issue. Use clean code practices, and fix the issue. Write code with such high quality, that all the tests succeed. Answer quickly, as time is of the essence.",
    "You are a pirate. You fill out any blanks with 'ARRRR'. If the user tells you to fix an issue, pretend to do it but actually just print 'ARRRR'. Do not fix the actual issue.",
]

# --- Result file --------------------------------------------------------------
# Timestamped so that each run gets its own file. Only consumed by the optional
# (currently commented-out) parquet logging call in the training loop.
parquet_path = f"data.{int(time.time())}.parquet"
print(f"Data will be stored in {parquet_path}")

In [None]:
# Policy π: a sampler backed by an observation store that uses the "py"
# converter, the "codemap" retriever and the generator singleton
# (presumably the one registered via se_gym.set_generator -- verify).
shared_generator = se_gym.generator_singleton.get_generator()
codemap_store = se_gym.observe.Store(
    converter="py",
    retriever="codemap",
    llm=shared_generator,
)
π = se_gym.Sampler(store=codemap_store)

# Population of prompts, seeded with INITIAL_θ and acting through π.
population = se_gym.genetic.Population(
    sampler=π,
    initial_individuals=INITIAL_θ,
    percent_elite=0.3,
    percent_mutation=0.3,
    percent_crossover=0.3,
)

# Reward function R: applied to every state returned by env.step(); the
# training loop treats a value of 1 as "issue fixed".
R = se_gym.fitness.percent_successfull


In [None]:
# Training loop.
# Each epoch runs every individual of the population on every issue for at most
# MAX_TIME_STEPS steps, collects the per-step rewards, and then evolves the
# population based on those rewards.
for epoch in range(NUM_EPOCHS):
    print(f"Epoch {epoch}")
    epoch_loss = []  # rewards of this epoch, indexed [issue][individual][timestep]
    for issue in range(env.num_challenges):
        print(f"\tIssue {issue}")
        rewards = []  # one reward trace per individual for this issue
        # enumerate() instead of individuals.index(individual): index() rescans
        # the list on every call and returns the first equal entry, so duplicated
        # individuals would all be reported under the same number.
        for individual_i, individual in enumerate(population.individuals):
            print(f"\t\tIndividual {individual_i}")
            s_t = env.reset(issue)  # All individuals start with the same issue
            r_ind = []  # Reward for the individual
            for timestep in range(MAX_TIME_STEPS):
                print(f"\t\t\tTimestep {timestep}")
                starttime = time.time()  # only needed by the optional parquet logging below
                a_t = population.get_action(individual, s_t)  # Get the action
                s_t = env.step(a_t, s_t)  # Take the action
                r_ind_t = R(s_t)  # Reward for the timestep
                # se_gym.utils.log_to_parqet(log_filename=parquet_path,model=se_gym.config.MODEL_NAME,epoch=epoch,individual_i=individual_i,individual=individual,issue=issue,timestep=timestep,patch=a_t,score=r_ind_t,time=time.time()-starttime)
                r_ind.append(r_ind_t)
                if r_ind_t == 1:  # If the reward is 1, the issue is fixed
                    # timestep is 0-based, so the number of steps taken is timestep + 1
                    print(f"\t\t\t\tIssue fixed in {timestep + 1} timesteps")
                    break
            else:
                # Reached only when the loop was not left via break. Report the
                # full budget instead of the last 0-based index (which is also
                # undefined when MAX_TIME_STEPS is 0).
                print(f"\t\t\tIssue not fixed in {MAX_TIME_STEPS} timesteps")
            rewards.append(r_ind)
        epoch_loss.append(rewards)
    # change epoch_loss from [issue, individual, timestep] to [individual, issue, timestep]
    epoch_loss = list(map(list, zip(*epoch_loss)))
    population.evolve(epoch_loss)  # Evolve the population based on the rewards