# In [1]:
import numpy as np
import random

def create_environment(N, M):
    """Build a random N-by-M grid world enclosed by walls.

    Every cell is first drawn independently as "clean" or "dirty" with
    equal probability; the first and last rows and columns are then
    overwritten with "wall".

    Args:
        N: Number of rows, including the two wall rows.
        M: Number of columns, including the two wall columns.

    Returns:
        A NumPy array of shape (N, M) holding the cell-state strings.
    """
    states = ["clean", "dirty"]
    grid = np.random.choice(states, size=(N, M), p=[0.5, 0.5])

    # Paint the outer frame: top/bottom rows first, then left/right columns.
    grid[[0, N - 1], :] = "wall"
    grid[:, [0, M - 1]] = "wall"
    return grid

def get_valid_moves(x, y, N, M):
    """List the moves that keep the agent off the border cells.

    Args:
        x: Current row index of the agent.
        y: Current column index of the agent.
        N: Total number of rows in the grid (border rows included).
        M: Total number of columns in the grid (border columns included).

    Returns:
        A list containing any of "up", "down", "left", "right", in that
        order, for which the destination row/column stays within
        1..N-2 and 1..M-2 respectively.
    """
    # Each direction is paired with the condition under which it is allowed.
    candidates = (
        ("up", x > 1),
        ("down", x < N - 2),
        ("left", y > 1),
        ("right", y < M - 2),
    )
    return [direction for direction, allowed in candidates if allowed]

def run_simulation(N, M, T):
    """Run one episode of the random cleaning agent and return its score.

    A fresh environment is generated, the agent starts at cell (1, 1) and
    the score starts at 5. On each of the T steps:

    * if the current cell is "dirty", it is cleaned and the score goes up
      by 1 (the agent does not move on that step);
    * otherwise an action is picked uniformly at random from the valid
      moves plus "noop"; any action other than "noop" costs 1 point.

    Args:
        N: Number of grid rows (border included).
        M: Number of grid columns (border included).
        T: Number of time steps to simulate.

    Returns:
        The final score after T steps.
    """
    grid = create_environment(N, M)
    row, col = 1, 1
    total = 5

    # (row delta, column delta) for each movement action.
    offsets = {
        "left": (0, -1),
        "right": (0, 1),
        "down": (1, 0),
        "up": (-1, 0),
    }

    for _ in range(T):
        if grid[row, col] == "dirty":
            grid[row, col] = "clean"
            total += 1
            continue

        options = [*get_valid_moves(row, col, N, M), "noop"]
        chosen = random.choice(options)  # Random movement or noop
        if chosen == "noop":
            continue

        d_row, d_col = offsets.get(chosen, (0, 0))
        row += d_row
        col += d_col
        total -= 1  # every non-noop action is penalised

    return total

def main():
    """Run the simulation repeatedly and print the mean score per horizon.

    For each step count in [100, 250, 500, 1000], 100 independent runs on
    a 10x10 grid are executed and the average of their scores is printed.
    """
    N, M = 10, 10
    runs = 100
    iterations_list = [100, 250, 500, 1000]

    results = {}
    for T in iterations_list:
        # Collect the scores of all runs for this horizon before averaging.
        results[T] = [run_simulation(N, M, T) for _ in range(runs)]
        average = np.mean(results[T])
        print(f"T = {T}: Avg Score = {average}")

# Run the experiment only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()





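# Sample output from one run (values vary between runs because the
# simulation is random):
# T = 100: Avg Score = -48.81
# T = 250: Avg Score = -149.35
# T = 500: Avg Score = -330.09
# T = 1000: Avg Score = -716.38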
