In [1]:
import random
import time

# ----------------------------
# Environment
# ----------------------------
class VacuumEnvironment:
    """Two-square ("A" and "B") vacuum world with an unreliable suck action.

    Both squares start dirty and the vacuum starts on square "A".

    Args:
        suck_success_prob: Probability in [0, 1] that "Suck" cleans a dirty
            square. Defaults to 0.8.
        rng: Optional object exposing a ``random()`` method (for example a
            ``random.Random`` instance) used to decide whether a suck
            succeeds. Defaults to the module-level ``random`` generator.

    Raises:
        ValueError: If ``suck_success_prob`` is outside [0, 1].
    """

    def __init__(self, suck_success_prob=0.8, rng=None):
        if not 0.0 <= suck_success_prob <= 1.0:
            raise ValueError(
                f"suck_success_prob must be within [0, 1], got {suck_success_prob!r}"
            )
        self.world = {"A": "dirty", "B": "dirty"}
        self.location = "A"
        self.suck_success_prob = suck_success_prob
        # Fall back to the shared module generator so random.seed() still
        # controls the default behaviour.
        self._rng = rng if rng is not None else random

    def percept(self):
        """Return ``(location, status)`` for the square the vacuum is on."""
        return (self.location, self.world[self.location])

    def execute(self, action):
        """Apply ``action`` to the world and return its reward.

        Rewards:
            "Suck" on a dirty square: 10 if the suck succeeds (the square
                becomes clean), 0 if it fails (the square stays dirty).
            "Suck" on a clean square: -2.
            "Right" / "Left": -1; the vacuum ends on "B" / "A" respectively,
                even if it was already there.
            "Stop" or any unrecognised action: 0, world unchanged.
        """
        reward = 0

        if action == "Suck":
            if self.world[self.location] == "dirty":
                # A random number is drawn only when there is dirt to remove.
                if self._rng.random() < self.suck_success_prob:
                    self.world[self.location] = "clean"
                    reward = 10
            else:
                reward = -2

        elif action == "Right":
            self.location = "B"
            reward = -1

        elif action == "Left":
            self.location = "A"
            reward = -1

        return reward

    def show(self):
        """Print the vacuum location and the status of both squares."""
        print(f"Environment: Location={self.location}, A={self.world['A']}, B={self.world['B']}")


# ----------------------------
# Rational Agent (One-step reward evaluation)
# ----------------------------
class VacuumAgent:
    """Model-based vacuum agent that tracks what it has seen of squares A and B."""

    def __init__(self):
        self.internal_state = {"A": "unknown", "B": "unknown"}
        self.location = "A"
        self.total_reward = 0

    def update(self, percept):
        """Record the ``(location, status)`` percept in the agent's model."""
        where, status = percept
        self.location = where
        self.internal_state[where] = status

    def choose_action(self):
        """Pick the next action from the agent's current beliefs.

        Returns "Suck" when the current square is believed dirty, "Stop" when
        both squares are believed clean, and otherwise a move towards the
        other square ("Right" from "A", "Left" from anywhere else).
        """
        here = self.location

        # Dirt underfoot: cleaning offers the best immediate reward.
        if self.internal_state[here] == "dirty":
            return "Suck"

        # Nothing left to clean: stopping avoids the movement penalty.
        everything_clean = all(
            self.internal_state[square] == "clean" for square in ("A", "B")
        )
        if everything_clean:
            return "Stop"

        # The other square is dirty or not yet observed, so go there.
        return "Right" if here == "A" else "Left"

    def add_reward(self, reward):
        """Accumulate ``reward`` into the running total."""
        self.total_reward = self.total_reward + reward


# ----------------------------
# Simulation
# ----------------------------
# Build the world and the agent that will act in it.
env = VacuumEnvironment()
agent = VacuumAgent()

# Upper bound on the number of perceive/decide/act cycles.
steps = 12

print("Starting Simulation...\n")

for step in range(steps):
    print(f"--- Step {step+1} ---")

    # Perceive: read the current square and fold it into the agent's model.
    percept = env.percept()
    print("Percept:", percept)
    agent.update(percept)

    # Decide.
    action = agent.choose_action()
    print("Action:", action)

    # Act and collect the reward.
    reward = env.execute(action)
    agent.add_reward(reward)

    # Report. The internal state shown here was updated from the percept
    # taken before the action, so it does not yet reflect this step's result.
    env.show()
    print("Reward this step:", reward)
    print("Total Reward:", agent.total_reward)
    print("Agent Internal State:", agent.internal_state)
    print()

    if action != "Stop":
        # Pause between steps so the trace is readable as it scrolls.
        time.sleep(0.5)
        continue

    print("Agent decided to stop.")
    break

print("Simulation Complete.")


Starting Simulation...

--- Step 1 ---
Percept: ('A', 'dirty')
Action: Suck
Environment: Location=A, A=clean, B=dirty
Reward this step: 10
Total Reward: 10
Agent Internal State: {'A': 'dirty', 'B': 'unknown'}

--- Step 2 ---
Percept: ('A', 'clean')
Action: Right
Environment: Location=B, A=clean, B=dirty
Reward this step: -1
Total Reward: 9
Agent Internal State: {'A': 'clean', 'B': 'unknown'}

--- Step 3 ---
Percept: ('B', 'dirty')
Action: Suck
Environment: Location=B, A=clean, B=clean
Reward this step: 10
Total Reward: 19
Agent Internal State: {'A': 'clean', 'B': 'dirty'}

--- Step 4 ---
Percept: ('B', 'clean')
Action: Stop
Environment: Location=B, A=clean, B=clean
Reward this step: 0
Total Reward: 19
Agent Internal State: {'A': 'clean', 'B': 'clean'}

Agent decided to stop.
Simulation Complete.
