In [1]:
"""
Exercise 1: Agents and Environments
"""
import random
from enum import Enum
from typing import Tuple

class Location(Enum):
    A = "A"
    B = "B"

class Status(Enum):
    CLEAN = "Clean"
    DIRTY = "Dirty"

class Action(Enum):
    LEFT = "Left"
    RIGHT = "Right"
    SUCK = "Suck"
    NOOP = "NoOp"

class VacuumEnvironment:
    """Simple two-location vacuum environment"""
    def __init__(self):
        self.locations = {Location.A: Status.DIRTY, Location.B: Status.DIRTY}
        self.agent_location = Location.A
        self.performance = 0
        self.time_steps = 0
    
    def percept(self) -> Tuple[Location, Status]:
        """Return current percept: [Location, Status]"""
        return (self.agent_location, self.locations[self.agent_location])
    
    def execute(self, action: Action):
        """Execute action and update environment"""
        self.time_steps += 1
        
        if action == Action.SUCK:
            if self.locations[self.agent_location] == Status.DIRTY:
                self.locations[self.agent_location] = Status.CLEAN
                self.performance += 10  # Reward for cleaning
        elif action == Action.LEFT:
            self.agent_location = Location.A
            self.performance -= 1  # Cost of movement
        elif action == Action.RIGHT:
            self.agent_location = Location.B
            self.performance -= 1  # Cost of movement
    
    def is_clean(self) -> bool:
        return all(status == Status.CLEAN for status in self.locations.values())
    
    def __str__(self):
        return f"[{self.locations[Location.A].value}] Agent@{self.agent_location.value} [{self.locations[Location.B].value}] | Perf: {self.performance}"

# Demonstration
print("="*70)
print("EXERCISE 1: Agent-Environment Interaction")
print("="*70)

env = VacuumEnvironment()
print(f"\nInitial state: {env}")
print(f"Current percept: {env.percept()}")
print("\nExecuting actions manually:")

# Manual action sequence
env.execute(Action.SUCK)
print(f"After SUCK: {env}")

env.execute(Action.RIGHT)
print(f"After RIGHT: {env}")

env.execute(Action.SUCK)
print(f"After SUCK: {env}")

print(f"\nFinal Performance: {env.performance}")

EXERCISE 1: Agent-Environment Interaction

Initial state: [Dirty] Agent@A [Dirty] | Perf: 0
Current percept: (<Location.A: 'A'>, <Status.DIRTY: 'Dirty'>)

Executing actions manually:
After SUCK: [Clean] Agent@A [Dirty] | Perf: 10
After RIGHT: [Clean] Agent@B [Dirty] | Perf: 9
After SUCK: [Clean] Agent@B [Clean] | Perf: 19

Final Performance: 19


In [2]:
"""
Exercise 2: Simple Reflex Agent (Rational Behavior)
"""

def simple_reflex_vacuum_agent(percept: Tuple[Location, Status]) -> Action:
    """
    Agent function: maps current percept to action
    Rules:
      - If current location is dirty → SUCK
      - If at location A and clean → move RIGHT
      - If at location B and clean → move LEFT
    """
    location, status = percept
    
    if status == Status.DIRTY:
        return Action.SUCK
    elif location == Location.A:
        return Action.RIGHT
    else:
        return Action.LEFT

print("\n" + "="*70)
print("EXERCISE 2: Simple Reflex Agent Behavior")
print("="*70)
print("\nAgent Rules:")
print("  - IF dirty THEN suck")
print("  - IF at A and clean THEN move right")
print("  - IF at B and clean THEN move left")
print()

env = VacuumEnvironment()
for step in range(8):
    percept = env.percept()
    action = simple_reflex_vacuum_agent(percept)
    print(f"Step {step}: {env}")
    print(f"  Percept: {percept} → Action: {action.value}")
    env.execute(action)
    if env.is_clean():
        print(f"\nStep {step+1}: {env}")
        print("✓ All locations clean!")
        break

print(f"\nFinal Performance Score: {env.performance}")
print(f"Steps taken: {env.time_steps}")


EXERCISE 2: Simple Reflex Agent Behavior

Agent Rules:
  - IF dirty THEN suck
  - IF at A and clean THEN move right
  - IF at B and clean THEN move left

Step 0: [Dirty] Agent@A [Dirty] | Perf: 0
  Percept: (<Location.A: 'A'>, <Status.DIRTY: 'Dirty'>) → Action: Suck
Step 1: [Clean] Agent@A [Dirty] | Perf: 10
  Percept: (<Location.A: 'A'>, <Status.CLEAN: 'Clean'>) → Action: Right
Step 2: [Clean] Agent@B [Dirty] | Perf: 9
  Percept: (<Location.B: 'B'>, <Status.DIRTY: 'Dirty'>) → Action: Suck

Step 3: [Clean] Agent@B [Clean] | Perf: 19
✓ All locations clean!

Final Performance Score: 19
Steps taken: 3


In [6]:
"""
Exercise 3: Environment Properties - Stochasticity
"""

class StochasticVacuumEnvironment(VacuumEnvironment):
    """Vacuum environment where SUCK action may fail"""
    def execute(self, action: Action):
        self.time_steps += 1
        
        if action == Action.SUCK:
            if self.locations[self.agent_location] == Status.DIRTY:
                # Only 70% success rate
                if random.random() > 0.3:
                    self.locations[self.agent_location] = Status.CLEAN
                    self.performance += 10
                else:
                    print("    ⚠ SUCK action failed!")
        elif action == Action.LEFT:
            self.agent_location = Location.A
            self.performance -= 1
        elif action == Action.RIGHT:
            self.agent_location = Location.B
            self.performance -= 1

print("\n" + "="*70)
print("EXERCISE 3: Stochastic Environment")
print("="*70)
print("\nEnvironment: SUCK action has 70% success rate")
print()

random.seed(49)  # For reproducible results
env_stochastic = StochasticVacuumEnvironment()

for step in range(15):
    percept = env_stochastic.percept()
    action = simple_reflex_vacuum_agent(percept)
    print(f"Step {step}: {env_stochastic}")
    print(f"  Action: {action.value}")
    env_stochastic.execute(action)
    if env_stochastic.is_clean():
        print(f"\n✓ All locations clean!")
        break

print(f"\nFinal Performance: {env_stochastic.performance}")
print(f"Total steps: {env_stochastic.time_steps}")


EXERCISE 3: Stochastic Environment

Environment: SUCK action has 70% success rate

Step 0: [Dirty] Agent@A [Dirty] | Perf: 0
  Action: Suck
    ⚠ SUCK action failed!
Step 1: [Dirty] Agent@A [Dirty] | Perf: 0
  Action: Suck
Step 2: [Clean] Agent@A [Dirty] | Perf: 10
  Action: Right
Step 3: [Clean] Agent@B [Dirty] | Perf: 9
  Action: Suck
    ⚠ SUCK action failed!
Step 4: [Clean] Agent@B [Dirty] | Perf: 9
  Action: Suck

✓ All locations clean!

Final Performance: 19
Total steps: 5


In [7]:
"""
Exercise 4: Model-Based Reflex Agent
"""

class ModelBasedVacuumAgent:
    """Agent that maintains internal state about the world"""
    def __init__(self):
        # Internal model of the world state
        self.model = {Location.A: Status.DIRTY, Location.B: Status.DIRTY}
        self.location = Location.A
    
    def agent_program(self, percept: Tuple[Location, Status]) -> Action:
        location, status = percept
        
        # Update internal model based on percept
        self.location = location
        self.model[location] = status
        
        # Decide action based on model
        if status == Status.DIRTY:
            return Action.SUCK
        elif self.model[Location.A] == Status.DIRTY:
            return Action.LEFT
        elif self.model[Location.B] == Status.DIRTY:
            return Action.RIGHT
        else:
            return Action.NOOP  # Believes all locations clean

print("\n" + "="*70)
print("EXERCISE 4: Model-Based Reflex Agent")
print("="*70)
print("\nAgent maintains internal model of both locations")
print()

env = VacuumEnvironment()
agent = ModelBasedVacuumAgent()

for step in range(10):
    percept = env.percept()
    action = agent.agent_program(percept)
    print(f"Step {step}: {env}")
    print(f"  Agent's Model: A={agent.model[Location.A].value}, B={agent.model[Location.B].value}")
    print(f"  Action: {action.value}")
    env.execute(action)
    print()
    if action == Action.NOOP:
        print("✓ Agent believes task is complete")
        break

print(f"Final Performance: {env.performance}")


EXERCISE 4: Model-Based Reflex Agent

Agent maintains internal model of both locations

Step 0: [Dirty] Agent@A [Dirty] | Perf: 0
  Agent's Model: A=Dirty, B=Dirty
  Action: Suck

Step 1: [Clean] Agent@A [Dirty] | Perf: 10
  Agent's Model: A=Clean, B=Dirty
  Action: Right

Step 2: [Clean] Agent@B [Dirty] | Perf: 9
  Agent's Model: A=Clean, B=Dirty
  Action: Suck

Step 3: [Clean] Agent@B [Clean] | Perf: 19
  Agent's Model: A=Clean, B=Clean
  Action: NoOp

✓ Agent believes task is complete
Final Performance: 19


In [10]:
"""
Exercise 5: Goal-Based Agent
"""

class GoalBasedVacuumAgent:
    """Agent that plans actions to achieve explicit goals"""
    def __init__(self):
        self.goal = "all locations visited and clean"
        self.visited = set()
        self.cleaned = set()
    
    def agent_program(self, percept: Tuple[Location, Status]) -> Action:
        location, status = percept
        self.visited.add(location)
        
        # Subgoal: Clean current location if dirty
        if status == Status.DIRTY:
            return Action.SUCK
        else:
            self.cleaned.add(location)
        
        # Plan: Visit unvisited locations
        if Location.A not in self.visited:
            return random.choice([Action.LEFT, Action.RIGHT])
        elif Location.B not in self.visited:
            return random.choice([Action.LEFT, Action.RIGHT])
        else:
            # Goal achieved: all locations visited and clean
            return Action.NOOP

print("\n" + "="*70)
print("EXERCISE 5: Goal-Based Agent")
print("="*70)
print("\nGoal: Visit and clean all locations")
print()

env = VacuumEnvironment()
agent = GoalBasedVacuumAgent()

for step in range(10):
    percept = env.percept()
    action = agent.agent_program(percept)
    print(f"Step {step}: {env}")
    print(f"  Visited: {sorted([loc.value for loc in agent.visited])}")
    print(f"  Cleaned: {sorted([loc.value for loc in agent.cleaned])}")
    print(f"  Action: {action.value}")
    env.execute(action)
    print()
    if action == Action.NOOP:
        print("✓ Goal achieved!")
        break

print(f"Final Performance: {env.performance}")


EXERCISE 5: Goal-Based Agent

Goal: Visit and clean all locations

Step 0: [Dirty] Agent@A [Dirty] | Perf: 0
  Visited: ['A']
  Cleaned: []
  Action: Suck

Step 1: [Clean] Agent@A [Dirty] | Perf: 10
  Visited: ['A']
  Cleaned: ['A']
  Action: Left

Step 2: [Clean] Agent@A [Dirty] | Perf: 9
  Visited: ['A']
  Cleaned: ['A']
  Action: Left

Step 3: [Clean] Agent@A [Dirty] | Perf: 8
  Visited: ['A']
  Cleaned: ['A']
  Action: Right

Step 4: [Clean] Agent@B [Dirty] | Perf: 7
  Visited: ['A', 'B']
  Cleaned: ['A']
  Action: Suck

Step 5: [Clean] Agent@B [Clean] | Perf: 17
  Visited: ['A', 'B']
  Cleaned: ['A', 'B']
  Action: NoOp

✓ Goal achieved!
Final Performance: 17


In [11]:
"""
Exercise 6: Utility-Based Agent
"""

class UtilityBasedVacuumAgent:
    """Agent that maximizes expected utility"""
    def __init__(self):
        self.state = {Location.A: Status.DIRTY, Location.B: Status.DIRTY}
        self.location = Location.A
    
    def utility(self, state: dict) -> float:
        """Calculate utility of a state"""
        clean_count = sum(1 for s in state.values() if s == Status.CLEAN)
        return clean_count * 10  # 10 points per clean location
    
    def expected_utility(self, action: Action) -> float:
        """Predict utility after taking action (minus costs)"""
        next_state = self.state.copy()
        next_location = self.location
        
        if action == Action.SUCK:
            if self.state[self.location] == Status.DIRTY:
                next_state[self.location] = Status.CLEAN
            return self.utility(next_state) - 0  # No cost
        elif action == Action.LEFT:
            next_location = Location.A
            # Expected utility: might find dirt there
            if next_state[Location.A] == Status.DIRTY:
                # If we know it's dirty, we can clean it (net +9)
                return self.utility(next_state) + 10 - 1
            return self.utility(next_state) - 1  # Just movement cost
        elif action == Action.RIGHT:
            next_location = Location.B
            # Expected utility: might find dirt there
            if next_state[Location.B] == Status.DIRTY:
                # If we know it's dirty, we can clean it (net +9)
                return self.utility(next_state) + 10 - 1
            return self.utility(next_state) - 1  # Just movement cost
        return self.utility(next_state)
    
    def agent_program(self, percept: Tuple[Location, Status]) -> Action:
        location, status = percept
        self.location = location
        self.state[location] = status
        
        # Evaluate all possible actions
        actions = [Action.SUCK, Action.LEFT, Action.RIGHT]
        action_utilities = []
        
        for a in actions:
            eu = self.expected_utility(a)
            action_utilities.append((a, eu))
        
        # Choose action with maximum expected utility
        best_action = max(action_utilities, key=lambda x: x[1])
        
        return best_action[0]

print("\n" + "="*70)
print("EXERCISE 6: Utility-Based Agent")
print("="*70)
print("\nUtility Function: +10 per clean location, -1 per move")
print()

env = VacuumEnvironment()
agent = UtilityBasedVacuumAgent()

for step in range(10):
    percept = env.percept()
    
    # Show utility calculation
    print(f"Step {step}: {env}")
    print(f"  Current state utility: {agent.utility(agent.state)}")
    
    # Get action
    action = agent.agent_program(percept)
    print(f"  Chosen action: {action.value}")
    
    env.execute(action)
    print()
    if env.is_clean():
        print("✓ All locations clean!")
        break

print(f"Final Performance: {env.performance}")


EXERCISE 6: Utility-Based Agent

Utility Function: +10 per clean location, -1 per move

Step 0: [Dirty] Agent@A [Dirty] | Perf: 0
  Current state utility: 0
  Chosen action: Suck

Step 1: [Clean] Agent@A [Dirty] | Perf: 10
  Current state utility: 0
  Chosen action: Right

Step 2: [Clean] Agent@B [Dirty] | Perf: 9
  Current state utility: 10
  Chosen action: Suck

✓ All locations clean!
Final Performance: 19


In [22]:
"""
Exercise 7: Comparing All Agent Types
"""

def run_agent_comparison():
    """Run all four agent types and compare performance"""
    
    def run_simple_reflex(max_steps=10):
        env = VacuumEnvironment()
        for _ in range(max_steps):
            action = simple_reflex_vacuum_agent(env.percept())
            env.execute(action)
            if env.is_clean():
                break
        return env.performance, env.time_steps
    
    def run_model_based(max_steps=10):
        env = VacuumEnvironment()
        agent = ModelBasedVacuumAgent()
        for _ in range(max_steps):
            action = agent.agent_program(env.percept())
            env.execute(action)
            if action == Action.NOOP:
                break
        return env.performance, env.time_steps
    
    def run_goal_based(max_steps=10):
        env = VacuumEnvironment()
        agent = GoalBasedVacuumAgent()
        for _ in range(max_steps):
            action = agent.agent_program(env.percept())
            env.execute(action)
            if action == Action.NOOP:
                break
        return env.performance, env.time_steps
    
    def run_utility_based(max_steps=10):
        env = VacuumEnvironment()
        agent = UtilityBasedVacuumAgent()
        for _ in range(max_steps):
            action = agent.agent_program(env.percept())
            env.execute(action)
            if env.is_clean():
                break
        return env.performance, env.time_steps
    
    return {
        "Simple Reflex": run_simple_reflex(),
        "Model-Based": run_model_based(),
        "Goal-Based": run_goal_based(),
        "Utility-Based": run_utility_based()
    }

print("\n" + "="*70)
print("EXERCISE 7: Agent Performance Comparison")
print("="*70)
print("\nRunning all four agent types in identical environments...\n")

results = run_agent_comparison()

print("Results:")
print("-" * 70)
print(f"{'Agent Type':<20} {'Performance':<15} {'Steps':<10}")
print("-" * 70)
for agent_type, (perf, steps) in results.items():
    print(f"{agent_type:<20} {perf:<15} {steps:<10}")
print("-" * 70)

# Find best performer
best_agent = max(results.items(), key=lambda x: x[1][0])
print(f"\n Best Performance: {best_agent[0]} (Score: {best_agent[1][0]})")


EXERCISE 7: Agent Performance Comparison

Running all four agent types in identical environments...

Results:
----------------------------------------------------------------------
Agent Type           Performance     Steps     
----------------------------------------------------------------------
Simple Reflex        19              3         
Model-Based          19              4         
Goal-Based           19              4         
Utility-Based        19              3         
----------------------------------------------------------------------

 Best Performance: Simple Reflex (Score: 19)
