In [25]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import random
from collections import deque, namedtuple
import gymnasium as gym
from gymnasium import spaces
import time

# Seed every random number generator the script draws from (PyTorch, NumPy and
# the stdlib `random` module) with the same value so runs are repeatable.
_SEED = 42
for _seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    _seed_fn(_SEED)

class PredatorPreyEnv:
    """Grid-world pursuit task: controlled predators chase randomly moving prey.

    Positions are ``(x, y)`` tuples on a ``grid_size`` x ``grid_size`` board.
    Each predator receives a flat float32 observation laid out as
    ``[own_x, own_y, prey_0_x, prey_0_y, ..., other_pred_x, other_pred_y, ...]``;
    prey that have already been caught are reported as ``(-1, -1)``.

    Args:
        grid_size: Side length of the square grid.
        num_predators: Number of controlled agents.
        num_prey: Number of randomly moving prey.
        max_steps: Step budget after which an episode ends regardless of catches.

    Raises:
        ValueError: If the grid is empty or too small to give every agent its
            own starting cell.
    """

    def __init__(self, grid_size=10, num_predators=2, num_prey=3, max_steps=100):
        # Validate first: reset() hands out distinct cells, so an over-full grid
        # would otherwise surface later as an IndexError or as observations that
        # are shorter than the declared observation space.
        if grid_size < 1:
            raise ValueError(f"grid_size must be at least 1, got {grid_size}")
        if num_predators + num_prey > grid_size ** 2:
            raise ValueError(
                f"{num_predators} predators and {num_prey} prey do not fit on a "
                f"{grid_size}x{grid_size} grid"
            )

        self.grid_size = grid_size
        self.num_predators = num_predators
        self.num_prey = num_prey
        self.max_steps = max_steps

        # Action space: 0=stay, 1=up, 2=down, 3=left, 4=right
        self.action_space = spaces.Discrete(5)

        # Observation: own position + every prey + every other predator.
        obs_dim = 2 + (self.num_prey * 2) + (self.num_predators - 1) * 2
        # The lower bound is -1 (not 0) because caught prey are encoded as
        # (-1, -1) in _get_observations.
        self.observation_space = spaces.Box(
            low=-1, high=grid_size - 1, shape=(obs_dim,), dtype=np.float32
        )

        self.reset()

    def reset(self):
        """Start a new episode and return the initial per-predator observations.

        All predators and prey are placed on distinct, randomly chosen cells.
        """
        self.step_count = 0
        self.caught_prey = set()

        # Shuffle every cell once and deal them out so no two agents overlap.
        all_positions = [(x, y) for x in range(self.grid_size) for y in range(self.grid_size)]
        random.shuffle(all_positions)

        self.predator_positions = all_positions[:self.num_predators]
        self.prey_positions = all_positions[self.num_predators:self.num_predators + self.num_prey]

        return self._get_observations()

    def step(self, actions):
        """Advance the environment by one time step.

        Args:
            actions: One discrete action (0-4) per predator, in predator order.

        Returns:
            ``(observations, rewards, done, info)`` where ``observations`` and
            ``rewards`` are per-predator lists, ``done`` is True once every prey
            is caught or ``max_steps`` is reached, and ``info`` holds the total
            number of caught prey plus the indices caught during this step.

        Raises:
            ValueError: If the number of actions differs from ``num_predators``.
        """
        # Checked before any state changes: a short action list would otherwise
        # silently shrink predator_positions.
        if len(actions) != self.num_predators:
            raise ValueError(
                f"expected {self.num_predators} actions, got {len(actions)}"
            )

        self.step_count += 1

        # Move predators according to their chosen actions.
        self.predator_positions = [
            self._move_agent(pos, action)
            for pos, action in zip(self.predator_positions, actions)
        ]

        # Move prey uniformly at random; caught prey stay where they were caught.
        new_prey_positions = []
        for prey_idx, pos in enumerate(self.prey_positions):
            if prey_idx in self.caught_prey:
                new_prey_positions.append(pos)
            else:
                new_prey_positions.append(self._move_agent(pos, random.randint(0, 4)))
        self.prey_positions = new_prey_positions

        # A prey is caught by the first (lowest-index) predator sharing its cell;
        # only that predator receives the catch reward.
        rewards = [0.0] * self.num_predators
        new_catches = []
        for prey_idx, prey_pos in enumerate(self.prey_positions):
            if prey_idx in self.caught_prey:
                continue
            for pred_idx, pred_pos in enumerate(self.predator_positions):
                if pred_pos == prey_pos:
                    self.caught_prey.add(prey_idx)
                    rewards[pred_idx] += 10.0
                    new_catches.append(prey_idx)
                    break

        # Small per-step penalty for every predator.
        for i in range(self.num_predators):
            rewards[i] -= 0.1

        done = (len(self.caught_prey) == self.num_prey) or (self.step_count >= self.max_steps)

        observations = self._get_observations()
        info = {'caught_prey': len(self.caught_prey), 'new_catches': new_catches}

        return observations, rewards, done, info

    def _move_agent(self, pos, action):
        """Return the position reached from ``pos`` by ``action``, clamped to the grid.

        "Up" decreases ``y`` and "down" increases it; any action other than 1-4
        (including 0, "stay") leaves the position unchanged.
        """
        x, y = pos

        if action == 1:  # up
            y = max(0, y - 1)
        elif action == 2:  # down
            y = min(self.grid_size - 1, y + 1)
        elif action == 3:  # left
            x = max(0, x - 1)
        elif action == 4:  # right
            x = min(self.grid_size - 1, x + 1)

        return (x, y)

    def _get_observations(self):
        """Build one float32 observation vector per predator."""
        observations = []

        for pred_idx in range(self.num_predators):
            obs = []

            # Own position
            obs.extend(self.predator_positions[pred_idx])

            # Prey positions; caught prey are masked with the (-1, -1) sentinel.
            for prey_idx, prey_pos in enumerate(self.prey_positions):
                if prey_idx in self.caught_prey:
                    obs.extend([-1, -1])
                else:
                    obs.extend(prey_pos)

            # Other predators, in index order, skipping the observer itself.
            for other_pred_idx in range(self.num_predators):
                if other_pred_idx != pred_idx:
                    obs.extend(self.predator_positions[other_pred_idx])

            observations.append(np.array(obs, dtype=np.float32))

        return observations

    def render(self):
        """Print the grid: 'P' = live prey, 'X' = caught prey, digits = predators.

        A predator standing on an already occupied cell is shown as ``<idx>*``.
        """
        grid = [['.' for _ in range(self.grid_size)] for _ in range(self.grid_size)]

        # Place prey
        for prey_idx, (x, y) in enumerate(self.prey_positions):
            if prey_idx in self.caught_prey:
                grid[y][x] = 'X'  # Caught prey
            else:
                grid[y][x] = 'P'  # Prey

        # Place predators
        for pred_idx, (x, y) in enumerate(self.predator_positions):
            if grid[y][x] == '.':
                grid[y][x] = str(pred_idx)
            else:
                grid[y][x] = f"{pred_idx}*"  # Cell already holds prey or another predator

        print(f"\nStep {self.step_count}, Caught: {len(self.caught_prey)}/{self.num_prey}")
        for row in grid:
            print(' '.join([f"{cell:>2}" for cell in row]))


class Actor(nn.Module):
    """Policy network producing a categorical action distribution.

    When ``communication_dim > 0`` the network also consumes an incoming
    message (concatenated to the state) and emits an outgoing message.

    Args:
        state_dim: Size of the observation vector.
        action_dim: Number of discrete actions.
        hidden_dim: Width of the two hidden layers.
        communication_dim: Size of the message vector; 0 disables communication.
    """

    def __init__(self, state_dim, action_dim, hidden_dim=64, communication_dim=0):
        super().__init__()
        self.communication_dim = communication_dim

        # The incoming message (if any) is appended to the state features.
        input_dim = state_dim + communication_dim

        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, action_dim)

        # Head that produces the outgoing message (only when communication is on).
        if communication_dim > 0:
            self.comm_layer = nn.Linear(hidden_dim, communication_dim)

    def forward(self, state, comm_input=None):
        """Compute action probabilities and, if enabled, an outgoing message.

        Args:
            state: Tensor whose last dimension is ``state_dim``; any number of
                leading (batch) dimensions, including none, is accepted.
            comm_input: Optional tensor with the same leading dimensions as
                ``state`` and last dimension ``communication_dim``. When it is
                omitted while communication is enabled, an all-zero message is
                substituted.

        Returns:
            ``(action_probs, comm_output)``. ``action_probs`` is a softmax over
            the last dimension; ``comm_output`` is a tanh-squashed message, or
            ``None`` when communication is disabled.
        """
        if self.communication_dim > 0:
            if comm_input is None:
                # Pad using the state's own leading dimensions so unbatched and
                # multi-dimensional batches work, not only (batch, dim) input.
                comm_input = state.new_zeros((*state.shape[:-1], self.communication_dim))
            x = torch.cat([state, comm_input], dim=-1)
        else:
            x = state

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))

        comm_output = None
        if self.communication_dim > 0:
            comm_output = torch.tanh(self.comm_layer(x))

        action_probs = F.softmax(self.fc3(x), dim=-1)
        return action_probs, comm_output


class Critic(nn.Module):
    """Centralised Q-network that scores the joint state-action of all agents.

    Args:
        state_dim: Size of one agent's observation vector.
        action_dim: Number of discrete actions per agent.
        hidden_dim: Width of the two hidden layers.
        num_agents: Number of agents whose states and actions are concatenated.
    """

    def __init__(self, state_dim, action_dim, hidden_dim=64, num_agents=1):
        super().__init__()

        # Input is every agent's state followed by every agent's action vector
        # (one-hot or relaxed), all flattened.
        input_dim = state_dim * num_agents + action_dim * num_agents

        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, 1)
        self.num_agents = num_agents
        self.action_dim = action_dim

    def forward(self, states, actions):
        """Return the Q-value of the joint state-action, shape ``(batch, 1)``.

        Args:
            states: Tensor whose first dimension is the batch; the remaining
                dimensions are flattened.
            actions: Either integer action indices (any integer dtype), which
                are one-hot encoded with ``action_dim`` classes, or a
                floating-point tensor of per-agent action vectors that is used
                as-is. Both are flattened per sample.
        """
        batch_size = states.shape[0]

        # reshape (not view) so non-contiguous inputs such as slices or
        # transposes are accepted.
        states_flat = states.reshape(batch_size, -1)

        if torch.is_floating_point(actions):
            # Already an action vector (one-hot or a differentiable relaxation).
            actions_flat = actions.reshape(batch_size, -1)
        else:
            # Any integer dtype counts as indices; F.one_hot requires int64.
            actions_one_hot = F.one_hot(actions.long(), num_classes=self.action_dim).float()
            actions_flat = actions_one_hot.reshape(batch_size, -1)

        x = torch.cat([states_flat, actions_flat], dim=-1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)


class MADDPGAgent:
    """One MADDPG learner: a decentralised actor plus a centralised critic.

    Args:
        state_dim: Size of this agent's observation vector.
        action_dim: Number of discrete actions.
        agent_id: Index of this agent in the joint state/action/reward arrays.
        num_agents: Total number of agents seen by the critic.
        lr: Adam learning rate used for both the actor and the critic.
        gamma: Discount factor.
        tau: Interpolation factor for the soft target-network update.
        communication: If True the actor exchanges 4-dimensional messages.
    """

    def __init__(self, state_dim, action_dim, agent_id, num_agents,
                 lr=0.001, gamma=0.95, tau=0.01, communication=False):
        self.agent_id = agent_id
        self.num_agents = num_agents
        self.gamma = gamma
        self.tau = tau
        self.communication = communication
        self.action_dim = action_dim

        # Message width is fixed at 4 when communication is enabled.
        comm_dim = 4 if communication else 0

        # Online and target networks
        self.actor = Actor(state_dim, action_dim, communication_dim=comm_dim)
        self.actor_target = Actor(state_dim, action_dim, communication_dim=comm_dim)
        self.critic = Critic(state_dim, action_dim, num_agents=num_agents)
        self.critic_target = Critic(state_dim, action_dim, num_agents=num_agents)

        # Optimizers
        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=lr)
        self.critic_optimizer = optim.Adam(self.critic.parameters(), lr=lr)

        # Targets start as exact copies of the online networks.
        self.hard_update(self.actor_target, self.actor)
        self.hard_update(self.critic_target, self.critic)

    def act(self, state, comm_input=None, epsilon=0.1):
        """Choose an action for a single observation.

        With probability ``epsilon`` a uniformly random action is taken;
        otherwise the action is *sampled* from the actor's distribution (so
        ``epsilon=0`` is still stochastic, not greedy).

        Args:
            state: This agent's observation (1-D array-like).
            comm_input: Optional incoming message (1-D array-like) or None.
            epsilon: Probability of a uniformly random action.

        Returns:
            ``(action, comm_msg)`` where ``action`` is an int and ``comm_msg``
            is the outgoing message as a NumPy array, or None when the actor
            produces no message.
        """
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        comm_tensor = torch.FloatTensor(comm_input).unsqueeze(0) if comm_input is not None else None

        with torch.no_grad():
            action_probs, comm_output = self.actor(state_tensor, comm_tensor)

        if random.random() < epsilon:
            action = random.randint(0, self.action_dim - 1)
        else:
            action = torch.multinomial(action_probs, 1).item()

        # Drop only the batch dimension that was added above.
        comm_msg = comm_output.squeeze(0).numpy() if comm_output is not None else None
        return action, comm_msg

    def learn(self, batch_data, other_agents):
        """Run one critic update, one actor update and a soft target update.

        Args:
            batch_data: ``(states, actions, rewards, next_states, dones)`` where
                each element is a sequence over the batch; states are indexed
                ``[sample][agent][feature]`` and actions/rewards
                ``[sample][agent]``.
            other_agents: Sequence of all agents indexed by agent id; entries
                other than ``agent_id`` supply the other agents' policies.

        Note:
            Actors are evaluated here without a communication input, so any
            communication-enabled actor sees an all-zero message during
            learning (messages are not part of ``batch_data``).
        """
        states, actions, rewards, next_states, dones = batch_data

        # Go through NumPy so nested lists of arrays are converted in one shot.
        states_tensor = torch.as_tensor(np.asarray(states, dtype=np.float32))
        actions_tensor = torch.as_tensor(np.asarray(actions, dtype=np.int64))
        next_states_tensor = torch.as_tensor(np.asarray(next_states, dtype=np.float32))
        rewards_tensor = torch.tensor(
            [r[self.agent_id] for r in rewards], dtype=torch.float32
        ).unsqueeze(1)
        # 1.0 for non-terminal transitions, 0.0 for terminal ones.
        not_done = 1.0 - torch.tensor(list(dones), dtype=torch.float32).unsqueeze(1)

        # ---- Critic update: one-step TD target from the target networks ----
        with torch.no_grad():
            next_actions = []
            for i in range(self.num_agents):
                policy = self.actor_target if i == self.agent_id else other_agents[i].actor_target
                next_action_probs, _ = policy(next_states_tensor[:, i])
                next_actions.append(torch.multinomial(next_action_probs, 1).squeeze(-1))

            next_actions_tensor = torch.stack(next_actions, dim=1)
            next_q = self.critic_target(next_states_tensor, next_actions_tensor)
            target_q = rewards_tensor + self.gamma * next_q * not_done

        current_q = self.critic(states_tensor, actions_tensor)
        critic_loss = F.mse_loss(current_q, target_q)

        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        self.critic_optimizer.step()

        # ---- Actor update ----
        # Sampling an index with torch.multinomial is not differentiable, so
        # the actor would receive no gradient at all. The agent's own action is
        # therefore drawn with a straight-through Gumbel-Softmax: one-hot in the
        # forward pass, differentiable w.r.t. the policy in the backward pass.
        joint_actions = []
        for i in range(self.num_agents):
            if i == self.agent_id:
                action_probs, _ = self.actor(states_tensor[:, i])
                # Clamp before the log so zero probabilities do not yield -inf.
                logits = torch.log(action_probs.clamp_min(1e-8))
                joint_actions.append(F.gumbel_softmax(logits, tau=1.0, hard=True))
            else:
                # Other agents are treated as fixed: sample from their current
                # policy without tracking gradients.
                with torch.no_grad():
                    action_probs, _ = other_agents[i].actor(states_tensor[:, i])
                    sampled = torch.multinomial(action_probs, 1).squeeze(-1)
                joint_actions.append(F.one_hot(sampled, num_classes=self.action_dim).float())

        # (batch, num_agents, action_dim) float tensor, fed to the critic as-is.
        joint_actions_tensor = torch.stack(joint_actions, dim=1)
        actor_loss = -self.critic(states_tensor, joint_actions_tensor).mean()

        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

        # Let the target networks slowly track the online networks.
        self.soft_update(self.actor_target, self.actor)
        self.soft_update(self.critic_target, self.critic)

    def soft_update(self, target, source):
        """Blend ``source`` into ``target``: target <- (1 - tau) * target + tau * source."""
        for target_param, param in zip(target.parameters(), source.parameters()):
            target_param.data.copy_(target_param.data * (1.0 - self.tau) + param.data * self.tau)

    def hard_update(self, target, source):
        """Copy every parameter of ``source`` into ``target``."""
        for target_param, param in zip(target.parameters(), source.parameters()):
            target_param.data.copy_(param.data)


class MADDPGTrainer:
    """Runs episodes, stores joint transitions and trains one agent per predator.

    Args:
        env: Environment exposing ``num_predators``, ``num_prey``,
            ``caught_prey``, ``observation_space``, ``action_space``,
            ``reset()``, ``step(actions)`` and ``render()``.
        communication: If True, agents exchange messages between steps.
    """

    def __init__(self, env, communication=False):
        self.env = env
        self.communication = communication
        self.num_agents = env.num_predators

        # Get state and action dimensions
        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.n

        # One learner per predator
        self.agents = []
        for i in range(self.num_agents):
            agent = MADDPGAgent(state_dim, action_dim, i, self.num_agents,
                                communication=communication)
            self.agents.append(agent)

        # Replay buffer of joint transitions shared by all agents.
        self.shared_memory = deque(maxlen=10000)

        # Per-episode training metrics
        self.episode_rewards = []
        self.episode_catches = []
        self.episode_lengths = []

    def _select_actions(self, states, communication_messages, epsilon):
        """Query every agent once and return ``(actions, new_messages)``.

        With communication enabled, each agent receives the element-wise mean
        of the other agents' messages from the previous step, or no message
        when none are available yet.
        """
        actions = []
        new_comm_messages = []

        for i, agent in enumerate(self.agents):
            comm_input = None
            if self.communication:
                if communication_messages and communication_messages[0] is not None:
                    # Average everything the *other* agents said last step.
                    other_messages = [msg for j, msg in enumerate(communication_messages)
                                      if j != i and msg is not None]
                    if other_messages:
                        comm_input = np.mean(other_messages, axis=0)

            action, comm_msg = agent.act(states[i], comm_input, epsilon)
            actions.append(action)
            new_comm_messages.append(comm_msg)

        return actions, new_comm_messages

    def _sample_batch(self, batch_size=32):
        """Draw a random minibatch and regroup it field by field.

        Returns:
            ``(states, actions, rewards, next_states, dones)``, each a list with
            one entry per sampled transition.
        """
        batch_size = min(batch_size, len(self.shared_memory))
        batch = random.sample(list(self.shared_memory), batch_size)
        # Transpose [(s, a, r, s', d), ...] into five parallel lists.
        return tuple(list(column) for column in zip(*batch))

    def train(self, num_episodes=1000, render_freq=100):
        """Train all agents for ``num_episodes`` episodes.

        Exploration epsilon decays linearly from 1.0 to a floor of 0.1 over the
        first half of training. After each episode, once the buffer holds more
        than 100 transitions, every agent performs one update on a shared
        minibatch. Progress is printed every ``render_freq`` episodes and a
        demonstration is run every ``2 * render_freq`` episodes.

        Args:
            num_episodes: Number of training episodes.
            render_freq: Reporting interval in episodes.
        """
        print(f"Training MADDPG {'with' if self.communication else 'without'} communication...")

        for episode in range(num_episodes):
            states = self.env.reset()
            episode_reward = [0] * self.num_agents
            episode_length = 0
            communication_messages = [None] * self.num_agents if self.communication else None

            # Epsilon depends only on the episode index, so compute it once.
            epsilon = max(0.1, 1.0 - episode / (num_episodes * 0.5))

            while True:
                actions, new_comm_messages = self._select_actions(
                    states, communication_messages, epsilon)

                if self.communication:
                    communication_messages = new_comm_messages

                # Environment step
                next_states, rewards, done, info = self.env.step(actions)

                # Store the joint transition
                self.shared_memory.append((states, actions, rewards, next_states, done))

                for i in range(self.num_agents):
                    episode_reward[i] += rewards[i]

                states = next_states
                episode_length += 1

                if done:
                    break

            # One learning step per episode once enough experience is stored.
            if len(self.shared_memory) > 100:
                batch_data = self._sample_batch()
                for agent in self.agents:
                    agent.learn(batch_data, self.agents)

            # Record metrics
            self.episode_rewards.append(np.mean(episode_reward))
            self.episode_catches.append(len(self.env.caught_prey))
            self.episode_lengths.append(episode_length)

            # Print progress
            if episode % render_freq == 0:
                avg_reward = np.mean(self.episode_rewards[-render_freq:])
                avg_catches = np.mean(self.episode_catches[-render_freq:])
                avg_length = np.mean(self.episode_lengths[-render_freq:])

                print(f"Episode {episode}/{num_episodes}")
                print(f"  Avg Reward: {avg_reward:.2f}")
                print(f"  Avg Catches: {avg_catches:.2f}/{self.env.num_prey}")
                print(f"  Avg Length: {avg_length:.1f}")

                if episode % (render_freq * 2) == 0:
                    self.demonstrate()

    def demonstrate(self):
        """Play one rendered episode of at most 50 steps with ``epsilon=0``.

        The environment is reset first, so any episode state held by
        ``self.env`` is discarded. Rendering is throttled with a 0.5 s pause
        between steps.
        """
        print("\n--- Demonstration ---")
        states = self.env.reset()
        self.env.render()

        communication_messages = [None] * self.num_agents if self.communication else None

        for step in range(50):
            # epsilon=0.0 turns off the uniformly random exploration branch.
            actions, new_comm_messages = self._select_actions(
                states, communication_messages, 0.0)

            if self.communication:
                communication_messages = new_comm_messages
                if communication_messages[0] is not None:
                    # Show only the first two components of each message.
                    shown = [f'{msg[0]:.2f},{msg[1]:.2f}' if msg is not None else 'None'
                             for msg in communication_messages]
                    print(f"Communication: {shown}")

            states, rewards, done, info = self.env.step(actions)
            self.env.render()

            if done:
                print(f"Episode finished! Caught {len(self.env.caught_prey)}/{self.env.num_prey} prey")
                break

            time.sleep(0.5)  # Slow down for visualization


def plot_training_results(trainers):
    """Draw a 2x2 comparison figure for several training runs.

    Three panels show the raw per-episode series (rewards, catches, episode
    lengths) of every trainer together with a 50-episode moving average when
    enough episodes exist; the fourth panel is a bar chart of the percentage
    of the last (up to) 100 episodes in which all prey were caught.

    Args:
        trainers: Mapping from a display name to a trainer exposing
            ``episode_rewards``, ``episode_catches``, ``episode_lengths`` and
            ``env.num_prey``.
    """
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    window = 50

    # (axis, title, trainer attribute holding the series, y-axis label)
    line_panels = (
        (axes[0, 0], 'Average Episode Rewards', 'episode_rewards', 'Average Reward'),
        (axes[0, 1], 'Prey Caught per Episode', 'episode_catches', 'Prey Caught'),
        (axes[1, 0], 'Episode Lengths', 'episode_lengths', 'Episode Length'),
    )
    for ax, title, attr, ylabel in line_panels:
        ax.set_title(title)
        for name, trainer in trainers.items():
            series = getattr(trainer, attr)
            ax.plot(series, label=name, alpha=0.7)
            if len(series) < window:
                continue
            # 'valid' mode drops the first window-1 points, hence the x offset.
            smoothed = np.convolve(series, np.ones(window)/window, mode='valid')
            ax.plot(range(window-1, len(series)), smoothed,
                    linewidth=2, label=f'{name} (MA)')
        ax.set_xlabel('Episode')
        ax.set_ylabel(ylabel)
        ax.legend()
        ax.grid(True)

    # Success rate: share of recent episodes in which every prey was caught.
    bar_ax = axes[1, 1]
    bar_ax.set_title('Success Rate (Last 100 Episodes)')
    success_rates = {}
    for name, trainer in trainers.items():
        catches = trainer.episode_catches
        if len(catches) >= 100:
            catches = catches[-100:]
        total_prey = trainer.env.num_prey
        success_rates[name] = np.mean([caught == total_prey for caught in catches]) * 100

    names = list(success_rates.keys())
    rates = list(success_rates.values())
    bars = bar_ax.bar(names, rates, color=['blue', 'orange'])
    bar_ax.set_ylabel('Success Rate (%)')
    bar_ax.set_ylim(0, 100)

    # Print each percentage just above its bar.
    for bar, rate in zip(bars, rates):
        bar_ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
                    f'{rate:.1f}%', ha='center', va='bottom')

    bar_ax.grid(True, axis='y')

    plt.tight_layout()
    plt.show()


def main():
    """Train MADDPG with and without communication, then demo, report and plot.

    Both runs use a fresh 8x8 environment with 2 predators, 2 prey and a
    50-step limit, and train for 500 episodes each.
    """
    rule = "="*50

    print("Multi-Agent Predator-Prey MARL System")
    print("=====================================")

    # Training parameters
    num_episodes = 500
    grid_size = 8
    num_predators = 2
    num_prey = 2

    print(f"Environment: {grid_size}x{grid_size} grid, {num_predators} predators, {num_prey} prey")
    print(f"Training for {num_episodes} episodes each\n")

    # (result label, communication flag, training banner, demonstration banner)
    runs = (
        ('No Communication', False,
         "1. Training MADDPG without communication...",
         "\nDemonstration 1: MADDPG without communication"),
        ('With Communication', True,
         "\n2. Training MADDPG with communication...",
         "\nDemonstration 2: MADDPG with communication"),
    )

    # Train the runs one after another, each on its own environment.
    trainers = {}
    for label, use_comm, train_banner, _ in runs:
        print(train_banner)
        env = PredatorPreyEnv(grid_size=grid_size, num_predators=num_predators,
                              num_prey=num_prey, max_steps=50)
        trainer = MADDPGTrainer(env, communication=use_comm)
        trainer.train(num_episodes=num_episodes, render_freq=100)
        trainers[label] = trainer

    # Final demonstrations
    print("\n" + rule)
    print("FINAL DEMONSTRATIONS")
    print(rule)

    for label, _, _, demo_banner in runs:
        print(demo_banner)
        trainers[label].demonstrate()

    # Compare results
    print("\n" + rule)
    print("TRAINING RESULTS COMPARISON")
    print(rule)

    for name, trainer in trainers.items():
        # Restrict each metric to the most recent 100 episodes when available.
        recent = {}
        for key in ('episode_rewards', 'episode_catches', 'episode_lengths'):
            history = getattr(trainer, key)
            recent[key] = history[-100:] if len(history) >= 100 else history

        solved = [catches == trainer.env.num_prey for catches in recent['episode_catches']]
        success_rate = np.mean(solved) * 100

        print(f"\n{name}:")
        print(f"  Average Reward (last 100): {np.mean(recent['episode_rewards']):.2f}")
        print(f"  Average Catches (last 100): {np.mean(recent['episode_catches']):.2f}/{trainer.env.num_prey}")
        print(f"  Success Rate (last 100): {success_rate:.1f}%")
        print(f"  Average Episode Length (last 100): {np.mean(recent['episode_lengths']):.1f}")

    # Plot results
    plot_training_results(trainers)

    print("\n" + rule)
    print("ANALYSIS")
    print(rule)
    for line in (
        "Key observations:",
        "1. Communication allows agents to coordinate better",
        "2. Shared critics in MADDPG help with multi-agent credit assignment",
        "3. Emergent behaviors develop through trial and error",
        "4. Success rate typically improves with communication",
        "5. Episode length may decrease with better coordination",
    ):
        print(line)
    

# Run the full train / demonstrate / compare pipeline only when this file is
# executed as a script, not when it is imported as a module.
if __name__ == "__main__":
    main()

Multi-Agent Predator-Prey MARL System
Environment: 8x8 grid, 2 predators, 2 prey
Training for 500 episodes each

1. Training MADDPG without communication...
Training MADDPG without communication...
Episode 0/500
  Avg Reward: 7.20
  Avg Catches: 2.00/2
  Avg Length: 28.0

--- Demonstration ---

Step 0, Caught: 0/2
 1  .  .  .  .  .  .  P
 .  .  .  .  .  .  .  .
 .  .  .  .  .  .  .  .
 .  .  .  .  .  .  .  .
 .  .  .  .  .  .  .  .
 .  .  .  .  .  .  .  .
 .  .  .  .  .  .  .  .
 .  .  P  .  .  .  .  0

Step 1, Caught: 0/2
 .  .  .  .  .  .  .  P
 1  .  .  .  .  .  .  .
 .  .  .  .  .  .  .  .
 .  .  .  .  .  .  .  .
 .  .  .  .  .  .  .  .
 .  .  .  .  .  .  .  .
 .  .  .  .  .  .  .  0
 .  .  .  P  .  .  .  .

Step 2, Caught: 0/2
 1  .  .  .  .  .  .  P
 .  .  .  .  .  .  .  .
 .  .  .  .  .  .  .  .
 .  .  .  .  .  .  .  .
 .  .  .  .  .  .  .  .
 .  .  .  .  .  .  .  .
 .  .  .  .  .  .  .  .
 .  .  .  P  .  .  .  0

Step 3, Caught: 0/2
 .  1  .  .  .  .  .  .
 .  .  .  .  .  .  . 

KeyboardInterrupt: 