In [71]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import random
import matplotlib.pyplot as plt
from collections import deque, namedtuple

# Define map elements
EMPTY = 0
WALL = 1
LAVA = 2
TREASURE = 3
EXIT = 4
START = 5

# Define Colors for Visualization
COLOR_MAP = {
    EMPTY: "white",
    WALL: "brown",
    LAVA: "red",
    TREASURE: "yellow",
    EXIT: "green",
    START: "blue",
}

# Experience replay buffer
Experience = namedtuple('Experience', ['state', 'action', 'reward', 'next_state', 'done'])

In [3]:
class ReplayBuffer:
    def __init__(self, capacity=5000):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        self.buffer.append(Experience(state, action, reward, next_state, done))

    def sample(self, batch_size):
        experiences = random.sample(self.buffer, k=batch_size)
        states = torch.stack([torch.tensor(e.state, dtype=torch.float) for e in experiences])
        actions = torch.tensor([e.action for e in experiences])
        rewards = torch.tensor([e.reward for e in experiences], dtype=torch.float)
        next_states = torch.stack([torch.tensor(e.next_state, dtype=torch.float) for e in experiences])
        dones = torch.tensor([e.done for e in experiences], dtype=torch.float)
        return states, actions, rewards, next_states, dones

    def __len__(self):
        return len(self.buffer)

class DQNModel(nn.Module):
    """Lightweight Q-Network"""
    def __init__(self, input_dim, output_dim):
        super(DQNModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, output_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

In [4]:
class DungeonAgent:
    """Agent that learns to generate levels"""
    def __init__(self, state_size, action_size, seed=42):
        self.state_size = state_size
        self.action_size = action_size

        random.seed(seed)
        torch.manual_seed(seed)

        self.qnetwork = DQNModel(state_size, action_size)
        self.optimizer = optim.Adam(self.qnetwork.parameters(), lr=0.001)
        self.memory = ReplayBuffer(5000)

        self.batch_size = 32
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_decay = 0.999995
        self.epsilon_min = 0.000001
        print(self.epsilon_decay)
        print(self.epsilon_min)

    def act(self, state):
        """Epsilon-greedy action selection"""
        if random.random() < self.epsilon:
            return random.randint(0, self.action_size - 1)
        state = torch.tensor(state, dtype=torch.float).unsqueeze(0)
        with torch.no_grad():
            return torch.argmax(self.qnetwork(state)).item()

    def step(self, state, action, reward, next_state, done):
        """Save experience & train"""
        self.memory.push(state, action, reward, next_state, done)
        if len(self.memory) > self.batch_size:
            self.learn(self.memory.sample(self.batch_size))

    def learn(self, experiences):
        states, actions, rewards, next_states, dones = experiences

        Q_expected = self.qnetwork(states).gather(1, actions.unsqueeze(1)).squeeze(1)
        Q_targets = rewards + (self.gamma * self.qnetwork(next_states).max(1)[0] * (1 - dones))

        loss = F.mse_loss(Q_expected, Q_targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

import numpy as np
import random
import torch

class DungeonEnvironment:
    """Environment for RL-based level generation with maze-like structures"""
    def __init__(self, width, height):
        self.width = width
        self.height = height
        self.grid = np.zeros((height, width), dtype=np.int8)
        self.max_steps = width * height // 2  # Increased steps for better structures
        self.current_step = 0
        self.element_types = [EMPTY, WALL, LAVA, TREASURE]
        self.start_pos = None
        self.exit_pos = None

    def reset(self):
        """Reset dungeon with randomized start and exit positions"""
        self.grid.fill(EMPTY)

        # Set boundary walls (DO NOT MODIFY THESE)
        self.grid[0, :] = WALL
        self.grid[:, 0] = WALL
        self.grid[-1, :] = WALL
        self.grid[:, -1] = WALL

        # Randomly place start and exit
        self.start_pos, self.exit_pos = self._place_start_and_exit()
        self.grid[self.start_pos[1], self.start_pos[0]] = START
        self.grid[self.exit_pos[1], self.exit_pos[0]] = EXIT

        self.current_step = 0
        return self._get_state()

    def _place_start_and_exit(self):
        """Randomly place start and exit, ensuring they are far apart"""
        while True:
            start_x, start_y = random.randint(1, self.width - 2), random.randint(1, self.height - 2)
            exit_x, exit_y = random.randint(1, self.width - 2), random.randint(1, self.height - 2)

            # Ensure they are far apart
            min_distance = max(self.width, self.height) // 2
            if abs(start_x - exit_x) + abs(start_y - exit_y) >= min_distance:
                return (start_x, start_y), (exit_x, exit_y)

    def step(self, action):
        """Modify grid based on action"""
        self.current_step += 1
        position_idx = action // len(self.element_types)
        element_type = self.element_types[action % len(self.element_types)]

        y, x = divmod(position_idx, self.width)
        reward = 0
        done = False

        # Restrict modifications to the interior (NO MODIFICATION of walls on boundary)
        if x == 0 or y == 0 or x == self.width-1 or y == self.height-1:
            return self._get_state(), -5, False  # Penalize modifying boundaries

        if (x, y) == self.start_pos or (x, y) == self.exit_pos:
            reward -= 5  # Can't modify start/exit
        else:
            self.grid[y, x] = element_type

            # **Improved Reward System for Maze-Like Walls**
            if element_type == WALL:
                if self._creates_dead_end(x, y):
                    reward -= 2  # Penalize dead ends
                else:
                    reward += 10  # Strongly encourage walls

                if self._too_many_walls():
                    reward -= 4  # Penalize excessive walls

                if self._wall_continues_maze(x, y):
                    reward += 10  # Reward walls that extend the maze structure

            elif element_type == LAVA:
                if abs(x - self.start_pos[0]) + abs(y - self.start_pos[1]) < 3:
                    reward -= 3  # Penalize lava near the start
                else:
                    reward += 10  # Reward strategic lava placement

            elif element_type == TREASURE:
                if self._is_clustered_treasure(x, y):
                    reward -= 1  # Penalize clustering treasures
                else:
                    reward += 5  # Reward scattered treasure placement

        # **Bonus Reward for Path Complexity**
        if self.current_step >= self.max_steps:
            done = True
            path_length = self._calculate_path_length()
            reward += path_length * 0.3  # Encourage longer paths

        return self._get_state(), reward, done

    def _creates_dead_end(self, x, y):
        """Check if placing a wall at (x,y) creates a dead end"""
        open_paths = 0
        for dx, dy in [(0, -1), (1, 0), (0, 1), (-1, 0)]:
            nx, ny = x + dx, y + dy
            if 0 <= nx < self.width and 0 <= ny < self.height and self.grid[ny, nx] != WALL:
                open_paths += 1
        return open_paths <= 1  # If only 1 way out, it's a dead end

    def _too_many_walls(self):
        """Check if the map has too many walls (avoid over-blocking)"""
        wall_count = np.sum(self.grid == WALL)
        total_cells = self.width * self.height
        return wall_count > total_cells * 0.7  # Limit walls to 70% of the map

    def _wall_continues_maze(self, x, y):
        """Encourage walls that form a maze-like pattern"""
        adjacent_walls = 0
        for dx, dy in [(0, -1), (1, 0), (0, 1), (-1, 0)]:
            nx, ny = x + dx, y + dy
            if 0 <= nx < self.width and 0 <= ny < self.height and self.grid[ny, nx] == WALL:
                adjacent_walls += 1
        return adjacent_walls >= 2  # Encourage walls that connect to existing ones

    def _is_clustered_treasure(self, x, y):
        """Check if treasure is too close to another treasure"""
        for dy in [-1, 0, 1]:
            for dx in [-1, 0, 1]:
                if 0 <= x + dx < self.width and 0 <= y + dy < self.height:
                    if self.grid[y + dy, x + dx] == TREASURE:
                        return True
        return False

    def _calculate_path_length(self):
        """Estimate the shortest path from start to exit"""
        queue = [(self.start_pos, 0)]
        visited = set()
        visited.add(self.start_pos)

        while queue:
            (x, y), dist = queue.pop(0)

            if (x, y) == self.exit_pos:
                return dist  # Shortest path found

            # Check all four directions
            for dx, dy in [(0, -1), (1, 0), (0, 1), (-1, 0)]:
                nx, ny = x + dx, y + dy
                if (0 <= nx < self.width and 0 <= ny < self.height and
                    (nx, ny) not in visited and self.grid[ny, nx] != WALL):
                    queue.append(((nx, ny), dist + 1))
                    visited.add((nx, ny))

        return 0  # No path found

    def _get_state(self):
        return self.grid.flatten().astype(np.float32)


In [5]:
def train_dungeon_generator(width, height, num_episodes=500):
    """Train RL model to generate levels"""
    env = DungeonEnvironment(width, height)
    state_size = width * height
    action_size = width * height * len(env.element_types)
    agent = DungeonAgent(state_size, action_size)

    for episode in range(num_episodes):
        state = env.reset()
        done = False
        while not done:
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            agent.step(state, action, reward, next_state, done)
            state = next_state

        if episode % 100 == 0:
            print(f"Episode {episode}, Epsilon: {agent.epsilon:.3f}")

    return agent, env

def generate_dungeon_with_model(agent, width, height):
    """Generate a level using a trained RL model"""
    env = DungeonEnvironment(width, height)
    state = env.reset()
    done = False

    while not done:
        action = agent.act(state)
        state, _, done = env.step(action)

    return env.grid

def visualize_dungeon(grid):
    """Display dungeon grid using matplotlib"""
    fig, ax = plt.subplots(figsize=(6, 6))
    for y in range(grid.shape[0]):
        for x in range(grid.shape[1]):
            rect = plt.Rectangle((x, grid.shape[0] - y - 1), 1, 1, facecolor=COLOR_MAP[grid[y, x]], edgecolor='black')
            ax.add_patch(rect)

    ax.set_xlim(0, grid.shape[1])
    ax.set_ylim(0, grid.shape[0])
    ax.set_xticks([])
    ax.set_yticks([])
    plt.show()

In [None]:
# Train and Visualize
if __name__ == "__main__":
    trained_agent, trained_env = train_dungeon_generator(20, 20, num_episodes=1000)
    generated_dungeon = generate_dungeon_with_model(trained_agent, 20, 20)
    visualize_dungeon(generated_dungeon)

0.999995
1e-06
Episode 0, Epsilon: 0.999
Episode 100, Epsilon: 0.904


In [1]:
generated_dungeon = generate_dungeon_with_model(trained_agent, 20, 20)
visualize_dungeon(generated_dungeon)

NameError: name 'generate_dungeon_with_model' is not defined

In [68]:
import torch

def save_model(agent, filename="dungeon_rl_model.pth"):
    """Save trained DQN model weights."""
    torch.save(agent.qnetwork.state_dict(), filename)
    print(f"Model saved as {filename}")

def load_model(agent, filename="dungeon_rl_model.pth"):
    """Load trained DQN model weights."""
    agent.qnetwork.load_state_dict(torch.load(filename))
    agent.qnetwork.eval()  # Set to evaluation mode
    print(f"Model loaded from {filename}")


In [None]:
save_model(trained_agent)

In [None]:
# Load model before generating dungeons
agent = DungeonAgent(state_size=10*10, action_size=10*10*4)  # Ensure same architecture
load_model(agent)

# Generate dungeon using the loaded model
dungeon = generate_dungeon_with_model(agent, 10, 10)
visualize_dungeon(dungeon)