In [1]:
pip install imageio imageio-ffmpeg

Collecting imageio-ffmpeg
  Downloading imageio_ffmpeg-0.6.0-py3-none-win_amd64.whl.metadata (1.5 kB)
Downloading imageio_ffmpeg-0.6.0-py3-none-win_amd64.whl (31.2 MB)
   ---------------------------------------- 0.0/31.2 MB ? eta -:--:--
   ---------------------------------------- 0.0/31.2 MB ? eta -:--:--
   ---------------------------------------- 0.0/31.2 MB ? eta -:--:--
   ---------------------------------------- 0.0/31.2 MB 326.8 kB/s eta 0:01:36
   ---------------------------------------- 0.1/31.2 MB 798.9 kB/s eta 0:00:39
    --------------------------------------- 0.4/31.2 MB 2.1 MB/s eta 0:00:15
    --------------------------------------- 0.7/31.2 MB 3.1 MB/s eta 0:00:10
   - -------------------------------------- 1.4/31.2 MB 5.0 MB/s eta 0:00:06
   -- ------------------------------------- 1.9/31.2 MB 5.7 MB/s eta 0:00:06
   --- ------------------------------------ 2.5/31.2 MB 6.7 MB/s eta 0:00:05
   ---- ----------------------------------- 3.2/31.2 MB 7.5 MB/s eta 0:00:04
  

In [1]:
# JUPYTER → REAL PYGAME WINDOW (no headless) + RECORDING (MP4/GIF) + WATERMARK
import os, random
from dataclasses import dataclass
from typing import Optional, Tuple, List, Dict

# Ensure we do NOT use the dummy headless driver in this notebook
if "SDL_VIDEODRIVER" in os.environ and os.environ["SDL_VIDEODRIVER"] == "dummy":
    del os.environ["SDL_VIDEODRIVER"]

import numpy as np
from PIL import Image, ImageDraw

# ---------------- assets: ensure placeholders if missing ----------------
ASSETS_DIR = "assets"
DRONE_PATH = os.path.join(ASSETS_DIR, "drone.png")
BUILDING_PATH = os.path.join(ASSETS_DIR, "building.png")

def ensure_assets():
    os.makedirs(ASSETS_DIR, exist_ok=True)
    if not os.path.exists(DRONE_PATH):
        img = Image.new("RGBA", (256,256), (0,0,0,0))
        d = ImageDraw.Draw(img)
        d.ellipse((88,88,168,168), fill=(180,180,185,255))
        d.rectangle((126,20,130,236), fill=(160,160,165,255))
        d.rectangle((20,126,236,130), fill=(160,160,165,255))
        for cx,cy in [(128,32),(128,224),(32,128),(224,128)]:
            d.ellipse((cx-24,cy-24,cx+24,cy+24), outline=(90,90,95,255), width=6, fill=(210,210,215,255))
        img.save(DRONE_PATH)
    if not os.path.exists(BUILDING_PATH):
        img = Image.new("RGBA", (256,256), (0,0,0,0))
        d = ImageDraw.Draw(img)
        d.rectangle((70,60,186,210), fill=(70,120,200,255))
        d.rectangle((90,30,166,60), fill=(60,100,180,255))
        for r in range(4):
            for c in range(3):
                x0 = 82 + c*28; y0 = 74 + r*32
                d.rectangle((x0,y0,x0+20,y0+20), fill=(230,240,255,255))
        d.rectangle((122,170,134,210), fill=(40,70,130,255))
        img.save(BUILDING_PATH)

ensure_assets()

# ---------------- env ----------------
try:
    import gymnasium as gym
    from gymnasium import spaces
except Exception as e:
    raise ImportError("Please install gymnasium: pip install gymnasium") from e

import pygame

# ---- video recorder (mp4 if possible, else gif) ----
try:
    import imageio.v2 as imageio
except Exception:
    imageio = None  # we’ll handle gracefully below

class PygameRecorder:
    """
    Records frames written from a Pygame Surface.
    - .mp4 via ffmpeg/H.264 if available
    - falls back to .gif (uses duration instead of fps)
    Avoids TIFF entirely (Pillow TIFF writer has no 'fps').
    """
    def __init__(self, path: str, fps: int = 8):
        if imageio is None:
            raise RuntimeError(
                "imageio is not installed. Install with: pip install imageio imageio-ffmpeg"
            )
        self.fps = int(fps)
        base, ext = os.path.splitext(path)
        ext_lower = ext.lower()
        if ext_lower in (".tif", ".tiff"):
            ext_lower = ".gif"
            path = base + ext_lower
        if ext_lower not in (".mp4", ".gif"):
            ext_lower = ".mp4"
            path = base + ext_lower

        self.path = path
        self.ext = ext_lower
        self.writer = None
        self._opened_gif_fallback = False

    def _open_writer(self, frame_shape_hw3):
        # frame_shape_hw3 is (H, W, 3)
        if self.ext == ".mp4":
            try:
                self.writer = imageio.get_writer(
                    self.path, fps=self.fps, codec="libx264", quality=8
                )
                return
            except Exception:
                # fallback to GIF
                self.ext = ".gif"
                self.path = os.path.splitext(self.path)[0] + ".gif"

        if self.ext == ".gif":
            # GIF uses duration per frame (seconds), not fps
            self.writer = imageio.get_writer(
                self.path,
                format="GIF",
                mode="I",
                duration=1.0 / max(1, self.fps),
                loop=0,
            )
            self._opened_gif_fallback = True
            return

        # last resort: force GIF
        self.ext = ".gif"
        self.path = os.path.splitext(self.path)[0] + ".gif"
        self.writer = imageio.get_writer(
            self.path,
            format="GIF",
            mode="I",
            duration=1.0 / max(1, self.fps),
            loop=0,
        )
        self._opened_gif_fallback = True

    def append(self, surface: pygame.Surface):
        # Pygame surface -> (H,W,3) uint8
        arr = pygame.surfarray.array3d(surface)
        frame = np.transpose(arr, (1, 0, 2))
        if self.writer is None:
            self._open_writer(frame.shape)
        self.writer.append_data(frame)

    def close(self):
        if self.writer is not None:
            self.writer.close()
            if self._opened_gif_fallback or self.ext == ".gif":
                print(f"[Recorder] Saved GIF to {self.path}")
            else:
                print(f"[Recorder] Saved MP4 to {self.path}")

@dataclass
class GridConfig:
    width: int = 7
    height: int = 7
    num_agents: int = 5
    max_steps: int = 160
    step_penalty: float = 0.01
    goal_reward: float = 1.0
    collision_penalty: float = 0.05
    seed: Optional[int] = 42

class MultiAgentGridEnv(gym.Env):
    metadata = {"render_modes": ["human","rgb_array"], "render_fps": 8}

    def __init__(self, cfg: GridConfig = GridConfig(), render_mode: Optional[str] = "human",
                 watermark_text: Optional[str] = "sreejeetm1729", recorder: Optional[PygameRecorder] = None):
        super().__init__()
        self.cfg = cfg
        self.render_mode = render_mode
        self.rng = np.random.RandomState(cfg.seed)

        self.action_space = spaces.MultiDiscrete([5]*cfg.num_agents)
        obs_dim = cfg.num_agents * 2 * 2
        self.observation_space = spaces.Box(0.0, 1.0, shape=(obs_dim,), dtype=np.float32)

        self._agent_pos: List[Tuple[int,int]] = []
        self._goals: List[Tuple[int,int]] = []
        self._t = 0
        self._last_collisions = 0

        # pygame stuff
        self._viewer = None
        self._surface = None
        self._clock = None
        self._font = None
        self._drone_sprites = None
        self._building_sprite = None

        # watermark & recorder
        self._watermark_text = watermark_text
        self._recorder = recorder  # type: Optional[PygameRecorder]

    def set_recorder(self, recorder: Optional[PygameRecorder]):
        self._recorder = recorder

    def set_watermark(self, text: Optional[str]):
        self._watermark_text = text

    def seed(self, seed: Optional[int] = None):
        self.cfg.seed = seed
        self.rng = np.random.RandomState(seed)

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        if seed is not None:
            self.seed(seed)
        self._t = 0
        self._last_collisions = 0
        self._agent_pos = []
        occ = set()
        while len(self._agent_pos) < self.cfg.num_agents:
            x = int(self.rng.randint(0, self.cfg.width))
            y = int(self.rng.randint(0, self.cfg.height))
            if (x,y) not in occ:
                self._agent_pos.append((x,y)); occ.add((x,y))
        self._goals = []
        while len(self._goals) < self.cfg.num_agents:
            x = int(self.rng.randint(0, self.cfg.width))
            y = int(self.rng.randint(0, self.cfg.height))
            if (x,y) not in occ:
                self._goals.append((x,y)); occ.add((x,y))
        obs = self._get_obs()
        info = {}
        if self.render_mode == "human":
            self._render_frame()
        return obs, info

    def step(self, action):
        self._t += 1
        actions = np.array(action, dtype=int)
        assert actions.shape == (self.cfg.num_agents,)
        # propose
        proposed = []
        for i,a in enumerate(actions):
            x,y = self._agent_pos[i]
            if   a==1: y = max(0, y-1)
            elif a==2: y = min(self.cfg.height-1, y+1)
            elif a==3: x = max(0, x-1)
            elif a==4: x = min(self.cfg.width-1, x+1)
            proposed.append((x,y))
        # collisions
        counts = {}
        for p in proposed: counts[p] = counts.get(p,0)+1
        new_pos, col_mask = [], [False]*self.cfg.num_agents
        for i,p in enumerate(proposed):
            if counts[p] > 1:
                col_mask[i] = True
                new_pos.append(self._agent_pos[i])
            else:
                new_pos.append(p)
        self._agent_pos = new_pos
        self._last_collisions = int(sum(col_mask))

        # rewards
        rewards = np.zeros(self.cfg.num_agents, dtype=np.float32)
        for i in range(self.cfg.num_agents):
            rewards[i] -= self.cfg.step_penalty
            if col_mask[i]:
                rewards[i] -= self.cfg.collision_penalty

        # goals
        occ = set(self._agent_pos)
        for i in range(self.cfg.num_agents):
            if self._agent_pos[i] == self._goals[i]:
                rewards[i] += self.cfg.goal_reward
                taken = occ.union(set(self._goals))
                while True:
                    gx = int(self.rng.randint(0, self.cfg.width))
                    gy = int(self.rng.randint(0, self.cfg.height))
                    if (gx,gy) not in taken:
                        self._goals[i] = (gx,gy); break

        done = self._t >= self.cfg.max_steps
        obs = self._get_obs()
        info = {"collisions": self._last_collisions, "per_agent_rewards": rewards.copy()}
        if self.render_mode == "human":
            self._render_frame()
        return obs, rewards.sum().astype(np.float32), done, False, info

    def _get_obs(self):
        W,H = max(1,self.cfg.width-1), max(1,self.cfg.height-1)
        out = []
        for (ax,ay) in self._agent_pos: out += [ax/W, ay/H]
        for (gx,gy) in self._goals:     out += [gx/W, gy/H]
        return np.array(out, dtype=np.float32)

    def close(self):
        if self._viewer is not None:
            pygame.display.quit()
            pygame.quit()
            self._viewer = None
            self._surface = None
            self._clock = None
            self._font = None
            self._drone_sprites = None
            self._building_sprite = None

    # ----- rendering helpers -----
    def _try_load(self, path):
        try:
            return pygame.image.load(path).convert_alpha()
        except Exception:
            return None

    def _ensure_window(self, wpx, hpx):
        if self._viewer is None:
            pygame.init()
            self._viewer = pygame.display.set_mode((wpx, hpx))
            pygame.display.set_caption("MultiAgentGridEnv")
            self._surface = pygame.Surface((wpx, hpx), flags=pygame.SRCALPHA).convert_alpha()
            self._clock = pygame.time.Clock()
            self._font = pygame.font.SysFont("consolas", 18)

    def _ensure_sprites(self, cell):
        if self._drone_sprites is not None and self._building_sprite is not None:
            return
        pad = 10; size = max(8, cell - pad)
        drone = self._try_load(DRONE_PATH)
        building = self._try_load(BUILDING_PATH)
        if drone is not None:
            drone = pygame.transform.smoothscale(drone, (size, size))
        if building is not None:
            building = pygame.transform.smoothscale(building, (size, size))
        self._drone_sprites = [drone for _ in range(self.cfg.num_agents)]
        self._building_sprite = building

    def _draw_watermark(self, surf: pygame.Surface, wpx: int, hpx: int):
        if not self._watermark_text:
            return
        text = self._font.render(self._watermark_text, True, (240, 240, 245))
        tw, th = text.get_size()
        pad = 8
        # semi-transparent rounded background on an alpha surface
        bg = pygame.Surface((tw + 2*pad, th + 2*pad), pygame.SRCALPHA)
        pygame.draw.rect(bg, (0,0,0,120), bg.get_rect(), border_radius=10)
        bg_pos = (wpx - bg.get_width() - 10, 10)
        surf.blit(bg, bg_pos)
        surf.blit(text, (bg_pos[0] + pad, bg_pos[1] + pad))

    def _render_frame(self, return_array: bool = False):
        cell, margin = 64, 2
        wpx = self.cfg.width*cell + (self.cfg.width+1)*margin
        hpx = self.cfg.height*cell + (self.cfg.height+1)*margin + 40
        self._ensure_window(wpx, hpx)
        self._ensure_sprites(cell)

        # keep window responsive
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                self.close()
                return

        surf = self._surface
        surf.fill((25,25,30,255))

        # grid
        for y in range(self.cfg.height):
            for x in range(self.cfg.width):
                rx = x*cell + (x+1)*margin
                ry = y*cell + (y+1)*margin
                pygame.draw.rect(surf, (40,45,55), (rx,ry,cell,cell), border_radius=8)

        agent_colors = [(66,135,245), (80,200,120), (255,99,132), (255,165,0), (160,95,245)]
        goal_colors  = [(120,170,255), (120,230,170), (255,160,180), (255,205,120), (200,160,255)]

        # goals
        for i,(gx,gy) in enumerate(self._goals):
            rx = gx*cell + (gx+1)*margin
            ry = gy*cell + (gy+1)*margin
            cx,cy = rx+cell//2, ry+cell//2
            if self._building_sprite is not None:
                rect = self._building_sprite.get_rect(center=(cx,cy))
                surf.blit(self._building_sprite, rect)
            else:
                pts = [(cx, ry+8), (rx+cell-8, cy), (cx, ry+cell-8), (rx+8, cy)]
                pygame.draw.polygon(surf, goal_colors[i%len(goal_colors)], pts)

        # agents
        for i,(ax,ay) in enumerate(self._agent_pos):
            rx = ax*cell + (ax+1)*margin
            ry = ay*cell + (ay+1)*margin
            cx,cy = rx+cell//2, ry+cell//2
            drone = self._drone_sprites[i] if self._drone_sprites else None
            if drone is not None:
                rect = drone.get_rect(center=(cx,cy))
                surf.blit(drone, rect)
            else:
                pygame.draw.circle(surf, agent_colors[i%len(agent_colors)], (cx,cy), cell//3)
                idx = self._font.render(str(i+1), True, (15,15,18))
                surf.blit(idx, idx.get_rect(center=(cx,cy)))

        # HUD
        hud = self._font.render(f"t={self._t}  collisions={self._last_collisions}", True, (230,230,235))
        surf.blit(hud, (10, hpx-32))

        # WATERMARK (top-right)
        self._draw_watermark(surf, wpx, hpx)

        # blit + present
        if self._viewer is not None:
            self._viewer.blit(surf, (0,0))
            pygame.display.flip()

        # record this frame if enabled
        if self._recorder is not None:
            self._recorder.append(surf)

        self._clock.tick(self.metadata["render_fps"])

# ---------------- independent Q-learners ----------------
class IndependentQLearners:
    def __init__(self, env: MultiAgentGridEnv, alpha=0.5, gamma=0.97, eps_start=1.0, eps_end=0.05, eps_decay=0.98):
        self.env = env
        n = env.cfg.num_agents
        self.qtables: List[Dict[Tuple[int,int,int,int,int], float]] = [dict() for _ in range(n)]
        self.alpha, self.gamma = alpha, gamma
        self.eps, self.eps_end, self.eps_decay = eps_start, eps_end, eps_decay

    def _state_i(self, i):
        (ax,ay) = self.env._agent_pos[i]
        (gx,gy) = self.env._goals[i]
        return (ax,ay,gx,gy)

    def _q(self, i, s, a): return self.qtables[i].get((*s,a), 0.0)

    def _best_a(self, i, s):
        best_a, best_q = 0, -1e9
        for a in range(5):
            q = self._q(i,s,a)
            if q > best_q: best_q, best_a = q, a
        return best_a, (best_q if best_q > -1e9 else 0.0)

    def select_actions(self):
        acts = []
        for i in range(self.env.cfg.num_agents):
            s = self._state_i(i)
            if np.random.rand() < self.eps:
                a = np.random.randint(0,5)
            else:
                a,_ = self._best_a(i,s)
            acts.append(a)
        return np.array(acts, dtype=int)

    def update(self, prev_states, actions, rewards, next_states):
        for i in range(self.env.cfg.num_agents):
            s = prev_states[i]; a = int(actions[i]); r = float(rewards[i])
            _, max_q_next = self._best_a(i, next_states[i])
            old = self._q(i,s,a)
            self.qtables[i][(*s,a)] = old + self.alpha*(r + self.gamma*max_q_next - old)
        self.eps = max(self.eps_end, self.eps*self.eps_decay)

# ---------------- runner that opens pygame window & records ----------------
def run_pygame_demo(train_episodes=8, test_episodes=2, record_to: Optional[str] = "run_recording.mp4"):
    cfg = GridConfig(width=7, height=7, num_agents=5, max_steps=160, seed=42)

    # set up optional recorder
    recorder = None
    if record_to is not None:
        try:
            recorder = PygameRecorder(record_to, fps=MultiAgentGridEnv.metadata["render_fps"])
        except Exception as e:
            print("[Recorder] Could not initialize recorder:", e)
            recorder = None

    env = MultiAgentGridEnv(cfg, render_mode="human", watermark_text="sreejeetm1729", recorder=recorder)
    agent = IndependentQLearners(env)

    try:
        print("Training...")
        for ep in range(train_episodes):
            obs,_ = env.reset()
            ep_r = 0.0
            for _ in range(cfg.max_steps):
                # pump events during training too (keeps window alive if open)
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        env.close()
                        raise SystemExit
                prev_states = [agent._state_i(i) for i in range(cfg.num_agents)]
                acts = agent.select_actions()
                obs, total_r, done, truncated, info = env.step(acts)
                rewards = info.get("per_agent_rewards", np.full(cfg.num_agents, total_r/cfg.num_agents, dtype=np.float32))
                next_states = [agent._state_i(i) for i in range(cfg.num_agents)]
                agent.update(prev_states, acts, rewards, next_states)
                ep_r += float(rewards.sum())
                if done: break
            print(f"Train ep {ep+1}/{train_episodes}  return={ep_r:.2f}  eps={agent.eps:.2f}")

        print("\nTesting (greedy)... Close window to stop.")
        agent.eps = 0.0
        for ep in range(test_episodes):
            obs,_ = env.reset()
            ep_r = 0.0
            for _ in range(cfg.max_steps):
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        env.close()
                        raise SystemExit
                acts = agent.select_actions()
                obs, total_r, done, truncated, info = env.step(acts)
                ep_r += float(total_r)
                if done: break
            print(f"Test ep {ep+1}/{test_episodes}  return={ep_r:.2f}")
    finally:
        if env is not None and hasattr(env, "_recorder") and env._recorder is not None:
            try:
                env._recorder.close()
            except Exception as e:
                print("[Recorder] close() error:", e)
        env.close()

# ---- run it ----
# For MP4 (recommended): ensure `pip install imageio imageio-ffmpeg`
# For GIF: set record_to="run_recording.gif"
run_pygame_demo(train_episodes=8, test_episodes=2, record_to="run_recording.mp4")


Training...




Train ep 1/8  return=-4.00  eps=0.05
Train ep 2/8  return=-3.80  eps=0.05
Train ep 3/8  return=-8.40  eps=0.05
Train ep 4/8  return=-6.40  eps=0.05
Train ep 5/8  return=-4.70  eps=0.05
Train ep 6/8  return=-0.65  eps=0.05
Train ep 7/8  return=-1.85  eps=0.05
Train ep 8/8  return=-8.10  eps=0.05

Testing (greedy)... Close window to stop.
Test ep 1/2  return=-8.00
Test ep 2/2  return=-8.00
[Recorder] Saved MP4 to run_recording.mp4
