# Phase A

In [1]:
!pip install pygame opencv-python tqdm --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.0/14.0 MB[0m [31m106.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.0/63.0 MB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
!git clone https://github.com/sourabhv/FlapPyBird.git
%cd FlapPyBird

Cloning into 'FlapPyBird'...
remote: Enumerating objects: 484, done.[K
remote: Counting objects: 100% (72/72), done.[K
remote: Compressing objects: 100% (28/28), done.[K
remote: Total 484 (delta 48), reused 44 (delta 44), pack-reused 412 (from 2)[K
Receiving objects: 100% (484/484), 907.50 KiB | 7.32 MiB/s, done.
Resolving deltas: 100% (240/240), done.
/content/FlapPyBird


In [3]:
# ------- collector.py  (overwrite the old file) --------------------
import os, csv, random, cv2, numpy as np
import pygame
from pygame.locals import *

# ---------------- CONFIG -----------------
EPISODES   = 300                # adjust for more/less data
SAVE_DIR   = "data"
STACK_SIZE = 4
RES        = (84, 84)           # H × W of saved frames
FPS        = 30
# -----------------------------------------

os.environ["SDL_VIDEODRIVER"] = "dummy"        # headless SDL
pygame.init()
SCREEN      = pygame.display.set_mode((288, 512))
FPSCLOCK    = pygame.time.Clock()
IMAGES = {
    "bg"  : pygame.image.load("assets/sprites/background-day.png").convert(),
    "base": pygame.image.load("assets/sprites/base.png").convert_alpha(),
    "bird": pygame.image.load("assets/sprites/yellowbird-midflap.png").convert_alpha(),
    "pipe": [pygame.transform.flip(
                 pygame.image.load("assets/sprites/pipe-green.png").convert_alpha(),
                 False, True),
             pygame.image.load("assets/sprites/pipe-green.png").convert_alpha()]
}
BASEY = int(512 * 0.79)          # ground y-coord

# ---------- helpers -------------------------------------------------
def new_pipe_pair():
    """Return TWO dicts (upper & lower) representing one pipe gap."""
    gap_y = random.randint(100, 300)
    x     = 288 + 10
    return [{'x': x, 'y': gap_y - IMAGES["pipe"][0].get_height()},
            {'x': x, 'y': gap_y + 100}]

def game_step(state, action):
    """Advance game one frame given action (0/1). Return (state,reward,done)."""
    bird_y, vel_y, pipes = state
    # flap
    if action and bird_y > -2 * IMAGES["bird"].get_height():
        vel_y = -9
    # gravity
    vel_y = min(vel_y + 1, 10)
    bird_y += vel_y
    # move pipes
    for p in pipes:
        p['x'] -= 4
    # add new pair when rightmost pair is left of threshold
    if pipes[-1]['x'] < 288 - 150:
        pipes.extend(new_pipe_pair())
    # remove leftmost pair once off-screen
    if pipes[0]['x'] < -IMAGES["pipe"][0].get_width():
        pipes = pipes[2:]

    # collision & reward
    reward, done = 0., False
    if bird_y + IMAGES["bird"].get_height() >= BASEY:
        done, reward = True, -1
    else:
        for up, lo in zip(pipes[::2], pipes[1::2]):        # iterate over pairs
            if up['x'] < 60 < up['x'] + IMAGES["pipe"][0].get_width():
                if not (up['y'] + IMAGES["pipe"][0].get_height() < bird_y <
                        lo['y'] - IMAGES["bird"].get_height()):
                    done, reward = True, -1
                    break
    if not done and pipes[0]['x'] + 4 == 60:   # passed a pipe
        reward = 1
    return (bird_y, vel_y, pipes), reward, done

def render(state):
    """Return numpy RGB frame."""
    bird_y, _, pipes = state
    SCREEN.blit(IMAGES["bg"], (0, 0))
    for up, lo in zip(pipes[::2], pipes[1::2]):
        SCREEN.blit(IMAGES["pipe"][0], (up['x'], up['y']))
        SCREEN.blit(IMAGES["pipe"][1], (lo['x'], lo['y']))
    SCREEN.blit(IMAGES["base"], (0, BASEY))
    SCREEN.blit(IMAGES["bird"], (60, bird_y))
    return pygame.surfarray.array3d(SCREEN).swapaxes(0, 1)  # H,W,C
# -------------------------------------------------------------------

os.makedirs(SAVE_DIR, exist_ok=True)
csv_path = os.path.join(SAVE_DIR, "labels.csv")
with open(csv_path, "w", newline="") as f_csv:
    writer = csv.writer(f_csv)
    writer.writerow(["episode", "step", "action", "reward", "done"])

frame_stack = [np.zeros(RES, np.uint8)] * STACK_SIZE
counter = 0

for ep in range(EPISODES):
    state = (int((512 - IMAGES["bird"].get_height()) / 2), -9, list(new_pipe_pair()))
    done, step = False, 0
    while not done:
        action = random.randint(0, 1)  # 0 = no flap, 1 = flap
        state, reward, done = game_step(state, action)
        frame_rgb = render(state)
        frame_gray = cv2.cvtColor(cv2.resize(frame_rgb, RES), cv2.COLOR_BGR2GRAY)

        frame_stack.pop(0)
        frame_stack.append(frame_gray)
        stacked = np.stack(frame_stack, axis=0)  # shape: (4,84,84)

        np.savez_compressed(f"{SAVE_DIR}/f{counter:07d}", obs=stacked)
        writer = csv.writer(open(csv_path, "a", newline=""))
        writer.writerow([ep, step, action, reward, int(done)])

        counter += 1
        step += 1
        FPSCLOCK.tick(FPS)

print(f"✅  Saved {counter} stacked frames + CSV in '{SAVE_DIR}/'")
# --------------------------------------------------------------------

pygame 2.6.1 (SDL 2.28.4, Python 3.11.12)
Hello from the pygame community. https://www.pygame.org/contribute.html
✅  Saved 18000 stacked frames + CSV in 'data/'


In [4]:
!python collector.py
!du -sh data
!head data/labels.csv

python3: can't open file '/content/FlapPyBird/collector.py': [Errno 2] No such file or directory
72M	data
episode,step,action,reward,done
0,0,1,0.0,0
0,1,0,0.0,0
0,2,0,0.0,0
0,3,1,0.0,0
0,4,0,0.0,0
0,6,1,0.0,0
0,7,0,0.0,0
0,8,0,0.0,0
0,9,0,0.0,0


# Phase B

In [6]:
import glob, numpy as np, pandas as pd, torch
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T

DATA_DIR   = "data"
CSV_PATH   = f"{DATA_DIR}/labels.csv"
STACK_SIZE = 4            # must match collector
RES        = (84, 84)

class FlappyFrameStack(Dataset):
    def __init__(self, csv_path, root_dir, transform=None):
        self.meta = pd.read_csv(csv_path)
        self.root = root_dir
        self.transform = transform or (lambda x: x)

    def __len__(self):
        return len(self.meta)

    def __getitem__(self, idx):
        row  = self.meta.iloc[idx]
        path = f"{self.root}/f{idx:07d}.npz"      # relies on sequential naming
        stack = np.load(path)["obs"] / 255.0      # to float32 [0,1]
        stack = torch.from_numpy(stack).float()   # shape (4,84,84)
        action = torch.tensor(row.action, dtype=torch.long)
        return self.transform(stack), action

# transforms (optional – add random crop, noise, etc.)
dataset = FlappyFrameStack(CSV_PATH, DATA_DIR)
train_loader = DataLoader(dataset, batch_size=128, shuffle=True, num_workers=2)

print("Batches:", len(train_loader), "   Sample shape:", next(iter(train_loader))[0].shape)

Batches: 141    Sample shape: torch.Size([128, 4, 84, 84])


In [10]:
# -------- Phase B : Supervised CNN Warm-Start -----------------------
import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import DataLoader, random_split
from pathlib import Path
import pandas as pd, numpy as np

# 1)  Dataset ----------------------------------------------------------------
DATA_DIR = Path("data")          # or your Drive path
CSV_PATH = DATA_DIR / "labels.csv"

class FlappyFrames(torch.utils.data.Dataset):
    def __init__(self, csv_path, data_dir):
        self.meta = pd.read_csv(csv_path)
        self.data_dir = data_dir
    def __len__(self):
        return len(self.meta)
    def __getitem__(self, idx):
        row  = self.meta.iloc[idx]
        arr  = np.load(self.data_dir / f"f{idx:07d}.npz")["obs"]   # uint8 (4,84,84)
        arr  = torch.tensor(arr, dtype=torch.float32) / 255.0      # → float32 [0,1]
        label= torch.tensor(row.action, dtype=torch.long)
        return arr, label

full_ds   = FlappyFrames(CSV_PATH, DATA_DIR)
train_len = int(0.9 * len(full_ds))          # guarantee exact sum
val_len   = len(full_ds) - train_len
train_ds, val_ds = random_split(full_ds, [train_len, val_len])

train_loader = DataLoader(train_ds, batch_size=128, shuffle=True, num_workers=2)
val_loader   = DataLoader(val_ds,   batch_size=128, shuffle=False, num_workers=2)

# 2)  Tiny CNN (Atari-style) --------------------------------------------------
class TinyCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(4, 32, 8, stride=4), nn.ReLU(),
            nn.Conv2d(32, 64, 4, stride=2), nn.ReLU(),
            nn.Conv2d(64, 64, 3, stride=1), nn.ReLU(),
        )
        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64*7*7, 512), nn.ReLU(),   # 3136 = 64×7×7
            nn.Linear(512, 2)                    # 2 actions
        )
    def forward(self, x):
        return self.head(self.features(x))

device = "cuda" if torch.cuda.is_available() else "cpu"
model  = TinyCNN().to(device)
opt    = optim.Adam(model.parameters(), lr=1e-4)
loss_fn= nn.CrossEntropyLoss()

# 3)  Train 5 epochs ----------------------------------------------------------
for epoch in range(5):
    model.train(); total_loss = 0
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        opt.zero_grad()
        loss = loss_fn(model(xb), yb); loss.backward(); opt.step()
        total_loss += loss.item() * xb.size(0)
    train_loss = total_loss / train_len

    model.eval(); correct = total = 0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            preds = model(xb).argmax(1)
            correct += (preds == yb).sum().item()
            total   += yb.size(0)
    acc = 100 * correct / total
    print(f"Epoch {epoch+1} | loss {train_loss:.4f} | val acc {acc:.2f}%")

# 4)  Save weights for RL -----------------------------------------------------
torch.save(model.state_dict(), "cnn_supervised.pth")
print("✅  Saved cnn_supervised.pth — ready for RL warm-start")

Epoch 1 | loss 0.6932 | val acc 50.50%
Epoch 2 | loss 0.6933 | val acc 50.50%
Epoch 3 | loss 0.6932 | val acc 49.72%
Epoch 4 | loss 0.6932 | val acc 49.50%
Epoch 5 | loss 0.6931 | val acc 49.67%
✅  Saved cnn_supervised.pth — ready for RL warm-start
