In [None]:
# =============================================================
# Full Working DRL (PPO) Gait Recognition Script for Colab
# =============================================================

# 1. Install dependencies
!pip install stable-baselines3

# 2. Import Libraries
import os
import gymnasium as gym
from gymnasium import spaces

import numpy as np
import cv2
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env

# 3. Set Dataset Path
# Root directory handed to load_images_universal(), which walks it recursively.
# NOTE(review): the path lives under /content/drive, so Google Drive presumably
# has to be mounted before this cell runs — no mount call is visible here.
DATASET_PATH = '/content/drive/MyDrive/Dataset/062'  # <<== Change this to your path

# 4. Load Silhouette Images
def load_images_universal(folder, target_size=(64, 64)):
    """Recursively load grayscale images under ``folder`` with folder-derived labels.

    Every directory that directly contains at least one image file
    (``.png``, ``.jpg``, ``.jpeg``; case-insensitive) is treated as a class.
    The class name is the directory's base name, so directories that share a
    base name in different parts of the tree share a label.

    Directories and files are visited in sorted order so that the label
    indices and the sample order are reproducible across runs and file
    systems (``os.walk`` on its own yields entries in arbitrary order).

    Args:
        folder: Root directory to scan.
        target_size: Size tuple passed to ``cv2.resize`` (width, height).

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays: the resized grayscale
        images and the integer class index of each image. Both arrays are
        empty when no readable image is found.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')
    images = []
    labels = []
    label_to_idx = {}

    for root, dirs, files in os.walk(folder):
        # Sorting in place also fixes the order in which os.walk descends.
        dirs.sort()
        image_files = sorted(
            name for name in files if name.lower().endswith(image_suffixes)
        )
        if not image_files:
            continue

        class_folder = os.path.basename(root)
        # First-seen order assigns the index; an existing entry is reused.
        label = label_to_idx.setdefault(class_folder, len(label_to_idx))

        for img_file in image_files:
            img = cv2.imread(os.path.join(root, img_file), cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread returns None for files it cannot decode; skip them.
                continue
            images.append(cv2.resize(img, target_size))
            labels.append(label)

    return np.array(images), np.array(labels)


# Load every image found under DATASET_PATH together with its class index.
silhouette_images, gait_labels = load_images_universal(DATASET_PATH)
print(silhouette_images.shape, gait_labels.shape)
if len(silhouette_images) == 0:
    # Fail here with a clear message instead of an obscure shape error later.
    raise RuntimeError(f"No readable images found under {DATASET_PATH!r}")
# Scale pixel values by 1/255 in float32; dividing the raw array directly
# would promote it to float64 and double the memory footprint.
silhouette_images = silhouette_images.astype(np.float32) / 255.0
# Append a trailing channel axis so each image is (H, W, 1).
silhouette_images = silhouette_images[..., np.newaxis]
print(f"✅ Loaded {len(silhouette_images)} images!")

# 5. Feature Extractor CNN
class FeatureExtractor(nn.Module):
    """Small CNN that maps single-channel images to 128-dimensional features.

    Two ``Conv2d(3x3, padding=1) -> ReLU -> MaxPool2d(2)`` stages are followed
    by a flatten and one linear projection. The linear layer expects
    ``64 * 16 * 16`` inputs, which corresponds to 64x64 input images after the
    two 2x poolings.
    """

    def __init__(self):
        super().__init__()
        stages = [
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
            nn.Flatten(),
        ]
        self.conv = nn.Sequential(*stages)
        self.fc = nn.Linear(64 * 16 * 16, 128)

    def forward(self, x):
        """Return the feature vectors for a batch ``x`` of shape (N, 1, 64, 64)."""
        return self.fc(self.conv(x))

# 6. Dataset and Feature Extraction
class GaitDataset(Dataset):
    """In-memory dataset pairing channel-first image tensors with labels.

    Args:
        images: 4-D array-like, expected as (N, H, W, C); it is converted to a
            float32 tensor and permuted to (N, C, H, W).
        labels: Array-like of N integer class labels, stored as int64.
    """

    def __init__(self, images, labels):
        channels_last = torch.tensor(images, dtype=torch.float32)
        # Move the last axis to position 1: channels-last -> channels-first.
        self.images = channels_last.permute(0, 3, 1, 2)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return self.images.shape[0]

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]
        return image, label

# Wrap the arrays in a dataset; shuffle stays off so that the extracted
# features keep the same order as the loaded samples.
train_dataset = GaitDataset(silhouette_images, gait_labels)
loader = DataLoader(train_dataset, batch_size=64, shuffle=False)

# The extractor is used as freshly initialised: no training step or weight
# loading happens before the features are computed.
feature_extractor = FeatureExtractor().eval()

feature_list, label_list = [], []

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    for batch_images, batch_labels in loader:
        feature_list.append(feature_extractor(batch_images))
        label_list.append(batch_labels)

# Stitch the per-batch tensors together and hand NumPy arrays to the env.
features = torch.cat(feature_list, dim=0).numpy()
labels = torch.cat(label_list, dim=0).numpy()
print(f"✅ Extracted {features.shape[0]} features of dimension {features.shape[1]}")

# 7. Define Gait Environment
class GaitEnv(gym.Env):
    """Single-step classification environment over pre-extracted features.

    Each episode shows one feature vector as the observation. The action is a
    class index; the reward is 1.0 when it equals the sample's label and 0.0
    otherwise. Every episode terminates after exactly one step.

    Args:
        features: 2-D array of shape ``(num_samples, feature_dim)``.
        labels: 1-D array with one integer class label per row of
            ``features``. The action space has one action per distinct label,
            so labels are expected to be the indices ``0..num_classes - 1``.
    """

    def __init__(self, features, labels):
        super().__init__()
        # Observations must match the float32 dtype declared on the Box below.
        self.features = np.asarray(features, dtype=np.float32)
        self.labels = np.asarray(labels)
        self.num_samples = len(self.features)
        self.current_idx = 0

        self.action_space = spaces.Discrete(len(np.unique(self.labels)))
        self.observation_space = spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(self.features.shape[1],),
            dtype=np.float32,
        )

    def _sample_index(self):
        """Draw a sample index from the environment's own seeded generator."""
        # self.np_random is the generator seeded by reset(seed=...); the global
        # np.random state would ignore that seed.
        return int(self.np_random.integers(self.num_samples))

    def reset(self, seed=None, options=None):
        """Start a new episode on a randomly chosen sample.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.current_idx = self._sample_index()
        return self.features[self.current_idx], {}

    def step(self, action):
        """Score ``action`` against the current sample's label and end the episode.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` with
            ``terminated=True`` and ``truncated=False``.
        """
        correct_label = self.labels[self.current_idx]
        reward = 1.0 if action == correct_label else 0.0
        # Move to a fresh sample and track it, so current_idx always identifies
        # the observation that was handed out last.
        self.current_idx = self._sample_index()
        return self.features[self.current_idx], reward, True, False, {}


# 8. Train PPO Agent
# Build the environment and validate it against the SB3 env checker.
env = GaitEnv(features, labels)
check_env(env)

model = PPO(
    'MlpPolicy',
    env,
    verbose=1,
    batch_size=64,
    learning_rate=1e-4,
)
model.learn(total_timesteps=500000)

# 9. Evaluate Agent
# NOTE: samples are drawn from the same environment (same features/labels)
# the agent was trained on, so this is not a held-out measurement.
total = 200
correct = 0

for _ in range(total):
    obs, _info = env.reset()  # reset returns (observation, info)
    action, _state = model.predict(obs, deterministic=True)
    # The env records which sample it served in current_idx.
    correct += int(action == env.labels[env.current_idx])

print(f"🎯 Final DRL PPO Test Accuracy: {correct/total:.4f}")

Streaming output truncated to the last 5000 lines.
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1           |
|    ep_rew_mean          | 0.09        |
| time/                   |             |
|    fps                  | 639         |
|    iterations           | 8           |
|    time_elapsed         | 25          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.020468898 |
|    clip_fraction        | 0.191       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.12       |
|    explained_variance   | 0.0119      |
|    learning_rate        | 0.0001      |
|    loss                 | 0.0231      |
|    n_updates            | 70          |
|    policy_gradient_loss | -0.0283     |
|    value_loss           | 0.109       |
-----------------------------------------
-----------