In [4]:
import os, json, cv2, torch, numpy as np
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader

# ================================================
# 1. Dataset Loader
# ================================================
class RobotDataset(Dataset):
    """Frame/action pairs gathered from recorded session folders.

    Every entry of ``root`` is treated as a session and is used only if it
    contains a ``moves.json`` file. That file is iterated as a sequence of
    ``(t, action)`` pairs; a pair is kept when the image
    ``frames/<t formatted with three decimals>.jpg`` exists in the session.

    Args:
        root: Directory that holds the session folders.
        img_size: Side length, in pixels, of the square image returned by
            ``__getitem__``.

    Attributes:
        samples: List of ``(frame_path, action_dict)`` tuples.
        img_size: The value passed to the constructor.
    """

    def __init__(self, root="dataset", img_size=128):
        self.samples = []
        self.img_size = img_size

        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # index -> sample mapping identical across runs and machines.
        for session in sorted(os.listdir(root)):
            spath = os.path.join(root, session)
            frames = os.path.join(spath, "frames")
            moves = os.path.join(spath, "moves.json")
            if not os.path.exists(moves):
                continue

            with open(moves, encoding="utf-8") as f:
                entries = json.load(f)
            for t, a in entries:
                frame_file = os.path.join(frames, f"{t:.3f}.jpg")
                # Actions without a matching frame on disk are dropped.
                if os.path.exists(frame_file):
                    self.samples.append((frame_file, a))

        print(f"📦 Loaded {len(self.samples)} frame-action pairs from '{root}'")

    def __len__(self):
        """Return the number of frame/action pairs that were found."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            A tuple ``(img, y)`` where ``img`` is a float32 tensor of shape
            ``(3, img_size, img_size)`` in RGB order scaled by 1/255, and
            ``y`` is a float32 tensor holding the six scaled action values
            (speed, lateral, pitch, roll, height, backwards).

        Raises:
            RuntimeError: If the frame file cannot be decoded as an image.
        """
        path, a = self.samples[idx]
        img = cv2.imread(path)
        if img is None:
            # cv2.imread does not raise on unreadable/corrupt files; without
            # this check the failure surfaces later inside cv2.resize with a
            # message that does not name the offending file.
            raise RuntimeError(f"Could not read image file: {path}")
        img = cv2.resize(img, (self.img_size, self.img_size))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # HWC uint8 -> CHW float32 scaled by 1/255.
        img = torch.tensor(img, dtype=torch.float32).permute(2, 0, 1) / 255.0

        # Scale each action by a fixed divisor. The ranges noted per line are
        # the intended ones from the original code; they are not enforced.
        y = torch.tensor([
            a["speed"] / 40.0,        # 0–1
            a["lateral"] / 200.0,     # -1–1
            a["pitch"] / 100.0,       # -1–1
            a["roll"] / 40.0,         # -1–1
            a["height"] / 100.0,      # -1–1
            float(a["backwards"]),    # 0–1
        ], dtype=torch.float32)
        return img, y


# ================================================
# 2. Model
# ================================================
class VisionToAction(nn.Module):
    """Small CNN that regresses six action values from an RGB image.

    Three stride-2 convolutions (each followed by batch norm and ReLU) feed a
    two-layer fully connected head with six outputs.

    Args:
        img_size: Side length of the square input images. The default of 128
            reproduces the original fixed ``64 * 16 * 16`` feature size.

    Raises:
        ValueError: If ``img_size`` is smaller than 1.
    """

    def __init__(self, img_size=128):
        super().__init__()
        if img_size < 1:
            raise ValueError(f"img_size must be >= 1, got {img_size}")

        self.cnn = nn.Sequential(
            nn.Conv2d(3, 16, 3, 2, 1), nn.BatchNorm2d(16), nn.ReLU(),
            nn.Conv2d(16, 32, 3, 2, 1), nn.BatchNorm2d(32), nn.ReLU(),
            nn.Conv2d(32, 64, 3, 2, 1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.Flatten(),
        )

        # A conv with kernel 3, stride 2, padding 1 maps a side of n pixels
        # to (n - 1) // 2 + 1; apply that once per conv layer above.
        side = img_size
        for _ in range(3):
            side = (side - 1) // 2 + 1

        self.fc = nn.Sequential(
            nn.Linear(64 * side * side, 128), nn.ReLU(),
            nn.Linear(128, 6)
        )

    def forward(self, x):
        """Map a batch of images ``(N, 3, img_size, img_size)`` to ``(N, 6)``."""
        return self.fc(self.cnn(x))


# ================================================
# 3. Train
# ================================================
def train(root="dataset", epochs=10, batch_size=16, lr=5e-4,
          model_path="robot_model.pth"):
    """Train ``VisionToAction`` on ``RobotDataset`` and checkpoint each epoch.

    Calling ``train()`` with no arguments uses the same settings as the
    original hard-coded version.

    Args:
        root: Dataset directory passed to ``RobotDataset``.
        epochs: Number of passes over the dataset.
        batch_size: Mini-batch size for the ``DataLoader``.
        lr: Learning rate for the Adam optimizer.
        model_path: File the model ``state_dict`` is written to after every
            epoch (the same file is overwritten each time).

    Raises:
        ValueError: If ``epochs`` is smaller than 1 or the dataset is empty.
    """
    if epochs < 1:
        raise ValueError(f"epochs must be >= 1, got {epochs}")

    # Images are loaded at 128x128, the input size VisionToAction defaults to.
    dataset = RobotDataset(root, img_size=128)
    if len(dataset) == 0:
        # A shuffling DataLoader rejects an empty dataset with an error that
        # does not mention the real cause; fail early and point at the root.
        raise ValueError(
            f"No frame-action pairs found in '{root}'; nothing to train on."
        )
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                        num_workers=0)

    model = VisionToAction()
    opt = optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.SmoothL1Loss()

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    print(f"🚀 Training on {device}")

    for epoch in range(epochs):
        model.train()
        total_loss, total_mae = 0.0, 0.0

        for imgs, acts in loader:
            imgs, acts = imgs.to(device), acts.to(device)
            preds = model(imgs)
            loss = loss_fn(preds, acts)
            # MAE is only reported, so compute it outside the autograd graph.
            mae = (preds.detach() - acts).abs().mean().item()

            opt.zero_grad()
            loss.backward()
            opt.step()

            total_loss += loss.item()
            total_mae += mae

        # Averages are per batch, not per sample.
        avg_loss = total_loss / len(loader)
        avg_mae = total_mae / len(loader)
        print(f"Epoch {epoch+1:02d}: Loss={avg_loss:.4f}  MAE={avg_mae:.3f}")

        # 🧠 Save single model (overwrite each epoch)
        torch.save(model.state_dict(), model_path)
        print(f"💾 Model saved → {model_path}")

    print("✅ Training complete. Final model saved.")


# Run training only when this file is executed directly, not when imported.
if __name__ == "__main__":
    train()


📦 Loaded 735 frame-action pairs from 'dataset'
🚀 Training on cpu
Epoch 01: Loss=0.0675  MAE=0.202
💾 Model saved → robot_model.pth
Epoch 02: Loss=0.0082  MAE=0.070
💾 Model saved → robot_model.pth
Epoch 03: Loss=0.0065  MAE=0.066
💾 Model saved → robot_model.pth
Epoch 04: Loss=0.0049  MAE=0.057
💾 Model saved → robot_model.pth
Epoch 05: Loss=0.0039  MAE=0.052
💾 Model saved → robot_model.pth
Epoch 06: Loss=0.0031  MAE=0.048
💾 Model saved → robot_model.pth
Epoch 07: Loss=0.0025  MAE=0.045
💾 Model saved → robot_model.pth
Epoch 08: Loss=0.0023  MAE=0.046
💾 Model saved → robot_model.pth
Epoch 09: Loss=0.0018  MAE=0.041
💾 Model saved → robot_model.pth
Epoch 10: Loss=0.0014  MAE=0.034
💾 Model saved → robot_model.pth
✅ Training complete. Final model saved.
