# **[WIP] BipedalWalker - Gaussian PPO (Experimental)**

An implementation of PPO with a Gaussian critic for the BipedalWalker-v3 environment.



## Initialization

In [None]:
%matplotlib inline
from IPython import display
import matplotlib.pyplot as plt

In [None]:
from sim.GymTrainer import GymTrainer
from framework.GaussianPPO import ProximalPolicyOptimizationAgent

import torch
# Pick the training device: CUDA when torch reports it is available, CPU otherwise.
if torch.cuda.is_available():
    trainDevice = torch.device("cuda")
else:
    trainDevice = torch.device("cpu")

In [None]:
# Stage 1 environment: the regular (non-hardcore) BipedalWalker task.
sim = GymTrainer(
    "BipedalWalker-v3",
    # Environment options.
    hardcore=False,
    render_mode="rgb_array",
    envNum=1,
    seed=555,
    # Devices: evaluation stays on the CPU, training uses the device chosen above.
    evalDevice="cpu",
    trainDevice=trainDevice,
    # Run budget. batchSize has the same value as the agent's memorySize (8000).
    batchSize=8000,
    maxEpisode=6000,
    maxStep=100000,
)

In [None]:
# Build the Gaussian-PPO agent, sized from the environment's action/state dimensions.
agent = ProximalPolicyOptimizationAgent(
    # Problem dimensions reported by the trainer.
    stateNum=sim.stateSize(),
    actionNum=sim.actionSize(),
    # Network layout: a single hidden layer of 64 units for both actor and critic
    # (assumes each list entry is a hidden-layer width — TODO confirm in framework.GaussianPPO).
    layerActor=[64],
    layerCritic=[64],
    # PPO hyperparameters. NOTE(review): names suggest discount factor, GAE lambda,
    # clip range and entropy-bonus weight — confirm against framework.GaussianPPO.
    gamma=0.99,
    lamda=0.95,
    eps=0.2,
    entropyBeta=0.0002,
    # Reward shaping: clamp every reward to be no lower than -1.0
    # (presumably to soften the environment's large fall penalty — confirm).
    rwShaper=lambda rwds: torch.clamp(rwds, min=-1.0),
    # Optimisation schedule. memorySize has the same value as the trainer's batchSize (8000).
    memorySize=8000,
    batchSize=2000,
    trainEpoch=70,
    policyLR=1e-4,
    criticLR=1e-4,
)

In [None]:
# Ask the trainer for a summary writer bound to this agent. The call returns a
# pair: `name` (presumably the run identifier used for the log directory —
# confirm in sim.GymTrainer) and the `writer` passed to train/test below.
name, writer = sim.makeSummaryWriter(agent)

## Training

In [None]:
# Train the agent on the BipedalWalker-v3 trainer configured above, handing it
# the summary writer created in the previous cell.
sim.train(agent, writer)

## Testing

In [None]:
# Run the trainer's test routine on the stage-1 agent (episode=5), reusing the
# same summary writer that was used for training.
sim.test(
    agent=agent,
    episode=5,
    maxStep=10000,
    renderStep=6,
    writer=writer,
)

# 2-Stage Training

In [None]:
# Stage 2 environment: rebind `sim` to the hardcore variant of the task.
# The `agent` object is NOT re-created, so it carries over into this stage.
sim = GymTrainer(
    "BipedalWalkerHardcore-v3",
    # Environment options.
    hardcore=True,
    render_mode="rgb_array",
    envNum=1,
    seed=123,
    # Devices: evaluation stays on the CPU, training uses the device chosen above.
    evalDevice="cpu",
    trainDevice=trainDevice,
    # Run budget: same batch size and step cap as stage 1, with a larger episode cap.
    batchSize=8000,
    maxEpisode=50000,
    maxStep=100000,
)

In [None]:
# Request a fresh summary writer from the hardcore trainer; this rebinds both
# `name` and `writer`, replacing the values from the first stage.
name, writer = sim.makeSummaryWriter(agent)

In [None]:
# Second training stage: keep training the same agent object (already trained
# on BipedalWalker-v3 above) on the hardcore environment.
sim.train(agent, writer)

In [None]:
# Restore the best checkpoint written by an earlier hardcore run before testing.
# map_location="cpu" lets a checkpoint that was saved from a CUDA device be
# deserialized on a CPU-only machine (this notebook already falls back to CPU
# when CUDA is unavailable); without it torch.load tries to restore tensors to
# the device they were saved from.
# NOTE(review): assumes agent.load_state_dict copies values into the agent's
# existing parameters, as torch.nn.Module.load_state_dict does — confirm.
# NOTE(review): the path below is machine-specific; adjust it when running elsewhere.
agent.load_state_dict(
    torch.load(
        "/home/rtu/gym_plaground/RLPlayground/runs/BipedalWalkerHardcore-v3-hardcore-ProximalPolicyOptimizationAgent-20251203-092220/best_weight.pt",
        map_location="cpu",
    )
)

In [None]:
# Run the trainer's test routine on the hardcore environment (episode=40).
# writer=None: no summary writer is passed for this run.
sim.test(
    agent=agent,
    episode=40,
    maxStep=10000,
    renderStep=5,
    writer=None,
)