# Basic DDPG Training with Hyperparameter Tuning

This notebook trains a DDPG agent using the existing training scripts and a simple hyperparameter configuration.
No fancy stuff - just basic training to verify that the setup works.


In [None]:
# Setup: Add training_scripts to path
import sys


def prepend_to_sys_path(path):
    """Put `path` at the front of `sys.path` exactly once.

    Re-running this cell used to insert another copy of the same entry on
    every execution. Any existing occurrences are removed first, so the cell
    is idempotent while the entry still ends up at index 0.
    """
    while path in sys.path:
        sys.path.remove(path)
    sys.path.insert(0, path)


prepend_to_sys_path('./training_scripts')

print("‚úì Path configured")


✓ Environment variables set
   Continuing anyway...


In [None]:
# Initialize Environment and SAI Client
import torch
import torch.nn.functional as F
import numpy as np
from sai_rl import SAIClient
from training_scripts.ddpg import DDPG_FF
from training_scripts.training import training_loop

# Competition whose environment this notebook trains against.
COMPETITION_ID = "lower-t1-penalty-kick-goalie"

print("[Setup] Initializing SAI client...")
sai = SAIClient(comp_id=COMPETITION_ID)
env = sai.make_env()
print("[Setup] ‚úì Environment created")

# Report the shape of both spaces, observation first, then action.
for space_label, space_attr in (
    ("Observation", "observation_space"),
    ("Action", "action_space"),
):
    print(f"[Setup] {space_label} space: {getattr(env, space_attr).shape}")


[Setup] Cloning repository from GitHub...


  return datetime.utcnow().replace(tzinfo=utc)


[Setup] ✓ Repository cloned
[Setup] Working directory: /content/booster_repo
[Setup] Files available:
  - .git
  - .gitignore
  - IMITATION_LEARNING.md
  - LICENSE
  - README.md
  - booster_control
  - imitation_learning
  - mimic
  - requirements.txt
  - resources
  - training_scripts


In [None]:
# Define Preprocessor Class
class Preprocessor():
    """Builds the flat state vector fed to the model from `(obs, info)`.

    The output row is, in order: the first 24 columns of `obs` (split as
    `[:12]` and `[12:24]`), the gravity direction expressed in the robot
    frame, and then the `info` entries listed in `_INFO_KEYS` (everything
    except the quaternion), followed by the task vector.
    """

    # `info` entries consumed by `modify_state`. The quaternion is only used
    # to compute projected gravity; every other key is appended to the state
    # in exactly this order, so reordering changes the feature layout.
    _INFO_KEYS = (
        "robot_quat",
        "robot_gyro",
        "robot_accelerometer",
        "robot_velocimeter",
        "goal_team_0_rel_robot",
        "goal_team_1_rel_robot",
        "goal_team_0_rel_ball",
        "goal_team_1_rel_ball",
        "ball_xpos_rel_robot",
        "ball_velp_rel_robot",
        "ball_velr_rel_robot",
        "player_team",
        "goalkeeper_team_0_xpos_rel_robot",
        "goalkeeper_team_0_velp_rel_robot",
        "goalkeeper_team_1_xpos_rel_robot",
        "goalkeeper_team_1_velp_rel_robot",
        "target_xpos_rel_robot",
        "target_velp_rel_robot",
        "defender_xpos",
    )

    def get_task_onehot(self, info):
        """Return `info['task_index']` unchanged, or an empty array if absent."""
        if 'task_index' in info:
            return info['task_index']
        else:
            return np.array([])

    def quat_rotate_inverse(self, q: np.ndarray, v: np.ndarray):
        """Rotate `v` by the inverse of the quaternion(s) `q`.

        Args:
            q: array of shape (N, 4); the scalar part is read from the last
               column and the vector part from the first three.
            v: either one 3-vector shared by all rows, shape (3,), or one
               vector per quaternion, shape (N, 3).

        Returns:
            Array of shape (N, 3) with the rotated vectors.
        """
        q = np.asarray(q)
        v = np.asarray(v)
        q_w = q[:, [-1]]
        q_vec = q[:, :3]
        a = v * (2.0 * q_w**2 - 1.0)
        b = np.cross(q_vec, v) * (q_w * 2.0)
        # Row-wise dot product. `np.dot(q_vec, v)` only handled a shared 1-D
        # `v`; for an (N, 3) `v` it attempts a matrix product instead.
        c = q_vec * (np.sum(q_vec * v, axis=-1, keepdims=True) * 2.0)
        return a - b + c

    def modify_state(self, obs, info):
        """Assemble the 2-D state array for one sample or a batch.

        Args:
            obs: raw observation, shape (D,) or (N, D) with D >= 24.
            info: dict holding every key in `_INFO_KEYS`, optionally
                  `task_index`. It is only read, never modified.

        Returns:
            Array of shape (N, F); N is 1 for an unbatched sample.

        Raises:
            KeyError: if a required `info` entry is missing.
        """
        obs = np.asarray(obs)
        if obs.ndim == 1:
            obs = np.expand_dims(obs, axis=0)

        # As before, the quaternion's rank decides whether all `info` entries
        # get a leading batch axis. Local copies are expanded so the caller's
        # dict is left untouched.
        unbatched = np.asarray(info["robot_quat"]).ndim == 1
        fields = {}
        for key in self._INFO_KEYS:
            value = np.asarray(info[key])
            fields[key] = np.expand_dims(value, axis=0) if unbatched else value

        task_onehot = np.asarray(self.get_task_onehot(info))
        if task_onehot.size == 0:
            # Zero-width block with one row per sample, so stacking also
            # works for batches when no task information is supplied.
            task_onehot = np.zeros((obs.shape[0], 0), dtype=task_onehot.dtype)
        elif task_onehot.ndim < 2:
            task_onehot = task_onehot.reshape(1, -1)

        robot_qpos = obs[:, :12]
        robot_qvel = obs[:, 12:24]
        project_gravity = self.quat_rotate_inverse(
            fields["robot_quat"], np.array([0.0, 0.0, -1.0])
        )

        pieces = [robot_qpos, robot_qvel, project_gravity]
        pieces.extend(fields[key] for key in self._INFO_KEYS[1:])
        pieces.append(task_onehot)
        return np.hstack(pieces)

print("‚úì Preprocessor class defined")


Installing dependencies from requirements.txt...
✓ All dependencies installed!
✓ All imports successful!


In [None]:
# Hyperparameters Configuration (ADJUST THESE FOR TUNING)
# Tunable settings for this run; edit the values here when experimenting.
HYPERPARAMS = dict(
    n_features=87,
    # Network architecture - alternatives to try: [64, 32, 16] or [48, 24, 12]
    neurons=[24, 12, 6],
    # Alternatives to try: 0.00005, 0.0001, 0.0002
    learning_rate=1e-4,
    # Kept small for a quick smoke test - scale up after verification
    timesteps=10_000,
)

# Echo the configuration, one indented "name: value" line per setting.
print("[Config] Hyperparameters:")
print("\n".join(f"  {name}: {value}" for name, value in HYPERPARAMS.items()))


[Setup] Registering SAI environments...
[Setup] ✓ sai_mujoco imported - environments should be registered
[Setup] Found 10 SAI soccer environments:
  - LowerT1GoalKeeper-v0
  - LowerT1GoaliePenaltyKick-v0
  - LowerT1KickToTarget-v0
  - LowerT1ObstaclePenaltyKick-v0
  - LowerT1PenaltyKick-v0
  - T1GoalKeeper-v0
  - T1GoaliePenaltyKick-v0
  - T1KickToTarget-v0
  - T1ObstaclePenaltyKick-v0
  - T1PenaltyKick-v0


In [None]:
# Create DDPG Model
print("[Model] Creating DDPG model...")

# Collect the constructor arguments first so the tunable ones are easy to spot.
model_kwargs = {
    "n_features": HYPERPARAMS['n_features'],
    "action_space": env.action_space,
    "neurons": HYPERPARAMS['neurons'],
    "activation_function": F.relu,
    "learning_rate": HYPERPARAMS['learning_rate'],
}
model = DDPG_FF(**model_kwargs)

# Total number of scalar entries across all parameter tensors.
total_params = 0
for parameter in model.parameters():
    total_params += parameter.numel()
print(f"[Model] ‚úì Model created with {total_params} parameters")

# Reports whether CUDA is available on this machine; the model is not moved here.
if torch.cuda.is_available():
    device_label = 'CUDA'
else:
    device_label = 'CPU'
print(f"[Model] Device: {device_label}")


✓ MultiTaskWrapper created


In [None]:
# Define Action Function
def action_function(policy, action_space=None, expected_bounds=(-1, 1)):
    """Rescale raw policy outputs into the bounds of an action space.

    Values are mapped linearly from `expected_bounds` onto
    `[action_space.low, action_space.high]`; anything outside
    `expected_bounds` is clamped to the nearest bound first.

    Args:
        policy: array of raw policy outputs.
        action_space: object exposing `low` and `high`. Defaults to the
            notebook-level `env.action_space`, so existing one-argument
            callers behave exactly as before.
        expected_bounds: `(lower, upper)` range the policy is expected to
            produce. Defaults to `(-1, 1)`.

    Returns:
        Array of actions lying within the action space bounds.
    """
    if action_space is None:
        # Fall back to the environment created earlier in the notebook.
        action_space = env.action_space

    lower, upper = expected_bounds
    action_percent = (policy - lower) / (upper - lower)
    # Clamp the fraction to [0, 1] so out-of-range outputs saturate.
    bounded_percent = np.minimum(np.maximum(action_percent, 0), 1)
    return (
        action_space.low
        + (action_space.high - action_space.low) * bounded_percent
    )

print("‚úì Action function defined")


[Setup] Creating environment...
[Environment] Creating multi-task wrapper...
[Environment] Creating LowerT1GoaliePenaltyKick-v0...
[Environment] ERROR creating LowerT1GoaliePenaltyKick-v0: No "body" with name soccer_ball exists. Available "body" names = ('world', 'floor', '/football_field', '/goal_post_north', '/goal_post_south', '/boards', '/soccer_ball', '/goalkeeper_team_0_body', '/goalkeeper_team_1_body', 'robot_0:Trunk', 'robot_0:Hip_Pitch_Left', 'robot_0:Hip_Roll_Left', 'robot_0:Hip_Yaw_Left', 'robot_0:Shank_Left', 'robot_0:Ankle_Cross_Left', 'robot_0:left_foot_link', 'robot_0:Hip_Pitch_Right', 'robot_0:Hip_Roll_Right', 'robot_0:Hip_Yaw_Right', 'robot_0:Shank_Right', 'robot_0:Ankle_Cross_Right', 'robot_0:right_foot_link').
[Setup] Multi-task failed: No "body" with name soccer_ball exists. Available "body" names = ('world', 'floor', '/football_field', '/goal_post_north', '/goal_post_south', '/boards', '/soccer_ball', '/goalkeeper_team_0_body', '/goalkeeper_team_1_body', 'robot_0:T

ValueError: No "body" with name soccer_ball exists. Available "body" names = ('world', 'floor', '/football_field', '/goal_post_north', '/goal_post_south', '/boards', '/soccer_ball', '/goalkeeper_team_0_body', '/goalkeeper_team_1_body', 'robot_0:Trunk', 'robot_0:Hip_Pitch_Left', 'robot_0:Hip_Roll_Left', 'robot_0:Hip_Yaw_Left', 'robot_0:Shank_Left', 'robot_0:Ankle_Cross_Left', 'robot_0:left_foot_link', 'robot_0:Hip_Pitch_Right', 'robot_0:Hip_Roll_Right', 'robot_0:Hip_Yaw_Right', 'robot_0:Shank_Right', 'robot_0:Ankle_Cross_Right', 'robot_0:right_foot_link').

In [None]:
# Run Training
import traceback

# Horizontal rule reused for every banner printed by this cell.
banner = "=" * 70

print(banner)
print("üöÄ STARTING TRAINING")
print(banner)
print("\nConfiguration:")
for setting_label, setting_key in (
    ("Timesteps", 'timesteps'),
    ("Learning rate", 'learning_rate'),
    ("Network", 'neurons'),
):
    print(f"  {setting_label}: {HYPERPARAMS[setting_key]}")
print(banner)

try:
    training_loop(
        env,
        model,
        action_function=action_function,
        preprocess_class=Preprocessor,
        timesteps=HYPERPARAMS['timesteps'],
    )
    print(f"\n{banner}")
    print("‚úÖ TRAINING COMPLETED!")
    print(banner)

    # Persist the trained weights; a failure here is reported by the
    # generic handler below, same as a failure during training.
    torch.save(model.state_dict(), 'ddpg_trained_model.pt')
    print("[Model] ‚úì Model saved to ddpg_trained_model.pt")

except KeyboardInterrupt:
    # Manual stop: keep whatever has been learned so far.
    print("\n‚ö†Ô∏è Training interrupted by user")
    torch.save(model.state_dict(), 'ddpg_interrupted_model.pt')
    print("[Model] ‚úì Checkpoint saved")

except Exception as exc:
    # Best-effort: show the error and full traceback without aborting the notebook.
    print(f"\n‚ùå Error: {exc}")
    traceback.print_exc()
