In [10]:
import os, sys
from pathos.multiprocessing import ProcessingPool as Pool
import random
import numpy as np
import torch

In [11]:
# Put the parent of the current working directory on the import path so the
# `src.*` imports in the next cell can resolve (presumably the notebook is
# launched from a folder one level below the project root -- confirm).
d = os.getcwd()
p = os.path.dirname(d)

# Guarded append: re-running this cell must not stack duplicate entries
# on sys.path.
if p not in sys.path:
    sys.path.append(p)

In [12]:
from src.dynamics import RobotWorld
from src.algorithms.addpgpd_sampled import ADpgpdSampled
from src.algorithms.pgdual import LinearDual

# 1 - Parameters

In [13]:
# Dimensions forwarded to the algorithm constructors; they match the width of
# the state / action tensors produced by `starting_pos_fn` (4 and 2).
ds = 4
da = 2

# `b` and `gamma` are passed straight to ADpgpdSampled / LinearDual.
# NOTE(review): presumably the constraint threshold and the discount factor --
# confirm against the algorithm implementations.
b = - 1_000
gamma = 0.99

# Regularisation weight used by `primal_reward_reg_fn` and `dual_reward_fn`.
tau = 0.2

# Per-coordinate weights (negated, so the rewards are penalties).
# The tensors are created directly in float64: going through the default
# float32 and then calling `.double()` would bake float32 rounding error into
# the 0.001 / 0.01 entries.
G1 = - torch.tensor([1.0, 1.0, .001, .001], dtype=torch.float64)
G2 = - torch.tensor([.001, .001, 1.0, 1.0], dtype=torch.float64)

R1 = - torch.tensor([0.01, 0.01], dtype=torch.float64)
R2 = - torch.tensor([0.01, 0.01], dtype=torch.float64)

def primal_reward_fn(env, a):
    """Primal reward: weighted L1 penalty on the current state and the action.

    Reads the batch of states from `env.s`, weights the absolute values with
    the module-level `G1` (state) and `R1` (action) vectors and sums over
    dim 1, giving one reward per row.
    """
    state_term = torch.sum(torch.abs(env.s) * G1, dim=1)
    action_term = torch.sum(torch.abs(a) * R1, dim=1)
    return state_term + action_term

def primal_reward_reg_fn(env, a):
    """Regularisation reward: -(tau / 2) times the squared norm of each action row.

    `env` is accepted for signature parity with the other reward functions
    but is not used here.
    """
    scale = - (tau / 2)
    squared_norm = torch.sum(a * a, dim=1)
    return scale * squared_norm

def dual_reward_fn(env, a):
    """Dual (constraint) reward: weighted quadratic penalty on state and action.

    Squares `env.s` and `a`, weights them with the module-level `G2` and `R2`
    vectors, sums over dim 1 and adds the constant `tau / 2` to every row.

    NOTE(review): `tau / 2` enters as an additive offset, not as a multiplier
    of the action term -- confirm this is intended.
    """
    state_term = torch.sum(torch.pow(env.s, 2) * G2, dim=1)
    action_term = torch.sum(torch.pow(a, 2) * R2, dim=1)
    offset = tau / 2
    # Keep the original left-to-right summation order.
    return (state_term + offset) + action_term

def starting_pos_fn(nsamples, rng=None):
    """Draw `nsamples` random initial states and actions.

    Parameters
    ----------
    nsamples : int
        Number of rows to draw.
    rng : numpy.random.Generator, optional
        Generator to draw from. Pass a seeded generator to make the draws
        reproducible. When omitted, a fresh unseeded generator is created on
        each call (the original behaviour); in that case `np.random.seed`
        has no influence on the result.

    Returns
    -------
    (s, a) : tuple of float64 tensors
        `s` has shape (nsamples, 4): the first two columns are uniform in
        [40, 50), the last two uniform in [-10, 10).
        `a` has shape (nsamples, 2), uniform in [-10, 10).
    """
    if rng is None:
        rng = np.random.default_rng()

    s = torch.tensor(rng.uniform(
        low=[40, 40, -10, -10],
        high=[50, 50, 10, 10],
        size=[nsamples, 4],
    )).double()

    a = torch.tensor(rng.uniform(
        low=[-10, -10],
        high=[10, 10],
        size=[nsamples, 2],
    )).double()

    return s, a

# 2 - A-DPPG

### A - Unconstrained

In [5]:
# Settings for the unconstrained run. The four counts are forwarded, in this
# order, to `train_unconstrained`; `eta` and `alpha` go to the constructor.
epochs, n_pe, n_rho, n_roll = 1000, 100, 1_000, 400
alpha, eta = 1.0, 0.0001

env = RobotWorld(range_pos=[40, 50], range_vel=[-.1, .1])
dpgpd = ADpgpdSampled(
    ds, da, env, eta, tau, gamma, b, alpha,
    primal_reward_fn, primal_reward_reg_fn, dual_reward_fn, starting_pos_fn,
)

# K, losses_primal and losses_dual are reassigned by the constrained cell
# further down if that cell is run afterwards.
K, losses_primal, losses_dual = dpgpd.train_unconstrained(
    epochs, n_pe, n_rho, n_roll
)

Episode 999/1000 - Return -6866.484779195138  

### B - Constrained

In [6]:
# Settings for the constrained run: more epochs and a smaller `eta` than the
# unconstrained cell above.
epochs, n_pe, n_rho, n_roll = 40_000, 100, 1_000, 400
alpha, eta = 1.0, 0.00001

env = RobotWorld(range_pos=[40, 50], range_vel=[-.1, .1])
dpgpd = ADpgpdSampled(
    ds, da, env, eta, tau, gamma, b, alpha,
    primal_reward_fn, primal_reward_reg_fn, dual_reward_fn, starting_pos_fn,
)

# Besides K and the two loss histories, the constrained trainer also returns
# the value bound to `lmbda`.
K, lmbda, losses_primal, losses_dual = dpgpd.train_constrained(
    epochs, n_pe, n_rho, n_roll
)

Episode 39999/40000 - Return -7239.290952206639 - Constrain -992.9945970913296 - Lambda 0.88434755802154542245

In [18]:
def run_experiment(seed):
    """Run one constrained ADpgpdSampled training after seeding the global RNGs.

    Seeds NumPy's legacy global RNG, Python's `random` and PyTorch (plus all
    CUDA devices when available) with `seed`, builds a fresh environment and
    trainer with the same settings as the constrained cell, and returns the
    4-tuple `(K, lmbda, losses_primal, losses_dual)` from `train_constrained`.

    Note: `starting_pos_fn` draws from its own `np.random.default_rng()`
    when called without a generator, so those draws are not governed by
    `seed`.
    """
    # Same seeding order as before: NumPy, random, torch, then CUDA.
    for seed_fn in (np.random.seed, random.seed, torch.manual_seed):
        seed_fn(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    epochs, n_pe, n_rho, n_roll = 40_000, 100, 1_000, 400
    alpha, eta = 1.0, 0.00001

    robot_env = RobotWorld(range_pos=[40, 50], range_vel=[-.1, .1])
    trainer = ADpgpdSampled(
        ds, da, robot_env, eta, tau, gamma, b, alpha,
        primal_reward_fn, primal_reward_reg_fn, dual_reward_fn, starting_pos_fn,
    )

    K, lmbda, primal_hist, dual_hist = trainer.train_constrained(
        epochs, n_pe, n_rho, n_roll
    )
    return K, lmbda, primal_hist, dual_hist

def run_parallel_experiments(n_experiments):
    """Run `run_experiment` once per random seed on a process pool.

    Draws `n_experiments` integer seeds in [0, 1000000) from NumPy's global
    RNG, maps `run_experiment` over them with a `Pool()` and returns the
    results of `pool.map`.
    """
    seeds = []
    for _ in range(n_experiments):
        seeds.append(np.random.randint(0, 1000000))

    with Pool() as workers:
        return workers.map(run_experiment, seeds)

n_experiments = 10
results = run_parallel_experiments(n_experiments)

# Transpose the list of per-run 4-tuples into one tuple per quantity.
Ks, lambdas, losses_primals, losses_duals = zip(*results)

# Average each quantity across runs (axis 0 indexes the run).
avg_K, avg_lambda, avg_losses_primal, avg_losses_dual = (
    np.mean(per_run, axis=0)
    for per_run in (Ks, lambdas, losses_primals, losses_duals)
)

print("Average K:", avg_K)
print("Average lambda:", avg_lambda)
print("Average primal losses:", avg_losses_primal)
print("Average dual losses:", avg_losses_dual)

KeyboardInterrupt: 

In [7]:
# Persist the loss histories currently bound to `losses_primal` /
# `losses_dual` (the constrained run overwrites the unconstrained one when
# both training cells have been executed).
# np.save does not create missing parent folders, so make sure the output
# directory exists first.
os.makedirs('../results', exist_ok=True)
np.save('../results/vel_sampled_primal.npy', losses_primal)
np.save('../results/vel_sampled_dual.npy', losses_dual)

# 3 - PGDual

In [8]:
# Settings for the LinearDual baseline. The five counts are forwarded, in this
# order, to `train`; the two learning rates go to the constructor.
n_epochs, n_samples, n_rollout, n_rho = 40_000, 100, 400, 1_000
n_dual_update = 100
lr_actor, lr_dual = 1e-4, 1e-5

env = RobotWorld(range_pos=[40, 50], range_vel=[-.1, .1])

ld = LinearDual(
    ds, da, env, lr_actor, lr_dual, gamma, b,
    starting_pos_fn, primal_reward_fn, dual_reward_fn,
)
# Stored under `loss_*` (singular), distinct from the A-DPPG `losses_*` names.
loss_primal, loss_dual = ld.train(
    n_epochs, n_samples, n_rollout, n_rho, n_dual_update
)

Epoch 0 - Primal -8664.382874007882 - Dual -444.3736942899904 - Lambda 0.0
Epoch 100 - Primal -8082.432539650312 - Dual -640.0080219978075 - Lambda 0.0
Epoch 200 - Primal -7737.440062154495 - Dual -922.1102194708895 - Lambda 0.0
Epoch 300 - Primal -7367.737150190917 - Dual -1573.9262305885343 - Lambda 0.005739262327551842
Epoch 400 - Primal -7195.392097675411 - Dual -1782.5972520770677 - Lambda 0.01356523483991623
Epoch 500 - Primal -7085.335008100474 - Dual -2043.3642756380982 - Lambda 0.02399887889623642
Epoch 600 - Primal -7060.645696346894 - Dual -2178.8713407454657 - Lambda 0.03578759357333183
Epoch 700 - Primal -6907.691559532769 - Dual -2785.8488505876708 - Lambda 0.05364608019590378
Epoch 800 - Primal -6792.229957371476 - Dual -3071.8239916629245 - Lambda 0.0743643194437027
Epoch 900 - Primal -6693.4611953556105 - Dual -3209.4086379924097 - Lambda 0.09645840525627136
Epoch 1000 - Primal -6631.539258466184 - Dual -3378.815324485134 - Lambda 0.12024655938148499
Epoch 1100 - Prima

In [None]:
def run_experiment(seed):
    """Run one LinearDual (PGDual) training after seeding the global RNGs.

    This deliberately reuses the name of the A-DPPG helper defined in an
    earlier cell and shadows it once this cell has been executed.

    Seeds NumPy's legacy global RNG, Python's `random` and PyTorch (plus all
    CUDA devices when available) with `seed`, then trains a fresh LinearDual
    instance with the same settings as the single-run PGDual cell.

    Returns
    -------
    (losses_primal, losses_dual)
        The two values returned by `LinearDual.train`.

    Note: `starting_pos_fn` draws from its own `np.random.default_rng()`
    when called without a generator, so those draws are not governed by
    `seed`.
    """
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    n_epochs = 40_000
    n_samples = 100
    n_rollout = 400
    n_rho = 1_000

    n_dual_update = 100
    lr_actor = 1e-4
    lr_dual = 1e-5

    env = RobotWorld(range_pos=[40, 50], range_vel=[-.1, .1])

    ld = LinearDual(ds, da, env, lr_actor, lr_dual, gamma, b, starting_pos_fn, primal_reward_fn, dual_reward_fn)
    losses_primal, losses_dual = ld.train(n_epochs, n_samples, n_rollout, n_rho, n_dual_update)
    # Return only what `train` produced. The previous version also returned
    # `K` and `lmbda`, which are never assigned in this function: they were
    # either stale globals from the A-DPPG cells or a NameError.
    return losses_primal, losses_dual

def run_parallel_experiments(n_experiments):
    """Map `run_experiment` over `n_experiments` random seeds on a process pool.

    Seeds are integers in [0, 1000000) drawn from NumPy's global RNG; the
    return value is whatever `pool.map` yields (one result per seed).
    """
    seeds = [np.random.randint(0, 1000000) for _ in range(n_experiments)]
    with Pool() as pool:
        results = pool.map(run_experiment, seeds)
    return results

n_experiments = 10
results = run_parallel_experiments(n_experiments)

# Each result is a (losses_primal, losses_dual) pair; regroup per quantity and
# average across runs (axis 0 indexes the run).
losses_primals, losses_duals = zip(*results)
avg_losses_primal = np.mean(losses_primals, axis=0)
avg_losses_dual = np.mean(losses_duals, axis=0)

print("Average primal losses:", avg_losses_primal)
print("Average dual losses:", avg_losses_dual)

KeyboardInterrupt: 

In [9]:
# Persist the LinearDual loss histories bound to `loss_primal` / `loss_dual`
# by the single-run PGDual cell.
# np.save does not create missing parent folders, so make sure the output
# directory exists first.
os.makedirs('../results', exist_ok=True)
np.save('../results/vel_sampled_primal_dm.npy', loss_primal)
np.save('../results/vel_sampled_dual_dm.npy', loss_dual)