In [None]:
import os, sys
import numpy as np
import torch

In [None]:
# Put the parent of the current working directory on the import path so the
# `src.*` imports in the following cell can be resolved.
# NOTE(review): this relies on the kernel's working directory being one level
# below the directory that contains `src` — confirm for your launch setup.
d = os.getcwd()
p = os.path.dirname(d)

# Guarded so that re-running this cell does not keep appending duplicates.
if p not in sys.path:
    sys.path.append(p)

In [None]:
from src.dynamics import RobotWorld
from src.algorithms.addpgpd_sampled import ADpgpdSampled
from src.algorithms.pgdual import LinearDual
from src.sampling import Sampler

# 1 - Parameters

In [None]:
# Dimensions handed to ADpgpdSampled / LinearDual; they match the 4-column
# states and 2-column actions produced by starting_pos_fn.
ds = 4
da = 2

tau = 0.01     # weight of the squared-action term in primal_reward_reg_fn
gamma = 0.95   # forwarded to Sampler, ADpgpdSampled and LinearDual
b = - 200.0    # forwarded to ADpgpdSampled and LinearDual

# Quadratic weight on the state, used by primal_reward_fn. The matrices are
# created directly in float64: building them in float32 and upcasting with
# .double() would bake float32 rounding error into the 0.1 entries.
G = - torch.tensor([
    [1.0, 0, 0, 0],
    [0, 1.0, 0, 0],
    [0, 0, 0.1, 0],
    [0, 0, 0, 0.1]
], dtype=torch.float64)

# Quadratic weight on the action, used by primal_reward_fn.
R = - torch.tensor([
    [0.1, 0],
    [0, 0.1],
], dtype=torch.float64)

def primal_reward_fn(env, a):
    """Row-wise quadratic reward ``s G s^T + a R a^T``.

    Args:
        env: Object exposing the current batch of states as ``env.s``
            (one state per row).
        a: Batch of actions, one action per row.

    Returns:
        One value per row: the state quadratic form under the module-level
        ``G`` plus the action quadratic form under the module-level ``R``.
    """
    state = env.s
    state_term = ((state @ G) * state).sum(dim=1)
    action_term = ((a @ R) * a).sum(dim=1)
    return state_term + action_term

def primal_reward_reg_fn(env, a):
    """Regularisation reward: ``-(tau / 2)`` times the squared norm of each action row.

    Args:
        env: Not used; accepted so the signature matches the other reward
            callbacks in this notebook.
        a: Batch of actions, one action per row.

    Returns:
        One value per row of ``a``.
    """
    squared_norm = (a * a).sum(dim=1)
    return - (tau / 2) * squared_norm

def dual_reward_fn(env, a):
    """Reward built from the first two state components, each capped at 1.0.

    Args:
        env: Object exposing the current batch of states as ``env.s``
            (one state per row).
        a: Not used; accepted so the signature matches the other reward
            callbacks in this notebook.

    Returns:
        One value per row: ``100 * (min(s0, 1) + min(s1, 1) - 2)``, which is 0
        when both components are at least 1.0 and negative otherwise.
    """
    first_capped = env.s[:, 0].clip(max=1.0)
    second_capped = env.s[:, 1].clip(max=1.0)
    return 100 * (first_capped + second_capped - 2)

def starting_pos_fn(nsamples, rng=None):
    """Draw random initial states and actions.

    Each state row is sampled uniformly with the first two components in
    [40, 50) and the last two in [-10, 10); each action row is sampled
    uniformly with both components in [-10, 10).

    Args:
        nsamples: Number of (state, action) rows to draw.
        rng: Optional seed or ``numpy.random.Generator``. With the default
            ``None`` a fresh, unseeded generator is created on every call
            (the previous behaviour). Pass an int seed or a shared Generator
            to make the draws reproducible.

    Returns:
        Tuple ``(s, a)`` of float64 tensors with shapes ``(nsamples, 4)`` and
        ``(nsamples, 2)``.
    """
    # default_rng accepts None, a seed, or an existing Generator (returned as-is).
    rng = np.random.default_rng(rng)

    s = torch.tensor(rng.uniform(
        low=[40, 40, -10, -10],
        high=[50, 50, 10, 10],
        size=[nsamples, 4],
    )).double()

    a = torch.tensor(rng.uniform(
        low=[-10, -10],
        high=[10, 10],
        size=[nsamples, 2],
    )).double()

    return s, a

# 2 - AD-PGPD

### A - Unconstrained

In [None]:
# Arguments forwarded, in this order, to train_unconstrained below.
epochs, n_pe, n_rho, n_roll = 1_000, 100, 2_000, 200

# Algorithm hyper-parameters handed to the ADpgpdSampled constructor.
alpha = 1.0
eta = 0.001

# NOTE(review): range_vel is [-.1, .1] here, while starting_pos_fn samples the
# last two state components in [-10, 10] — confirm this is intended.
env = RobotWorld(range_pos=[40, 50], range_vel=[-.1, .1])

# NOTE(review): `sampler` is not referenced again in the cells visible here —
# confirm whether it is still needed.
sampler = Sampler(env, gamma)

dpgpd = ADpgpdSampled(
    ds, da, env, eta, tau, gamma, b, alpha,
    primal_reward_fn, primal_reward_reg_fn, dual_reward_fn, starting_pos_fn,
)

K, losses_primal, losses_dual = dpgpd.train_unconstrained(
    epochs, n_pe, n_rho, n_roll
)

### B - Constrained

In [None]:
# Arguments forwarded, in this order, to train_constrained below.
epochs, n_pe, n_rho, n_roll = 50_000, 100, 2_000, 200

# Algorithm hyper-parameters handed to the ADpgpdSampled constructor.
alpha = 1.0
eta = 0.00005

# Fresh environment, sampler and learner for this run. The names `env`,
# `sampler`, `dpgpd`, `K`, `losses_primal` and `losses_dual` are rebound here,
# replacing the values set by the unconstrained cell.
env = RobotWorld(range_pos=[40, 50], range_vel=[-.1, .1])
sampler = Sampler(env, gamma)

dpgpd = ADpgpdSampled(
    ds, da, env, eta, tau, gamma, b, alpha,
    primal_reward_fn, primal_reward_reg_fn, dual_reward_fn, starting_pos_fn,
)

K, lmbda, losses_primal, losses_dual = dpgpd.train_constrained(
    epochs, n_pe, n_rho, n_roll
)

In [None]:
# Persist the loss histories currently bound to `losses_primal` / `losses_dual`
# (when the notebook is run top to bottom, those come from train_constrained).
# Create the output directory first so a missing folder cannot make np.save
# fail after the long training run has already finished.
os.makedirs('../results', exist_ok=True)
np.save('../results/obs_primal.npy', losses_primal)
np.save('../results/obs_dual.npy', losses_dual)

# 3 - PGDual

In [None]:
# Arguments forwarded to LinearDual.train below, in the order
# (n_epochs, n_samples, n_rollout, n_rho, n_dual_update).
n_epochs, n_samples, n_rollout, n_rho = 50_000, 100, 200, 2_000
n_dual_update = 10

# Step sizes handed to the LinearDual constructor.
lr_actor, lr_dual = 1e-4, 1e-3

env = RobotWorld(range_pos=[40, 50], range_vel=[-.1, .1])

ld = LinearDual(
    ds, da, env, lr_actor, lr_dual, gamma, b,
    starting_pos_fn, primal_reward_fn, dual_reward_fn,
)

loss_primal, loss_dual = ld.train(
    n_epochs, n_samples, n_rollout, n_rho, n_dual_update
)

In [None]:
# Persist the loss histories returned by LinearDual.train. Create the output
# directory first so a missing folder cannot make np.save fail after the long
# training run has already finished.
os.makedirs('../results', exist_ok=True)
np.save('../results/obs_primal_dm.npy', loss_primal)
np.save('../results/obs_dual_dm.npy', loss_dual)