In [562]:
import os, sys
import numpy as np
import torch
import matplotlib.pyplot as plt

In [563]:
# Put the parent of the current working directory on the import path; the
# next cell imports from the `src` package, which presumably lives there.
d = os.getcwd()
p = os.path.dirname(d)

# Guard the append so that re-running this cell does not stack duplicate
# entries on sys.path.
if p not in sys.path:
    sys.path.append(p)

In [564]:
from src.dynamics import BurgersDynamics
from src.algorithms.addpgpd_sampled import ADpgpdSampled
from src.algorithms.pgdual import LinearDual

# 1 - Parameters

In [566]:
# State and action dimensions (also used to size G, R and C below).
ds = 10
da = 10

# Scalars forwarded to the solvers in the training cells.
tau = 0.01
gamma = 0.9
alpha = 1.0

# Arguments of BurgersDynamics; dx is the spacing of ds evenly spaced points on [0, 1].
viscosity = 0.1
dt = 0.01
dx = 1.0 / (ds - 1)

# Passed to the solvers as `b` (presumably the constraint threshold — confirm in src).
b = -20

# Negative-definite diagonal weights used by the reward functions.
# G: -I on the state.
G = -torch.eye(ds, dtype=torch.float64)
# R: -(0.1 * tau / 2) * I on the action. The 0.1 is deliberately created in
# float32 and then widened, which keeps the exact values of the original
# construction.
R = -torch.diag(torch.full((da,), 0.1)).double() * (tau / 2)
# C: -I applied to |a| in the dual reward.
C = -torch.eye(da, dtype=torch.float64)

def primal_reward_fn(env, a):
    """Row-wise quadratic reward ``u G u^T + a R a^T``.

    Reads the current state from ``env.u`` and uses the module-level weight
    matrices ``G`` and ``R``. Both ``env.u`` and ``a`` are reduced over
    ``dim=1``, so they are expected to be 2-D (presumably one row per sample).

    Returns a 1-D tensor with one reward per row.
    """
    state = env.u
    state_term = torch.sum(torch.matmul(state, G) * state, dim=1)
    action_term = torch.sum(torch.matmul(a, R) * a, dim=1)
    return state_term + action_term

def dual_reward_fn(env, a):
    """Row-wise sum of ``|a| @ C``, using the module-level matrix ``C``.

    ``env`` is accepted so the signature matches ``primal_reward_fn`` but is
    not read here. ``a`` is reduced over ``dim=1``, so it is expected to be 2-D.

    Returns a 1-D tensor with one value per row of ``a``.
    """
    magnitudes = torch.abs(a)
    weighted = torch.matmul(magnitudes, C)
    return weighted.sum(dim=1)

def starting_pos_fn(n_samples, seed=None):
    """Sample ``n_samples`` initial (state, action) pairs.

    Each state is ``sin(pi * x)`` evaluated on ``ds`` evenly spaced points of
    [0, 1], perturbed with Gaussian noise of standard deviation 0.01. Each
    action has ``da`` components drawn uniformly from [-1, 1).

    Args:
        n_samples: number of rows to generate.
        seed: optional integer. When given, both the numpy and the torch draws
            are seeded with it so the call is reproducible. When ``None``
            (the default) the draws are unseeded, exactly as before.

    Returns:
        Tuple ``(u, a)`` of float64 tensors with shapes ``(n_samples, ds)``
        and ``(n_samples, da)``.
    """
    rng = np.random.default_rng(seed)
    # Only build a dedicated torch generator when a seed is requested, so the
    # default path keeps drawing from torch's global RNG.
    torch_gen = None if seed is None else torch.Generator().manual_seed(seed)

    x = torch.linspace(0, 1, ds).double()
    u = torch.sin(np.pi * x).repeat(n_samples, 1)
    # `repeat` returns a fresh tensor, so the in-place add cannot alias `x`.
    u += torch.normal(0, 0.01, size=u.shape, generator=torch_gen).double()

    a = torch.tensor(rng.uniform(low=-1.0, high=1.0, size=(n_samples, da))).double()

    return u, a

# 2 - A-DPGPD

### A - Unconstrained

In [624]:
# Budget for the unconstrained run. The three sample counts are forwarded
# positionally to `train_unconstrained`; their exact meaning is defined in
# ADpgpdSampled (not visible here).
epochs = 2_000
n_pe = n_rho = n_roll = 100

# Passed to the solver as `eta` (presumably its step size — confirm in src).
eta = 0.01

# Build a fresh environment and solver for this run.
env = BurgersDynamics(ds, viscosity, dt, dx)
dpgpd = ADpgpdSampled(
    ds, da, env,
    eta, tau, gamma, b, alpha,
    primal_reward_fn, dual_reward_fn, starting_pos_fn,
)

K, losses_primal, losses_dual = dpgpd.train_unconstrained(epochs, n_pe, n_rho, n_roll)

Episode 1999/2000 - Return -7.0491197119818505 

### B - Constrained

In [672]:
# Budget for the constrained run; forwarded positionally to `train_constrained`.
epochs, n_pe, n_rho, n_roll = 10_000, 100, 1_000, 100

# `gamma` is re-stated here with the same value as in the parameters cell;
# `eta` is ten times smaller than in the unconstrained run.
gamma = 0.9
eta = 0.001

# Fresh environment and solver; this rebinds `env` and `dpgpd` from the
# unconstrained cell.
env = BurgersDynamics(ds, viscosity, dt, dx)
dpgpd = ADpgpdSampled(
    ds, da, env,
    eta, tau, gamma, b, alpha,
    primal_reward_fn, dual_reward_fn, starting_pos_fn,
)

# Rebinds K / losses_primal / losses_dual and additionally returns `lmbda`.
K, lmbda, losses_primal, losses_dual = dpgpd.train_constrained(epochs, n_pe, n_rho, n_roll)

Episode 9999/10000 - Return -22.701542074996606 - Constrain -19.839496509471587 - Lambda 1.150968432426452645

In [674]:
# Persist the most recently assigned `losses_primal` / `losses_dual`
# (the constrained run, when the cells are executed top to bottom).
# np.save does not create missing directories, so make sure the target exists.
os.makedirs('../results', exist_ok=True)
np.save('../results/burg_primal.npy', losses_primal)
np.save('../results/burg_dual.npy', losses_dual)

# 3 - PGDual

In [663]:
# Budget for the PGDual run; all five values are forwarded positionally to
# `LinearDual.train` below.
n_epochs = 10_000
n_samples, n_rollout, n_rho = 100, 100, 1_000
n_dual_update = 10

# Learning rates handed to the LinearDual constructor.
lr_actor, lr_dual = 1e-3, 1e-2

env = BurgersDynamics(ds, viscosity, dt, dx)

# NOTE(review): the callback order here (starting_pos_fn first) differs from
# the ADpgpdSampled constructor — keep it in sync with LinearDual's signature.
ld = LinearDual(
    ds, da, env,
    lr_actor, lr_dual, gamma, b,
    starting_pos_fn, primal_reward_fn, dual_reward_fn,
)
loss_primal, loss_dual = ld.train(n_epochs, n_samples, n_rollout, n_rho, n_dual_update)

Epoch 0 - Primal -37.159804024140065 - Dual -0.4975757379473628 - Lambda 0.0
Epoch 10 - Primal -34.16966780281373 - Dual -4.818819118212545 - Lambda 0.0
Epoch 20 - Primal -31.84264330726358 - Dual -8.138098860986684 - Lambda 0.0
Epoch 30 - Primal -29.863585341597773 - Dual -10.856882674779301 - Lambda 0.0
Epoch 40 - Primal -28.276027975563178 - Dual -13.069620585900683 - Lambda 0.0
Epoch 50 - Primal -26.91476783630158 - Dual -14.8897039860829 - Lambda 0.0
Epoch 60 - Primal -25.74159280157141 - Dual -16.5620770493292 - Lambda 0.0
Epoch 70 - Primal -24.67376958411141 - Dual -18.061934102080734 - Lambda 0.0
Epoch 80 - Primal -23.78379366505822 - Dual -19.318125400897443 - Lambda 0.0
Epoch 90 - Primal -22.9756702798441 - Dual -20.480907298064764 - Lambda 0.00480907317250967
Epoch 100 - Primal -22.304111370411185 - Dual -21.42695496142091 - Lambda 0.01907862350344658
Epoch 110 - Primal -21.722031176456426 - Dual -22.271566830119713 - Lambda 0.041794292628765106
Epoch 120 - Primal -21.224982

In [675]:
# Persist the `loss_primal` / `loss_dual` histories returned by `ld.train`.
# np.save does not create missing directories, so make sure the target exists.
os.makedirs('../results', exist_ok=True)
np.save('../results/burg_primal_dm.npy', loss_primal)
np.save('../results/burg_dual_dm.npy', loss_dual)