In [None]:
import os, sys
import numpy as np
import torch

In [None]:
# Put the parent of the current working directory on the import path so the
# `src` package imported in the next cell can be resolved.
d = os.getcwd()
p = os.path.dirname(d)

# Guard the append: re-running this cell must not pile up duplicate entries.
if p not in sys.path:
    sys.path.append(p)

In [None]:
from src.dynamics import BurgersDynamics
from src.algorithms.addpgpd_sampled import ADpgpdSampled
from src.algorithms.pgdual import LinearDual

# 1 - Parameters

In [None]:
# Dimensions: `ds` sizes the field `u` and `da` sizes the action `a`
# (both are consumed by starting_pos_fn and the constructors below).
ds, da = 10, 10

# Hyper-parameters forwarded to the trainers; `tau` additionally scales the
# action penalty matrix H defined further down.
tau, gamma, alpha = 0.001, 0.9, 1.0

# Arguments of BurgersDynamics. `dx` is the spacing of `ds` equally spaced
# points on the unit interval.
viscosity = 0.1
dt = 0.01
dx = 1.0 / (ds - 1)

# Forwarded as the `b` argument of both trainers
# (presumably the constraint threshold -- TODO confirm against src/).
b = -20

# Float64 weight matrices read by the reward functions:
#   G = -I (ds x ds), H = -(tau / 2) * I (da x da), C = -I (da x da).
G = -torch.eye(ds, dtype=torch.float64)
H = -torch.eye(da, dtype=torch.float64) * (tau / 2)
C = -torch.eye(da, dtype=torch.float64)

def primal_reward_fn(env, a):
    """Row-wise quadratic form of the environment field under ``G``.

    For every row ``i`` of ``env.u`` this evaluates
    ``sum_j (env.u @ G)[i, j] * env.u[i, j]``. The argument ``a`` is not used.
    """
    field = env.u
    weighted = torch.matmul(field, G)
    return torch.sum(weighted * field, dim=1)

def primal_reward_reg_fn(env, a):
    """Row-wise quadratic form of the action ``a`` under ``H``.

    For every row ``i`` of ``a`` this evaluates
    ``sum_j (a @ H)[i, j] * a[i, j]``. The argument ``env`` is not used.
    """
    weighted = torch.matmul(a, H)
    return torch.sum(weighted * a, dim=1)

def dual_reward_fn(env, a):
    """Row-wise sum of ``|a| @ C``.

    Takes the element-wise absolute value of the action, multiplies it by
    ``C`` and sums each row. The argument ``env`` is not used.
    """
    magnitude = torch.abs(a)
    return torch.sum(torch.matmul(magnitude, C), dim=1)

def starting_pos_fn(n_samples):
    """Draw ``n_samples`` initial fields and actions.

    Each field is ``sin(pi * x)`` evaluated on ``ds`` equally spaced points of
    [0, 1], perturbed by zero-mean Gaussian noise with standard deviation
    0.01. Each action is drawn uniformly from [-1, 1) per component.

    Args:
        n_samples: number of rows to generate.

    Returns:
        Tuple ``(u, a)`` of float64 tensors with shapes ``(n_samples, ds)``
        and ``(n_samples, da)``.
    """
    # Build the grid directly in float64. Creating it in torch's default dtype
    # (float32 unless reconfigured) and up-casting afterwards would carry the
    # float32 rounding of the interior nodes into the double-precision
    # profile, and would disagree with `dx`, which is computed in double.
    x = torch.linspace(0, 1, ds, dtype=torch.float64)
    u = torch.sin(np.pi * x).repeat(n_samples, 1)

    # The perturbation is still sampled in the default dtype and then widened,
    # so the torch random stream consumed per call is unchanged.
    noise = torch.normal(0, 0.01, size=u.shape).double()
    u += noise

    # A fresh, unseeded NumPy generator is created on every call.
    rng = np.random.default_rng()
    a = torch.tensor(rng.uniform(
        low=[-1] * da,
        high=[1] * da,
        size=[n_samples, da],
    )).double()

    return u, a

# 2 - AD-PGPD

### A - Unconstrained

In [None]:
# Budget for the unconstrained run; all four values are forwarded unchanged
# to train_unconstrained below.
epochs, n_pe, n_rho, n_roll = 2_000, 100, 100, 100

# Forwarded to the ADpgpdSampled constructor
# (presumably the step size -- TODO confirm against src/).
eta = 0.01

# Fresh environment and learner for this experiment.
env = BurgersDynamics(ds, viscosity, dt, dx)
dpgpd = ADpgpdSampled(
    ds, da, env,
    eta, tau, gamma, b, alpha,
    primal_reward_fn, primal_reward_reg_fn, dual_reward_fn,
    starting_pos_fn,
)

K, losses_primal, losses_dual = dpgpd.train_unconstrained(
    epochs, n_pe, n_rho, n_roll
)

### B - Constrained

In [None]:
# Budget for the constrained run; all four values are forwarded unchanged
# to train_constrained below.
epochs, n_pe, n_rho, n_roll = 10_000, 100, 1_000, 100

# `gamma` is re-assigned here (same value as in the parameter cell) and a
# smaller `eta` than in the unconstrained run is used.
gamma = 0.9
eta = 0.001

# Fresh environment and learner; this overwrites `env` and `dpgpd` from the
# unconstrained experiment.
env = BurgersDynamics(ds, viscosity, dt, dx)
dpgpd = ADpgpdSampled(
    ds, da, env,
    eta, tau, gamma, b, alpha,
    primal_reward_fn, primal_reward_reg_fn, dual_reward_fn,
    starting_pos_fn,
)

# Also overwrites K / losses_primal / losses_dual from the unconstrained run.
K, lmbda, losses_primal, losses_dual = dpgpd.train_constrained(
    epochs, n_pe, n_rho, n_roll
)

In [None]:
# Persist the loss histories returned by the most recent ADpgpdSampled run.
# np.save does not create missing parent directories, so make sure the target
# folder exists first; otherwise the results of a long training run would be
# lost to a FileNotFoundError at the very last step.
os.makedirs('../results', exist_ok=True)
np.save('../results/burg_primal.npy', losses_primal)
np.save('../results/burg_dual.npy', losses_dual)

# 3 - PGDual

In [None]:
# Budget for the PGDual baseline; forwarded unchanged to LinearDual.train.
n_epochs, n_samples, n_rollout, n_rho = 10_000, 100, 100, 1_000

# Number passed as the last argument of LinearDual.train, and the two
# learning rates passed to the LinearDual constructor.
n_dual_update = 10
lr_actor, lr_dual = 1e-3, 1e-2

# Fresh environment; this overwrites `env` from the earlier experiments.
env = BurgersDynamics(ds, viscosity, dt, dx)

ld = LinearDual(
    ds, da, env,
    lr_actor, lr_dual, gamma, b,
    starting_pos_fn, primal_reward_fn, dual_reward_fn,
)
loss_primal, loss_dual = ld.train(
    n_epochs, n_samples, n_rollout, n_rho, n_dual_update
)

In [None]:
# Persist the loss histories returned by LinearDual.train.
# np.save does not create missing parent directories, so make sure the target
# folder exists first; otherwise the results of a long training run would be
# lost to a FileNotFoundError at the very last step.
os.makedirs('../results', exist_ok=True)
np.save('../results/burg_primal_dm.npy', loss_primal)
np.save('../results/burg_dual_dm.npy', loss_dual)