In [7]:
%reload_ext autoreload
%autoreload 2

import numpy as np
import torch
from numpy import genfromtxt
import torch
from deepc_hunt.dynamics import CartpoleDx
from deepc_hunt import DeePC, Trainer

# Cartpole system

### Load in data

In [8]:
# ---------------------------------------------------------------- #
# Problem dimensions -- Do Not Change
# These values are passed to DeePC further down as Tini, m, p and N (= Tf),
# and the output data file is reshaped to exactly T*p entries.
Tini = 4    # forwarded to DeePC as `Tini`
m = 1       # forwarded to DeePC as `m`; also the length of the `r` weight vector
p = 4       # forwarded to DeePC as `p`; also the length of the `q` weight vector
Tf = 25     # forwarded to DeePC as `N`; also the length of the constraint vectors
T = (m + 1) * (Tini + Tf + p) + 4   # data length implied by the sizes above
# ---------------------------------------------------------------- #

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [9]:
# Load the recorded input (ud) and output (yd) data from CSV.
ud = genfromtxt('data/cartpole_ud.csv', delimiter=',')
yd = genfromtxt('data/cartpole_yd.csv', delimiter=',')
# Flatten the outputs into a vector of T*p entries; reshape raises if the
# file does not contain exactly that many values.
yd = yd.reshape(T*p,)

# Perturb both signals with zero-mean Gaussian noise.
# A dedicated, seeded generator is used so that Restart & Run All produces the
# same noisy data (and hence comparable tuning runs) every time; change
# NOISE_SEED to draw a different realisation.
NOISE_SEED = 0
NOISE_STD_Y = 0.001   # standard deviation of the noise added to yd
NOISE_STD_U = 0.01    # standard deviation of the noise added to ud
rng = np.random.default_rng(NOISE_SEED)

noise_y = rng.standard_normal(yd.shape) * NOISE_STD_Y
noise_u = rng.standard_normal(ud.shape) * NOISE_STD_U
yd = yd + noise_y
ud = ud + noise_u

### Initialise DeePC controller

In [10]:
# Constraint vectors handed to DeePC: one value per step for the input, and
# the same 4-element vector repeated Tf times for the output.
# NOTE(review): presumably these are symmetric bounds per signal -- confirm
# against the DeePC documentation.
u_constraints = 4 * np.ones(Tf)
y_constraints = np.tile(np.array([0.25, 0.2, 0.15, 0.2]), Tf)

# Weight vectors passed as `q` (length p) and `r` (length m).
q = 100 * torch.ones(p)
r = 0.01 * torch.ones(m)
n_batch = 20

controller = DeePC(
    ud=ud,
    yd=yd,
    u_constraints=u_constraints,
    y_constraints=y_constraints,
    Tini=Tini,
    N=Tf,
    m=m,
    p=p,
    n_batch=n_batch,
    device=device,
    linear=False,
    stochastic_y=True,
    stochastic_u=True,
    q=q,
    r=r,
).to(device)

# Starting values for the four regularization parameters; the training log
# printed by the tuning cell reports these same four names.
controller.initialise(lam_g1=200, lam_g2=200, lam_u=200, lam_y=200)



### Run DeePC-HUNT

In [11]:
# Tune regularization params: the trainer pairs the controller with the
# cartpole dynamics used as the environment.
deepc_tuner = Trainer(
    controller=controller,
    env=CartpoleDx(),
)

# Number of tuning epochs; each run uses as many time steps as the
# controller's N attribute.
epochs = 70
time_steps = controller.N

final_params = deepc_tuner.run(
    epochs=epochs,
    time_steps=time_steps,
)

Loss = 24600.4199, lam_y : 200.165, lam_u : 199.835, lam_g1 : 199.835, lam_g2 : 199.835, :  11%| | 8