In [3]:
%load_ext autoreload
# %reload_ext autoreload
%autoreload 2
from IPython.display import HTML
import numpy as np
from numpy import loadtxt
import matplotlib.pyplot as plt
from deepc_hunt.utils import CartpoleDx, Projection, episode_loss
from tqdm import tqdm
import torch
import torch.optim as optim
import tempfile
from deepc_hunt.controllers import DDeePC
import os
import io
import base64
torch.manual_seed(0)

%matplotlib inline


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Initialise controller parameters and data


In [4]:
##################################
# Do Not Change #
Tini = 4
m = 1
p = 4
Tf = 25
T = (m+1)*(Tini + Tf + p) + 4
##################################

n_batch = 20

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# device = 'cpu'
print(device)

ud = loadtxt('data/cartpole_ud.csv', delimiter=',')
yd = loadtxt('data/cartpole_yd.csv', delimiter=',')
yd = yd.reshape(T*p,)

noise =  np.random.randn(*yd.shape)*0.001
noiseu =  np.random.randn(*ud.shape)*0.01
yd = yd + noise
ud = ud + noiseu

dx = CartpoleDx().to(device)
projection = Projection()

def uniform(shape, low, high):
    r = high-low
    return torch.rand(shape)*r+low

cuda


## Initialise Controller

In [5]:
u_constraints = np.ones(Tf)*4
y_constraints = np.kron(np.ones(Tf), np.array([0.25, 0.2, 0.15, 0.2]))
r = torch.ones(m)*0.01
q = torch.ones(p)*100 
q = torch.Tensor([100,10,100,10])

lam_g1 = torch.Tensor([500.409]).to(device) 
lam_g2 = torch.Tensor([0.01]).to(device) 
lam_y = torch.Tensor([250.258]).to(device) 
lam_u = torch.Tensor([250.258]).to(device) 

controller = DDeePC(
    ud=ud, yd=yd, u_constraints=u_constraints, y_constraints=y_constraints,
    Tini=Tini, N=Tf, m=m, p=p, n_batch=n_batch, device=device,
    linear=False, stochastic_y=True, stochastic_u=True, q=q, r=r
).to(device)

for param in controller.parameters():
    print(param)



Parameter containing:
tensor([200.0015], device='cuda:0', requires_grad=True)
Parameter containing:
tensor([199.9971], device='cuda:0', requires_grad=True)
Parameter containing:
tensor([199.9998], device='cuda:0', requires_grad=True)
Parameter containing:
tensor([200.0006], device='cuda:0', requires_grad=True)


## Training

In [6]:
t_dir = tempfile.mkdtemp()

opt = optim.Rprop(controller.parameters(), lr=0.01, step_sizes=(1e-4,50))
epochs = 70
pbar = tqdm(range(epochs))

Q, R = torch.diag(controller.q).to(controller.device), torch.diag(controller.r).to(controller.device)
Q = torch.kron(torch.eye(controller.N).to(controller.device), Q)
R = torch.kron(torch.eye(controller.N).to(controller.device), R)

for j in pbar:
                                                                                                                                        
    uini = (torch.zeros(size=(n_batch, Tini*m)) + torch.randn(n_batch, Tini*m)*0.001).to(device)
    uini += torch.randn(uini.shape).to(device)*0.01
    
    zero = torch.zeros(n_batch)
    th = uniform((n_batch), -0.01, 0.01)
    
    yini = torch.stack((zero, zero, th, zero), dim=1).repeat(1,Tini)
    y = yini[:,-p:].to(device)
    yini += torch.randn(yini.shape)*0.001
    yini = yini.to(device)
    traj = yini
    
    Y, U = torch.Tensor().to(device), torch.Tensor().to(device)
    # G, Ey, Eu = torch.Tensor().to(device), torch.Tensor().to(device), torch.Tensor().to(device)

    for i in range(controller.N):

        g, u_pred, _, sig_y, sig_u = controller(ref=None, uref=None, y_ini=yini, u_ini=uini)
        
        # G, Ey, Eu = torch.cat((G, g.unsqueeze(1)), axis=1), torch.cat((Ey, sig_y.unsqueeze(1)), axis=1), torch.cat((Eu, sig_u.unsqueeze(1)), axis=1)
        input = u_pred[:,:m] + torch.randn(u_pred[:,:m].shape).to(device)*0.01
        y = dx(y, input)

        U = torch.cat((U, input.unsqueeze(1)), axis=1)
        Y = torch.cat((Y, y.unsqueeze(1)), axis=1)
       
        noise = torch.randn(y.shape).to(device)*0.001
        y += noise
       
        yini = torch.cat((yini[:, p:], y), axis=1)
        uini = torch.cat((uini[:, m:], input), axis=1)
 
    # loss = episode_loss(G=G, U=U, Y=Y, Ey=Ey, Eu=Eu, controller=controller, PI=PI)
    Y = Y.reshape((Y.shape[0], Y.shape[1]*Y.shape[2],1))
    loss = torch.sum(Y.mT @ Q @ Y + U.mT @ R @ U)
    opt.zero_grad()   
    loss.backward()
    opt.step()
    controller.apply(projection)

    pbar.set_description(f'loss={loss.item():.3f}, ly={controller.lam_y.data.item():.3f},\
 l1={controller.lam_g1.data.item():.3f}, l2={controller.lam_g2.data.item():.3f}, lu={controller.lam_u.data.item():.3f}')

loss=40.911, ly=375.958,l1=200.059, l2=24.044, lu=200.056: 100%|██████████| 70/70 [20:35<00:00, 17.65s/it]   


## Demo new parameters

In [7]:
# Demo learned parameters
learned_params = [param for param in controller.parameters()]
# learned_params = [torch.Tensor([420]).to(device), torch.Tensor([200]).to(device), torch.Tensor([200]).to(device), torch.Tensor([2.5]).to(device)]
demo_controller = DDeePC(
    ud=ud, yd=yd, u_constraints=u_constraints, y_constraints=y_constraints,
    Tini=Tini, N=Tf, m=m, p=p, n_batch=4, device=device,
    linear=False, stochastic_y=True, stochastic_u=True, q=q, r=r,
    lam_y=learned_params[0], lam_g1=learned_params[1], lam_g2=learned_params[3], lam_u=learned_params[2]
).to(device)

# Demo parameters we initialised with
initial_param = torch.Tensor([200]).to(device)
initial_controller = DDeePC(
    ud=ud, yd=yd, u_constraints=u_constraints, y_constraints=y_constraints,
    Tini=Tini, N=Tf, m=m, p=p, n_batch=4, device=device,
    linear=False, stochastic_y=True, stochastic_u=True, q=q, r=r,
    lam_y=initial_param, lam_g1=initial_param, lam_g2=initial_param, lam_u=initial_param
).to(device)

In [8]:
episodes = 100
controllers = [initial_controller, demo_controller]
n_batch = 4

for i, controller in enumerate(controllers):

    t_dir = tempfile.mkdtemp()
    print('Tmp dir: {}'.format(t_dir))
    dx = CartpoleDx().to(device)
    uini = (torch.zeros(size=(n_batch, Tini*m)) + torch.randn(n_batch, Tini*m)*0.001).to(device)
    uini += torch.randn(uini.shape).to(device)*0.01
    zero = torch.zeros(n_batch)
    th = uniform((n_batch), -0.01, 0.01)
    yini = torch.stack((zero, zero, th, zero), dim=1).repeat(1,Tini)
    y = yini[:,-p:].to(device)
    yini += torch.randn(yini.shape)*0.001
    yini = yini.to(device)
    traj = yini

    for j in tqdm(range(episodes)):
        g, u_pred, _, sig_y, sig_u = controller(ref=None, uref=None, y_ini=yini, u_ini=uini)
        input = u_pred[:,:m] + torch.randn(u_pred[:,:m].shape).to(device)*0.01
        y = dx(y, input)
        noise = torch.randn(y.shape).to(device)*0.001
        y += noise
       
        yini = torch.cat((yini[:, p:], y), axis=1)

        uini = torch.cat((uini[:, m:], input), axis=1)
        n_row, n_col = 2, 2
        fig, axs = plt.subplots(n_row, n_col, figsize=(3*n_col,3*n_row))
        axs = axs.reshape(-1)
        
        for n in range(n_batch):
            dx.get_frame(y[n], ax=axs[n])
            axs[n].get_xaxis().set_visible(False)
            axs[n].get_yaxis().set_visible(False)
        fig.tight_layout()
        fig.savefig(os.path.join(t_dir, '{:03d}.png'.format(j)))
        plt.close(fig)

    vid_fname = '../videos/cartpole_demo.mp4' if i==1 else '../videos/cartpole_init.mp4'
    if os.path.exists(vid_fname):
        os.remove(vid_fname)
    cmd = 'ffmpeg -r 16 -f image2 -i {}/%03d.png -vcodec libx264 -crf 25 -vf "pad=ceil(iw/2)*2:ceil(ih/2)*2" -pix_fmt yuv420p {}'.format(
        t_dir, vid_fname
    )
    os.system(cmd)
    # print('Saving video to: {}'.format(vid_fname))

    video = io.open(vid_fname, 'r+b').read()
    encoded = base64.b64encode(video)
    HTML(data='''<video alt="test" controls>
                    <source src="data:video/mp4;base64,{0}" type="video/mp4" />
                </video>'''.format(encoded.decode('ascii')))

Tmp dir: /tmp/tmp6emea3mz


100%|██████████| 100/100 [00:15<00:00,  6.43it/s]
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-li

Tmp dir: /tmp/tmppwe438l6


100%|██████████| 100/100 [00:30<00:00,  3.31it/s]
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-li