In [3]:
import torch
import numpy as np
import pickle

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Product, ConstantKernel as C

import gym_sin
from gym import spaces

from active_learning.arguments import get_args
from active_learning.oracle import OracleAgent
from active_learning.new_pt_multi_task import DecoupledPMTAgent
from network.vae import InferenceNetwork, InferenceNetwork2, InferenceNetworkNoPrev
from task.GuassianTaskGenerator import GaussianTaskGenerator
from utilities.folder_management import handle_folder_creation


In [4]:
def get_task_sequence(alpha, n_restarts, num_test_processes, ):
    """Build the fixed meta-test task sequence and its companion objects.

    Args:
        alpha: Noise level; its square is passed as ``alpha`` to every
            ``GaussianProcessRegressor``.
        n_restarts: Forwarded as ``n_restarts_optimizer`` to every regressor.
        num_test_processes: Number of regressors per group and number of
            initial prior tensors to create.

    Returns:
        A ``(gp_list, test_kwargs, init_prior_test)`` tuple where

        * ``gp_list`` holds 2 lists of ``num_test_processes`` regressors, all
          built around the same ``ConstantKernel * RBF`` kernel object;
        * ``test_kwargs`` holds 50 task dictionaries.  Steps 15..40 (inclusive)
          use ``mean = -step`` and ``std = 30 - step / 16``; every other step
          uses ``mean = 0`` and ``std = 30``;
        * ``init_prior_test`` holds ``num_test_processes`` float32 tensors
          equal to ``[[0, 30], [4, 4]]`` (presumably prior means in the first
          row and prior stds in the second -- TODO confirm against the agent).
    """
    shared_kernel = C(1.0, (1e-5, 1e5)) * RBF(1, (1e-5, 1e5))

    # Two groups of regressors, one regressor per test process in each group.
    gp_list = [
        [
            GaussianProcessRegressor(kernel=shared_kernel,
                                     alpha=alpha ** 2,
                                     normalize_y=True,
                                     n_restarts_optimizer=n_restarts)
            for _ in range(num_test_processes)
        ]
        for _ in range(2)
    ]

    init_prior_test = [
        torch.tensor([[0, 30], [4, 4]], dtype=torch.float32)
        for _ in range(num_test_processes)
    ]

    test_kwargs = []
    for step in range(50):
        # Only the middle window of the sequence departs from the baseline task.
        in_drift_window = 15 <= step <= 40
        test_kwargs.append({
            'amplitude': 1,
            'mean': 0 - step if in_drift_window else 0,
            'std': 30 - step / 16 if in_drift_window else 30,
            'noise_std': 0.001,
            'scale_reward': False,
        })

    return gp_list, test_kwargs, init_prior_test


In [5]:
# Environment id and the one-dimensional action space in [-1, 1].
env_name = "gauss-v0"
action_space = spaces.Box(low=np.array([-1]), high=np.array([1]))

# Size of the latent task representation.
latent_dim = 2

# Bounds handed to GaussianTaskGenerator in a later cell.
x_min, x_max = -100, 100
min_mean, max_mean = -40, 40
min_std, max_std = 15, 35
prior_mu_min, prior_mu_max = -5, 5
prior_std_min, prior_std_max = 3, 5

# Torch device string passed to the agent.
device = "cpu"

In [6]:
# Task generator configured from the bounds defined in the configuration cell.
task_generator = GaussianTaskGenerator(
    x_min=x_min, x_max=x_max,
    min_mean=min_mean, max_mean=max_mean,
    min_std=min_std, max_std=max_std,
    prior_mu_min=prior_mu_min, prior_mu_max=prior_mu_max,
    prior_std_min=prior_std_min, prior_std_max=prior_std_max,
)

# The result is not bound to a name: it is only shown as this cell's output.
task_generator.get_task_family(
    n_tasks=10000,
    n_batches=1,
    test_perc=0,
    batch_size=5,
)

([[{'train': [tensor([[-43.4400],
             [-66.7400],
             [-26.0800],
             [ 22.1600],
             [-22.5800]]), tensor([[ 0.1720],
             [-0.1871],
             [ 0.2799],
             [ 1.0523],
             [ 0.2297]])],
    'test': [tensor([], size=(0, 1)), tensor([], size=(0, 1))]}],
  [{'train': [tensor([[-41.1700],
             [ -1.0300],
             [ -1.1900],
             [ 60.4900],
             [-70.0700]]), tensor([[ 0.7665],
             [ 0.8045],
             [ 0.6444],
             [-0.0271],
             [ 0.2179]])],
    'test': [tensor([], size=(0, 1)), tensor([], size=(0, 1))]}],
  [{'train': [tensor([[  6.4100],
             [  5.5900],
             [ 43.6200],
             [-97.3000],
             [ 38.5000]]), tensor([[ 0.2750],
             [ 0.2216],
             [ 1.0026],
             [-0.0143],
             [ 0.8511]])],
    'test': [tensor([], size=(0, 1)), tensor([], size=(0, 1))]}],
  [{'train': [tensor([[41.9500],
       

In [18]:
# Per-entry lower/upper bounds passed to the agent together with rescale_obs=True
# (presumably the ranges used to rescale inputs -- TODO confirm in DecoupledPMTAgent).
min_old, max_old = [-100, 0, 0, 0], [100, 50, 20, 20]

# Shortest and longest sequence length handed to the agent for VAE training.
vae_min_seq, vae_max_seq = 1, 150

# Shape of a single observation given to the agent.
obs_shape = (2,)

In [19]:
# Inference network with a latent_dim-sized output; n_in=6 is hard-coded here
# (NOTE(review): confirm it matches the input layout the agent feeds the network).
vi = InferenceNetworkNoPrev(z_dim=latent_dim, n_in=6)

# Adam with its default hyper-parameters over all inference-network weights.
vi_optim = torch.optim.Adam(params=vi.parameters())

In [20]:
# Agent construction. The grouping comments below follow the parameter names
# only; see DecoupledPMTAgent for the exact meaning of each argument.
agent = DecoupledPMTAgent(
    # --- environment interface / rollout ---
    action_space=action_space,
    device=device,
    gamma=0.99,
    num_steps=150,
    num_processes=32,
    # --- policy optimisation ---
    clip_param=0.2,
    ppo_epoch=4,
    num_mini_batch=8,
    value_loss_coef=0.5,
    entropy_coef=0.,
    lr=0.0001,
    eps=1e-6,
    max_grad_norm=0.5,
    use_linear_lr_decay=False,
    use_gae=False,
    gae_lambda=0.95,  # NOTE(review): presumably unused while use_gae=False -- confirm
    use_proper_time_limits=False,
    # --- policy network ---
    obs_shape=obs_shape,
    latent_dim=latent_dim,
    recurrent_policy=False,
    hidden_size=32,
    use_elu=True,
    # --- inference network and its optimiser ---
    variational_model=vi,
    vae_optim=vi_optim,
    # --- rescaling bounds and VAE sequence lengths ---
    rescale_obs=True,
    max_old=max_old,
    min_old=min_old,
    vae_min_seq=vae_min_seq,
    vae_max_seq=vae_max_seq,
    # --- action bounds ---
    max_action=100,
    min_action=-100,
)

In [21]:
# Fixed meta-test sequence: regressors, per-step task kwargs and initial priors.
gp_list, test_kwargs, init_prior_test = get_task_sequence(
    alpha=0.25,
    n_restarts=1,
    num_test_processes=2,
)

# First training run of 2000 iterations.
res_eval, res_vae, test_list = agent.train(
    training_iter=2000,
    env_name=env_name,
    seed=0,
    task_generator=task_generator,
    eval_interval=50,
    log_dir=".",
    use_env_obs=False,
    num_vae_steps=4,
    # meta-test inputs produced by get_task_sequence above
    gp_list=gp_list,
    sw_size=20,
    test_kwargs=test_kwargs,
    init_prior_test=init_prior_test,
    num_random_task_to_eval=128,
    num_test_processes=2,
)


Vae step 0/4, mse 947.652099609375, kdl 239.3385009765625
Vae step 0/4, mse 821.635009765625, kdl 216.07574462890625
Vae step 0/4, mse 742.4215087890625, kdl 202.4710693359375
Vae step 0/4, mse 698.589599609375, kdl 191.06658935546875
Vae step 0/4, mse 627.283935546875, kdl 165.90997314453125
Vae step 0/4, mse 629.427978515625, kdl 152.41561889648438
Vae step 0/4, mse 620.6961669921875, kdl 159.4633026123047
Vae step 0/4, mse 681.9771728515625, kdl 186.76560974121094
Vae step 0/4, mse 550.9859619140625, kdl 148.06521606445312
Vae step 0/4, mse 566.5457763671875, kdl 144.27224731445312
Vae step 0/4, mse 631.437744140625, kdl 160.14480590820312
Vae step 0/4, mse 551.114501953125, kdl 146.0675048828125
Vae step 0/4, mse 482.5705261230469, kdl 130.61888122558594
Vae step 0/4, mse 488.6905822753906, kdl 133.38253784179688
Vae step 0/4, mse 464.00201416015625, kdl 114.99727630615234
Vae step 0/4, mse 455.818603515625, kdl 126.53179168701172
Vae step 0/4, mse 410.81353759765625, kdl 106.60868

Vae step 0/4, mse 15.19314956665039, kdl 3.506195306777954
Vae step 0/4, mse 14.681524276733398, kdl 2.5325188636779785
Vae step 0/4, mse 16.238706588745117, kdl 3.343555212020874
Vae step 0/4, mse 13.151203155517578, kdl 2.219089984893799
Vae step 0/4, mse 52.81000900268555, kdl 15.28711223602295
Vae step 0/4, mse 14.987841606140137, kdl 2.742358922958374
Vae step 0/4, mse 14.568775177001953, kdl 2.571589946746826
Vae step 0/4, mse 17.44623374938965, kdl 2.2718257904052734
Vae step 0/4, mse 25.897968292236328, kdl 7.2690749168396
Vae step 0/4, mse 16.889238357543945, kdl 2.4564123153686523
Vae step 0/4, mse 21.78610610961914, kdl 2.7185401916503906
Vae step 0/4, mse 21.727706909179688, kdl 2.694566488265991
Vae step 0/4, mse 35.796321868896484, kdl 6.9540863037109375
Vae step 0/4, mse 12.104724884033203, kdl 2.904820680618286
Vae step 0/4, mse 17.23314666748047, kdl 2.5520071983337402
Vae step 0/4, mse 22.1678409576416, kdl 5.157050132751465
Vae step 0/4, mse 22.322433471679688, kdl 5

Vae step 0/4, mse 15.303703308105469, kdl 2.2300477027893066
Vae step 0/4, mse 14.653631210327148, kdl 1.7526072263717651
Vae step 0/4, mse 15.298125267028809, kdl 2.3846564292907715
Vae step 0/4, mse 14.335885047912598, kdl 1.6823183298110962
Vae step 0/4, mse 12.470270156860352, kdl 1.6650277376174927
Vae step 0/4, mse 15.867631912231445, kdl 2.297940254211426
Vae step 0/4, mse 14.971728324890137, kdl 1.9156066179275513
Vae step 0/4, mse 8.718086242675781, kdl 2.258869171142578
Vae step 0/4, mse 16.841644287109375, kdl 1.8458340167999268
Vae step 0/4, mse 13.492292404174805, kdl 2.0579140186309814
Vae step 0/4, mse 10.490740776062012, kdl 2.0969436168670654
Vae step 0/4, mse 14.872830390930176, kdl 3.115032434463501
Vae step 0/4, mse 20.957319259643555, kdl 1.700422763824463
Vae step 0/4, mse 17.098047256469727, kdl 1.9505505561828613
Vae step 0/4, mse 17.988582611083984, kdl 3.6200013160705566
Vae step 0/4, mse 17.680381774902344, kdl 2.8552794456481934
Vae step 0/4, mse 12.75345039

Vae step 0/4, mse 13.398637771606445, kdl 1.789102554321289
Vae step 0/4, mse 15.247157096862793, kdl 1.9016156196594238
Vae step 0/4, mse 17.55634307861328, kdl 1.8933206796646118
Vae step 0/4, mse 14.012476921081543, kdl 1.9514216184616089
Vae step 0/4, mse 12.150089263916016, kdl 1.5488229990005493
Epoch 400 / 2000
Evaluation...
Evaluation using 128 tasks. Mean reward: 38.6649486875
Meta-testing...
Reward : 44.98557167
Vae step 0/4, mse 10.632134437561035, kdl 1.649902105331421
Vae step 0/4, mse 18.965465545654297, kdl 1.6891448497772217
Vae step 0/4, mse 15.943733215332031, kdl 3.3192577362060547
Vae step 0/4, mse 12.581034660339355, kdl 2.2151081562042236
Vae step 0/4, mse 13.536958694458008, kdl 1.9591928720474243
Vae step 0/4, mse 18.71196937561035, kdl 1.8591811656951904
Vae step 0/4, mse 14.516444206237793, kdl 2.105698823928833
Vae step 0/4, mse 12.081950187683105, kdl 1.736102819442749
Vae step 0/4, mse 11.807674407958984, kdl 1.6925387382507324
Vae step 0/4, mse 14.82763957

Vae step 0/4, mse 9.554652214050293, kdl 1.6828107833862305
Vae step 0/4, mse 18.76935386657715, kdl 1.5929932594299316
Vae step 0/4, mse 15.62994384765625, kdl 2.4221906661987305
Vae step 0/4, mse 10.573548316955566, kdl 1.8213083744049072
Vae step 0/4, mse 18.651508331298828, kdl 1.7344894409179688
Vae step 0/4, mse 14.047945022583008, kdl 1.69822359085083
Vae step 0/4, mse 12.461784362792969, kdl 1.7087575197219849
Vae step 0/4, mse 12.843743324279785, kdl 1.8774393796920776
Vae step 0/4, mse 14.785086631774902, kdl 2.430237293243408
Vae step 0/4, mse 17.635059356689453, kdl 2.21055006980896
Vae step 0/4, mse 15.575787544250488, kdl 2.4390981197357178
Vae step 0/4, mse 14.30015754699707, kdl 2.3393502235412598
Vae step 0/4, mse 17.695068359375, kdl 1.9054548740386963
Vae step 0/4, mse 23.649755477905273, kdl 1.780447006225586
Vae step 0/4, mse 10.436187744140625, kdl 2.639407157897949
Vae step 0/4, mse 12.841432571411133, kdl 1.6061196327209473
Vae step 0/4, mse 15.149002075195312, 

Vae step 0/4, mse 13.50174331665039, kdl 2.021242141723633
Vae step 0/4, mse 15.503785133361816, kdl 1.9108901023864746
Vae step 0/4, mse 13.408843994140625, kdl 1.7570140361785889
Vae step 0/4, mse 14.765693664550781, kdl 1.8397254943847656
Vae step 0/4, mse 12.599555969238281, kdl 1.8430393934249878
Vae step 0/4, mse 12.646435737609863, kdl 1.6177775859832764
Vae step 0/4, mse 14.358349800109863, kdl 1.9924702644348145
Vae step 0/4, mse 14.510448455810547, kdl 1.6299775838851929
Vae step 0/4, mse 13.585921287536621, kdl 2.3536648750305176
Vae step 0/4, mse 13.00185775756836, kdl 1.9561101198196411
Vae step 0/4, mse 10.958365440368652, kdl 1.902448058128357
Vae step 0/4, mse 17.522785186767578, kdl 1.7497749328613281
Vae step 0/4, mse 11.756871223449707, kdl 1.7975904941558838
Vae step 0/4, mse 13.374260902404785, kdl 1.794496774673462
Vae step 0/4, mse 12.40310001373291, kdl 1.9661699533462524
Vae step 0/4, mse 12.197056770324707, kdl 2.0356345176696777
Vae step 0/4, mse 19.214567184

Vae step 0/4, mse 12.77640151977539, kdl 1.8110566139221191
Vae step 0/4, mse 12.15412425994873, kdl 1.75089693069458
Vae step 0/4, mse 12.559850692749023, kdl 1.7001447677612305
Vae step 0/4, mse 17.91558837890625, kdl 1.613900899887085
Vae step 0/4, mse 11.486821174621582, kdl 2.338395357131958
Vae step 0/4, mse 17.41193199157715, kdl 2.1075499057769775
Vae step 0/4, mse 17.71465301513672, kdl 1.8822851181030273
Vae step 0/4, mse 16.09686279296875, kdl 2.444288730621338
Vae step 0/4, mse 7.150564193725586, kdl 2.132817029953003
Vae step 0/4, mse 14.2202730178833, kdl 2.6189756393432617
Vae step 0/4, mse 12.532464027404785, kdl 1.5834095478057861
Vae step 0/4, mse 9.598282814025879, kdl 2.1386423110961914
Vae step 0/4, mse 13.671794891357422, kdl 1.8263022899627686
Vae step 0/4, mse 12.947467803955078, kdl 2.0289692878723145
Epoch 800 / 2000
Evaluation...
Evaluation using 128 tasks. Mean reward: 64.63108367968749
Meta-testing...
Reward : 69.23697967
Vae step 0/4, mse 16.20171928405761

Vae step 0/4, mse 12.458828926086426, kdl 1.9838770627975464
Vae step 0/4, mse 11.35097885131836, kdl 1.8208978176116943
Vae step 0/4, mse 16.573148727416992, kdl 1.924210548400879
Vae step 0/4, mse 18.405765533447266, kdl 2.140794038772583
Vae step 0/4, mse 10.25787353515625, kdl 2.065749406814575
Vae step 0/4, mse 10.115650177001953, kdl 1.756343126296997
Vae step 0/4, mse 13.67827320098877, kdl 1.7719032764434814
Vae step 0/4, mse 13.890946388244629, kdl 2.0068533420562744
Vae step 0/4, mse 11.365945816040039, kdl 2.2221198081970215
Vae step 0/4, mse 12.561450004577637, kdl 1.801596999168396
Vae step 0/4, mse 13.211862564086914, kdl 1.8966739177703857
Vae step 0/4, mse 16.926959991455078, kdl 1.7784162759780884
Vae step 0/4, mse 10.375563621520996, kdl 2.3281795978546143
Vae step 0/4, mse 12.076868057250977, kdl 2.4750590324401855
Vae step 0/4, mse 16.36701774597168, kdl 2.0272674560546875
Vae step 0/4, mse 12.058919906616211, kdl 2.0200459957122803
Vae step 0/4, mse 12.083374977111

Vae step 0/4, mse 11.405874252319336, kdl 2.521902084350586
Vae step 0/4, mse 9.267858505249023, kdl 2.1632754802703857
Epoch 1050 / 2000
Evaluation...
Evaluation using 128 tasks. Mean reward: 73.25564725
Meta-testing...
Reward : 82.39145371000001
Vae step 0/4, mse 13.708866119384766, kdl 2.584038019180298
Vae step 0/4, mse 18.22023582458496, kdl 2.1622917652130127
Vae step 0/4, mse 13.459220886230469, kdl 2.343722105026245
Vae step 0/4, mse 10.137262344360352, kdl 2.855132579803467
Vae step 0/4, mse 15.773066520690918, kdl 2.079094409942627
Vae step 0/4, mse 11.593581199645996, kdl 2.0124082565307617
Vae step 0/4, mse 11.579256057739258, kdl 2.2565150260925293
Vae step 0/4, mse 11.42173957824707, kdl 2.246800661087036
Vae step 0/4, mse 11.510161399841309, kdl 2.1759581565856934
Vae step 0/4, mse 11.016764640808105, kdl 2.1911139488220215
Vae step 0/4, mse 10.688039779663086, kdl 2.2719342708587646
Vae step 0/4, mse 10.299968719482422, kdl 2.5161304473876953
Vae step 0/4, mse 12.194266

Vae step 0/4, mse 9.948358535766602, kdl 2.643144130706787
Vae step 0/4, mse 7.824411392211914, kdl 2.4248597621917725
Vae step 0/4, mse 11.176288604736328, kdl 2.657041549682617
Vae step 0/4, mse 6.804757118225098, kdl 2.4222230911254883
Vae step 0/4, mse 13.961135864257812, kdl 2.982326030731201
Vae step 0/4, mse 9.014435768127441, kdl 2.204145908355713
Vae step 0/4, mse 12.290589332580566, kdl 2.4545340538024902
Vae step 0/4, mse 7.720206260681152, kdl 2.8809378147125244
Vae step 0/4, mse 10.021026611328125, kdl 2.73321533203125
Vae step 0/4, mse 10.862593650817871, kdl 2.5801169872283936
Vae step 0/4, mse 10.78893756866455, kdl 2.470939874649048
Vae step 0/4, mse 10.631555557250977, kdl 2.387596368789673
Vae step 0/4, mse 9.918045997619629, kdl 3.0487053394317627
Vae step 0/4, mse 10.418150901794434, kdl 2.621462821960449
Vae step 0/4, mse 9.541086196899414, kdl 2.740302562713623
Vae step 0/4, mse 14.327441215515137, kdl 2.5863072872161865
Vae step 0/4, mse 8.96292495727539, kdl 2.

Vae step 0/4, mse 9.948074340820312, kdl 2.598863124847412
Vae step 0/4, mse 9.207148551940918, kdl 2.6208832263946533
Vae step 0/4, mse 9.071414947509766, kdl 2.7373533248901367
Vae step 0/4, mse 6.603338718414307, kdl 2.517392873764038
Vae step 0/4, mse 7.7683424949646, kdl 2.5645065307617188
Vae step 0/4, mse 7.842593669891357, kdl 2.675053119659424
Vae step 0/4, mse 7.11293363571167, kdl 2.5540778636932373
Vae step 0/4, mse 10.87224292755127, kdl 3.149362325668335
Vae step 0/4, mse 8.815457344055176, kdl 2.37463641166687
Vae step 0/4, mse 6.211451530456543, kdl 2.58805513381958
Vae step 0/4, mse 7.349713325500488, kdl 2.9330763816833496
Vae step 0/4, mse 8.795943260192871, kdl 2.4535369873046875
Vae step 0/4, mse 9.331934928894043, kdl 2.460092067718506
Vae step 0/4, mse 11.492705345153809, kdl 3.219463586807251
Vae step 0/4, mse 8.962203979492188, kdl 2.944464683532715
Vae step 0/4, mse 6.679005146026611, kdl 2.4045932292938232
Vae step 0/4, mse 7.509121894836426, kdl 2.9478626251

Vae step 0/4, mse 6.777841567993164, kdl 2.8851208686828613
Vae step 0/4, mse 10.4402494430542, kdl 2.3873400688171387
Vae step 0/4, mse 9.43948745727539, kdl 2.7729413509368896
Vae step 0/4, mse 8.566232681274414, kdl 3.4249117374420166
Vae step 0/4, mse 9.666742324829102, kdl 3.338254451751709
Vae step 0/4, mse 6.598126411437988, kdl 2.5025339126586914
Epoch 1450 / 2000
Evaluation...
Evaluation using 128 tasks. Mean reward: 99.81101368750001
Meta-testing...
Reward : 93.96225034000001
Vae step 0/4, mse 6.068159580230713, kdl 3.445765495300293
Vae step 0/4, mse 7.308764934539795, kdl 2.559164524078369
Vae step 0/4, mse 5.580949783325195, kdl 2.8045127391815186
Vae step 0/4, mse 6.906045913696289, kdl 3.2449893951416016
Vae step 0/4, mse 8.675520896911621, kdl 2.988410472869873
Vae step 0/4, mse 10.857405662536621, kdl 3.28570294380188
Vae step 0/4, mse 9.628130912780762, kdl 2.7674708366394043
Vae step 0/4, mse 7.484493255615234, kdl 2.9315221309661865
Vae step 0/4, mse 5.1120648384094

Vae step 0/4, mse 8.007857322692871, kdl 2.5117528438568115
Vae step 0/4, mse 9.806440353393555, kdl 2.773655414581299
Vae step 0/4, mse 5.478013515472412, kdl 2.845021963119507
Vae step 0/4, mse 6.031352519989014, kdl 2.567256212234497
Vae step 0/4, mse 7.5500359535217285, kdl 2.6625893115997314
Vae step 0/4, mse 7.011898040771484, kdl 2.9688644409179688
Vae step 0/4, mse 16.618043899536133, kdl 2.9650604724884033
Vae step 0/4, mse 7.667822360992432, kdl 3.0054051876068115
Vae step 0/4, mse 5.9601850509643555, kdl 2.7289817333221436
Vae step 0/4, mse 10.974688529968262, kdl 3.0457420349121094
Vae step 0/4, mse 6.5463175773620605, kdl 2.7804949283599854
Vae step 0/4, mse 5.7759690284729, kdl 2.614312171936035
Vae step 0/4, mse 7.749757766723633, kdl 3.0271544456481934
Vae step 0/4, mse 10.589261054992676, kdl 2.9485340118408203
Vae step 0/4, mse 11.409595489501953, kdl 2.7312278747558594
Vae step 0/4, mse 9.697640419006348, kdl 3.166224718093872
Vae step 0/4, mse 8.44920539855957, kdl 

Vae step 0/4, mse 6.518650054931641, kdl 2.831827163696289
Vae step 0/4, mse 7.630928993225098, kdl 3.5023045539855957
Vae step 0/4, mse 7.300294399261475, kdl 2.9345157146453857
Vae step 0/4, mse 12.729787826538086, kdl 3.095315456390381
Vae step 0/4, mse 7.911128997802734, kdl 3.5218563079833984
Vae step 0/4, mse 7.796476364135742, kdl 3.215122938156128
Vae step 0/4, mse 7.570777893066406, kdl 2.5299038887023926
Vae step 0/4, mse 5.705686092376709, kdl 3.4315648078918457
Vae step 0/4, mse 9.15023136138916, kdl 2.522650957107544
Vae step 0/4, mse 7.23015832901001, kdl 2.5841426849365234
Vae step 0/4, mse 7.338918685913086, kdl 3.0037479400634766
Vae step 0/4, mse 8.173906326293945, kdl 2.378997802734375
Vae step 0/4, mse 9.174220085144043, kdl 2.868584632873535
Vae step 0/4, mse 7.335684299468994, kdl 2.951002597808838
Vae step 0/4, mse 11.597671508789062, kdl 2.9009976387023926
Vae step 0/4, mse 8.656045913696289, kdl 2.630911111831665
Vae step 0/4, mse 9.113306999206543, kdl 3.53020

Vae step 0/4, mse 4.7912211418151855, kdl 2.62241268157959
Vae step 0/4, mse 7.001248359680176, kdl 2.6949071884155273
Vae step 0/4, mse 5.87733793258667, kdl 2.9836173057556152
Vae step 0/4, mse 7.027227401733398, kdl 2.9464547634124756
Vae step 0/4, mse 4.56440544128418, kdl 3.1770811080932617
Vae step 0/4, mse 8.814461708068848, kdl 3.4744935035705566
Vae step 0/4, mse 8.708450317382812, kdl 3.73586368560791
Vae step 0/4, mse 9.155593872070312, kdl 2.9120230674743652
Epoch 1850 / 2000
Evaluation...
Evaluation using 128 tasks. Mean reward: 117.8553823984375
Meta-testing...
Reward : 109.12897665000003
Vae step 0/4, mse 5.529279708862305, kdl 2.8635756969451904
Vae step 0/4, mse 9.960172653198242, kdl 2.947856903076172
Vae step 0/4, mse 8.59566593170166, kdl 3.55363130569458
Vae step 0/4, mse 7.548910140991211, kdl 2.4308156967163086
Vae step 0/4, mse 6.466241836547852, kdl 2.9467029571533203
Vae step 0/4, mse 5.623302936553955, kdl 2.6538138389587402
Vae step 0/4, mse 5.63036346435546

Vae step 0/4, mse 9.293098449707031, kdl 2.5286059379577637
Vae step 0/4, mse 3.9128551483154297, kdl 3.1623787879943848
Vae step 0/4, mse 5.014650821685791, kdl 2.8713784217834473
Vae step 0/4, mse 6.424896240234375, kdl 3.079519271850586
Vae step 0/4, mse 8.521562576293945, kdl 3.1059463024139404
Vae step 0/4, mse 7.874139785766602, kdl 3.6991634368896484
Vae step 0/4, mse 6.230180740356445, kdl 2.702176094055176
Vae step 0/4, mse 7.38533878326416, kdl 2.8311593532562256
Vae step 0/4, mse 9.272360801696777, kdl 2.7049777507781982
Vae step 0/4, mse 8.388409614562988, kdl 2.614088773727417
Vae step 0/4, mse 4.852052211761475, kdl 3.048985004425049
Vae step 0/4, mse 7.681288719177246, kdl 2.728153705596924
Vae step 0/4, mse 7.750273704528809, kdl 2.47737455368042
Vae step 0/4, mse 7.562464714050293, kdl 2.6777729988098145
Vae step 0/4, mse 7.569507598876953, kdl 2.9205234050750732
Vae step 0/4, mse 7.091155529022217, kdl 2.195330858230591
Vae step 0/4, mse 5.288936138153076, kdl 2.83577

In [22]:
# Second call to train() on the same agent object, with the same arguments as
# the previous run; results are stored under the *_2 names.
# NOTE(review): gp_list, init_prior_test and seed=0 are reused from the first
# run -- confirm that train() does not depend on them being fresh, and that
# reusing the seed is intended.
res_eval_2, res_vae_2, test_list_2 = agent.train(
    training_iter=2000,
    env_name=env_name,
    seed=0,
    task_generator=task_generator,
    eval_interval=50,
    log_dir=".",
    use_env_obs=False,
    num_vae_steps=4,
    gp_list=gp_list,
    sw_size=20,
    test_kwargs=test_kwargs,
    init_prior_test=init_prior_test,
    num_random_task_to_eval=128,
    num_test_processes=2,
)


Vae step 0/4, mse 7.343837261199951, kdl 2.574493169784546
Vae step 0/4, mse 5.210126876831055, kdl 2.4658641815185547
Vae step 0/4, mse 7.2474822998046875, kdl 2.5920443534851074
Vae step 0/4, mse 4.469850540161133, kdl 3.3561666011810303
Vae step 0/4, mse 10.24297046661377, kdl 3.1275038719177246
Vae step 0/4, mse 5.8895769119262695, kdl 2.5737178325653076
Vae step 0/4, mse 5.620012283325195, kdl 2.638986825942993
Vae step 0/4, mse 9.849235534667969, kdl 3.4117841720581055
Vae step 0/4, mse 7.74678373336792, kdl 2.7908308506011963
Vae step 0/4, mse 7.062656879425049, kdl 2.233008623123169
Vae step 0/4, mse 6.75533390045166, kdl 2.601968288421631
Vae step 0/4, mse 7.217862129211426, kdl 2.442674160003662
Vae step 0/4, mse 7.03381872177124, kdl 2.9014530181884766
Vae step 0/4, mse 7.824925899505615, kdl 2.97751522064209
Vae step 0/4, mse 7.8158793449401855, kdl 3.0830652713775635
Vae step 0/4, mse 7.690583229064941, kdl 2.8347256183624268
Vae step 0/4, mse 6.335115909576416, kdl 2.9194

Vae step 0/4, mse 6.113187313079834, kdl 3.3205935955047607
Vae step 0/4, mse 5.716949462890625, kdl 3.3787288665771484
Vae step 0/4, mse 6.167819023132324, kdl 2.765195608139038
Vae step 0/4, mse 5.977693557739258, kdl 3.1134724617004395
Vae step 0/4, mse 7.745462417602539, kdl 3.3657076358795166
Vae step 0/4, mse 6.708592414855957, kdl 3.070754289627075
Vae step 0/4, mse 4.632443904876709, kdl 3.5951576232910156
Vae step 0/4, mse 6.522255897521973, kdl 3.6229748725891113
Vae step 0/4, mse 8.064404487609863, kdl 3.1915178298950195
Vae step 0/4, mse 7.709693908691406, kdl 2.614180088043213
Vae step 0/4, mse 4.432231426239014, kdl 3.3505773544311523
Vae step 0/4, mse 6.673807144165039, kdl 2.995836019515991
Vae step 0/4, mse 10.801969528198242, kdl 2.5181405544281006
Vae step 0/4, mse 5.810070991516113, kdl 3.498779773712158
Vae step 0/4, mse 8.441621780395508, kdl 3.144742727279663
Vae step 0/4, mse 8.32873821258545, kdl 2.3033804893493652
Vae step 0/4, mse 6.043818950653076, kdl 3.288

Vae step 0/4, mse 9.3004732131958, kdl 2.8619167804718018
Vae step 0/4, mse 7.305104732513428, kdl 2.6066319942474365
Vae step 0/4, mse 8.0192232131958, kdl 3.157196283340454
Vae step 0/4, mse 5.406189918518066, kdl 3.018296480178833
Vae step 0/4, mse 8.305510520935059, kdl 2.686422348022461
Vae step 0/4, mse 7.0665059089660645, kdl 2.566401720046997
Vae step 0/4, mse 5.453456401824951, kdl 3.454995632171631
Vae step 0/4, mse 5.619857311248779, kdl 3.0361456871032715
Vae step 0/4, mse 5.894598960876465, kdl 2.8945069313049316
Vae step 0/4, mse 5.368599891662598, kdl 2.8943095207214355
Vae step 0/4, mse 7.333088397979736, kdl 3.322802782058716
Vae step 0/4, mse 8.519689559936523, kdl 3.2816388607025146
Vae step 0/4, mse 7.082638740539551, kdl 2.8198401927948
Vae step 0/4, mse 7.279982089996338, kdl 3.1546053886413574
Vae step 0/4, mse 7.427539825439453, kdl 3.546602964401245
Vae step 0/4, mse 7.866940498352051, kdl 2.5634374618530273
Vae step 0/4, mse 6.748720169067383, kdl 2.9657752513

Vae step 0/4, mse 5.52536153793335, kdl 2.523151397705078
Epoch 400 / 2000
Evaluation...
Evaluation using 128 tasks. Mean reward: 108.0717963203125
Meta-testing...
Reward : 67.91036962999999
Vae step 0/4, mse 4.923345565795898, kdl 2.5108768939971924
Vae step 0/4, mse 6.003870010375977, kdl 3.1184070110321045
Vae step 0/4, mse 6.814378261566162, kdl 2.6478960514068604
Vae step 0/4, mse 5.951251983642578, kdl 2.636256694793701
Vae step 0/4, mse 7.949939250946045, kdl 2.8336801528930664
Vae step 0/4, mse 8.945473670959473, kdl 2.52524471282959
Vae step 0/4, mse 6.972136497497559, kdl 2.6652040481567383
Vae step 0/4, mse 6.586102485656738, kdl 2.295778274536133
Vae step 0/4, mse 6.3399200439453125, kdl 3.1014809608459473
Vae step 0/4, mse 6.748938083648682, kdl 3.8646795749664307
Vae step 0/4, mse 6.316578388214111, kdl 2.5198001861572266
Vae step 0/4, mse 6.444257736206055, kdl 2.770892858505249
Vae step 0/4, mse 6.618334770202637, kdl 3.350494384765625
Vae step 0/4, mse 7.00471496582031

Vae step 0/4, mse 7.9714202880859375, kdl 2.8731935024261475
Vae step 0/4, mse 7.943186283111572, kdl 3.8120288848876953
Vae step 0/4, mse 8.425458908081055, kdl 2.8161840438842773
Vae step 0/4, mse 7.678215503692627, kdl 3.5777461528778076
Vae step 0/4, mse 7.1487717628479, kdl 2.61584210395813
Vae step 0/4, mse 10.20211124420166, kdl 3.3590409755706787
Vae step 0/4, mse 5.650498867034912, kdl 2.7293148040771484
Vae step 0/4, mse 6.757545471191406, kdl 2.9161486625671387
Vae step 0/4, mse 7.752709865570068, kdl 3.0376265048980713
Vae step 0/4, mse 9.509514808654785, kdl 2.856544256210327
Vae step 0/4, mse 6.161365985870361, kdl 2.561291456222534
Vae step 0/4, mse 7.353151321411133, kdl 2.9473297595977783
Vae step 0/4, mse 5.756291389465332, kdl 2.605712413787842
Vae step 0/4, mse 5.7693986892700195, kdl 2.410001754760742
Vae step 0/4, mse 5.941762924194336, kdl 3.2137296199798584
Vae step 0/4, mse 8.218184471130371, kdl 2.6666159629821777
Vae step 0/4, mse 6.045374870300293, kdl 3.398

Vae step 0/4, mse 8.209456443786621, kdl 2.8134765625
Vae step 0/4, mse 7.399439334869385, kdl 2.7529053688049316
Vae step 0/4, mse 6.79928731918335, kdl 2.671405076980591
Vae step 0/4, mse 5.904838562011719, kdl 2.5197925567626953
Vae step 0/4, mse 6.5288543701171875, kdl 3.027090549468994
Vae step 0/4, mse 9.340578079223633, kdl 2.494642496109009
Vae step 0/4, mse 6.1682233810424805, kdl 2.947453498840332
Vae step 0/4, mse 6.031802654266357, kdl 3.0134623050689697
Vae step 0/4, mse 7.762882232666016, kdl 2.891458034515381
Vae step 0/4, mse 8.03333568572998, kdl 3.212937831878662
Vae step 0/4, mse 9.039450645446777, kdl 2.853034496307373
Vae step 0/4, mse 6.699115753173828, kdl 2.482715606689453
Vae step 0/4, mse 5.45381498336792, kdl 2.802922487258911
Vae step 0/4, mse 14.65937614440918, kdl 2.950798511505127
Vae step 0/4, mse 9.018275260925293, kdl 2.686840295791626
Vae step 0/4, mse 8.874913215637207, kdl 2.5471818447113037
Vae step 0/4, mse 8.307243347167969, kdl 3.072624683380127

Vae step 0/4, mse 10.704487800598145, kdl 2.946566343307495
Vae step 0/4, mse 6.78461217880249, kdl 2.8116114139556885
Vae step 0/4, mse 7.710818290710449, kdl 3.1027028560638428
Vae step 0/4, mse 8.110950469970703, kdl 2.552055597305298
Epoch 800 / 2000
Evaluation...
Evaluation using 128 tasks. Mean reward: 126.67110846093749
Meta-testing...
Reward : 99.93442926000002
Vae step 0/4, mse 10.143607139587402, kdl 2.895275592803955
Vae step 0/4, mse 10.198801040649414, kdl 3.1418566703796387
Vae step 0/4, mse 8.14879035949707, kdl 2.965392589569092
Vae step 0/4, mse 7.484044075012207, kdl 2.969351291656494
Vae step 0/4, mse 6.516819477081299, kdl 2.4113073348999023
Vae step 0/4, mse 5.760363578796387, kdl 2.9769487380981445
Vae step 0/4, mse 10.536385536193848, kdl 2.821023941040039
Vae step 0/4, mse 6.882938385009766, kdl 2.9048235416412354
Vae step 0/4, mse 7.2310991287231445, kdl 3.0789430141448975
Vae step 0/4, mse 5.4777703285217285, kdl 2.589855670928955
Vae step 0/4, mse 9.064105987

Vae step 0/4, mse 8.99775505065918, kdl 3.347203493118286
Vae step 0/4, mse 5.943190574645996, kdl 2.718841075897217
Vae step 0/4, mse 7.699095726013184, kdl 2.999964714050293
Vae step 0/4, mse 6.554554462432861, kdl 2.9241886138916016
Vae step 0/4, mse 5.148583889007568, kdl 3.12723970413208
Vae step 0/4, mse 8.237791061401367, kdl 3.1216065883636475
Vae step 0/4, mse 6.633694648742676, kdl 3.6459600925445557
Vae step 0/4, mse 5.597478866577148, kdl 2.5705008506774902
Vae step 0/4, mse 6.159429550170898, kdl 2.7642784118652344
Vae step 0/4, mse 9.367013931274414, kdl 2.9485924243927
Vae step 0/4, mse 6.323282718658447, kdl 3.0952835083007812
Vae step 0/4, mse 9.489645957946777, kdl 3.106337070465088
Vae step 0/4, mse 8.467988967895508, kdl 3.0464890003204346
Vae step 0/4, mse 9.489594459533691, kdl 2.370126962661743
Vae step 0/4, mse 6.448522567749023, kdl 2.7025856971740723
Vae step 0/4, mse 7.996782302856445, kdl 3.4007906913757324
Vae step 0/4, mse 8.685415267944336, kdl 2.72201275

Vae step 0/4, mse 7.165818691253662, kdl 2.811643123626709
Vae step 0/4, mse 6.45203971862793, kdl 2.810629367828369
Vae step 0/4, mse 6.033267974853516, kdl 3.0869433879852295
Vae step 0/4, mse 9.646774291992188, kdl 2.771209716796875
Vae step 0/4, mse 8.460061073303223, kdl 3.1404452323913574
Vae step 0/4, mse 7.364805698394775, kdl 2.35971999168396
Vae step 0/4, mse 9.084712028503418, kdl 2.3732776641845703
Vae step 0/4, mse 9.727560997009277, kdl 3.0607643127441406
Vae step 0/4, mse 6.303809642791748, kdl 3.1374475955963135
Vae step 0/4, mse 6.478312015533447, kdl 3.233959197998047
Vae step 0/4, mse 8.760457038879395, kdl 2.576078414916992
Vae step 0/4, mse 8.728157997131348, kdl 3.1744630336761475
Vae step 0/4, mse 10.571073532104492, kdl 2.726532459259033
Vae step 0/4, mse 10.93608283996582, kdl 3.2199227809906006
Vae step 0/4, mse 7.392214775085449, kdl 2.8515825271606445
Vae step 0/4, mse 6.727571487426758, kdl 3.0653748512268066
Vae step 0/4, mse 5.075529098510742, kdl 2.83258

Vae step 0/4, mse 5.621277332305908, kdl 3.026906728744507
Vae step 0/4, mse 5.396453857421875, kdl 3.3697967529296875
Vae step 0/4, mse 6.207668304443359, kdl 2.363746404647827
Vae step 0/4, mse 7.6954545974731445, kdl 3.1917717456817627
Vae step 0/4, mse 7.572698593139648, kdl 2.8627982139587402
Vae step 0/4, mse 7.798908233642578, kdl 2.8094394207000732
Vae step 0/4, mse 6.894210338592529, kdl 2.767352342605591
Epoch 1200 / 2000
Evaluation...
Evaluation using 128 tasks. Mean reward: 122.174431328125
Meta-testing...
Reward : 101.26710625
Vae step 0/4, mse 7.530048370361328, kdl 2.6675243377685547
Vae step 0/4, mse 8.049074172973633, kdl 2.705947160720825
Vae step 0/4, mse 7.0806498527526855, kdl 2.865448236465454
Vae step 0/4, mse 6.384624481201172, kdl 2.9550397396087646
Vae step 0/4, mse 7.217679977416992, kdl 2.949275255203247
Vae step 0/4, mse 5.671003818511963, kdl 2.8308961391448975
Vae step 0/4, mse 6.833995342254639, kdl 3.0802714824676514
Vae step 0/4, mse 5.841697692871094,

Vae step 0/4, mse 7.4406914710998535, kdl 3.5325372219085693
Vae step 0/4, mse 8.151514053344727, kdl 2.333261728286743
Vae step 0/4, mse 7.295662879943848, kdl 2.792921781539917
Vae step 0/4, mse 9.786114692687988, kdl 3.053640365600586
Vae step 0/4, mse 5.023075103759766, kdl 2.889939785003662
Vae step 0/4, mse 11.205367088317871, kdl 3.4743077754974365
Vae step 0/4, mse 11.454936027526855, kdl 3.075892448425293
Vae step 0/4, mse 8.763111114501953, kdl 3.3213717937469482
Vae step 0/4, mse 9.331363677978516, kdl 3.163073778152466
Vae step 0/4, mse 6.144693851470947, kdl 2.981806516647339
Vae step 0/4, mse 14.300518035888672, kdl 2.3284404277801514
Vae step 0/4, mse 8.027176856994629, kdl 3.2216577529907227
Vae step 0/4, mse 6.020644187927246, kdl 2.318370819091797
Vae step 0/4, mse 6.694007873535156, kdl 3.0384268760681152
Vae step 0/4, mse 7.239784240722656, kdl 2.6383631229400635
Vae step 0/4, mse 6.298065185546875, kdl 3.4082376956939697
Vae step 0/4, mse 7.823929786682129, kdl 2.4

Vae step 0/4, mse 5.547783374786377, kdl 2.96492862701416
Vae step 0/4, mse 5.801102638244629, kdl 3.144407272338867
Vae step 0/4, mse 7.220664978027344, kdl 3.256040334701538
Vae step 0/4, mse 8.089076042175293, kdl 2.5975961685180664
Vae step 0/4, mse 7.902414798736572, kdl 2.6356377601623535
Vae step 0/4, mse 4.773592948913574, kdl 2.7165026664733887
Vae step 0/4, mse 8.77926254272461, kdl 2.642951726913452
Vae step 0/4, mse 7.633118152618408, kdl 3.139890670776367
Vae step 0/4, mse 8.431478500366211, kdl 2.63411021232605
Vae step 0/4, mse 6.601566791534424, kdl 2.775742769241333
Vae step 0/4, mse 9.417366981506348, kdl 2.6310863494873047
Vae step 0/4, mse 6.722869396209717, kdl 2.6960806846618652
Vae step 0/4, mse 7.712591171264648, kdl 3.6544370651245117
Vae step 0/4, mse 6.8983073234558105, kdl 3.110785722732544
Vae step 0/4, mse 6.617812633514404, kdl 3.0254297256469727
Vae step 0/4, mse 5.781460762023926, kdl 2.8477911949157715
Vae step 0/4, mse 6.406915664672852, kdl 2.5852313

Vae step 0/4, mse 6.280129432678223, kdl 3.2049427032470703
Vae step 0/4, mse 10.907388687133789, kdl 2.578474521636963
Vae step 0/4, mse 6.478519916534424, kdl 3.1483659744262695
Vae step 0/4, mse 7.5880022048950195, kdl 3.44266414642334
Vae step 0/4, mse 5.765052795410156, kdl 3.3409643173217773
Vae step 0/4, mse 9.074905395507812, kdl 3.1540637016296387
Vae step 0/4, mse 6.419805526733398, kdl 2.763709306716919
Vae step 0/4, mse 7.484773635864258, kdl 2.8067049980163574
Vae step 0/4, mse 8.298487663269043, kdl 2.7571628093719482
Vae step 0/4, mse 9.588642120361328, kdl 3.3927221298217773
Epoch 1600 / 2000
Evaluation...
Evaluation using 128 tasks. Mean reward: 119.19900707812499
Meta-testing...
Reward : 96.59564775999999
Vae step 0/4, mse 8.810943603515625, kdl 3.003084659576416
Vae step 0/4, mse 8.702391624450684, kdl 3.1402134895324707
Vae step 0/4, mse 7.083219528198242, kdl 2.972639322280884
Vae step 0/4, mse 5.857175827026367, kdl 2.9937379360198975
Vae step 0/4, mse 5.139454364

Vae step 0/4, mse 6.478126049041748, kdl 2.634031295776367
Vae step 0/4, mse 12.113417625427246, kdl 2.682645797729492
Vae step 0/4, mse 9.116436958312988, kdl 2.687236785888672
Vae step 0/4, mse 9.309091567993164, kdl 2.813558578491211
Vae step 0/4, mse 4.563925266265869, kdl 3.0058836936950684
Vae step 0/4, mse 7.809933185577393, kdl 2.3524346351623535
Vae step 0/4, mse 8.494946479797363, kdl 2.7210779190063477
Vae step 0/4, mse 8.953497886657715, kdl 3.1789164543151855
Vae step 0/4, mse 6.988937854766846, kdl 3.66921329498291
Vae step 0/4, mse 10.624361038208008, kdl 2.527357816696167
Vae step 0/4, mse 5.496981620788574, kdl 2.4539763927459717
Vae step 0/4, mse 7.480201721191406, kdl 3.193119764328003
Vae step 0/4, mse 8.792229652404785, kdl 3.07462739944458
Vae step 0/4, mse 7.900045871734619, kdl 2.4649136066436768
Vae step 0/4, mse 7.412431716918945, kdl 2.380153179168701
Vae step 0/4, mse 6.593869209289551, kdl 3.023341417312622
Vae step 0/4, mse 6.242206573486328, kdl 2.7840273

Vae step 0/4, mse 7.026490211486816, kdl 2.607923984527588
Vae step 0/4, mse 6.372658729553223, kdl 2.9324541091918945
Vae step 0/4, mse 8.306665420532227, kdl 3.0070388317108154
Vae step 0/4, mse 7.43083381652832, kdl 2.8783631324768066
Vae step 0/4, mse 5.900120735168457, kdl 3.0164635181427
Vae step 0/4, mse 5.0518951416015625, kdl 2.2846591472625732
Vae step 0/4, mse 5.956830024719238, kdl 2.6319973468780518
Vae step 0/4, mse 10.969185829162598, kdl 3.149555206298828
Vae step 0/4, mse 7.937525272369385, kdl 3.105978012084961
Vae step 0/4, mse 7.381074905395508, kdl 2.9933180809020996
Vae step 0/4, mse 6.981295108795166, kdl 3.4605674743652344
Vae step 0/4, mse 7.677556991577148, kdl 3.149883508682251
Vae step 0/4, mse 5.824305057525635, kdl 3.6360857486724854
Vae step 0/4, mse 7.012378215789795, kdl 3.0218207836151123
Vae step 0/4, mse 7.067934989929199, kdl 3.1572132110595703
Vae step 0/4, mse 9.075034141540527, kdl 2.4442009925842285
Vae step 0/4, mse 7.0874762535095215, kdl 2.89

Vae step 0/4, mse 7.7380900382995605, kdl 2.5440855026245117
Vae step 0/4, mse 7.113489151000977, kdl 3.2632718086242676
Vae step 0/4, mse 10.384842872619629, kdl 2.469266891479492
Vae step 0/4, mse 8.20029354095459, kdl 3.6066994667053223
Vae step 0/4, mse 10.933719635009766, kdl 2.791395664215088
Vae step 0/4, mse 9.487507820129395, kdl 3.1970319747924805
Vae step 0/4, mse 8.007078170776367, kdl 3.1917195320129395
Vae step 0/4, mse 8.327980995178223, kdl 2.4829752445220947
Vae step 0/4, mse 5.013543128967285, kdl 2.642455577850342
Vae step 0/4, mse 8.920103073120117, kdl 3.3854198455810547
Vae step 0/4, mse 5.710628032684326, kdl 3.035137414932251
Vae step 0/4, mse 10.291300773620605, kdl 2.8822240829467773
Vae step 0/4, mse 5.880987644195557, kdl 2.896653652191162


In [23]:
# Second training run of `agent`; the three returned values are kept under
# the `*_2` names.
# NOTE(review): gp_list, test_kwargs and init_prior_test presumably come from
# get_task_sequence(...) in an earlier cell -- confirm they were built with the
# same num_test_processes (2) that is passed below.
res_eval_2, res_vae_2, test_list_2 = agent.train(
    # run setup
    env_name=env_name,
    task_generator=task_generator,
    seed=0,
    log_dir=".",
    use_env_obs=False,
    # iteration counts and evaluation cadence
    training_iter=2000,
    num_vae_steps=4,
    eval_interval=50,
    num_random_task_to_eval=128,
    # task-sequence inputs (semantics of sw_size not visible here -- TODO confirm)
    gp_list=gp_list,
    sw_size=20,
    test_kwargs=test_kwargs,
    init_prior_test=init_prior_test,
    num_test_processes=2,
)


Vae step 0/4, mse 9.751439094543457, kdl 3.4959182739257812
Vae step 0/4, mse 10.051315307617188, kdl 2.411655902862549
Vae step 0/4, mse 5.869720935821533, kdl 3.5335206985473633
Vae step 0/4, mse 6.467080593109131, kdl 3.1840968132019043
Vae step 0/4, mse 5.962515354156494, kdl 2.623443603515625
Vae step 0/4, mse 10.257915496826172, kdl 3.093611240386963
Vae step 0/4, mse 7.229539394378662, kdl 3.300877571105957
Vae step 0/4, mse 8.46236801147461, kdl 3.578124761581421
Vae step 0/4, mse 7.7498369216918945, kdl 3.4087305068969727
Vae step 0/4, mse 6.093611717224121, kdl 2.8441808223724365
Vae step 0/4, mse 7.710565567016602, kdl 2.4141483306884766
Vae step 0/4, mse 6.499960899353027, kdl 3.272643566131592
Vae step 0/4, mse 8.758105278015137, kdl 2.729245662689209
Vae step 0/4, mse 5.188764572143555, kdl 2.5928874015808105
Vae step 0/4, mse 5.538147449493408, kdl 3.1463406085968018
Vae step 0/4, mse 7.899267196655273, kdl 2.6748156547546387
Vae step 0/4, mse 6.046120643615723, kdl 3.06

Vae step 0/4, mse 8.4801025390625, kdl 2.7089061737060547
Vae step 0/4, mse 5.859566688537598, kdl 3.501232147216797
Vae step 0/4, mse 7.394027233123779, kdl 2.8060598373413086
Vae step 0/4, mse 6.917903423309326, kdl 2.90401029586792
Vae step 0/4, mse 9.41043472290039, kdl 2.937570810317993
Vae step 0/4, mse 6.443066596984863, kdl 2.454773426055908
Vae step 0/4, mse 7.657135009765625, kdl 2.561030864715576
Vae step 0/4, mse 7.841577053070068, kdl 2.6175458431243896
Vae step 0/4, mse 8.890186309814453, kdl 2.751359701156616
Vae step 0/4, mse 11.639399528503418, kdl 2.8499114513397217
Vae step 0/4, mse 7.5078887939453125, kdl 2.5279793739318848
Vae step 0/4, mse 9.23965072631836, kdl 2.977912425994873
Vae step 0/4, mse 8.274102210998535, kdl 2.611182451248169
Vae step 0/4, mse 9.552855491638184, kdl 3.1994056701660156
Vae step 0/4, mse 7.855503559112549, kdl 2.483145236968994
Vae step 0/4, mse 5.2378387451171875, kdl 3.417801856994629
Vae step 0/4, mse 6.843834400177002, kdl 3.016473293

Vae step 0/4, mse 7.772951126098633, kdl 2.5053369998931885
Vae step 0/4, mse 8.498039245605469, kdl 2.847071409225464
Vae step 0/4, mse 6.24040412902832, kdl 2.945680618286133
Vae step 0/4, mse 6.418949604034424, kdl 3.0284054279327393
Vae step 0/4, mse 5.083146572113037, kdl 2.688422441482544
Vae step 0/4, mse 4.920773983001709, kdl 2.7534122467041016
Vae step 0/4, mse 9.468501091003418, kdl 3.387179136276245
Vae step 0/4, mse 7.388369560241699, kdl 2.8961243629455566
Vae step 0/4, mse 8.28569507598877, kdl 2.786276340484619
Vae step 0/4, mse 8.602110862731934, kdl 3.3076109886169434
Vae step 0/4, mse 9.952095031738281, kdl 1.967398762702942
Vae step 0/4, mse 5.608614921569824, kdl 3.9366567134857178
Vae step 0/4, mse 8.248738288879395, kdl 3.0567572116851807
Vae step 0/4, mse 6.534206390380859, kdl 3.195603370666504
Vae step 0/4, mse 8.069591522216797, kdl 2.682138204574585
Vae step 0/4, mse 11.34082317352295, kdl 2.6640493869781494
Vae step 0/4, mse 5.2673187255859375, kdl 2.615679

Vae step 0/4, mse 7.419111251831055, kdl 2.4637274742126465
Epoch 400 / 2000
Evaluation...
Evaluation using 128 tasks. Mean reward: 129.8403473515625
Meta-testing...
Reward : 74.51539779999999
Vae step 0/4, mse 11.875505447387695, kdl 3.6775596141815186
Vae step 0/4, mse 10.250069618225098, kdl 2.709946632385254
Vae step 0/4, mse 10.258910179138184, kdl 2.889423370361328
Vae step 0/4, mse 5.679728031158447, kdl 2.241917133331299
Vae step 0/4, mse 8.59101390838623, kdl 2.971108913421631
Vae step 0/4, mse 6.868757724761963, kdl 2.5814037322998047
Vae step 0/4, mse 5.432216644287109, kdl 2.6159632205963135
Vae step 0/4, mse 8.831299781799316, kdl 3.145104169845581
Vae step 0/4, mse 8.886255264282227, kdl 2.820103883743286
Vae step 0/4, mse 9.158129692077637, kdl 2.7351126670837402
Vae step 0/4, mse 6.977162837982178, kdl 2.817701578140259
Vae step 0/4, mse 7.464725017547607, kdl 3.2332730293273926
Vae step 0/4, mse 9.710805892944336, kdl 2.5272767543792725
Vae step 0/4, mse 7.786715030670

ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  _check_optimize_result("lbfgs", opt_res)


Reward : 61.42339785
Vae step 0/4, mse 7.904296398162842, kdl 2.7060909271240234
Vae step 0/4, mse 6.2627716064453125, kdl 3.111698627471924
Vae step 0/4, mse 9.098939895629883, kdl 2.7577145099639893
Vae step 0/4, mse 7.241929054260254, kdl 2.90433406829834
Vae step 0/4, mse 7.392210483551025, kdl 2.926332950592041
Vae step 0/4, mse 6.344212055206299, kdl 2.5596232414245605
Vae step 0/4, mse 8.234879493713379, kdl 2.5063774585723877
Vae step 0/4, mse 8.425483703613281, kdl 3.193262815475464
Vae step 0/4, mse 8.939667701721191, kdl 2.684499979019165
Vae step 0/4, mse 9.133851051330566, kdl 3.156374454498291
Vae step 0/4, mse 6.658515930175781, kdl 3.38533353805542
Vae step 0/4, mse 7.382264137268066, kdl 3.6701879501342773
Vae step 0/4, mse 5.759895324707031, kdl 3.0676193237304688
Vae step 0/4, mse 9.499330520629883, kdl 2.8889379501342773
Vae step 0/4, mse 9.251226425170898, kdl 2.340785503387451
Vae step 0/4, mse 7.4937825202941895, kdl 2.8092880249023438
Vae step 0/4, mse 6.5955200

Vae step 0/4, mse 6.043217658996582, kdl 3.389954090118408
Vae step 0/4, mse 7.616166114807129, kdl 2.829009532928467
Vae step 0/4, mse 8.963493347167969, kdl 2.425400972366333
Vae step 0/4, mse 10.154635429382324, kdl 2.7598702907562256
Vae step 0/4, mse 12.605466842651367, kdl 3.476060390472412
Vae step 0/4, mse 8.762833595275879, kdl 3.6181466579437256
Vae step 0/4, mse 7.961399555206299, kdl 3.3399832248687744
Vae step 0/4, mse 6.451181411743164, kdl 2.785449743270874
Vae step 0/4, mse 7.215414524078369, kdl 2.39308762550354
Vae step 0/4, mse 6.012493133544922, kdl 3.9592530727386475
Vae step 0/4, mse 8.355207443237305, kdl 2.5966339111328125
Vae step 0/4, mse 10.139387130737305, kdl 3.1514785289764404
Vae step 0/4, mse 8.172745704650879, kdl 2.8305206298828125
Vae step 0/4, mse 5.204066276550293, kdl 3.4141626358032227
Vae step 0/4, mse 12.505121231079102, kdl 2.163372278213501
Vae step 0/4, mse 8.653485298156738, kdl 2.679548501968384
Vae step 0/4, mse 6.740413665771484, kdl 2.98

Vae step 0/4, mse 8.881503105163574, kdl 2.9702212810516357
Vae step 0/4, mse 7.156129837036133, kdl 2.5876877307891846
Vae step 0/4, mse 5.163532733917236, kdl 2.832887887954712
Vae step 0/4, mse 7.811436653137207, kdl 2.3604071140289307
Vae step 0/4, mse 9.60960578918457, kdl 3.067638397216797
Vae step 0/4, mse 7.596312999725342, kdl 3.466367483139038
Vae step 0/4, mse 8.438685417175293, kdl 3.003753662109375
Vae step 0/4, mse 8.132925033569336, kdl 2.9089064598083496
Vae step 0/4, mse 7.656960964202881, kdl 2.7881734371185303
Vae step 0/4, mse 13.174853324890137, kdl 2.6844630241394043
Vae step 0/4, mse 5.5645551681518555, kdl 2.319483518600464
Vae step 0/4, mse 8.032599449157715, kdl 2.893440008163452
Vae step 0/4, mse 7.7876434326171875, kdl 2.715339422225952
Vae step 0/4, mse 5.85662317276001, kdl 2.995751142501831
Vae step 0/4, mse 6.881711006164551, kdl 2.8068461418151855
Vae step 0/4, mse 7.345969200134277, kdl 2.9992551803588867
Vae step 0/4, mse 8.342825889587402, kdl 2.8652

Vae step 0/4, mse 8.827384948730469, kdl 2.886807680130005
Vae step 0/4, mse 6.910350322723389, kdl 2.9326558113098145
Epoch 900 / 2000
Evaluation...
Evaluation using 128 tasks. Mean reward: 108.06703729687501
Meta-testing...
Reward : 94.20161200000001
Vae step 0/4, mse 9.536104202270508, kdl 2.520228147506714
Vae step 0/4, mse 3.8700432777404785, kdl 3.239917278289795
Vae step 0/4, mse 7.374723434448242, kdl 3.2714834213256836
Vae step 0/4, mse 7.4725141525268555, kdl 3.0042197704315186
Vae step 0/4, mse 7.9714860916137695, kdl 3.295475959777832
Vae step 0/4, mse 7.0416669845581055, kdl 2.5028538703918457
Vae step 0/4, mse 11.646201133728027, kdl 2.875075340270996
Vae step 0/4, mse 7.979957103729248, kdl 2.7336788177490234
Vae step 0/4, mse 11.545722007751465, kdl 2.485931873321533
Vae step 0/4, mse 5.5733795166015625, kdl 2.568727731704712
Vae step 0/4, mse 6.914723873138428, kdl 2.4992146492004395
Vae step 0/4, mse 7.96389627456665, kdl 2.885913372039795
Vae step 0/4, mse 8.84846305

KeyboardInterrupt: 