In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import matplotlib.pyplot as plt
import matplotlib as mp
import matplotlib.animation as animation
import numpy as np
import IPython

from envs.Quadrotor import Quadrotor

In [3]:
# Scalar parameters handed positionally to the Quadrotor constructor below.
# NOTE(review): presumably mass, moment of inertia and arm length — confirm
# against envs.Quadrotor.
m = 1.0
I = 1.0
r = 1.0

# Environment instance; several later cells rebuild it with the same arguments.
quadrotor = Quadrotor(m, I, r)

In [4]:
# Smoke test: rebuild the environment and apply the same control for 50 steps.
quadrotor = Quadrotor(m, I, r)
u = np.array([10, 10])  # constant two-element control

for i in range(50):
    quadrotor.step(u)  # the value returned by step() is discarded here

In [5]:
# Build a Figure directly (not through pyplot) and attach an Agg canvas to it.
fig = mp.figure.Figure(figsize=[8, 8])
mp.backends.backend_agg.FigureCanvasAgg(fig)
# Fixed axis limits: autoscaling is off, so the view stays at [-2, 12] x [-2, 12].
ax = fig.add_subplot(111, autoscale_on=False, xlim=[-2, 12], ylim=[-2, 12])
ax.grid()

# Create the quadrotor artists: one marker for the centre plus eight empty
# line segments whose data is filled in later by drawQuadrotor.
center, = ax.plot([], [], 'k', marker="o")
lines = []
    
for i in range(8):
    line, = ax.plot([], [], 'k', lw=2)
    lines.append(line)

In [6]:
def drawQuadrotor(quadrotor, ax, center, lines, t):
    """Update the plot artists so they show the quadrotor at its current pose.

    Args:
        quadrotor: object exposing ``r`` (half-width of the drawn body) and
            ``x``, a state sequence whose first three entries are read as
            the position ``x``, ``y`` and the rotation angle ``theta``.
        ax: unused; kept so existing callers keep working.
        center: artist with ``set_data``; placed at the position ``(x, y)``.
        lines: sequence of at least eight artists with ``set_data``. They are
            all cleared, then the first eight receive, in order, the four
            body edges, the two propeller masts and the two propeller blades.
        t: animation time. It only drives the apparent blade width,
            ``h * cos(pi * t)``, to give a spinning effect.

    Returns:
        The same ``lines`` sequence that was passed in.
    """
    r = quadrotor.r
    h = r / 2
    x, y, theta, *_ = quadrotor.x

    for line in lines:  # reset all lines
        line.set_data([], [])

    rotation = np.array([[np.cos(theta), -np.sin(theta)],
                         [np.sin(theta), np.cos(theta)]])
    # Kept under its own name: the original stored this in ``t``, which
    # clobbered the time argument used for the blade width below.
    translation = np.array([[x], [y]])

    blade = h * np.cos(np.pi * t)  # signed half-width of each drawn blade
    left, right = -r + h / 2, r - h / 2  # x-coordinates of the two masts

    # Body-frame vertices, one per row, labelled A..L as in the comments.
    body_frame = np.array([
        [-r, h / 2],          # A  body, top-left
        [r, h / 2],           # B  body, top-right
        [r, -h / 2],          # C  body, bottom-right
        [-r, -h / 2],         # D  body, bottom-left
        [left, h / 2],        # E  left mast, base
        [left, h],            # F  left mast, tip
        [right, h / 2],       # G  right mast, base
        [right, h],           # H  right mast, tip
        [left - blade, h],    # I  left blade, start
        [left + blade, h],    # J  left blade, end
        [right - blade, h],   # K  right blade, start
        [right + blade, h],   # L  right blade, end
    ])

    # Rotate by theta and translate to (x, y); result has one column per vertex.
    world = rotation @ body_frame.T + translation

    center.set_data([x], [y])

    # Vertex-index pairs for the eight segments, in the order of ``lines``.
    segments = (
        (0, 1), (1, 2), (2, 3), (0, 3),  # body outline
        (4, 5), (6, 7),                  # masts
        (8, 9), (10, 11),                # blades
    )
    for idx, (p, q) in enumerate(segments):
        lines[idx].set_data([world[0, p], world[0, q]],
                            [world[1, p], world[1, q]])

    return lines

In [7]:
def animate_quadrotor(quadrotor, controller, horizon):
    """Simulate the quadrotor under ``controller`` and display it as a video.

    Each animation frame draws the current state, asks the controller for a
    control and advances the simulation by one step, so rendering the video
    mutates ``quadrotor``.

    Args:
        quadrotor: environment exposing ``dt``, ``x`` and ``step(u)``, plus
            whatever ``drawQuadrotor`` reads from it.
        controller: callable mapping the current state ``quadrotor.x`` to a
            control accepted by ``quadrotor.step``.
        horizon: number of frames to render (one simulation step per frame).

    Returns:
        None. The animation is shown inline as an HTML5 video.
    """
    dt = quadrotor.dt

    fig = mp.figure.Figure(figsize=[12, 12])
    mp.backends.backend_agg.FigureCanvasAgg(fig)
    ax = fig.add_subplot(111, autoscale_on=False, xlim=[-2, 42], ylim=[-2, 42])
    ax.grid()

    # create the quadrotor artists: a centre marker and eight line segments
    center, = ax.plot([], [], 'k', marker="o")
    lines = [ax.plot([], [], 'k', lw=2)[0] for _ in range(8)]

    def draw(i):
        """Draw frame ``i`` without touching the simulation state."""
        drawQuadrotor(quadrotor, ax, center, lines, dt * i)
        # Every artist that changes between frames must be returned when blitting.
        return [center, *lines]

    def init():
        # Draw only. The previous version called animate(0) here, which ran the
        # controller and stepped the simulation before the first frame.
        return draw(0)

    def animate(i):
        """Draw frame ``i``, then advance the simulation one step."""
        artists = draw(i)
        quadrotor.step(controller(quadrotor.x))
        return artists

    ani = animation.FuncAnimation(fig, animate, np.arange(0, horizon),
        interval=1000*dt, blit=True, init_func=init)
    plt.close(fig)
    IPython.display.display_html(IPython.core.display.HTML(ani.to_html5_video()))

In [8]:
def controller(x):
    """Random baseline controller.

    The state ``x`` is ignored. Returns a length-2 integer array drawn
    uniformly from 0..19 using NumPy's global random state.
    """
    control_dim = 2
    return np.random.randint(low=0, high=20, size=control_dim)

In [9]:
# Fresh environment for the (currently disabled) animation below.
quadrotor = Quadrotor(m, I, r)

# Disabled demo: animate 1000 frames under a constant control of [5, 5].
# animate_quadrotor(quadrotor, lambda x: np.array([5, 5]), 1000)

# Debugging the Replay Buffer

In [10]:
from infrastructure.buffer import ReplayBuffer

In [11]:
# Roll out a single episode with the random controller and store every
# transition in a replay buffer built with its default arguments.
buffer = ReplayBuffer()

quadrotor = Quadrotor(m, I, r)

x = quadrotor.reset()  # reset() returns the initial state, printed below
print(x)
terminal = False
i = 0  # number of steps taken in this episode

while not terminal:
    u = controller(x)
    # step() is unpacked as (next state, cost, terminal flag)
    x_next, cost, terminal = quadrotor.step(u)
    # One transition is stored as the tuple (x, u, cost, x_next, terminal).
    buffer.push((x, u, cost, x_next, terminal))
    
    x = x_next
    i += 1

print(f"Simulation ends in {i} steps")

[0. 0. 0. 0. 0. 0.]
Simulation ends in 67 steps


# Debugging the Agent

In [21]:
from utils.util import build_network
from agents.pg_agent import PGAgent

import tensorflow as tf

In [22]:
# Dimensions used to size the networks: 6 state entries, 2 control entries.
n_states = 6
n_actions = 2

# NOTE(review): the positional arguments of build_network are not visible in
# this notebook — presumably (input size, output size, number of layers,
# units per layer[, activation]); confirm against utils.util.build_network.
# q_network takes state and action together (n_states + n_actions inputs) and
# has a single output; p_network takes the state and has n_actions outputs.
q_network = build_network(n_states+n_actions, 1, 4, 512)
# NOTE(review): the recorded run below logs the policy output as [0. 0.] on
# almost every step; if "relu" is the output activation, check whether the
# policy output is stuck at zero.
p_network = build_network(n_states, n_actions, 4, 512, "relu")

# NOTE(review): meaning of 0.99, 0.1, 1e-5, 1e-5 is defined by PGAgent —
# presumably discount, a second hyperparameter and two learning rates; confirm
# against agents.pg_agent.
pg = PGAgent(q_network, p_network, 0.99, 0.1, 1e-5, 1e-5)

# Disabled demo: animate the agent acting as the controller for 1000 frames.
# animate_quadrotor(quadrotor, pg, 1000)

In [23]:
# tf.keras.backend.set_floatx('float64')
# (Left disabled; the recorded output of this cell contains the Keras
# autocasting notice that suggests this call.)

# Fresh environment and a replay buffer constructed with the argument 10**4.
quadrotor = Quadrotor(m, I, r)
buffer = ReplayBuffer(10**4)

# Phase 1: fill the buffer with 10 episodes of uniformly random controls in
# [0, 20) before any training happens.
print("Start pre-sampling")
for t in range(10):
    x = quadrotor.reset()
    terminal = False
    i = 0  # steps taken in this episode
    
    while not terminal:
        u = np.random.rand(2) * 20
        x_next, cost, terminal = quadrotor.step(u)
        buffer.push((x, u, cost, x_next, terminal))
        x = x_next
        
        i += 1
    
    print(f"Simulation ends in {i} steps")

# Phase 2: 1000 episodes of acting with the agent plus exploration noise,
# with one training update after every environment step.
for t in range(1000):
    print(f"Episode {t} begins...")
    x = quadrotor.reset()
    terminal = False
    i = 0  # steps taken in this episode
    
    # Exploration noise schedule: both the mean and the standard deviation
    # decay exponentially with the episode index.
    # NOTE(review): the noise mean starts at 10, not 0, so early controls are
    # biased upward by about 10 — confirm this is intended.
    mu = 10*np.exp(- t/100)
    std = np.exp(- t/100)
    
    while not terminal:
        u = pg(x)
#         u = u[0]
        noise = np.random.normal(loc=mu, scale=std, size=2)
        # NOTE(review): this prints once per environment step and dominates
        # the cell output; consider removing it once debugging is done.
        print(f"Current x: {(x[0], x[1], x[2])}, u: {u}, noise: {noise}")
        u += noise  # in-place add on the array returned by pg(x)
        u[u<0] = 0  # clip negative controls to zero
        
        x_next, cost, terminal = quadrotor.step(u)
        buffer.push((x, u, cost, x_next, terminal))
    
        x = x_next
        i+=1
    
        # With the guard below commented out, the training block still sits
        # inside the while loop, so it runs after every environment step.
#     if t % 10 == 0:
        print("Start training ...")
        exps = buffer.sample(64)  # sample(64): presumably a minibatch of 64 transitions
        # NOTE(review): presumably L and J are the two loss values returned by
        # train(); confirm against agents.pg_agent.
        L, J = pg.train(exps)

#         pg.update_target_networks()
        
        # NOTE(review): the "Step" label shows t, which is the episode index,
        # not the per-step counter i.
        print(f"Step: {t}, L: {L}, J: {J}")
    
    print(f"Simulation ends in {i} steps")


Start pre-sampling
Simulation ends in 60 steps
Simulation ends in 67 steps
Simulation ends in 62 steps
Simulation ends in 64 steps
Simulation ends in 65 steps
Simulation ends in 63 steps
Simulation ends in 2 steps
Simulation ends in 62 steps
Simulation ends in 59 steps
Simulation ends in 68 steps
Episode 0 begins...


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [10.57494488  8.22049028]
Start training ...
Step: 0, L: 642.1651611328125, J: 0.2938149571418762
Current x: (0.0, 0.0, 0.0), u: [0.         0.01835374], noise: [10.28847103 11.17081378]
Start training ...
Step: 0, L: 401.74737548828125, J: 0.2883332669734955
Current x: (0.0, 0.0008985434951782228, 0.0002354454040527344), u: [0.    

Step: 0, L: 407.9801330566406, J: 0.6954777836799622
Current x: (-0.0002690408264607418, 0.7680114199828404, 0.0007175097465515139), u: [0. 0.], noise: [8.83360835 9.17099813]
Start training ...
Step: 0, L: 506.9350891113281, J: 0.7314584851264954
Current x: (-0.0003175597409689042, 0.808842933178479, 0.0002497191429138186), u: [0. 0.], noise: [11.66107784 10.0305722 ]
Start training ...
Step: 0, L: 288.8062744140625, J: 0.71497642993927
Current x: (-0.00036737050354957655, 0.8504939067260905, -0.00025181045532226545), u: [0. 0.], noise: [10.54619557 10.90371574]
Start training ...
Step: 0, L: 426.2203369140625, J: 0.7074615955352783
Current x: (-0.00041772294815901233, 0.8933330452451302, -0.0005902894973754882), u: [0. 0.], noise: [ 9.50198995 10.02636266]
Start training ...
Step: 0, L: 410.57196044921875, J: 0.8405919075012207
Current x: (-0.0004675352615376139, 0.937336174998655, -0.0009645205497741699), u: [0. 0.], noise: [9.06586785 9.35451731]
Start training ...
Step: 0, L: 437.

Current x: (0.00043293004339365514, 0.31177877189218345, -0.005335911273956298), u: [0. 0.], noise: [ 9.59352368 10.6691649 ]
Start training ...
Step: 1, L: 313.1889953613281, J: 1.3915596008300781
Current x: (0.0005023088933286717, 0.3374661579302064, -0.006026474666595458), u: [0. 0.], noise: [ 9.39514587 10.58962443]
Start training ...
Step: 1, L: 289.6264343261719, J: 1.4348163604736328
Current x: (0.0005824996823222485, 0.3641987838906977, -0.006824602127075195), u: [0. 0.], noise: [10.11702703  8.93750694]
Start training ...
Step: 1, L: 203.83596801757812, J: 1.2793500423431396
Current x: (0.0006747341693187161, 0.3919488505426754, -0.007742177486419677), u: [0. 0.], noise: [9.2882466  9.04716221]
Start training ...
Step: 1, L: 420.8195495605469, J: 1.5973851680755615
Current x: (0.0007799725173209043, 0.4206233263126031, -0.008541800785064698), u: [0. 0.], noise: [8.91920161 9.39660719]
Start training ...
Step: 1, L: 303.7427062988281, J: 1.574553370475769
Current x: (0.00089940

Step: 2, L: 280.83612060546875, J: 2.2368216514587402
Current x: (-1.6776238108504014e-06, 0.026074621599607772, -0.0006735400199890137), u: [0. 0.], noise: [8.37385519 9.82319718]
Start training ...
Step: 2, L: 296.3912048339844, J: 2.1617770195007324
Current x: (-8.534406591169604e-07, 0.033617849441245226, -0.001105440616607666), u: [0. 0.], noise: [9.69078601 9.48661945]
Start training ...
Step: 2, L: 390.6058349609375, J: 2.2774088382720947
Current x: (1.1963866768599668e-06, 0.04199978207031778, -0.0016822753906250002), u: [0. 0.], noise: [ 9.33019308 11.78926558]
Start training ...
Step: 2, L: 359.12335205078125, J: 2.428126335144043
Current x: (5.366161966815112e-06, 0.05131845415875544, -0.0022386934280395512), u: [0. 0.], noise: [10.35670354 12.01836588]
Start training ...
Step: 2, L: 224.72654724121094, J: 2.149566411972046
Current x: (1.3088810220477306e-05, 0.06176806917395919, -0.003041018676757813), u: [0. 0.], noise: [9.05290974 8.25242723]
Start training ...
Step: 2, L

Step: 2, L: 261.2224426269531, J: 3.112239360809326
Current x: (0.029434987335729856, 1.5715225792783318, -0.03987240934371948), u: [0. 0.], noise: [10.62419285  9.70346888]
Start training ...
Step: 2, L: 219.420654296875, J: 3.3224120140075684
Current x: (0.03131376569917156, 1.6285018035299845, -0.04060716824531555), u: [0. 0.], noise: [ 9.398101  10.8057466]
Start training ...
Step: 2, L: 225.44529724121094, J: 3.1555867195129395
Current x: (0.033273573880002676, 1.686531178482724, -0.04124985475540161), u: [0. 0.], noise: [9.47829269 8.297595  ]
Start training ...
Step: 2, L: 174.22293090820312, J: 3.1777987480163574
Current x: (0.035315401620606424, 1.7455982727040766, -0.04203330588340759), u: [0. 0.], noise: [10.02714324 10.42879276]
Start training ...
Step: 2, L: 207.46002197265625, J: 3.36088228225708
Current x: (0.0374305338423107, 1.805460443464977, -0.04269868726730346), u: [0. 0.], noise: [10.87720932  8.10474209]
Start training ...
Step: 2, L: 224.24319458007812, J: 3.279

Step: 3, L: 205.72499084472656, J: 3.8940532207489014
Current x: (0.004363351113963766, 0.7224540488961644, -0.017000090980529785), u: [0. 0.], noise: [ 9.19567493 10.99702713]
Start training ...
Step: 3, L: 143.79052734375, J: 3.4867968559265137
Current x: (0.004823432528208814, 0.7600437899201583, -0.016985585117340087), u: [0. 0.], noise: [ 8.6575396  10.55608399]
Start training ...
Step: 3, L: 260.97442626953125, J: 4.580451965332031
Current x: (0.005317840068239661, 0.7986715094882014, -0.01715121450424194), u: [0. 0.], noise: [10.83519401 10.02625715]
Start training ...
Step: 3, L: 337.84521484375, J: 4.228704452514648
Current x: (0.005844881501983873, 0.8382393142013788, -0.017506698322296142), u: [0. 0.], noise: [8.60083668 9.47335857]
Start training ...
Step: 3, L: 159.96585083007812, J: 3.949730396270752
Current x: (0.006407701102275775, 0.8789119571070613, -0.017781288528442383), u: [0. 0.], noise: [11.09458677  8.89479583]
Start training ...
Step: 3, L: 284.08416748046875, 

Step: 4, L: 192.24945068359375, J: 4.430689811706543
Current x: (-0.00043552477153526586, 0.16792426679583344, 0.004828397274017334), u: [0. 0.], noise: [9.76194257 9.34233736]
Start training ...
Step: 4, L: 266.1592712402344, J: 5.08709716796875
Current x: (-0.0005237318375021758, 0.1865083556807679, 0.004707835721969604), u: [0. 0.], noise: [11.34331704  8.80037632]
Start training ...
Step: 4, L: 287.1314392089844, J: 5.44394588470459
Current x: (-0.0006211631732026533, 0.20602185034361986, 0.0046292346954345705), u: [0. 0.], noise: [10.40769574  8.90765811]
Start training ...
Step: 4, L: 233.86285400390625, J: 5.459751605987549
Current x: (-0.0007280777931570462, 0.22656869188521855, 0.004804927778244018), u: [0. 0.], noise: [9.30236193 9.7672915 ]
Start training ...
Step: 4, L: 266.2924499511719, J: 5.0788044929504395
Current x: (-0.0008439339115839868, 0.24806604806998828, 0.005130624580383301), u: [0. 0.], noise: [9.21114896 9.51773764]
Start training ...
Step: 4, L: 240.27508544

Step: 4, L: 189.99746704101562, J: 6.559878349304199
Simulation ends in 65 steps
Episode 5 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [9.46353227 9.0117538 ]
Start training ...
Step: 5, L: 190.92002868652344, J: 6.249529838562012
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [10.64935377  8.92970191]
Start training ...
Step: 5, L: 169.9393310546875, J: 5.670609474182129
Current x: (0.0, 0.0008665286483764649, 4.5177841186523437e-05), u: [0. 0.], noise: [11.24411864 10.99218802]
Start training ...
Step: 5, L: 261.69940185546875, J: 6.720977783203125
Current x: (0.0, 0.0027099628753662107, 0.0002623208999633789), u: [0. 0.], noise: [ 8.84068939 10.5162161 ]
Start training ...
Step: 5, L: 280.5107421875, J: 5.89765739440918
Current x: (-1.004588352392111e-07, 0.005796027814503203, 0.000504656982421875), u: [0. 0.], noise: [8.4636014 9.7888375]
Start training ...
Step: 5, L: 328.6730041503906, J: 6.3753509521484375
Current x: (-7.086897882669438e-07, 0.00983678337612749, 0.0

Step: 5, L: 250.95449829101562, J: 7.184203624725342
Current x: (-0.0024615598373350727, 0.9977377018634158, 0.005641621923446656), u: [0. 0.], noise: [12.14208614  8.61645351]
Start training ...
Step: 5, L: 218.87881469726562, J: 7.047114372253418
Current x: (-0.0026602481906585, 1.0408957182161718, 0.005752686595916749), u: [0. 0.], noise: [10.87014909  8.85487788]
Start training ...
Step: 5, L: 148.7864532470703, J: 6.2605695724487305
Current x: (-0.0028706476648431515, 1.0851485554539648, 0.006216314554214479), u: [0. 0.], noise: [8.75640643 9.79518332]
Start training ...
Step: 5, L: 190.9869384765625, J: 6.913318634033203
Current x: (-0.00309239426523571, 1.1303928625711377, 0.00688146963119507), u: [0. 0.], noise: [9.75960936 9.74108919]
Start training ...
Step: 5, L: 211.3513641357422, J: 6.021266937255859
Current x: (-0.0033256730432266123, 1.1765112928409553, 0.007442747068405153), u: [0. 0.], noise: [8.09863614 9.30530402]
Start training ...
Step: 5, L: 195.20172119140625, J:

Step: 6, L: 281.9421691894531, J: 8.61247444152832
Current x: (-0.0008270281828940032, 0.3477296976379174, -0.0008812605381011954), u: [0. 0.], noise: [11.12923828 10.49732438]
Start training ...
Step: 6, L: 243.71572875976562, J: 8.088262557983398
Current x: (-0.0008897173343786628, 0.3731863318095031, -0.0007255858898162832), u: [0. 0.], noise: [10.77362715  9.81097411]
Start training ...
Step: 6, L: 282.77484130859375, J: 8.695219039916992
Current x: (-0.0009505006225330256, 0.39982462135316077, -0.0005067198276519766), u: [0. 0.], noise: [8.86778378 7.58538515]
Start training ...
Step: 6, L: 243.01995849609375, J: 8.045731544494629
Current x: (-0.0010097903211167484, 0.42754037059055033, -0.00019158844947814835), u: [0. 0.], noise: [8.83875449 8.67728352]
Start training ...
Step: 6, L: 237.15794372558594, J: 8.080436706542969
Current x: (-0.0010682463050467857, 0.45592043650361186, 0.0002517827510833751), u: [0. 0.], noise: [ 9.07893884 10.06582625]
Start training ...
Step: 6, L: 2

Step: 7, L: 288.1351318359375, J: 9.12027359008789
Current x: (-1.5811070429845186e-06, 0.013209345983751855, 0.0009709399223327636), u: [0. 0.], noise: [9.59101809 8.6719764 ]
Start training ...
Step: 7, L: 263.735595703125, J: 10.554252624511719
Current x: (-4.154659644486282e-06, 0.018739644924449517, 0.0013227483749389648), u: [0. 0.], noise: [9.25700996 9.4289442 ]
Start training ...
Step: 7, L: 262.8432312011719, J: 9.350175857543945
Current x: (-8.501438854174499e-06, 0.02511524229018753, 0.0017664609909057616), u: [0. 0.], noise: [9.52797184 9.89798652]
Start training ...
Step: 7, L: 259.0340270996094, J: 8.510351181030273
Current x: (-1.531989901043487e-05, 0.03237843352598007, 0.0021929800987243652), u: [0. 0.], noise: [10.95200914  9.57795453]
Start training ...
Step: 7, L: 265.8567199707031, J: 8.914412498474121
Current x: (-2.5569877195774397e-05, 0.04060321759429274, 0.002582497787475586), u: [0. 0.], noise: [9.04125906 9.70757762]
Start training ...
Step: 7, L: 296.88088

Step: 7, L: 245.50100708007812, J: 10.825358390808105
Current x: (-0.012238246150930481, 1.343876429074941, 0.028825818109512322), u: [0. 0.], noise: [8.06771666 8.69159555]
Start training ...
Step: 7, L: 219.14541625976562, J: 11.123492240905762
Current x: (-0.013111160316792606, 1.3925830062115216, 0.030151654338836664), u: [0. 0.], noise: [8.78364392 8.59733494]
Start training ...
Step: 7, L: 245.74468994140625, J: 9.712908744812012
Current x: (-0.014032377879568424, 1.4419838182750269, 0.031415102720260615), u: [0. 0.], noise: [9.67579308 8.20912929]
Start training ...
Step: 7, L: 276.909423828125, J: 11.501909255981445
Current x: (-0.015005994025008029, 1.4921409380897652, 0.03269718198776245), u: [0. 0.], noise: [7.33109652 8.01469286]
Start training ...
Step: 7, L: 249.4951171875, J: 10.556146621704102
Current x: (-0.016035786595480765, 1.5431046676406035, 0.03412592759132385), u: [0. 0.], noise: [9.45514509 8.23816675]
Start training ...
Step: 7, L: 179.9649200439453, J: 10.972

Current x: (-0.0006351765001480563, 0.38214257436487703, 0.0074001414775848404), u: [0. 0.], noise: [9.18537969 8.56776909]
Start training ...
Step: 8, L: 254.126220703125, J: 11.124056816101074
Current x: (-0.0007457783382193523, 0.40747405924609204, 0.007753393697738649), u: [0. 0.], noise: [9.0146498  8.09755507]
Start training ...
Step: 8, L: 270.29632568359375, J: 12.66486930847168
Current x: (-0.0008695176378354485, 0.4335998104208075, 0.008168407011032107), u: [0. 0.], noise: [ 9.77054354 10.08855003]
Start training ...
Step: 8, L: 210.1891326904297, J: 12.561342239379883
Current x: (-0.0010065245711517027, 0.4604557307112086, 0.00867512974739075), u: [0. 0.], noise: [8.37865457 8.36141396]
Start training ...
Step: 8, L: 272.3765563964844, J: 12.45916748046875
Current x: (-0.00115975303928979, 0.48831649402043215, 0.0091500518321991), u: [0. 0.], noise: [9.11752967 9.23416908]
Start training ...
Step: 8, L: 289.9302978515625, J: 13.338451385498047
Current x: (-0.0013275035518425

Current x: (2.444920218612287e-06, 0.012699171295677047, -0.0004254060745239257), u: [0. 0.], noise: [10.09557743  8.9154171 ]
Start training ...
Step: 9, L: 273.8048095703125, J: 11.518600463867188
Current x: (4.380658478583912e-06, 0.017483356289377, -0.00044087471961975084), u: [0. 0.], noise: [9.35769734 7.95122611]
Start training ...
Step: 9, L: 316.11505126953125, J: 13.261093139648438
Current x: (7.125135945389215e-06, 0.023187640506807616, -0.00033832731246948223), u: [0. 0.], noise: [8.58502823 9.10915226]
Start training ...
Step: 9, L: 229.2080841064453, J: 12.7568998336792
Current x: (1.0632720076728985e-05, 0.02964181692815236, -9.5132780075073e-05), u: [0. 0.], noise: [9.5523899  7.81198518]
Start training ...
Step: 9, L: 267.068115234375, J: 13.203036308288574
Current x: (1.473894661748805e-05, 0.03688441120171977, 9.564943313598661e-05), u: [0. 0.], noise: [8.73846204 8.68058465]
Start training ...
Step: 9, L: 257.1324462890625, J: 13.159355163574219
Current x: (1.901036

Step: 9, L: 380.2471923828125, J: 15.375319480895996
Current x: (-0.01570603955792717, 1.1291461587710605, 0.03197575874328612), u: [0. 0.], noise: [ 9.93277481 10.33564019]
Start training ...
Step: 9, L: 306.4132995605469, J: 14.30987548828125
Current x: (-0.01693375554422199, 1.1732875255112294, 0.03286341228485106), u: [0. 0.], noise: [9.24613109 9.40798368]
Start training ...
Step: 9, L: 268.8970947265625, J: 13.964569091796875
Current x: (-0.018226270277120538, 1.2184736975229444, 0.0337107792854309), u: [0. 0.], noise: [9.95818308 9.14064885]
Start training ...
Step: 9, L: 358.59686279296875, J: 13.644052505493164
Current x: (-0.019580077759056132, 1.2645432736765185, 0.034541961002349846), u: [0. 0.], noise: [8.79079724 9.44478947]
Start training ...
Step: 9, L: 308.0809020996094, J: 14.66481876373291
Current x: (-0.020998256695484007, 1.3115406478390579, 0.03545489616394042), u: [0. 0.], noise: [8.68189698 9.73478854]
Start training ...
Step: 9, L: 225.6897430419922, J: 13.9643

Step: 10, L: 408.71368408203125, J: 17.790695190429688
Current x: (0.002110498391262309, 0.35164079580675733, 0.0015341160774230908), u: [0. 0.], noise: [9.50365243 7.37905516]
Start training ...
Step: 10, L: 321.2517395019531, J: 14.937080383300781
Current x: (0.0022600903053522476, 0.37564795982269733, 0.0020437949657440132), u: [0. 0.], noise: [ 8.13069115 10.04068454]
Start training ...
Step: 10, L: 285.46759033203125, J: 14.610920906066895
Current x: (0.0024070922171428007, 0.4003623926115374, 0.0027659336090087834), u: [0. 0.], noise: [8.46127516 8.73341716]
Start training ...
Step: 10, L: 279.43109130859375, J: 14.802566528320312
Current x: (0.002550380274988178, 0.42591295913266397, 0.0032970729351043645), u: [0. 0.], noise: [10.28427681 10.43981197]
Start training ...
Step: 10, L: 428.0296630859375, J: 17.175731658935547
Current x: (0.002688912401078556, 0.45220198833766, 0.0038009980201721134), u: [0. 0.], noise: [ 9.79021646 10.44634766]
Start training ...
Step: 10, L: 367.0

Step: 11, L: 252.21841430664062, J: 15.402291297912598
Current x: (2.818242845443406e-08, 0.005445309509090855, 1.7418289184570308e-05), u: [0. 0.], noise: [7.836932   8.02465701]
Start training ...
Step: 11, L: 301.94146728515625, J: 16.419567108154297
Current x: (-7.057766821806474e-08, 0.008927922281494333, 3.785414695739746e-05), u: [0. 0.], noise: [8.59211409 9.46720824]
Start training ...
Step: 11, L: 293.08880615234375, J: 16.24854278564453
Current x: (-1.9696594005410318e-07, 0.013015693996833461, 3.9517498016357416e-05), u: [0. 0.], noise: [7.77752805 8.63724517]
Start training ...
Step: 11, L: 280.8506774902344, J: 15.623662948608398
Current x: (-3.9171623611977715e-07, 0.01792839794659647, -4.6328496932983425e-05), u: [0. 0.], noise: [8.0574202  9.01761739]
Start training ...
Step: 11, L: 413.92401123046875, J: 17.621280670166016
Current x: (-6.513336118339686e-07, 0.02350157928918179, -0.0002181461811065674), u: [0. 0.], noise: [9.33988179 8.02383969]
Start training ...
Ste

Step: 11, L: 373.74139404296875, J: 18.861339569091797
Current x: (0.0021922595730918108, 1.0476888514742169, -0.0064590895652771015), u: [0. 0.], noise: [8.72142468 9.36858523]
Start training ...
Step: 11, L: 286.8368835449219, J: 17.03660774230957
Current x: (0.002386169550861464, 1.0892509838918552, -0.006254659700393679), u: [0. 0.], noise: [9.10288887 8.23058194]
Start training ...
Step: 11, L: 323.50457763671875, J: 17.044193267822266
Current x: (0.002591763947899361, 1.131641079733675, -0.0061149458885192895), u: [0. 0.], noise: [8.34551235 7.16124035]
Start training ...
Step: 11, L: 383.7283020019531, J: 18.21106719970703
Current x: (0.002808199770689141, 1.1747834888004915, -0.0058880013942718525), u: [0. 0.], noise: [9.67684746 8.96684228]
Start training ...
Step: 11, L: 291.95538330078125, J: 17.792495727539062
Current x: (0.00303411782991497, 1.21849554417232, -0.00554262971878052), u: [0. 0.], noise: [9.69578912 9.35099717]
Start training ...
Step: 11, L: 372.0703735351562

Step: 12, L: 405.93206787109375, J: 19.31680679321289
Current x: (-0.0003687274824783649, 0.2467654886543103, 0.0003088318347930913), u: [0. 0.], noise: [9.76402536 9.50730866]
Start training ...
Step: 12, L: 406.0678405761719, J: 20.75861358642578
Current x: (-0.0004119006233312689, 0.2671086096098143, 2.7253437042236797e-05), u: [0. 0.], noise: [9.14085138 8.5709452 ]
Start training ...
Step: 12, L: 314.9022216796875, J: 18.30284881591797
Current x: (-0.00045566892436799404, 0.28839786403359674, -0.00022865328788757277), u: [0. 0.], noise: [9.62081296 8.60559586]
Start training ...
Step: 12, L: 311.48870849609375, J: 18.80921173095703
Current x: (-0.000499485496135906, 0.31047729803740987, -0.00042756938934326123), u: [0. 0.], noise: [9.53725823 8.65874458]
Start training ...
Step: 12, L: 225.04934692382812, J: 18.712400436401367
Current x: (-0.0005428853150521704, 0.33339837298478797, -0.0005249637126922603), u: [0. 0.], noise: [10.48886152 10.27918447]
Start training ...
Step: 12, 

Step: 12, L: 304.8476257324219, J: 20.233253479003906
Current x: (0.004010033177920874, 1.8728518405691017, -0.022788688564300538), u: [0. 0.], noise: [8.41347102 8.68100239]
Start training ...
Step: 12, L: 249.9671630859375, J: 19.751537322998047
Current x: (0.004566986830561581, 1.9280777295021871, -0.023522049379348757), u: [0. 0.], noise: [8.0749574  9.55817682]
Start training ...
Step: 12, L: 231.29092407226562, J: 19.027034759521484
Current x: (0.005162893177705977, 1.9840316220548997, -0.024282163333892826), u: [0. 0.], noise: [9.03808029 9.95949264]
Start training ...
Step: 12, L: 228.17784118652344, J: 19.693973541259766
Simulation ends in 72 steps
Episode 13 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [9.3034067  9.24032028]
Start training ...
Step: 13, L: 235.65940856933594, J: 18.549884796142578
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [9.02233249 7.99822099]
Start training ...
Step: 13, L: 316.39892578125, J: 20.153499603271484
Current x: (0.0, 0.0008733

Step: 13, L: 432.01861572265625, J: 21.388776779174805
Current x: (0.00017446217159611434, 0.8030326245205218, -0.008557946014404298), u: [0. 0.], noise: [6.83360825 9.00617459]
Start training ...
Step: 13, L: 379.52313232421875, J: 22.67848014831543
Current x: (0.0002546568472236105, 0.8386821376339562, -0.008929962396621704), u: [0. 0.], noise: [8.33935147 8.15164682]
Start training ...
Step: 13, L: 382.7845764160156, J: 21.890230178833008
Current x: (0.00034840695792189263, 0.87493457101518, -0.009519235467910767), u: [0. 0.], noise: [7.84047137 7.34049889]
Start training ...
Step: 13, L: 414.829833984375, J: 20.385406494140625
Current x: (0.00045688327148682526, 0.9118550383751933, -0.010089738035202026), u: [0. 0.], noise: [8.75086652 8.21744732]
Start training ...
Step: 13, L: 362.31060791015625, J: 20.321308135986328
Current x: (0.0005798104897914852, 0.9493125339731003, -0.010610243368148803), u: [0. 0.], noise: [ 8.24138625 10.31528702]
Start training ...
Step: 13, L: 346.6231

Step: 14, L: 324.61956787109375, J: 20.75157928466797
Current x: (0.00030146495407947866, 0.10868330080888769, -0.00623008999824524), u: [0. 0.], noise: [8.75182969 8.84418402]
Start training ...
Step: 14, L: 256.1636962890625, J: 21.699668884277344
Current x: (0.0003703120935219417, 0.12119684772901232, -0.006637259721755983), u: [0. 0.], noise: [10.17977229  8.79227818]
Start training ...
Step: 14, L: 325.9411315917969, J: 22.446884155273438
Current x: (0.0004501216371455117, 0.1344889619029269, -0.0070536648273468025), u: [0. 0.], noise: [10.39003494  6.97664193]
Start training ...
Step: 14, L: 315.7468566894531, J: 21.865421295166016
Current x: (0.0005425233304545044, 0.14869723925931758, -0.007331320524215699), u: [0. 0.], noise: [9.42288415 7.75177955]
Start training ...
Step: 14, L: 340.2007751464844, J: 21.27521514892578
Current x: (0.0006471747935834334, 0.1636611410457005, -0.0072676369667053235), u: [0. 0.], noise: [9.13928579 9.49888296]
Start training ...
Step: 14, L: 246.

Step: 14, L: 318.8626403808594, J: 22.031782150268555
Current x: (0.01040629530382926, 1.3730450769275329, 0.006101167631149296), u: [0. 0.], noise: [9.36080877 8.75683339]
Start training ...
Step: 14, L: 345.79193115234375, J: 23.883678436279297
Current x: (0.010637313503440658, 1.4188153209256171, 0.006484996938705449), u: [0. 0.], noise: [7.7541993  9.03725714]
Start training ...
Step: 14, L: 343.9621887207031, J: 22.519664764404297
Current x: (0.01085727789487451, 1.465416295347926, 0.006929223775863653), u: [0. 0.], noise: [ 8.93782384 10.54497632]
Start training ...
Step: 14, L: 297.99334716796875, J: 23.15474510192871
Current x: (0.01106635310841318, 1.512715380084222, 0.007245144844055181), u: [0. 0.], noise: [8.68774716 9.30984009]
Start training ...
Step: 14, L: 303.1474914550781, J: 21.06951904296875
Current x: (0.011261928362211567, 1.5609816980012126, 0.007400350713729864), u: [0. 0.], noise: [9.81951038 9.73675456]
Start training ...
Step: 14, L: 236.03733825683594, J: 22

Step: 15, L: 353.38623046875, J: 22.513565063476562
Current x: (0.001718247028629125, 0.3883625276319153, -0.0008576661109924301), u: [0. 0.], noise: [9.27648771 7.94315753]
Start training ...
Step: 15, L: 489.8866271972656, J: 24.940494537353516
Current x: (0.0018584435453787211, 0.4122834145923834, -0.000267839574813841), u: [0. 0.], noise: [8.54374605 8.49209194]
Start training ...
Step: 15, L: 377.0092468261719, J: 24.110977172851562
Current x: (0.002000116932504343, 0.43694526537417144, 0.00045531992912292674), u: [0. 0.], noise: [9.51937355 9.30400353]
Start training ...
Step: 15, L: 266.6579895019531, J: 23.825416564941406
Current x: (0.0021422466068141094, 0.4623297000029347, 0.0011836448192596458), u: [0. 0.], noise: [10.16102477  9.64129953]
Start training ...
Step: 15, L: 173.0983123779297, J: 23.4940185546875
Current x: (0.0022835192152576, 0.4886154721975037, 0.0019335067272186306), u: [0. 0.], noise: [8.81402735 9.44995782]
Start training ...
Step: 15, L: 380.836608886718

Step: 16, L: 332.46026611328125, J: 25.145339965820312
Current x: (-9.60548883192614e-07, 0.016002657044565237, 0.0003257788181304932), u: [0. 0.], noise: [8.53298876 8.63029138]
Start training ...
Step: 16, L: 322.10382080078125, J: 24.26335906982422
Current x: (-2.131529598622308e-06, 0.021175278257222478, 0.0005985299587249756), u: [0. 0.], noise: [8.41310578 8.84150958]
Start training ...
Step: 16, L: 303.05621337890625, J: 22.165775299072266
Current x: (-3.861653627393191e-06, 0.027083227427507253, 0.0008615508556365967), u: [0. 0.], noise: [7.88242054 6.72952181]
Start training ...
Step: 16, L: 301.9980773925781, J: 23.929885864257812
Current x: (-6.624518041785724e-06, 0.03373563786709813, 0.001081731367111206), u: [0. 0.], noise: [7.31588183 8.95895586]
Start training ...
Step: 16, L: 361.59564208984375, J: 23.74513053894043
Current x: (-1.0646275438793263e-05, 0.040868241993514784, 0.0014172017574310303), u: [0. 0.], noise: [8.05251833 9.64119563]
Start training ...
Step: 16, 

Step: 16, L: 260.315185546875, J: 24.745059967041016
Current x: (-0.018504397980560538, 1.0783626456952407, 0.03791742568016054), u: [0. 0.], noise: [7.79408756 7.19738067]
Start training ...
Step: 16, L: 277.056640625, J: 24.11154556274414
Current x: (-0.01986589611365082, 1.118210508681869, 0.039091312122344984), u: [0. 0.], noise: [8.69599737 7.89850532]
Start training ...
Step: 16, L: 352.7540283203125, J: 24.947540283203125
Current x: (-0.02128422441570714, 1.1585754409554938, 0.040324869251251234), u: [0. 0.], noise: [7.25606126 8.82043159]
Start training ...
Step: 16, L: 285.775146484375, J: 25.957834243774414
Current x: (-0.022767406281111863, 1.1996175556117643, 0.04163817558288575), u: [0. 0.], noise: [9.0904842  7.55938587]
Start training ...
Step: 16, L: 410.80255126953125, J: 26.457324981689453
Current x: (-0.02431539882350351, 1.2412850125808514, 0.04279504485130311), u: [0. 0.], noise: [8.54618627 7.80265913]
Start training ...
Step: 16, L: 246.50157165527344, J: 24.4104

Step: 17, L: 297.7884521484375, J: 23.627864837646484
Current x: (0.00101803498471921, 0.19301138681087054, -0.005953930759429931), u: [0. 0.], noise: [9.60833227 7.44951085]
Start training ...
Step: 17, L: 317.151611328125, J: 24.947813034057617
Current x: (0.001154282054315748, 0.21003108714917046, -0.006353421449661254), u: [0. 0.], noise: [8.7070898  8.14478172]
Start training ...
Step: 17, L: 337.0982360839844, J: 25.387361526489258
Current x: (0.0013006851862123977, 0.22777554166933392, -0.006537029981613159), u: [0. 0.], noise: [6.66770152 9.24974377]
Start training ...
Step: 17, L: 316.9258728027344, J: 25.399517059326172
Current x: (0.0014577949502575741, 0.24622414932655765, -0.0066644077777862545), u: [0. 0.], noise: [6.97059873 8.36612691]
Start training ...
Step: 17, L: 344.10003662109375, J: 24.27399444580078
Current x: (0.0016253099218342384, 0.2652834674923635, -0.007049989748001098), u: [0. 0.], noise: [8.2233752  8.05995294]
Start training ...
Step: 17, L: 235.5274658

Step: 17, L: 281.9045104980469, J: 27.384563446044922
Current x: (0.04336457421843749, 1.8683923248093943, -0.04038830194473268), u: [0. 0.], noise: [7.06443308 7.68474443]
Start training ...
Step: 17, L: 366.4754638671875, J: 26.621524810791016
Current x: (0.0455825394786282, 1.921815228054883, -0.040818276071548476), u: [0. 0.], noise: [8.68602995 9.75699855]
Start training ...
Step: 17, L: 417.5460510253906, J: 26.957229614257812
Current x: (0.04786005797022881, 1.9757308463031944, -0.04131028132438661), u: [0. 0.], noise: [8.11652146 8.89047612]
Start training ...
Step: 17, L: 278.43206787109375, J: 25.862266540527344
Simulation ends in 76 steps
Episode 18 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [8.19287382 8.00316935]
Start training ...
Step: 18, L: 342.4478759765625, J: 27.663938522338867
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [9.5152398  9.45234676]
Start training ...
Step: 18, L: 355.9937744140625, J: 25.883243560791016
Current x: (0.0, 0.00063860430145

Step: 18, L: 258.15716552734375, J: 26.612518310546875
Current x: (-0.0027725724456412167, 0.6075938134238817, 0.003982523393630981), u: [0. 0.], noise: [8.75647074 8.70827463]
Start training ...
Step: 18, L: 261.0420227050781, J: 26.412982940673828
Current x: (-0.0029492656074275457, 0.6361057026886486, 0.003952710533142089), u: [0. 0.], noise: [7.12473037 8.77560081]
Start training ...
Step: 18, L: 256.0638732910156, J: 26.92494773864746
Current x: (-0.0031329141262084765, 0.6653830525602483, 0.00392771725654602), u: [0. 0.], noise: [9.48999064 9.42275374]
Start training ...
Step: 18, L: 269.2481994628906, J: 27.558921813964844
Current x: (-0.0033228475694024578, 0.6952694231603076, 0.0037376369953155517), u: [0. 0.], noise: [7.57693187 7.3166851 ]
Start training ...
Step: 18, L: 298.18597412109375, J: 26.50796890258789
Current x: (-0.0035202093840505247, 0.7260660534335515, 0.003554280424118042), u: [0. 0.], noise: [7.97564398 7.94428698]
Start training ...
Step: 18, L: 346.65258789

Step: 19, L: 287.58819580078125, J: 25.45372772216797
Current x: (-2.419618150704233e-06, 0.01872297233831192, 0.0006277354717254638), u: [0. 0.], noise: [9.03737042 6.61712867]
Start training ...
Step: 19, L: 239.37167358398438, J: 26.722003936767578
Current x: (-5.02772452771641e-06, 0.023996001907100897, 0.0006092138290405274), u: [0. 0.], noise: [9.26130483 8.57096845]
Start training ...
Step: 19, L: 295.2674560546875, J: 28.123104095458984
Current x: (-8.618519275016259e-06, 0.0298534810728512, 0.0008327163696289063), u: [0. 0.], noise: [8.62570177 8.00703917]
Start training ...
Step: 19, L: 262.071044921875, J: 26.14756202697754
Current x: (-1.3295680716041278e-05, 0.03651318725601432, 0.0011252525329589844), u: [0. 0.], noise: [9.61129736 7.75147766]
Start training ...
Step: 19, L: 263.0953063964844, J: 27.531625747680664
Current x: (-1.935787748570475e-05, 0.04385516686458181, 0.0014796549797058106), u: [0. 0.], noise: [8.01907952 8.62921681]
Start training ...
Step: 19, L: 225

Step: 19, L: 368.233154296875, J: 28.08278465270996
Current x: (-0.016675832689812077, 0.9976076333425427, 0.0424384871006012), u: [0. 0.], noise: [8.27109924 7.26388752]
Start training ...
Step: 19, L: 272.7523193359375, J: 26.680097579956055
Current x: (-0.017938842867000943, 1.0340219628790048, 0.043654855203628544), u: [0. 0.], noise: [9.30944611 9.08115286]
Start training ...
Step: 19, L: 311.58099365234375, J: 28.88919448852539
Current x: (-0.01926776138866905, 1.0710073923299146, 0.044971944475173956), u: [0. 0.], noise: [7.07186156 9.69966368]
Start training ...
Step: 19, L: 306.08526611328125, J: 28.08531951904297
Current x: (-0.020676938303221423, 1.1088491294980105, 0.046311863088607796), u: [0. 0.], noise: [8.5554773  6.58244637]
Start training ...
Step: 19, L: 248.74122619628906, J: 28.099151611328125
Current x: (-0.022161514611318565, 1.1473853235852987, 0.04738900146484376), u: [0. 0.], noise: [7.54274697 8.85145752]
Start training ...
Step: 19, L: 295.8499450683594, J: 

Step: 20, L: 402.9627380371094, J: 31.966169357299805
Current x: (-0.001219595407095102, 0.1800081685231783, 0.017679874372482297), u: [0. 0.], noise: [8.73056661 7.97995129]
Start training ...
Step: 20, L: 289.4219665527344, J: 27.14258575439453
Current x: (-0.0014533750766276103, 0.19556062109098318, 0.01898611531257629), u: [0. 0.], noise: [8.56755651 8.81946355]
Start training ...
Step: 20, L: 274.5321044921875, J: 28.04616928100586
Current x: (-0.0017166971927386424, 0.21180286428694328, 0.020367417812347407), u: [0. 0.], noise: [9.84725424 7.85262272]
Start training ...
Step: 20, L: 213.7407684326172, J: 25.159563064575195
Current x: (-0.002013028522490677, 0.22880249612623005, 0.021723529577255242), u: [0. 0.], noise: [8.98566428 7.46748126]
Start training ...
Step: 20, L: 246.57510375976562, J: 28.471158981323242
Current x: (-0.0023454074384130787, 0.24659074853322946, 0.02327910447120666), u: [0. 0.], noise: [9.52156977 7.00632966]
Start training ...
Step: 20, L: 299.088012695

Step: 20, L: 281.78851318359375, J: 30.387422561645508
Current x: (-0.08795032754422792, 1.6480854168808081, 0.10219917659759516), u: [0. 0.], noise: [9.30207259 8.85654731]
Start training ...
Step: 20, L: 193.56072998046875, J: 28.206321716308594
Current x: (-0.09256723169751406, 1.6950819544782647, 0.10417520537376398), u: [0. 0.], noise: [5.89262063 6.88685566]
Start training ...
Step: 20, L: 301.66131591796875, J: 30.720867156982422
Current x: (-0.09736939255673396, 1.7429038791788274, 0.10619578666687006), u: [0. 0.], noise: [8.22118311 7.70254109]
Start training ...
Step: 20, L: 351.760986328125, J: 29.015417098999023
Current x: (-0.1023044432028759, 1.7910158233187563, 0.1081169444561004), u: [0. 0.], noise: [8.17893253 9.54189971]
Start training ...
Step: 20, L: 292.7073059082031, J: 28.435104370117188
Current x: (-0.10740827941471871, 1.8397301691906993, 0.11008996644020075), u: [0. 0.], noise: [6.93388615 9.2199258 ]
Start training ...
Step: 20, L: 333.3394775390625, J: 30.70

Step: 21, L: 477.69482421875, J: 31.375442504882812
Current x: (-0.0009320971971944733, 0.44219007322391424, 0.0015676497936248784), u: [0. 0.], noise: [8.34418268 8.2011779 ]
Start training ...
Step: 21, L: 285.3552551269531, J: 30.392057418823242
Current x: (-0.0009981830335040198, 0.4663827830854892, 0.0016308832645416265), u: [0. 0.], noise: [7.95059666 6.29200759]
Start training ...
Step: 21, L: 241.1993865966797, J: 27.580440521240234
Current x: (-0.0010668626018587517, 0.49124902697055056, 0.0017084172725677497), u: [0. 0.], noise: [8.04126029 8.96290717]
Start training ...
Step: 21, L: 196.3182373046875, J: 29.060871124267578
Current x: (-0.0011378649716762002, 0.5165585293870701, 0.001951810216903687), u: [0. 0.], noise: [8.46681433 8.26666179]
Start training ...
Step: 21, L: 251.0428009033203, J: 25.75760269165039
Current x: (-0.0012117723614364672, 0.5425874460777722, 0.0021030385494232185), u: [0. 0.], noise: [7.71037508 7.66119992]
Start training ...
Step: 21, L: 434.46408

Step: 22, L: 259.01788330078125, J: 28.401321411132812
Current x: (0.0, 0.0006692244949340822, 7.522735595703126e-05), u: [0. 0.], noise: [9.1213066 7.4272278]
Start training ...
Step: 22, L: 389.113525390625, J: 28.01628875732422
Current x: (0.0, 0.001986803657531739, 9.520525932312013e-05), u: [0. 0.], noise: [7.44954371 8.30854512]
Start training ...
Step: 22, L: 368.309326171875, J: 30.494308471679688
Current x: (-1.244902486198564e-07, 0.003978236254777914, 0.000284591007232666), u: [0. 0.], noise: [7.86993195 7.35080393]
Start training ...
Step: 22, L: 365.5185241699219, J: 29.808212280273438
Current x: (-3.9900579260534147e-07, 0.0065644777514376655, 0.0003880765914916992), u: [0. 0.], noise: [7.99873278 6.84973127]
Start training ...
Step: 22, L: 239.27415466308594, J: 30.38811683654785
Current x: (-1.1066897768411974e-06, 0.009691792741452172, 0.0005434750080108642), u: [0. 0.], noise: [7.51209355 7.9620649 ]
Start training ...
Step: 22, L: 210.33135986328125, J: 28.6021461486

Step: 22, L: 275.5508117675781, J: 29.0555419921875
Current x: (8.381394004190287e-05, 0.6384009193171146, -0.02183204607963563), u: [0. 0.], noise: [8.35905094 8.11969181]
Start training ...
Step: 22, L: 218.18466186523438, J: 29.287466049194336
Current x: (0.00033773427063898, 0.6664240281419131, -0.02338266348838807), u: [0. 0.], noise: [9.25725623 7.81744717]
Start training ...
Step: 22, L: 449.50030517578125, J: 31.917469024658203
Current x: (0.0006276282101208694, 0.6951136185223187, -0.02490934500694276), u: [0. 0.], noise: [8.04999855 7.3423158 ]
Start training ...
Step: 22, L: 288.5727844238281, J: 30.28099822998047
Current x: (0.000957443715472656, 0.7245292124668424, -0.026292045593261728), u: [0. 0.], noise: [7.6394325  9.01066148]
Start training ...
Step: 22, L: 292.98065185546875, J: 30.814546585083008
Current x: (0.0013255965018619078, 0.7545025603041174, -0.027603977918624886), u: [0. 0.], noise: [7.21639031 8.92170571]
Start training ...
Step: 22, L: 312.8429260253906,

Step: 23, L: 313.89190673828125, J: 28.892282485961914
Current x: (-1.2412596959988277e-05, 0.0355374118942321, 0.00041199102401733397), u: [0. 0.], noise: [7.96068786 6.95859652]
Start training ...
Step: 23, L: 218.45175170898438, J: 29.90044403076172
Current x: (-1.6288990653249236e-05, 0.042533069319029955, 0.000431095552444458), u: [0. 0.], noise: [8.12837075 8.03971259]
Start training ...
Step: 23, L: 332.88079833984375, J: 30.205297470092773
Current x: (-2.078004547220452e-05, 0.050039655099266035, 0.0005504091739654541), u: [0. 0.], noise: [8.3977267  9.19188273]
Start training ...
Step: 23, L: 310.8037109375, J: 30.25627899169922
Current x: (-2.596809914508671e-05, 0.05818204904835736, 0.0006785885334014893), u: [0. 0.], noise: [8.34445595 8.38669961]
Start training ...
Step: 23, L: 309.5836486816406, J: 30.063926696777344
Current x: (-3.212430109811666e-05, 0.0671024038363565, 0.0007273522853851318), u: [0. 0.], noise: [6.75004815 8.27374702]
Start training ...
Step: 23, L: 22

Step: 23, L: 371.812744140625, J: 29.675174713134766
Current x: (-0.00042871429437566324, 1.0418209791819533, -0.0019422828674316394), u: [0. 0.], noise: [7.70901279 9.04463302]
Start training ...
Step: 23, L: 288.6197509765625, J: 28.537782669067383
Current x: (-0.0003864076457967172, 1.077153226115824, -0.0016121282577514634), u: [0. 0.], noise: [7.72655108 9.60210417]
Start training ...
Step: 23, L: 280.99114990234375, J: 31.650920867919922
Current x: (-0.000340846967139234, 1.113179834574629, -0.001415535640716551), u: [0. 0.], noise: [7.5082757  7.75825809]
Start training ...
Step: 23, L: 336.1893615722656, J: 30.681636810302734
Current x: (-0.00029249268821323146, 1.1499583063059047, -0.0014064983367919903), u: [0. 0.], noise: [8.52386339 7.69282205]
Start training ...
Step: 23, L: 188.60028076171875, J: 29.72527313232422
Current x: (-0.00024197737786619845, 1.1872824297974665, -0.0014224592685699443), u: [0. 0.], noise: [9.03919139 9.15539316]
Start training ...
Step: 23, L: 273

Step: 24, L: 291.6966247558594, J: 31.19874382019043
Current x: (0.0005029916100426038, 0.16104870334476454, -0.006855469465255739), u: [0. 0.], noise: [7.7051499 8.5284851]
Start training ...
Step: 24, L: 281.92962646484375, J: 33.24634552001953
Current x: (0.0005971528006734496, 0.17530412133145273, -0.007127077007293703), u: [0. 0.], noise: [6.39180284 7.08790862]
Start training ...
Step: 24, L: 294.0755310058594, J: 31.6949462890625
Current x: (0.0007024428230027904, 0.1902018646661816, -0.007481018066406252), u: [0. 0.], noise: [7.70329034 7.82658343]
Start training ...
Step: 24, L: 268.8626708984375, J: 30.735082626342773
Current x: (0.0008173398582128059, 0.20546654491906455, -0.007904569721221926), u: [0. 0.], noise: [8.09941602 7.60438321]
Start training ...
Step: 24, L: 356.86578369140625, J: 31.860183715820312
Current x: (0.0009438547117382519, 0.2213031690999866, -0.008340450668334963), u: [0. 0.], noise: [7.72954216 8.11849389]
Start training ...
Step: 24, L: 342.120941162

Step: 24, L: 314.23040771484375, J: 32.465675354003906
Current x: (0.03125534948538597, 1.5484736703718938, -0.03620978603363038), u: [0. 0.], noise: [7.4645101  8.19728062]
Start training ...
Step: 24, L: 385.07525634765625, J: 31.696819305419922
Current x: (0.03284899001308691, 1.591910294341291, -0.03696656079292298), u: [0. 0.], noise: [8.09990571 7.74024873]
Start training ...
Step: 24, L: 283.42236328125, J: 30.938114166259766
Current x: (0.034499329158390234, 1.6359310707608852, -0.037796612644195565), u: [0. 0.], noise: [8.95480598 8.32801985]
Start training ...
Step: 24, L: 203.59640502929688, J: 32.022830963134766
Current x: (0.036208210572282004, 1.6805537804688586, -0.038590698766708384), u: [0. 0.], noise: [7.5298877  7.81787736]
Start training ...
Step: 24, L: 279.9759826660156, J: 32.806365966796875
Current x: (0.03798239965996815, 1.7259225383724532, -0.03932210626602174), u: [0. 0.], noise: [8.51124978 8.53895919]
Start training ...
Step: 24, L: 218.7378387451172, J: 3

Step: 25, L: 272.891357421875, J: 33.691192626953125
Current x: (0.0016348423838165155, 0.33425516300257613, -0.012484060573577883), u: [0. 0.], noise: [8.06824402 6.85421946]
Start training ...
Step: 25, L: 163.16436767578125, J: 31.224960327148438
Current x: (0.0018619896453095956, 0.3540716010859847, -0.013014588642120364), u: [0. 0.], noise: [7.01804255 7.79829329]
Start training ...
Step: 25, L: 272.392578125, J: 31.363323211669922
Current x: (0.0021077657166256006, 0.37439916922799316, -0.013423714256286624), u: [0. 0.], noise: [6.58181587 7.41453536]
Start training ...
Step: 25, L: 296.34393310546875, J: 33.758750915527344
Current x: (0.0023728240949962764, 0.3952272454603934, -0.013910864925384524), u: [0. 0.], noise: [6.5901918  7.38033232]
Start training ...
Step: 25, L: 273.396728515625, J: 32.7977180480957
Current x: (0.0026566702110793093, 0.4164738307144953, -0.014481287574768069), u: [0. 0.], noise: [7.98182823 6.84344105]
Start training ...
Step: 25, L: 249.147735595703

Step: 25, L: 299.45751953125, J: 32.19915008544922
Current x: (0.04616203913570102, 1.8555641753882588, -0.03370657134056094), u: [0. 0.], noise: [7.0040836  7.11199169]
Start training ...
Step: 25, L: 378.6444091796875, J: 33.67555236816406
Current x: (0.048163451610761195, 1.901570254273562, -0.03342381854057314), u: [0. 0.], noise: [8.02715899 7.36876239]
Start training ...
Step: 25, L: 222.36111450195312, J: 30.23362159729004
Current x: (0.05021243552735555, 1.9480061388994308, -0.033151856565475486), u: [0. 0.], noise: [8.1312757  8.21675157]
Start training ...
Step: 25, L: 220.7479705810547, J: 32.34849548339844
Current x: (0.05231286891256527, 1.9949997557971089, -0.03281405496597292), u: [0. 0.], noise: [7.8369034  7.95684214]
Start training ...
Step: 25, L: 254.29290771484375, J: 31.441926956176758
Simulation ends in 85 steps
Episode 26 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [8.33351461 6.99326285]
Start training ...
Step: 26, L: 258.1169128417969, J: 30.0957

Step: 26, L: 329.6796875, J: 34.66926193237305
Current x: (-0.0009123158400393365, 0.49929976045348534, -0.004026699113845825), u: [0. 0.], noise: [7.69314726 7.90422521]
Start training ...
Step: 26, L: 278.7112121582031, J: 35.29572296142578
Current x: (-0.0009336940643773412, 0.5230038887604981, -0.004435050058364868), u: [0. 0.], noise: [6.22671486 7.94404513]
Start training ...
Step: 26, L: 288.9346923828125, J: 34.758758544921875
Current x: (-0.0009487917132646657, 0.5472867416280053, -0.0048645088195800774), u: [0. 0.], noise: [7.70163816 7.5462361 ]
Start training ...
Step: 26, L: 272.89605712890625, J: 33.99415588378906
Current x: (-0.0009576045796901741, 0.5720056565742996, -0.005465700578689574), u: [0. 0.], noise: [7.8940342 8.4161899]
Start training ...
Step: 26, L: 318.25341796875, J: 33.133201599121094
Current x: (-0.0009590001334872227, 0.5972683409057619, -0.0060513521194458), u: [0. 0.], noise: [7.7319988  7.97343931]
Start training ...
Step: 26, L: 234.01632690429688,

Step: 27, L: 285.30133056640625, J: 35.32741928100586
Current x: (-7.461475804878094e-07, 0.005780561220063346, 0.0005189560890197755), u: [0. 0.], noise: [7.30769778 7.90699755]
Start training ...
Step: 27, L: 296.55328369140625, J: 34.09370422363281
Current x: (-1.8784222673433921e-06, 0.008510181712377041, 0.0007247332096099855), u: [0. 0.], noise: [7.14868198 7.85348298]
Start training ...
Step: 27, L: 333.30145263671875, J: 34.82958221435547
Current x: (-3.8002727788373895e-06, 0.011780271497493805, 0.0008705803394317629), u: [0. 0.], noise: [8.07524063 7.49120837]
Start training ...
Step: 27, L: 300.2684631347656, J: 33.94977569580078
Current x: (-6.809379902762989e-06, 0.015569577372694888, 0.0009459473609924318), u: [0. 0.], noise: [7.48475945 6.50045724]
Start training ...
Step: 27, L: 371.13739013671875, J: 34.31636428833008
Current x: (-1.1173671398340193e-05, 0.019934527669899355, 0.0010797176361083986), u: [0. 0.], noise: [7.18341806 8.43216901]
Start training ...
Step: 27

Step: 27, L: 241.7265625, J: 33.2581787109375
Current x: (0.007743662739234179, 0.7429317153701303, -0.028825868034362802), u: [0. 0.], noise: [6.77272987 8.8475289 ]
Start training ...
Step: 27, L: 292.66455078125, J: 31.09780502319336
Current x: (0.008493524764522439, 0.7712165488652672, -0.02947958812713624), u: [0. 0.], noise: [8.03442546 8.49117725]
Start training ...
Step: 27, L: 202.20236206054688, J: 31.856006622314453
Current x: (0.009288407304917182, 0.8000817592708857, -0.030340788078308114), u: [0. 0.], noise: [9.12651394 8.06917158]
Start training ...
Step: 27, L: 334.6394348144531, J: 34.513404846191406
Current x: (0.010131999590105904, 0.8296178120777012, -0.03124766321182252), u: [0. 0.], noise: [8.23572071 7.49242055]
Start training ...
Step: 27, L: 297.6820373535156, J: 33.224185943603516
Current x: (0.011027756938404784, 0.8598916420932735, -0.03204880418777467), u: [0. 0.], noise: [7.21699568 7.06955823]
Start training ...
Step: 27, L: 279.64508056640625, J: 34.2308

Step: 28, L: 270.55126953125, J: 34.629310607910156
Current x: (2.0238728427558986e-05, 0.042657472004340224, -0.0009701964855194091), u: [0. 0.], noise: [7.54602288 7.74180218]
Start training ...
Step: 28, L: 286.5614013671875, J: 35.51126480102539
Current x: (2.7850389668537376e-05, 0.04979204287379237, -0.001063614225387573), u: [0. 0.], noise: [8.27046699 5.77818972]
Start training ...
Step: 28, L: 296.7882080078125, J: 34.501216888427734
Current x: (3.6945270049625856e-05, 0.05747439548681119, -0.001176609897613525), u: [0. 0.], noise: [8.18097555 7.47783512]
Start training ...
Step: 28, L: 252.31398010253906, J: 32.47050094604492
Current x: (4.7534385235211755e-05, 0.06558061295154755, -0.001040377855300903), u: [0. 0.], noise: [6.98039377 8.09756361]
Start training ...
Step: 28, L: 221.95111083984375, J: 32.798118591308594
Current x: (5.996593126340427e-05, 0.07427171048929399, -0.0008338317394256587), u: [0. 0.], noise: [7.42253014 6.76414368]
Start training ...
Step: 28, L: 35

Step: 28, L: 297.66680908203125, J: 35.60304260253906
Current x: (-0.004846141102505582, 0.988152999644191, 0.03626488661766054), u: [0. 0.], noise: [7.01953084 9.48104953]
Start training ...
Step: 28, L: 172.05648803710938, J: 33.2790412902832
Current x: (-0.005492906924648202, 1.0208517692949561, 0.03789936771392824), u: [0. 0.], noise: [6.89767527 6.61362221]
Start training ...
Step: 28, L: 196.78341674804688, J: 35.408226013183594
Current x: (-0.006199498797167101, 1.0542185120177494, 0.039287696933746355), u: [0. 0.], noise: [7.2632193  7.51527766]
Start training ...
Step: 28, L: 327.9910888671875, J: 35.46392059326172
Current x: (-0.0069572853741291536, 1.087954414223536, 0.04070443143844606), u: [0. 0.], noise: [8.02346855 8.75660717]
Start training ...
Step: 28, L: 349.67608642578125, J: 34.559417724609375
Current x: (-0.007773118329608433, 1.1221860257979612, 0.04209596009254458), u: [0. 0.], noise: [7.11983896 7.77415859]
Start training ...
Step: 28, L: 280.8826904296875, J: 

Step: 29, L: 232.60496520996094, J: 34.551429748535156
Current x: (-0.0003765850858931966, 0.11909612336972272, 0.009162150859832765), u: [0. 0.], noise: [6.77524536 7.12182309]
Start training ...
Step: 29, L: 250.12059020996094, J: 31.101390838623047
Current x: (-0.0004629255042025606, 0.1304296255816955, 0.009945178604125977), u: [0. 0.], noise: [7.60559957 6.27259706]
Start training ...
Step: 29, L: 352.0933532714844, J: 35.42523193359375
Current x: (-0.0005619984477457567, 0.1421717762669699, 0.01069354853630066), u: [0. 0.], noise: [7.58065494 6.81677665]
Start training ...
Step: 29, L: 222.2369384765625, J: 32.56664276123047
Current x: (-0.0006748732782747817, 0.1543206779921939, 0.01157521872520447), u: [0. 0.], noise: [7.0742024  6.64059785]
Start training ...
Step: 29, L: 287.25042724609375, J: 34.80455017089844
Current x: (-0.0008031437795179635, 0.1669282406321845, 0.012533276748657228), u: [0. 0.], noise: [7.86448037 6.72837687]
Start training ...
Step: 29, L: 214.809539794

Step: 29, L: 193.84481811523438, J: 34.831939697265625
Current x: (-0.043990522775624, 1.0988167330354048, 0.08089866085052487), u: [0. 0.], noise: [7.37874048 6.9613311 ]
Start training ...
Step: 29, L: 202.231201171875, J: 35.2991943359375
Current x: (-0.04675589105798829, 1.132893919228697, 0.08251265397071834), u: [0. 0.], noise: [7.15444538 7.76615677]
Start training ...
Step: 29, L: 221.0909423828125, J: 34.9484748840332
Current x: (-0.0496371420946688, 1.1674194225565278, 0.08416838803291317), u: [0. 0.], noise: [9.23442805 7.64202401]
Start training ...
Step: 29, L: 129.52496337890625, J: 33.01799011230469
Current x: (-0.05264136732448959, 1.2024509097248652, 0.08576295094490047), u: [0. 0.], noise: [6.2752889  7.97933815]
Start training ...
Step: 29, L: 346.06854248046875, J: 35.526390075683594
Current x: (-0.05578747128459165, 1.2381830678537065, 0.08751675429344173), u: [0. 0.], noise: [6.71596745 8.30106224]
Start training ...
Step: 29, L: 305.43572998046875, J: 37.12722778

Step: 30, L: 244.27178955078125, J: 37.15117645263672
Current x: (-1.4281958147231794e-05, 0.08977946939377944, 0.0011486220836639405), u: [0. 0.], noise: [7.3614876  8.51643996]
Start training ...
Step: 30, L: 242.88294982910156, J: 34.96586608886719
Current x: (-2.183923579448623e-05, 0.09923868696757061, 0.0011632004261016846), u: [0. 0.], noise: [7.09370346 7.63590651]
Start training ...
Step: 30, L: 194.27359008789062, J: 35.43247604370117
Current x: (-3.1220286889823586e-05, 0.10930469627196358, 0.00106228346824646), u: [0. 0.], noise: [8.52724522 5.83318206]
Start training ...
Step: 30, L: 239.56649780273438, J: 35.77764892578125
Current x: (-4.231468651316508e-05, 0.11986266562418409, 0.0009071461677551271), u: [0. 0.], noise: [7.9595198  7.86022555]
Start training ...
Step: 30, L: 294.02294921875, J: 36.97031021118164
Current x: (-5.493457036048684e-05, 0.13087567695180072, 0.0010214152336120608), u: [0. 0.], noise: [7.72669313 6.6835214 ]
Start training ...
Step: 30, L: 196.1

Step: 30, L: 222.35733032226562, J: 35.601966857910156
Current x: (-0.0038003323035706504, 1.0366050946333203, 0.0022303493499755867), u: [0. 0.], noise: [8.79101245 7.29427328]
Start training ...
Step: 30, L: 295.9697265625, J: 36.08151626586914
Current x: (-0.003998674286763875, 1.0682271545874846, 0.0019054292201995857), u: [0. 0.], noise: [7.53758126 9.71091266]
Start training ...
Step: 30, L: 260.48089599609375, J: 37.69850158691406
Current x: (-0.004200603847518769, 1.1004767390595485, 0.0017301830291748055), u: [0. 0.], noise: [6.18588163 8.10124778]
Start training ...
Step: 30, L: 277.8359069824219, J: 35.11195755004883
Current x: (-0.004405819984578659, 1.1334701697199001, 0.00133760371208191), u: [0. 0.], noise: [8.56740039 7.42558057]
Start training ...
Step: 30, L: 214.00074768066406, J: 33.641441345214844
Current x: (-0.00461350805528797, 1.1669113111820184, 0.0007534877777099618), u: [0. 0.], noise: [7.75116873 5.66473513]
Start training ...
Step: 30, L: 175.8347778320312

Step: 31, L: 279.6788635253906, J: 36.21099853515625
Current x: (0.0001706094942536067, 0.06345840202530435, -0.0036911991119384763), u: [0. 0.], noise: [8.06534562 7.05130613]
Start training ...
Step: 31, L: 277.9830322265625, J: 33.30720901489258
Current x: (0.00020606805491083728, 0.07127024904312086, -0.003911394929885864), u: [0. 0.], noise: [7.52492568 6.37974466]
Start training ...
Step: 31, L: 263.94586181640625, J: 37.315032958984375
Current x: (0.0002471064599693569, 0.07961275091627074, -0.004030186796188354), u: [0. 0.], noise: [7.66263868 7.16319316]
Start training ...
Step: 31, L: 199.8275909423828, J: 35.35540771484375
Current x: (0.00029358351701395453, 0.08836470922458237, -0.004034460544586181), u: [0. 0.], noise: [6.68400128 7.21330406]
Start training ...
Step: 31, L: 259.2115478515625, J: 35.623104095458984
Current x: (0.0003460356452683755, 0.09761823872924548, -0.003988789749145507), u: [0. 0.], noise: [8.63643439 7.36426213]
Start training ...
Step: 31, L: 263.88

Step: 31, L: 308.299072265625, J: 35.05315017700195
Current x: (0.00010316193191100772, 0.8930509351070408, 0.018429084920883167), u: [0. 0.], noise: [5.27028732 6.95891716]
Start training ...
Step: 31, L: 246.34527587890625, J: 37.03520202636719
Current x: (-0.0002120696352615679, 0.9224714075252043, 0.019224899339675892), u: [0. 0.], noise: [7.79923047 8.10400138]
Start training ...
Step: 31, L: 263.38916015625, J: 35.87373733520508
Current x: (-0.0005498372309581562, 0.9521335926960842, 0.019851850795745838), u: [0. 0.], noise: [8.37669499 6.35249578]
Start training ...
Step: 31, L: 241.1661376953125, J: 36.547821044921875
Current x: (-0.0009181767460683594, 0.9824048071489548, 0.02044832520484923), u: [0. 0.], noise: [7.31900031 7.24328283]
Start training ...
Step: 31, L: 247.9571533203125, J: 36.84320068359375
Current x: (-0.0013157545105069814, 1.013167650458254, 0.021247219514846788), u: [0. 0.], noise: [6.35124432 7.14467916]
Start training ...
Step: 31, L: 339.63433837890625, 

Step: 32, L: 288.41253662109375, J: 36.939125061035156
Current x: (-4.9001858167428104e-05, 0.04374590071100485, 0.002805367469787598), u: [0. 0.], noise: [8.2695004  7.12765142]
Start training ...
Step: 32, L: 324.7308349609375, J: 37.90448760986328
Current x: (-6.471458443784787e-05, 0.05055642505743094, 0.0034536682605743416), u: [0. 0.], noise: [7.59980728 7.18955747]
Start training ...
Step: 32, L: 307.9266052246094, J: 37.68198013305664
Current x: (-8.474677196246179e-05, 0.05792565853971715, 0.004216154003143312), u: [0. 0.], noise: [8.62404268 8.56748164]
Start training ...
Step: 32, L: 373.5335998535156, J: 35.90576934814453
Current x: (-0.00010988670531849088, 0.0657928196832406, 0.005019664716720582), u: [0. 0.], noise: [7.82389588 5.78928039]
Start training ...
Step: 32, L: 274.781494140625, J: 36.01245880126953
Current x: (-0.00014227482868700797, 0.07439811799756027, 0.005828831481933595), u: [0. 0.], noise: [6.33534543 6.52141909]
Start training ...
Step: 32, L: 193.4090

Step: 32, L: 241.2453155517578, J: 35.306129455566406
Current x: (-0.028355843582796584, 0.8789852483849789, 0.04783978199958802), u: [0. 0.], noise: [8.13654115 6.23567856]
Start training ...
Step: 32, L: 184.68589782714844, J: 33.81264877319336
Current x: (-0.03010150854155147, 0.908067762399955, 0.04877329063415528), u: [0. 0.], noise: [6.65555792 6.4845228 ]
Start training ...
Step: 32, L: 335.70269775390625, J: 34.94694900512695
Current x: (-0.031915903664169964, 0.9376048540872204, 0.049896885538101204), u: [0. 0.], noise: [7.79702497 6.46109041]
Start training ...
Step: 32, L: 163.83795166015625, J: 35.04078674316406
Current x: (-0.033794361881429674, 0.9674733913213078, 0.051037583971023565), u: [0. 0.], noise: [4.87423115 7.67568958]
Start training ...
Step: 32, L: 318.7925720214844, J: 36.69384765625
Current x: (-0.035743934138403384, 0.9977849655794983, 0.05231187586784363), u: [0. 0.], noise: [6.75287501 7.39468102]
Start training ...
Step: 32, L: 287.3453369140625, J: 39.3

Step: 33, L: 175.59390258789062, J: 38.52595520019531
Current x: (6.727924679776022e-05, 0.047523518115649595, -0.0031184504508972176), u: [0. 0.], noise: [7.28383756 7.28973704]
Start training ...
Step: 33, L: 210.81430053710938, J: 35.74541091918945
Current x: (8.963193204685931e-05, 0.05450007167293674, -0.0034576120376586922), u: [0. 0.], noise: [7.22373323 8.00782923]
Start training ...
Step: 33, L: 229.10745239257812, J: 38.36977767944336
Current x: (0.00011652930708914674, 0.06195297564600681, -0.003797363567352296), u: [0. 0.], noise: [6.07390171 7.8568491 ]
Start training ...
Step: 33, L: 175.17279052734375, J: 34.24134826660156
Current x: (0.00014869315539244835, 0.06994802687116866, -0.004215524721145631), u: [0. 0.], noise: [7.93670086 8.2315149 ]
Start training ...
Step: 33, L: 363.0362243652344, J: 38.05470275878906
Current x: (0.00018614700355529585, 0.07835514313697836, -0.004811980628967287), u: [0. 0.], noise: [7.83461833 6.24577278]
Start training ...
Step: 33, L: 24

Step: 33, L: 307.5727233886719, J: 36.38648986816406
Current x: (0.014323748554663805, 0.7670458240106807, -0.03362951059341432), u: [0. 0.], noise: [7.28414171 6.33194565]
Start training ...
Step: 33, L: 263.95416259765625, J: 37.73685836791992
Current x: (0.01529008664476767, 0.7939794801803097, -0.034726381921768204), u: [0. 0.], noise: [7.94306115 7.93479795]
Start training ...
Step: 33, L: 301.7655029296875, J: 38.319095611572266
Current x: (0.016302206338393643, 0.8212929751669035, -0.035728033638000506), u: [0. 0.], noise: [7.69882941 7.70785561]
Start training ...
Step: 33, L: 280.21600341796875, J: 36.37886047363281
Current x: (0.01736945301063304, 0.8492122987885986, -0.03672885899543764), u: [0. 0.], noise: [7.0214201  5.74800299]
Start training ...
Step: 33, L: 266.83087158203125, J: 36.58052444458008
Current x: (0.01849173302840227, 0.8776903076766407, -0.037730587005615254), u: [0. 0.], noise: [7.84358426 6.45436554]
Start training ...
Step: 33, L: 212.80038452148438, J: 

Current x: (-1.2075920652708672e-05, 0.017764993628591085, 0.0006185331344604492), u: [0. 0.], noise: [5.92823727 7.18930373]
Start training ...
Step: 34, L: 218.78968811035156, J: 34.49635314941406
Current x: (-1.8137189397477234e-05, 0.022123524778168327, 0.00032888584136962887), u: [0. 0.], noise: [5.73598617 7.49134587]
Start training ...
Step: 34, L: 146.48974609375, J: 34.854705810546875
Current x: (-2.5009821484995158e-05, 0.026812809808135146, -8.686809539794923e-05), u: [0. 0.], noise: [6.11053573 7.76011866]
Start training ...
Step: 34, L: 290.3042907714844, J: 38.751861572265625
Current x: (-3.2317481789847976e-05, 0.03184382797808199, -0.0006781579971313476), u: [0. 0.], noise: [6.31562672 7.30269276]
Start training ...
Step: 34, L: 217.66879272460938, J: 40.78288650512695
Current x: (-3.950465036443992e-05, 0.0372809115534094, -0.0014344061851501466), u: [0. 0.], noise: [6.77099176 6.31380188]
Start training ...
Step: 34, L: 304.1652526855469, J: 37.943206787109375
Current

Start training ...
Step: 34, L: 249.14236450195312, J: 38.77920913696289
Current x: (0.012001031081861171, 0.6029629841251318, -0.030531100082397455), u: [0. 0.], noise: [6.73982674 6.88297474]
Start training ...
Step: 34, L: 196.53732299804688, J: 36.153079986572266
Current x: (0.01292150044155961, 0.6260578251156903, -0.031199390316009514), u: [0. 0.], noise: [7.01149983 6.01490367]
Start training ...
Step: 34, L: 205.38214111328125, J: 37.65896224975586
Current x: (0.013883555249468072, 0.6495333113148937, -0.03188199534416198), u: [0. 0.], noise: [7.84778156 6.00845183]
Start training ...
Step: 34, L: 265.7569885253906, J: 36.725486755371094
Current x: (0.014886245048758118, 0.6733298039119067, -0.03246494073867797), u: [0. 0.], noise: [7.92590544 6.22925503]
Start training ...
Step: 34, L: 243.2444305419922, J: 39.96409606933594
Current x: (0.01593310380198834, 0.6975302157117612, -0.03286395316123961), u: [0. 0.], noise: [7.6056548  7.66047023]
Start training ...
Step: 34, L: 232

Step: 35, L: 236.0602569580078, J: 38.39849853515625
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [6.97844244 6.37822595]
Start training ...
Step: 35, L: 321.26861572265625, J: 39.718021392822266
Current x: (0.0, 0.0004259360733032226, 3.6404085159301756e-05), u: [0. 0.], noise: [6.6249694  8.10498507]
Start training ...
Step: 35, L: 202.27450561523438, J: 36.65766906738281
Current x: (0.0, 0.001206538993835449, 0.00013282985687255858), u: [0. 0.], noise: [7.77455744 6.94227017]
Start training ...
Step: 35, L: 289.8829345703125, J: 39.41880798339844
Current x: (-5.3623052588447824e-08, 0.002479137385345972, 8.125405311584471e-05), u: [0. 0.], noise: [6.68592072 6.79102881]
Start training ...
Step: 35, L: 355.6666259765625, J: 39.75743865966797
Current x: (-3.027295122194818e-07, 0.0042424185031312905, 0.00011290698051452636), u: [0. 0.], noise: [7.0208155  6.04889508]
Start training ...
Step: 35, L: 296.71563720703125, J: 37.45410919189453
Current x: (-6.613416503395035e-07, 0.006372

Step: 35, L: 220.9320068359375, J: 38.625572204589844
Current x: (4.414450799904595e-05, 0.4250644030071289, -0.008942476415634159), u: [0. 0.], noise: [7.22718236 6.38135085]
Start training ...
Step: 35, L: 247.50851440429688, J: 38.296531677246094
Current x: (0.00012306265169605466, 0.4439619000902668, -0.009658527898788455), u: [0. 0.], noise: [5.5779713  6.06462981]
Start training ...
Step: 35, L: 251.62916564941406, J: 36.6422004699707
Current x: (0.0002141500325089651, 0.4632391961474095, -0.010289996242523197), u: [0. 0.], noise: [7.33912841 6.83558313]
Start training ...
Step: 35, L: 298.72808837890625, J: 38.46408462524414
Current x: (0.0003164822771564316, 0.48269969800103285, -0.010970130443573001), u: [0. 0.], noise: [7.36226481 5.91447722]
Start training ...
Step: 35, L: 244.1446990966797, J: 37.46039581298828
Current x: (0.00043340003693211315, 0.5025965959342878, -0.011599910116195681), u: [0. 0.], noise: [7.31739409 7.63671332]
Start training ...
Step: 35, L: 212.219909

Step: 35, L: 264.9454650878906, J: 39.79884338378906
Current x: (0.030643725711854024, 1.7657415867796225, -0.03865428361892702), u: [0. 0.], noise: [5.69912354 6.86926476]
Start training ...
Step: 35, L: 181.7095947265625, J: 32.864830017089844
Current x: (0.03221608390558255, 1.8039719544036346, -0.0396376443386078), u: [0. 0.], noise: [7.97774422 6.50470536]
Start training ...
Step: 35, L: 218.36045837402344, J: 37.856300354003906
Current x: (0.03383701220538357, 1.8424772219879415, -0.04073801918029787), u: [0. 0.], noise: [7.01343501 5.53473387]
Start training ...
Step: 35, L: 175.85427856445312, J: 38.16630554199219
Current x: (0.035515330492819755, 1.881448596974503, -0.04169109015464784), u: [0. 0.], noise: [7.13104476 7.24026197]
Start training ...
Step: 35, L: 339.9693603515625, J: 36.61969757080078
Current x: (0.0372447533937091, 1.920692747684954, -0.04249629101753236), u: [0. 0.], noise: [5.92055708 6.72689332]
Start training ...
Step: 35, L: 264.27734375, J: 38.0146980285

Step: 36, L: 154.79507446289062, J: 36.61396026611328
Current x: (0.006578474511337875, 0.3403748477224973, -0.012973943853378295), u: [0. 0.], noise: [7.70989969 7.27294087]
Start training ...
Step: 36, L: 301.2256774902344, J: 39.198368072509766
Current x: (0.007059517556543115, 0.35689545505533976, -0.012726102781295776), u: [0. 0.], noise: [7.48178831 6.98549773]
Start training ...
Step: 36, L: 298.1087951660156, J: 36.051944732666016
Current x: (0.0075599987108512, 0.3739332204411153, -0.012434565830230712), u: [0. 0.], noise: [6.48455765 6.82255178]
Start training ...
Step: 36, L: 168.83059692382812, J: 39.00226593017578
Current x: (0.00807889058520268, 0.3914365972879445, -0.01209339985847473), u: [0. 0.], noise: [7.1517597  6.10665074]
Start training ...
Step: 36, L: 247.32357788085938, J: 38.93677520751953
Current x: (0.008614328846472974, 0.4092895822431558, -0.01178603329658508), u: [0. 0.], noise: [6.69649305 5.97659955]
Start training ...
Step: 36, L: 188.86138916015625, J

Step: 36, L: 147.18228149414062, J: 38.385650634765625
Current x: (0.031875055694850114, 1.4601857264123308, 0.03731269917488098), u: [0. 0.], noise: [7.46832531 7.11181685]
Start training ...
Step: 36, L: 243.7030029296875, J: 39.965850830078125
Current x: (0.03201454454795844, 1.4949310118405832, 0.038901028442382815), u: [0. 0.], noise: [6.29529784 6.38250837]
Start training ...
Step: 36, L: 241.30130004882812, J: 41.203277587890625
Current x: (0.03209964357839264, 1.5301522966376642, 0.04052500853538513), u: [0. 0.], noise: [7.61468183 7.12328879]
Start training ...
Step: 36, L: 264.1239929199219, J: 37.23945617675781
Current x: (0.0321354370778248, 1.565659402885913, 0.042140267562866214), u: [0. 0.], noise: [7.52784501 7.02155523]
Start training ...
Step: 36, L: 168.90155029296875, J: 35.35853576660156
Current x: (0.03211152128614968, 1.601658096144009, 0.043804665899276736), u: [0. 0.], noise: [6.73980455 6.3125106 ]
Start training ...
Step: 36, L: 205.87350463867188, J: 39.7821

Step: 37, L: 305.67999267578125, J: 39.82728576660156
Current x: (0.0009144962571053974, 0.17917178066583728, -0.01415161809921265), u: [0. 0.], noise: [6.44220674 6.27420715]
Start training ...
Step: 37, L: 214.45108032226562, J: 39.344459533691406
Current x: (0.0010773250381120318, 0.19099275757951747, -0.014753080844879154), u: [0. 0.], noise: [6.46439975 5.48811957]
Start training ...
Step: 37, L: 196.08221435546875, J: 39.71954345703125
Current x: (0.0012581490012019908, 0.20310424851039285, -0.01533774361610413), u: [0. 0.], noise: [6.906179   7.12676753]
Start training ...
Step: 37, L: 328.9202880859375, J: 40.61427688598633
Current x: (0.001456605973153952, 0.21542986130998726, -0.015824778366088872), u: [0. 0.], noise: [5.16373725 7.07269841]
Start training ...
Step: 37, L: 296.19866943359375, J: 38.397544860839844
Current x: (0.0016765854749256066, 0.2281776037114846, -0.016333871984481815), u: [0. 0.], noise: [7.65205264 7.05562196]
Start training ...
Step: 37, L: 217.244781

Step: 37, L: 320.863037109375, J: 39.91649627685547
Current x: (0.04060822926074197, 1.0742462570506492, -0.05625412712097168), u: [0. 0.], noise: [7.90296434 6.30672866]
Start training ...
Step: 37, L: 307.01617431640625, J: 41.30976867675781
Current x: (0.042755292651169556, 1.1039892767974102, -0.057129866313934324), u: [0. 0.], noise: [6.62174765 6.87977005]
Start training ...
Step: 37, L: 127.11897277832031, J: 34.550682067871094
Current x: (0.0449822492760387, 1.1341700180848018, -0.05784598197937012), u: [0. 0.], noise: [6.7878533  7.08599088]
Start training ...
Step: 37, L: 201.2353515625, J: 41.249794006347656
Current x: (0.04728629794241835, 1.16471770846899, -0.05858789992332458), u: [0. 0.], noise: [7.45388818 6.32565239]
Start training ...
Step: 37, L: 262.4682922363281, J: 41.032676696777344
Current x: (0.049670556472703874, 1.1956694627115647, -0.05935963163375854), u: [0. 0.], noise: [7.9882454 6.1044488]
Start training ...
Step: 37, L: 152.11190795898438, J: 37.5359992

Step: 38, L: 277.7953186035156, J: 37.44567108154297
Current x: (5.7018288237649455e-05, 0.06297845084915422, -0.0022541674137115486), u: [0. 0.], noise: [7.49713534 7.78731288]
Start training ...
Step: 38, L: 224.50814819335938, J: 38.28706359863281
Current x: (7.548639499444143e-05, 0.07026944859493149, -0.002452566623687745), u: [0. 0.], noise: [5.99331393 6.93387546]
Start training ...
Step: 38, L: 142.58755493164062, J: 37.196842193603516
Current x: (9.739986943582365e-05, 0.07810788731985506, -0.002679983615875245), u: [0. 0.], noise: [6.06540954 6.65807801]
Start training ...
Step: 38, L: 268.63421630859375, J: 41.47035217285156
Current x: (0.00012248381989563167, 0.08625804104420395, -0.003001456785202027), u: [0. 0.], noise: [6.55605022 6.88341   ]
Start training ...
Step: 38, L: 263.8892822265625, J: 40.295738220214844
Current x: (0.00015097764017219648, 0.09469953898475288, -0.0033821968078613287), u: [0. 0.], noise: [5.96639056 6.55910314]
Start training ...
Step: 38, L: 20

Current x: (0.011755775958148557, 0.807813119179513, -0.020275270986557015), u: [0. 0.], noise: [6.48409672 5.6963281 ]
Start training ...
Step: 38, L: 190.93927001953125, J: 37.491085052490234
Current x: (0.012461652051039199, 0.833930034014515, -0.02070687332153321), u: [0. 0.], noise: [7.06409159 5.91232131]
Start training ...
Step: 38, L: 304.72137451171875, J: 39.95677185058594
Current x: (0.013192222593055058, 0.8602837409666414, -0.021059698820114145), u: [0. 0.], noise: [7.3975084  6.80105792]
Start training ...
Step: 38, L: 200.01254272460938, J: 38.58185577392578
Current x: (0.013949661308455921, 0.8869538110082096, -0.021297347259521492), u: [0. 0.], noise: [5.96266813 7.03746163]
Start training ...
Step: 38, L: 261.33416748046875, J: 40.584232330322266
Current x: (0.014736999566897044, 0.9140624228441322, -0.021475350618362435), u: [0. 0.], noise: [6.5969079  6.70867972]
Start training ...
Step: 38, L: 284.1988525390625, J: 41.443016052246094
Current x: (0.01555202256003094

Step: 39, L: 203.6962890625, J: 40.00353240966797
Current x: (-2.473251523330154e-05, 0.03489136839110005, 0.0026793116569519037), u: [0. 0.], noise: [6.45831569 6.63317473]
Start training ...
Step: 39, L: 301.76007080078125, J: 42.16126251220703
Current x: (-3.702100567687902e-05, 0.04019783524970367, 0.0031524235248565667), u: [0. 0.], noise: [5.71912301 6.41941001]
Start training ...
Step: 39, L: 204.50311279296875, J: 39.45021057128906
Current x: (-5.2817110299903115e-05, 0.04583244648386314, 0.0036080494880676266), u: [0. 0.], noise: [6.63594539 7.80697658]
Start training ...
Step: 39, L: 218.4906463623047, J: 40.22486877441406
Current x: (-7.243978816968349e-05, 0.05169990495038285, 0.003993646717071533), u: [0. 0.], noise: [6.21667322 7.64997576]
Start training ...
Step: 39, L: 148.28634643554688, J: 40.70611572265625
Current x: (-9.727353267963622e-05, 0.058030646275215494, 0.004262140798568726), u: [0. 0.], noise: [5.60682522 7.85618881]
Start training ...
Step: 39, L: 297.354

Step: 39, L: 303.0177917480469, J: 40.280582427978516
Current x: (-0.0006909803429898489, 0.6526211723671744, -0.015237786531448359), u: [0. 0.], noise: [5.85695472 5.2234996 ]
Start training ...
Step: 39, L: 229.97076416015625, J: 39.1579475402832
Current x: (-0.0004763862277017311, 0.6741269994920347, -0.015763217544555657), u: [0. 0.], noise: [5.30164134 6.77729342]
Start training ...
Step: 39, L: 268.47796630859375, J: 40.39447021484375
Current x: (-0.00024490860671688643, 0.6957597433680461, -0.016225303077697747), u: [0. 0.], noise: [7.31517034 6.57038247]
Start training ...
Step: 39, L: 231.44528198242188, J: 42.01816177368164
Current x: (5.608513249240571e-06, 0.7176192306460175, -0.01683495378494262), u: [0. 0.], noise: [5.91121588 7.07979859]
Start training ...
Step: 39, L: 305.3109130859375, J: 36.52947998046875
Current x: (0.0002786543758638786, 0.7398860904881389, -0.017370125722885127), u: [0. 0.], noise: [6.76445969 5.65837202]
Start training ...
Step: 39, L: 206.1978759

Step: 39, L: 163.5236358642578, J: 39.0640754699707
Current x: (0.036343913028802945, 1.8883625634574908, -0.04001056594848633), u: [0. 0.], noise: [6.96612576 6.84529179]
Start training ...
Step: 39, L: 197.34747314453125, J: 39.341217041015625
Current x: (0.03815366712287639, 1.9254634042444583, -0.040772012758255004), u: [0. 0.], noise: [7.43222536 6.65278237]
Start training ...
Step: 39, L: 297.4264831542969, J: 42.81324768066406
Current x: (0.0400186667376649, 1.962963281439654, -0.04152137613296509), u: [0. 0.], noise: [6.46129652 6.99366064]
Start training ...
Step: 39, L: 193.90054321289062, J: 38.87797164916992
Simulation ends in 107 steps
Episode 40 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [6.42924148 7.18825811]
Start training ...
Step: 40, L: 172.35256958007812, J: 42.15230941772461
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [6.74739696 7.34718458]
Start training ...
Step: 40, L: 212.48193359375, J: 40.642547607421875
Current x: (0.0, 0.00038075003051757

Step: 40, L: 275.46905517578125, J: 40.5150032043457
Current x: (-0.0013688160913912666, 0.34920597428231315, 0.010411038398742675), u: [0. 0.], noise: [6.82419446 6.33128164]
Start training ...
Step: 40, L: 227.40679931640625, J: 40.02876281738281
Current x: (-0.0015268233933019109, 0.36482771337232045, 0.011194880247116089), u: [0. 0.], noise: [6.57012145 6.91210451]
Start training ...
Step: 40, L: 200.82997131347656, J: 41.472286224365234
Current x: (-0.0016985266639720524, 0.3807839287287009, 0.012028013372421265), u: [0. 0.], noise: [6.03011009 5.46723896]
Start training ...
Step: 40, L: 266.2806701660156, J: 41.266746520996094
Current x: (-0.0018853228092863157, 0.39710728214454605, 0.012826948165893555), u: [0. 0.], noise: [6.59033985 6.11724665]
Start training ...
Step: 40, L: 227.11578369140625, J: 38.31379699707031
Current x: (-0.0020859476476487743, 0.4135992872720761, 0.013682170057296753), u: [0. 0.], noise: [6.96180806 5.93500991]
Start training ...
Step: 40, L: 226.12045

Step: 40, L: 143.477783203125, J: 39.22361755371094
Current x: (-0.03628165984128361, 1.3838130981205234, 0.030702017259597786), u: [0. 0.], noise: [7.28880939 6.527424  ]
Start training ...
Step: 40, L: 218.7305908203125, J: 40.9571418762207
Current x: (-0.037943790411093296, 1.4151277554900645, 0.030442413425445566), u: [0. 0.], noise: [7.84381648 7.24274817]
Start training ...
Step: 40, L: 258.6740417480469, J: 39.460323333740234
Current x: (-0.039648332938577925, 1.4468423850102579, 0.030258948135375986), u: [0. 0.], noise: [7.92207491 5.38307282]
Start training ...
Step: 40, L: 200.49366760253906, J: 38.170352935791016
Current x: (-0.04139879551757731, 1.4790839720196405, 0.0301355896472931), u: [0. 0.], noise: [6.66541274 6.75728512]
Start training ...
Step: 40, L: 184.34555053710938, J: 41.27585983276367
Current x: (-0.043189511928977325, 1.511674464680339, 0.030266131353378304), u: [0. 0.], noise: [6.7444904 7.8430686]
Start training ...
Step: 40, L: 191.66995239257812, J: 42.6

Step: 41, L: 141.6674041748047, J: 38.72539138793945
Current x: (-8.719876537025977e-05, 0.12528077369880006, 0.005040721559524536), u: [0. 0.], noise: [7.46710476 6.56189992]
Start training ...
Step: 41, L: 218.15455627441406, J: 43.04350662231445
Current x: (-0.00012215043682549723, 0.13444304681103278, 0.005660822439193725), u: [0. 0.], noise: [6.08601277 6.21356001]
Start training ...
Step: 41, L: 138.86585998535156, J: 40.25361633300781
Current x: (-0.00016417370915549714, 0.14402720260530819, 0.006371443796157836), u: [0. 0.], noise: [5.96485815 7.08297073]
Start training ...
Step: 41, L: 130.4720458984375, J: 39.05731201171875
Current x: (-0.00021315951415148756, 0.1538602959871645, 0.007069310426712035), u: [0. 0.], noise: [5.81291817 6.58396749]
Start training ...
Step: 41, L: 228.299560546875, J: 42.07371139526367
Current x: (-0.000270458613606467, 0.16401714575251866, 0.0076553658008575425), u: [0. 0.], noise: [5.71442995 5.63297218]
Start training ...
Step: 41, L: 184.83746

Step: 41, L: 271.0567321777344, J: 44.222312927246094
Current x: (-0.021748184177331133, 0.8818023834685014, 0.02863912410736083), u: [0. 0.], noise: [8.21477173 5.79513855]
Start training ...
Step: 41, L: 140.63284301757812, J: 38.554901123046875
Current x: (-0.022951516745268526, 0.9066193647461216, 0.02878169484138488), u: [0. 0.], noise: [6.34220864 7.84733826]
Start training ...
Step: 41, L: 245.21270751953125, J: 38.73114013671875
Current x: (-0.024194966982661638, 0.9318557624799988, 0.02916622886657714), u: [0. 0.], noise: [7.13306826 6.48290997]
Start training ...
Step: 41, L: 248.29571533203125, J: 41.83308029174805
Current x: (-0.025479251501727196, 0.9575295271918978, 0.02940024995803832), u: [0. 0.], noise: [7.34142805 7.3057156 ]
Start training ...
Step: 41, L: 251.5118865966797, J: 42.81293869018555
Current x: (-0.026803243064404608, 0.9835833106344807, 0.02969928684234618), u: [0. 0.], noise: [5.78163542 6.36534674]
Start training ...
Step: 41, L: 187.58349609375, J: 42

Step: 42, L: 205.95111083984375, J: 41.04161834716797
Current x: (7.601792143647538e-05, 0.027050783510488445, -0.003545124053955077), u: [0. 0.], noise: [5.29188431 6.32705928]
Start training ...
Step: 42, L: 149.49310302734375, J: 39.427791595458984
Current x: (9.870688571900229e-05, 0.03122681258961315, -0.0040722207069396965), u: [0. 0.], noise: [6.94381238 7.05252838]
Start training ...
Step: 42, L: 171.35247802734375, J: 41.207313537597656
Current x: (0.00012551490121872879, 0.035583728784265516, -0.004702834844589232), u: [0. 0.], noise: [6.78431436 6.58230314]
Start training ...
Step: 42, L: 197.0164794921875, J: 40.622291564941406
Current x: (0.00015802251982871378, 0.040359267449045735, -0.005344320583343504), u: [0. 0.], noise: [5.96933394 6.40712739]
Start training ...
Step: 42, L: 212.89151000976562, J: 40.830177307128906
Current x: (0.0001968162145810175, 0.045490453052885634, -0.005965605211257933), u: [0. 0.], noise: [7.15215032 6.66279673]
Start training ...
Step: 42, 

Step: 42, L: 312.4157409667969, J: 43.67063522338867
Current x: (0.01996783634398924, 0.4796127500784774, -0.037674036121368396), u: [0. 0.], noise: [7.18222012 7.11487549]
Start training ...
Step: 42, L: 133.37069702148438, J: 42.752750396728516
Current x: (0.021238888333100208, 0.49655029431798386, -0.03828058462142943), u: [0. 0.], noise: [7.09984553 6.40428569]
Start training ...
Step: 42, L: 257.4750061035156, J: 41.183494567871094
Current x: (0.022563790510015058, 0.5139355335904116, -0.038880398654937726), u: [0. 0.], noise: [7.37750534 6.98216978]
Start training ...
Step: 42, L: 133.10452270507812, J: 41.062110900878906
Current x: (0.02394037466644471, 0.5316891966655412, -0.03941065673828123), u: [0. 0.], noise: [6.48655386 6.27431397]
Start training ...
Step: 42, L: 217.01901245117188, J: 42.12728500366211
Current x: (0.025372775744187916, 0.5498967419572544, -0.03990138125419615), u: [0. 0.], noise: [5.99074505 7.56108704]
Start training ...
Step: 42, L: 176.83172607421875, 

Step: 42, L: 202.33343505859375, J: 41.246376037597656
Current x: (0.14426817478525425, 1.6804423556175552, -0.0388932000160217), u: [0. 0.], noise: [6.81361322 7.13523292]
Start training ...
Step: 42, L: 275.46966552734375, J: 42.5724983215332
Current x: (0.1482488740175876, 1.7138848090666892, -0.03837029833793637), u: [0. 0.], noise: [7.50152844 7.98181303]
Start training ...
Step: 42, L: 212.89044189453125, J: 42.331321716308594
Current x: (0.15228381110233719, 1.7477400923227262, -0.0378795586109161), u: [0. 0.], noise: [7.34361653 5.75441587]
Start training ...
Step: 42, L: 163.16448974609375, J: 42.645328521728516
Current x: (0.15637814365230238, 1.78216157004978, -0.037436847352981535), u: [0. 0.], noise: [6.01136869 5.95118088]
Start training ...
Step: 42, L: 134.27906799316406, J: 45.682090759277344
Current x: (0.1605220791051444, 1.8169109113962185, -0.03683521604537961), u: [0.00175128 0.        ], noise: [6.48443368 5.57661725]
Start training ...
Step: 42, L: 177.601287841

Step: 43, L: 216.03945922851562, J: 42.38420867919922
Current x: (-0.0028764201389662857, 0.19642195325747272, 0.025535601377487196), u: [0. 0.], noise: [6.86613499 6.82994017]
Start training ...
Step: 43, L: 250.0507049560547, J: 41.43833923339844
Current x: (-0.0032484235857144095, 0.20732868410869715, 0.0265767632007599), u: [0. 0.], noise: [5.80756549 6.78500517]
Start training ...
Step: 43, L: 149.84820556640625, J: 41.12026596069336
Current x: (-0.0036553969840009805, 0.21862357599023222, 0.027621544504165662), u: [0. 0.], noise: [7.58106616 7.20505985]
Start training ...
Step: 43, L: 162.99978637695312, J: 41.654541015625
Current x: (-0.004095833421112469, 0.2301962803018171, 0.02856858186721803), u: [0. 0.], noise: [7.22608872 5.76314034]
Start training ...
Step: 43, L: 212.2996368408203, J: 41.20667266845703
Current x: (-0.00457710622919187, 0.2422660332094007, 0.029553219842910778), u: [0. 0.], noise: [6.53508691 5.63138109]
Start training ...
Step: 43, L: 179.26498413085938,

Step: 43, L: 139.70851135253906, J: 41.11015319824219
Current x: (-0.0815928126527923, 1.044006475087588, 0.08497276506423952), u: [0. 0.], noise: [7.17437808 5.2317018 ]
Start training ...
Step: 43, L: 187.80926513671875, J: 41.610347747802734
Current x: (-0.0852674454915654, 1.0693990880629949, 0.08607601428031923), u: [0. 0.], noise: [5.13105194 6.40636722]
Start training ...
Step: 43, L: 141.87222290039062, J: 45.16679382324219
Current x: (-0.08904736941095921, 1.0950468329322693, 0.08737353110313417), u: [0. 0.], noise: [7.36197292 6.87448689]
Start training ...
Step: 43, L: 293.9263000488281, J: 40.01591491699219
Current x: (-0.0929264802507643, 1.1208630482884545, 0.08854351639747621), u: [0. 0.], noise: [6.51563504 6.67409489]
Start training ...
Step: 43, L: 184.8292694091797, J: 42.161705017089844
Current x: (-0.09692982185967289, 1.1471164789220332, 0.08976225028038026), u: [0. 0.], noise: [5.25010509 5.69345354]
Start training ...
Step: 43, L: 168.71139526367188, J: 42.40661

Step: 44, L: 163.095947265625, J: 39.845184326171875
Current x: (1.2634755079975853e-05, 0.01646050789940838, -0.001588382863998413), u: [0. 0.], noise: [6.15826228 5.94514769]
Start training ...
Step: 44, L: 128.30606079101562, J: 41.253814697265625
Current x: (1.8990213811062093e-05, 0.02006245474619792, -0.0017608523845672607), u: [0. 0.], noise: [6.21661243 6.83831181]
Start training ...
Step: 44, L: 248.021728515625, J: 43.37699508666992
Current x: (2.7268156600765674e-05, 0.02389374104288186, -0.0019120104312896728), u: [0. 0.], noise: [7.21100144 5.65552614]
Start training ...
Step: 44, L: 169.3522491455078, J: 44.21894073486328
Current x: (3.7844877610088394e-05, 0.028049517716783767, -0.0021253384113311766), u: [0. 0.], noise: [6.63897248 6.65618828]
Start training ...
Step: 44, L: 170.9913330078125, J: 44.28339385986328
Current x: (5.088169061090036e-05, 0.032510944794561486, -0.0021831188678741453), u: [0. 0.], noise: [6.93087384 6.67641596]
Start training ...
Step: 44, L: 1

Step: 44, L: 258.92047119140625, J: 41.14664840698242
Current x: (0.004670380048566255, 0.4293957514903577, -0.015433712863922122), u: [0. 0.], noise: [6.66574142 6.39393667]
Start training ...
Step: 44, L: 147.3643798828125, J: 44.85187530517578
Current x: (0.005002164879224354, 0.44554994644327595, -0.01630416674613953), u: [0. 0.], noise: [6.1630299 7.607718 ]
Start training ...
Step: 44, L: 181.10513305664062, J: 44.500492095947266
Current x: (0.005354104841857825, 0.4620289536666167, -0.017147440147399908), u: [0. 0.], noise: [6.75074694 5.42494517]
Start training ...
Step: 44, L: 174.21102905273438, J: 42.073822021484375
Current x: (0.005728495867168433, 0.47890385267681107, -0.018135182380676275), u: [0. 0.], noise: [5.53462667 5.92221499]
Start training ...
Step: 44, L: 183.63143920898438, J: 44.00707244873047
Current x: (0.006123764063646136, 0.4960151418480593, -0.018990344476699835), u: [0. 0.], noise: [6.23685773 6.63331373]
Start training ...
Step: 44, L: 215.9222259521484

Step: 44, L: 254.1396484375, J: 45.11204147338867
Current x: (0.05094316739839603, 1.4761465095861712, -0.026372506189346324), u: [0. 0.], noise: [6.7428276  6.34506724]
Start training ...
Step: 44, L: 167.44180297851562, J: 42.77264404296875
Current x: (0.0527265655321895, 1.5059751368950987, -0.026722091674804698), u: [0. 0.], noise: [6.52780764 8.09256116]
Start training ...
Step: 44, L: 169.35433959960938, J: 42.40802001953125
Current x: (0.05454447572279514, 1.5361310985369017, -0.027031901121139536), u: [0. 0.], noise: [6.19456546 6.75678137]
Start training ...
Step: 44, L: 249.15167236328125, J: 43.78620910644531
Current x: (0.05640144994789663, 1.5667675751070407, -0.027498185873031628), u: [0. 0.], noise: [5.98477742 6.77970566]
Start training ...
Step: 44, L: 233.75474548339844, J: 41.596187591552734
Current x: (0.058293429863484805, 1.597717713248292, -0.028020692253112805), u: [0. 0.], noise: [7.12480063 5.65646829]
Start training ...
Step: 44, L: 119.55574035644531, J: 43.

Step: 45, L: 201.903564453125, J: 40.372684478759766
Current x: (0.0001222018851829657, 0.09307124355152838, 0.000184701299667358), u: [0. 0.], noise: [6.15322693 5.83150382]
Start training ...
Step: 45, L: 235.4537353515625, J: 43.9946174621582
Current x: (0.00013358383065826109, 0.10070269257273105, 0.0006900871753692622), u: [0. 0.], noise: [7.35490828 6.26815461]
Start training ...
Step: 45, L: 305.397705078125, J: 45.079795837402344
Current x: (0.00014474441660079074, 0.10855161464554303, 0.001227645349502563), u: [0. 0.], noise: [6.24582938 4.88415288]
Start training ...
Step: 45, L: 160.1708984375, J: 44.1157112121582
Current x: (0.00015496489250534649, 0.11678184270272239, 0.0018738789081573482), u: [0. 0.], noise: [5.9044169  6.24280701]
Start training ...
Step: 45, L: 197.54605102539062, J: 41.769840240478516
Current x: (0.00016381900168953848, 0.12514406812065768, 0.002656280136108398), u: [0. 0.], noise: [6.31160134 5.11297844]
Start training ...
Step: 45, L: 197.0899353027

Step: 45, L: 191.0704345703125, J: 44.56294250488281
Current x: (-0.012661312914153589, 0.6779551721161876, 0.04047197537422179), u: [0. 0.], noise: [6.73952116 5.1445481 ]
Start training ...
Step: 45, L: 206.25198364257812, J: 42.90631103515625
Current x: (-0.013672646731733101, 0.6983687505333824, 0.04138779807090758), u: [0. 0.], noise: [6.44001343 7.37862385]
Start training ...
Step: 45, L: 131.7572784423828, J: 42.11014175415039
Current x: (-0.014732064596595963, 0.7189887627338247, 0.042463118076324455), u: [0. 0.], noise: [6.13139794 5.84935098]
Start training ...
Step: 45, L: 134.2086944580078, J: 44.90692138671875
Current x: (-0.015848658434220387, 0.7400084553563246, 0.04344457702636718), u: [0. 0.], noise: [6.20422866 6.35701408]
Start training ...
Step: 45, L: 149.957275390625, J: 44.18258285522461
Current x: (-0.017016110981096295, 0.7612441429198802, 0.044454240655899036), u: [0. 0.], noise: [5.940258   6.39308915]
Start training ...
Step: 45, L: 220.20590209960938, J: 43

Current x: (-0.12060701343357877, 1.836372448075074, 0.08699801568984983), u: [0. 0.], noise: [6.16640022 6.21339983]
Start training ...
Step: 45, L: 113.65109252929688, J: 40.64856719970703
Current x: (-0.12499619419726905, 1.8688663346747174, 0.08855039830207823), u: [0. 0.], noise: [6.4411971  4.62360461]
Start training ...
Step: 45, L: 192.2391357421875, J: 42.15196990966797
Current x: (-0.1294929409631208, 1.9016125193862508, 0.09009808096885678), u: [0. 0.], noise: [5.90191559 5.50262014]
Start training ...
Step: 45, L: 101.50564575195312, J: 45.82733917236328
Current x: (-0.13408753899769765, 1.934479849097218, 0.09182752285003659), u: [0. 0.], noise: [7.27193153 5.83383481]
Start training ...
Step: 45, L: 122.42170715332031, J: 40.192161560058594
Current x: (-0.13878475074450056, 1.9675020065586333, 0.09359689426422116), u: [0. 0.], noise: [5.49117287 5.53056457]
Start training ...
Step: 45, L: 167.4183349609375, J: 43.700042724609375
Simulation ends in 118 steps
Episode 46 beg

Step: 46, L: 189.26882934570312, J: 46.561317443847656
Current x: (0.004212104204795773, 0.22404006162962967, -0.024974634122848505), u: [0. 0.], noise: [5.8502291  6.69417909]
Start training ...
Step: 46, L: 195.38043212890625, J: 42.517295837402344
Current x: (0.004618826854239308, 0.2349534704034913, -0.026505992126464838), u: [0. 0.], noise: [5.81532922 6.73273288]
Start training ...
Step: 46, L: 218.66744995117188, J: 41.71904754638672
Current x: (0.005056875448947242, 0.24613992885983016, -0.0281217451095581), u: [0. 0.], noise: [6.33445456 6.94134669]
Start training ...
Step: 46, L: 277.2201843261719, J: 45.198204040527344
Current x: (0.005528180030829163, 0.25759975268598084, -0.029829238462448112), u: [0. 0.], noise: [5.58805997 6.15850726]
Start training ...
Step: 46, L: 207.18551635742188, J: 43.55445861816406
Current x: (0.00603681356045429, 0.2694056316704567, -0.03159742102622985), u: [0. 0.], noise: [6.60649714 6.15961399]
Start training ...
Step: 46, L: 214.144989013671

Step: 46, L: 242.8200225830078, J: 45.36619186401367
Current x: (0.0937391291342014, 1.0476480920582267, -0.10091694650650024), u: [0. 0.], noise: [6.67005462 6.81743531]
Start training ...
Step: 46, L: 223.3419189453125, J: 45.996517181396484
Current x: (0.09803712147439009, 1.0724299253781975, -0.10202834892272948), u: [0. 0.], noise: [6.03890472 5.05012914]
Start training ...
Step: 46, L: 142.12710571289062, J: 46.41040802001953
Current x: (0.10247099452783141, 1.0975726455100594, -0.10315448942184446), u: [0. 0.], noise: [5.7983305  7.18519375]
Start training ...
Step: 46, L: 177.53549194335938, J: 41.689430236816406
Current x: (0.1070178109644361, 1.1228375022377604, -0.10418175234794615), u: [0. 0.], noise: [6.10305917 5.75254976]
Start training ...
Step: 46, L: 165.34033203125, J: 42.59051513671875
Current x: (0.1116983208865775, 1.1484128097350446, -0.10534770159721372), u: [0. 0.], noise: [6.80152017 6.09852208]
Start training ...
Step: 46, L: 150.1069793701172, J: 45.23177337

Step: 47, L: 158.44906616210938, J: 43.299686431884766
Current x: (1.0896147622751204e-06, 0.019494473015142923, -0.00020497140884399428), u: [0. 0.], noise: [6.74371688 5.91297319]
Start training ...
Step: 47, L: 188.0254669189453, J: 41.796390533447266
Current x: (3.17569824363167e-06, 0.023034508694765154, -0.00022408447265625015), u: [0. 0.], noise: [6.34824883 7.28377537]
Start training ...
Step: 47, L: 212.64154052734375, J: 46.233863830566406
Current x: (5.521207693479576e-06, 0.02685921340755335, -0.00016012320518493667), u: [0. 0.], noise: [7.16686017 6.01785513]
Start training ...
Step: 47, L: 183.54959106445312, J: 43.78148651123047
Current x: (8.172189648841515e-06, 0.031066120562617185, -0.00018971457481384293), u: [0. 0.], noise: [7.59704314 6.34703676]
Start training ...
Step: 47, L: 125.47984313964844, J: 45.18471145629883
Current x: (1.1034289490165563e-05, 0.03561049922787819, -0.00010440545082092302), u: [0. 0.], noise: [5.83293507 6.43481767]
Start training ...
Step

Step: 47, L: 198.25289916992188, J: 46.725013732910156
Current x: (-0.004100954031468574, 0.4517949995029633, 0.016076327753067016), u: [0. 0.], noise: [6.64720096 6.18133932]
Start training ...
Step: 47, L: 285.20513916015625, J: 46.189449310302734
Current x: (-0.00446885825033384, 0.4684766609070954, 0.016463329553604127), u: [0. 0.], noise: [6.85357619 5.85527786]
Start training ...
Step: 47, L: 182.89617919921875, J: 43.920265197753906
Current x: (-0.004857385163508215, 0.4854600106192609, 0.01689691753387451), u: [0. 0.], noise: [7.15649587 7.04450846]
Start training ...
Step: 47, L: 155.22506713867188, J: 43.755592346191406
Current x: (-0.005266834137806029, 0.5027330735716721, 0.017430335330963134), u: [0. 0.], noise: [6.96755706 6.47771702]
Start training ...
Step: 47, L: 149.7526397705078, J: 44.347511291503906
Current x: (-0.0057002772897160525, 0.5204450342082719, 0.017974951887130736), u: [0. 0.], noise: [6.63395017 7.03318777]
Start training ...
Step: 47, L: 156.0152435302

Current x: (-0.05500312820645614, 1.5391874922738977, 0.04499040737152098), u: [0. 0.], noise: [6.41310936 6.82702745]
Start training ...
Step: 47, L: 245.12054443359375, J: 46.850502014160156
Current x: (-0.05714055190221592, 1.5689543201823946, 0.04537090682983397), u: [0. 0.], noise: [6.16649031 6.77842763]
Start training ...
Step: 47, L: 121.41669464111328, J: 42.25403594970703
Current x: (-0.059337523420587346, 1.599062822034518, 0.04571001448631285), u: [0. 0.], noise: [6.01643865 5.65874483]
Start training ...
Step: 47, L: 156.58203125, J: 46.56282424926758
Current x: (-0.06159320705617608, 1.6294834835149972, 0.04598792839050291), u: [0. 0.], noise: [6.92606563 6.20913454]
Start training ...
Step: 47, L: 206.61767578125, J: 44.62364196777344
Current x: (-0.06390223938915224, 1.6600894438278888, 0.04630161166191099), u: [0. 0.], noise: [6.26470679 6.30044986]
Start training ...
Step: 47, L: 217.56564331054688, J: 46.85908126831055
Current x: (-0.06627165649835456, 1.691026535460

Step: 48, L: 122.74227905273438, J: 43.73219299316406
Current x: (-0.00024734374957282126, 0.13013773177842355, -0.000379943180084231), u: [0. 0.], noise: [5.70097771 6.19805723]
Start training ...
Step: 48, L: 245.86590576171875, J: 44.61872100830078
Current x: (-0.00026733641240970986, 0.13806072538670297, -0.0004957211971282985), u: [0. 0.], noise: [5.85592654 6.44962483]
Start training ...
Step: 48, L: 197.01300048828125, J: 46.791587829589844
Current x: (-0.0002868769795566848, 0.14619262235910926, -0.0006612071514129665), u: [0. 0.], noise: [6.50103814 6.90664822]
Start training ...
Step: 48, L: 175.57318115234375, J: 43.56477355957031
Current x: (-0.0003058075344551193, 0.1545740743332106, -0.000886062955856326), u: [0. 0.], noise: [6.57917329 6.35557821]
Start training ...
Step: 48, L: 143.2946014404297, J: 45.21528625488281
Current x: (-0.00032385156361600066, 0.1633152946375754, -0.0011514797687530547), u: [0. 0.], noise: [6.46033954 6.0111507 ]
Start training ...
Step: 48, L

Step: 48, L: 85.62274169921875, J: 47.626365661621094
Current x: (0.005124361765397863, 0.771503446536227, -0.01668617062568666), u: [0. 0.], noise: [5.45825055 7.88687274]
Start training ...
Step: 48, L: 249.91769409179688, J: 45.14290237426758
Current x: (0.0055544561251980795, 0.7919453698582857, -0.01725392179489137), u: [0. 0.], noise: [5.99287114 6.00611489]
Start training ...
Step: 48, L: 186.39553833007812, J: 46.42394256591797
Current x: (0.006006817352102579, 0.8127406197307634, -0.01806453518867494), u: [0. 0.], noise: [6.20699425 6.8453695 ]
Start training ...
Step: 48, L: 149.1702880859375, J: 45.58816146850586
Current x: (0.006479880508845725, 0.8337545896284914, -0.018876472949981703), u: [0. 0.], noise: [6.62243785 7.04042616]
Start training ...
Step: 48, L: 202.31568908691406, J: 48.30996322631836
Current x: (0.006976520871010918, 0.8550925829043686, -0.019752248239517226), u: [0. 0.], noise: [6.41677775 7.37960619]
Start training ...
Step: 48, L: 80.88038635253906, J:

Step: 48, L: 168.73162841796875, J: 43.916114807128906
Simulation ends in 126 steps
Episode 49 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [6.56504578 5.8513661 ]
Start training ...
Step: 49, L: 218.25787353515625, J: 47.23517608642578
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [5.31977655 6.48120074]
Start training ...
Step: 49, L: 220.9038543701172, J: 45.81865692138672
Current x: (0.0, 0.0002606412353515625, 7.136797904968262e-05), u: [0. 0.], noise: [6.65635596 5.48150575]
Start training ...
Step: 49, L: 205.32858276367188, J: 47.41444396972656
Current x: (0.0, 0.0007203802413940428, 2.659354209899902e-05), u: [0. 0.], noise: [6.33819342 6.37426458]
Start training ...
Step: 49, L: 212.8763427734375, J: 44.3401985168457
Current x: (-8.662546267932713e-08, 0.0014129053695284867, 9.930410385131834e-05), u: [0. 0.], noise: [5.53586462 6.60001229]
Start training ...
Step: 49, L: 201.451904296875, J: 44.228492736816406
Current x: (-2.0705785314261528e-07, 0.0023956762628

Step: 49, L: 268.048583984375, J: 46.66932678222656
Current x: (0.00432981575274476, 0.24517547984189272, -0.01664992794990539), u: [0. 0.], noise: [6.16110616 5.33662225]
Start training ...
Step: 49, L: 168.48109436035156, J: 44.611995697021484
Current x: (0.0046975636383084435, 0.25656583411115896, -0.01729133076667785), u: [0. 0.], noise: [6.14124089 6.48043718]
Start training ...
Step: 49, L: 153.6949005126953, J: 44.423274993896484
Current x: (0.005084454274242133, 0.26812480184881415, -0.017850285196304314), u: [0. 0.], noise: [6.28550574 6.43076139]
Start training ...
Step: 49, L: 209.81072998046875, J: 45.18527603149414
Current x: (0.005493168384159491, 0.27996474873855015, -0.018443159246444695), u: [0. 0.], noise: [6.42676037 6.38443585]
Start training ...
Step: 49, L: 137.4496307373047, J: 47.51803970336914
Current x: (0.005924580187262387, 0.2920951197059666, -0.019050558853149407), u: [0. 0.], noise: [6.61579585 5.55373238]
Start training ...
Step: 49, L: 198.9921112060547

Step: 49, L: 161.07826232910156, J: 46.712974548339844
Current x: (0.052707095592582257, 1.054631159933286, -0.023242075729370117), u: [0. 0.], noise: [6.44766858 7.00619454]
Start training ...
Step: 49, L: 207.24856567382812, J: 43.511329650878906
Current x: (0.054438268233457945, 1.0773431627473358, -0.023263175392150878), u: [0. 0.], noise: [6.59516564 6.00127552]
Start training ...
Step: 49, L: 113.56680297851562, J: 44.887664794921875
Current x: (0.056200707629741185, 1.1004191885072716, -0.023340127658843992), u: [0. 0.], noise: [6.544872  6.7044048]
Start training ...
Step: 49, L: 189.11155700683594, J: 43.03118896484375
Current x: (0.057992447705312346, 1.123773517565308, -0.023357690906524658), u: [0. 0.], noise: [4.84031644 6.21564542]
Start training ...
Step: 49, L: 213.1646728515625, J: 46.76409912109375
Current x: (0.059815108955185395, 1.147471413466371, -0.023391207456588743), u: [0. 0.], noise: [5.03785098 6.42849301]
Start training ...
Step: 49, L: 163.45877075195312, 

Step: 50, L: 143.90904235839844, J: 47.9090576171875
Current x: (2.395556262005534e-06, 0.008371084600057505, 0.00043816771507263185), u: [0. 0.], noise: [5.69658625 7.12255344]
Start training ...
Step: 50, L: 171.7699737548828, J: 45.8695068359375
Current x: (2.6828006878413252e-06, 0.010794244266421584, 0.0008000857830047607), u: [0. 0.], noise: [7.2056553  6.21472004]
Start training ...
Step: 50, L: 224.26248168945312, J: 47.72428894042969
Current x: (2.4083518261096696e-06, 0.013518317757786811, 0.0010194071292877198), u: [0. 0.], noise: [7.09735027 6.21610323]
Start training ...
Step: 50, L: 169.04574584960938, J: 46.574378967285156
Current x: (1.0601578890011059e-06, 0.01660342840200539, 0.0013378219604492187), u: [0. 0.], noise: [6.22928558 6.39543527]
Start training ...
Step: 50, L: 213.41067504882812, J: 45.76707077026367
Current x: (-1.6452187721494342e-06, 0.02003888372189468, 0.0017443614959716798), u: [0. 0.], noise: [5.82098887 6.95307335]
Start training ...
Step: 50, L: 

Step: 50, L: 126.16850280761719, J: 45.191165924072266
Current x: (-0.00485397288778997, 0.3853534632357, 0.01423839044570923), u: [0. 0.], noise: [6.21506919 6.04898404]
Start training ...
Step: 50, L: 184.1481475830078, J: 47.58465576171875
Current x: (-0.005176036586273642, 0.39941060850994325, 0.014606362819671632), u: [0. 0.], noise: [5.93934101 7.95867923]
Start training ...
Step: 50, L: 215.28640747070312, J: 44.69634246826172
Current x: (-0.005515561732741927, 0.41371303480510185, 0.014990943717956545), u: [0. 0.], noise: [6.523939  4.1316119]
Start training ...
Step: 50, L: 154.17201232910156, J: 46.635440826416016
Current x: (-0.005875386109353266, 0.42842411482772946, 0.015173590803146364), u: [0. 0.], noise: [5.71029316 6.36037874]
Start training ...
Step: 50, L: 211.99505615234375, J: 46.54364776611328
Current x: (-0.006251183564149162, 0.44321963021802946, 0.015595470619201662), u: [0. 0.], noise: [5.38058425 5.17631106]
Start training ...
Step: 50, L: 111.87677001953125,

Start training ...
Step: 50, L: 112.205810546875, J: 45.301422119140625
Current x: (-0.03888330624385666, 1.172167685498579, 0.02660194525718688), u: [0. 0.], noise: [4.97062758 6.03668742]
Start training ...
Step: 50, L: 240.3107147216797, J: 48.945831298828125
Current x: (-0.04029198470498328, 1.1952682071993723, 0.02651011004447936), u: [0. 0.], noise: [6.28678965 5.43671389]
Start training ...
Step: 50, L: 127.11348724365234, J: 48.15140914916992
Current x: (-0.04172994131090569, 1.218488070917597, 0.026311668872833243), u: [0. 0.], noise: [4.63125504 4.64989318]
Start training ...
Step: 50, L: 202.33697509765625, J: 47.87019348144531
Current x: (-0.0431989734123888, 1.241898873015572, 0.0261982352733612), u: [0. 0.], noise: [6.05742573 6.10954459]
Start training ...
Step: 50, L: 239.41529846191406, J: 50.46288299560547
Current x: (-0.04469242294795276, 1.2652564687543453, 0.026082937860488885), u: [0. 0.], noise: [5.32191024 6.48247044]
Start training ...
Step: 50, L: 141.83099365

Step: 51, L: 93.99154663085938, J: 39.27501678466797
Current x: (1.4034394380877093e-06, 0.025946224568470913, -0.0014326836585998532), u: [0. 0.], noise: [5.87354696 5.62595998]
Start training ...
Step: 51, L: 126.72508239746094, J: 48.824012756347656
Current x: (5.196388067273301e-06, 0.02945431827615219, -0.0015636165142059323), u: [0. 0.], noise: [5.99241691 5.77095415]
Start training ...
Step: 51, L: 88.0811538696289, J: 47.4809684753418
Current x: (1.0636851701824948e-05, 0.0331313614986872, -0.001669790649414062), u: [0. 0.], noise: [5.33968308 5.48422358]
Start training ...
Step: 51, L: 131.0830078125, J: 47.3634033203125
Current x: (1.7916654626705537e-05, 0.03700374033460287, -0.0017538185119628901), u: [0. 0.], noise: [5.9292377  5.42934085]
Start training ...
Step: 51, L: 224.9713134765625, J: 48.57042694091797
Current x: (2.700382240538364e-05, 0.04097750825604015, -0.0018523004055023187), u: [0. 0.], noise: [5.85787637 5.52763542]
Start training ...
Step: 51, L: 121.26766

Step: 51, L: 164.2464141845703, J: 46.99034881591797
Current x: (0.0008197080799150271, 0.3535389024610021, 0.009182631158828744), u: [0. 0.], noise: [6.49358028 6.48980059]
Start training ...
Step: 51, L: 152.3312530517578, J: 45.93320083618164
Current x: (0.0007640837235621972, 0.3663039682061311, 0.009635777950286873), u: [0. 0.], noise: [6.07458583 5.52468205]
Start training ...
Step: 51, L: 156.1171112060547, J: 47.86338424682617
Current x: (0.0006965373746152356, 0.37938631734038136, 0.010089302730560312), u: [0. 0.], noise: [5.98294902 7.05338244]
Start training ...
Step: 51, L: 121.05339050292969, J: 43.150169372558594
Current x: (0.0006178144015806302, 0.39264753942244046, 0.01059781789779664), u: [0. 0.], noise: [6.68720253 6.04158827]
Start training ...
Step: 51, L: 117.22779846191406, J: 48.83904266357422
Current x: (0.0005259389025162971, 0.4062313282717041, 0.010999289751052866), u: [0. 0.], noise: [6.15518552 4.58957229]
Start training ...
Step: 51, L: 139.2677001953125,

Step: 51, L: 151.68865966796875, J: 49.24278259277344
Current x: (-0.023022794241860572, 1.1114151212590797, 0.04526334667205812), u: [0. 0.], noise: [6.92321355 5.89529317]
Start training ...
Step: 51, L: 115.71629333496094, J: 46.060638427734375
Current x: (-0.024435094934541903, 1.1333155970733642, 0.04583730859756471), u: [0. 0.], noise: [5.85202411 6.52889385]
Start training ...
Step: 51, L: 197.74234008789062, J: 47.03874969482422
Current x: (-0.025905396666531168, 1.1555156106270006, 0.046514062547683734), u: [0. 0.], noise: [5.76716985 6.04292324]
Start training ...
Step: 51, L: 132.89328002929688, J: 48.79790496826172
Current x: (-0.02743242932602691, 1.1779714156067054, 0.04712312951087953), u: [0. 0.], noise: [7.22111254 5.77791818]
Start training ...
Step: 51, L: 148.63226318359375, J: 46.34828186035156
Current x: (-0.02901437571912075, 1.200625952518281, 0.04770462112426759), u: [0. 0.], noise: [5.51193832 6.28537253]
Start training ...
Step: 51, L: 227.98370361328125, J: 

Step: 52, L: 109.67926025390625, J: 46.52405548095703
Current x: (7.840540140255345e-07, 0.005141553028587577, -0.00022810454368591307), u: [0. 0.], noise: [6.58579183 6.30297157]
Start training ...
Step: 52, L: 143.4080810546875, J: 46.70970916748047
Current x: (1.2030834578981637e-06, 0.0066258825726952355, -0.0003122747421264648), u: [0. 0.], noise: [5.70782554 5.59392178]
Start training ...
Step: 52, L: 180.84649658203125, J: 47.328834533691406
Current x: (1.9161114492571563e-06, 0.008418088426045129, -0.0003681628704071044), u: [0. 0.], noise: [6.30452606 5.15025011]
Start training ...
Step: 52, L: 133.62257385253906, J: 47.33965301513672
Current x: (2.982064457938383e-06, 0.010359468956498488, -0.0004126605987548827), u: [0. 0.], noise: [5.96801912 6.4848627 ]
Start training ...
Step: 52, L: 181.58914184570312, J: 47.27287292480469
Current x: (4.469739771309473e-06, 0.01246532699034477, -0.0003417307376861571), u: [0. 0.], noise: [5.64157137 5.88084079]
Start training ...
Step: 5

Step: 52, L: 123.34122467041016, J: 45.918670654296875
Current x: (0.0028519558121319604, 0.29242527927880174, -0.009230278635025022), u: [0. 0.], noise: [5.59205725 6.04487357]
Start training ...
Step: 52, L: 112.691162109375, J: 45.848514556884766
Current x: (0.003069838665072691, 0.30310839081071866, -0.009379621791839597), u: [0. 0.], noise: [5.82730989 5.55081758]
Start training ...
Step: 52, L: 203.48826599121094, J: 47.179046630859375
Current x: (0.003298462576557857, 0.3139741458173719, -0.009574246597290037), u: [0. 0.], noise: [5.54512996 6.11063252]
Start training ...
Step: 52, L: 125.6704330444336, J: 46.88388442993164
Current x: (0.003537758585340151, 0.32499666357871576, -0.009741222143173215), u: [0. 0.], noise: [5.46723521 5.52994184]
Start training ...
Step: 52, L: 115.77059936523438, J: 46.423240661621094
Current x: (0.0037882139373289847, 0.3362037040903747, -0.009964747953414914), u: [0. 0.], noise: [6.25941972 6.40399941]
Start training ...
Step: 52, L: 206.8196716

Step: 52, L: 108.70232391357422, J: 47.77660369873047
Current x: (0.03059165720052277, 0.9413378579521183, -0.048766787147521966), u: [0. 0.], noise: [4.88725756 5.10981669]
Start training ...
Step: 52, L: 196.25918579101562, J: 51.02995681762695
Current x: (0.03200857986136227, 0.9603047913893125, -0.05033317747116088), u: [0. 0.], noise: [6.07472315 6.24218946]
Start training ...
Step: 52, L: 125.28160095214844, J: 47.76870346069336
Current x: (0.033474235719235884, 0.9792892437229496, -0.05192182369232177), u: [0. 0.], noise: [6.44247446 5.9368986 ]
Start training ...
Step: 52, L: 177.01998901367188, J: 47.61058044433594
Current x: (0.035001860338836226, 0.9985228274500195, -0.05352721652984618), u: [0. 0.], noise: [7.74956805 6.18411224]
Start training ...
Step: 52, L: 186.9483642578125, J: 47.078269958496094
Current x: (0.036593732047416894, 1.0180116802443093, -0.05508205180168151), u: [0. 0.], noise: [4.84993812 6.38805216]
Start training ...
Step: 52, L: 150.51129150390625, J: 

Step: 53, L: 200.2362060546875, J: 49.14794158935547
Current x: (0.0, 0.00040581591796874997, 1.6405773162841804e-05), u: [0. 0.], noise: [5.53801149 6.47064179]
Start training ...
Step: 53, L: 120.40187072753906, J: 42.485816955566406
Current x: (6.152806151573859e-08, 0.0009765501617863989, 0.00017612123489379886), u: [0. 0.], noise: [6.28506024 5.4730745 ]
Start training ...
Step: 53, L: 188.88087463378906, J: 49.099891662597656
Current x: (1.0335499827023785e-07, 0.0017671497695171488, 0.00024257369041442875), u: [0. 0.], noise: [6.20150387 5.00311708]
Start training ...
Step: 53, L: 136.6864013671875, J: 48.75879669189453
Current x: (-6.190378674533165e-08, 0.002752562843203696, 0.00039022474288940434), u: [0. 0.], noise: [5.49945693 5.75745231]
Start training ...
Step: 53, L: 205.88722229003906, J: 48.2036247253418
Current x: (-4.989571801694129e-07, 0.0038774379200579457, 0.0006577144622802735), u: [0. 0.], noise: [5.18438057 6.02750967]
Start training ...
Step: 53, L: 278.91952

Step: 53, L: 266.6454772949219, J: 44.42692565917969
Current x: (-0.004506320420912701, 0.2362143383038017, 0.02039026432037353), u: [0. 0.], noise: [5.25304245 6.60860118]
Start training ...
Step: 53, L: 212.26934814453125, J: 51.331275939941406
Current x: (-0.0049246184414669, 0.24639848108525966, 0.02042996788024902), u: [0. 0.], noise: [6.64031314 5.3458921 ]
Start training ...
Step: 53, L: 222.5377960205078, J: 50.259456634521484
Current x: (-0.005367100989374786, 0.2567875415774744, 0.020334115552902217), u: [0. 0.], noise: [5.25693808 5.46128541]
Start training ...
Step: 53, L: 100.98331451416016, J: 47.807823181152344
Current x: (-0.005834069612374057, 0.2673939724462265, 0.020367705345153805), u: [0. 0.], noise: [5.53464403 6.37555332]
Start training ...
Step: 53, L: 143.75796508789062, J: 47.74723815917969
Current x: (-0.006322831293145262, 0.27809100409324394, 0.02038086037635803), u: [0. 0.], noise: [5.21706588 6.31239142]
Start training ...
Step: 53, L: 219.92822265625, J:

Step: 53, L: 190.17263793945312, J: 45.94377899169922
Current x: (-0.050821433585237684, 0.9690841454367799, 0.014762068653106674), u: [0. 0.], noise: [5.62257845 5.41958636]
Start training ...
Step: 53, L: 141.37380981445312, J: 47.09101867675781
Current x: (-0.052280324508984066, 0.989203213016984, 0.014475016832351669), u: [0. 0.], noise: [5.07845542 5.56454901]
Start training ...
Step: 53, L: 168.0015411376953, J: 48.97418212890625
Current x: (-0.053755515360195065, 1.0094453767649343, 0.014208264255523666), u: [0. 0.], noise: [6.02684069 5.35842263]
Start training ...
Step: 53, L: 134.48583984375, J: 49.21921920776367
Current x: (-0.05524611144023552, 1.0297707294572063, 0.013892902326583847), u: [0. 0.], noise: [5.93499955 5.52476811]
Start training ...
Step: 53, L: 170.59329223632812, J: 48.43893814086914
Current x: (-0.05675288345917251, 1.0502534935758372, 0.01364438219070433), u: [0. 0.], noise: [6.18382707 5.51101253]
Start training ...
Step: 53, L: 118.57064819335938, J: 44

Current x: (0.0, 0.00014731811904907216, -0.0001180267333984375), u: [0. 0.], noise: [5.45922305 5.91264021]
Start training ...
Step: 54, L: 274.47918701171875, J: 44.81940841674805
Current x: (0.0, 0.00042797873687744115, -0.0002328453540802002), u: [0. 0.], noise: [5.8595073  5.50227921]
Start training ...
Step: 54, L: 120.33995056152344, J: 51.23438262939453
Current x: (1.3421838825285995e-07, 0.0008648256833024651, -0.00039300565719604496), u: [0. 0.], noise: [5.61232118 6.29657105]
Start training ...
Step: 54, L: 103.52972412109375, J: 47.70143127441406
Current x: (5.32990679938291e-07, 0.0014568511877946027, -0.0005174431800842286), u: [0. 0.], noise: [4.32416705 5.59775585]
Start training ...
Step: 54, L: 199.1856689453125, J: 50.960838317871094
Current x: (1.3997891770888668e-06, 0.002258765863471342, -0.0007103056907653809), u: [0. 0.], noise: [4.79456205 6.18073474]
Start training ...
Step: 54, L: 228.96157836914062, J: 48.10221862792969
Current x: (2.7799907739302234e-06, 0.

Step: 54, L: 175.60008239746094, J: 48.26206970214844
Current x: (0.004251261204958883, 0.17526806063226189, -0.011873878097534178), u: [0. 0.], noise: [6.29971368 5.29963788]
Start training ...
Step: 54, L: 96.22944641113281, J: 47.72552490234375
Current x: (0.004540601607548387, 0.18337815838493932, -0.012368094158172605), u: [0. 0.], noise: [4.45457566 5.70103605]
Start training ...
Step: 54, L: 158.72418212890625, J: 49.430755615234375
Current x: (0.00484371461552847, 0.19166710955783076, -0.012762302637100218), u: [0. 0.], noise: [6.01015101 4.6352746 ]
Start training ...
Step: 54, L: 145.219970703125, J: 47.66980743408203
Current x: (0.005159387858635595, 0.19999054416046752, -0.013281157159805296), u: [0. 0.], noise: [5.73457197 4.19934686]
Start training ...
Step: 54, L: 190.08648681640625, J: 50.089229583740234
Current x: (0.00548864674751052, 0.20839743464951704, -0.013662524032592771), u: [0. 0.], noise: [6.49040737 4.9019109 ]
Start training ...
Step: 54, L: 83.159454345703

Step: 54, L: 111.44679260253906, J: 49.895599365234375
Current x: (0.03258228549521179, 0.7003225456714428, -0.01831548690795897), u: [0. 0.], noise: [6.05682948 5.84754504]
Start training ...
Step: 54, L: 112.85667419433594, J: 42.890533447265625
Current x: (0.03365174655960647, 0.7166844008370369, -0.018490461635589588), u: [0. 0.], noise: [5.74279908 5.50461782]
Start training ...
Step: 54, L: 204.2747344970703, J: 50.596031188964844
Current x: (0.03474300984758753, 0.7332554938455085, -0.01864450793266295), u: [0. 0.], noise: [5.99219743 6.28252412]
Start training ...
Step: 54, L: 181.02340698242188, J: 49.701332092285156
Current x: (0.03585506894460239, 0.7499701363314426, -0.018774736070632923), u: [0. 0.], noise: [5.63628936 6.19641425]
Start training ...
Step: 54, L: 130.08473205566406, J: 49.27619552612305
Current x: (0.03699001232931009, 0.7669310375926179, -0.018933996868133532), u: [0. 0.], noise: [5.36566994 5.61632764]
Start training ...
Step: 54, L: 158.30113220214844, J

Step: 54, L: 146.88938903808594, J: 53.796104431152344
Current x: (0.1074624950586224, 1.6719599528507094, -0.011722682666778539), u: [0. 0.], noise: [6.50108101 4.52111101]
Start training ...
Step: 54, L: 212.08804321289062, J: 50.460044860839844
Current x: (0.10953610465839123, 1.6965055360320949, -0.011207855987548802), u: [0. 0.], noise: [6.11956002 6.25798526]
Start training ...
Step: 54, L: 110.10503387451172, J: 48.81251907348633
Current x: (0.11162263492813852, 1.7211722626802903, -0.010495032310485814), u: [0. 0.], noise: [5.1291648  6.03744231]
Start training ...
Step: 54, L: 175.85238647460938, J: 51.71942138671875
Current x: (0.11372303748203512, 1.7460956661240656, -0.00979605112075803), u: [0. 0.], noise: [6.29263811 5.87703431]
Start training ...
Step: 54, L: 134.17691040039062, J: 48.34063720703125
Current x: (0.11583515921081745, 1.7711546687610245, -0.009187897682189914), u: [0. 0.], noise: [6.59974393 5.90280174]
Start training ...
Step: 54, L: 215.1890869140625, J: 

Step: 55, L: 198.23709106445312, J: 47.84364318847656
Current x: (-0.0002969541609976987, 0.11781094029172917, 0.003982400941848753), u: [0. 0.], noise: [4.53385266 3.81580968]
Start training ...
Step: 55, L: 139.9935302734375, J: 50.7137451171875
Current x: (-0.0003452608094994681, 0.12442865971189164, 0.004164969635009764), u: [0. 0.], noise: [5.47415037 5.48061886]
Start training ...
Step: 55, L: 184.0828857421875, J: 49.27217102050781
Current x: (-0.00039689261970420053, 0.130900338789058, 0.004419342613220213), u: [0. 0.], noise: [5.4808411  6.00774994]
Start training ...
Step: 55, L: 164.3193359375, J: 47.91902542114258
Current x: (-0.00045308704479802984, 0.13748648527808652, 0.004673068714141844), u: [0. 0.], noise: [6.61887816 6.28927563]
Start training ...
Step: 55, L: 168.76318359375, J: 46.89319610595703
Current x: (-0.0005143586554281027, 0.14424047966760087, 0.004874103927612302), u: [0. 0.], noise: [6.83356001 5.51454587]
Start training ...
Step: 55, L: 130.1780853271484

Step: 55, L: 89.55760192871094, J: 48.395591735839844
Current x: (-0.015576214525090518, 0.6422038289951647, 0.03050994825363159), u: [0. 0.], noise: [5.27195784 5.51980981]
Start training ...
Step: 55, L: 155.8551025390625, J: 46.38844299316406
Current x: (-0.016400355474990372, 0.656958533482848, 0.0313956558227539), u: [0. 0.], noise: [5.60945468 6.01445391]
Start training ...
Step: 55, L: 190.7447052001953, J: 49.005943298339844
Current x: (-0.01725741694259418, 0.6718109124419616, 0.03225657820701599), u: [0. 0.], noise: [6.89410856 4.87215779]
Start training ...
Step: 55, L: 121.08295440673828, J: 50.63153839111328
Current x: (-0.018150966439828442, 0.6868441094708699, 0.03307700066566467), u: [0. 0.], noise: [5.33341594 5.60695354]
Start training ...
Step: 55, L: 217.04283142089844, J: 45.953277587890625
Current x: (-0.019082463303162548, 0.7020723210075286, 0.03409961824417114), u: [0. 0.], noise: [5.6078581  5.42102342]
Start training ...
Step: 55, L: 187.7770538330078, J: 52.

Step: 55, L: 98.07598876953125, J: 47.61240768432617
Current x: (-0.09776516673821932, 1.423238630635788, 0.08399250783920288), u: [0. 0.], noise: [5.42905605 5.58721461]
Start training ...
Step: 55, L: 139.6361083984375, J: 50.2252197265625
Current x: (-0.10129451753972264, 1.445486694322431, 0.08536856188774108), u: [0. 0.], noise: [5.38884426 5.0938074 ]
Start training ...
Step: 55, L: 157.09317016601562, J: 48.14190673828125
Current x: (-0.10491628800537132, 1.4678515015097462, 0.08672880010604858), u: [0. 0.], noise: [6.04732778 5.17329346]
Start training ...
Step: 55, L: 160.112060546875, J: 45.953025817871094
Current x: (-0.10862743870469044, 1.490279756418428, 0.08811854205131531), u: [0. 0.], noise: [6.42501939 6.47984158]
Start training ...
Step: 55, L: 224.99703979492188, J: 51.23554229736328
Current x: (-0.11243578255157857, 1.5128448560716004, 0.0895956874370575), u: [0. 0.], noise: [6.90548736 5.09691713]
Start training ...
Step: 55, L: 122.75782012939453, J: 49.436542510

Step: 56, L: 92.82703399658203, J: 46.527801513671875
Current x: (-0.00017352569528678786, 0.03281936217675634, 0.0044216898918151845), u: [0. 0.], noise: [5.11117547 4.88479992]
Start training ...
Step: 56, L: 147.81509399414062, J: 53.34029769897461
Current x: (-0.0002102457117718871, 0.03601204385946315, 0.004517613887786864), u: [0. 0.], noise: [4.49763877 6.12518547]
Start training ...
Step: 56, L: 116.06655883789062, J: 51.57933807373047
Current x: (-0.0002513856242347062, 0.03922331331988783, 0.004636175441741942), u: [0. 0.], noise: [6.17364361 4.91942593]
Start training ...
Step: 56, L: 132.5760498046875, J: 47.80779266357422
Current x: (-0.0002973245019682723, 0.04251585431187899, 0.004591982316970823), u: [0. 0.], noise: [5.95289526 6.53658794]
Start training ...
Step: 56, L: 195.98631286621094, J: 49.021644592285156
Current x: (-0.00034840630272064174, 0.0459366902897559, 0.004673210954666136), u: [0. 0.], noise: [6.58008207 5.01718119]
Start training ...
Step: 56, L: 150.9

Step: 56, L: 179.02333068847656, J: 51.77649688720703
Current x: (-0.011615816872011546, 0.36387447630673, 0.016101075601577763), u: [0. 0.], noise: [5.80836704 6.86410959]
Start training ...
Step: 56, L: 149.47705078125, J: 51.173946380615234
Current x: (-0.012178542218675242, 0.37545308851927844, 0.016794522237777713), u: [0. 0.], noise: [5.68960116 6.22338017]
Start training ...
Step: 56, L: 92.42837524414062, J: 48.921451568603516
Current x: (-0.012761670734394463, 0.3873177841486439, 0.01738239464759827), u: [0. 0.], noise: [5.06655854 5.22953553]
Start training ...
Step: 56, L: 208.13265991210938, J: 49.272003173828125
Current x: (-0.013364805592086252, 0.3993926098789154, 0.017916889142990115), u: [0. 0.], noise: [5.9145997  5.66282303]
Start training ...
Step: 56, L: 93.08724212646484, J: 48.56220245361328
Current x: (-0.013985836625352386, 0.4115158894601507, 0.018435085916519166), u: [0. 0.], noise: [5.5347782  5.82874791]
Start training ...
Step: 56, L: 161.38040161132812, J

Step: 56, L: 160.5589141845703, J: 45.988922119140625
Current x: (-0.0642345751946268, 1.0244600824057715, 0.05004200754165648), u: [0. 0.], noise: [5.57421742 4.66462211]
Start training ...
Step: 56, L: 188.94601440429688, J: 51.75539016723633
Current x: (-0.06638736050540651, 1.0430951413021539, 0.05062845203876494), u: [0. 0.], noise: [4.8783004 5.8578953]
Start training ...
Step: 56, L: 165.38870239257812, J: 51.03010940551758
Current x: (-0.06859136164550399, 1.0617718024699037, 0.05130585603713988), u: [0. 0.], noise: [5.03967175 6.39395404]
Start training ...
Step: 56, L: 198.12521362304688, J: 55.09211730957031
Current x: (-0.07084969526368058, 1.080539707515588, 0.0518853005170822), u: [0. 0.], noise: [5.98306638 6.28807654]
Start training ...
Step: 56, L: 87.71774291992188, J: 46.29179382324219
Current x: (-0.0731666643475396, 1.0994684706773463, 0.052329316759109484), u: [0. 0.], noise: [5.53389084 4.94107065]
Start training ...
Step: 56, L: 170.20034790039062, J: 47.6788635

Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [6.39919026 5.47800047]
Start training ...
Step: 57, L: 153.4242401123047, J: 49.836063385009766
Simulation ends in 2 steps
Episode 58 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [5.18737793 5.59701634]
Start training ...
Step: 58, L: 89.94540405273438, J: 54.407413482666016
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [5.26841863 5.61035896]
Start training ...
Step: 58, L: 136.68643188476562, J: 50.893226623535156
Current x: (0.0, 9.743942642211915e-05, -4.096384048461914e-05), u: [0. 0.], noise: [6.1725307  6.34270929]
Start training ...
Step: 58, L: 152.927001953125, J: 50.938838958740234
Current x: (0.0, 0.0003017566986083985, -0.0001161217212677002), u: [0. 0.], noise: [4.54103328 5.73025005]
Start training ...
Step: 58, L: 123.3282241821289, J: 50.33407974243164
Current x: (5.12672283192252e-08, 0.0007765979413022437, -0.0002082974433898926), u: [0. 0.], noise: [5.43745335 5.57669162]
Start training ...
Step: 58, L: 12

Step: 58, L: 115.42820739746094, J: 51.10419464111328
Current x: (0.005925872210631622, 0.13104534894616712, -0.019515604066848755), u: [0. 0.], noise: [4.32266109 6.64457344]
Start training ...
Step: 58, L: 170.87362670898438, J: 51.526336669921875
Current x: (0.006369713739141728, 0.13654565183722198, -0.01978088140487671), u: [0. 0.], noise: [5.52353817 6.0853687 ]
Start training ...
Step: 58, L: 127.54022216796875, J: 50.88113021850586
Current x: (0.006834957128054478, 0.14216146925231526, -0.02027834997177124), u: [0. 0.], noise: [5.18655886 5.94458056]
Start training ...
Step: 58, L: 159.38522338867188, J: 50.435218811035156
Current x: (0.00732316246021961, 0.147956950230856, -0.02083200159072876), u: [0. 0.], noise: [5.82896818 6.09769016]
Start training ...
Step: 58, L: 106.03473663330078, J: 48.61211013793945
Current x: (0.007833938360185269, 0.15388431633014282, -0.021461455392837524), u: [0. 0.], noise: [5.33009594 5.79601335]
Start training ...
Step: 58, L: 157.835647583007

Step: 58, L: 122.11962890625, J: 49.4483642578125
Current x: (0.059425181639842485, 0.5025208697893692, -0.056306678867340094), u: [0. 0.], noise: [6.23146888 5.29583074]
Start training ...
Step: 58, L: 93.4864501953125, J: 52.04347610473633
Current x: (0.06172825608433156, 0.5136879240108323, -0.056989937877655036), u: [0. 0.], noise: [4.9721397 5.5929215]
Start training ...
Step: 58, L: 101.66859436035156, J: 53.33588409423828
Current x: (0.06409620263452206, 0.5250248813712965, -0.05757963309288026), u: [0. 0.], noise: [5.03609392 5.37334649]
Start training ...
Step: 58, L: 130.0543212890625, J: 52.90892791748047
Current x: (0.06652432681793095, 0.5364356296646348, -0.05823140649795533), u: [0. 0.], noise: [6.11834672 5.29568983]
Start training ...
Step: 58, L: 119.26118469238281, J: 52.351070404052734
Current x: (0.06901235506122791, 0.5479045968584915, -0.05891690516471863), u: [0. 0.], noise: [5.40783804 5.72639471]
Start training ...
Step: 58, L: 112.0609130859375, J: 49.5917358

Step: 58, L: 178.20712280273438, J: 49.92213821411133
Current x: (0.24354783781426315, 1.1580390625774142, -0.0992299117565155), u: [0. 0.], noise: [5.64170488 5.93593008]
Start training ...
Step: 58, L: 179.57321166992188, J: 50.060909271240234
Current x: (0.2497076481746981, 1.1756465145077608, -0.100395348072052), u: [0. 0.], noise: [6.08253855 5.46849408]
Start training ...
Step: 58, L: 153.57623291015625, J: 53.63067626953125
Current x: (0.25598215485908027, 1.1934250345921393, -0.10159020690917968), u: [0. 0.], noise: [4.9733256  5.44261231]
Start training ...
Step: 58, L: 105.46199035644531, J: 50.55313491821289
Current x: (0.2623724338300837, 1.2113718415928039, -0.10272366127967833), u: [0.000674 0.      ], noise: [5.36918875 5.06445291]
Start training ...
Step: 58, L: 154.7610626220703, J: 52.44640350341797
Current x: (0.2688683466056908, 1.2293738720362986, -0.10390404429435729), u: [0.00276538 0.        ], noise: [5.62814729 4.86064834]
Start training ...
Step: 58, L: 219.9

Step: 58, L: 121.61708068847656, J: 50.643653869628906
Simulation ends in 180 steps
Episode 59 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [6.44799699 6.09118465]
Start training ...
Step: 59, L: 201.83306884765625, J: 50.96861267089844
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [5.43249278 5.78768895]
Start training ...
Step: 59, L: 101.03624725341797, J: 50.45368194580078
Current x: (0.0, 0.00027291817092895505, 3.5681247711181644e-05), u: [0. 0.], noise: [5.40845145 5.6794211 ]
Start training ...
Step: 59, L: 187.14578247070312, J: 54.473995208740234
Current x: (0.0, 0.0006868544883728029, 3.58428955078125e-05), u: [0. 0.], noise: [5.27417717 6.33979038]
Start training ...
Step: 59, L: 173.57705688476562, J: 51.568702697753906
Current x: (-3.956291253636635e-08, 0.0012285780556296226, 8.9076042175293e-06), u: [0. 0.], noise: [5.56431952 5.32563443]
Start training ...
Step: 59, L: 163.86215209960938, J: 52.365684509277344
Current x: (-1.2075364883479696e-07, 0.0019506

Current x: (0.003987246993773458, 0.13950666467628964, -0.021958022880554203), u: [0. 0.], noise: [4.82784893 4.47795615]
Start training ...
Step: 59, L: 67.11061096191406, J: 48.88353729248047
Current x: (0.0043622982713315194, 0.14594920213437967, -0.022842553138732916), u: [0. 0.], noise: [5.24182896 5.67202967]
Start training ...
Step: 59, L: 119.49960327148438, J: 48.97447967529297
Current x: (0.004757781615258366, 0.1523410957801955, -0.02369209413528443), u: [0. 0.], noise: [4.18931747 5.49490915]
Start training ...
Step: 59, L: 187.64785766601562, J: 52.704505920410156
Current x: (0.00517819283030954, 0.15884309054689028, -0.024584655189514165), u: [0. 0.], noise: [5.38297054 5.02143534]
Start training ...
Step: 59, L: 120.89068603515625, J: 49.532474517822266
Current x: (0.005621545860701585, 0.16533223623002222, -0.025607775402069096), u: [0. 0.], noise: [4.43415275 5.56852375]
Start training ...
Step: 59, L: 142.5458984375, J: 54.636253356933594
Current x: (0.006090475186901

Step: 59, L: 206.80198669433594, J: 53.8775520324707
Current x: (0.05478935535345735, 0.514498883058229, -0.062110720539093014), u: [0. 0.], noise: [5.8424406  5.14627194]
Start training ...
Step: 59, L: 62.04217529296875, J: 53.02655792236328
Current x: (0.05713906163068563, 0.5258295182966909, -0.063095094871521), u: [0. 0.], noise: [5.09287642 4.49572556]
Start training ...
Step: 59, L: 141.5811309814453, J: 49.643775939941406
Current x: (0.0595569757174261, 0.53727590586691, -0.06400985231399536), u: [0. 0.], noise: [5.39712442 5.45192101]
Start training ...
Step: 59, L: 142.0946044921875, J: 54.434532165527344
Current x: (0.062035349046615125, 0.5486982456699031, -0.0648648946762085), u: [0. 0.], noise: [5.83035148 6.47957377]
Start training ...
Step: 59, L: 138.33734130859375, J: 51.96537780761719
Current x: (0.06458311953896251, 0.5602222681431313, -0.06572541670799256), u: [0. 0.], noise: [5.54302811 4.90034282]
Start training ...
Step: 59, L: 116.15814208984375, J: 54.04775238

Step: 59, L: 188.94024658203125, J: 52.3822021484375
Current x: (0.25774477666446904, 1.1910256279842075, -0.1135437933921814), u: [0. 0.], noise: [6.06665497 5.3714073 ]
Start training ...
Step: 59, L: 108.59402465820312, J: 52.725074768066406
Current x: (0.2645259283709607, 1.2081359063311572, -0.11501058435440065), u: [0. 0.], noise: [5.60127757 4.89141596]
Start training ...
Step: 59, L: 170.2708740234375, J: 51.29557800292969
Current x: (0.2714366733036804, 1.2254016257771754, -0.11640785055160524), u: [0. 0.], noise: [5.67006953 6.19361661]
Start training ...
Step: 59, L: 154.22317504882812, J: 53.267852783203125
Current x: (0.2784678294456794, 1.2427286825935115, -0.11773413062095643), u: [0. 0.], noise: [5.11242277 5.19483534]
Start training ...
Step: 59, L: 98.66925048828125, J: 51.12792205810547
Current x: (0.2856367765131916, 1.260253078929205, -0.11911276535987855), u: [0. 0.], noise: [5.74569733 5.49595317]
Start training ...
Step: 59, L: 147.70704650878906, J: 48.44252014

Step: 60, L: 160.22103881835938, J: 52.301605224609375
Current x: (0.0, 0.0006603553123474118, -8.870019912719728e-05), u: [0. 0.], noise: [6.2578887  5.55177888]
Start training ...
Step: 60, L: 61.54975891113281, J: 52.911376953125
Current x: (2.0579856593338278e-08, 0.0012772804868738117, -0.00014591250419616702), u: [0. 0.], noise: [6.03151089 5.10538932]
Start training ...
Step: 60, L: 124.60518646240234, J: 50.88450622558594
Current x: (1.4591169971109335e-07, 0.002094172415482478, -0.00013251380920410158), u: [0. 0.], noise: [5.91816028 5.77479623]
Start training ...
Step: 60, L: 100.18898010253906, J: 48.819374084472656
Current x: (4.3374483829346414e-07, 0.0030437543270476703, -2.6502943038940447e-05), u: [0. 0.], noise: [5.77480483 5.28249394]
Start training ...
Step: 60, L: 129.321533203125, J: 53.30459976196289
Current x: (8.765258027140385e-07, 0.004181631920790347, 9.384436607360839e-05), u: [0. 0.], noise: [6.4706819  6.06449621]
Start training ...
Step: 60, L: 135.848831

Step: 60, L: 123.05572509765625, J: 52.28627014160156
Current x: (-0.0015881784463320663, 0.16286393056382056, 0.001325390124320983), u: [0. 0.], noise: [5.71844076 5.66439337]
Start training ...
Step: 60, L: 178.926513671875, J: 49.7080078125
Current x: (-0.001670567339552461, 0.1695164919007144, 0.001418343496322631), u: [0. 0.], noise: [4.83685748 5.52955973]
Start training ...
Step: 60, L: 159.4434051513672, J: 53.884674072265625
Current x: (-0.0017544649018393815, 0.17632633558590408, 0.0015167015790939324), u: [0. 0.], noise: [6.42932902 5.18117846]
Start training ...
Step: 60, L: 137.49989318847656, J: 52.16594314575195
Current x: (-0.0018398327776367775, 0.183191819921504, 0.0015457894325256341), u: [0. 0.], noise: [5.82865142 4.9562368 ]
Start training ...
Step: 60, L: 163.2857666015625, J: 49.038700103759766
Current x: (-0.0019269616203354966, 0.1902373537181818, 0.0016996923208236689), u: [0. 0.], noise: [5.48970685 5.98297662]
Start training ...
Step: 60, L: 143.62875366210

Step: 60, L: 137.15875244140625, J: 54.15005111694336
Current x: (-0.010192536319068326, 0.5631748601605953, 0.02534668974876404), u: [0. 0.], noise: [4.49429696 5.48353497]
Start training ...
Step: 60, L: 142.05030822753906, J: 52.07853317260742
Current x: (-0.010712336215523354, 0.5753820582425858, 0.026216041016578675), u: [0. 0.], noise: [5.63413785 5.61098929]
Start training ...
Step: 60, L: 199.3521270751953, J: 53.775726318359375
Current x: (-0.011257423904867952, 0.5876057190105485, 0.026986468505859375), u: [0. 0.], noise: [5.19825186 5.85034093]
Start training ...
Step: 60, L: 183.51617431640625, J: 51.30931854248047
Current x: (-0.011831988487801866, 0.5999725060450788, 0.027759210848808287), u: [0. 0.], noise: [5.20292873 5.32125942]
Start training ...
Step: 60, L: 131.67576599121094, J: 51.772438049316406
Current x: (-0.012436365701358996, 0.6124627500431297, 0.02846674427986145), u: [0. 0.], noise: [4.33921562 6.6448686 ]
Start training ...
Step: 60, L: 96.58307647705078,

Step: 60, L: 77.90597534179688, J: 53.72938537597656
Current x: (-0.0695254100079684, 1.2199783543021792, 0.045171621823310824), u: [0. 0.], noise: [4.90443867 6.32615706]
Start training ...
Step: 60, L: 101.71194458007812, J: 53.11907196044922
Current x: (-0.07184641294722748, 1.2373936353190345, 0.04561250262260434), u: [0. 0.], noise: [5.19172355 5.25804126]
Start training ...
Step: 60, L: 132.111572265625, J: 47.277992248535156
Current x: (-0.07421812905756181, 1.2549498303018627, 0.04591121156215665), u: [0. 0.], noise: [6.10269632 5.37760447]
Start training ...
Step: 60, L: 124.07261657714844, J: 51.56206130981445
Current x: (-0.07663749263205735, 1.2725689148613808, 0.046203288698196385), u: [0. 0.], noise: [4.80346573 5.44114358]
Start training ...
Step: 60, L: 103.50879669189453, J: 53.04802703857422
Current x: (-0.07910954514438757, 1.2903538197922464, 0.04656787502765653), u: [0. 0.], noise: [5.69415073 4.82590277]
Start training ...
Step: 60, L: 156.54476928710938, J: 51.17

Step: 61, L: 164.91201782226562, J: 52.34913635253906
Current x: (0.0, 0.0002074855270385742, -3.4591484069824225e-05), u: [0. 0.], noise: [4.83258365 4.18019912]
Start training ...
Step: 61, L: 194.18679809570312, J: 52.1827507019043
Current x: (0.0, 0.0004752531356811522, -5.885419845581056e-05), u: [0. 0.], noise: [6.24166086 5.0514851 ]
Start training ...
Step: 61, L: 133.43075561523438, J: 54.5980110168457
Current x: (3.1176554125526126e-08, 0.0006632990488382194, -1.787848472595216e-05), u: [0. 0.], noise: [4.66423617 6.70127358]
Start training ...
Step: 61, L: 84.184814453125, J: 55.40287399291992
Current x: (1.288180145853757e-07, 0.0009996595733817023, 0.00014211483001708982), u: [0. 0.], noise: [5.41398817 5.74671175]
Start training ...
Step: 61, L: 87.15390014648438, J: 52.80741882324219
Current x: (2.467792830094221e-07, 0.001491571001063854, 9.840440750122066e-05), u: [0. 0.], noise: [5.80695875 5.44754583]
Start training ...
Step: 61, L: 126.47125244140625, J: 52.62941741

Step: 61, L: 113.6578140258789, J: 55.31050109863281
Current x: (0.0017997776416522778, 0.11788984956227791, -0.0104048228263855), u: [0. 0.], noise: [6.49527232 5.55815176]
Start training ...
Step: 61, L: 205.94677734375, J: 53.80230712890625
Current x: (0.0019802377059749527, 0.12270452031646015, -0.010457832336425783), u: [0. 0.], noise: [5.72601507 4.82574896]
Start training ...
Step: 61, L: 99.95475769042969, J: 52.250762939453125
Current x: (0.0021732389180038197, 0.12774346821399687, -0.010417129802703859), u: [0. 0.], noise: [5.32625907 5.90656547]
Start training ...
Step: 61, L: 178.33306884765625, J: 55.55473327636719
Current x: (0.0023772747862834008, 0.13285653476516987, -0.010286400651931765), u: [0. 0.], noise: [5.11691952 6.82991981]
Start training ...
Step: 61, L: 173.00405883789062, J: 52.5944709777832
Current x: (0.002593011821836868, 0.1381118228020587, -0.010213702154159548), u: [0. 0.], noise: [5.16672612 5.34408985]
Start training ...
Step: 61, L: 148.798217773437

Step: 61, L: 132.02987670898438, J: 50.6223030090332
Current x: (0.02269912301812287, 0.4339983891112402, -0.008432486629486095), u: [0. 0.], noise: [5.57909392 5.50620802]
Start training ...
Step: 61, L: 99.5459976196289, J: 50.3858642578125
Current x: (0.023419494881750207, 0.4425801912740806, -0.008189878368377696), u: [0. 0.], noise: [5.11684998 5.14352854]
Start training ...
Step: 61, L: 129.36834716796875, J: 54.9749870300293
Current x: (0.02414921430098492, 0.4512894842604091, -0.007939981508255015), u: [0. 0.], noise: [4.72469377 6.40398307]
Start training ...
Step: 61, L: 145.61460876464844, J: 50.12071990966797
Current x: (0.024887336750969546, 0.46004378062503654, -0.007692752504348765), u: [0. 0.], noise: [5.49484428 5.15793358]
Start training ...
Step: 61, L: 112.55995178222656, J: 55.115875244140625
Current x: (0.02563429525736258, 0.4689299096472338, -0.007613452434539805), u: [0. 0.], noise: [6.03485702 5.51800737]
Start training ...
Step: 61, L: 130.1134033203125, J: 5

Step: 61, L: 161.67327880859375, J: 54.08004379272461
Current x: (0.06349469981950154, 0.948348479930008, -0.0051738250255584884), u: [0. 0.], noise: [5.69608941 4.91967622]
Start training ...
Step: 61, L: 93.44882202148438, J: 56.297828674316406
Current x: (0.06448685231987523, 0.9618209926598955, -0.00525466008186342), u: [0. 0.], noise: [5.37078952 4.93486535]
Start training ...
Step: 61, L: 130.110595703125, J: 55.295780181884766
Current x: (0.06548449720710305, 0.9753740677385865, -0.005257853841781633), u: [0. 0.], noise: [4.54758928 5.49837823]
Start training ...
Step: 61, L: 78.93328094482422, J: 56.58244323730469
Current x: (0.0664875573405557, 0.9889766940421814, -0.005217455196380632), u: [0. 0.], noise: [6.05334529 5.60046817]
Start training ...
Step: 61, L: 110.26274108886719, J: 53.97107696533203
Current x: (0.06749589947234363, 1.0026029031699617, -0.005272135448455828), u: [0. 0.], noise: [6.48072194 5.70625276]
Start training ...
Step: 61, L: 180.11595153808594, J: 53.

Step: 61, L: 130.02383422851562, J: 55.95586395263672
Current x: (0.11904486076472208, 1.7185693547554004, -0.01304400396347048), u: [0. 0.], noise: [5.60398015 5.60598257]
Start training ...
Step: 61, L: 115.49398803710938, J: 54.48899841308594
Current x: (0.12040176351444828, 1.7370349061273178, -0.01297633891105654), u: [0. 0.], noise: [6.06422903 5.12575244]
Start training ...
Step: 61, L: 173.51278686523438, J: 56.536067962646484
Current x: (0.12177328812950203, 1.7556403584185332, -0.012908874130249045), u: [0. 0.], noise: [5.69445193 5.30148984]
Start training ...
Step: 61, L: 144.59762573242188, J: 53.50426483154297
Current x: (0.12315933283623667, 1.7743837146456685, -0.01274756169319155), u: [0. 0.], noise: [5.11488483 5.80993252]
Start training ...
Step: 61, L: 85.88475036621094, J: 55.14996337890625
Current x: (0.12455957167078824, 1.7932455733726376, -0.01254695305824282), u: [0. 0.], noise: [5.41534936 5.74882847]
Start training ...
Step: 61, L: 146.83226013183594, J: 53.

Current x: (-5.227481754450384e-05, 0.02711479050142052, 0.0019839048385620122), u: [0. 0.], noise: [5.04488477 6.32560972]
Start training ...
Step: 62, L: 185.3055419921875, J: 54.746055603027344
Current x: (-7.11875855493878e-05, 0.029114743437394355, 0.002275514173507691), u: [0. 0.], noise: [5.6858618  5.49337555]
Start training ...
Step: 62, L: 135.74668884277344, J: 50.59701156616211
Current x: (-9.235615004800811e-05, 0.031270743619977594, 0.0024390510082244877), u: [0. 0.], noise: [4.94882583 5.44831434]
Start training ...
Step: 62, L: 172.56954956054688, J: 52.97259521484375
Current x: (-0.00011606856365877113, 0.0335636646448495, 0.002621836423873902), u: [0. 0.], noise: [4.522437  5.0607261]
Start training ...
Step: 62, L: 113.85169982910156, J: 50.46772766113281
Current x: (-0.00014231689035781956, 0.03591529662740215, 0.002754673004150391), u: [0. 0.], noise: [4.77494966 5.21217638]
Start training ...
Step: 62, L: 188.5291290283203, J: 54.28087615966797
Current x: (-0.0001

Current x: (-0.000910530652379163, 0.20964022753354705, -0.012819930744171143), u: [0. 0.], noise: [5.69205988 5.22538574]
Start training ...
Step: 62, L: 140.56472778320312, J: 50.533668518066406
Current x: (-0.0007966929032113491, 0.21579831310759984, -0.013472782421112061), u: [0. 0.], noise: [4.63134963 5.24422904]
Start training ...
Step: 62, L: 137.23138427734375, J: 50.50159454345703
Current x: (-0.0006688594470808, 0.22206705358207438, -0.014078966665267945), u: [0. 0.], noise: [4.99486831 5.23851482]
Start training ...
Step: 62, L: 121.33631896972656, J: 56.490386962890625
Current x: (-0.0005277212421937771, 0.22834226222186418, -0.014746438837051392), u: [0. 0.], noise: [6.12195686 5.4345762 ]
Start training ...
Step: 62, L: 143.5424346923828, J: 52.42036056518555
Current x: (-0.00037217596720825753, 0.23465970775951997, -0.01543827567100525), u: [0. 0.], noise: [5.33014908 5.30977793]
Start training ...
Step: 62, L: 133.2760009765625, J: 53.34467315673828
Current x: (-0.0001

Step: 62, L: 94.81330108642578, J: 52.373756408691406
Current x: (0.03459736798887442, 0.5893039346347246, -0.061963507032394394), u: [0. 0.], noise: [6.25133489 5.36033877]
Start training ...
Step: 62, L: 187.95809936523438, J: 51.20140838623047
Current x: (0.036453241233580874, 0.5992829525276007, -0.06298838024139403), u: [0. 0.], noise: [4.65570545 5.07704983]
Start training ...
Step: 62, L: 108.69677734375, J: 55.631500244140625
Current x: (0.03838101844576596, 0.6094399093319457, -0.06392415385246275), u: [0. 0.], noise: [5.06835851 5.76595874]
Start training ...
Step: 62, L: 167.8959503173828, J: 52.359920501708984
Current x: (0.04037006017909583, 0.619587211587723, -0.06490206189155577), u: [0. 0.], noise: [6.15851267 5.68051683]
Start training ...
Step: 62, L: 137.95059204101562, J: 55.48151397705078
Current x: (0.04242831221030572, 0.6298347327057846, -0.06594972996711729), u: [0. 0.], noise: [5.18810789 5.97818119]
Start training ...
Step: 62, L: 95.00819396972656, J: 55.498

Step: 62, L: 123.67713165283203, J: 57.583560943603516
Current x: (0.19930755400749137, 1.1236492511853602, -0.10685439672470091), u: [0. 0.], noise: [5.297976   5.18645567]
Start training ...
Step: 62, L: 91.39624786376953, J: 55.14458465576172
Current x: (0.20522186014580962, 1.1374660371065883, -0.10793621547222136), u: [0. 0.], noise: [5.13934568 5.13586113]
Start training ...
Step: 62, L: 199.5325469970703, J: 55.687225341796875
Current x: (0.21124798398181982, 1.151344286452756, -0.10900688219070433), u: [0. 0.], noise: [5.43447941 5.16544205]
Start training ...
Step: 62, L: 154.8831787109375, J: 52.42266082763672
Current x: (0.21738479928621549, 1.1652630768399679, -0.11007720043659208), u: [0. 0.], noise: [4.87609773 6.30788256]
Start training ...
Step: 62, L: 204.95191955566406, J: 52.36745071411133
Current x: (0.22363693233286766, 1.179254567904982, -0.11112061495780944), u: [0. 0.], noise: [5.44618671 5.77719327]
Start training ...
Step: 62, L: 65.88198852539062, J: 53.24292

Step: 62, L: 191.22862243652344, J: 57.57218933105469
Current x: (0.5920367781986442, 1.8082986190098, -0.14485140202045446), u: [0.02450791 0.        ], noise: [6.75846279 5.62574972]
Start training ...
Step: 62, L: 148.84661865234375, J: 52.77980041503906
Current x: (0.6040671213255698, 1.8253398087327761, -0.1453728129625321), u: [0.02387598 0.        ], noise: [6.29632558 5.13834011]
Start training ...
Step: 62, L: 124.17499542236328, J: 52.75977325439453
Current x: (0.6162765786049395, 1.842627875259372, -0.1457785017728806), u: [0.02369943 0.        ], noise: [5.86882079 5.38017419]
Start training ...
Step: 62, L: 155.6827392578125, J: 49.89326477050781
Current x: (0.6286520258346979, 1.860068709466872, -0.1460660044431687), u: [0.02343377 0.        ], noise: [5.54978322 5.81423329]
Start training ...
Step: 62, L: 51.070980072021484, J: 50.641258239746094
Current x: (0.6411912232898981, 1.8776438563194833, -0.14630227248668676), u: [0.02278094 0.        ], noise: [4.75698359 5.40

Current x: (0.0006825392161771787, 0.0583907659937455, -0.0069812955379486095), u: [0. 0.], noise: [5.01568599 5.11376795]
Start training ...
Step: 63, L: 122.82027435302734, J: 56.84806823730469
Current x: (0.0007659999903304853, 0.06163602965324884, -0.0073381027221679695), u: [0. 0.], noise: [5.07102146 5.36181956]
Start training ...
Step: 63, L: 90.35188293457031, J: 54.98242950439453
Current x: (0.0008565323780033579, 0.06491321399404573, -0.007704718112945558), u: [0. 0.], noise: [5.91997614 5.2886771 ]
Start training ...
Step: 63, L: 65.16853332519531, J: 54.20475769042969
Current x: (0.0009547204230841978, 0.06825265437581278, -0.008100413322448732), u: [0. 0.], noise: [5.45064386 5.33059506]
Start training ...
Step: 63, L: 106.80241394042969, J: 54.716739654541016
Current x: (0.001061544334248889, 0.07173192683395956, -0.008432978630065919), u: [0. 0.], noise: [6.29049544 4.28464517]
Start training ...
Step: 63, L: 128.72938537597656, J: 56.07512664794922
Current x: (0.0011771

Step: 63, L: 131.14300537109375, J: 58.087730407714844
Current x: (0.012636511787117553, 0.2780485283272858, -0.003432924556732179), u: [0. 0.], noise: [5.42276438 4.98498822]
Start training ...
Step: 63, L: 85.87294006347656, J: 58.07725524902344
Current x: (0.013050841453896882, 0.28463564990516654, -0.003339739274978639), u: [0. 0.], noise: [5.21334886 5.38368724]
Start training ...
Step: 63, L: 141.2861328125, J: 55.24359130859375
Current x: (0.013468744016740681, 0.2912825406493722, -0.003202776384353639), u: [0. 0.], noise: [4.66649067 5.60768755]
Start training ...
Step: 63, L: 105.42327880859375, J: 55.73752212524414
Current x: (0.013890185706858871, 0.29800812911986063, -0.00308284730911255), u: [0. 0.], noise: [4.83765208 6.24550665]
Start training ...
Step: 63, L: 81.75373840332031, J: 48.851768493652344
Current x: (0.014314917980674361, 0.3047801300759696, -0.0030570379257202156), u: [0. 0.], noise: [5.43368294 4.28446684]
Start training ...
Step: 63, L: 134.84982299804688,

Step: 63, L: 196.09622192382812, J: 56.3238525390625
Current x: (0.03527476791017872, 0.619071571919253, -0.011406330490112306), u: [0. 0.], noise: [5.25951772 4.48677825]
Start training ...
Step: 63, L: 222.878662109375, J: 56.88578414916992
Current x: (0.035999964948095174, 0.6288412558780015, -0.011655896186828614), u: [0. 0.], noise: [4.98417222 5.80464869]
Start training ...
Step: 63, L: 187.55294799804688, J: 54.71290969848633
Current x: (0.036736278692194625, 0.6386045060285459, -0.011828187942504884), u: [0. 0.], noise: [4.92770783 5.94340606]
Start training ...
Step: 63, L: 110.99077606201172, J: 57.084503173828125
Current x: (0.03748516748956093, 0.6484655650135426, -0.012082527351379395), u: [0. 0.], noise: [4.79765659 6.0970119 ]
Start training ...
Step: 63, L: 100.83180236816406, J: 54.214115142822266
Current x: (0.03824691454478778, 0.6584326593304234, -0.012438436603546143), u: [0. 0.], noise: [6.39543128 5.9020889 ]
Start training ...
Step: 63, L: 89.26677703857422, J: 

Step: 63, L: 182.45001220703125, J: 58.22144317626953
Current x: (0.08279169851458236, 1.1093961019864902, -0.036200495433807364), u: [0. 0.], noise: [5.84559913 5.03340696]
Start training ...
Step: 63, L: 95.1186294555664, J: 56.01707458496094
Current x: (0.08448038069335621, 1.122517477386839, -0.03717914435863494), u: [0. 0.], noise: [5.09077162 5.21773906]
Start training ...
Step: 63, L: 134.08758544921875, J: 56.18944549560547
Current x: (0.08620843680969625, 1.1357450405745377, -0.038076574039459216), u: [0. 0.], noise: [5.15782049 4.74097558]
Start training ...
Step: 63, L: 165.29026794433594, J: 56.26448440551758
Current x: (0.08797481025805196, 1.1490217424553988, -0.03898670046329497), u: [0. 0.], noise: [5.05035479 6.43135155]
Start training ...
Step: 63, L: 150.1021728515625, J: 60.09303283691406
Current x: (0.08977886582364163, 1.1623066064547625, -0.0398551423549652), u: [0. 0.], noise: [5.62766309 5.43329337]
Start training ...
Step: 63, L: 135.9937286376953, J: 59.84606

Step: 63, L: 189.21170043945312, J: 57.61178207397461
Current x: (0.2121744611254753, 1.8075864793427148, -0.051647198390960665), u: [0. 0.], noise: [5.10633127 4.75838182]
Start training ...
Step: 63, L: 122.52648162841797, J: 54.96672821044922
Current x: (0.21629235677061384, 1.824969516175167, -0.051347984051704376), u: [0. 0.], noise: [5.0000277  5.69502529]
Start training ...
Step: 63, L: 82.01226806640625, J: 56.635799407958984
Current x: (0.2204611782508874, 1.8423567089937423, -0.05101397476196286), u: [0. 0.], noise: [4.70819222 6.12902014]
Start training ...
Step: 63, L: 76.43659973144531, J: 52.02981948852539
Current x: (0.22468489254347002, 1.8598309974950167, -0.0507494652509689), u: [0. 0.], noise: [4.75513626 5.48332236]
Start training ...
Step: 63, L: 91.79681396484375, J: 57.1059684753418
Current x: (0.22896386778883573, 1.877406597406357, -0.050627038526535), u: [0. 0.], noise: [5.98554086 4.78203891]
Start training ...
Step: 63, L: 148.7537384033203, J: 56.6425704956

Step: 64, L: 177.41802978515625, J: 57.32746124267578
Current x: (0.0008507070974856371, 0.042099518697734525, -0.006217152452468874), u: [0. 0.], noise: [5.63466176 5.34472601]
Start training ...
Step: 64, L: 116.32166290283203, J: 56.002906799316406
Current x: (0.0009431771346296002, 0.04498526939518758, -0.006547695326805117), u: [0. 0.], noise: [3.95611763 5.4720205 ]
Start training ...
Step: 64, L: 139.61595153808594, J: 54.083251953125
Current x: (0.0010424731802566112, 0.04798793760173711, -0.006849244642257692), u: [0. 0.], noise: [5.5341049  5.56258729]
Start training ...
Step: 64, L: 55.78376007080078, J: 57.131107330322266
Current x: (0.0011479424397656023, 0.05095239947134361, -0.007302384257316591), u: [0. 0.], noise: [4.35978229 5.1261404 ]
Start training ...
Step: 64, L: 110.4095458984375, J: 54.553409576416016
Current x: (0.0012610120357307573, 0.05404550452109309, -0.007758372116088869), u: [0. 0.], noise: [6.529876  4.5794054]
Start training ...
Step: 64, L: 108.66348

Step: 64, L: 234.17864990234375, J: 54.65034484863281
Current x: (0.020876045630940435, 0.2549365225667315, -0.02709451775550843), u: [0. 0.], noise: [4.54844786 5.59312452]
Start training ...
Step: 64, L: 130.006591796875, J: 57.728172302246094
Current x: (0.021822035952460454, 0.2608712065379972, -0.027057751822471623), u: [0. 0.], noise: [5.04926604 4.25526385]
Start training ...
Step: 64, L: 101.05781555175781, J: 56.85151290893555
Current x: (0.02279550101494136, 0.26683867557428975, -0.027125453519821173), u: [0. 0.], noise: [5.32709265 4.22542762]
Start training ...
Step: 64, L: 120.01742553710938, J: 58.9616813659668
Current x: (0.023794138969720425, 0.2727552569478732, -0.02711375501155854), u: [0. 0.], noise: [5.34037625 6.42067062]
Start training ...
Step: 64, L: 123.78814697265625, J: 58.59440612792969
Current x: (0.024818685392792517, 0.27864573898562844, -0.026991890001297), u: [0. 0.], noise: [4.70921366 5.33673326]
Start training ...
Step: 64, L: 199.13357543945312, J: 

Step: 64, L: 117.91969299316406, J: 55.575687408447266
Current x: (0.08959853499814009, 0.576375856785656, -0.030344178390502945), u: [0. 0.], noise: [5.37773    5.73015933]
Start training ...
Step: 64, L: 113.31578063964844, J: 55.92534255981445
Current x: (0.09183686883372114, 0.5853655866277384, -0.030701373338699357), u: [0. 0.], noise: [4.8536903  4.60782249]
Start training ...
Step: 64, L: 120.42264556884766, J: 55.425819396972656
Current x: (0.09410890347403768, 0.59448459403651, -0.03109381122589113), u: [0. 0.], noise: [5.34324771 5.58117983]
Start training ...
Step: 64, L: 91.66607666015625, J: 54.289039611816406
Current x: (0.09640998169419289, 0.6035683068279261, -0.031461662340164204), u: [0. 0.], noise: [4.22758841 4.84780305]
Start training ...
Step: 64, L: 211.06436157226562, J: 57.4197883605957
Current x: (0.09874502264825002, 0.6127629342646251, -0.03185330662727358), u: [0. 0.], noise: [4.81918479 5.93358052]
Start training ...
Step: 64, L: 116.7358169555664, J: 53.1

Step: 64, L: 102.23120880126953, J: 58.49605178833008
Current x: (0.23082010118457078, 1.0491537595336071, -0.054997779035568295), u: [0. 0.], noise: [6.2648177  4.79529194]
Start training ...
Step: 64, L: 108.46783447265625, J: 51.106475830078125
Current x: (0.23512734782916866, 1.0613643921913316, -0.0553988846302033), u: [0. 0.], noise: [4.78256269 4.5744135 ]
Start training ...
Step: 64, L: 119.3918228149414, J: 59.79838943481445
Current x: (0.23949539196245126, 1.0736983635733057, -0.05565303764343267), u: [0. 0.], noise: [5.41349949 5.18320724]
Start training ...
Step: 64, L: 166.42169189453125, J: 57.64482116699219
Current x: (0.24391524618599453, 1.0859855970330687, -0.055886375713348445), u: [0. 0.], noise: [5.14119275 5.8090247 ]
Start training ...
Step: 64, L: 167.15911865234375, J: 56.4080924987793
Current x: (0.24839404386129832, 1.0983498605173274, -0.05609668455123907), u: [0. 0.], noise: [4.70256056 5.32536696]
Start training ...
Step: 64, L: 78.3735122680664, J: 55.138

Step: 64, L: 158.1713104248047, J: 54.15896224975586
Current x: (0.47809524553523086, 1.6376032940623197, -0.0694739401340485), u: [0. 0.], noise: [4.66238411 5.51007354]
Start training ...
Step: 64, L: 110.97926330566406, J: 58.591346740722656
Current x: (0.48520724021827993, 1.6522429394194695, -0.06964257693290715), u: [0. 0.], noise: [6.14611841 5.08965384]
Start training ...
Step: 64, L: 126.13655090332031, J: 54.69038391113281
Current x: (0.4923898501354351, 1.6669163765997432, -0.06989598269462591), u: [0. 0.], noise: [4.82839214 5.79457088]
Start training ...
Step: 64, L: 233.9394989013672, J: 58.44916915893555
Current x: (0.4996506456348023, 1.6817296674650526, -0.0700437419891358), u: [0. 0.], noise: [5.83129761 5.77406054]
Start training ...
Step: 64, L: 95.69708251953125, J: 57.374595642089844
Current x: (0.5069856309347772, 1.696621660784926, -0.07028811917304997), u: [0. 0.], noise: [5.32118057 5.07432282]
Start training ...
Step: 64, L: 102.48147583007812, J: 59.62036132

Step: 66, L: 148.63986206054688, J: 55.36514663696289
Current x: (3.5518230789946205e-06, 0.014818804041718798, 0.0012021426200866702), u: [0. 0.], noise: [4.69890894 4.54937799]
Start training ...
Step: 66, L: 88.34199523925781, J: 57.76495361328125
Current x: (-1.4141975161951223e-06, 0.016366309539078194, 0.0013408815383911135), u: [0. 0.], noise: [5.18601571 5.22798146]
Start training ...
Step: 66, L: 108.72195434570312, J: 56.75409698486328
Current x: (-7.491993864308671e-06, 0.017857643088273773, 0.001494573545455933), u: [0. 0.], noise: [5.43092507 5.05792818]
Start training ...
Step: 66, L: 119.33744049072266, J: 54.1988410949707
Current x: (-1.4966183512971135e-05, 0.019409375466284864, 0.0016440689563751224), u: [0. 0.], noise: [5.29054225 5.14217501]
Start training ...
Step: 66, L: 97.67227172851562, J: 56.0954475402832
Current x: (-2.400800886755512e-05, 0.021028992018281293, 0.0018308640480041508), u: [0. 0.], noise: [5.90553246 5.54468243]
Start training ...
Step: 66, L: 

Step: 66, L: 142.44810485839844, J: 57.57305145263672
Current x: (-0.004931496924664968, 0.1277757678644155, 0.016956669235229493), u: [0. 0.], noise: [4.54176822 5.47092407]
Start training ...
Step: 66, L: 85.7911605834961, J: 54.539756774902344
Current x: (-0.0052891876283173815, 0.1314367713096942, 0.01735980715751648), u: [0. 0.], noise: [5.21535559 5.1813643 ]
Start training ...
Step: 66, L: 90.10978698730469, J: 55.45002365112305
Current x: (-0.0056638557081504755, 0.13511789996142798, 0.01767002949714661), u: [0. 0.], noise: [5.26405958 5.55294369]
Start training ...
Step: 66, L: 76.27677154541016, J: 53.246131896972656
Current x: (-0.00605657138678222, 0.13885754395106578, 0.01798365092277527), u: [0. 0.], noise: [5.19085081 5.25960065]
Start training ...
Step: 66, L: 95.8111343383789, J: 55.38188171386719
Current x: (-0.006468399747435838, 0.1426977194004945, 0.01826838393211365), u: [0. 0.], noise: [5.22782102 5.45440779]
Start training ...
Step: 66, L: 165.00686645507812, J:

Step: 66, L: 115.5859146118164, J: 57.24347686767578
Current x: (-0.04306305356466145, 0.3448333704648486, 0.023956116533279434), u: [0. 0.], noise: [6.13284099 5.34682798]
Start training ...
Step: 66, L: 59.01158142089844, J: 56.81789016723633
Current x: (-0.04445345964549583, 0.3508529285532122, 0.023803233146667494), u: [0. 0.], noise: [5.00069076 4.20084238]
Start training ...
Step: 66, L: 141.86245727539062, J: 58.229827880859375
Current x: (-0.045871363926172276, 0.35703912420787504, 0.023728951072692885), u: [0. 0.], noise: [5.33431223 4.97056024]
Start training ...
Step: 66, L: 62.396575927734375, J: 57.808189392089844
Current x: (-0.04731116876287821, 0.36316421252996617, 0.02373465385437013), u: [0. 0.], noise: [5.13107234 3.90939257]
Start training ...
Step: 66, L: 168.42481994628906, J: 59.88922882080078
Current x: (-0.04877542368650975, 0.36933849800226864, 0.023776731872558608), u: [0. 0.], noise: [4.69800693 5.84606719]
Start training ...
Step: 66, L: 121.08307647705078,

Step: 66, L: 134.23846435546875, J: 57.85388946533203
Current x: (-0.13034280459143635, 0.6494205921074676, 0.03518074128627779), u: [0. 0.], noise: [4.40465129 4.6392078 ]
Start training ...
Step: 66, L: 60.74260711669922, J: 61.58568572998047
Current x: (-0.13294752305138638, 0.6574050613501435, 0.035887858867645284), u: [0. 0.], noise: [5.90833421 5.44668993]
Start training ...
Step: 66, L: 166.54141235351562, J: 58.869049072265625
Current x: (-0.13558405191658013, 0.6653123569264621, 0.03657152078151705), u: [0. 0.], noise: [5.46207751 5.2825635 ]
Start training ...
Step: 66, L: 91.80650329589844, J: 56.61317825317383
Current x: (-0.1382613227860148, 0.6733734237864336, 0.03730134711265566), u: [0. 0.], noise: [5.53821469 5.6671642 ]
Start training ...
Step: 66, L: 70.51231384277344, J: 53.582916259765625
Current x: (-0.14097787968400252, 0.6815272363218239, 0.038049124836921716), u: [0. 0.], noise: [5.15123068 5.57322494]
Start training ...
Step: 66, L: 122.95484161376953, J: 55.1

Current x: (-0.2949687483372638, 1.0712825642659747, 0.0710906394004822), u: [0. 0.], noise: [5.6610373  4.47832392]
Start training ...
Step: 66, L: 69.68668365478516, J: 56.07160949707031
Current x: (-0.3000067382913589, 1.081951215610383, 0.07188511908054354), u: [0. 0.], noise: [4.83011428 5.09098267]
Start training ...
Step: 66, L: 93.89663696289062, J: 59.55472183227539
Current x: (-0.30511674891408125, 1.0926502420165478, 0.07279787011146548), u: [0. 0.], noise: [4.67552859 5.16862741]
Start training ...
Step: 66, L: 187.44766235351562, J: 52.61955261230469
Current x: (-0.3102980160529637, 1.1033578158578583, 0.07368453433513644), u: [0. 0.], noise: [4.85821994 4.97312929]
Start training ...
Step: 66, L: 155.08973693847656, J: 55.43671417236328
Current x: (-0.3155508832725112, 1.1140661980074433, 0.07452188868522647), u: [0. 0.], noise: [5.00668489 4.7415313 ]
Start training ...
Step: 66, L: 99.78511047363281, J: 56.313819885253906
Current x: (-0.32087612679714556, 1.124774047380

Step: 66, L: 166.06402587890625, J: 59.9254150390625
Current x: (-0.6593974360850453, 1.6599819280429926, 0.1371822168827057), u: [0. 0.], noise: [5.87226712 6.02032623]
Start training ...
Step: 66, L: 162.4834442138672, J: 55.38640594482422
Current x: (-0.6696703256797564, 1.6733138772321325, 0.1385155421257019), u: [0. 0.], noise: [4.77448378 4.62686771]
Start training ...
Step: 66, L: 77.65690612792969, J: 58.70340347290039
Current x: (-0.6801058492842748, 1.6868429129854838, 0.13983406147956848), u: [0. 0.], noise: [5.29782241 5.39555707]
Start training ...
Step: 66, L: 155.14646911621094, J: 58.73664855957031
Current x: (-0.6906711802011336, 1.7003220793647613, 0.1411673424243927), u: [0. 0.], noise: [4.73209064 5.37616724]
Start training ...
Step: 66, L: 108.65979766845703, J: 59.538307189941406
Current x: (-0.7013855541544844, 1.7138791460243574, 0.1424908499240875), u: [0. 0.], noise: [5.82930657 5.54385121]
Start training ...
Step: 66, L: 45.937191009521484, J: 61.071327209472

Step: 67, L: 83.36370086669922, J: 56.69576644897461
Current x: (-1.5255772494716875e-05, 0.0083354294978853, -0.0015882936239242557), u: [0. 0.], noise: [5.38017324 4.39508987]
Start training ...
Step: 67, L: 149.7872314453125, J: 60.05923080444336
Current x: (-1.2790310142216912e-05, 0.009192731945953776, -0.0016895093441009527), u: [0. 0.], noise: [5.53960566 4.77896662]
Start training ...
Step: 67, L: 129.09608459472656, J: 57.10798263549805
Current x: (-8.772249679574132e-06, 0.010046559444295312, -0.0016922167539596563), u: [0. 0.], noise: [5.02501582 5.7030256 ]
Start training ...
Step: 67, L: 88.84009552001953, J: 56.999755859375
Current x: (-3.010857657630081e-06, 0.010951242674386274, -0.001618860244750977), u: [0. 0.], noise: [5.43167732 5.29960139]
Start training ...
Step: 67, L: 77.76195526123047, J: 53.93036651611328
Current x: (4.565950679415319e-06, 0.011947728533324393, -0.001613304734230042), u: [0. 0.], noise: [5.48562828 5.49835513]
Start training ...
Step: 67, L: 1

Step: 67, L: 85.06883239746094, J: 56.80750274658203
Current x: (0.0023906851242091387, 0.09444916868424971, -0.00844041454792023), u: [0. 0.], noise: [4.82262801 6.05230922]
Start training ...
Step: 67, L: 115.47618865966797, J: 57.70047378540039
Current x: (0.0025480853218835964, 0.0976099081264011, -0.00884636244773865), u: [0. 0.], noise: [4.95957396 5.65697172]
Start training ...
Step: 67, L: 182.68380737304688, J: 61.04785919189453
Current x: (0.002714664308268196, 0.10087710253768113, -0.009375278449058536), u: [0. 0.], noise: [5.38106559 4.63044086]
Start training ...
Step: 67, L: 94.01844787597656, J: 55.510955810546875
Current x: (0.002890634953256408, 0.10422490997540076, -0.009973934268951419), u: [0. 0.], noise: [6.24410819 5.97270062]
Start training ...
Step: 67, L: 105.5560073852539, J: 57.8370361328125
Current x: (0.0030759915264658923, 0.10759282402301558, -0.010497527623176578), u: [0. 0.], noise: [5.11505    5.64119867]
Start training ...
Step: 67, L: 129.68966674804

Step: 67, L: 60.87556457519531, J: 58.10797882080078
Current x: (0.02412664225534857, 0.29642684343105524, -0.030494695711135878), u: [0. 0.], noise: [5.93919809 5.49367661]
Start training ...
Step: 67, L: 92.84146118164062, J: 60.71515655517578
Current x: (0.02516609370536381, 0.3026004685125577, -0.030937200665473952), u: [0. 0.], noise: [5.08191363 5.06071651]
Start training ...
Step: 67, L: 171.51797485351562, J: 60.824398040771484
Current x: (0.026240403955551724, 0.30893584951657244, -0.0313351534843445), u: [0. 0.], noise: [6.20989346 4.91331774]
Start training ...
Step: 67, L: 68.032958984375, J: 54.321407318115234
Current x: (0.027346087657331538, 0.31530400814083365, -0.031730986618995684), u: [0. 0.], noise: [5.0963457  5.08719926]
Start training ...
Step: 67, L: 99.59092712402344, J: 59.493682861328125
Current x: (0.02848662040755948, 0.3218029418109717, -0.0319971621990204), u: [0. 0.], noise: [5.11301977 5.22196511]
Start training ...
Step: 67, L: 81.50592803955078, J: 59

Step: 67, L: 171.36529541015625, J: 62.1262092590332
Current x: (0.11444191267238896, 0.6555238634099241, -0.04441462974548344), u: [0. 0.], noise: [4.94096365 5.42345776]
Start training ...
Step: 67, L: 79.90119934082031, J: 59.72844696044922
Current x: (0.1172786865812782, 0.6635926046134512, -0.04452786109447484), u: [0. 0.], noise: [5.6999737  5.42691025]
Start training ...
Step: 67, L: 161.3482208251953, J: 62.82796096801758
Current x: (0.12016147855289942, 0.6717157658957899, -0.044689341831207326), u: [0. 0.], noise: [5.5179531  5.25505106]
Start training ...
Step: 67, L: 118.45922088623047, J: 61.09526443481445
Current x: (0.12309379979010386, 0.6799695127258092, -0.04482351624965673), u: [0. 0.], noise: [5.63229881 5.42331728]
Start training ...
Step: 67, L: 64.68980407714844, J: 53.13132095336914
Current x: (0.1260742488521368, 0.6883184844295943, -0.04493140048980718), u: [0. 0.], noise: [5.53270959 4.76530425]
Start training ...
Step: 67, L: 134.52105712890625, J: 56.668624

Step: 67, L: 133.14105224609375, J: 56.96649169921875
Current x: (0.29398333424575407, 1.097365624906107, -0.041838817071914725), u: [0. 0.], noise: [4.72230944 4.83739082]
Start training ...
Step: 67, L: 129.94276428222656, J: 57.487308502197266
Current x: (0.2989914751801926, 1.108460537191091, -0.04146103899478917), u: [0. 0.], noise: [5.12468212 5.16128954]
Start training ...
Step: 67, L: 136.416015625, J: 61.0187873840332
Current x: (0.3040396011007251, 1.119529582892997, -0.041094769048690846), u: [0. 0.], noise: [5.33009167 5.07749832]
Start training ...
Step: 67, L: 116.0270767211914, J: 54.36869812011719
Current x: (0.3091303615110329, 1.130645341797303, -0.04073215987682348), u: [0. 0.], noise: [4.67966577 4.99208377]
Start training ...
Step: 67, L: 118.20018768310547, J: 55.8214111328125
Current x: (0.31426387963462077, 1.1418199810100123, -0.04034429140090948), u: [0. 0.], noise: [4.90232108 5.61505038]
Start training ...
Step: 67, L: 115.27286529541016, J: 58.2044143676757

Step: 67, L: 140.3563232421875, J: 58.619873046875
Current x: (0.5972591586178092, 1.7347903666986817, -0.031031178879737917), u: [0. 0.], noise: [3.99803738 4.48568384]
Start training ...
Step: 67, L: 97.83320617675781, J: 57.68937683105469
Current x: (0.6040714816775349, 1.7483861174223452, -0.030741861248016422), u: [0. 0.], noise: [4.59747497 5.77856689]
Start training ...
Step: 67, L: 99.26020812988281, J: 57.81344985961914
Current x: (0.6109101264981397, 1.7618488317935406, -0.030501308274269168), u: [0. 0.], noise: [5.01633349 4.45756463]
Start training ...
Step: 67, L: 101.07970428466797, J: 63.534507751464844
Current x: (0.6177806641772758, 1.7753676600444441, -0.030378864479065005), u: [0. 0.], noise: [5.66041324 4.99878711]
Start training ...
Step: 67, L: 131.3481903076172, J: 54.02892303466797
Current x: (0.624680094004199, 1.7888524374304269, -0.030200543808937136), u: [0. 0.], noise: [4.35322503 5.25222083]
Start training ...
Step: 67, L: 82.64897918701172, J: 62.46684265

Step: 68, L: 101.42901611328125, J: 61.091556549072266
Current x: (0.0008054753735897129, 0.01869811399631737, -0.008694665718078615), u: [0. 0.], noise: [5.41148968 4.43584503]
Start training ...
Step: 68, L: 81.8262939453125, J: 55.19527816772461
Current x: (0.0009170742375791806, 0.01977036912106764, -0.008786277437210084), u: [0. 0.], noise: [5.31864092 5.62247316]
Start training ...
Step: 68, L: 81.50342559814453, J: 59.948585510253906
Current x: (0.0010372349213472528, 0.020846320415311075, -0.008780324697494507), u: [0. 0.], noise: [4.6661998  5.03697515]
Start training ...
Step: 68, L: 74.8482666015625, J: 55.61361312866211
Current x: (0.0011670086481101302, 0.022035340920417357, -0.008804755210876465), u: [0. 0.], noise: [5.4659364  4.69072804]
Start training ...
Step: 68, L: 101.87673950195312, J: 60.717018127441406
Current x: (0.001305301967754232, 0.023213641481993896, -0.00886626329421997), u: [0. 0.], noise: [4.65770414 5.95890743]
Start training ...
Step: 68, L: 97.75700

Step: 68, L: 84.22563171386719, J: 59.82941436767578
Current x: (0.019893456351886378, 0.11319970247556445, -0.020875901556015018), u: [0. 0.], noise: [5.00801908 4.92123879]
Start training ...
Step: 68, L: 105.59040069580078, J: 55.8213005065918
Current x: (0.020690063271804837, 0.11617960480494091, -0.02132256126403809), u: [0. 0.], noise: [4.88943136 5.08773802]
Start training ...
Step: 68, L: 138.46548461914062, J: 59.38300323486328
Current x: (0.021507396908161662, 0.11917121661666985, -0.021760542964935306), u: [0. 0.], noise: [4.79128469 4.98030753]
Start training ...
Step: 68, L: 115.18327331542969, J: 59.42805480957031
Current x: (0.022346002812324173, 0.12217931853387565, -0.02221835532188416), u: [0. 0.], noise: [4.48128    5.07336164]
Start training ...
Step: 68, L: 124.76785278320312, J: 60.758968353271484
Current x: (0.023205870553465707, 0.12518334832141376, -0.022695069980621343), u: [0. 0.], noise: [4.98022119 4.87632963]
Start training ...
Step: 68, L: 53.201011657714

Step: 68, L: 145.81800842285156, J: 57.11081314086914
Current x: (0.08814349480570095, 0.29496346482771724, -0.032661313152313236), u: [0. 0.], noise: [5.48615955 5.59648239]
Start training ...
Step: 68, L: 88.40018463134766, J: 59.4498291015625
Current x: (0.09032775893095346, 0.2996452991536279, -0.03258778014183045), u: [0. 0.], noise: [5.14679578 4.65370727]
Start training ...
Step: 68, L: 107.83103942871094, J: 57.59723663330078
Current x: (0.09254821398368149, 0.30445380656554155, -0.03252527942657471), u: [0. 0.], noise: [3.90491624 4.96532108]
Start training ...
Step: 68, L: 75.98355102539062, J: 58.97494888305664
Current x: (0.09480060104691601, 0.3092608439124466, -0.03241346988677979), u: [0. 0.], noise: [5.33133728 5.36843822]
Start training ...
Step: 68, L: 154.42227172851562, J: 58.80237579345703
Current x: (0.09708183371843143, 0.3139734358473036, -0.032407700824737554), u: [0. 0.], noise: [5.63289408 4.52295547]
Start training ...
Step: 68, L: 85.42854309082031, J: 59.5

Step: 68, L: 140.22479248046875, J: 56.816680908203125
Current x: (0.21825489647434473, 0.5448381376723641, -0.029750400161743174), u: [0. 0.], noise: [4.68507606 4.76983052]
Start training ...
Step: 68, L: 145.01219177246094, J: 61.148704528808594
Current x: (0.2218591354767802, 0.5510551220298415, -0.029698563146591198), u: [0. 0.], noise: [5.13239591 4.0797591 ]
Start training ...
Step: 68, L: 66.18193054199219, J: 59.08817672729492
Current x: (0.2254914990550862, 0.5572361786441283, -0.02965520157814027), u: [0. 0.], noise: [5.04232854 4.42137183]
Start training ...
Step: 68, L: 90.10409545898438, J: 61.187625885009766
Current x: (0.2291512173866905, 0.5633570444689654, -0.029506576347351085), u: [0. 0.], noise: [4.58758464 5.64857972]
Start training ...
Step: 68, L: 72.54194641113281, J: 58.99582290649414
Current x: (0.2328389963989583, 0.5694428642201863, -0.029295855474472058), u: [0. 0.], noise: [4.812961 5.331215]
Start training ...
Step: 68, L: 99.43788146972656, J: 54.025676

Step: 68, L: 157.27561950683594, J: 59.69627380371094
Current x: (0.4176556389321288, 0.8623488909680029, -0.020844256043434154), u: [0. 0.], noise: [4.02882551 5.26629335]
Start training ...
Step: 68, L: 90.49417114257812, J: 59.77664566040039
Current x: (0.42248319826643105, 0.869560879551783, -0.020573423194885265), u: [0. 0.], noise: [4.96926967 5.05444962]
Start training ...
Step: 68, L: 118.41059875488281, J: 58.772579193115234
Current x: (0.4273301311803934, 0.8767211780475155, -0.0204263371706009), u: [0. 0.], noise: [4.93067657 4.48480349]
Start training ...
Step: 68, L: 115.70515441894531, J: 59.20191192626953
Current x: (0.4321976848625242, 0.8839026363946305, -0.020287769126892098), u: [0. 0.], noise: [5.12208272 5.06952723]
Start training ...
Step: 68, L: 101.2962875366211, J: 64.54247283935547
Current x: (0.43708446958349456, 0.8910444462910778, -0.020104613804817206), u: [0. 0.], noise: [5.26046768 5.16875806]
Start training ...
Step: 68, L: 149.4989013671875, J: 59.8118

Current x: (0.6692834760290548, 1.228853344331681, -0.01256281392574311), u: [0. 0.], noise: [5.77932346 4.96794056]
Start training ...
Step: 68, L: 86.74140930175781, J: 55.69424057006836
Current x: (0.674891185997432, 1.2371254155035833, -0.012432145237922675), u: [0. 0.], noise: [4.98101476 4.69521758]
Start training ...
Step: 68, L: 198.30581665039062, J: 63.76811218261719
Current x: (0.6805123971983348, 1.245491128258473, -0.012220338225364691), u: [0. 0.], noise: [5.59660831 5.71221   ]
Start training ...
Step: 68, L: 83.00071716308594, J: 61.20689392089844
Current x: (0.6861456377219383, 1.2538433894710515, -0.011979951500892646), u: [0. 0.], noise: [5.01870264 5.10124136]
Start training ...
Step: 68, L: 103.61443328857422, J: 56.971736907958984
Current x: (0.6917926976606658, 1.2623454481253293, -0.011751124978065498), u: [0. 0.], noise: [4.96692808 5.32810337]
Start training ...
Step: 68, L: 101.17530059814453, J: 60.13656234741211
Current x: (0.6974518809527738, 1.27087842852

Step: 68, L: 173.79400634765625, J: 55.66633605957031
Current x: (0.9275889692087552, 1.617296823120559, -0.01544724254608155), u: [0. 0.], noise: [5.73307739 4.3634417 ]
Start training ...
Step: 68, L: 53.028533935546875, J: 61.47126770019531
Current x: (0.9337576842980609, 1.62648450925837, -0.01560941820144654), u: [0. 0.], noise: [4.25528331 5.05451726]
Start training ...
Step: 68, L: 60.82136154174805, J: 60.07408905029297
Current x: (0.9399419951056274, 1.635700726885386, -0.015634630250930794), u: [0. 0.], noise: [5.57611258 4.1791147 ]
Start training ...
Step: 68, L: 109.85258483886719, J: 62.45163345336914
Current x: (0.9461408373809457, 1.6448668112063891, -0.01573976569175721), u: [0. 0.], noise: [4.34147078 5.68554662]
Start training ...
Step: 68, L: 106.71322631835938, J: 57.99992752075195
Current x: (0.9523549309732493, 1.6540272991048728, -0.015705201339721687), u: [0. 0.], noise: [5.12155877 5.15220397]
Start training ...
Step: 68, L: 113.81807708740234, J: 65.090896606

Step: 70, L: 90.52027893066406, J: 63.49987030029297
Current x: (0.0005667298618641577, 0.01530767545709947, -0.0061089215517044085), u: [0. 0.], noise: [5.49799187 4.98637918]
Start training ...
Step: 70, L: 77.12541198730469, J: 57.92203903198242
Current x: (0.0006408568375848136, 0.01585772677214256, -0.006287448787689211), u: [0. 0.], noise: [5.08185859 4.39174819]
Start training ...
Step: 70, L: 132.03538513183594, J: 63.680049896240234
Current x: (0.0007213885935777092, 0.016475195642504967, -0.0064148147344589256), u: [0. 0.], noise: [5.16344599 5.56497247]
Start training ...
Step: 70, L: 86.11372375488281, J: 61.72993469238281
Current x: (0.0008078767916502974, 0.017059006398355333, -0.0064731696128845235), u: [0. 0.], noise: [5.42607765 4.82787207]
Start training ...
Step: 70, L: 64.60030364990234, J: 61.62915802001953
Current x: (0.0009012470241345901, 0.0177346369156641, -0.006571677136421206), u: [0. 0.], noise: [5.59746212 5.38466686]
Start training ...
Step: 70, L: 139.79

Step: 70, L: 141.4585418701172, J: 59.597633361816406
Current x: (0.011640335561272917, 0.061853407629644516, -0.017068220472335815), u: [0. 0.], noise: [4.84348345 4.85381907]
Start training ...
Step: 70, L: 88.30447387695312, J: 54.80705261230469
Current x: (0.012143512618579034, 0.06323545069609644, -0.01736411955356598), u: [0. 0.], noise: [5.25113243 4.71515674]
Start training ...
Step: 70, L: 131.15646362304688, J: 64.5223388671875
Current x: (0.0126632404408728, 0.06460608269952875, -0.01766105217933655), u: [0. 0.], noise: [4.78398113 5.16815032]
Start training ...
Step: 70, L: 133.74664306640625, J: 59.943580627441406
Current x: (0.013200272977811116, 0.0659921934106454, -0.017904387211799624), u: [0. 0.], noise: [5.15802772 5.39338667]
Start training ...
Step: 70, L: 141.83892822265625, J: 61.60647964477539
Current x: (0.013754881112008576, 0.06739236204309389, -0.018186139154434205), u: [0. 0.], noise: [5.43494185 4.74111465]
Start training ...
Step: 70, L: 132.2889556884765

Step: 70, L: 48.7445068359375, J: 62.59833526611328
Current x: (0.04871903510166415, 0.13289302977858497, -0.010901978087425239), u: [0. 0.], noise: [5.05727652 4.45525962]
Start training ...
Step: 70, L: 117.83723449707031, J: 61.136356353759766
Current x: (0.04988574129172892, 0.13488703570797475, -0.010955555415153511), u: [0. 0.], noise: [4.65713036 5.05420241]
Start training ...
Step: 70, L: 62.4406623840332, J: 59.678226470947266
Current x: (0.05106281782336204, 0.13685123880844272, -0.010948931050300606), u: [0. 0.], noise: [5.34956078 4.88432355]
Start training ...
Step: 70, L: 104.05816650390625, J: 56.06907272338867
Current x: (0.05225053344610741, 0.13880551686186834, -0.010982013916969307), u: [0. 0.], noise: [5.1783612  5.64919218]
Start training ...
Step: 70, L: 97.0734634399414, J: 65.34440612792969
Current x: (0.05344945385489739, 0.1408021220556064, -0.010968573069572456), u: [0. 0.], noise: [4.76028388 5.28359926]
Start training ...
Step: 70, L: 122.81246185302734, J:

Step: 70, L: 159.82858276367188, J: 58.94800567626953
Current x: (0.11234804602216186, 0.24750818288955298, -0.01744339504241945), u: [0. 0.], noise: [4.56828201 5.09746471]
Start training ...
Step: 70, L: 107.48605346679688, J: 54.871002197265625
Current x: (0.1140532855491253, 0.25059750737666087, -0.017936002779006976), u: [0. 0.], noise: [5.03566492 4.63836208]
Start training ...
Step: 70, L: 134.44851684570312, J: 61.97593688964844
Current x: (0.11577538456486726, 0.253672259485563, -0.018481528759002705), u: [0. 0.], noise: [5.65656289 4.42149748]
Start training ...
Step: 70, L: 128.504150390625, J: 64.53437805175781
Current x: (0.11751483398690234, 0.2567332586407517, -0.01898732442855837), u: [0. 0.], noise: [5.37809889 5.68745901]
Start training ...
Step: 70, L: 155.4110565185547, J: 62.55540466308594
Current x: (0.11927290814447959, 0.25982089169926326, -0.019369613552093524), u: [0. 0.], noise: [5.58282378 4.62447318]
Start training ...
Step: 70, L: 105.46976470947266, J: 57

Step: 70, L: 128.9726104736328, J: 58.644893646240234
Current x: (0.2243564401160267, 0.40586005490633503, -0.03529849958419802), u: [0. 0.], noise: [3.74186597 4.87419124]
Start training ...
Step: 70, L: 96.38895416259766, J: 62.28998947143555
Current x: (0.22741852443773142, 0.4093812272857783, -0.03527321588993075), u: [0. 0.], noise: [4.94473905 4.72210173]
Start training ...
Step: 70, L: 89.91761779785156, J: 57.53883361816406
Current x: (0.2305110158339038, 0.4127824686870327, -0.03536116473674777), u: [0. 0.], noise: [5.23702431 4.91857862]
Start training ...
Step: 70, L: 99.57018280029297, J: 59.85313415527344
Current x: (0.23363759821509827, 0.4161687928319309, -0.035426849865913414), u: [0. 0.], noise: [4.55580261 5.44437523]
Start training ...
Step: 70, L: 85.3134994506836, J: 55.12617492675781
Current x: (0.23680008450926798, 0.4195890424494645, -0.0354606904268265), u: [0. 0.], noise: [5.37059236 5.32758869]
Start training ...
Step: 70, L: 109.74314880371094, J: 56.3602027

Step: 70, L: 65.27426147460938, J: 57.877967834472656
Current x: (0.4117056907726648, 0.5806003632119517, -0.028963891243934687), u: [0. 0.], noise: [4.8345912  4.93123359]
Start training ...
Step: 70, L: 55.00137710571289, J: 58.651065826416016
Current x: (0.41623140833789374, 0.5843854648758561, -0.029455154252052362), u: [0. 0.], noise: [5.20511129 5.16750448]
Start training ...
Step: 70, L: 144.41807556152344, J: 61.70344161987305
Current x: (0.4207854075785605, 0.5881657394646, -0.029956081461906488), u: [0. 0.], noise: [4.68383497 5.08147119]
Start training ...
Step: 70, L: 89.64468383789062, J: 60.503334045410156
Current x: (0.42536995510134784, 0.5920018257000529, -0.030453247952461297), u: [0. 0.], noise: [5.12929209 5.1993572 ]
Start training ...
Step: 70, L: 123.95628356933594, J: 61.73679733276367
Current x: (0.42998375128083477, 0.5958330044624489, -0.03099017803668981), u: [0. 0.], noise: [3.99773201 5.10757872]
Start training ...
Step: 70, L: 58.5297966003418, J: 59.9238

Step: 70, L: 105.2087173461914, J: 62.01830291748047
Current x: (0.6388417943445912, 0.7505742810075703, -0.055415994548797656), u: [0. 0.], noise: [6.12690257 4.63529497]
Start training ...
Step: 70, L: 113.50004577636719, J: 58.34403991699219
Current x: (0.6451185658251711, 0.7549137363080746, -0.05629570517539983), u: [0. 0.], noise: [5.08994544 4.3632918 ]
Start training ...
Step: 70, L: 142.2921142578125, J: 62.3626708984375
Current x: (0.6514549465732467, 0.7593467592818021, -0.05702625503540044), u: [0. 0.], noise: [5.77119637 5.04072093]
Start training ...
Step: 70, L: 110.35997009277344, J: 59.537200927734375
Current x: (0.6578445168834615, 0.763742608441403, -0.057684139537811326), u: [0. 0.], noise: [4.84420775 4.47854057]
Start training ...
Step: 70, L: 109.9979019165039, J: 59.111785888671875
Current x: (0.6642957100968163, 0.7682368917934901, -0.05826897649765019), u: [0. 0.], noise: [5.78090922 4.00226828]
Start training ...
Step: 70, L: 131.98292541503906, J: 63.5586166

Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [4.8266023  4.72862047]
Start training ...
Step: 71, L: 139.2941131591797, J: 63.38145446777344
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [5.39051864 5.0438751 ]
Start training ...
Step: 71, L: 64.59904479980469, J: 53.044124603271484
Simulation ends in 2 steps
Episode 72 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [5.35982624 5.75000037]
Start training ...
Step: 72, L: 116.88507080078125, J: 64.75743103027344
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [5.14713941 4.62698162]
Start training ...
Step: 72, L: 85.86361694335938, J: 63.89154815673828
Current x: (0.0, 0.00012998270416259757, -3.901743888854981e-05), u: [0. 0.], noise: [5.42026745 5.28469414]
Start training ...
Step: 72, L: 153.29873657226562, J: 65.35975646972656
Current x: (0.0, 0.00025637753677368135, -2.601909637451172e-05), u: [0. 0.], noise: [4.30038736 5.06467398]
Start training ...
Step: 72, L: 179.57904052734375, J: 56.24713134765625
Current x: (4.

Step: 72, L: 106.29844665527344, J: 65.53128051757812
Current x: (0.00023552319136138574, 0.0077286548644574776, -0.003785685467720033), u: [0. 0.], noise: [4.2075373  5.17375181]
Start training ...
Step: 72, L: 136.32406616210938, J: 58.53302764892578
Current x: (0.00025980169225287085, 0.00747062850035896, -0.004075083041191102), u: [0. 0.], noise: [4.55414825 4.35592551]
Start training ...
Step: 72, L: 102.18998718261719, J: 65.96430969238281
Current x: (0.00028763164542651394, 0.007169724266756005, -0.004461102080345155), u: [0. 0.], noise: [4.18262285 5.00107374]
Start training ...
Step: 72, L: 161.04013061523438, J: 58.96098327636719
Current x: (0.0003190925177914572, 0.006778820058395081, -0.0048272988557815566), u: [0. 0.], noise: [4.66217644 5.36258369]
Start training ...
Step: 72, L: 106.75780487060547, J: 64.5223388671875
Current x: (0.0003546503174335017, 0.006325276386296219, -0.005275340723991395), u: [0. 0.], noise: [4.65449047 4.04840609]
Start training ...
Step: 72, L:

Step: 89, L: 108.41748046875, J: 67.33480834960938
Simulation ends in 2 steps
Episode 90 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [3.95460986 4.34935192]
Start training ...
Step: 90, L: 54.82905197143555, J: 53.378971099853516
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [3.746354   3.33007261]
Start training ...
Step: 90, L: 99.7265396118164, J: 56.205528259277344
Simulation ends in 2 steps
Episode 91 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [3.6863228  3.55591112]
Start training ...
Step: 91, L: 91.80872344970703, J: 61.9638786315918
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [4.25041247 4.85369063]
Start training ...
Step: 91, L: 188.59320068359375, J: 60.223262786865234
Simulation ends in 2 steps
Episode 92 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [4.42238207 3.6994478 ]
Start training ...
Step: 92, L: 126.55249786376953, J: 59.843807220458984
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [4.40179612 4.37004305]
Start training 

Step: 115, L: 79.11980438232422, J: 62.32787322998047
Simulation ends in 2 steps
Episode 116 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [2.96692824 3.36091365]
Start training ...
Step: 116, L: 185.83123779296875, J: 58.16896057128906
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [3.39251407 3.4335058 ]
Start training ...
Step: 116, L: 164.95147705078125, J: 57.5966682434082
Simulation ends in 2 steps
Episode 117 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [3.15956013 3.68700317]
Start training ...
Step: 117, L: 110.13372039794922, J: 55.98801040649414
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [3.21506741 3.36458528]
Start training ...
Step: 117, L: 150.26547241210938, J: 62.51195526123047
Simulation ends in 2 steps
Episode 118 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [3.13451698 3.51456156]
Start training ...
Step: 118, L: 115.9100341796875, J: 60.90052795410156
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [3.03176787 3.03950454]
Start

Step: 140, L: 92.05743408203125, J: 61.05403518676758
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [2.1825169  2.18359839]
Start training ...
Step: 140, L: 152.5408172607422, J: 64.424072265625
Simulation ends in 2 steps
Episode 141 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [2.62098879 2.32156604]
Start training ...
Step: 141, L: 86.36907196044922, J: 58.17655944824219
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [2.01062418 2.75501164]
Start training ...
Step: 141, L: 135.56076049804688, J: 58.96141052246094
Simulation ends in 2 steps
Episode 142 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [2.07777887 2.48340465]
Start training ...
Step: 142, L: 44.203739166259766, J: 59.65894317626953
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [2.21310228 2.08300923]
Start training ...
Step: 142, L: 124.18228149414062, J: 60.494590759277344
Simulation ends in 2 steps
Episode 143 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [2.35752591 2.01993194]
Start 

Step: 165, L: 45.83522415161133, J: 61.0013313293457
Simulation ends in 2 steps
Episode 166 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [1.90914453 1.92532206]
Start training ...
Step: 166, L: 107.33642578125, J: 61.029876708984375
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [2.10988726 1.85767274]
Start training ...
Step: 166, L: 126.35997772216797, J: 63.35297393798828
Simulation ends in 2 steps
Episode 167 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [1.48515207 1.78341387]
Start training ...
Step: 167, L: 104.81253051757812, J: 59.169166564941406
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [1.93878283 1.89309383]
Start training ...
Step: 167, L: 84.0440673828125, J: 59.59449005126953
Simulation ends in 2 steps
Episode 168 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [2.02844245 1.90471973]
Start training ...
Step: 168, L: 65.10707092285156, J: 58.39811706542969
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [1.95772155 1.68286754]
Start tr

Step: 191, L: 60.55767059326172, J: 52.2443962097168
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [1.49137831 1.26150308]
Start training ...
Step: 191, L: 160.5039825439453, J: 51.57720184326172
Simulation ends in 2 steps
Episode 192 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [1.52653686 1.33659011]
Start training ...
Step: 192, L: 72.63883209228516, J: 59.29107666015625
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [1.54231596 1.29153543]
Start training ...
Step: 192, L: 97.11390686035156, J: 60.440460205078125
Simulation ends in 2 steps
Episode 193 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [1.38143777 1.47510197]
Start training ...
Step: 193, L: 57.523834228515625, J: 58.98699188232422
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [1.20071205 1.45246479]
Start training ...
Step: 193, L: 109.1611328125, J: 60.732398986816406
Simulation ends in 2 steps
Episode 194 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [1.27374776 1.3047278 ]
Start tra

Step: 217, L: 76.7884521484375, J: 54.27019500732422
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [1.24605603 1.26099698]
Start training ...
Step: 217, L: 128.01861572265625, J: 55.96013259887695
Simulation ends in 2 steps
Episode 218 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [1.14675243 1.19100406]
Start training ...
Step: 218, L: 51.063812255859375, J: 64.37228393554688
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [0.95992693 1.16285302]
Start training ...
Step: 218, L: 45.82996368408203, J: 61.0617790222168
Simulation ends in 2 steps
Episode 219 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [1.09492582 1.12351219]
Start training ...
Step: 219, L: 77.48371887207031, J: 62.821388244628906
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [1.01218003 1.00048461]
Start training ...
Step: 219, L: 112.15364074707031, J: 59.677703857421875
Simulation ends in 2 steps
Episode 220 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [1.17799886 1.07428812]
Start

Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [0.92251182 0.71105237]
Start training ...
Step: 244, L: 35.41484832763672, J: 59.42985534667969
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [1.00652379 0.77246168]
Start training ...
Step: 244, L: 68.05953979492188, J: 49.54588317871094
Simulation ends in 2 steps
Episode 245 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [0.89584512 1.03031372]
Start training ...
Step: 245, L: 125.53692626953125, J: 57.1393928527832
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [0.95551579 0.88824489]
Start training ...
Step: 245, L: 103.09150695800781, J: 61.34571838378906
Simulation ends in 2 steps
Episode 246 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [0.83227433 0.87036728]
Start training ...
Step: 246, L: 121.14724731445312, J: 57.01014709472656
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [0.77145156 0.78884484]
Start training ...
Step: 246, L: 46.80906677246094, J: 62.264808654785156
Simulation ends in 2 steps
Episode 

Step: 269, L: 132.6424560546875, J: 62.545555114746094
Simulation ends in 2 steps
Episode 270 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [0.74866637 0.70693182]
Start training ...
Step: 270, L: 108.28136444091797, J: 49.40140151977539
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [0.74868275 0.63741963]
Start training ...
Step: 270, L: 128.6778106689453, J: 65.20379638671875
Simulation ends in 2 steps
Episode 271 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [0.58852975 0.61594608]
Start training ...
Step: 271, L: 51.03086853027344, J: 55.32532501220703
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [0.7669432 0.6530535]
Start training ...
Step: 271, L: 102.9771728515625, J: 53.449798583984375
Simulation ends in 2 steps
Episode 272 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [0.68222615 0.64399253]
Start training ...
Step: 272, L: 65.57077026367188, J: 50.763755798339844
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [0.74117078 0.71762133]
Start 

Step: 294, L: 63.007057189941406, J: 56.61376190185547
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [0.45031154 0.57892294]
Start training ...
Step: 294, L: 69.77430725097656, J: 56.01338195800781
Simulation ends in 2 steps
Episode 295 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [0.55649472 0.49842888]
Start training ...
Step: 295, L: 91.47935485839844, J: 55.13075637817383
Current x: (0.0, 0.0, 0.0), u: [6.809394e-05 0.000000e+00], noise: [0.56415318 0.46434133]
Start training ...
Step: 295, L: 97.75796508789062, J: 58.985679626464844
Simulation ends in 2 steps
Episode 296 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [0.59952584 0.55801179]
Start training ...
Step: 296, L: 98.20182800292969, J: 59.67595672607422
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [0.56603671 0.52569553]
Start training ...
Step: 296, L: 91.66000366210938, J: 60.774932861328125
Simulation ends in 2 steps
Episode 297 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [0.556943

Step: 319, L: 105.37013244628906, J: 59.11531066894531
Simulation ends in 2 steps
Episode 320 begins...
Current x: (0.0, 0.0, 0.0), u: [0.01345224 0.        ], noise: [0.47452089 0.36727872]
Start training ...
Step: 320, L: 91.5865249633789, J: 57.9013671875
Current x: (0.0, 0.0, 0.0), u: [0.03361291 0.        ], noise: [0.40485878 0.39675567]
Start training ...
Step: 320, L: 80.25807189941406, J: 54.57395935058594
Simulation ends in 2 steps
Episode 321 begins...
Current x: (0.0, 0.0, 0.0), u: [0.02272386 0.        ], noise: [0.36934592 0.48958797]
Start training ...
Step: 321, L: 72.92108154296875, J: 56.945587158203125
Current x: (0.0, 0.0, 0.0), u: [0.04075079 0.        ], noise: [0.43458568 0.425866  ]
Start training ...
Step: 321, L: 84.86146545410156, J: 57.834022521972656
Simulation ends in 2 steps
Episode 322 begins...
Current x: (0.0, 0.0, 0.0), u: [0.03321211 0.        ], noise: [0.39631038 0.37014153]
Start training ...
Step: 322, L: 118.42058563232422, J: 59.81354522705078


Step: 344, L: 77.79093170166016, J: 57.9583625793457
Current x: (0.0, 0.0, 0.0), u: [0.0880971 0.       ], noise: [0.32862815 0.37598644]
Start training ...
Step: 344, L: 71.80818939208984, J: 55.166969299316406
Simulation ends in 2 steps
Episode 345 begins...
Current x: (0.0, 0.0, 0.0), u: [0.54853994 0.        ], noise: [0.36515966 0.36775032]
Start training ...
Step: 345, L: 104.71878051757812, J: 54.97874069213867
Current x: (0.0, 0.0, 0.0), u: [0.10572086 0.        ], noise: [0.27563806 0.33665937]
Start training ...
Step: 345, L: 207.39324951171875, J: 47.47917556762695
Simulation ends in 2 steps
Episode 346 begins...
Current x: (0.0, 0.0, 0.0), u: [0.59519446 0.        ], noise: [0.32029702 0.33578426]
Start training ...
Step: 346, L: 97.381103515625, J: 57.156005859375
Current x: (0.0, 0.0, 0.0), u: [0.11812251 0.        ], noise: [0.3162661  0.34178892]
Start training ...
Step: 346, L: 58.00559997558594, J: 51.12120056152344
Simulation ends in 2 steps
Episode 347 begins...
Cur

Step: 369, L: 115.80146026611328, J: 51.12013626098633
Simulation ends in 2 steps
Episode 370 begins...
Current x: (0.0, 0.0, 0.0), u: [2.403825 0.      ], noise: [0.26113923 0.26607448]
Start training ...
Step: 370, L: 49.46388244628906, J: 52.27293395996094
Current x: (0.0, 0.0, 0.0), u: [1.2066711 0.       ], noise: [0.26811673 0.211623  ]
Start training ...
Step: 370, L: 118.02303314208984, J: 52.20600128173828
Simulation ends in 2 steps
Episode 371 begins...
Current x: (0.0, 0.0, 0.0), u: [2.5052607 0.       ], noise: [0.24517378 0.24709214]
Start training ...
Step: 371, L: 85.1002197265625, J: 51.813358306884766
Current x: (0.0, 0.0, 0.0), u: [1.2768284 0.       ], noise: [0.23010303 0.26918889]
Start training ...
Step: 371, L: 116.46687316894531, J: 45.58967590332031
Simulation ends in 2 steps
Episode 372 begins...
Current x: (0.0, 0.0, 0.0), u: [2.603515 0.      ], noise: [0.27848109 0.26036273]
Start training ...
Step: 372, L: 117.22591400146484, J: 50.75630187988281
Current x

Step: 394, L: 88.51483154296875, J: 49.184852600097656
Simulation ends in 2 steps
Episode 395 begins...
Current x: (0.0, 0.0, 0.0), u: [4.5722113 0.       ], noise: [0.18108022 0.18339195]
Start training ...
Step: 395, L: 119.6059799194336, J: 59.691802978515625
Current x: (0.0, 0.0, 0.0), u: [2.9871912 0.       ], noise: [0.20208621 0.17236273]
Start training ...
Step: 395, L: 74.8451919555664, J: 55.87400436401367
Simulation ends in 2 steps
Episode 396 begins...
Current x: (0.0, 0.0, 0.0), u: [4.5404267 0.       ], noise: [0.18317499 0.19568013]
Start training ...
Step: 396, L: 79.10062408447266, J: 52.514434814453125
Current x: (0.0, 0.0, 0.0), u: [2.947958 0.      ], noise: [0.21319034 0.21342727]
Start training ...
Step: 396, L: 100.7553482055664, J: 53.80875778198242
Simulation ends in 2 steps
Episode 397 begins...
Current x: (0.0, 0.0, 0.0), u: [4.4979205 0.       ], noise: [0.22926253 0.19849547]
Start training ...
Step: 397, L: 37.59004211425781, J: 51.487281799316406
Current 

Step: 419, L: 125.29872131347656, J: 55.34913635253906
Current x: (0.0, 0.0, 0.0), u: [1.3128695 0.       ], noise: [0.13642908 0.14378843]
Start training ...
Step: 419, L: 65.18637084960938, J: 44.99734115600586
Simulation ends in 2 steps
Episode 420 begins...
Current x: (0.0, 0.0, 0.0), u: [2.9620373 0.       ], noise: [0.14862436 0.15850069]
Start training ...
Step: 420, L: 101.95862579345703, J: 57.75554656982422
Current x: (0.0, 0.0, 0.0), u: [1.2867061 0.       ], noise: [0.16887361 0.16835468]
Start training ...
Step: 420, L: 149.1984100341797, J: 55.18064498901367
Simulation ends in 2 steps
Episode 421 begins...
Current x: (0.0, 0.0, 0.0), u: [2.9336457 0.       ], noise: [0.14036063 0.14122643]
Start training ...
Step: 421, L: 35.12718963623047, J: 43.87255096435547
Current x: (0.0, 0.0, 0.0), u: [1.2543938 0.       ], noise: [0.17827527 0.16618559]
Start training ...
Step: 421, L: 106.57626342773438, J: 46.015541076660156
Simulation ends in 2 steps
Episode 422 begins...
Curre

Step: 443, L: 85.17540740966797, J: 45.806400299072266
Current x: (0.0, 0.0, 0.0), u: [2.116422 0.      ], noise: [0.12695504 0.13663539]
Start training ...
Step: 443, L: 108.67452239990234, J: 54.68781280517578
Simulation ends in 2 steps
Episode 444 begins...
Current x: (0.0, 0.0, 0.0), u: [4.0477066 0.       ], noise: [0.11982307 0.11644468]
Start training ...
Step: 444, L: 67.65065002441406, J: 42.30459976196289
Current x: (0.0, 0.0, 0.0), u: [2.2154841 0.       ], noise: [0.12873146 0.13620837]
Start training ...
Step: 444, L: 77.2166748046875, J: 49.116973876953125
Simulation ends in 2 steps
Episode 445 begins...
Current x: (0.0, 0.0, 0.0), u: [4.1428523 0.       ], noise: [0.13226226 0.12915702]
Start training ...
Step: 445, L: 111.47779846191406, J: 47.844024658203125
Current x: (0.0, 0.0, 0.0), u: [2.315649 0.      ], noise: [0.1143551  0.10829262]
Start training ...
Step: 445, L: 83.18751525878906, J: 54.16103744506836
Simulation ends in 2 steps
Episode 446 begins...
Current x

Current x: (0.0, 0.0, 0.0), u: [3.3589015 0.       ], noise: [0.08535545 0.09125707]
Start training ...
Step: 468, L: 55.891666412353516, J: 52.307952880859375
Simulation ends in 2 steps
Episode 469 begins...
Current x: (0.0, 0.0, 0.0), u: [5.2915244 0.       ], noise: [0.08315102 0.09453443]
Start training ...
Step: 469, L: 75.16444396972656, J: 48.128562927246094
Current x: (0.0, 0.0, 0.0), u: [3.291666 0.      ], noise: [0.09373981 0.1031651 ]
Start training ...
Step: 469, L: 108.78274536132812, J: 53.7185173034668
Simulation ends in 2 steps
Episode 470 begins...
Current x: (0.0, 0.0, 0.0), u: [5.2317123 0.       ], noise: [0.10808264 0.07730325]
Start training ...
Step: 470, L: 60.71327209472656, J: 38.48639678955078
Current x: (0.0, 0.0, 0.0), u: [3.2118108 0.       ], noise: [0.09567226 0.10096742]
Start training ...
Step: 470, L: 122.69335174560547, J: 49.08399963378906
Simulation ends in 2 steps
Episode 471 begins...
Current x: (0.0, 0.0, 0.0), u: [5.159534 0.      ], noise: [0

Step: 494, L: 87.7327880859375, J: 43.761390686035156
Current x: (0.0, 0.0, 0.0), u: [1.0436845 0.       ], noise: [0.07533345 0.08376895]
Start training ...
Step: 494, L: 80.92911529541016, J: 44.05075454711914
Simulation ends in 2 steps
Episode 495 begins...
Current x: (0.0, 0.0, 0.0), u: [3.30092 0.     ], noise: [0.07141247 0.06699529]
Start training ...
Step: 495, L: 70.01504516601562, J: 51.790802001953125
Current x: (0.0, 0.0, 0.0), u: [1.0157975 0.       ], noise: [0.0746089  0.08184984]
Start training ...
Step: 495, L: 118.38230895996094, J: 49.072486877441406
Simulation ends in 2 steps
Episode 496 begins...
Current x: (0.0, 0.0, 0.0), u: [3.2720942 0.       ], noise: [0.06493561 0.07718368]
Start training ...
Step: 496, L: 87.94429016113281, J: 40.98949432373047
Current x: (0.0, 0.0, 0.0), u: [0.9901409 0.       ], noise: [0.07659085 0.07706345]
Start training ...
Step: 496, L: 125.06102752685547, J: 48.19377899169922
Simulation ends in 2 steps
Episode 497 begins...
Current x

Step: 520, L: 131.29669189453125, J: 45.888641357421875
Current x: (0.0, 0.0, 0.0), u: [2.025565 0.      ], noise: [0.05958451 0.05839669]
Start training ...
Step: 520, L: 145.01368713378906, J: 45.84806442260742
Simulation ends in 2 steps
Episode 521 begins...
Current x: (0.0, 0.0, 0.0), u: [4.4868636 0.       ], noise: [0.04969834 0.05203049]
Start training ...
Step: 521, L: 135.35618591308594, J: 32.11308288574219
Current x: (0.0, 0.0, 0.0), u: [2.1272478 0.       ], noise: [0.06112957 0.05661982]
Start training ...
Step: 521, L: 87.71461486816406, J: 44.806121826171875
Simulation ends in 2 steps
Episode 522 begins...
Current x: (0.0, 0.0, 0.0), u: [4.586393 0.      ], noise: [0.06272347 0.05837186]
Start training ...
Step: 522, L: 89.375, J: 40.39655303955078
Current x: (0.0, 0.0, 0.0), u: [2.2417428 0.       ], noise: [0.04762235 0.05838519]
Start training ...
Step: 522, L: 116.46805572509766, J: 41.77940368652344
Simulation ends in 2 steps
Episode 523 begins...
Current x: (0.0, 0

Step: 544, L: 174.67922973632812, J: 30.57427215576172
Simulation ends in 2 steps
Episode 545 begins...
Current x: (0.0, 0.0, 0.0), u: [7.449161 0.      ], noise: [0.04060884 0.04290452]
Start training ...
Step: 545, L: 200.6763916015625, J: 23.76327133178711
Current x: (0.0, 0.0, 0.0), u: [5.84504 0.     ], noise: [0.04198227 0.04743235]
Start training ...
Step: 545, L: 384.5163879394531, J: 30.4106388092041
Simulation ends in 2 steps
Episode 546 begins...
Current x: (0.0, 0.0, 0.0), u: [7.635277 0.      ], noise: [0.0400059  0.04366626]
Start training ...
Step: 546, L: 256.2892761230469, J: 22.92659568786621
Current x: (0.0, 0.0, 0.0), u: [6.1118436 0.       ], noise: [0.04030277 0.04196468]
Start training ...
Step: 546, L: 330.529296875, J: 13.261665344238281
Simulation ends in 2 steps
Episode 547 begins...
Current x: (0.0, 0.0, 0.0), u: [7.8203716 0.       ], noise: [0.03678724 0.03978196]
Start training ...
Step: 547, L: 357.70123291015625, J: 22.64429473876953
Current x: (0.0, 0.

Step: 567, L: 4216.0048828125, J: -60.66731262207031
Simulation ends in 2 steps
Episode 568 begins...
Current x: (0.0, 0.0, 0.0), u: [9.7553625 0.       ], noise: [0.03302359 0.03392185]
Start training ...
Step: 568, L: 3344.36962890625, J: -50.8203239440918
Current x: (0.0, 0.0, 0.0), u: [9.362452 0.      ], noise: [0.03506727 0.0364844 ]
Start training ...
Step: 568, L: 5793.6279296875, J: -56.80870056152344
Current x: (0.0, 1.2308540344237173e-06, 0.0009754464149475098), u: [8.680729 0.      ], noise: [0.03693938 0.03805574]
Start training ...
Step: 568, L: 5362.07861328125, J: -66.56291198730469
Simulation ends in 3 steps
Episode 569 begins...
Current x: (0.0, 0.0, 0.0), u: [9.789115 0.      ], noise: [0.03064148 0.03367162]
Start training ...
Step: 569, L: 4284.109375, J: -61.06787109375
Current x: (0.0, 0.0, 0.0), u: [9.416047 0.      ], noise: [0.03148907 0.03510786]
Start training ...
Step: 569, L: 5395.00439453125, J: -56.31198501586914
Current x: (0.0, 4.3427886962890684e-06,

Step: 580, L: 2338.6279296875, J: -71.14852905273438
Current x: (0.0, 5.138591003417958e-05, 0.0029772375106811525), u: [8.601582 0.      ], noise: [0.03569917 0.0314645 ]
Start training ...
Step: 580, L: 2861.35791015625, J: -71.12801361083984
Current x: (-9.406878471100442e-07, 3.716389721479029e-05, 0.005887803840637208), u: [7.30347 0.     ], noise: [0.03199319 0.03200344]
Start training ...
Step: 580, L: 3706.640380859375, J: -71.03844451904297
Simulation ends in 5 steps
Episode 581 begins...
Current x: (0.0, 0.0, 0.0), u: [10.00837  0.     ], noise: [0.03068166 0.02702773]
Start training ...
Step: 581, L: 1253.0697021484375, J: -71.10773468017578
Current x: (0.0, 0.0, 0.0), u: [9.772464 0.      ], noise: [0.02382488 0.02995133]
Start training ...
Step: 581, L: 2700.47900390625, J: -71.04549407958984
Current x: (0.0, 2.560800933837884e-05, 0.001001202392578125), u: [9.35252 0.     ], noise: [0.02855217 0.02833453]
Start training ...
Step: 581, L: 2429.179443359375, J: -71.07017517

Step: 589, L: 1496.250732421875, J: -70.68341064453125
Simulation ends in 5 steps
Episode 590 begins...
Current x: (0.0, 0.0, 0.0), u: [10.032709  0.      ], noise: [0.02969903 0.02775363]
Start training ...
Step: 590, L: 1305.487548828125, J: -70.67670440673828
Current x: (0.0, 0.0, 0.0), u: [9.813577 0.      ], noise: [0.03050771 0.02679531]
Start training ...
Step: 590, L: 897.51171875, J: -70.66980743408203
Current x: (0.0, 2.8016227722167943e-05, 0.0010034654617309571), u: [9.424968 0.      ], noise: [0.02974718 0.02760277]
Start training ...
Step: 590, L: 672.5388793945312, J: -70.663818359375
Current x: (0.0, 6.212046813964839e-05, 0.0029886598587036133), u: [8.741201 0.      ], noise: [0.02719621 0.03001036]
Start training ...
Step: 590, L: 905.8505859375, J: -70.65786743164062
Current x: (-9.515176937514084e-07, 6.345602359946884e-05, 0.005916565418243409), u: [7.538535 0.      ], noise: [0.02342326 0.02588418]
Start training ...
Step: 590, L: 968.72509765625, J: -70.652351379

Step: 599, L: 400.5101318359375, J: -70.46577453613281
Current x: (0.0, 2.8578800201416037e-05, 0.0010040806770324707), u: [9.451707 0.      ], noise: [0.01962448 0.02697912]
Start training ...
Step: 599, L: 346.52349853515625, J: -70.46229553222656
Current x: (0.0, 6.449549865722659e-05, 0.0029910766601562505), u: [8.779992 0.      ], noise: [0.02642088 0.02556591]
Start training ...
Step: 599, L: 268.3063659667969, J: -70.45867919921875
Current x: (-9.537068979994485e-07, 6.924282259222745e-05, 0.005922507858276368), u: [7.602295 0.      ], noise: [0.0311272  0.02027147]
Start training ...
Step: 599, L: 333.50048828125, J: -70.4547119140625
Simulation ends in 5 steps
Episode 600 begins...
Current x: (0.0, 0.0, 0.0), u: [10.047263  0.      ], noise: [0.02038304 0.02490399]
Start training ...
Step: 600, L: 427.29144287109375, J: -70.45208740234375
Current x: (0.0, 0.0, 0.0), u: [9.830181 0.      ], noise: [0.02637333 0.02728943]
Start training ...
Step: 600, L: 158.44711303710938, J: -

Step: 609, L: 302.0391540527344, J: -70.30733489990234
Current x: (0.0, 0.0, 0.0), u: [9.851385 0.      ], noise: [0.0198249  0.02335936]
Start training ...
Step: 609, L: 72.84927368164062, J: -70.30458068847656
Current x: (0.0, 2.9472011566162028e-05, 0.0010061945915222168), u: [9.48016 0.     ], noise: [0.02293778 0.02278642]
Start training ...
Step: 609, L: 202.08831787109375, J: -70.30133819580078
Current x: (0.0, 6.740096282958963e-05, 0.0029971742630004886), u: [8.828277 0.      ], noise: [0.0217161  0.02139232]
Start training ...
Step: 609, L: 64.33171081542969, J: -70.29830932617188
Current x: (-9.584891015539195e-07, 7.691779934185975e-05, 0.00593618507385254), u: [7.679679 0.      ], noise: [0.02059564 0.0236962 ]
Start training ...
Step: 609, L: 295.32135009765625, J: -70.29450988769531
Simulation ends in 5 steps
Episode 610 begins...
Current x: (0.0, 0.0, 0.0), u: [10.060971  0.      ], noise: [0.02295586 0.02345578]
Start training ...
Step: 610, L: 430.55499267578125, J: -

Step: 616, L: 215.37783813476562, J: -70.16749572753906
Current x: (-4.594231928746461e-06, 6.3049534463105635e-06, 0.00977294216156006), u: [5.7635975 0.       ], noise: [0.01969919 0.02076617]
Start training ...
Step: 616, L: 89.81948852539062, J: -70.16445922851562
Simulation ends in 6 steps
Episode 617 begins...
Current x: (0.0, 0.0, 0.0), u: [10.070922  0.      ], noise: [0.0204993  0.01857815]
Start training ...
Step: 617, L: 123.16788482666016, J: -70.1612777709961
Current x: (0.0, 0.0, 0.0), u: [9.866117 0.      ], noise: [0.0233972 0.0205429]
Start training ...
Step: 617, L: 47.08091735839844, J: -70.15830993652344
Current x: (0.0, 2.9999965667724534e-05, 0.0010072842597961427), u: [9.505976 0.      ], noise: [0.01545628 0.02005137]
Start training ...
Step: 617, L: 92.29864501953125, J: -70.15460205078125
Current x: (0.0, 7.000563812255834e-05, 0.0030014656066894535), u: [8.865592 0.      ], noise: [0.01953776 0.0204112 ]
Start training ...
Step: 617, L: 71.5635986328125, J: -

Step: 624, L: 70.38397216796875, J: -70.02729034423828
Current x: (0.0, 3.0616516113281256e-05, 0.0010080535888671875), u: [9.520697 0.      ], noise: [0.01693498 0.01959145]
Start training ...
Step: 624, L: 50.56751251220703, J: -70.02436828613281
Current x: (0.0, 7.162926864624025e-05, 0.0030039799690246578), u: [8.892195 0.      ], noise: [0.01969553 0.01791279]
Start training ...
Step: 624, L: 220.56520080566406, J: -70.02072143554688
Current x: (-9.634191635672342e-07, 8.736386759081782e-05, 0.005951710414886475), u: [7.7875 0.    ], noise: [0.0238765  0.01789896]
Start training ...
Step: 624, L: 64.59874725341797, J: -70.01824951171875
Current x: (-4.60932919497997e-06, 1.5074726917177058e-05, 0.009788838577270509), u: [5.8387623 0.       ], noise: [0.0213807  0.01891818]
Start training ...
Step: 624, L: 353.1904296875, J: -70.01471710205078
Simulation ends in 6 steps
Episode 625 begins...
Current x: (0.0, 0.0, 0.0), u: [10.079036  0.      ], noise: [0.01952009 0.01817647]
Start 

Step: 631, L: 294.92266845703125, J: -69.88153076171875
Current x: (-4.630107024156556e-06, 3.105017320233855e-05, 0.009804310321807862), u: [5.9672985 0.       ], noise: [0.01880668 0.01681815]
Start training ...
Step: 631, L: 38.28972625732422, J: -69.87844848632812
Simulation ends in 6 steps
Episode 632 begins...
Current x: (0.0, 0.0, 0.0), u: [10.08878  0.     ], noise: [0.01825248 0.01421928]
Start training ...
Step: 632, L: 270.79998779296875, J: -69.87520599365234
Current x: (0.0, 0.0, 0.0), u: [9.892786 0.      ], noise: [0.01834273 0.01984793]
Start training ...
Step: 632, L: 80.20054626464844, J: -69.87199401855469
Current x: (0.0, 3.1125205993652316e-05, 0.001009281349182129), u: [9.548724 0.      ], noise: [0.01756856 0.01927575]
Start training ...
Step: 632, L: 203.2387237548828, J: -69.86870574951172
Current x: (0.0, 7.434809875488265e-05, 0.003007690811157227), u: [8.94127 0.     ], noise: [0.01508857 0.02069883]
Start training ...
Step: 632, L: 323.4912109375, J: -69.86

Step: 639, L: 310.6926574707031, J: -69.74079895019531
Current x: (0.0, 0.0, 0.0), u: [9.908175 0.      ], noise: [0.01517116 0.01503864]
Start training ...
Step: 639, L: 457.91845703125, J: -69.73774719238281
Current x: (0.0, 3.196081542968743e-05, 0.0010098094940185548), u: [9.569566 0.      ], noise: [0.0185363 0.0177916]
Start training ...
Step: 639, L: 48.650733947753906, J: -69.73443603515625
Current x: (0.0, 7.676013183593734e-05, 0.00301044979095459), u: [8.977184 0.      ], noise: [0.01637695 0.02003093]
Start training ...
Step: 639, L: 62.79377746582031, J: -69.73100280761719
Current x: (-9.70012140079111e-07, 0.00010114836735292316, 0.005968121147155762), u: [7.935568 0.      ], noise: [0.01641823 0.01870668]
Start training ...
Step: 639, L: 282.1463317871094, J: -69.727783203125
Current x: (-4.653516726388477e-06, 4.5891695086182634e-05, 0.009823145484924318), u: [6.0953484 0.       ], noise: [0.01604753 0.0169487 ]
Start training ...
Step: 639, L: 36.0828742980957, J: -69.

Step: 646, L: 46.243621826171875, J: -69.6053466796875
Current x: (0.0, 3.253216171264645e-05, 0.0010106106758117676), u: [9.595932 0.      ], noise: [0.01391919 0.01607482]
Start training ...
Step: 646, L: 235.97357177734375, J: -69.60232543945312
Current x: (0.0, 7.952950668334944e-05, 0.0030132948875427248), u: [9.018905 0.      ], noise: [0.01604553 0.01601965]
Start training ...
Step: 646, L: 228.418212890625, J: -69.59909057617188
Current x: (-9.728061942190256e-07, 0.00010811896186597101, 0.005975356674194336), u: [8.005486 0.      ], noise: [0.01706976 0.01334453]
Start training ...
Step: 646, L: 81.35798645019531, J: -69.59605407714844
Current x: (-4.67293244730154e-06, 6.080131568715314e-05, 0.009839311504364014), u: [6.215164 0.      ], noise: [0.01595334 0.01475356]
Start training ...
Step: 646, L: 49.67273712158203, J: -69.59283447265625
Simulation ends in 6 steps
Episode 647 begins...
Current x: (0.0, 0.0, 0.0), u: [10.10757  0.     ], noise: [0.01450062 0.01768657]
Start

Current x: (0.0, 3.378986740112305e-05, 0.0010117520332336425), u: [9.626634 0.      ], noise: [0.01671564 0.01364009]
Start training ...
Step: 653, L: 37.08618927001953, J: -69.46923828125
Current x: (0.0, 8.356269073486328e-05, 0.003017581558227539), u: [9.072809 0.      ], noise: [0.01605808 0.01157783]
Start training ...
Step: 653, L: 154.4825897216797, J: -69.46601867675781
Current x: (-9.77047765237664e-07, 0.00011803402493048288, 0.005986382007598877), u: [8.096479 0.      ], noise: [0.01660611 0.0144907 ]
Start training ...
Step: 653, L: 85.51122283935547, J: -69.463134765625
Current x: (-4.7002248009683345e-06, 8.154569515855402e-05, 0.009862911415100099), u: [6.3765187 0.       ], noise: [0.01358112 0.01477566]
Start training ...
Step: 653, L: 49.908607482910156, J: -69.4600601196289
Simulation ends in 6 steps
Episode 654 begins...
Current x: (0.0, 0.0, 0.0), u: [10.116678  0.      ], noise: [0.01296377 0.01477858]
Start training ...
Step: 654, L: 59.54325866699219, J: -69.45

Step: 660, L: 54.58293914794922, J: -69.32633972167969
Simulation ends in 6 steps
Episode 661 begins...
Current x: (0.0, 0.0, 0.0), u: [10.124722  0.      ], noise: [0.0116281  0.01446495]
Start training ...
Step: 661, L: 119.68390655517578, J: -69.32335662841797
Current x: (0.0, 0.0, 0.0), u: [9.95339 0.     ], noise: [0.01521627 0.01151387]
Start training ...
Step: 661, L: 64.73894500732422, J: -69.32000732421875
Current x: (0.0, 3.40815010070801e-05, 0.0010121884346008302), u: [9.64743 0.     ], noise: [0.0152486  0.01558209]
Start training ...
Step: 661, L: 74.15951538085938, J: -69.31636047363281
Current x: (0.0, 8.517497253417966e-05, 0.0030200860977172856), u: [9.110334 0.      ], noise: [0.0110289  0.01098008]
Start training ...
Step: 661, L: 278.1326904296875, J: -69.31350708007812
Current x: (-9.796221979325897e-07, 0.00012309402860237296, 0.005992693424224854), u: [8.158616 0.      ], noise: [0.01403985 0.01362682]
Start training ...
Step: 661, L: 48.96678924560547, J: -69.3

Current x: (0.0, 0.0, 0.0), u: [9.975998 0.      ], noise: [0.01355284 0.01325506]
Start training ...
Step: 668, L: 101.19281005859375, J: -69.1846923828125
Current x: (0.0, 3.535923385620116e-05, 0.0010137846946716308), u: [9.687648 0.      ], noise: [0.01163448 0.01042024]
Start training ...
Step: 668, L: 63.23970031738281, J: -69.18148803710938
Current x: (0.0, 8.999903869628902e-05, 0.003025198936462402), u: [9.174081 0.      ], noise: [0.01353626 0.0120254 ]
Start training ...
Step: 668, L: 70.67007446289062, J: -69.17776489257812
Current x: (-9.84354608982849e-07, 0.00013460859375054107, 0.006005499458312988), u: [8.273081 0.      ], noise: [0.0143242  0.01237144]
Start training ...
Step: 668, L: 73.97967529296875, J: -69.17487335205078
Current x: (-4.751780035734962e-06, 0.0001181782526372237, 0.009903359127044677), u: [6.6851096 0.       ], noise: [0.0121988  0.01172606]
Start training ...
Step: 668, L: 210.99803161621094, J: -69.17189025878906
Simulation ends in 6 steps
Episod

Step: 675, L: 71.46903991699219, J: -69.05230712890625
Current x: (0.0, 9.322522354125964e-05, 0.0030290276527404785), u: [9.229973 0.      ], noise: [0.01350833 0.01283131]
Start training ...
Step: 675, L: 51.50737762451172, J: -69.04927062988281
Current x: (-9.886698743910327e-07, 0.00014356731226204856, 0.006015180110931396), u: [8.371475 0.      ], noise: [0.01095156 0.01140003]
Start training ...
Step: 675, L: 71.44827270507812, J: -69.04615783691406
Current x: (-4.781098363434202e-06, 0.00013853648704586398, 0.009924397563934326), u: [6.855039 0.      ], noise: [0.01183875 0.01013602]
Start training ...
Step: 675, L: 60.33941650390625, J: -69.0424575805664
Simulation ends in 6 steps
Episode 676 begins...
Current x: (0.0, 0.0, 0.0), u: [10.150412  0.      ], noise: [0.01417279 0.01077212]
Start training ...
Step: 676, L: 161.76095581054688, J: -69.03948974609375
Current x: (0.0, 0.0, 0.0), u: [9.998573 0.      ], noise: [0.01152896 0.01199883]
Start training ...
Step: 676, L: 74.1

Step: 681, L: 32.251487731933594, J: -68.9192886352539
Current x: (-4.809275965294964e-06, 0.0001602086003298596, 0.009946054458618164), u: [7.0235987 0.       ], noise: [0.00932821 0.01127357]
Start training ...
Step: 681, L: 48.76371765136719, J: -68.91616821289062
Current x: (-1.3739425320847198e-05, 3.434766824802384e-05, 0.014713845348358155), u: [4.4619803 0.       ], noise: [0.00988007 0.01090707]
Start training ...
Step: 681, L: 42.664512634277344, J: -68.9132080078125
Simulation ends in 7 steps
Episode 682 begins...
Current x: (0.0, 0.0, 0.0), u: [10.161078  0.      ], noise: [0.00961934 0.01240676]
Start training ...
Step: 682, L: 54.19930648803711, J: -68.91018676757812
Current x: (0.0, 0.0, 0.0), u: [10.015974  0.      ], noise: [0.00978746 0.00946855]
Start training ...
Step: 682, L: 150.86741638183594, J: -68.9073257446289
Current x: (0.0, 3.731045150756826e-05, 0.0010158291816711425), u: [9.752686 0.      ], noise: [0.01180082 0.00965309]
Start training ...
Step: 682, L:

Current x: (-1.3852683935235563e-05, 7.093919181028142e-05, 0.014752653884887694), u: [4.7429905 0.       ], noise: [0.00813459 0.01160912]
Start training ...
Step: 687, L: 171.6717071533203, J: -68.78404235839844
Simulation ends in 7 steps
Episode 688 begins...
Current x: (0.0, 0.0, 0.0), u: [10.171641  0.      ], noise: [0.01239176 0.00922041]
Start training ...
Step: 688, L: 56.38114929199219, J: -68.78089141845703
Current x: (0.0, 0.0, 0.0), u: [10.035493  0.      ], noise: [0.00915848 0.00822678]
Start training ...
Step: 688, L: 48.47077941894531, J: -68.77825164794922
Current x: (0.0, 3.832535171508791e-05, 0.0010174813270568848), u: [9.786722 0.      ], noise: [0.00963296 0.00906417]
Start training ...
Step: 688, L: 65.08477783203125, J: -68.77490234375
Current x: (0.0, 0.00010093844604492182, 0.0030386051177978515), u: [9.346269 0.      ], noise: [0.0117978  0.00890306]
Start training ...
Step: 688, L: 65.14926147460938, J: -68.77203369140625
Current x: (-9.976828982664325e-07,

Step: 694, L: 40.46168899536133, J: -68.63575744628906
Current x: (-4.883963595701119e-06, 0.00021351774150458596, 0.010002272415161134), u: [7.4810624 0.       ], noise: [0.00932546 0.00954476]
Start training ...
Step: 694, L: 60.289512634277344, J: -68.6328125
Current x: (-1.405759968574887e-05, 0.00014313324196534374, 0.014826349353790284), u: [5.273851 0.      ], noise: [0.00877485 0.00985593]
Start training ...
Step: 694, L: 54.34196472167969, J: -68.62998962402344
Simulation ends in 7 steps
Episode 695 begins...
Current x: (0.0, 0.0, 0.0), u: [10.186392  0.      ], noise: [0.00803357 0.00947182]
Start training ...
Step: 695, L: 224.3240509033203, J: -68.62666320800781
Current x: (0.0, 0.0, 0.0), u: [10.061204  0.      ], noise: [0.00868835 0.01015663]
Start training ...
Step: 695, L: 44.99877166748047, J: -68.6238021850586
Current x: (0.0, 3.9389747619628794e-05, 0.0010184953689575196), u: [9.835305 0.      ], noise: [0.01131729 0.01098032]
Start training ...
Step: 695, L: 36.718

Step: 700, L: 32.58926010131836, J: -68.50361633300781
Simulation ends in 7 steps
Episode 701 begins...
Current x: (0.0, 0.0, 0.0), u: [10.199138  0.      ], noise: [0.00917132 0.0100356 ]
Start training ...
Step: 701, L: 61.42692947387695, J: -68.50102233886719
Current x: (0.0, 0.0, 0.0), u: [10.087467  0.      ], noise: [0.00903723 0.00937443]
Start training ...
Step: 701, L: 60.51899337768555, J: -68.49830627441406
Current x: (0.0, 4.0834468841552674e-05, 0.0010198273658752442), u: [9.883108 0.      ], noise: [0.00609871 0.00998321]
Start training ...
Step: 701, L: 36.266273498535156, J: -68.495361328125
Current x: (0.0, 0.00011125682067871081, 0.003048367691040039), u: [9.516563 0.      ], noise: [0.00746327 0.00819144]
Start training ...
Step: 701, L: 35.0282096862793, J: -68.49252319335938
Current x: (-1.0095463060062717e-06, 0.0001905976526379137, 0.00606483039855957), u: [8.868673 0.      ], noise: [0.00874085 0.00728949]
Start training ...
Step: 701, L: 36.6829833984375, J: -6

Step: 707, L: 51.34492492675781, J: -68.36461639404297
Current x: (0.0, 0.00011676181030273431, 0.0030542951583862306), u: [9.610786 0.      ], noise: [0.00966615 0.00826494]
Start training ...
Step: 707, L: 46.12030029296875, J: -68.36236572265625
Current x: (-1.0164147132621552e-06, 0.00020550319644679544, 0.006080905151367187), u: [9.036965 0.      ], noise: [0.00742008 0.0084712 ]
Start training ...
Step: 707, L: 40.84456253051758, J: -68.35943603515625
Current x: (-4.973719354560034e-06, 0.00027611183366296586, 0.010068733978271485), u: [8.0242605 0.       ], noise: [0.00698525 0.00789099]
Start training ...
Step: 707, L: 29.009681701660156, J: -68.35617065429688
Current x: (-1.4435946788625178e-05, 0.0002709894732695578, 0.014960154247283936), u: [6.234293 0.      ], noise: [0.00889931 0.01032548]
Start training ...
Step: 707, L: 28.87570571899414, J: -68.35333251953125
Current x: (-3.1992430528531284e-05, 8.874005173480345e-05, 0.020653910064697266), u: [3.0535338 0.       ], no

Step: 712, L: 45.36482238769531, J: -68.23486328125
Current x: (-3.273715481708659e-05, 0.00023013718129261004, 0.020799011421203613), u: [4.0871997 0.       ], noise: [0.00812361 0.00956366]
Start training ...
Step: 712, L: 35.06810760498047, J: -68.2319107055664
Simulation ends in 8 steps
Episode 713 begins...
Current x: (0.0, 0.0, 0.0), u: [10.232431  0.      ], noise: [0.00807774 0.00946443]
Start training ...
Step: 713, L: 48.35997772216797, J: -68.22892761230469
Current x: (0.0, 0.0, 0.0), u: [10.157397  0.      ], noise: [0.00922649 0.00872752]
Start training ...
Step: 713, L: 37.033592224121094, J: -68.2261962890625
Current x: (0.0, 4.399732971191409e-05, 0.0010231044769287109), u: [10.017426  0.      ], noise: [0.00561508 0.00857034]
Start training ...
Step: 713, L: 51.7486457824707, J: -68.22344207763672
Current x: (0.0, 0.00012452977371215827, 0.003061998653411865), u: [9.76112 0.     ], noise: [0.00788411 0.00898136]
Start training ...
Step: 713, L: 44.541927337646484, J: -

Step: 718, L: 26.75943374633789, J: -68.10160064697266
Current x: (0.0, 0.0, 0.0), u: [10.191417  0.      ], noise: [0.0074827  0.00675933]
Start training ...
Step: 718, L: 65.17472076416016, J: -68.0992431640625
Current x: (0.0, 4.50971069335937e-05, 0.001024456787109375), u: [10.085623  0.      ], noise: [0.0066888  0.00816937]
Start training ...
Step: 718, L: 57.864044189453125, J: -68.09648132324219
Current x: (0.0, 0.00012976010513305643, 0.003068127536773682), u: [9.891151 0.      ], noise: [0.00830091 0.0074275 ]
Start training ...
Step: 718, L: 41.223846435546875, J: -68.09304809570312
Current x: (-1.0347504537688094e-06, 0.00024347067663642958, 0.0061202125549316404), u: [9.542291 0.      ], noise: [0.00789919 0.00746327]
Start training ...
Step: 718, L: 33.31431579589844, J: -68.09046936035156
Current x: (-5.109053095524513e-06, 0.000366864527774144, 0.010161500072479249), u: [8.923549 0.      ], noise: [0.00710294 0.00701232]
Start training ...
Step: 718, L: 44.8168869018554

Step: 720, L: 40.3197021484375, J: -67.96701049804688
Current x: (-0.08577255507040328, 0.2924632513733109, 0.701077174282074), u: [37.073242  0.      ], noise: [0.00660472 0.00749417]
Start training ...
Step: 720, L: 24.83544921875, J: -67.96430969238281
Current x: (-0.10280764419215142, 0.3300185661824337, 0.7704649275779724), u: [37.074112  0.      ], noise: [0.00769059 0.00680807]
Start training ...
Step: 720, L: 40.32665252685547, J: -67.96133422851562
Current x: (-0.1222350195601618, 0.36942690205170153, 0.8435599158287048), u: [37.07388  0.     ], noise: [0.00814103 0.00822736]
Start training ...
Step: 720, L: 33.52764129638672, J: -67.95099639892578
Current x: (-0.14424550152850976, 0.410515668250231, 0.9203624033927917), u: [37.07258  0.     ], noise: [0.00835925 0.00830213]
Start training ...
Step: 720, L: 38.2832145690918, J: -67.95526123046875
Current x: (-0.16902667884106723, 0.4530892228331599, 1.0008722701072692), u: [37.069912  0.      ], noise: [0.00717549 0.00678803]


Step: 721, L: 149.37330627441406, J: -67.83942413330078
Current x: (-0.24313518128746464, 0.695453834356353, 1.2684883680343628), u: [37.07629  0.     ], noise: [0.00878722 0.00650913]
Start training ...
Step: 721, L: 42.8768424987793, J: -67.83609008789062
Current x: (-0.27948956858633045, 0.7467462577271327, 1.3657425256729125), u: [37.068806  0.      ], noise: [0.00808841 0.00848349]
Start training ...
Step: 721, L: 105.18136596679688, J: -67.83366394042969
Current x: (-0.3193849112804154, 0.7981619877583548, 1.4667045404434202), u: [37.056473  0.      ], noise: [0.00785632 0.00725901]
Start training ...
Step: 721, L: 174.59808349609375, J: -67.83006286621094
Simulation ends in 36 steps
Episode 722 begins...
Current x: (0.0, 0.0, 0.0), u: [10.347453  0.      ], noise: [0.00773378 0.00706202]
Start training ...
Step: 722, L: 183.3018798828125, J: -67.82732391357422
Current x: (0.0, 0.0, 0.0), u: [10.482825  0.      ], noise: [0.00798696 0.00749457]
Start training ...
Step: 722, L: 41

Step: 723, L: 44.72834777832031, J: -67.7069320678711
Current x: (0.0, 5.616764450073239e-05, 0.0010359356880187988), u: [10.734605  0.      ], noise: [0.00760386 0.00765011]
Start training ...
Step: 723, L: 25.69426155090332, J: -67.70414733886719
Current x: (0.0, 0.00018290818405151358, 0.0031220705986022947), u: [11.116853  0.      ], noise: [0.00777174 0.00731421]
Start training ...
Step: 723, L: 30.1566162109375, J: -67.70159912109375
Current x: (-1.113616043848473e-06, 0.00040363403240505915, 0.006281661319732666), u: [11.745971  0.      ], noise: [0.00751486 0.00668351]
Start training ...
Step: 723, L: 26.98023796081543, J: -67.69857025146484
Current x: (-5.702696366840243e-06, 0.0007565483488643939, 0.010552982997894287), u: [12.776257  0.      ], noise: [0.00707462 0.00726461]
Start training ...
Step: 723, L: 60.46178436279297, J: -67.69562530517578
Current x: (-1.7679067997202837e-05, 0.0013044563659399852, 0.015998984909057615), u: [14.43608  0.     ], noise: [0.00866561 0.0

Step: 724, L: 40.40480422973633, J: -67.58297729492188
Current x: (-1.783467672446997e-05, 0.0013582043556773335, 0.016051446437835692), u: [14.747796  0.      ], noise: [0.00757747 0.00571336]
Start training ...
Step: 724, L: 59.513633728027344, J: -67.57861328125
Current x: (-4.3666236789991584e-05, 0.002249631070966834, 0.02282028646469116), u: [17.484123  0.      ], noise: [0.0067642  0.00756812]
Start training ...
Step: 724, L: 195.285888671875, J: -67.57594299316406
Current x: (-9.319045983149449e-05, 0.0036359763731784412, 0.031064092540740964), u: [21.362282  0.      ], noise: [0.00675847 0.00729047]
Start training ...
Step: 724, L: 21.40565299987793, J: -67.57289123535156
Current x: (-0.00018264319278512366, 0.0057907115703613125, 0.04105623044967651), u: [26.088459  0.      ], noise: [0.00725572 0.00891014]
Start training ...
Step: 724, L: 29.242782592773438, J: -67.56957244873047
Current x: (-0.00033848887411708076, 0.009101048408464412, 0.05318454332351684), u: [30.62158  0

Step: 725, L: 348.857666015625, J: -67.44401550292969
Current x: (-0.001723204057105019, 0.03167200471035459, 0.10811496400833129), u: [36.45594  0.     ], noise: [0.0073666 0.0064288]
Start training ...
Step: 725, L: 442.6561584472656, J: -67.44029235839844
Current x: (-0.002703216149002454, 0.04396337934516348, 0.13317176265716552), u: [36.77965  0.     ], noise: [0.0058406  0.00737445]
Start training ...
Step: 725, L: 38.63040542602539, J: -67.43673706054688
Current x: (-0.004076752952508095, 0.05889943369853936, 0.1618742491722107), u: [36.928093  0.      ], noise: [0.00867616 0.00699404]
Start training ...
Step: 725, L: 98.244140625, J: -67.43319702148438
Current x: (-0.005938819850675751, 0.07650119726021802, 0.19425454750061033), u: [37.000156  0.      ], noise: [0.0068926  0.00719569]
Start training ...
Step: 725, L: 219.28170776367188, J: -67.429443359375
Current x: (-0.008396302809170957, 0.09676804004265553, 0.2303278233528137), u: [37.037434  0.      ], noise: [0.00687822 0

Step: 726, L: 113.30364227294922, J: -67.29536437988281
Current x: (-0.015927586712381422, 0.14943473547207156, 0.317799956035614), u: [37.0718  0.    ], noise: [0.0063426  0.00601595]
Start training ...
Step: 726, L: 30.631912231445312, J: -67.29151916503906
Current x: (-0.0210219692068455, 0.17799079248071956, 0.3654144855499267), u: [37.078472  0.      ], noise: [0.00725675 0.00832715]
Start training ...
Step: 726, L: 301.0556945800781, J: -67.28819274902344
Current x: (-0.027275148039861415, 0.20908856721356278, 0.4167362278938293), u: [37.08273  0.     ], noise: [0.00676028 0.00779139]
Start training ...
Step: 726, L: 77.13972473144531, J: -67.284423828125
Current x: (-0.034853832652127, 0.24266983667479206, 0.47176571025848385), u: [37.085514  0.      ], noise: [0.00787541 0.0066529 ]
Start training ...
Step: 726, L: 76.39637756347656, J: -67.28070068359375
Current x: (-0.04393413412732687, 0.27866233660998385, 0.5305033625602722), u: [37.087345  0.      ], noise: [0.00773925 0.0

Step: 727, L: 27.144880294799805, J: -67.13460540771484
Current x: (-0.10012861287465255, 0.44987496795994125, 0.8079684648513794), u: [37.089767  0.      ], noise: [0.00683622 0.00572302]
Start training ...
Step: 727, L: 59.90443420410156, J: -67.1312484741211
Current x: (-0.11975401234197454, 0.4964755992006205, 0.8855538763999939), u: [37.08935  0.     ], noise: [0.00767396 0.00723418]
Start training ...
Step: 727, L: 340.3197326660156, J: -67.12733459472656
Current x: (-0.1420614830407604, 0.5446588839747368, 0.966848376083374), u: [37.088516  0.      ], noise: [0.00775469 0.00627897]
Start training ...
Step: 727, L: 37.429237365722656, J: -67.12367248535156
Current x: (-0.1672418090339086, 0.5942093517019862, 1.0518518551826477), u: [37.087128  0.      ], noise: [0.00755184 0.00774836]
Start training ...
Step: 727, L: 211.19439697265625, J: -67.12007141113281
Current x: (-0.1954760459892475, 0.6448858600970807, 1.1405643335342406), u: [37.084976  0.      ], noise: [0.00640066 0.00

Step: 728, L: 67.9712905883789, J: -66.97731018066406
Simulation ends in 36 steps
Episode 729 begins...
Current x: (0.0, 0.0, 0.0), u: [10.435318  0.      ], noise: [0.00732177 0.00642243]
Start training ...
Step: 729, L: 92.86766052246094, J: -66.97413635253906
Current x: (0.0, 0.0, 0.0), u: [10.648453  0.      ], noise: [0.00677413 0.00720372]
Start training ...
Step: 729, L: 67.54750061035156, J: -66.97026062011719
Current x: (0.0, 6.390613937377929e-05, 0.0010436217308044434), u: [10.999908  0.      ], noise: [0.00612392 0.00683362]
Start training ...
Step: 729, L: 508.0286865234375, J: -66.96620178222656
Current x: (0.0, 0.0002130553550720214, 0.003152045726776123), u: [11.580711  0.      ], noise: [0.00677191 0.00607882]
Start training ...
Step: 729, L: 36.208580017089844, J: -66.96221923828125
Current x: (-1.1493264210813323e-06, 0.00048249057305945836, 0.0063603895187377925), u: [12.537928  0.      ], noise: [0.00681071 0.00653273]
Start training ...
Step: 729, L: 105.084716796

Step: 730, L: 76.27259826660156, J: -66.8167724609375
Current x: (-4.79523802300518e-05, 0.0030371182853826772, 0.023608749294281008), u: [21.189302  0.      ], noise: [0.00793217 0.00542883]
Start training ...
Step: 730, L: 310.59619140625, J: -66.81167602539062
Current x: (-0.00010529966130923393, 0.005051723812191865, 0.032481089019775394), u: [25.965372  0.      ], noise: [0.00787767 0.00661374]
Start training ...
Step: 730, L: 28.977508544921875, J: -66.80763244628906
Current x: (-0.00021269912897201894, 0.00820500481895732, 0.043472609424591066), u: [30.57673  0.     ], noise: [0.0063258  0.00791358]
Start training ...
Step: 730, L: 318.6879577636719, J: -66.80342102050781
Current x: (-0.0004044691872709679, 0.012973901892957884, 0.05706079330444336), u: [33.849777  0.      ], noise: [0.00645389 0.00617544]
Start training ...
Step: 730, L: 358.8699951171875, J: -66.79824829101562
Current x: (-0.0007291842949646128, 0.01981800587697171, 0.07370649156570434), u: [35.608368  0.     

Step: 731, L: 68.03089141845703, J: -66.63928985595703
Current x: (-0.003201626074976179, 0.05669883252659582, 0.14592541885375976), u: [36.93487  0.     ], noise: [0.00599153 0.00585343]
Start training ...
Step: 731, L: 28.99655532836914, J: -66.63484954833984
Current x: (-0.004774057143233868, 0.0740717866146604, 0.17707647161483764), u: [37.005547  0.      ], noise: [0.00637309 0.00632599]
Start training ...
Step: 731, L: 107.33362579345703, J: -66.62968444824219
Current x: (-0.0068837233015699, 0.09411914449955314, 0.2119210256576538), u: [37.04171  0.     ], noise: [0.00678494 0.00645992]
Start training ...
Step: 731, L: 23.821834564208984, J: -66.62551879882812
Current x: (-0.009645475177116128, 0.1168294410999378, 0.2504661393165588), u: [37.06142  0.     ], noise: [0.00702167 0.00625709]
Start training ...
Step: 731, L: 504.8127136230469, J: -66.6209716796875
Current x: (-0.013186634802232977, 0.1421813362305532, 0.29271545677185057), u: [37.072807  0.      ], noise: [0.0058681

Step: 732, L: 336.8664245605469, J: -66.47565460205078
Current x: (-0.030376959012481896, 0.2376728230714374, 0.44570840473175055), u: [37.08764  0.     ], noise: [0.00536037 0.00637844]
Start training ...
Step: 732, L: 318.98468017578125, J: -66.47232055664062
Current x: (-0.03864003490298312, 0.273471691099654, 0.5031053502082825), u: [37.089428  0.      ], noise: [0.00588616 0.00719845]
Start training ...
Step: 732, L: 780.8004760742188, J: -66.46682739257812
Current x: (-0.04850245448197165, 0.3116370562922315, 0.5642109577178955), u: [37.0906  0.    ], noise: [0.00733922 0.0072376 ]
Start training ...
Step: 732, L: 457.1396179199219, J: -66.46339416503906
Current x: (-0.060153765846352875, 0.3520719338158287, 0.6290253767967224), u: [37.09132  0.     ], noise: [0.00652003 0.00662355]
Start training ...
Step: 732, L: 128.7823486328125, J: -66.45968627929688
Current x: (-0.07378927359995469, 0.3946612387138676, 0.6975488660812378), u: [37.091713  0.      ], noise: [0.00755959 0.0059

Step: 733, L: 73.42493438720703, J: -66.27711486816406
Current x: (-0.2432146272673365, 0.7443605397646508, 1.2872928904533383), u: [37.08316  0.     ], noise: [0.00611531 0.00712091]
Start training ...
Step: 733, L: 611.9785766601562, J: -66.27205657958984
Current x: (-0.27982324745328835, 0.7974752130825382, 1.3857507161140439), u: [37.077538  0.      ], noise: [0.00716753 0.00738695]
Start training ...
Step: 733, L: 199.32032775878906, J: -66.26762390136719
Current x: (-0.3199934235608331, 0.850646550323804, 1.4879167571067806), u: [37.06834  0.     ], noise: [0.00615658 0.00649285]
Start training ...
Step: 733, L: 324.1277160644531, J: -66.26240539550781
Simulation ends in 35 steps
Episode 734 begins...
Current x: (0.0, 0.0, 0.0), u: [10.513044  0.      ], noise: [0.00628204 0.00744038]
Start training ...
Step: 734, L: 588.0753784179688, J: -66.25847625732422
Current x: (0.0, 0.0, 0.0), u: [10.803608  0.      ], noise: [0.00694025 0.00747863]
Start training ...
Step: 734, L: 39.962

Step: 735, L: 346.39642333984375, J: -66.07919311523438
Current x: (-5.133113024171047e-05, 0.0036503588853740664, 0.024224441623687747), u: [23.583698  0.      ], noise: [0.00570451 0.006366  ]
Start training ...
Step: 735, L: 240.73519897460938, J: -66.07559204101562
Current x: (-0.00011464648777388002, 0.006131571426091341, 0.03356405019760132), u: [28.486105  0.      ], noise: [0.0072699  0.00623452]
Start training ...
Step: 735, L: 167.95164489746094, J: -66.07229614257812
Current x: (-0.00023511569006051035, 0.009990668661186058, 0.04526196241378785), u: [32.525017  0.      ], noise: [0.0067628  0.00531264]
Start training ...
Step: 735, L: 147.9530029296875, J: -66.06581115722656
Current x: (-0.00045122317026432304, 0.01571712183169545, 0.05980858869552613), u: [34.96209  0.     ], noise: [0.0059056  0.00624506]
Start training ...
Step: 735, L: 91.17752838134766, J: -66.0640869140625
Current x: (-0.000814549644963486, 0.023712952134620626, 0.07760786161422731), u: [36.135094  0. 

Step: 736, L: 244.6095733642578, J: -65.87301635742188
Current x: (-0.014284207199915504, 0.15692887773301226, 0.3075695052146912), u: [37.0808  0.    ], noise: [0.00588906 0.00647851]
Start training ...
Step: 736, L: 684.197509765625, J: -65.86756134033203
Current x: (-0.01904017994915462, 0.1863579881606476, 0.3550362199783325), u: [37.08582  0.     ], noise: [0.0051191  0.00504915]
Start training ...
Step: 736, L: 101.63803100585938, J: -65.86575317382812
Current x: (-0.024919122700224377, 0.21834134515318537, 0.4062109559059143), u: [37.089046  0.      ], noise: [0.00569825 0.00642526]
Start training ...
Step: 736, L: 744.574951171875, J: -65.8562240600586
Current x: (-0.03208761227855367, 0.25282194784547185, 0.4610942806243896), u: [37.091194  0.      ], noise: [0.00554915 0.00608598]
Start training ...
Step: 736, L: 540.0811767578125, J: -65.85777282714844
Current x: (-0.040722085801156216, 0.2897297551856278, 0.519686437511444), u: [37.092636  0.      ], noise: [0.00481328 0.00

Step: 737, L: 967.3565673828125, J: -65.66157531738281
Current x: (-0.11350819622459081, 0.511243384124577, 0.8727111250877381), u: [37.095734  0.      ], noise: [0.00741307 0.00663129]
Start training ...
Step: 737, L: 338.2529602050781, J: -65.65933227539062
Current x: (-0.1351340542688839, 0.5604380892314196, 0.9537912610054017), u: [37.0952  0.    ], noise: [0.00612497 0.00618405]
Start training ...
Step: 737, L: 401.24713134765625, J: -65.6626205444336
Current x: (-0.1596027969616846, 0.6110370325410326, 1.038581048488617), u: [37.09427  0.     ], noise: [0.00637073 0.00598371]
Start training ...
Step: 737, L: 328.9634704589844, J: -65.65139770507812
Current x: (-0.18709808367380515, 0.6628019979734789, 1.1270803502082827), u: [37.0928  0.    ], noise: [0.00762954 0.00670358]
Start training ...
Step: 737, L: 888.041748046875, J: -65.63994598388672
Current x: (-0.2177907927806477, 0.7154689147925021, 1.2192891173362734), u: [37.090538  0.      ], noise: [0.00612876 0.00600923]
Start

Step: 738, L: 646.180419921875, J: -65.41506958007812
Current x: (-0.33333140052122445, 0.8826218073877379, 1.5262867759704588), u: [37.074165  0.      ], noise: [0.00661452 0.00591681]
Start training ...
Step: 738, L: 171.21815490722656, J: -65.43151092529297
Simulation ends in 35 steps
Episode 739 begins...
Current x: (0.0, 0.0, 0.0), u: [10.61101  0.     ], noise: [0.00638693 0.00686797]
Start training ...
Step: 739, L: 368.8752136230469, J: -65.41744232177734
Current x: (0.0, 0.0, 0.0), u: [10.998273  0.      ], noise: [0.0048791 0.0059099]
Start training ...
Step: 739, L: 454.0462341308594, J: -65.40478515625
Current x: (0.0, 8.142647171020507e-05, 0.0010610527992248534), u: [11.643031  0.      ], noise: [0.0062593  0.00628242]
Start training ...
Step: 739, L: 703.4669189453125, J: -65.40361022949219
Current x: (0.0, 0.00028275912475585934, 0.0032218297958374028), u: [12.7134075  0.       ], noise: [0.00653444 0.00594825]
Start training ...
Step: 739, L: 324.26092529296875, J: -65

Step: 740, L: 791.17919921875, J: -65.16322326660156
Current x: (-2.1878096733191884e-05, 0.002642952178738751, 0.017339663887023925), u: [21.842491  0.      ], noise: [0.00686261 0.00701825]
Start training ...
Step: 740, L: 561.9151000976562, J: -65.15523529052734
Current x: (-5.696108021586627e-05, 0.00465535775124595, 0.02523090839385986), u: [26.77657  0.     ], noise: [0.00691774 0.00672134]
Start training ...
Step: 740, L: 419.5876770019531, J: -65.14437866210938
Current x: (-0.00012994038049438973, 0.007872072044148151, 0.03530638637542724), u: [31.311272  0.      ], noise: [0.00607987 0.00578507]
Start training ...
Step: 740, L: 781.767578125, J: -65.14091491699219
Current x: (-0.0002705066397283778, 0.012785954533282821, 0.04805954093933105), u: [34.33324  0.     ], noise: [0.00528465 0.00602941]
Start training ...
Step: 740, L: 494.4953308105469, J: -65.13468933105469
Current x: (-0.000521640603168194, 0.019849198674924597, 0.06394385223388671), u: [35.86824  0.     ], noise:

Step: 741, L: 1211.09326171875, J: -64.81254577636719
Current x: (-0.005869034781900838, 0.09733334225571104, 0.20040031385421753), u: [37.06983  0.     ], noise: [0.00605454 0.00582107]
Start training ...
Step: 741, L: 479.7921142578125, J: -64.83439636230469
Current x: (-0.008356318101387615, 0.12060225863830701, 0.23850124597549438), u: [37.085117  0.      ], noise: [0.00636133 0.00628064]
Start training ...
Step: 741, L: 388.23406982421875, J: -64.82231903076172
Current x: (-0.011581755995557661, 0.14652413398665312, 0.280309184551239), u: [37.094063  0.      ], noise: [0.00568403 0.00592283]
Start training ...
Step: 741, L: 256.4731140136719, J: -64.80476379394531
Current x: (-0.015683615702553937, 0.17506977315768682, 0.3258256432533264), u: [37.09959  0.     ], noise: [0.00601335 0.0050091 ]
Start training ...
Step: 741, L: 391.2453918457031, J: -64.79686737060547
Current x: (-0.02081201410688934, 0.20620015579309708, 0.3750514842033386), u: [37.103172  0.      ], noise: [0.0058

Step: 742, L: 829.97021484375, J: -64.5340805053711
Current x: (-0.03539916518892405, 0.2809245819016414, 0.48963631734848023), u: [37.114334  0.      ], noise: [0.00615818 0.00463984]
Start training ...
Step: 742, L: 1760.4344482421875, J: -64.48155212402344
Current x: (-0.04474186944160211, 0.31979558031918653, 0.5503603468894959), u: [37.115505  0.      ], noise: [0.00492104 0.00606936]
Start training ...
Step: 742, L: 913.8568725585938, J: -64.49234008789062
Current x: (-0.055830586987454314, 0.36096188612955366, 0.6147959616661072), u: [37.116364  0.      ], noise: [0.00600633 0.00592355]
Start training ...
Step: 742, L: 308.98785400390625, J: -64.54280090332031
Current x: (-0.06886099944818963, 0.4043116172330292, 0.6829430121421813), u: [37.116947  0.      ], noise: [0.00587529 0.00614828]
Start training ...
Step: 742, L: 745.6389770507812, J: -64.47952270507812
Current x: (-0.08403294178345647, 0.44971332597567526, 0.7548017073631286), u: [37.117317  0.      ], noise: [0.006191

Step: 743, L: 1663.968505859375, J: -64.11798095703125
Current x: (-0.10261132641331731, 0.5019137662146258, 0.8355312218666079), u: [37.128963  0.      ], noise: [0.00581759 0.00589858]
Start training ...
Step: 743, L: 824.8684692382812, J: -64.14537048339844
Current x: (-0.1228340820807255, 0.5511496154178673, 0.9150966210365298), u: [37.12905  0.     ], noise: [0.00657912 0.00510375]
Start training ...
Step: 743, L: 482.2423095703125, J: -64.20620727539062
Current x: (-0.14581138712370934, 0.6018958011423194, 0.9983749085426333), u: [37.128845  0.      ], noise: [0.00676233 0.00578486]
Start training ...
Step: 743, L: 164.06484985351562, J: -64.16891479492188
Current x: (-0.17173254673159294, 0.6539255094940127, 1.0853662487983706), u: [37.128277  0.      ], noise: [0.00509009 0.00642678]
Start training ...
Step: 743, L: 653.3634643554688, J: -64.08143615722656
Current x: (-0.20077578139845342, 0.7069860521183207, 1.176070571613312), u: [37.12719  0.     ], noise: [0.00548476 0.0057

Step: 745, L: 696.27294921875, J: -63.77245330810547
Current x: (0.0, 0.0, 0.0), u: [11.448616  0.      ], noise: [0.0052122  0.00641463]
Start training ...
Step: 745, L: 1191.7708740234375, J: -63.64805603027344
Current x: (0.0, 0.00010422949218749991, 0.0010838924407958984), u: [12.469097  0.      ], noise: [0.00675491 0.0046795 ]
Start training ...
Step: 745, L: 1146.5709228515625, J: -63.62847900390625
Current x: (0.0, 0.00037348321151733395, 0.0033125262260437012), u: [14.149957  0.      ], noise: [0.00643925 0.00601775]
Start training ...
Step: 745, L: 645.3270874023438, J: -63.69202423095703
Current x: (-1.3527551309873315e-06, 0.0009097893669770538, 0.006788277244567872), u: [16.813719  0.      ], noise: [0.00622671 0.0056999 ]
Start training ...
Step: 745, L: 90.45609283447266, J: -63.78383255004883
Current x: (-7.396838328888919e-06, 0.001881329112066734, 0.01167906608581543), u: [20.715769  0.      ], noise: [0.0063445  0.00664509]
Start training ...
Step: 745, L: 180.244216

Step: 746, L: 453.68359375, J: -63.194801330566406
Current x: (-0.004636088078677759, 0.0923119114458602, 0.18161188488006594), u: [37.14247  0.     ], noise: [0.00615698 0.00620089]
Start training ...
Step: 746, L: 1052.8162841796875, J: -63.080345153808594
Current x: (-0.006750610343053935, 0.11510411308017833, 0.2182251452445984), u: [37.157875  0.      ], noise: [0.00448893 0.00539362]
Start training ...
Step: 746, L: 130.7188262939453, J: -63.17449188232422
Current x: (-0.00953620526601899, 0.1405696923774109, 0.2585526481628418), u: [37.16698  0.     ], noise: [0.00469826 0.00627301]
Start training ...
Step: 746, L: 520.6751098632812, J: -63.15922546386719
Current x: (-0.013126471778106646, 0.16868289767922223, 0.30259584817886354), u: [37.17276  0.     ], noise: [0.00563995 0.00547441]
Start training ...
Step: 746, L: 228.75506591796875, J: -63.163917541503906
Current x: (-0.017667310024364197, 0.19940932256087346, 0.35035558910369874), u: [37.17666  0.     ], noise: [0.00643557

Step: 747, L: 1044.361328125, J: -62.469444274902344
Current x: (-0.03976651178688351, 0.3156101272526176, 0.5249738833427429), u: [37.20866  0.     ], noise: [0.00575225 0.00515652]
Start training ...
Step: 747, L: 1407.953369140625, J: -62.54249954223633
Current x: (-0.05002538684264156, 0.3567911551652796, 0.5882439455032348), u: [37.20999  0.     ], noise: [0.00595664 0.00657181]
Start training ...
Step: 747, L: 674.7100830078125, J: -62.69197082519531
Current x: (-0.062149671043135825, 0.4002119303129199, 0.6552349331855774), u: [37.211094  0.      ], noise: [0.0053056  0.00574722]
Start training ...
Step: 747, L: 921.4925537109375, J: -62.629146575927734
Current x: (-0.07633943649879335, 0.44574830784587477, 0.725946858215332), u: [37.21202  0.     ], noise: [0.00525952 0.00592856]
Start training ...
Step: 747, L: 946.2003784179688, J: -62.642608642578125
Current x: (-0.09279731699553456, 0.49325504741087883, 0.8003798483848571), u: [37.21278  0.     ], noise: [0.00502666 0.00518

Step: 748, L: 61.956600189208984, J: -62.14862823486328
Current x: (-0.18882768064093847, 0.7106568845477581, 1.147919899272919), u: [37.241196  0.      ], noise: [0.00581153 0.00538364]
Start training ...
Step: 748, L: 932.9532470703125, J: -61.85057830810547
Current x: (-0.21999807149815956, 0.7653218002058784, 1.2415453437805177), u: [37.24064  0.     ], noise: [0.00547187 0.00502622]
Start training ...
Step: 748, L: 978.8146362304688, J: -61.99182891845703
Current x: (-0.25456555333018005, 0.820534498721806, 1.3388949505805972), u: [37.239353  0.      ], noise: [0.00617986 0.00524698]
Start training ...
Step: 748, L: 573.5654907226562, J: -61.95195770263672
Current x: (-0.29265805358362373, 0.8759706543945016, 1.4399686655044557), u: [37.236874  0.      ], noise: [0.00547097 0.00568308]
Start training ...
Step: 748, L: 58.79773712158203, J: -61.86060333251953
Current x: (-0.33437591566043423, 0.9312819388036738, 1.5447664092063904), u: [37.232307  0.      ], noise: [0.00602132 0.00

Step: 750, L: 240.40902709960938, J: -61.429500579833984
Current x: (0.0, 0.00013906416320800779, 0.001118964958190918), u: [13.751384  0.      ], noise: [0.00473937 0.00487412]
Start training ...
Step: 750, L: 1266.696533203125, J: -61.2310905456543
Current x: (0.0, 0.0005130781478881834, 0.003452704048156738), u: [16.342823  0.      ], noise: [0.00512956 0.00518844]
Start training ...
Step: 750, L: 1030.3897705078125, J: -61.313507080078125
Current x: (-1.5398071083577323e-06, 0.0012821910482948501, 0.007161568069458008), u: [20.233126  0.      ], noise: [0.00537833 0.00552507]
Start training ...
Step: 750, L: 290.12530517578125, J: -61.264060974121094
Current x: (-8.725858223847946e-06, 0.0027056081890189966, 0.012504708480834962), u: [25.282093  0.      ], noise: [0.0067095  0.00543474]
Start training ...
Step: 750, L: 785.6522827148438, J: -61.282169342041016
Current x: (-3.0409685295110297e-05, 0.005172376415994512, 0.019871146774291994), u: [30.375368  0.      ], noise: [0.00574

Step: 751, L: 530.14453125, J: -60.477352142333984
Current x: (-0.007652063528720138, 0.13227947005070595, 0.23546455631256105), u: [37.32378  0.     ], noise: [0.00577649 0.00659776]
Start training ...
Step: 751, L: 152.51329040527344, J: -60.54354476928711
Current x: (-0.010739473271306609, 0.15974982198431237, 0.27783073673248293), u: [37.329876  0.      ], noise: [0.00520422 0.00486793]
Start training ...
Step: 751, L: 625.3826293945312, J: -60.365570068359375
Current x: (-0.014697915899818557, 0.18986976427858862, 0.3239292127609253), u: [37.334183  0.      ], noise: [0.00580836 0.00525801]
Start training ...
Step: 751, L: 1259.62939453125, J: -60.329322814941406
Current x: (-0.019680482060703237, 0.2225995125020399, 0.3737607099533081), u: [37.33748  0.     ], noise: [0.00492185 0.00573382]
Start training ...
Step: 751, L: 1318.6859130859375, J: -60.280845642089844
Current x: (-0.02585172461279467, 0.2578885608253672, 0.4273256807327271), u: [37.34015  0.     ], noise: [0.0051066

Step: 752, L: 850.1431884765625, J: -59.362457275390625
Current x: (-0.08111695179779695, 0.47072811376002865, 0.7519565678596497), u: [37.4052  0.    ], noise: [0.00471327 0.00540284]
Start training ...
Step: 752, L: 755.359130859375, J: -59.33666229248047
Current x: (-0.09840369860212513, 0.5195589710928716, 0.8280245106697083), u: [37.406895  0.      ], noise: [0.00555504 0.00525546]
Start training ...
Step: 752, L: 303.864013671875, J: -59.30086135864258
Current x: (-0.11824616995932592, 0.5701414703474805, 0.9078329049110413), u: [37.40845  0.     ], noise: [0.00526663 0.00481789]
Start training ...
Step: 752, L: 1316.40576171875, J: -59.166893005371094
Current x: (-0.14084481724154074, 0.622273649579107, 0.9913820183753967), u: [37.409885  0.      ], noise: [0.00484988 0.00575695]
Start training ...
Step: 752, L: 1061.783447265625, J: -58.97639465332031
Current x: (-0.16639268825597875, 0.6757277722738231, 1.0786720219612123), u: [37.411175  0.      ], noise: [0.00604673 0.005135

Step: 754, L: 1224.6533203125, J: -57.62089920043945
Current x: (-7.967812683544368e-06, 0.0022165353797113834, 0.01201630916595459), u: [23.365831  0.      ], noise: [0.00493784 0.00576912]
Start training ...
Step: 754, L: 1191.8385009765625, J: -57.556396484375
Current x: (-2.741981461592278e-05, 0.0042707481620235434, 0.018970254707336424), u: [28.700533  0.      ], noise: [0.0060052  0.00583509]
Start training ...
Step: 754, L: 597.1253662109375, J: -58.292701721191406
Current x: (-7.496111282229711e-05, 0.007681446100505514, 0.028260700225830077), u: [33.068714  0.      ], noise: [0.00586195 0.00644351]
Start training ...
Step: 754, L: 651.9506225585938, J: -58.082489013671875
Current x: (-0.00017696724485586176, 0.012981864600226364, 0.04042121601104736), u: [35.581284  0.      ], noise: [0.00598302 0.00602991]
Start training ...
Step: 754, L: 493.728759765625, J: -57.71467971801758
Current x: (-0.00037245021285883914, 0.020608064187132305, 0.05588854522705078), u: [36.704742  0.

Current x: (-0.001068244655910594, 0.03512679321458643, 0.0890400842666626), u: [37.304413  0.      ], noise: [0.00542315 0.00568329]
Start training ...
Step: 755, L: 670.1968383789062, J: -56.912654876708984
Current x: (-0.0017913948054605936, 0.04890504751133873, 0.11361534690856934), u: [37.460632  0.      ], noise: [0.00555663 0.00601662]
Start training ...
Step: 755, L: 1317.19287109375, J: -56.324623107910156
Current x: (-0.002846363813821445, 0.06541907157486987, 0.14192102489471436), u: [37.532368  0.      ], noise: [0.00586331 0.00550827]
Start training ...
Step: 755, L: 588.2513427734375, J: -56.24477005004883
Current x: (-0.00432615923871607, 0.08467515685568003, 0.17397272033691408), u: [37.56858  0.     ], noise: [0.0053059  0.00498607]
Start training ...
Step: 755, L: 872.2833862304688, J: -56.906429290771484
Current x: (-0.0063369924056817, 0.10666686998131297, 0.20977768802642824), u: [37.588707  0.      ], noise: [0.0047656  0.00558177]
Start training ...
Step: 755, L:

Step: 756, L: 1046.3189697265625, J: -54.82781982421875
Current x: (-0.03821809800662959, 0.26956683565003897, 0.49918493843078615), u: [37.724033  0.      ], noise: [0.00547104 0.00533852]
Start training ...
Step: 756, L: 253.946044921875, J: -55.21094512939453
Current x: (-0.048069100471879844, 0.3079681392175479, 0.5605171642303467), u: [37.727974  0.      ], noise: [0.00475466 0.00507482]
Start training ...
Step: 756, L: 695.0407104492188, J: -54.78815460205078
Current x: (-0.059726507842392515, 0.34870146000703833, 0.625621806716919), u: [37.731556  0.      ], noise: [0.00546802 0.00635854]
Start training ...
Step: 756, L: 459.6908264160156, J: -55.80989074707031
Current x: (-0.07339014838387259, 0.3916500981155012, 0.6944992145538331), u: [37.734756  0.      ], noise: [0.00520606 0.00528862]
Start training ...
Step: 756, L: 564.1675415039062, J: -54.45599365234375
Current x: (-0.089264046713534, 0.4366772115025884, 0.7671496887207032), u: [37.737793  0.      ], noise: [0.0045605 

Step: 772, L: 370.60076904296875, J: -53.433143615722656
Current x: (0.0, 0.0, 0.0), u: [8.411708 0.      ], noise: [0.0050086  0.00454169]
Start training ...
Step: 772, L: 1179.5589599609375, J: -53.52011489868164
Simulation ends in 2 steps
Episode 773 begins...
Current x: (0.0, 0.0, 0.0), u: [9.105194 0.      ], noise: [0.00429833 0.00459748]
Start training ...
Step: 773, L: 871.9927978515625, J: -53.244354248046875
Current x: (0.0, 0.0, 0.0), u: [8.340916 0.      ], noise: [0.00361982 0.00420082]
Start training ...
Step: 773, L: 715.8741455078125, J: -52.91993713378906
Simulation ends in 2 steps
Episode 774 begins...
Current x: (0.0, 0.0, 0.0), u: [9.067664 0.      ], noise: [0.00430816 0.00429332]
Start training ...
Step: 774, L: 90.43354034423828, J: -53.26479721069336
Current x: (0.0, 0.0, 0.0), u: [8.27223 0.     ], noise: [0.00403076 0.00469818]
Start training ...
Step: 774, L: 393.75628662109375, J: -54.291038513183594
Simulation ends in 2 steps
Episode 775 begins...
Current x

Step: 797, L: 1335.961669921875, J: -51.23102569580078
Current x: (0.0, 0.0, 0.0), u: [6.9134808 0.       ], noise: [0.00384015 0.0030319 ]
Start training ...
Step: 797, L: 114.48368072509766, J: -51.89288330078125
Simulation ends in 2 steps
Episode 798 begins...
Current x: (0.0, 0.0, 0.0), u: [8.279808 0.      ], noise: [0.00302459 0.00318487]
Start training ...
Step: 798, L: 1379.90234375, J: -51.203590393066406
Current x: (0.0, 0.0, 0.0), u: [6.8489294 0.       ], noise: [0.00397685 0.00363123]
Start training ...
Step: 798, L: 830.58544921875, J: -50.58213806152344
Simulation ends in 2 steps
Episode 799 begins...
Current x: (0.0, 0.0, 0.0), u: [8.252605 0.      ], noise: [0.00348589 0.00346829]
Start training ...
Step: 799, L: 1514.5048828125, J: -51.38793182373047
Current x: (0.0, 0.0, 0.0), u: [6.7944126 0.       ], noise: [0.00297351 0.00391788]
Start training ...
Step: 799, L: 1283.9927978515625, J: -51.28024673461914
Simulation ends in 2 steps
Episode 800 begins...
Current x: (

Step: 821, L: 607.71337890625, J: -47.630531311035156
Simulation ends in 2 steps
Episode 822 begins...
Current x: (0.0, 0.0, 0.0), u: [7.671031  0.3974976], noise: [0.0025683  0.00283144]
Start training ...
Step: 822, L: 1093.076416015625, J: -47.991363525390625
Current x: (0.0, 0.0, 0.0), u: [6.0497036  0.44549912], noise: [0.00299573 0.00270046]
Start training ...
Step: 822, L: 904.5047607421875, J: -47.082374572753906
Simulation ends in 2 steps
Episode 823 begins...
Current x: (0.0, 0.0, 0.0), u: [7.6370664  0.46361947], noise: [0.00276673 0.00339069]
Start training ...
Step: 823, L: 473.0714416503906, J: -48.94044494628906
Current x: (0.0, 0.0, 0.0), u: [6.041154   0.50118655], noise: [0.00261779 0.00289664]
Start training ...
Step: 823, L: 989.1727294921875, J: -46.20646667480469
Simulation ends in 2 steps
Episode 824 begins...
Current x: (0.0, 0.0, 0.0), u: [7.597535   0.52974284], noise: [0.00243794 0.00240407]
Start training ...
Step: 824, L: 1014.3901977539062, J: -46.82717895

Step: 844, L: 1226.798095703125, J: -34.44647979736328
Current x: (0.0, 0.0, 0.0), u: [5.057656 1.408672], noise: [0.00212633 0.00214183]
Start training ...
Step: 844, L: 1424.80712890625, J: -39.00187683105469
Simulation ends in 2 steps
Episode 845 begins...
Current x: (0.0, 0.0, 0.0), u: [6.4996    1.6633172], noise: [0.00218174 0.00243539]
Start training ...
Step: 845, L: 1047.4281005859375, J: -36.97916793823242
Current x: (0.0, 0.0, 0.0), u: [5.015878  1.4453146], noise: [0.00234226 0.00241561]
Start training ...
Step: 845, L: 867.4027099609375, J: -35.364288330078125
Simulation ends in 2 steps
Episode 846 begins...
Current x: (0.0, 0.0, 0.0), u: [6.440653  1.7060487], noise: [0.00195777 0.00226756]
Start training ...
Step: 846, L: 935.5028686523438, J: -35.10951614379883
Current x: (0.0, 0.0, 0.0), u: [4.953631  1.4761448], noise: [0.00171692 0.00163478]
Start training ...
Step: 846, L: 990.8452758789062, J: -32.0788459777832
Simulation ends in 2 steps
Episode 847 begins...
Curre

Step: 867, L: 314.5622253417969, J: -37.33089828491211
Simulation ends in 2 steps
Episode 868 begins...
Current x: (0.0, 0.0, 0.0), u: [4.691301  2.2948565], noise: [0.0016986  0.00162222]
Start training ...
Step: 868, L: 778.6853637695312, J: -36.919864654541016
Current x: (0.0, 0.0, 0.0), u: [2.9906392 1.6891873], noise: [0.0016392  0.00179418]
Start training ...
Step: 868, L: 854.3232421875, J: -35.90162658691406
Simulation ends in 2 steps
Episode 869 begins...
Current x: (0.0, 0.0, 0.0), u: [4.6307282 2.3153374], noise: [0.00196868 0.00183359]
Start training ...
Step: 869, L: 1386.60546875, J: -30.90589714050293
Current x: (0.0, 0.0, 0.0), u: [2.9296062 1.6962899], noise: [0.00161273 0.00159526]
Start training ...
Step: 869, L: 943.548095703125, J: -33.15363311767578
Simulation ends in 2 steps
Episode 870 begins...
Current x: (0.0, 0.0, 0.0), u: [4.5649486 2.3324943], noise: [0.00152287 0.00172232]
Start training ...
Step: 870, L: 1429.97900390625, J: -34.83854675292969
Current x: 

Step: 893, L: 432.5190124511719, J: -44.76177978515625
Simulation ends in 2 steps
Episode 894 begins...
Current x: (0.0, 0.0, 0.0), u: [3.5161774 2.7192547], noise: [0.00121224 0.00144225]
Start training ...
Step: 894, L: 227.15830993652344, J: -46.94317626953125
Current x: (0.0, 0.0, 0.0), u: [2.0515335 1.885911 ], noise: [0.00140011 0.00115151]
Start training ...
Step: 894, L: 202.69467163085938, J: -46.597755432128906
Simulation ends in 2 steps
Episode 895 begins...
Current x: (0.0, 0.0, 0.0), u: [3.4699044 2.7217815], noise: [0.00121593 0.00140119]
Start training ...
Step: 895, L: 195.38723754882812, J: -43.99102783203125
Current x: (0.0, 0.0, 0.0), u: [2.0028164 1.8730289], noise: [0.00128728 0.00115659]
Start training ...
Step: 895, L: 292.75433349609375, J: -44.36170196533203
Simulation ends in 2 steps
Episode 896 begins...
Current x: (0.0, 0.0, 0.0), u: [3.4244244 2.7242134], noise: [0.00133423 0.00117288]
Start training ...
Step: 896, L: 347.6348571777344, J: -44.7772674560546

Step: 917, L: 297.7806091308594, J: -49.704158782958984
Current x: (0.0, 0.0, 0.0), u: [1.4990156 1.8408607], noise: [0.00105731 0.00121657]
Start training ...
Step: 917, L: 126.47036743164062, J: -49.926734924316406
Simulation ends in 2 steps
Episode 918 begins...
Current x: (0.0, 0.0, 0.0), u: [2.8523455 2.8848329], noise: [0.00093147 0.00108987]
Start training ...
Step: 918, L: 63.13903045654297, J: -51.56139373779297
Current x: (0.0, 0.0, 0.0), u: [1.4833876 1.8409411], noise: [0.00092162 0.00096394]
Start training ...
Step: 918, L: 472.91864013671875, J: -50.128787994384766
Simulation ends in 2 steps
Episode 919 begins...
Current x: (0.0, 0.0, 0.0), u: [2.8354282 2.8935976], noise: [0.00104004 0.00106997]
Start training ...
Step: 919, L: 64.30701446533203, J: -52.115997314453125
Current x: (0.0, 0.0, 0.0), u: [1.4725299 1.8446856], noise: [0.00095438 0.00115601]
Start training ...
Step: 919, L: 132.7529754638672, J: -51.655704498291016
Simulation ends in 2 steps
Episode 920 begins

Step: 942, L: 70.76934051513672, J: -53.70798873901367
Simulation ends in 2 steps
Episode 943 begins...
Current x: (0.0, 0.0, 0.0), u: [2.4046533 3.0097368], noise: [0.00075838 0.00088197]
Start training ...
Step: 943, L: 140.5048370361328, J: -53.20489501953125
Current x: (0.0, 0.0, 0.0), u: [1.1240324 1.806983 ], noise: [0.00076056 0.0007918 ]
Start training ...
Step: 943, L: 23.175256729125977, J: -53.81023406982422
Simulation ends in 2 steps
Episode 944 begins...
Current x: (0.0, 0.0, 0.0), u: [2.3909032 3.0164666], noise: [0.00081446 0.0006996 ]
Start training ...
Step: 944, L: 253.84500122070312, J: -53.505428314208984
Current x: (0.0, 0.0, 0.0), u: [1.1174115 1.8107734], noise: [0.00101446 0.0008313 ]
Start training ...
Step: 944, L: 80.64859008789062, J: -53.69232177734375
Simulation ends in 2 steps
Episode 945 begins...
Current x: (0.0, 0.0, 0.0), u: [2.3798022 3.0249987], noise: [0.00085486 0.00073404]
Start training ...
Step: 945, L: 15.07886028289795, J: -54.77546691894531


Step: 966, L: 78.28267669677734, J: -54.17691421508789
Simulation ends in 2 steps
Episode 967 begins...
Current x: (0.0, 0.0, 0.0), u: [2.1305602 3.184262 ], noise: [0.0006414  0.00068147]
Start training ...
Step: 967, L: 79.03255462646484, J: -54.46583557128906
Current x: (0.0, 0.0, 0.0), u: [0.9755693 1.896898 ], noise: [0.00064597 0.00075451]
Start training ...
Step: 967, L: 178.0564727783203, J: -54.026824951171875
Simulation ends in 2 steps
Episode 968 begins...
Current x: (0.0, 0.0, 0.0), u: [2.1176174 3.189056 ], noise: [0.00060805 0.00061271]
Start training ...
Step: 968, L: 152.6244354248047, J: -54.50937271118164
Current x: (0.0, 0.0, 0.0), u: [0.9658774 1.8963194], noise: [0.00058533 0.00057379]
Start training ...
Step: 968, L: 389.2027587890625, J: -52.842472076416016
Simulation ends in 2 steps
Episode 969 begins...
Current x: (0.0, 0.0, 0.0), u: [2.1028087 3.1917055], noise: [0.00061373 0.00071999]
Start training ...
Step: 969, L: 237.07772827148438, J: -54.88654327392578


Step: 991, L: 80.33447265625, J: -55.38845443725586
Simulation ends in 2 steps
Episode 992 begins...
Current x: (0.0, 0.0, 0.0), u: [1.8348821 3.27791  ], noise: [0.00053943 0.00055049]
Start training ...
Step: 992, L: 53.58854675292969, J: -55.44139099121094
Current x: (0.0, 0.0, 0.0), u: [0.75936973 1.8822845 ], noise: [0.00053224 0.00056192]
Start training ...
Step: 992, L: 193.19668579101562, J: -55.08283996582031
Simulation ends in 2 steps
Episode 993 begins...
Current x: (0.0, 0.0, 0.0), u: [1.8266143 3.2833114], noise: [0.00045881 0.0004906 ]
Start training ...
Step: 993, L: 109.60499572753906, J: -55.13428497314453
Current x: (0.0, 0.0, 0.0), u: [0.75431037 1.8845406 ], noise: [0.00052378 0.0005338 ]
Start training ...
Step: 993, L: 186.90219116210938, J: -55.312095642089844
Simulation ends in 2 steps
Episode 994 begins...
Current x: (0.0, 0.0, 0.0), u: [1.8178244 3.2878265], noise: [0.00047236 0.00049596]
Start training ...
Step: 994, L: 125.84571838378906, J: -55.581089019775

In [15]:
# Split the experiences in `exps` into one list per field. Each experience
# unpacks into five values; the fifth is discarded here. The first four are
# presumably state, action, cost and next state -- TODO confirm against the
# code that fills `exps`.
xs, us, cs, x_nexts = [], [], [], []
for exp in exps:
    # The unpacking targets are deliberately left as plain notebook globals:
    # they remain bound to the last experience after the loop, and a later
    # cell displays a bare `u`.
    x, u, c, x_next, _ = exp
    xs.append(x)
    us.append(u)
    cs.append(c)
    x_nexts.append(x_next)

# Stack every field with np.array and turn it into a float32 tensor. All four
# conversions are evaluated before the names are rebound.
xs, us, cs, x_nexts = (
    tf.convert_to_tensor(np.array(field_values), dtype=tf.float32)
    for field_values in (xs, us, cs, x_nexts)
)

In [16]:
# Run `pg.target_p_network` (presumably the target policy network -- confirm)
# on the batch of next states and display the result.
u_nexts = pg.target_p_network(x_nexts)
# Disabled cast, kept for reference; the displayed result already reports
# dtype=float32.
# u_nexts = tf.cast(u_nexts, tf.float32)
u_nexts

<tf.Tensor: shape=(64, 2), dtype=float32, numpy=
array([[-27.397186 ,  -3.6126623],
       [-20.294073 ,  -3.0222569],
       [-19.01974  ,  -2.848195 ],
       [-19.38033  ,  -2.8981712],
       [-19.01974  ,  -2.848195 ],
       [-19.01974  ,  -2.848195 ],
       [-28.191956 ,  -3.3507142],
       [-26.69639  ,  -3.7980733],
       [-26.883615 ,  -3.5038812],
       [-17.440256 ,  -2.660512 ],
       [-20.086332 ,  -2.9943938],
       [-27.932484 ,  -3.5136137],
       [-16.571728 ,  -2.5351715],
       [-19.829245 ,  -2.9596193],
       [-19.01974  ,  -2.848195 ],
       [-27.993149 ,  -3.5337703],
       [-28.11476  ,  -3.4955015],
       [-17.214678 ,  -2.6281576],
       [-20.005571 ,  -2.9835043],
       [-22.90214  ,  -3.299993 ],
       [-24.485182 ,  -3.5085058],
       [-15.993697 ,  -2.4506803],
       [-17.194021 ,  -2.6251884],
       [-28.204874 ,  -3.355771 ],
       [-28.206867 ,  -3.3618321],
       [-28.20878  ,  -3.3570218],
       [-19.01974  ,  -2.848195 ],
      

In [17]:
# Join next states and their actions column-wise (along axis 1); per the
# displayed shapes this turns (64, 6) and (64, 2) into (64, 8).
tf.concat([x_nexts, u_nexts], axis=1)

<tf.Tensor: shape=(64, 8), dtype=float32, numpy=
array([[ 3.25826253e-03,  1.19594403e-01, -6.22657947e-02,
         4.96867262e-02,  4.21463728e-01, -4.84144479e-01,
        -2.73971863e+01, -3.61266232e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00, -6.76920637e-02, -3.04079335e-02,
        -2.02940731e+01, -3.02225685e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00, -9.80999991e-02,  0.00000000e+00,
        -1.90197392e+01, -2.84819508e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00, -8.99247900e-02, -8.17520823e-03,
        -1.93803291e+01, -2.89817119e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00, -9.80999991e-02,  0.00000000e+00,
        -1.90197392e+01, -2.84819508e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00, -9.80999991e-02,  0.00000000e+00,
        -1.90197392e+01, -2.

In [18]:
# Display the next-state batch on its own (fourth field of each experience in
# `exps`, converted to a float32 tensor in an earlier cell).
x_nexts

<tf.Tensor: shape=(64, 6), dtype=float32, numpy=
array([[ 3.25826253e-03,  1.19594403e-01, -6.22657947e-02,
         4.96867262e-02,  4.21463728e-01, -4.84144479e-01],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00, -6.76920637e-02, -3.04079335e-02],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00, -9.80999991e-02,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00, -8.99247900e-02, -8.17520823e-03],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00, -9.80999991e-02,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00, -9.80999991e-02,  0.00000000e+00],
       [-2.83988323e-02,  8.16333771e-01,  7.32125193e-02,
        -2.45994374e-01,  3.85994387e+00,  1.96686253e-01],
       [ 1.35652733e-03,  4.46521072e-03, -8.00625831e-02,
         3.17685790e-02, -1.44398987e-01, -1.02543187e+00],

In [19]:
# NOTE(review): `u` is not assigned in this cell; it shows whatever was bound
# last -- possibly the loop variable left over from unpacking `exps` in an
# earlier cell. Confirm where it comes from before relying on this value.
u

array([2.9685502, 6.2623725], dtype=float32)

In [20]:
# Reset the quadrotor, then call animate_quadrotor with it, `pg` and 1000.
# NOTE(review): the recorded run of this cell ends in a CalledProcessError
# raised by the ffmpeg subprocess. Neither the meaning of 1000 (presumably a
# step or frame count) nor the cause of the failure is visible from this
# cell -- check animate_quadrotor and the local ffmpeg installation.
quadrotor.reset()
animate_quadrotor(quadrotor, pg, 1000)

CalledProcessError: Command '['ffmpeg', '-f', 'rawvideo', '-vcodec', 'rawvideo', '-s', '864x864', '-pix_fmt', 'rgba', '-r', '100.0', '-loglevel', 'error', '-i', 'pipe:', '-vcodec', 'h264', '-pix_fmt', 'yuv420p', '-y', '/tmp/tmpl1gtbdgx/temp.m4v']' returned non-zero exit status 255.

In [None]:
# Display `eps`, the threshold that the sampling loop further down compares
# against a uniform random draw.
eps

In [None]:
# Take fields 0 and 3 of the first experience; these are the positions that
# the batching cell unpacks as `x` and `x_next`.
x, x_next = exps[0][0], exps[0][3]

In [None]:
# Feed `x`, converted to a float torch tensor, through `p_network`.
# NOTE(review): this scratch cell uses torch and a bare `p_network`, while the
# executed cells above build TensorFlow tensors and reach the network through
# `pg`. Confirm both names still exist before running it.
p_network(torch.as_tensor(x).float())

In [None]:
# Show `x` converted to a float torch tensor.
# NOTE(review): torch-based; the executed cells above use TensorFlow instead,
# so confirm torch is still imported.
torch.as_tensor(x).float()

In [None]:
# Display the whole `exps` collection.
# NOTE(review): this prints every experience (the tensors built from it above
# have 64 rows); consider showing a slice such as exps[:5] instead.
exps

In [None]:
# Call `pg` on `x` and show element 0 of what it returns.
# Assumes the return value is indexable -- TODO confirm what calling `pg` yields.
pg(x)[0]

In [None]:
# Make 1000 uniform draws; whenever a draw falls below `eps`, rebind `u` to a
# fresh random 2-vector with entries in [10, 20) and print it.
for _ in range(1000):
    if not (np.random.rand() < eps):
        continue
    u = 10 + 10 * np.random.rand(2)

    print(u)

In [None]:
# Rebind `u` to the result of calling `pg` on `x`, then display it.
# This overwrites whatever `u` held before (earlier cells also assign `u`).
u = pg(x)
u

In [None]:
# Number of dimensions of `x`, taken as the length of `x.shape`.
len(x.shape)

In [None]:
# Show the `trainable_weights` attribute of `q_network`.
# NOTE(review): the executed cells visible here reach networks through `pg`;
# confirm a bare `q_network` is defined before running this.
q_network.trainable_weights

In [None]:
# Display the repr of `q_network`.
q_network