In [1]:
%load_ext autoreload
%autoreload 2


In [2]:
import matplotlib.pyplot as plt
import matplotlib as mp
import matplotlib.animation as animation
import numpy as np
import IPython

from envs.Quadrotor import Quadrotor

In [3]:
# Physical parameters handed to the Quadrotor constructor.
# NOTE(review): presumably mass, moment of inertia and arm half-length --
# confirm against envs.Quadrotor (drawQuadrotor uses `r` as the body half-width).
m, I, r = 1.0, 1.0, 1.0

quadrotor = Quadrotor(m, I, r)

In [4]:
# Smoke test: apply the same constant command to a fresh quadrotor for 50 steps.
quadrotor = Quadrotor(m, I, r)
u = np.array((10, 10))

for step in range(50):
    quadrotor.step(u)

In [5]:
# Stand-alone figure (Agg canvas, not managed by pyplot) with a fixed view.
fig = mp.figure.Figure(figsize=[8, 8])
mp.backends.backend_agg.FigureCanvasAgg(fig)
ax = fig.add_subplot(111, autoscale_on=False, xlim=[-2, 12], ylim=[-2, 12])
ax.grid()

# Artists for the quadrotor: one marker for its centre and eight initially
# empty segments (drawQuadrotor fills in lines[0] .. lines[7]).
(center,) = ax.plot([], [], 'k', marker="o")
lines = [ax.plot([], [], 'k', lw=2)[0] for _ in range(8)]

In [6]:
def drawQuadrotor(quadrotor, ax, center, lines, t):
    """Update the artists so that they depict the quadrotor at time ``t``.

    The vehicle is drawn with eight segments: four edges of a rectangular
    body (``lines[0:4]``), two rotor masts (``lines[4:6]``) and two rotor
    blades (``lines[6:8]``). The drawn half-length of each blade is
    ``h * cos(pi * t)``, so the blades appear to spin as ``t`` advances.

    Parameters
    ----------
    quadrotor : object
        Must expose ``r`` (half-width of the drawn body) and ``x``, a
        six-element state unpacked as ``x, y, theta, u, v, omega``. Only the
        position ``(x, y)`` and the angle ``theta`` are used here.
    ax : object
        Unused; kept so existing callers keep working.
    center : artist
        Artist with a ``set_data(xs, ys)`` method; moved to ``(x, y)``.
    lines : sequence of artists
        At least eight artists with a ``set_data(xs, ys)`` method.
    t : float
        Time used for the blade animation.

    Returns
    -------
    The same ``lines`` sequence that was passed in.
    """
    r = quadrotor.r
    h = r / 2
    x, y, theta, _, _, _ = quadrotor.x

    for line in lines:  # reset all lines
        line.set_data([], [])

    cos_th, sin_th = np.cos(theta), np.sin(theta)
    rotation = np.array([[cos_th, -sin_th], [sin_th, cos_th]])
    # Keep the translation under its own name: it must not overwrite the
    # time argument ``t``, which is still needed for the blade length below.
    translation = np.array([[x], [y]])

    blade = h * np.cos(np.pi * t)      # signed half-length of a rotor blade
    left, right = -r + h / 2, r - h / 2  # x-positions of the two masts

    # Points in the body frame, one row per point.
    body_points = np.array([
        [-r, h / 2],         # A: body, top-left
        [r, h / 2],          # B: body, top-right
        [r, -h / 2],         # C: body, bottom-right
        [-r, -h / 2],        # D: body, bottom-left
        [left, h / 2],       # E: left mast, base
        [left, h],           # F: left mast, tip
        [right, h / 2],      # G: right mast, base
        [right, h],          # H: right mast, tip
        [left - blade, h],   # I: left blade, one end
        [left + blade, h],   # J: left blade, other end
        [right - blade, h],  # K: right blade, one end
        [right + blade, h],  # L: right blade, other end
    ])

    # Rotate by theta and shift to the vehicle position (world frame).
    world = (rotation @ body_points.T + translation).T
    A, B, C, D, E, F, G, H, I, J, K, L = world

    center.set_data([x], [y])

    # body outline
    lines[0].set_data([A[0], B[0]], [A[1], B[1]])
    lines[1].set_data([B[0], C[0]], [B[1], C[1]])
    lines[2].set_data([C[0], D[0]], [C[1], D[1]])
    lines[3].set_data([A[0], D[0]], [A[1], D[1]])

    # rotor masts
    lines[4].set_data([E[0], F[0]], [E[1], F[1]])
    lines[5].set_data([G[0], H[0]], [G[1], H[1]])

    # rotor blades
    lines[6].set_data([I[0], J[0]], [I[1], J[1]])
    lines[7].set_data([K[0], L[0]], [K[1], L[1]])

    return lines

In [7]:
def animate_quadrotor(quadrotor, controller, horizon):
    """Simulate the quadrotor in closed loop and display the run as a video.

    For every frame the current state is drawn with ``drawQuadrotor``, then
    ``controller(quadrotor.x)`` is evaluated and the result is applied through
    ``quadrotor.step``. The finished animation is converted with
    ``to_html5_video`` and shown in the notebook.

    Parameters
    ----------
    quadrotor : object
        Must expose ``dt``, ``x`` and ``step(u)``, plus whatever
        ``drawQuadrotor`` reads. It is advanced in place, once per frame.
    controller : callable
        Maps the current state ``quadrotor.x`` to a control input.
    horizon : int
        Number of frames, and therefore simulation steps, to run.
    """
    # Import the submodules explicitly: ``import matplotlib`` alone does not
    # guarantee that ``matplotlib.backends.backend_agg`` is loaded.
    from matplotlib.backends.backend_agg import FigureCanvasAgg
    from matplotlib.figure import Figure

    dt = quadrotor.dt

    fig = Figure(figsize=[12, 12])
    FigureCanvasAgg(fig)
    ax = fig.add_subplot(111, autoscale_on=False, xlim=[-2, 42], ylim=[-2, 42])
    ax.grid()

    # create the quadrotor artists
    center, = ax.plot([], [], 'k', marker="o")
    lines = [ax.plot([], [], 'k', lw=2)[0] for _ in range(8)]
    # With blit=True every artist that changes has to be returned.
    artists = [center, *lines]

    def init():
        # Draw only. The initialiser may run more than once, so it must not
        # advance the simulation.
        drawQuadrotor(quadrotor, ax, center, lines, 0.0)
        return artists

    # simulate with controller
    def animate(i):
        drawQuadrotor(quadrotor, ax, center, lines, dt * i)
        quadrotor.step(controller(quadrotor.x))
        return artists

    ani = animation.FuncAnimation(fig, animate, np.arange(0, horizon),
        interval=1000*dt, blit=True, init_func=init)
    # Make sure pyplot does not additionally render the static figure.
    plt.close(fig)
    IPython.display.display_html(IPython.display.HTML(ani.to_html5_video()))

In [8]:
def controller(x):
    """Random debug policy: ignore the state ``x`` and draw two commands.

    Returns a length-2 integer array whose entries are drawn by
    ``np.random.randint`` from the half-open range [0, 20).
    """
    return np.random.randint(low=0, high=20, size=2)

In [9]:
# Fresh environment for the animation demo; the call itself is currently
# disabled (it would drive the quadrotor with a constant command for 1000 frames).
quadrotor = Quadrotor(m, I, r)

# animate_quadrotor(quadrotor, lambda x: np.array([5, 5]), 1000)

# Debugging the replay buffer

In [10]:
from infrastructure.buffer import ReplayBuffer

In [11]:
# Roll out a single episode with the random controller and record every
# transition (x, u, cost, x_next, terminal) in a new replay buffer.
buffer = ReplayBuffer()
quadrotor = Quadrotor(m, I, r)

x = quadrotor.reset()
print(x)

i, terminal = 0, False
while not terminal:
    u = controller(x)
    x_next, cost, terminal = quadrotor.step(u)
    buffer.push((x, u, cost, x_next, terminal))
    x, i = x_next, i + 1

print(f"Simulation ends in {i} steps")

[0. 0. 0. 0. 0. 0.]
Simulation ends in 72 steps


# Debugging the agent

In [12]:
from utils.util import build_network
from agents.pg_agent import PGAgent

import tensorflow as tf

In [15]:
# Problem dimensions: six state entries (drawQuadrotor unpacks the state as
# x, y, theta, u, v, omega) and two control inputs.
n_states = 6
n_actions = 2

# The first network takes a state-action pair and has one output; the second
# takes a state and has one output per action.
# NOTE(review): the remaining positional arguments (4, 256, "relu") are
# presumably layer count, layer width and an activation -- confirm against
# utils.util.build_network.
q_network = build_network(n_states+n_actions, 1, 4, 256)
p_network = build_network(n_states, n_actions, 4, 256, "relu")

# NOTE(review): the meaning of the four numeric hyperparameters is not visible
# here (possibly discount, a soft-update rate and two learning rates) --
# confirm against agents.pg_agent.PGAgent.
pg = PGAgent(q_network, p_network, 0.99, 0.1, 1e-5, 1e-5)

# Disabled: animate a rollout driven by the (untrained) agent.
# animate_quadrotor(quadrotor, pg, 1000)

In [None]:
# Pre-fill a replay buffer with random rollouts, then run 1000 episodes in
# which the agent acts with added Gaussian noise and is trained from the buffer.

# tf.keras.backend.set_floatx('float64')

quadrotor = Quadrotor(m, I, r)
# NOTE(review): 10**4 is presumably the buffer capacity -- confirm against
# infrastructure.buffer.ReplayBuffer.
buffer = ReplayBuffer(10**4)

# Phase 1: 100 episodes with commands drawn uniformly from [0, 20), so the
# buffer already holds transitions before the first training step.
print("Start pre-sampling")
for t in range(100):
    x = quadrotor.reset()
    terminal = False
    i = 0
    
    while not terminal:
        u = np.random.rand(2) * 20
        x_next, cost, terminal = quadrotor.step(u)
        buffer.push((x, u, cost, x_next, terminal))
        x = x_next
        
        i += 1
    
    print(f"Simulation ends in {i} steps")

# Phase 2: act with the agent plus exploration noise and train.
for t in range(1000):
    print(f"Episode {t} begins...")
    x = quadrotor.reset()
    terminal = False
    i = 0
    
    # Noise mean and standard deviation both decay with the episode index.
    # The mean is not zero: it starts at 5 and decays towards 5/e.
    mu = 5*np.exp(- t/1000)
    std = np.exp(- t/1000)
    
    while not terminal:
        u = pg(x)
#         u = u[0]
        noise = np.random.normal(loc=mu, scale=std, size=2)
        # NOTE(review): this prints once per environment step and floods the
        # cell output (the recorded output is truncated).
        print(f"Current x: {(x[0], x[1], x[2])}, u: {u}, noise: {noise}")
        # In-place update: modifies the array object returned by pg(x).
        u += noise
        # Clamp negative commands to zero.
        u[u<0] = 0
        
        x_next, cost, terminal = quadrotor.step(u)
        buffer.push((x, u, cost, x_next, terminal))
    
        x = x_next
        i+=1
    
        # NOTE(review): with the guard below commented out, the training code
        # is part of the inner while loop, so one batch of 64 is sampled and
        # trained on after EVERY environment step -- confirm this is intended.
#     if t % 10 == 0:
        print("Start training ...")
        exps = buffer.sample(64)
        L, J = pg.train(exps)

#         pg.update_target_networks()
        
        # NOTE(review): the label says "Step" but {t} is the episode index.
        print(f"Step: {t}, L: {L}, J: {J}")
    
    print(f"Simulation ends in {i} steps")


Start pre-sampling
Simulation ends in 69 steps
Simulation ends in 62 steps
Simulation ends in 70 steps
Simulation ends in 65 steps
Simulation ends in 63 steps
Simulation ends in 61 steps
Simulation ends in 60 steps
Simulation ends in 60 steps
Simulation ends in 73 steps
Simulation ends in 72 steps
Simulation ends in 65 steps
Simulation ends in 63 steps
Simulation ends in 62 steps
Simulation ends in 63 steps
Simulation ends in 65 steps
Simulation ends in 57 steps
Simulation ends in 62 steps
Simulation ends in 63 steps
Simulation ends in 59 steps
Simulation ends in 60 steps
Simulation ends in 64 steps
Simulation ends in 63 steps
Simulation ends in 58 steps
Simulation ends in 68 steps
Simulation ends in 61 steps
Simulation ends in 61 steps
Simulation ends in 64 steps
Simulation ends in 61 steps
Simulation ends in 68 steps
Simulation ends in 64 steps
Simulation ends in 60 steps
Simulation ends in 64 steps
Simulation ends in 65 steps
Simulation ends in 56 steps
Simulation ends in 58 steps
S

Step: 0, L: 421.7577209472656, J: 0.5683866739273071
Current x: (0.0014895280872176853, 0.0, -0.02095368382930756), u: [0.00548973 0.        ], noise: [4.72965738 4.77305196]
Start training ...
Step: 0, L: 383.75213623046875, J: 0.5959936380386353
Current x: (0.0017020501428735499, 0.0, -0.02188583855628968), u: [0.00545469 0.        ], noise: [5.8460364  4.50152129]
Start training ...
Step: 0, L: 502.9924011230469, J: 0.6813647150993347
Current x: (0.001934493919674429, 0.0, -0.022821783757209786), u: [0.00456203 0.        ], noise: [5.3582589  3.87632747]
Start training ...
Step: 0, L: 317.1077880859375, J: 0.6084882020950317
Current x: (0.0021895943227645343, 0.0, -0.02362273197174073), u: [0.00310292 0.        ], noise: [6.25867973 4.17310722]
Start training ...
Step: 0, L: 322.76715087890625, J: 0.6710420846939087
Current x: (0.0024657782818434857, 0.0, -0.024275030827522286), u: [0.00152276 0.        ], noise: [4.46765045 6.409431  ]
Start training ...
Step: 0, L: 423.17681884765

Step: 0, L: 456.42364501953125, J: 0.8908765316009521
Current x: (0.036071537568106236, 0.0049642750800900975, -0.026633523249626184), u: [0. 0.], noise: [6.8408303 3.0337765]
Start training ...
Step: 0, L: 316.1346435546875, J: 0.8005180358886719
Current x: (0.0374471618356628, 0.004975774115289819, -0.02694166095256808), u: [0. 0.], noise: [8.1537669  4.94293103]
Start training ...
Step: 0, L: 472.7117004394531, J: 0.8110999464988708
Current x: (0.038849082551829346, 0.0049933836548684525, -0.026869093275070217), u: [0. 0.], noise: [6.40656728 3.52778207]
Start training ...
Step: 0, L: 368.25067138671875, J: 0.8172740936279297
Current x: (0.04028628367876082, 0.005339187689996456, -0.026475442051887538), u: [0. 0.], noise: [5.15121295 5.09469514]
Start training ...
Step: 0, L: 310.7549743652344, J: 0.7909530401229858
Current x: (0.041750174289169995, 0.005697068048446371, -0.025793912315368678), u: [0. 0.], noise: [6.31546029 6.25150939]
Start training ...
Step: 0, L: 293.12774658203

Step: 0, L: 388.11468505859375, J: 1.0514905452728271
Current x: (0.11710903119393153, 0.08193208052915393, -0.016803404879570043), u: [0. 0.], noise: [3.40365765 5.6721127 ]
Start training ...
Step: 0, L: 322.4030456542969, J: 1.013786792755127
Current x: (0.11941574202918827, 0.08431286126522312, -0.01644976973533634), u: [0. 0.], noise: [4.83421304 5.2176399 ]
Start training ...
Step: 0, L: 293.9664001464844, J: 0.9838072061538696
Current x: (0.121737702531212, 0.08662009091292634, -0.016322980070114172), u: [0. 0.], noise: [4.95514534 6.08269749]
Start training ...
Step: 0, L: 337.2512512207031, J: 1.0058679580688477
Current x: (0.1240761973545506, 0.08895136988266689, -0.01623453307151798), u: [0. 0.], noise: [6.46411342 6.64550725]
Start training ...
Step: 0, L: 354.35675048828125, J: 1.10725998878479
Current x: (0.12643270842654716, 0.09140528608476362, -0.01625884127616886), u: [0. 0.], noise: [5.82189397 5.55093814]
Start training ...
Step: 0, L: 299.7854919433594, J: 0.986984

Step: 0, L: 322.80389404296875, J: 1.360187292098999
Current x: (0.2427066860237987, 0.2100946760220231, -0.012053811907768303), u: [0.         0.00095115], noise: [3.21826038 3.17161334]
Start training ...
Step: 0, L: 260.086669921875, J: 1.2488532066345215
Current x: (0.24571759657565748, 0.2130291330611169, -0.012420719408989006), u: [0.         0.00591378], noise: [5.34788486 4.24476237]
Start training ...
Step: 0, L: 297.45965576171875, J: 1.2579236030578613
Current x: (0.24873621032097581, 0.21562162615270294, -0.012783057332038933), u: [0.         0.00657008], noise: [6.28344435 5.75485062]
Start training ...
Step: 0, L: 337.8509826660156, J: 1.4361426830291748
Current x: (0.25176674586221387, 0.21819290123802135, -0.013035674405098016), u: [0.         0.00424807], noise: [5.086443   4.07244435]
Start training ...
Step: 0, L: 264.366455078125, J: 1.3202285766601562
Current x: (0.2548126780036699, 0.22098756437964212, -0.013236089110374506), u: [0.         0.00550432], noise: [4.

Step: 0, L: 277.556884765625, J: 1.574653148651123
Current x: (0.3957571248721359, 0.3213179736506827, -0.011822204756736836), u: [0.         0.01670796], noise: [4.77556642 5.74439103]
Start training ...
Step: 0, L: 347.39251708984375, J: 1.6979103088378906
Current x: (0.3994007208581544, 0.3242711629636986, -0.012309066462516866), u: [0.         0.01610091], noise: [5.1050723  4.99631563]
Start training ...
Step: 0, L: 309.0648498535156, J: 1.7209899425506592
Current x: (0.4030567732150727, 0.32729694514125013, -0.012894481396675192), u: [0.         0.01588648], noise: [4.01612276 5.41374447]
Start training ...
Step: 0, L: 318.2403564453125, J: 1.7232691049575806
Current x: (0.4067252789417931, 0.330353399559264, -0.01347063071727761), u: [0.        0.0168554], noise: [3.52015186 4.32986661]
Start training ...
Step: 0, L: 237.5058135986328, J: 1.487933874130249
Current x: (0.41040596413975766, 0.3333733507619606, -0.014188130831718528), u: [0.         0.01944039], noise: [3.97045797 

Step: 0, L: 278.982421875, J: 2.0924320220947266
Current x: (0.5868672182858606, 0.4923933561615504, -0.03412397351264964), u: [0. 0.], noise: [4.37621218 3.86471436]
Start training ...
Step: 0, L: 250.914306640625, J: 1.8821080923080444
Current x: (0.5915015620562829, 0.4968619417643116, -0.03543370053768168), u: [0.         0.00113175], noise: [6.72008355 4.57116651]
Start training ...
Step: 0, L: 374.8656921386719, J: 2.0578629970550537
Current x: (0.5961640216859875, 0.501173140282354, -0.03669227778911601), u: [0. 0.], noise: [2.77285063 4.59241887]
Start training ...
Step: 0, L: 299.564208984375, J: 2.064263343811035
Current x: (0.600866486028852, 0.5056318680973797, -0.03773607647418986), u: [0.        0.0031809], noise: [3.40514232 6.31853302]
Start training ...
Step: 0, L: 314.2994079589844, J: 2.0106935501098633
Current x: (0.6055959691583834, 0.5098456270847829, -0.03896183197498332), u: [0.         0.00416807], noise: [3.97722245 3.62735776]
Start training ...
Step: 0, L: 2

Step: 0, L: 326.677490234375, J: 2.4876480102539062
Current x: (0.854145499234458, 0.6565107740992495, -0.1089739652872087), u: [0.         0.00517195], noise: [6.71906273 4.15691933]
Start training ...
Step: 0, L: 231.91786193847656, J: 2.1295926570892334
Current x: (0.8617666714167571, 0.659207259454752, -0.11059320404529585), u: [0.        0.0027976], noise: [5.91196645 5.65686401]
Start training ...
Step: 0, L: 241.65365600585938, J: 2.3055286407470703
Current x: (0.8695061853002654, 0.6620044057453949, -0.11195674564838423), u: [0. 0.], noise: [5.8921852 7.3143741]
Start training ...
Step: 0, L: 281.0459899902344, J: 2.283168077468872
Current x: (0.8773734128185724, 0.6649706455562506, -0.11329505679607405), u: [0. 0.], noise: [4.36240094 5.45515852]
Start training ...
Step: 0, L: 250.83563232421875, J: 2.172231674194336
Current x: (0.8853881879895132, 0.6682682731696522, -0.11477558681964888), u: [0. 0.], noise: [6.2657144  5.48567045]
Start training ...
Step: 0, L: 342.780883789

Step: 1, L: 222.8419952392578, J: 2.463114023208618
Current x: (0.0010053797639045473, 0.0015516775825051142, -0.01469019892215729), u: [0. 0.], noise: [6.70253109 4.77795664]
Start training ...
Step: 1, L: 309.4642333984375, J: 2.6433305740356445
Current x: (0.0011381011105468012, 0.001833409856220338, -0.016039831137657168), u: [0. 0.], noise: [5.50232509 5.69265631]
Start training ...
Step: 1, L: 321.2203369140625, J: 2.9578683376312256
Current x: (0.0012876869155950062, 0.002282067038932162, -0.017197005915641786), u: [0. 0.], noise: [6.26615464 4.80360116]
Start training ...
Step: 1, L: 171.608154296875, J: 2.6663711071014404
Current x: (0.0014552285120924243, 0.002869078372126334, -0.018373213839530948), u: [0. 0.], noise: [6.08538788 5.0493295 ]
Start training ...
Step: 1, L: 272.397705078125, J: 2.7075581550598145
Current x: (0.0016418058354709965, 0.003581901577952595, -0.019403166413307193), u: [0. 0.], noise: [4.15167747 5.44714894]
Start training ...
Step: 1, L: 238.8548431

Current x: (0.039028729095664114, 0.05538330285389454, -0.05527781112194062), u: [0. 0.], noise: [4.41366369 5.3556918 ]
Start training ...
Step: 1, L: 263.654052734375, J: 3.0108933448791504
Current x: (0.04080227461772893, 0.05627737935502295, -0.05646764314174653), u: [0. 0.], noise: [5.48784062 3.19500025]
Start training ...
Step: 1, L: 245.33370971679688, J: 3.362025022506714
Current x: (0.04262979550213638, 0.05716589923361381, -0.05775167796611787), u: [0. 0.], noise: [6.63618755 5.30567869]
Start training ...
Step: 1, L: 205.41348266601562, J: 3.3235549926757812
Current x: (0.04450632028760815, 0.05794031921178117, -0.05880642874240876), u: [0. 0.], noise: [7.52115854 5.57166878]
Start training ...
Step: 1, L: 271.53436279296875, J: 3.2058908939361572
Current x: (0.04645177302226796, 0.05892593487630923, -0.05972812864780427), u: [0. 0.], noise: [4.84576205 4.79454712]
Start training ...
Step: 1, L: 256.85107421875, J: 3.032844066619873
Current x: (0.04847417562661756, 0.060237

Step: 1, L: 269.64886474609375, J: 3.7538764476776123
Current x: (0.20258600119215261, 0.11971018048330413, -0.10558744699954989), u: [0.00026085 0.        ], noise: [4.56677744 5.50467045]
Start training ...
Step: 1, L: 309.6770935058594, J: 3.6973495483398438
Current x: (0.20801987362525412, 0.12095747466419308, -0.10736120100021365), u: [0.00093332 0.        ], noise: [6.16158185 4.22952856]
Start training ...
Step: 1, L: 238.006103515625, J: 3.4841396808624268
Current x: (0.21355989316827836, 0.12222533059966416, -0.10922871825695041), u: [0. 0.], noise: [3.46748054 4.90513221]
Start training ...
Step: 1, L: 248.57130432128906, J: 3.669076442718506
Current x: (0.21921126873361935, 0.12354540753534674, -0.1109029368400574), u: [0.00150449 0.        ], noise: [3.80689476 6.69869476]
Start training ...
Step: 1, L: 196.52467346191406, J: 3.287909746170044
Current x: (0.22495391553262759, 0.12471675608447258, -0.11272092058658603), u: [0.00314713 0.        ], noise: [3.38779647 4.599623

Step: 1, L: 206.92416381835938, J: 3.8744988441467285
Current x: (0.5779381325852583, 0.1674439466212819, -0.20556884641647344), u: [0. 0.], noise: [4.49839294 5.81375323]
Start training ...
Step: 1, L: 322.99615478515625, J: 4.369352340698242
Current x: (0.5901067120395469, 0.16840914785906483, -0.20762269973754888), u: [0. 0.], noise: [5.83748816 5.55507442]
Start training ...
Step: 1, L: 220.2261962890625, J: 3.8015291690826416
Current x: (0.6024857872108749, 0.169402851520269, -0.20980808906555182), u: [0. 0.], noise: [5.41617405 5.42338459]
Start training ...
Step: 1, L: 250.6853485107422, J: 3.951046943664551
Current x: (0.615099702110811, 0.17053034448375093, -0.21196523699760442), u: [0. 0.], noise: [3.63379916 7.0877847 ]
Start training ...
Step: 1, L: 313.0363464355469, J: 3.9994707107543945
Current x: (0.6279393748837219, 0.17173702313357148, -0.21412310600280765), u: [0. 0.], noise: [7.75005087 4.6437946 ]
Start training ...
Step: 1, L: 309.4720458984375, J: 4.2249593734741

Step: 2, L: 156.13888549804688, J: 4.126625061035156
Current x: (0.0002222522190743383, 0.0033993740203946913, -0.00590015835762024), u: [0. 0.], noise: [5.10166333 5.95061836]
Start training ...
Step: 2, L: 187.83377075195312, J: 4.261697292327881
Current x: (0.00026609688898585624, 0.0038191827686455317, -0.006617518830299378), u: [0. 0.], noise: [5.88864818 4.81687509]
Start training ...
Step: 2, L: 243.27706909179688, J: 3.8171207904815674
Current x: (0.0003164625420979393, 0.00436320041739377, -0.007419774818420411), u: [0. 0.], noise: [4.31440815 3.52112837]
Start training ...
Step: 2, L: 218.99594116210938, J: 4.745467662811279
Current x: (0.0003739125432018209, 0.004996746879376493, -0.008114853501319887), u: [0. 0.], noise: [4.01730292 3.59100107]
Start training ...
Step: 2, L: 317.9894104003906, J: 5.004685401916504
Current x: (0.0004371762829414216, 0.005432825468623212, -0.00873060419559479), u: [0. 0.], noise: [6.16035607 7.03375872]
Start training ...
Step: 2, L: 285.2525

Step: 2, L: 286.11614990234375, J: 4.983509063720703
Current x: (0.028863158418053623, 0.015596569784935697, -0.09323224172592165), u: [0.00595631 0.        ], noise: [5.40536735 6.46147863]
Start training ...
Step: 2, L: 210.7147979736328, J: 4.7605671882629395
Current x: (0.03077132518013647, 0.016099063444063776, -0.09604980707168581), u: [0.00570272 0.        ], noise: [4.86610138 5.42765299]
Start training ...
Step: 2, L: 252.087890625, J: 5.013462066650391
Current x: (0.032790024451749146, 0.01680268104425339, -0.0989723879337311), u: [0.00594696 0.        ], noise: [5.57959063 6.76970414]
Start training ...
Step: 2, L: 236.013916015625, J: 5.069103717803955
Current x: (0.03490749777638536, 0.01755049713483493, -0.10195055365562442), u: [0.00554598 0.        ], noise: [4.50601159 5.14924692]
Start training ...
Step: 2, L: 185.1771697998047, J: 4.47614860534668
Current x: (0.03714705433946879, 0.018546791031724488, -0.10504713606834414), u: [0.00614575 0.        ], noise: [5.95554

Step: 2, L: 167.96066284179688, J: 4.776697158813477
Current x: (0.24460408004662748, 0.06338616045069807, -0.22857176458835596), u: [0. 0.], noise: [5.09776908 6.02643716]
Start training ...
Step: 2, L: 258.98260498046875, J: 5.478017330169678
Current x: (0.25369693190635584, 0.06515519353127851, -0.2314532536745071), u: [0. 0.], noise: [3.20038975 4.6889702 ]
Start training ...
Step: 2, L: 300.257568359375, J: 5.449892997741699
Current x: (0.2630418434540682, 0.06702671432558702, -0.23442760956287378), u: [0. 0.], noise: [5.54413814 4.83581531]
Start training ...
Step: 2, L: 158.93841552734375, J: 4.7142109870910645
Current x: (0.272567730817872, 0.06868513339333904, -0.2375508234739303), u: [0. 0.], noise: [3.55776595 6.34263958]
Start training ...
Step: 2, L: 273.01800537109375, J: 5.552822113037109
Current x: (0.2823347302673505, 0.07037215598959351, -0.24060320513248437), u: [0. 0.], noise: [5.15223208 5.36692988]
Start training ...
Step: 2, L: 218.13259887695312, J: 5.2948269844

Step: 2, L: 267.2949523925781, J: 6.0727996826171875
Current x: (0.9733270146357466, 0.15612386582072568, -0.3674905236005781), u: [0. 0.], noise: [4.51551357 5.44392279]
Start training ...
Step: 2, L: 280.0086364746094, J: 5.8903632164001465
Current x: (0.9966807533299726, 0.1582762231940915, -0.37074534003734566), u: [0. 0.], noise: [4.31692247 3.64443733]
Start training ...
Step: 2, L: 231.70550537109375, J: 5.586068630218506
Simulation ends in 162 steps
Episode 3 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [5.15728095 5.6186554 ]
Start training ...
Step: 3, L: 329.730712890625, J: 5.991033554077148
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [5.74760981 5.10606437]
Start training ...
Step: 3, L: 272.13104248046875, J: 5.295743942260742
Current x: (0.0, 9.659361267089842e-05, -4.6137428283691405e-05), u: [0. 0.], noise: [5.39159819 3.63130493]
Start training ...
Step: 3, L: 297.34234619140625, J: 5.884408950805664
Current x: (0.0, 0.00029755461883544914, -2.812032699

Step: 3, L: 201.87551879882812, J: 5.756455421447754
Current x: (0.0013647916684824892, 0.01707424342458464, -0.015670857453346253), u: [0. 0.], noise: [5.12800121 5.86461366]
Start training ...
Step: 3, L: 261.4518737792969, J: 6.55159854888916
Current x: (0.0015337990814044558, 0.018012028063284218, -0.01640437273979187), u: [0. 0.], noise: [5.16443498 5.57637843]
Start training ...
Step: 3, L: 302.99169921875, J: 6.416784286499023
Current x: (0.0017200321591444548, 0.019067939203362028, -0.017211549258232116), u: [0. 0.], noise: [4.21798856 4.76274238]
Start training ...
Step: 3, L: 241.99038696289062, J: 6.1591901779174805
Current x: (0.0019238840770614061, 0.020216787152719257, -0.018059920120239256), u: [0. 0.], noise: [4.77141412 4.3598896 ]
Start training ...
Step: 3, L: 282.00665283203125, J: 6.023428440093994
Current x: (0.0021431924612374544, 0.021282575184971238, -0.01896276638507843), u: [0. 0.], noise: [4.08236695 6.25419695]
Start training ...
Step: 3, L: 171.15524291992

Current x: (0.04088720771870345, 0.08269354788388153, -0.06171610589027405), u: [0. 0.], noise: [5.74250812 3.84739369]
Start training ...
Step: 3, L: 247.53012084960938, J: 6.781964302062988
Current x: (0.04280832144781706, 0.08360497652238601, -0.06264261810779571), u: [0. 0.], noise: [5.98415365 5.32747801]
Start training ...
Step: 3, L: 313.5575256347656, J: 7.218398571014404
Current x: (0.044788582752854245, 0.08449256959473, -0.06337961890697479), u: [0. 0.], noise: [3.45471358 3.56901879]
Start training ...
Step: 3, L: 303.0780029296875, J: 6.475959777832031
Current x: (0.046839656749362794, 0.08552810721094553, -0.06405095212459563), u: [0. 0.], noise: [5.16444209 6.33325862]
Start training ...
Step: 3, L: 292.4208679199219, J: 6.492128849029541
Current x: (0.04893521709539506, 0.08628360780965243, -0.06473371586799621), u: [0. 0.], noise: [4.09545415 5.83920851]
Start training ...
Step: 3, L: 228.94927978515625, J: 6.741244316101074
Current x: (0.051104370965112125, 0.08720552

Step: 3, L: 294.4259033203125, J: 7.468738555908203
Current x: (0.19575856092262897, 0.10221528089471113, -0.1142840008020401), u: [0. 0.], noise: [4.17116648 4.72008076]
Start training ...
Step: 3, L: 241.5517120361328, J: 7.160590171813965
Current x: (0.20147722844783592, 0.10333284401709159, -0.11540954332351684), u: [0. 0.], noise: [2.19721593 6.21404076]
Start training ...
Step: 3, L: 294.5966796875, J: 6.610660552978516
Current x: (0.20729728765190034, 0.10435273178153225, -0.11658997728824615), u: [0. 0.], noise: [4.24126884 5.576524  ]
Start training ...
Step: 3, L: 324.0367126464844, J: 7.622763633728027
Current x: (0.21321420543610953, 0.10522714981074652, -0.11817209372520446), u: [0. 0.], noise: [5.50657073 4.57868119]
Start training ...
Step: 3, L: 249.94847106933594, J: 6.468899726867676
Current x: (0.2192453296954267, 0.10609568191217748, -0.11988773567676543), u: [0. 0.], noise: [3.42959301 5.38642172]
Start training ...
Step: 3, L: 215.96978759765625, J: 6.835402488708

Step: 3, L: 277.9824523925781, J: 7.266055107116699
Current x: (0.6002536404071531, 0.16256326330564722, -0.18488953852653497), u: [0. 0.], noise: [4.92248649 5.59674504]
Start training ...
Step: 3, L: 368.31243896484375, J: 7.839811325073242
Current x: (0.6129057106783212, 0.16480443900080055, -0.18653339555263512), u: [0. 0.], noise: [4.42732644 4.73180887]
Start training ...
Step: 3, L: 283.8099365234375, J: 7.270265579223633
Current x: (0.625751164356511, 0.16709860948981126, -0.18824467844963066), u: [0. 0.], noise: [6.28463435 7.2407281 ]
Start training ...
Step: 3, L: 267.58349609375, J: 7.820248603820801
Current x: (0.6387664774569763, 0.16931180523739983, -0.1899864095449447), u: [0. 0.], noise: [4.70550055 2.82388719]
Start training ...
Step: 3, L: 337.92095947265625, J: 6.734975814819336
Current x: (0.6520348972459931, 0.17187264359326065, -0.1918237499713897), u: [0. 0.], noise: [4.62509984 6.03651875]
Start training ...
Step: 3, L: 238.73348999023438, J: 6.291752338409424


Step: 4, L: 284.346923828125, J: 7.729992389678955
Current x: (-0.00018621873711201876, 0.00011814334189577157, 0.007444668817520141), u: [0. 0.], noise: [4.72004961 3.13996884]
Start training ...
Step: 4, L: 184.35540771484375, J: 7.115370750427246
Current x: (-0.00023509871903967035, 0.00032871250667772895, 0.00797649631500244), u: [0. 0.], noise: [5.42528314 5.28382214]
Start training ...
Step: 4, L: 309.9254150390625, J: 7.912222862243652
Current x: (-0.0002898301701954523, 0.00034426171556647955, 0.008666331863403319), u: [0. 0.], noise: [4.48574509 4.67854226]
Start training ...
Step: 4, L: 352.5168151855469, J: 7.442575931549072
Current x: (-0.000353103644058655, 0.00044968731036466316, 0.00937031350135803), u: [0. 0.], noise: [5.05643999 3.73236612]
Start training ...
Step: 4, L: 278.3238220214844, J: 7.854300498962402
Current x: (-0.00042431909424173886, 0.0004905072477601037, 0.010055015420913693), u: [0. 0.], noise: [5.78904945 6.48491883]
Start training ...
Step: 4, L: 186.

Step: 4, L: 271.1222229003906, J: 7.43111515045166
Current x: (-0.023732982176868762, 0.00561092496190576, 0.054941921877861004), u: [0. 0.], noise: [3.48441297 4.51535406]
Start training ...
Step: 4, L: 297.1275634765625, J: 8.233388900756836
Current x: (-0.02514565894015191, 0.005981796446356814, 0.0558478365421295), u: [0. 0.], noise: [2.90397812 6.00657684]
Start training ...
Step: 4, L: 235.63595581054688, J: 7.535651206970215
Current x: (-0.026602265853305305, 0.006170437554058866, 0.05665065708160399), u: [0. 0.], noise: [5.44215797 5.72600792]
Start training ...
Step: 4, L: 350.3431701660156, J: 8.362113952636719
Current x: (-0.028108610423190836, 0.006267744918984822, 0.05714321773052214), u: [0. 0.], noise: [4.73410399 5.91905806]
Start training ...
Step: 4, L: 266.8219909667969, J: 9.083419799804688
Current x: (-0.02967818954706799, 0.006499077185906086, 0.05760739336013792), u: [0. 0.], noise: [5.13484623 3.57571723]
Start training ...
Step: 4, L: 200.3040313720703, J: 7.83

Step: 4, L: 384.56988525390625, J: 8.548922538757324
Current x: (-0.14869283042696668, 0.021045315747603873, 0.08269990055561063), u: [0.         0.01769207], noise: [5.26251703 5.49070966]
Start training ...
Step: 4, L: 323.468994140625, J: 8.107484817504883
Current x: (-0.15304654355703506, 0.02054604288491889, 0.08333915302753446), u: [0.         0.01692605], noise: [5.47540801 4.52390859]
Start training ...
Step: 4, L: 311.48760986328125, J: 9.030552864074707
Current x: (-0.15748923057840405, 0.020139180742585666, 0.08395381700992582), u: [0.        0.0170721], noise: [5.30344874 5.51752958]
Start training ...
Step: 4, L: 271.5509033203125, J: 8.794748306274414
Current x: (-0.16201529552645516, 0.01974946656317072, 0.0846619383573532), u: [0.        0.0162415], noise: [4.01572941 3.87659511]
Start training ...
Step: 4, L: 247.83831787109375, J: 7.477832794189453
Current x: (-0.1666322431997734, 0.019458740249820757, 0.08534694440364836), u: [0.         0.01917496], noise: [3.525674

Step: 4, L: 233.72158813476562, J: 8.785924911499023
Current x: (-0.43453766460846366, 0.013609098636365051, 0.10944087617397305), u: [0. 0.], noise: [7.24413972 4.86459225]
Start training ...
Step: 4, L: 278.627685546875, J: 7.883303165435791
Current x: (-0.4432429964174038, 0.014596534671993355, 0.11062303204536435), u: [0. 0.], noise: [3.78304642 4.08830619]
Start training ...
Step: 4, L: 249.38919067382812, J: 8.178791999816895
Current x: (-0.4520805828743025, 0.015806599665689817, 0.11204314267635342), u: [0. 0.], noise: [6.84020934 5.3190481 ]
Start training ...
Step: 4, L: 405.7572021484375, J: 10.138916969299316
Current x: (-0.46100506712780565, 0.016817988522526764, 0.11343272736072536), u: [0. 0.], noise: [6.28381933 4.14386143]
Start training ...
Step: 4, L: 201.7740020751953, J: 7.471942901611328
Current x: (-0.4700655026629769, 0.01805667898619653, 0.1149744282007217), u: [0. 0.], noise: [5.83017702 5.14225625]
Start training ...
Step: 4, L: 306.6104736328125, J: 8.1935081

Current x: (-0.9575727212110607, 0.07264776652170227, 0.20269142880439756), u: [0. 0.], noise: [5.08599735 4.8339478 ]
Start training ...
Step: 4, L: 249.53201293945312, J: 9.045782089233398
Current x: (-0.9728371208190398, 0.07368876485093691, 0.2057937407016754), u: [0. 0.], noise: [4.59778495 6.44920694]
Start training ...
Step: 4, L: 297.094970703125, J: 10.009117126464844
Current x: (-0.9883012152514362, 0.0747204498674052, 0.20892125759124752), u: [0. 0.], noise: [4.8944008  5.82720154]
Start training ...
Step: 4, L: 413.6421203613281, J: 10.270294189453125
Simulation ends in 203 steps
Episode 5 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [6.5270153  4.14517714]
Start training ...
Step: 5, L: 178.73382568359375, J: 7.791377067565918
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [6.5674636 5.1009414]
Start training ...
Step: 5, L: 381.9685974121094, J: 9.411261558532715
Current x: (0.0, 8.621925735473629e-05, 0.0002381837844848633), u: [0. 0.], noise: [4.09993355 3.8

Step: 5, L: 277.796630859375, J: 8.648253440856934
Current x: (-0.01029280922133262, 0.01054684214117736, 0.019487099504470824), u: [0. 0.], noise: [6.13584611 2.93903877]
Start training ...
Step: 5, L: 282.66522216796875, J: 9.387518882751465
Current x: (-0.010957455404619886, 0.01192974766903972, 0.01928744349479675), u: [0. 0.], noise: [5.2177057  4.80250774]
Start training ...
Step: 5, L: 402.6419372558594, J: 10.402498245239258
Current x: (-0.011639784786225205, 0.013238969335794123, 0.019407468223571776), u: [0. 0.], noise: [5.35307156 5.43104689]
Start training ...
Step: 5, L: 330.88897705078125, J: 9.39014720916748
Current x: (-0.012341439400885371, 0.014569026037681614, 0.01956901273727417), u: [0. 0.], noise: [6.07127841 3.14780162]
Start training ...
Step: 5, L: 298.0390625, J: 9.289920806884766
Current x: (-0.013064021945744065, 0.01599629151935894, 0.01972275972366333), u: [0. 0.], noise: [4.06891642 4.85780223]
Start training ...
Step: 5, L: 367.88043212890625, J: 9.67091

Step: 5, L: 337.91070556640625, J: 10.597359657287598
Current x: (-0.06230267425100744, 0.05756234337972761, 0.05888790822029115), u: [0. 0.], noise: [5.45042061 5.50247219]
Start training ...
Step: 5, L: 312.62603759765625, J: 10.560754776000977
Current x: (-0.06431527648756859, 0.05857881768540844, 0.060489480257034316), u: [0. 0.], noise: [6.55829528 6.0942034 ]
Start training ...
Step: 5, L: 367.8494873046875, J: 11.028423309326172
Current x: (-0.06639234074397693, 0.05970768265599849, 0.06208584709167482), u: [0. 0.], noise: [4.31788042 5.02602161]
Start training ...
Step: 5, L: 306.91412353515625, J: 9.180834770202637
Current x: (-0.0685458926403247, 0.06111848339668705, 0.06372862310409548), u: [0. 0.], noise: [4.7997     5.23186624]
Start training ...
Step: 5, L: 217.38697814941406, J: 10.666357040405273
Current x: (-0.07075741968497286, 0.062480874099738196, 0.06530058503150943), u: [0. 0.], noise: [4.34472073 4.72546151]
Start training ...
Step: 5, L: 251.61569213867188, J: 9

Step: 5, L: 374.2633056640625, J: 10.609810829162598
Current x: (-0.2514656543737285, 0.13013050179314997, 0.10648624823093415), u: [0. 0.], noise: [4.4689198 4.013369 ]
Start training ...
Step: 5, L: 238.73948669433594, J: 9.435426712036133
Current x: (-0.2578486811638, 0.13206916634232477, 0.10647966606616975), u: [0. 0.], noise: [5.34538807 4.20108539]
Start training ...
Step: 5, L: 322.85784912109375, J: 10.877222061157227
Current x: (-0.2643218620535475, 0.13387025510068173, 0.10651863896846772), u: [0. 0.], noise: [4.3718764  4.34113325]
Start training ...
Step: 5, L: 185.003173828125, J: 8.379264831542969
Current x: (-0.27089650149897676, 0.1356395844633827, 0.1066720421075821), u: [0. 0.], noise: [7.05540722 4.19325794]
Start training ...
Step: 5, L: 321.3408508300781, J: 10.226417541503906
Current x: (-0.2775637753219614, 0.13729427640008585, 0.10682851955890657), u: [0. 0.], noise: [4.8552195  6.34521972]
Start training ...
Step: 5, L: 351.28570556640625, J: 10.38689422607421

Step: 5, L: 319.2309875488281, J: 10.628410339355469
Current x: (-0.6355384714506932, 0.21206001655952525, 0.11474559860229497), u: [0. 0.], noise: [4.46208333 5.15917206]
Start training ...
Step: 5, L: 322.74578857421875, J: 10.28864860534668
Current x: (-0.6468450919590923, 0.21419335385343888, 0.11466643862724309), u: [0. 0.], noise: [4.66247575 5.90692282]
Start training ...
Step: 5, L: 259.0636901855469, J: 9.31427001953125
Current x: (-0.6582618700395404, 0.21630148974296357, 0.11451756978034977), u: [0. 0.], noise: [5.10739526 5.74011957]
Start training ...
Step: 5, L: 403.5084533691406, J: 12.72298812866211
Current x: (-0.6697995782396046, 0.21847862460059067, 0.11424425621032719), u: [0. 0.], noise: [4.3232071  4.54390889]
Start training ...
Step: 5, L: 247.2910919189453, J: 11.085611343383789
Current x: (-0.6814612382084894, 0.22075240587462663, 0.11390767021179203), u: [0. 0.], noise: [4.61111241 5.47374897]
Start training ...
Step: 5, L: 497.6291198730469, J: 12.29631996154

Step: 6, L: 419.49658203125, J: 11.588072776794434
Current x: (-1.5820451581089915e-07, 0.0010074982269496068, -8.991267681121839e-05), u: [0. 0.], noise: [5.11216775 4.77322602]
Start training ...
Step: 6, L: 296.349609375, J: 10.905712127685547
Current x: (5.865658151540455e-07, 0.0007399165760905912, -0.00012695193290710463), u: [0. 0.], noise: [4.80813805 3.98591345]
Start training ...
Step: 6, L: 342.94366455078125, J: 10.234220504760742
Current x: (1.4202183704531168e-06, 0.0004798743308732074, -0.0001300970315933229), u: [0. 0.], noise: [4.7104145  4.42892567]
Start training ...
Step: 6, L: 257.41998291015625, J: 10.42486572265625
Current x: (2.365513104868283e-06, 0.0001182371956041404, -5.1019692420959634e-05), u: [0. 0.], noise: [6.16982178 4.67675115]
Start training ...
Step: 6, L: 398.6544189453125, J: 11.584229469299316
Current x: (3.429707932226173e-06, 0.0, 5.620653629302961e-05), u: [0. 0.], noise: [4.84362007 4.06560207]
Start training ...
Step: 6, L: 364.9622802734375

Step: 6, L: 342.37664794921875, J: 12.026721954345703
Current x: (-0.00175699526479026, 0.021670717685573035, 0.007024127125740047), u: [0. 0.], noise: [5.01670567 6.18316051]
Start training ...
Step: 6, L: 441.95867919921875, J: 11.69068431854248
Current x: (-0.0018819765313500198, 0.022501774257282153, 0.007855452179908748), u: [0. 0.], noise: [4.44585441 3.31844844]
Start training ...
Step: 6, L: 317.684326171875, J: 12.964472770690918
Current x: (-0.0020148246610144837, 0.02347178973407306, 0.008570131754875178), u: [0. 0.], noise: [4.31297404 6.08183229]
Start training ...
Step: 6, L: 355.2344665527344, J: 11.319961547851562
Current x: (-0.0021537719388316843, 0.02423721152801348, 0.009397551894187922), u: [0. 0.], noise: [4.66540743 6.14718082]
Start training ...
Step: 6, L: 398.3572998046875, J: 12.075502395629883
Current x: (-0.002301627593218759, 0.02506107573956011, 0.010048086190223689), u: [0. 0.], noise: [6.00192343 3.87350299]
Start training ...
Step: 6, L: 291.7349853515

Start training ...
Step: 6, L: 340.7701416015625, J: 11.126334190368652
Current x: (-0.01917129265756106, 0.08827532874670224, 0.008131261897087096), u: [0. 0.], noise: [6.24921208 4.91824575]
Start training ...
Step: 6, L: 321.7514953613281, J: 12.20382308959961
Current x: (-0.019888434700141684, 0.09154895491349978, 0.0076438484668731686), u: [0. 0.], noise: [2.68140035 4.94399085]
Start training ...
Step: 6, L: 354.1489562988281, J: 11.478638648986816
Current x: (-0.020614657194890078, 0.09495828992038319, 0.007289531683921813), u: [0. 0.], noise: [5.03679989 5.09047246]
Start training ...
Step: 6, L: 357.4555969238281, J: 12.405502319335938
Current x: (-0.021346708366213437, 0.09814914175104485, 0.006708955860137939), u: [0. 0.], noise: [5.81134583 3.89040949]
Start training ...
Step: 6, L: 285.368896484375, J: 10.790489196777344
Current x: (-0.022086141778915748, 0.10137169384026831, 0.006123012804985046), u: [0. 0.], noise: [6.9926142  5.59668064]
Start training ...
Step: 6, L: 3

Step: 6, L: 251.12879943847656, J: 11.978222846984863
Current x: (-0.05152676102504279, 0.24282175563917624, -0.0195001135110855), u: [0. 0.], noise: [4.01172975 7.69372028]
Start training ...
Step: 6, L: 252.04539489746094, J: 11.306222915649414
Current x: (-0.052092819739978896, 0.24710927046690348, -0.019944022917747488), u: [0. 0.], noise: [4.35040042 4.66984122]
Start training ...
Step: 6, L: 416.5028076171875, J: 13.534451484680176
Current x: (-0.05263605414100401, 0.251586107755021, -0.020756131386756888), u: [0. 0.], noise: [6.59545381 4.79115002]
Start training ...
Step: 6, L: 413.9398193359375, J: 13.825286865234375
Current x: (-0.05316129974384064, 0.2559837898264271, -0.021600183939933768), u: [0. 0.], noise: [6.30015408 5.47213701]
Start training ...
Step: 6, L: 254.09408569335938, J: 11.589361190795898
Current x: (-0.05366291285813332, 0.2605388870604173, -0.022263806128501883), u: [0. 0.], noise: [4.78124237 4.30075067]
Start training ...
Step: 6, L: 358.48699951171875, 

Step: 6, L: 270.95623779296875, J: 11.744194030761719
Current x: (-0.05007962205176398, 0.45824848143785124, -0.044775108194351175), u: [0. 0.], noise: [5.61209199 4.91752337]
Start training ...
Step: 6, L: 352.8999938964844, J: 11.642766952514648
Current x: (-0.04926266488234529, 0.4628900634349847, -0.04551641378402708), u: [0. 0.], noise: [4.70156504 4.83806873]
Start training ...
Step: 6, L: 437.86041259765625, J: 13.568696975708008
Current x: (-0.0483985769977468, 0.4676025516545893, -0.04618826251029966), u: [0. 0.], noise: [4.42271858 5.01457828]
Start training ...
Step: 6, L: 316.1338806152344, J: 12.20838737487793
Current x: (-0.047491083108403186, 0.4722870153312348, -0.0468737616062164), u: [0. 0.], noise: [4.80994255 5.655672  ]
Start training ...
Step: 6, L: 476.62744140625, J: 13.310843467712402
Current x: (-0.046540015481452895, 0.4769332022180644, -0.04761844668388364), u: [0. 0.], noise: [5.40482314 3.33286101]
Start training ...
Step: 6, L: 254.05270385742188, J: 11.6

Step: 6, L: 476.94805908203125, J: 12.80416488647461
Current x: (0.03705901051145002, 0.677700263413511, -0.09412923800945278), u: [0. 0.], noise: [6.62331712 3.29460934]
Start training ...
Step: 6, L: 384.7598876953125, J: 11.504026412963867
Current x: (0.040745000698035445, 0.6832938645564578, -0.09495888123512264), u: [0. 0.], noise: [3.27506887 5.83978287]
Start training ...
Step: 6, L: 295.9046630859375, J: 11.670576095581055
Current x: (0.04452420977321672, 0.688893867824476, -0.0954556536674499), u: [0. 0.], noise: [4.64781176 6.0607857 ]
Start training ...
Step: 6, L: 312.3148193359375, J: 11.957304000854492
Current x: (0.04838984243399978, 0.694420249762618, -0.09620889749526973), u: [0. 0.], noise: [3.05211008 4.69673662]
Start training ...
Step: 6, L: 437.17474365234375, J: 13.370295524597168
Current x: (0.052357539546085, 0.7000316164023888, -0.09710343871116633), u: [0. 0.], noise: [5.97449247 6.27594849]
Start training ...
Step: 6, L: 317.1641845703125, J: 12.029714584350

Step: 6, L: 511.4937744140625, J: 15.20476245880127
Current x: (0.3455345907437268, 0.9607252453291864, -0.15488760724067682), u: [0. 0.], noise: [3.59660818 3.77170326]
Start training ...
Step: 6, L: 337.7060546875, J: 12.220296859741211
Current x: (0.354969264836789, 0.9661159539708727, -0.1564975129604339), u: [0. 0.], noise: [5.8615918  5.40129717]
Start training ...
Step: 6, L: 356.4483337402344, J: 12.62135124206543
Current x: (0.3645176091721008, 0.9712536730559348, -0.1581249281883239), u: [0. 0.], noise: [4.71942141 4.97083751]
Start training ...
Step: 6, L: 377.7980041503906, J: 13.552993774414062
Current x: (0.3742414963146978, 0.9765229169220409, -0.15970631394386284), u: [0. 0.], noise: [4.68157351 5.12616347]
Start training ...
Step: 6, L: 387.6148376464844, J: 11.918108940124512
Current x: (0.38411797286996935, 0.9817680973911124, -0.16131284132003776), u: [0. 0.], noise: [3.28438073 6.06818104]
Start training ...
Step: 6, L: 342.8443298339844, J: 14.174447059631348
Curr

Step: 6, L: 364.84796142578125, J: 14.89859390258789
Simulation ends in 327 steps
Episode 7 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [4.08648657 5.01720467]
Start training ...
Step: 7, L: 253.98153686523438, J: 11.478860855102539
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [6.5400896 4.2909073]
Start training ...
Step: 7, L: 421.916015625, J: 15.676395416259766
Current x: (0.0, 0.0, -9.307184219360353e-05), u: [0. 0.], noise: [4.35976115 4.512032  ]
Start training ...
Step: 7, L: 336.869873046875, J: 14.796161651611328
Current x: (0.0, 3.146885681152331e-05, 3.8774538040161136e-05), u: [0. 0.], noise: [4.73916597 5.75754931]
Start training ...
Step: 7, L: 454.2883605957031, J: 13.975125312805176
Current x: (8.257141863980587e-08, 0.0, 0.00015539383888244632), u: [0. 0.], noise: [2.65918076 3.79125209]
Start training ...
Step: 7, L: 320.23486328125, J: 15.669897079467773
Current x: (1.2444230766763804e-07, 6.319781440168037e-06, 0.000170174789428711), u: [0. 0.], nois

Step: 7, L: 348.4376220703125, J: 12.35371208190918
Current x: (0.009381443254488867, 0.001349547520095035, -0.05085929951667787), u: [0. 0.], noise: [5.59772437 5.62769411]
Start training ...
Step: 7, L: 214.16802978515625, J: 10.017228126525879
Current x: (0.010201856877752496, 0.0010199860647969324, -0.05205578367710115), u: [0. 0.], noise: [4.9850853  4.11238597]
Start training ...
Step: 7, L: 359.6885986328125, J: 13.542587280273438
Current x: (0.011079337581409353, 0.0008305149098192602, -0.05325526480674745), u: [0. 0.], noise: [6.50223149 7.44291866]
Start training ...
Step: 7, L: 393.5732116699219, J: 13.206584930419922
Current x: (0.012004154499130143, 0.000568558538404158, -0.05436747596263887), u: [0. 0.], noise: [4.36037405 3.26535214]
Start training ...
Step: 7, L: 366.12701416015625, J: 14.13228988647461
Current x: (0.013003201585187833, 0.0007181401636069, -0.055573755836486834), u: [0. 0.], noise: [5.84920547 5.67449007]
Start training ...
Step: 7, L: 367.6959533691406

Step: 7, L: 358.50433349609375, J: 14.080253601074219
Current x: (0.1119314789423022, 0.0027067368826387207, -0.10005603468418131), u: [0. 0.], noise: [5.0698791  5.28741285]
Start training ...
Step: 7, L: 282.6874694824219, J: 14.141532897949219
Current x: (0.11616206565999904, 0.0031843979387186886, -0.10089118046760569), u: [0. 0.], noise: [3.7971479 4.332733 ]
Start training ...
Step: 7, L: 402.12945556640625, J: 14.480386734008789
Current x: (0.12049611051085796, 0.0037116080847145456, -0.101748079609871), u: [0. 0.], noise: [4.64589713 5.3171832 ]
Start training ...
Step: 7, L: 469.430908203125, J: 14.419820785522461
Current x: (0.12491203960768964, 0.0040666721146603545, -0.1026585372686387), u: [0. 0.], noise: [5.59760662 5.53433627]
Start training ...
Step: 7, L: 288.2537841796875, J: 13.263941764831543
Current x: (0.12942916630260476, 0.004431891313040116, -0.10363612353801736), u: [0. 0.], noise: [4.1692297  3.54854741]
Start training ...
Step: 7, L: 409.5852966308594, J: 14

Step: 7, L: 377.56280517578125, J: 15.611422538757324
Current x: (0.40734068877047236, 0.0, -0.12211728138923661), u: [0. 0.], noise: [4.45475225 5.8705113 ]
Start training ...
Step: 7, L: 417.498046875, J: 13.978314399719238
Current x: (0.41659075304954946, 0.0, -0.1218196145534517), u: [0. 0.], noise: [4.28438943 4.88991158]
Start training ...
Step: 7, L: 236.25405883789062, J: 13.162063598632812
Current x: (0.4259665934916356, 0.0, -0.12166352362632767), u: [0. 0.], noise: [4.63356356 1.77313665]
Start training ...
Step: 7, L: 381.6962890625, J: 15.1395263671875
Current x: (0.43545391869920425, 0.0, -0.12156798491477981), u: [0. 0.], noise: [5.14698042 3.21614124]
Start training ...
Step: 7, L: 367.16156005859375, J: 15.263848304748535
Current x: (0.44501899792698124, 0.0, -0.12118640351295486), u: [0. 0.], noise: [6.0771117  5.39197204]
Start training ...
Step: 7, L: 253.42303466796875, J: 11.932832717895508
Current x: (0.4546854956935656, 0.0, -0.1206117382049562), u: [0. 0.], noi

Step: 8, L: 339.4269104003906, J: 14.899297714233398
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [3.62082764 4.97648054]
Start training ...
Step: 8, L: 532.81982421875, J: 18.080217361450195
Current x: (0.0, 0.0, 0.00016078391075134279), u: [0. 0.], noise: [4.42945914 4.52504232]
Start training ...
Step: 8, L: 458.83697509765625, J: 16.77124786376953
Current x: (0.0, 0.0, 0.0001860025405883789), u: [0. 0.], noise: [5.23603221 4.15412658]
Start training ...
Step: 8, L: 281.7922058105469, J: 13.763599395751953
Current x: (-1.4397398611844212e-07, 0.0, 0.00020166282653808593), u: [0. 0.], noise: [3.09043297 6.17437293]
Start training ...
Step: 8, L: 351.89556884765625, J: 12.428765296936035
Current x: (-4.626073078331886e-07, 0.0, 0.0003255136489868164), u: [0. 0.], noise: [4.01666498 5.93966688]
Start training ...
Step: 8, L: 374.1711120605469, J: 14.586957931518555
Current x: (-9.680773206512112e-07, 0.0, 0.00014097044467926027), u: [0. 0.], noise: [6.50406262 3.23491307]
Start train

Step: 8, L: 432.64105224609375, J: 13.81910514831543
Current x: (-0.004706346457378382, 0.04135436271260292, 0.026781135177612297), u: [0. 0.], noise: [5.95466973 4.9787798 ]
Start training ...
Step: 8, L: 363.984619140625, J: 16.510009765625
Current x: (-0.00509604246696907, 0.042310246950604063, 0.027687851953506464), u: [0. 0.], noise: [3.743893   4.38415136]
Start training ...
Step: 8, L: 360.7554016113281, J: 14.845184326171875
Current x: (-0.005515015996043223, 0.04337808409708584, 0.028692157745361324), u: [0. 0.], noise: [3.83255387 6.03731748]
Start training ...
Step: 8, L: 324.7598571777344, J: 13.500982284545898
Current x: (-0.0059564914580488675, 0.04427741412141298, 0.02963243768215179), u: [0. 0.], noise: [5.88017093 5.41987325]
Start training ...
Step: 8, L: 429.26898193359375, J: 15.257019996643066
Current x: (-0.006426281824654234, 0.04518232502396859, 0.0303522412776947), u: [0. 0.], noise: [4.10748775 3.05325004]
Start training ...
Step: 8, L: 356.26837158203125, J: 

Step: 8, L: 274.04876708984375, J: 14.134796142578125
Current x: (-0.0540363095249626, 0.0684453690629434, 0.05084911973476408), u: [0. 0.], noise: [4.75345086 4.53145375]
Start training ...
Step: 8, L: 296.72796630859375, J: 14.425867080688477
Current x: (-0.05614046245182145, 0.069084284942345, 0.05126222529411314), u: [0. 0.], noise: [5.974891   3.73548723]
Start training ...
Step: 8, L: 314.5732727050781, J: 15.210972785949707
Current x: (-0.05829180795739199, 0.06966949116060395, 0.051697530579566935), u: [0. 0.], noise: [4.80349658 5.96970872]
Start training ...
Step: 8, L: 312.5545654296875, J: 15.448282241821289
Current x: (-0.060492909226572, 0.07024345966862247, 0.05235677626132963), u: [0. 0.], noise: [5.5778892  6.65689967]
Start training ...
Step: 8, L: 363.4654541015625, J: 14.307050704956055
Current x: (-0.06274968049889774, 0.07091230933556338, 0.05289940068721769), u: [0. 0.], noise: [4.70720296 4.28269386]
Start training ...
Step: 8, L: 371.4219055175781, J: 16.443412

Step: 8, L: 362.4122009277344, J: 15.98696517944336
Current x: (-0.1975901391184672, 0.100248049554191, 0.09394782977104187), u: [0. 0.], noise: [5.04099    6.10150235]
Start training ...
Step: 8, L: 380.0149841308594, J: 13.912818908691406
Current x: (-0.20256184874838856, 0.10174360270966831, 0.09582740290164947), u: [0. 0.], noise: [5.18342689 5.56797571]
Start training ...
Step: 8, L: 319.9455261230469, J: 15.014113426208496
Current x: (-0.20763808575359682, 0.10336749141992764, 0.09760092477798461), u: [0. 0.], noise: [6.67367659 6.98225292]
Start training ...
Step: 8, L: 359.61895751953125, J: 15.159984588623047
Current x: (-0.21281719304104324, 0.10508058764906039, 0.09933599178791046), u: [0. 0.], noise: [4.93800606 6.81860011]
Start training ...
Step: 8, L: 381.3509521484375, J: 15.220489501953125
Current x: (-0.21812937195681387, 0.10717177771774025, 0.10104020113945007), u: [0. 0.], noise: [5.02503096 5.17042523]
Start training ...
Step: 8, L: 322.31671142578125, J: 16.81630

Step: 8, L: 328.51025390625, J: 14.447265625
Current x: (-0.5160738904156432, 0.19021111204059543, 0.15102443752288816), u: [0. 0.], noise: [5.0139534  4.77141606]
Start training ...
Step: 8, L: 269.6021728515625, J: 13.29787540435791
Current x: (-0.526259200287298, 0.1913045269967908, 0.15168651936054228), u: [0. 0.], noise: [4.45585146 5.70500834]
Start training ...
Step: 8, L: 323.36944580078125, J: 13.560079574584961
Current x: (-0.5365917320155603, 0.19238434071278498, 0.1523728549003601), u: [0. 0.], noise: [5.92135105 4.71666764]
Start training ...
Step: 8, L: 398.508544921875, J: 15.457490921020508
Current x: (-0.547077799928561, 0.19348757337120281, 0.15293427474498747), u: [0. 0.], noise: [5.79005987 7.45052135]
Start training ...
Step: 8, L: 278.02130126953125, J: 15.235980033874512
Current x: (-0.5577253358579587, 0.19466128236256505, 0.15361616291999816), u: [0. 0.], noise: [3.19003818 5.00259586]
Start training ...
Step: 8, L: 269.9896545410156, J: 14.643962860107422
Curr

Current x: (6.178767500214603e-06, 0.001276811737375039, -0.002252546548843384), u: [0. 0.], noise: [5.88929493 4.97245675]
Start training ...
Step: 9, L: 354.4267578125, J: 16.71771240234375
Current x: (1.0923921641065424e-05, 0.001617457565842543, -0.0026690055608749393), u: [0. 0.], noise: [5.38205768 6.82182856]
Start training ...
Step: 9, L: 376.05810546875, J: 16.181884765625
Current x: (1.811573381114728e-05, 0.0020632757943429805, -0.002993780755996704), u: [0. 0.], noise: [6.23358559 3.55783869]
Start training ...
Step: 9, L: 442.84765625, J: 16.131668090820312
Current x: (2.8564766082439215e-05, 0.002748478279282005, -0.0034625330209732057), u: [0. 0.], noise: [5.33025309 5.5433921 ]
Start training ...
Step: 9, L: 272.20404052734375, J: 15.023337364196777
Current x: (4.194513158884703e-05, 0.00343181875609398, -0.0036637106180191044), u: [0. 0.], noise: [6.91223239 5.65548994]
Start training ...
Step: 9, L: 297.74298095703125, J: 13.439855575561523
Current x: (5.9090525330022

Step: 9, L: 310.8634033203125, J: 16.777481079101562
Current x: (0.009848487254679858, 0.06798969011195993, -0.04034972014427186), u: [0. 0.], noise: [4.84464203 4.99178736]
Start training ...
Step: 9, L: 394.61505126953125, J: 16.088390350341797
Current x: (0.010572228177410747, 0.07029056370110459, -0.04188995730876923), u: [0. 0.], noise: [4.38697455 4.26749274]
Start training ...
Step: 9, L: 338.16363525390625, J: 13.721426963806152
Current x: (0.011335648049355388, 0.07259327962396792, -0.043444909000396735), u: [0. 0.], noise: [3.38497846 6.80032865]
Start training ...
Step: 9, L: 272.08831787109375, J: 15.738484382629395
Current x: (0.012135310843235216, 0.07477968299148349, -0.044987912535667425), u: [0. 0.], noise: [5.49525214 6.1286142 ]
Start training ...
Step: 9, L: 249.1980743408203, J: 14.15849494934082
Current x: (0.0129792096941777, 0.07700265604236754, -0.046872451090812686), u: [0. 0.], noise: [5.64053832 5.52747868]
Start training ...
Step: 9, L: 413.10791015625, J: 

Step: 9, L: 546.8441772460938, J: 17.875410079956055
Current x: (0.11274380309151251, 0.1955527803989996, -0.09493077132701874), u: [0. 0.], noise: [4.87559719 4.69381993]
Start training ...
Step: 9, L: 335.9759521484375, J: 14.586261749267578
Current x: (0.1169174188442545, 0.19862131637556074, -0.09566815052032471), u: [0. 0.], noise: [5.00550657 5.74672506]
Start training ...
Step: 9, L: 262.88818359375, J: 15.069363594055176
Current x: (0.12118174142790206, 0.20166148538114773, -0.09638735201358795), u: [0. 0.], noise: [4.59774112 4.27626337]
Start training ...
Step: 9, L: 333.8399658203125, J: 16.948638916015625
Current x: (0.1255487717848402, 0.204790960864644, -0.09718067536354065), u: [0. 0.], noise: [6.14018862 5.20997363]
Start training ...
Step: 9, L: 387.72869873046875, J: 13.808030128479004
Current x: (0.13000120393867282, 0.20782271776853384, -0.09794185092449188), u: [0. 0.], noise: [4.5809376 5.9822339]
Start training ...
Step: 9, L: 454.2491455078125, J: 12.84865570068

Step: 9, L: 278.1508483886719, J: 14.994258880615234
Current x: (0.4215904144417284, 0.3555042892510447, -0.10717106075286861), u: [0. 0.], noise: [4.03972239 5.02195504]
Start training ...
Step: 9, L: 310.0064697265625, J: 15.360147476196289
Current x: (0.4307918041914578, 0.35951863753500857, -0.1070656400442123), u: [0. 0.], noise: [3.97457934 4.47525516]
Start training ...
Step: 9, L: 276.48919677734375, J: 15.72091293334961
Current x: (0.44009012310702017, 0.3634529546344027, -0.10705844259262082), u: [0. 0.], noise: [6.36387975 6.0596375 ]
Start training ...
Step: 9, L: 319.04608154296875, J: 15.597140312194824
Current x: (0.44947873797776616, 0.36724641679623554, -0.10710131270885465), u: [0. 0.], noise: [4.68308522 6.11158474]
Start training ...
Step: 9, L: 326.4180908203125, J: 13.30994987487793
Current x: (0.45900010316308587, 0.3712941178648873, -0.10711375861167904), u: [0. 0.], noise: [5.19956382 4.46477764]
Start training ...
Step: 9, L: 408.8882751464844, J: 14.477570533

Step: 9, L: 287.616455078125, J: 14.68355655670166
Current x: (0.9972274460728454, 0.5947077990167238, -0.08624523832798), u: [0. 0.], noise: [4.7975551  4.73474378]
Start training ...
Step: 9, L: 405.2621154785156, J: 15.09228515625
Simulation ends in 199 steps
Episode 10 begins...
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [4.27619267 6.02170871]
Start training ...
Step: 10, L: 330.6707458496094, J: 15.454940795898438
Current x: (0.0, 0.0, 0.0), u: [0. 0.], noise: [6.04734333 3.7516209 ]
Start training ...
Step: 10, L: 372.0626220703125, J: 16.81564712524414
Current x: (0.0, 4.8790115356445315e-05, -0.0001745515823364258), u: [0. 0.], noise: [5.68370615 6.31154541]
Start training ...
Step: 10, L: 377.4260559082031, J: 15.500476837158203
Current x: (0.0, 9.647668075561514e-05, -0.00011953094005584717), u: [0. 0.], noise: [4.87879338 6.19396267]
Start training ...
Step: 10, L: 295.4798889160156, J: 14.305312156677246
Current x: (2.093790146372523e-07, 0.00036268839343892706, -0.000

Step: 10, L: 366.2987365722656, J: 16.561424255371094
Current x: (0.005572167602944431, 0.06498383601222742, -0.03249442336559296), u: [0. 0.], noise: [6.85321315 5.0715145 ]
Start training ...
Step: 10, L: 390.9822692871094, J: 14.805502891540527
Current x: (0.006063624001282003, 0.06645645016441899, -0.033672077107429504), u: [0. 0.], noise: [4.18796662 4.89808934]
Start training ...
Step: 10, L: 398.3594665527344, J: 15.760087013244629
Current x: (0.006593822298225534, 0.06813990765274704, -0.03467156097888947), u: [0. 0.], noise: [5.56088132 6.31634143]
Start training ...
Step: 10, L: 177.6912841796875, J: 12.582918167114258
Current x: (0.007154609451086029, 0.06975045567281471, -0.035742057108879094), u: [0. 0.], noise: [4.0925606  3.97504401]
Start training ...
Step: 10, L: 278.41156005859375, J: 14.841048240661621
Current x: (0.007756568536770454, 0.07156701207989907, -0.036888099265098576), u: [0. 0.], noise: [5.02993516 5.67428214]
Start training ...
Step: 10, L: 479.380615234

Step: 10, L: 262.852783203125, J: 14.747062683105469
Current x: (0.08173261292496056, 0.13031140916189374, -0.07613260560035705), u: [0. 0.], noise: [6.69202266 4.35147561]
Start training ...
Step: 10, L: 362.54241943359375, J: 15.432055473327637
Current x: (0.08495764501245284, 0.1322239075262553, -0.07650302615165709), u: [0. 0.], noise: [5.41945106 5.97779751]
Start training ...
Step: 10, L: 262.0718994140625, J: 14.065608024597168
Current x: (0.08826667293795873, 0.1342565568336136, -0.07663939199447631), u: [0. 0.], noise: [6.14234035 5.05677307]
Start training ...
Step: 10, L: 391.13018798828125, J: 18.133739471435547
Current x: (0.0916628082418566, 0.13644459744807885, -0.07683159246444701), u: [0. 0.], noise: [4.13782986 3.50139982]
Start training ...
Step: 10, L: 313.5960693359375, J: 16.150144577026367
Current x: (0.0951446888768901, 0.13876826210290236, -0.07691523623466491), u: [0. 0.], noise: [4.11617767 3.87973607]
Start training ...
Step: 10, L: 341.2372741699219, J: 12.

Step: 10, L: 339.155517578125, J: 15.275355339050293
Current x: (0.2921250260638049, 0.21900929758522192, -0.09252546272277835), u: [0. 0.], noise: [4.8277599  5.36521387]
Start training ...
Step: 10, L: 373.0069580078125, J: 18.004276275634766
Current x: (0.29891476250226834, 0.22135004955141022, -0.0925623047590256), u: [0. 0.], noise: [2.60460658 5.60706864]
Start training ...
Step: 10, L: 275.5675048828125, J: 12.226723670959473
Current x: (0.30579867539732697, 0.22372473895563827, -0.0926528922080994), u: [0. 0.], noise: [5.74875252 3.95574717]
Start training ...
Step: 10, L: 395.5583801269531, J: 16.98408317565918
Current x: (0.3127584889629261, 0.22593608064355492, -0.09304372584819796), u: [0. 0.], noise: [4.72812214 5.0937981 ]
Start training ...
Step: 10, L: 279.1308898925781, J: 16.13471221923828
Current x: (0.31980808893805046, 0.22813270988787757, -0.09325525894165042), u: [0. 0.], noise: [5.18827794 4.6930064 ]
Start training ...
Step: 10, L: 226.50503540039062, J: 14.687

Current x: (0.6812879171682457, 0.30492591098153526, -0.12356309325695038), u: [0. 0.], noise: [6.96953704 4.5728214 ]
Start training ...
Step: 10, L: 349.5049743652344, J: 16.945573806762695
Current x: (0.6925941481184634, 0.30705437475767733, -0.12459596502780915), u: [0. 0.], noise: [4.19859214 3.69534474]
Start training ...
Step: 10, L: 373.3363342285156, J: 17.684030532836914
Current x: (0.7040426373774201, 0.3093472742358885, -0.12538916523456572), u: [0. 0.], noise: [3.54420233 5.33684658]
Start training ...
Step: 10, L: 300.6106262207031, J: 15.805702209472656
Current x: (0.7155892276246493, 0.31144244801293025, -0.12613204069137574), u: [0. 0.], noise: [4.13783526 4.67899114]
Start training ...
Step: 10, L: 326.425048828125, J: 17.323575973510742
Current x: (0.7272468850180503, 0.3134377541641333, -0.12705418055057527), u: [0. 0.], noise: [4.96109842 6.27777916]
Start training ...
Step: 10, L: 300.8116455078125, J: 14.27580738067627
Current x: (0.7390154561952812, 0.3153267387

Step: 11, L: 400.15203857421875, J: 17.499404907226562
Current x: (-0.00022028839918920908, 0.0, 0.007913232588768006), u: [0. 0.], noise: [5.36335241 4.06374876]
Start training ...
Step: 11, L: 373.5450134277344, J: 17.70889663696289
Current x: (-0.0002726500998552174, 0.0, 0.00814713809490204), u: [0. 0.], noise: [3.66554108 2.4739715 ]
Start training ...
Step: 11, L: 330.829345703125, J: 13.003111839294434
Current x: (-0.0003324716070581855, 0.0, 0.008511003947257996), u: [0. 0.], noise: [4.31107504 6.44948691]
Start training ...
Step: 11, L: 294.0463562011719, J: 13.404787063598633
Current x: (-0.00039729500496406696, 0.0, 0.00899402675628662), u: [0. 0.], noise: [4.01186418 4.55223099]
Start training ...
Step: 11, L: 367.061767578125, J: 16.043018341064453
Current x: (-0.0004712766108204134, 0.0, 0.009263208413124084), u: [0. 0.], noise: [4.98586476 4.07658543]
Start training ...
Step: 11, L: 288.1064453125, J: 15.194845199584961
Current x: (-0.0005529606823771952, 0.0, 0.00947835

Step: 11, L: 203.60296630859375, J: 13.291013717651367
Current x: (-0.014071133434930716, 5.892725028327051e-05, 0.006680345988273629), u: [0. 0.], noise: [4.97139051 5.15490761]
Start training ...
Step: 11, L: 407.33355712890625, J: 14.479867935180664
Current x: (-0.014622467243071034, 0.00031489031799831243, 0.006408161377906807), u: [0. 0.], noise: [5.65845263 5.94969468]
Start training ...
Step: 11, L: 411.283447265625, J: 17.78158187866211
Current x: (-0.015180565718922812, 0.0006024606809108147, 0.006117625069618233), u: [0. 0.], noise: [4.96543476 3.29052666]
Start training ...
Step: 11, L: 394.57257080078125, J: 16.77313232421875
Current x: (-0.01574610283157802, 0.0010698218764972685, 0.0057979645490646445), u: [0. 0.], noise: [2.52870763 5.01047628]
Start training ...
Step: 11, L: 334.27984619140625, J: 15.765047073364258
Current x: (-0.016316690600383887, 0.0013817637648403831, 0.005645794820785531), u: [0. 0.], noise: [5.50394977 4.36023044]
Start training ...
Step: 11, L: 

Step: 11, L: 300.62652587890625, J: 16.224870681762695
Current x: (-0.039121148949347956, 0.0006016848084173129, -0.014502450346946699), u: [0. 0.], noise: [6.01304509 3.47951327]
Start training ...
Step: 11, L: 380.29229736328125, J: 18.243743896484375
Current x: (-0.03953656641421995, 0.0008983538061987413, -0.01483086099624632), u: [0. 0.], noise: [4.2358683  4.42843793]
Start training ...
Step: 11, L: 368.97015380859375, J: 15.899271965026855
Current x: (-0.03993821782585112, 0.001163178829409708, -0.014905918431282025), u: [0. 0.], noise: [4.50367444 3.23303928]
Start training ...
Step: 11, L: 295.4841613769531, J: 11.688234329223633
Current x: (-0.0403270197958014, 0.0013133392307948975, -0.015000232791900615), u: [0. 0.], noise: [3.01533164 4.94409684]
Start training ...
Step: 11, L: 257.7391662597656, J: 14.978793144226074
Current x: (-0.04070428991018819, 0.0012560850727656105, -0.014967483639717082), u: [0. 0.], noise: [5.18656295 5.81394924]
Start training ...
Step: 11, L: 3

Step: 11, L: 250.9210205078125, J: 13.169126510620117
Current x: (-0.038915500103708006, 0.003579782003983657, -0.021124573636054956), u: [0. 0.], noise: [6.02328779 5.0484783 ]
Start training ...
Step: 11, L: 294.4147644042969, J: 16.33728790283203
Current x: (-0.038344962832627415, 0.004066246993924057, -0.019958367514610255), u: [0. 0.], noise: [5.46939641 5.45863481]
Start training ...
Step: 11, L: 312.4511413574219, J: 14.690016746520996
Current x: (-0.03775103866762546, 0.004678641545562514, -0.018694680428504907), u: [0. 0.], noise: [4.84368024 6.20310415]
Start training ...
Step: 11, L: 355.8570251464844, J: 17.531822204589844
Current x: (-0.037135305382855456, 0.0054026216449012026, -0.01742991716861721), u: [0. 0.], noise: [3.31199397 5.43310634]
Start training ...
Step: 11, L: 264.3325500488281, J: 17.03045654296875
Current x: (-0.036498921690576204, 0.0062500871523273865, -0.016301096272468528), u: [0. 0.], noise: [4.73547708 3.76750757]
Start training ...
Step: 11, L: 264.

Step: 11, L: 264.3072509765625, J: 15.228362083435059
Current x: (-0.004200749524275219, 0.028056123412235626, -0.004657770156860288), u: [0. 0.], noise: [3.77239962 5.3933381 ]
Start training ...
Step: 11, L: 372.16339111328125, J: 11.863430976867676
Current x: (-0.003254506508224308, 0.0283873247323055, -0.004646514773368771), u: [0. 0.], noise: [4.50756408 5.71683224]
Start training ...
Step: 11, L: 320.45648193359375, J: 19.98581314086914
Current x: (-0.002303994317468467, 0.028654089920517524, -0.004797353243827755), u: [0. 0.], noise: [3.32964213 4.43941651]
Start training ...
Step: 11, L: 267.3721618652344, J: 14.666149139404297
Current x: (-0.0013487313632165566, 0.028962283646658617, -0.005069118523597651), u: [0. 0.], noise: [4.43611304 5.74739543]
Start training ...
Step: 11, L: 397.78619384765625, J: 15.387669563293457
Current x: (-0.0003897413315919453, 0.02906637425548311, -0.005451861238479547), u: [0. 0.], noise: [3.3733003  3.79386372]
Start training ...
Step: 11, L: 2

Step: 11, L: 258.5744323730469, J: 13.552515029907227
Current x: (0.051265476176546476, 0.04609107163007038, -0.060930164003372075), u: [0. 0.], noise: [4.82203602 4.7448218 ]
Start training ...
Step: 11, L: 320.0400390625, J: 16.92761993408203
Current x: (0.053256121617590815, 0.04711901651992937, -0.06285080597400654), u: [0. 0.], noise: [5.80680948 4.04652681]
Start training ...
Step: 11, L: 381.31561279296875, J: 16.379711151123047
Current x: (0.05530502202237558, 0.04812087194744104, -0.06476372656822194), u: [0. 0.], noise: [3.86081181 4.12268178]
Start training ...
Step: 11, L: 371.72137451171875, J: 14.928862571716309
Current x: (0.05741581067598285, 0.04912511550474275, -0.06650061891078937), u: [0. 0.], noise: [4.83475453 5.18965771]
Start training ...
Step: 11, L: 316.2503967285156, J: 12.981679916381836
Current x: (0.05957826727098263, 0.049945034705596816, -0.06826369824409473), u: [0. 0.], noise: [6.27309835 5.0704953 ]
Start training ...
Step: 11, L: 282.44439697265625, 

Step: 11, L: 441.41656494140625, J: 16.732118606567383
Current x: (0.24145335088775335, 0.078019396306616, -0.11265035731792436), u: [0. 0.], noise: [3.72214645 4.07931676]
Start training ...
Step: 11, L: 276.11248779296875, J: 15.839478492736816
Current x: (0.24777998421400213, 0.07905226454534128, -0.11345967738628374), u: [0. 0.], noise: [6.07054227 5.72716961]
Start training ...
Step: 11, L: 356.52899169921875, J: 15.79273509979248
Current x: (0.2541943155433788, 0.07987933426077314, -0.11430471446514116), u: [0. 0.], noise: [4.90621183 3.11461589]
Start training ...
Step: 11, L: 183.46121215820312, J: 15.661576271057129
Current x: (0.2607422163297545, 0.08089758969589247, -0.1151154142618178), u: [0. 0.], noise: [3.82800307 5.55155893]
Start training ...
Step: 11, L: 318.93145751953125, J: 14.55292797088623
Current x: (0.26738159944886947, 0.08173169382460839, -0.11574695446491229), u: [0. 0.], noise: [3.61709288 5.05091949]
Start training ...
Step: 11, L: 348.4405517578125, J: 15

Step: 11, L: 203.197509765625, J: 15.532232284545898
Current x: (0.6243360079155912, 0.09721963637926893, -0.18456305830478656), u: [0. 0.], noise: [4.0905014  5.89646016]
Start training ...
Step: 11, L: 314.68804931640625, J: 15.532493591308594
Current x: (0.6363353562447742, 0.09644346427587851, -0.18711802906990038), u: [0. 0.], noise: [5.26448119 4.19247362]
Start training ...
Step: 11, L: 346.53082275390625, J: 11.315184593200684
Current x: (0.648517982322946, 0.09566802698379656, -0.1898535957098006), u: [0. 0.], noise: [4.62131384 5.98129014]
Start training ...
Step: 11, L: 294.60101318359375, J: 19.856201171875
Current x: (0.660876534249326, 0.09484077754481038, -0.19248196158409106), u: [0. 0.], noise: [5.10761056 3.63265291]
Start training ...
Step: 11, L: 435.09320068359375, J: 17.66094970703125
Current x: (0.6734351733461893, 0.09407373760512322, -0.19524632508754716), u: [0. 0.], noise: [5.71311507 4.32210694]
Start training ...
Step: 11, L: 298.839599609375, J: 15.4628095

Current x: (3.086701182020344e-05, 0.0, -0.003615290141105652), u: [0. 0.], noise: [4.7416061 3.9402949]
Start training ...
Step: 12, L: 298.7792053222656, J: 12.078859329223633
Current x: (4.533272188184528e-05, 0.0, -0.004063006448745727), u: [0. 0.], noise: [4.35494323 5.59576065]
Start training ...
Step: 12, L: 324.42529296875, J: 16.796417236328125
Current x: (6.2937184207273e-05, 0.0, -0.0044305916309356685), u: [0. 0.], noise: [5.69707639 6.03712165]
Start training ...
Step: 12, L: 328.7657470703125, J: 14.023895263671875
Current x: (8.458461309472642e-05, 0.0, -0.004922258567810058), u: [0. 0.], noise: [5.01770487 3.99889595]
Start training ...
Step: 12, L: 246.51583862304688, J: 16.168237686157227
Current x: (0.00011143096874843748, 0.0, -0.00544793004989624), u: [0. 0.], noise: [4.00853051 4.28347831]
Start training ...
Step: 12, L: 282.96563720703125, J: 17.137191772460938
Current x: (0.00014271551044016845, 0.0, -0.0058717206239700314), u: [0. 0.], noise: [5.28982046 6.0110

Current x: (0.02328903852246947, 0.0, -0.07552989280223844), u: [0.00037427 0.        ], noise: [4.71183042 5.41503565]
Start training ...
Step: 12, L: 309.26263427734375, J: 15.567320823669434
Current x: (0.024809897782870526, 0.0, -0.07815552995204923), u: [0.00072856 0.        ], noise: [6.06627507 4.65599829]
Start training ...
Step: 12, L: 303.30523681640625, J: 17.686491012573242
Current x: (0.026407175273405928, 0.0, -0.08085145022869107), u: [5.49815e-05 0.00000e+00], noise: [5.36677066 6.33739659]
Start training ...
Step: 12, L: 277.2004699707031, J: 13.326303482055664
Current x: (0.028088173657626003, 0.0, -0.08340626995563503), u: [0. 0.], noise: [5.06369699 4.09839271]
Start training ...
Step: 12, L: 310.843017578125, J: 15.845345497131348
Current x: (0.029863699306502286, 0.0, -0.08605814678668972), u: [0. 0.], noise: [4.3342612 5.7371154]
Start training ...
Step: 12, L: 313.39337158203125, J: 11.389650344848633
Current x: (0.03171555395478963, 0.0, -0.08861349318027492), 

Step: 12, L: 254.354248046875, J: 13.280202865600586
Current x: (0.2036040955972364, 0.0008078923654780902, -0.19753059258460992), u: [0. 0.], noise: [4.60846582 5.84105148]
Start training ...
Step: 12, L: 376.47039794921875, J: 13.488494873046875
Current x: (0.21116587165914663, 0.0010845437519965146, -0.20084948530197136), u: [0. 0.], noise: [5.33221039 5.9024471 ]
Start training ...
Step: 12, L: 334.74609375, J: 15.673194885253906
Current x: (0.21893271797547245, 0.0014048269252301213, -0.20429163661003105), u: [0. 0.], noise: [3.67120529 4.2056478 ]
Start training ...
Step: 12, L: 227.76229858398438, J: 17.243927001953125
Current x: (0.22692369773946808, 0.0018449913097364828, -0.20779081158637994), u: [0. 0.], noise: [3.82092405 4.54990962]
Start training ...
Step: 12, L: 320.2008056640625, J: 14.621862411499023
Current x: (0.23507447803833995, 0.0020754610288461046, -0.21134343082904808), u: [0. 0.], noise: [5.61036469 4.61323438]
Start training ...
Step: 12, L: 366.4226379394531

Current x: (0.7577761339694349, 0.0, -0.36067752480506887), u: [0. 0.], noise: [3.80949704 4.09366177]
Start training ...
Step: 12, L: 317.932373046875, J: 17.664241790771484
Current x: (0.7768983235928245, 0.0, -0.36416620700359337), u: [0. 0.], noise: [4.09469813 3.30319296]
Start training ...
Step: 12, L: 355.3710632324219, J: 13.94857120513916
Current x: (0.7962994222190157, 0.0, -0.3676833056688308), u: [0. 0.], noise: [6.08428604 3.39984963]
Start training ...
Step: 12, L: 318.0076599121094, J: 13.582598686218262
Current x: (0.8159640117559123, 0.0, -0.37112125382423394), u: [0. 0.], noise: [5.31763042 3.2370322 ]
Start training ...
Step: 12, L: 311.4209289550781, J: 16.493417739868164
Current x: (0.8359695128594485, 0.0, -0.37429075832366937), u: [0. 0.], noise: [4.42970571 5.79094828]
Start training ...
Step: 12, L: 319.93743896484375, J: 14.883628845214844
Current x: (0.8562852578568249, 0.0, -0.3772522030115127), u: [0. 0.], noise: [6.69439063 4.39758448]
Start training ...
S

Step: 13, L: 258.02728271484375, J: 14.317706108093262
Current x: (-0.001216099375293502, 0.00011547914620676974, 0.008957463169097901), u: [0. 0.], noise: [4.61905479 4.80281336]
Start training ...
Step: 13, L: 394.4981384277344, J: 15.842964172363281
Current x: (-0.0013573947679639217, 0.00029780519948396965, 0.00905969181060791), u: [0. 0.], noise: [5.08151997 4.738226  ]
Start training ...
Step: 13, L: 234.16368103027344, J: 10.354372024536133
Current x: (-0.0015071296516244617, 0.000441280286721559, 0.009143544578552246), u: [0. 0.], noise: [5.92683681 5.08317977]
Start training ...
Step: 13, L: 313.4079895019531, J: 16.819381713867188
Current x: (-0.0016657608008435252, 0.0005856896767170022, 0.009261726760864258), u: [0. 0.], noise: [3.35383605 7.00891757]
Start training ...
Step: 13, L: 252.85435485839844, J: 14.274100303649902
Current x: (-0.0018344588682742981, 0.0008500547822360288, 0.009464274644851686), u: [0. 0.], noise: [4.84592848 3.07373219]
Start training ...
Step: 13

Step: 13, L: 350.6910400390625, J: 14.497893333435059
Current x: (-0.017432472635772012, 0.0003958767088722308, 0.012820681357383724), u: [0. 0.], noise: [3.82080062 5.37730693]
Start training ...
Step: 13, L: 264.2130432128906, J: 19.24535369873047
Current x: (-0.018059371451301823, 0.0004388342303659998, 0.012701172208786006), u: [0. 0.], noise: [3.18490962 5.80495528]
Start training ...
Step: 13, L: 220.77130126953125, J: 15.238272666931152
Current x: (-0.018698062544592404, 0.0004205269302486385, 0.012426012420654293), u: [0. 0.], noise: [3.89849845 4.07691685]
Start training ...
Step: 13, L: 330.2059326171875, J: 13.933457374572754
Current x: (-0.019348171513623787, 0.0003201336492574178, 0.011888848042488093), u: [0. 0.], noise: [4.62482281 5.17371723]
Start training ...
Step: 13, L: 265.5981750488281, J: 15.415148735046387
Current x: (-0.02000819048849407, 3.6220319525857435e-05, 0.011333841848373409), u: [0. 0.], noise: [3.83959953 5.58180263]
Start training ...
Step: 13, L: 31

Current x: (-0.043506750925879877, 0.030156485579777792, -0.05710668847560883), u: [0. 0.], noise: [5.52426013 3.75487353]
Start training ...
Step: 13, L: 323.65081787109375, J: 16.521562576293945
Current x: (-0.043352359338219404, 0.03153388129532274, -0.059263694262504586), u: [0. 0.], noise: [4.54922679 4.58552551]
Start training ...
Step: 13, L: 347.32550048828125, J: 15.055122375488281
Current x: (-0.04314500648719115, 0.032856677758250574, -0.06124376139640809), u: [0. 0.], noise: [5.02935766 4.29023463]
Start training ...
Step: 13, L: 355.8301086425781, J: 15.257804870605469
Current x: (-0.04288354940323787, 0.0341103457707799, -0.0632274584054947), u: [0. 0.], noise: [5.47861101 5.49130093]
Start training ...
Step: 13, L: 364.91961669921875, J: 16.67700958251953
Current x: (-0.042565051309165415, 0.03531322568635838, -0.06513724312782287), u: [0. 0.], noise: [6.68879186 2.04980098]
Start training ...
Step: 13, L: 237.10028076171875, J: 13.322141647338867
Current x: (-0.04217723

Step: 13, L: 335.4873046875, J: 17.73713493347168
Current x: (0.041206177685008116, 0.09166478330661498, -0.11935018615722656), u: [0. 0.], noise: [5.19213753 5.14162029]
Start training ...
Step: 13, L: 354.1177673339844, J: 15.425603866577148
Current x: (0.04532584706665034, 0.09288168226217838, -0.1205778657913208), u: [0. 0.], noise: [3.99155179 6.24763092]
Start training ...
Step: 13, L: 324.1708679199219, J: 14.40721607208252
Current x: (0.04956855744017529, 0.09414360574690424, -0.12180049366950989), u: [0. 0.], noise: [4.96026902 3.61794818]
Start training ...
Step: 13, L: 396.0903015136719, J: 17.614439010620117
Current x: (0.05393443075042075, 0.09544101320263569, -0.12324872946739197), u: [0. 0.], noise: [4.76785205 3.47431022]
Start training ...
Step: 13, L: 464.9596252441406, J: 15.35461711883545
Current x: (0.058404529024166305, 0.09660888722427327, -0.12456273319721223), u: [0. 0.], noise: [4.38356211 4.51998235]
Start training ...
Step: 13, L: 291.89453125, J: 14.9341096

Step: 13, L: 207.47415161132812, J: 14.915960311889648
Current x: (0.33653590844921777, 0.13213686369569047, -0.1639948962450028), u: [0. 0.], noise: [2.95115814 5.92212029]
Start training ...
Step: 13, L: 296.676025390625, J: 13.145591735839844
Current x: (0.3465292999024343, 0.13243384729003618, -0.16525852432250981), u: [0. 0.], noise: [4.5699225  4.01194279]
Start training ...
Step: 13, L: 294.7945556640625, J: 14.024630546569824
Current x: (0.3566675571925749, 0.13262525332774822, -0.1668192486047745), u: [0. 0.], noise: [5.7996707  4.86996498]
Start training ...
Step: 13, L: 393.7855529785156, J: 18.946388244628906
Current x: (0.3669469924649944, 0.13268215384236148, -0.1683241749286652), u: [0. 0.], noise: [5.42450361 5.72567309]
Start training ...
Step: 13, L: 195.71859741210938, J: 15.049833297729492
Current x: (0.3774035934100441, 0.13281020624699036, -0.16973613069057467), u: [0. 0.], noise: [3.4979106 6.2776878]
Start training ...
Step: 13, L: 349.714599609375, J: 15.786905

Step: 13, L: 285.3993835449219, J: 13.739896774291992
Current x: (0.9362310533731397, 0.15755760171451277, -0.23485537810325624), u: [0. 0.], noise: [5.85533107 5.51446112]
Start training ...
Step: 13, L: 245.7799835205078, J: 15.070362091064453
Current x: (0.9548678006219535, 0.15854848089306622, -0.23648323330879212), u: [0. 0.], noise: [4.69768064 4.11528527]
Start training ...
Step: 13, L: 412.75555419921875, J: 17.066240310668945
Current x: (0.9737691255864693, 0.1596641269232096, -0.23807700152397157), u: [0. 0.], noise: [5.15603519 4.50488911]
Start training ...
Step: 13, L: 288.32470703125, J: 13.357291221618652
Current x: (0.9928769252821289, 0.1606555411691459, -0.23961253023147586), u: [0. 0.], noise: [3.65448893 6.72304562]
Start training ...
Step: 13, L: 394.551513671875, J: 15.940217971801758
Simulation ends in 265 steps
Episode 14 begins...
Current x: (0.0, 0.0, 0.0), u: [0.         0.03455542], noise: [4.7851257  2.78072317]
Start training ...
Step: 14, L: 302.977600097

Step: 14, L: 409.43994140625, J: 15.915975570678711
Current x: (-0.0028807807691161205, 0.0, 0.007138183093070988), u: [0.         0.21704383], noise: [4.97515223 6.42918132]
Start training ...
Step: 14, L: 424.33758544921875, J: 12.649810791015625
Current x: (-0.0030723746439919354, 0.0, 0.007178164696693424), u: [0.         0.22006294], noise: [6.26921636 5.80244866]
Start training ...
Step: 14, L: 224.42694091796875, J: 15.050008773803711
Current x: (-0.003272264000095543, 0.0, 0.007051039004325871), u: [0.         0.22350076], noise: [3.98932568 3.54472274]
Start training ...
Step: 14, L: 299.865234375, J: 15.265336990356445
Current x: (-0.0034809764852417797, 0.0, 0.006948583817481998), u: [0.         0.23001209], noise: [4.81539528 4.93721793]
Start training ...
Step: 14, L: 303.7744140625, J: 13.97480583190918
Current x: (-0.003695158803322947, 0.0, 0.006868238854408268), u: [0.         0.23476568], noise: [6.50820469 4.45845214]
Start training ...
Step: 14, L: 259.598876953125,

Step: 14, L: 408.5848693847656, J: 17.08357048034668
Current x: (-0.017035285557546947, 0.03544257547860411, -0.01414699530601501), u: [0.         0.47019595], noise: [5.60540599 6.25415982]
Start training ...
Step: 14, L: 374.1494140625, J: 19.7377986907959
Current x: (-0.017300165725535797, 0.03845518202531294, -0.015337285470962518), u: [0.         0.47438708], noise: [4.75354942 5.8413084 ]
Start training ...
Step: 14, L: 419.332275390625, J: 13.584259986877441
Current x: (-0.017547603567535017, 0.04171964134222475, -0.0166394706249237), u: [0.         0.47931802], noise: [5.05480267 6.25843922]
Start training ...
Step: 14, L: 257.0229797363281, J: 14.04039478302002
Current x: (-0.017778064859017048, 0.04510989490796025, -0.018097870349884027), u: [0.         0.48285258], noise: [3.67438108 5.50695564]
Start training ...
Step: 14, L: 276.0945129394531, J: 15.850601196289062
Current x: (-0.01798890485942727, 0.04869824128372023, -0.019724565505981437), u: [0.         0.48832628], no

Step: 14, L: 251.55606079101562, J: 12.308631896972656
Current x: (0.00015894088375634067, 0.18613819599926953, -0.0938502308130264), u: [0.         0.36493132], noise: [4.02887264 5.58299885]
Start training ...
Step: 14, L: 295.81109619140625, J: 15.389894485473633
Current x: (0.001866454120421152, 0.1906461171585309, -0.09696731445789335), u: [0.         0.35220888], noise: [5.87457642 4.27536869]
Start training ...
Step: 14, L: 336.58258056640625, J: 18.30249786376953
Current x: (0.0036674624919519386, 0.19516632810734552, -0.10027630388736722), u: [0.         0.33559898], noise: [2.06158885 6.84546129]
Start training ...
Step: 14, L: 293.0283203125, J: 13.049325942993164
Current x: (0.005570147919721956, 0.19975082094933813, -0.10346059343814847), u: [0.        0.3245445], noise: [4.27131123 5.24286638]
Start training ...
Step: 14, L: 321.35882568359375, J: 13.655261039733887
Current x: (0.007565359969763423, 0.20427393569408742, -0.10715683014392849), u: [0.         0.30765635], n

Step: 14, L: 236.82058715820312, J: 15.705890655517578
Current x: (0.25320353622757164, 0.42627402756403887, -0.26385177876949306), u: [0. 0.], noise: [5.657531   4.35068367]
Start training ...
Step: 14, L: 241.56405639648438, J: 12.471630096435547
Current x: (0.2634439663311455, 0.430824424606056, -0.2674184931516647), u: [0. 0.], noise: [5.28384437 6.15234884]
Start training ...
Step: 14, L: 356.46246337890625, J: 18.158714294433594
Current x: (0.27394541163745734, 0.4353600073077194, -0.2708545228242874), u: [0. 0.], noise: [4.61814398 6.31895041]
Start training ...
Step: 14, L: 269.73468017578125, J: 16.757110595703125
Current x: (0.28474904985846183, 0.4400175608395819, -0.27437740294933316), u: [0. 0.], noise: [5.94722545 5.06118283]
Start training ...
Step: 14, L: 242.73748779296875, J: 11.355086326599121
Current x: (0.2958453153852689, 0.4447479499627601, -0.2780703636884689), u: [0. 0.], noise: [3.7693876  5.32572872]
Start training ...
Step: 14, L: 272.5274963378906, J: 12.73

Step: 15, L: 324.9556884765625, J: 14.099964141845703
Current x: (0.0, 0.00041534846496582014, -0.00037436423301696783), u: [0.        1.6207036], noise: [4.13498226 5.10965097]
Start training ...
Step: 15, L: 284.5130310058594, J: 15.578866004943848
Current x: (-6.608056641996602e-08, 0.000554183446344458, -0.0009139760494232178), u: [0.        1.6984625], noise: [5.06691093 3.76880827]
Start training ...
Step: 15, L: 315.9862060546875, J: 17.615886688232422
Current x: (2.745982268263736e-07, 0.0007985520887676342, -0.0017131251335144046), u: [0.        1.7518088], noise: [5.62337077 3.53313583]
Start training ...
Step: 15, L: 307.77471923828125, J: 11.559024810791016
Current x: (1.5780758538233293e-06, 0.0011153384506881058, -0.0025523102283477785), u: [0.        1.8236642], noise: [5.0559699  5.85581644]
Start training ...
Step: 15, L: 232.47738647460938, J: 13.786991119384766
Current x: (4.750283538662905e-06, 0.0015419547777809045, -0.0033576527118682866), u: [0.        2.0076356]

Step: 15, L: 334.41204833984375, J: 13.85773754119873
Current x: (0.08093292087255788, 0.4179909214964894, -0.42824216017723093), u: [ 0.       13.208262], noise: [6.07647477 5.61920084]
Start training ...
Step: 15, L: 301.2248229980469, J: 13.80722427368164
Current x: (0.09055554138489678, 0.4459714658660996, -0.4579050050497056), u: [ 0.       13.335634], noise: [5.11074701 5.77340422]
Start training ...
Step: 15, L: 472.7369384765625, J: 15.316060066223145
Current x: (0.10121235357772682, 0.47523651437237474, -0.48884294872283945), u: [ 0.      13.42019], noise: [5.54899652 5.8289349 ]
Start training ...
Step: 15, L: 379.25921630859375, J: 15.767669677734375
Current x: (0.11293984961528802, 0.5056930306674734, -0.5211807216882707), u: [ 0.       13.483594], noise: [4.68024173 6.02930006]
Start training ...
Step: 15, L: 292.42724609375, J: 12.367700576782227
Current x: (0.1258318769697302, 0.5373579155050205, -0.5548885075092317), u: [ 0.       13.507007], noise: [4.33664171 5.016550

Step: 16, L: 270.718994140625, J: 12.957540512084961
Current x: (0.003919428976474531, 0.09479842411879133, -0.09913796830177307), u: [ 0.       12.122422], noise: [4.36819254 6.50159616]
Start training ...
Step: 16, L: 398.09112548828125, J: 16.28310203552246
Current x: (0.004926732986693358, 0.10769356707996594, -0.11268678665161133), u: [ 0.       12.741471], noise: [4.51781022 6.23399772]
Start training ...
Step: 16, L: 199.341552734375, J: 10.951887130737305
Current x: (0.00616160390413765, 0.12189564157160225, -0.12766118750572206), u: [ 0.       13.319625], noise: [5.01016933 5.15754591]
Start training ...
Step: 16, L: 348.4855041503906, J: 11.26295280456543
Current x: (0.0076606531168521876, 0.13745114363180805, -0.1440813542366028), u: [ 0.       13.837698], noise: [4.11996401 4.31035281]
Start training ...
Step: 16, L: 381.11273193359375, J: 12.792304039001465
Current x: (0.009458730736108537, 0.15435526665911356, -0.1618482210636139), u: [ 0.       14.261032], noise: [3.8031

Step: 17, L: 251.25009155273438, J: 9.857816696166992
Current x: (2.3823014504522572e-07, 0.00234674725092457, -0.001491100549697876), u: [0.        3.9196072], noise: [4.23803871 5.0507045 ]
Start training ...
Step: 17, L: 264.272705078125, J: 12.724931716918945
Current x: (1.1381981246320098e-06, 0.0038732005169004368, -0.0027555953025817873), u: [0.        4.2786794], noise: [5.28641095 5.57191908]
Start training ...
Step: 17, L: 178.154052734375, J: 11.144879341125488
Current x: (4.007663338232832e-06, 0.005739487428042218, -0.004493317413330079), u: [0.       4.812876], noise: [3.89913045 6.36334252]
Start training ...
Step: 17, L: 390.55731201171875, J: 17.15651512145996
Current x: (1.104827027082712e-05, 0.008138469458890317, -0.006687458276748657), u: [0.        5.3502665], noise: [4.75051464 5.86901247]
Start training ...
Step: 17, L: 319.8862609863281, J: 12.842853546142578
Current x: (2.4862687589922818e-05, 0.011063971252046655, -0.009609308004379273), u: [0.       5.951511

Step: 17, L: 256.33233642578125, J: 10.884925842285156
Current x: (0.23381848529607047, 0.9187284578718679, -0.9421577893733979), u: [ 0.22987117 17.718943  ], noise: [5.01656726 4.76402362]
Start training ...
Step: 17, L: 349.4454345703125, J: 12.570087432861328
Current x: (0.2608650988281629, 0.9685024137112871, -0.9996834522724152), u: [ 0.2576504 17.526184 ], noise: [5.41812594 4.19708731]
Start training ...
Step: 17, L: 332.109375, J: 12.165493965148926
Current x: (0.29015454661290496, 1.0189259808552054, -1.0589327678203584), u: [ 0.28783402 17.30279   ], noise: [4.20433849 4.88032719]
Start training ...
Step: 17, L: 187.7232666015625, J: 9.684488296508789
Current x: (0.3217490760500541, 1.0698496546149547, -1.1197868330478669), u: [ 0.3197126 17.032547 ], noise: [5.34088543 6.12233889]
Start training ...
Step: 17, L: 147.58279418945312, J: 10.910322189331055
Current x: (0.3556692458793246, 1.1210988915308726, -1.1824099927425384), u: [ 0.35882837 16.708832  ], noise: [3.84583529

Step: 18, L: 269.21563720703125, J: 12.855487823486328
Current x: (0.024360057869546405, 0.2935171848597944, -0.2749691822528839), u: [ 0.02035208 17.823538  ], noise: [4.80180096 4.97705516]
Start training ...
Step: 18, L: 287.1529846191406, J: 15.069721221923828
Current x: (0.029192539604266626, 0.32265947785574867, -0.3035697334766388), u: [ 0.02362467 18.042845  ], noise: [4.65308938 4.35808976]
Start training ...
Step: 18, L: 336.314453125, J: 14.01401138305664
Current x: (0.034775026603647445, 0.3534792765974645, -0.3339681286334991), u: [ 0.02794435 18.22243   ], noise: [4.0684772  6.64636339]
Start training ...
Step: 18, L: 241.69374084472656, J: 14.952987670898438
Current x: (0.04116694198432506, 0.3859020288983329, -0.36613894581794737), u: [ 0.0312283 18.38478  ], noise: [4.36214329 5.00177369]
Start training ...
Step: 18, L: 240.5662078857422, J: 16.48851776123047
Current x: (0.04850832116094887, 0.4200802672115546, -0.40038700013160705), u: [ 0.03465816 18.510757  ], noise

Step: 19, L: 302.31524658203125, J: 15.862743377685547
Current x: (0.0019018654088460397, 0.07490455328479034, -0.07889569015502931), u: [ 0.      12.20071], noise: [5.33469602 4.33302317]
Start training ...
Step: 19, L: 247.79290771484375, J: 8.49494457244873
Current x: (0.002536406498026923, 0.08608685600545532, -0.09175733079910281), u: [ 0.      12.89948], noise: [5.66672941 2.77418904]
Start training ...
Step: 19, L: 298.18231201171875, J: 13.244184494018555
Current x: (0.003343301135280983, 0.09846819913955426, -0.10573887519836428), u: [4.89751110e-03 1.35086155e+01], noise: [5.43434258 5.46004817]
Start training ...
Step: 19, L: 344.885498046875, J: 10.667441368103027
Current x: (0.004345734918575969, 0.1119936047778792, -0.12072111349105838), u: [1.323819e-02 1.420107e+01], noise: [3.18036315 5.13979079]
Start training ...
Step: 19, L: 420.62640380859375, J: 18.41054344177246
Current x: (0.005605774467099427, 0.12696516860758317, -0.13705629425048832), u: [ 0.02378004 14.72988

Step: 20, L: 264.5694885253906, J: 14.97248363494873
Current x: (3.2220965572324854e-05, 0.012947731429133677, -0.009286350965499878), u: [0.       6.000208], noise: [5.02727873 4.58518764]
Start training ...
Step: 20, L: 248.8806915283203, J: 12.972930908203125
Current x: (5.833784380730892e-05, 0.016697001731483193, -0.012328837871551514), u: [0.      6.55131], noise: [5.90128973 5.49398278]
Start training ...
Step: 20, L: 301.6332702636719, J: 11.47031307220459
Current x: (9.895299135028056e-05, 0.021026472186653815, -0.01592713646888733), u: [2.8524688e-03 7.2963004e+00], noise: [4.45085452 3.09320039]
Start training ...
Step: 20, L: 383.1970520019531, J: 16.249080657958984
Current x: (0.00016169362933178455, 0.026169464528723513, -0.020139835357666017), u: [6.9321413e-03 7.7445941e+00], noise: [5.47816458 4.57940558]
Start training ...
Step: 20, L: 259.62689208984375, J: 12.900150299072266
Current x: (0.00024807424825393877, 0.031815589439510975, -0.024946113586425782), u: [0.0098

Step: 20, L: 297.2884521484375, J: 12.805651664733887
Current x: (0.4484006099769591, 1.2934024949006366, -1.3207814398765565), u: [ 0.34229937 16.567167  ], noise: [5.66179669 5.87628222]
Start training ...
Step: 20, L: 482.5703125, J: 14.157853126525879
Current x: (0.49159935238777197, 1.3472787873222163, -1.388537146282196), u: [ 0.40566176 16.073332  ], noise: [7.50895608 4.6694011 ]
Start training ...
Step: 20, L: 316.8216552734375, J: 10.981660842895508
Current x: (0.5375544022051758, 1.4008779242808862, -1.4579367880821228), u: [ 0.4790629 15.523158 ], noise: [7.01596151 6.06077691]
Start training ...
Step: 20, L: 238.10220336914062, J: 11.452213287353516
Current x: (0.5863277211110572, 1.4540154808625476, -1.5286192414283752), u: [ 0.563816 14.877153], noise: [4.87934332 4.36367652]
Start training ...
Step: 20, L: 344.4439697265625, J: 17.170513153076172
Simulation ends in 56 steps
Episode 21 begins...
Current x: (0.0, 0.0, 0.0), u: [0.01273899 2.0353856 ], noise: [6.48929228 5

Step: 21, L: 270.48590087890625, J: 16.534515380859375
Current x: (0.09805120554110229, 0.5317959191701099, -0.5691592988491058), u: [ 0.       18.663246], noise: [6.05384257 4.216625  ]
Start training ...
Step: 21, L: 281.40972900390625, J: 9.816481590270996
Current x: (0.11235507062312204, 0.5717695923866772, -0.6122899701833724), u: [ 0.      18.70028], noise: [5.28798474 3.75119363]
Start training ...
Step: 21, L: 416.6983642578125, J: 14.428943634033203
Current x: (0.1282182431942497, 0.6131995095718256, -0.6571032444000243), u: [ 0.       18.698776], noise: [3.08383575 4.98907989]
Start training ...
Step: 21, L: 296.39007568359375, J: 16.386749267578125
Current x: (0.14567572149568675, 0.6559184409829869, -0.7036328674554824), u: [ 0.       18.646332], noise: [5.2396538  4.65677793]
Start training ...
Step: 21, L: 449.4101867675781, J: 15.957157135009766
Current x: (0.16476848401533056, 0.6997760611918006, -0.7522228926181792), u: [ 0.       18.593441], noise: [3.97931275 3.99070

Step: 22, L: 385.71661376953125, J: 14.723441123962402
Current x: (0.008579229812007688, 0.1555160138838567, -0.15625916557312014), u: [ 0.       15.611593], noise: [5.43745717 7.26860158]
Start training ...
Step: 22, L: 364.0740966796875, J: 14.71629524230957
Current x: (0.010617133292275928, 0.17410549193017905, -0.17539046213626863), u: [ 0.       16.231962], noise: [4.22406369 5.34313489]
Start training ...
Step: 22, L: 271.2147216796875, J: 17.337722778320312
Current x: (0.013095727557996113, 0.19451123412517107, -0.1962660324573517), u: [ 0.       16.687347], noise: [5.34365462 5.36554921]
Start training ...
Step: 22, L: 238.32977294921875, J: 12.430936813354492
Current x: (0.01602449816634969, 0.2164763126785296, -0.21887670619487762), u: [ 0.       17.134645], noise: [5.83643253 4.07599434]
Start training ...
Step: 22, L: 211.74826049804688, J: 8.711860656738281
Current x: (0.019487524582922102, 0.24014744926452866, -0.2431583041191101), u: [ 0.       17.519695], noise: [3.2622

Current x: (2.132676667574591e-05, 0.009180097964426707, -0.008260244226455687), u: [0.07993475 4.6464386 ], noise: [5.24786684 4.22944055]
Start training ...
Step: 23, L: 494.81781005859375, J: 11.293633460998535
Current x: (4.0131081310482726e-05, 0.011589745495834159, -0.011137406516075133), u: [0.07750262 5.099198  ], noise: [4.93279105 3.94705847]
Start training ...
Step: 23, L: 377.2266540527344, J: 16.438501358032227
Current x: (7.066784991436838e-05, 0.014438712669711154, -0.014369376564025878), u: [0.07564084 5.532082  ], noise: [4.3725583 5.5708443]
Start training ...
Step: 23, L: 549.3433837890625, J: 16.400856018066406
Current x: (0.00011685964499032893, 0.017712247571716636, -0.018004942870140074), u: [0.07203788 6.105213  ], noise: [4.23306975 4.65310111]
Start training ...
Step: 23, L: 244.3631134033203, J: 11.979671478271484
Current x: (0.0001853966689461117, 0.021559734480098385, -0.02230598187446594), u: [0.06783123 6.6183586 ], noise: [3.61581929 3.99687824]
Start tr

Step: 23, L: 538.9520874023438, J: 12.61457633972168
Current x: (0.42898777049112424, 1.1492645908536923, -1.2808798876285552), u: [ 0.0664605 17.13467  ], noise: [6.240506   2.75565262]
Start training ...
Step: 23, L: 295.9217834472656, J: 13.100444793701172
Current x: (0.4704961715034801, 1.199799338034981, -1.3480963161230086), u: [ 0.12603198 16.674795  ], noise: [2.33114253 4.47228802]
Start training ...
Step: 23, L: 369.23126220703125, J: 13.349374771118164
Current x: (0.5145149744139922, 1.250101992796356, -1.416671080303192), u: [ 0.1915909 16.129696 ], noise: [4.34396626 6.55537286]
Start training ...
Step: 23, L: 337.1534118652344, J: 16.639232635498047
Current x: (0.5608359115911521, 1.2999449800610952, -1.4871148353815078), u: [ 0.27015847 15.481238  ], noise: [5.73891376 5.397178  ]
Start training ...
Step: 23, L: 365.76031494140625, J: 18.599924087524414
Current x: (0.6098466444689076, 1.3492248468556371, -1.5593735415935517), u: [ 0.35806525 14.775211  ], noise: [5.03826

Step: 24, L: 496.3218078613281, J: 15.179351806640625
Current x: (0.12157976566346107, 0.5856643160456418, -0.6402153728008269), u: [ 0.       19.450527], noise: [5.46919961 5.5660176 ]
Start training ...
Step: 24, L: 343.766845703125, J: 15.305791854858398
Current x: (0.13862074789165105, 0.6280480001183545, -0.6869404263973234), u: [ 0.       19.455011], noise: [5.99922736 4.53378479]
Start training ...
Step: 24, L: 221.4512939453125, J: 14.557044982910156
Current x: (0.1574828514804386, 0.6718955406598048, -0.7356202144145964), u: [ 0.       19.435928], noise: [5.61043985 4.30138976]
Start training ...
Step: 24, L: 389.1764831542969, J: 13.971933364868164
Current x: (0.17824671913441018, 0.7170807250137972, -0.786098959302902), u: [ 0.       19.382887], noise: [5.48102496 3.82682325]
Start training ...
Step: 24, L: 440.07415771484375, J: 15.951132774353027
Current x: (0.20097995959472031, 0.7634607952467415, -0.838390392065048), u: [ 0.       19.298155], noise: [4.80490984 3.3093168

Step: 25, L: 603.4784545898438, J: 17.486038208007812
Current x: (0.016232562191298757, 0.21625431801194656, -0.22105737669467926), u: [ 0.       17.870798], noise: [4.21549578 4.54990481]
Start training ...
Step: 25, L: 413.3231201171875, J: 14.774433135986328
Current x: (0.01976560226617382, 0.24081437682215015, -0.24614020051956176), u: [ 0.       18.212305], noise: [3.11246448 4.88372449]
Start training ...
Step: 25, L: 297.9942932128906, J: 10.02850341796875
Current x: (0.02388267132741643, 0.266992239319703, -0.2730435449361801), u: [ 0.       18.484486], noise: [2.80816981 2.86535218]
Start training ...
Step: 25, L: 475.5812683105469, J: 13.733997344970703
Current x: (0.028638342633035146, 0.2947309591792383, -0.30194524579048154), u: [ 0.       18.679535], noise: [3.86182859 3.11995836]
Start training ...
Step: 25, L: 383.7294921875, J: 16.124034881591797
Current x: (0.03404546719716458, 0.3238149856469587, -0.33270111343860626), u: [ 0.       18.878656], noise: [5.23774955 4.3

Step: 26, L: 303.2739562988281, J: 10.974209785461426
Current x: (0.0004901131408880903, 0.03588941017383249, -0.03386995885372163), u: [0.04343162 8.56532   ], noise: [4.92793592 4.87191192]
Start training ...
Step: 26, L: 369.436279296875, J: 13.53587532043457
Current x: (0.0006831558038544255, 0.04242700828566891, -0.040714644432067876), u: [0.02122074 9.304533  ], noise: [5.19022817 4.45056371]
Start training ...
Step: 26, L: 714.1268310546875, J: 15.84007453918457
Current x: (0.0009385363984411528, 0.04982341059033397, -0.04840591642856598), u: [ 0.       10.056405], noise: [4.85266816 5.62713187]
Start training ...
Step: 26, L: 362.1556396484375, J: 14.90859603881836
Current x: (0.0012711172739847847, 0.0581338955973666, -0.056951553201675414), u: [ 0.       10.885222], noise: [3.36742688 3.40787049]
Start training ...
Step: 26, L: 276.8634033203125, J: 12.351974487304688
Current x: (0.0017030667170048488, 0.06751459565057208, -0.06658027675151824), u: [ 0.       11.447998], nois

Step: 27, L: 397.5921936035156, J: 16.975126266479492
Current x: (4.3546139102489237e-07, 0.0007365988095772672, -0.0015834532737731934), u: [0.24747515 2.8212066 ], noise: [4.22377494 3.43096738]
Start training ...
Step: 27, L: 300.89697265625, J: 13.450909614562988
Current x: (2.0097200632057143e-06, 0.001446861814760049, -0.002549593305587769), u: [0.24621573 2.9219503 ], noise: [4.9236371  4.49642371]
Start training ...
Step: 27, L: 248.3057861328125, J: 12.669977188110352
Current x: (5.281982103019696e-06, 0.0022484658713705246, -0.003693825721740723), u: [0.239033  3.2258995], noise: [5.51724956 5.79813373]
Start training ...
Step: 27, L: 217.29034423828125, J: 14.609737396240234
Current x: (1.1763726420715303e-05, 0.0033278884683774564, -0.005062910270690918), u: [0.22462596 3.7651381 ], noise: [4.67420594 4.57237985]
Start training ...
Step: 27, L: 625.2169799804688, J: 12.192085266113281
Current x: (2.370504924268199e-05, 0.00490433252192416, -0.006758769941329956), u: [0.2154

Step: 27, L: 345.8135986328125, J: 18.411603927612305
Current x: (0.26039054025202085, 0.9688589231230692, -0.9731132135868075), u: [ 0.       19.925947], noise: [5.31123441 4.65919256]
Start training ...
Step: 27, L: 292.3536376953125, J: 14.861825942993164
Current x: (0.290315466934623, 1.0221886910032698, -1.0336153449058536), u: [ 0.      19.74024], noise: [5.38727316 6.26254654]
Start training ...
Step: 27, L: 418.8502197265625, J: 13.757476806640625
Current x: (0.3227117522738263, 1.0762198137876775, -1.096044866895676), u: [ 0.       19.508362], noise: [5.3933928  4.38614831]
Start training ...
Step: 27, L: 488.7459716796875, J: 16.272125244140625
Current x: (0.35780492918824963, 1.1308762163628552, -1.1605359401702884), u: [ 0.       19.239779], noise: [4.58215502 4.86873773]
Start training ...
Step: 27, L: 518.021240234375, J: 17.311155319213867
Current x: (0.3955029908108787, 1.1858904199613147, -1.2268771252155306), u: [ 0.      18.91204], noise: [3.88198942 4.82691875]
Star

Step: 28, L: 618.7728271484375, J: 17.599048614501953
Current x: (0.054250820130319635, 0.4261375132766602, -0.42641408362388605), u: [ 0.       19.745966], noise: [7.23781208 3.06950937]
Start training ...
Step: 28, L: 812.8447875976562, J: 14.941940307617188
Current x: (0.06356438067569405, 0.46251846635913724, -0.46360459709167473), u: [ 0.       19.911505], noise: [4.05471261 5.50396136]
Start training ...
Step: 28, L: 327.4542541503906, J: 15.722550392150879
Current x: (0.0741209712005024, 0.5006546353266412, -0.5023528769016266), u: [ 0.       20.005507], noise: [3.87166139 3.79317649]
Start training ...
Step: 28, L: 367.86431884765625, J: 13.34180736541748
Current x: (0.0859953948026632, 0.5404457538858154, -0.5432372321128844), u: [ 0.       20.059628], noise: [4.20040323 5.19881437]
Start training ...
Step: 28, L: 289.884033203125, J: 15.157508850097656
Current x: (0.09920211524165636, 0.5816810456825787, -0.5861142894268035), u: [ 0.       20.103065], noise: [5.74842075 4.408

Step: 29, L: 677.96533203125, J: 13.032169342041016
Current x: (0.010840597140265943, 0.19237823670040052, -0.17621159884929657), u: [ 0.       17.648436], noise: [4.07904154 4.9893697 ]
Start training ...
Step: 29, L: 554.9713745117188, J: 18.19183349609375
Current x: (0.01339587969390289, 0.21521714275339576, -0.19782949423789978), u: [ 0.       18.084173], noise: [4.94724905 5.93905317]
Start training ...
Step: 29, L: 850.7862548828125, J: 15.704880714416504
Current x: (0.01641951151309223, 0.23970536206080942, -0.22130326616764068), u: [ 0.       18.522844], noise: [3.90289341 4.39725143]
Start training ...
Step: 29, L: 216.98208618164062, J: 10.615774154663086
Current x: (0.020012533738835138, 0.2660531232784337, -0.24668463582992553), u: [ 0.      18.84532], noise: [4.41507034 5.07134632]
Start training ...
Step: 29, L: 379.8355712890625, J: 16.627334594726562
Current x: (0.024194324026612285, 0.29403676800421713, -0.2739677257299423), u: [ 0.       19.153706], noise: [5.54464148

Current x: (0.0012348322070909782, 0.055083538485119064, -0.057025830745697025), u: [ 0.021127 10.373833], noise: [5.32342252 4.25177294]
Start training ...
Step: 30, L: 334.4000549316406, J: 13.792607307434082
Current x: (0.001659769604981509, 0.06386361284084878, -0.0659557351589203), u: [ 0.       11.158959], noise: [6.37453323 4.5663553 ]
Start training ...
Step: 30, L: 923.3867797851562, J: 15.085576057434082
Current x: (0.0021985267621684973, 0.07365645655525155, -0.07581374511718751), u: [ 0.       12.055819], noise: [5.88409674 5.5067052 ]
Start training ...
Step: 30, L: 243.70175170898438, J: 13.715080261230469
Current x: (0.002882939432440556, 0.08467347989873253, -0.08660683331489565), u: [ 0.       12.969103], noise: [5.49170361 5.16340602]
Start training ...
Step: 30, L: 352.5320129394531, J: 15.045035362243652
Current x: (0.0037449394679696466, 0.09704743013774915, -0.09856776418685914), u: [ 0.       13.811149], noise: [5.31442789 8.02085018]
Start training ...
Step: 30,

Step: 31, L: 445.2022399902344, J: 19.860763549804688
Current x: (6.272386280063558e-06, 0.006388951405736939, -0.0028578989028930665), u: [0.31879514 4.85468   ], noise: [5.12525356 4.81777335]
Start training ...
Step: 31, L: 577.5928344726562, J: 14.749617576599121
Current x: (1.2648333998724665e-05, 0.008917046160444446, -0.004279218673706055), u: [0.28688392 5.428595  ], noise: [3.5904571  5.84041932]
Start training ...
Step: 31, L: 392.0458068847656, J: 16.12215232849121
Current x: (2.3344419502395077e-05, 0.011975785018073313, -0.006123378944396973), u: [0.25619155 5.990145  ], noise: [5.45830771 4.54996146]
Start training ...
Step: 31, L: 167.5802001953125, J: 8.523798942565918
Current x: (4.052194200959507e-05, 0.01556814557083462, -0.008706706523895264), u: [0.21885286 6.6652193 ], noise: [4.64227478 2.90018392]
Start training ...
Step: 31, L: 389.588623046875, J: 16.844295501708984
Current x: (6.765271367993588e-05, 0.019804936274437353, -0.011772594833374023), u: [0.19372323

Step: 31, L: 448.98321533203125, J: 13.7449951171875
Current x: (0.28691127236484726, 1.0413132269706502, -1.0800601578712463), u: [ 0.       20.336239], noise: [4.74788754 4.40606022]
Start training ...
Step: 31, L: 525.5346069335938, J: 14.162561416625977
Current x: (0.32044312818074894, 1.0959006990931508, -1.1457368909835814), u: [ 0.       20.096561], noise: [5.53324605 6.29239989]
Start training ...
Step: 31, L: 275.111328125, J: 13.723360061645508
Current x: (0.35657597781485983, 1.1508969709830519, -1.2134130652427673), u: [ 0.       19.802443], noise: [5.5638701  6.11761516]
Start training ...
Step: 31, L: 363.5552673339844, J: 13.770426750183105
Current x: (0.3956169859040773, 1.2062286346535538, -1.2831748111724852), u: [ 0.       19.453823], noise: [5.01764193 5.21344938]
Start training ...
Step: 31, L: 588.6434326171875, J: 16.646324157714844
Current x: (0.43760745682669006, 1.2616806817472745, -1.3549721758842468), u: [ 0.       19.045174], noise: [5.56177405 4.30939184]


Step: 32, L: 758.970458984375, J: 16.575576782226562
Current x: (0.08345667509024787, 0.5031398086996964, -0.5417349872112274), u: [ 0.       20.566599], noise: [6.03179415 4.63226716]
Start training ...
Step: 32, L: 611.34375, J: 16.443740844726562
Current x: (0.09667062422136677, 0.5428600207397938, -0.5850649171113967), u: [ 0.      20.66653], noise: [6.24992213 4.85658131]
Start training ...
Step: 32, L: 558.4719848632812, J: 15.096999168395996
Current x: (0.11149489901877359, 0.5842751236908008, -0.6303115542411803), u: [ 0.       20.742182], noise: [4.25251295 4.69306322]
Start training ...
Step: 32, L: 743.2045288085938, J: 13.011812210083008
Current x: (0.12807385070264832, 0.6273580689124194, -0.6774855103254318), u: [ 0.       20.759361], noise: [6.55880232 3.63806499]
Start training ...
Step: 32, L: 472.1804504394531, J: 13.111310005187988
Current x: (0.1464025884568559, 0.6718583217226605, -0.726777739572525), u: [ 0.       20.779564], noise: [5.37120646 3.72579918]
Start t

Step: 33, L: 613.9642333984375, J: 16.801790237426758
Current x: (0.004146649795389711, 0.11553462236212006, -0.10637952733039854), u: [ 0.       16.256248], noise: [4.25005465 2.35079045]
Start training ...
Step: 33, L: 331.50927734375, J: 12.633111000061035
Current x: (0.005326081882588768, 0.13212096959582234, -0.12245997948646543), u: [ 0.       16.839607], noise: [5.33556468 2.49504307]
Start training ...
Step: 33, L: 249.11932373046875, J: 10.700666427612305
Current x: (0.006748208302358086, 0.1499991051689445, -0.13997612996101377), u: [ 0.       17.444891], noise: [5.58246199 5.6290594 ]
Start training ...
Step: 33, L: 613.647705078125, J: 15.4213228225708
Current x: (0.008471691598225396, 0.1693447871300352, -0.15889218897819518), u: [ 0.       18.114655], noise: [4.69447747 4.97280758]
Start training ...
Step: 33, L: 782.9208984375, J: 14.846879005432129
Current x: (0.010594987660506247, 0.19054708240346807, -0.17955739674568175), u: [ 0.       18.654312], noise: [4.35560564 

Step: 34, L: 489.2978515625, J: 17.226253509521484
Current x: (0.00024668634793752675, 0.032559167112583776, -0.022942366933822633), u: [0.11502258 9.069986  ], noise: [5.65161996 4.96763497]
Start training ...
Step: 34, L: 393.2471923828125, J: 16.005517959594727
Current x: (0.00035952431181114847, 0.03903849317732481, -0.028396089458465577), u: [0.04988638 9.98216   ], noise: [4.47808202 5.08372794]
Start training ...
Step: 34, L: 342.0730895996094, J: 15.27116584777832
Current x: (0.0005177939592405352, 0.04651672447088845, -0.03467690987586975), u: [ 0.       10.818192], noise: [5.06520043 5.69720448]
Start training ...
Step: 34, L: 699.102783203125, J: 18.61486053466797
Current x: (0.0007316950207659485, 0.05497255153522014, -0.04201152219772339), u: [ 0.       11.767645], noise: [2.7479782  4.61318718]
Start training ...
Step: 34, L: 852.9580688476562, J: 11.6500244140625
Current x: (0.0010204159266518022, 0.06460414090142408, -0.050491154050827027), u: [ 0.       12.447751], noi

Step: 35, L: 507.3458251953125, J: 18.933372497558594
Current x: (1.9165414944934097e-08, 0.0019355775488394665, -0.0008889609336853027), u: [0.55311406 3.5449128 ], noise: [4.09743967 5.01206651]
Start training ...
Step: 35, L: 512.69873046875, J: 16.328426361083984
Current x: (4.643368594527457e-07, 0.0033065000253138287, -0.0018280071258544922), u: [0.52221245 3.9184744 ], noise: [4.91625354 4.32891606]
Start training ...
Step: 35, L: 294.6066589355469, J: 16.85376739501953
Current x: (2.0836062256293432e-06, 0.005017175268193607, -0.0031576958656311035), u: [0.48580787 4.3474746 ], noise: [3.08005496 3.58391793]
Start training ...
Step: 35, L: 745.0274658203125, J: 18.095245361328125
Current x: (6.2046585773584364e-06, 0.0071154339063494694, -0.004768277072906495), u: [0.47110173 4.5272293 ], noise: [6.23348807 5.49556465]
Start training ...
Step: 35, L: 887.6341552734375, J: 15.16905403137207
Current x: (1.3956188456491803e-05, 0.009382412345060369, -0.006815411281585694), u: [0.4

Step: 35, L: 564.8931274414062, J: 11.456195831298828
Current x: (0.4287756140906315, 1.2329399808130903, -1.3210450735092167), u: [ 0.       20.106972], noise: [4.33610345 5.31683524]
Start training ...
Step: 35, L: 515.0716552734375, J: 15.502335548400879
Current x: (0.47220785905504953, 1.2888523102619838, -1.3946976880550388), u: [ 0.       19.705809], noise: [6.49262759 5.91831188]
Start training ...
Step: 35, L: 563.7984008789062, J: 16.217498779296875
Current x: (0.5185237617788265, 1.3445191944037898, -1.4704590729713443), u: [ 0.       19.231422], noise: [3.61916858 5.28382494]
Start training ...
Step: 35, L: 351.8446960449219, J: 14.154385566711426
Current x: (0.5680016697482932, 1.3997677315180024, -1.5481336071491245), u: [ 0.      18.68605], noise: [3.30577837 4.75023383]
Start training ...
Step: 35, L: 562.8391723632812, J: 15.015385627746582
Simulation ends in 54 steps
Episode 36 begins...
Current x: (0.0, 0.0, 0.0), u: [0.70610374 2.1287158 ], noise: [5.14450368 5.66993

Step: 36, L: 515.4995727539062, J: 16.4248046875
Current x: (0.07843735606558536, 0.5395291157646886, -0.5368409235954285), u: [ 0.       21.876215], noise: [5.30956736 7.05711803]
Start training ...
Step: 36, L: 282.67913818359375, J: 16.55694580078125
Current x: (0.09136264928435252, 0.5828657839393444, -0.5820458809375763), u: [ 0.      21.95684], noise: [3.9221357  4.99941613]
Start training ...
Step: 36, L: 763.840087890625, J: 17.22240447998047
Current x: (0.1060392061678024, 0.62816404249696, -0.6296132146835327), u: [ 0.       21.988863], noise: [4.62673294 6.54683558]
Start training ...
Step: 36, L: 758.7926025390625, J: 16.427223205566406
Current x: (0.12241325359360322, 0.675060695849528, -0.6794839604854583), u: [ 0.       22.011295], noise: [4.59862846 5.05948646]
Start training ...
Step: 36, L: 517.9071044921875, J: 18.244295120239258
Current x: (0.14074001160563085, 0.7236567202352151, -0.7317456028938293), u: [ 0.       22.007565], noise: [4.15553664 5.51493637]
Start t

Step: 37, L: 426.56597900390625, J: 14.083744049072266
Current x: (0.003980772767619801, 0.10841737139643251, -0.1080931274175644), u: [ 0.       16.168917], noise: [3.64963349 4.07501549]
Start training ...
Step: 37, L: 949.83935546875, J: 14.7664213180542
Current x: (0.0051728418899308955, 0.12463360502790032, -0.12437225499153137), u: [ 0.      16.80048], noise: [3.95224662 4.0678714 ]
Start training ...
Step: 37, L: 923.5353393554688, J: 15.912027359008789
Current x: (0.006622681385794244, 0.14224425008886118, -0.14231081249713898), u: [ 0.       17.401934], noise: [4.60979697 2.50353973]
Start training ...
Step: 37, L: 965.8912353515625, J: 18.75007438659668
Current x: (0.008380425028239942, 0.16133678292361114, -0.1619409803390503), u: [ 0.       17.929157], noise: [3.7137084  5.00552162]
Start training ...
Step: 37, L: 659.7625732421875, J: 15.914706230163574
Current x: (0.010485871061508714, 0.18187506003690834, -0.18310071589946747), u: [ 0.     18.4487], noise: [2.87019127 5.

Step: 38, L: 325.48907470703125, J: 20.775283813476562
Current x: (0.00017482035215390295, 0.030185377078177845, -0.020756505870819092), u: [0.       9.036566], noise: [3.02661789 4.40497845]
Start training ...
Step: 38, L: 441.5172119140625, J: 16.701292037963867
Current x: (0.0002674521243035091, 0.03672958531459863, -0.0261022349357605), u: [0.       9.649373], noise: [5.1410999  4.13794311]
Start training ...
Step: 38, L: 808.6566162109375, J: 13.618492126464844
Current x: (0.0003942635932794687, 0.04393925506640644, -0.03248945665359497), u: [ 0.       10.475481], noise: [4.17692403 5.7650407 ]
Start training ...
Step: 38, L: 679.7235717773438, J: 16.345643997192383
Current x: (0.0005704768513035741, 0.052060121753633795, -0.03974130001068115), u: [ 0.       11.373395], noise: [3.72224456 5.54321452]
Start training ...
Step: 38, L: 445.0229187011719, J: 16.133739471435547
Current x: (0.0008130136128875176, 0.06124065555242882, -0.04819950323104858), u: [ 0.       12.228442], noise

Step: 38, L: 501.7257080078125, J: 14.592935562133789
Simulation ends in 53 steps
Episode 39 begins...
Current x: (0.0, 0.0, 0.0), u: [0.86787397 2.0891283 ], noise: [4.09076688 5.77930686]
Start training ...
Step: 39, L: 1044.7750244140625, J: 16.066381454467773
Current x: (0.0, 0.0, 0.0), u: [0.8239108 2.4428732], noise: [5.48810927 6.1078536 ]
Start training ...
Step: 39, L: 529.354736328125, J: 15.947437286376953
Current x: (0.0, 0.0003017075958251954, -0.0002909793853759766), u: [0.74983   3.0400486], noise: [3.78377456 4.59805544]
Start training ...
Step: 39, L: 384.42156982421875, J: 15.596498489379883
Current x: (0.0, 0.001108689910888672, -0.0008058294773101807), u: [0.71567494 3.3152    ], noise: [5.24794559 4.77529539]
Start training ...
Step: 39, L: 406.4754943847656, J: 16.694461822509766
Current x: (3.5417163714697224e-07, 0.00215184308049072, -0.0016311295032501224), u: [0.6536557 3.8139894], noise: [5.02515425 6.00633471]
Start training ...
Step: 39, L: 712.748046875, J

Step: 39, L: 537.3207397460938, J: 13.744461059570312
Current x: (0.3401117753927731, 1.1081281482965573, -1.1838530425071716), u: [ 0.       21.402163], noise: [3.9087766  5.71327297]
Start training ...
Step: 39, L: 1109.360107421875, J: 20.066097259521484
Current x: (0.37864629578862197, 1.165271277689212, -1.2547315332889557), u: [ 0.       21.131115], noise: [5.0920792  4.93593019]
Start training ...
Step: 39, L: 770.84619140625, J: 17.168869018554688
Current x: (0.4200538656482926, 1.2226041350641002, -1.3279306898117065), u: [ 0.       20.817411], noise: [2.80255148 5.7797921 ]
Start training ...
Step: 39, L: 654.9959106445312, J: 16.349449157714844
Current x: (0.4644230041810934, 1.2799245074425485, -1.4032273429393767), u: [ 0.       20.428156], noise: [3.7206504  4.97180295]
Start training ...
Step: 39, L: 601.921630859375, J: 15.14321517944336
Current x: (0.5116458382644921, 1.3369709002776342, -1.4809034613609313), u: [ 0.       19.983282], noise: [5.31059656 3.23278667]
Sta

Step: 40, L: 376.36309814453125, J: 14.854292869567871
Current x: (0.12151223143037648, 0.7762097187633104, -0.6982004415988923), u: [ 0.       22.407597], noise: [4.12928075 5.66536842]
Start training ...
Step: 40, L: 769.9938354492188, J: 17.184226989746094
Current x: (0.14016192651497972, 0.8299013896677128, -0.7529171508789063), u: [ 0.       22.382086], noise: [5.23347891 4.04271974]
Start training ...
Step: 40, L: 374.3986511230469, J: 17.78406524658203
Current x: (0.16088171174432164, 0.885078753562784, -0.810028228712082), u: [ 0.       22.345661], noise: [4.89037222 3.91483032]
Start training ...
Step: 40, L: 558.9026489257812, J: 19.641035079956055
Current x: (0.18376619636725663, 0.9415852138862307, -0.8692584391593934), u: [ 0.      22.28617], noise: [4.52863941 4.91268817]
Start training ...
Step: 40, L: 321.6217346191406, J: 10.100577354431152
Current x: (0.20890695872129286, 0.9992584576857206, -0.9306256617069245), u: [ 0.       22.199352], noise: [4.574797   4.80442083

Step: 41, L: 678.4644775390625, J: 17.552040100097656
Current x: (0.044609109108301, 0.41589423418903926, -0.39925586075782776), u: [ 0.       21.336409], noise: [5.9297824  6.97887415]
Start training ...
Step: 41, L: 563.124267578125, J: 17.10321044921875
Current x: (0.05297441231527584, 0.4533791180017223, -0.43734739265441896), u: [ 0.       21.536272], noise: [4.11853462 4.07057331]
Start training ...
Step: 41, L: 536.2281494140625, J: 17.113460540771484
Current x: (0.06267093360169895, 0.4930381725481539, -0.4776774745464325), u: [ 0.       21.648607], noise: [4.64943503 5.1053208 ]
Start training ...
Step: 41, L: 436.4559326171875, J: 15.272937774658203
Current x: (0.07362643774002862, 0.5344089848457889, -0.5201563876152039), u: [ 0.       21.753304], noise: [7.09395678 5.74605869]
Start training ...
Step: 41, L: 720.8284912109375, J: 14.971902847290039
Current x: (0.08602561054000546, 0.5775876199087867, -0.5648457499027253), u: [ 0.       21.872995], noise: [5.50694313 3.67061

Step: 42, L: 730.8651123046875, J: 19.720382690429688
Current x: (0.004393011477159878, 0.16292005507843724, -0.1227187607765198), u: [ 0.      18.86221], noise: [5.73956536 4.97327778]
Start training ...
Step: 42, L: 868.5714111328125, J: 14.960860252380371
Current x: (0.005781211873595042, 0.18425231257534658, -0.1416199806213379), u: [ 0.      19.50238], noise: [5.05619544 3.25883941]
Start training ...
Step: 42, L: 602.6136474609375, J: 11.14276123046875
Current x: (0.007531443355628584, 0.20753883337487083, -0.1623307927131653), u: [ 0.       19.996809], noise: [5.68630372 4.2565175 ]
Start training ...
Step: 42, L: 854.81640625, J: 14.562263488769531
Current x: (0.00967430945284551, 0.2325982464930459, -0.18481210727691652), u: [ 0.      20.47518], noise: [4.3237482  4.45983352]
Start training ...
Step: 42, L: 461.17529296875, J: 12.467438697814941
Current x: (0.012301056247421723, 0.25963126182142315, -0.20915012407302858), u: [ 0.       20.856337], noise: [4.59871859 5.8448171 

Step: 43, L: 819.0933837890625, J: 16.998807907104492
Current x: (0.0003087398747133664, 0.045059802228328144, -0.029542568206787113), u: [ 0.       11.734383], noise: [3.92350316 2.53189476]
Start training ...
Step: 43, L: 569.7900390625, J: 13.736383438110352
Current x: (0.00046020101835266805, 0.0541919135773094, -0.037272451925277714), u: [ 0.      12.42317], noise: [4.22518437 4.86762524]
Start training ...
Step: 43, L: 806.1299438476562, J: 17.358898162841797
Current x: (0.0006653916308218426, 0.06416120933421067, -0.046036613082885744), u: [ 0.       13.328019], noise: [4.93102296 5.04170991]
Start training ...
Step: 43, L: 531.4981689453125, J: 17.14755630493164
Current x: (0.0009507590083599018, 0.07529960870247959, -0.05610733542442322), u: [ 0.       14.290957], noise: [4.39226869 4.15717626]
Start training ...
Step: 43, L: 411.18853759765625, J: 16.40229034423828
Current x: (0.0013433572729291915, 0.08778461462527276, -0.06752192850112915), u: [ 0.       15.132072], noise: 

Step: 44, L: 526.361328125, J: 21.208660125732422
Current x: (3.0261903585634708e-06, 0.008383791807147432, -0.003193224859237671), u: [0.622245 5.39314 ], noise: [4.79882839 5.07533103]
Start training ...
Step: 44, L: 1341.537353515625, J: 16.925029754638672
Current x: (8.064257614842477e-06, 0.011285454806487193, -0.004546282863616943), u: [0.5041419 6.083752 ], noise: [3.11671168 3.88913314]
Start training ...
Step: 44, L: 690.6015625, J: 21.22661590576172
Current x: (1.8176204789574e-05, 0.014795064058132846, -0.0064040805816650385), u: [0.43270043 6.5026016 ], noise: [4.82949525 4.34064824]
Start training ...
Step: 44, L: 593.9608154296875, J: 16.096736907958984
Current x: (3.4468228740162644e-05, 0.01868303311715655, -0.008897081470489501), u: [0.3143074 7.1961985], noise: [5.44934569 4.7582861 ]
Start training ...
Step: 44, L: 1056.25, J: 16.103666305541992
Current x: (6.107423950072124e-05, 0.0232005137364477, -0.011948187828063964), u: [0.16922417 8.046629  ], noise: [4.748994

Step: 44, L: 348.89190673828125, J: 12.689197540283203
Current x: (0.3861219111665266, 1.2766614889998837, -1.267307430267334), u: [ 0.       21.742254], noise: [4.59409792 4.87900782]
Start training ...
Step: 44, L: 457.9689025878906, J: 16.86358642578125
Current x: (0.42803827681396356, 1.3367214933993952, -1.3409337931632994), u: [ 0.       21.468367], noise: [5.45537431 3.49119795]
Start training ...
Step: 44, L: 590.3638916015625, J: 17.41526222229004
Current x: (0.47293352301362934, 1.3967333733879097, -1.4167628725051877), u: [ 0.       21.158361], noise: [3.98344757 6.3123289 ]
Start training ...
Step: 44, L: 909.1934204101562, J: 16.170597076416016
Current x: (0.5207902648044753, 1.4564572385006858, -1.4945423708915708), u: [ 0.       20.758581], noise: [4.53256046 3.734736  ]
Start training ...
Step: 44, L: 526.2598876953125, J: 17.2271785736084
Simulation ends in 53 steps
Episode 45 begins...
Current x: (0.0, 0.0, 0.0), u: [1.2847476 1.9140503], noise: [4.45931358 5.03228277

Step: 45, L: 729.0728759765625, J: 19.24850845336914
Current x: (0.0797806761127062, 0.553438887490148, -0.5409753776073456), u: [ 0.       22.413126], noise: [5.59851664 5.33319576]
Start training ...
Step: 45, L: 599.823974609375, J: 20.317184448242188
Current x: (0.09305131235115215, 0.5982275293683256, -0.5872239196300507), u: [ 0.      22.49717], noise: [2.97390612 7.85901209]
Start training ...
Step: 45, L: 691.9227294921875, J: 19.76213836669922
Current x: (0.10803911565876463, 0.6448935136808525, -0.635687242269516), u: [ 0.       22.534414], noise: [6.42400001 3.27670111]
Start training ...
Step: 45, L: 791.0331420898438, J: 18.149267196655273
Current x: (0.124873579658054, 0.6933531678830513, -0.6868887925624847), u: [ 0.       22.577305], noise: [5.61718805 4.50670904]
Start training ...
Step: 45, L: 629.5330810546875, J: 20.338464736938477
Current x: (0.14362194124995234, 0.7434256652626015, -0.740029054403305), u: [ 0.       22.592787], noise: [2.75403511 3.77604547]
Start

Step: 46, L: 751.9152221679688, J: 20.562408447265625
Current x: (0.01186997436557368, 0.25193880863658935, -0.20043764972686767), u: [ 0.      20.65615], noise: [3.50437457 3.52896456]
Start training ...
Step: 46, L: 761.0559692382812, J: 15.696664810180664
Current x: (0.014905777252923266, 0.2801882429079435, -0.2260479567527771), u: [ 0.       20.965115], noise: [5.34340885 6.02928794]
Start training ...
Step: 46, L: 248.3331298828125, J: 18.007204055786133
Current x: (0.01849287303157161, 0.3101701906613002, -0.25372633781433107), u: [ 0.       21.317055], noise: [3.94079256 4.92857028]
Start training ...
Step: 46, L: 475.42938232421875, J: 18.93231964111328
Current x: (0.022804748983188395, 0.3423226513197217, -0.28356981821060184), u: [ 0.       21.571802], noise: [3.72233997 3.6698446 ]
Start training ...
Step: 46, L: 719.4546508789062, J: 16.425373077392578
Current x: (0.02787434241405619, 0.3764161081637684, -0.3156437818527222), u: [ 0.       21.770447], noise: [3.34692195 4.

Step: 47, L: 1016.774169921875, J: 16.84964370727539
Current x: (0.002644524811917712, 0.1220384365154315, -0.09381478481292725), u: [ 0.      16.94684], noise: [6.64383238 3.9800995 ]
Start training ...
Step: 47, L: 941.19677734375, J: 20.780155181884766
Current x: (0.0035665491716326096, 0.139359392017469, -0.10967264523506165), u: [ 0.      17.77342], noise: [4.2728831  6.06923122]
Start training ...
Step: 47, L: 836.58203125, J: 14.32342529296875
Current x: (0.004746848873828036, 0.1584443006296321, -0.12695881633758546), u: [ 0.       18.488478], noise: [5.73641982 5.25193092]
Start training ...
Step: 47, L: 551.449462890625, J: 20.76178741455078
Current x: (0.0062348812998491345, 0.1793428707589444, -0.14620196433067323), u: [ 0.       19.164278], noise: [4.15384376 6.17690015]
Start training ...
Step: 47, L: 943.312744140625, J: 16.42876434326172
Current x: (0.008096143498360749, 0.2021843993700377, -0.16724551115036013), u: [ 0.       19.726465], noise: [5.33975632 3.62344822]


Step: 48, L: 865.8154296875, J: 18.829648971557617
Current x: (0.00043633528389344157, 0.05455837173237345, -0.03349499316215516), u: [ 0.       12.495201], noise: [3.81453913 3.39272991]
Start training ...
Step: 48, L: 753.8216552734375, J: 16.470264434814453
Current x: (0.000640069423390974, 0.06442257474984008, -0.04129556517601014), u: [ 0.       13.305633], noise: [4.09340121 2.86633262]
Start training ...
Step: 48, L: 649.3905029296875, J: 20.027774810791016
Current x: (0.0009097846361556701, 0.07527491972412675, -0.05030347638130189), u: [ 0.       14.107535], noise: [3.30788362 6.11231893]
Start training ...
Step: 48, L: 1174.917724609375, J: 13.15046501159668
Current x: (0.0012631630379828518, 0.087171073553602, -0.0605192439556122), u: [ 0.       15.059495], noise: [4.44215229 4.27165263]
Start training ...
Step: 48, L: 623.5560302734375, J: 17.861167907714844
Current x: (0.0017348442362126516, 0.10043602509162475, -0.07242620854377749), u: [ 0.       15.938863], noise: [4.93

Step: 49, L: 576.0072631835938, J: 16.808971405029297
Current x: (1.0448612718918937e-06, 0.006129764522820083, -0.001757951307296753), u: [0.79268247 5.0693827 ], noise: [4.51910227 4.44412535]
Start training ...
Step: 49, L: 1091.061279296875, J: 20.105712890625
Current x: (3.0744374075321014e-06, 0.008788954489797229, -0.003126405715942383), u: [0.6473709 5.652692 ], noise: [4.31819213 2.55270216]
Start training ...
Step: 49, L: 782.7857055664062, J: 16.169832229614258
Current x: (7.71022644928818e-06, 0.01194967142470412, -0.004915032434463501), u: [0.5498047 6.041183 ], noise: [4.66307208 4.9402239 ]
Start training ...
Step: 49, L: 344.9638671875, J: 11.8111572265625
Current x: (1.6463784186412117e-05, 0.015446477583861303, -0.007027642250061036), u: [0.36875892 6.7650905 ], noise: [4.90610096 3.39714699]
Start training ...
Step: 49, L: 598.1334228515625, J: 15.603960037231445
Current x: (3.3176853203509277e-05, 0.01958169262625975, -0.009717105102539064), u: [0.21424228 7.3916683

Step: 49, L: 724.6134643554688, J: 15.639585494995117
Current x: (0.4406887535606988, 1.3734747833968717, -1.3759014696598058), u: [ 0.       22.039045], noise: [6.00821371 4.54572904]
Start training ...
Step: 49, L: 1030.94873046875, J: 24.094722747802734
Current x: (0.48637870081594975, 1.4349267401466959, -1.4535504117488867), u: [ 0.       21.775398], noise: [3.42145085 5.37458937]
Start training ...
Step: 49, L: 835.8914794921875, J: 20.285511016845703
Current x: (0.5352662418557091, 1.496028903712518, -1.5332570099353795), u: [ 0.      21.45076], noise: [5.67710302 3.95421408]
Start training ...
Step: 49, L: 820.5589599609375, J: 22.187410354614258
Simulation ends in 52 steps
Episode 50 begins...
Current x: (0.0, 0.0, 0.0), u: [1.5640051 1.936556 ], noise: [4.13431229 6.22398667]
Start training ...
Step: 50, L: 871.0032958984375, J: 21.535425186157227
Current x: (0.0, 0.0, 0.0), u: [1.4386936 2.43251  ], noise: [4.18921342 4.62218367]
Start training ...
Step: 50, L: 708.788513183

Step: 50, L: 487.68304443359375, J: 18.49722671508789
Current x: (0.10512295667699925, 0.6349890724888281, -0.6402030786037446), u: [ 0.       22.782333], noise: [4.13676722 4.82908367]
Start training ...
Step: 50, L: 725.4317626953125, J: 21.640836715698242
Current x: (0.12152796992858292, 0.6825843366971427, -0.6912210543155671), u: [ 0.       22.804543], noise: [5.10754247 3.26788347]
Start training ...
Step: 50, L: 727.0993041992188, J: 17.909412384033203
Current x: (0.13982948737025688, 0.7317447242056031, -0.7445864951610566), u: [ 0.       22.814245], noise: [4.20244241 4.02771261]
Start training ...
Step: 50, L: 310.253662109375, J: 12.507831573486328
Current x: (0.16011866054257118, 0.7823264290674926, -0.8000484243869782), u: [ 0.      22.79701], noise: [4.31851833 4.87425498]
Start training ...
Step: 50, L: 871.520263671875, J: 19.869773864746094
Current x: (0.18251161265912907, 0.8342100402806842, -0.8577743051052094), u: [ 0.       22.762327], noise: [4.11723408 4.2156522 

Step: 51, L: 1008.3654174804688, J: 19.052021026611328
Current x: (0.013730236106559388, 0.2624099637988131, -0.22452803711891173), u: [ 0.       21.493607], noise: [5.59515657 4.6684329 ]
Start training ...
Step: 51, L: 478.83404541015625, J: 10.280048370361328
Current x: (0.017247445558852485, 0.29207325162777464, -0.25264090914726256), u: [ 0.       21.836353], noise: [2.84134311 3.47287418]
Start training ...
Step: 51, L: 757.671875, J: 11.383627891540527
Current x: (0.021471717103791102, 0.32385154611602285, -0.2828104694843292), u: [ 0.      22.06209], noise: [5.22854033 5.15799669]
Start training ...
Step: 51, L: 586.3260498046875, J: 12.828401565551758
Current x: (0.0263996456519913, 0.35737453550421644, -0.31522681822776794), u: [ 0.       22.314907], noise: [3.32220685 4.14107167]
Start training ...
Step: 51, L: 758.783447265625, J: 17.277240753173828
Current x: (0.03223307122550027, 0.39303248531983426, -0.3498423216342926), u: [ 0.       22.489807], noise: [4.69219187 3.972

Step: 52, L: 895.573974609375, J: 17.102933883666992
Current x: (0.0010968561019535745, 0.07541132924904018, -0.05670234913825988), u: [ 0.       15.475307], noise: [5.64133051 3.08791808]
Start training ...
Step: 52, L: 282.6537780761719, J: 17.31415367126465
Current x: (0.0015415742989208926, 0.08835423703473634, -0.06804324350357055), u: [ 0.      16.40612], noise: [4.62858188 4.03390032]
Start training ...
Step: 52, L: 839.748046875, J: 18.137786865234375
Current x: (0.0021234644791452654, 0.10273271034242645, -0.08067632727622985), u: [ 0.       17.275324], noise: [3.85852191 5.83238059]
Start training ...
Step: 52, L: 908.9830932617188, J: 23.877197265625
Current x: (0.0028757979755220917, 0.11863124300531289, -0.09489055500030516), u: [ 0.       18.121101], noise: [3.47539301 4.35338444]
Start training ...
Step: 52, L: 567.81396484375, J: 22.74064064025879
Current x: (0.0038454491633308644, 0.13623662737159015, -0.11102970089912413), u: [ 0.      18.80885], noise: [4.96520464 5.

Current x: (1.393744531954751e-05, 0.01085739626591428, -0.0059633958339691175), u: [0.3697975 6.2723455], noise: [4.57219382 4.6942492 ]
Start training ...
Step: 53, L: 780.8662109375, J: 16.33523178100586
Current x: (2.7835727505605617e-05, 0.014286783098560194, -0.008189999008178713), u: [0.15297483 6.990707  ], noise: [4.7511266  5.18075285]
Start training ...
Step: 53, L: 631.7614135742188, J: 20.691421508789062
Current x: (5.122087330972892e-05, 0.01832600029436831, -0.011019062519073489), u: [0.        7.8266644], noise: [4.32701007 5.70286854]
Start training ...
Step: 53, L: 848.5901489257812, J: 19.55744171142578
Current x: (8.859074596661526e-05, 0.023091716374750538, -0.014574861907958987), u: [0.       8.725691], noise: [3.92791589 6.15390603]
Start training ...
Step: 53, L: 694.6260986328125, J: 13.841099739074707
Current x: (0.00014563645846139888, 0.028661978498844127, -0.01905091361999512), u: [0.       9.690222], noise: [4.90879164 4.2214782 ]
Start training ...
Step: 

Step: 54, L: 768.6119384765625, J: 13.319650650024414
Current x: (0.0, 0.0011663316078186034, 4.725236892700197e-05), u: [1.3681893 3.091161 ], noise: [5.27441713 4.28131565]
Start training ...
Step: 54, L: 782.3872680664062, J: 16.796016693115234
Current x: (1.1079102958349307e-07, 0.002099480684659729, -0.0002343215465545654), u: [1.2223078 3.599555 ], noise: [4.00219299 3.61389984]
Start training ...
Step: 54, L: 1149.35986328125, J: 16.967092514038086
Current x: (1.5535747046683121e-07, 0.0034531380912350483, -0.0005888824939727783), u: [1.1273069 3.921304 ], noise: [5.05397493 6.3564994 ]
Start training ...
Step: 54, L: 1235.759765625, J: 16.665817260742188
Current x: (4.913720139062068e-07, 0.005069591049296406, -0.0011423388481140136), u: [0.88856333 4.7237186 ], noise: [5.54256993 6.18442679]
Start training ...
Step: 54, L: 354.4329528808594, J: 16.914831161499023
Current x: (1.796633231009198e-06, 0.00735095226841931, -0.0021054473400115964), u: [0.6211637 5.61915  ], noise: [

Step: 54, L: 984.8126220703125, J: 17.45583724975586
Current x: (0.3720064518789001, 1.2301716515022803, -1.2709543655872348), u: [ 0.       22.451973], noise: [5.64784663 4.63208003]
Start training ...
Step: 54, L: 664.2110595703125, J: 20.26268768310547
Current x: (0.4137154695625003, 1.2903229217463956, -1.3466728198051456), u: [ 0.       22.246052], noise: [4.2075327  5.28247548]
Start training ...
Step: 54, L: 425.47216796875, J: 14.682493209838867
Current x: (0.45855163784927483, 1.3504599915548987, -1.424534894704819), u: [ 0.       21.991976], noise: [4.1791346  4.57187348]
Start training ...
Step: 54, L: 735.247314453125, J: 19.561952590942383
Current x: (0.5064820380838444, 1.410321401275966, -1.5047290691375736), u: [ 0.       21.698568], noise: [4.66146082 4.10046673]
Start training ...
Step: 54, L: 1168.112548828125, J: 22.927209854125977
Simulation ends in 51 steps
Episode 55 begins...
Current x: (0.0, 0.0, 0.0), u: [1.7939217 1.8636364], noise: [3.95062884 4.0829679 ]
St

Step: 55, L: 562.695556640625, J: 18.413715362548828
Current x: (0.12468831032861541, 0.7454987990149768, -0.6965034609794618), u: [ 0.       23.248026], noise: [6.06563554 4.67184245]
Start training ...
Step: 55, L: 1134.880615234375, J: 19.486820220947266
Current x: (0.14388436214520625, 0.798744767508516, -0.7511036252498628), u: [ 0.       23.269049], noise: [3.78633189 3.35680973]
Start training ...
Step: 55, L: 590.830322265625, J: 17.22711181640625
Current x: (0.1652607181398097, 0.8536167301744504, -0.8078892127990724), u: [ 0.       23.254324], noise: [4.67781226 4.08753659]
Start training ...
Step: 55, L: 435.1127014160156, J: 14.30907154083252
Current x: (0.1887125414000422, 0.9097306297088523, -0.8669587530612948), u: [ 0.       23.231396], noise: [6.55046059 5.8649223 ]
Start training ...
Step: 55, L: 650.4765625, J: 13.691563606262207
Current x: (0.21447884320820534, 0.9670761709343626, -0.9282946980476382), u: [ 0.       23.202425], noise: [3.78468763 5.76699366]
Start t

Step: 56, L: 1174.034912109375, J: 21.235111236572266
Current x: (0.08004331333221625, 0.6341253327390172, -0.5700391829967497), u: [ 0.       23.547852], noise: [4.39904948 2.85658262]
Start training ...
Step: 56, L: 1243.8919677734375, J: 22.265262603759766
Current x: (0.09391813514920822, 0.6835236249905642, -0.6198717773437499), u: [ 0.       23.589853], noise: [5.84111154 7.12356752]
Start training ...
Step: 56, L: 777.8284301757812, J: 19.944795608520508
Current x: (0.10945531329599333, 0.7345342004266849, -0.671904910135269), u: [ 0.       23.636421], noise: [4.34050828 6.48538414]
Start training ...
Step: 56, L: 338.9020080566406, J: 16.16400718688965
Current x: (0.12711605673525067, 0.7875391427118049, -0.7264252737998961), u: [ 0.       23.653526], noise: [4.26792392 3.66515963]
Start training ...
Step: 56, L: 1397.709228515625, J: 19.405059814453125
Current x: (0.14692200333578412, 0.8422602343581362, -0.7835237670421599), u: [ 0.      23.65202], noise: [4.76487582 5.1922324

Step: 57, L: 715.3106689453125, J: 17.18309783935547
Current x: (0.02553674171571145, 0.3632987646968318, -0.3134500504016876), u: [ 0.       22.981632], noise: [3.91031754 4.2419054 ]
Start training ...
Step: 57, L: 1242.447021484375, J: 16.355573654174805
Current x: (0.03141897888309778, 0.40077533373152296, -0.3490918348789215), u: [ 0.       23.132313], noise: [5.15102679 4.648448  ]
Start training ...
Step: 57, L: 775.00244140625, J: 14.934446334838867
Current x: (0.03826120481526827, 0.4402325898134637, -0.38706494145393366), u: [ 0.       23.274897], noise: [4.6231298  5.14690346]
Start training ...
Step: 57, L: 1307.0888671875, J: 17.073965072631836
Current x: (0.046229844690999604, 0.4818033925758443, -0.4273010214328765), u: [ 0.       23.388494], noise: [4.42540095 4.43442066]
Start training ...
Step: 57, L: 853.8978271484375, J: 16.112998962402344
Current x: (0.055445838523620956, 0.5254532250383642, -0.4699169685840606), u: [ 0.       23.476255], noise: [5.61487959 4.83181

Current x: (0.00759069362530748, 0.21972726059294276, -0.17051340441703797), u: [ 0.       21.378277], noise: [5.34354793 4.72536743]
Start training ...
Step: 58, L: 974.372802734375, J: 13.874750137329102
Current x: (0.009898519910626173, 0.24704247800737503, -0.1951873255252838), u: [ 0.       21.778252], noise: [3.73660586 3.71741019]
Start training ...
Step: 58, L: 861.822265625, J: 17.49555206298828
Current x: (0.012739968333396516, 0.2764758091309617, -0.22193725619316101), u: [ 0.       22.073627], noise: [6.15670246 2.16374767]
Start training ...
Step: 58, L: 841.9630737304688, J: 17.850475311279297
Current x: (0.01614837751592891, 0.30779585906461865, -0.2508630924701691), u: [ 0.       22.355183], noise: [5.74959267 3.10411719]
Start training ...
Step: 58, L: 837.4527587890625, J: 17.273122787475586
Current x: (0.020225820446492297, 0.3410997687240978, -0.2815969960689545), u: [ 0.       22.601236], noise: [4.60224748 5.08739094]
Start training ...
Step: 58, L: 950.3230590820

Step: 59, L: 599.4210815429688, J: 17.151784896850586
Current x: (0.0022642568745021943, 0.10156467712111403, -0.08456086931228637), u: [ 0.       16.902859], noise: [5.90433333 3.64619844]
Start training ...
Step: 59, L: 519.2188720703125, J: 10.011773109436035
Current x: (0.0030561301462822825, 0.1170866261979466, -0.09934053497314453), u: [ 0.      17.77412], noise: [4.14924094 4.01324204]
Start training ...
Step: 59, L: 785.368408203125, J: 21.2379207611084
Current x: (0.00407142909319708, 0.13426346211925339, -0.11558467302322388), u: [ 0.       18.504198], noise: [4.78394734 4.44869842]
Start training ...
Step: 59, L: 1337.373779296875, J: 16.84221649169922
Current x: (0.005343960075475027, 0.15304017109343987, -0.1335926233291626), u: [ 0.       19.208864], noise: [3.59104919 4.77318881]
Start training ...
Step: 59, L: 1128.000244140625, J: 20.851545333862305
Current x: (0.006936373079312508, 0.17359105703996197, -0.15341746854782107), u: [ 0.       19.800547], noise: [5.5837055

Step: 60, L: 637.0999755859375, J: 20.2596378326416
Current x: (0.0004451250116457099, 0.05624506233702937, -0.038961783313751226), u: [ 0.       13.938083], noise: [5.33175186 4.35350877]
Start training ...
Step: 60, L: 540.0869140625, J: 14.582347869873047
Current x: (0.0006772647742917253, 0.06690488331993526, -0.048529741811752326), u: [ 0.       15.038069], noise: [3.80026716 3.4110876 ]
Start training ...
Step: 60, L: 874.0322875976562, J: 12.931706428527832
Current x: (0.0010014220147260464, 0.07894424593566529, -0.05939368429183961), u: [ 0.       15.937852], noise: [3.97450163 4.49680569]
Start training ...
Step: 60, L: 544.448974609375, J: 16.377521514892578
Current x: (0.0014335127572978634, 0.09222493144284281, -0.07172251563072206), u: [ 0.       16.875023], noise: [4.13453989 4.34492391]
Start training ...
Step: 60, L: 344.3038330078125, J: 18.523914337158203
Current x: (0.002010493276967138, 0.10696122897726223, -0.08569736251831056), u: [ 0.       17.751818], noise: [3.

Step: 61, L: 1693.885009765625, J: 22.05654525756836
Current x: (3.4471546256332993e-06, 0.006365745479307631, -0.0036463850975036625), u: [0.64333344 5.6750627 ], noise: [5.08736859 5.46902044]
Start training ...
Step: 61, L: 1123.828857421875, J: 20.119522094726562
Current x: (9.245327472101038e-06, 0.009026799744458948, -0.005487630796432496), u: [0.32028368 6.5693083 ], noise: [3.68537173 4.66867474]
Start training ...
Step: 61, L: 624.9389038085938, J: 21.305721282958984
Current x: (2.119668294432268e-05, 0.012394321238121571, -0.00787021460533142), u: [0.0627807 7.263829 ], noise: [4.0533045  4.54462195]
Start training ...
Step: 61, L: 591.7333984375, J: 20.213144302368164
Current x: (4.1513142186861535e-05, 0.016305183583289588, -0.010976031160354614), u: [0.       8.025145], noise: [3.33012403 5.25954891]
Start training ...
Step: 61, L: 1022.1436157226562, J: 18.039960861206055
Current x: (7.436242418327416e-05, 0.020827450280702403, -0.01485108437538147), u: [0.       8.853334

Step: 61, L: 900.087890625, J: 17.382610321044922
Current x: (0.3894028228354374, 1.200122974705061, -1.3105506173610688), u: [ 0.       22.934229], noise: [5.30312694 4.4480468 ]
Start training ...
Step: 61, L: 1096.255859375, J: 22.643178939819336
Current x: (0.4328881794512774, 1.2587916512328485, -1.3880342288494112), u: [ 0.       22.760386], noise: [4.60380533 4.71868228]
Start training ...
Step: 61, L: 658.9566650390625, J: 21.087570190429688
Current x: (0.47953201399185014, 1.3173203819701906, -1.467725755262375), u: [ 0.       22.549294], noise: [3.82151523 4.85103256]
Start training ...
Step: 61, L: 972.8975830078125, J: 20.20090103149414
Current x: (0.5293307029820123, 1.3754512071994045, -1.5497048078060152), u: [ 0.       22.297602], noise: [5.30624861 5.01806889]
Start training ...
Step: 61, L: 1496.9595947265625, J: 22.88351821899414
Simulation ends in 50 steps
Episode 62 begins...
Current x: (0.0, 0.0, 0.0), u: [1.9184597 2.1093986], noise: [3.55244195 5.40369248]
Start

Step: 62, L: 375.8751525878906, J: 19.071714401245117
Current x: (0.2738748859539633, 1.0495166621268526, -1.0837186230659486), u: [ 0.       23.731133], noise: [3.06202349 2.74412917]
Start training ...
Step: 62, L: 1521.1806640625, J: 19.40115737915039
Current x: (0.3082767373405883, 1.1085751921110267, -1.1538185181617737), u: [ 0.       23.638151], noise: [4.89479698 4.86861346]
Start training ...
Step: 62, L: 757.2357177734375, J: 17.65285873413086
Current x: (0.3452888115783863, 1.1680352012563275, -1.226259737110138), u: [ 0.       23.526274], noise: [5.12169061 5.7786665 ]
Start training ...
Step: 62, L: 835.9039916992188, J: 19.16756820678711
Current x: (0.3853548477179564, 1.227866970530872, -1.3010621529579163), u: [ 0.       23.385977], noise: [5.54052213 5.01961549]
Start training ...
Step: 62, L: 1297.004638671875, J: 17.940488815307617
Current x: (0.4286612288792795, 1.2878805355736878, -1.3782828938484193), u: [ 0.       23.221836], noise: [4.97552397 4.47189754]
Start 

Step: 63, L: 703.930908203125, J: 16.34654998779297
Current x: (0.1925051678335656, 0.8598515333353774, -0.9059751034259798), u: [ 0.       24.021688], noise: [6.29820724 3.94558284]
Start training ...
Step: 63, L: 1032.7779541015625, J: 20.087858200073242
Current x: (0.21911418908607141, 0.9153778985671349, -0.9700416977882387), u: [ 0.      24.00648], noise: [5.08510995 4.50926764]
Start training ...
Step: 63, L: 492.4794616699219, J: 18.478130340576172
Current x: (0.2484199964372224, 0.972037164450035, -1.036275198507309), u: [ 0.      23.96932], noise: [5.10011439 4.4265464 ]
Start training ...
Step: 63, L: 1032.5413818359375, J: 18.085472106933594
Current x: (0.28049756966048855, 1.0296147696165687, -1.1048517629623413), u: [ 0.       23.913816], noise: [6.20375991 3.23032794]
Start training ...
Step: 63, L: 992.85107421875, J: 18.79802131652832
Current x: (0.31545751437311953, 1.087917757183399, -1.1757579025745393), u: [ 0.       23.847525], noise: [5.32898186 4.56599603]
Start 

Step: 64, L: 950.6209716796875, J: 17.64572525024414
Current x: (0.07860948262877335, 0.5928552572966639, -0.5706697893142701), u: [ 0.       24.180645], noise: [4.25234913 1.84415052]
Start training ...
Step: 64, L: 1255.55859375, J: 21.99188232421875
Current x: (0.09234049930832634, 0.6408684276967233, -0.6214925958633424), u: [ 0.      24.21706], noise: [4.86725895 3.2883933 ]
Start training ...
Step: 64, L: 1129.616455078125, J: 19.367084503173828
Current x: (0.10770707473765231, 0.6904485390111554, -0.6744926469802858), u: [ 0.       24.248566], noise: [4.63517855 1.94971723]
Start training ...
Step: 64, L: 752.6065063476562, J: 17.098262786865234
Current x: (0.12495854902393168, 0.7416795850876126, -0.7297565176010133), u: [ 0.      24.26246], noise: [4.2638245  4.17193916]
Start training ...
Step: 64, L: 655.5987548828125, J: 18.26510238647461
Current x: (0.14413557651928763, 0.7943377982016686, -0.7871766984939577), u: [ 0.       24.263496], noise: [3.609656   4.07129731]
Start

Step: 65, L: 297.30487060546875, J: 17.415225982666016
Current x: (0.016254666472271466, 0.2656156433334781, -0.2510254898071289), u: [ 0.       22.474766], noise: [3.32800351 2.43916729]
Start training ...
Step: 65, L: 393.97369384765625, J: 19.40542984008789
Current x: (0.02033486760271008, 0.29607921241796914, -0.2819680160522461), u: [ 0.       22.705347], noise: [4.5547725  6.52312405]
Start training ...
Step: 65, L: 765.0507202148438, J: 19.76763153076172
Current x: (0.025116591183368734, 0.3282974597711169, -0.3150691352844238), u: [ 0.       22.966028], noise: [5.02125638 5.79947322]
Start training ...
Step: 65, L: 641.2255859375, J: 18.179054260253906
Current x: (0.03083832168273413, 0.362779620602101, -0.3506376243591309), u: [ 0.       23.183334], noise: [3.55624014 3.49285609]
Start training ...
Step: 65, L: 747.1162109375, J: 21.015987396240234
Current x: (0.037607043698928776, 0.399493141813075, -0.38858053779602053), u: [ 0.       23.328447], noise: [4.4972454  5.0514166

Step: 66, L: 462.8635559082031, J: 14.615917205810547
Current x: (0.006850023212812385, 0.18973072907393013, -0.15813186540603638), u: [ 0.      21.61101], noise: [4.71828844 7.2621765 ]
Start training ...
Step: 66, L: 977.6294555664062, J: 20.144102096557617
Current x: (0.008952447084457877, 0.2144352105346831, -0.18125281200408935), u: [ 0.      22.13111], noise: [6.29824779 3.43351913]
Start training ...
Step: 66, L: 1505.707275390625, J: 20.62394905090332
Current x: (0.011583848217468972, 0.241475928187123, -0.20678924837112428), u: [ 0.       22.537657], noise: [3.98503771 4.40508649]
Start training ...
Step: 66, L: 664.5154418945312, J: 21.380035400390625
Current x: (0.014789615954004331, 0.2706697377723499, -0.23425232276916505), u: [ 0.      22.85304], noise: [3.70691007 5.55388347]
Start training ...
Step: 66, L: 1182.9091796875, J: 14.41421127319336
Current x: (0.01863038859341209, 0.30190943431024636, -0.2640111678123474), u: [ 0.      23.12811], noise: [3.99370503 4.4288832

Step: 67, L: 964.4349365234375, J: 21.670948028564453
Current x: (0.0037592880289598883, 0.13244351173902527, -0.11158955845832826), u: [ 0.       19.989904], noise: [4.60324904 3.57758224]
Start training ...
Step: 67, L: 560.9385986328125, J: 18.6664981842041
Current x: (0.005004451200192812, 0.15211086494983195, -0.12986354842185976), u: [ 0.       20.634329], noise: [4.86853794 6.19914426]
Start training ...
Step: 67, L: 965.016845703125, J: 22.73904037475586
Current x: (0.0065633183534025375, 0.17359677037146698, -0.15003396210670475), u: [ 0.       21.275316], noise: [6.00030738 5.51487324]
Start training ...
Step: 67, L: 518.2890014648438, J: 16.039777755737305
Current x: (0.00853272287692594, 0.19724518245775952, -0.17240086932182316), u: [ 0.       21.833178], noise: [3.59283536 4.759774  ]
Start training ...
Step: 67, L: 669.6016845703125, J: 16.838008880615234
Current x: (0.010992252573969059, 0.22315480738952567, -0.19684676480293278), u: [ 0.       22.235176], noise: [5.509

Step: 68, L: 737.5550537109375, J: 16.925045013427734
Current x: (0.0002787871481475696, 0.03887595661916387, -0.029396838283538815), u: [ 0.       11.819734], noise: [4.57769407 6.22580088]
Start training ...
Step: 68, L: 653.5494995117188, J: 16.358644485473633
Current x: (0.0004223905589034543, 0.0463800100280473, -0.036945039510726925), u: [ 0.       13.157473], noise: [3.76576487 4.31561888]
Start training ...
Step: 68, L: 944.8013305664062, J: 21.128841400146484
Current x: (0.0006324895291153254, 0.0551644087942472, -0.04584002475738525), u: [ 0.       14.260951], noise: [4.54790717 4.67497255]
Start training ...
Step: 68, L: 1357.3857421875, J: 22.820058822631836
Current x: (0.0009210376851835676, 0.06509024377764938, -0.05610574259757995), u: [ 0.       15.444566], noise: [5.78582265 4.7421604 ]
Start training ...
Step: 68, L: 2255.101806640625, J: 21.656208038330078
Current x: (0.0013171980855711482, 0.07638099499565108, -0.06781026220321655), u: [ 0.      16.67033], noise: [3

Step: 69, L: 730.9715576171875, J: 13.09908390045166
Current x: (5.1655371636132566e-05, 0.020555935717243633, -0.010825477600097657), u: [0.       9.267162], noise: [5.0941898  4.43379254]
Start training ...
Step: 69, L: 1209.977294921875, J: 18.68912696838379
Current x: (8.762275478235476e-05, 0.026005141827367465, -0.015206450033187867), u: [ 0.       10.342836], noise: [4.4047073  5.89074075]
Start training ...
Step: 69, L: 550.826171875, J: 19.971813201904297
Current x: (0.00014393638061551483, 0.03235275212026269, -0.020448098945617676), u: [ 0.       11.573349], noise: [4.48117586 4.36459017]
Start training ...
Step: 69, L: 946.1082763671875, J: 19.551013946533203
Current x: (0.00023163230144329774, 0.039782952270232305, -0.026872634935379027), u: [ 0.      12.70064], noise: [4.18340572 6.29106594]
Start training ...
Step: 69, L: 1502.478515625, J: 16.175357818603516
Current x: (0.0003610785211831599, 0.04827363705485926, -0.03444284734725952), u: [ 0.       13.999702], noise: [

Step: 70, L: 1614.76513671875, J: 17.991344451904297
Current x: (0.0, 0.0003929846229553223, -0.00018859739303588868), u: [1.694595  2.8981168], noise: [5.7979412  4.33101808]
Start training ...
Step: 70, L: 1461.9349365234375, J: 20.033172607421875
Current x: (0.0, 0.0010088373489379882, -0.00040112156867980957), u: [1.453258  3.5525398], noise: [4.36070696 3.87581668]
Start training ...
Step: 70, L: 800.2627563476562, J: 20.82614517211914
Current x: (2.7764687749681283e-07, 0.0021158571591820305, -0.0005873056411743164), u: [1.277189 4.028192], noise: [4.92163162 3.74160436]
Start training ...
Step: 70, L: 791.2437744140625, J: 18.171512603759766
Current x: (1.0864717985752784e-06, 0.0035661089643330155, -0.0009349288940429688), u: [1.0659827 4.596748 ], noise: [4.25205947 4.65508219]
Start training ...
Step: 70, L: 1706.64501953125, J: 19.779651641845703
Current x: (2.71568139863057e-06, 0.005432222177135289, -0.0014396497249603273), u: [0.8170792 5.262537 ], noise: [6.00678739 2.95

Current x: (0.453951789028327, 1.3698281729147717, -1.4576520092964174), u: [ 0.       24.020254], noise: [4.88944033 2.79325881]
Start training ...
Step: 70, L: 1770.576171875, J: 24.31684684753418
Current x: (0.5030268200780738, 1.4313478673667386, -1.5418091657161714), u: [ 0.       23.866959], noise: [3.74328639 3.55562906]
Start training ...
Step: 70, L: 894.9420776367188, J: 22.227479934692383
Simulation ends in 50 steps
Episode 71 begins...
Current x: (0.0, 0.0, 0.0), u: [2.0059662 2.0921285], noise: [2.39919873 4.11400365]
Start training ...
Step: 71, L: 809.2044067382812, J: 20.53396987915039
Current x: (0.0, 0.0, 0.0), u: [1.9576625 2.218488 ], noise: [4.49602049 3.92623311]
Start training ...
Step: 71, L: 1203.041015625, J: 21.078479766845703
Current x: (0.0, 8.012966537475588e-05, -0.00018009672164916995), u: [1.8171477 2.589973 ], noise: [4.44078177 6.38189253]
Start training ...
Step: 71, L: 1118.1771240234375, J: 17.871206283569336
Current x: (0.0, 0.0004390997238159181,

Step: 71, L: 1080.6475830078125, J: 22.00788116455078
Current x: (0.37414435173709926, 1.2187130291593626, -1.2889880072593694), u: [ 0.      24.20214], noise: [4.32079752 4.28504066]
Start training ...
Step: 71, L: 573.3377075195312, J: 23.02398109436035
Current x: (0.41698273622582027, 1.2799790535407667, -1.3673407951831822), u: [ 0.       24.084656], noise: [4.57301608 4.12263517]
Start training ...
Step: 71, L: 929.828125, J: 18.256141662597656
Current x: (0.4629725047363633, 1.3411764451676789, -1.4481102214813237), u: [ 0.       23.942886], noise: [4.90836528 6.127652  ]
Start training ...
Step: 71, L: 696.9251098632812, J: 18.55347442626953
Current x: (0.5121726920813288, 1.4020551786114783, -1.5312430751323705), u: [ 0.       23.766493], noise: [3.88151837 3.83498018]
Start training ...
Step: 71, L: 492.65802001953125, J: 16.615550994873047
Simulation ends in 50 steps
Episode 72 begins...
Current x: (0.0, 0.0, 0.0), u: [2.0021734 2.0455961], noise: [4.86575576 3.60096387]
Star

Current x: (0.21033207724323635, 0.9717576233919627, -0.9586364215373993), u: [ 0.       24.343046], noise: [5.65404502 5.80108036]
Start training ...
Step: 72, L: 731.6072998046875, J: 19.349611282348633
Current x: (0.23914485731860238, 1.0309678194562273, -1.0253589725494385), u: [ 0.       24.312069], noise: [5.11814078 3.81657277]
Start training ...
Step: 72, L: 1112.404296875, J: 20.986499786376953
Current x: (0.27088739117612065, 1.0912541090913541, -1.0945305317401886), u: [ 0.       24.264515], noise: [5.86208182 2.90995754]
Start training ...
Step: 72, L: 1597.92626953125, J: 26.869773864746094
Current x: (0.3054721938743624, 1.1522842152105521, -1.1660031409263611), u: [ 0.       24.205439], noise: [5.15361857 4.88584308]
Start training ...
Step: 72, L: 1040.2713623046875, J: 15.642439842224121
Current x: (0.3429929986455278, 1.2138479276523138, -1.2396069892406465), u: [ 0.       24.123192], noise: [4.35623663 4.11216024]
Start training ...
Step: 72, L: 713.57177734375, J: 1

Step: 73, L: 1261.537841796875, J: 20.489166259765625
Current x: (0.05333411642709834, 0.5172456397458118, -0.4699943479061127), u: [ 0.       24.376076], noise: [3.6730761  3.95158515]
Start training ...
Step: 73, L: 762.7589111328125, J: 25.206748962402344
Current x: (0.06391996720735435, 0.5629584515786017, -0.5161864750862122), u: [ 0.       24.460978], noise: [3.28037629 5.82151514]
Start training ...
Step: 73, L: 905.0042724609375, J: 17.51028060913086
Current x: (0.07595507131804533, 0.6105433557654961, -0.564844060754776), u: [ 0.       24.533186], noise: [4.39419329 4.72070686]
Start training ...
Step: 73, L: 1464.930908203125, J: 15.775472640991211
Current x: (0.08964672821346414, 0.6600662487010819, -0.616201858139038), u: [ 0.       24.595406], noise: [5.65611477 4.55472674]
Start training ...
Step: 73, L: 671.79736328125, J: 17.178081512451172
Current x: (0.10513951369813225, 0.7114503015702931, -0.6700456254482269), u: [ 0.       24.650738], noise: [5.70869981 4.49122046]

Step: 74, L: 1003.4384765625, J: 14.489057540893555
Current x: (0.0237538121921562, 0.3344889235974293, -0.3109852919578553), u: [ 0.       24.262371], noise: [5.71948643 4.73750498]
Start training ...
Step: 74, L: 1253.639404296875, J: 20.5186767578125
Current x: (0.029497797870079993, 0.37108184683141177, -0.3470965736389161), u: [ 0.       24.442303], noise: [5.22971746 5.0798298 ]
Start training ...
Step: 74, L: 1298.235595703125, J: 20.80905532836914
Current x: (0.036304184916033364, 0.4099991661332544, -0.38553589429855356), u: [ 0.       24.588482], noise: [4.3751194 4.7225944]
Start training ...
Step: 74, L: 757.9439086914062, J: 20.99344825744629
Current x: (0.04429272205618118, 0.4512034254633268, -0.4264044565200807), u: [ 0.       24.700932], noise: [4.40332995 3.87820757]
Start training ...
Step: 74, L: 1421.45068359375, J: 21.658164978027344
Current x: (0.053548047938846065, 0.4945480376842579, -0.46976661462783825), u: [ 0.      24.78985], noise: [5.83327354 3.30949831]


Step: 75, L: 1077.210205078125, J: 19.509368896484375
Current x: (0.0072583074275619315, 0.19541922813179133, -0.16589370527267458), u: [ 0.       22.555046], noise: [4.529208   4.22527813]
Start training ...
Step: 75, L: 1046.238525390625, J: 17.270612716674805
Current x: (0.009500458804831746, 0.2216206032940938, -0.19064624938964847), u: [ 0.       22.953346], noise: [5.67664373 5.45586613]
Start training ...
Step: 75, L: 1511.230224609375, J: 18.44217872619629
Current x: (0.012259636486327781, 0.24992894734305626, -0.21762390527725223), u: [ 0.       23.322739], noise: [3.90336377 3.22412313]
Start training ...
Step: 75, L: 1305.5496826171875, J: 20.491413116455078
Current x: (0.01566471892481991, 0.2806031203069663, -0.24687481803894046), u: [ 0.       23.579437], noise: [3.89108647 4.86209698]
Start training ...
Step: 75, L: 765.1918334960938, J: 22.209171295166016
Current x: (0.01972725275083491, 0.31326949360117234, -0.2783900805473328), u: [ 0.      23.81249], noise: [4.170437

Step: 76, L: 927.27685546875, J: 18.414884567260742
Current x: (0.0020816171237855114, 0.12106656624923046, -0.09031288537979125), u: [ 0.       20.561186], noise: [4.19347792 5.12289496]
Start training ...
Step: 76, L: 727.5523681640625, J: 19.51230239868164
Current x: (0.0029507354138559616, 0.1401732168479256, -0.10724897212982176), u: [ 0.       21.294413], noise: [4.80625444 3.30487707]
Start training ...
Step: 76, L: 1076.0565185546875, J: 20.81168556213379
Current x: (0.004089319886836123, 0.16127444681037711, -0.12633411912918088), u: [ 0.       21.889223], noise: [4.2835786  4.58124909]
Start training ...
Step: 76, L: 691.793212890625, J: 20.14313507080078
Current x: (0.005542671562306172, 0.18431833577361148, -0.14739856948852537), u: [ 0.       22.412378], noise: [3.63347177 4.12723785]
Start training ...
Step: 76, L: 832.339599609375, J: 19.60432243347168
Current x: (0.007383519153005185, 0.20943212027308253, -0.17068170919418332), u: [ 0.       22.831682], noise: [3.444230

Step: 77, L: 1105.62109375, J: 15.98596477508545
Current x: (3.4672172858223614e-05, 0.03172086888464039, -0.013356392812728882), u: [ 0.       10.475829], noise: [3.83282816 4.56233238]
Start training ...
Step: 77, L: 771.4943237304688, J: 13.61467170715332
Current x: (7.286565739234098e-05, 0.038251311363602096, -0.018132269287109377), u: [ 0.       11.561017], noise: [5.62354085 4.21713805]
Start training ...
Step: 77, L: 1905.218505859375, J: 22.619468688964844
Current x: (0.0001362632265168028, 0.04568768440681691, -0.02402867913246155), u: [ 0.       12.820891], noise: [4.48666304 4.86033658]
Start training ...
Step: 77, L: 1083.5823974609375, J: 17.821826934814453
Current x: (0.00023846479938490552, 0.054282875163216476, -0.03094055042266846), u: [ 0.       14.071349], noise: [4.2407362  5.90827538]
Start training ...
Step: 77, L: 1043.303955078125, J: 18.869564056396484
Current x: (0.00039392776407384724, 0.06411321523398396, -0.03917187829017639), u: [ 0.       15.382623], noi

Step: 78, L: 698.3245849609375, J: 22.390552520751953
Current x: (-1.791887629283832e-05, 0.021241544332922312, -0.00353275489807129), u: [0.       9.310249], noise: [4.9278919  6.54504903]
Start training ...
Step: 78, L: 725.2634887695312, J: 12.441568374633789
Current x: (-1.8802186981928585e-05, 0.026866136120694474, -0.006041749334335328), u: [ 0.       10.692813], noise: [4.50191067 4.04178757]
Start training ...
Step: 78, L: 2201.03564453125, J: 18.32132339477539
Current x: (-1.2343321396196034e-05, 0.03358803391675451, -0.009643484401702884), u: [ 0.       11.826094], noise: [5.48751275 3.98186524]
Start training ...
Step: 78, L: 829.220458984375, J: 20.225706100463867
Current x: (5.737691374704089e-06, 0.04125254772670461, -0.014268488359451298), u: [ 0.       13.089525], noise: [2.9116198  5.35869139]
Start training ...
Step: 78, L: 1187.91650390625, J: 21.79918670654297
Current x: (4.435464027224667e-05, 0.0500655096360381, -0.01992553691864014), u: [ 0.       14.298821], noi

Current x: (1.485459990732535e-07, 0.0017457416112529396, -0.0012443231105804444), u: [1.3716688 4.075323 ], noise: [5.43476685 6.0400616 ]
Start training ...
Step: 79, L: 849.7581176757812, J: 25.590389251708984
Current x: (1.0769318229021183e-06, 0.003297415939337546, -0.0021124408245086672), u: [0.963064  5.0869503], noise: [5.61555457 4.0507622 ]
Start training ...
Step: 79, L: 1512.064208984375, J: 23.16565704345703
Current x: (4.110938471644383e-06, 0.005560271116809484, -0.003311453485488892), u: [0.6264033 5.9173036], noise: [3.24317114 6.62429375]
Start training ...
Step: 79, L: 980.6666259765625, J: 19.704700469970703
Current x: (1.0464924674597184e-05, 0.008413755935843304, -0.004766375541687013), u: [0.23925251 6.9128046 ], noise: [5.41570695 3.47299185]
Start training ...
Step: 79, L: 1163.9591064453125, J: 21.445270538330078
Current x: (2.2253383872851934e-05, 0.011927348852813897, -0.007088499879837038), u: [0.        7.7878323], noise: [4.10439086 4.95839274]
Start trai

Step: 80, L: 343.91705322265625, J: 22.139625549316406
Current x: (0.0, 0.0, 0.0), u: [2.0194187 2.4121368], noise: [2.92524011 4.00110975]
Start training ...
Step: 80, L: 894.63916015625, J: 21.334367752075195
Current x: (0.0, 0.0004575540008544922, 0.00023199124336242678), u: [1.9256899 2.6537259], noise: [4.55193145 5.05983771]
Start training ...
Step: 80, L: 967.4531860351562, J: 20.847049713134766
Current x: (0.0, 0.0010698985404968262, 0.0003171236991882325), u: [1.6694906 3.2812254], noise: [3.6587246  5.75187799]
Start training ...
Step: 80, L: 1147.2315673828125, J: 17.79326629638672
Current x: (-3.292230622838561e-07, 0.0021203615417065857, 0.0002786619663238526), u: [1.393716  3.9727662], noise: [5.78213835 3.54088002]
Start training ...
Step: 80, L: 1720.453369140625, J: 24.724082946777344
Current x: (-1.1138775645269898e-06, 0.0036259563295279766, -0.00013028855323791495), u: [1.1102837 4.6522493], noise: [4.6976954 5.0072869]
Start training ...
Step: 80, L: 1516.489501953

Step: 80, L: 741.76318359375, J: 21.6890811920166
Current x: (0.2623834921869999, 1.0976448095871048, -1.0984369644165037), u: [ 0.       25.172676], noise: [4.29473987 4.81287151]
Start training ...
Step: 80, L: 1060.110595703125, J: 25.038455963134766
Current x: (0.2963459359824954, 1.159788100581322, -1.1725581068992612), u: [ 0.       25.137096], noise: [5.691328   2.98139801]
Start training ...
Step: 80, L: 1490.941650390625, J: 24.579330444335938
Current x: (0.3333610301787022, 1.222510105479448, -1.2492483301162718), u: [ 0.       25.094503], noise: [4.60526095 4.2748851 ]
Start training ...
Step: 80, L: 1869.636962890625, J: 24.46625518798828
Current x: (0.37349252996301546, 1.285562238480749, -1.328181269931793), u: [ 0.       25.034359], noise: [5.58214427 4.46504154]
Start training ...
Step: 80, L: 884.84814453125, J: 25.246427536010742
Current x: (0.41684736536981054, 1.3487070912812817, -1.4095906225204464), u: [ 0.       24.961027], noise: [5.74539227 3.99259473]
Start tr

Step: 81, L: 1014.4255981445312, J: 24.655860900878906
Current x: (0.10739272857232059, 0.7239082881140783, -0.6915916818618775), u: [ 0.       25.212997], noise: [3.87247482 5.10843157]
Start training ...
Step: 81, L: 636.0830078125, J: 23.008493423461914
Current x: (0.12527504473006043, 0.7779030937223, -0.7495260425567627), u: [ 0.       25.237143], noise: [5.0440682  3.63931062]
Start training ...
Step: 81, L: 762.6109619140625, J: 25.716896057128906
Current x: (0.1453381248843896, 0.8335506228730285, -0.810105298614502), u: [ 0.      25.25494], noise: [5.48450014 6.87938871]
Start training ...
Step: 81, L: 1620.778564453125, J: 24.056373596191406
Current x: (0.16771218262396168, 0.890700174355925, -0.8730677932739258), u: [ 0.      25.26724], noise: [3.19433375 3.63787668]
Start training ...
Step: 81, L: 1022.921630859375, J: 18.612171173095703
Current x: (0.1928111969237413, 0.9494622512021749, -0.9386952705383301), u: [ 0.       25.261566], noise: [5.28382184 3.1749591 ]
Start t

Step: 82, L: 1558.126953125, J: 24.908838272094727
Current x: (0.018346806707495122, 0.30684399390496686, -0.27858535919189453), u: [ 0.       24.481737], noise: [5.95871685 5.62173871]
Start training ...
Step: 82, L: 217.85086059570312, J: 16.183696746826172
Current x: (0.023164322827651716, 0.34225346192113126, -0.31356857080459594), u: [ 0.       24.685825], noise: [6.54372199 3.96494601]
Start training ...
Step: 82, L: 1180.009033203125, J: 17.821685791015625
Current x: (0.02897353414191184, 0.3801491127773063, -0.3509662582397461), u: [ 0.       24.850208], noise: [5.75710749 4.26362515]
Start training ...
Step: 82, L: 1641.327392578125, J: 25.93756866455078
Current x: (0.03586833783842999, 0.4204116009615896, -0.3905746506690979), u: [ 0.      24.98331], noise: [5.71162328 3.10977435]
Start training ...
Step: 82, L: 2386.26513671875, J: 27.47949981689453
Current x: (0.04396202305691657, 0.46296761330544106, -0.43251871585845947), u: [ 0.       25.089941], noise: [4.2347942  4.196

Step: 83, L: 1189.2559814453125, J: 18.681324005126953
Current x: (0.007135061012652776, 0.2157181317248049, -0.17851541166305543), u: [ 0.       23.549973], noise: [4.00931936 3.77461804]
Start training ...
Step: 83, L: 873.5130615234375, J: 18.57431411743164
Current x: (0.0095098686115511, 0.24401446324822684, -0.20611505041122438), u: [ 0.       23.860273], noise: [5.55492017 3.97266925]
Start training ...
Step: 83, L: 1102.3768310546875, J: 25.574783325195312
Current x: (0.01244106860794909, 0.2744133911543841, -0.23604621629714967), u: [ 0.       24.140543], noise: [4.96433546 5.6135107 ]
Start training ...
Step: 83, L: 1332.472412109375, J: 21.865800857543945
Current x: (0.016055580389802898, 0.30709943458285976, -0.2682051844596863), u: [ 0.      24.38113], noise: [4.34068969 3.83938135]
Start training ...
Step: 83, L: 1083.8392333984375, J: 20.658931732177734
Current x: (0.020482017489066143, 0.3421800433946451, -0.3028431244850159), u: [ 0.      24.56095], noise: [5.54037804 5

Current x: (0.000814435424271039, 0.11138434922184189, -0.07147984075546265), u: [ 0.       20.436329], noise: [4.80124557 4.43098294]
Start training ...
Step: 84, L: 1129.064453125, J: 18.696449279785156
Current x: (0.0013682787801632423, 0.12952250402600818, -0.08702636055946351), u: [ 0.      21.24348], noise: [3.85315999 4.60452187]
Start training ...
Step: 84, L: 1347.0523681640625, J: 23.166025161743164
Current x: (0.0021340119586309167, 0.14963893830007136, -0.10457948703765871), u: [ 0.      21.91216], noise: [5.14417235 5.90249071]
Start training ...
Step: 84, L: 1101.8511962890625, J: 20.958696365356445
Current x: (0.0031578974153389037, 0.17173324887717317, -0.1243320977687836), u: [ 0.       22.536789], noise: [3.27530032 4.16052142]
Start training ...
Step: 84, L: 1978.4718017578125, J: 26.145082473754883
Current x: (0.00452583661815243, 0.19612443497332424, -0.14635175638198855), u: [ 0.       22.985706], noise: [3.73741298 4.1841742 ]
Start training ...
Step: 84, L: 1069

Current x: (4.674670084034994e-05, 0.033305573849299504, -0.013344487905502321), u: [ 0.        12.1043825], noise: [4.17486868 5.00358597]
Start training ...
Step: 85, L: 1713.010498046875, J: 21.693740844726562
Current x: (9.093161107541778e-05, 0.04073189756817819, -0.018281738567352296), u: [ 0.      13.46352], noise: [3.75471125 4.65747207]
Start training ...
Step: 85, L: 325.92315673828125, J: 17.437347412109375
Current x: (0.00016351653438200913, 0.04930531548383561, -0.024512299299240113), u: [ 0.       14.770951], noise: [3.10800216 4.07811508]
Start training ...
Step: 85, L: 358.367431640625, J: 21.69523811340332
Current x: (0.0002760918197983029, 0.05908493822380345, -0.032179488182067874), u: [ 0.       15.965893], noise: [5.55793262 4.54928865]
Start training ...
Step: 85, L: 955.9896850585938, J: 21.801822662353516
Current x: (0.00044248353654831985, 0.07007860809746969, -0.04142078347206116), u: [ 0.       17.303476], noise: [3.63879713 4.57145461]
Start training ...
Ste

Step: 86, L: 1700.9912109375, J: 25.90313148498535
Current x: (-5.033266298011527e-05, 0.014887001257700344, 0.0005577831745147703), u: [0.        7.7245717], noise: [4.27318005 3.97133213]
Start training ...
Step: 86, L: 1223.5311279296875, J: 20.665790557861328
Current x: (-7.065511907314553e-05, 0.01902475267975911, -0.0012576582431793218), u: [0.       8.663916], noise: [3.61617204 5.00780653]
Start training ...
Step: 86, L: 830.8222045898438, J: 17.514301300048828
Current x: (-9.186830374481357e-05, 0.023778412232002232, -0.003815372037887574), u: [0.       9.779424], noise: [5.54802996 5.25022773]
Start training ...
Step: 86, L: 1382.840576171875, J: 23.352170944213867
Current x: (-0.00011090726259876379, 0.029279859937292336, -0.0073786408901214615), u: [ 0.       11.207398], noise: [4.78349683 4.15727162]
Start training ...
Step: 86, L: 1129.841064453125, J: 19.727466583251953
Current x: (-0.00012209508906134566, 0.03585806091458049, -0.011890071916580203), u: [ 0.       12.513

Step: 87, L: 1599.35009765625, J: 24.70299530029297
Current x: (0.0, 7.191690826416006e-05, 6.350603103637696e-05), u: [1.9725109 2.6694736], noise: [3.38473207 3.50228476]
Start training ...
Step: 87, L: 544.2099609375, J: 20.708953857421875
Current x: (0.0, 0.0006741548843383788, 9.5491361618042e-05), u: [1.855191 2.92803 ], noise: [4.73262957 4.08226417]
Start training ...
Step: 87, L: 1968.0750732421875, J: 23.91042137145996
Current x: (-7.321611098172895e-08, 0.0014482929816839565, 4.602518081665041e-05), u: [1.6034697 3.4632137], noise: [6.38208202 4.42590249]
Start training ...
Step: 87, L: 709.4547119140625, J: 20.85521125793457
Current x: (-2.762824640264064e-07, 0.0026012424741969336, -4.568834304809567e-05), u: [1.204849 4.296791], noise: [3.75237915 2.7976908 ]
Start training ...
Step: 87, L: 577.6400146484375, J: 24.677587509155273
Current x: (-5.524122641141778e-07, 0.0043606587771623205, -0.00012775831222534175), u: [1.0460311 4.6542377], noise: [4.42622559 5.57461815]
S

Step: 87, L: 313.4236755371094, J: 14.555135726928711
Current x: (0.30056696333481414, 1.0897725834802054, -1.1779780189990998), u: [ 0.       25.016266], noise: [4.21647578 3.33117291]
Start training ...
Step: 87, L: 1235.0072021484375, J: 17.331615447998047
Current x: (0.33803605777047807, 1.1493409689982932, -1.2536149249076844), u: [ 0.       24.976257], noise: [3.89495623 4.11039714]
Start training ...
Step: 87, L: 1273.8892822265625, J: 21.484743118286133
Current x: (0.3785135170891604, 1.2091748802868576, -1.3316649270534517), u: [ 0.      24.92217], noise: [5.78043013 4.40672944]
Start training ...
Step: 87, L: 718.3197021484375, J: 21.79052734375
Current x: (0.4221246194120271, 1.2690564543344676, -1.412234099006653), u: [ 0.       24.856344], noise: [4.32653786 4.9369389 ]
Start training ...
Step: 87, L: 831.6162109375, J: 19.69644546508789
Current x: (0.4691467478202475, 1.328788623854871, -1.4951581179142002), u: [ 0.      24.77277], noise: [4.08851084 5.1685694 ]
Start tra

Current x: (0.08161229307527448, 0.6069531603131644, -0.5901268352031708), u: [ 0.       25.270767], noise: [4.53137835 4.54227041]
Start training ...
Step: 88, L: 1928.077880859375, J: 24.264259338378906
Current x: (0.09637038267171846, 0.6574893824789408, -0.6435205512046814), u: [ 0.       25.320791], noise: [3.05997919 5.75928412]
Start training ...
Step: 88, L: 2155.71875, J: 23.63627815246582
Current x: (0.11303962374646868, 0.7098981796324444, -0.6994424332141876), u: [ 0.       25.356737], noise: [4.77561474 4.9375668 ]
Start training ...
Step: 88, L: 685.4613037109375, J: 21.795745849609375
Current x: (0.13175732131864026, 0.7640571414883388, -0.7581663249969482), u: [ 0.       25.386982], noise: [5.1124021  4.73935999]
Start training ...
Step: 88, L: 1090.935302734375, J: 23.38677978515625
Current x: (0.15273278922962533, 0.8199186580194703, -0.8194420855998993), u: [ 0.       25.409363], noise: [5.61238589 5.91248696]
Start training ...
Step: 88, L: 1075.221923828125, J: 23.

Step: 89, L: 1510.141845703125, J: 21.127056121826172
Current x: (0.04390700243314853, 0.4543719137821259, -0.43341062316894535), u: [ 0.       24.951422], noise: [2.71965803 2.80824538]
Start training ...
Step: 89, L: 1560.375732421875, J: 22.476131439208984
Current x: (0.053118109159289384, 0.49803709968268717, -0.47822406415939334), u: [ 0.       25.024033], noise: [5.04572087 4.09026305]
Start training ...
Step: 89, L: 1599.12744140625, J: 22.638967514038086
Current x: (0.06360925153284268, 0.543487402150583, -0.5255415060043336), u: [ 0.       25.098625], noise: [4.41043665 4.37655304]
Start training ...
Step: 89, L: 1440.7633056640625, J: 19.230926513671875
Current x: (0.07567244891545435, 0.5909894776029364, -0.5752658053398133), u: [ 0.       25.156944], noise: [5.43203745 4.87280849]
Start training ...
Step: 89, L: 1029.2589111328125, J: 19.613147735595703
Current x: (0.08943562481962763, 0.6404418361127509, -0.6274965787887574), u: [ 0.       25.208021], noise: [4.42894306 1.

Step: 90, L: 800.3207397460938, J: 21.28465461730957
Current x: (0.011435957925597044, 0.23713973963089735, -0.2174236685752869), u: [ 0.       23.489313], noise: [3.01542412 4.92598149]
Start training ...
Step: 90, L: 881.3578491210938, J: 15.760037422180176
Current x: (0.01471447213562385, 0.2664640481849763, -0.24679117488861085), u: [ 0.       23.778578], noise: [3.65304211 4.2031694 ]
Start training ...
Step: 90, L: 905.3984375, J: 20.448537826538086
Current x: (0.01867099305997208, 0.2978764295795103, -0.27869866819381717), u: [ 0.      24.02142], noise: [3.99287068 6.17595859]
Start training ...
Step: 90, L: 1736.52978515625, J: 25.650188446044922
Current x: (0.023400331685894515, 0.33137544055449764, -0.3130390319824219), u: [ 0.       24.245003], noise: [5.8497518  4.66369729]
Start training ...
Step: 90, L: 618.9174194335938, J: 19.14363670349121
Current x: (0.029070260384734446, 0.3671805509403089, -0.3499998465538025), u: [ 0.      24.43612], noise: [3.98133571 6.00279422]


Step: 91, L: 683.8496704101562, J: 14.99325180053711
Current x: (0.0005000377098151767, 0.0861281010636592, -0.052477987194061285), u: [ 0.       18.820078], noise: [4.00054559 3.65767106]
Start training ...
Step: 91, L: 1286.421875, J: 25.683996200561523
Current x: (0.0008364324616093176, 0.10083478595358593, -0.06511103115081787), u: [ 0.     19.7757], noise: [6.85277202 5.81531541]
Start training ...
Step: 91, L: 1034.650390625, J: 21.170372009277344
Current x: (0.0013117162035782045, 0.11720465513686418, -0.07959179544448852), u: [ 0.       20.796896], noise: [3.90542773 5.27266025]
Start training ...
Step: 91, L: 1088.9215087890625, J: 22.58069610595703
Current x: (0.001998095557121201, 0.1358310282268466, -0.09594638404846191), u: [ 0.       21.555733], noise: [2.72391375 3.61125548]
Start training ...
Step: 91, L: 1612.8818359375, J: 19.13683319091797
Current x: (0.0029227993727168654, 0.15646441029244834, -0.11451738538742065), u: [ 0.      22.12649], noise: [4.44107418 5.15859

Step: 92, L: 1663.6868896484375, J: 20.262981414794922
Current x: (-3.5475966925715936e-05, 0.01402695735085769, 0.0017375914096832267), u: [0.        7.6216702], noise: [4.29141666 4.55806378]
Start training ...
Step: 92, L: 2275.5810546875, J: 19.784683227539062
Current x: (-5.2357308707059095e-05, 0.018108848866655782, 0.0007322983741760245), u: [0.        8.6456995], noise: [4.17546249 4.89724918]
Start training ...
Step: 92, L: 1569.6400146484375, J: 22.408897399902344
Current x: (-7.210066215789643e-05, 0.02285685303114267, -0.0010618264198303232), u: [0.       9.813821], noise: [4.24620857 5.15837946]
Start training ...
Step: 92, L: 1642.0853271484375, J: 25.183673858642578
Current x: (-9.314153181238925e-05, 0.028395697769739374, -0.0037926998138427745), u: [ 0.       11.119479], noise: [4.74993711 4.30946052]
Start training ...
Step: 92, L: 2125.11767578125, J: 18.977294921875
Current x: (-0.00011214174045212629, 0.03487538228338156, -0.007596172428131105), u: [ 0.       12.43

Step: 93, L: 1971.93701171875, J: 21.84345245361328
Current x: (1.9826796780516863e-07, 0.007026596376067187, -0.0016822266101837158), u: [0.54769933 5.811336  ], noise: [4.23517611 4.43244937]
Start training ...
Step: 93, L: 1047.10009765625, J: 20.552587509155273
Current x: (1.5645748872160502e-06, 0.00976405383356722, -0.0029481935977935794), u: [0.1325698 6.642587 ], noise: [5.21675456 5.7272248 ]
Start training ...
Step: 93, L: 1333.8641357421875, J: 25.33176612854004
Current x: (5.458705500431056e-06, 0.013023175256799555, -0.004760251617431641), u: [0.        7.8384933], noise: [5.67731704 5.45057678]
Start training ...
Step: 93, L: 720.7853393554688, J: 18.711103439331055
Current x: (1.4576772666427824e-05, 0.017073202507880546, -0.007274358415603638), u: [0.       9.161756], noise: [3.59303765 4.33151292]
Start training ...
Step: 93, L: 1536.1903076171875, J: 23.41335678100586
Current x: (3.272328307671483e-05, 0.022038846949594574, -0.010549640512466431), u: [ 0.       10.251

Step: 94, L: 1573.0712890625, J: 23.016536712646484
Current x: (-1.0848554157786627e-06, 0.0041320072555670285, 0.000945009231567383), u: [0.86080974 5.064039  ], noise: [5.53220084 4.86628746]
Start training ...
Step: 94, L: 1463.9876708984375, J: 24.60034942626953
Current x: (-3.0715647726578368e-06, 0.006445206422392224, 0.0009531934261322024), u: [0.3385838 6.0481606], noise: [3.74508638 5.17253949]
Start training ...
Step: 94, L: 1576.7215576171875, J: 22.68561553955078
Current x: (-6.6008443678802986e-06, 0.009409738615838971, 0.0006076460361480716), u: [0.        6.9614086], noise: [5.31278671 3.90192072]
Start training ...
Step: 94, L: 1812.646240234375, J: 16.938968658447266
Current x: (-1.1588926313599467e-05, 0.012923707202037955, -0.00045160436630248984), u: [0.        7.9336967], noise: [5.2483162  4.44024059]
Start training ...
Step: 94, L: 1551.114501953125, J: 19.718002319335938
Current x: (-1.755994341702055e-05, 0.017074286993199676, -0.0020659090042114254), u: [0.   

Step: 95, L: 996.21875, J: 22.611034393310547
Current x: (0.0, 0.0012961138076782227, 0.0004635436534881592), u: [1.9483733 3.0067277], noise: [3.1924617  4.42120417]
Start training ...
Step: 95, L: 1344.657958984375, J: 22.293399810791016
Current x: (-1.8494301381828474e-07, 0.0022564298138754144, 0.0007696191310882568), u: [1.7143699 3.450389 ], noise: [5.85888898 5.19171379]
Start training ...
Step: 95, L: 2068.2275390625, J: 20.717905044555664
Current x: (-9.525032054509807e-07, 0.0034926223444316667, 0.0008469849109649658), u: [1.1993617 4.3706846], noise: [4.36002886 4.00652874]
Start training ...
Step: 95, L: 1659.789794921875, J: 21.552236557006836
Current x: (-2.968028450603705e-06, 0.005369350458906954, 0.0008174663543701172), u: [0.8540366 5.014148 ], noise: [4.01960307 2.87596839]
Start training ...
Step: 95, L: 1400.1259765625, J: 24.2442626953125
Current x: (-6.163962845990448e-06, 0.007658738428102099, 0.0005061655044555663), u: [0.5996357 5.500088 ], noise: [4.40961257 

Current x: (0.4780155352810846, 1.4022709963541744, -1.5258997507572176), u: [ 0.      24.85712], noise: [4.39893746 3.63139695]
Start training ...
Step: 95, L: 1977.1318359375, J: 22.429550170898438
Simulation ends in 50 steps
Episode 96 begins...
Current x: (0.0, 0.0, 0.0), u: [2.722841 1.750293], noise: [3.35337266 4.6074527 ]
Start training ...
Step: 96, L: 910.0238037109375, J: 25.461769104003906
Current x: (0.0, 0.0, 0.0), u: [2.5143218 2.1238954], noise: [5.15795989 5.70761147]
Start training ...
Step: 96, L: 1917.8087158203125, J: 23.905195236206055
Current x: (0.0, 0.00026239599609375, -2.8153181076049808e-05), u: [2.059411  2.9257338], noise: [4.44233785 4.92070242]
Start training ...
Step: 96, L: 1295.9814453125, J: 24.750497817993164
Current x: (0.0, 0.0010941707916259764, -7.222886085510255e-05), u: [1.6861687 3.5945177], noise: [4.7618843  5.39239558]
Start training ...
Step: 96, L: 1356.7022705078125, J: 16.27056121826172
Current x: (4.0394703869681364e-08, 0.00237976404

Step: 96, L: 973.6966552734375, J: 23.23969841003418
Current x: (0.32892617172481964, 1.1957572590845729, -1.2367509477138516), u: [ 0.       25.090714], noise: [3.55343244 5.54131281]
Start training ...
Step: 96, L: 2105.306640625, J: 20.775875091552734
Current x: (0.36877097582370416, 1.2584624213709108, -1.3147197649955746), u: [ 0.       25.043833], noise: [4.05649703 5.18065922]
Start training ...
Step: 96, L: 1337.9423828125, J: 24.13622283935547
Current x: (0.4118453613665865, 1.3213074136446394, -1.395396441698074), u: [ 0.       24.989563], noise: [3.52737313 5.07955979]
Start training ...
Step: 96, L: 1256.822021484375, J: 26.16556167602539
Current x: (0.4582360593257626, 1.3840396987881294, -1.4786899177551267), u: [ 0.       24.922968], noise: [4.1051476  5.21557501]
Start training ...
Step: 96, L: 1500.947509765625, J: 20.104122161865234
Current x: (0.5079348592905594, 1.4463772491841058, -1.5646375688076017), u: [ 0.      24.84418], noise: [4.00020683 5.06756607]
Start tr

Step: 97, L: 1760.337646484375, J: 24.631000518798828
Current x: (0.12828566001861277, 0.7269929477599674, -0.7441931769371033), u: [ 0.       24.913496], noise: [4.37039399 4.86375195]
Start training ...
Step: 97, L: 2031.4150390625, J: 20.815696716308594
Current x: (0.14895676095396643, 0.7807494287131285, -0.803112928724289), u: [ 0.       24.935846], noise: [4.65078937 4.50170865]
Start training ...
Step: 97, L: 1232.21435546875, J: 23.723175048828125
Current x: (0.17194094989294598, 0.8360369287395323, -0.8645733659744264), u: [ 0.       24.948465], noise: [3.11222251 3.34180044]
Start training ...
Step: 97, L: 1412.1051025390625, J: 27.568927764892578
Current x: (0.19737786820723902, 0.8927107629141743, -0.9285124798297884), u: [ 0.       24.946964], noise: [4.84281624 3.16060877]
Start training ...
Step: 97, L: 1738.431640625, J: 27.36739158630371
Current x: (0.22520394583909964, 0.9504415079617821, -0.9949693980216981), u: [ 0.       24.942097], noise: [5.18187058 3.63002358]
S

Step: 98, L: 1841.7799072265625, J: 24.577129364013672
Current x: (0.03693183264239172, 0.421293900804604, -0.3965124744892121), u: [ 0.      24.44547], noise: [4.31395609 4.4409732 ]
Start training ...
Step: 98, L: 1202.0849609375, J: 24.215614318847656
Current x: (0.044982963482947606, 0.462259285018104, -0.43852669320106513), u: [ 0.       24.561348], noise: [4.73004094 4.40789605]
Start training ...
Step: 98, L: 1523.9576416015625, J: 26.03272247314453
Current x: (0.054316306294985045, 0.5053061190563497, -0.4829981606006623), u: [ 0.       24.661062], noise: [2.48624518 6.63628303]
Start training ...
Step: 98, L: 1642.098876953125, J: 23.685611724853516
Current x: (0.06508054099752003, 0.5504230128365771, -0.5298935482501984), u: [ 0.       24.739017], noise: [5.48876382 4.09829764]
Start training ...
Step: 98, L: 1411.9716796875, J: 22.978313446044922
Current x: (0.07741380886187499, 0.5975508033350698, -0.5796700459003449), u: [ 0.       24.809212], noise: [4.6240171  4.01353756

Step: 99, L: 1151.941162109375, J: 19.593036651611328
Current x: (0.03444786746384762, 0.40348570406643364, -0.3769808719158173), u: [ 0.       24.550562], noise: [6.64473091 5.207066  ]
Start training ...
Step: 99, L: 1762.70751953125, J: 23.385753631591797
Current x: (0.04218134492501779, 0.44460925975653076, -0.4179456080436707), u: [ 0.       24.693596], noise: [5.43476511 3.30778769]
Start training ...
Step: 99, L: 1163.1878662109375, J: 21.376760482788086
Current x: (0.05125484791838198, 0.4881364349211207, -0.46122163376808173), u: [ 0.       24.802288], noise: [3.50042885 4.26234166]
Start training ...
Step: 99, L: 854.4678955078125, J: 24.213258743286133
Current x: (0.061685469894576156, 0.5337384224270176, -0.5067543212890626), u: [ 0.       24.889011], noise: [4.43381454 3.88343253]
Start training ...
Step: 99, L: 1617.581787109375, J: 23.721548080444336
Current x: (0.07356537517781303, 0.5812756419202119, -0.5548434289455415), u: [ 0.       24.965069], noise: [2.71961011 6.

Step: 100, L: 1183.1309814453125, J: 20.78302764892578
Current x: (0.0055066886282412915, 0.16615533959035617, -0.1482482174396515), u: [ 0.       22.396225], noise: [4.09045316 5.45409576]
Start training ...
Step: 100, L: 1689.7882080078125, J: 24.547016143798828
Current x: (0.007363042721224714, 0.19025992702694924, -0.1719308407306671), u: [ 0.       22.842382], noise: [4.25409527 3.93258209]
Start training ...
Step: 100, L: 2189.896484375, J: 22.46336555480957
Current x: (0.00969118053209813, 0.21654255702869707, -0.19798945088386535), u: [ 0.       23.195385], noise: [3.47311188 3.98330233]
Start training ...
Step: 100, L: 1378.71142578125, J: 30.807132720947266
Current x: (0.012550179145476969, 0.24490134464478994, -0.2263001480579376), u: [ 0.       23.486439], noise: [3.83847622 4.98571631]
Start training ...
Step: 100, L: 2890.96533203125, J: 25.019447326660156
Current x: (0.016012093943216944, 0.275284431015158, -0.2569814028263092), u: [ 0.       23.748169], noise: [4.252501

Step: 101, L: 1516.4071044921875, J: 25.538436889648438
Current x: (0.0009016630918515132, 0.09039943846044438, -0.05854072012901306), u: [ 0.      18.59504], noise: [6.61185311 5.7973656 ]
Start training ...
Step: 101, L: 2832.288330078125, J: 28.538806915283203
Current x: (0.0013406184449934995, 0.10553503001693561, -0.07202430410385131), u: [ 0.      19.65215], noise: [3.36134418 3.17422581]
Start training ...
Step: 101, L: 1905.666015625, J: 29.497121810913086
Current x: (0.0019609713023747886, 0.12278473620652615, -0.0872859432697296), u: [ 0.      20.36073], noise: [5.51947262 5.76340199]
Start training ...
Step: 101, L: 1248.0048828125, J: 22.577831268310547
Current x: (0.0027697763546448233, 0.14166542482622893, -0.10449408569335936), u: [ 0.       21.119976], noise: [5.43181943 5.73674822]
Start training ...
Step: 101, L: 2567.328857421875, J: 23.92226219177246
Current x: (0.0038544349995614435, 0.16271742715379897, -0.12376269412040708), u: [ 0.       21.756393], noise: [3.82

Step: 102, L: 1456.4775390625, J: 24.919906616210938
Current x: (-1.8933970993702702e-05, 0.012052505633161775, 0.0016529850482940676), u: [0.12010245 6.425964  ], noise: [3.37219418 4.4861645 ]
Start training ...
Step: 102, L: 2280.462646484375, J: 30.85362434387207
Current x: (-3.0497670179694434e-05, 0.015908003803495274, 0.0009760354518890383), u: [0.        7.1214848], noise: [4.07504477 3.3212307 ]
Start training ...
Step: 102, L: 1968.376708984375, J: 21.011608123779297
Current x: (-4.4442398141823525e-05, 0.02022294247266168, -0.000442897319793701), u: [0.       7.801505], noise: [3.81966105 5.26573352]
Start training ...
Step: 102, L: 1533.1015625, J: 24.805522918701172
Current x: (-5.9804110750189766e-05, 0.025008656477993853, -0.002498597192764282), u: [0.      8.78528], noise: [4.38239654 4.79766649]
Start training ...
Step: 102, L: 1144.084716796875, J: 27.2298583984375
Current x: (-7.441790711034297e-05, 0.030502060312512985, -0.005479054832458496), u: [0.       9.851492]

Step: 103, L: 1246.31396484375, J: 24.640291213989258
Current x: (0.0, 0.0, 0.0), u: [2.9983447 1.8066918], noise: [4.35393325 4.89048832]
Start training ...
Step: 103, L: 1427.606201171875, J: 20.617408752441406
Current x: (0.0, 0.0003773549499511718, 0.00011104130744934082), u: [2.6617234 2.3389065], noise: [4.56338168 4.77796354]
Start training ...
Step: 103, L: 1494.3564453125, J: 21.577362060546875
Current x: (0.0, 0.0011786556549072263, 0.00028759236335754396), u: [2.2960327 2.9202812], noise: [3.72960826 4.64750268]
Start training ...
Step: 103, L: 1723.7498779296875, J: 22.673904418945312
Current x: (-1.5925516756845673e-07, 0.0024331538722310465, 0.0004749669551849365), u: [1.9826882 3.4382577], noise: [6.44538424 5.12152193]
Start training ...
Step: 103, L: 3245.091796875, J: 29.656612396240234
Current x: (-7.094468460987931e-07, 0.004065994513045499, 0.0005081272602081299), u: [1.4002603 4.3573   ], noise: [4.31226175 3.21635432]
Start training ...
Step: 103, L: 647.77026367

Step: 103, L: 2032.0047607421875, J: 19.143905639648438
Current x: (0.3464878068216972, 1.3069457301614638, -1.2719107945919035), u: [ 0.       24.718033], noise: [3.9021177  3.45261866]
Start training ...
Step: 103, L: 2145.147216796875, J: 22.90819549560547
Current x: (0.3874664874622955, 1.3708354534921072, -1.3509537440299986), u: [ 0.      24.67702], noise: [6.18112123 4.37637151]
Start training ...
Step: 103, L: 1493.57470703125, J: 22.306427001953125
Current x: (0.43151025117836894, 1.4346885766107509, -1.432423547029495), u: [ 0.       24.625937], noise: [4.4007063  5.23509411]
Start training ...
Step: 103, L: 2621.455078125, J: 30.697708129882812
Current x: (0.47899266296436227, 1.4983290798709472, -1.5161805768966672), u: [ 0.       24.559643], noise: [3.95446469 5.46100441]
Start training ...
Step: 103, L: 1262.658203125, J: 22.200328826904297
Simulation ends in 51 steps
Episode 104 begins...
Current x: (0.0, 0.0, 0.0), u: [3.1505966 1.7243788], noise: [4.48728288 5.76767299

Current x: (0.188431372365671, 0.9003266935219637, -0.9288470740318301), u: [ 0.       24.586342], noise: [5.14495803 4.84954429]
Start training ...
Step: 104, L: 1668.3134765625, J: 27.262529373168945
Current x: (0.21537160960727508, 0.9576268247348856, -0.9954255679130557), u: [ 0.       24.579592], noise: [4.07487072 5.01446071]
Start training ...
Step: 104, L: 842.06201171875, J: 25.733095169067383
Current x: (0.24508153097629617, 1.0160165109844808, -1.0644331545829775), u: [ 0.       24.560432], noise: [5.18804565 4.21786943]
Start training ...
Step: 104, L: 1546.53662109375, J: 25.77921485900879
Current x: (0.27761624333226487, 1.0752572778142377, -1.1359926594734193), u: [ 0.       24.534128], noise: [3.37539534 3.32074329]
Start training ...
Step: 104, L: 1596.89794921875, J: 26.83960723876953
Current x: (0.31312136076843894, 1.1351644124743836, -1.2099111899375916), u: [ 0.      24.49384], noise: [4.37932442 4.68316348]
Start training ...
Step: 104, L: 1630.218505859375, J: 1

Step: 105, L: 1667.385986328125, J: 19.54300308227539
Current x: (0.047328017247975085, 0.5271315280308817, -0.4573691021919251), u: [ 0.       24.044346], noise: [4.83516108 4.41483073]
Start training ...
Step: 105, L: 2019.943359375, J: 24.835878372192383
Current x: (0.05693905008735835, 0.5721925564292875, -0.5029093866825104), u: [ 0.      24.14479], noise: [4.16281061 3.76730461]
Start training ...
Step: 105, L: 1334.428955078125, J: 26.28640365600586
Current x: (0.0680203247808344, 0.6192598107096376, -0.5508120727539063), u: [ 0.       24.224802], noise: [3.84481137 3.58615045]
Start training ...
Step: 105, L: 1429.1282958984375, J: 22.408859252929688
Current x: (0.08064753528558215, 0.6681564169303568, -0.6010896872997284), u: [ 0.       24.289385], noise: [3.49628942 5.62634205]
Start training ...
Step: 105, L: 1799.50634765625, J: 27.3699951171875
Current x: (0.09493154324758298, 0.7187694102387453, -0.6537639159202576), u: [ 0.      24.34655], noise: [5.76953406 5.1162181 ]


Step: 106, L: 1100.754638671875, J: 21.63923454284668
Current x: (0.017307227903173888, 0.2905261727460084, -0.2617417110919953), u: [ 0.       22.878914], noise: [5.98981514 4.16204261]
Start training ...
Step: 106, L: 1852.434814453125, J: 26.09526824951172
Current x: (0.021632649699591742, 0.32245963697011254, -0.294088187122345), u: [ 0.       23.157276], noise: [3.84659975 3.05675199]
Start training ...
Step: 106, L: 1764.2269287109375, J: 29.34014892578125
Current x: (0.0268127867923572, 0.35660267818271585, -0.3285397773265839), u: [ 0.       23.365648], noise: [3.95565455 4.49512148]
Start training ...
Step: 106, L: 2012.82080078125, J: 28.407283782958984
Current x: (0.03286428317776429, 0.39264172233755334, -0.36522811031341557), u: [ 0.       23.557482], noise: [4.81333847 4.35658924]
Start training ...
Step: 106, L: 1877.5068359375, J: 27.25311279296875
Current x: (0.039942372261145546, 0.43071123713352544, -0.4043069548130036), u: [ 0.       23.727228], noise: [4.29115569 4

Step: 107, L: 2781.183349609375, J: 23.8732852935791
Current x: (0.005662931960691614, 0.18414167843232906, -0.14797284708023073), u: [ 0.      21.38846], noise: [3.89508583 5.78348707]
Start training ...
Step: 107, L: 1349.462646484375, J: 26.983654022216797
Current x: (0.0074881286913500945, 0.20816064949155005, -0.17087729392051698), u: [ 0.       21.877361], noise: [5.5394875  4.72416465]
Start training ...
Step: 107, L: 1789.968994140625, J: 23.74518585205078
Current x: (0.009771357377411433, 0.23427137381113286, -0.19610942687988284), u: [ 0.       22.299393], noise: [5.87303193 4.42114869]
Start training ...
Step: 107, L: 1825.611572265625, J: 21.483413696289062
Current x: (0.01260113414535674, 0.2625683892552238, -0.22344776368141175), u: [ 0.       22.660181], noise: [5.34207391 2.70125366]
Start training ...
Step: 107, L: 928.9384765625, J: 30.493026733398438
Current x: (0.01606601241044721, 0.29308128710109166, -0.25287085142135624), u: [ 0.       22.941942], noise: [4.18544

Step: 108, L: 1924.9581298828125, J: 26.489803314208984
Current x: (0.00021567555678624664, 0.06057541665757584, -0.03245394558906556), u: [ 0.       14.249254], noise: [4.67714477 4.82475066]
Start training ...
Step: 108, L: 1622.9512939453125, J: 28.048274993896484
Current x: (0.00037395536248956455, 0.07103109135978564, -0.04100910606384278), u: [ 0.        15.4216385], noise: [5.50031843 5.82923694]
Start training ...
Step: 108, L: 1857.4837646484375, J: 21.974773406982422
Current x: (0.0006093034911218104, 0.0828796303797611, -0.051003952550888076), u: [ 0.       16.639118], noise: [4.83104917 3.21961854]
Start training ...
Step: 108, L: 1470.754638671875, J: 23.13629913330078
Current x: (0.0009543251284189487, 0.09642003967827312, -0.06257385473251345), u: [ 0.       17.579002], noise: [5.25400369 5.5573135 ]
Start training ...
Step: 108, L: 1604.521484375, J: 24.62405014038086
Current x: (0.0014252198369956416, 0.11144521676901482, -0.07564652562141422), u: [ 0.       18.569899]

Step: 109, L: 577.88818359375, J: 23.12004280090332
Current x: (-9.719767391127589e-05, 0.021686998670927365, 0.0012799481868743905), u: [0.       8.051468], noise: [3.89994713 3.56214479]
Start training ...
Step: 109, L: 1217.940673828125, J: 27.401718139648438
Current x: (-0.00013027562067319053, 0.026831550575157893, -0.0007292411327362051), u: [0.       8.810788], noise: [5.70583449 4.51602727]
Start training ...
Step: 109, L: 1382.6705322265625, J: 23.043033599853516
Current x: (-0.00016533922223010142, 0.03254645723813112, -0.0035097970485687244), u: [0.       9.904232], noise: [4.70232855 3.89662606]
Start training ...
Step: 109, L: 2709.637939453125, J: 22.796661376953125
Current x: (-0.00019901488478597622, 0.03918362839442287, -0.00705245108604431), u: [ 0.       10.914223], noise: [5.42874245 5.36916848]
Start training ...
Step: 109, L: 1908.0701904296875, J: 26.99079132080078
Current x: (-0.00022619631750005965, 0.046690106871945095, -0.011504958105087278), u: [ 0.       12

Step: 110, L: 1594.513916015625, J: 23.595821380615234
Current x: (-1.2807705812932358e-06, 0.004148578975505276, 0.001910205364227295), u: [1.5606296 4.244913 ], noise: [4.85748271 3.861957  ]
Start training ...
Step: 110, L: 1505.38330078125, J: 20.272777557373047
Current x: (-4.274717899801046e-06, 0.006439357459237934, 0.002201831865310669), u: [1.1531304 4.8483377], noise: [3.59214235 3.6938825 ]
Start training ...
Step: 110, L: 1103.285888671875, J: 24.659286499023438
Current x: (-1.004323347054837e-05, 0.009201631538211465, 0.0023245826244354248), u: [0.8478281 5.3437996], noise: [2.90391353 5.59756493]
Start training ...
Step: 110, L: 1224.0328369140625, J: 20.694637298583984
Current x: (-1.873742917247419e-05, 0.012311651671463343, 0.002067638635635376), u: [0.42586607 6.0613375 ], noise: [5.9726664  5.70286463]
Start training ...
Step: 110, L: 2236.388916015625, J: 26.530717849731445
Current x: (-3.084715561869386e-05, 0.015909978404634907, 0.001091732358932495), u: [0.      

Step: 110, L: 2903.63134765625, J: 31.796157836914062
Simulation ends in 51 steps
Episode 111 begins...
Current x: (0.0, 0.0, 0.0), u: [3.5170758 1.5875154], noise: [4.20866148 3.38266629]
Start training ...
Step: 111, L: 1342.427734375, J: 25.23365592956543
Current x: (0.0, 0.0, 0.0), u: [3.2800634 1.8767381], noise: [4.68311356 3.52410969]
Start training ...
Step: 111, L: 1134.138427734375, J: 21.475374221801758
Current x: (0.0, 0.0002885919036865235, 0.0002755555629730225), u: [2.9842567 2.2512205], noise: [4.12410219 4.51726355]
Start training ...
Step: 111, L: 2263.541015625, J: 26.11566162109375
Current x: (0.0, 0.0009325863189697265, 0.0008073440074920654), u: [2.641369  2.7306414], noise: [5.40800136 4.544991  ]
Start training ...
Step: 111, L: 1534.71435546875, J: 25.358043670654297
Current x: (-3.8238410986478304e-07, 0.0019832649314468244, 0.0013731199264526367), u: [2.1737187 3.374973 ], noise: [4.03276693 3.81576067]
Start training ...
Step: 111, L: 1080.870361328125, J: 2

Current x: (0.20141681984650134, 0.9779557883645196, -0.9557653708457948), u: [ 0.       24.233696], noise: [3.22821292 5.3629132 ]
Start training ...
Step: 111, L: 1143.928955078125, J: 24.77112579345703
Current x: (0.22939366269200073, 1.036233119240913, -1.0225664055824282), u: [ 0.      24.22971], noise: [4.92839265 3.92435172]
Start training ...
Step: 111, L: 2165.831787109375, J: 27.38988494873047
Current x: (0.2600514911678768, 1.0954233891597553, -1.0920042799949647), u: [ 0.      24.21866], noise: [3.12093647 4.35565828]
Start training ...
Step: 111, L: 3547.023193359375, J: 28.880170822143555
Current x: (0.2935327363910795, 1.1553568417685645, -1.1637647213935853), u: [ 0.       24.194302], noise: [5.28099434 5.84218705]
Start training ...
Step: 111, L: 1382.6322021484375, J: 28.571537017822266
Current x: (0.32982710059920484, 1.2157695182126198, -1.2380705008506776), u: [ 0.      24.16383], noise: [4.99571311 3.89976366]
Start training ...
Step: 111, L: 2065.767333984375, J:

Step: 112, L: 927.3923950195312, J: 18.053442001342773
Current x: (0.03630614961328369, 0.44675937007412225, -0.3999506415367126), u: [ 0.       23.213646], noise: [6.51183429 2.63470877]
Start training ...
Step: 112, L: 1542.324462890625, J: 28.736309051513672
Current x: (0.04411291892443402, 0.48684290066471125, -0.44137660498619075), u: [ 0.       23.364326], noise: [3.48505852 5.96768094]
Start training ...
Step: 112, L: 1525.597900390625, J: 21.76227378845215
Current x: (0.05317970617615553, 0.5289260641007781, -0.48473622055053706), u: [ 0.     23.4921], noise: [3.75426534 5.02907676]
Start training ...
Step: 112, L: 1905.61181640625, J: 20.628368377685547
Current x: (0.06364838778255093, 0.5729954300161588, -0.530680531167984), u: [ 0.       23.596155], noise: [4.35665004 5.15288801]
Start training ...
Step: 112, L: 1734.0419921875, J: 29.21533203125
Current x: (0.07562102434104885, 0.6189395204238312, -0.5791015330314636), u: [ 0.       23.687315], noise: [3.32681853 4.31318497

Step: 113, L: 2127.650634765625, J: 23.11658477783203
Current x: (0.0012347255030682141, 0.12746554440660213, -0.08306985263824464), u: [ 0.       18.115284], noise: [4.44745449 5.16523333]
Start training ...
Step: 113, L: 1306.7755126953125, J: 20.726009368896484
Current x: (0.0018983486769573485, 0.1449074579802332, -0.09918565139770509), u: [ 0.       18.890171], noise: [3.24604118 5.24428489]
Start training ...
Step: 113, L: 1894.7490234375, J: 22.377029418945312
Current x: (0.0027920428880044636, 0.16413160727438836, -0.11718475646972658), u: [ 0.       19.552687], noise: [5.51246125 4.657311  ]
Start training ...
Step: 113, L: 1295.3924560546875, J: 26.46695899963379
Current x: (0.003956867277349871, 0.185099349120551, -0.1372727031707764), u: [ 0.       20.175388], noise: [3.60598354 4.86220518]
Start training ...
Step: 113, L: 1269.5322265625, J: 26.734424591064453
Current x: (0.005469196950306952, 0.2080379522618221, -0.1592304035186768), u: [ 0.      20.69009], noise: [5.3327

Current x: (-0.00032940071142127214, 0.04937007611084062, -0.004900687456130981), u: [ 0.       11.342823], noise: [4.98347856 3.09036823]
Start training ...
Step: 114, L: 996.5382690429688, J: 19.775850296020508
Current x: (-0.0003915375370116464, 0.05778606588551096, -0.009895667314529418), u: [ 0.       12.296849], noise: [5.39652518 4.64781638]
Start training ...
Step: 114, L: 1544.700439453125, J: 21.469528198242188
Current x: (-0.00044415889765532377, 0.06716269932853289, -0.015835618448257448), u: [ 0.       13.426782], noise: [4.12634894 4.64318277]
Start training ...
Step: 114, L: 1342.7315673828125, J: 24.182653427124023
Current x: (-0.0004746725204173213, 0.07779234241906609, -0.022930383634567262), u: [ 0.       14.469362], noise: [5.11810725 5.19622433]
Start training ...
Step: 114, L: 2243.822265625, J: 28.45490264892578
Current x: (-0.0004700383764831718, 0.08966033859627544, -0.03141951050758362), u: [ 0.       15.564203], noise: [5.10016927 4.62261549]
Start training .

Step: 115, L: 1321.7340087890625, J: 19.444679260253906
Current x: (-4.210412247196797e-06, 0.01548106366727074, -0.0013044795036315918), u: [0.        6.6338987], noise: [4.61038444 4.60392454]
Start training ...
Step: 115, L: 1379.505126953125, J: 27.59484100341797
Current x: (-5.090198509585817e-06, 0.01952080887525958, -0.002703748941421509), u: [0.        7.4093623], noise: [2.48098437 3.93838086]
Start training ...
Step: 115, L: 2249.2021484375, J: 24.095027923583984
Current x: (-3.902619176421419e-06, 0.024164373482205695, -0.004765762281417847), u: [0.       7.999806], noise: [3.49240831 2.84161744]
Start training ...
Step: 115, L: 1450.526123046875, J: 26.29320526123047
Current x: (1.0238963953092502e-06, 0.029209805806793075, -0.007714451456069947), u: [0.       8.604886], noise: [4.20530565 4.53164899]
Start training ...
Step: 115, L: 1900.46240234375, J: 21.22231674194336
Current x: (1.2781549596199814e-05, 0.03470760503227156, -0.011398042106628418), u: [0.       9.510387]

Step: 116, L: 1748.230224609375, J: 25.347618103027344
Current x: (0.0, 0.0006326688232421875, 0.00035231523513793946), u: [2.56301   2.9635904], noise: [3.71368102 5.66242512]
Start training ...
Step: 116, L: 2349.832275390625, J: 25.452503204345703
Current x: (0.0, 0.001896594841003418, 0.0008632022857666016), u: [2.1218042 3.5734687], noise: [5.40538994 4.99904385]
Start training ...
Step: 116, L: 2022.69775390625, J: 25.053123474121094
Current x: (-5.250450311484655e-07, 0.003669791380897989, 0.0011391568660736083), u: [1.5763242 4.291794 ], noise: [3.67058524 4.05523489]
Start training ...
Step: 116, L: 1140.1116943359375, J: 23.023056030273438
Current x: (-2.4398202477375855e-06, 0.006071957985961413, 0.0013105795860290526), u: [1.2477856 4.7692356], noise: [4.26418697 4.5416885 ]
Start training ...
Step: 116, L: 1294.379150390625, J: 25.542789459228516
Current x: (-5.903158004638763e-06, 0.008852517591748382, 0.001171990394592285), u: [0.8170105 5.3904285], noise: [4.63921786 5.

Step: 116, L: 799.273193359375, J: 17.84609603881836
Current x: (0.4404330401854856, 1.3369515175262572, -1.4181569736957547), u: [ 0.      23.37441], noise: [3.22535988 3.71817914]
Start training ...
Step: 116, L: 2705.3759765625, J: 33.036231994628906
Current x: (0.4871552843540311, 1.3952299138140924, -1.4988056959152218), u: [ 0.       23.297636], noise: [3.95900293 3.41389807]
Start training ...
Step: 116, L: 1130.56005859375, J: 23.05692481994629
Simulation ends in 51 steps
Episode 117 begins...
Current x: (0.0, 0.0, 0.0), u: [3.9036076 1.4944342], noise: [3.69623215 5.1391472 ]
Start training ...
Step: 117, L: 1849.48828125, J: 21.695276260375977
Current x: (0.0, 0.0, 0.0), u: [3.5502093 1.9489095], noise: [4.04947323 5.07487296]
Start training ...
Step: 117, L: 1557.3154296875, J: 19.655986785888672
Current x: (0.0, 0.0004423421325683592, 9.662580490112305e-05), u: [3.1605816 2.45553  ], noise: [3.96902272 6.70236167]
Start training ...
Step: 117, L: 1649.61328125, J: 24.709011

Step: 117, L: 1392.443603515625, J: 24.22008514404297
Current x: (0.1235621231802908, 0.8007752074240142, -0.7382997566223143), u: [ 0.       23.491262], noise: [2.22500903 4.40710902]
Start training ...
Step: 117, L: 3195.17236328125, J: 25.3460693359375
Current x: (0.14293984467810053, 0.853588321580569, -0.7955409741401671), u: [ 0.      23.52898], noise: [3.94155749 3.22037799]
Start training ...
Step: 117, L: 1181.208251953125, J: 27.84174156188965
Current x: (0.16434496403695895, 0.9076484028348119, -0.8553495279312132), u: [ 0.       23.560122], noise: [4.61663627 4.79327901]
Start training ...
Step: 117, L: 2000.122802734375, J: 30.606897354125977
Current x: (0.18794215855523885, 0.9628755366448194, -0.9174388619422911), u: [ 0.       23.584988], noise: [4.51266494 3.07055233]
Start training ...
Step: 117, L: 2843.32470703125, J: 25.59756851196289
Current x: (0.21402793242550772, 1.0192843557689482, -0.9819018724441526), u: [ 0.       23.595667], noise: [3.00964522 4.181289  ]


Step: 118, L: 2477.65087890625, J: 29.95609474182129
Current x: (0.01095456520589768, 0.3017552593715064, -0.22491421813964846), u: [ 0.       21.105146], noise: [4.19946111 5.02657608]
Start training ...
Step: 118, L: 1614.5074462890625, J: 23.746335983276367
Current x: (0.014196455230399273, 0.3319418875530278, -0.25318342962265017), u: [ 0.       21.460463], noise: [5.02896938 4.29586415]
Start training ...
Step: 118, L: 2494.427978515625, J: 27.817829132080078
Current x: (0.018114799644880872, 0.36410423975305084, -0.2836458673477173), u: [ 0.       21.774124], noise: [4.71993992 4.4396751 ]
Start training ...
Step: 118, L: 1364.831298828125, J: 23.045576095581055
Current x: (0.022804276198147316, 0.3982659777345481, -0.31618104085922244), u: [ 0.      22.04741], noise: [4.48519729 2.7349185 ]
Start training ...
Step: 118, L: 2389.5078125, J: 23.762557983398438
Current x: (0.028359457200937965, 0.4344164829983301, -0.3508656002044678), u: [ 0.       22.268282], noise: [3.99797757 3

Step: 119, L: 2251.26708984375, J: 22.817001342773438
Current x: (-0.00011678080541948282, 0.09664643460663408, -0.03586503596305847), u: [ 0.       14.074751], noise: [4.83425301 3.69990627]
Start training ...
Step: 119, L: 1856.13916015625, J: 28.06167984008789
Current x: (1.3432570880686381e-05, 0.10963189505188888, -0.045488637638092036), u: [ 0.       14.931394], noise: [4.70761209 2.91729208]
Start training ...
Step: 119, L: 1703.97802734375, J: 29.675735473632812
Current x: (0.0002247155002142549, 0.12389579252076613, -0.05640627961158752), u: [ 0.      15.71013], noise: [3.39788684 4.56411659]
Start training ...
Step: 119, L: 2433.10546875, J: 32.43476867675781
Current x: (0.0005385685711514472, 0.139431986429512, -0.06863802900314331), u: [ 0.       16.490803], noise: [5.1175573  4.88154124]
Start training ...
Step: 119, L: 1526.9573974609375, J: 20.28818702697754
Current x: (0.000985876540041224, 0.15635062874205927, -0.08255741438865662), u: [ 0.      17.29364], noise: [5.06

Step: 120, L: 1703.1885986328125, J: 21.23351287841797
Current x: (-1.2437255459076249e-05, 0.009659394861953207, 0.003920439672470093), u: [1.1975791 5.1154838], noise: [4.56587457 4.54626565]
Start training ...
Step: 120, L: 1793.039306640625, J: 25.280420303344727
Current x: (-2.494515716854999e-05, 0.013392775708907265, 0.00437091064453125), u: [0.7935599 5.7043233], noise: [2.83759607 4.69467506]
Start training ...
Step: 120, L: 2215.984619140625, J: 22.0725154876709
Current x: (-4.3500401293357925e-05, 0.01768766503406201, 0.00443155198097229), u: [0.50698334 6.2017074 ], noise: [4.64371646 5.94419241]
Start training ...
Step: 120, L: 2444.845703125, J: 21.060806274414062
Current x: (-6.818808093761882e-05, 0.022404556379852614, 0.0038154090881347656), u: [0.      6.98862], noise: [3.32840767 3.87786632]
Start training ...
Step: 120, L: 2145.90234375, J: 30.04256248474121
Current x: (-0.00010054081384414634, 0.027870090775754128, 0.0024997461795806886), u: [0.        7.4964705], 

Step: 120, L: 1898.800537109375, J: 27.435089111328125
Current x: (0.23477040437244423, 1.104857733258661, -1.0284007427215576), u: [ 0.       23.107681], noise: [2.28207561 4.71821555]
Start training ...
Step: 120, L: 1197.9310302734375, J: 29.06960105895996
Current x: (0.26502762686029874, 1.1626142069948067, -1.0956355618476867), u: [ 0.       23.091717], noise: [4.55856384 3.79120157]
Start training ...
Step: 120, L: 1444.719970703125, J: 25.47966766357422
Current x: (0.2978635206657289, 1.22094382186903, -1.1654247631072998), u: [ 0.       23.071392], noise: [4.70841509 4.11002721]
Start training ...
Step: 120, L: 2109.47998046875, J: 20.753440856933594
Current x: (0.33349525125343055, 1.2797308260275269, -1.237446399784088), u: [ 0.       23.037975], noise: [4.65339875 4.02164686]
Start training ...
Step: 120, L: 2957.78759765625, J: 26.866119384765625
Current x: (0.37205751717410235, 1.3387944384929316, -1.3117153367996215), u: [ 0.       22.989828], noise: [3.9971837  4.4707451

Step: 121, L: 1740.30615234375, J: 26.56126594543457
Current x: (0.04857846262050257, 0.5419986719082229, -0.4443503678798676), u: [ 0.      22.00599], noise: [5.9926208  3.31523243]
Start training ...
Step: 121, L: 2237.224853515625, J: 20.112239837646484
Current x: (0.057858176006183384, 0.5836123329790522, -0.4855731444358826), u: [ 0.      22.18613], noise: [3.74770898 4.12040949]
Start training ...
Step: 121, L: 1844.03515625, J: 23.55835723876953
Current x: (0.06848398201223187, 0.6270722899575975, -0.5287287811756135), u: [ 0.       22.331305], noise: [4.32410336 4.39550523]
Start training ...
Step: 121, L: 1894.879150390625, J: 25.577030181884766
Current x: (0.08051246588151073, 0.6722092675393785, -0.5741403009414674), u: [ 0.      22.46332], noise: [5.60013711 3.20236537]
Start training ...
Step: 121, L: 2377.15234375, J: 26.89565658569336
Current x: (0.09410727003635679, 0.7190463333032385, -0.6217920913219454), u: [ 0.       22.579838], noise: [2.75302614 3.61450318]
Start 

Step: 122, L: 1684.62548828125, J: 23.9888916015625
Current x: (0.00046571232847508585, 0.13622887515732235, -0.0650466724395752), u: [ 0.       14.584159], noise: [4.04851702 3.65124745]
Start training ...
Step: 122, L: 2344.87841796875, J: 26.560333251953125
Current x: (0.0008685188337462073, 0.15173008558271633, -0.0777380292892456), u: [ 0.     15.2859], noise: [5.88755525 4.85385186]
Start training ...
Step: 122, L: 2362.39208984375, J: 21.797897338867188
Current x: (0.0014161726565724132, 0.16847397583865767, -0.09184807510375977), u: [ 0.       16.083881], noise: [3.82524818 4.0454417 ]
Start training ...
Step: 122, L: 3048.738037109375, J: 25.190746307373047
Current x: (0.0021659538975260127, 0.18683173620240306, -0.10738334054946899), u: [ 0.       16.736895], noise: [5.67746384 3.94163821]
Start training ...
Step: 122, L: 873.3627319335938, J: 26.13602066040039
Current x: (0.003135444059944614, 0.20659385681388856, -0.1245490135192871), u: [ 0.      17.40299], noise: [3.80503

Step: 123, L: 736.549560546875, J: 21.60189437866211
Current x: (-8.41346675212239e-05, 0.04058468013066453, -0.0034085014820098886), u: [0.      8.24073], noise: [4.46370561 4.05869383]
Start training ...
Step: 123, L: 1499.9677734375, J: 22.890188217163086
Current x: (-9.908281742660448e-05, 0.048015421165677086, -0.0063037871360778815), u: [0.       8.866252], noise: [4.47536297 3.26320015]
Start training ...
Step: 123, L: 2362.492431640625, J: 27.502971649169922
Current x: (-0.00010831726298663594, 0.05614146548189417, -0.009982644653320313), u: [0.      9.46187], noise: [4.35906611 5.16920267]
Start training ...
Step: 123, L: 2832.486572265625, J: 30.168956756591797
Current x: (-0.00010708445624838264, 0.06494695825927871, -0.014426911067962648), u: [ 0.       10.229972], noise: [4.47313916 4.14220065]
Start training ...
Step: 123, L: 2004.2005615234375, J: 27.19387435913086
Current x: (-8.689478340286223e-05, 0.07467037031680526, -0.01989837818145752), u: [ 0.      10.95337], noi

Step: 123, L: 2015.4473876953125, J: 31.870006561279297
Current x: (0.45556187988907104, 1.44365424100265, -1.438290040588379), u: [ 0.       22.292292], noise: [4.84943764 3.31783208]
Start training ...
Step: 123, L: 1100.5474853515625, J: 24.721282958984375
Current x: (0.5023117670350157, 1.5012772193363355, -1.5173362420082093), u: [ 0.       22.165123], noise: [4.89031622 5.84991905]
Start training ...
Step: 123, L: 2922.75830078125, J: 23.867965698242188
Simulation ends in 55 steps
Episode 124 begins...
Current x: (0.0, 0.0, 0.0), u: [4.0991087 1.808793 ], noise: [3.3576902  4.71928391]
Start training ...
Step: 124, L: 2184.615966796875, J: 25.66799545288086
Current x: (0.0, 0.0, 0.0), u: [3.8485901 2.12724  ], noise: [4.86109875 3.85176241]
Start training ...
Step: 124, L: 1376.88330078125, J: 19.751964569091797
Current x: (0.0, 0.00041748756790161145, 9.287223815917969e-05), u: [3.5431376 2.4796426], noise: [3.72504907 3.0629504 ]
Start training ...
Step: 124, L: 1726.4499511718

Step: 124, L: 2373.99560546875, J: 26.456214904785156
Current x: (0.07948272547479662, 0.7116908225301563, -0.5856588931560517), u: [ 0.       21.917603], noise: [2.08115766 5.15762156]
Start training ...
Step: 124, L: 1946.798095703125, J: 26.72438621520996
Current x: (0.09313852568427596, 0.7587761211594587, -0.634071958732605), u: [ 0.       22.023163], noise: [2.04496403 5.59598216]
Start training ...
Step: 124, L: 1393.53271484375, J: 26.037492752075195
Current x: (0.10840594077490427, 0.8073101612069005, -0.6849844309329987), u: [ 0.       22.111965], noise: [4.75076492 4.18084842]
Start training ...
Step: 124, L: 2353.735107421875, J: 18.92407989501953
Current x: (0.12543074698593937, 0.8572530066121561, -0.738454321193695), u: [ 0.       22.198523], noise: [3.9449251  5.62478036]
Start training ...
Step: 124, L: 1615.4296875, J: 25.953310012817383
Current x: (0.14441955923539215, 0.9086189564900602, -0.7940784163951873), u: [ 0.       22.265276], noise: [5.19387236 3.59852936]


Step: 125, L: 2327.213623046875, J: 21.843048095703125
Current x: (0.0009077406983407717, 0.16568863943706572, -0.07999830908775331), u: [ 0.       14.534522], noise: [2.89546156 5.30973996]
Start training ...
Step: 125, L: 1323.4912109375, J: 22.135820388793945
Current x: (0.0014946967346936894, 0.18326947071027685, -0.09434966368675234), u: [ 0.       15.172729], noise: [2.29548506 4.16832176]
Start training ...
Step: 125, L: 1852.8009033203125, J: 22.067237854003906
Current x: (0.002263372740348075, 0.20213600177997953, -0.11039589819908144), u: [ 0.       15.722241], noise: [3.78533306 2.42938362]
Start training ...
Step: 125, L: 1417.169921875, J: 25.303258895874023
Current x: (0.003235885996023621, 0.22217556328643898, -0.12814668936729434), u: [ 0.      16.23292], noise: [5.45911914 5.25263756]
Start training ...
Step: 125, L: 1899.228759765625, J: 30.75873374938965
Current x: (0.004450082664937187, 0.24341446663032287, -0.1473341096401215), u: [ 0.       16.871006], noise: [6.0

Step: 126, L: 1369.1083984375, J: 21.19464874267578
Current x: (-5.861376154302882e-05, 0.05644037040773395, -0.005321676063537598), u: [0.4660228 8.153514 ], noise: [4.51053185 3.86426245]
Start training ...
Step: 126, L: 1258.0794677734375, J: 19.553884506225586
Current x: (-6.377731089067797e-05, 0.06520673383361678, -0.008792483282089234), u: [0.22818878 8.679684  ], noise: [3.72126844 3.62775282]
Start training ...
Step: 126, L: 1152.212646484375, J: 22.90009117126465
Current x: (-5.9897070284151586e-05, 0.07469150633136888, -0.012967412662506105), u: [0.043677 9.16081 ], noise: [3.28860868 5.10261683]
Start training ...
Step: 126, L: 1250.625244140625, J: 22.878131866455078
Current x: (-4.172316771678321e-05, 0.08482090530612015, -0.017978140020370487), u: [0.       9.730298], noise: [3.51305422 5.13365024]
Start training ...
Step: 126, L: 1543.2535400390625, J: 18.88643455505371
Current x: (-7.328178897401797e-07, 0.09572872760982915, -0.024081981563568118), u: [ 0.      10.3310

Step: 126, L: 1866.7607421875, J: 24.434288024902344
Current x: (0.3982779079334333, 1.3688799769574167, -1.2904633680343627), u: [ 0.      21.14011], noise: [2.02529213 5.05523961]
Start training ...
Step: 126, L: 2355.49951171875, J: 20.168048858642578
Current x: (0.4404117545830667, 1.4258846637783744, -1.3618619604587554), u: [ 0.       20.960445], noise: [4.79545751 2.51649845]
Start training ...
Step: 126, L: 2580.26904296875, J: 22.57488250732422
Current x: (0.4852575013820504, 1.4826891469255608, -1.4356775586128234), u: [ 0.       20.768421], noise: [3.55757075 4.1238723 ]
Start training ...
Step: 126, L: 1593.802978515625, J: 25.45899200439453
Current x: (0.5328690029597166, 1.539099049329834, -1.5113613053798676), u: [ 0.       20.531462], noise: [4.34341662 4.62269375]
Start training ...
Step: 126, L: 800.1275024414062, J: 20.441396713256836
Simulation ends in 58 steps
Episode 127 begins...
Current x: (0.0, 0.0, 0.0), u: [4.411324  1.7515173], noise: [4.76833551 4.90932677]

Step: 127, L: 1313.1739501953125, J: 25.20242691040039
Current x: (0.06207156424268782, 0.7061291045694457, -0.5127880531311036), u: [ 0.       20.210352], noise: [4.83519178 5.99805241]
Start training ...
Step: 127, L: 2530.4462890625, J: 16.769712448120117
Current x: (0.07327773379591845, 0.7512776367915059, -0.5557897009372712), u: [ 0.       20.384514], noise: [4.48677944 3.74788719]
Start training ...
Step: 127, L: 1399.1025390625, J: 17.802946090698242
Current x: (0.08600692889328582, 0.7981502460705512, -0.6009286700248719), u: [ 0.       20.512482], noise: [4.60919748 6.41198981]
Start training ...
Step: 127, L: 2543.638671875, J: 27.140907287597656
Current x: (0.10024611279667281, 0.8464730089320582, -0.6480322013378145), u: [ 0.      20.63747], noise: [4.14460872 5.320019  ]
Start training ...
Step: 127, L: 1306.518798828125, J: 25.451263427734375
Current x: (0.11626823773422838, 0.8964157031211383, -0.6973672599792482), u: [ 0.       20.726248], noise: [2.80607498 2.468326  

Step: 128, L: 504.2203063964844, J: 26.139328002929688
Current x: (0.00029369208861994327, 0.25592456755660764, -0.08675739245414735), u: [ 0.       14.143985], noise: [5.60528034 5.57633148]
Start training ...
Step: 128, L: 1344.4246826171875, J: 17.755414962768555
Current x: (0.0009031103072940382, 0.2781759064955538, -0.1012772478580475), u: [ 0.       14.718463], noise: [5.16904833 5.09632662]
Start training ...
Step: 128, L: 2498.2294921875, J: 24.3613338470459
Current x: (0.0017319712646662031, 0.3019692799049148, -0.11720860686302187), u: [ 0.       15.243483], noise: [2.27025464 3.56954633]
Start training ...
Step: 128, L: 1233.5625, J: 21.35358238220215
Current x: (0.002813429313127747, 0.327267234913379, -0.13460453991889956), u: [ 0.       15.615931], noise: [5.42876422 3.50654653]
Start training ...
Step: 128, L: 2886.1162109375, J: 22.861793518066406
Current x: (0.004141436163440832, 0.35367805278404313, -0.15365475039482118), u: [ 0.       16.063154], noise: [5.70004484 5

Step: 129, L: 1087.4095458984375, J: 28.70684051513672
Current x: (-0.00033554148587702684, 0.046865808850610514, 0.011111748790740967), u: [2.1764162 6.274021 ], noise: [4.73311943 2.16752561]
Start training ...
Step: 129, L: 1218.95263671875, J: 20.24441909790039
Current x: (-0.00044280127120564935, 0.055142721695134546, 0.011336298942565918), u: [2.052302  6.5865417], noise: [4.02550562 3.40303357]
Start training ...
Step: 129, L: 1674.483154296875, J: 25.994531631469727
Current x: (-0.0005671184420301869, 0.06397364795480659, 0.011407647991180419), u: [1.94331   6.9481587], noise: [4.84595147 4.58025851]
Start training ...
Step: 129, L: 1649.4447021484375, J: 25.735227584838867
Current x: (-0.0007096496882073718, 0.07343020925452141, 0.011087820243835448), u: [1.7913569 7.4219413], noise: [3.9728078  4.47935999]
Start training ...
Step: 129, L: 1541.577880859375, J: 22.727569580078125
Current x: (-0.0008730766439482018, 0.0837374192126479, 0.010294076871871947), u: [1.6782079 7.864

Current x: (0.25037612612578486, 1.5074945378980482, -1.060138659191132), u: [ 0.62916356 19.313704  ], noise: [1.65819729 2.74312956]
Start training ...
Step: 129, L: 1206.5546875, J: 20.33502769470215
Current x: (0.2811312367956577, 1.5692561344734752, -1.1230545688629154), u: [ 0.76856714 19.185888  ], noise: [5.62284352 4.90462175]
Start training ...
Step: 129, L: 1731.753662109375, J: 17.40428924560547
Current x: (0.314010191612028, 1.6312265562701822, -1.187947425746918), u: [ 0.920897 19.056847], noise: [4.9260294  5.52128572]
Start training ...
Step: 129, L: 1801.745361328125, J: 18.61536407470703
Current x: (0.34963686921226156, 1.6935356348857717, -1.2546101924896242), u: [ 1.1011755 18.89138  ], noise: [4.43703568 4.65116665]
Start training ...
Step: 129, L: 1918.494384765625, J: 18.745220184326172
Current x: (0.38808578786092246, 1.7560002860813704, -1.3231460799217227), u: [ 1.3031715 18.6973   ], noise: [4.23342039 3.58408067]
Start training ...
Step: 129, L: 1996.9698486

Step: 130, L: 1401.150390625, J: 25.80548858642578
Current x: (0.0036584441664503694, 0.5097474857173733, -0.17922332582473757), u: [ 1.0749893 16.262499 ], noise: [5.10025029 4.95708487]
Start training ...
Step: 130, L: 2222.471435546875, J: 27.280841827392578
Current x: (0.005808188124216261, 0.545699354306387, -0.20109786138534547), u: [ 1.029463 16.596731], noise: [4.50863793 4.9481397 ]
Start training ...
Step: 130, L: 1424.5462646484375, J: 24.23409652709961
Current x: (0.008446286973548676, 0.5833658253906934, -0.2244768313407898), u: [ 0.99735713 16.896431  ], noise: [4.61907217 4.54508369]
Start training ...
Step: 130, L: 2358.173095703125, J: 27.011247634887695
Current x: (0.011625355147998999, 0.6227050156917695, -0.24945647821426392), u: [ 0.96792924 17.172981  ], noise: [2.84189468 4.3214408 ]
Start training ...
Step: 130, L: 1832.693359375, J: 20.265132904052734
Current x: (0.015406723307316308, 0.6637011140163639, -0.2760186336517334), u: [ 0.96571076 17.392042  ], noise

Step: 131, L: 2274.265380859375, J: 30.355613708496094
Current x: (-0.000538371429593201, 0.06344490124914869, 0.014396910762786864), u: [2.8517778 6.3423796], noise: [4.82004225 4.12552018]
Start training ...
Step: 131, L: 2368.552734375, J: 24.287473678588867
Current x: (-0.0006895054107207175, 0.0730228222395396, 0.015007237291336057), u: [2.7766056 6.729657 ], noise: [6.02610397 3.37681315]
Start training ...
Step: 131, L: 1978.7265625, J: 30.914566040039062
Current x: (-0.000866754081273983, 0.08343352714219676, 0.015337955904006956), u: [2.6694317 7.136122 ], noise: [3.98423815 5.68665408]
Start training ...
Step: 131, L: 2341.45947265625, J: 23.871566772460938
Current x: (-0.0010723791413130488, 0.09475393708400238, 0.015538298463821409), u: [2.6249273 7.5827813], noise: [4.76978894 3.61150773]
Start training ...
Step: 131, L: 2805.81103515625, J: 27.765064239501953
Current x: (-0.0013078759173095818, 0.1070407625509591, 0.01512173042297363), u: [2.5610943 7.9732714], noise: [2.

Step: 131, L: 1328.140625, J: 19.125289916992188
Current x: (0.2579489762553496, 1.8561532691016116, -0.9636918828964233), u: [ 2.6240149 17.987656 ], noise: [5.85417368 4.37819545]
Start training ...
Step: 131, L: 2395.3916015625, J: 23.829641342163086
Current x: (0.28821799153679184, 1.9274551923176273, -1.0177324220657349), u: [ 2.7469673 17.912348 ], noise: [4.04394502 4.41331943]
Start training ...
Step: 131, L: 1416.3604736328125, J: 20.55133819580078
Current x: (0.32102023814731717, 1.9995357424876765, -1.0731617274284364), u: [ 2.8945918 17.803055 ], noise: [4.53465431 4.8072278 ]
Start training ...
Step: 131, L: 2100.935546875, J: 19.83238410949707
Simulation ends in 65 steps
Episode 132 begins...
Current x: (0.0, 0.0, 0.0), u: [4.4236383 1.8492023], noise: [4.74885701 5.78291951]
Start training ...
Step: 132, L: 2048.83935546875, J: 28.794483184814453
Current x: (0.0, 0.0, 0.0), u: [4.3533278 2.1483266], noise: [5.10999121 3.28255688]
Start training ...
Step: 132, L: 2622.959

Step: 132, L: 1880.1640625, J: 20.989704132080078
Current x: (-0.00752217000404724, 0.723460439872101, -0.1512902342796326), u: [ 2.8508399 15.450839 ], noise: [3.59645482 4.36074181]
Start training ...
Step: 132, L: 1920.89794921875, J: 21.718528747558594
Current x: (-0.006188678557807159, 0.7683535751125833, -0.17034491891860964), u: [ 2.844986 15.650696], noise: [3.17820737 4.93226666]
Start training ...
Step: 132, L: 1240.6856689453125, J: 27.23774528503418
Current x: (-0.0044594297502759984, 0.8148616034975431, -0.19073603219985966), u: [ 2.846657 15.834541], noise: [4.50534378 3.9559927 ]
Start training ...
Step: 132, L: 1622.8946533203125, J: 18.95522117614746
Current x: (-0.0022791473147692983, 0.8630107386148337, -0.21258312234878543), u: [ 2.8331203 16.020231 ], noise: [3.39459664 4.52867432]
Start training ...
Step: 132, L: 1682.5062255859375, J: 23.084129333496094
Current x: (0.00041570774689104724, 0.9128439042584392, -0.23567406587600712), u: [ 2.8343277 16.181269 ], nois

Step: 133, L: 2343.01025390625, J: 28.97956085205078
Current x: (-0.0005384922012147942, 0.1475816242373903, 0.0014174802303314204), u: [4.021939 8.093228], noise: [5.60669626 5.84633248]
Start training ...
Step: 133, L: 1902.4979248046875, J: 17.72951889038086
Current x: (-0.0006300944554538405, 0.1638902054835411, -0.00019065361022949263), u: [4.0102024 8.535494 ], noise: [5.44615691 5.22953237]
Start training ...
Step: 133, L: 1488.7568359375, J: 26.59392738342285
Current x: (-0.0007250374536704866, 0.181574603896274, -0.0022298799991607673), u: [3.9967697 8.95233  ], noise: [4.64311865 4.79293302]
Start training ...
Step: 133, L: 2155.18994140625, J: 17.242572784423828
Current x: (-0.0008195377277831139, 0.20060014086238448, -0.004699973106384278), u: [3.99624  9.332183], noise: [2.86943103 3.11790521]
Start training ...
Step: 133, L: 2160.329345703125, J: 28.743385314941406
Current x: (-0.0009090463860033208, 0.2208831873540828, -0.007680603647232056), u: [4.0202374 9.616988 ], no

Step: 134, L: 1264.6990966796875, J: 17.021240234375
Current x: (-2.729892198705776e-07, 0.003068529358384236, 0.001359104108810425), u: [4.134566  2.7646024], noise: [4.62809359 5.73980464]
Start training ...
Step: 134, L: 2611.2109375, J: 34.019287109375
Current x: (-1.526124009003241e-06, 0.005351933937177992, 0.0022518630981445313), u: [4.147137  3.0410523], noise: [5.34334918 5.05086077]
Start training ...
Step: 134, L: 1540.001220703125, J: 23.23233413696289
Current x: (-5.126032240298022e-06, 0.008381043616773429, 0.0031704473018646243), u: [4.1369896 3.3228042], noise: [3.35504572 4.44297913]
Start training ...
Step: 134, L: 1739.05078125, J: 19.123199462890625
Current x: (-1.268525275681468e-05, 0.01218738877528098, 0.00422888879776001), u: [4.152233  3.5271957], noise: [5.44836294 3.57115648]
Start training ...
Step: 134, L: 2149.48876953125, J: 23.924911499023438
Current x: (-2.5081876030715148e-05, 0.016538508087621438, 0.005259955501556397), u: [4.116109  3.7710736], noise

Step: 134, L: 2398.14697265625, J: 26.64804458618164
Current x: (0.008932374336865757, 1.1552254645389217, -0.22283693881034847), u: [ 4.4392333 14.738809 ], noise: [5.36031507 4.4754385 ]
Start training ...
Step: 134, L: 2813.201416015625, J: 26.8181095123291
Current x: (0.012911850719817585, 1.2138153935277955, -0.24263582391738886), u: [ 4.4257174 14.876206 ], noise: [3.80912786 3.63414976]
Start training ...
Step: 134, L: 2814.22607421875, J: 20.650442123413086
Current x: (0.01753252412259584, 1.2742539636198442, -0.26337617888450615), u: [ 4.4282694 14.9846325], noise: [3.62551236 5.09918715]
Start training ...
Step: 134, L: 3453.09716796875, J: 22.532678604125977
Current x: (0.022795783273151873, 1.3363077119379294, -0.28514408502578725), u: [ 4.434273 15.091531], noise: [4.7177624  4.87717998]
Start training ...
Step: 134, L: 1980.2032470703125, J: 25.20805549621582
Current x: (0.028791581775586855, 1.4000971922041463, -0.3081149950504302), u: [ 4.4315343 15.20303  ], noise: [5.

Step: 135, L: 2268.81103515625, J: 20.4400577545166
Current x: (-0.005293633798320395, 0.21571600542277666, 0.025131708574295045), u: [4.9732237 8.308642 ], noise: [4.13371252 5.70330351]
Start training ...
Step: 135, L: 923.3685302734375, J: 26.903396606445312
Current x: (-0.0060222415249056695, 0.2364193331857613, 0.024390885639190674), u: [5.0390387 8.617348 ], noise: [4.24021215 5.2436414 ]
Start training ...
Step: 135, L: 1800.541259765625, J: 20.420228958129883
Current x: (-0.006808944834037593, 0.25845281901225353, 0.023159561777114868), u: [5.0932994 8.91435  ], noise: [5.49422632 3.96532107]
Start training ...
Step: 135, L: 2471.98388671875, J: 23.568321228027344
Current x: (-0.007652083641114166, 0.281818640520292, 0.021470064067840574), u: [5.110784 9.20734 ], noise: [4.12689268 6.61745794]
Start training ...
Step: 135, L: 1258.87646484375, J: 19.682971954345703
Current x: (-0.008549566590668946, 0.30654955244876014, 0.01955135178565979), u: [5.1722965 9.516796 ], noise: [2.

Step: 136, L: 1565.35986328125, J: 23.792463302612305
Current x: (-1.122364974345557e-06, 0.003911611792935835, 0.0014541619300842286), u: [4.2597537 2.7693627], noise: [4.54014775 3.40410521]
Start training ...
Step: 136, L: 2265.451904296875, J: 34.328956604003906
Current x: (-3.375171553401224e-06, 0.006226057396643413, 0.0021341412067413332), u: [4.264843  2.9247992], noise: [3.31370937 3.40216921]
Start training ...
Step: 136, L: 1820.67333984375, J: 15.690654754638672
Current x: (-7.805347768589548e-06, 0.009056838377065564, 0.0030767637729644777), u: [4.28498   3.0521717], noise: [3.77099371 5.0378813 ]
Start training ...
Step: 136, L: 1992.2916259765625, J: 20.077123641967773
Current x: (-1.520315614810448e-05, 0.012297168234728262, 0.004144544744491578), u: [4.3545814 3.2517884], noise: [3.33531463 3.52878895]
Start training ...
Step: 136, L: 2793.251220703125, J: 23.41107940673828
Current x: (-2.7568707665279936e-05, 0.016171093111231685, 0.0052089178562164315), u: [4.386382 

Step: 136, L: 1953.5828857421875, J: 23.676347732543945
Current x: (-0.023040268008148165, 1.094295340685905, -0.07435439352989197), u: [ 6.0535445 12.268288 ], noise: [2.94688233 4.04111396]
Start training ...
Step: 136, L: 2894.67041015625, J: 22.037296295166016
Current x: (-0.02325531136822002, 1.1515751728137968, -0.08447183427810669), u: [ 6.06526 12.36672], noise: [3.06453693 4.49628725]
Start training ...
Step: 136, L: 1773.27392578125, J: 33.189537048339844
Current x: (-0.0232823383800153, 1.2103979947699648, -0.09532017254829406), u: [ 6.0776577 12.462572 ], noise: [4.23847239 4.11302727]
Start training ...
Step: 136, L: 2763.3740234375, J: 22.386775970458984
Current x: (-0.023090060437580707, 1.2708298290259514, -0.10694183177947997), u: [ 6.076778 12.565096], noise: [4.71683277 6.36562545]
Start training ...
Step: 136, L: 1629.86962890625, J: 25.078584671020508
Current x: (-0.022641838060593306, 1.3329576286390863, -0.11918943800926207), u: [ 6.0784187 12.672069 ], noise: [5

Step: 137, L: 2213.050048828125, J: 24.132247924804688
Current x: (-0.005356413607068537, 0.28974927742707407, 0.037678240060806265), u: [6.259548  7.7574544], noise: [4.95977177 4.88096392]
Start training ...
Step: 137, L: 2487.49853515625, J: 30.36703109741211
Current x: (-0.006184549932537842, 0.3140107798275235, 0.038905400896072376), u: [6.326887 7.976514], noise: [5.1537288  2.95124694]
Start training ...
Step: 137, L: 2318.41357421875, J: 22.288734436035156
Current x: (-0.00710255675021252, 0.33967536279614347, 0.03999065194129943), u: [6.356533  8.1698065], noise: [4.34242511 5.87903483]
Start training ...
Step: 137, L: 1847.8948974609375, J: 20.300397872924805
Current x: (-0.008107722264200968, 0.36659808774823294, 0.04113118844032287), u: [6.435475 8.38002 ], noise: [5.16885475 4.75362626]
Start training ...
Step: 137, L: 2065.49658203125, J: 22.59848403930664
Current x: (-0.009211829462440695, 0.39501261395735976, 0.041936736631393426), u: [6.4813275 8.584061 ], noise: [4.02

Step: 138, L: 2304.974609375, J: 25.87458610534668
Current x: (-0.00011364651377918101, 0.031410520426763164, 0.009611044025421143), u: [4.948492  3.8848197], noise: [4.12259256 4.56332268]
Start training ...
Step: 138, L: 1869.787353515625, J: 22.757064819335938
Current x: (-0.00016813528482744196, 0.03826755405020812, 0.011156071853637695), u: [5.059257  4.0730057], noise: [4.87017678 4.78737088]
Start training ...
Step: 138, L: 1958.5616455078125, J: 26.067562103271484
Current x: (-0.0002394616019618677, 0.04589542936725374, 0.0127633939743042), u: [5.1758795 4.286085 ], noise: [5.84830396 4.34434256]
Start training ...
Step: 138, L: 2636.294189453125, J: 27.384967803955078
Current x: (-0.0003317495315326357, 0.05442116877652582, 0.014477621841430664), u: [5.2734127 4.511852 ], noise: [4.59264273 4.69216587]
Start training ...
Step: 138, L: 2998.885498046875, J: 32.75287628173828
Current x: (-0.0004491227326835941, 0.06393120906504014, 0.016431225299835206), u: [5.3927855 4.721897 ]

Step: 138, L: 1882.05126953125, J: 20.97710609436035
Current x: (-0.0736597115616531, 1.3184213984569504, 0.0689850790977478), u: [ 7.3589516 10.2254095], noise: [4.41124408 3.53415789]
Start training ...
Step: 138, L: 2150.851806640625, J: 23.66081428527832
Current x: (-0.07866869346247658, 1.3800227821646778, 0.06731161699295043), u: [ 7.3613033 10.291356 ], noise: [3.96855127 5.68668041]
Start training ...
Step: 138, L: 3337.209228515625, J: 25.971755981445312
Current x: (-0.08385365297485793, 1.4431900697761422, 0.06543921775817871), u: [ 7.3783255 10.352909 ], noise: [5.44441668 4.17579284]
Start training ...
Step: 138, L: 2098.9912109375, J: 15.89682388305664
Current x: (-0.08922228754875947, 1.5081009625043789, 0.06310200033187865), u: [ 7.374686 10.420712], noise: [3.61848467 4.40057958]
Start training ...
Step: 138, L: 1764.9580078125, J: 24.4344482421875
Current x: (-0.09476978011677757, 1.5747601454041578, 0.060594186973571766), u: [ 7.390086 10.469832], noise: [3.92174815 4

Step: 139, L: 2129.263427734375, J: 24.49243927001953
Current x: (-0.016955939068217418, 0.4104489431845349, 0.09654530496597288), u: [7.4192934 7.2189016], noise: [3.85242609 5.67355392]
Start training ...
Step: 139, L: 1431.269287109375, J: 23.856760025024414
Current x: (-0.019272793441859344, 0.44029909046728305, 0.10097917022705076), u: [7.4998784 7.354076 ], noise: [3.64341388 2.61903368]
Start training ...
Step: 139, L: 2748.57666015625, J: 19.330078125
Current x: (-0.02182257932745246, 0.47157340228936523, 0.10525096197128293), u: [7.5246344 7.468278 ], noise: [5.84053146 3.2792097 ]
Start training ...
Step: 139, L: 2100.97119140625, J: 27.47272491455078
Current x: (-0.024585234689798473, 0.5039675974451318, 0.10963977203369138), u: [7.5346026 7.602943 ], noise: [3.97155072 2.96513346]
Start training ...
Step: 139, L: 1705.3272705078125, J: 29.782093048095703
Current x: (-0.027601209744406486, 0.5377787145832927, 0.11429034986495969), u: [7.556605 7.713623], noise: [3.85118545 4

Current x: (-0.000314301429235413, 0.04911824676554401, 0.016397178220748904), u: [5.409974  3.8568754], noise: [4.91347762 3.30980222]
Start training ...
Step: 140, L: 1590.0606689453125, J: 20.951101303100586
Current x: (-0.00042599793451610654, 0.057457717794241345, 0.018916243219375614), u: [5.5099945 3.9944146], noise: [3.80708834 5.84959365]
Start training ...
Step: 140, L: 2184.1650390625, J: 21.355623245239258
Current x: (-0.0005663720316816805, 0.06656496664896061, 0.021750985670089727), u: [5.7172894 4.155177 ], noise: [4.05412035 3.89708345]
Start training ...
Step: 140, L: 2354.66064453125, J: 23.883441925048828
Current x: (-0.0007429895527825937, 0.07660698178382969, 0.024533035516738896), u: [5.8443737 4.2912426], noise: [4.54410449 5.00293477]
Start training ...
Step: 140, L: 1179.06494140625, J: 27.269174575805664
Current x: (-0.0009583722548893687, 0.08744994225498769, 0.027487000322341925), u: [6.0144634 4.4498444], noise: [3.62376415 5.25582737]
Start training ...
St

Step: 140, L: 2308.534912109375, J: 22.8704776763916
Current x: (-0.1190244305493551, 1.2714182219814676, 0.22084700760841372), u: [8.203145 8.053541], noise: [4.39195802 4.3704134 ]
Start training ...
Step: 140, L: 2912.54638671875, J: 26.844947814941406
Current x: (-0.12845467832261628, 1.3290547504313042, 0.22819812779426576), u: [8.200707 8.107197], noise: [4.0612091  4.04881366]
Start training ...
Step: 140, L: 1518.07275390625, J: 26.822040557861328
Current x: (-0.13843298392121148, 1.3881514190586028, 0.2355663628101349), u: [8.197872 8.156653], noise: [2.39265235 4.17003689]
Start training ...
Step: 140, L: 1680.21875, J: 27.440593719482422
Current x: (-0.1489636785305351, 1.4486455783558914, 0.24294518837928775), u: [8.209409 8.195732], noise: [5.91848187 3.76336862]
Start training ...
Step: 140, L: 2300.849853515625, J: 25.635284423828125
Current x: (-0.16002924660927015, 1.5103871671370228, 0.25015039734840394), u: [8.189234 8.247633], noise: [4.28041404 4.24917553]
Start tr

Step: 141, L: 1374.6763916015625, J: 19.21817398071289
Current x: (-0.010904123064677269, 0.32808209228325713, 0.08930790390968321), u: [7.796801 5.812174], noise: [3.42588343 2.7547581 ]
Start training ...
Step: 141, L: 2545.588134765625, J: 29.5357723236084
Current x: (-0.012599002247407785, 0.35446129774368274, 0.09579681620597838), u: [7.848265 5.892416], noise: [2.50297837 3.00931003]
Start training ...
Step: 141, L: 2646.341552734375, J: 29.546781539916992
Current x: (-0.014470383493563388, 0.3818305780098615, 0.10255130381584165), u: [7.9091234 5.963475 ], noise: [4.06324901 3.64757516]
Start training ...
Step: 141, L: 1928.5379638671875, J: 22.515525817871094
Current x: (-0.016525920091705806, 0.410135327750133, 0.10945074324607847), u: [7.968725 6.045422], noise: [4.10402127 5.38490542]
Start training ...
Step: 141, L: 1964.002197265625, J: 33.149192810058594
Current x: (-0.018802409751966682, 0.439606080399944, 0.11658631482124326), u: [8.059336 6.128188], noise: [4.17088907 

Step: 142, L: 2378.858642578125, J: 30.83829116821289
Current x: (-1.2768785291806973e-05, 0.008256782152393581, 0.00505060248374939), u: [4.4816914 2.841251 ], noise: [5.2929936 4.7902653]
Start training ...
Step: 142, L: 2021.5203857421875, J: 28.414764404296875
Current x: (-2.6276996039350503e-05, 0.011533746967898505, 0.006550680875778199), u: [4.676382  2.9579298], noise: [4.30114427 4.62696738]
Start training ...
Step: 142, L: 1830.635009765625, J: 24.61978530883789
Current x: (-4.85763493136487e-05, 0.015570309623966652, 0.00826507616043091), u: [4.859111  3.0596435], noise: [4.30142709 5.92033207]
Start training ...
Step: 142, L: 1483.525390625, J: 28.655284881591797
Current x: (-8.172514021620821e-05, 0.0202820791149114, 0.010118734407424928), u: [5.115373  3.1789322], noise: [5.01562861 4.93987926]
Start training ...
Step: 142, L: 2576.614990234375, J: 34.077606201171875
Current x: (-0.00012986703370609394, 0.025826838083313982, 0.011990448856353761), u: [5.326091  3.2966344]

Current x: (-0.13126941932960426, 1.1320538065116654, 0.3253899484157563), u: [8.946566 5.952285], noise: [4.10501312 5.15688776]
Start training ...
Step: 142, L: 2408.55126953125, J: 31.49032974243164
Current x: (-0.142874288273831, 1.1847411100142813, 0.3392998845577241), u: [8.94677  5.981019], noise: [1.6634029  5.68309978]
Start training ...
Step: 142, L: 1815.6103515625, J: 26.193222045898438
Current x: (-0.15525152400112727, 1.2387367078638267, 0.3534040612697602), u: [8.971157 6.001002], noise: [4.80715065 3.99664846]
Start training ...
Step: 142, L: 1416.0919189453125, J: 23.838285446166992
Current x: (-0.16837010824718174, 1.2938517445273505, 0.36740284333229073), u: [8.948088  6.0330687], noise: [4.98007834 4.74149676]
Start training ...
Step: 142, L: 1710.9346923828125, J: 28.81220054626465
Current x: (-0.1823115629729132, 1.3502164417308165, 0.3817796911716462), u: [8.93117  6.062693], noise: [3.70645054 5.18496646]
Start training ...
Step: 142, L: 2401.6591796875, J: 30.3

Step: 143, L: 2574.728515625, J: 22.411834716796875
Current x: (-0.009341598790689727, 0.3339795481961631, 0.09790898056030274), u: [8.937129  4.5595636], noise: [5.32935384 3.98674404]
Start training ...
Step: 143, L: 2191.785400390625, J: 18.63516616821289
Current x: (-0.01100933308565009, 0.36153342925169324, 0.10603749346733093), u: [8.994951  4.6065474], noise: [3.15745891 4.89039164]
Start training ...
Step: 143, L: 2052.03271484375, J: 27.006282806396484
Current x: (-0.012900068394642668, 0.3903766636686429, 0.11473802394866944), u: [9.079529 4.640004], noise: [5.83844025 4.8120602 ]
Start training ...
Step: 143, L: 2169.12353515625, J: 22.68680763244629
Current x: (-0.015019938016464972, 0.42039167319693027, 0.12370410161018372), u: [9.139103 4.682391], noise: [3.79088262 5.03685916]
Start training ...
Step: 143, L: 2797.619140625, J: 26.638153076171875
Current x: (-0.017418811470511435, 0.4518466622944348, 0.13321676969528198), u: [9.20827   4.7146006], noise: [5.18363483 3.22

Step: 144, L: 2314.1865234375, J: 20.392391204833984
Current x: (-3.3169619067177926e-05, 0.02143618348984163, 0.006890014600753784), u: [5.37387   2.9952955], noise: [4.13356345 3.98203614]
Start training ...
Step: 144, L: 1040.8079833984375, J: 18.317474365234375
Current x: (-5.7250297439331136e-05, 0.02696799520863624, 0.008661302423477173), u: [5.5582232 3.0605686], noise: [5.74846646 5.04525979]
Start training ...
Step: 144, L: 2201.029296875, J: 30.564502716064453
Current x: (-9.268891247971416e-05, 0.03316724420901342, 0.01068560037612915), u: [5.82339   3.1454396], noise: [5.07238411 4.05325719]
Start training ...
Step: 144, L: 1857.7430419921875, J: 29.084430694580078
Current x: (-0.00014494108582597889, 0.0403266721500395, 0.013029984426498413), u: [6.0237274 3.2181408], noise: [5.3337066  3.61554045]
Start training ...
Step: 144, L: 1288.3201904296875, J: 21.826181411743164
Current x: (-0.0002165279198107325, 0.04831444388663247, 0.015744076204299927), u: [6.2005377 3.289986

Step: 144, L: 1505.55712890625, J: 30.73291778564453
Current x: (-0.146480847131803, 1.1130069956972206, 0.4383277592658997), u: [9.265332 4.322882], noise: [5.62877862 3.04693654]
Start training ...
Step: 144, L: 1548.901611328125, J: 25.871835708618164
Current x: (-0.16038035665371356, 1.1631522235836036, 0.45849372811317446), u: [9.204753 4.335491], noise: [4.37859561 4.17337533]
Start training ...
Step: 144, L: 1857.4293212890625, J: 22.079524993896484
Current x: (-0.1752248050185481, 1.2143323671655826, 0.479412126159668), u: [9.168924  4.3409023], noise: [4.29617895 3.5978094 ]
Start training ...
Step: 144, L: 1879.1124267578125, J: 32.06962585449219
Current x: (-0.19104705017292695, 1.2665125642275985, 0.5008379724025727), u: [9.122497  4.3485126], noise: [4.40762261 3.83183603]
Start training ...
Step: 144, L: 1345.5284423828125, J: 24.00102424621582
Current x: (-0.20785656264652572, 1.319610849468992, 0.5228164577960969), u: [9.076524  4.3548684], noise: [5.38797869 3.79750671

Step: 145, L: 2095.55517578125, J: 27.235233306884766
Current x: (-0.01411554473723058, 0.34707689217952475, 0.1356951590538025), u: [9.390142  3.2308054], noise: [5.06359171 5.16732958]
Start training ...
Step: 145, L: 1453.8563232421875, J: 24.4654541015625
Current x: (-0.016413772747896523, 0.3739067477961837, 0.14755625925064086), u: [9.479005  3.2257905], noise: [3.04585956 2.87955767]
Start training ...
Step: 145, L: 2524.94677734375, J: 20.63384246826172
Current x: (-0.01902113882209635, 0.402019783858858, 0.16002291936874388), u: [9.490744 3.239989], noise: [4.06107832 5.21099085]
Start training ...
Step: 145, L: 2261.43701171875, J: 18.038101196289062
Current x: (-0.021902408871432803, 0.4309945963312212, 0.17313153109550475), u: [9.565378  3.2347429], noise: [4.61420853 5.66804308]
Start training ...
Step: 145, L: 1807.002197265625, J: 21.75033950805664
Current x: (-0.025134273399436684, 0.4611605774029101, 0.1867502269744873), u: [9.641144  3.2270658], noise: [4.07216049 2.5

Step: 146, L: 2737.27734375, J: 23.640151977539062
Current x: (-7.977040712378735e-06, 0.008764124692702522, 0.003026855707168579), u: [4.9777813 2.57068  ], noise: [4.04524101 4.62971207]
Start training ...
Step: 146, L: 2190.646240234375, J: 27.265592575073242
Current x: (-1.5886867276714237e-05, 0.012121214655897683, 0.004079114770889282), u: [5.233673  2.5884156], noise: [4.94173884 4.21906605]
Start training ...
Step: 146, L: 2209.4560546875, J: 27.979320526123047
Current x: (-2.8707280084379444e-05, 0.01611963872474476, 0.005313636922836304), u: [5.4774265 2.611865 ], noise: [4.36730651 3.38590738]
Start training ...
Step: 146, L: 2030.583251953125, J: 22.31537437438965
Current x: (-4.845519142356393e-05, 0.020835338154287543, 0.006884952116012574), u: [5.6510024 2.6372983], noise: [3.63453728 4.85475959]
Start training ...
Step: 146, L: 1595.616943359375, J: 29.415367126464844
Current x: (-7.662119534244721e-05, 0.02615426576393564, 0.008840963411331178), u: [5.918579  2.6510663

Step: 146, L: 1139.0548095703125, J: 29.580642700195312
Current x: (-0.09558315940955332, 0.8260429047785768, 0.41351789717674253), u: [9.638473  2.5203264], noise: [4.3746189  5.36541895]
Start training ...
Step: 146, L: 1594.301025390625, J: 29.590652465820312
Current x: (-0.10603331684082139, 0.8672104926167152, 0.43696136007308956), u: [9.61751   2.5073383], noise: [3.55235574 5.29468175]
Start training ...
Step: 146, L: 2120.43603515625, J: 30.34511375427246
Current x: (-0.11736344219370536, 0.9094023850945484, 0.4610175575256347), u: [9.593992 2.495646], noise: [5.97331205 5.24749392]
Start training ...
Step: 146, L: 2161.3203125, J: 24.38591194152832
Current x: (-0.12958107318707734, 0.9525134179957018, 0.48561053953170774), u: [9.544958  2.4846096], noise: [2.55478376 4.10407794]
Start training ...
Step: 146, L: 3317.08056640625, J: 35.63744354248047
Current x: (-0.1428356917987653, 0.9967311349288471, 0.5109859380245209), u: [9.490113 2.486712], noise: [4.01684659 5.45225937]


Step: 147, L: 3283.244384765625, J: 25.99266815185547
Current x: (-0.0010294536875254806, 0.08963219328950386, 0.04035310745239258), u: [7.6643343 2.4287016], noise: [4.7730526  4.54272277]
Start training ...
Step: 147, L: 2827.2626953125, J: 27.468965530395508
Current x: (-0.0013612776240852368, 0.1017461656513024, 0.04643507533073426), u: [7.8851523 2.4057815], noise: [4.88776502 3.10344597]
Start training ...
Step: 147, L: 1465.4136962890625, J: 24.456090927124023
Current x: (-0.0017714008923948887, 0.11481843914493683, 0.053063639402389534), u: [8.007962 2.403338], noise: [4.5337461  3.03522711]
Start training ...
Step: 147, L: 2126.7998046875, J: 34.82659149169922
Current x: (-0.0022663869318728202, 0.12873595643452626, 0.060418572521209725), u: [8.117126  2.3987854], noise: [4.79224965 5.38678803]
Start training ...
Step: 147, L: 1361.728271484375, J: 27.05290985107422
Current x: (-0.0028567380812487054, 0.14346797034038763, 0.06848381991386414), u: [8.355012 2.357437], noise: [3

Step: 147, L: 1465.535888671875, J: 17.51106071472168
Current x: (-0.4383209727471106, 1.520215429959615, 0.9990672215938569), u: [7.0515103 2.2200794], noise: [3.38419198 4.65435647]
Start training ...
Step: 147, L: 1616.4163818359375, J: 22.645488739013672
Current x: (-0.46970969861947554, 1.5660743284276635, 1.0357151180267334), u: [6.861537  2.2378898], noise: [4.65705614 5.25459212]
Start training ...
Step: 147, L: 929.9427490234375, J: 22.086734771728516
Current x: (-0.5025541494040056, 1.6118888559573274, 1.0727191411018373), u: [6.6518393 2.251088 ], noise: [4.52447446 4.6730016 ]
Start training ...
Step: 147, L: 2363.63134765625, J: 30.25621795654297
Current x: (-0.5370339845333169, 1.6576917788739907, 1.1101257752895357), u: [6.4229016 2.2693703], noise: [3.62639793 4.06617714]
Start training ...
Step: 147, L: 3004.624755859375, J: 27.292373657226562
Current x: (-0.5731039450929307, 1.7033784254128148, 1.1479576318740847), u: [6.187817  2.2925115], noise: [3.78338323 4.091262

Step: 148, L: 2610.061279296875, J: 24.178804397583008
Current x: (-0.017277822238033234, 0.32127945137395775, 0.1758532509803772), u: [9.7896385 1.422853 ], noise: [3.29819284 4.23085204]
Start training ...
Step: 148, L: 980.2603149414062, J: 26.187320709228516
Current x: (-0.020075890790274817, 0.345335539539009, 0.19136047554016114), u: [9.832787  1.3926449], noise: [4.76445144 3.55157028]
Start training ...
Step: 148, L: 1771.103271484375, J: 21.42351531982422
Current x: (-0.0232018393347934, 0.37025587750751926, 0.20761111273765565), u: [9.838668  1.3702966], noise: [4.01184284 3.97010327]
Start training ...
Step: 148, L: 1809.613037109375, J: 27.523677825927734
Current x: (-0.026699455971620288, 0.3961136905087671, 0.22482705221176147), u: [9.851575  1.3448086], noise: [4.70258076 4.15930135]
Start training ...
Step: 148, L: 1618.88720703125, J: 30.024608612060547
Current x: (-0.030592641220905145, 0.4228683841956204, 0.24289400277137757), u: [9.863888  1.3128172], noise: [4.0820

Step: 148, L: 2407.04541015625, J: 27.36149024963379
Current x: (-0.9315738676791837, 1.9471566796919035, 1.5633978829383852), u: [2.4730523 2.116285 ], noise: [2.95436576 3.72247768]
Start training ...
Step: 148, L: 2630.95458984375, J: 34.84619140625
Simulation ends in 77 steps
Episode 149 begins...
Current x: (0.0, 0.0, 0.0), u: [3.3623178 2.5868587], noise: [3.71715547 4.26284184]
Start training ...
Step: 149, L: 1857.073486328125, J: 31.744884490966797
Current x: (0.0, 0.0, 0.0), u: [3.630816  2.5391006], noise: [2.84476181 4.77709423]
Start training ...
Step: 149, L: 1627.42919921875, J: 21.985427856445312
Current x: (0.0, 0.0004119174423217774, 2.2977304458618168e-05), u: [3.9258132 2.4824052], noise: [4.33490726 4.31009784]
Start training ...
Step: 149, L: 2415.81103515625, J: 28.880901336669922
Current x: (0.0, 0.0012220121688842772, -3.810710906982421e-05), u: [4.2255454 2.4282796], noise: [4.67914914 3.92997756]
Start training ...
Step: 149, L: 2463.20751953125, J: 23.771289

Step: 149, L: 3036.7529296875, J: 27.260969161987305
Current x: (-0.08098611300838604, 0.674532658606709, 0.45622699594497673), u: [9.876956   0.20949845], noise: [3.05828741 5.33221286]
Start training ...
Step: 149, L: 2342.17236328125, J: 22.221223831176758
Current x: (-0.09068322837002463, 0.7091325985317095, 0.4845159759998321), u: [9.812863   0.20263705], noise: [4.05631684 4.61086853]
Start training ...
Step: 149, L: 1860.5533447265625, J: 29.491931915283203
Current x: (-0.10119437213873575, 0.7444102537927824, 0.5135443092823028), u: [9.707766   0.21116033], noise: [3.96531929 4.26606691]
Start training ...
Step: 149, L: 1781.1728515625, J: 28.27655601501465
Current x: (-0.11257571823765831, 0.7803601408618479, 0.5434782099246978), u: [9.581461   0.23060317], noise: [3.4301338  3.29238677]
Start training ...
Step: 149, L: 2899.25830078125, J: 34.773887634277344
Current x: (-0.12484873010777364, 0.81690993607602, 0.5743316963672637), u: [9.415492   0.27287075], noise: [3.03558383

Current x: (-0.0002636080210003739, 0.06833682495916873, 0.023150488185882574), u: [8.919187   0.93978155], noise: [4.91151476 4.35630023]
Start training ...
Step: 150, L: 3231.11572265625, J: 28.38128662109375
Current x: (-0.00039012769881505176, 0.07936520228535297, 0.028278447198867802), u: [9.145943 0.836878], noise: [4.38482146 5.89250487]
Start training ...
Step: 150, L: 2694.2607421875, J: 30.73058319091797
Current x: (-0.0005609228587574846, 0.0913247454262309, 0.03425986824035645), u: [9.459782  0.6902769], noise: [4.50401561 5.35612578]
Start training ...
Step: 150, L: 1499.2042236328125, J: 28.402420043945312
Current x: (-0.0007890029331300977, 0.10432849325838778, 0.04092142744064332), u: [9.712738  0.5628519], noise: [5.41889305 3.40504252]
Start training ...
Step: 150, L: 1132.0302734375, J: 18.626670837402344
Current x: (-0.0010856242806713792, 0.11835108691824484, 0.04837472610473634), u: [9.823659   0.49244422], noise: [3.4127085  4.71979942]
Start training ...
Step: 1

Current x: (-0.42201806424869615, 1.3592319410148854, 1.2258077326774597), u: [5.226695   0.84391415], noise: [4.36417921 3.50468369]
Start training ...
Step: 150, L: 1809.472900390625, J: 31.30986785888672
Current x: (-0.4529325511202457, 1.3959540854157992, 1.2712466246128082), u: [4.7682977 0.9939479], noise: [4.92822309 4.08289501]
Start training ...
Step: 150, L: 1707.325439453125, J: 24.40066909790039
Current x: (-0.4851588528795946, 1.4321666431723488, 1.3172097442150117), u: [4.293698  1.1396359], noise: [3.02635827 5.23200595]
Start training ...
Step: 150, L: 1971.347900390625, J: 25.928903579711914
Current x: (-0.5187967043730334, 1.4678341480873884, 1.3636348316192628), u: [3.8720856 1.2698992], noise: [4.85480027 5.59582105]
Start training ...
Step: 150, L: 1538.2996826171875, J: 20.366304397583008
Current x: (-0.5537599380585727, 1.5028641468254462, 1.4101547604560853), u: [3.3897157 1.407149 ], noise: [4.13355854 5.4037081 ]
Start training ...
Step: 150, L: 1604.667602539

Step: 151, L: 1802.290283203125, J: 22.434738159179688
Current x: (-0.034651111496577916, 0.4064703551094128, 0.3188519342899323), u: [10.222076  0.      ], noise: [5.34941363 3.76862842]
Start training ...
Step: 151, L: 2387.70556640625, J: 28.215145111083984
Current x: (-0.0398983890462243, 0.4328783363550703, 0.3437395799160004), u: [10.152465  0.      ], noise: [3.19177807 4.75618754]
Start training ...
Step: 151, L: 1538.26123046875, J: 27.832462310791016
Current x: (-0.04575193399196866, 0.46014184738400427, 0.36980751175880433), u: [10.116224  0.      ], noise: [4.38202004 3.43746306]
Start training ...
Step: 151, L: 1822.903076171875, J: 29.029579162597656
Current x: (-0.052215481981403765, 0.4881285156181782, 0.39673424916267397), u: [9.999584 0.      ], noise: [5.21432887 4.39520521]
Start training ...
Step: 151, L: 2315.10205078125, J: 30.261791229248047
Current x: (-0.059327290927724624, 0.5168065036844357, 0.42476706471443176), u: [9.922511 0.      ], noise: [2.98982737 4.

Step: 152, L: 2424.703125, J: 33.10368347167969
Current x: (-2.2164266367929966e-07, 0.02096439411299542, 0.004435039424896241), u: [6.9839687 1.2868359], noise: [4.22157986 4.21730282]
Start training ...
Step: 152, L: 1298.33203125, J: 25.828100204467773
Current x: (-7.2564875228718495e-06, 0.026398067628912978, 0.006573971414566041), u: [7.3078985 1.1394334], noise: [4.8968748  4.52695877]
Start training ...
Step: 152, L: 2256.09130859375, J: 34.240257263183594
Current x: (-2.1702119830135374e-05, 0.03252169333916603, 0.00928304433822632), u: [7.6754236 0.9692961], noise: [2.8432839  3.43115197]
Start training ...
Step: 152, L: 1945.1171875, J: 35.16815948486328
Current x: (-4.7896121108835935e-05, 0.03945139705548393, 0.012645955324172975), u: [7.835102  0.8959454], noise: [4.63401561 4.84944901]
Start training ...
Step: 152, L: 2281.505126953125, J: 24.018299102783203
Current x: (-8.793944132246849e-05, 0.04689195200150429, 0.016620692253112794), u: [8.197677  0.7182471], noise: [5

Step: 152, L: 1454.2392578125, J: 29.780048370361328
Current x: (-0.30451070347232506, 1.0537662433503454, 1.0323018040657046), u: [6.3841004 0.       ], noise: [3.79942329 3.65092369]
Start training ...
Step: 152, L: 1724.5113525390625, J: 35.057029724121094
Current x: (-0.3299521833675606, 1.088673377223841, 1.076825497245789), u: [5.928002   0.17905824], noise: [4.04296583 3.34927572]
Start training ...
Step: 152, L: 2794.44677734375, J: 26.203441619873047
Current x: (-0.3565813253349116, 1.123309002660759, 1.1220024504661563), u: [5.440008   0.39665124], noise: [3.73603868 6.10475039]
Start training ...
Step: 152, L: 2681.37109375, J: 32.27193832397461
Current x: (-0.3843990228830511, 1.1576036647330183, 1.1678236670494082), u: [5.033216  0.5660777], noise: [2.55856895 4.44983827]
Start training ...
Step: 152, L: 2325.76025390625, J: 29.08416175842285
Current x: (-0.4136292131649146, 1.1915975386256767, 1.2139123481273653), u: [4.577129   0.76801753], noise: [5.55597823 2.8373918 ]

Step: 153, L: 1952.1063232421875, J: 29.759248733520508
Current x: (-0.015352327774718422, 0.2867791504925711, 0.22347449636459352), u: [11.029161  0.      ], noise: [4.3766261  4.43287697]
Start training ...
Step: 153, L: 2369.2294921875, J: 31.60369300842285
Current x: (-0.018329557462115453, 0.30928396886415244, 0.24478736596107484), u: [11.086701  0.      ], noise: [5.45480047 5.548877  ]
Start training ...
Step: 153, L: 1556.564208984375, J: 32.43382263183594
Current x: (-0.021746449761044506, 0.33274232156761663, 0.267197526550293), u: [11.212419  0.      ], noise: [4.18604217 3.47522775]
Start training ...
Step: 153, L: 1558.681884765625, J: 35.47216033935547
Current x: (-0.02569870248987875, 0.35736285834381126, 0.2907069495677948), u: [11.177411  0.      ], noise: [4.22136361 4.28969538]
Start training ...
Step: 153, L: 2304.13427734375, J: 33.699554443359375
Current x: (-0.030149276159001002, 0.38282278989342833, 0.3154086958885193), u: [11.17893  0.     ], noise: [4.96197361

Current x: (2.5450453300960367e-06, 0.022805651161364585, 0.004913458633422851), u: [7.589589  0.8725358], noise: [3.13636608 3.26377931]
Start training ...
Step: 154, L: 1716.6646728515625, J: 20.89004898071289
Current x: (-5.793637871675421e-06, 0.028292625419935016, 0.006938643932342529), u: [7.772789  0.7781177], noise: [4.12207759 4.06126268]
Start training ...
Step: 154, L: 1931.5400390625, J: 35.796165466308594
Current x: (-2.1434806749418024e-05, 0.03428480877376117, 0.009622793197631836), u: [8.09494    0.60652393], noise: [3.75732342 4.34257592]
Start training ...
Step: 154, L: 2517.6181640625, J: 28.232940673828125
Current x: (-4.8687180741780974e-05, 0.04096937656518091, 0.013012491130828856), u: [8.417408  0.4297167], noise: [5.12793432 1.70275685]
Start training ...
Step: 154, L: 1636.50048828125, J: 25.14443588256836
Current x: (-9.210690937065726e-05, 0.048353002867578584, 0.017092505407333373), u: [8.454134  0.3978024], noise: [5.21474903 4.96585579]
Start training ...

Step: 154, L: 2933.507568359375, J: 31.343116760253906
Current x: (-0.2656179909558946, 0.9621618118970648, 0.9717805056095123), u: [8.06599 0.     ], noise: [3.55643477 5.29098071]
Start training ...
Step: 154, L: 1435.076416015625, J: 30.702367782592773
Current x: (-0.2899759516823598, 0.9985029238430532, 1.0173078708171845), u: [7.6711054 0.       ], noise: [5.52277973 3.51956222]
Start training ...
Step: 154, L: 2155.7255859375, J: 30.08512306213379
Current x: (-0.31573077529093796, 1.034816664249, 1.0634683804512024), u: [7.139649 0.      ], noise: [5.03597988 5.45668709]
Start training ...
Step: 154, L: 2040.111083984375, J: 30.6174259185791
Current x: (-0.34290740514391543, 1.0710279604587556, 1.11059632229805), u: [6.6594734 0.       ], noise: [5.07822851 6.03033635]
Start training ...
Step: 154, L: 2179.7900390625, J: 30.089553833007812
Current x: (-0.37162518026909974, 1.107114911285628, 1.1583961583137512), u: [6.162586 0.      ], noise: [3.65486067 4.10473168]
Start trainin

Step: 155, L: 2836.06591796875, J: 34.070152282714844
Current x: (-0.021647416229452775, 0.3471796033948589, 0.2763948796749115), u: [12.240687  0.      ], noise: [4.06367828 4.45241449]
Start training ...
Step: 155, L: 1856.0499267578125, J: 32.481353759765625
Current x: (-0.025677530138357996, 0.37289206230467437, 0.3018997847080231), u: [12.279817  0.      ], noise: [1.70406091 4.19551763]
Start training ...
Step: 155, L: 1714.0155029296875, J: 34.10997772216797
Current x: (-0.030274073995955726, 0.3996204177416805, 0.32858988480567936), u: [12.265665  0.      ], noise: [4.15395898 5.52938738]
Start training ...
Step: 155, L: 1657.7332763671875, J: 38.334861755371094
Current x: (-0.035411154141767646, 0.4271034932306986, 0.3562588208675385), u: [12.333765  0.      ], noise: [3.75976898 4.71085562]
Start training ...
Step: 155, L: 2369.319580078125, J: 25.984966278076172
Current x: (-0.041256547976550186, 0.4556830391353424, 0.385016780614853), u: [12.322764  0.      ], noise: [3.158

Step: 156, L: 1968.2587890625, J: 26.88593292236328
Current x: (-0.00010181936651647213, 0.03646478129449155, 0.015043232440948486), u: [8.5783     0.23757546], noise: [4.71691694 2.45827834]
Start training ...
Step: 156, L: 2240.039794921875, J: 29.902034759521484
Current x: (-0.00016039155116764263, 0.043687709616582054, 0.019331376457214354), u: [8.70248   0.1545333], noise: [3.67536029 6.23962371]
Start training ...
Step: 156, L: 2163.322509765625, J: 36.09897232055664
Current x: (-0.00024301856938923797, 0.051528564174139056, 0.02467945680618286), u: [9.271635 0.      ], noise: [4.63545477 4.60095277]
Start training ...
Step: 156, L: 1772.9765625, J: 25.570165634155273
Current x: (-0.0003619321824088462, 0.06026526773103914, 0.03062590556144714), u: [9.646792 0.      ], noise: [4.9403758  4.81266027]
Start training ...
Step: 156, L: 3309.17724609375, J: 39.42593765258789
Current x: (-0.0005265180042866428, 0.0698712120055977, 0.037502968025207514), u: [10.053447  0.      ], noise:

Step: 156, L: 1721.1883544921875, J: 30.903623580932617
Current x: (-0.35956035378925694, 1.1218817626628472, 1.1717019754409792), u: [7.5443983 0.       ], noise: [5.10348801 4.04980386]
Start training ...
Step: 156, L: 1319.55419921875, J: 29.462848663330078
Current x: (-0.3903506424113633, 1.16119885536727, 1.2245023100376131), u: [6.881232 0.      ], noise: [4.50270808 2.22436256]
Start training ...
Step: 156, L: 2583.8974609375, J: 38.67485046386719
Current x: (-0.4226794782983995, 1.200183793594, 1.278162452888489), u: [6.131001 0.      ], noise: [4.00166095 4.42560173]
Start training ...
Step: 156, L: 2425.78662109375, J: 37.329246520996094
Current x: (-0.45628836147766005, 1.2386496168654975, 1.3327385534763339), u: [5.427712 0.      ], noise: [3.48807424 3.01428583]
Start training ...
Step: 156, L: 1540.912109375, J: 36.870330810546875
Current x: (-0.49129118005644723, 1.276554409831669, 1.3878853600502017), u: [4.672022   0.28152436], noise: [5.1883372  4.40282751]
Start trai

Step: 157, L: 2219.1953125, J: 40.49003601074219
Current x: (-0.07592665663384764, 0.5664101583381371, 0.5323253584384919), u: [13.01269  0.     ], noise: [4.40414794 3.93898932]
Start training ...
Step: 157, L: 1705.621826171875, J: 42.902305603027344
Current x: (-0.08665656235638826, 0.6006013106850082, 0.5696234258651734), u: [12.878004  0.      ], noise: [5.37785717 5.16464258]
Start training ...
Step: 157, L: 2431.71875, J: 26.156282424926758
Current x: (-0.09847035812658367, 0.6356515434580405, 0.6082692780971528), u: [12.779104  0.      ], noise: [4.2099554  4.25616707]
Start training ...
Step: 157, L: 2152.390625, J: 34.964111328125
Current x: (-0.11154725672280105, 0.6716930265060942, 0.6482242522239686), u: [12.609624  0.      ], noise: [3.61761004 5.19924187]
Start training ...
Step: 157, L: 2649.121337890625, J: 34.45665740966797
Current x: (-0.12583820954698677, 0.7084969740848109, 0.6894525156497957), u: [12.458407  0.      ], noise: [3.39396008 4.14893149]
Start training

Step: 158, L: 2681.256591796875, J: 38.40239715576172
Current x: (-0.005891601478469904, 0.1930318444694201, 0.14092765798568724), u: [13.541061  0.      ], noise: [2.65346429 4.51050914]
Start training ...
Step: 158, L: 2121.2568359375, J: 36.98103332519531
Current x: (-0.007436175591891105, 0.2133869732258752, 0.15959695472717284), u: [13.71712  0.     ], noise: [4.88204882 2.00072564]
Start training ...
Step: 158, L: 1591.924560546875, J: 31.65892791748047
Current x: (-0.00927157600432698, 0.23481107868932358, 0.1794346529960632), u: [13.730937  0.      ], noise: [3.12283239 3.84793146]
Start training ...
Step: 158, L: 2479.31005859375, J: 48.47602844238281
Current x: (-0.0114343505662586, 0.2572879941495506, 0.20093219547271726), u: [13.8321905  0.       ], noise: [4.13905044 3.45903147]
Start training ...
Step: 158, L: 1925.314208984375, J: 38.07268142700195
Current x: (-0.013966595269159872, 0.2808208425323393, 0.22373032178878782), u: [13.903768  0.      ], noise: [6.18356451 5.

Step: 159, L: 2649.8642578125, J: 39.43811798095703
Current x: (-8.678005344210335e-06, 0.019101021396872586, 0.005169643211364746), u: [8.413297  0.3021895], noise: [4.0892473  3.26875216]
Start training ...
Step: 159, L: 1854.8988037109375, J: 29.160411834716797
Current x: (-1.9612418442675076e-05, 0.024709204075060456, 0.007749605417251587), u: [8.713084  0.1216069], noise: [3.92763699 4.19986449]
Start training ...
Step: 159, L: 2492.11376953125, J: 28.427961349487305
Current x: (-3.88562134772758e-05, 0.030943713907743035, 0.011222727870941163), u: [9.130068 0.      ], noise: [3.465809   4.82819998]
Start training ...
Step: 159, L: 2231.262451171875, J: 39.34524154663086
Current x: (-7.124490685452429e-05, 0.03789339205982304, 0.01552777533531189), u: [9.6046 0.    ], noise: [3.18968566 4.32954289]
Start training ...
Step: 159, L: 2075.627685546875, J: 37.64580535888672
Current x: (-0.00012318775617785639, 0.04560436809329814, 0.02060959048271179), u: [9.992135 0.      ], noise: [

Step: 159, L: 2108.7783203125, J: 35.34688949584961
Current x: (-0.33424701127494716, 1.1149592188365378, 1.1700526423931124), u: [10.19408  0.     ], noise: [3.07893495 4.13196842]
Start training ...
Step: 159, L: 2908.42724609375, J: 39.605289459228516
Current x: (-0.36529341889600947, 1.157864389911008, 1.2270573429584506), u: [9.5749 0.    ], noise: [2.50415704 2.39016869]
Start training ...
Step: 159, L: 2194.84033203125, J: 32.78128433227539
Current x: (-0.3979424271601181, 1.2004675349852743, 1.2849761481761934), u: [8.86274 0.     ], noise: [5.25798064 4.05457779]
Start training ...
Step: 159, L: 1682.4228515625, J: 25.684165954589844
Current x: (-0.4319537146525714, 1.2425773069709336, 1.3438638422489169), u: [8.065426 0.      ], noise: [2.18222139 3.76510031]
Start training ...
Step: 159, L: 3084.5703125, J: 29.495798110961914
Current x: (-0.46770879597215964, 1.2842185213632202, 1.4037581505298617), u: [7.307881 0.      ], noise: [4.45767912 4.32362837]
Start training ...
St

Step: 160, L: 1480.212646484375, J: 26.72906494140625
Current x: (-0.06329046533368769, 0.5220320753926239, 0.49579319868087784), u: [15.253701  0.      ], noise: [3.90476634 2.97770816]
Start training ...
Step: 160, L: 1823.7705078125, J: 34.154693603515625
Current x: (-0.07319696638408975, 0.5573804531996673, 0.5337325654506685), u: [15.135016  0.      ], noise: [5.13067817 4.31285515]
Start training ...
Step: 160, L: 1930.05419921875, J: 33.11854553222656
Current x: (-0.08415655066090716, 0.5936949106321711, 0.5732900081634523), u: [15.049882  0.      ], noise: [3.90034129 6.54378711]
Start training ...
Step: 160, L: 1936.7596435546875, J: 31.809492111206055
Current x: (-0.09636656931678628, 0.6311443700244105, 0.6144427347660066), u: [15.028746  0.      ], noise: [4.48522261 3.5422002 ]
Start training ...
Step: 160, L: 1911.5775146484375, J: 33.97871017456055
Current x: (-0.10995938053580284, 0.6697546348835216, 0.6568361050605775), u: [14.858114  0.      ], noise: [4.19811056 3.92

Step: 161, L: 2496.2763671875, J: 30.511730194091797
Current x: (-0.007594470526589663, 0.20066688467005267, 0.1626166181087494), u: [14.977964  0.      ], noise: [4.88022593 5.2583783 ]
Start training ...
Step: 161, L: 2078.60302734375, J: 28.575035095214844
Current x: (-0.009544149262278168, 0.2220336824959098, 0.18325035519599914), u: [15.266666  0.      ], noise: [4.11514599 4.27577928]
Start training ...
Step: 161, L: 2412.23291015625, J: 33.576576232910156
Current x: (-0.01190046740607917, 0.2448980010571692, 0.20534407343864441), u: [15.443135  0.      ], noise: [3.74659463 4.22269151]
Start training ...
Step: 161, L: 2687.7890625, J: 33.462886810302734
Current x: (-0.014687889496596834, 0.2691074680112581, 0.2289483950138092), u: [15.583405  0.      ], noise: [3.21459566 4.42846829]
Start training ...
Step: 161, L: 2158.07421875, J: 37.75325012207031
Current x: (-0.01795270027950125, 0.2946279897552264, 0.2540494204044342), u: [15.70392  0.     ], noise: [5.93093885 5.89253952]

Step: 162, L: 1711.61181640625, J: 31.697498321533203
Current x: (-2.504505085646499e-06, 0.023147321012979612, 0.007183833646774293), u: [8.7376146e+00 1.9931588e-03], noise: [3.64410662 2.66589536]
Start training ...
Step: 162, L: 1983.0372314453125, J: 31.78093719482422
Current x: (-1.5573463420531897e-05, 0.02894427903947169, 0.010588595724105837), u: [8.929662 0.      ], noise: [3.16995187 3.55868314]
Start training ...
Step: 162, L: 3052.0283203125, J: 31.405372619628906
Current x: (-3.945371833223074e-05, 0.03526515924631265, 0.014964741086959842), u: [9.249377 0.      ], noise: [4.77123963 3.34595214]
Start training ...
Step: 162, L: 2643.99560546875, J: 42.740638732910156
Current x: (-7.991360065319867e-05, 0.04217078133335583, 0.020194979429245), u: [9.656736 0.      ], noise: [5.42815789 4.34353266]
Start training ...
Step: 162, L: 1722.4150390625, J: 31.221843719482422
Current x: (-0.0001463611346301008, 0.049831865919435184, 0.026492684221267705), u: [10.273399  0.      ],

Step: 162, L: 2334.980712890625, J: 35.7672233581543
Current x: (-0.35800753003839253, 1.1194318341245297, 1.2246390481472016), u: [11.546586  0.      ], noise: [4.22010487 5.11191658]
Start training ...
Step: 162, L: 1701.993408203125, J: 23.982498168945312
Current x: (-0.391229725151059, 1.1630274977371913, 1.2846855926036835), u: [10.857896  0.      ], noise: [3.94656755 3.70147998]
Start training ...
Step: 162, L: 1052.28076171875, J: 40.78474807739258
Current x: (-0.42641593639845854, 1.2063505423465588, 1.3457976145267487), u: [10.068105  0.      ], noise: [4.46714003 3.10191662]
Start training ...
Step: 162, L: 1800.115478515625, J: 26.98804473876953
Current x: (-0.4633775130391122, 1.2492148676218995, 1.408019934797287), u: [9.176236 0.      ], noise: [3.94684389 3.96911676]
Start training ...
Step: 162, L: 1735.14013671875, J: 31.037519454956055
Current x: (-0.5020583502721505, 1.2914916869718847, 1.4713855879306794), u: [8.254515 0.      ], noise: [3.93919777 2.7146644 ]
Star

Step: 163, L: 2366.67724609375, J: 35.82759094238281
Current x: (-0.06827605118518716, 0.520412125149944, 0.5198927082061768), u: [16.216295  0.      ], noise: [4.34527492 4.21876504]
Start training ...
Step: 163, L: 2334.574951171875, J: 34.694496154785156
Current x: (-0.07910948004468124, 0.556322919042139, 0.5595751569747925), u: [16.154078  0.      ], noise: [5.24585101 3.17119614]
Start training ...
Step: 163, L: 2515.232666015625, J: 33.988380432128906
Current x: (-0.09117396376855925, 0.5934033299822168, 0.6008918861389161), u: [16.029451  0.      ], noise: [3.67796576 4.23942542]
Start training ...
Step: 163, L: 2033.6964111328125, J: 31.80681800842285
Current x: (-0.10454274718434793, 0.6315850963593742, 0.644031488609314), u: [15.916501  0.      ], noise: [4.86453691 4.93266595]
Start training ...
Step: 163, L: 1642.986328125, J: 35.70878601074219
Current x: (-0.11926543322270816, 0.6707610741680111, 0.6887178901672364), u: [15.799093  0.      ], noise: [4.31407883 3.21342478

In [None]:
# Split the stored experiences into one column per field.  Every experience is
# unpacked as (x, u, c, x_next, <ignored fifth entry>).
xs, us, cs, x_nexts = [], [], [], []
for x, u, c, x_next, _ in exps:
    xs.append(x)
    us.append(u)
    cs.append(c)
    x_nexts.append(x_next)

# Stack each column with NumPy and hand it to TensorFlow as a float32 tensor.
# (A commented-out PyTorch / per-sample draft of this step used to sit here;
# the TensorFlow conversion below is the version that actually runs.)
xs, us, cs, x_nexts = [
    tf.convert_to_tensor(np.array(column), dtype=tf.float32)
    for column in (xs, us, cs, x_nexts)
]

In [None]:
# Feed the batched `x_nexts` tensor through `pg.target_p_network` and keep the
# result as `u_nexts`; the bare name on the last line displays it.
u_nexts = pg.target_p_network(x_nexts)
# Disabled float32 cast, kept for reference:
# u_nexts = tf.cast(u_nexts, tf.float32)
u_nexts

In [None]:
# Join `x_nexts` and `u_nexts` along axis 1 and display the result.
tf.concat([x_nexts, u_nexts], axis=1)

In [None]:
# Display `x_nexts`, the float32 tensor assembled from the `x_next` entries of `exps`.
x_nexts

In [None]:
# Display the current value of `u`.
# NOTE(review): `u` is rebound by several cells in this notebook (the
# experience-unpacking loop, the random-exploration loop, and `u = pg(x)`),
# so what is shown here depends on the order in which cells were executed.
u

In [None]:
# Reset the quadrotor, then call `animate_quadrotor` with it and the `pg` object.
# The third argument is 1000 — presumably the number of steps/frames to
# simulate; confirm against the definition of `animate_quadrotor`.
quadrotor.reset()
animate_quadrotor(quadrotor, pg, 1000)

In [None]:
# Display `eps`, the threshold that `np.random.rand()` is compared against in
# the random-exploration cell further down.
eps

In [None]:
# Take entries 0 and 3 of the first stored experience as a sample `x` and
# `x_next` for the inspection cells that follow.
x, x_next = exps[0][0], exps[0][3]

In [None]:
# Evaluate `p_network` on `x` converted to a float torch tensor.
# NOTE(review): the neighbouring cells use TensorFlow (tf.convert_to_tensor,
# tf.concat); this looks like a leftover from a PyTorch version — confirm that
# `torch` and `p_network` are still defined before relying on this cell.
p_network(torch.as_tensor(x).float())

In [None]:
# Show `x` converted to a float torch tensor.
# NOTE(review): requires `torch`, which the TensorFlow-based cells nearby do
# not use — confirm it is imported.
torch.as_tensor(x).float()

In [None]:
# Display the whole `exps` collection of stored experiences.
# NOTE(review): this dumps every entry; if `exps` is large, showing
# `len(exps)` or a short slice would keep the output readable.
exps

In [None]:
# Call `pg` on the sample `x` and show element 0 of what it returns.
pg(x)[0]

In [None]:
# Exploration sanity check: run 1000 trials; whenever a uniform draw falls
# below `eps`, sample a random 2-element control (each entry between 10 and 20)
# into `u` and print it.
for _ in range(1000):
    if not (np.random.rand() < eps):
        continue

    u = 10 + 10 * np.random.rand(2)
    print(u)

In [None]:
# Evaluate `pg` on `x`, store the result in `u` (overwriting any earlier `u`),
# and display it.
u = pg(x)
u

In [None]:
# Number of dimensions of `x`, taken as the length of `x.shape`.
len(x.shape)

In [None]:
# Display the `trainable_weights` attribute of `q_network`.
q_network.trainable_weights

In [None]:
# Display the repr of the `q_network` object itself.
q_network