In [2]:
%load_ext autoreload
%autoreload 2
from collections import defaultdict
from typing import Optional

import numpy as np
import torch
import tqdm
from tensordict.nn import TensorDictModule
from tensordict.tensordict import TensorDict, TensorDictBase
from torch import nn

from torchrl.data import BoundedTensorSpec, CompositeSpec, UnboundedContinuousTensorSpec
from torchrl.envs import (
    CatTensors,
    EnvBase,
    Transform,
    TransformedEnv,
    UnsqueezeTransform,
)
from torchrl.envs.transforms.transforms import _apply_to_composite
from torchrl.envs.utils import check_env_specs, step_mdp

import random
from pytorch3d import transforms
import math
from linguamechanica.kinematics import DifferentiableOpenChainMechanism
from linguamechanica.kinematics import UrdfRobotLibrary

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
error_done_threshold = 1e-3
weights = torch.Tensor([1.0, 1.0, 1.0, 1.0, 1.0, 1.0]).cuda()
urdf_robot = UrdfRobotLibrary.dobot_cr5()
chain_index = -1#3
used_open_chain = urdf_robot.extract_open_chains(0.3)[chain_index].to(weights.device)
thetas_count = used_open_chain.screws.shape[0]
pose_count = 6
on_manifold_count = (9 * 2) + ( 2 * thetas_count)
batch_size = 1024

In [4]:
thetas = torch.Tensor([[0.0],[0.0],[1.5]]).cuda()
transformation = used_open_chain.forward_transformation(
    thetas
)
pose = transforms.se3_log_map(transformation.get_matrix())
print(pose)
target_thetas = torch.Tensor([[1.0],[0.0],[0.5]]).cuda()
target_transformation = used_open_chain.forward_transformation(
    target_thetas
)
target_pose = transforms.se3_log_map(target_transformation.get_matrix())
print(target_pose)
used_open_chain = used_open_chain.to(thetas.device)

tensor([[ 8.9191e-01, -6.4650e-01,  6.4650e-01,  4.0799e-06, -2.2214e+00,
          2.2214e+00],
        [ 8.9191e-01, -6.4650e-01,  6.4650e-01,  4.0799e-06, -2.2214e+00,
          2.2214e+00],
        [ 6.5409e-01,  1.5070e-01, -2.5170e-01, -2.3000e+00,  2.2827e+00,
         -3.1305e-01]], device='cuda:0')
tensor([[-4.1440e-01, -4.1333e-01,  7.1470e-01,  1.3417e+00, -6.6351e-01,
          7.5451e-01],
        [ 8.9191e-01, -6.4650e-01,  6.4650e-01,  4.0799e-06, -2.2214e+00,
          2.2214e+00],
        [-6.2325e-01, -3.5645e-01,  7.3428e-01,  7.2195e-01,  8.1363e-01,
         -5.5225e-01]], device='cuda:0')


In [5]:
#print("!!!!!!!!!!!!!!!!!!!!!")
#print("thetas", thetas)
#print("target_pose", target_pose.shape)
#print("target_pose", target_pose)
#print("!!!!!!!!!!!!!!!!!!!!!")

In [6]:
def compute_error_pose(open_chain, thetas, target_pose):
    current_transformation = open_chain.forward_transformation(thetas)
    target_transformation = transforms.se3_exp_map(target_pose)
    current_trans_to_target = current_transformation.compose(
        transforms.Transform3d(matrix=target_transformation).inverse()
    )
    current_trans_to_target = current_trans_to_target.to(thetas.device).get_matrix()
    error_pose = transforms.se3_log_map(current_trans_to_target)
    return error_pose

compute_error_pose(used_open_chain, thetas, target_pose)

def compute_reward(thetas, target_pose, weights, error_done_threshold, open_chain):
    if len(thetas.shape) == 1:
        thetas = thetas.unsqueeze(0)
    if len(target_pose.shape) == 1:
        target_pose = target_pose.unsqueeze(0)
    open_chain = open_chain.to(thetas.device)
    error_pose = compute_error_pose(
        open_chain, thetas, target_pose
    )
    weights = weights.to(thetas.device)
    pose_error = DifferentiableOpenChainMechanism.compute_weighted_error(
        error_pose, weights
    )
    done = pose_error < error_done_threshold
    reward = - pose_error
    return reward, done

In [7]:
angles = torch.Tensor([0.1, -0.1, 0.2, -0.2])
angles_sin = angles.sin()
angles_cos = angles.cos()
torch.atan2(angles_sin, angles_cos) - angles

tensor([0., 0., 0., 0.])

In [55]:
def _step(self, tensordict):
    thetas = torch.atan2(tensordict["thetas_sin"], tensordict["thetas_cos"])
    theta_deltas = tensordict["action"]    
    #print("thetas", thetas.shape)
    #print("theta_deltas", theta_deltas.shape)
    theta_deltas_sin, theta_deltas_cos = None, None
    print("len(theta_deltas.shape)", len(theta_deltas.shape))
    if len(theta_deltas.shape) == 3:
        theta_deltas_sin = theta_deltas[:, :, 0]
        theta_deltas_cos = theta_deltas[:, :, 1]
    else:
        theta_deltas_sin = theta_deltas[0]
        theta_deltas_cos = theta_deltas[1]
    theta_deltas = torch.atan2(theta_deltas_sin, theta_deltas_cos)
    print("theta_deltas", theta_deltas.shape, theta_deltas_sin.shape, theta_deltas_cos.shape)
    
    #print("theta_deltas", theta_deltas.shape)
    #print("thetas", thetas.shape)
    #max_theta_deltas = tensordict["params", "max_theta_deltas"]
    #print(thetas.shape, theta_deltas.shape)
    new_thetas = thetas + theta_deltas
    #new_thetas = new_thetas.clamp(-max_theta_deltas, max_theta_deltas)
    target_pose = tensordict["target_pose"]
    #print("target_pose in _step", target_pose)
    #print("STEP", f"Target: {target_pose}", f"Thetas: {thetas}", f"New Thetas: {new_thetas}")
    #TODO: I have no idea if this is a good idea or not
    #new_thetas = force_parameters_within_bounds(new_thetas)
    #print("----------------------------")
    #print("new_thetas.shape", new_thetas.shape)
    #print("target_pose", target_pose)
    #print("weights.shape", weights.shape)
    #print("----------------------------")
    reward, done = compute_reward(new_thetas, target_pose, weights, error_done_threshold, self.open_chain)
    done = torch.zeros_like(reward, dtype=torch.bool)
    out = TensorDict(
        {
            "next": {
                "thetas_sin": new_thetas.sin(),
                "thetas_cos": new_thetas.cos(),
                "target_pose": target_pose,
                "params": tensordict["params"],
                "reward": reward,
                "done": done,
            }
        },
        tensordict.shape,
    )
    return out
def uniformly_sample_parameters_within_constraints(open_chain, batch_size):
    samples = []
    for sample_idx in range(batch_size):
        coordinates = []
        for i in range(len(open_chain.joint_limits)):
            # TODO: check if unconstrained works
            coordinates.append(
                random.uniform(
                    open_chain.joint_limits[i][0],
                    open_chain.joint_limits[i][1],
                )
            )
        samples.append(torch.Tensor(coordinates).unsqueeze(0))
    return torch.cat(samples, 0)
def generate_random_target_pose(target_thetas, open_chain):
    if len(target_thetas.shape) == 1:
        target_thetas = target_thetas.unsqueeze(0)
    open_chain = open_chain.to(target_thetas.device)
    target_transformation = open_chain.forward_transformation(
        target_thetas
    )
    target_pose = transforms.se3_log_map(
        target_transformation.get_matrix()
    )
    if target_thetas.shape[0] == 1:
        target_thetas = target_thetas.squeeze(0)
    #print("generate_random_target_pose", target_pose)
    return target_pose
def _reset(self, tensordict):
    if tensordict is None or tensordict.is_empty():
        # if no tensordict is passed, we generate a single set of hyperparameters
        # Otherwise, we assume that the input tensordict contains all the relevant
        # parameters to get started.
        tensordict = self.gen_params(batch_size=self.batch_size)
    batch_size = 1 if len(tensordict.shape) == 0 else tensordict.shape[0]
    thetas = uniformly_sample_parameters_within_constraints(self.open_chain, batch_size).to(device=self.device)
    if batch_size == 1:
        thetas = thetas.squeeze(0)    
    #thetas = force_parameters_within_bounds(thetas)
    #TODO: randommize this better
    target_thetas = thetas + ((torch.randn(thetas.shape).to(self.device)) * 0.01)
    #target_thetas = force_parameters_within_bounds(target_thetas)
    target_pose   = generate_random_target_pose(target_thetas, self.open_chain)
    if batch_size == 1:
        target_pose = target_pose.squeeze(0)
    #print("target_pose.shape", target_pose.shape)
    out = TensorDict(
        {
            "thetas_sin": thetas.sin(),
            "thetas_cos": thetas.cos(),
            "target_pose": target_pose,
            "params": tensordict["params"],
        },
        batch_size=tensordict.shape,
    )
    return out
def _make_spec(self, td_params):
    # Under the hood, this will populate self.output_spec["observation"]
    self.observation_spec = CompositeSpec(
        thetas_sin=BoundedTensorSpec(
            minimum=-torch.ones(thetas_count),# * torch.pi,
            maximum= torch.ones(thetas_count),# * torch.pi,
            shape=(thetas_count),
            dtype=torch.float32,
        ),
        thetas_cos=BoundedTensorSpec(
            minimum=-torch.ones(thetas_count),# * torch.pi,
            maximum= torch.ones(thetas_count),# * torch.pi,
            shape=(thetas_count),
            dtype=torch.float32,
        ),
        #TODO: bounds are wrong. They need to be the ones in the robot constraints
        target_pose=BoundedTensorSpec(
            minimum=-torch.ones(pose_count) * 10000.0,
            maximum= torch.ones(pose_count) * 10000.0,
            shape=(pose_count),
            dtype=torch.float32,
        ),
        # we need to add the "params" to the observation specs, as we want
        # to pass it at each step during a rollout
        params=make_composite_from_td(td_params["params"]),
        shape=(),
    )
    # since the environment is stateless, we expect the previous output as input.
    # For this, EnvBase expects some state_spec to be available
    self.state_spec = self.observation_spec.clone()
    # action-spec will be automatically wrapped in input_spec when
    # `self.action_spec = spec` will be called supported
    #TODO: bounds are wrong
    self.action_spec = BoundedTensorSpec(
        minimum=-torch.ones(thetas_count * 2),# * 100000.0,
        maximum=+torch.ones(thetas_count * 2),# * 100000.0,
        shape=(thetas_count, thetas_count),
        dtype=torch.float32,
    )
    self.reward_spec = UnboundedContinuousTensorSpec(shape=(*td_params.shape, 1))


def make_composite_from_td(td):
    # custom funtion to convert a tensordict in a similar spec structure
    # of unbounded values.
    composite = CompositeSpec(
        {
            key: make_composite_from_td(tensor)
            if isinstance(tensor, TensorDictBase)
            else UnboundedContinuousTensorSpec(
                dtype=tensor.dtype, device=tensor.device, shape=tensor.shape
            )
            for key, tensor in td.items()
        },
        shape=td.shape,
    )
    return composite

def _set_seed(self, seed: Optional[int]):
    rng = torch.manual_seed(seed)
    self.rng = rng
def gen_params(batch_size=None) -> TensorDictBase:
    if batch_size is None:
        batch_size = []
    td = TensorDict(
        {
            "params": TensorDict(
                {
                    "max_theta_deltas": torch.ones(thetas_count) * torch.pi,
                },
                [],
            )
        },
        [],
    )
    if batch_size:
        td = td.expand(batch_size).contiguous()
    return td
class InverseKinematicsEnv(EnvBase):
    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": 30,
    }
    batch_locked = False

    def __init__(self, open_chain=None, td_params=None, seed=None, device="cuda:0"):
        super().__init__()
        if td_params is None:
            td_params = self.gen_params()
        self.open_chain = open_chain
        super().__init__(device=device, batch_size=[])
        self._make_spec(td_params)
        if seed is None:
            seed = torch.empty((), dtype=torch.int64).random_().item()
        self.set_seed(seed)

    # Helpers: _make_step and gen_params
    gen_params = staticmethod(gen_params)
    _make_spec = _make_spec

    # Mandatory methods: _step, _reset and _set_seed
    _reset = _reset
    _step = _step#staticmethod(_step)
    _set_seed = _set_seed
    
env = InverseKinematicsEnv(open_chain=used_open_chain)
check_env_specs(env)
#print("observation_spec:", env.observation_spec)
#print("state_spec:", env.state_spec)
#print("reward_spec:", env.reward_spec)
td = env.reset()
#print("reset tensordict", td)
td = env.rand_step(td)
#print("random step tensordict", td)
def get_pose_and_error_pose(thetas, target_pose, open_chain):
    open_chain = open_chain.to(thetas.device)
    error_pose = compute_error_pose(open_chain, thetas, target_pose)
    transformation = open_chain.forward_transformation(thetas)
    pose = transforms.se3_log_map(transformation.get_matrix())
    return pose, error_pose

len(theta_deltas.shape) 1
theta_deltas torch.Size([]) torch.Size([]) torch.Size([])
len(theta_deltas.shape) 1
theta_deltas torch.Size([]) torch.Size([]) torch.Size([])
len(theta_deltas.shape) 1
theta_deltas torch.Size([]) torch.Size([]) torch.Size([])
check_env_specs succeeded!
len(theta_deltas.shape) 1
theta_deltas torch.Size([]) torch.Size([]) torch.Size([])


In [56]:
transformed_env = TransformedEnv(env)
check_env_specs(transformed_env)
def simple_rollout(steps=100):
    # preallocate:
    data = TensorDict({}, [steps])
    # reset
    _data = transformed_env.reset()
    for i in range(steps):
        _data["action"] = transformed_env.action_spec.rand()
        _data = transformed_env.step(_data)
        data[i] = _data
        _data = step_mdp(_data, keep_other=True)
    return data


#print("data from rollout:", simple_rollout(100))

len(theta_deltas.shape) 1
theta_deltas torch.Size([]) torch.Size([]) torch.Size([])
len(theta_deltas.shape) 1
theta_deltas torch.Size([]) torch.Size([]) torch.Size([])
len(theta_deltas.shape) 1
theta_deltas torch.Size([]) torch.Size([]) torch.Size([])
check_env_specs succeeded!


In [57]:
#batch_size = 10  # number of environments to be executed in batch
#td = transformed_env.reset(transformed_env.gen_params(batch_size=[batch_size]))
#print(f"reset (batch size of {batch_size})", td)
#td = transformed_env.rand_step(td)
#print(f"rand step (batch size of {batch_size})", td)

In [58]:
rollout = transformed_env.rollout(
    3,
    auto_reset=False,  # we're executing the reset out of the ``rollout`` call
    tensordict=transformed_env.reset(transformed_env.gen_params(batch_size=[batch_size])),
)
#print("rollout of len 3 (batch size of 10):", rollout)

len(theta_deltas.shape) 2
theta_deltas torch.Size([12]) torch.Size([12]) torch.Size([12])


RuntimeError: The size of tensor a (6) must match the size of tensor b (12) at non-singleton dimension 1

In [59]:
torch.manual_seed(0)
transformed_env.set_seed(0)

net = nn.Sequential(
    nn.Linear(in_features=1, out_features=4, bias=True),
    nn.Tanh(),
    nn.Linear(in_features=4, out_features=thetas_count, bias=True)
).cuda()

import torch.nn.functional as F

class InverseKinematicsActor(nn.Module):
    def __init__(
        self, open_chain, weights=128
    ):
        super(InverseKinematicsActor, self).__init__()        
        self.open_chain = open_chain
        thetas_count = used_open_chain.screws.shape[0]
        self.fc1 = nn.Linear(in_features=on_manifold_count, out_features=weights, bias=True)
        self.fc2 = nn.Linear(in_features=weights+on_manifold_count, out_features=weights, bias=True)
        self.fc3 = nn.Linear(in_features=weights+on_manifold_count, out_features=weights, bias=True)
        self.fc4 = nn.Linear(in_features=weights+on_manifold_count, out_features=weights, bias=True)
        self.fc5 = nn.Linear(in_features=weights+on_manifold_count, out_features=weights, bias=True)
        self.fc6 = nn.Linear(in_features=weights+on_manifold_count, out_features=weights, bias=True)
        self.fc_cos = nn.Linear(in_features=weights+on_manifold_count, out_features=thetas_count, bias=True)
        self.fc_sin = nn.Linear(in_features=weights+on_manifold_count, out_features=thetas_count, bias=True)

    def forward(self, thetas_sin, thetas_cos, target_pose):
        thetas = torch.atan2(thetas_sin, thetas_cos)
        pose, error_pose = self.open_chain.compute_pose_and_error_pose(thetas, target_pose)
        # pose decomposition
        pose_linear =  pose[:, :3]
        pose_angular_cos =  pose[:, 3:].cos()
        pose_angular_sin =  pose[:, 3:].sin()
        # error pose decomposition 
        error_pose_linear =  error_pose[:, :3]
        error_pose_angular_cos =  error_pose[:, 3:].cos()
        error_pose_angular_sin =  error_pose[:, 3:].sin()
        # manifold error with all information
        manifold_error = torch.cat([pose_linear, pose_angular_cos , pose_angular_sin, error_pose_linear, error_pose_angular_cos, error_pose_angular_sin, thetas_cos, thetas_sin], 1)      
        x = torch.cat([F.tanh(self.fc1(manifold_error)), manifold_error],1)
        x = torch.cat([F.tanh(self.fc2(x)), manifold_error],1)
        x = torch.cat([F.tanh(self.fc3(x)), manifold_error],1)
        x = torch.cat([F.tanh(self.fc4(x)), manifold_error],1)
        x = torch.cat([F.tanh(self.fc5(x)), manifold_error],1)
        x = torch.cat([F.tanh(self.fc6(x)), manifold_error],1)
        cos = self.fc_cos(x).cos()
        sin = self.fc_sin(x).sin()
        return torch.cat([sin.unsqueeze(2), cos.unsqueeze(2)], 2)
        
used_open_chain = used_open_chain.to("cuda:0")
net = InverseKinematicsActor(used_open_chain).cuda()


In [None]:
policy = TensorDictModule(
    net,
    in_keys=["thetas_sin", "thetas_cos", "target_pose"],#["on_manifold_error"], #["target_pose"],
    out_keys=["action"],
).cuda()
optim = torch.optim.Adam(policy.parameters(), lr=2e-5)
batch_size = 1024
iterations = 20000_000
pbar = tqdm.tqdm(range(iterations // batch_size))
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, iterations)
logs = defaultdict(list)

# current_env = env 
current_env = transformed_env.cuda()

for _ in pbar:
    init_td = current_env.reset(current_env.gen_params(batch_size=[batch_size])).cuda()
    rollout = current_env.rollout(5, policy, tensordict=init_td, auto_reset=False).cuda()
    #print(rollout)
    traj_return = rollout["next", "reward"].mean()
    (-traj_return).backward()
    gn = torch.nn.utils.clip_grad_norm_(net.parameters(), 1.0)
    optim.step()
    optim.zero_grad()
    pbar.set_description(
        f"reward: {traj_return: 4.4f}, "
        f"last reward: {rollout[..., -1]['next', 'reward'].mean(): 4.4f} / STD {rollout[..., -1]['next', 'reward'].std():4.4} / MIN {rollout[..., -1]['next', 'reward'].min():4.4} , gradient norm: {gn: 4.4}"        
    )
    logs["return"].append(traj_return.item())
    logs["last_reward"].append(rollout[..., -1]["next", "reward"].mean().item())
    scheduler.step()


def plot():
    import matplotlib
    from matplotlib import pyplot as plt

    is_ipython = "inline" in matplotlib.get_backend()
    if is_ipython:
        from IPython import display

    with plt.ion():
        plt.figure(figsize=(10, 5))
        plt.subplot(1, 2, 1)
        plt.plot(logs["return"])
        plt.title("returns")
        plt.xlabel("iteration")
        plt.subplot(1, 2, 2)
        plt.plot(logs["last_reward"])
        plt.title("last reward")
        plt.xlabel("iteration")
        if is_ipython:
            display.display(plt.gcf())
            display.clear_output(wait=True)
        plt.show()


plot()


  0%|                                                                                                                                                                                                 | 0/19531 [00:00<?, ?it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.8377, last reward: -4.6453 / STD 2.195 / MIN -10.97 , gradient norm:  5.332:   0%|                                                                                               | 1/19531 [00:00<1:23:22,  3.90it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.8593, last reward: -4.6010 / STD 2.145 / MIN -10.91 , gradient norm:  5.938:   0%|                                                                                               | 2/19531 [00:00<1:25:46,  3.79it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.8026, last reward: -4.6185 / STD 2.153 / MIN -11.16 , gradient norm:  5.644:   0%|                                                                                               | 3/19531 [00:00<1:23:03,  3.92it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.8070, last reward: -4.5987 / STD 2.137 / MIN -10.8 , gradient norm:  6.135:   0%|                                                                                                | 4/19531 [00:01<1:24:55,  3.83it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.7492, last reward: -4.5503 / STD 2.197 / MIN -12.03 , gradient norm:  5.771:   0%|                                                                                               | 6/19531 [00:01<1:21:26,  4.00it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024

reward: -2.7162, last reward: -4.5048 / STD 2.181 / MIN -12.16 , gradient norm:  6.048:   0%|                                                                                               | 7/19531 [00:01<1:26:31,  3.76it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.7134, last reward: -4.5162 / STD 2.217 / MIN -11.42 , gradient norm:  5.489:   0%|                                                                                               | 8/19531 [00:02<1:26:21,  3.77it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.7323, last reward: -4.4918 / STD 2.168 / MIN -11.57 , gradient norm:  6.182:   0%|                                                                                               | 9/19531 [00:02<1:25:12,  3.82it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.6893, last reward: -4.4881 / STD 2.295 / MIN -12.26 , gradient norm:  5.449:   0%|                                                                                              | 10/19531 [00:02<1:25:52,  3.79it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.6751, last reward: -4.4286 / STD 2.149 / MIN -11.74 , gradient norm:  5.77:   0%|                                                                                               | 11/19531 [00:02<1:29:03,  3.65it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.6891, last reward: -4.4296 / STD 2.106 / MIN -11.6 , gradient norm:  5.684:   0%|                                                                                               | 12/19531 [00:03<1:27:39,  3.71it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.6531, last reward: -4.3830 / STD 2.164 / MIN -11.15 , gradient norm:  5.552:   0%|                                                                                              | 13/19531 [00:03<1:32:55,  3.50it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.6729, last reward: -4.3955 / STD 2.137 / MIN -11.79 , gradient norm:  5.933:   0%|                                                                                              | 14/19531 [00:03<1:29:25,  3.64it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.6579, last reward: -4.4148 / STD 2.172 / MIN -11.46 , gradient norm:  5.516:   0%|                                                                                              | 15/19531 [00:04<1:29:53,  3.62it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.6623, last reward: -4.3644 / STD 2.09 / MIN -11.02 , gradient norm:  5.361:   0%|                                                                                               | 16/19531 [00:04<1:27:53,  3.70it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.6017, last reward: -4.3042 / STD 2.205 / MIN -11.69 , gradient norm:  5.666:   0%|                                                                                              | 18/19531 [00:04<1:24:52,  3.83it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.6528, last reward: -4.3574 / STD 2.111 / MIN -12.95 , gradient norm:  5.582:   0%|                                                                                              | 19/19531 [00:05<1:22:30,  3.94it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.6529, last reward: -4.4034 / STD 2.244 / MIN -11.53 , gradient norm:  5.343:   0%|                                                                                              | 20/19531 [00:05<1:22:36,  3.94it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.6347, last reward: -4.3407 / STD 2.18 / MIN -11.12 , gradient norm:  5.088:   0%|                                                                                               | 21/19531 [00:05<1:19:47,  4.08it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.5956, last reward: -4.3937 / STD 2.261 / MIN -12.31 , gradient norm:  5.302:   0%|                                                                                              | 22/19531 [00:05<1:17:19,  4.20it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.5982, last reward: -4.3851 / STD 2.232 / MIN -11.39 , gradient norm:  5.309:   0%|                                                                                              | 23/19531 [00:05<1:15:18,  4.32it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.5301, last reward: -4.2330 / STD 2.171 / MIN -11.22 , gradient norm:  5.411:   0%|                                                                                              | 24/19531 [00:06<1:13:55,  4.40it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024

reward: -2.5726, last reward: -4.3115 / STD 2.219 / MIN -11.81 , gradient norm:  5.382:   0%|▏                                                                                             | 26/19531 [00:06<1:17:33,  4.19it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.5170, last reward: -4.2396 / STD 2.186 / MIN -11.34 , gradient norm:  5.537:   0%|▏                                                                                             | 27/19531 [00:06<1:16:54,  4.23it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.4399, last reward: -4.1368 / STD 2.19 / MIN -10.92 , gradient norm:  5.539:   0%|▏                                                                                              | 28/19531 [00:07<1:15:11,  4.32it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.4289, last reward: -4.0819 / STD 2.14 / MIN -11.11 , gradient norm:  5.382:   0%|▏                                                                                              | 29/19531 [00:07<1:17:07,  4.21it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.4685, last reward: -4.1483 / STD 2.137 / MIN -11.48 , gradient norm:  5.493:   0%|▏                                                                                             | 30/19531 [00:07<1:15:35,  4.30it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024

reward: -2.4476, last reward: -4.1318 / STD 2.123 / MIN -11.37 , gradient norm:  5.402:   0%|▏                                                                                             | 31/19531 [00:07<1:20:19,  4.05it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.4454, last reward: -4.1331 / STD 2.231 / MIN -11.97 , gradient norm:  5.137:   0%|▏                                                                                             | 32/19531 [00:08<1:20:37,  4.03it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.4125, last reward: -4.1499 / STD 2.205 / MIN -11.31 , gradient norm:  5.472:   0%|▏                                                                                             | 33/19531 [00:08<1:22:27,  3.94it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.3526, last reward: -4.0074 / STD 2.111 / MIN -10.64 , gradient norm:  5.243:   0%|▏                                                                                             | 34/19531 [00:08<1:25:25,  3.80it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.3962, last reward: -4.0345 / STD 2.127 / MIN -11.74 , gradient norm:  5.496:   0%|▏                                                                                             | 35/19531 [00:08<1:27:09,  3.73it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.3761, last reward: -4.0379 / STD 2.22 / MIN -11.95 , gradient norm:  5.45:   0%|▏                                                                                               | 36/19531 [00:09<1:25:04,  3.82it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.3620, last reward: -3.9722 / STD 2.118 / MIN -11.62 , gradient norm:  5.269:   0%|▏                                                                                             | 38/19531 [00:09<1:24:51,  3.83it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.3428, last reward: -3.9342 / STD 2.111 / MIN -10.54 , gradient norm:  5.029:   0%|▏                                                                                             | 39/19531 [00:09<1:22:00,  3.96it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.3217, last reward: -3.9146 / STD 2.087 / MIN -11.29 , gradient norm:  4.904:   0%|▏                                                                                             | 39/19531 [00:10<1:22:00,  3.96it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.3266, last reward: -3.9273 / STD 2.127 / MIN -11.72 , gradient norm:  5.247:   0%|▏                                                                                             | 40/19531 [00:10<1:24:27,  3.85it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.3266, last reward: -3.9273 / STD 2.127 / MIN -11.72 , gradient norm:  5.247:   0%|▏                                                                                             | 41/19531 [00:10<1:22:46,  3.92it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.2518, last reward: -3.8564 / STD 2.157 / MIN -10.89 , gradient norm:  4.964:   0%|▏                                                                                             | 42/19531 [00:10<1:21:58,  3.96it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.2703, last reward: -3.8131 / STD 2.142 / MIN -12.17 , gradient norm:  5.103:   0%|▏                                                                                             | 43/19531 [00:10<1:21:45,  3.97it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.2778, last reward: -3.9064 / STD 2.07 / MIN -10.4 , gradient norm:  5.007:   0%|▏                                                                                               | 44/19531 [00:11<1:22:34,  3.93it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.2299, last reward: -3.8153 / STD 2.132 / MIN -11.44 , gradient norm:  4.991:   0%|▏                                                                                             | 45/19531 [00:11<1:21:50,  3.97it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.2160, last reward: -3.7910 / STD 2.115 / MIN -10.94 , gradient norm:  5.16:   0%|▏                                                                                              | 46/19531 [00:12<1:24:48,  3.83it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.2037, last reward: -3.7447 / STD 2.117 / MIN -11.32 , gradient norm:  5.132:   0%|▏                                                                                             | 47/19531 [00:12<1:26:54,  3.74it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.2037, last reward: -3.7447 / STD 2.117 / MIN -11.32 , gradient norm:  5.132:   0%|▏                                                                                             | 48/19531 [00:12<1:24:40,  3.83it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.1958, last reward: -3.8236 / STD 2.143 / MIN -10.66 , gradient norm:  4.787:   0%|▏                                                                                             | 49/19531 [00:12<1:23:17,  3.90it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.1380, last reward: -3.6436 / STD 2.066 / MIN -11.49 , gradient norm:  5.268:   0%|▏                                                                                             | 51/19531 [00:13<1:19:59,  4.06it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.2025, last reward: -3.7755 / STD 2.136 / MIN -11.28 , gradient norm:  4.993:   0%|▎                                                                                             | 52/19531 [00:13<1:16:50,  4.23it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024

reward: -2.2050, last reward: -3.7447 / STD 2.09 / MIN -11.98 , gradient norm:  5.135:   0%|▎                                                                                              | 53/19531 [00:13<1:21:25,  3.99it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.1756, last reward: -3.7742 / STD 2.179 / MIN -11.12 , gradient norm:  5.146:   0%|▎                                                                                             | 54/19531 [00:13<1:25:30,  3.80it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.1303, last reward: -3.6616 / STD 2.067 / MIN -10.48 , gradient norm:  4.839:   0%|▎                                                                                             | 55/19531 [00:14<1:26:36,  3.75it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.1156, last reward: -3.6319 / STD 2.106 / MIN -11.29 , gradient norm:  5.005:   0%|▎                                                                                             | 56/19531 [00:14<1:29:33,  3.62it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.0906, last reward: -3.5899 / STD 2.044 / MIN -11.17 , gradient norm:  4.962:   0%|▎                                                                                             | 57/19531 [00:14<1:29:47,  3.61it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.0963, last reward: -3.6115 / STD 2.079 / MIN -10.56 , gradient norm:  5.049:   0%|▎                                                                                             | 58/19531 [00:14<1:31:27,  3.55it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.0388, last reward: -3.4591 / STD 2.01 / MIN -11.17 , gradient norm:  4.626:   0%|▎                                                                                              | 59/19531 [00:15<1:30:07,  3.60it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.0613, last reward: -3.5192 / STD 2.018 / MIN -10.71 , gradient norm:  5.113:   0%|▎                                                                                             | 60/19531 [00:15<1:27:44,  3.70it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.0096, last reward: -3.4855 / STD 2.019 / MIN -11.6 , gradient norm:  4.737:   0%|▎                                                                                              | 61/19531 [00:15<1:35:42,  3.39it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.0781, last reward: -3.6140 / STD 2.122 / MIN -10.41 , gradient norm:  4.89:   0%|▎                                                                                              | 63/19531 [00:16<1:35:00,  3.42it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -2.0199, last reward: -3.4407 / STD 1.934 / MIN -10.81 , gradient norm:  5.217:   0%|▎                                                                                             | 64/19531 [00:16<1:29:35,  3.62it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024

reward: -1.9631, last reward: -3.3850 / STD 1.98 / MIN -11.07 , gradient norm:  5.038:   0%|▎                                                                                              | 66/19531 [00:17<1:23:20,  3.89it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.9315, last reward: -3.2796 / STD 1.896 / MIN -11.57 , gradient norm:  5.08:   0%|▎                                                                                              | 67/19531 [00:17<1:20:25,  4.03it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024

reward: -1.9625, last reward: -3.3434 / STD 1.909 / MIN -10.42 , gradient norm:  5.115:   0%|▎                                                                                             | 68/19531 [00:17<1:26:33,  3.75it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.9585, last reward: -3.3559 / STD 1.979 / MIN -10.31 , gradient norm:  4.871:   0%|▎                                                                                             | 69/19531 [00:17<1:25:03,  3.81it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.9380, last reward: -3.2906 / STD 1.934 / MIN -12.25 , gradient norm:  4.979:   0%|▎                                                                                             | 70/19531 [00:18<1:25:33,  3.79it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.9164, last reward: -3.3248 / STD 1.89 / MIN -10.62 , gradient norm:  4.848:   0%|▎                                                                                              | 71/19531 [00:18<1:24:45,  3.83it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.9214, last reward: -3.3205 / STD 2.012 / MIN -11.18 , gradient norm:  4.75:   0%|▎                                                                                              | 72/19531 [00:18<1:23:15,  3.90it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.8731, last reward: -3.2314 / STD 1.929 / MIN -10.75 , gradient norm:  5.147:   0%|▎                                                                                             | 73/19531 [00:19<1:27:29,  3.71it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.8458, last reward: -3.1944 / STD 1.893 / MIN -12.6 , gradient norm:  4.805:   0%|▎                                                                                              | 74/19531 [00:19<1:27:26,  3.71it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.8288, last reward: -3.1571 / STD 1.876 / MIN -12.53 , gradient norm:  4.913:   0%|▎                                                                                             | 75/19531 [00:19<1:30:06,  3.60it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.8288, last reward: -3.1571 / STD 1.876 / MIN -12.53 , gradient norm:  4.913:   0%|▎                                                                                             | 76/19531 [00:19<1:30:24,  3.59it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.8311, last reward: -3.1194 / STD 1.792 / MIN -10.98 , gradient norm:  4.992:   0%|▎                                                                                             | 77/19531 [00:20<1:29:33,  3.62it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.8156, last reward: -3.1023 / STD 1.806 / MIN -10.65 , gradient norm:  4.85:   0%|▍                                                                                              | 78/19531 [00:20<1:29:09,  3.64it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.8010, last reward: -3.1127 / STD 1.842 / MIN -10.08 , gradient norm:  5.074:   0%|▍                                                                                             | 79/19531 [00:20<1:29:00,  3.64it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.7669, last reward: -3.0877 / STD 1.913 / MIN -11.38 , gradient norm:  4.801:   0%|▍                                                                                             | 80/19531 [00:20<1:28:00,  3.68it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.7566, last reward: -3.0248 / STD 1.787 / MIN -11.93 , gradient norm:  5.024:   0%|▍                                                                                             | 81/19531 [00:21<1:29:30,  3.62it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.7760, last reward: -3.0520 / STD 1.796 / MIN -10.98 , gradient norm:  5.205:   0%|▍                                                                                             | 83/19531 [00:21<1:29:37,  3.62it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024

reward: -1.6903, last reward: -2.9291 / STD 1.808 / MIN -11.62 , gradient norm:  4.627:   0%|▍                                                                                             | 85/19531 [00:22<1:25:31,  3.79it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024

reward: -1.6660, last reward: -2.8480 / STD 1.705 / MIN -10.18 , gradient norm:  4.901:   0%|▍                                                                                             | 87/19531 [00:22<1:22:56,  3.91it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.7131, last reward: -2.9679 / STD 1.851 / MIN -10.3 , gradient norm:  5.094:   0%|▍                                                                                              | 88/19531 [00:23<1:24:02,  3.86it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.6437, last reward: -2.8485 / STD 1.734 / MIN -11.25 , gradient norm:  5.179:   0%|▍                                                                                             | 89/19531 [00:23<1:23:11,  3.90it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024

reward: -1.6371, last reward: -2.7900 / STD 1.662 / MIN -10.79 , gradient norm:  5.019:   0%|▍                                                                                             | 90/19531 [00:23<1:24:33,  3.83it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.6340, last reward: -2.7944 / STD 1.618 / MIN -11.35 , gradient norm:  5.187:   0%|▍                                                                                             | 91/19531 [00:23<1:26:52,  3.73it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.6140, last reward: -2.7799 / STD 1.63 / MIN -9.984 , gradient norm:  5.157:   0%|▍                                                                                              | 92/19531 [00:24<1:28:56,  3.64it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.6074, last reward: -2.7648 / STD 1.656 / MIN -10.61 , gradient norm:  5.159:   0%|▍                                                                                             | 93/19531 [00:24<1:28:01,  3.68it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.5903, last reward: -2.7263 / STD 1.71 / MIN -10.03 , gradient norm:  4.872:   0%|▍                                                                                              | 94/19531 [00:24<1:25:18,  3.80it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.6061, last reward: -2.7682 / STD 1.643 / MIN -10.3 , gradient norm:  4.992:   0%|▍                                                                                              | 95/19531 [00:24<1:23:48,  3.87it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.5690, last reward: -2.6956 / STD 1.638 / MIN -9.965 , gradient norm:  4.81:   0%|▍                                                                                              | 96/19531 [00:25<1:27:21,  3.71it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.5786, last reward: -2.6953 / STD 1.606 / MIN -9.734 , gradient norm:  5.02:   0%|▍                                                                                              | 97/19531 [00:25<1:27:25,  3.70it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.5404, last reward: -2.6093 / STD 1.478 / MIN -9.476 , gradient norm:  5.167:   1%|▍                                                                                             | 99/19531 [00:26<1:25:48,  3.77it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024

reward: -1.5090, last reward: -2.5195 / STD 1.461 / MIN -11.27 , gradient norm:  5.029:   1%|▍                                                                                            | 100/19531 [00:26<1:28:03,  3.68it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.5434, last reward: -2.6538 / STD 1.566 / MIN -9.42 , gradient norm:  5.185:   1%|▍                                                                                             | 101/19531 [00:26<1:30:14,  3.59it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.5033, last reward: -2.5683 / STD 1.484 / MIN -9.799 , gradient norm:  4.984:   1%|▍                                                                                            | 102/19531 [00:26<1:31:21,  3.54it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.5011, last reward: -2.5372 / STD 1.503 / MIN -9.877 , gradient norm:  5.154:   1%|▍                                                                                            | 103/19531 [00:27<1:28:24,  3.66it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.4893, last reward: -2.5244 / STD 1.499 / MIN -9.993 , gradient norm:  4.971:   1%|▍                                                                                            | 104/19531 [00:27<1:26:38,  3.74it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.4947, last reward: -2.5544 / STD 1.529 / MIN -9.957 , gradient norm:  5.276:   1%|▍                                                                                            | 105/19531 [00:27<1:26:47,  3.73it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.4302, last reward: -2.4118 / STD 1.424 / MIN -9.329 , gradient norm:  5.17:   1%|▌                                                                                             | 107/19531 [00:28<1:28:58,  3.64it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.3899, last reward: -2.3431 / STD 1.337 / MIN -9.479 , gradient norm:  4.957:   1%|▌                                                                                            | 108/19531 [00:28<1:24:56,  3.81it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.4307, last reward: -2.4184 / STD 1.412 / MIN -9.782 , gradient norm:  4.958:   1%|▌                                                                                            | 109/19531 [00:28<1:20:38,  4.01it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.4153, last reward: -2.4069 / STD 1.403 / MIN -11.42 , gradient norm:  5.024:   1%|▌                                                                                            | 110/19531 [00:28<1:17:14,  4.19it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024

reward: -1.3588, last reward: -2.2476 / STD 1.24 / MIN -9.741 , gradient norm:  4.843:   1%|▌                                                                                             | 111/19531 [00:29<1:23:01,  3.90it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.3246, last reward: -2.1927 / STD 1.155 / MIN -11.01 , gradient norm:  5.153:   1%|▌                                                                                            | 112/19531 [00:29<1:22:46,  3.91it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.3403, last reward: -2.2311 / STD 1.206 / MIN -9.784 , gradient norm:  5.278:   1%|▌                                                                                            | 113/19531 [00:29<1:26:01,  3.76it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.3186, last reward: -2.1940 / STD 1.159 / MIN -9.722 , gradient norm:  4.971:   1%|▌                                                                                            | 114/19531 [00:29<1:25:22,  3.79it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.3361, last reward: -2.2523 / STD 1.303 / MIN -9.026 , gradient norm:  5.144:   1%|▌                                                                                            | 115/19531 [00:30<1:27:55,  3.68it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.3356, last reward: -2.2407 / STD 1.264 / MIN -9.39 , gradient norm:  5.001:   1%|▌                                                                                             | 116/19531 [00:30<1:27:31,  3.70it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.3206, last reward: -2.1900 / STD 1.158 / MIN -8.954 , gradient norm:  4.91:   1%|▌                                                                                             | 117/19531 [00:30<1:28:53,  3.64it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.3090, last reward: -2.1824 / STD 1.184 / MIN -9.219 , gradient norm:  4.903:   1%|▌                                                                                            | 118/19531 [00:31<1:30:04,  3.59it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.2698, last reward: -2.1363 / STD 1.247 / MIN -9.216 , gradient norm:  4.985:   1%|▌                                                                                            | 119/19531 [00:31<1:26:53,  3.72it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.2483, last reward: -2.0860 / STD 1.134 / MIN -8.683 , gradient norm:  5.008:   1%|▌                                                                                            | 120/19531 [00:31<1:28:45,  3.64it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.2788, last reward: -2.1536 / STD 1.269 / MIN -9.898 , gradient norm:  4.904:   1%|▌                                                                                            | 121/19531 [00:31<1:26:36,  3.73it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.2375, last reward: -2.1019 / STD 1.222 / MIN -8.878 , gradient norm:  4.659:   1%|▌                                                                                            | 122/19531 [00:32<1:25:33,  3.78it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.2377, last reward: -2.0686 / STD 1.124 / MIN -9.719 , gradient norm:  4.643:   1%|▌                                                                                            | 123/19531 [00:32<1:25:50,  3.77it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.2377, last reward: -2.0686 / STD 1.124 / MIN -9.719 , gradient norm:  4.643:   1%|▌                                                                                            | 124/19531 [00:32<1:27:37,  3.69it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.2016, last reward: -1.9947 / STD 1.066 / MIN -9.572 , gradient norm:  4.849:   1%|▌                                                                                            | 125/19531 [00:32<1:28:06,  3.67it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.2117, last reward: -2.0204 / STD 1.133 / MIN -10.23 , gradient norm:  4.893:   1%|▌                                                                                            | 126/19531 [00:33<1:29:39,  3.61it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.1842, last reward: -1.9762 / STD 1.065 / MIN -9.342 , gradient norm:  4.695:   1%|▌                                                                                            | 127/19531 [00:33<1:31:05,  3.55it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.1850, last reward: -1.9688 / STD 0.9692 / MIN -8.734 , gradient norm:  4.91:   1%|▌                                                                                            | 128/19531 [00:33<1:41:04,  3.20it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.1364, last reward: -1.8830 / STD 1.026 / MIN -9.182 , gradient norm:  4.709:   1%|▌                                                                                            | 129/19531 [00:34<1:35:19,  3.39it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.1723, last reward: -1.9576 / STD 1.095 / MIN -8.893 , gradient norm:  4.58:   1%|▋                                                                                             | 130/19531 [00:34<1:31:05,  3.55it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.1381, last reward: -1.8974 / STD 1.019 / MIN -8.524 , gradient norm:  4.487:   1%|▌                                                                                            | 131/19531 [00:34<1:29:56,  3.59it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.1261, last reward: -1.8600 / STD 0.9918 / MIN -10.0 , gradient norm:  4.588:   1%|▋                                                                                            | 132/19531 [00:35<1:31:42,  3.53it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.1324, last reward: -1.8791 / STD 1.015 / MIN -8.285 , gradient norm:  4.768:   1%|▋                                                                                            | 133/19531 [00:35<1:28:54,  3.64it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.0886, last reward: -1.7987 / STD 0.9072 / MIN -9.931 , gradient norm:  4.623:   1%|▋                                                                                           | 134/19531 [00:35<1:29:30,  3.61it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.0929, last reward: -1.8147 / STD 0.9745 / MIN -8.963 , gradient norm:  4.538:   1%|▋                                                                                           | 135/19531 [00:35<1:30:27,  3.57it/s]

len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])
len(theta_deltas.shape) 3
theta_deltas torch.Size([1024, 6]) torch.Size([1024, 6]) torch.Size([1024, 6])


reward: -1.0889, last reward: -1.7950 / STD 0.9053 / MIN -9.207 , gradient norm:  4.549:   1%|▋                                                                                           | 136/19531 [00:36<1:28:05,  3.67it/s]