In [11]:
import numpy as np
import pandas as pd
from typing import Optional, Tuple
import sys

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

from pickle import dump
from sklearn.preprocessing import MinMaxScaler
import time
from tqdm import tqdm

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import os

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    print("cuda is available")
else:
    print("cuda is NOT available")


import shutil
import warnings
import pickle

warnings.filterwarnings("ignore")
import logging

logging.disable(logging.CRITICAL)

from nn_functions import surrogate
from moving_average import moving_average_1d
import copy
from GAMMA_obj_temp_depth import GAMMA_obj

import sys
sys.path.append('../1_model')
from TiDE import TideModule, quantile_loss, TiDE_forward


cuda is available


In [12]:
INPUT_DATA_DIR = "data"
SIM_DIR_NAME = "single_track_square"
BASE_LASER_FILE_DIR = "laser_power_profiles/csv"
CLOUD_TARGET_BASE_PATH = "result"
solidus_temp = 1600
window = 50
sim_interval = 5
init_runs = 60 #50 

GAMMA_class = GAMMA_obj(INPUT_DATA_DIR, SIM_DIR_NAME, BASE_LASER_FILE_DIR, CLOUD_TARGET_BASE_PATH, solidus_temp, window, init_runs, sim_interval,laser_power_number=1)
init_avg = GAMMA_class.run_initial_steps()
init_avg = torch.tensor(init_avg,dtype=torch.float32)[:,-window:] # shape = [2,50]

100%|██████████| 300/300 [00:06<00:00, 45.14it/s]


In [13]:
df_one_print = pd.read_csv('single_track_ref.csv')

loc_X_list = df_one_print["X"].to_numpy().reshape(-1,1)
loc_Y_list = df_one_print["Y"].to_numpy().reshape(-1,1)
loc_Z_list = df_one_print["Z"].to_numpy().reshape(-1,1)
dist_X_list = df_one_print["Dist_to_nearest_X"].to_numpy().reshape(-1,1)
dist_Y_list = df_one_print["Dist_to_nearest_Y"].to_numpy().reshape(-1,1)
scan_spd_list = df_one_print["scanning_speed"].to_numpy().reshape(-1,1)

# laser on/off indicator
laser_on_off = df_one_print["laser_power_number"].to_numpy().reshape(-1,1)

# laser power
laser_power_ref = torch.tensor(df_one_print["Laser_power"].to_numpy().reshape(-1,1),dtype=torch.float32)
laser_power_past = laser_power_ref[:window]

# fix_covariates = torch.tensor(np.concatenate((loc_X_list,loc_Y_list,loc_Z_list,dist_X_list,dist_Y_list,scan_spd_list, laser_on_off),axis=1),dtype=torch.float32)
fix_covariates = torch.tensor(np.concatenate((loc_Z_list,dist_X_list,dist_Y_list),axis=1),dtype=torch.float32)

# temporary ref
# apply moving average for mp temp
mp_temp_raw = df_one_print["melt_pool_temperature"].to_numpy()
mp_temp_mv = moving_average_1d(mp_temp_raw,4)
mp_temp = copy.deepcopy(mp_temp_raw)
mp_temp[1:-2] = mp_temp_mv
mp_temp = mp_temp

mp_temp_ref = torch.tensor(mp_temp,dtype=torch.float32)

In [14]:
print("laser_power_ref shape:", laser_power_ref.size())
print("laser_power_ref min:", torch.min(laser_power_ref).item())
print("laser_power_ref max:", torch.max(laser_power_ref).item())

print("fix_covariates shape:", fix_covariates.size())

col_min = torch.min(fix_covariates, dim=0).values
col_max = torch.max(fix_covariates, dim=0).values

print("fix_covariates column-wise min:", col_min.cpu().numpy())
print("fix_covariates column-wise max:", col_max.cpu().numpy())

nan_indices = torch.nonzero(torch.isnan(mp_temp_ref), as_tuple=True)[0]
print("NaN indices:", nan_indices)

valid_values = mp_temp_ref[~torch.isnan(mp_temp_ref)]

print("mp_temp_ref (valid) min:", valid_values.min().item())
print("mp_temp_ref (valid) max:", valid_values.max().item())



laser_power_ref shape: torch.Size([6295, 1])
laser_power_ref min: 531.5235595703125
laser_power_ref max: 600.7783203125
fix_covariates shape: torch.Size([6295, 3])
fix_covariates column-wise min: [0.   0.75 0.75]
fix_covariates column-wise max: [ 7.5 20.  20. ]
NaN indices: tensor([6292, 6294])
mp_temp_ref (valid) min: 436.1014099121094
mp_temp_ref (valid) max: 3834.751220703125


In [15]:
import torch

# values from user
x_min = torch.tensor([[0.0, 0.75, 0.75, 504.26]], dtype=torch.float32).to(device)
x_max = torch.tensor([[7.5, 20.0, 20.0, 732.298]], dtype=torch.float32).to(device)

y_min = torch.tensor([[436.608, -0.559]], dtype=torch.float32).to(device)
y_max = torch.tensor([[4509.855, 0.551]], dtype=torch.float32).to(device)


In [16]:
def normalize_x(x, dim_id):
    x_min_selected = x_min[0, dim_id]
    x_max_selected = x_max[0, dim_id]
    return 2 * (x - x_min_selected) / (x_max_selected - x_min_selected) - 1

def inverse_normalize_x(x_norm, dim_id):
    x_min_selected = x_min[0, dim_id]
    x_max_selected = x_max[0, dim_id]
    return 0.5 * (x_norm + 1) * (x_max_selected - x_min_selected) + x_min_selected

def normalize_y(y, dim_id):
    y_min_selected = y_min[0, dim_id]
    y_max_selected = y_max[0, dim_id]
    return 2 * (y - y_min_selected) / (y_max_selected - y_min_selected) - 1

def inverse_normalize_y(y_norm, dim_id):
    y_min_selected = y_min[0, dim_id]
    y_max_selected = y_max[0, dim_id]
    return 0.5 * (y_norm + 1) * (y_max_selected - y_min_selected) + y_min_selected


In [17]:
def run_one_step_policy(GAMMA_obj, policy_model, P, window):
    # Reference trajectory for temperature (original scale)
    mp_temp_ref = GAMMA_obj.ref[GAMMA_obj.MPC_counter:GAMMA_obj.MPC_counter + P]  # [50]

    '''
    print("mp_temp_ref shape:", mp_temp_ref.shape)
    print("mp_temp_ref:", mp_temp_ref[:,50,:])
    '''

    mp_temp_ref_t = torch.as_tensor(mp_temp_ref, dtype=torch.float32, device=device).reshape(1, P, 1)  # [1, P, 1]

    # Past input (original scale)
    mp_temp_past_t = GAMMA_obj.x_past.T.unsqueeze(0).to(device)  # [1, 50, 2]
    laser_past_t = GAMMA_obj.u_past.view(1, -1, 1).to(device)     # [1, 50, 1]

    '''
    print("mp_temp_ref_t shape:", mp_temp_ref_t.shape)  # [1, 50, 1]
    print("mp_temp_ref_t:", mp_temp_ref_t[:,50,:])
    print("laser_past_t shape:", laser_past_t.shape)  # [1, 50, 1]
    print("laser_past_t:", laser_past_t[:,50,:])
    '''

    fix_cov_past = GAMMA_obj.fix_cov_all[GAMMA_obj.MPC_counter - window:GAMMA_obj.MPC_counter, :]
    fix_cov_past_t = torch.as_tensor(fix_cov_past, dtype=torch.float32, device=device).unsqueeze(0)  # [1, 50, 3]

    # Normalize
    fix_cov_past_s = normalize_x(fix_cov_past_t, dim_id=[0, 1, 2])    # assume features 0~2 in x
    laser_past_s = normalize_x(laser_past_t, dim_id=[3])             # laser power at feature 3
    mp_temp_past_s = normalize_y(mp_temp_past_t, dim_id=[0, 1])       # temp and depth
    policy_in_past = torch.cat((fix_cov_past_s, laser_past_s, mp_temp_past_s), dim=2)  # [1, 50, 6]

    # Future covariates
    fix_cov_future = GAMMA_obj.fix_cov_all[GAMMA_obj.MPC_counter:GAMMA_obj.MPC_counter + P, :]
    fix_cov_future_t = torch.as_tensor(fix_cov_future, dtype=torch.float32, device=device).unsqueeze(0)  # [1, P, 3]
    fix_cov_future_s = normalize_x(fix_cov_future_t, dim_id=[0, 1, 2])
    mp_temp_ref_s = normalize_y(mp_temp_ref_t, dim_id=[0])[:, :, 0].unsqueeze(-1)

    # Constraints
    depth_upper_const = 0.4126
    depth_lower_const = 0.1423
    y_const_t = torch.tensor([[depth_upper_const, depth_lower_const]] * P, dtype=torch.float32, device=device).reshape(1, P, 2)
    y_const_s = normalize_y(y_const_t, dim_id=[1])  # assume dim=1 is depth

    policy_in_future = torch.cat((fix_cov_future_s, mp_temp_ref_s, y_const_s), dim=2)  # [1, P, 6]

    # Policy inference
    print("policy_in_past shape:", policy_in_past.shape)  # [1, 50, 6]
    print("policy_in_future shape:", policy_in_future.shape)  # [1, P, 6]
    print("policy_in_past_first component:", policy_in_past[0, 0, :])  # Last past input
    print("policy_in_future_first component:", policy_in_future[0, 0, :])  # First future input
    print("policy_in_past_last component:", policy_in_past[0, -1, :])  # Last past input
    print("policy_in_future_last component:", policy_in_future[0, -1, :])  # First future input

    u_pred = policy_model((policy_in_past, policy_in_future))
    print("u_pred shape:", u_pred.shape)  # [1, P, 1]
    print("u_pred:", u_pred.T)  

    u_first = u_pred[0,0]
    u_applied = float(inverse_normalize_x(u_first, dim_id=[3]))  # laser power

    # Simulate one step
    x_current, depth_current = GAMMA_obj.run_sim_interval(u_applied)

    # Update past sequence
    GAMMA_obj.x_past[:, :-1] = GAMMA_obj.x_past[:, 1:]
    GAMMA_obj.x_past[0, -1] = x_current
    GAMMA_obj.x_past[1, -1] = depth_current

    GAMMA_obj.u_past[:-1] = GAMMA_obj.u_past[1:].clone()
    GAMMA_obj.u_past[-1] = u_applied

    # Save state
    GAMMA_obj.MPC_counter += 1

    # FIXED: device-matched saving
    new_state = torch.tensor([[x_current, depth_current]], device=GAMMA_obj.x_past_save.device)
    GAMMA_obj.x_past_save = torch.cat((GAMMA_obj.x_past_save, new_state), dim=0)

    new_u = torch.tensor([[u_applied]], device=GAMMA_obj.u_past_save.device)
    GAMMA_obj.u_past_save = torch.cat((GAMMA_obj.u_past_save, new_u), dim=0)

In [18]:
import matplotlib.pyplot as plt
import numpy as np

def plot_fig(MPC_GAMMA, N_step, save_path=None):
    plt.figure(figsize=[8, 6])

    plt.subplot(3, 1, 1)
    plt.plot(MPC_GAMMA.x_past_save[:N_step, 0], label="GAMMA simulation")
    plt.plot(MPC_GAMMA.ref[:N_step], label="Reference")
    plt.legend()
    plt.xlabel("MPC time step (*** sec/iteration)")
    plt.ylabel("Melt Pool Temperature (K)")

    plt.subplot(3, 1, 2)
    plt.plot(MPC_GAMMA.x_past_save[:N_step, 1], label="GAMMA simulation")
    plt.plot(np.linspace(0, N_step, N_step), 0.225 * np.ones(N_step), linestyle='--', label="Upper Bound")
    plt.plot(np.linspace(0, N_step, N_step), 0.075 * np.ones(N_step), linestyle='--', label="Lower Bound")
    plt.xlabel("MPC time step (*** sec/iteration)")
    plt.ylabel("Melt Pool Depth (mm)")
    plt.legend()

    plt.subplot(3, 1, 3)
    plt.plot(MPC_GAMMA.u_past_save[:N_step])
    plt.ylabel("Laser Power (W)")
    plt.xlabel("MPC time step (*** sec/iteration)")
    plt.xlim(0, N_step)

    if save_path:
        plt.tight_layout()
        plt.savefig(save_path)
        plt.close()
        print(f"Saved plot to {save_path}")
    else:
        plt.tight_layout()
        plt.show()


In [19]:
print(laser_power_past.size())
print(laser_power_past)



torch.Size([50, 1])
tensor([[533.8476],
        [533.8514],
        [533.8552],
        [533.8588],
        [533.8624],
        [533.8660],
        [533.8694],
        [533.8729],
        [533.8762],
        [533.8795],
        [533.8827],
        [533.8858],
        [533.8889],
        [533.8918],
        [533.8948],
        [533.8976],
        [533.9004],
        [533.9031],
        [533.9058],
        [533.9083],
        [533.9108],
        [533.9133],
        [533.9156],
        [533.9180],
        [533.9202],
        [533.9224],
        [533.9245],
        [533.9265],
        [533.9285],
        [533.9304],
        [533.9323],
        [533.9340],
        [533.9357],
        [533.9374],
        [533.9390],
        [533.9404],
        [533.9419],
        [533.9433],
        [533.9446],
        [533.9459],
        [533.9470],
        [533.9482],
        [533.9492],
        [533.9502],
        [533.9511],
        [533.9520],
        [533.9528],
        [533.9536],
        [533.9542],


In [20]:
import importlib
import policy
importlib.reload(policy)
from policy import PolicyNN

from tqdm import tqdm
import torch

P = 50
window = 50
N_step = len(mp_temp_ref) - init_runs + P

model = PolicyNN(
    past_input_dim=6,
    future_input_dim=6,
    output_dim=1,
    p=P,
    window=window,
    hidden_dim=1024,
    n_layers=3,
    dropout_p=0.1
).to(device)

model_path = "/home/ftk3187/github/DPC_research/02_DED/2_policy/trainresults/policy_model_0722_3L_1024H_s0_c0.pth"
# model_path = "/home/ftk3187/github/DPC_research/02_DED/2_policy/trainresults/policy_model_0716_epoch100_3L_1024H_s2_c2.pth"
model.load_state_dict(torch.load(model_path, map_location="cpu"))
model.eval()

# 초기화
GAMMA_class.ref = mp_temp_ref[10:].clone()
GAMMA_class.fix_cov_all = fix_covariates[10:].clone()
GAMMA_class.x_past = init_avg[:,-50:].clone()
GAMMA_class.u_past = laser_power_past[-50:].clone()

GAMMA_class.x_hat_current = GAMMA_class.x_past[:, -1]
GAMMA_class.x_sys_current = GAMMA_class.x_past[:, -1].reshape(2, 1)

GAMMA_class.x_past_save = GAMMA_class.x_past.T.clone()
GAMMA_class.u_past_save = GAMMA_class.u_past.clone()
GAMMA_class.MPC_counter = window

# 실행 루프
for i in tqdm(range(N_step - P)):
    run_one_step_policy(GAMMA_class, model, P=P, window=window)

    # 원하는 구간마다 저장
    if i % 1000 == 0:
        print(GAMMA_class.u_past_save.size())
        print(GAMMA_class.u_past_save)
        # plot_fig(GAMMA_class, N_step)


  0%|          | 0/6235 [00:00<?, ?it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.5842, -1.0000, -0.7405, -0.0450, -0.7402], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.7154, -1.0000, -0.0175,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-8.0000e-01,  6.8942e-01, -1.0000e+00, -7.3956e-01,  4.0293e-04,
        -6.7398e-01], device='cuda:0')
policy_in_future_last component: tensor([-0.8000,  0.0109, -1.0000, -0.0167,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.2977],
         [ 0.7053],
         [ 0.9353],
         [ 1.6759],
         [ 5.2705],
         [ 2.5240],
         [ 2.9497],
         [ 4.7353],
         [ 4.6564],
         [ 4.5267],
         [-1.7325],
         [ 0.2982],
         [-0.3515],
         [-1.2579],
         [-0.9179],
         [-0.7537],
         [ 0.0094],
         [-0.0702],
         [ 6.0531],
 

  0%|          | 1/6235 [00:00<12:37,  8.23it/s]

torch.Size([51, 1])
tensor([[533.8476],
        [533.8514],
        [533.8552],
        [533.8588],
        [533.8624],
        [533.8660],
        [533.8694],
        [533.8729],
        [533.8762],
        [533.8795],
        [533.8827],
        [533.8858],
        [533.8889],
        [533.8918],
        [533.8948],
        [533.8976],
        [533.9004],
        [533.9031],
        [533.9058],
        [533.9083],
        [533.9108],
        [533.9133],
        [533.9156],
        [533.9180],
        [533.9202],
        [533.9224],
        [533.9245],
        [533.9265],
        [533.9285],
        [533.9304],
        [533.9323],
        [533.9340],
        [533.9357],
        [533.9374],
        [533.9390],
        [533.9404],
        [533.9419],
        [533.9433],
        [533.9446],
        [533.9459],
        [533.9470],
        [533.9482],
        [533.9492],
        [533.9502],
        [533.9511],
        [533.9520],
        [533.9528],
        [533.9536],
        [533.9542],


  0%|          | 2/6235 [00:00<12:21,  8.41it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.5322, -1.0000, -0.7404, -0.0265, -0.7264], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.7674, -1.0000, -0.0217,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.7414, -1.0000,  0.2541,  0.1600, -0.6271], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.0410, -1.0000, -0.0157,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1712],
         [ 0.5816],
         [ 0.8497],
         [ 1.5551],
         [ 5.2043],
         [ 2.4260],
         [ 2.8833],
         [ 4.6895],
         [ 4.6761],
         [ 4.5352],
         [-1.8837],
         [ 0.1436],
         [-0.4692],
         [-1.3101],
         [-0.8903],
         [-0.6961],
         [ 0.0512],
         [-0.0501],
         [ 6.1213],
         [ 4.2626],
         [ 0.

  0%|          | 3/6235 [00:00<12:17,  8.45it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.5062, -1.0000, -0.7404, -0.0294, -0.7154], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.7934, -1.0000, -0.0205,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.7674, -1.0000,  0.1712,  0.2251, -0.5719], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.0670, -1.0000, -0.0166,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1295],
         [ 0.5403],
         [ 0.8095],
         [ 1.5146],
         [ 5.1636],
         [ 2.3978],
         [ 2.8627],
         [ 4.6659],
         [ 4.6778],
         [ 4.5369],
         [-1.9653],
         [ 0.0691],
         [-0.5240],
         [-1.3340],
         [-0.8797],
         [-0.6813],
         [ 0.0552],
         [-0.0573],
         [ 6.1363],
         [ 4.2619],
         [ 0.

  0%|          | 4/6235 [00:00<12:16,  8.46it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.4803, -1.0000, -0.7404, -0.0286, -0.7043], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.8194, -1.0000, -0.0149,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.7934, -1.0000,  0.1295,  0.2528, -0.5526], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.0930, -1.0000, -0.0192,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1409],
         [ 0.5345],
         [ 0.7992],
         [ 1.5229],
         [ 5.1764],
         [ 2.4129],
         [ 2.8625],
         [ 4.6387],
         [ 4.6496],
         [ 4.5165],
         [-2.0202],
         [ 0.0254],
         [-0.5527],
         [-1.3515],
         [-0.8945],
         [-0.7045],
         [ 0.0168],
         [-0.0932],
         [ 6.0877],
         [ 4.2121],
         [ 0.

  0%|          | 5/6235 [00:00<12:15,  8.47it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.4543, -1.0000, -0.7403, -0.0291, -0.7016], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.8454, -1.0000, -0.0124,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.8194, -1.0000,  0.1409,  0.2620, -0.5388], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.1190, -1.0000, -0.0197,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 1.4322e-01],
         [ 5.2405e-01],
         [ 7.9838e-01],
         [ 1.5235e+00],
         [ 5.1759e+00],
         [ 2.4164e+00],
         [ 2.8583e+00],
         [ 4.6186e+00],
         [ 4.6337e+00],
         [ 4.5025e+00],
         [-2.0617e+00],
         [-4.3945e-04],
         [-5.6850e-01],
         [-1.3549e+00],
         [-8.9460e-01],
         [-7.0603e-01],
         [ 3.3724e-03],
     

  0%|          | 6/6235 [00:00<12:14,  8.48it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.4283, -1.0000, -0.7403, -0.0309, -0.6988], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.8714, -1.0000, -0.0106,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.8454, -1.0000,  0.1432,  0.2685, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.1450, -1.0000, -0.0163,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1180],
         [ 0.4973],
         [ 0.7891],
         [ 1.4882],
         [ 5.1269],
         [ 2.3880],
         [ 2.8438],
         [ 4.6017],
         [ 4.6417],
         [ 4.5045],
         [-2.0972],
         [-0.0239],
         [-0.5807],
         [-1.3421],
         [-0.8624],
         [-0.6648],
         [ 0.0363],
         [-0.1036],
         [ 6.1143],
         [ 4.2380],
         [ 0.

  0%|          | 7/6235 [00:00<12:14,  8.48it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.4023, -1.0000, -0.7403, -0.0262, -0.6988], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.8974, -1.0000, -0.0165,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.8714, -1.0000,  0.1180,  0.2666, -0.5168], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.1710, -1.0000, -0.0224,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0757],
         [ 0.4617],
         [ 0.7709],
         [ 1.4594],
         [ 5.0865],
         [ 2.3702],
         [ 2.8386],
         [ 4.5853],
         [ 4.6510],
         [ 4.5107],
         [-2.1391],
         [-0.0529],
         [-0.5945],
         [-1.3284],
         [-0.8292],
         [-0.6268],
         [ 0.0653],
         [-0.0955],
         [ 6.1474],
         [ 4.2694],
         [ 0.

  0%|          | 8/6235 [00:00<12:17,  8.45it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.3763, -1.0000, -0.7403, -0.0239, -0.6988], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.9234, -1.0000, -0.0162,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.8974, -1.0000,  0.0757,  0.2706, -0.5195], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.1970, -1.0000, -0.0184,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0503],
         [ 0.4378],
         [ 0.7568],
         [ 1.4485],
         [ 5.0685],
         [ 2.3658],
         [ 2.8355],
         [ 4.5710],
         [ 4.6516],
         [ 4.5112],
         [-2.1693],
         [-0.0741],
         [-0.6066],
         [-1.3258],
         [-0.8195],
         [-0.6180],
         [ 0.0660],
         [-0.1037],
         [ 6.1530],
         [ 4.2758],
         [ 0.

  0%|          | 9/6235 [00:01<12:15,  8.47it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.3503, -1.0000, -0.7402, -0.0251, -0.6988], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.9493, -1.0000, -0.0210,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.9234, -1.0000,  0.0503,  0.2821, -0.5306], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.2230, -1.0000, -0.0153,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0377],
         [ 0.4239],
         [ 0.7444],
         [ 1.4345],
         [ 5.0336],
         [ 2.3579],
         [ 2.8302],
         [ 4.5736],
         [ 4.6608],
         [ 4.5164],
         [-2.1880],
         [-0.0851],
         [-0.6167],
         [-1.3279],
         [-0.8168],
         [-0.6138],
         [ 0.0685],
         [-0.1108],
         [ 6.1771],
         [ 4.2978],
         [ 0.

  0%|          | 10/6235 [00:01<12:30,  8.30it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.3243, -1.0000, -0.7402, -0.0242, -0.6933], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.9753, -1.0000, -0.0236,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.9493, -1.0000,  0.0377,  0.2749, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.2490, -1.0000, -0.0121,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0198],
         [ 0.4095],
         [ 0.7332],
         [ 1.4227],
         [ 4.9939],
         [ 2.3494],
         [ 2.8273],
         [ 4.5839],
         [ 4.6770],
         [ 4.5277],
         [-2.1945],
         [-0.0848],
         [-0.6196],
         [-1.3245],
         [-0.8095],
         [-0.6036],
         [ 0.0794],
         [-0.1111],
         [ 6.2167],
         [ 4.3355],
         [ 0.

  0%|          | 11/6235 [00:01<12:39,  8.20it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.2983, -1.0000, -0.7402, -0.0228, -0.6905], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.9987, -1.0000, -0.0203,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.9753, -1.0000,  0.0198,  0.2617, -0.5113], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.2750, -1.0000, -0.0082,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 6.2291e-03],
         [ 3.9945e-01],
         [ 7.2351e-01],
         [ 1.4235e+00],
         [ 4.9624e+00],
         [ 2.3492e+00],
         [ 2.8245e+00],
         [ 4.5915e+00],
         [ 4.6868e+00],
         [ 4.5350e+00],
         [-2.1960e+00],
         [-8.1994e-02],
         [-6.2532e-01],
         [-1.3329e+00],
         [-8.2063e-01],
         [-6.1464e-01],
         [ 6.8753e-02],
     

  0%|          | 12/6235 [00:01<12:32,  8.27it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.2723, -1.0000, -0.7401, -0.0232, -0.6878], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.9727, -1.0000, -0.0197,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.9987, -1.0000,  0.0062,  0.2574, -0.5113], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.3010, -1.0000, -0.0086,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-5.0621e-03],
         [ 3.9086e-01],
         [ 7.1456e-01],
         [ 1.4213e+00],
         [ 4.9267e+00],
         [ 2.3434e+00],
         [ 2.8142e+00],
         [ 4.5960e+00],
         [ 4.6968e+00],
         [ 4.5417e+00],
         [-2.1991e+00],
         [-8.2570e-02],
         [-6.3481e-01],
         [-1.3456e+00],
         [-8.3712e-01],
         [-6.2992e-01],
         [ 5.4616e-02],
     

  0%|          | 13/6235 [00:01<12:26,  8.34it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.2463, -1.0000, -0.7401, -0.0227, -0.6878], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.9467, -1.0000, -0.0155,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.9727, -1.0000, -0.0051,  0.2500, -0.5140], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.3270, -1.0000, -0.0113,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0196],
         [ 0.3815],
         [ 0.7069],
         [ 1.4118],
         [ 4.8818],
         [ 2.3309],
         [ 2.8012],
         [ 4.6072],
         [ 4.7154],
         [ 4.5535],
         [-2.2012],
         [-0.0816],
         [-0.6455],
         [-1.3593],
         [-0.8504],
         [-0.6376],
         [ 0.0517],
         [-0.1554],
         [ 6.3064],
         [ 4.4240],
         [ 0.

  0%|          | 14/6235 [00:01<12:24,  8.35it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.2203, -1.0000, -0.7401, -0.0208, -0.6878], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.9207, -1.0000, -0.0158,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.9467, -1.0000, -0.0196,  0.2480, -0.5195], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.3530, -1.0000, -0.0174,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0430],
         [ 0.3667],
         [ 0.6969],
         [ 1.3946],
         [ 4.8305],
         [ 2.3115],
         [ 2.7863],
         [ 4.6255],
         [ 4.7422],
         [ 4.5724],
         [-2.2030],
         [-0.0782],
         [-0.6561],
         [-1.3723],
         [-0.8605],
         [-0.6401],
         [ 0.0553],
         [-0.1593],
         [ 6.3669],
         [ 4.4777],
         [ 0.

  0%|          | 15/6235 [00:01<12:20,  8.40it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.1943, -1.0000, -0.7401, -0.0177, -0.6905], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.8947, -1.0000, -0.0165,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.9207, -1.0000, -0.0430,  0.2605, -0.5306], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.3789, -1.0000, -0.0135,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0499],
         [ 0.3606],
         [ 0.6892],
         [ 1.3872],
         [ 4.7818],
         [ 2.2948],
         [ 2.7648],
         [ 4.6430],
         [ 4.7628],
         [ 4.5888],
         [-2.2145],
         [-0.0770],
         [-0.6731],
         [-1.3973],
         [-0.8869],
         [-0.6636],
         [ 0.0376],
         [-0.1810],
         [ 6.4160],
         [ 4.5174],
         [ 0.

  0%|          | 16/6235 [00:01<12:25,  8.34it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.1683, -1.0000, -0.7400, -0.0143, -0.6933], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.8687, -1.0000, -0.0175,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.8947, -1.0000, -0.0499,  0.2582, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.4049, -1.0000, -0.0090,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0521],
         [ 0.3530],
         [ 0.6795],
         [ 1.3791],
         [ 4.7339],
         [ 2.2752],
         [ 2.7400],
         [ 4.6595],
         [ 4.7767],
         [ 4.5990],
         [-2.2151],
         [-0.0691],
         [-0.6861],
         [-1.4240],
         [-0.9192],
         [-0.6932],
         [ 0.0134],
         [-0.2088],
         [ 6.4559],
         [ 4.5552],
         [ 0.

  0%|          | 17/6235 [00:02<12:28,  8.30it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.1424, -1.0000, -0.7400, -0.0099, -0.6823], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.8427, -1.0000, -0.0201,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.8687, -1.0000, -0.0521,  0.2401, -0.5223], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.4309, -1.0000, -0.0041,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0377],
         [ 0.3592],
         [ 0.6866],
         [ 1.3838],
         [ 4.6975],
         [ 2.2644],
         [ 2.7231],
         [ 4.6839],
         [ 4.7965],
         [ 4.6159],
         [-2.2006],
         [-0.0406],
         [-0.6851],
         [-1.4421],
         [-0.9487],
         [-0.7207],
         [-0.0074],
         [-0.2357],
         [ 6.5042],
         [ 4.6006],
         [ 0.

  0%|          | 18/6235 [00:02<12:23,  8.36it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.1164, -1.0000, -0.7400, -0.0080, -0.6767], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.8167, -1.0000, -0.0185,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.8427, -1.0000, -0.0377,  0.2321, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.4569, -1.0000,  0.0026,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0365],
         [ 0.3552],
         [ 0.6831],
         [ 1.3820],
         [ 4.6561],
         [ 2.2441],
         [ 2.7009],
         [ 4.7019],
         [ 4.8119],
         [ 4.6284],
         [-2.1880],
         [-0.0200],
         [-0.6856],
         [-1.4558],
         [-0.9681],
         [-0.7368],
         [-0.0175],
         [-0.2541],
         [ 6.5479],
         [ 4.6448],
         [ 0.

  0%|          | 19/6235 [00:02<12:29,  8.29it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.0904, -1.0000, -0.7400, -0.0052, -0.6795], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.7907, -1.0000, -0.0209,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.8167, -1.0000, -0.0365,  0.2319, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.4829, -1.0000, -0.0013,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0368],
         [ 0.3472],
         [ 0.6710],
         [ 1.3659],
         [ 4.5750],
         [ 2.2140],
         [ 2.6701],
         [ 4.7095],
         [ 4.8219],
         [ 4.6379],
         [-2.1804],
         [-0.0110],
         [-0.6957],
         [-1.4753],
         [-0.9894],
         [-0.7560],
         [-0.0307],
         [-0.2768],
         [ 6.5851],
         [ 4.6827],
         [ 0.

  0%|          | 20/6235 [00:02<12:34,  8.24it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.0644, -1.0000, -0.7399, -0.0023, -0.6795], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.7647, -1.0000, -0.0228,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.7907, -1.0000, -0.0368,  0.2397, -0.5306], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.5089, -1.0000, -0.0018,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-2.8761e-02],
         [ 3.4347e-01],
         [ 6.5806e-01],
         [ 1.3436e+00],
         [ 4.4646e+00],
         [ 2.1798e+00],
         [ 2.6361e+00],
         [ 4.7185e+00],
         [ 4.8316e+00],
         [ 4.6477e+00],
         [-2.1694e+00],
         [-7.8352e-04],
         [-7.0679e-01],
         [-1.4966e+00],
         [-1.0124e+00],
         [-7.7789e-01],
         [-4.4709e-02],
     

  0%|          | 21/6235 [00:02<12:57,  7.99it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.0384, -1.0000, -0.7399,  0.0018, -0.6823], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.7387, -1.0000, -0.0228,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.7647, -1.0000, -0.0288,  0.2574, -0.5333], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.5349, -1.0000, -0.0094,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0109],
         [ 0.3480],
         [ 0.6530],
         [ 1.3271],
         [ 4.3667],
         [ 2.1539],
         [ 2.6106],
         [ 4.7389],
         [ 4.8505],
         [ 4.6652],
         [-2.1460],
         [ 0.0265],
         [-0.7057],
         [-1.5096],
         [-1.0330],
         [-0.7967],
         [-0.0541],
         [-0.3232],
         [ 6.6832],
         [ 4.7795],
         [ 0.

  0%|          | 22/6235 [00:02<12:43,  8.13it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.0124, -1.0000, -0.7399,  0.0033, -0.6767], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.7127, -1.0000, -0.0230,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.7387, -1.0000, -0.0109,  0.2600, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.5609, -1.0000, -0.0085,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-2.8656e-03],
         [ 3.3894e-01],
         [ 6.2764e-01],
         [ 1.3016e+00],
         [ 4.2583e+00],
         [ 2.1229e+00],
         [ 2.5844e+00],
         [ 4.7593e+00],
         [ 4.8655e+00],
         [ 4.6794e+00],
         [-2.1381e+00],
         [ 3.9208e-02],
         [-7.1042e-01],
         [-1.5210e+00],
         [-1.0436e+00],
         [-8.0528e-01],
         [-5.3304e-02],
     

  0%|          | 23/6235 [00:02<12:31,  8.27it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.0136, -1.0000, -0.7399, -0.0074, -0.6712], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.6868, -1.0000, -0.0223,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.7127, -1.0000, -0.0029,  0.2459, -0.5113], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.5869, -1.0000, -0.0082,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0223],
         [ 0.3464],
         [ 0.6268],
         [ 1.2959],
         [ 4.1842],
         [ 2.1160],
         [ 2.5753],
         [ 4.7729],
         [ 4.8832],
         [ 4.7005],
         [-2.1170],
         [ 0.0708],
         [-0.6998],
         [-1.5212],
         [-1.0549],
         [-0.8183],
         [-0.0624],
         [-0.3633],
         [ 6.7870],
         [ 4.8841],
         [ 0.

  0%|          | 24/6235 [00:02<12:23,  8.36it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.0396, -1.0000, -0.7399, -0.0144, -0.6740], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.6608, -1.0000, -0.0203,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.6868, -1.0000,  0.0223,  0.2425, -0.5140], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.6129, -1.0000, -0.0074,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0383],
         [ 0.3457],
         [ 0.6187],
         [ 1.2815],
         [ 4.1018],
         [ 2.1059],
         [ 2.5657],
         [ 4.7662],
         [ 4.8880],
         [ 4.7098],
         [-2.0901],
         [ 0.0953],
         [-0.6868],
         [-1.5117],
         [-1.0538],
         [-0.8180],
         [-0.0606],
         [-0.3770],
         [ 6.8151],
         [ 4.9205],
         [ 0.

  0%|          | 25/6235 [00:02<12:17,  8.43it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.0656, -1.0000, -0.7398, -0.0094, -0.6740], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.6348, -1.0000, -0.0212,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.6608, -1.0000,  0.0383,  0.2469, -0.5195], device='cuda:0')
policy_in_future_last component: tensor([-8.0000e-01, -6.3887e-01, -1.0000e+00, -4.6855e-04,  7.5063e-01,
         2.6360e-01], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0347],
         [ 0.3281],
         [ 0.5962],
         [ 1.2478],
         [ 3.9914],
         [ 2.0787],
         [ 2.5499],
         [ 4.7519],
         [ 4.8889],
         [ 4.7135],
         [-2.0653],
         [ 0.1109],
         [-0.6745],
         [-1.4931],
         [-1.0345],
         [-0.7960],
         [-0.0371],
         [-0.3761],
         [ 6.8494],
 

  0%|          | 26/6235 [00:03<12:14,  8.46it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.0916, -1.0000, -0.7398, -0.0075, -0.6740], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.6088, -1.0000, -0.0225,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.6348, -1.0000,  0.0347,  0.2593, -0.5223], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.6649, -1.0000, -0.0014,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 5.4116e-03],
         [ 2.9162e-01],
         [ 5.5601e-01],
         [ 1.2039e+00],
         [ 3.8855e+00],
         [ 2.0466e+00],
         [ 2.5346e+00],
         [ 4.7379e+00],
         [ 4.8908e+00],
         [ 4.7172e+00],
         [-2.0489e+00],
         [ 1.1381e-01],
         [-6.6898e-01],
         [-1.4762e+00],
         [-1.0101e+00],
         [-7.6656e-01],
         [-5.8151e-03],
     

  0%|          | 27/6235 [00:03<12:11,  8.49it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.1176, -1.0000, -0.7398, -0.0086, -0.6767], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.5828, -1.0000, -0.0227,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.6088, -1.0000,  0.0054,  0.2747, -0.5306], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.6909, -1.0000,  0.0063,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0224],
         [ 0.2541],
         [ 0.5120],
         [ 1.1583],
         [ 3.7863],
         [ 2.0127],
         [ 2.5180],
         [ 4.7330],
         [ 4.8965],
         [ 4.7265],
         [-2.0258],
         [ 0.1252],
         [-0.6587],
         [-1.4590],
         [-0.9911],
         [-0.7451],
         [ 0.0175],
         [-0.3611],
         [ 6.9247],
         [ 5.0698],
         [ 0.

  0%|          | 28/6235 [00:03<12:09,  8.51it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.1436, -1.0000, -0.7398, -0.0045, -0.6795], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.5568, -1.0000, -0.0231,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.5828, -1.0000, -0.0224,  0.2674, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.7169, -1.0000,  0.0048,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-5.6592e-02],
         [ 2.0477e-01],
         [ 4.4828e-01],
         [ 1.1068e+00],
         [ 3.6760e+00],
         [ 1.9683e+00],
         [ 2.4921e+00],
         [ 4.7288e+00],
         [ 4.8973e+00],
         [ 4.7314e+00],
         [-2.0107e+00],
         [ 1.2626e-01],
         [-6.5626e-01],
         [-1.4492e+00],
         [-9.7487e-01],
         [-7.2717e-01],
         [ 3.9128e-02],
     

  0%|          | 29/6235 [00:03<12:06,  8.54it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-8.0000e-01,  1.6956e-01, -1.0000e+00, -7.3978e-01,  5.6124e-04,
        -6.7674e-01], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.5308, -1.0000, -0.0225,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.5568, -1.0000, -0.0566,  0.2484, -0.5140], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.7428, -1.0000, -0.0019,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0568],
         [ 0.1888],
         [ 0.4256],
         [ 1.0842],
         [ 3.5891],
         [ 1.9460],
         [ 2.4811],
         [ 4.7224],
         [ 4.8992],
         [ 4.7399],
         [-1.9703],
         [ 0.1557],
         [-0.6348],
         [-1.4311],
         [-0.9628],
         [-0.7161],
         [ 0.0543],
         [-0.3599],
         [ 6.9933],
 

  0%|          | 30/6235 [00:03<12:06,  8.54it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.1956, -1.0000, -0.7398, -0.0022, -0.6712], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.5048, -1.0000, -0.0207,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.5308, -1.0000, -0.0568,  0.2404, -0.5168], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.7688, -1.0000, -0.0015,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0400],
         [ 0.1908],
         [ 0.4223],
         [ 1.0763],
         [ 3.5311],
         [ 1.9406],
         [ 2.4789],
         [ 4.7116],
         [ 4.9003],
         [ 4.7495],
         [-1.9184],
         [ 0.1931],
         [-0.6088],
         [-1.4147],
         [-0.9598],
         [-0.7152],
         [ 0.0594],
         [-0.3651],
         [ 7.0096],
         [ 5.2006],
         [ 0.

  0%|          | 31/6235 [00:03<12:05,  8.55it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.2215, -1.0000, -0.7397, -0.0052, -0.6712], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.4788, -1.0000, -0.0194,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.5048, -1.0000, -0.0400,  0.2364, -0.5195], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.7948, -1.0000, -0.0101,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0360],
         [ 0.1810],
         [ 0.4109],
         [ 1.0606],
         [ 3.4659],
         [ 1.9239],
         [ 2.4720],
         [ 4.6998],
         [ 4.9039],
         [ 4.7625],
         [-1.8603],
         [ 0.2347],
         [-0.5788],
         [-1.3936],
         [-0.9505],
         [-0.7061],
         [ 0.0720],
         [-0.3654],
         [ 7.0346],
         [ 5.2437],
         [ 0.

  1%|          | 32/6235 [00:03<12:09,  8.51it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.2475, -1.0000, -0.7397, -0.0050, -0.6740], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.4528, -1.0000, -0.0201,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.4788, -1.0000, -0.0360,  0.2398, -0.5223], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.8208, -1.0000, -0.0116,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-3.5339e-02],
         [ 1.6589e-01],
         [ 3.9413e-01],
         [ 1.0428e+00],
         [ 3.4067e+00],
         [ 1.9066e+00],
         [ 2.4672e+00],
         [ 4.6929e+00],
         [ 4.9129e+00],
         [ 4.7819e+00],
         [-1.8066e+00],
         [ 2.7623e-01],
         [-5.4889e-01],
         [-1.3717e+00],
         [-9.3798e-01],
         [-6.9307e-01],
         [ 8.8976e-02],
     

  1%|          | 33/6235 [00:03<12:06,  8.53it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-8.0000e-01,  2.7353e-01, -1.0000e+00, -7.3972e-01, -8.7887e-04,
        -6.7398e-01], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.4268, -1.0000, -0.0192,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.4528, -1.0000, -0.0353,  0.2568, -0.5306], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.8468, -1.0000, -0.0064,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0573],
         [ 0.1241],
         [ 0.3438],
         [ 1.0009],
         [ 3.3190],
         [ 1.8721],
         [ 2.4539],
         [ 4.6915],
         [ 4.9212],
         [ 4.7997],
         [-1.7812],
         [ 0.2898],
         [-0.5384],
         [-1.3596],
         [-0.9192],
         [-0.6705],
         [ 0.1163],
         [-0.3584],
         [ 7.1109],
 

  1%|          | 34/6235 [00:04<12:06,  8.54it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.2995, -1.0000, -0.7397,  0.0032, -0.6685], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.4008, -1.0000, -0.0196,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.4268, -1.0000, -0.0573,  0.2566, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.8728, -1.0000, -0.0140,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0950],
         [ 0.0620],
         [ 0.2687],
         [ 0.9434],
         [ 3.2117],
         [ 1.8236],
         [ 2.4334],
         [ 4.6923],
         [ 4.9267],
         [ 4.8136],
         [-1.7727],
         [ 0.2826],
         [-0.5424],
         [-1.3574],
         [-0.8997],
         [-0.6436],
         [ 0.1505],
         [-0.3516],
         [ 7.1611],
         [ 5.4206],
         [ 1.

  1%|          | 35/6235 [00:04<12:05,  8.54it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-8.0000e-01,  3.2552e-01, -1.0000e+00, -7.3969e-01,  4.2903e-04,
        -6.7123e-01], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.3748, -1.0000, -0.0195,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.4008, -1.0000, -0.0950,  0.2377, -0.5223], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.8988, -1.0000, -0.0136,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.1168],
         [ 0.0184],
         [ 0.2074],
         [ 0.9001],
         [ 3.1131],
         [ 1.7867],
         [ 2.4192],
         [ 4.6918],
         [ 4.9245],
         [ 4.8233],
         [-1.7561],
         [ 0.2818],
         [-0.5441],
         [-1.3592],
         [-0.8881],
         [-0.6259],
         [ 0.1774],
         [-0.3477],
         [ 7.1979],
 

  1%|          | 36/6235 [00:04<12:07,  8.52it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.3515, -1.0000, -0.7397, -0.0037, -0.6712], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.3489, -1.0000, -0.0170,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.3748, -1.0000, -0.1168,  0.2304, -0.5306], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.9248, -1.0000, -0.0249,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.1405],
         [-0.0261],
         [ 0.1419],
         [ 0.8561],
         [ 3.0190],
         [ 1.7460],
         [ 2.3998],
         [ 4.6816],
         [ 4.9102],
         [ 4.8206],
         [-1.7313],
         [ 0.2826],
         [-0.5416],
         [-1.3566],
         [-0.8747],
         [-0.6071],
         [ 0.2036],
         [-0.3440],
         [ 7.2163],
         [ 5.5298],
         [ 1.

  1%|          | 37/6235 [00:04<12:07,  8.52it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.3775, -1.0000, -0.7397, -0.0051, -0.6685], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.3229, -1.0000, -0.0183,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.3489, -1.0000, -0.1405,  0.2269, -0.5333], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.9508, -1.0000, -0.0241,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.1721],
         [-0.0630],
         [ 0.0910],
         [ 0.8231],
         [ 2.9282],
         [ 1.7121],
         [ 2.3854],
         [ 4.6778],
         [ 4.9040],
         [ 4.8218],
         [-1.6899],
         [ 0.2955],
         [-0.5345],
         [-1.3557],
         [-0.8676],
         [-0.5976],
         [ 0.2237],
         [-0.3421],
         [ 7.2336],
         [ 5.5777],
         [ 1.

  1%|          | 38/6235 [00:04<12:08,  8.50it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.4035, -1.0000, -0.7397, -0.0037, -0.6740], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.2969, -1.0000, -0.0181,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.3229, -1.0000, -0.1721,  0.2160, -0.5361], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.9768, -1.0000, -0.0245,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.1841],
         [-0.0660],
         [ 0.0932],
         [ 0.8326],
         [ 2.8823],
         [ 1.7200],
         [ 2.3999],
         [ 4.6819],
         [ 4.9140],
         [ 4.8385],
         [-1.6169],
         [ 0.3422],
         [-0.5081],
         [-1.3481],
         [-0.8738],
         [-0.6086],
         [ 0.2253],
         [-0.3454],
         [ 7.2410],
         [ 5.6071],
         [ 1.

  1%|          | 39/6235 [00:04<12:14,  8.44it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.4295, -1.0000, -0.7396, -0.0010, -0.6850], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.2709, -1.0000, -0.0193,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.2969, -1.0000, -0.1841,  0.2130, -0.5416], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.9972, -0.0123,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2386],
         [-0.1003],
         [ 0.0775],
         [ 0.8158],
         [ 2.8324],
         [ 1.7230],
         [ 2.4069],
         [ 4.6708],
         [ 4.9061],
         [ 4.8372],
         [-1.5453],
         [ 0.3712],
         [-0.4927],
         [-1.3478],
         [-0.8840],
         [-0.6201],
         [ 0.2278],
         [-0.3452],
         [ 7.2263],
         [ 5.6234],
         [ 1.

  1%|          | 40/6235 [00:04<12:12,  8.45it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-8.0000e-01,  4.5548e-01, -1.0000e+00, -7.3963e-01, -6.5565e-06,
        -6.8226e-01], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.2449, -1.0000, -0.0208,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.2709, -1.0000, -0.2386,  0.2086, -0.5444], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.9712,  0.0059,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2589],
         [-0.1148],
         [ 0.0769],
         [ 0.8050],
         [ 2.7757],
         [ 1.7225],
         [ 2.4095],
         [ 4.6513],
         [ 4.8902],
         [ 4.8340],
         [-1.4637],
         [ 0.4210],
         [-0.4654],
         [-1.3413],
         [-0.8948],
         [-0.6311],
         [ 0.2287],
         [-0.3471],
         [ 7.2198],
 

  1%|          | 41/6235 [00:04<12:10,  8.48it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-8.0000e-01,  4.8148e-01, -1.0000e+00, -7.3962e-01, -9.1016e-05,
        -6.7398e-01], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.2189, -1.0000, -0.0207,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.2449, -1.0000, -0.2589,  0.1907, -0.5416], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.9452,  0.0094,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2263],
         [-0.0944],
         [ 0.0945],
         [ 0.8148],
         [ 2.7275],
         [ 1.7276],
         [ 2.4178],
         [ 4.6312],
         [ 4.8724],
         [ 4.8349],
         [-1.3757],
         [ 0.4911],
         [-0.4238],
         [-1.3270],
         [-0.9063],
         [-0.6455],
         [ 0.2224],
         [-0.3541],
         [ 7.2168],
 

  1%|          | 42/6235 [00:04<12:10,  8.48it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.5075, -1.0000, -0.7396, -0.0010, -0.6712], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.1929, -1.0000, -0.0202,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.2189, -1.0000, -0.2263,  0.1793, -0.5444], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.9192,  0.0219,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2236],
         [-0.1069],
         [ 0.0858],
         [ 0.7993],
         [ 2.6759],
         [ 1.7198],
         [ 2.4185],
         [ 4.5994],
         [ 4.8409],
         [ 4.8209],
         [-1.3036],
         [ 0.5382],
         [-0.3910],
         [-1.3108],
         [-0.9105],
         [-0.6489],
         [ 0.2239],
         [-0.3544],
         [ 7.1967],
         [ 5.6730],
         [ 1.

  1%|          | 43/6235 [00:05<12:08,  8.50it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-8.0000e-01,  5.3346e-01, -1.0000e+00, -7.3960e-01,  1.0359e-04,
        -6.7123e-01], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.1669, -1.0000, -0.0172,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.1929, -1.0000, -0.2236,  0.1781, -0.5554], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.8933,  0.0192,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-2.0785e-01],
         [-1.1817e-01],
         [ 8.0805e-02],
         [ 7.6782e-01],
         [ 2.6063e+00],
         [ 1.6974e+00],
         [ 2.4087e+00],
         [ 4.5548e+00],
         [ 4.8008e+00],
         [ 4.8011e+00],
         [-1.2260e+00],
         [ 5.9056e-01],
         [-3.5241e-01],
         [-1.2856e+00],
         [-9.0592e-01],
         [-6.4067e-01

  1%|          | 44/6235 [00:05<12:07,  8.51it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-8.0000e-01,  5.5945e-01, -1.0000e+00, -7.3960e-01,  3.1424e-04,
        -6.7398e-01], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.1409, -1.0000, -0.0171,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.1669, -1.0000, -0.2079,  0.1743, -0.5526], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.8673,  0.0211,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-2.2122e-01],
         [-1.5906e-01],
         [ 4.6343e-02],
         [ 7.0248e-01],
         [ 2.5117e+00],
         [ 1.6534e+00],
         [ 2.3847e+00],
         [ 4.4962e+00],
         [ 4.7449e+00],
         [ 4.7640e+00],
         [-1.1657e+00],
         [ 6.1632e-01],
         [-3.2327e-01],
         [-1.2535e+00],
         [-8.7967e-01],
         [-6.0867e-01

  1%|          | 45/6235 [00:05<12:07,  8.51it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.5854, -1.0000, -0.7396,  0.0018, -0.6740], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.1149, -1.0000, -0.0176,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.1409, -1.0000, -0.2212,  0.1693, -0.5526], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.8413,  0.0176,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3082],
         [-0.2479],
         [-0.0182],
         [ 0.6249],
         [ 2.4528],
         [ 1.6241],
         [ 2.3753],
         [ 4.4605],
         [ 4.7104],
         [ 4.7342],
         [-1.1367],
         [ 0.6015],
         [-0.3199],
         [-1.2368],
         [-0.8560],
         [-0.5796],
         [ 0.3010],
         [-0.3021],
         [ 7.1026],
         [ 5.6878],
         [ 1.

  1%|          | 46/6235 [00:05<12:07,  8.51it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.6114, -1.0000, -0.7396,  0.0078, -0.6740], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.0889, -1.0000, -0.0182,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.1149, -1.0000, -0.3082,  0.1709, -0.5582], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.8153,  0.0183,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3324],
         [-0.2816],
         [-0.0458],
         [ 0.6019],
         [ 2.4387],
         [ 1.6221],
         [ 2.3870],
         [ 4.4350],
         [ 4.6891],
         [ 4.7199],
         [-1.0813],
         [ 0.6321],
         [-0.2822],
         [-1.1961],
         [-0.8205],
         [-0.5449],
         [ 0.3358],
         [-0.2804],
         [ 7.0749],
         [ 5.6919],
         [ 1.

  1%|          | 47/6235 [00:05<12:06,  8.52it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-8.0000e-01,  6.3743e-01, -1.0000e+00, -7.3957e-01,  5.0533e-04,
        -6.6571e-01], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.0629, -1.0000, -0.0213,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.0889, -1.0000, -0.3324,  0.1668, -0.5582], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.7893,  0.0195,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3034],
         [-0.2610],
         [-0.0297],
         [ 0.6389],
         [ 2.5086],
         [ 1.6718],
         [ 2.4274],
         [ 4.4043],
         [ 4.6681],
         [ 4.7104],
         [-0.9890],
         [ 0.7011],
         [-0.2160],
         [-1.1418],
         [-0.7953],
         [-0.5282],
         [ 0.3457],
         [-0.2706],
         [ 7.0198],
 

  1%|          | 48/6235 [00:05<12:06,  8.52it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.6634, -1.0000, -0.7396, -0.0055, -0.6657], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.0369, -1.0000, -0.0204,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.0629, -1.0000, -0.3034,  0.1594, -0.5664], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.7633,  0.0179,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2223],
         [-0.1836],
         [ 0.0512],
         [ 0.7271],
         [ 2.6310],
         [ 1.7621],
         [ 2.4913],
         [ 4.3638],
         [ 4.6541],
         [ 4.7120],
         [-0.8557],
         [ 0.8164],
         [-0.1179],
         [-1.0665],
         [-0.7711],
         [-0.5184],
         [ 0.3422],
         [-0.2639],
         [ 6.9536],
         [ 5.6147],
         [ 1.

  1%|          | 49/6235 [00:05<12:06,  8.51it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-8.0000e-01,  6.8942e-01, -1.0000e+00, -7.3956e-01,  4.0293e-04,
        -6.7398e-01], device='cuda:0')
policy_in_future_first component: tensor([-0.8000,  0.0109, -1.0000, -0.0167,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.0369, -1.0000, -0.2223,  0.1619, -0.5664], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.7373,  0.0205,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-9.1240e-02],
         [-5.7212e-02],
         [ 1.8756e-01],
         [ 8.4230e-01],
         [ 2.7766e+00],
         [ 1.8745e+00],
         [ 2.5679e+00],
         [ 4.3281e+00],
         [ 4.6588e+00],
         [ 4.7366e+00],
         [-7.0874e-01],
         [ 9.6347e-01],
         [-2.2776e-04],
         [-9.7515e-01],
         [-7.4105e-01],
         [-5.0623e-01

  1%|          | 50/6235 [00:05<12:08,  8.49it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.7154, -1.0000,  0.2977,  0.0560, -0.6574], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.0150, -1.0000, -0.0165,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000,  0.0109, -1.0000, -0.0912,  0.1805, -0.5609], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.7113,  0.0184,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.1379],
         [-0.0877],
         [ 0.1918],
         [ 0.8178],
         [ 2.8335],
         [ 1.8990],
         [ 2.5918],
         [ 4.2878],
         [ 4.6532],
         [ 4.7317],
         [-0.6702],
         [ 0.9733],
         [ 0.0342],
         [-0.9181],
         [-0.6935],
         [-0.4563],
         [ 0.3781],
         [-0.2177],
         [ 6.8647],
         [ 5.5367],
         [ 1.

  1%|          | 51/6235 [00:06<12:06,  8.51it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.7414, -1.0000,  0.2541,  0.1600, -0.6271], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.0410, -1.0000, -0.0157,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.0150, -1.0000, -0.1379,  0.2094, -0.5637], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.6853,  0.0100,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2537],
         [-0.2053],
         [ 0.0903],
         [ 0.7233],
         [ 2.7624],
         [ 1.8336],
         [ 2.5453],
         [ 4.2813],
         [ 4.6491],
         [ 4.7221],
         [-0.7676],
         [ 0.8724],
         [-0.0396],
         [-0.9609],
         [-0.6983],
         [-0.4542],
         [ 0.3806],
         [-0.2276],
         [ 6.8706],
         [ 5.5451],
         [ 1.

  1%|          | 52/6235 [00:06<12:06,  8.51it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.7674, -1.0000,  0.1712,  0.2251, -0.5719], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.0670, -1.0000, -0.0166,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.0410, -1.0000, -0.2537,  0.2131, -0.5582], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.6593,  0.0059,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2810],
         [-0.2433],
         [ 0.0490],
         [ 0.7019],
         [ 2.7513],
         [ 1.8162],
         [ 2.5289],
         [ 4.2781],
         [ 4.6498],
         [ 4.7248],
         [-0.8237],
         [ 0.8255],
         [-0.0768],
         [-0.9880],
         [-0.7122],
         [-0.4716],
         [ 0.3599],
         [-0.2522],
         [ 6.8645],
         [ 5.5356],
         [ 1.

  1%|          | 53/6235 [00:06<12:04,  8.53it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.7934, -1.0000,  0.1295,  0.2528, -0.5526], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.0930, -1.0000, -0.0192,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.0670, -1.0000, -0.2810,  0.1984, -0.5554], device='cuda:0')
policy_in_future_last component: tensor([-8.0000e-01, -1.0000e+00, -6.3333e-01, -3.1799e-04,  7.5063e-01,
         2.6360e-01], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2415],
         [-0.2100],
         [ 0.0727],
         [ 0.7429],
         [ 2.8039],
         [ 1.8526],
         [ 2.5545],
         [ 4.2842],
         [ 4.6657],
         [ 4.7459],
         [-0.8327],
         [ 0.8301],
         [-0.0766],
         [-0.9937],
         [-0.7290],
         [-0.4997],
         [ 0.3281],
         [-0.2772],
         [ 6.8498],
 

  1%|          | 54/6235 [00:06<12:05,  8.52it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.8194, -1.0000,  0.1409,  0.2620, -0.5388], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.1190, -1.0000, -0.0197,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.0930, -1.0000, -0.2415,  0.1885, -0.5554], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.6073, -0.0038,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2329],
         [-0.2040],
         [ 0.0877],
         [ 0.7551],
         [ 2.8308],
         [ 1.8682],
         [ 2.5653],
         [ 4.2771],
         [ 4.6726],
         [ 4.7571],
         [-0.8459],
         [ 0.8236],
         [-0.0808],
         [-0.9952],
         [-0.7338],
         [-0.5088],
         [ 0.3150],
         [-0.2892],
         [ 6.8379],
         [ 5.4938],
         [ 1.

  1%|          | 55/6235 [00:06<12:06,  8.51it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.8454, -1.0000,  0.1432,  0.2685, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.1450, -1.0000, -0.0163,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.1190, -1.0000, -0.2329,  0.1795, -0.5554], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.5813,  0.0032,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2557],
         [-0.2256],
         [ 0.0790],
         [ 0.7356],
         [ 2.8190],
         [ 1.8542],
         [ 2.5571],
         [ 4.2689],
         [ 4.6702],
         [ 4.7548],
         [-0.8721],
         [ 0.7971],
         [-0.0988],
         [-1.0044],
         [-0.7347],
         [-0.5091],
         [ 0.3141],
         [-0.2917],
         [ 6.8327],
         [ 5.4905],
         [ 1.

  1%|          | 56/6235 [00:06<12:06,  8.50it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.8714, -1.0000,  0.1180,  0.2666, -0.5168], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.1710, -1.0000, -0.0224,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.1450, -1.0000, -0.2557,  0.1823, -0.5609], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.5554,  0.0043,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2800],
         [-0.2454],
         [ 0.0702],
         [ 0.7213],
         [ 2.8138],
         [ 1.8453],
         [ 2.5527],
         [ 4.2641],
         [ 4.6683],
         [ 4.7530],
         [-0.8969],
         [ 0.7703],
         [-0.1187],
         [-1.0186],
         [-0.7421],
         [-0.5177],
         [ 0.3059],
         [-0.2977],
         [ 6.8234],
         [ 5.4837],
         [ 1.

  1%|          | 57/6235 [00:06<12:06,  8.51it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.8974, -1.0000,  0.0757,  0.2706, -0.5195], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.1970, -1.0000, -0.0184,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.1710, -1.0000, -0.2800,  0.1946, -0.5637], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.5294,  0.0037,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2912],
         [-0.2538],
         [ 0.0683],
         [ 0.7187],
         [ 2.8231],
         [ 1.8470],
         [ 2.5542],
         [ 4.2619],
         [ 4.6677],
         [ 4.7539],
         [-0.9160],
         [ 0.7515],
         [-0.1350],
         [-1.0344],
         [-0.7563],
         [-0.5339],
         [ 0.2901],
         [-0.3081],
         [ 6.8130],
         [ 5.4737],
         [ 1.

  1%|          | 58/6235 [00:06<12:06,  8.50it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.9234, -1.0000,  0.0503,  0.2821, -0.5306], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.2230, -1.0000, -0.0153,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.1970, -1.0000, -0.2912,  0.1917, -0.5582], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.5034,  0.0038,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3017],
         [-0.2637],
         [ 0.0623],
         [ 0.7148],
         [ 2.8269],
         [ 1.8446],
         [ 2.5517],
         [ 4.2595],
         [ 4.6650],
         [ 4.7526],
         [-0.9350],
         [ 0.7343],
         [-0.1499],
         [-1.0482],
         [-0.7670],
         [-0.5453],
         [ 0.2789],
         [-0.3170],
         [ 6.8063],
         [ 5.4684],
         [ 1.

  1%|          | 59/6235 [00:06<12:04,  8.52it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.9493, -1.0000,  0.0377,  0.2749, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.2490, -1.0000, -0.0121,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.2230, -1.0000, -0.3017,  0.1722, -0.5554], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.4774, -0.0044,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3059],
         [-0.2672],
         [ 0.0614],
         [ 0.7187],
         [ 2.8344],
         [ 1.8453],
         [ 2.5505],
         [ 4.2543],
         [ 4.6604],
         [ 4.7501],
         [-0.9454],
         [ 0.7253],
         [-0.1578],
         [-1.0566],
         [-0.7748],
         [-0.5554],
         [ 0.2679],
         [-0.3266],
         [ 6.7948],
         [ 5.4603],
         [ 1.

  1%|          | 60/6235 [00:07<12:12,  8.43it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.9753, -1.0000,  0.0198,  0.2617, -0.5113], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.2750, -1.0000, -0.0082,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.2490, -1.0000, -0.3059,  0.1578, -0.5664], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.4514, -0.0061,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3196],
         [-0.2763],
         [ 0.0612],
         [ 0.7194],
         [ 2.8361],
         [ 1.8423],
         [ 2.5453],
         [ 4.2418],
         [ 4.6505],
         [ 4.7420],
         [-0.9492],
         [ 0.7171],
         [-0.1642],
         [-1.0636],
         [-0.7825],
         [-0.5657],
         [ 0.2562],
         [-0.3361],
         [ 6.7751],
         [ 5.4493],
         [ 1.

  1%|          | 61/6235 [00:07<12:41,  8.11it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.9987, -1.0000,  0.0062,  0.2574, -0.5113], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.3010, -1.0000, -0.0086,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.2750, -1.0000, -0.3196,  0.1554, -0.5719], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.4254, -0.0019,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3477],
         [-0.2968],
         [ 0.0584],
         [ 0.7093],
         [ 2.8283],
         [ 1.8304],
         [ 2.5323],
         [ 4.2235],
         [ 4.6367],
         [ 4.7288],
         [-0.9545],
         [ 0.7042],
         [-0.1736],
         [-1.0715],
         [-0.7885],
         [-0.5718],
         [ 0.2491],
         [-0.3417],
         [ 6.7552],
         [ 5.4404],
         [ 1.

  1%|          | 62/6235 [00:07<12:38,  8.14it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.9727, -1.0000, -0.0051,  0.2500, -0.5140], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.3270, -1.0000, -0.0113,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.3010, -1.0000, -0.3477,  0.1627, -0.5775], device='cuda:0')
policy_in_future_last component: tensor([-8.0000e-01, -1.0000e+00, -3.9939e-01, -6.2507e-04,  7.5063e-01,
         2.6360e-01], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3746],
         [-0.3186],
         [ 0.0549],
         [ 0.6948],
         [ 2.8151],
         [ 1.8145],
         [ 2.5152],
         [ 4.2048],
         [ 4.6224],
         [ 4.7152],
         [-0.9621],
         [ 0.6906],
         [-0.1846],
         [-1.0805],
         [-0.7943],
         [-0.5760],
         [ 0.2448],
         [-0.3454],
         [ 6.7404],
 

  1%|          | 63/6235 [00:07<12:39,  8.12it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.9467, -1.0000, -0.0196,  0.2480, -0.5195], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.3530, -1.0000, -0.0174,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.3270, -1.0000, -0.3746,  0.1748, -0.5885], device='cuda:0')
policy_in_future_last component: tensor([-8.0000e-01, -1.0000e+00, -3.7340e-01, -5.5593e-04,  7.5063e-01,
         2.6360e-01], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3883],
         [-0.3322],
         [ 0.0509],
         [ 0.6847],
         [ 2.8053],
         [ 1.8019],
         [ 2.4997],
         [ 4.1906],
         [ 4.6101],
         [ 4.7046],
         [-0.9699],
         [ 0.6808],
         [-0.1941],
         [-1.0900],
         [-0.8024],
         [-0.5832],
         [ 0.2372],
         [-0.3519],
         [ 6.7291],
 

  1%|          | 64/6235 [00:07<12:37,  8.15it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.9207, -1.0000, -0.0430,  0.2605, -0.5306], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.3789, -1.0000, -0.0135,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.3530, -1.0000, -0.3883,  0.1689, -0.5830], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.3474, -0.0028,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3682],
         [-0.3171],
         [ 0.0623],
         [ 0.6995],
         [ 2.8245],
         [ 1.8098],
         [ 2.4984],
         [ 4.1840],
         [ 4.6044],
         [ 4.7035],
         [-0.9670],
         [ 0.6885],
         [-0.1922],
         [-1.0950],
         [-0.8153],
         [-0.5990],
         [ 0.2197],
         [-0.3645],
         [ 6.7171],
         [ 5.4166],
         [ 1.

  1%|          | 65/6235 [00:07<12:27,  8.25it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.8947, -1.0000, -0.0499,  0.2582, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.4049, -1.0000, -0.0090,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.3789, -1.0000, -0.3682,  0.1406, -0.5719], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.3214, -0.0031,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3375],
         [-0.2914],
         [ 0.0850],
         [ 0.7255],
         [ 2.8609],
         [ 1.8288],
         [ 2.5065],
         [ 4.1773],
         [ 4.6001],
         [ 4.7051],
         [-0.9556],
         [ 0.7059],
         [-0.1812],
         [-1.0920],
         [-0.8241],
         [-0.6116],
         [ 0.2038],
         [-0.3752],
         [ 6.7005],
         [ 5.3994],
         [ 1.

  1%|          | 66/6235 [00:07<12:20,  8.33it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.8687, -1.0000, -0.0521,  0.2401, -0.5223], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.4309, -1.0000, -0.0041,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.4049, -1.0000, -0.3375,  0.1288, -0.5775], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.2954, -0.0071,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3440],
         [-0.2977],
         [ 0.0901],
         [ 0.7243],
         [ 2.8742],
         [ 1.8287],
         [ 2.5033],
         [ 4.1639],
         [ 4.5897],
         [ 4.6971],
         [-0.9586],
         [ 0.7031],
         [-0.1807],
         [-1.0893],
         [-0.8226],
         [-0.6097],
         [ 0.2025],
         [-0.3765],
         [ 6.6833],
         [ 5.3877],
         [ 1.

  1%|          | 67/6235 [00:07<12:14,  8.40it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.8427, -1.0000, -0.0377,  0.2321, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.4569, -1.0000,  0.0026,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.4309, -1.0000, -0.3440,  0.1374, -0.5857], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.2694, -0.0032,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3911],
         [-0.3369],
         [ 0.0740],
         [ 0.6965],
         [ 2.8643],
         [ 1.8102],
         [ 2.4882],
         [ 4.1460],
         [ 4.5756],
         [ 4.6815],
         [-0.9763],
         [ 0.6783],
         [-0.1936],
         [-1.0911],
         [-0.8159],
         [-0.5997],
         [ 0.2098],
         [-0.3722],
         [ 6.6639],
         [ 5.3787],
         [ 1.

  1%|          | 68/6235 [00:08<12:11,  8.44it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.8167, -1.0000, -0.0365,  0.2319, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.4829, -1.0000, -0.0013,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.4569, -1.0000, -0.3911,  0.1381, -0.5857], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.2434, -0.0020,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4307],
         [-0.3667],
         [ 0.0620],
         [ 0.6804],
         [ 2.8674],
         [ 1.8017],
         [ 2.4785],
         [ 4.1324],
         [ 4.5660],
         [ 4.6701],
         [-0.9914],
         [ 0.6562],
         [-0.2060],
         [-1.0958],
         [-0.8158],
         [-0.6005],
         [ 0.2062],
         [-0.3751],
         [ 6.6381],
         [ 5.3610],
         [ 1.

  1%|          | 69/6235 [00:08<12:09,  8.45it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.7907, -1.0000, -0.0368,  0.2397, -0.5306], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.5089, -1.0000, -0.0018,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.4829, -1.0000, -0.4307,  0.1329, -0.5857], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.2174, -0.0083,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4406],
         [-0.3703],
         [ 0.0658],
         [ 0.6883],
         [ 2.8938],
         [ 1.8099],
         [ 2.4791],
         [ 4.1264],
         [ 4.5640],
         [ 4.6675],
         [-0.9958],
         [ 0.6516],
         [-0.2079],
         [-1.0969],
         [-0.8200],
         [-0.6084],
         [ 0.1950],
         [-0.3829],
         [ 6.6164],
         [ 5.3415],
         [ 1.

  1%|          | 70/6235 [00:08<12:08,  8.46it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.7647, -1.0000, -0.0288,  0.2574, -0.5333], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.5349, -1.0000, -0.0094,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.5089, -1.0000, -0.4406,  0.1291, -0.5857], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.1915, -0.0108,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4392],
         [-0.3639],
         [ 0.0743],
         [ 0.7059],
         [ 2.9307],
         [ 1.8246],
         [ 2.4841],
         [ 4.1269],
         [ 4.5691],
         [ 4.6715],
         [-1.0020],
         [ 0.6521],
         [-0.2060],
         [-1.0938],
         [-0.8197],
         [-0.6108],
         [ 0.1886],
         [-0.3879],
         [ 6.6055],
         [ 5.3271],
         [ 1.

  1%|          | 71/6235 [00:08<12:06,  8.48it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.7387, -1.0000, -0.0109,  0.2600, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.5609, -1.0000, -0.0085,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.5349, -1.0000, -0.4392,  0.1218, -0.5857], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.1655, -0.0141,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4245],
         [-0.3474],
         [ 0.0895],
         [ 0.7321],
         [ 2.9766],
         [ 1.8463],
         [ 2.4944],
         [ 4.1291],
         [ 4.5758],
         [ 4.6783],
         [-1.0089],
         [ 0.6545],
         [-0.2024],
         [-1.0899],
         [-0.8196],
         [-0.6136],
         [ 0.1812],
         [-0.3935],
         [ 6.5937],
         [ 5.3087],
         [ 1.

  1%|          | 72/6235 [00:08<12:05,  8.50it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.7127, -1.0000, -0.0029,  0.2459, -0.5113], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.5869, -1.0000, -0.0082,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.5609, -1.0000, -0.4245,  0.1225, -0.5857], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.1395, -0.0229,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4067],
         [-0.3268],
         [ 0.1108],
         [ 0.7574],
         [ 3.0236],
         [ 1.8703],
         [ 2.5070],
         [ 4.1293],
         [ 4.5819],
         [ 4.6853],
         [-1.0159],
         [ 0.6552],
         [-0.2005],
         [-1.0880],
         [-0.8228],
         [-0.6201],
         [ 0.1703],
         [-0.3995],
         [ 6.5772],
         [ 5.2842],
         [ 1.

  1%|          | 73/6235 [00:08<12:02,  8.52it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.6868, -1.0000,  0.0223,  0.2425, -0.5140], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.6129, -1.0000, -0.0074,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.5869, -1.0000, -0.4067,  0.1279, -0.5885], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.1135, -0.0195,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3900],
         [-0.3081],
         [ 0.1315],
         [ 0.7738],
         [ 3.0643],
         [ 1.8907],
         [ 2.5179],
         [ 4.1275],
         [ 4.5868],
         [ 4.6908],
         [-1.0250],
         [ 0.6515],
         [-0.2017],
         [-1.0876],
         [-0.8274],
         [-0.6272],
         [ 0.1593],
         [-0.4039],
         [ 6.5593],
         [ 5.2577],
         [ 1.

  1%|          | 74/6235 [00:08<12:02,  8.53it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.6608, -1.0000,  0.0383,  0.2469, -0.5195], device='cuda:0')
policy_in_future_first component: tensor([-8.0000e-01, -6.3887e-01, -1.0000e+00, -4.6855e-04,  7.5063e-01,
         2.6360e-01], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.6129, -1.0000, -0.3900,  0.1285, -0.5885], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.0875, -0.0166,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3894],
         [-0.3055],
         [ 0.1359],
         [ 0.7739],
         [ 3.0894],
         [ 1.8983],
         [ 2.5212],
         [ 4.1272],
         [ 4.5905],
         [ 4.6936],
         [-1.0466],
         [ 0.6347],
         [-0.2117],
         [-1.0912],
         [-0.8296],
         [-0.6304],
         [ 0.1528],
         [-0.4064],
         [ 6.5455],
 

  1%|          | 75/6235 [00:08<12:02,  8.52it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.6348, -1.0000,  0.0347,  0.2593, -0.5223], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.6649, -1.0000, -0.0014,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.6389, -1.0000, -0.3894,  0.1396, -0.5912], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.0615, -0.0135,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4304],
         [-0.3411],
         [ 0.1081],
         [ 0.7452],
         [ 3.0847],
         [ 1.8820],
         [ 2.5096],
         [ 4.1289],
         [ 4.5922],
         [ 4.6899],
         [-1.0918],
         [ 0.5924],
         [-0.2385],
         [-1.1016],
         [-0.8247],
         [-0.6228],
         [ 0.1579],
         [-0.4039],
         [ 6.5401],
         [ 5.2269],
         [ 1.

  1%|          | 76/6235 [00:09<12:03,  8.51it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.6088, -1.0000,  0.0054,  0.2747, -0.5306], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.6909, -1.0000,  0.0063,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.6649, -1.0000, -0.4304,  0.1445, -0.5912], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.0355, -0.0118,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4715],
         [-0.3773],
         [ 0.0771],
         [ 0.7220],
         [ 3.0809],
         [ 1.8662],
         [ 2.4988],
         [ 4.1347],
         [ 4.5956],
         [ 4.6869],
         [-1.1426],
         [ 0.5477],
         [-0.2670],
         [-1.1133],
         [-0.8183],
         [-0.6139],
         [ 0.1642],
         [-0.4027],
         [ 6.5383],
         [ 5.2210],
         [ 1.

  1%|          | 77/6235 [00:09<12:08,  8.45it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.5828, -1.0000, -0.0224,  0.2674, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.7169, -1.0000,  0.0048,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.6909, -1.0000, -0.4715,  0.1348, -0.5885], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.0095, -0.0127,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.5016],
         [-0.3988],
         [ 0.0652],
         [ 0.7207],
         [ 3.0994],
         [ 1.8670],
         [ 2.4999],
         [ 4.1411],
         [ 4.6018],
         [ 4.6869],
         [-1.1827],
         [ 0.5140],
         [-0.2867],
         [-1.1203],
         [-0.8119],
         [-0.6076],
         [ 0.1678],
         [-0.4028],
         [ 6.5289],
         [ 5.2083],
         [ 1.

  1%|▏         | 78/6235 [00:09<12:11,  8.42it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.5568, -1.0000, -0.0566,  0.2484, -0.5140], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.7428, -1.0000, -0.0019,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.7169, -1.0000, -0.5016,  0.1288, -0.5885], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.0165, -0.0195,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.5071],
         [-0.3935],
         [ 0.0830],
         [ 0.7435],
         [ 3.1414],
         [ 1.8866],
         [ 2.5144],
         [ 4.1473],
         [ 4.6128],
         [ 4.6931],
         [-1.2027],
         [ 0.5009],
         [-0.2923],
         [-1.1199],
         [-0.8073],
         [-0.6056],
         [ 0.1672],
         [-0.4029],
         [ 6.5148],
         [ 5.1885],
         [ 1.

  1%|▏         | 79/6235 [00:09<12:13,  8.40it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.5308, -1.0000, -0.0568,  0.2404, -0.5168], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.7688, -1.0000, -0.0015,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.7428, -1.0000, -0.5071,  0.1191, -0.5885], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.0425, -0.0219,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4983],
         [-0.3710],
         [ 0.1183],
         [ 0.7791],
         [ 3.1984],
         [ 1.9174],
         [ 2.5369],
         [ 4.1510],
         [ 4.6258],
         [ 4.7030],
         [-1.2056],
         [ 0.5034],
         [-0.2865],
         [-1.1122],
         [-0.8025],
         [-0.6054],
         [ 0.1641],
         [-0.4013],
         [ 6.4966],
         [ 5.1630],
         [ 1.

  1%|▏         | 80/6235 [00:09<12:10,  8.42it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.5048, -1.0000, -0.0400,  0.2364, -0.5195], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.7948, -1.0000, -0.0101,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.7688, -1.0000, -0.4983,  0.1146, -0.5885], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.0685, -0.0175,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.5138],
         [-0.3698],
         [ 0.1383],
         [ 0.7990],
         [ 3.2423],
         [ 1.9353],
         [ 2.5502],
         [ 4.1522],
         [ 4.6358],
         [ 4.7086],
         [-1.2160],
         [ 0.4954],
         [-0.2876],
         [-1.1072],
         [-0.7952],
         [-0.6005],
         [ 0.1658],
         [-0.3969],
         [ 6.4801],
         [ 5.1423],
         [ 1.

  1%|▏         | 81/6235 [00:09<12:08,  8.44it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.4788, -1.0000, -0.0360,  0.2398, -0.5223], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.8208, -1.0000, -0.0116,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.7948, -1.0000, -0.5138,  0.1100, -0.5968], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.0945, -0.0148,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-5.2913e-01],
         [-3.7473e-01],
         [ 1.4736e-01],
         [ 8.1251e-01],
         [ 3.2729e+00],
         [ 1.9435e+00],
         [ 2.5570e+00],
         [ 4.1571e+00],
         [ 4.6446e+00],
         [ 4.7132e+00],
         [-1.2374e+00],
         [ 4.8074e-01],
         [-2.9557e-01],
         [-1.1087e+00],
         [-7.8967e-01],
         [-5.9561e-01],
         [ 1.6877e-01],
     

  1%|▏         | 82/6235 [00:09<12:10,  8.43it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.4528, -1.0000, -0.0353,  0.2568, -0.5306], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.8468, -1.0000, -0.0064,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.8208, -1.0000, -0.5291,  0.1057, -0.5995], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.1205, -0.0090,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-5.5417e-01],
         [-3.9329e-01],
         [ 1.4228e-01],
         [ 8.1583e-01],
         [ 3.2899e+00],
         [ 1.9422e+00],
         [ 2.5571e+00],
         [ 4.1622e+00],
         [ 4.6484e+00],
         [ 4.7124e+00],
         [-1.2726e+00],
         [ 4.5199e-01],
         [-3.1428e-01],
         [-1.1184e+00],
         [-7.8622e-01],
         [-5.9063e-01],
         [ 1.7300e-01],
     

  1%|▏         | 83/6235 [00:09<12:08,  8.44it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.4268, -1.0000, -0.0573,  0.2566, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.8728, -1.0000, -0.0140,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.8468, -1.0000, -0.5542,  0.0990, -0.5995], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.1465, -0.0078,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-5.7812e-01],
         [-4.1085e-01],
         [ 1.3975e-01],
         [ 8.2450e-01],
         [ 3.3116e+00],
         [ 1.9444e+00],
         [ 2.5576e+00],
         [ 4.1645e+00],
         [ 4.6493e+00],
         [ 4.7093e+00],
         [-1.3046e+00],
         [ 4.2381e-01],
         [-3.3351e-01],
         [-1.1319e+00],
         [-7.8854e-01],
         [-5.9275e-01],
         [ 1.7025e-01],
     

  1%|▏         | 84/6235 [00:09<12:11,  8.41it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.4008, -1.0000, -0.0950,  0.2377, -0.5223], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.8988, -1.0000, -0.0136,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.8728, -1.0000, -0.5781,  0.0795, -0.6106], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.1724, -0.0100,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-5.4640e-01],
         [-3.7318e-01],
         [ 1.7897e-01],
         [ 8.7284e-01],
         [ 3.3653e+00],
         [ 1.9769e+00],
         [ 2.5788e+00],
         [ 4.1689e+00],
         [ 4.6601e+00],
         [ 4.7201e+00],
         [-1.2970e+00],
         [ 4.3768e-01],
         [-3.2556e-01],
         [-1.1344e+00],
         [-7.9932e-01],
         [-6.0999e-01],
         [ 1.5266e-01],
     

  1%|▏         | 85/6235 [00:10<12:10,  8.42it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.3748, -1.0000, -0.1168,  0.2304, -0.5306], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.9248, -1.0000, -0.0249,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.8988, -1.0000, -0.5464,  0.0680, -0.6188], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.1984, -0.0098,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.5118],
         [-0.3316],
         [ 0.2244],
         [ 0.9212],
         [ 3.4188],
         [ 2.0099],
         [ 2.5988],
         [ 4.1657],
         [ 4.6655],
         [ 4.7265],
         [-1.2754],
         [ 0.4608],
         [-0.3099],
         [-1.1309],
         [-0.8090],
         [-0.6261],
         [ 0.1354],
         [-0.4123],
         [ 6.4078],
         [ 5.0518],
         [ 0.

  1%|▏         | 86/6235 [00:10<12:11,  8.40it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.3489, -1.0000, -0.1405,  0.2269, -0.5333], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.9508, -1.0000, -0.0241,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.9248, -1.0000, -0.5118,  0.0562, -0.6243], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.2244, -0.0093,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4706],
         [-0.2893],
         [ 0.2706],
         [ 0.9638],
         [ 3.4632],
         [ 2.0372],
         [ 2.6127],
         [ 4.1602],
         [ 4.6670],
         [ 4.7312],
         [-1.2524],
         [ 0.4877],
         [-0.2935],
         [-1.1283],
         [-0.8201],
         [-0.6415],
         [ 0.1193],
         [-0.4189],
         [ 6.3886],
         [ 5.0278],
         [ 0.

  1%|▏         | 87/6235 [00:10<12:11,  8.40it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.3229, -1.0000, -0.1721,  0.2160, -0.5361], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -0.9768, -1.0000, -0.0245,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.9508, -1.0000, -0.4706,  0.0544, -0.6354], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.2504, -0.0079,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4324],
         [-0.2565],
         [ 0.3076],
         [ 0.9899],
         [ 3.4893],
         [ 2.0532],
         [ 2.6186],
         [ 4.1534],
         [ 4.6633],
         [ 4.7317],
         [-1.2373],
         [ 0.5074],
         [-0.2827],
         [-1.1291],
         [-0.8303],
         [-0.6534],
         [ 0.1080],
         [-0.4229],
         [ 6.3749],
         [ 5.0095],
         [ 0.

  1%|▏         | 88/6235 [00:10<12:10,  8.41it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.2969, -1.0000, -0.1841,  0.2130, -0.5416], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.9972, -0.0123,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -0.9768, -1.0000, -0.4324,  0.0620, -0.6464], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.2764, -0.0071,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4233],
         [-0.2525],
         [ 0.3214],
         [ 0.9921],
         [ 3.5040],
         [ 2.0574],
         [ 2.6152],
         [ 4.1392],
         [ 4.6496],
         [ 4.7213],
         [-1.2344],
         [ 0.5075],
         [-0.2831],
         [-1.1348],
         [-0.8393],
         [-0.6613],
         [ 0.0998],
         [-0.4257],
         [ 6.3519],
         [ 4.9896],
         [ 0.

  1%|▏         | 89/6235 [00:10<12:09,  8.42it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.2709, -1.0000, -0.2386,  0.2086, -0.5444], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.9712,  0.0059,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.9972, -0.4233,  0.0937, -0.6878], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.3024, -0.0088,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-4.1540e-01],
         [-2.5442e-01],
         [ 3.2776e-01],
         [ 9.8419e-01],
         [ 3.5040e+00],
         [ 2.0521e+00],
         [ 2.6071e+00],
         [ 4.1236e+00],
         [ 4.6316e+00],
         [ 4.7060e+00],
         [-1.2350e+00],
         [ 5.0391e-01],
         [-2.8584e-01],
         [-1.1407e+00],
         [-8.4543e-01],
         [-6.6426e-01],
         [ 9.6354e-02],
     

  1%|▏         | 90/6235 [00:10<12:10,  8.41it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.2449, -1.0000, -0.2589,  0.1907, -0.5416], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.9452,  0.0094,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.9712, -0.4154,  0.1290, -0.6630], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.3284, -0.0128,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3888],
         [-0.2413],
         [ 0.3444],
         [ 0.9819],
         [ 3.5081],
         [ 2.0495],
         [ 2.6028],
         [ 4.1112],
         [ 4.6163],
         [ 4.6938],
         [-1.2348],
         [ 0.5063],
         [-0.2828],
         [-1.1395],
         [-0.8452],
         [-0.6608],
         [ 0.0980],
         [-0.4220],
         [ 6.3155],
         [ 4.9582],
         [ 0.

  1%|▏         | 91/6235 [00:10<12:11,  8.40it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.2189, -1.0000, -0.2263,  0.1793, -0.5444], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.9192,  0.0219,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.9452, -0.3888,  0.1268, -0.6133], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.3544, -0.0147,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3338],
         [-0.2028],
         [ 0.3799],
         [ 0.9974],
         [ 3.5325],
         [ 2.0617],
         [ 2.6092],
         [ 4.0994],
         [ 4.6036],
         [ 4.6867],
         [-1.2215],
         [ 0.5233],
         [-0.2680],
         [-1.1306],
         [-0.8466],
         [-0.6627],
         [ 0.0931],
         [-0.4201],
         [ 6.2905],
         [ 4.9300],
         [ 0.

  1%|▏         | 92/6235 [00:10<12:10,  8.40it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.1929, -1.0000, -0.2236,  0.1781, -0.5554], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.8933,  0.0192,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.9192, -0.3338,  0.1264, -0.6050], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.3804, -0.0124,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3258],
         [-0.2057],
         [ 0.3830],
         [ 0.9792],
         [ 3.5284],
         [ 2.0539],
         [ 2.6026],
         [ 4.0888],
         [ 4.5915],
         [ 4.6758],
         [-1.2293],
         [ 0.5132],
         [-0.2706],
         [-1.1272],
         [-0.8417],
         [-0.6551],
         [ 0.0978],
         [-0.4144],
         [ 6.2697],
         [ 4.9102],
         [ 0.

  1%|▏         | 93/6235 [00:11<12:11,  8.40it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.1669, -1.0000, -0.2079,  0.1743, -0.5526], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.8673,  0.0211,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.8933, -0.3258,  0.1435, -0.5995], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.4064, -0.0099,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3793],
         [-0.2639],
         [ 0.3475],
         [ 0.9194],
         [ 3.4838],
         [ 2.0133],
         [ 2.5744],
         [ 4.0766],
         [ 4.5751],
         [ 4.6547],
         [-1.2674],
         [ 0.4678],
         [-0.2951],
         [-1.1314],
         [-0.8251],
         [-0.6301],
         [ 0.1209],
         [-0.4000],
         [ 6.2576],
         [ 4.9071],
         [ 0.

  2%|▏         | 94/6235 [00:11<12:12,  8.39it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.1409, -1.0000, -0.2212,  0.1693, -0.5526], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.8413,  0.0176,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.8673, -0.3793,  0.1559, -0.5940], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.4324, -0.0082,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4451],
         [-0.3272],
         [ 0.3096],
         [ 0.8696],
         [ 3.4514],
         [ 1.9777],
         [ 2.5499],
         [ 4.0725],
         [ 4.5667],
         [ 4.6393],
         [-1.3168],
         [ 0.4164],
         [-0.3251],
         [-1.1409],
         [-0.8095],
         [-0.6069],
         [ 0.1435],
         [-0.3872],
         [ 6.2534],
         [ 4.9091],
         [ 0.

  2%|▏         | 95/6235 [00:11<12:11,  8.39it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.1149, -1.0000, -0.3082,  0.1709, -0.5582], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.8153,  0.0183,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.8413, -0.4451,  0.1472, -0.5775], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.4584, -0.0081,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4312],
         [-0.3118],
         [ 0.3304],
         [ 0.8948],
         [ 3.4928],
         [ 2.0004],
         [ 2.5667],
         [ 4.0841],
         [ 4.5787],
         [ 4.6485],
         [-1.3263],
         [ 0.4165],
         [-0.3228],
         [-1.1379],
         [-0.8068],
         [-0.6067],
         [ 0.1413],
         [-0.3879],
         [ 6.2440],
         [ 4.8923],
         [ 0.

  2%|▏         | 96/6235 [00:11<12:12,  8.38it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.0889, -1.0000, -0.3324,  0.1668, -0.5582], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.7893,  0.0195,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.8153, -0.4312,  0.1462, -0.5802], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.4844, -0.0114,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-3.8064e-01],
         [-2.5902e-01],
         [ 3.8341e-01],
         [ 9.4979e-01],
         [ 3.5653e+00],
         [ 2.0496e+00],
         [ 2.6007e+00],
         [ 4.0908e+00],
         [ 4.5927e+00],
         [ 4.6628e+00],
         [-1.2987e+00],
         [ 4.5060e-01],
         [-2.9452e-01],
         [-1.1169e+00],
         [-8.0253e-01],
         [-6.0863e-01],
         [ 1.3392e-01],
     

  2%|▏         | 97/6235 [00:11<12:13,  8.37it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.0629, -1.0000, -0.3034,  0.1594, -0.5664], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.7633,  0.0179,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.7893, -0.3806,  0.1606, -0.5802], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.5104, -0.0132,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3660],
         [-0.2408],
         [ 0.4130],
         [ 0.9640],
         [ 3.5939],
         [ 2.0669],
         [ 2.6123],
         [ 4.0861],
         [ 4.5954],
         [ 4.6626],
         [-1.2832],
         [ 0.4642],
         [-0.2786],
         [-1.0971],
         [-0.7882],
         [-0.5940],
         [ 0.1435],
         [-0.3770],
         [ 6.2090],
         [ 4.8463],
         [ 0.

  2%|▏         | 98/6235 [00:11<12:13,  8.36it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.0369, -1.0000, -0.2223,  0.1619, -0.5664], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.7373,  0.0205,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.7633, -0.3660,  0.1658, -0.5719], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.5363, -0.0098,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3903],
         [-0.2635],
         [ 0.4094],
         [ 0.9375],
         [ 3.5801],
         [ 2.0493],
         [ 2.6009],
         [ 4.0808],
         [ 4.5928],
         [ 4.6534],
         [-1.3014],
         [ 0.4442],
         [-0.2865],
         [-1.0896],
         [-0.7688],
         [-0.5681],
         [ 0.1668],
         [-0.3596],
         [ 6.2080],
         [ 4.8457],
         [ 0.

  2%|▏         | 99/6235 [00:11<12:13,  8.37it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000,  0.0109, -1.0000, -0.0912,  0.1805, -0.5609], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.7113,  0.0184,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.7373, -0.3903,  0.1595, -0.5582], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.5623, -0.0167,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4434],
         [-0.3141],
         [ 0.3808],
         [ 0.8936],
         [ 3.5536],
         [ 2.0148],
         [ 2.5780],
         [ 4.0801],
         [ 4.5898],
         [ 4.6410],
         [-1.3494],
         [ 0.3955],
         [-0.3164],
         [-1.1000],
         [-0.7563],
         [-0.5474],
         [ 0.1875],
         [-0.3463],
         [ 6.2092],
         [ 4.8495],
         [ 0.

  2%|▏         | 100/6235 [00:11<12:19,  8.29it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.0150, -1.0000, -0.1379,  0.2094, -0.5637], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.6853,  0.0100,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.7113, -0.4434,  0.1539, -0.5664], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.5883, -0.0144,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4854],
         [-0.3527],
         [ 0.3516],
         [ 0.8687],
         [ 3.5379],
         [ 1.9882],
         [ 2.5588],
         [ 4.0892],
         [ 4.5928],
         [ 4.6346],
         [-1.3996],
         [ 0.3488],
         [-0.3503],
         [-1.1216],
         [-0.7582],
         [-0.5455],
         [ 0.1903],
         [-0.3468],
         [ 6.2086],
         [ 4.8480],
         [ 0.

  2%|▏         | 101/6235 [00:12<12:48,  7.98it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.0410, -1.0000, -0.2537,  0.2131, -0.5582], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.6593,  0.0059,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.6853, -0.4854,  0.1449, -0.5747], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.6143, -0.0151,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4823],
         [-0.3467],
         [ 0.3545],
         [ 0.8866],
         [ 3.5601],
         [ 1.9921],
         [ 2.5586],
         [ 4.1027],
         [ 4.6039],
         [ 4.6397],
         [-1.4179],
         [ 0.3383],
         [-0.3595],
         [-1.1297],
         [-0.7620],
         [-0.5516],
         [ 0.1824],
         [-0.3535],
         [ 6.2025],
         [ 4.8358],
         [ 0.

  2%|▏         | 102/6235 [00:12<12:35,  8.12it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.0670, -1.0000, -0.2810,  0.1984, -0.5554], device='cuda:0')
policy_in_future_first component: tensor([-8.0000e-01, -1.0000e+00, -6.3333e-01, -3.1799e-04,  7.5063e-01,
         2.6360e-01], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.6593, -0.4823,  0.1336, -0.5802], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.6403, -0.0178,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4609],
         [-0.3226],
         [ 0.3782],
         [ 0.9184],
         [ 3.6002],
         [ 2.0095],
         [ 2.5676],
         [ 4.1132],
         [ 4.6159],
         [ 4.6473],
         [-1.4157],
         [ 0.3486],
         [-0.3512],
         [-1.1227],
         [-0.7581],
         [-0.5496],
         [ 0.1810],
         [-0.3533],
         [ 6.1958],
 

  2%|▏         | 103/6235 [00:12<12:24,  8.23it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.0930, -1.0000, -0.2415,  0.1885, -0.5554], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.6073, -0.0038,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.6333, -0.4609,  0.1338, -0.5857], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.6663, -0.0117,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4526],
         [-0.3123],
         [ 0.3964],
         [ 0.9338],
         [ 3.6303],
         [ 2.0189],
         [ 2.5717],
         [ 4.1211],
         [ 4.6233],
         [ 4.6495],
         [-1.4213],
         [ 0.3503],
         [-0.3489],
         [-1.1180],
         [-0.7536],
         [-0.5423],
         [ 0.1861],
         [-0.3475],
         [ 6.1942],
         [ 4.8157],
         [ 0.

  2%|▏         | 104/6235 [00:12<12:15,  8.33it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.1190, -1.0000, -0.2329,  0.1795, -0.5554], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.5813,  0.0032,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.6073, -0.4526,  0.1306, -0.5857], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.6923, -0.0119,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4453],
         [-0.3047],
         [ 0.4090],
         [ 0.9436],
         [ 3.6554],
         [ 2.0250],
         [ 2.5731],
         [ 4.1311],
         [ 4.6305],
         [ 4.6521],
         [-1.4347],
         [ 0.3446],
         [-0.3543],
         [-1.1216],
         [-0.7558],
         [-0.5416],
         [ 0.1857],
         [-0.3457],
         [ 6.1932],
         [ 4.8076],
         [ 0.

  2%|▏         | 105/6235 [00:12<12:11,  8.38it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.1450, -1.0000, -0.2557,  0.1823, -0.5609], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.5554,  0.0043,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.5813, -0.4453,  0.1230, -0.5885], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.7183, -0.0110,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4303],
         [-0.2911],
         [ 0.4158],
         [ 0.9490],
         [ 3.6735],
         [ 2.0284],
         [ 2.5754],
         [ 4.1498],
         [ 4.6419],
         [ 4.6598],
         [-1.4440],
         [ 0.3403],
         [-0.3610],
         [-1.1290],
         [-0.7624],
         [-0.5471],
         [ 0.1813],
         [-0.3453],
         [ 6.1951],
         [ 4.7994],
         [ 0.

  2%|▏         | 106/6235 [00:12<12:08,  8.42it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.1710, -1.0000, -0.2800,  0.1946, -0.5637], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.5294,  0.0037,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.5554, -0.4303,  0.1249, -0.5885], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.7443, -0.0156,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4141],
         [-0.2801],
         [ 0.4121],
         [ 0.9437],
         [ 3.6791],
         [ 2.0233],
         [ 2.5747],
         [ 4.1746],
         [ 4.6535],
         [ 4.6675],
         [-1.4562],
         [ 0.3329],
         [-0.3700],
         [-1.1377],
         [-0.7678],
         [-0.5501],
         [ 0.1812],
         [-0.3419],
         [ 6.2041],
         [ 4.7976],
         [ 0.

  2%|▏         | 107/6235 [00:12<12:05,  8.45it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.1970, -1.0000, -0.2912,  0.1917, -0.5582], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.5034,  0.0038,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.5294, -0.4141,  0.1271, -0.5885], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.7703, -0.0154,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3947],
         [-0.2652],
         [ 0.4131],
         [ 0.9422],
         [ 3.6906],
         [ 2.0227],
         [ 2.5803],
         [ 4.2069],
         [ 4.6706],
         [ 4.6795],
         [-1.4672],
         [ 0.3281],
         [-0.3763],
         [-1.1438],
         [-0.7709],
         [-0.5505],
         [ 0.1848],
         [-0.3355],
         [ 6.2170],
         [ 4.7986],
         [ 0.

  2%|▏         | 108/6235 [00:12<12:05,  8.45it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.2230, -1.0000, -0.3017,  0.1722, -0.5554], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.4774, -0.0044,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.5034, -0.3947,  0.1292, -0.5885], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.7963, -0.0182,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3692],
         [-0.2440],
         [ 0.4217],
         [ 0.9464],
         [ 3.7098],
         [ 2.0294],
         [ 2.5908],
         [ 4.2379],
         [ 4.6872],
         [ 4.6918],
         [-1.4728],
         [ 0.3273],
         [-0.3785],
         [-1.1471],
         [-0.7740],
         [-0.5519],
         [ 0.1871],
         [-0.3295],
         [ 6.2241],
         [ 4.7946],
         [ 0.

  2%|▏         | 109/6235 [00:12<12:03,  8.46it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.2490, -1.0000, -0.3059,  0.1578, -0.5664], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.4514, -0.0061,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.4774, -0.3692,  0.1408, -0.5912], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.8223, -0.0208,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3407],
         [-0.2205],
         [ 0.4330],
         [ 0.9514],
         [ 3.7293],
         [ 2.0390],
         [ 2.6017],
         [ 4.2636],
         [ 4.7007],
         [ 4.7021],
         [-1.4742],
         [ 0.3275],
         [-0.3796],
         [-1.1496],
         [-0.7786],
         [-0.5556],
         [ 0.1860],
         [-0.3255],
         [ 6.2235],
         [ 4.7838],
         [ 0.

  2%|▏         | 110/6235 [00:13<12:01,  8.48it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.2750, -1.0000, -0.3196,  0.1554, -0.5719], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.4254, -0.0019,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.4514, -0.3407,  0.1503, -0.5885], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.8483, -0.0191,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3195],
         [-0.2046],
         [ 0.4394],
         [ 0.9497],
         [ 3.7397],
         [ 2.0433],
         [ 2.6082],
         [ 4.2868],
         [ 4.7126],
         [ 4.7103],
         [-1.4787],
         [ 0.3240],
         [-0.3834],
         [-1.1528],
         [-0.7819],
         [-0.5574],
         [ 0.1866],
         [-0.3212],
         [ 6.2230],
         [ 4.7739],
         [ 0.

  2%|▏         | 111/6235 [00:13<12:02,  8.48it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.3010, -1.0000, -0.3477,  0.1627, -0.5775], device='cuda:0')
policy_in_future_first component: tensor([-8.0000e-01, -1.0000e+00, -3.9939e-01, -6.2507e-04,  7.5063e-01,
         2.6360e-01], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.4254, -0.3195,  0.1516, -0.5830], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.8743, -0.0182,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3003],
         [-0.1906],
         [ 0.4453],
         [ 0.9469],
         [ 3.7455],
         [ 2.0463],
         [ 2.6151],
         [ 4.3127],
         [ 4.7269],
         [ 4.7204],
         [-1.4833],
         [ 0.3230],
         [-0.3856],
         [-1.1543],
         [-0.7830],
         [-0.5566],
         [ 0.1901],
         [-0.3156],
         [ 6.2294],
 

  2%|▏         | 112/6235 [00:13<12:02,  8.48it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.3270, -1.0000, -0.3746,  0.1748, -0.5885], device='cuda:0')
policy_in_future_first component: tensor([-8.0000e-01, -1.0000e+00, -3.7340e-01, -5.5593e-04,  7.5063e-01,
         2.6360e-01], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.3994, -0.3003,  0.1590, -0.5802], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.9002, -0.0221,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2776],
         [-0.1733],
         [ 0.4546],
         [ 0.9469],
         [ 3.7504],
         [ 2.0511],
         [ 2.6241],
         [ 4.3383],
         [ 4.7417],
         [ 4.7310],
         [-1.4831],
         [ 0.3278],
         [-0.3832],
         [-1.1516],
         [-0.7817],
         [-0.5536],
         [ 0.1957],
         [-0.3087],
         [ 6.2386],
 

  2%|▏         | 113/6235 [00:13<12:00,  8.50it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.3530, -1.0000, -0.3883,  0.1689, -0.5830], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.3474, -0.0028,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.3734, -0.2776,  0.1705, -0.5775], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.9262, -0.0213,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2573],
         [-0.1550],
         [ 0.4705],
         [ 0.9551],
         [ 3.7616],
         [ 2.0603],
         [ 2.6328],
         [ 4.3559],
         [ 4.7540],
         [ 4.7403],
         [-1.4790],
         [ 0.3374],
         [-0.3750],
         [-1.1420],
         [-0.7755],
         [-0.5475],
         [ 0.2006],
         [-0.3036],
         [ 6.2416],
         [ 4.7643],
         [ 0.

  2%|▏         | 114/6235 [00:13<12:01,  8.49it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.3789, -1.0000, -0.3682,  0.1406, -0.5719], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.3214, -0.0031,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.3474, -0.2573,  0.1848, -0.5747], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.9522, -0.0221,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2451],
         [-0.1411],
         [ 0.4863],
         [ 0.9628],
         [ 3.7756],
         [ 2.0696],
         [ 2.6397],
         [ 4.3695],
         [ 4.7658],
         [ 4.7487],
         [-1.4806],
         [ 0.3413],
         [-0.3695],
         [-1.1326],
         [-0.7674],
         [-0.5391],
         [ 0.2062],
         [-0.2982],
         [ 6.2425],
         [ 4.7570],
         [ 0.

  2%|▏         | 115/6235 [00:13<12:01,  8.49it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.4049, -1.0000, -0.3375,  0.1288, -0.5775], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.2954, -0.0071,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.3214, -0.2451,  0.2006, -0.5719], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.9782, -0.0226,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2486],
         [-0.1428],
         [ 0.4898],
         [ 0.9532],
         [ 3.7733],
         [ 2.0656],
         [ 2.6389],
         [ 4.3854],
         [ 4.7776],
         [ 4.7551],
         [-1.4963],
         [ 0.3303],
         [-0.3751],
         [-1.1301],
         [-0.7594],
         [-0.5284],
         [ 0.2160],
         [-0.2903],
         [ 6.2497],
         [ 4.7564],
         [ 0.

  2%|▏         | 116/6235 [00:13<11:59,  8.51it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.4309, -1.0000, -0.3440,  0.1374, -0.5857], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.2694, -0.0032,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.2954, -0.2486,  0.1965, -0.5637], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.9958, -0.0180,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2454],
         [-0.1392],
         [ 0.4936],
         [ 0.9490],
         [ 3.7736],
         [ 2.0637],
         [ 2.6384],
         [ 4.4029],
         [ 4.7897],
         [ 4.7628],
         [-1.5110],
         [ 0.3224],
         [-0.3793],
         [-1.1283],
         [-0.7541],
         [-0.5217],
         [ 0.2217],
         [-0.2857],
         [ 6.2572],
         [ 4.7555],
         [ 0.

  2%|▏         | 117/6235 [00:13<11:59,  8.50it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.4569, -1.0000, -0.3911,  0.1381, -0.5857], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.2434, -0.0020,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.2694, -0.2454,  0.1929, -0.5582], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.9698, -0.0166,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2404],
         [-0.1341],
         [ 0.4978],
         [ 0.9476],
         [ 3.7705],
         [ 2.0618],
         [ 2.6379],
         [ 4.4176],
         [ 4.7991],
         [ 4.7679],
         [-1.5178],
         [ 0.3206],
         [-0.3786],
         [-1.1226],
         [-0.7464],
         [-0.5133],
         [ 0.2285],
         [-0.2808],
         [ 6.2626],
         [ 4.7555],
         [ 0.

  2%|▏         | 118/6235 [00:14<12:02,  8.47it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.4829, -1.0000, -0.4307,  0.1329, -0.5857], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.2174, -0.0083,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.2434, -0.2404,  0.1955, -0.5582], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.9438, -0.0198,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2470],
         [-0.1381],
         [ 0.4974],
         [ 0.9391],
         [ 3.7565],
         [ 2.0536],
         [ 2.6351],
         [ 4.4305],
         [ 4.8063],
         [ 4.7697],
         [-1.5214],
         [ 0.3193],
         [-0.3771],
         [-1.1148],
         [-0.7341],
         [-0.4983],
         [ 0.2432],
         [-0.2707],
         [ 6.2725],
         [ 4.7639],
         [ 0.

  2%|▏         | 119/6235 [00:14<12:03,  8.46it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.5089, -1.0000, -0.4406,  0.1291, -0.5857], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.1915, -0.0108,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.2174, -0.2470,  0.1813, -0.5554], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.9178, -0.0212,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2424],
         [-0.1301],
         [ 0.5062],
         [ 0.9454],
         [ 3.7564],
         [ 2.0571],
         [ 2.6411],
         [ 4.4462],
         [ 4.8161],
         [ 4.7764],
         [-1.5185],
         [ 0.3280],
         [-0.3691],
         [-1.1044],
         [-0.7221],
         [-0.4847],
         [ 0.2570],
         [-0.2607],
         [ 6.2851],
         [ 4.7732],
         [ 0.

  2%|▏         | 120/6235 [00:14<12:04,  8.44it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.5349, -1.0000, -0.4392,  0.1218, -0.5857], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.1655, -0.0141,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.1915, -0.2424,  0.1790, -0.5609], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.8918, -0.0254,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2274],
         [-0.1122],
         [ 0.5237],
         [ 0.9623],
         [ 3.7685],
         [ 2.0696],
         [ 2.6531],
         [ 4.4621],
         [ 4.8283],
         [ 4.7871],
         [-1.5095],
         [ 0.3432],
         [-0.3575],
         [-1.0940],
         [-0.7133],
         [-0.4752],
         [ 0.2666],
         [-0.2528],
         [ 6.2968],
         [ 4.7798],
         [ 0.

  2%|▏         | 121/6235 [00:14<12:05,  8.42it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.5609, -1.0000, -0.4245,  0.1225, -0.5857], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.1395, -0.0229,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.1655, -0.2274,  0.1954, -0.5664], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.8658, -0.0271,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2164],
         [-0.0971],
         [ 0.5397],
         [ 0.9749],
         [ 3.7763],
         [ 2.0798],
         [ 2.6631],
         [ 4.4778],
         [ 4.8398],
         [ 4.7967],
         [-1.5038],
         [ 0.3539],
         [-0.3502],
         [-1.0874],
         [-0.7069],
         [-0.4671],
         [ 0.2755],
         [-0.2451],
         [ 6.3093],
         [ 4.7870],
         [ 0.

  2%|▏         | 122/6235 [00:14<12:06,  8.41it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.5869, -1.0000, -0.4067,  0.1279, -0.5885], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.1135, -0.0195,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.1395, -0.2164,  0.1952, -0.5609], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.8398, -0.0244,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.1927],
         [-0.0703],
         [ 0.5621],
         [ 0.9960],
         [ 3.7909],
         [ 2.0967],
         [ 2.6760],
         [ 4.4951],
         [ 4.8521],
         [ 4.8088],
         [-1.4940],
         [ 0.3712],
         [-0.3396],
         [-1.0802],
         [-0.7040],
         [-0.4643],
         [ 0.2783],
         [-0.2417],
         [ 6.3202],
         [ 4.7902],
         [ 0.

  2%|▏         | 123/6235 [00:14<12:05,  8.42it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.6129, -1.0000, -0.3900,  0.1285, -0.5885], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.0875, -0.0166,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.1135, -0.1927,  0.1937, -0.5582], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.8138, -0.0229,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.1797],
         [-0.0542],
         [ 0.5722],
         [ 1.0056],
         [ 3.7903],
         [ 2.1039],
         [ 2.6822],
         [ 4.5138],
         [ 4.8614],
         [ 4.8174],
         [-1.4932],
         [ 0.3791],
         [-0.3366],
         [-1.0786],
         [-0.7024],
         [-0.4616],
         [ 0.2823],
         [-0.2389],
         [ 6.3331],
         [ 4.7973],
         [ 0.

  2%|▏         | 124/6235 [00:14<12:05,  8.43it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.6389, -1.0000, -0.3894,  0.1396, -0.5912], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.0615, -0.0135,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.0875, -0.1797,  0.1987, -0.5582], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.7878, -0.0241,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.1887],
         [-0.0597],
         [ 0.5625],
         [ 0.9942],
         [ 3.7604],
         [ 2.0923],
         [ 2.6758],
         [ 4.5323],
         [ 4.8664],
         [ 4.8197],
         [-1.5075],
         [ 0.3697],
         [-0.3476],
         [-1.0871],
         [-0.7020],
         [-0.4573],
         [ 0.2905],
         [-0.2355],
         [ 6.3526],
         [ 4.8144],
         [ 0.

  2%|▏         | 125/6235 [00:14<12:04,  8.43it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.6649, -1.0000, -0.4304,  0.1445, -0.5912], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.0355, -0.0118,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.0615, -0.1887,  0.1890, -0.5554], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.7619, -0.0238,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.1933],
         [-0.0623],
         [ 0.5548],
         [ 0.9879],
         [ 3.7304],
         [ 2.0818],
         [ 2.6687],
         [ 4.5480],
         [ 4.8703],
         [ 4.8219],
         [-1.5151],
         [ 0.3648],
         [-0.3562],
         [-1.0958],
         [-0.7038],
         [-0.4560],
         [ 0.2955],
         [-0.2349],
         [ 6.3688],
         [ 4.8293],
         [ 0.

  2%|▏         | 126/6235 [00:14<12:05,  8.42it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.6909, -1.0000, -0.4715,  0.1348, -0.5885], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.0095, -0.0127,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.0355, -0.1933,  0.1894, -0.5582], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.7359, -0.0239,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.1915],
         [-0.0579],
         [ 0.5562],
         [ 0.9903],
         [ 3.7117],
         [ 2.0794],
         [ 2.6658],
         [ 4.5588],
         [ 4.8737],
         [ 4.8244],
         [-1.5091],
         [ 0.3689],
         [-0.3577],
         [-1.1002],
         [-0.7064],
         [-0.4566],
         [ 0.2979],
         [-0.2348],
         [ 6.3767],
         [ 4.8375],
         [ 0.

  2%|▏         | 127/6235 [00:15<12:06,  8.41it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.7169, -1.0000, -0.5016,  0.1288, -0.5885], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.0165, -0.0195,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.0095, -0.1915,  0.2044, -0.5637], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.7099, -0.0243,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.1827],
         [-0.0466],
         [ 0.5668],
         [ 0.9989],
         [ 3.7015],
         [ 2.0830],
         [ 2.6671],
         [ 4.5682],
         [ 4.8795],
         [ 4.8296],
         [-1.4931],
         [ 0.3821],
         [-0.3526],
         [-1.1002],
         [-0.7084],
         [-0.4568],
         [ 0.3002],
         [-0.2336],
         [ 6.3847],
         [ 4.8444],
         [ 0.

  2%|▏         | 128/6235 [00:15<12:05,  8.42it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.7428, -1.0000, -0.5071,  0.1191, -0.5885], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.0425, -0.0219,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.0165, -0.1827,  0.2011, -0.5609], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.6839, -0.0195,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.1485],
         [-0.0128],
         [ 0.5957],
         [ 1.0281],
         [ 3.7143],
         [ 2.1038],
         [ 2.6794],
         [ 4.5762],
         [ 4.8891],
         [ 4.8415],
         [-1.4597],
         [ 0.4149],
         [-0.3329],
         [-1.0912],
         [-0.7110],
         [-0.4608],
         [ 0.2964],
         [-0.2353],
         [ 6.3882],
         [ 4.8434],
         [ 0.

  2%|▏         | 129/6235 [00:15<12:11,  8.34it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.7688, -1.0000, -0.4983,  0.1146, -0.5885], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.0685, -0.0175,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.0425, -0.1485,  0.1853, -0.5554], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.6579, -0.0184,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.1089],
         [ 0.0236],
         [ 0.6258],
         [ 1.0589],
         [ 3.7295],
         [ 2.1268],
         [ 2.6932],
         [ 4.5804],
         [ 4.8977],
         [ 4.8536],
         [-1.4216],
         [ 0.4509],
         [-0.3101],
         [-1.0796],
         [-0.7134],
         [-0.4656],
         [ 0.2908],
         [-0.2382],
         [ 6.3877],
         [ 4.8392],
         [ 0.

  2%|▏         | 130/6235 [00:15<12:09,  8.36it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.7948, -1.0000, -0.5138,  0.1100, -0.5968], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.0945, -0.0148,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.0685, -0.1089,  0.1861, -0.5554], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.6319, -0.0207,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0951],
         [ 0.0349],
         [ 0.6360],
         [ 1.0635],
         [ 3.7140],
         [ 2.1286],
         [ 2.6944],
         [ 4.5798],
         [ 4.9005],
         [ 4.8574],
         [-1.3964],
         [ 0.4687],
         [-0.2993],
         [-1.0732],
         [-0.7116],
         [-0.4628],
         [ 0.2942],
         [-0.2363],
         [ 6.3908],
         [ 4.8435],
         [ 0.

  2%|▏         | 131/6235 [00:15<12:06,  8.40it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.8208, -1.0000, -0.5291,  0.1057, -0.5995], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.1205, -0.0090,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.0945, -0.0951,  0.1979, -0.5554], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.6059, -0.0196,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0989],
         [ 0.0292],
         [ 0.6320],
         [ 1.0525],
         [ 3.6820],
         [ 2.1180],
         [ 2.6883],
         [ 4.5757],
         [ 4.8985],
         [ 4.8545],
         [-1.3813],
         [ 0.4736],
         [-0.2964],
         [-1.0699],
         [-0.7060],
         [-0.4542],
         [ 0.3044],
         [-0.2305],
         [ 6.3945],
         [ 4.8521],
         [ 0.

  2%|▏         | 132/6235 [00:15<12:05,  8.42it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.8468, -1.0000, -0.5542,  0.0990, -0.5995], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.1465, -0.0078,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.1205, -0.0989,  0.2146, -0.5582], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.5799, -0.0188,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0970],
         [ 0.0286],
         [ 0.6333],
         [ 1.0476],
         [ 3.6604],
         [ 2.1126],
         [ 2.6867],
         [ 4.5711],
         [ 4.8999],
         [ 4.8548],
         [-1.3571],
         [ 0.4859],
         [-0.2874],
         [-1.0624],
         [-0.6993],
         [-0.4452],
         [ 0.3144],
         [-0.2235],
         [ 6.3979],
         [ 4.8601],
         [ 0.

  2%|▏         | 133/6235 [00:15<12:04,  8.42it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.8728, -1.0000, -0.5781,  0.0795, -0.6106], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.1724, -0.0100,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.1465, -0.0970,  0.2310, -0.5582], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.5539, -0.0183,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0716],
         [ 0.0497],
         [ 0.6534],
         [ 1.0640],
         [ 3.6625],
         [ 2.1242],
         [ 2.6957],
         [ 4.5651],
         [ 4.9050],
         [ 4.8603],
         [-1.3147],
         [ 0.5182],
         [-0.2631],
         [-1.0447],
         [-0.6921],
         [-0.4388],
         [ 0.3195],
         [-0.2185],
         [ 6.3972],
         [ 4.8617],
         [ 0.

  2%|▏         | 134/6235 [00:15<12:01,  8.45it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.8988, -1.0000, -0.5464,  0.0680, -0.6188], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.1984, -0.0098,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.1724, -0.0716,  0.2320, -0.5526], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.5279, -0.0141,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0246],
         [ 0.0902],
         [ 0.6904],
         [ 1.0977],
         [ 3.6850],
         [ 2.1507],
         [ 2.7141],
         [ 4.5564],
         [ 4.9130],
         [ 4.8702],
         [-1.2545],
         [ 0.5692],
         [-0.2242],
         [-1.0164],
         [-0.6839],
         [-0.4341],
         [ 0.3205],
         [-0.2149],
         [ 6.3925],
         [ 4.8576],
         [ 0.

  2%|▏         | 135/6235 [00:16<12:00,  8.46it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.9248, -1.0000, -0.5118,  0.0562, -0.6243], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.2244, -0.0093,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.1984, -0.0246,  0.2273, -0.5471], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.5019, -0.0143,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0210],
         [ 0.1339],
         [ 0.7238],
         [ 1.1309],
         [ 3.7002],
         [ 2.1794],
         [ 2.7352],
         [ 4.5510],
         [ 4.9160],
         [ 4.8768],
         [-1.2081],
         [ 0.6141],
         [-0.1890],
         [-0.9880],
         [-0.6712],
         [-0.4260],
         [ 0.3248],
         [-0.2106],
         [ 6.3891],
         [ 4.8544],
         [ 0.

  2%|▏         | 136/6235 [00:16<12:00,  8.46it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.9508, -1.0000, -0.4706,  0.0544, -0.6354], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.2504, -0.0079,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.2244,  0.0210,  0.2257, -0.5444], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.4759, -0.0193,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0447],
         [ 0.1565],
         [ 0.7410],
         [ 1.1433],
         [ 3.6939],
         [ 2.1911],
         [ 2.7462],
         [ 4.5457],
         [ 4.9173],
         [ 4.8793],
         [-1.1787],
         [ 0.6418],
         [-0.1670],
         [-0.9676],
         [-0.6581],
         [-0.4144],
         [ 0.3342],
         [-0.2042],
         [ 6.3928],
         [ 4.8600],
         [ 0.

  2%|▏         | 137/6235 [00:16<12:00,  8.46it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -0.9768, -1.0000, -0.4324,  0.0620, -0.6464], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.2764, -0.0071,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.2504,  0.0447,  0.2337, -0.5416], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.4499, -0.0206,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0454],
         [ 0.1565],
         [ 0.7384],
         [ 1.1365],
         [ 3.6672],
         [ 2.1845],
         [ 2.7459],
         [ 4.5417],
         [ 4.9174],
         [ 4.8782],
         [-1.1692],
         [ 0.6499],
         [-0.1605],
         [-0.9582],
         [-0.6471],
         [-0.4019],
         [ 0.3464],
         [-0.1976],
         [ 6.4048],
         [ 4.8756],
         [ 0.

  2%|▏         | 138/6235 [00:16<12:00,  8.46it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.9972, -0.4233,  0.0937, -0.6878], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.3024, -0.0088,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.2764,  0.0454,  0.2448, -0.5361], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.4239, -0.0213,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0399],
         [ 0.1497],
         [ 0.7269],
         [ 1.1250],
         [ 3.6326],
         [ 2.1698],
         [ 2.7397],
         [ 4.5404],
         [ 4.9189],
         [ 4.8776],
         [-1.1689],
         [ 0.6513],
         [-0.1608],
         [-0.9550],
         [-0.6393],
         [-0.3918],
         [ 0.3570],
         [-0.1930],
         [ 6.4233],
         [ 4.8965],
         [ 0.

  2%|▏         | 139/6235 [00:16<12:00,  8.46it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.9712, -0.4154,  0.1290, -0.6630], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.3284, -0.0128,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.3024,  0.0399,  0.2537, -0.5361], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.3980, -0.0223,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0536],
         [ 0.1602],
         [ 0.7267],
         [ 1.1331],
         [ 3.6174],
         [ 2.1681],
         [ 2.7408],
         [ 4.5405],
         [ 4.9228],
         [ 4.8820],
         [-1.1610],
         [ 0.6646],
         [-0.1530],
         [-0.9478],
         [-0.6333],
         [-0.3865],
         [ 0.3606],
         [-0.1934],
         [ 6.4377],
         [ 4.9101],
         [ 0.

  2%|▏         | 140/6235 [00:16<12:07,  8.37it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.9452, -0.3888,  0.1268, -0.6133], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.3544, -0.0147,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.3284,  0.0536,  0.2529, -0.5306], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.3720, -0.0181,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0872],
         [ 0.1895],
         [ 0.7431],
         [ 1.1571],
         [ 3.6195],
         [ 2.1821],
         [ 2.7519],
         [ 4.5420],
         [ 4.9293],
         [ 4.8912],
         [-1.1415],
         [ 0.6908],
         [-0.1353],
         [-0.9346],
         [-0.6278],
         [-0.3843],
         [ 0.3601],
         [-0.1958],
         [ 6.4473],
         [ 4.9167],
         [ 0.

  2%|▏         | 141/6235 [00:16<12:38,  8.03it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.9192, -0.3338,  0.1264, -0.6050], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.3804, -0.0124,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.3544,  0.0872,  0.2466, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.3460, -0.0153,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1049],
         [ 0.2036],
         [ 0.7491],
         [ 1.1619],
         [ 3.6004],
         [ 2.1832],
         [ 2.7559],
         [ 4.5450],
         [ 4.9350],
         [ 4.8975],
         [-1.1302],
         [ 0.7054],
         [-0.1263],
         [-0.9266],
         [-0.6219],
         [-0.3788],
         [ 0.3656],
         [-0.1947],
         [ 6.4635],
         [ 4.9325],
         [ 0.

  2%|▏         | 142/6235 [00:16<12:29,  8.13it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.8933, -0.3258,  0.1435, -0.5995], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.4064, -0.0099,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.3804,  0.1049,  0.2518, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.3200, -0.0152,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0908],
         [ 0.1864],
         [ 0.7280],
         [ 1.1364],
         [ 3.5441],
         [ 2.1568],
         [ 2.7426],
         [ 4.5492],
         [ 4.9365],
         [ 4.8963],
         [-1.1394],
         [ 0.6963],
         [-0.1361],
         [-0.9316],
         [-0.6170],
         [-0.3697],
         [ 0.3777],
         [-0.1914],
         [ 6.4894],
         [ 4.9623],
         [ 0.

  2%|▏         | 143/6235 [00:17<12:19,  8.24it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.8673, -0.3793,  0.1559, -0.5940], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.4324, -0.0082,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.4064,  0.0908,  0.2663, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.2940, -0.0154,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0756],
         [ 0.1677],
         [ 0.7038],
         [ 1.1119],
         [ 3.4890],
         [ 2.1310],
         [ 2.7300],
         [ 4.5591],
         [ 4.9419],
         [ 4.8996],
         [-1.1439],
         [ 0.6917],
         [-0.1436],
         [-0.9374],
         [-0.6139],
         [-0.3634],
         [ 0.3879],
         [-0.1905],
         [ 6.5166],
         [ 4.9934],
         [ 0.

  2%|▏         | 144/6235 [00:17<12:11,  8.32it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.8413, -0.4451,  0.1472, -0.5775], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.4584, -0.0081,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.4324,  0.0756,  0.2798, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.2680, -0.0158,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0840],
         [ 0.1720],
         [ 0.6992],
         [ 1.1112],
         [ 3.4614],
         [ 2.1271],
         [ 2.7315],
         [ 4.5712],
         [ 4.9517],
         [ 4.9107],
         [-1.1274],
         [ 0.7098],
         [-0.1345],
         [-0.9342],
         [-0.6116],
         [-0.3617],
         [ 0.3919],
         [-0.1928],
         [ 6.5373],
         [ 5.0161],
         [ 0.

  2%|▏         | 145/6235 [00:17<12:06,  8.38it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.8153, -0.4312,  0.1462, -0.5802], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.4844, -0.0114,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.4584,  0.0840,  0.2914, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.2420, -0.0189,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0962],
         [ 0.1813],
         [ 0.7021],
         [ 1.1134],
         [ 3.4385],
         [ 2.1291],
         [ 2.7383],
         [ 4.5820],
         [ 4.9630],
         [ 4.9236],
         [-1.1002],
         [ 0.7364],
         [-0.1180],
         [-0.9246],
         [-0.6059],
         [-0.3566],
         [ 0.3995],
         [-0.1913],
         [ 6.5581],
         [ 5.0394],
         [ 0.

  2%|▏         | 146/6235 [00:17<12:01,  8.44it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.7893, -0.3806,  0.1606, -0.5802], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.5104, -0.0132,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.4844,  0.0962,  0.2880, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.2160, -0.0201,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0958],
         [ 0.1805],
         [ 0.6965],
         [ 1.1074],
         [ 3.4058],
         [ 2.1262],
         [ 2.7432],
         [ 4.5940],
         [ 4.9737],
         [ 4.9350],
         [-1.0785],
         [ 0.7565],
         [-0.1056],
         [-0.9163],
         [-0.5985],
         [-0.3484],
         [ 0.4113],
         [-0.1872],
         [ 6.5827],
         [ 5.0679],
         [ 0.

  2%|▏         | 147/6235 [00:17<11:58,  8.48it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.7633, -0.3660,  0.1658, -0.5719], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.5363, -0.0098,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.5104,  0.0958,  0.2767, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.1900, -0.0170,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0913],
         [ 0.1743],
         [ 0.6834],
         [ 1.0957],
         [ 3.3631],
         [ 2.1167],
         [ 2.7431],
         [ 4.6067],
         [ 4.9825],
         [ 4.9446],
         [-1.0624],
         [ 0.7707],
         [-0.0983],
         [-0.9123],
         [-0.5928],
         [-0.3417],
         [ 0.4223],
         [-0.1849],
         [ 6.6080],
         [ 5.0976],
         [ 0.

  2%|▏         | 148/6235 [00:17<11:56,  8.50it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.7373, -0.3903,  0.1595, -0.5582], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.5623, -0.0167,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.5363,  0.0913,  0.2707, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.1640, -0.0172,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0777],
         [ 0.1612],
         [ 0.6618],
         [ 1.0761],
         [ 3.3121],
         [ 2.1002],
         [ 2.7377],
         [ 4.6174],
         [ 4.9880],
         [ 4.9506],
         [-1.0485],
         [ 0.7798],
         [-0.0952],
         [-0.9118],
         [-0.5883],
         [-0.3355],
         [ 0.4334],
         [-0.1825],
         [ 6.6317],
         [ 5.1276],
         [ 0.

  2%|▏         | 149/6235 [00:17<11:53,  8.53it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.7113, -0.4434,  0.1539, -0.5664], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.5883, -0.0144,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.5623,  0.0777,  0.2678, -0.5223], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.1380, -0.0171,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0728],
         [ 0.1576],
         [ 0.6463],
         [ 1.0638],
         [ 3.2691],
         [ 2.0899],
         [ 2.7352],
         [ 4.6285],
         [ 4.9948],
         [ 4.9588],
         [-1.0259],
         [ 0.7977],
         [-0.0871],
         [-0.9100],
         [-0.5864],
         [-0.3326],
         [ 0.4413],
         [-0.1818],
         [ 6.6551],
         [ 5.1560],
         [ 1.

  2%|▏         | 150/6235 [00:17<11:52,  8.54it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.6853, -0.4854,  0.1449, -0.5747], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.6143, -0.0151,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.5883,  0.0728,  0.2775, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.1120, -0.0137,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0685],
         [ 0.1552],
         [ 0.6356],
         [ 1.0558],
         [ 3.2291],
         [ 2.0834],
         [ 2.7356],
         [ 4.6394],
         [ 5.0028],
         [ 4.9682],
         [-0.9975],
         [ 0.8206],
         [-0.0752],
         [-0.9050],
         [-0.5827],
         [-0.3279],
         [ 0.4512],
         [-0.1789],
         [ 6.6787],
         [ 5.1846],
         [ 1.

  2%|▏         | 151/6235 [00:17<11:53,  8.53it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.6593, -0.4823,  0.1336, -0.5802], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.6403, -0.0178,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.6143,  0.0685,  0.2908, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.0860, -0.0161,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0660],
         [ 0.1561],
         [ 0.6305],
         [ 1.0548],
         [ 3.1960],
         [ 2.0830],
         [ 2.7403],
         [ 4.6489],
         [ 5.0114],
         [ 4.9776],
         [-0.9638],
         [ 0.8487],
         [-0.0588],
         [-0.8964],
         [-0.5771],
         [-0.3217],
         [ 0.4623],
         [-0.1747],
         [ 6.7007],
         [ 5.2113],
         [ 1.

  2%|▏         | 152/6235 [00:18<11:52,  8.54it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.6333, -0.4609,  0.1338, -0.5857], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.6663, -0.0117,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.6403,  0.0660,  0.2855, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.0600, -0.0169,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0836],
         [ 0.1717],
         [ 0.6364],
         [ 1.0663],
         [ 3.1743],
         [ 2.0915],
         [ 2.7506],
         [ 4.6601],
         [ 5.0224],
         [ 4.9924],
         [-0.9232],
         [ 0.8871],
         [-0.0350],
         [-0.8835],
         [-0.5714],
         [-0.3173],
         [ 0.4703],
         [-0.1726],
         [ 6.7211],
         [ 5.2342],
         [ 1.

  2%|▏         | 153/6235 [00:18<11:53,  8.52it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.6073, -0.4526,  0.1306, -0.5857], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.6923, -0.0119,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.6663,  0.0836,  0.2732, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.0341, -0.0127,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1098],
         [ 0.1928],
         [ 0.6455],
         [ 1.0798],
         [ 3.1487],
         [ 2.0992],
         [ 2.7597],
         [ 4.6689],
         [ 5.0318],
         [ 5.0075],
         [-0.8823],
         [ 0.9264],
         [-0.0110],
         [-0.8709],
         [-0.5665],
         [-0.3141],
         [ 0.4766],
         [-0.1718],
         [ 6.7425],
         [ 5.2578],
         [ 1.

  2%|▏         | 154/6235 [00:18<11:51,  8.54it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.5813, -0.4453,  0.1230, -0.5885], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.7183, -0.0110,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.6923,  0.1098,  0.2724, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000,  0.0081, -0.0152,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1149],
         [ 0.1964],
         [ 0.6419],
         [ 1.0773],
         [ 3.1056],
         [ 2.0953],
         [ 2.7626],
         [ 4.6744],
         [ 5.0380],
         [ 5.0168],
         [-0.8486],
         [ 0.9554],
         [ 0.0070],
         [-0.8588],
         [-0.5564],
         [-0.3035],
         [ 0.4911],
         [-0.1657],
         [ 6.7661],
         [ 5.2865],
         [ 1.

  2%|▏         | 155/6235 [00:18<11:50,  8.56it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.5554, -0.4303,  0.1249, -0.5885], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.7443, -0.0156,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.7183,  0.1149,  0.2753, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.0179, -0.0153,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1083],
         [ 0.1917],
         [ 0.6290],
         [ 1.0647],
         [ 3.0525],
         [ 2.0833],
         [ 2.7601],
         [ 4.6796],
         [ 5.0426],
         [ 5.0229],
         [-0.8184],
         [ 0.9790],
         [ 0.0213],
         [-0.8483],
         [-0.5453],
         [-0.2912],
         [ 0.5076],
         [-0.1581],
         [ 6.7913],
         [ 5.3173],
         [ 1.

  3%|▎         | 156/6235 [00:18<11:51,  8.54it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.5294, -0.4141,  0.1271, -0.5885], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.7703, -0.0154,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.7443,  0.1083,  0.2825, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.0439, -0.0135,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1123],
         [ 0.1964],
         [ 0.6208],
         [ 1.0578],
         [ 3.0088],
         [ 2.0762],
         [ 2.7600],
         [ 4.6887],
         [ 5.0497],
         [ 5.0326],
         [-0.7845],
         [ 1.0079],
         [ 0.0386],
         [-0.8379],
         [-0.5377],
         [-0.2841],
         [ 0.5190],
         [-0.1534],
         [ 6.8163],
         [ 5.3456],
         [ 1.

  3%|▎         | 157/6235 [00:18<11:52,  8.53it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.5034, -0.3947,  0.1292, -0.5885], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.7963, -0.0182,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.7703,  0.1123,  0.2927, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.0699, -0.0121,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1186],
         [ 0.2030],
         [ 0.6151],
         [ 1.0537],
         [ 2.9719],
         [ 2.0726],
         [ 2.7628],
         [ 4.6998],
         [ 5.0587],
         [ 5.0443],
         [-0.7516],
         [ 1.0375],
         [ 0.0569],
         [-0.8266],
         [-0.5295],
         [-0.2764],
         [ 0.5309],
         [-0.1481],
         [ 6.8425],
         [ 5.3739],
         [ 1.

  3%|▎         | 158/6235 [00:18<11:55,  8.49it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.4774, -0.3692,  0.1408, -0.5912], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.8223, -0.0208,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.7963,  0.1186,  0.2888, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.0959, -0.0041,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1322],
         [ 0.2151],
         [ 0.6138],
         [ 1.0555],
         [ 2.9433],
         [ 2.0733],
         [ 2.7688],
         [ 4.7118],
         [ 5.0690],
         [ 5.0581],
         [-0.7182],
         [ 1.0704],
         [ 0.0785],
         [-0.8120],
         [-0.5195],
         [-0.2675],
         [ 0.5430],
         [-0.1426],
         [ 6.8690],
         [ 5.4011],
         [ 1.

  3%|▎         | 159/6235 [00:18<11:56,  8.48it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.4514, -0.3407,  0.1503, -0.5885], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.8483, -0.0191,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.8223,  0.1322,  0.2810, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.1219,  0.0052,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1511],
         [ 0.2303],
         [ 0.6158],
         [ 1.0596],
         [ 2.9194],
         [ 2.0764],
         [ 2.7767],
         [ 4.7227],
         [ 5.0791],
         [ 5.0729],
         [-0.6836],
         [ 1.1053],
         [ 0.1028],
         [-0.7939],
         [-0.5071],
         [-0.2570],
         [ 0.5554],
         [-0.1365],
         [ 6.8929],
         [ 5.4258],
         [ 1.

  3%|▎         | 160/6235 [00:19<11:59,  8.44it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.4254, -0.3195,  0.1516, -0.5830], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.8743, -0.0182,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.8483,  0.1511,  0.2830, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.1479,  0.0140,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1506],
         [ 0.2306],
         [ 0.6085],
         [ 1.0503],
         [ 2.8839],
         [ 2.0699],
         [ 2.7791],
         [ 4.7298],
         [ 5.0859],
         [ 5.0822],
         [-0.6582],
         [ 1.1291],
         [ 0.1204],
         [-0.7776],
         [-0.4921],
         [-0.2420],
         [ 0.5723],
         [-0.1273],
         [ 6.9163],
         [ 5.4520],
         [ 1.

  3%|▎         | 161/6235 [00:19<11:58,  8.45it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.3994, -0.3003,  0.1590, -0.5802], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.9002, -0.0221,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.8743,  0.1506,  0.2874, -0.5223], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.1739,  0.0119,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1314],
         [ 0.2164],
         [ 0.5895],
         [ 1.0289],
         [ 2.8377],
         [ 2.0531],
         [ 2.7746],
         [ 4.7346],
         [ 5.0894],
         [ 5.0857],
         [-0.6458],
         [ 1.1392],
         [ 0.1282],
         [-0.7666],
         [-0.4769],
         [-0.2251],
         [ 0.5918],
         [-0.1167],
         [ 6.9408],
         [ 5.4805],
         [ 1.

  3%|▎         | 162/6235 [00:19<12:00,  8.43it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.3734, -0.2776,  0.1705, -0.5775], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.9262, -0.0213,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.9002,  0.1314,  0.2909, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.1999,  0.0120,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1116],
         [ 0.2016],
         [ 0.5673],
         [ 1.0085],
         [ 2.7930],
         [ 2.0356],
         [ 2.7684],
         [ 4.7395],
         [ 5.0924],
         [ 5.0887],
         [-0.6382],
         [ 1.1457],
         [ 0.1324],
         [-0.7600],
         [-0.4651],
         [-0.2119],
         [ 0.6081],
         [-0.1085],
         [ 6.9651],
         [ 5.5078],
         [ 1.

  3%|▎         | 163/6235 [00:19<12:00,  8.42it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.3474, -0.2573,  0.1848, -0.5747], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.9522, -0.0221,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.9262,  0.1116,  0.2951, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.2259,  0.0063,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0957],
         [ 0.1902],
         [ 0.5468],
         [ 0.9938],
         [ 2.7552],
         [ 2.0211],
         [ 2.7630],
         [ 4.7444],
         [ 5.0949],
         [ 5.0915],
         [-0.6310],
         [ 1.1521],
         [ 0.1361],
         [-0.7556],
         [-0.4564],
         [-0.2026],
         [ 0.6202],
         [-0.1032],
         [ 6.9866],
         [ 5.5321],
         [ 1.

  3%|▎         | 164/6235 [00:19<11:59,  8.44it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.3214, -0.2451,  0.2006, -0.5719], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.9782, -0.0226,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.9522,  0.0957,  0.2862, -0.5223], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.2519,  0.0054,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1005],
         [ 0.1942],
         [ 0.5395],
         [ 0.9951],
         [ 2.7343],
         [ 2.0186],
         [ 2.7643],
         [ 4.7505],
         [ 5.0990],
         [ 5.0984],
         [-0.6151],
         [ 1.1692],
         [ 0.1474],
         [-0.7480],
         [-0.4510],
         [-0.1990],
         [ 0.6249],
         [-0.1030],
         [ 7.0033],
         [ 5.5509],
         [ 1.

  3%|▎         | 165/6235 [00:19<11:59,  8.44it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.2954, -0.2486,  0.1965, -0.5637], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.9958, -0.0180,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.9782,  0.1005,  0.2733, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.2779,  0.0073,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1266],
         [ 0.2139],
         [ 0.5467],
         [ 1.0080],
         [ 2.7236],
         [ 2.0253],
         [ 2.7711],
         [ 4.7587],
         [ 5.1062],
         [ 5.1113],
         [-0.5910],
         [ 1.1979],
         [ 0.1658],
         [-0.7373],
         [-0.4480],
         [-0.1993],
         [ 0.6252],
         [-0.1058],
         [ 7.0204],
         [ 5.5678],
         [ 1.

  3%|▎         | 166/6235 [00:19<12:01,  8.42it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.2694, -0.2454,  0.1929, -0.5582], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.9698, -0.0166,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.9958,  0.1266,  0.2752, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.3039,  0.0040,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1327],
         [ 0.2175],
         [ 0.5429],
         [ 1.0040],
         [ 2.6987],
         [ 2.0213],
         [ 2.7711],
         [ 4.7637],
         [ 5.1100],
         [ 5.1184],
         [-0.5739],
         [ 1.2152],
         [ 0.1763],
         [-0.7307],
         [-0.4444],
         [-0.1966],
         [ 0.6296],
         [-0.1053],
         [ 7.0382],
         [ 5.5877],
         [ 1.

  3%|▎         | 167/6235 [00:19<12:11,  8.30it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.2434, -0.2404,  0.1955, -0.5582], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.9438, -0.0198,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.9698,  0.1327,  0.2781, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.3298,  0.0021,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1117],
         [ 0.2008],
         [ 0.5227],
         [ 0.9839],
         [ 2.6599],
         [ 2.0047],
         [ 2.7614],
         [ 4.7651],
         [ 5.1089],
         [ 5.1176],
         [-0.5692],
         [ 1.2154],
         [ 0.1732],
         [-0.7335],
         [-0.4439],
         [-0.1943],
         [ 0.6352],
         [-0.1037],
         [ 7.0561],
         [ 5.6094],
         [ 1.

  3%|▎         | 168/6235 [00:19<12:11,  8.30it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.2174, -0.2470,  0.1813, -0.5554], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.9178, -0.0212,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.9438,  0.1117,  0.2835, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.3558, -0.0030,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0979],
         [ 0.1896],
         [ 0.5053],
         [ 0.9702],
         [ 2.6251],
         [ 1.9913],
         [ 2.7525],
         [ 4.7678],
         [ 5.1085],
         [ 5.1183],
         [-0.5647],
         [ 1.2178],
         [ 0.1698],
         [-0.7394],
         [-0.4474],
         [-0.1969],
         [ 0.6361],
         [-0.1059],
         [ 7.0751],
         [ 5.6304],
         [ 1.

  3%|▎         | 169/6235 [00:20<12:07,  8.33it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.1915, -0.2424,  0.1790, -0.5609], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.8918, -0.0254,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.9178,  0.0979,  0.2928, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.3818, -0.0062,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0928],
         [ 0.1847],
         [ 0.4930],
         [ 0.9633],
         [ 2.5965],
         [ 1.9832],
         [ 2.7469],
         [ 4.7723],
         [ 5.1094],
         [ 5.1212],
         [-0.5584],
         [ 1.2244],
         [ 0.1685],
         [-0.7455],
         [-0.4532],
         [-0.2025],
         [ 0.6342],
         [-0.1103],
         [ 7.0945],
         [ 5.6505],
         [ 1.

  3%|▎         | 170/6235 [00:20<12:04,  8.37it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.1655, -0.2274,  0.1954, -0.5664], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.8658, -0.0271,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.8918,  0.0928,  0.2860, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.4078, -0.0058,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1037],
         [ 0.1921],
         [ 0.4909],
         [ 0.9673],
         [ 2.5804],
         [ 1.9841],
         [ 2.7468],
         [ 4.7782],
         [ 5.1120],
         [ 5.1280],
         [-0.5458],
         [ 1.2394],
         [ 0.1728],
         [-0.7497],
         [-0.4619],
         [-0.2126],
         [ 0.6272],
         [-0.1178],
         [ 7.1118],
         [ 5.6671],
         [ 1.

  3%|▎         | 171/6235 [00:20<12:03,  8.38it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.1395, -0.2164,  0.1952, -0.5609], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.8398, -0.0244,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.8658,  0.1037,  0.2740, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.4338, -0.0055,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1188],
         [ 0.2021],
         [ 0.4921],
         [ 0.9721],
         [ 2.5653],
         [ 1.9860],
         [ 2.7471],
         [ 4.7828],
         [ 5.1139],
         [ 5.1352],
         [-0.5297],
         [ 1.2571],
         [ 0.1792],
         [-0.7521],
         [-0.4706],
         [-0.2227],
         [ 0.6198],
         [-0.1253],
         [ 7.1266],
         [ 5.6818],
         [ 1.

  3%|▎         | 172/6235 [00:20<12:03,  8.38it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.1135, -0.1927,  0.1937, -0.5582], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.8138, -0.0229,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.8398,  0.1188,  0.2750, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.4598, -0.0037,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.1116],
         [ 0.1942],
         [ 0.4799],
         [ 0.9591],
         [ 2.5331],
         [ 1.9752],
         [ 2.7396],
         [ 4.7834],
         [ 5.1116],
         [ 5.1358],
         [-0.5209],
         [ 1.2630],
         [ 0.1780],
         [-0.7573],
         [-0.4766],
         [-0.2280],
         [ 0.6179],
         [-0.1290],
         [ 7.1416],
         [ 5.7000],
         [ 1.

  3%|▎         | 173/6235 [00:20<12:01,  8.40it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.0875, -0.1797,  0.1987, -0.5582], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.7878, -0.0241,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.8138,  0.1116,  0.2763, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.4858, -0.0028,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0930],
         [ 0.1782],
         [ 0.4605],
         [ 0.9405],
         [ 2.4955],
         [ 1.9600],
         [ 2.7290],
         [ 4.7815],
         [ 5.1065],
         [ 5.1323],
         [-0.5169],
         [ 1.2626],
         [ 0.1722],
         [-0.7658],
         [-0.4836],
         [-0.2340],
         [ 0.6157],
         [-0.1330],
         [ 7.1547],
         [ 5.7173],
         [ 1.

  3%|▎         | 174/6235 [00:20<12:02,  8.39it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.0615, -0.1887,  0.1890, -0.5554], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.7619, -0.0238,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.7878,  0.0930,  0.2805, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.5118, -0.0014,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0824],
         [ 0.1694],
         [ 0.4463],
         [ 0.9305],
         [ 2.4660],
         [ 1.9516],
         [ 2.7224],
         [ 4.7807],
         [ 5.1022],
         [ 5.1306],
         [-0.5103],
         [ 1.2668],
         [ 0.1686],
         [-0.7748],
         [-0.4936],
         [-0.2442],
         [ 0.6093],
         [-0.1397],
         [ 7.1661],
         [ 5.7315],
         [ 1.

  3%|▎         | 175/6235 [00:20<12:02,  8.38it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.0355, -0.1933,  0.1894, -0.5582], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.7359, -0.0239,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.7619,  0.0824,  0.2895, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.5378, -0.0012,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0747],
         [ 0.1627],
         [ 0.4345],
         [ 0.9228],
         [ 2.4387],
         [ 1.9450],
         [ 2.7173],
         [ 4.7807],
         [ 5.0989],
         [ 5.1299],
         [-0.5019],
         [ 1.2733],
         [ 0.1665],
         [-0.7832],
         [-0.5038],
         [-0.2544],
         [ 0.6033],
         [-0.1461],
         [ 7.1793],
         [ 5.7468],
         [ 1.

  3%|▎         | 176/6235 [00:20<12:02,  8.39it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000, -0.0095, -0.1915,  0.2044, -0.5637], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.7099, -0.0243,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.7359,  0.0747,  0.2826, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.5638,  0.0041,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0802],
         [ 0.1667],
         [ 0.4303],
         [ 0.9223],
         [ 2.4166],
         [ 1.9433],
         [ 2.7147],
         [ 4.7821],
         [ 5.0980],
         [ 5.1335],
         [-0.4886],
         [ 1.2869],
         [ 0.1697],
         [-0.7880],
         [-0.5133],
         [-0.2651],
         [ 0.5956],
         [-0.1534],
         [ 7.1933],
         [ 5.7609],
         [ 1.

  3%|▎         | 177/6235 [00:21<12:01,  8.39it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.0165, -0.1827,  0.2011, -0.5609], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.6839, -0.0195,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.7099,  0.0802,  0.2702, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.5898,  0.0125,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0986],
         [ 0.1812],
         [ 0.4351],
         [ 0.9270],
         [ 2.3989],
         [ 1.9467],
         [ 2.7153],
         [ 4.7845],
         [ 5.0996],
         [ 5.1413],
         [-0.4714],
         [ 1.3060],
         [ 0.1775],
         [-0.7891],
         [-0.5220],
         [-0.2761],
         [ 0.5867],
         [-0.1613],
         [ 7.2064],
         [ 5.7723],
         [ 1.

  3%|▎         | 178/6235 [00:21<12:02,  8.39it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.0425, -0.1485,  0.1853, -0.5554], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.6579, -0.0184,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.6839,  0.0986,  0.2707, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.6158,  0.0186,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0977],
         [ 0.1798],
         [ 0.4304],
         [ 0.9157],
         [ 2.3673],
         [ 1.9404],
         [ 2.7106],
         [ 4.7835],
         [ 5.0980],
         [ 5.1437],
         [-0.4613],
         [ 1.3137],
         [ 0.1786],
         [-0.7920],
         [-0.5274],
         [-0.2814],
         [ 0.5843],
         [-0.1643],
         [ 7.2194],
         [ 5.7863],
         [ 1.

  3%|▎         | 179/6235 [00:21<12:00,  8.41it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.0685, -0.1089,  0.1861, -0.5554], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.6319, -0.0207,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.6579,  0.0977,  0.2716, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.6418,  0.0238,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0781],
         [ 0.1634],
         [ 0.4124],
         [ 0.8907],
         [ 2.3217],
         [ 1.9229],
         [ 2.6983],
         [ 4.7792],
         [ 5.0923],
         [ 5.1392],
         [-0.4606],
         [ 1.3095],
         [ 0.1711],
         [-0.7999],
         [-0.5331],
         [-0.2853],
         [ 0.5843],
         [-0.1658],
         [ 7.2330],
         [ 5.8029],
         [ 1.

  3%|▎         | 180/6235 [00:21<12:08,  8.32it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.0945, -0.0951,  0.1979, -0.5554], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.6059, -0.0196,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.6319,  0.0781,  0.2761, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.6678,  0.0257,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0587],
         [ 0.1473],
         [ 0.3894],
         [ 0.8663],
         [ 2.2749],
         [ 1.9030],
         [ 2.6830],
         [ 4.7737],
         [ 5.0843],
         [ 5.1325],
         [-0.4636],
         [ 1.3028],
         [ 0.1607],
         [-0.8112],
         [-0.5419],
         [-0.2932],
         [ 0.5800],
         [-0.1709],
         [ 7.2452],
         [ 5.8179],
         [ 1.

  3%|▎         | 181/6235 [00:21<12:35,  8.02it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.1205, -0.0989,  0.2146, -0.5582], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.5799, -0.0188,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.6059,  0.0587,  0.2842, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.6937,  0.0234,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0461],
         [ 0.1371],
         [ 0.3697],
         [ 0.8488],
         [ 2.2344],
         [ 1.8867],
         [ 2.6697],
         [ 4.7690],
         [ 5.0771],
         [ 5.1271],
         [-0.4658],
         [ 1.2991],
         [ 0.1523],
         [-0.8218],
         [-0.5511],
         [-0.3026],
         [ 0.5737],
         [-0.1777],
         [ 7.2577],
         [ 5.8317],
         [ 1.

  3%|▎         | 182/6235 [00:21<12:21,  8.16it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.1465, -0.0970,  0.2310, -0.5582], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.5539, -0.0183,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.5799,  0.0461,  0.2772, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.7197,  0.0187,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0538],
         [ 0.1433],
         [ 0.3662],
         [ 0.8471],
         [ 2.2109],
         [ 1.8827],
         [ 2.6648],
         [ 4.7676],
         [ 5.0746],
         [ 5.1289],
         [-0.4598],
         [ 1.3077],
         [ 0.1530],
         [-0.8266],
         [-0.5596],
         [-0.3128],
         [ 0.5653],
         [-0.1853],
         [ 7.2707],
         [ 5.8431],
         [ 1.

  3%|▎         | 183/6235 [00:21<12:11,  8.28it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.1724, -0.0716,  0.2320, -0.5526], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.5279, -0.0141,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.5539,  0.0538,  0.2625, -0.5251], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.7457,  0.0190,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0700],
         [ 0.1565],
         [ 0.3728],
         [ 0.8515],
         [ 2.1965],
         [ 1.8855],
         [ 2.6652],
         [ 4.7668],
         [ 5.0744],
         [ 5.1344],
         [-0.4485],
         [ 1.3219],
         [ 0.1589],
         [-0.8268],
         [-0.5665],
         [-0.3219],
         [ 0.5573],
         [-0.1918],
         [ 7.2817],
         [ 5.8523],
         [ 1.

  3%|▎         | 184/6235 [00:21<12:03,  8.36it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.1984, -0.0246,  0.2273, -0.5471], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.5019, -0.0143,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.5279,  0.0700,  0.2600, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.7717,  0.0171,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0619],
         [ 0.1506],
         [ 0.3664],
         [ 0.8376],
         [ 2.1658],
         [ 1.8758],
         [ 2.6581],
         [ 4.7610],
         [ 5.0699],
         [ 5.1330],
         [-0.4460],
         [ 1.3219],
         [ 0.1558],
         [-0.8301],
         [-0.5704],
         [-0.3258],
         [ 0.5551],
         [-0.1936],
         [ 7.2915],
         [ 5.8632],
         [ 1.

  3%|▎         | 185/6235 [00:21<11:59,  8.41it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.2244,  0.0210,  0.2257, -0.5444], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.4759, -0.0193,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.5019,  0.0619,  0.2623, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.7977,  0.0187,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[ 0.0253],
         [ 0.1211],
         [ 0.3356],
         [ 0.8065],
         [ 2.1290],
         [ 1.8569],
         [ 2.6441],
         [ 4.7515],
         [ 5.0624],
         [ 5.1252],
         [-0.4597],
         [ 1.3030],
         [ 0.1405],
         [-0.8392],
         [-0.5734],
         [-0.3274],
         [ 0.5542],
         [-0.1963],
         [ 7.2978],
         [ 5.8731],
         [ 1.

  3%|▎         | 186/6235 [00:22<11:57,  8.43it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.2504,  0.0447,  0.2337, -0.5416], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.4499, -0.0206,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.4759,  0.0253,  0.2664, -0.5278], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.8237,  0.0225,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0087],
         [ 0.0929],
         [ 0.3007],
         [ 0.7798],
         [ 2.0974],
         [ 1.8398],
         [ 2.6302],
         [ 4.7435],
         [ 5.0544],
         [ 5.1166],
         [-0.4782],
         [ 1.2826],
         [ 0.1228],
         [-0.8524],
         [-0.5813],
         [-0.3346],
         [ 0.5478],
         [-0.2032],
         [ 7.3050],
         [ 5.8827],
         [ 1.

  3%|▎         | 187/6235 [00:22<11:56,  8.45it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.2764,  0.0454,  0.2448, -0.5361], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.4239, -0.0213,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.4499, -0.0087,  0.2758, -0.5333], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.8497,  0.0188,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0329],
         [ 0.0723],
         [ 0.2715],
         [ 0.7619],
         [ 2.0724],
         [ 1.8271],
         [ 2.6188],
         [ 4.7372],
         [ 5.0475],
         [ 5.1099],
         [-0.4956],
         [ 1.2665],
         [ 0.1071],
         [-0.8662],
         [-0.5914],
         [-0.3445],
         [ 0.5388],
         [-0.2121],
         [ 7.3144],
         [ 5.8929],
         [ 1.

  3%|▎         | 188/6235 [00:22<11:55,  8.46it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.3024,  0.0399,  0.2537, -0.5361], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.3980, -0.0223,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.4239, -0.0329,  0.2673, -0.5333], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.8757,  0.0085,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0323],
         [ 0.0713],
         [ 0.2599],
         [ 0.7600],
         [ 2.0607],
         [ 1.8252],
         [ 2.6141],
         [ 4.7329],
         [ 5.0438],
         [ 5.1095],
         [-0.5027],
         [ 1.2637],
         [ 0.1004],
         [-0.8756],
         [-0.6029],
         [-0.3575],
         [ 0.5259],
         [-0.2231],
         [ 7.3230],
         [ 5.9000],
         [ 1.

  3%|▎         | 189/6235 [00:22<11:54,  8.46it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.3284,  0.0536,  0.2529, -0.5306], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.3720, -0.0181,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.3980, -0.0323,  0.2513, -0.5306], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.9017, -0.0027,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0225],
         [ 0.0771],
         [ 0.2571],
         [ 0.7622],
         [ 2.0544],
         [ 1.8281],
         [ 2.6131],
         [ 4.7284],
         [ 5.0411],
         [ 5.1118],
         [-0.5041],
         [ 1.2661],
         [ 0.0982],
         [-0.8812],
         [-0.6134],
         [-0.3701],
         [ 0.5129],
         [-0.2331],
         [ 7.3289],
         [ 5.9044],
         [ 1.

  3%|▎         | 190/6235 [00:22<11:53,  8.47it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.3544,  0.0872,  0.2466, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.3460, -0.0153,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.3720, -0.0225,  0.2489, -0.5306], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.9277, -0.0173,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.0429],
         [ 0.0588],
         [ 0.2369],
         [ 0.7427],
         [ 2.0309],
         [ 1.8168],
         [ 2.6040],
         [ 4.7189],
         [ 5.0336],
         [ 5.1058],
         [-0.5153],
         [ 1.2520],
         [ 0.0859],
         [-0.8904],
         [-0.6203],
         [-0.3756],
         [ 0.5078],
         [-0.2374],
         [ 7.3340],
         [ 5.9122],
         [ 1.

  3%|▎         | 191/6235 [00:22<11:51,  8.49it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.3804,  0.1049,  0.2518, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.3200, -0.0152,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.3460, -0.0429,  0.2464, -0.5361], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.9537, -0.0173,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-8.4769e-02],
         [ 2.2585e-02],
         [ 2.0123e-01],
         [ 7.1210e-01],
         [ 1.9921e+00],
         [ 1.7931e+00],
         [ 2.5861e+00],
         [ 4.7123e+00],
         [ 5.0281e+00],
         [ 5.0985e+00],
         [-5.3423e-01],
         [ 1.2269e+00],
         [ 6.4229e-02],
         [-9.0874e-01],
         [-6.3591e-01],
         [-3.9068e-01],
         [ 4.9157e-01],
     

  3%|▎         | 192/6235 [00:22<11:51,  8.49it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.4064,  0.0908,  0.2663, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.2940, -0.0154,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.3200, -0.0848,  0.2434, -0.5388], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -1.0000, -0.9797, -0.0074,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.1221],
         [-0.0100],
         [ 0.1651],
         [ 0.6874],
         [ 1.9572],
         [ 1.7711],
         [ 2.5673],
         [ 4.7057],
         [ 5.0212],
         [ 5.0905],
         [-0.5540],
         [ 1.2022],
         [ 0.0414],
         [-0.9304],
         [-0.6561],
         [-0.4114],
         [ 0.4695],
         [-0.2637],
         [ 7.3402],
         [ 5.9277],
         [ 1.

  3%|▎         | 193/6235 [00:22<11:51,  8.49it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.4324,  0.0756,  0.2798, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.2680, -0.0158,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.2940, -0.1221,  0.2388, -0.5416], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.9943, -1.0000,  0.0018,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.1454],
         [-0.0315],
         [ 0.1388],
         [ 0.6732],
         [ 1.9313],
         [ 1.7567],
         [ 2.5533],
         [ 4.6994],
         [ 5.0146],
         [ 5.0847],
         [-0.5666],
         [ 1.1863],
         [ 0.0249],
         [-0.9484],
         [-0.6757],
         [-0.4326],
         [ 0.4471],
         [-0.2791],
         [ 7.3404],
         [ 5.9309],
         [ 1.

  3%|▎         | 194/6235 [00:23<11:50,  8.51it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.4584,  0.0840,  0.2914, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.2420, -0.0189,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.2680, -0.1454,  0.2281, -0.5499], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.9684, -1.0000,  0.0113,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.1574],
         [-0.0441],
         [ 0.1223],
         [ 0.6661],
         [ 1.9128],
         [ 1.7483],
         [ 2.5440],
         [ 4.6927],
         [ 5.0088],
         [ 5.0807],
         [-0.5723],
         [ 1.1783],
         [ 0.0151],
         [-0.9613],
         [-0.6926],
         [-0.4513],
         [ 0.4274],
         [-0.2923],
         [ 7.3399],
         [ 5.9336],
         [ 1.

  3%|▎         | 195/6235 [00:23<11:50,  8.51it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.4844,  0.0962,  0.2880, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.2160, -0.0201,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.2420, -0.1574,  0.2121, -0.5526], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.9424, -1.0000,  0.0169,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.1640],
         [-0.0521],
         [ 0.1119],
         [ 0.6625],
         [ 1.9007],
         [ 1.7435],
         [ 2.5376],
         [ 4.6840],
         [ 5.0016],
         [ 5.0764],
         [-0.5751],
         [ 1.1731],
         [ 0.0083],
         [-0.9711],
         [-0.7073],
         [-0.4678],
         [ 0.4102],
         [-0.3033],
         [ 7.3363],
         [ 5.9335],
         [ 1.

  3%|▎         | 196/6235 [00:23<11:51,  8.49it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.5104,  0.0958,  0.2767, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.1900, -0.0170,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.2160, -0.1640,  0.1987, -0.5554], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.9164, -1.0000,  0.0197,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-1.7754e-01],
         [-6.5515e-02],
         [ 9.9002e-02],
         [ 6.5419e-01],
         [ 1.8864e+00],
         [ 1.7357e+00],
         [ 2.5297e+00],
         [ 4.6729e+00],
         [ 4.9926e+00],
         [ 5.0697e+00],
         [-5.7917e-01],
         [ 1.1646e+00],
         [-4.0974e-05],
         [-9.8120e-01],
         [-7.2145e-01],
         [-4.8320e-01],
         [ 3.9414e-01],
     

  3%|▎         | 197/6235 [00:23<11:54,  8.46it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.5363,  0.0913,  0.2707, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.1640, -0.0172,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.1900, -0.1775,  0.1957, -0.5554], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.8904, -1.0000,  0.0226,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2061],
         [-0.0921],
         [ 0.0755],
         [ 0.6344],
         [ 1.8643],
         [ 1.7192],
         [ 2.5162],
         [ 4.6587],
         [ 4.9802],
         [ 5.0573],
         [-0.5883],
         [ 1.1470],
         [-0.0144],
         [-0.9950],
         [-0.7369],
         [-0.4989],
         [ 0.3779],
         [-0.3213],
         [ 7.3185],
         [ 5.9288],
         [ 1.

  3%|▎         | 198/6235 [00:23<11:56,  8.42it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.5623,  0.0777,  0.2678, -0.5223], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.1380, -0.0171,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.1640, -0.2061,  0.2014, -0.5582], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.8644, -1.0000,  0.0320,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2422],
         [-0.1260],
         [ 0.0440],
         [ 0.6095],
         [ 1.8394],
         [ 1.6986],
         [ 2.4992],
         [ 4.6434],
         [ 4.9653],
         [ 5.0411],
         [-0.6035],
         [ 1.1229],
         [-0.0331],
         [-1.0111],
         [-0.7522],
         [-0.5139],
         [ 0.3623],
         [-0.3297],
         [ 7.3056],
         [ 5.9246],
         [ 1.

  3%|▎         | 199/6235 [00:23<11:56,  8.42it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.5883,  0.0728,  0.2775, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.1120, -0.0137,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.1380, -0.2422,  0.2067, -0.5692], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.8384, -1.0000,  0.0364,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2731],
         [-0.1567],
         [ 0.0141],
         [ 0.5891],
         [ 1.8194],
         [ 1.6807],
         [ 2.4844],
         [ 4.6288],
         [ 4.9502],
         [ 5.0250],
         [-0.6195],
         [ 1.1003],
         [-0.0503],
         [-1.0263],
         [-0.7658],
         [-0.5271],
         [ 0.3489],
         [-0.3371],
         [ 7.2939],
         [ 5.9208],
         [ 1.

  3%|▎         | 200/6235 [00:23<11:57,  8.42it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.6143,  0.0685,  0.2908, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.0860, -0.0161,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.1120, -0.2731,  0.1966, -0.5664], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.8124, -1.0000,  0.0356,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-2.8423e-01],
         [-1.7196e-01],
         [-1.8876e-03],
         [ 5.8297e-01],
         [ 1.8164e+00],
         [ 1.6733e+00],
         [ 2.4767e+00],
         [ 4.6141e+00],
         [ 4.9356e+00],
         [ 5.0121e+00],
         [-6.3008e-01],
         [ 1.0871e+00],
         [-6.0091e-02],
         [-1.0360e+00],
         [-7.7627e-01],
         [-5.3782e-01],
         [ 3.3827e-01],
     

  3%|▎         | 201/6235 [00:23<11:58,  8.40it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.6403,  0.0660,  0.2855, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.0600, -0.0169,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.0860, -0.2842,  0.1714, -0.5609], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.7864, -1.0000,  0.0344,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-2.6831e-01],
         [-1.6484e-01],
         [ 5.4406e-03],
         [ 5.9685e-01],
         [ 1.8368e+00],
         [ 1.6831e+00],
         [ 2.4817e+00],
         [ 4.6008e+00],
         [ 4.9248e+00],
         [ 5.0070e+00],
         [-6.2691e-01],
         [ 1.0910e+00],
         [-5.6721e-02],
         [-1.0367e+00],
         [-7.8424e-01],
         [-5.4774e-01],
         [ 3.2834e-01],
     

  3%|▎         | 202/6235 [00:24<11:58,  8.40it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.6663,  0.0836,  0.2732, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.0341, -0.0127,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.0600, -0.2683,  0.1613, -0.5719], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.7604, -1.0000,  0.0321,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2686],
         [-0.1683],
         [ 0.0094],
         [ 0.6033],
         [ 1.8528],
         [ 1.6900],
         [ 2.4867],
         [ 4.5870],
         [ 4.9157],
         [ 5.0009],
         [-0.6245],
         [ 1.0899],
         [-0.0556],
         [-1.0371],
         [-0.7912],
         [-0.5564],
         [ 0.3195],
         [-0.3493],
         [ 7.2475],
         [ 5.8900],
         [ 1.

  3%|▎         | 203/6235 [00:24<11:58,  8.39it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.6923,  0.1098,  0.2724, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000,  0.0081, -0.0152,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.0341, -0.2686,  0.1598, -0.5802], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.7344, -1.0000,  0.0286,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-2.9827e-01],
         [-1.9402e-01],
         [-5.2612e-03],
         [ 5.9325e-01],
         [ 1.8555e+00],
         [ 1.6846e+00],
         [ 2.4830e+00],
         [ 4.5720e+00],
         [ 4.9046e+00],
         [ 4.9886e+00],
         [-6.3537e-01],
         [ 1.0712e+00],
         [-6.7048e-02],
         [-1.0457e+00],
         [-8.0029e-01],
         [-5.6578e-01],
         [ 3.1002e-01],
     

  3%|▎         | 204/6235 [00:24<11:59,  8.38it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.7183,  0.1149,  0.2753, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.0179, -0.0153,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000,  0.0081, -0.2983,  0.1638, -0.5830], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.7084, -1.0000,  0.0307,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3241],
         [-0.2168],
         [-0.0217],
         [ 0.5880],
         [ 1.8643],
         [ 1.6813],
         [ 2.4787],
         [ 4.5591],
         [ 4.8937],
         [ 4.9768],
         [-0.6520],
         [ 1.0495],
         [-0.0831],
         [-1.0609],
         [-0.8163],
         [-0.5827],
         [ 0.2932],
         [-0.3590],
         [ 7.2110],
         [ 5.8666],
         [ 1.

  3%|▎         | 205/6235 [00:24<11:59,  8.38it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.7443,  0.1083,  0.2825, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.0439, -0.0135,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.0179, -0.3241,  0.1748, -0.5857], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.6824, -1.0000,  0.0322,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3277],
         [-0.2224],
         [-0.0255],
         [ 0.5961],
         [ 1.8902],
         [ 1.6911],
         [ 2.4844],
         [ 4.5562],
         [ 4.8924],
         [ 4.9766],
         [-0.6593],
         [ 1.0412],
         [-0.0909],
         [-1.0740],
         [-0.8361],
         [-0.6059],
         [ 0.2701],
         [-0.3716],
         [ 7.1961],
         [ 5.8535],
         [ 1.

  3%|▎         | 206/6235 [00:24<11:56,  8.41it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.7703,  0.1123,  0.2927, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.0699, -0.0121,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.0439, -0.3277,  0.1707, -0.5802], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.6564, -1.0000,  0.0291,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-2.9670e-01],
         [-1.9985e-01],
         [-1.3432e-03],
         [ 6.2498e-01],
         [ 1.9374e+00],
         [ 1.7217e+00],
         [ 2.5061e+00],
         [ 4.5594e+00],
         [ 4.8998e+00],
         [ 4.9886e+00],
         [-6.4780e-01],
         [ 1.0538e+00],
         [-8.3268e-02],
         [-1.0781e+00],
         [-8.5620e-01],
         [-6.3301e-01],
         [ 2.4279e-01],
     

  3%|▎         | 207/6235 [00:24<11:56,  8.41it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.7963,  0.1186,  0.2888, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.0959, -0.0041,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.0699, -0.2967,  0.1549, -0.5775], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.6304, -1.0000,  0.0268,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2527],
         [-0.1672],
         [ 0.0319],
         [ 0.6587],
         [ 1.9866],
         [ 1.7556],
         [ 2.5299],
         [ 4.5622],
         [ 4.9082],
         [ 5.0037],
         [-0.6327],
         [ 1.0718],
         [-0.0719],
         [-1.0795],
         [-0.8761],
         [-0.6610],
         [ 0.2140],
         [-0.4024],
         [ 7.1637],
         [ 5.8175],
         [ 1.

  3%|▎         | 208/6235 [00:24<11:58,  8.39it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.8223,  0.1322,  0.2810, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.1219,  0.0052,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.0959, -0.2527,  0.1540, -0.5830], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.6045, -1.0000,  0.0244,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2441],
         [-0.1654],
         [ 0.0436],
         [ 0.6616],
         [ 2.0087],
         [ 1.7672],
         [ 2.5401],
         [ 4.5578],
         [ 4.9091],
         [ 5.0085],
         [-0.6328],
         [ 1.0688],
         [-0.0732],
         [-1.0842],
         [-0.8892],
         [-0.6771],
         [ 0.1970],
         [-0.4097],
         [ 7.1500],
         [ 5.8056],
         [ 1.

  3%|▎         | 209/6235 [00:24<11:58,  8.39it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.8483,  0.1511,  0.2830, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.1479,  0.0140,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.1219, -0.2441,  0.1601, -0.5830], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.5785, -1.0000,  0.0227,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2711],
         [-0.1919],
         [ 0.0341],
         [ 0.6395],
         [ 2.0088],
         [ 1.7602],
         [ 2.5390],
         [ 4.5484],
         [ 4.9041],
         [ 5.0036],
         [-0.6485],
         [ 1.0449],
         [-0.0878],
         [-1.0942],
         [-0.8980],
         [-0.6857],
         [ 0.1881],
         [-0.4116],
         [ 7.1365],
         [ 5.7979],
         [ 1.

  3%|▎         | 210/6235 [00:24<12:03,  8.33it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.8743,  0.1506,  0.2874, -0.5223], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.1739,  0.0119,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.1479, -0.2711,  0.1713, -0.5830], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.5525, -1.0000,  0.0246,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3074],
         [-0.2231],
         [ 0.0175],
         [ 0.6177],
         [ 2.0089],
         [ 1.7525],
         [ 2.5372],
         [ 4.5390],
         [ 4.8987],
         [ 4.9964],
         [-0.6700],
         [ 1.0140],
         [-0.1074],
         [-1.1080],
         [-0.9085],
         [-0.6968],
         [ 0.1760],
         [-0.4164],
         [ 7.1197],
         [ 5.7877],
         [ 1.

  3%|▎         | 211/6235 [00:25<12:01,  8.35it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.9002,  0.1314,  0.2909, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.1999,  0.0120,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.1739, -0.3074,  0.1871, -0.5857], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.5265, -1.0000,  0.0264,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3280],
         [-0.2424],
         [ 0.0071],
         [ 0.6071],
         [ 2.0159],
         [ 1.7506],
         [ 2.5380],
         [ 4.5324],
         [ 4.8950],
         [ 4.9920],
         [-0.6905],
         [ 0.9886],
         [-0.1239],
         [-1.1211],
         [-0.9196],
         [-0.7097],
         [ 0.1618],
         [-0.4241],
         [ 7.1059],
         [ 5.7779],
         [ 1.

  3%|▎         | 212/6235 [00:25<12:01,  8.35it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.9262,  0.1116,  0.2951, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.2259,  0.0063,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.1999, -0.3280,  0.1809, -0.5775], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.5005, -1.0000,  0.0248,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3243],
         [-0.2424],
         [ 0.0128],
         [ 0.6138],
         [ 2.0360],
         [ 1.7584],
         [ 2.5450],
         [ 4.5273],
         [ 4.8932],
         [ 4.9920],
         [-0.7030],
         [ 0.9770],
         [-0.1306],
         [-1.1276],
         [-0.9270],
         [-0.7199],
         [ 0.1503],
         [-0.4305],
         [ 7.0954],
         [ 5.7682],
         [ 1.

  3%|▎         | 213/6235 [00:25<12:03,  8.32it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.9522,  0.0957,  0.2862, -0.5223], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.2519,  0.0054,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.2259, -0.3243,  0.1607, -0.5747], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.4745, -1.0000,  0.0216,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2981],
         [-0.2257],
         [ 0.0341],
         [ 0.6327],
         [ 2.0664],
         [ 1.7739],
         [ 2.5572],
         [ 4.5204],
         [ 4.8909],
         [ 4.9950],
         [-0.7047],
         [ 0.9789],
         [-0.1270],
         [-1.1267],
         [-0.9309],
         [-0.7269],
         [ 0.1421],
         [-0.4342],
         [ 7.0852],
         [ 5.7572],
         [ 1.

  3%|▎         | 214/6235 [00:25<12:04,  8.31it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.9782,  0.1005,  0.2733, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.2779,  0.0073,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.2519, -0.2981,  0.1539, -0.5830], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.4485, -1.0000,  0.0185,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.2975],
         [-0.2310],
         [ 0.0408],
         [ 0.6298],
         [ 2.0790],
         [ 1.7742],
         [ 2.5605],
         [ 4.5063],
         [ 4.8817],
         [ 4.9894],
         [-0.7126],
         [ 0.9683],
         [-0.1296],
         [-1.1252],
         [-0.9280],
         [-0.7238],
         [ 0.1441],
         [-0.4298],
         [ 7.0732],
         [ 5.7490],
         [ 1.

  3%|▎         | 215/6235 [00:25<12:01,  8.34it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.9958,  0.1266,  0.2752, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.3039,  0.0040,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.2779, -0.2975,  0.1533, -0.5857], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.4225, -1.0000,  0.0148,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3330],
         [-0.2640],
         [ 0.0264],
         [ 0.6056],
         [ 2.0771],
         [ 1.7600],
         [ 2.5552],
         [ 4.4866],
         [ 4.8663],
         [ 4.9733],
         [-0.7327],
         [ 0.9388],
         [-0.1436],
         [-1.1277],
         [-0.9213],
         [-0.7146],
         [ 0.1525],
         [-0.4205],
         [ 7.0560],
         [ 5.7402],
         [ 1.

  3%|▎         | 216/6235 [00:25<12:00,  8.35it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.9698,  0.1327,  0.2781, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.3298,  0.0021,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.3039, -0.3330,  0.1574, -0.5857], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.3965, -1.0000,  0.0148,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3704],
         [-0.2975],
         [ 0.0079],
         [ 0.5847],
         [ 2.0823],
         [ 1.7486],
         [ 2.5508],
         [ 4.4677],
         [ 4.8498],
         [ 4.9554],
         [-0.7575],
         [ 0.9056],
         [-0.1608],
         [-1.1337],
         [-0.9171],
         [-0.7088],
         [ 0.1575],
         [-0.4140],
         [ 7.0353],
         [ 5.7273],
         [ 1.

  3%|▎         | 217/6235 [00:25<11:58,  8.38it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.9438,  0.1117,  0.2835, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.3558, -0.0030,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.3298, -0.3704,  0.1655, -0.5912], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.3705, -1.0000,  0.0160,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-3.9428e-01],
         [-3.1906e-01],
         [-6.2843e-03],
         [ 5.7428e-01],
         [ 2.0982e+00],
         [ 1.7438e+00],
         [ 2.5492e+00],
         [ 4.4524e+00],
         [ 4.8356e+00],
         [ 4.9403e+00],
         [-7.8098e-01],
         [ 8.7800e-01],
         [-1.7528e-01],
         [-1.1405e+00],
         [-9.1579e-01],
         [-7.0687e-01],
         [ 1.5856e-01],
     

  3%|▎         | 218/6235 [00:25<11:58,  8.37it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.9178,  0.0979,  0.2928, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.3818, -0.0062,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.3558, -0.3943,  0.1577, -0.5609], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.3445, -1.0000,  0.0132,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-3.9227e-01],
         [-3.1853e-01],
         [-5.3774e-03],
         [ 5.8296e-01],
         [ 2.1363e+00],
         [ 1.7528e+00],
         [ 2.5551e+00],
         [ 4.4411e+00],
         [ 4.8257e+00],
         [ 4.9326e+00],
         [-7.9858e-01],
         [ 8.6295e-01],
         [-1.8195e-01],
         [-1.1439e+00],
         [-9.1555e-01],
         [-7.0762e-01],
         [ 1.5677e-01],
     

  4%|▎         | 219/6235 [00:26<11:58,  8.37it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.8918,  0.0928,  0.2860, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.4078, -0.0058,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.3818, -0.3923,  0.1417, -0.5609], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.3185, -1.0000,  0.0120,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3678],
         [-0.2999],
         [ 0.0100],
         [ 0.6050],
         [ 2.1875],
         [ 1.7712],
         [ 2.5665],
         [ 4.4313],
         [ 4.8188],
         [ 4.9312],
         [-0.8065],
         [ 0.8610],
         [-0.1800],
         [-1.1427],
         [-0.9160],
         [-0.7106],
         [ 0.1524],
         [-0.4138],
         [ 6.9849],
         [ 5.6794],
         [ 1.

  4%|▎         | 220/6235 [00:26<12:05,  8.29it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.8658,  0.1037,  0.2740, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.4338, -0.0055,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.4078, -0.3678,  0.1391, -0.5637], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.2925, -1.0000,  0.0102,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3724],
         [-0.3053],
         [ 0.0094],
         [ 0.6051],
         [ 2.2239],
         [ 1.7760],
         [ 2.5702],
         [ 4.4163],
         [ 4.8069],
         [ 4.9221],
         [-0.8223],
         [ 0.8450],
         [-0.1857],
         [-1.1429],
         [-0.9120],
         [-0.7062],
         [ 0.1555],
         [-0.4094],
         [ 6.9668],
         [ 5.6625],
         [ 1.

  4%|▎         | 221/6235 [00:26<12:31,  8.01it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.8398,  0.1188,  0.2750, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.4598, -0.0037,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.4338, -0.3724,  0.1397, -0.5637], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.2665, -1.0000,  0.0103,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-4.0122e-01],
         [-3.2894e-01],
         [-3.8298e-03],
         [ 5.9049e-01],
         [ 2.2508e+00],
         [ 1.7725e+00],
         [ 2.5692e+00],
         [ 4.3983e+00],
         [ 4.7920e+00],
         [ 4.9070e+00],
         [-8.4484e-01],
         [ 8.1738e-01],
         [-1.9862e-01],
         [-1.1461e+00],
         [-9.0671e-01],
         [-6.9913e-01],
         [ 1.6154e-01],
     

  4%|▎         | 222/6235 [00:26<12:17,  8.15it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.8138,  0.1116,  0.2763, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.4858, -0.0028,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.4598, -0.4012,  0.1409, -0.5664], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.2406, -1.0000,  0.0124,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4231],
         [-0.3449],
         [-0.0127],
         [ 0.5858],
         [ 2.2890],
         [ 1.7775],
         [ 2.5734],
         [ 4.3836],
         [ 4.7803],
         [ 4.8950],
         [-0.8644],
         [ 0.7942],
         [-0.2094],
         [-1.1501],
         [-0.9054],
         [-0.6984],
         [ 0.1609],
         [-0.3998],
         [ 6.9199],
         [ 5.6226],
         [ 1.

  4%|▎         | 223/6235 [00:26<12:08,  8.25it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.7878,  0.0930,  0.2805, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.5118, -0.0014,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.4858, -0.4231,  0.1540, -0.5719], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.2146, -1.0000,  0.0132,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4354],
         [-0.3542],
         [-0.0200],
         [ 0.5871],
         [ 2.3342],
         [ 1.7870],
         [ 2.5793],
         [ 4.3720],
         [ 4.7699],
         [ 4.8845],
         [-0.8849],
         [ 0.7734],
         [-0.2186],
         [-1.1539],
         [-0.9046],
         [-0.6986],
         [ 0.1588],
         [-0.3998],
         [ 6.8961],
         [ 5.6001],
         [ 1.

  4%|▎         | 224/6235 [00:26<12:01,  8.33it/s]

policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.7619,  0.0824,  0.2895, -0.5278], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.5378, -0.0012,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.5118, -0.4354,  0.1536, -0.5664], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.1886, -1.0000,  0.0113,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.4272],
         [-0.3459],
         [-0.0131],
         [ 0.6036],
         [ 2.3954],
         [ 1.8084],
         [ 2.5919],
         [ 4.3634],
         [ 4.7631],
         [ 4.8798],
         [-0.8994],
         [ 0.7635],
         [-0.2206],
         [-1.1531],
         [-0.9028],
         [-0.6991],
         [ 0.1562],
         [-0.4010],
         [ 6.8741],
         [ 5.5756],
         [ 1.

  4%|▎         | 225/6235 [00:26<11:55,  8.40it/s]


policy_in_past shape: torch.Size([1, 50, 6])
policy_in_future shape: torch.Size([1, 50, 6])
policy_in_past_first component: tensor([-0.8000, -1.0000,  0.7359,  0.0747,  0.2826, -0.5251], device='cuda:0')
policy_in_future_first component: tensor([-0.8000, -1.0000, -0.5638,  0.0041,  0.7506,  0.2636], device='cuda:0')
policy_in_past_last component: tensor([-0.8000, -1.0000, -0.5378, -0.4272,  0.1393, -0.5637], device='cuda:0')
policy_in_future_last component: tensor([-0.8000, -0.1626, -1.0000,  0.0070,  0.7506,  0.2636], device='cuda:0')
u_pred shape: torch.Size([1, 50, 1])
u_pred: tensor([[[-0.3992],
         [-0.3189],
         [ 0.0106],
         [ 0.6346],
         [ 2.4757],
         [ 1.8434],
         [ 2.6126],
         [ 4.3546],
         [ 4.7583],
         [ 4.8798],
         [-0.9069],
         [ 0.7635],
         [-0.2158],
         [-1.1484],
         [-0.9008],
         [-0.6997],
         [ 0.1531],
         [-0.4012],
         [ 6.8511],
         [ 5.5453],
         [ 1.

KeyboardInterrupt: 