# Read memory from .npz file
* Read each subfile as a key in the memory dictionary.
* Do some statistics on the data

In [1]:
import numpy as np
np.random.seed(0)

# Copy every array stored in the snapshot into a plain dict keyed by its name.
# np.load on an .npz archive returns a lazy NpzFile that keeps the file handle
# open, so read everything inside a `with` block and let the handle be closed
# once all arrays have been materialised in memory.
buffer = {}
with np.load("/sessions/session_20200914233251_objective_mccarthy/memsnapshot/checkpoint-1000/demo.npz") as loading:
    for key in loading.files:
        buffer[key] = loading[key]
        print(f"Key: {key} was added with dtype '{buffer[key].dtype}' and shape '{buffer[key].shape}'")


Key: /observations/agent/position was added with dtype 'float64' and shape '(20, 1000000, 25)'
Key: /observations/agent/velocity was added with dtype 'float64' and shape '(20, 1000000, 23)'
Key: /observations/agent/rel_obj_hand was added with dtype 'float64' and shape '(20, 1000000, 1, 3)'
Key: /observations/agent/rel_obj_hand_dist was added with dtype 'float64' and shape '(20, 1000000)'
Key: /observations/agent/distance2 was added with dtype 'float64' and shape '(20, 1000000)'
Key: /observations/agent/closure was added with dtype 'float64' and shape '(20, 1000000)'
Key: /observations/demonstrator/distance was added with dtype 'float32' and shape '(20, 1000000)'
Key: /observations/demonstrator/hand_closure was added with dtype 'float32' and shape '(20, 1000000)'
Key: /observations/status/is_training was added with dtype 'uint8' and shape '(20, 1000000)'
Key: /observations/parameters/initial_closure was added with dtype 'float64' and shape '(20, 1000000)'
Key: /observations/parameters/c

In [2]:
#$

# Entries of "/infos/episode/l" that are > 0 are treated as episode-length records.
l = buffer["/infos/episode/l"]
l_where = np.nonzero(l > 0)

# Entries of "/infos/episode/r" that are not NaN are treated as episode-return records.
r = buffer["/infos/episode/r"]
r_where = np.nonzero(~np.isnan(r))

# Positions where the mask is zero.
masks = buffer["/masks"]
masks_where = np.nonzero(masks == 0)

rewards = buffer["/rewards"]

# The three markers must occur the same number of times.
zero_mask_count = len(masks_where[0])
assert zero_mask_count == len(l_where[0])
assert zero_mask_count == len(r_where[0])

n_episodes = zero_mask_count
n_workers = len(masks)

print(f"minimum length: {np.min(l[l_where])}; maximum length: {np.max(l[l_where])}; mean: {np.mean(l[l_where])}, std: {np.std(l[l_where])}")
print(f"minimum reward: {np.min(r[r_where])}; maximum reward: {np.max(r[r_where])}; mean: {np.mean(r[r_where])}, std: {np.std(r[r_where])}")


print(f"l:{l.shape}, r:{r.shape}, masks:{masks.shape}")

# Same lookups as above, but per worker (first axis) and keeping only the
# position along the second axis.
l_where_dict = {i: np.nonzero(l[i] > 0)[0] for i in range(n_workers)}
r_where_dict = {i: np.nonzero(~np.isnan(r[i]))[0] for i in range(n_workers)}
masks_where_dict = {i: np.nonzero(masks[i] == 0)[0] for i in range(n_workers)}


minimum length: 13; maximum length: 398; mean: 195.9647228292291, std: 47.97906584503823
minimum reward: 0.0; maximum reward: 20.0; mean: 17.44121941420298, std: 6.648857705611155
l:(20, 1000000), r:(20, 1000000), masks:(20, 1000000, 1)


# Several interesting problems
1. From states, predict actions! (*only for successful trajectories*)
2. From a sequence, predict the reward.
3. From a stack of states, predict the velocities.
4. From state and action, predict next state! (This is the environment model).


## For problem 1: $a = \pi(s)$

> We only consider cases where: Rewards == 20

In [3]:
# Remove index 0 of all datasets (which happens to be nan for /rewards for some reason.)
# Find the index of last completed trajectory in each worker. Call them ji's.
# Combine the first two indices by stacking [1:j1, 1:j2, 1:j3, ...] for all workers.
# Now all of the buffer arrays are the same size. The first index i for the stacked trajectories.

# Get the ends of trajectories
# masks_where = np.where(buffer["/masks"]==0)[:2]

# NOTE: this cell rewrites `buffer` in place (every array loses its worker
# axis), so it can only be run once after the data has been loaded.

# Set START=1 to remove the first nan elements in the buffer["/rewards"]
START = 1
# Dropping the first START samples shortens the first recorded episode of
# every worker, so its stored length is reduced accordingly.
for i in range(n_workers):
    buffer["/infos/episode/l"][i, l_where_dict[i][0]] -= START


# Position of the last zero mask of each worker; used below as the exclusive
# end of the slice that is kept for that worker.
masks_where_end_dict = {i: masks_where_dict[i][-1] for i in range(n_workers)}

# Write a zero mask at the first kept sample of every worker except the first
# one (uses n_workers rather than a hard-coded worker count).
# NOTE(review): presumably this keeps the seam between two workers marked as an
# episode boundary after stacking - confirm.
for i in range(1, n_workers):
    buffer["/masks"][i, START, 0] = 0

# Now combine the first two axes by stacking them.
# Do it for all keys
stacks = {}  # not used in this cell; kept in case other cells reference it
keys = list(buffer.keys())
for key in keys:
    shape = buffer[key].shape
    stack = [buffer[key][n, START:masks_where_end_dict[n]] for n in range(n_workers)]
    buffer[key] = np.concatenate(stack)
    print(f"Key: {key} change from '{shape}' to '{buffer[key].shape}'")

# Length of the flattened arrays (all keys were cut with the same slices).
n_length = buffer["/masks"].shape[0]

Key: /observations/agent/position change from '(20, 1000000, 25)' to '(19997984, 25)'
Key: /observations/agent/velocity change from '(20, 1000000, 23)' to '(19997984, 23)'
Key: /observations/agent/rel_obj_hand change from '(20, 1000000, 1, 3)' to '(19997984, 1, 3)'
Key: /observations/agent/rel_obj_hand_dist change from '(20, 1000000)' to '(19997984,)'
Key: /observations/agent/distance2 change from '(20, 1000000)' to '(19997984,)'
Key: /observations/agent/closure change from '(20, 1000000)' to '(19997984,)'
Key: /observations/demonstrator/distance change from '(20, 1000000)' to '(19997984,)'
Key: /observations/demonstrator/hand_closure change from '(20, 1000000)' to '(19997984,)'
Key: /observations/status/is_training change from '(20, 1000000)' to '(19997984,)'
Key: /observations/parameters/initial_closure change from '(20, 1000000)' to '(19997984,)'
Key: /observations/parameters/controller_gain change from '(20, 1000000)' to '(19997984,)'
Key: /observations/parameters/controller_thre c

In [4]:
# Test time correctness
# Constants of the time randomisation that is being verified here.
time_scale_offset = 0.5 # 1.0
time_scale_factor = 2.5 # 2.0
time_noise_factor = 0.8


# Rebuild the randomised time from its stored ingredients ...
noise = time_noise_factor * buffer["/infos/rand/time_noise_normal"]
T = buffer["/infos/rand/original_time"] * (time_scale_factor) + (time_scale_offset + noise)

# ... and require it to match the stored value (Euclidean norm of the difference).
time_residual = np.linalg.norm(buffer["/infos/rand/randomized_time"] - T)
assert time_residual < 1e-8

In [5]:
#$

# Count the episode markers again on the flattened arrays.
zero_masks_after = np.count_nonzero(buffer["/masks"] == 0)
assert zero_masks_after + 1 == n_episodes, "Number of episodes after concatenation should be the same as number of episodes before concatenation."

episode_r_after = buffer["/infos/episode/r"]
assert np.count_nonzero(episode_r_after == episode_r_after) == n_episodes

assert np.count_nonzero(buffer["/infos/episode/l"] > 0) == n_episodes

In [6]:
#$

# After "combination", figure out the new whereabouts.
# Positions of the length records in the flattened buffer.
l_c = buffer["/infos/episode/l"]
l_where_c = np.nonzero(l_c > 0)[0]

# Positions of the non-NaN return records, and of those equal to 20.
r_c = buffer["/infos/episode/r"]
r_where_c = np.nonzero(r_c == r_c)[0]
r20_where_c = np.nonzero(r_c == 20)[0]

# Positions of the zero masks, with n_length appended as a final sentinel.
masks_c = buffer["/masks"]
masks_where_c = np.append(np.nonzero(masks_c == 0)[0], n_length)

rewards_c = buffer["/rewards"]



In [7]:
#$

# Check if all sets are the same.

# Same number of markers in all three index arrays.
assert len(masks_where_c) == len(l_where_c)
assert len(masks_where_c) == len(r_where_c)

# The sample just before each zero mask must be exactly where the length
# record sits, and vice versa.
mask_predecessors = set(masks_where_c - 1)
length_positions = set(l_where_c)
assert mask_predecessors <= length_positions
assert length_positions <= mask_predecessors

# Same two-way check for the return records.
return_positions = set(r_where_c)
assert mask_predecessors <= return_positions
assert return_positions <= mask_predecessors

In [8]:
#$

# Another test here: r (sum of rewards at the end of each episode) should match stepwise rewards.
# r == sum(rewards[ending-20:ending])

stepwise_rewards = buffer["/rewards"]

for e in range(n_episodes):
    length = l_c[l_where_c[e]]
    reward = r_c[r_where_c[e]]
    ending = masks_where_c[e]

    # Sum over the last 20 steps before `ending`, or over length-1 steps
    # when the episode is shorter than that.
    window = min(20, length - 1)
    reward_arr = stepwise_rewards[ending - window:ending]

    assert sum(reward_arr)==reward, f"Reward discripancy for episode={e}, length={length}, reward={reward}, index={ending}"



### Create another column with trajectory index

In [9]:
assert n_episodes == len(l_where_c)

# Two new per-sample columns:
#   "/infos/episode/i"        - running index of the episode a sample belongs to
#   "/infos/episode/timestep" - position of the sample inside that episode (from 0)
# Episode k covers the samples after l_where_c[k-1] up to and including
# l_where_c[k]; episode 0 starts at sample 0.
#
# `dtype=int` replaces the alias `np.int`, which was removed in NumPy 1.24 and
# meant exactly the builtin int.
episode_ends = np.asarray(l_where_c)
episode_sizes = np.diff(episode_ends, prepend=-1)      # samples per episode
episode_starts = episode_ends - episode_sizes + 1      # first sample of each episode
n_labelled = int(episode_sizes.sum())                   # == l_where_c[-1] + 1 when non-empty

episode_index = np.empty_like(buffer["/infos/episode/r"], dtype=int)
episode_timestep = np.empty_like(buffer["/infos/episode/r"], dtype=int)

# Vectorised form of the per-sample loop: repeat each episode id / start once
# per sample of that episode. Samples past the last length record (if any) are
# left unassigned.
episode_index[:n_labelled] = np.repeat(np.arange(n_episodes), episode_sizes)
episode_timestep[:n_labelled] = np.arange(n_labelled) - np.repeat(episode_starts, episode_sizes)

buffer["/infos/episode/i"] = episode_index
buffer["/infos/episode/timestep"] = episode_timestep

In [10]:
# Test: from the second episode on, the stored length of an episode must equal
# the distance between its length record and the previous one.
arrr = 0
for i in range(1, n_episodes):
    stored_length = l_c[l_where_c[i]]
    record_spacing = l_where_c[i] - l_where_c[i-1]
    assert stored_length == record_spacing, f"In {i}, {l_c[l_where_c[i]]} is not equal to {l_where_c[i]-l_where_c[i-1]}."

### Create time to reach column

In [11]:
# Get start_time, final_time, reach_time from the .json files.
# Then convert reach time to timesteps, and store that along other keys.


# First create a dictionary for all files
import json, glob, os

# One entry per "../extracts/<name>.json":
#   start_time - "t" of the first line of the file
#   total_time - "t" of the last line minus start_time
#   reach_time - "reached" from "../extracts/meta/<name>_meta.json" minus start_time
meta_dict = {}
files = sorted(glob.glob("../extracts/*.json"))
for file in files:
    name, _ext = os.path.splitext(os.path.basename(file))

    # Each line of the extract is parsed as its own JSON document.
    with open(f'../extracts/{name}.json') as f:
        lines = list(f)
    start_time = json.loads(lines[0])["t"]
    end_time = json.loads(lines[-1])["t"]
    total_time = end_time - start_time

    with open(f'../extracts/meta/{name}_meta.json') as f:
        meta = dict(json.load(f))
    reach_time = meta['reached'] - start_time

    meta_dict[name] = {"start_time":start_time, "reach_time":reach_time, "total_time":total_time}


# For every sample, convert the reach time of its source file into a number of
# controller steps and store it under "/infos/episode/reach_timestep".
CONTROLLER_STEP = 0.02
noise = buffer["/infos/rand/time_noise_normal"] * time_noise_factor
buffer["/infos/episode/reach_timestep"] = np.empty((n_length,))

sample_filenames = buffer["/infos/rand/filename"]
reach_timestep_column = buffer["/infos/episode/reach_timestep"]

for i in range(n_length):
    reach_time = meta_dict[sample_filenames[i]]["reach_time"]
    # Same scale / offset / noise transform that is applied to the time elsewhere.
    T = reach_time*time_scale_factor + (time_scale_offset+noise[i])
    # int() truncates toward zero before the value is stored in the float column.
    reach_timestep_column[i] = int(T / CONTROLLER_STEP)


In [12]:
# buffer["/infos/episode/l"]

## Dataset creation

Identify which trajectories were successful, i.e. they had r=20 at the end. Then, use only the data from those trajectories.

In [13]:
# Create a list of acceptable indices to sample from:

# Collect the sample indices of every trajectory whose return record equals 20.
indices = []
for e in r20_where_c:
    # e is the position of the return/length record; s is l_c[e]-1 samples earlier.
    s = e - (l_c[e]-1)
    assert r_c[e] == 20

    # The collected range is shifted by one sample: s+1 .. e+1 inclusive.
    indices.extend(range(s+1, e+2))

assert sum(l_c[r20_where_c]) == len(indices), "Sum of all valid lengths (with r==20) must be equal to the number of all valid indices."

# BUG: We remove very last element since that element probably does not exist.
del indices[-1:]



In [14]:
#@

# Shuffle the sample indices with a fixed seed and split them 80/20 into a
# training and a test list (there is no separate validation split).
# Note that the split is made over individual samples, not over trajectories.
np.random.seed(0)
np.random.shuffle(indices)

N = len(indices)
N80 = int(0.80 * N)

training = indices[:N80]
test = indices[N80:]

In [15]:
# # states
# x = 

# # actions
# y = 

# # Try to find model f
# # Can be BN, MLP, BN, MLP, BN, ...
# y = f(x)

In [16]:
import torch

# TODO: Sets the number of OpenMP threads used for parallelizing CPU operations
# torch.set_num_threads(1)
        
## Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
cuda_available = torch.cuda.is_available()
print("GPU available. Using 1 GPU." if cuda_available else "Using CPUs.")
device = torch.device("cuda" if cuda_available else "cpu")

# model = ?

GPU available. Using 1 GPU.


In [17]:
# import torch.nn as nn
# # import torch.distributions as distributions

# class ModelClass(nn.Module):
#     def __init__(self, state_size, hidden_size, action_size, action_scale, init_w=3e-3):
#         super(ModelClass, self).__init__()
#        
#         self.linear1 = nn.Linear(state_size,  hidden_size)
#         self.bn1     = nn.BatchNorm1d(num_features=hidden_size)
#         self.linear2_1 = nn.Linear(hidden_size, hidden_size)
#         self.bn2_1     = nn.BatchNorm1d(num_features=hidden_size)
#         self.linear2_2 = nn.Linear(hidden_size, hidden_size)
#         self.bn2_2     = nn.BatchNorm1d(num_features=hidden_size)
#         self.linear2_3 = nn.Linear(hidden_size, hidden_size)
#         self.bn2_3     = nn.BatchNorm1d(num_features=hidden_size)
#         self.linear2_4 = nn.Linear(hidden_size, hidden_size)
#         self.bn2_4     = nn.BatchNorm1d(num_features=hidden_size)
#         self.linear3 = nn.Linear(hidden_size, action_size)
#        
#         self.linear3.weight.data.uniform_(-init_w, init_w)
#         self.linear3.bias.data.uniform_(-init_w, init_w)
#        
#         self.action_scale = action_scale
#        
#     def forward(self, state):
#         # x = torch.cat([state, action], 1)
#         x = state
#         x = torch.relu(self.bn1(self.linear1(x)))
#         x = torch.relu(self.bn2_1(self.linear2_1(x)))
#         x = torch.relu(self.bn2_2(self.linear2_2(x)))
#         x = torch.relu(self.bn2_3(self.linear2_3(x)))
#         x = torch.relu(self.bn2_4(self.linear2_4(x)))
#         x = torch.tanh(self.linear3(x)) * self.action_scale
#         return x

In [20]:
import torch.nn as nn

class ModelClass(nn.Module):
    """Fully connected network: two ReLU hidden layers and a linear output layer.

    Args:
        state_size: number of input features.
        hidden_size: width of both hidden layers.
        action_size: number of output features.
        action_scale: stored on the instance; not applied in ``forward``.
        init_w: the output layer's weights and biases are drawn uniformly
            from ``[-init_w, init_w]``.
    """

    def __init__(self, state_size, hidden_size, action_size, action_scale, init_w=3e-3):
        super().__init__()

        self.linear1 = nn.Linear(state_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, hidden_size)
        self.linear3 = nn.Linear(hidden_size, action_size)

        # Re-initialise the output layer (weight first, then bias).
        for parameter in (self.linear3.weight, self.linear3.bias):
            parameter.data.uniform_(-init_w, init_w)

        self.action_scale = action_scale

    def forward(self, state):
        """Return the raw (unbounded, unscaled) output of the last linear layer."""
        hidden = torch.relu(self.linear1(state))
        hidden = torch.relu(self.linear2(hidden))
        return self.linear3(hidden)

In [26]:
# Run this block again to reset the model and optimizer.

from torch.optim import Adam
import torch.nn as nn

# Fresh network, moved to the selected device straight away.
# state_size=48 matches the 25 position + 23 velocity features fed to the model.
model = ModelClass(
    state_size=48,
    hidden_size=256,
    action_size=1,
    action_scale=10,
).to(device)

# Adam over all model parameters.
optimizer = Adam(model.parameters(), lr=3e-4, betas=(0.9, 0.999), eps=1e-8)

# Mean squared error between prediction and target.
lossfn = nn.MSELoss()


In [None]:
from tqdm.auto import tqdm, trange


import os

n_epochs = 100
batch_size = 32
best_loss = np.inf  # np.infty was removed in NumPy 2.0

# torch.save does not create missing directories; make sure the target exists
# before the first checkpoint is written.
os.makedirs("backups", exist_ok=True)


def _timestep_batch(batch_indices):
    """Build one (states, targets) pair of float tensors on `device`.

    states  - position and velocity observations concatenated along axis 1.
    targets - reach_timestep minus the within-episode timestep, as a column.
    """
    position_np = buffer["/observations/agent/position"][batch_indices]
    velocity_np = buffer["/observations/agent/velocity"][batch_indices]

    timestep_to_reach = buffer["/infos/episode/reach_timestep"][batch_indices] - buffer["/infos/episode/timestep"][batch_indices]

    states = torch.from_numpy(np.concatenate([position_np, velocity_np], axis=1)).to(device).float()
    targets = torch.from_numpy(timestep_to_reach.reshape(-1, 1)).to(device).float()
    return states, targets


for e in trange(n_epochs, desc="epoch"):
    # Train
    model.train() # model.training = True
    np.random.shuffle(training)
    n_batches = len(training) // batch_size  # a trailing partial batch is skipped
    training_loss = 0
    for b in trange(n_batches, desc="train", leave=False):
        # Select data for that batch
        batch_indices = training[b*batch_size:(b+1)*batch_size]
        states, timestep_to_reach_desired = _timestep_batch(batch_indices)

        # Forward
        timestep_to_reach_model = model(states)
        loss = lossfn(timestep_to_reach_model, timestep_to_reach_desired)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate loss
        training_loss += loss.item()

    training_loss = training_loss / n_batches


    # Test
    model.eval() # model.training = False
    n_batches = len(test) // batch_size
    test_loss = 0
    # No gradients are needed for evaluation; skip building the autograd graph.
    with torch.no_grad():
        for b in trange(n_batches, desc="test", leave=False):
            # Select data for that batch
            batch_indices = test[b*batch_size:(b+1)*batch_size]
            states, timestep_to_reach_desired = _timestep_batch(batch_indices)

            # Forward
            timestep_to_reach_model = model(states)
            loss = lossfn(timestep_to_reach_model, timestep_to_reach_desired)

            # Accumulate loss
            test_loss += loss.item()

    test_loss = test_loss / n_batches

    print(f"Epoch {e}: mean training loss: {training_loss}, mean test loss: {test_loss}.")

    # Keep a checkpoint every time the mean test loss improves.
    if test_loss < best_loss:
        torch.save(model.state_dict(), f"backups/model_epoch_{e}_{test_loss:5.4f}.pt")
        best_loss = test_loss
    
    
    
        
    # Compute test error
    # Report
    # Early stopping?
    
    
    
        
        
        


HBox(children=(FloatProgress(value=0.0, description='epoch', style=ProgressStyle(description_width='initial'))…

HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 0: mean training loss: 669.3898627001416, mean test loss: 615.1193231523549.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 1: mean training loss: 597.3613022326542, mean test loss: 591.2537610947785.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 2: mean training loss: 561.8447444241226, mean test loss: 545.3397166098239.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 3: mean training loss: 531.4071127691562, mean test loss: 513.9820012241202.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 4: mean training loss: 507.5213515263482, mean test loss: 505.60889269498705.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 5: mean training loss: 489.32623681886224, mean test loss: 481.7004718047287.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 6: mean training loss: 475.2986189835718, mean test loss: 471.6685497785677.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 7: mean training loss: 464.07365796945163, mean test loss: 457.38928273291185.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 8: mean training loss: 454.9487663131359, mean test loss: 451.3026339444136.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 9: mean training loss: 447.30970026039245, mean test loss: 442.03248277181825.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 10: mean training loss: 440.5202696268419, mean test loss: 440.19873671964365.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 11: mean training loss: 434.67735921226904, mean test loss: 431.8451109398388.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 12: mean training loss: 429.6549274264254, mean test loss: 428.7238286994273.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 13: mean training loss: 425.12217884940276, mean test loss: 428.99439795117365.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 14: mean training loss: 421.2320494634175, mean test loss: 421.04393577467255.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 15: mean training loss: 417.61511850685724, mean test loss: 422.4420544574792.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 16: mean training loss: 414.33846445484454, mean test loss: 412.80674678454443.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 17: mean training loss: 411.356299991751, mean test loss: 410.5467989562815.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 18: mean training loss: 408.50835377178254, mean test loss: 407.3856372493743.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 19: mean training loss: 406.11445940154994, mean test loss: 403.3074453091355.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 20: mean training loss: 403.60852285660735, mean test loss: 404.80274984376274.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 21: mean training loss: 401.4311967461058, mean test loss: 399.8865248934848.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 22: mean training loss: 399.2846495363916, mean test loss: 398.1051475465878.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 23: mean training loss: 397.3357981723619, mean test loss: 399.23593828807395.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 24: mean training loss: 395.41121865088843, mean test loss: 393.4779213532146.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 25: mean training loss: 393.74351408177836, mean test loss: 395.45768286344435.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 26: mean training loss: 392.0108621205754, mean test loss: 393.58919943440384.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 27: mean training loss: 390.3658702700307, mean test loss: 390.9661431485026.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 28: mean training loss: 388.7667908389406, mean test loss: 392.067216673699.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 29: mean training loss: 387.234570711633, mean test loss: 389.9041182079829.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 30: mean training loss: 385.8300891387592, mean test loss: 386.8468741119979.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 31: mean training loss: 384.374081702025, mean test loss: 388.5620158085782.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 32: mean training loss: 382.9737943839834, mean test loss: 385.2880447288824.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 33: mean training loss: 381.58847016111787, mean test loss: 383.3570940465567.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 34: mean training loss: 380.2959430494822, mean test loss: 389.5408929036572.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 35: mean training loss: 379.14239685099864, mean test loss: 382.768073797514.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 36: mean training loss: 378.0377136811869, mean test loss: 383.9677690129743.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 37: mean training loss: 376.9503850963234, mean test loss: 382.86929267106564.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 38: mean training loss: 375.89942583189514, mean test loss: 379.4347790778594.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 39: mean training loss: 374.8082454703827, mean test loss: 381.437217706981.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 40: mean training loss: 373.896304534009, mean test loss: 379.34785172470527.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 41: mean training loss: 372.9410151299456, mean test loss: 371.7237384073822.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 42: mean training loss: 371.95958847239064, mean test loss: 377.8011823777306.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 43: mean training loss: 371.2045526689688, mean test loss: 376.6627738624377.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 44: mean training loss: 370.26298123205476, mean test loss: 370.18388332663625.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 45: mean training loss: 369.4280516637682, mean test loss: 372.0205866548287.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 46: mean training loss: 368.5718911011692, mean test loss: 370.81902208157476.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 47: mean training loss: 367.79610082127857, mean test loss: 370.5145989888802.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 48: mean training loss: 367.00273789229834, mean test loss: 367.13163214855456.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 49: mean training loss: 366.2913554261541, mean test loss: 364.6909479169562.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 50: mean training loss: 365.59668927155246, mean test loss: 367.7599131542423.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 51: mean training loss: 364.8554548244694, mean test loss: 367.944422907915.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 52: mean training loss: 364.1448656291263, mean test loss: 366.60312199662053.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 53: mean training loss: 363.41683332015225, mean test loss: 368.2850138164402.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 54: mean training loss: 362.81401925038523, mean test loss: 363.1391594773339.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 55: mean training loss: 362.23466910514077, mean test loss: 362.3516581275185.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 56: mean training loss: 361.55469052166285, mean test loss: 365.0061685726399.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 57: mean training loss: 360.83086852084375, mean test loss: 362.7267028499851.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 58: mean training loss: 360.2680415493978, mean test loss: 361.562052305409.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 59: mean training loss: 359.7883895115245, mean test loss: 360.56512317479246.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 60: mean training loss: 359.0920400687358, mean test loss: 365.1204759547902.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 61: mean training loss: 358.6121934440178, mean test loss: 358.8036530795301.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 62: mean training loss: 357.98764268527583, mean test loss: 360.59887242207077.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 63: mean training loss: 357.4373086031283, mean test loss: 360.09664601841894.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 64: mean training loss: 356.84166622555756, mean test loss: 361.4400269213172.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 65: mean training loss: 356.30342664789276, mean test loss: 361.1105027808349.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 66: mean training loss: 355.7103000082272, mean test loss: 356.7855037566078.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 67: mean training loss: 355.23802309085215, mean test loss: 360.8485196348464.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 68: mean training loss: 354.6533233938818, mean test loss: 357.4173564223061.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 69: mean training loss: 354.17623383533146, mean test loss: 355.4100178149195.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 70: mean training loss: 353.72061416003817, mean test loss: 358.03366175306417.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 71: mean training loss: 353.2244150983765, mean test loss: 360.98176870855673.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 72: mean training loss: 352.76320864928437, mean test loss: 355.3506708361655.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 73: mean training loss: 352.2538003054101, mean test loss: 355.0620670637444.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 74: mean training loss: 351.72007343695464, mean test loss: 359.2027822446867.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 75: mean training loss: 351.2446717916246, mean test loss: 352.7633837630972.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 76: mean training loss: 350.8221548599178, mean test loss: 355.8616757612785.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 77: mean training loss: 350.3637866420396, mean test loss: 350.8342522696917.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 78: mean training loss: 349.99282295992066, mean test loss: 352.94440420612653.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 79: mean training loss: 349.48486810832406, mean test loss: 351.79901592070587.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 80: mean training loss: 349.12988300938633, mean test loss: 358.845003072314.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 81: mean training loss: 348.7011404922418, mean test loss: 353.3009596482562.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 82: mean training loss: 348.35516378073635, mean test loss: 354.09866687245676.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 83: mean training loss: 347.94695204575584, mean test loss: 353.6347113253677.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 84: mean training loss: 347.551412615298, mean test loss: 349.57914672151674.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 85: mean training loss: 347.15371126803495, mean test loss: 350.1118971271944.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 86: mean training loss: 346.7396738655335, mean test loss: 348.71020592405097.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 87: mean training loss: 346.49630570530275, mean test loss: 348.0738159268258.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 88: mean training loss: 346.1240008350358, mean test loss: 348.57337771703516.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 89: mean training loss: 345.82464307929496, mean test loss: 345.10827258305284.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 90: mean training loss: 345.3653421031, mean test loss: 350.8885994450519.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 91: mean training loss: 344.9236257436695, mean test loss: 348.47738517780067.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 92: mean training loss: 344.6785895863906, mean test loss: 349.13051505703106.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 93: mean training loss: 344.36463769996107, mean test loss: 346.4527169017321.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 94: mean training loss: 344.00626842116935, mean test loss: 352.8216781743611.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 95: mean training loss: 343.7162856026125, mean test loss: 346.50027424906443.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

HBox(children=(FloatProgress(value=0.0, description='test', max=112584.0, style=ProgressStyle(description_widt…

Epoch 96: mean training loss: 343.341228596261, mean test loss: 347.6430857663875.


HBox(children=(FloatProgress(value=0.0, description='train', max=450336.0, style=ProgressStyle(description_wid…

In [22]:
# Inspect the raw timestep_to_reach values; the displayed array contains negative entries.
timestep_to_reach

array([-13., -79.,  22.,  76., -11.,  44., 117.,  73., 135.,  92.,  88.,
        69., -36., -52., 123., -48.,  -4., 125., -42., -12.,  19.,  37.,
       166.,  20., 153.,  85.,  68.,  83.,  60., 115.,  41.,  57.])

In [23]:
# Floor negative values at zero. Passing None as the upper bound leaves the top
# unbounded (the np.infty alias used previously was removed in NumPy 2.0).
np.clip(timestep_to_reach, 0, None)

array([  0.,   0.,  22.,  76.,   0.,  44., 117.,  73., 135.,  92.,  88.,
        69.,   0.,   0., 123.,   0.,   0., 125.,   0.,   0.,  19.,  37.,
       166.,  20., 153.,  85.,  68.,  83.,  60., 115.,  41.,  57.])

In [28]:
# Check the batch tensor's dimensions.
# NOTE(review): the recorded output has 49 columns, while ModelClass is built with
# state_size=48 elsewhere in this notebook — confirm which feature count is current.
states.shape

torch.Size([32, 49])

In [14]:
# Start at +infinity; presumably the initial value for best-loss tracking, so that
# any finite loss compares as smaller.
# (np.inf replaces the np.infty alias, which NumPy 2.0 removed.)
best_loss = np.inf
print(best_loss*3)  # scaling infinity still yields inf

inf


In [30]:
abc = 34341.123445676767887

# Format to four decimal places; as the cell's last expression this displays '34341.1234',
# matching the recorded output (a bare assignment would display nothing).
f"{abc:.4f}"

'34341.1234'

In [84]:
# Persist only the learned parameters (state_dict) under a path relative to the
# current working directory.
# NOTE(review): the commented-out torch.load in the model-construction cell points at
# "backups/model_epoch_1.pt" — confirm the intended checkpoint location.
torch.save(model.state_dict(), "model_epoch_1.pt")

In [59]:
# Build a fresh network and move it onto the target device in a single expression.
model = ModelClass(
    state_size=48,
    hidden_size=256,
    action_size=1,
    action_scale=10,
).to(device)

# state_dict = torch.load("backups/model_epoch_1.pt")


In [41]:
# Show the target actions tensor (one value per row; on cuda:0 per the recorded output).
action_desired

tensor([[-0.5421],
        [-0.1679],
        [ 0.8118],
        [-0.4336],
        [ 2.0686],
        [ 1.2972],
        [-0.0500],
        [ 0.4684],
        [-0.0345],
        [ 1.2148],
        [-0.1655],
        [-0.1707],
        [ 4.6853],
        [-0.2763],
        [-0.1873],
        [ 2.4805],
        [ 0.5893],
        [-0.2471],
        [ 1.5171],
        [ 0.5603],
        [ 0.8303],
        [ 1.2302],
        [ 0.7465],
        [ 1.9152],
        [-0.1633],
        [ 2.0186],
        [-0.1678],
        [ 1.9252],
        [ 1.2885],
        [-6.1223],
        [ 1.3161],
        [-0.0494]], device='cuda:0')

In [61]:
# Mean squared error between the model's predictions and the desired actions.
((model(states) - action_desired) ** 2).mean()

tensor(2.9860, device='cuda:0', grad_fn=<MeanBackward0>)

In [60]:
# Pair each predicted action with its target for a side-by-side comparison.
[
    (predicted, desired)
    for predicted, desired in zip(
        model(states).detach().cpu().numpy(),
        action_desired.cpu().numpy(),
    )
]

[(array([-0.00721777], dtype=float32), array([-0.5421269], dtype=float32)),
 (array([-0.00765975], dtype=float32), array([-0.16786535], dtype=float32)),
 (array([-0.00725904], dtype=float32), array([0.81179124], dtype=float32)),
 (array([-0.00744751], dtype=float32), array([-0.43355983], dtype=float32)),
 (array([-0.00800348], dtype=float32), array([2.0686255], dtype=float32)),
 (array([-0.00749895], dtype=float32), array([1.2972153], dtype=float32)),
 (array([-0.00770784], dtype=float32), array([-0.04997654], dtype=float32)),
 (array([-0.00846453], dtype=float32), array([0.4683502], dtype=float32)),
 (array([-0.00737752], dtype=float32), array([-0.03447848], dtype=float32)),
 (array([-0.00860534], dtype=float32), array([1.2148223], dtype=float32)),
 (array([-0.00742638], dtype=float32), array([-0.16554818], dtype=float32)),
 (array([-0.00760948], dtype=float32), array([-0.17073645], dtype=float32)),
 (array([-0.00906077], dtype=float32), array([4.6852884], dtype=float32)),
 (array([-0

In [54]:
# Copy the target actions to host memory and view them as a NumPy array.
action_desired.cpu().numpy()

array([[-0.5421269 ],
       [-0.16786535],
       [ 0.81179124],
       [-0.43355983],
       [ 2.0686255 ],
       [ 1.2972153 ],
       [-0.04997654],
       [ 0.4683502 ],
       [-0.03447848],
       [ 1.2148223 ],
       [-0.16554818],
       [-0.17073645],
       [ 4.6852884 ],
       [-0.276315  ],
       [-0.18732752],
       [ 2.4805155 ],
       [ 0.58931726],
       [-0.24711452],
       [ 1.5170671 ],
       [ 0.5603404 ],
       [ 0.83032   ],
       [ 1.2302247 ],
       [ 0.7464867 ],
       [ 1.9151827 ],
       [-0.16326883],
       [ 2.0185962 ],
       [-0.16779685],
       [ 1.925207  ],
       [ 1.2885302 ],
       [-6.122292  ],
       [ 1.3160862 ],
       [-0.04944146]], dtype=float32)

In [35]:
# Slice the current batch out of the in-memory buffer.
position = buffer["/observations/agent/position"][batch_indices]
velocity = buffer["/observations/agent/velocity"][batch_indices]
actions  = buffer["/agents/demonstrator/actions"][batch_indices]

masks    = buffer["/masks"][batch_indices]
rewards  = buffer["/rewards"][batch_indices]
# `filename` and `timestep` are printed by the inspection cell that follows, so they
# have to be loaded here; otherwise that cell raises NameError on a fresh kernel.
filename = buffer["/infos/rand/filename"][batch_indices]
timestep = buffer["/agents/demonstrator/hidden_state/time_step"][batch_indices]

# Further fields available in the buffer; left disabled until they are needed.
# initial_closure = buffer["/observations/parameters/initial_closure"][batch_indices]
# controller_thre = buffer["/observations/parameters/controller_thre"][batch_indices]
# controller_gain = buffer["/observations/parameters/controller_gain"][batch_indices]

# hand_closure = buffer["/observations/demonstrator/hand_closure"][batch_indices]
# rel_obj_hand = buffer["/observations/agent/rel_obj_hand"][batch_indices]
# rel_obj_hand_dist = buffer["/observations/agent/rel_obj_hand_dist"][batch_indices]

# distance2 = buffer["/observations/agent/distance2"][batch_indices]
# closure = buffer["/observations/agent/closure"][batch_indices]
# distance = buffer["/observations/demonstrator/distance"][batch_indices]

In [36]:
# Sanity-check the sampled batch: array shapes first, then the per-sample values
# flattened to one dimension. `filename` and `timestep` are expected to have been
# loaded from the buffer alongside the other batch arrays.
for batch_array in (position, velocity, actions):
    print(batch_array.shape)
print(filename)
for batch_array in (timestep, masks, rewards):
    print(batch_array.reshape(-1))

(32, 25)
(32, 23)
(32, 1)
['02_nl_07' '02_nl_02' '08_fr_13' '05_fr_10' '04_nl_06' '02_nl_13'
 '07_nl_09' '05_nr_13' '01_nr_10' '05_nl_01' '09_fl_02' '09_fl_06'
 '06_fl_02' '09_fl_06' '08_nr_05' '01_fl_06' '03_nr_15' '09_fr_06'
 '09_fr_09' '09_fl_10' '08_nr_06' '08_nl_13' '08_fl_10' '08_nr_05'
 '09_nr_01' '02_fr_10' '04_nl_05' '03_fl_10' '08_nr_02' '02_nr_14'
 '01_fl_11' '07_nl_02']
[131.  36. 178.  38.  71.  42.  69. 149.  53.  41.  72. 106.  31. 181.
 178.  76.  99.  79.  23.   0. 111.  96. 112. 116.  63. 163. 140.  74.
  99.  66. 134.   1.]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1.]
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0.]


In [39]:
# Position block of the batch: 25 features per sample in the recorded output.
position.shape

(32, 25)

In [41]:
# Velocity block of the batch: 23 features per sample in the recorded output.
velocity.shape

(32, 23)

In [43]:
# Joining position and velocity along the feature axis gives 25 + 23 = 48 columns,
# which matches the state_size=48 passed to ModelClass in this notebook.
np.concatenate([position,velocity], axis=1).shape

(32, 48)

In [None]:
# Load data. A dataloader/dataset interface is not needed: all data is already in memory!

# Preprocess, e.g. do data augmentation.
#   In early stages that is not usually required. 
# Visualize the samples you work with.
# Split data into train/test
# Train model. 
# Once trained, test and try it! Especially on unseen data.

In [None]:
# Template for a full training run: the steps inside the loop are pseudo-code
# comments to be filled in.

# Load data
# Preprocess data
# Pre visualizations

# Assuming optimizer has two groups.
# LambdaLR (from torch.optim.lr_scheduler) multiplies each parameter group's initial
# learning rate by the value its lambda returns for the current epoch.
# NOTE(review): lambda1 returns 0 for epochs 0-29, so the first group's learning rate
# stays at 0 until epoch 30 — confirm that this is intended.
lambda1 = lambda epoch: epoch // 30    # integer division (was the invalid token `\\`)
lambda2 = lambda epoch: 0.95 ** epoch  # exponential decay
scheduler = LambdaLR(optimizer, lr_lambda=[lambda1, lambda2])
for epoch in range(100):
    ### Train
    model.train() # sets model.training = True

    # For all data:
    #   inputs, labels = ...
    #   inputs = inputs.to(device)
    #   labels = labels.to(device)
    #   TRACKING = ON (gradients enabled)
    #   forward: y = model(x)
    #   Compute Loss
    #   optimizer.zero_grad()
    #   loss.backward()
    #   Update with learning_rates: optimizer.step()
    #   ---
    #   Compute total loss and number of correct guesses
    # ------------------------
    # Report loss and accuracy

    ### Validate
    model.eval() # sets model.training = False
    # For all data:
    #   inputs, labels = ...
    #   inputs = inputs.to(device)
    #   labels = labels.to(device)
    #   TRACKING = OFF (e.g. inside `with torch.no_grad():`)
    #   forward: y = model(x)
    #   Compute Loss
    #   ---
    #   Compute total loss and number of correct guesses
    # ------------------------
    # Report loss and accuracy
    # Keep track of the best epoch so far
    #      best_model_state = copy.deepcopy(model.state_dict())

    # Advance the learning-rate schedule once per epoch, after this epoch's
    # optimizer.step() calls; stepping it first skips the initial learning rate.
    scheduler.step()

# Post-processing



## All memory keys

* `/observations/agent/position`, `float64`, (19997984, 25)
* `/observations/agent/velocity`, `float64`, (19997984, 23)
* `/observations/agent/rel_obj_hand`, `float64`, (19997984, 1, 3)
* `/observations/agent/rel_obj_hand_dist`, `float64`, (19997984,)
* `/observations/agent/distance2`, `float64`, (19997984,)

* `/observations/agent/closure`, `float64`, (19997984,)
* `/observations/demonstrator/distance`, `float32`, (19997984,)
* `/observations/demonstrator/hand_closure`, `float32`, (19997984,)
* `/observations/status/is_training`, `uint8`, (19997984,)
* `/observations/parameters/initial_closure`, `float64`, (19997984,)

* `/observations/parameters/controller_gain`, `float64`, (19997984,)
* `/observations/parameters/controller_thre`, `float64`, (19997984,)
* `/observations/parameters/real_trajectory`, `uint8`, (19997984,)
* `/masks`, `float32`, (19997984, 1)
* `/agents/demonstrator/actions`, `float32`, (19997984, 1)

* `/agents/demonstrator/hidden_state/time_step`, `float32`, (19997984, 1)
* `/agents/demonstrator/hidden_state/initial_distance`, `float32`, (19997984, 1)
* `/agents/demonstrator/hidden_state/controller_gain`, `float32`, (19997984, 1)
* `/agents/demonstrator/hidden_state/controller_thre`, `float32`, (19997984, 1)
* `/rewards`, `float64`, (19997984, 1)

* `/infos/rand/filename`, `<U8`, (19997984,)
* `/infos/rand/time_noise_normal`, `float64`, (19997984,)
* `/infos/rand/offset_noise_2d`, `float64`, (19997984, 3)
* `/infos/rand/original_time`, `float64`, (19997984,)
* `/infos/rand/randomized_time`, `float64`, (19997984,)

* `/infos/episode/r`, `float64`, (19997984,)
* `/infos/episode/l`, `int64`, (19997984,)
* `/infos/episode/t`, `float64`, (19997984,)