In [1]:
import tensorflow as tf
from time import gmtime, strftime
import time

from attention_dynamic_model import AttentionDynamicModel, set_decode_type
from reinforce_baseline import RolloutBaseline
from train import train_model

from utils import create_data_on_disk, get_cur_time

# ---- Run configuration -------------------------------------------------
# Every constant below is consumed further down in this cell, by the
# dataset helper, the optimizer, RolloutBaseline, or train_model.

# Problem instance and reproducibility.
GRAPH_SIZE = 100
SEED = 1234

# Network size.
embedding_dim = 128

# Training volume and epoch range.
SAMPLES = 512  # reduced run; the author's alternative value is 128*10000
BATCH = 64
START_EPOCH = 0
END_EPOCH = 100
FROM_CHECKPOINT = False

# Optimisation.
LEARNING_RATE = 0.0001
GRAD_NORM_CLIPPING = 1.0

# Rollout baseline.
ROLLOUT_SAMPLES = 10000
NUMBER_OF_WP_EPOCHS = 1

# Validation and logging.
VALIDATE_SET_SIZE = 10000
VAL_BATCH_SIZE = 1000
BATCH_VERBOSE = 1000

# Tag shared by the files written during this run: "VRP_<graph size>_<UTC date>".
# It is evaluated once here, so a run that crosses midnight keeps a single tag.
FILENAME = f"VRP_{GRAPH_SIZE}_{strftime('%Y-%m-%d', gmtime())}"

# Build a fresh policy network and switch it to the "sampling" decode type.
model_tf = AttentionDynamicModel(embedding_dim)
set_decode_type(model_tf, "sampling")
print(get_cur_time(), 'model initialized')

# Generate the validation set with a fixed seed. is_save / is_return ask the
# helper to both write it out (tagged with FILENAME) and hand it back.
validation_dataset = create_data_on_disk(
    GRAPH_SIZE,
    VALIDATE_SET_SIZE,
    is_save=True,
    filename=FILENAME,
    is_return=True,
    seed=SEED,
)
print(get_cur_time(), 'validation dataset created and saved on the disk')

# Adam optimizer with the configured learning rate.
optimizer = tf.keras.optimizers.Adam(LEARNING_RATE)

# Rollout baseline wrapped around the current model, starting at epoch 0.
baseline = RolloutBaseline(
    model_tf,
    wp_n_epochs=NUMBER_OF_WP_EPOCHS,
    epoch=0,
    num_samples=ROLLOUT_SAMPLES,
    filename=FILENAME,
    from_checkpoint=FROM_CHECKPOINT,
    embedding_dim=embedding_dim,
    graph_size=GRAPH_SIZE,
)
print(get_cur_time(), 'baseline initialized')

# Run the training loop and report how long it took.
#
# perf_counter() is a monotonic clock, so the elapsed figure cannot be skewed
# by system clock adjustments during a run that lasts many hours.
start = time.perf_counter()
try:
    train_model(
        optimizer,
        model_tf,
        baseline,
        validation_dataset,
        samples=SAMPLES,
        batch=BATCH,
        val_batch_size=VAL_BATCH_SIZE,
        start_epoch=START_EPOCH,
        end_epoch=END_EPOCH,
        from_checkpoint=FROM_CHECKPOINT,
        grad_norm_clipping=GRAD_NORM_CLIPPING,
        batch_verbose=BATCH_VERBOSE,
        graph_size=GRAPH_SIZE,
        filename=FILENAME,
    )
finally:
    # Printed even when training is stopped early (e.g. KeyboardInterrupt),
    # so the time spent is not lost; the exception still propagates afterwards.
    print("Total Training Time: ", time.perf_counter() - start, " sec")

2021-06-02 19:52:40 model initialized
2021-06-02 19:52:43 validation dataset created and saved on the disk


Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 0)


Rollout greedy execution: 100%|██████████| 10/10 [26:47<00:00, 160.78s/it]
batch calculation at epoch 0: 0it [00:00, ?it/s]

2021-06-02 20:19:33 baseline initialized
Current decode type: sampling


batch calculation at epoch 0: 1it [00:06,  6.90s/it]

grad_global_norm = 27.313236236572266, clipped_norm = 1.0
Epoch 0 (batch = 0): Loss: -7.350273132324219: Cost: 59.063865661621094


batch calculation at epoch 0: 8it [00:48,  6.10s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 0)


Rollout greedy execution: 100%|██████████| 10/10 [03:02<00:00, 18.26s/it]


Epoch 0 candidate mean 39.1143684387207, baseline epoch 0 mean 58.9781494140625, difference -19.863780975341797
p-value: 0.0
Update baseline


Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 0)


Rollout greedy execution: 100%|██████████| 10/10 [03:03<00:00, 18.37s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

alpha was updated to 1.0


Rollout greedy execution: 100%|██████████| 10/10 [02:58<00:00, 17.82s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.11880111694336
2021-06-02 20:29:29 Epoch 0: Loss: 158.68557739257812: Cost: 58.156253814697266


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.41s/it]
batch calculation at epoch 1: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 1: 1it [00:05,  5.61s/it]

grad_global_norm = 115.55860900878906, clipped_norm = 1.0
Epoch 1 (batch = 0): Loss: -6210.60595703125: Cost: 57.41487121582031


batch calculation at epoch 1: 8it [00:44,  5.58s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 1)


Rollout greedy execution: 100%|██████████| 10/10 [06:35<00:00, 39.57s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1 candidate mean 40.131778717041016, baseline epoch 1 mean 39.143638610839844, difference 0.9881401062011719


Rollout greedy execution: 100%|██████████| 10/10 [06:28<00:00, 38.80s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.039100646972656
2021-06-02 20:43:27 Epoch 1: Loss: -5839.123046875: Cost: 56.11992263793945


Rollout greedy execution: 100%|██████████| 1/1 [00:10<00:00, 10.10s/it]
batch calculation at epoch 2: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 2: 1it [00:05,  5.53s/it]

grad_global_norm = 190.757080078125, clipped_norm = 1.0000001192092896
Epoch 2 (batch = 0): Loss: -5689.0595703125: Cost: 55.38169860839844


batch calculation at epoch 2: 8it [00:46,  5.77s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 2)


Rollout greedy execution: 100%|██████████| 10/10 [09:41<00:00, 58.11s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 2 candidate mean 41.24728775024414, baseline epoch 2 mean 39.143638610839844, difference 2.103649139404297


Rollout greedy execution: 100%|██████████| 10/10 [10:00<00:00, 60.06s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 41.063499450683594
2021-06-02 21:04:05 Epoch 2: Loss: -5331.02734375: Cost: 54.970359802246094


Rollout greedy execution: 100%|██████████| 1/1 [00:10<00:00, 10.80s/it]
batch calculation at epoch 3: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 3: 1it [00:06,  6.35s/it]

grad_global_norm = 78.5906982421875, clipped_norm = 1.0
Epoch 3 (batch = 0): Loss: -4866.7373046875: Cost: 53.85981369018555


batch calculation at epoch 3: 8it [00:52,  6.61s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 3)


Rollout greedy execution: 100%|██████████| 10/10 [07:57<00:00, 47.76s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 3 candidate mean 40.41814041137695, baseline epoch 3 mean 39.143638610839844, difference 1.2745018005371094


Rollout greedy execution: 100%|██████████| 10/10 [07:40<00:00, 46.06s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.37289810180664
2021-06-02 21:20:47 Epoch 3: Loss: -4206.46826171875: Cost: 52.14750289916992


Rollout greedy execution: 100%|██████████| 1/1 [00:11<00:00, 11.50s/it]
batch calculation at epoch 4: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 4: 1it [00:06,  6.24s/it]

grad_global_norm = 147.78517150878906, clipped_norm = 0.9999999403953552
Epoch 4 (batch = 0): Loss: -3764.767333984375: Cost: 50.45948028564453


batch calculation at epoch 4: 8it [00:50,  6.34s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 4)


Rollout greedy execution: 100%|██████████| 10/10 [07:13<00:00, 43.39s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 4 candidate mean 40.35927200317383, baseline epoch 4 mean 39.143638610839844, difference 1.2156333923339844


Rollout greedy execution: 100%|██████████| 10/10 [06:57<00:00, 41.76s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.3129997253418
2021-06-02 21:36:01 Epoch 4: Loss: -3226.720703125: Cost: 49.52734375


Rollout greedy execution: 100%|██████████| 1/1 [00:11<00:00, 11.00s/it]
batch calculation at epoch 5: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 5: 1it [00:06,  6.79s/it]

grad_global_norm = 222.8672637939453, clipped_norm = 1.0
Epoch 5 (batch = 0): Loss: -2553.612060546875: Cost: 48.004608154296875


batch calculation at epoch 5: 8it [00:51,  6.44s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 5)


Rollout greedy execution: 100%|██████████| 10/10 [06:16<00:00, 37.61s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 5 candidate mean 40.35923767089844, baseline epoch 5 mean 39.143638610839844, difference 1.2155990600585938


Rollout greedy execution: 100%|██████████| 10/10 [05:54<00:00, 35.44s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.27989959716797
2021-06-02 21:49:14 Epoch 5: Loss: -2056.697998046875: Cost: 46.49358367919922


Rollout greedy execution: 100%|██████████| 1/1 [00:10<00:00, 10.24s/it]
batch calculation at epoch 6: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 6: 1it [00:06,  6.36s/it]

grad_global_norm = 48.695091247558594, clipped_norm = 1.0
Epoch 6 (batch = 0): Loss: -1312.622802734375: Cost: 45.095191955566406


batch calculation at epoch 6: 8it [00:48,  6.04s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 6)


Rollout greedy execution: 100%|██████████| 10/10 [04:22<00:00, 26.22s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 6 candidate mean 40.43157196044922, baseline epoch 6 mean 39.143638610839844, difference 1.287933349609375


Rollout greedy execution: 100%|██████████| 10/10 [04:16<00:00, 25.65s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.339900970458984
2021-06-02 21:58:52 Epoch 6: Loss: -1315.4296875: Cost: 44.73725891113281


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.39s/it]
batch calculation at epoch 7: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 7: 1it [00:05,  5.75s/it]

grad_global_norm = 39.21778106689453, clipped_norm = 1.0
Epoch 7 (batch = 0): Loss: -1054.5150146484375: Cost: 43.48790740966797


batch calculation at epoch 7: 8it [00:40,  5.10s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 7)


Rollout greedy execution: 100%|██████████| 10/10 [05:17<00:00, 31.77s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 7 candidate mean 40.48135757446289, baseline epoch 7 mean 39.143638610839844, difference 1.3377189636230469


Rollout greedy execution: 100%|██████████| 10/10 [05:05<00:00, 30.58s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.37110137939453
2021-06-02 22:10:06 Epoch 7: Loss: -1085.831787109375: Cost: 43.603065490722656


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.40s/it]
batch calculation at epoch 8: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 8: 1it [00:05,  5.53s/it]

grad_global_norm = 73.44921112060547, clipped_norm = 1.0
Epoch 8 (batch = 0): Loss: -1004.1087646484375: Cost: 43.530616760253906


batch calculation at epoch 8: 8it [00:44,  5.60s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 8)


Rollout greedy execution: 100%|██████████| 10/10 [05:16<00:00, 31.66s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 8 candidate mean 40.35969543457031, baseline epoch 8 mean 39.143638610839844, difference 1.2160568237304688


Rollout greedy execution: 100%|██████████| 10/10 [05:20<00:00, 32.10s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.259300231933594
2021-06-02 22:21:38 Epoch 8: Loss: -878.7682495117188: Cost: 43.148902893066406


Rollout greedy execution: 100%|██████████| 1/1 [00:10<00:00, 10.99s/it]
batch calculation at epoch 9: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 9: 1it [00:06,  6.45s/it]

grad_global_norm = 155.76007080078125, clipped_norm = 0.9999999403953552
Epoch 9 (batch = 0): Loss: -730.0185546875: Cost: 42.464698791503906


batch calculation at epoch 9: 8it [00:53,  6.70s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 9)


Rollout greedy execution: 100%|██████████| 10/10 [05:59<00:00, 36.00s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 9 candidate mean 40.150882720947266, baseline epoch 9 mean 39.143638610839844, difference 1.0072441101074219


Rollout greedy execution: 100%|██████████| 10/10 [05:47<00:00, 34.79s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.064300537109375
2021-06-02 22:34:30 Epoch 9: Loss: -787.4830322265625: Cost: 42.80972671508789


Rollout greedy execution: 100%|██████████| 1/1 [00:11<00:00, 11.65s/it]
batch calculation at epoch 10: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 10: 1it [00:06,  6.41s/it]

grad_global_norm = 158.98951721191406, clipped_norm = 0.9999999403953552
Epoch 10 (batch = 0): Loss: -784.3963623046875: Cost: 43.083778381347656


batch calculation at epoch 10: 8it [00:54,  6.79s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 10)


Rollout greedy execution: 100%|██████████| 10/10 [07:21<00:00, 44.14s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 10 candidate mean 40.19628143310547, baseline epoch 10 mean 39.143638610839844, difference 1.052642822265625


Rollout greedy execution: 100%|██████████| 10/10 [07:09<00:00, 42.94s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.10860061645508
2021-06-02 22:50:07 Epoch 10: Loss: -725.9302978515625: Cost: 42.424163818359375


Rollout greedy execution: 100%|██████████| 1/1 [00:11<00:00, 11.66s/it]
batch calculation at epoch 11: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 11: 1it [00:06,  6.45s/it]

grad_global_norm = 104.7627182006836, clipped_norm = 1.0
Epoch 11 (batch = 0): Loss: -694.0195922851562: Cost: 42.3075065612793


batch calculation at epoch 11: 8it [00:54,  6.80s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 11)


Rollout greedy execution: 100%|██████████| 10/10 [06:08<00:00, 36.89s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 11 candidate mean 39.99293899536133, baseline epoch 11 mean 39.143638610839844, difference 0.8493003845214844


Rollout greedy execution: 100%|██████████| 10/10 [05:57<00:00, 35.76s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.92430114746094
2021-06-02 23:03:20 Epoch 11: Loss: -679.4664306640625: Cost: 42.47322463989258


Rollout greedy execution: 100%|██████████| 1/1 [00:11<00:00, 11.63s/it]
batch calculation at epoch 12: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 12: 1it [00:07,  7.21s/it]

grad_global_norm = 67.78373718261719, clipped_norm = 0.9999999403953552
Epoch 12 (batch = 0): Loss: -760.1033935546875: Cost: 42.86213684082031


batch calculation at epoch 12: 8it [00:51,  6.48s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 12)


Rollout greedy execution: 100%|██████████| 10/10 [04:39<00:00, 27.92s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 12 candidate mean 40.074405670166016, baseline epoch 12 mean 39.143638610839844, difference 0.9307670593261719


Rollout greedy execution: 100%|██████████| 10/10 [04:26<00:00, 26.65s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.001800537109375
2021-06-02 23:13:29 Epoch 12: Loss: -656.96826171875: Cost: 42.499961853027344


Rollout greedy execution: 100%|██████████| 1/1 [00:11<00:00, 11.65s/it]
batch calculation at epoch 13: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 13: 1it [00:06,  6.09s/it]

grad_global_norm = 76.80125427246094, clipped_norm = 1.0
Epoch 13 (batch = 0): Loss: -637.671630859375: Cost: 42.418418884277344


batch calculation at epoch 13: 8it [00:49,  6.13s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 13)


Rollout greedy execution: 100%|██████████| 10/10 [03:48<00:00, 22.84s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 13 candidate mean 40.474098205566406, baseline epoch 13 mean 39.143638610839844, difference 1.3304595947265625


Rollout greedy execution: 100%|██████████| 10/10 [03:40<00:00, 22.05s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.386199951171875
2021-06-02 23:21:59 Epoch 13: Loss: -857.2412719726562: Cost: 43.13728332519531


Rollout greedy execution: 100%|██████████| 1/1 [00:10<00:00, 10.39s/it]
batch calculation at epoch 14: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 14: 1it [00:05,  5.35s/it]

grad_global_norm = 81.47550964355469, clipped_norm = 0.9999999403953552
Epoch 14 (batch = 0): Loss: -811.9780883789062: Cost: 42.203102111816406


batch calculation at epoch 14: 8it [00:42,  5.37s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 14)


Rollout greedy execution: 100%|██████████| 10/10 [03:07<00:00, 18.72s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 14 candidate mean 40.81352996826172, baseline epoch 14 mean 39.143638610839844, difference 1.669891357421875


Rollout greedy execution: 100%|██████████| 10/10 [03:03<00:00, 18.33s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.730098724365234
2021-06-02 23:29:03 Epoch 14: Loss: -665.5316162109375: Cost: 42.59728240966797


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.43s/it]
batch calculation at epoch 15: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 15: 1it [00:04,  4.47s/it]

grad_global_norm = 118.12773895263672, clipped_norm = 1.0
Epoch 15 (batch = 0): Loss: -329.17529296875: Cost: 41.428504943847656


batch calculation at epoch 15: 8it [00:38,  4.80s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 15)


Rollout greedy execution: 100%|██████████| 10/10 [03:04<00:00, 18.47s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 15 candidate mean 40.632232666015625, baseline epoch 15 mean 39.143638610839844, difference 1.4885940551757812


Rollout greedy execution: 100%|██████████| 10/10 [03:03<00:00, 18.33s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.50859832763672
2021-06-02 23:35:59 Epoch 15: Loss: -568.6990356445312: Cost: 42.37350845336914


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.42s/it]
batch calculation at epoch 16: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 16: 1it [00:04,  4.88s/it]

grad_global_norm = 105.3956527709961, clipped_norm = 1.0
Epoch 16 (batch = 0): Loss: -496.1940002441406: Cost: 42.032508850097656


batch calculation at epoch 16: 8it [00:39,  4.93s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 16)


Rollout greedy execution: 100%|██████████| 10/10 [03:18<00:00, 19.88s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 16 candidate mean 40.395687103271484, baseline epoch 16 mean 39.143638610839844, difference 1.2520484924316406


Rollout greedy execution: 100%|██████████| 10/10 [03:14<00:00, 19.46s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.29679870605469
2021-06-02 23:43:21 Epoch 16: Loss: -563.5635986328125: Cost: 42.046287536621094


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.80s/it]
batch calculation at epoch 17: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 17: 1it [00:05,  5.16s/it]

grad_global_norm = 66.712890625, clipped_norm = 1.0
Epoch 17 (batch = 0): Loss: -343.10211181640625: Cost: 41.89992904663086


batch calculation at epoch 17: 8it [00:41,  5.22s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 17)


Rollout greedy execution: 100%|██████████| 10/10 [04:05<00:00, 24.59s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 17 candidate mean 40.295597076416016, baseline epoch 17 mean 39.143638610839844, difference 1.1519584655761719


Rollout greedy execution: 100%|██████████| 10/10 [04:27<00:00, 26.75s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.22520065307617
2021-06-02 23:52:46 Epoch 17: Loss: -413.8758239746094: Cost: 41.57828140258789


Rollout greedy execution: 100%|██████████| 1/1 [00:12<00:00, 12.83s/it]
batch calculation at epoch 18: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 18: 1it [00:07,  7.39s/it]

grad_global_norm = 124.0226058959961, clipped_norm = 1.0
Epoch 18 (batch = 0): Loss: -382.0337219238281: Cost: 41.259639739990234


batch calculation at epoch 18: 8it [00:54,  6.82s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 18)


Rollout greedy execution: 100%|██████████| 10/10 [04:39<00:00, 27.95s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 18 candidate mean 40.3142204284668, baseline epoch 18 mean 39.143638610839844, difference 1.1705818176269531


Rollout greedy execution: 100%|██████████| 10/10 [04:27<00:00, 26.71s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.23529815673828
2021-06-03 00:03:01 Epoch 18: Loss: -420.2196044921875: Cost: 41.42196273803711


Rollout greedy execution: 100%|██████████| 1/1 [00:12<00:00, 12.80s/it]
batch calculation at epoch 19: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 19: 1it [00:06,  6.54s/it]

grad_global_norm = 118.79826354980469, clipped_norm = 1.0
Epoch 19 (batch = 0): Loss: -447.5285949707031: Cost: 41.34303283691406


batch calculation at epoch 19: 8it [00:54,  6.82s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 19)


Rollout greedy execution: 100%|██████████| 10/10 [04:41<00:00, 28.14s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 19 candidate mean 40.19540786743164, baseline epoch 19 mean 39.143638610839844, difference 1.0517692565917969


Rollout greedy execution: 100%|██████████| 10/10 [04:28<00:00, 26.86s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.122798919677734
2021-06-03 00:13:18 Epoch 19: Loss: -423.8704833984375: Cost: 41.540061950683594


Rollout greedy execution: 100%|██████████| 1/1 [00:12<00:00, 12.81s/it]
batch calculation at epoch 20: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 20: 1it [00:06,  6.56s/it]

grad_global_norm = 62.149497985839844, clipped_norm = 1.0
Epoch 20 (batch = 0): Loss: -494.7257080078125: Cost: 41.66868209838867


batch calculation at epoch 20: 8it [00:54,  6.78s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 20)


Rollout greedy execution: 100%|██████████| 10/10 [05:32<00:00, 33.26s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 20 candidate mean 39.97092819213867, baseline epoch 20 mean 39.143638610839844, difference 0.8272895812988281


Rollout greedy execution: 100%|██████████| 10/10 [05:23<00:00, 32.40s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.891998291015625
2021-06-03 00:25:22 Epoch 20: Loss: -398.64349365234375: Cost: 41.411094665527344


Rollout greedy execution: 100%|██████████| 1/1 [00:12<00:00, 12.82s/it]
batch calculation at epoch 21: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 21: 1it [00:07,  7.08s/it]

grad_global_norm = 62.18849563598633, clipped_norm = 1.0
Epoch 21 (batch = 0): Loss: -424.7402038574219: Cost: 41.263893127441406


batch calculation at epoch 21: 8it [00:56,  7.10s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 21)


Rollout greedy execution: 100%|██████████| 10/10 [06:19<00:00, 37.93s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 21 candidate mean 39.87822341918945, baseline epoch 21 mean 39.143638610839844, difference 0.7345848083496094


Rollout greedy execution: 100%|██████████| 10/10 [06:09<00:00, 36.98s/it]


Validation score: 39.785701751708984
2021-06-03 00:39:01 Epoch 21: Loss: -336.4906921386719: Cost: 41.2228889465332


Rollout greedy execution: 100%|██████████| 1/1 [00:12<00:00, 12.74s/it]
batch calculation at epoch 22: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 22: 1it [00:06,  6.64s/it]

grad_global_norm = 72.0333023071289, clipped_norm = 0.9999999403953552
Epoch 22 (batch = 0): Loss: -194.34495544433594: Cost: 41.05332946777344


batch calculation at epoch 22: 8it [00:57,  7.15s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 22)


Rollout greedy execution: 100%|██████████| 10/10 [06:43<00:00, 40.34s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 22 candidate mean 39.89119338989258, baseline epoch 22 mean 39.143638610839844, difference 0.7475547790527344


Rollout greedy execution: 100%|██████████| 10/10 [05:42<00:00, 34.22s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.806800842285156
2021-06-03 00:52:36 Epoch 22: Loss: -510.7027587890625: Cost: 41.758384704589844


Rollout greedy execution: 100%|██████████| 1/1 [00:10<00:00, 10.28s/it]
batch calculation at epoch 23: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 23: 1it [00:05,  5.68s/it]

grad_global_norm = 137.49911499023438, clipped_norm = 0.9999998807907104
Epoch 23 (batch = 0): Loss: -539.302001953125: Cost: 41.66698455810547


batch calculation at epoch 23: 8it [00:42,  5.35s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 23)


Rollout greedy execution: 100%|██████████| 10/10 [04:04<00:00, 24.41s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 23 candidate mean 39.800514221191406, baseline epoch 23 mean 39.143638610839844, difference 0.6568756103515625


Rollout greedy execution: 100%|██████████| 10/10 [04:00<00:00, 24.08s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.78879928588867
2021-06-03 01:01:34 Epoch 23: Loss: -360.2230224609375: Cost: 41.040340423583984


Rollout greedy execution: 100%|██████████| 1/1 [00:10<00:00, 10.10s/it]
batch calculation at epoch 24: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 24: 1it [00:04,  4.84s/it]

grad_global_norm = 115.6860122680664, clipped_norm = 0.9999999403953552
Epoch 24 (batch = 0): Loss: -391.068603515625: Cost: 41.11913299560547


batch calculation at epoch 24: 8it [00:39,  4.90s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 24)


Rollout greedy execution: 100%|██████████| 10/10 [03:05<00:00, 18.51s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 24 candidate mean 40.19450759887695, baseline epoch 24 mean 39.143638610839844, difference 1.0508689880371094


Rollout greedy execution: 100%|██████████| 10/10 [03:02<00:00, 18.30s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.12260055541992
2021-06-03 01:08:32 Epoch 24: Loss: -355.58349609375: Cost: 41.07289123535156


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.42s/it]
batch calculation at epoch 25: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 25: 1it [00:04,  4.93s/it]

grad_global_norm = 102.74291229248047, clipped_norm = 0.9999999403953552
Epoch 25 (batch = 0): Loss: -346.8751220703125: Cost: 40.83216857910156


batch calculation at epoch 25: 8it [00:38,  4.81s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 25)


Rollout greedy execution: 100%|██████████| 10/10 [03:04<00:00, 18.46s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 25 candidate mean 40.7892951965332, baseline epoch 25 mean 39.143638610839844, difference 1.6456565856933594


Rollout greedy execution: 100%|██████████| 10/10 [03:02<00:00, 18.26s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.6963996887207
2021-06-03 01:15:27 Epoch 25: Loss: -403.22430419921875: Cost: 41.381404876708984


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.44s/it]
batch calculation at epoch 26: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 26: 1it [00:04,  4.85s/it]

grad_global_norm = 121.44657897949219, clipped_norm = 1.0
Epoch 26 (batch = 0): Loss: -536.0441284179688: Cost: 42.19524383544922


batch calculation at epoch 26: 8it [00:38,  4.76s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 26)


Rollout greedy execution: 100%|██████████| 10/10 [03:13<00:00, 19.33s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 26 candidate mean 41.20326232910156, baseline epoch 26 mean 39.143638610839844, difference 2.0596237182617188


Rollout greedy execution: 100%|██████████| 10/10 [03:24<00:00, 20.50s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 41.19599914550781
2021-06-03 01:22:53 Epoch 26: Loss: -515.5118408203125: Cost: 42.26020431518555


Rollout greedy execution: 100%|██████████| 1/1 [00:10<00:00, 10.96s/it]
batch calculation at epoch 27: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 27: 1it [00:05,  5.76s/it]

grad_global_norm = 45.79206466674805, clipped_norm = 1.0
Epoch 27 (batch = 0): Loss: -470.40704345703125: Cost: 41.85673904418945


batch calculation at epoch 27: 8it [00:46,  5.87s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 27)


Rollout greedy execution: 100%|██████████| 10/10 [04:05<00:00, 24.51s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 27 candidate mean 41.1580924987793, baseline epoch 27 mean 39.143638610839844, difference 2.014453887939453


Rollout greedy execution: 100%|██████████| 10/10 [04:45<00:00, 28.56s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 41.145999908447266
2021-06-03 01:32:42 Epoch 27: Loss: -559.07958984375: Cost: 42.06876754760742


Rollout greedy execution: 100%|██████████| 1/1 [00:14<00:00, 14.74s/it]
batch calculation at epoch 28: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 28: 1it [00:07,  7.79s/it]

grad_global_norm = 161.58334350585938, clipped_norm = 1.0
Epoch 28 (batch = 0): Loss: -1096.609130859375: Cost: 43.05646514892578


batch calculation at epoch 28: 8it [01:04,  8.10s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 28)


Rollout greedy execution: 100%|██████████| 10/10 [05:00<00:00, 30.03s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 28 candidate mean 41.07405090332031, baseline epoch 28 mean 39.143638610839844, difference 1.9304122924804688


Rollout greedy execution:  70%|███████   | 7/10 [03:50<01:38, 32.94s/it]


KeyboardInterrupt: 

In [None]:
import tensorflow as tf
from time import gmtime, strftime

from attention_dynamic_model import set_decode_type
from reinforce_baseline import RolloutBaseline
from train import train_model

from utils import get_cur_time
from reinforce_baseline import load_tf_model
from utils import read_from_pickle


# ---------------------------------------------------------------------------
# Resume-from-checkpoint configuration
# ---------------------------------------------------------------------------

# Problem / run identification.
GRAPH_SIZE = 100
# NOTE(review): the stem below is built from today's UTC date, whereas the
# checkpoint paths further down are pinned to 2021-06-02 -- confirm that the
# resumed run is meant to use a different stem than the run it continues.
FILENAME = 'VRP_{}_{}'.format(GRAPH_SIZE, strftime("%Y-%m-%d", gmtime()))

# Epoch range and checkpoint locations. The model checkpoint file name refers
# to epoch 27, so training restarts at epoch 28.
START_EPOCH = 28
END_EPOCH = 100
FROM_CHECKPOINT = True
MODEL_PATH = 'model_checkpoint_epoch_27_VRP_100_2021-06-02.h5'
BASELINE_MODEL_PATH = 'baseline_checkpoint_epoch_0_VRP_100_2021-06-02.h5'
VAL_SET_PATH = 'Validation_dataset_VRP_100_2021-06-02.pkl'

# Model / optimisation hyper-parameters.
embedding_dim = 128
LEARNING_RATE = 0.0001
GRAD_NORM_CLIPPING = 1.0

# Data sizes handed to the training loop and the rollout baseline.
SAMPLES = 512  # 128*10000
BATCH = 64
ROLLOUT_SAMPLES = 10000
NUMBER_OF_WP_EPOCHS = 1
VAL_BATCH_SIZE = 1000
BATCH_VERBOSE = 1000

# Not referenced anywhere else in this cell; kept so the notebook namespace
# stays the same as before.
VALIDATE_SET_SIZE = 10000
SEED = 1234

# ---------------------------------------------------------------------------
# Restore state
# ---------------------------------------------------------------------------

# Load the policy network from its checkpoint and switch it to sampling decode.
model_tf = load_tf_model(MODEL_PATH, embedding_dim=embedding_dim, graph_size=GRAPH_SIZE)
set_decode_type(model_tf, "sampling")
print(get_cur_time(), 'model loaded')

# Load the previously saved validation dataset (nothing is generated here).
validation_dataset = read_from_pickle(VAL_SET_PATH)
print(get_cur_time(), 'validation dataset loaded')

# A fresh Adam optimizer is created; no optimizer state is loaded in this cell.
optimizer = tf.keras.optimizers.Adam(LEARNING_RATE)

# Build the rollout baseline from its own checkpoint file.
baseline = RolloutBaseline(
    model_tf,
    wp_n_epochs=NUMBER_OF_WP_EPOCHS,
    epoch=START_EPOCH,
    num_samples=ROLLOUT_SAMPLES,
    filename=FILENAME,
    from_checkpoint=FROM_CHECKPOINT,
    embedding_dim=embedding_dim,
    graph_size=GRAPH_SIZE,
    path_to_checkpoint=BASELINE_MODEL_PATH,
)
print(get_cur_time(), 'baseline initialized')

# ---------------------------------------------------------------------------
# Continue training from START_EPOCH up to END_EPOCH
# ---------------------------------------------------------------------------
train_model(
    optimizer,
    model_tf,
    baseline,
    validation_dataset,
    samples=SAMPLES,
    batch=BATCH,
    val_batch_size=VAL_BATCH_SIZE,
    start_epoch=START_EPOCH,
    end_epoch=END_EPOCH,
    from_checkpoint=FROM_CHECKPOINT,
    grad_norm_clipping=GRAD_NORM_CLIPPING,
    batch_verbose=BATCH_VERBOSE,
    graph_size=GRAPH_SIZE,
    filename=FILENAME,
)

2021-06-03 14:21:59 model loaded
2021-06-03 14:22:01 validation dataset loaded
Baseline model loaded


Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating baseline model on baseline dataset (epoch = 28)


Rollout greedy execution: 100%|██████████| 10/10 [03:36<00:00, 21.69s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

2021-06-03 14:25:42 baseline initialized
Skipping warm-up mode


Rollout greedy execution: 100%|██████████| 1/1 [00:11<00:00, 11.82s/it]
batch calculation at epoch 28: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 28: 1it [00:05,  5.38s/it]

grad_global_norm = 78.55379486083984, clipped_norm = 1.0
Epoch 28 (batch = 0): Loss: -1203.004638671875: Cost: 44.023590087890625


batch calculation at epoch 28: 8it [00:40,  5.05s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 28)


Rollout greedy execution: 100%|██████████| 10/10 [02:54<00:00, 17.47s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 28 candidate mean 40.621463775634766, baseline epoch 28 mean 39.125244140625, difference 1.4962196350097656


Rollout greedy execution: 100%|██████████| 10/10 [02:53<00:00, 17.32s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.614200592041016
2021-06-03 14:32:22 Epoch 28: Loss: -619.2234497070312: Cost: 42.506465911865234


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.38s/it]
batch calculation at epoch 29: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 29: 1it [00:04,  4.86s/it]

grad_global_norm = 68.42021179199219, clipped_norm = 1.0000001192092896
Epoch 29 (batch = 0): Loss: -392.1077575683594: Cost: 41.67573547363281


batch calculation at epoch 29: 8it [00:38,  4.87s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 29)


Rollout greedy execution: 100%|██████████| 10/10 [03:09<00:00, 18.93s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 29 candidate mean 40.10036849975586, baseline epoch 29 mean 39.125244140625, difference 0.9751243591308594


Rollout greedy execution: 100%|██████████| 10/10 [03:15<00:00, 19.56s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 40.068199157714844
2021-06-03 14:39:36 Epoch 29: Loss: -357.47021484375: Cost: 41.295310974121094


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.83s/it]
batch calculation at epoch 30: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 30: 1it [00:05,  5.23s/it]

grad_global_norm = 116.30571746826172, clipped_norm = 1.0
Epoch 30 (batch = 0): Loss: -307.2305908203125: Cost: 40.469879150390625


batch calculation at epoch 30: 8it [00:41,  5.14s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 30)


Rollout greedy execution: 100%|██████████| 10/10 [03:27<00:00, 20.75s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 30 candidate mean 39.67135238647461, baseline epoch 30 mean 39.125244140625, difference 0.5461082458496094


Rollout greedy execution: 100%|██████████| 10/10 [03:49<00:00, 22.95s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.67720031738281
2021-06-03 14:47:44 Epoch 30: Loss: -249.04234313964844: Cost: 40.68927001953125


Rollout greedy execution: 100%|██████████| 1/1 [00:13<00:00, 13.09s/it]
batch calculation at epoch 31: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 31: 1it [00:06,  6.79s/it]

grad_global_norm = 105.85757446289062, clipped_norm = 1.0
Epoch 31 (batch = 0): Loss: -212.6357421875: Cost: 39.987953186035156


batch calculation at epoch 31: 8it [00:54,  6.81s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 31)


Rollout greedy execution: 100%|██████████| 10/10 [04:16<00:00, 25.61s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 31 candidate mean 39.59379196166992, baseline epoch 31 mean 39.125244140625, difference 0.4685478210449219


Rollout greedy execution: 100%|██████████| 10/10 [04:18<00:00, 25.85s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.58620071411133
2021-06-03 14:57:26 Epoch 31: Loss: -230.904296875: Cost: 40.80261993408203


Rollout greedy execution: 100%|██████████| 1/1 [00:13<00:00, 13.25s/it]
batch calculation at epoch 32: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 32: 1it [00:06,  6.96s/it]

grad_global_norm = 67.75611114501953, clipped_norm = 0.9999998807907104
Epoch 32 (batch = 0): Loss: -89.71143341064453: Cost: 39.92375946044922


batch calculation at epoch 32: 8it [00:55,  6.92s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 32)


Rollout greedy execution: 100%|██████████| 10/10 [04:21<00:00, 26.10s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 32 candidate mean 39.630523681640625, baseline epoch 32 mean 39.125244140625, difference 0.505279541015625


Rollout greedy execution: 100%|██████████| 10/10 [04:19<00:00, 25.94s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.61109924316406
2021-06-03 15:07:15 Epoch 32: Loss: -152.4336395263672: Cost: 40.13856887817383


Rollout greedy execution: 100%|██████████| 1/1 [00:13<00:00, 13.32s/it]
batch calculation at epoch 33: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 33: 1it [00:06,  6.89s/it]

grad_global_norm = 98.8735580444336, clipped_norm = 1.0
Epoch 33 (batch = 0): Loss: -140.5901336669922: Cost: 41.06825637817383


batch calculation at epoch 33: 8it [00:55,  6.94s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 33)


Rollout greedy execution: 100%|██████████| 10/10 [04:22<00:00, 26.22s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 33 candidate mean 39.612525939941406, baseline epoch 33 mean 39.125244140625, difference 0.48728179931640625


Rollout greedy execution: 100%|██████████| 10/10 [04:20<00:00, 26.00s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.5890998840332
2021-06-03 15:17:07 Epoch 33: Loss: -176.16600036621094: Cost: 40.28609085083008


Rollout greedy execution: 100%|██████████| 1/1 [00:13<00:00, 13.47s/it]
batch calculation at epoch 34: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 34: 1it [00:06,  6.84s/it]

grad_global_norm = 175.1121063232422, clipped_norm = 1.0
Epoch 34 (batch = 0): Loss: -133.38189697265625: Cost: 39.75326156616211


batch calculation at epoch 34: 8it [00:55,  6.90s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 34)


Rollout greedy execution: 100%|██████████| 10/10 [04:22<00:00, 26.21s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 34 candidate mean 39.544151306152344, baseline epoch 34 mean 39.125244140625, difference 0.41890716552734375


Rollout greedy execution: 100%|██████████| 10/10 [04:20<00:00, 26.05s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.521400451660156
2021-06-03 15:26:58 Epoch 34: Loss: -154.94471740722656: Cost: 40.154090881347656


Rollout greedy execution: 100%|██████████| 1/1 [00:13<00:00, 13.36s/it]
batch calculation at epoch 35: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 35: 1it [00:06,  6.94s/it]

grad_global_norm = 57.91170883178711, clipped_norm = 0.9999999403953552
Epoch 35 (batch = 0): Loss: -224.7648162841797: Cost: 40.02217483520508


batch calculation at epoch 35: 8it [00:55,  6.91s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 35)


Rollout greedy execution: 100%|██████████| 10/10 [04:21<00:00, 26.12s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 35 candidate mean 39.56313705444336, baseline epoch 35 mean 39.125244140625, difference 0.4378929138183594


Rollout greedy execution: 100%|██████████| 10/10 [04:19<00:00, 25.95s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.559600830078125
2021-06-03 15:36:48 Epoch 35: Loss: -294.339599609375: Cost: 40.87434005737305


Rollout greedy execution: 100%|██████████| 1/1 [00:13<00:00, 13.31s/it]
batch calculation at epoch 36: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 36: 1it [00:07,  7.06s/it]

grad_global_norm = 48.77924346923828, clipped_norm = 0.9999999403953552
Epoch 36 (batch = 0): Loss: -609.9078369140625: Cost: 41.54046630859375


batch calculation at epoch 36: 8it [00:54,  6.85s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 36)


Rollout greedy execution: 100%|██████████| 10/10 [04:21<00:00, 26.18s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 36 candidate mean 39.603111267089844, baseline epoch 36 mean 39.125244140625, difference 0.47786712646484375


Rollout greedy execution: 100%|██████████| 10/10 [04:19<00:00, 25.94s/it]


Validation score: 39.59389877319336
2021-06-03 15:46:37 Epoch 36: Loss: -779.675048828125: Cost: 42.644737243652344


Rollout greedy execution: 100%|██████████| 1/1 [00:13<00:00, 13.45s/it]
batch calculation at epoch 37: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 37: 1it [00:06,  6.88s/it]

grad_global_norm = 192.17030334472656, clipped_norm = 1.0
Epoch 37 (batch = 0): Loss: -817.5719604492188: Cost: 42.66606140136719


batch calculation at epoch 37: 8it [00:55,  6.89s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 37)


Rollout greedy execution: 100%|██████████| 10/10 [04:00<00:00, 24.04s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 37 candidate mean 39.57242965698242, baseline epoch 37 mean 39.125244140625, difference 0.4471855163574219


Rollout greedy execution: 100%|██████████| 10/10 [03:46<00:00, 22.67s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.599700927734375
2021-06-03 15:55:33 Epoch 37: Loss: -615.1341552734375: Cost: 42.377845764160156


Rollout greedy execution: 100%|██████████| 1/1 [00:10<00:00, 10.39s/it]
batch calculation at epoch 38: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 38: 1it [00:05,  5.38s/it]

grad_global_norm = 106.43025970458984, clipped_norm = 1.0
Epoch 38 (batch = 0): Loss: -156.4078369140625: Cost: 40.75578308105469


batch calculation at epoch 38: 8it [00:43,  5.39s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 38)


Rollout greedy execution: 100%|██████████| 10/10 [03:13<00:00, 19.31s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 38 candidate mean 39.59647750854492, baseline epoch 38 mean 39.125244140625, difference 0.4712333679199219


Rollout greedy execution: 100%|██████████| 10/10 [03:08<00:00, 18.87s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.56050109863281
2021-06-03 16:02:48 Epoch 38: Loss: -696.9246826171875: Cost: 42.594757080078125


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.31s/it]
batch calculation at epoch 39: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 39: 1it [00:04,  4.79s/it]

grad_global_norm = 571.9028930664062, clipped_norm = 1.0
Epoch 39 (batch = 0): Loss: -1991.9189453125: Cost: 46.96764373779297


batch calculation at epoch 39: 8it [00:39,  5.00s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 39)


Rollout greedy execution: 100%|██████████| 10/10 [03:09<00:00, 19.00s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 39 candidate mean 39.77711868286133, baseline epoch 39 mean 39.125244140625, difference 0.6518745422363281


Rollout greedy execution: 100%|██████████| 10/10 [03:08<00:00, 18.81s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.78620147705078
2021-06-03 16:09:56 Epoch 39: Loss: -1175.3975830078125: Cost: 44.124755859375


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.30s/it]
batch calculation at epoch 40: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 40: 1it [00:04,  4.87s/it]

grad_global_norm = 84.15608215332031, clipped_norm = 0.9999999403953552
Epoch 40 (batch = 0): Loss: -277.89068603515625: Cost: 41.26000213623047


batch calculation at epoch 40: 8it [00:38,  4.80s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 40)


Rollout greedy execution: 100%|██████████| 10/10 [03:09<00:00, 18.96s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 40 candidate mean 39.71867370605469, baseline epoch 40 mean 39.125244140625, difference 0.5934295654296875


Rollout greedy execution: 100%|██████████| 10/10 [03:08<00:00, 18.87s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.66889953613281
2021-06-03 16:17:02 Epoch 40: Loss: -229.39987182617188: Cost: 40.70597457885742


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.30s/it]
batch calculation at epoch 41: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 41: 1it [00:04,  4.81s/it]

grad_global_norm = 81.13422393798828, clipped_norm = 0.9999999403953552
Epoch 41 (batch = 0): Loss: -328.8546142578125: Cost: 41.3349494934082


batch calculation at epoch 41: 8it [00:38,  4.82s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 41)


Rollout greedy execution: 100%|██████████| 10/10 [03:09<00:00, 18.96s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 41 candidate mean 39.449161529541016, baseline epoch 41 mean 39.125244140625, difference 0.3239173889160156


Rollout greedy execution: 100%|██████████| 10/10 [03:08<00:00, 18.84s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.45309829711914
2021-06-03 16:24:08 Epoch 41: Loss: -271.88702392578125: Cost: 40.77781677246094


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.33s/it]
batch calculation at epoch 42: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 42: 1it [00:04,  4.83s/it]

grad_global_norm = 63.65000915527344, clipped_norm = 1.0
Epoch 42 (batch = 0): Loss: -146.19395446777344: Cost: 39.86786651611328


batch calculation at epoch 42: 8it [00:38,  4.84s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 42)


Rollout greedy execution: 100%|██████████| 10/10 [03:11<00:00, 19.19s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 42 candidate mean 39.38554763793945, baseline epoch 42 mean 39.125244140625, difference 0.2603034973144531


Rollout greedy execution: 100%|██████████| 10/10 [03:15<00:00, 19.56s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.36140060424805
2021-06-03 16:31:24 Epoch 42: Loss: -140.6053009033203: Cost: 40.061767578125


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.79s/it]
batch calculation at epoch 43: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 43: 1it [00:05,  5.08s/it]

grad_global_norm = 78.86138153076172, clipped_norm = 0.9999999403953552
Epoch 43 (batch = 0): Loss: -204.0181884765625: Cost: 40.34501647949219


batch calculation at epoch 43: 8it [00:40,  5.03s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 43)


Rollout greedy execution: 100%|██████████| 10/10 [03:17<00:00, 19.71s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 43 candidate mean 39.355751037597656, baseline epoch 43 mean 39.125244140625, difference 0.23050689697265625


Rollout greedy execution: 100%|██████████| 10/10 [03:15<00:00, 19.53s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.339698791503906
2021-06-03 16:38:46 Epoch 43: Loss: -226.98379516601562: Cost: 40.4710578918457


Rollout greedy execution: 100%|██████████| 1/1 [00:10<00:00, 10.49s/it]
batch calculation at epoch 44: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 44: 1it [00:05,  5.10s/it]

grad_global_norm = 129.4531707763672, clipped_norm = 1.0
Epoch 44 (batch = 0): Loss: -222.16368103027344: Cost: 40.43467330932617


batch calculation at epoch 44: 8it [00:41,  5.15s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 44)


Rollout greedy execution: 100%|██████████| 10/10 [03:17<00:00, 19.72s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 44 candidate mean 39.34540557861328, baseline epoch 44 mean 39.125244140625, difference 0.22016143798828125


Rollout greedy execution: 100%|██████████| 10/10 [03:15<00:00, 19.57s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.302398681640625
2021-06-03 16:46:11 Epoch 44: Loss: -292.6229248046875: Cost: 40.73049545288086


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.76s/it]
batch calculation at epoch 45: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 45: 1it [00:05,  5.07s/it]

grad_global_norm = 59.29408645629883, clipped_norm = 0.9999999403953552
Epoch 45 (batch = 0): Loss: -471.0198974609375: Cost: 41.701637268066406


batch calculation at epoch 45: 8it [00:40,  5.08s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 45)


Rollout greedy execution: 100%|██████████| 10/10 [03:16<00:00, 19.70s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 45 candidate mean 39.34968185424805, baseline epoch 45 mean 39.125244140625, difference 0.22443771362304688


Rollout greedy execution: 100%|██████████| 10/10 [03:15<00:00, 19.53s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.32350158691406
2021-06-03 16:53:34 Epoch 45: Loss: -639.1943359375: Cost: 42.323974609375


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.79s/it]
batch calculation at epoch 46: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 46: 1it [00:05,  5.11s/it]

grad_global_norm = 31.568187713623047, clipped_norm = 0.9999998807907104
Epoch 46 (batch = 0): Loss: -537.79150390625: Cost: 42.310394287109375


batch calculation at epoch 46: 8it [00:40,  5.07s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 46)


Rollout greedy execution: 100%|██████████| 10/10 [03:17<00:00, 19.71s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 46 candidate mean 39.33281707763672, baseline epoch 46 mean 39.125244140625, difference 0.20757293701171875


Rollout greedy execution: 100%|██████████| 10/10 [03:15<00:00, 19.51s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.30179977416992
2021-06-03 17:00:57 Epoch 46: Loss: -391.0664367675781: Cost: 41.34575271606445


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.78s/it]
batch calculation at epoch 47: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 47: 1it [00:05,  5.02s/it]

grad_global_norm = 87.31570434570312, clipped_norm = 1.0
Epoch 47 (batch = 0): Loss: -239.29527282714844: Cost: 40.479148864746094


batch calculation at epoch 47: 8it [00:40,  5.11s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 47)


Rollout greedy execution: 100%|██████████| 10/10 [03:16<00:00, 19.67s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 47 candidate mean 39.36014938354492, baseline epoch 47 mean 39.125244140625, difference 0.23490524291992188


Rollout greedy execution: 100%|██████████| 10/10 [03:16<00:00, 19.63s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.32550048828125
2021-06-03 17:08:20 Epoch 47: Loss: -164.25863647460938: Cost: 40.20552444458008


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.79s/it]
batch calculation at epoch 48: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 48: 1it [00:05,  5.12s/it]

grad_global_norm = 194.64630126953125, clipped_norm = 1.0
Epoch 48 (batch = 0): Loss: -415.3695373535156: Cost: 40.995758056640625


batch calculation at epoch 48: 8it [00:41,  5.24s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 48)


Rollout greedy execution: 100%|██████████| 10/10 [03:14<00:00, 19.44s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 48 candidate mean 39.42144012451172, baseline epoch 48 mean 39.125244140625, difference 0.29619598388671875


Rollout greedy execution: 100%|██████████| 10/10 [03:13<00:00, 19.33s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.39030075073242
2021-06-03 17:15:40 Epoch 48: Loss: -1077.5809326171875: Cost: 43.820011138916016


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.77s/it]
batch calculation at epoch 49: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 49: 1it [00:05,  5.65s/it]

grad_global_norm = 334.9604187011719, clipped_norm = 1.0
Epoch 49 (batch = 0): Loss: -1199.5677490234375: Cost: 44.337440490722656


batch calculation at epoch 49: 8it [00:40,  5.10s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 49)


Rollout greedy execution: 100%|██████████| 10/10 [03:14<00:00, 19.41s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 49 candidate mean 39.37784194946289, baseline epoch 49 mean 39.125244140625, difference 0.2525978088378906


Rollout greedy execution: 100%|██████████| 10/10 [03:13<00:00, 19.32s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.29629898071289
2021-06-03 17:22:58 Epoch 49: Loss: -422.49560546875: Cost: 41.22720718383789


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.91s/it]
batch calculation at epoch 50: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 50: 1it [00:05,  5.20s/it]

grad_global_norm = 85.3173828125, clipped_norm = 0.9999999403953552
Epoch 50 (batch = 0): Loss: -94.0256576538086: Cost: 39.89174270629883


batch calculation at epoch 50: 8it [00:40,  5.10s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 50)


Rollout greedy execution: 100%|██████████| 10/10 [03:12<00:00, 19.29s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 50 candidate mean 39.252498626708984, baseline epoch 50 mean 39.125244140625, difference 0.12725448608398438


Rollout greedy execution: 100%|██████████| 10/10 [03:05<00:00, 18.59s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.23720169067383
2021-06-03 17:30:08 Epoch 50: Loss: -411.424560546875: Cost: 41.1449089050293


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.56s/it]
batch calculation at epoch 51: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 51: 1it [00:04,  4.99s/it]

grad_global_norm = 37.93159103393555, clipped_norm = 1.0
Epoch 51 (batch = 0): Loss: -720.0003662109375: Cost: 42.375221252441406


batch calculation at epoch 51: 8it [00:38,  4.86s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 51)


Rollout greedy execution: 100%|██████████| 10/10 [03:11<00:00, 19.16s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 51 candidate mean 39.22783279418945, baseline epoch 51 mean 39.125244140625, difference 0.10258865356445312


Rollout greedy execution: 100%|██████████| 10/10 [03:09<00:00, 18.97s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.20800018310547
2021-06-03 17:37:18 Epoch 51: Loss: -1068.729248046875: Cost: 43.69554138183594


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.63s/it]
batch calculation at epoch 52: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 52: 1it [00:04,  4.92s/it]

grad_global_norm = 173.05697631835938, clipped_norm = 0.9999999403953552
Epoch 52 (batch = 0): Loss: -1512.238525390625: Cost: 45.869384765625


batch calculation at epoch 52: 8it [00:38,  4.80s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 52)


Rollout greedy execution: 100%|██████████| 10/10 [03:10<00:00, 19.09s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 52 candidate mean 39.21704864501953, baseline epoch 52 mean 39.125244140625, difference 0.09180450439453125


Rollout greedy execution: 100%|██████████| 10/10 [03:09<00:00, 18.95s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.252899169921875
2021-06-03 17:44:26 Epoch 52: Loss: -1665.4715576171875: Cost: 45.74980545043945


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.59s/it]
batch calculation at epoch 53: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 53: 1it [00:04,  4.99s/it]

grad_global_norm = 116.10391998291016, clipped_norm = 1.0
Epoch 53 (batch = 0): Loss: -1473.270263671875: Cost: 44.957984924316406


batch calculation at epoch 53: 8it [00:39,  4.91s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 53)


Rollout greedy execution: 100%|██████████| 10/10 [03:10<00:00, 19.07s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Epoch 53 candidate mean 39.22439193725586, baseline epoch 53 mean 39.125244140625, difference 0.09914779663085938


Rollout greedy execution: 100%|██████████| 10/10 [03:09<00:00, 18.98s/it]
Rollout greedy execution:   0%|          | 0/1 [00:00<?, ?it/s]

Validation score: 39.21689987182617
2021-06-03 17:51:36 Epoch 53: Loss: -881.759521484375: Cost: 42.85986328125


Rollout greedy execution: 100%|██████████| 1/1 [00:09<00:00,  9.56s/it]
batch calculation at epoch 54: 0it [00:00, ?it/s]

Current decode type: sampling


batch calculation at epoch 54: 1it [00:05,  5.02s/it]

grad_global_norm = 84.30272674560547, clipped_norm = 1.0
Epoch 54 (batch = 0): Loss: -536.678955078125: Cost: 41.851539611816406


batch calculation at epoch 54: 8it [00:39,  4.95s/it]
Rollout greedy execution:   0%|          | 0/10 [00:00<?, ?it/s]

Evaluating candidate model on baseline dataset (callback epoch = 54)
