In [13]:
import math as m
import numpy as np
import random as r
import matplotlib.pyplot as plt
import sys

In [14]:
import torch
from torch import nn
from torch import optim
from torch.optim.lr_scheduler import MultiStepLR

In [15]:
from nflows.flows.base import Flow
from nflows.distributions.uniform import BoxUniform
from nflows.transforms.base import CompositeTransform
from nflows.transforms.autoregressive import MaskedPiecewiseRationalQuadraticAutoregressiveTransform
from nflows.transforms.autoregressive import MaskedPiecewiseQuadraticAutoregressiveTransform
from nflows.transforms.permutations import ReversePermutation
from nflows.transforms.permutations import RandomPermutation
from nflows.transforms.splines.rational_quadratic import rational_quadratic_spline
from torch.utils.tensorboard import SummaryWriter

In [16]:
import subprocess
import time
import os
from copy import deepcopy
import math as m
import gc

## TensorBoard writer for loss logging

In [None]:
# TensorBoard summary writer, constructed without arguments (library defaults apply).
# The training loop logs "Loss_train", "Loss_test" and "Effective_sample_size" through it.
writer = SummaryWriter()

## GPU/CPU selection

In [None]:
# Pick the compute device.
# Preference order: second GPU ("cuda:1", the original choice), then the first GPU,
# then the CPU. This keeps the notebook runnable on hosts with fewer than two GPUs
# without hand-editing the cell.
if torch.cuda.is_available() and torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Show the selected device as the cell output.
device

## Hyperparameters

In [None]:
# --- Flow architecture ---
n_RQS_knots = 10     # passed as num_bins to every rational-quadratic spline transform
n_made_layers = 1    # hidden layers per MADE network (num_blocks is n_made_layers - 1)
n_made_units = 100   # hidden_features of every MADE network
n_flow_layers = 6    # number of (permutation, autoregressive transform) pairs in the flow

# --- Optimisation ---
batch_size = 1024
n_epochs = 800
adam_lr = 0.001      # initial learning rate handed to the Adam optimizer

# --- Data set sizes ---
n_train = 1_000_000  # events used for training
n_test = 100_000     # events held out for the validation loss
n_sample = 100_000   # events drawn from the flow for the effective-sample-size evaluation

## Load the training data

In [None]:
# Read the comma-separated sample file into a NumPy array.
samples = np.genfromtxt("data/unweighted_samples.csv", delimiter=',')

# The file has to provide enough rows for the training and the test set together.
if samples.shape[0] < n_train + n_test:
    raise Exception("Not enough training data")

## Split into training and test sets

In [None]:
def _to_device_tensor(rows):
    """Copy the given rows into a float32 tensor that lives on `device`."""
    return torch.tensor(rows, dtype=torch.float32, device=device)


# First n_train rows are the training set, the following n_test rows the test set.
train_samples = _to_device_tensor(samples[:n_train])
test_samples = _to_device_tensor(samples[n_train:n_train+n_test])

# The NumPy array is no longer needed once both tensors exist; drop it and
# run the garbage collector (its return value is the cell output).
del samples
gc.collect()

67

## Set up the flow

In [None]:
# Number of features per event = number of columns of the training tensor.
event_dim = train_samples.shape[1]

# Base distribution: uniform on the box between the all-zeros and all-ones vectors.
base_dist = BoxUniform(torch.zeros(event_dim), torch.ones(event_dim))

# Each flow layer is a random feature permutation followed by a masked
# autoregressive rational-quadratic spline transform. The construction order
# (permutation first, then transform) is kept so the random number stream is
# consumed in the same sequence.
transforms = []
for _ in range(n_flow_layers):
    transforms.extend([
        RandomPermutation(features=event_dim),
        MaskedPiecewiseRationalQuadraticAutoregressiveTransform(
            features=event_dim,
            hidden_features=n_made_units,
            num_bins=n_RQS_knots,
            num_blocks=n_made_layers-1,
            tails="constrained",
            use_residual_blocks=False
        ),
    ])
transform = CompositeTransform(transforms)

flow = Flow(transform, base_dist).to(device)
optimizer = optim.Adam(flow.parameters(), lr=adam_lr)

# Learning-rate schedule: multiply the rate by 0.5 at epochs 350, 425, ..., 800
# (the scheduler is stepped once per epoch in the training loop).
scheduler = MultiStepLR(optimizer, milestones=list(range(350, 801, 75)), gamma=0.5)

## Training

In [None]:
# Training driver.
#
# Per epoch:
#   1. one pass over the shuffled training set, minimising the negative mean log-likelihood;
#   2. validation loss on the held-out test set (best model is checkpointed);
#   3. effective sample size (ESS) of flow samples, computed by the external
#      ME_VEGAS evaluator (best model is checkpointed).
# All three metrics are logged to TensorBoard. The final model is saved after the last epoch.

data_size = train_samples.shape[0]
n_batches = m.ceil(data_size/batch_size)

data_size_validation = test_samples.shape[0]
n_batches_validate = m.ceil(data_size_validation/batch_size)

if data_size == 0 or data_size_validation == 0:
    raise ValueError("Training and test sets must both be non-empty")

best_validation_loss = np.inf
best_ess = 0

# External evaluator and the scratch files it reads.
evaluator_path = os.path.join(os.getcwd(), "ME_VEGAS", "compute_metrics_from_likelihoods")
samples_file = "/tmp/samples_file.csv"
llhs_file = "/tmp/llhs_file.csv"

for epoch in range(n_epochs):

    # Fresh random order of the training events for this epoch
    permutation = torch.randperm(data_size, device=device)

    # Loop over batches
    cum_loss = 0.0   # sample-weighted running mean of the training loss
    n_seen = 0       # number of training events processed so far in this epoch
    for batch in range(n_batches):
        # Set up the batch. The slice end is exclusive, so it must be clamped to
        # data_size (not data_size-1); otherwise the last event is dropped and the
        # final batch can even be empty, which yields a NaN loss.
        batch_begin = batch*batch_size
        batch_end   = min( (batch+1)*batch_size, data_size )
        indices = permutation[batch_begin:batch_end]
        samples_batch = train_samples[indices]

        # Take a step
        optimizer.zero_grad()
        loss = -(flow.log_prob(inputs=samples_batch)).mean()
        loss.backward()
        optimizer.step()

        # Update the running mean, weighting each batch by its size so the
        # (smaller) last batch does not count as much as a full one.
        n_batch = samples_batch.shape[0]
        cum_loss = (cum_loss*n_seen + loss.item()*n_batch)/(n_seen + n_batch)
        n_seen += n_batch

        if batch%25 == 0:
            print("epoch = ", epoch, "batch = ", batch, "/", n_batches, "loss = ", cum_loss)

    writer.add_scalar("Loss_train", cum_loss, epoch)
    scheduler.step()


    # ---------- Compute validation loss -----------
    # Sum the log-likelihoods over all test events and divide once at the end,
    # which gives the exact per-event mean regardless of the last batch's size.
    validation_log_prob_sum = 0.0
    with torch.no_grad():
        for batch in range(n_batches_validate):
            batch_begin = batch*batch_size
            batch_end = min( (batch+1)*batch_size, data_size_validation )
            samples_batch = test_samples[batch_begin:batch_end]
            validation_log_prob_sum += flow.log_prob(samples_batch).sum().item()
    validation_loss = -validation_log_prob_sum/data_size_validation

    print("Validation loss = ", validation_loss)
    writer.add_scalar("Loss_test", validation_loss, epoch)

    if validation_loss < best_validation_loss:
        torch.save(flow, "flow_model_unweighted_best_validation.pt")
        best_validation_loss = validation_loss


    # ---------- Compute effective sample size ----------
    # generate samples and evaluate llhs
    with torch.no_grad():
        samples = flow.sample(n_sample)
        llhs = flow.log_prob(samples)

    # Store files (likelihoods, not log-likelihoods, are written)
    np.savetxt(samples_file, samples.cpu().numpy(), delimiter=',')
    np.savetxt(llhs_file, np.exp(llhs.cpu().numpy()), delimiter=',')

    # Run the evaluator. An argument list (no shell) is used so that paths
    # containing spaces or shell metacharacters are passed through unchanged.
    result = subprocess.run(
        [evaluator_path, samples_file, llhs_file],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    lines = result.stdout.decode('ascii').split("\n")

    # The ESS is read from the last space-separated token of the third output line.
    try:
        ess = float(lines[2].split(' ')[-1])
    except (IndexError, ValueError) as err:
        raise RuntimeError(
            "Could not parse the effective sample size from the evaluator output "
            "(return code {}): stdout={!r}, stderr={!r}".format(
                result.returncode, result.stdout, result.stderr
            )
        ) from err

    print("Effective sample size = ", ess)
    writer.add_scalar("Effective_sample_size", ess, epoch)

    if ess > best_ess:
        torch.save(flow, "flow_model_unweighted_best_ess.pt")
        best_ess = ess

torch.save(flow, "flow_model_unweighted_final.pt")

# Make sure all logged scalars reach the event file.
writer.flush()

epoch =  0 batch =  0 / 977 loss =  0.36800092458724976
epoch =  0 batch =  25 / 977 loss =  -4.81019204797653
epoch =  0 batch =  50 / 977 loss =  -7.429467955932896
epoch =  0 batch =  75 / 977 loss =  -9.656738750048373
epoch =  0 batch =  100 / 977 loss =  -11.553332369958998
epoch =  0 batch =  125 / 977 loss =  -13.018490877298134
epoch =  0 batch =  150 / 977 loss =  -14.256637354461562
epoch =  0 batch =  175 / 977 loss =  -15.289999546567826
epoch =  0 batch =  200 / 977 loss =  -16.13614581169477
epoch =  0 batch =  225 / 977 loss =  -16.82193415717478
epoch =  0 batch =  250 / 977 loss =  -17.384208729958168
epoch =  0 batch =  275 / 977 loss =  -17.858309729753646
epoch =  0 batch =  300 / 977 loss =  -18.262923184769907
epoch =  0 batch =  325 / 977 loss =  -18.609695450248548
epoch =  0 batch =  350 / 977 loss =  -18.9011194314967
epoch =  0 batch =  375 / 977 loss =  -19.167159768772585
epoch =  0 batch =  400 / 977 loss =  -19.396337840585357
epoch =  0 batch =  425 / 9

epoch =  3 batch =  475 / 977 loss =  -23.405365010269556
epoch =  3 batch =  500 / 977 loss =  -23.407192241645866
epoch =  3 batch =  525 / 977 loss =  -23.407611176088288
epoch =  3 batch =  550 / 977 loss =  -23.409256749923355
epoch =  3 batch =  575 / 977 loss =  -23.409335338407104
epoch =  3 batch =  600 / 977 loss =  -23.409225086205815
epoch =  3 batch =  625 / 977 loss =  -23.409449327487163
epoch =  3 batch =  650 / 977 loss =  -23.4092820251043
epoch =  3 batch =  675 / 977 loss =  -23.407976731746164
epoch =  3 batch =  700 / 977 loss =  -23.409137366671715
epoch =  3 batch =  725 / 977 loss =  -23.40938885743954
epoch =  3 batch =  750 / 977 loss =  -23.410999828902455
epoch =  3 batch =  775 / 977 loss =  -23.411625835084433
epoch =  3 batch =  800 / 977 loss =  -23.412218939201374
epoch =  3 batch =  825 / 977 loss =  -23.409809632105063
epoch =  3 batch =  850 / 977 loss =  -23.411027950909958
epoch =  3 batch =  875 / 977 loss =  -23.411486893484046
epoch =  3 batch 

epoch =  6 batch =  950 / 977 loss =  -23.504544654228464
epoch =  6 batch =  975 / 977 loss =  -23.503706087831596
Validation loss =  -23.51233673095703
Effective sample size =  0.385979
epoch =  7 batch =  0 / 977 loss =  -23.685314178466797
epoch =  7 batch =  25 / 977 loss =  -23.513135029719425
epoch =  7 batch =  50 / 977 loss =  -23.506691053801887
epoch =  7 batch =  75 / 977 loss =  -23.50863308655588
epoch =  7 batch =  100 / 977 loss =  -23.51628041031337
epoch =  7 batch =  125 / 977 loss =  -23.51639723399329
epoch =  7 batch =  150 / 977 loss =  -23.51824761700157
epoch =  7 batch =  175 / 977 loss =  -23.517819957299672
epoch =  7 batch =  200 / 977 loss =  -23.51580271554824
epoch =  7 batch =  225 / 977 loss =  -23.512810226035334
epoch =  7 batch =  250 / 977 loss =  -23.508142805669415
epoch =  7 batch =  275 / 977 loss =  -23.507923540861714
epoch =  7 batch =  300 / 977 loss =  -23.509312195635324
epoch =  7 batch =  325 / 977 loss =  -23.513172974615753
epoch =  7

epoch =  10 batch =  400 / 977 loss =  -23.550535073601402
epoch =  10 batch =  425 / 977 loss =  -23.551118613408768
epoch =  10 batch =  450 / 977 loss =  -23.550273463889926
epoch =  10 batch =  475 / 977 loss =  -23.551137996320968
epoch =  10 batch =  500 / 977 loss =  -23.55113951174799
epoch =  10 batch =  525 / 977 loss =  -23.551414156141394
epoch =  10 batch =  550 / 977 loss =  -23.55144124455115
epoch =  10 batch =  575 / 977 loss =  -23.551177521546695
epoch =  10 batch =  600 / 977 loss =  -23.55071445471437
epoch =  10 batch =  625 / 977 loss =  -23.550067575594877
epoch =  10 batch =  650 / 977 loss =  -23.5494300238731
epoch =  10 batch =  675 / 977 loss =  -23.549393989630723
epoch =  10 batch =  700 / 977 loss =  -23.549039522353347
epoch =  10 batch =  725 / 977 loss =  -23.548361660035212
epoch =  10 batch =  750 / 977 loss =  -23.54870398987469
epoch =  10 batch =  775 / 977 loss =  -23.548067916299903
epoch =  10 batch =  800 / 977 loss =  -23.547802126214172
epo

epoch =  13 batch =  825 / 977 loss =  -23.565065342346642
epoch =  13 batch =  850 / 977 loss =  -23.565927187228183
epoch =  13 batch =  875 / 977 loss =  -23.566437261834
epoch =  13 batch =  900 / 977 loss =  -23.567183724254146
epoch =  13 batch =  925 / 977 loss =  -23.567329503547562
epoch =  13 batch =  950 / 977 loss =  -23.566993147792886
epoch =  13 batch =  975 / 977 loss =  -23.566705652924842
Validation loss =  -23.55713653564453
Effective sample size =  0.43016
epoch =  14 batch =  0 / 977 loss =  -23.453895568847656
epoch =  14 batch =  25 / 977 loss =  -23.61379205263578
epoch =  14 batch =  50 / 977 loss =  -23.590497708788106
epoch =  14 batch =  75 / 977 loss =  -23.59002369328549
epoch =  14 batch =  100 / 977 loss =  -23.580705057276358
epoch =  14 batch =  125 / 977 loss =  -23.587278683980305
epoch =  14 batch =  150 / 977 loss =  -23.581838923574285
epoch =  14 batch =  175 / 977 loss =  -23.582588997754183
epoch =  14 batch =  200 / 977 loss =  -23.57931337309

epoch =  17 batch =  225 / 977 loss =  -23.860578891450317
epoch =  17 batch =  250 / 977 loss =  -23.86114612138603
epoch =  17 batch =  275 / 977 loss =  -23.857850669086837
epoch =  17 batch =  300 / 977 loss =  -23.854412560447106
epoch =  17 batch =  325 / 977 loss =  -23.856879790136414
epoch =  17 batch =  350 / 977 loss =  -23.853758684250703
epoch =  17 batch =  375 / 977 loss =  -23.855858285376367
epoch =  17 batch =  400 / 977 loss =  -23.858075753114466
epoch =  17 batch =  425 / 977 loss =  -23.8592807608591
epoch =  17 batch =  450 / 977 loss =  -23.85954107043484
epoch =  17 batch =  475 / 977 loss =  -23.860495330906716
epoch =  17 batch =  500 / 977 loss =  -23.86207898457845
epoch =  17 batch =  525 / 977 loss =  -23.861188304741574
epoch =  17 batch =  550 / 977 loss =  -23.860505462341862
epoch =  17 batch =  575 / 977 loss =  -23.86078833871418
epoch =  17 batch =  600 / 977 loss =  -23.85925169197374
epoch =  17 batch =  625 / 977 loss =  -23.859879627776227
epoc

epoch =  20 batch =  650 / 977 loss =  -23.88461821419853
epoch =  20 batch =  675 / 977 loss =  -23.88310180754353
epoch =  20 batch =  700 / 977 loss =  -23.881978898857884
epoch =  20 batch =  725 / 977 loss =  -23.880329518278785
epoch =  20 batch =  750 / 977 loss =  -23.880962397223637
epoch =  20 batch =  775 / 977 loss =  -23.881525354287074
epoch =  20 batch =  800 / 977 loss =  -23.880918328979355
epoch =  20 batch =  825 / 977 loss =  -23.882135885391065
epoch =  20 batch =  850 / 977 loss =  -23.881828128801263
epoch =  20 batch =  875 / 977 loss =  -23.88119542544292
epoch =  20 batch =  900 / 977 loss =  -23.880714251383836
epoch =  20 batch =  925 / 977 loss =  -23.882014941963224
epoch =  20 batch =  950 / 977 loss =  -23.883007823731507
epoch =  20 batch =  975 / 977 loss =  -23.884188167384426
Validation loss =  -23.885509490966797
Effective sample size =  0.719049
epoch =  21 batch =  0 / 977 loss =  -23.969688415527344
epoch =  21 batch =  25 / 977 loss =  -23.92277

epoch =  24 batch =  25 / 977 loss =  -23.903705156766453
epoch =  24 batch =  50 / 977 loss =  -23.902322619569066
epoch =  24 batch =  75 / 977 loss =  -23.911163505754974
epoch =  24 batch =  100 / 977 loss =  -23.918958172939792
epoch =  24 batch =  125 / 977 loss =  -23.91791826581198
epoch =  24 batch =  150 / 977 loss =  -23.914459796930778
epoch =  24 batch =  175 / 977 loss =  -23.907687393101778
epoch =  24 batch =  200 / 977 loss =  -23.913132672286153
epoch =  24 batch =  225 / 977 loss =  -23.913526425319436
epoch =  24 batch =  250 / 977 loss =  -23.91158518848191
epoch =  24 batch =  275 / 977 loss =  -23.912412332451865
epoch =  24 batch =  300 / 977 loss =  -23.911804623778085
epoch =  24 batch =  325 / 977 loss =  -23.912879329517583
epoch =  24 batch =  350 / 977 loss =  -23.91142189808382
epoch =  24 batch =  375 / 977 loss =  -23.91208865287457
epoch =  24 batch =  400 / 977 loss =  -23.908833415728267
epoch =  24 batch =  425 / 977 loss =  -23.9048707048658
epoch 

epoch =  27 batch =  450 / 977 loss =  -23.925358808225653
epoch =  27 batch =  475 / 977 loss =  -23.925187090865712
epoch =  27 batch =  500 / 977 loss =  -23.926020144464495
epoch =  27 batch =  525 / 977 loss =  -23.92392105929299
epoch =  27 batch =  550 / 977 loss =  -23.92456138458529
epoch =  27 batch =  575 / 977 loss =  -23.923456473482975
epoch =  27 batch =  600 / 977 loss =  -23.923204190322444
epoch =  27 batch =  625 / 977 loss =  -23.921604385010344
epoch =  27 batch =  650 / 977 loss =  -23.921587151499562
epoch =  27 batch =  675 / 977 loss =  -23.922138645804132
epoch =  27 batch =  700 / 977 loss =  -23.923427407649715
epoch =  27 batch =  725 / 977 loss =  -23.92285284194736
epoch =  27 batch =  750 / 977 loss =  -23.922273064421592
epoch =  27 batch =  775 / 977 loss =  -23.923051789863823
epoch =  27 batch =  800 / 977 loss =  -23.92193732815289
epoch =  27 batch =  825 / 977 loss =  -23.922028026626993
epoch =  27 batch =  850 / 977 loss =  -23.92288926121211
ep

epoch =  30 batch =  875 / 977 loss =  -23.932025839748967
epoch =  30 batch =  900 / 977 loss =  -23.930693398834443
epoch =  30 batch =  925 / 977 loss =  -23.930228179783313
epoch =  30 batch =  950 / 977 loss =  -23.929533227385008
epoch =  30 batch =  975 / 977 loss =  -23.929185521407184
Validation loss =  -23.918058395385742
Effective sample size =  0.759005
epoch =  31 batch =  0 / 977 loss =  -23.884353637695312
epoch =  31 batch =  25 / 977 loss =  -23.961581817040077
epoch =  31 batch =  50 / 977 loss =  -23.96816852045994
epoch =  31 batch =  75 / 977 loss =  -23.964328991739375
epoch =  31 batch =  100 / 977 loss =  -23.95488178139866
epoch =  31 batch =  125 / 977 loss =  -23.952315981425937
epoch =  31 batch =  150 / 977 loss =  -23.94683562525061
epoch =  31 batch =  175 / 977 loss =  -23.943870772014964
epoch =  31 batch =  200 / 977 loss =  -23.941623241747198
epoch =  31 batch =  225 / 977 loss =  -23.939665414590753
epoch =  31 batch =  250 / 977 loss =  -23.9468305

epoch =  34 batch =  275 / 977 loss =  -23.931210061778188
epoch =  34 batch =  300 / 977 loss =  -23.93400294281715
epoch =  34 batch =  325 / 977 loss =  -23.932342845238047
epoch =  34 batch =  350 / 977 loss =  -23.935750591788867
epoch =  34 batch =  375 / 977 loss =  -23.93607470837044
epoch =  34 batch =  400 / 977 loss =  -23.932223479349407
epoch =  34 batch =  425 / 977 loss =  -23.932035849127963
epoch =  34 batch =  450 / 977 loss =  -23.932562381888168
epoch =  34 batch =  475 / 977 loss =  -23.933793240234625
epoch =  34 batch =  500 / 977 loss =  -23.93366140186667
epoch =  34 batch =  525 / 977 loss =  -23.934006915799554
epoch =  34 batch =  550 / 977 loss =  -23.93658091757994
epoch =  34 batch =  575 / 977 loss =  -23.936482300360993
epoch =  34 batch =  600 / 977 loss =  -23.935877471517603
epoch =  34 batch =  625 / 977 loss =  -23.937553844512838
epoch =  34 batch =  650 / 977 loss =  -23.936974277877212
epoch =  34 batch =  675 / 977 loss =  -23.9366358892452
epo

epoch =  37 batch =  700 / 977 loss =  -23.941434117424674
epoch =  37 batch =  725 / 977 loss =  -23.940284516200546
epoch =  37 batch =  750 / 977 loss =  -23.94086045622032
epoch =  37 batch =  775 / 977 loss =  -23.943404244393417
epoch =  37 batch =  800 / 977 loss =  -23.944197984521605
epoch =  37 batch =  825 / 977 loss =  -23.944488222893444
epoch =  37 batch =  850 / 977 loss =  -23.944948714152066
epoch =  37 batch =  875 / 977 loss =  -23.94548828743364
epoch =  37 batch =  900 / 977 loss =  -23.94511677131272
epoch =  37 batch =  925 / 977 loss =  -23.94462626850631
epoch =  37 batch =  950 / 977 loss =  -23.943524943290573
epoch =  37 batch =  975 / 977 loss =  -23.94365405645527
Validation loss =  -23.933000564575195
Effective sample size =  0.764826
epoch =  38 batch =  0 / 977 loss =  -23.783916473388672
epoch =  38 batch =  25 / 977 loss =  -23.902191455547626
epoch =  38 batch =  50 / 977 loss =  -23.920265571743833
epoch =  38 batch =  75 / 977 loss =  -23.929532578

epoch =  41 batch =  100 / 977 loss =  -23.94638501535548
epoch =  41 batch =  125 / 977 loss =  -23.9464006726704
epoch =  41 batch =  150 / 977 loss =  -23.948021326633476
epoch =  41 batch =  175 / 977 loss =  -23.951776612888672
epoch =  41 batch =  200 / 977 loss =  -23.952512949853386
epoch =  41 batch =  225 / 977 loss =  -23.947873444683776
epoch =  41 batch =  250 / 977 loss =  -23.95209260765774
epoch =  41 batch =  275 / 977 loss =  -23.95263537116672
epoch =  41 batch =  300 / 977 loss =  -23.951093470931447
epoch =  41 batch =  325 / 977 loss =  -23.951027238295847
epoch =  41 batch =  350 / 977 loss =  -23.947570594287665
epoch =  41 batch =  375 / 977 loss =  -23.942669107558878
epoch =  41 batch =  400 / 977 loss =  -23.94454022357589
epoch =  41 batch =  425 / 977 loss =  -23.94665807177763
epoch =  41 batch =  450 / 977 loss =  -23.947326719364412
epoch =  41 batch =  475 / 977 loss =  -23.94704618774542
epoch =  41 batch =  500 / 977 loss =  -23.947883609764116
epoch

epoch =  44 batch =  525 / 977 loss =  -23.95681141537857
epoch =  44 batch =  550 / 977 loss =  -23.95659574103657
epoch =  44 batch =  575 / 977 loss =  -23.955755031771112
epoch =  44 batch =  600 / 977 loss =  -23.95695201093065
epoch =  44 batch =  625 / 977 loss =  -23.95749752361551
epoch =  44 batch =  650 / 977 loss =  -23.956333233647243
epoch =  44 batch =  675 / 977 loss =  -23.95555223938979
epoch =  44 batch =  700 / 977 loss =  -23.955034002938024
epoch =  44 batch =  725 / 977 loss =  -23.954775762952043
epoch =  44 batch =  750 / 977 loss =  -23.954495233162405
epoch =  44 batch =  775 / 977 loss =  -23.953551749593185
epoch =  44 batch =  800 / 977 loss =  -23.9537999204333
epoch =  44 batch =  825 / 977 loss =  -23.95329686227201
epoch =  44 batch =  850 / 977 loss =  -23.953295395040886
epoch =  44 batch =  875 / 977 loss =  -23.95535115237648
epoch =  44 batch =  900 / 977 loss =  -23.955320823999656
epoch =  44 batch =  925 / 977 loss =  -23.95414493511355
epoch =

epoch =  47 batch =  950 / 977 loss =  -23.955632924782858
epoch =  47 batch =  975 / 977 loss =  -23.955902541270014
Validation loss =  -23.954763412475586
Effective sample size =  0.805978
epoch =  48 batch =  0 / 977 loss =  -24.11370849609375
epoch =  48 batch =  25 / 977 loss =  -23.94150000352126
epoch =  48 batch =  50 / 977 loss =  -23.947419933244294
epoch =  48 batch =  75 / 977 loss =  -23.942137793490765
epoch =  48 batch =  100 / 977 loss =  -23.940735996359646
epoch =  48 batch =  125 / 977 loss =  -23.93667953733414
epoch =  48 batch =  150 / 977 loss =  -23.940624274954892
epoch =  48 batch =  175 / 977 loss =  -23.940305243838917
epoch =  48 batch =  200 / 977 loss =  -23.94597885739151
epoch =  48 batch =  225 / 977 loss =  -23.946930125751326
epoch =  48 batch =  250 / 977 loss =  -23.945632547021386
epoch =  48 batch =  275 / 977 loss =  -23.944731422092605
epoch =  48 batch =  300 / 977 loss =  -23.950382315043207
epoch =  48 batch =  325 / 977 loss =  -23.95227135

epoch =  51 batch =  350 / 977 loss =  -23.952477474158307
epoch =  51 batch =  375 / 977 loss =  -23.953137702130256
epoch =  51 batch =  400 / 977 loss =  -23.954346402327616
epoch =  51 batch =  425 / 977 loss =  -23.954535367903016
epoch =  51 batch =  450 / 977 loss =  -23.955709749209113
epoch =  51 batch =  475 / 977 loss =  -23.95439548652713
epoch =  51 batch =  500 / 977 loss =  -23.95350631744324
epoch =  51 batch =  525 / 977 loss =  -23.95354516061993
epoch =  51 batch =  550 / 977 loss =  -23.953580028997795
epoch =  51 batch =  575 / 977 loss =  -23.954214155673977
epoch =  51 batch =  600 / 977 loss =  -23.955560528696463
epoch =  51 batch =  625 / 977 loss =  -23.956457573765746
epoch =  51 batch =  650 / 977 loss =  -23.955427081903537
epoch =  51 batch =  675 / 977 loss =  -23.956218279325036
epoch =  51 batch =  700 / 977 loss =  -23.95698218406861
epoch =  51 batch =  725 / 977 loss =  -23.956312106003764
epoch =  51 batch =  750 / 977 loss =  -23.956191518811497
e

epoch =  54 batch =  775 / 977 loss =  -23.965213913278486
epoch =  54 batch =  800 / 977 loss =  -23.965697418289093
epoch =  54 batch =  825 / 977 loss =  -23.964832160432465
epoch =  54 batch =  850 / 977 loss =  -23.96452185634161
epoch =  54 batch =  875 / 977 loss =  -23.964617056389383
epoch =  54 batch =  900 / 977 loss =  -23.963871476388274
epoch =  54 batch =  925 / 977 loss =  -23.964594672102137
epoch =  54 batch =  950 / 977 loss =  -23.964473672219007
epoch =  54 batch =  975 / 977 loss =  -23.963839913977957
Validation loss =  -23.94281578063965
Effective sample size =  0.78692
epoch =  55 batch =  0 / 977 loss =  -24.260353088378906
epoch =  55 batch =  25 / 977 loss =  -23.94321052844708
epoch =  55 batch =  50 / 977 loss =  -23.959979524799422
epoch =  55 batch =  75 / 977 loss =  -23.961296608573516
epoch =  55 batch =  100 / 977 loss =  -23.978783069270662
epoch =  55 batch =  125 / 977 loss =  -23.981674784705746
epoch =  55 batch =  150 / 977 loss =  -23.97274053

epoch =  58 batch =  175 / 977 loss =  -23.959293668920335
epoch =  58 batch =  200 / 977 loss =  -23.961003175422317
epoch =  58 batch =  225 / 977 loss =  -23.962842553062767
epoch =  58 batch =  250 / 977 loss =  -23.962996532242606
epoch =  58 batch =  275 / 977 loss =  -23.96612172195876
epoch =  58 batch =  300 / 977 loss =  -23.965878350394107
epoch =  58 batch =  325 / 977 loss =  -23.968353142767597
epoch =  58 batch =  350 / 977 loss =  -23.968164020114475
epoch =  58 batch =  375 / 977 loss =  -23.970663278660876
epoch =  58 batch =  400 / 977 loss =  -23.970577406467047
epoch =  58 batch =  425 / 977 loss =  -23.970992258456953
epoch =  58 batch =  450 / 977 loss =  -23.969676383583085
epoch =  58 batch =  475 / 977 loss =  -23.965777076592968
epoch =  58 batch =  500 / 977 loss =  -23.965518155735648
epoch =  58 batch =  525 / 977 loss =  -23.967437435918892
epoch =  58 batch =  550 / 977 loss =  -23.96673893452557
epoch =  58 batch =  575 / 977 loss =  -23.96612648500338


epoch =  61 batch =  600 / 977 loss =  -23.969341208256424
epoch =  61 batch =  625 / 977 loss =  -23.97023339926626
epoch =  61 batch =  650 / 977 loss =  -23.970022479876228
epoch =  61 batch =  675 / 977 loss =  -23.970824805942524
epoch =  61 batch =  700 / 977 loss =  -23.971786580650356
epoch =  61 batch =  725 / 977 loss =  -23.972425739298853
epoch =  61 batch =  750 / 977 loss =  -23.972664951167012
epoch =  61 batch =  775 / 977 loss =  -23.971364235140626
epoch =  61 batch =  800 / 977 loss =  -23.97040966238721
epoch =  61 batch =  825 / 977 loss =  -23.970011221583196
epoch =  61 batch =  850 / 977 loss =  -23.97031303970571
epoch =  61 batch =  875 / 977 loss =  -23.970238415617935
epoch =  61 batch =  900 / 977 loss =  -23.9695705176723
epoch =  61 batch =  925 / 977 loss =  -23.96947920245178
epoch =  61 batch =  950 / 977 loss =  -23.967715551173786
epoch =  61 batch =  975 / 977 loss =  -23.96823950478289
Validation loss =  -23.94127082824707
Effective sample size =  

epoch =  65 batch =  25 / 977 loss =  -24.001794521625225
epoch =  65 batch =  50 / 977 loss =  -23.959871740902173
epoch =  65 batch =  75 / 977 loss =  -23.969297785508004
epoch =  65 batch =  100 / 977 loss =  -23.973459054928014
epoch =  65 batch =  125 / 977 loss =  -23.984699900188144
epoch =  65 batch =  150 / 977 loss =  -23.981783759515015
epoch =  65 batch =  175 / 977 loss =  -23.98183146390048
epoch =  65 batch =  200 / 977 loss =  -23.986548978890944
epoch =  65 batch =  225 / 977 loss =  -23.98853448850919
epoch =  65 batch =  250 / 977 loss =  -23.98493636841793
epoch =  65 batch =  275 / 977 loss =  -23.98491824191549
epoch =  65 batch =  300 / 977 loss =  -23.98264971127938
epoch =  65 batch =  325 / 977 loss =  -23.978360000563544
epoch =  65 batch =  350 / 977 loss =  -23.977880439866983
epoch =  65 batch =  375 / 977 loss =  -23.976111569303146
epoch =  65 batch =  400 / 977 loss =  -23.9774179625095
epoch =  65 batch =  425 / 977 loss =  -23.975967644525806
epoch =

epoch =  68 batch =  450 / 977 loss =  -23.96424235155735
epoch =  68 batch =  475 / 977 loss =  -23.96610844636163
epoch =  68 batch =  500 / 977 loss =  -23.966503234680534
epoch =  68 batch =  525 / 977 loss =  -23.96752587380064
epoch =  68 batch =  550 / 977 loss =  -23.969098044393714
epoch =  68 batch =  575 / 977 loss =  -23.96876058975856
epoch =  68 batch =  600 / 977 loss =  -23.969297564565252
epoch =  68 batch =  625 / 977 loss =  -23.97011524419817
epoch =  68 batch =  650 / 977 loss =  -23.97004639644596
epoch =  68 batch =  675 / 977 loss =  -23.969440127265546
epoch =  68 batch =  700 / 977 loss =  -23.96861012413909
epoch =  68 batch =  725 / 977 loss =  -23.970118971895605
epoch =  68 batch =  750 / 977 loss =  -23.969839710687694
epoch =  68 batch =  775 / 977 loss =  -23.97028358203851
epoch =  68 batch =  800 / 977 loss =  -23.970996299486504
epoch =  68 batch =  825 / 977 loss =  -23.970929755425647
epoch =  68 batch =  850 / 977 loss =  -23.971186166084212
epoch

epoch =  71 batch =  875 / 977 loss =  -23.974195547844175
epoch =  71 batch =  900 / 977 loss =  -23.973773939363436
epoch =  71 batch =  925 / 977 loss =  -23.973676794806
epoch =  71 batch =  950 / 977 loss =  -23.97370674783123
epoch =  71 batch =  975 / 977 loss =  -23.974005824229756
Validation loss =  -23.93160057067871
Effective sample size =  0.764888
epoch =  72 batch =  0 / 977 loss =  -23.84211540222168
epoch =  72 batch =  25 / 977 loss =  -23.95434100811298
epoch =  72 batch =  50 / 977 loss =  -23.951436173682115
epoch =  72 batch =  75 / 977 loss =  -23.965237567299283
epoch =  72 batch =  100 / 977 loss =  -23.972752958241074
epoch =  72 batch =  125 / 977 loss =  -23.982853087167893
epoch =  72 batch =  150 / 977 loss =  -23.980375176233956
epoch =  72 batch =  175 / 977 loss =  -23.981629198247735
epoch =  72 batch =  200 / 977 loss =  -23.97923686848351
epoch =  72 batch =  225 / 977 loss =  -23.97867894805638
epoch =  72 batch =  250 / 977 loss =  -23.9805116767427

epoch =  75 batch =  275 / 977 loss =  -23.985006982001707
epoch =  75 batch =  300 / 977 loss =  -23.984202597228386
epoch =  75 batch =  325 / 977 loss =  -23.98045274231331
epoch =  75 batch =  350 / 977 loss =  -23.979233065222065
epoch =  75 batch =  375 / 977 loss =  -23.97923387872412
epoch =  75 batch =  400 / 977 loss =  -23.981100505724214
epoch =  75 batch =  425 / 977 loss =  -23.980821018487635
epoch =  75 batch =  450 / 977 loss =  -23.980476354019604
epoch =  75 batch =  475 / 977 loss =  -23.98167482744746
epoch =  75 batch =  500 / 977 loss =  -23.98344927728771
epoch =  75 batch =  525 / 977 loss =  -23.981903116059392
epoch =  75 batch =  550 / 977 loss =  -23.980035124153922
epoch =  75 batch =  575 / 977 loss =  -23.97966370979945
epoch =  75 batch =  600 / 977 loss =  -23.97778433928276
epoch =  75 batch =  625 / 977 loss =  -23.978300685699764
epoch =  75 batch =  650 / 977 loss =  -23.979512128229345
epoch =  75 batch =  675 / 977 loss =  -23.977584291492004
epo

epoch =  78 batch =  700 / 977 loss =  -23.98124003988529
epoch =  78 batch =  725 / 977 loss =  -23.98107311416919
epoch =  78 batch =  750 / 977 loss =  -23.979537750528582
epoch =  78 batch =  775 / 977 loss =  -23.979756362659405
epoch =  78 batch =  800 / 977 loss =  -23.98063530368304
epoch =  78 batch =  825 / 977 loss =  -23.980591245194027
epoch =  78 batch =  850 / 977 loss =  -23.980651949603455
epoch =  78 batch =  875 / 977 loss =  -23.981965552726283
epoch =  78 batch =  900 / 977 loss =  -23.9817000208102
epoch =  78 batch =  925 / 977 loss =  -23.980507854766508
epoch =  78 batch =  950 / 977 loss =  -23.97986586537646
epoch =  78 batch =  975 / 977 loss =  -23.980332155696672
Validation loss =  -23.943500518798828
Effective sample size =  0.777562
epoch =  79 batch =  0 / 977 loss =  -23.936145782470703
epoch =  79 batch =  25 / 977 loss =  -23.954986278827374
epoch =  79 batch =  50 / 977 loss =  -23.979922612508137
epoch =  79 batch =  75 / 977 loss =  -23.9923884492

epoch =  82 batch =  100 / 977 loss =  -23.980388207010705
epoch =  82 batch =  125 / 977 loss =  -23.987586006285653
epoch =  82 batch =  150 / 977 loss =  -23.98407617783704
epoch =  82 batch =  175 / 977 loss =  -23.989328048445962
epoch =  82 batch =  200 / 977 loss =  -23.99417629526622
epoch =  82 batch =  225 / 977 loss =  -23.992050035864906
epoch =  82 batch =  250 / 977 loss =  -23.99082815314669
epoch =  82 batch =  275 / 977 loss =  -23.994496877642646
epoch =  82 batch =  300 / 977 loss =  -23.991344800423153
epoch =  82 batch =  325 / 977 loss =  -23.989222497296474
epoch =  82 batch =  350 / 977 loss =  -23.987586154557363
epoch =  82 batch =  375 / 977 loss =  -23.984690194434307
epoch =  82 batch =  400 / 977 loss =  -23.984440249397867
epoch =  82 batch =  425 / 977 loss =  -23.985243358522514
epoch =  82 batch =  450 / 977 loss =  -23.98509248450167
epoch =  82 batch =  475 / 977 loss =  -23.985659014277097
epoch =  82 batch =  500 / 977 loss =  -23.986242134414034
e

epoch =  85 batch =  525 / 977 loss =  -23.98514910795842
epoch =  85 batch =  550 / 977 loss =  -23.98409245278138
epoch =  85 batch =  575 / 977 loss =  -23.984693086809575
epoch =  85 batch =  600 / 977 loss =  -23.984873436850034
epoch =  85 batch =  625 / 977 loss =  -23.98536115713393
epoch =  85 batch =  650 / 977 loss =  -23.98677841163085
epoch =  85 batch =  675 / 977 loss =  -23.987158044555482
epoch =  85 batch =  700 / 977 loss =  -23.986999405603775
epoch =  85 batch =  725 / 977 loss =  -23.98708092870791
epoch =  85 batch =  750 / 977 loss =  -23.986514397530993
epoch =  85 batch =  775 / 977 loss =  -23.98682492787076
epoch =  85 batch =  800 / 977 loss =  -23.986325836657883
epoch =  85 batch =  825 / 977 loss =  -23.983927578960724
epoch =  85 batch =  850 / 977 loss =  -23.983506405535653
epoch =  85 batch =  875 / 977 loss =  -23.98300579367163
epoch =  85 batch =  900 / 977 loss =  -23.98230216294097
epoch =  85 batch =  925 / 977 loss =  -23.982801363226148
epoch

epoch =  88 batch =  950 / 977 loss =  -23.985243004077887
epoch =  88 batch =  975 / 977 loss =  -23.98432388266581
Validation loss =  -23.93516731262207
Effective sample size =  0.758699
epoch =  89 batch =  0 / 977 loss =  -23.974138259887695
epoch =  89 batch =  25 / 977 loss =  -24.01251983642578
epoch =  89 batch =  50 / 977 loss =  -23.992695901908128
epoch =  89 batch =  75 / 977 loss =  -23.986560269405967
epoch =  89 batch =  100 / 977 loss =  -23.983632134919116
epoch =  89 batch =  125 / 977 loss =  -23.975885966467477
epoch =  89 batch =  150 / 977 loss =  -23.976777133562717
epoch =  89 batch =  175 / 977 loss =  -23.982138265262947
epoch =  89 batch =  200 / 977 loss =  -23.989140686111067
epoch =  89 batch =  225 / 977 loss =  -23.987214451342552
epoch =  89 batch =  250 / 977 loss =  -23.986502142066495
epoch =  89 batch =  275 / 977 loss =  -23.988617827926852
epoch =  89 batch =  300 / 977 loss =  -23.989095136572757
epoch =  89 batch =  325 / 977 loss =  -23.9880238

epoch =  92 batch =  350 / 977 loss =  -23.980943522222358
epoch =  92 batch =  375 / 977 loss =  -23.98210364199699
epoch =  92 batch =  400 / 977 loss =  -23.983652828340215
epoch =  92 batch =  425 / 977 loss =  -23.98208247887696
epoch =  92 batch =  450 / 977 loss =  -23.982575257971646
epoch =  92 batch =  475 / 977 loss =  -23.983098991778714
epoch =  92 batch =  500 / 977 loss =  -23.98279990693052
epoch =  92 batch =  525 / 977 loss =  -23.981690076820747
epoch =  92 batch =  550 / 977 loss =  -23.9845581470081
epoch =  92 batch =  575 / 977 loss =  -23.986998819642597
epoch =  92 batch =  600 / 977 loss =  -23.98821812342487
epoch =  92 batch =  625 / 977 loss =  -23.99031436481415
epoch =  92 batch =  650 / 977 loss =  -23.990399593581806
epoch =  92 batch =  675 / 977 loss =  -23.989616230394713
epoch =  92 batch =  700 / 977 loss =  -23.988648946547123
epoch =  92 batch =  725 / 977 loss =  -23.98792493047792
epoch =  92 batch =  750 / 977 loss =  -23.988035362030303
epoch

epoch =  95 batch =  775 / 977 loss =  -23.98765774363094
epoch =  95 batch =  800 / 977 loss =  -23.988168561652053
epoch =  95 batch =  825 / 977 loss =  -23.98719629421649
epoch =  95 batch =  850 / 977 loss =  -23.98854537183893
epoch =  95 batch =  875 / 977 loss =  -23.988311395253213
epoch =  95 batch =  900 / 977 loss =  -23.98812193452451
epoch =  95 batch =  925 / 977 loss =  -23.987521626779372
epoch =  95 batch =  950 / 977 loss =  -23.987978980368236
epoch =  95 batch =  975 / 977 loss =  -23.987460224354844
Validation loss =  -23.95113182067871
Effective sample size =  0.792323
epoch =  96 batch =  0 / 977 loss =  -24.079275131225586
epoch =  96 batch =  25 / 977 loss =  -24.012210625868576
epoch =  96 batch =  50 / 977 loss =  -24.016770306755514
epoch =  96 batch =  75 / 977 loss =  -24.002339036841146
epoch =  96 batch =  100 / 977 loss =  -23.99251628158117
epoch =  96 batch =  125 / 977 loss =  -23.994377817426415
epoch =  96 batch =  150 / 977 loss =  -23.9889966952

epoch =  99 batch =  175 / 977 loss =  -23.974943464452576
epoch =  99 batch =  200 / 977 loss =  -23.977614255686902
epoch =  99 batch =  225 / 977 loss =  -23.986117801835057
epoch =  99 batch =  250 / 977 loss =  -23.98688754047531
epoch =  99 batch =  275 / 977 loss =  -23.985611901767015
epoch =  99 batch =  300 / 977 loss =  -23.986017746782778
epoch =  99 batch =  325 / 977 loss =  -23.98592015716927
epoch =  99 batch =  350 / 977 loss =  -23.985903226412248
epoch =  99 batch =  375 / 977 loss =  -23.987072117785182
epoch =  99 batch =  400 / 977 loss =  -23.98636845044066
epoch =  99 batch =  425 / 977 loss =  -23.98856428531413
epoch =  99 batch =  450 / 977 loss =  -23.98644731525835
epoch =  99 batch =  475 / 977 loss =  -23.98676683121368
epoch =  99 batch =  500 / 977 loss =  -23.986833454368114
epoch =  99 batch =  525 / 977 loss =  -23.986084996067515
epoch =  99 batch =  550 / 977 loss =  -23.985730993336634
epoch =  99 batch =  575 / 977 loss =  -23.98677651087441
epoc

epoch =  102 batch =  550 / 977 loss =  -23.993197608989284
epoch =  102 batch =  575 / 977 loss =  -23.993598613474095
epoch =  102 batch =  600 / 977 loss =  -23.992344000970274
epoch =  102 batch =  625 / 977 loss =  -23.993003479588904
epoch =  102 batch =  650 / 977 loss =  -23.99193510591709
epoch =  102 batch =  675 / 977 loss =  -23.99270539876272
epoch =  102 batch =  700 / 977 loss =  -23.99245812583412
epoch =  102 batch =  725 / 977 loss =  -23.993141024565894
epoch =  102 batch =  750 / 977 loss =  -23.992643234415475
epoch =  102 batch =  775 / 977 loss =  -23.99219071987978
epoch =  102 batch =  800 / 977 loss =  -23.990675679753334
epoch =  102 batch =  825 / 977 loss =  -23.9901263927432
epoch =  102 batch =  850 / 977 loss =  -23.98990646120244
epoch =  102 batch =  875 / 977 loss =  -23.988993895108297
epoch =  102 batch =  900 / 977 loss =  -23.98916922134247
epoch =  102 batch =  925 / 977 loss =  -23.989527129715
epoch =  102 batch =  950 / 977 loss =  -23.9890928

epoch =  105 batch =  925 / 977 loss =  -23.993426975884653
epoch =  105 batch =  950 / 977 loss =  -23.99168967398434
epoch =  105 batch =  975 / 977 loss =  -23.99134923004713
Validation loss =  -23.974735260009766
Effective sample size =  0.829061
epoch =  106 batch =  0 / 977 loss =  -23.845762252807617
epoch =  106 batch =  25 / 977 loss =  -23.960789900559647
epoch =  106 batch =  50 / 977 loss =  -23.98635793199726
epoch =  106 batch =  75 / 977 loss =  -23.983700651871537
epoch =  106 batch =  100 / 977 loss =  -23.987969559017973
epoch =  106 batch =  125 / 977 loss =  -23.982762578933958
epoch =  106 batch =  150 / 977 loss =  -23.98432759417603
epoch =  106 batch =  175 / 977 loss =  -23.990479577671397
epoch =  106 batch =  200 / 977 loss =  -23.988975724177575
epoch =  106 batch =  225 / 977 loss =  -23.990429776959715
epoch =  106 batch =  250 / 977 loss =  -23.989371371934137
epoch =  106 batch =  275 / 977 loss =  -23.989630167035088
epoch =  106 batch =  300 / 977 loss

epoch =  109 batch =  250 / 977 loss =  -23.983904667584536
epoch =  109 batch =  275 / 977 loss =  -23.987290555152338
epoch =  109 batch =  300 / 977 loss =  -23.99159873125957
epoch =  109 batch =  325 / 977 loss =  -23.99293612263686
epoch =  109 batch =  350 / 977 loss =  -23.9944313897027
epoch =  109 batch =  375 / 977 loss =  -23.994641623598472
epoch =  109 batch =  400 / 977 loss =  -23.995027813233648
epoch =  109 batch =  425 / 977 loss =  -23.994965177186785
epoch =  109 batch =  450 / 977 loss =  -23.99601718961797
epoch =  109 batch =  475 / 977 loss =  -23.99501425879343
epoch =  109 batch =  500 / 977 loss =  -23.996199027268954
epoch =  109 batch =  525 / 977 loss =  -23.994297596891577
epoch =  109 batch =  550 / 977 loss =  -23.995286654214468
epoch =  109 batch =  575 / 977 loss =  -23.994378801849162
epoch =  109 batch =  600 / 977 loss =  -23.992369942181128
epoch =  109 batch =  625 / 977 loss =  -23.992229248388128
epoch =  109 batch =  650 / 977 loss =  -23.99

epoch =  112 batch =  600 / 977 loss =  -23.991964416376963
epoch =  112 batch =  625 / 977 loss =  -23.991803144875433
epoch =  112 batch =  650 / 977 loss =  -23.99110865849322
epoch =  112 batch =  675 / 977 loss =  -23.9896144810513
epoch =  112 batch =  700 / 977 loss =  -23.990005999250872
epoch =  112 batch =  725 / 977 loss =  -23.98952920246388
epoch =  112 batch =  750 / 977 loss =  -23.98919224135886
epoch =  112 batch =  775 / 977 loss =  -23.99037540081851
epoch =  112 batch =  800 / 977 loss =  -23.990801386172414
epoch =  112 batch =  825 / 977 loss =  -23.990178740919365
epoch =  112 batch =  850 / 977 loss =  -23.991699090995752
epoch =  112 batch =  875 / 977 loss =  -23.99143317845315
epoch =  112 batch =  900 / 977 loss =  -23.991068197540393
epoch =  112 batch =  925 / 977 loss =  -23.992021605983933
epoch =  112 batch =  950 / 977 loss =  -23.99188194515578
epoch =  112 batch =  975 / 977 loss =  -23.99253454755565
Validation loss =  -23.97553253173828
Effective s

epoch =  115 batch =  975 / 977 loss =  -23.995341416265145
Validation loss =  -23.95069122314453
Effective sample size =  0.784301
epoch =  116 batch =  0 / 977 loss =  -23.970335006713867
epoch =  116 batch =  25 / 977 loss =  -23.970575626079853
epoch =  116 batch =  50 / 977 loss =  -23.960825826607497
epoch =  116 batch =  75 / 977 loss =  -23.964290518509713
epoch =  116 batch =  100 / 977 loss =  -23.96937494938917
epoch =  116 batch =  125 / 977 loss =  -23.971660190158424
epoch =  116 batch =  150 / 977 loss =  -23.973694618174576
epoch =  116 batch =  175 / 977 loss =  -23.983348586342558
epoch =  116 batch =  200 / 977 loss =  -23.99075151320121
epoch =  116 batch =  225 / 977 loss =  -23.985600614969712
epoch =  116 batch =  250 / 977 loss =  -23.98403292348186
epoch =  116 batch =  275 / 977 loss =  -23.98483332343724
epoch =  116 batch =  300 / 977 loss =  -23.987645361510626
epoch =  116 batch =  325 / 977 loss =  -23.989898254534957
epoch =  116 batch =  350 / 977 loss 

epoch =  119 batch =  300 / 977 loss =  -24.001800708200452
epoch =  119 batch =  325 / 977 loss =  -23.998718244166465
epoch =  119 batch =  350 / 977 loss =  -23.995871568337463
epoch =  119 batch =  375 / 977 loss =  -23.999449542228213
epoch =  119 batch =  400 / 977 loss =  -24.00398872261333
epoch =  119 batch =  425 / 977 loss =  -24.001633518738366
epoch =  119 batch =  450 / 977 loss =  -23.99749398918744
epoch =  119 batch =  475 / 977 loss =  -23.997341484582726
epoch =  119 batch =  500 / 977 loss =  -23.99818400232616
epoch =  119 batch =  525 / 977 loss =  -23.996983419353064
epoch =  119 batch =  550 / 977 loss =  -23.994896762384045
epoch =  119 batch =  575 / 977 loss =  -23.996643781661994
epoch =  119 batch =  600 / 977 loss =  -23.99742402252858
epoch =  119 batch =  625 / 977 loss =  -23.997591908366545
epoch =  119 batch =  650 / 977 loss =  -23.998216511833327
epoch =  119 batch =  675 / 977 loss =  -23.998027245673924
epoch =  119 batch =  700 / 977 loss =  -23.

epoch =  122 batch =  675 / 977 loss =  -24.00024793557162
epoch =  122 batch =  700 / 977 loss =  -23.99950989302827
epoch =  122 batch =  725 / 977 loss =  -23.999292373657227
epoch =  122 batch =  750 / 977 loss =  -23.99889206664064
epoch =  122 batch =  775 / 977 loss =  -23.997772039826383
epoch =  122 batch =  800 / 977 loss =  -23.99772673093722
epoch =  122 batch =  825 / 977 loss =  -23.997513551688943
epoch =  122 batch =  850 / 977 loss =  -23.99712760243937
epoch =  122 batch =  875 / 977 loss =  -23.997084449959672
epoch =  122 batch =  900 / 977 loss =  -23.997738007303084
epoch =  122 batch =  925 / 977 loss =  -23.996924439465253
epoch =  122 batch =  950 / 977 loss =  -23.99653305591218
epoch =  122 batch =  975 / 977 loss =  -23.997099512913188
Validation loss =  -23.966108322143555
Effective sample size =  0.812202
epoch =  123 batch =  0 / 977 loss =  -23.848726272583008
epoch =  123 batch =  25 / 977 loss =  -24.002923378577595
epoch =  123 batch =  50 / 977 loss 

epoch =  126 batch =  25 / 977 loss =  -23.99384043766902
epoch =  126 batch =  50 / 977 loss =  -23.987213882745483
epoch =  126 batch =  75 / 977 loss =  -23.98039842906751
epoch =  126 batch =  100 / 977 loss =  -23.982470616255647
epoch =  126 batch =  125 / 977 loss =  -23.98612502264597
epoch =  126 batch =  150 / 977 loss =  -23.98221929183858
epoch =  126 batch =  175 / 977 loss =  -23.982853250070033
epoch =  126 batch =  200 / 977 loss =  -23.985621770222963
epoch =  126 batch =  225 / 977 loss =  -23.987953548937757
epoch =  126 batch =  250 / 977 loss =  -23.98978638364023
epoch =  126 batch =  275 / 977 loss =  -23.993729867796947
epoch =  126 batch =  300 / 977 loss =  -23.994426258378624
epoch =  126 batch =  325 / 977 loss =  -23.995215532969816
epoch =  126 batch =  350 / 977 loss =  -23.997342014584415
epoch =  126 batch =  375 / 977 loss =  -23.998901174423512
epoch =  126 batch =  400 / 977 loss =  -24.000881551804362
epoch =  126 batch =  425 / 977 loss =  -24.0008

epoch =  129 batch =  400 / 977 loss =  -23.99916830562296
epoch =  129 batch =  425 / 977 loss =  -24.000713066315978
epoch =  129 batch =  450 / 977 loss =  -24.000489089018483
epoch =  129 batch =  475 / 977 loss =  -23.999494813069568
epoch =  129 batch =  500 / 977 loss =  -24.000278267317896
epoch =  129 batch =  525 / 977 loss =  -24.00031476691648
epoch =  129 batch =  550 / 977 loss =  -24.002187998887617
epoch =  129 batch =  575 / 977 loss =  -24.001487900813412
epoch =  129 batch =  600 / 977 loss =  -23.998685262365843
epoch =  129 batch =  625 / 977 loss =  -23.99929584405673
epoch =  129 batch =  650 / 977 loss =  -24.000169821415444
epoch =  129 batch =  675 / 977 loss =  -24.002041353981873
epoch =  129 batch =  700 / 977 loss =  -23.999898779919413
epoch =  129 batch =  725 / 977 loss =  -23.999100288412127
epoch =  129 batch =  750 / 977 loss =  -23.999067151594094
epoch =  129 batch =  775 / 977 loss =  -24.00004087280981
epoch =  129 batch =  800 / 977 loss =  -24.

epoch =  132 batch =  750 / 977 loss =  -24.001747017059124
epoch =  132 batch =  775 / 977 loss =  -24.002067664234907
epoch =  132 batch =  800 / 977 loss =  -24.002256324377548
epoch =  132 batch =  825 / 977 loss =  -24.0017999967709
epoch =  132 batch =  850 / 977 loss =  -24.002170697222304
epoch =  132 batch =  875 / 977 loss =  -24.001476083171966
epoch =  132 batch =  900 / 977 loss =  -24.000452668235518
epoch =  132 batch =  925 / 977 loss =  -23.99949070289892
epoch =  132 batch =  950 / 977 loss =  -23.999362359914368
epoch =  132 batch =  975 / 977 loss =  -23.99941282780444
Validation loss =  -23.958276748657227
Effective sample size =  0.802427
epoch =  133 batch =  0 / 977 loss =  -24.087879180908203
epoch =  133 batch =  25 / 977 loss =  -24.035055013803337
epoch =  133 batch =  50 / 977 loss =  -24.032099705116423
epoch =  133 batch =  75 / 977 loss =  -24.010946625157406
epoch =  133 batch =  100 / 977 loss =  -24.003668605691136
epoch =  133 batch =  125 / 977 loss

epoch =  136 batch =  75 / 977 loss =  -23.972178910907942
epoch =  136 batch =  100 / 977 loss =  -23.9784149320999
epoch =  136 batch =  125 / 977 loss =  -23.980061258588517
epoch =  136 batch =  150 / 977 loss =  -23.98316318941432
epoch =  136 batch =  175 / 977 loss =  -23.988198583776295
epoch =  136 batch =  200 / 977 loss =  -23.992083478329782
epoch =  136 batch =  225 / 977 loss =  -23.993752277002923
epoch =  136 batch =  250 / 977 loss =  -23.99331406673112
epoch =  136 batch =  275 / 977 loss =  -23.9950834011686
epoch =  136 batch =  300 / 977 loss =  -23.996673577647663
epoch =  136 batch =  325 / 977 loss =  -23.996476073937906
epoch =  136 batch =  350 / 977 loss =  -23.996143264987857
epoch =  136 batch =  375 / 977 loss =  -24.00013074976332
epoch =  136 batch =  400 / 977 loss =  -24.000395843810264
epoch =  136 batch =  425 / 977 loss =  -24.00124233317486
epoch =  136 batch =  450 / 977 loss =  -24.000127479400966
epoch =  136 batch =  475 / 977 loss =  -24.00203

epoch =  139 batch =  450 / 977 loss =  -23.999057190381233
epoch =  139 batch =  475 / 977 loss =  -24.00073992304441
epoch =  139 batch =  500 / 977 loss =  -24.002164578009506
epoch =  139 batch =  525 / 977 loss =  -24.000388772768662
epoch =  139 batch =  550 / 977 loss =  -23.99765638431057
epoch =  139 batch =  575 / 977 loss =  -23.99593289030922
epoch =  139 batch =  600 / 977 loss =  -23.99576925318966
epoch =  139 batch =  625 / 977 loss =  -23.997310522646178
epoch =  139 batch =  650 / 977 loss =  -23.997575185632183
epoch =  139 batch =  675 / 977 loss =  -23.99842545266687
epoch =  139 batch =  700 / 977 loss =  -23.998288512399967
epoch =  139 batch =  725 / 977 loss =  -23.998618853650463
epoch =  139 batch =  750 / 977 loss =  -23.998300994283817
epoch =  139 batch =  775 / 977 loss =  -23.997997927911495
epoch =  139 batch =  800 / 977 loss =  -23.99784920605529
epoch =  139 batch =  825 / 977 loss =  -23.997279709822898
epoch =  139 batch =  850 / 977 loss =  -23.99

epoch =  142 batch =  825 / 977 loss =  -23.999777706137
epoch =  142 batch =  850 / 977 loss =  -24.000795738677436
epoch =  142 batch =  875 / 977 loss =  -24.000106632981666
epoch =  142 batch =  900 / 977 loss =  -24.00089131184873
epoch =  142 batch =  925 / 977 loss =  -24.000169739609913
epoch =  142 batch =  950 / 977 loss =  -24.001003271397728
epoch =  142 batch =  975 / 977 loss =  -24.000824979094205
Validation loss =  -23.95599937438965
Effective sample size =  0.790002
epoch =  143 batch =  0 / 977 loss =  -24.079036712646484
epoch =  143 batch =  25 / 977 loss =  -23.9838537803063
epoch =  143 batch =  50 / 977 loss =  -23.993747748580635
epoch =  143 batch =  75 / 977 loss =  -23.990960422315094
epoch =  143 batch =  100 / 977 loss =  -23.99638621641858
epoch =  143 batch =  125 / 977 loss =  -23.995102988349068
epoch =  143 batch =  150 / 977 loss =  -23.998406568110383
epoch =  143 batch =  175 / 977 loss =  -23.99862346865915
epoch =  143 batch =  200 / 977 loss =  -

epoch =  146 batch =  150 / 977 loss =  -23.99034074442276
epoch =  146 batch =  175 / 977 loss =  -23.9843628840013
epoch =  146 batch =  200 / 977 loss =  -23.992734282764037
epoch =  146 batch =  225 / 977 loss =  -23.999156023548768
epoch =  146 batch =  250 / 977 loss =  -23.999600870200837
epoch =  146 batch =  275 / 977 loss =  -23.99836811120959
epoch =  146 batch =  300 / 977 loss =  -23.998749888220498
epoch =  146 batch =  325 / 977 loss =  -23.994440587751704
epoch =  146 batch =  350 / 977 loss =  -23.991081873575848
epoch =  146 batch =  375 / 977 loss =  -23.993811815343005
epoch =  146 batch =  400 / 977 loss =  -23.99489088843291
epoch =  146 batch =  425 / 977 loss =  -23.994806370265046
epoch =  146 batch =  450 / 977 loss =  -23.99371398790448
epoch =  146 batch =  475 / 977 loss =  -23.993816151338464
epoch =  146 batch =  500 / 977 loss =  -23.99373878357177
epoch =  146 batch =  525 / 977 loss =  -23.994488991711982
epoch =  146 batch =  550 / 977 loss =  -23.996

epoch =  149 batch =  525 / 977 loss =  -24.002441616566017
epoch =  149 batch =  550 / 977 loss =  -24.002817057005515
epoch =  149 batch =  575 / 977 loss =  -24.004216767019706
epoch =  149 batch =  600 / 977 loss =  -24.00388260014641
epoch =  149 batch =  625 / 977 loss =  -24.004058533202347
epoch =  149 batch =  650 / 977 loss =  -24.00476993906515
epoch =  149 batch =  675 / 977 loss =  -24.004522986665954
epoch =  149 batch =  700 / 977 loss =  -24.004419430176274
epoch =  149 batch =  725 / 977 loss =  -24.00453165805698
epoch =  149 batch =  750 / 977 loss =  -24.004381418545588
epoch =  149 batch =  775 / 977 loss =  -24.00512980677415
epoch =  149 batch =  800 / 977 loss =  -24.004458750083028
epoch =  149 batch =  825 / 977 loss =  -24.004808021804006
epoch =  149 batch =  850 / 977 loss =  -24.004118599986892
epoch =  149 batch =  875 / 977 loss =  -24.00267865559824
epoch =  149 batch =  900 / 977 loss =  -24.002370087076347
epoch =  149 batch =  925 / 977 loss =  -24.0

epoch =  152 batch =  875 / 977 loss =  -24.002665149566774
epoch =  152 batch =  900 / 977 loss =  -24.00363680789789
epoch =  152 batch =  925 / 977 loss =  -24.002476185495844
epoch =  152 batch =  950 / 977 loss =  -24.001652349558274
epoch =  152 batch =  975 / 977 loss =  -24.00174835861705
Validation loss =  -23.9559268951416
Effective sample size =  0.783719
epoch =  153 batch =  0 / 977 loss =  -24.1231689453125
epoch =  153 batch =  25 / 977 loss =  -23.983456758352425
epoch =  153 batch =  50 / 977 loss =  -23.990610047882672
epoch =  153 batch =  75 / 977 loss =  -23.989677404102522
epoch =  153 batch =  100 / 977 loss =  -23.987656055110513
epoch =  153 batch =  125 / 977 loss =  -23.985047628009127
epoch =  153 batch =  150 / 977 loss =  -23.981633786334104
epoch =  153 batch =  175 / 977 loss =  -23.98103962161324
epoch =  153 batch =  200 / 977 loss =  -23.98529298507159
epoch =  153 batch =  225 / 977 loss =  -23.985244725657775
epoch =  153 batch =  250 / 977 loss =  

epoch =  156 batch =  225 / 977 loss =  -24.01160308534066
epoch =  156 batch =  250 / 977 loss =  -24.00872301960372
epoch =  156 batch =  275 / 977 loss =  -24.005765562472142
epoch =  156 batch =  300 / 977 loss =  -24.006919873513258
epoch =  156 batch =  325 / 977 loss =  -24.006302856960176
epoch =  156 batch =  350 / 977 loss =  -24.006458488964288
epoch =  156 batch =  375 / 977 loss =  -24.004879449276213
epoch =  156 batch =  400 / 977 loss =  -24.00552422863587
epoch =  156 batch =  425 / 977 loss =  -24.00521042425308
epoch =  156 batch =  450 / 977 loss =  -24.00408806557666
epoch =  156 batch =  475 / 977 loss =  -24.002566457796497
epoch =  156 batch =  500 / 977 loss =  -24.002167037385192
epoch =  156 batch =  525 / 977 loss =  -24.00326073215035
epoch =  156 batch =  550 / 977 loss =  -24.001754681125096
epoch =  156 batch =  575 / 977 loss =  -24.00078940722678
epoch =  156 batch =  600 / 977 loss =  -24.00177816305305
epoch =  156 batch =  625 / 977 loss =  -24.0028

epoch =  159 batch =  775 / 977 loss =  -24.00141637841452
epoch =  159 batch =  800 / 977 loss =  -24.001986216665365
epoch =  159 batch =  825 / 977 loss =  -24.002742970538208
epoch =  159 batch =  850 / 977 loss =  -24.002787344604215
epoch =  159 batch =  875 / 977 loss =  -24.0030264614924
epoch =  159 batch =  900 / 977 loss =  -24.002536936684805
epoch =  159 batch =  925 / 977 loss =  -24.003367040893693
epoch =  159 batch =  950 / 977 loss =  -24.001849468323467
epoch =  159 batch =  975 / 977 loss =  -24.00277736343322
Validation loss =  -23.95513916015625
Effective sample size =  0.764098
epoch =  160 batch =  0 / 977 loss =  -23.91969108581543
epoch =  160 batch =  25 / 977 loss =  -24.01453509697547
epoch =  160 batch =  50 / 977 loss =  -24.01247439664953
epoch =  160 batch =  75 / 977 loss =  -24.021287491447048
epoch =  160 batch =  100 / 977 loss =  -24.01111379944452
epoch =  160 batch =  125 / 977 loss =  -24.011988745795357
epoch =  160 batch =  150 / 977 loss =  -

epoch =  163 batch =  100 / 977 loss =  -24.01733660933995
epoch =  163 batch =  125 / 977 loss =  -24.0063572686816
epoch =  163 batch =  150 / 977 loss =  -24.00884301141398
epoch =  163 batch =  175 / 977 loss =  -24.00936072522944
epoch =  163 batch =  200 / 977 loss =  -24.014497766447306
epoch =  163 batch =  225 / 977 loss =  -24.017738148174455
epoch =  163 batch =  250 / 977 loss =  -24.016705881551918
epoch =  163 batch =  275 / 977 loss =  -24.013859783393745
epoch =  163 batch =  300 / 977 loss =  -24.013211329513997
epoch =  163 batch =  325 / 977 loss =  -24.013463605400972
epoch =  163 batch =  350 / 977 loss =  -24.011719347744584
epoch =  163 batch =  375 / 977 loss =  -24.010821956269286
epoch =  163 batch =  400 / 977 loss =  -24.009694073265628
epoch =  163 batch =  425 / 977 loss =  -24.009242971178512
epoch =  163 batch =  450 / 977 loss =  -24.010734887979513
epoch =  163 batch =  475 / 977 loss =  -24.00930864269994
epoch =  163 batch =  500 / 977 loss =  -24.00

epoch =  166 batch =  450 / 977 loss =  -24.012744281878756
epoch =  166 batch =  475 / 977 loss =  -24.014661095723383
epoch =  166 batch =  500 / 977 loss =  -24.013200413443133
epoch =  166 batch =  525 / 977 loss =  -24.012533909467688
epoch =  166 batch =  550 / 977 loss =  -24.01324722165421
epoch =  166 batch =  575 / 977 loss =  -24.01147443056107
epoch =  166 batch =  600 / 977 loss =  -24.01239703617953
epoch =  166 batch =  625 / 977 loss =  -24.01185644777439
epoch =  166 batch =  650 / 977 loss =  -24.010719773956176
epoch =  166 batch =  675 / 977 loss =  -24.01031679232447
epoch =  166 batch =  700 / 977 loss =  -24.009500288589216
epoch =  166 batch =  725 / 977 loss =  -24.01058730606207
epoch =  166 batch =  750 / 977 loss =  -24.010103350155543
epoch =  166 batch =  775 / 977 loss =  -24.010026329571456
epoch =  166 batch =  800 / 977 loss =  -24.01034062274835
epoch =  166 batch =  825 / 977 loss =  -24.01054140788304
epoch =  166 batch =  850 / 977 loss =  -24.0100

epoch =  169 batch =  825 / 977 loss =  -24.004701425030504
epoch =  169 batch =  850 / 977 loss =  -24.00443706445212
epoch =  169 batch =  875 / 977 loss =  -24.00398275514716
epoch =  169 batch =  900 / 977 loss =  -24.00400212972728
epoch =  169 batch =  925 / 977 loss =  -24.00420364396361
epoch =  169 batch =  950 / 977 loss =  -24.003907982859324
epoch =  169 batch =  975 / 977 loss =  -24.00424083334501
Validation loss =  -23.976133346557617
Effective sample size =  0.828583
epoch =  170 batch =  0 / 977 loss =  -23.868408203125
epoch =  170 batch =  25 / 977 loss =  -23.996786484351524
epoch =  170 batch =  50 / 977 loss =  -24.008120817296646
epoch =  170 batch =  75 / 977 loss =  -24.009591378663714
epoch =  170 batch =  100 / 977 loss =  -24.01532507414865
epoch =  170 batch =  125 / 977 loss =  -24.022211544097416
epoch =  170 batch =  150 / 977 loss =  -24.020214282913717
epoch =  170 batch =  175 / 977 loss =  -24.017004175619647
epoch =  170 batch =  200 / 977 loss =  -

epoch =  173 batch =  175 / 977 loss =  -23.997370264746927
epoch =  173 batch =  200 / 977 loss =  -23.99984782014913
epoch =  173 batch =  225 / 977 loss =  -23.999152166653523
epoch =  173 batch =  250 / 977 loss =  -23.99993969434761
epoch =  173 batch =  275 / 977 loss =  -23.999433358510334
epoch =  173 batch =  300 / 977 loss =  -23.99847738449756
epoch =  173 batch =  325 / 977 loss =  -23.99993655432953
epoch =  173 batch =  350 / 977 loss =  -24.00098528413692
epoch =  173 batch =  375 / 977 loss =  -24.003302558939517
epoch =  173 batch =  400 / 977 loss =  -24.004419536067374
epoch =  173 batch =  425 / 977 loss =  -24.002404427864192
epoch =  173 batch =  450 / 977 loss =  -24.00205669699118
epoch =  173 batch =  475 / 977 loss =  -24.003401183280634
epoch =  173 batch =  500 / 977 loss =  -24.005050864762183
epoch =  173 batch =  525 / 977 loss =  -24.003183299597687
epoch =  173 batch =  550 / 977 loss =  -24.005451063928067
epoch =  173 batch =  575 / 977 loss =  -24.00

epoch =  176 batch =  525 / 977 loss =  -24.012483342972097
epoch =  176 batch =  550 / 977 loss =  -24.013500262085635
epoch =  176 batch =  575 / 977 loss =  -24.013510035143966
epoch =  176 batch =  600 / 977 loss =  -24.013491413160885
epoch =  176 batch =  625 / 977 loss =  -24.013027544600508
epoch =  176 batch =  650 / 977 loss =  -24.013705562702878
epoch =  176 batch =  675 / 977 loss =  -24.013490640200082
epoch =  176 batch =  700 / 977 loss =  -24.01243634026671
epoch =  176 batch =  725 / 977 loss =  -24.011156239785418
epoch =  176 batch =  750 / 977 loss =  -24.01016326274438
epoch =  176 batch =  775 / 977 loss =  -24.00948680553238
epoch =  176 batch =  800 / 977 loss =  -24.009470424104403
epoch =  176 batch =  825 / 977 loss =  -24.00880717307544
epoch =  176 batch =  850 / 977 loss =  -24.00904745108651
epoch =  176 batch =  875 / 977 loss =  -24.008064267842176
epoch =  176 batch =  900 / 977 loss =  -24.007107938963337
epoch =  176 batch =  925 / 977 loss =  -24.0

epoch =  179 batch =  875 / 977 loss =  -24.00997669838336
epoch =  179 batch =  900 / 977 loss =  -24.009395326282032
epoch =  179 batch =  925 / 977 loss =  -24.00858208427677
epoch =  179 batch =  950 / 977 loss =  -24.008393134228445
epoch =  179 batch =  975 / 977 loss =  -24.007683703156776
Validation loss =  -23.941104888916016
Effective sample size =  0.650728
epoch =  180 batch =  0 / 977 loss =  -23.954986572265625
epoch =  180 batch =  25 / 977 loss =  -24.02293036534236
epoch =  180 batch =  50 / 977 loss =  -24.01851874706792
epoch =  180 batch =  75 / 977 loss =  -24.00908967068321
epoch =  180 batch =  100 / 977 loss =  -24.007054546091815
epoch =  180 batch =  125 / 977 loss =  -24.00635461958628
epoch =  180 batch =  150 / 977 loss =  -24.000642271231342
epoch =  180 batch =  175 / 977 loss =  -24.00292681563984
epoch =  180 batch =  200 / 977 loss =  -24.000167381704145
epoch =  180 batch =  225 / 977 loss =  -24.002223926307877
epoch =  180 batch =  250 / 977 loss = 

epoch =  183 batch =  200 / 977 loss =  -24.011058902266015
epoch =  183 batch =  225 / 977 loss =  -24.011571917913656
epoch =  183 batch =  250 / 977 loss =  -24.014182292132737
epoch =  183 batch =  275 / 977 loss =  -24.01095035110695
epoch =  183 batch =  300 / 977 loss =  -24.013110518851548
epoch =  183 batch =  325 / 977 loss =  -24.012817880127315
epoch =  183 batch =  350 / 977 loss =  -24.011099046451744
epoch =  183 batch =  375 / 977 loss =  -24.012120449796626
epoch =  183 batch =  400 / 977 loss =  -24.011436533749542
epoch =  183 batch =  425 / 977 loss =  -24.0105373131837
epoch =  183 batch =  450 / 977 loss =  -24.011356180364427
epoch =  183 batch =  475 / 977 loss =  -24.012280440130144
epoch =  183 batch =  500 / 977 loss =  -24.01199062141829
epoch =  183 batch =  525 / 977 loss =  -24.011718554188537
epoch =  183 batch =  550 / 977 loss =  -24.011685300435857
epoch =  183 batch =  575 / 977 loss =  -24.010304027133504
epoch =  183 batch =  600 / 977 loss =  -24.

epoch =  186 batch =  575 / 977 loss =  -24.009416437811318
epoch =  186 batch =  600 / 977 loss =  -24.00881746961749
epoch =  186 batch =  625 / 977 loss =  -24.008930794347194
epoch =  186 batch =  650 / 977 loss =  -24.006911803683554
epoch =  186 batch =  675 / 977 loss =  -24.006687339240973
epoch =  186 batch =  700 / 977 loss =  -24.00603235057008
epoch =  186 batch =  725 / 977 loss =  -24.006374204125933
epoch =  186 batch =  750 / 977 loss =  -24.005879167234205
epoch =  186 batch =  775 / 977 loss =  -24.00638960808822
epoch =  186 batch =  800 / 977 loss =  -24.007586449421886
epoch =  186 batch =  825 / 977 loss =  -24.008554938919023
epoch =  186 batch =  850 / 977 loss =  -24.00757653564739
epoch =  186 batch =  875 / 977 loss =  -24.00720886117246
epoch =  186 batch =  900 / 977 loss =  -24.006670203510577
epoch =  186 batch =  925 / 977 loss =  -24.005986151890923
epoch =  186 batch =  950 / 977 loss =  -24.006616961442074
epoch =  186 batch =  975 / 977 loss =  -24.0

epoch =  189 batch =  950 / 977 loss =  -24.009083193308673
epoch =  189 batch =  975 / 977 loss =  -24.009262227621235
Validation loss =  -23.9610595703125
Effective sample size =  0.798438
epoch =  190 batch =  0 / 977 loss =  -24.07238006591797
epoch =  190 batch =  25 / 977 loss =  -24.01761553837703
epoch =  190 batch =  50 / 977 loss =  -24.017908096313477
epoch =  190 batch =  75 / 977 loss =  -23.993433751558
epoch =  190 batch =  100 / 977 loss =  -24.005299124387236
epoch =  190 batch =  125 / 977 loss =  -24.00223504929316
epoch =  190 batch =  150 / 977 loss =  -24.011744669731094
epoch =  190 batch =  175 / 977 loss =  -24.01105109128085
epoch =  190 batch =  200 / 977 loss =  -24.010085091662052
epoch =  190 batch =  225 / 977 loss =  -24.01440687939129
epoch =  190 batch =  250 / 977 loss =  -24.010757491883055
epoch =  190 batch =  275 / 977 loss =  -24.010569717573087
epoch =  190 batch =  300 / 977 loss =  -24.010251415924376
epoch =  190 batch =  325 / 977 loss =  -2

epoch =  193 batch =  275 / 977 loss =  -24.018749727719072
epoch =  193 batch =  300 / 977 loss =  -24.019256895958783
epoch =  193 batch =  325 / 977 loss =  -24.022041180382484
epoch =  193 batch =  350 / 977 loss =  -24.020049135909126
epoch =  193 batch =  375 / 977 loss =  -24.01743025982634
epoch =  193 batch =  400 / 977 loss =  -24.014441882582975
epoch =  193 batch =  425 / 977 loss =  -24.014098445014778
epoch =  193 batch =  450 / 977 loss =  -24.01530905679166
epoch =  193 batch =  475 / 977 loss =  -24.01635876423171
epoch =  193 batch =  500 / 977 loss =  -24.01245541677266
epoch =  193 batch =  525 / 977 loss =  -24.01284808532367
epoch =  193 batch =  550 / 977 loss =  -24.01357411213234
epoch =  193 batch =  575 / 977 loss =  -24.012898868984657
epoch =  193 batch =  600 / 977 loss =  -24.01257259159438
epoch =  193 batch =  625 / 977 loss =  -24.012987679185958
epoch =  193 batch =  650 / 977 loss =  -24.01245434199976
epoch =  193 batch =  675 / 977 loss =  -24.0119

epoch =  196 batch =  625 / 977 loss =  -24.018560756890533
epoch =  196 batch =  650 / 977 loss =  -24.017272688459872
epoch =  196 batch =  675 / 977 loss =  -24.016954464319898
epoch =  196 batch =  700 / 977 loss =  -24.017189344223826
epoch =  196 batch =  725 / 977 loss =  -24.01623251681157
epoch =  196 batch =  750 / 977 loss =  -24.015917350068072
epoch =  196 batch =  775 / 977 loss =  -24.01412747078335
epoch =  196 batch =  800 / 977 loss =  -24.014394195785236
epoch =  196 batch =  825 / 977 loss =  -24.013831180175337
epoch =  196 batch =  850 / 977 loss =  -24.013289257726434
epoch =  196 batch =  875 / 977 loss =  -24.01219241259849
epoch =  196 batch =  900 / 977 loss =  -24.011423595737536
epoch =  196 batch =  925 / 977 loss =  -24.01069868924036
epoch =  196 batch =  950 / 977 loss =  -24.01055244742633
epoch =  196 batch =  975 / 977 loss =  -24.010436276920505
Validation loss =  -23.979509353637695
Effective sample size =  0.828512
epoch =  197 batch =  0 / 977 lo

Validation loss =  -23.970420837402344
Effective sample size =  0.814534
epoch =  200 batch =  0 / 977 loss =  -24.083768844604492
epoch =  200 batch =  25 / 977 loss =  -23.99387315603403
epoch =  200 batch =  50 / 977 loss =  -24.01468890321021
epoch =  200 batch =  75 / 977 loss =  -24.009080686067282
epoch =  200 batch =  100 / 977 loss =  -24.007720852842425
epoch =  200 batch =  125 / 977 loss =  -24.017979879227894
epoch =  200 batch =  150 / 977 loss =  -24.01493467874085
epoch =  200 batch =  175 / 977 loss =  -24.0147810090672
epoch =  200 batch =  200 / 977 loss =  -24.011316612585276
epoch =  200 batch =  225 / 977 loss =  -24.013526232896663
epoch =  200 batch =  250 / 977 loss =  -24.01496382633529
epoch =  200 batch =  275 / 977 loss =  -24.014338431151025
epoch =  200 batch =  300 / 977 loss =  -24.010656192057155
epoch =  200 batch =  325 / 977 loss =  -24.011603659647367
epoch =  200 batch =  350 / 977 loss =  -24.012189136950713
epoch =  200 batch =  375 / 977 loss =

epoch =  203 batch =  325 / 977 loss =  -24.00841174096419
epoch =  203 batch =  350 / 977 loss =  -24.008109416037893
epoch =  203 batch =  375 / 977 loss =  -24.009207355215203
epoch =  203 batch =  400 / 977 loss =  -24.010600235099517
epoch =  203 batch =  425 / 977 loss =  -24.007716886314437
epoch =  203 batch =  450 / 977 loss =  -24.00589034932681
epoch =  203 batch =  475 / 977 loss =  -24.00609285691206
epoch =  203 batch =  500 / 977 loss =  -24.004809575642422
epoch =  203 batch =  525 / 977 loss =  -24.00463788862918
epoch =  203 batch =  550 / 977 loss =  -24.00549594719917
epoch =  203 batch =  575 / 977 loss =  -24.007247308890033
epoch =  203 batch =  600 / 977 loss =  -24.008461741163416
epoch =  203 batch =  625 / 977 loss =  -24.0111088432824
epoch =  203 batch =  650 / 977 loss =  -24.011367214806434
epoch =  203 batch =  675 / 977 loss =  -24.011913889258572
epoch =  203 batch =  700 / 977 loss =  -24.0121714446412
epoch =  203 batch =  725 / 977 loss =  -24.01136

epoch =  206 batch =  700 / 977 loss =  -24.014521442364366
epoch =  206 batch =  725 / 977 loss =  -24.01414149260719
epoch =  206 batch =  750 / 977 loss =  -24.012606121728655
epoch =  206 batch =  775 / 977 loss =  -24.01238790738215
epoch =  206 batch =  800 / 977 loss =  -24.01233789298717
epoch =  206 batch =  825 / 977 loss =  -24.01250020412788
epoch =  206 batch =  850 / 977 loss =  -24.011676317656512
epoch =  206 batch =  875 / 977 loss =  -24.011231045744744
epoch =  206 batch =  900 / 977 loss =  -24.011327690607178
epoch =  206 batch =  925 / 977 loss =  -24.01095964996141
epoch =  206 batch =  950 / 977 loss =  -24.011092377511243
epoch =  206 batch =  975 / 977 loss =  -24.011266204177364
Validation loss =  -23.967329025268555
Effective sample size =  0.804333
epoch =  207 batch =  0 / 977 loss =  -23.977643966674805
epoch =  207 batch =  25 / 977 loss =  -24.024916868943436
epoch =  207 batch =  50 / 977 loss =  -24.02291425069173
epoch =  207 batch =  75 / 977 loss =

epoch =  210 batch =  25 / 977 loss =  -24.024326911339394
epoch =  210 batch =  50 / 977 loss =  -24.043481826782216
epoch =  210 batch =  75 / 977 loss =  -24.026668899937675
epoch =  210 batch =  100 / 977 loss =  -24.02633799184666
epoch =  210 batch =  125 / 977 loss =  -24.034241812569746
epoch =  210 batch =  150 / 977 loss =  -24.029106354871328
epoch =  210 batch =  175 / 977 loss =  -24.02863901311702
epoch =  210 batch =  200 / 977 loss =  -24.030058258208477
epoch =  210 batch =  225 / 977 loss =  -24.03181864308046
epoch =  210 batch =  250 / 977 loss =  -24.02746715013725
epoch =  210 batch =  275 / 977 loss =  -24.02594660330511
epoch =  210 batch =  300 / 977 loss =  -24.024624580560733
epoch =  210 batch =  325 / 977 loss =  -24.02182323362199
epoch =  210 batch =  350 / 977 loss =  -24.02196345641742
epoch =  210 batch =  375 / 977 loss =  -24.021448292630783
epoch =  210 batch =  400 / 977 loss =  -24.022104767492586
epoch =  210 batch =  425 / 977 loss =  -24.018311

epoch =  213 batch =  600 / 977 loss =  -24.013148655312236
epoch =  213 batch =  625 / 977 loss =  -24.012600837805024
epoch =  213 batch =  650 / 977 loss =  -24.01265287033058
epoch =  213 batch =  675 / 977 loss =  -24.01320969564675
epoch =  213 batch =  700 / 977 loss =  -24.013760557188288
epoch =  213 batch =  725 / 977 loss =  -24.015035637154067
epoch =  213 batch =  750 / 977 loss =  -24.015354240305733
epoch =  213 batch =  775 / 977 loss =  -24.01371516394861
epoch =  213 batch =  800 / 977 loss =  -24.0139257928703
epoch =  213 batch =  825 / 977 loss =  -24.01426927691222
epoch =  213 batch =  850 / 977 loss =  -24.013642153084348
epoch =  213 batch =  875 / 977 loss =  -24.01357827338998
epoch =  213 batch =  900 / 977 loss =  -24.01326570711972
epoch =  213 batch =  925 / 977 loss =  -24.012947904368456
epoch =  213 batch =  950 / 977 loss =  -24.012536549292403
epoch =  213 batch =  975 / 977 loss =  -24.01170187113715
Validation loss =  -23.975610733032227
Effective 

epoch =  216 batch =  950 / 977 loss =  -24.01337056405663
epoch =  216 batch =  975 / 977 loss =  -24.012915570227833
Validation loss =  -23.967056274414062
Effective sample size =  0.814104
epoch =  217 batch =  0 / 977 loss =  -24.168540954589844
epoch =  217 batch =  25 / 977 loss =  -24.007633429307205
epoch =  217 batch =  50 / 977 loss =  -24.017324447631836
epoch =  217 batch =  75 / 977 loss =  -24.014620755848135
epoch =  217 batch =  100 / 977 loss =  -24.017298688982972
epoch =  217 batch =  125 / 977 loss =  -24.014886810666038
epoch =  217 batch =  150 / 977 loss =  -24.01549263505746
epoch =  217 batch =  175 / 977 loss =  -24.009036107496776
epoch =  217 batch =  200 / 977 loss =  -24.009697121767257
epoch =  217 batch =  225 / 977 loss =  -24.013809718916896
epoch =  217 batch =  250 / 977 loss =  -24.01388660750066
epoch =  217 batch =  275 / 977 loss =  -24.014387573021043
epoch =  217 batch =  300 / 977 loss =  -24.012096804241793
epoch =  217 batch =  325 / 977 los

epoch =  220 batch =  300 / 977 loss =  -24.011351962422218
epoch =  220 batch =  325 / 977 loss =  -24.013167855198397
epoch =  220 batch =  350 / 977 loss =  -24.010609075214777
epoch =  220 batch =  375 / 977 loss =  -24.009058932040592
epoch =  220 batch =  400 / 977 loss =  -24.009466142725756
epoch =  220 batch =  425 / 977 loss =  -24.011170817093102
epoch =  220 batch =  450 / 977 loss =  -24.008996198024022
epoch =  220 batch =  475 / 977 loss =  -24.010350079095655
epoch =  220 batch =  500 / 977 loss =  -24.00950443244979
epoch =  220 batch =  525 / 977 loss =  -24.0089419150987
epoch =  220 batch =  550 / 977 loss =  -24.00846074583742
epoch =  220 batch =  575 / 977 loss =  -24.008299079206246
epoch =  220 batch =  600 / 977 loss =  -24.007545883762642
epoch =  220 batch =  625 / 977 loss =  -24.006850855038156
epoch =  220 batch =  650 / 977 loss =  -24.00764466796965
epoch =  220 batch =  675 / 977 loss =  -24.006766576033343
epoch =  220 batch =  700 / 977 loss =  -24.0

epoch =  223 batch =  675 / 977 loss =  -24.01783277579312
epoch =  223 batch =  700 / 977 loss =  -24.017483622813522
epoch =  223 batch =  725 / 977 loss =  -24.016471392523805
epoch =  223 batch =  750 / 977 loss =  -24.01734382842732
epoch =  223 batch =  775 / 977 loss =  -24.01720076983736
epoch =  223 batch =  800 / 977 loss =  -24.017421436666986
epoch =  223 batch =  825 / 977 loss =  -24.017518101246527
epoch =  223 batch =  850 / 977 loss =  -24.015673837986725
epoch =  223 batch =  875 / 977 loss =  -24.014220002579354
epoch =  223 batch =  900 / 977 loss =  -24.013293565841142
epoch =  223 batch =  925 / 977 loss =  -24.012823024527293
epoch =  223 batch =  950 / 977 loss =  -24.012595416368864
epoch =  223 batch =  975 / 977 loss =  -24.012914757259548
Validation loss =  -23.976749420166016
Effective sample size =  0.797113
epoch =  224 batch =  0 / 977 loss =  -24.13739585876465
epoch =  224 batch =  25 / 977 loss =  -24.01930046081543
epoch =  224 batch =  50 / 977 loss

epoch =  227 batch =  25 / 977 loss =  -24.014655406658466
epoch =  227 batch =  50 / 977 loss =  -24.03698580872779
epoch =  227 batch =  75 / 977 loss =  -24.042466013055094
epoch =  227 batch =  100 / 977 loss =  -24.040747312035887
epoch =  227 batch =  125 / 977 loss =  -24.03823828318762
epoch =  227 batch =  150 / 977 loss =  -24.039105876392085
epoch =  227 batch =  175 / 977 loss =  -24.03849312392148
epoch =  227 batch =  200 / 977 loss =  -24.03300117378804
epoch =  227 batch =  225 / 977 loss =  -24.033684215714445
epoch =  227 batch =  250 / 977 loss =  -24.03485024592791
epoch =  227 batch =  275 / 977 loss =  -24.032765609630637
epoch =  227 batch =  300 / 977 loss =  -24.0313626983237
epoch =  227 batch =  325 / 977 loss =  -24.03125961865384
epoch =  227 batch =  350 / 977 loss =  -24.031351910017843
epoch =  227 batch =  375 / 977 loss =  -24.027640778967683
epoch =  227 batch =  400 / 977 loss =  -24.02728822284804
epoch =  227 batch =  425 / 977 loss =  -24.02611362

epoch =  230 batch =  400 / 977 loss =  -24.01948933589487
epoch =  230 batch =  425 / 977 loss =  -24.018433015671125
epoch =  230 batch =  450 / 977 loss =  -24.019575474267516
epoch =  230 batch =  475 / 977 loss =  -24.02096360070364
epoch =  230 batch =  500 / 977 loss =  -24.02144802497056
epoch =  230 batch =  525 / 977 loss =  -24.020766377902294
epoch =  230 batch =  550 / 977 loss =  -24.018132104198635
epoch =  230 batch =  575 / 977 loss =  -24.01746228337287
epoch =  230 batch =  600 / 977 loss =  -24.017444032995943
epoch =  230 batch =  625 / 977 loss =  -24.01789537253089
epoch =  230 batch =  650 / 977 loss =  -24.01655267460554
epoch =  230 batch =  675 / 977 loss =  -24.014808426241895
epoch =  230 batch =  700 / 977 loss =  -24.015029292303883
epoch =  230 batch =  725 / 977 loss =  -24.015041994982834
epoch =  230 batch =  750 / 977 loss =  -24.015504753224533
epoch =  230 batch =  775 / 977 loss =  -24.016133539455442
epoch =  230 batch =  800 / 977 loss =  -24.01

epoch =  233 batch =  775 / 977 loss =  -24.015289390210025
epoch =  233 batch =  800 / 977 loss =  -24.015178437536576
epoch =  233 batch =  825 / 977 loss =  -24.015282501608638
epoch =  233 batch =  850 / 977 loss =  -24.01519408937627
epoch =  233 batch =  875 / 977 loss =  -24.014457264991655
epoch =  233 batch =  900 / 977 loss =  -24.014349457955653
epoch =  233 batch =  925 / 977 loss =  -24.014865644549705
epoch =  233 batch =  950 / 977 loss =  -24.0144084043934
epoch =  233 batch =  975 / 977 loss =  -24.013997765838123
Validation loss =  -23.981611251831055
Effective sample size =  0.840457
epoch =  234 batch =  0 / 977 loss =  -24.05585289001465
epoch =  234 batch =  25 / 977 loss =  -23.96577739715576
epoch =  234 batch =  50 / 977 loss =  -23.993285123039694
epoch =  234 batch =  75 / 977 loss =  -23.999081988083688
epoch =  234 batch =  100 / 977 loss =  -24.00363093083448
epoch =  234 batch =  125 / 977 loss =  -23.999221408177934
epoch =  234 batch =  150 / 977 loss =

epoch =  237 batch =  100 / 977 loss =  -24.023293013619913
epoch =  237 batch =  125 / 977 loss =  -24.01716905926902
epoch =  237 batch =  150 / 977 loss =  -24.01784900640021
epoch =  237 batch =  175 / 977 loss =  -24.01364907351407
epoch =  237 batch =  200 / 977 loss =  -24.013586072779415
epoch =  237 batch =  225 / 977 loss =  -24.01267551320844
epoch =  237 batch =  250 / 977 loss =  -24.01542595943132
epoch =  237 batch =  275 / 977 loss =  -24.01830880538277
epoch =  237 batch =  300 / 977 loss =  -24.01804770029264
epoch =  237 batch =  325 / 977 loss =  -24.01969924294875
epoch =  237 batch =  350 / 977 loss =  -24.021574101896363
epoch =  237 batch =  375 / 977 loss =  -24.019540330196943
epoch =  237 batch =  400 / 977 loss =  -24.01954043000713
epoch =  237 batch =  425 / 977 loss =  -24.017186872276337
epoch =  237 batch =  450 / 977 loss =  -24.016031637424373
epoch =  237 batch =  475 / 977 loss =  -24.014006133840862
epoch =  237 batch =  500 / 977 loss =  -24.01367

epoch =  240 batch =  475 / 977 loss =  -24.015079698642772
epoch =  240 batch =  500 / 977 loss =  -24.013140474726825
epoch =  240 batch =  525 / 977 loss =  -24.012270339994835
epoch =  240 batch =  550 / 977 loss =  -24.01325625300192
epoch =  240 batch =  575 / 977 loss =  -24.01184474428495
epoch =  240 batch =  600 / 977 loss =  -24.013275152831635
epoch =  240 batch =  625 / 977 loss =  -24.01404950260736
epoch =  240 batch =  650 / 977 loss =  -24.014553345476617
epoch =  240 batch =  675 / 977 loss =  -24.01550435737746
epoch =  240 batch =  700 / 977 loss =  -24.016111229693845
epoch =  240 batch =  725 / 977 loss =  -24.015807041451957
epoch =  240 batch =  750 / 977 loss =  -24.015577425493227
epoch =  240 batch =  775 / 977 loss =  -24.016415018396284
epoch =  240 batch =  800 / 977 loss =  -24.016345189602937
epoch =  240 batch =  825 / 977 loss =  -24.017233989429247
epoch =  240 batch =  850 / 977 loss =  -24.01632858079133
epoch =  240 batch =  875 / 977 loss =  -24.0

epoch =  243 batch =  850 / 977 loss =  -24.01604675490204
epoch =  243 batch =  875 / 977 loss =  -24.016312941024296
epoch =  243 batch =  900 / 977 loss =  -24.016346341894156
epoch =  243 batch =  925 / 977 loss =  -24.015484043640406
epoch =  243 batch =  950 / 977 loss =  -24.015545565247162
epoch =  243 batch =  975 / 977 loss =  -24.015921553627393
Validation loss =  -23.96025276184082
Effective sample size =  0.800853
epoch =  244 batch =  0 / 977 loss =  -23.900108337402344
epoch =  244 batch =  25 / 977 loss =  -23.98698065831111
epoch =  244 batch =  50 / 977 loss =  -24.006405849082796
epoch =  244 batch =  75 / 977 loss =  -24.015362287822523
epoch =  244 batch =  100 / 977 loss =  -24.01639150865007
epoch =  244 batch =  125 / 977 loss =  -24.012458967784095
epoch =  244 batch =  150 / 977 loss =  -24.01413957962137
epoch =  244 batch =  175 / 977 loss =  -24.014157349413097
epoch =  244 batch =  200 / 977 loss =  -24.017469548467385
epoch =  244 batch =  225 / 977 loss 

epoch =  247 batch =  175 / 977 loss =  -24.016439275308088
epoch =  247 batch =  200 / 977 loss =  -24.021548503666967
epoch =  247 batch =  225 / 977 loss =  -24.020228141177018
epoch =  247 batch =  250 / 977 loss =  -24.018290059974944
epoch =  247 batch =  275 / 977 loss =  -24.020999632019926
epoch =  247 batch =  300 / 977 loss =  -24.02312923824272
epoch =  247 batch =  325 / 977 loss =  -24.019593063307696
epoch =  247 batch =  350 / 977 loss =  -24.018432030311004
epoch =  247 batch =  375 / 977 loss =  -24.01948787810955
epoch =  247 batch =  400 / 977 loss =  -24.019036580796854
epoch =  247 batch =  425 / 977 loss =  -24.020247186293627
epoch =  247 batch =  450 / 977 loss =  -24.01984033774907
epoch =  247 batch =  475 / 977 loss =  -24.01883080426385
epoch =  247 batch =  500 / 977 loss =  -24.015501848475903
epoch =  247 batch =  525 / 977 loss =  -24.01475695693448
epoch =  247 batch =  550 / 977 loss =  -24.01542721036991
epoch =  247 batch =  575 / 977 loss =  -24.01

epoch =  250 batch =  550 / 977 loss =  -24.011198542295485
epoch =  250 batch =  575 / 977 loss =  -24.010800917943325
epoch =  250 batch =  600 / 977 loss =  -24.011251465453082
epoch =  250 batch =  625 / 977 loss =  -24.011050315710687
epoch =  250 batch =  650 / 977 loss =  -24.011616910474466
epoch =  250 batch =  675 / 977 loss =  -24.012139864927224
epoch =  250 batch =  700 / 977 loss =  -24.013098397030486
epoch =  250 batch =  725 / 977 loss =  -24.013116526538035
epoch =  250 batch =  750 / 977 loss =  -24.013359316179518
epoch =  250 batch =  775 / 977 loss =  -24.013417125977206
epoch =  250 batch =  800 / 977 loss =  -24.013441943050783
epoch =  250 batch =  825 / 977 loss =  -24.01322897294533
epoch =  250 batch =  850 / 977 loss =  -24.014408326737342
epoch =  250 batch =  875 / 977 loss =  -24.013834101968715
epoch =  250 batch =  900 / 977 loss =  -24.014354138474893
epoch =  250 batch =  925 / 977 loss =  -24.013856418189437
epoch =  250 batch =  950 / 977 loss =  -

epoch =  253 batch =  900 / 977 loss =  -24.016813367109055
epoch =  253 batch =  925 / 977 loss =  -24.01631421816272
epoch =  253 batch =  950 / 977 loss =  -24.016563967074003
epoch =  253 batch =  975 / 977 loss =  -24.01625728802603
Validation loss =  -23.9658203125
Effective sample size =  0.809586
epoch =  254 batch =  0 / 977 loss =  -24.022327423095703
epoch =  254 batch =  25 / 977 loss =  -24.0243501663208
epoch =  254 batch =  50 / 977 loss =  -24.01815668741862
epoch =  254 batch =  75 / 977 loss =  -24.013042475047868
epoch =  254 batch =  100 / 977 loss =  -24.01874804732823
epoch =  254 batch =  125 / 977 loss =  -24.02231334504627
epoch =  254 batch =  150 / 977 loss =  -24.02234457660195
epoch =  254 batch =  175 / 977 loss =  -24.027074423703276
epoch =  254 batch =  200 / 977 loss =  -24.027347118700316
epoch =  254 batch =  225 / 977 loss =  -24.022267366932553
epoch =  254 batch =  250 / 977 loss =  -24.021249003619307
epoch =  254 batch =  275 / 977 loss =  -24.0

epoch =  257 batch =  225 / 977 loss =  -24.018398960079757
epoch =  257 batch =  250 / 977 loss =  -24.01587180787348
epoch =  257 batch =  275 / 977 loss =  -24.013204298157618
epoch =  257 batch =  300 / 977 loss =  -24.016426935544434
epoch =  257 batch =  325 / 977 loss =  -24.01687783551362
epoch =  257 batch =  350 / 977 loss =  -24.017452674713564
epoch =  257 batch =  375 / 977 loss =  -24.018832211798806
epoch =  257 batch =  400 / 977 loss =  -24.01826108899199
epoch =  257 batch =  425 / 977 loss =  -24.02165596473944
epoch =  257 batch =  450 / 977 loss =  -24.02000493220902
epoch =  257 batch =  475 / 977 loss =  -24.019572835008635
epoch =  257 batch =  500 / 977 loss =  -24.018408762004796
epoch =  257 batch =  525 / 977 loss =  -24.01694009512549
epoch =  257 batch =  550 / 977 loss =  -24.016233243440325
epoch =  257 batch =  575 / 977 loss =  -24.016752643717663
epoch =  257 batch =  600 / 977 loss =  -24.016405568940073
epoch =  257 batch =  625 / 977 loss =  -24.01

epoch =  260 batch =  600 / 977 loss =  -24.0241469321354
epoch =  260 batch =  625 / 977 loss =  -24.02318852701887
epoch =  260 batch =  650 / 977 loss =  -24.02105827741724
epoch =  260 batch =  675 / 977 loss =  -24.02085561865179
epoch =  260 batch =  700 / 977 loss =  -24.020501891828626
epoch =  260 batch =  725 / 977 loss =  -24.019040070946215
epoch =  260 batch =  750 / 977 loss =  -24.01822517842014
epoch =  260 batch =  775 / 977 loss =  -24.01800230360522
epoch =  260 batch =  800 / 977 loss =  -24.017317718334404
epoch =  260 batch =  825 / 977 loss =  -24.016714421657895
epoch =  260 batch =  850 / 977 loss =  -24.01768647629841
epoch =  260 batch =  875 / 977 loss =  -24.01844129605924
epoch =  260 batch =  900 / 977 loss =  -24.01821711007815
epoch =  260 batch =  925 / 977 loss =  -24.017848286721406
epoch =  260 batch =  950 / 977 loss =  -24.017872959782768
epoch =  260 batch =  975 / 977 loss =  -24.01736752713312
Validation loss =  -23.977092742919922
Effective sa

epoch =  263 batch =  975 / 977 loss =  -24.017121426394727
Validation loss =  -23.961469650268555
Effective sample size =  0.800276
epoch =  264 batch =  0 / 977 loss =  -24.029010772705078
epoch =  264 batch =  25 / 977 loss =  -23.99374609727126
epoch =  264 batch =  50 / 977 loss =  -23.98544479818905
epoch =  264 batch =  75 / 977 loss =  -23.995109432622
epoch =  264 batch =  100 / 977 loss =  -24.004203456463195
epoch =  264 batch =  125 / 977 loss =  -24.003989401317774
epoch =  264 batch =  150 / 977 loss =  -24.002548685137
epoch =  264 batch =  175 / 977 loss =  -24.003047065301363
epoch =  264 batch =  200 / 977 loss =  -24.006670036126124
epoch =  264 batch =  225 / 977 loss =  -24.00656241864229
epoch =  264 batch =  250 / 977 loss =  -24.00722543936801
epoch =  264 batch =  275 / 977 loss =  -24.013378032739606
epoch =  264 batch =  300 / 977 loss =  -24.01655417660938
epoch =  264 batch =  325 / 977 loss =  -24.016536724348008
epoch =  264 batch =  350 / 977 loss =  -24

epoch =  267 batch =  300 / 977 loss =  -24.021191669856975
epoch =  267 batch =  325 / 977 loss =  -24.016128861831003
epoch =  267 batch =  350 / 977 loss =  -24.014794167630004
epoch =  267 batch =  375 / 977 loss =  -24.01626485459347
epoch =  267 batch =  400 / 977 loss =  -24.015255687837282
epoch =  267 batch =  425 / 977 loss =  -24.015559106925274
epoch =  267 batch =  450 / 977 loss =  -24.01644732101
epoch =  267 batch =  475 / 977 loss =  -24.014157840183795
epoch =  267 batch =  500 / 977 loss =  -24.015535160453013
epoch =  267 batch =  525 / 977 loss =  -24.016180828950244
epoch =  267 batch =  550 / 977 loss =  -24.01519968202455
epoch =  267 batch =  575 / 977 loss =  -24.013751076327424
epoch =  267 batch =  600 / 977 loss =  -24.01407586794327
epoch =  267 batch =  625 / 977 loss =  -24.014761367164084
epoch =  267 batch =  650 / 977 loss =  -24.016558490407437
epoch =  267 batch =  675 / 977 loss =  -24.016739470013487
epoch =  267 batch =  700 / 977 loss =  -24.016

epoch =  270 batch =  650 / 977 loss =  -24.024069588305224
epoch =  270 batch =  675 / 977 loss =  -24.024545150395685
epoch =  270 batch =  700 / 977 loss =  -24.02310304192775
epoch =  270 batch =  725 / 977 loss =  -24.021770059569803
epoch =  270 batch =  750 / 977 loss =  -24.021397746831532
epoch =  270 batch =  775 / 977 loss =  -24.02040375139295
epoch =  270 batch =  800 / 977 loss =  -24.01986586229036
epoch =  270 batch =  825 / 977 loss =  -24.020745910108808
epoch =  270 batch =  850 / 977 loss =  -24.021157391343074
epoch =  270 batch =  875 / 977 loss =  -24.019125888336735
epoch =  270 batch =  900 / 977 loss =  -24.018725447067272
epoch =  270 batch =  925 / 977 loss =  -24.01786677111302
epoch =  270 batch =  950 / 977 loss =  -24.018018203077503
epoch =  270 batch =  975 / 977 loss =  -24.017279421696895
Validation loss =  -23.95884895324707
Effective sample size =  0.80127
epoch =  271 batch =  0 / 977 loss =  -23.93225860595703
epoch =  271 batch =  25 / 977 loss 

Effective sample size =  0.788162
epoch =  274 batch =  0 / 977 loss =  -24.17508316040039
epoch =  274 batch =  25 / 977 loss =  -24.00712174635667
epoch =  274 batch =  50 / 977 loss =  -24.013649099013385
epoch =  274 batch =  75 / 977 loss =  -24.030160326706735
epoch =  274 batch =  100 / 977 loss =  -24.02155118885607
epoch =  274 batch =  125 / 977 loss =  -24.0183748063587
epoch =  274 batch =  150 / 977 loss =  -24.019530946845254
epoch =  274 batch =  175 / 977 loss =  -24.01748491417278
epoch =  274 batch =  200 / 977 loss =  -24.01678807462626
epoch =  274 batch =  225 / 977 loss =  -24.01589383488208
epoch =  274 batch =  250 / 977 loss =  -24.020371083719322
epoch =  274 batch =  275 / 977 loss =  -24.024238938870635
epoch =  274 batch =  300 / 977 loss =  -24.022414742910186
epoch =  274 batch =  325 / 977 loss =  -24.021222126264515
epoch =  274 batch =  350 / 977 loss =  -24.018058771421426
epoch =  274 batch =  375 / 977 loss =  -24.02112324187096
epoch =  274 batch =

epoch =  277 batch =  350 / 977 loss =  -24.030221303304025
epoch =  277 batch =  375 / 977 loss =  -24.031904880036688
epoch =  277 batch =  400 / 977 loss =  -24.028210138144924
epoch =  277 batch =  425 / 977 loss =  -24.024551167734344
epoch =  277 batch =  450 / 977 loss =  -24.023433220095743
epoch =  277 batch =  475 / 977 loss =  -24.022543141821846
epoch =  277 batch =  500 / 977 loss =  -24.022576010393756
epoch =  277 batch =  525 / 977 loss =  -24.023999486132713
epoch =  277 batch =  550 / 977 loss =  -24.024118520387066
epoch =  277 batch =  575 / 977 loss =  -24.022745698690414
epoch =  277 batch =  600 / 977 loss =  -24.021924531400302
epoch =  277 batch =  625 / 977 loss =  -24.020294101093523
epoch =  277 batch =  650 / 977 loss =  -24.020044394169346
epoch =  277 batch =  675 / 977 loss =  -24.01930394257313
epoch =  277 batch =  700 / 977 loss =  -24.018653725421366
epoch =  277 batch =  725 / 977 loss =  -24.019435265175886
epoch =  277 batch =  750 / 977 loss =  -

epoch =  280 batch =  700 / 977 loss =  -24.023240830860853
epoch =  280 batch =  725 / 977 loss =  -24.023346512114028
epoch =  280 batch =  750 / 977 loss =  -24.022926289930464
epoch =  280 batch =  775 / 977 loss =  -24.023839311501394
epoch =  280 batch =  800 / 977 loss =  -24.023162679874627
epoch =  280 batch =  825 / 977 loss =  -24.022990630844873
epoch =  280 batch =  850 / 977 loss =  -24.022381353322245
epoch =  280 batch =  875 / 977 loss =  -24.021025056708332
epoch =  280 batch =  900 / 977 loss =  -24.021191551470977
epoch =  280 batch =  925 / 977 loss =  -24.020051630732805
epoch =  280 batch =  950 / 977 loss =  -24.0200855323318
epoch =  280 batch =  975 / 977 loss =  -24.019988781116034
Validation loss =  -23.95927619934082
Effective sample size =  0.794302
epoch =  281 batch =  0 / 977 loss =  -23.88663673400879
epoch =  281 batch =  25 / 977 loss =  -23.988804597121018
epoch =  281 batch =  50 / 977 loss =  -24.014598285450656
epoch =  281 batch =  75 / 977 loss

epoch =  284 batch =  25 / 977 loss =  -24.012337317833534
epoch =  284 batch =  50 / 977 loss =  -24.007810443055387
epoch =  284 batch =  75 / 977 loss =  -24.019189207177405
epoch =  284 batch =  100 / 977 loss =  -24.016287397630133
epoch =  284 batch =  125 / 977 loss =  -24.0189133447314
epoch =  284 batch =  150 / 977 loss =  -24.01923513096688
epoch =  284 batch =  175 / 977 loss =  -24.01482675292274
epoch =  284 batch =  200 / 977 loss =  -24.017940407368666
epoch =  284 batch =  225 / 977 loss =  -24.019422801194985
epoch =  284 batch =  250 / 977 loss =  -24.017758479631272
epoch =  284 batch =  275 / 977 loss =  -24.015415585559342
epoch =  284 batch =  300 / 977 loss =  -24.013031170613733
epoch =  284 batch =  325 / 977 loss =  -24.01297969467068
epoch =  284 batch =  350 / 977 loss =  -24.016134827225287
epoch =  284 batch =  375 / 977 loss =  -24.015363566418902
epoch =  284 batch =  400 / 977 loss =  -24.01432090150447
epoch =  284 batch =  425 / 977 loss =  -24.01693

epoch =  287 batch =  400 / 977 loss =  -24.014412939399847
epoch =  287 batch =  425 / 977 loss =  -24.013529558136984
epoch =  287 batch =  450 / 977 loss =  -24.016654930199326
epoch =  287 batch =  475 / 977 loss =  -24.016879947245616
epoch =  287 batch =  500 / 977 loss =  -24.01678534943662
epoch =  287 batch =  525 / 977 loss =  -24.01549696106421
epoch =  287 batch =  550 / 977 loss =  -24.016232416114878
epoch =  287 batch =  575 / 977 loss =  -24.01515510347155
epoch =  287 batch =  600 / 977 loss =  -24.015097216639475
epoch =  287 batch =  625 / 977 loss =  -24.015161361938098
epoch =  287 batch =  650 / 977 loss =  -24.01637477552471
epoch =  287 batch =  675 / 977 loss =  -24.017116876748904
epoch =  287 batch =  700 / 977 loss =  -24.01695079504169
epoch =  287 batch =  725 / 977 loss =  -24.016986195377747
epoch =  287 batch =  750 / 977 loss =  -24.016998506893977
epoch =  287 batch =  775 / 977 loss =  -24.0167376183972
epoch =  287 batch =  800 / 977 loss =  -24.017

epoch =  290 batch =  775 / 977 loss =  -24.02416247682473
epoch =  290 batch =  800 / 977 loss =  -24.023488934120437
epoch =  290 batch =  825 / 977 loss =  -24.021910383395365
epoch =  290 batch =  850 / 977 loss =  -24.021740262292667
epoch =  290 batch =  875 / 977 loss =  -24.022226932386285
epoch =  290 batch =  900 / 977 loss =  -24.020777538269925
epoch =  290 batch =  925 / 977 loss =  -24.021276677916426
epoch =  290 batch =  950 / 977 loss =  -24.02102161331257
epoch =  290 batch =  975 / 977 loss =  -24.019910880776703
Validation loss =  -23.969078063964844
Effective sample size =  0.794994
epoch =  291 batch =  0 / 977 loss =  -23.952022552490234
epoch =  291 batch =  25 / 977 loss =  -24.042760335482082
epoch =  291 batch =  50 / 977 loss =  -24.030124215518725
epoch =  291 batch =  75 / 977 loss =  -24.039321347286826
epoch =  291 batch =  100 / 977 loss =  -24.0330007005446
epoch =  291 batch =  125 / 977 loss =  -24.024103997245668
epoch =  291 batch =  150 / 977 loss

epoch =  294 batch =  100 / 977 loss =  -24.000974334112488
epoch =  294 batch =  125 / 977 loss =  -23.996741007244776
epoch =  294 batch =  150 / 977 loss =  -23.99640944777735
epoch =  294 batch =  175 / 977 loss =  -24.00446353175424
epoch =  294 batch =  200 / 977 loss =  -24.00655153378919
epoch =  294 batch =  225 / 977 loss =  -24.009190652222763
epoch =  294 batch =  250 / 977 loss =  -24.009162545679104
epoch =  294 batch =  275 / 977 loss =  -24.009049989175107
epoch =  294 batch =  300 / 977 loss =  -24.009068986506165
epoch =  294 batch =  325 / 977 loss =  -24.010825888510862
epoch =  294 batch =  350 / 977 loss =  -24.011031740411394
epoch =  294 batch =  375 / 977 loss =  -24.01267936381888
epoch =  294 batch =  400 / 977 loss =  -24.013390277092
epoch =  294 batch =  425 / 977 loss =  -24.014758333913598
epoch =  294 batch =  450 / 977 loss =  -24.015453444351905
epoch =  294 batch =  475 / 977 loss =  -24.01660509670482
epoch =  294 batch =  500 / 977 loss =  -24.0162

epoch =  297 batch =  475 / 977 loss =  -24.0212662079755
epoch =  297 batch =  500 / 977 loss =  -24.021967885974878
epoch =  297 batch =  525 / 977 loss =  -24.02411263614553
epoch =  297 batch =  550 / 977 loss =  -24.024982667012573
epoch =  297 batch =  575 / 977 loss =  -24.024853759341777
epoch =  297 batch =  600 / 977 loss =  -24.023643604729234
epoch =  297 batch =  625 / 977 loss =  -24.02298183867727
epoch =  297 batch =  650 / 977 loss =  -24.022100275745792
epoch =  297 batch =  675 / 977 loss =  -24.02133913435176
epoch =  297 batch =  700 / 977 loss =  -24.0209483256864
epoch =  297 batch =  725 / 977 loss =  -24.0216422304306
epoch =  297 batch =  750 / 977 loss =  -24.021125321064446
epoch =  297 batch =  775 / 977 loss =  -24.020657819570975
epoch =  297 batch =  800 / 977 loss =  -24.01970661891982
epoch =  297 batch =  825 / 977 loss =  -24.019844764081405
epoch =  297 batch =  850 / 977 loss =  -24.020914357921647
epoch =  297 batch =  875 / 977 loss =  -24.021830

epoch =  300 batch =  825 / 977 loss =  -24.018011141631554
epoch =  300 batch =  850 / 977 loss =  -24.0174738013226
epoch =  300 batch =  875 / 977 loss =  -24.018786319314607
epoch =  300 batch =  900 / 977 loss =  -24.01905610849801
epoch =  300 batch =  925 / 977 loss =  -24.018576049392724
epoch =  300 batch =  950 / 977 loss =  -24.019376650468274
epoch =  300 batch =  975 / 977 loss =  -24.019753061357083
Validation loss =  -23.97641944885254
Effective sample size =  0.827119
epoch =  301 batch =  0 / 977 loss =  -23.74904441833496
epoch =  301 batch =  25 / 977 loss =  -24.03164005279541
epoch =  301 batch =  50 / 977 loss =  -24.006098915548886
epoch =  301 batch =  75 / 977 loss =  -23.9997508149398
epoch =  301 batch =  100 / 977 loss =  -24.008913644469605
epoch =  301 batch =  125 / 977 loss =  -24.01394878871857
epoch =  301 batch =  150 / 977 loss =  -24.015900681350406
epoch =  301 batch =  175 / 977 loss =  -24.018022775650014
epoch =  301 batch =  200 / 977 loss =  -

epoch =  304 batch =  150 / 977 loss =  -24.016347683028673
epoch =  304 batch =  175 / 977 loss =  -24.01282412355597
epoch =  304 batch =  200 / 977 loss =  -24.015287048188018
epoch =  304 batch =  225 / 977 loss =  -24.01836986879332
epoch =  304 batch =  250 / 977 loss =  -24.017504505902178
epoch =  304 batch =  275 / 977 loss =  -24.01789339728978
epoch =  304 batch =  300 / 977 loss =  -24.01413680548684
epoch =  304 batch =  325 / 977 loss =  -24.01224688664535
epoch =  304 batch =  350 / 977 loss =  -24.014465130971693
epoch =  304 batch =  375 / 977 loss =  -24.016932157759957
epoch =  304 batch =  400 / 977 loss =  -24.018524702647664
epoch =  304 batch =  425 / 977 loss =  -24.018000656450287
epoch =  304 batch =  450 / 977 loss =  -24.016959480065715
epoch =  304 batch =  475 / 977 loss =  -24.017795959440587
epoch =  304 batch =  500 / 977 loss =  -24.01963481408154
epoch =  304 batch =  525 / 977 loss =  -24.017657798505997
epoch =  304 batch =  550 / 977 loss =  -24.01

epoch =  307 batch =  525 / 977 loss =  -24.023170039680963
epoch =  307 batch =  550 / 977 loss =  -24.023735288699605
epoch =  307 batch =  575 / 977 loss =  -24.022603369421425
epoch =  307 batch =  600 / 977 loss =  -24.021468608430936
epoch =  307 batch =  625 / 977 loss =  -24.021955694253453
epoch =  307 batch =  650 / 977 loss =  -24.022052682856078
epoch =  307 batch =  675 / 977 loss =  -24.021949437948376
epoch =  307 batch =  700 / 977 loss =  -24.02249884503374
epoch =  307 batch =  725 / 977 loss =  -24.022456140229195
epoch =  307 batch =  750 / 977 loss =  -24.02233989070481
epoch =  307 batch =  775 / 977 loss =  -24.022990526612272
epoch =  307 batch =  800 / 977 loss =  -24.022947420936994
epoch =  307 batch =  825 / 977 loss =  -24.022019266216287
epoch =  307 batch =  850 / 977 loss =  -24.02140725625528
epoch =  307 batch =  875 / 977 loss =  -24.02154183932091
epoch =  307 batch =  900 / 977 loss =  -24.02050824858637
epoch =  307 batch =  925 / 977 loss =  -24.0

epoch =  310 batch =  875 / 977 loss =  -24.023872042355457
epoch =  310 batch =  900 / 977 loss =  -24.023452978949155
epoch =  310 batch =  925 / 977 loss =  -24.0242904823748
epoch =  310 batch =  950 / 977 loss =  -24.023738363689162
epoch =  310 batch =  975 / 977 loss =  -24.023102758360675
Validation loss =  -23.98392105102539
Effective sample size =  0.842064
epoch =  311 batch =  0 / 977 loss =  -23.924776077270508
epoch =  311 batch =  25 / 977 loss =  -24.014649391174316
epoch =  311 batch =  50 / 977 loss =  -24.01657164330576
epoch =  311 batch =  75 / 977 loss =  -24.029790828102513
epoch =  311 batch =  100 / 977 loss =  -24.033180312354965
epoch =  311 batch =  125 / 977 loss =  -24.027559673975386
epoch =  311 batch =  150 / 977 loss =  -24.02967209215986
epoch =  311 batch =  175 / 977 loss =  -24.031623114239096
epoch =  311 batch =  200 / 977 loss =  -24.030138993144636
epoch =  311 batch =  225 / 977 loss =  -24.030453724143786
epoch =  311 batch =  250 / 977 loss 

epoch =  314 batch =  225 / 977 loss =  -24.025280471396655
epoch =  314 batch =  250 / 977 loss =  -24.022259465251786
epoch =  314 batch =  275 / 977 loss =  -24.021547441897187
epoch =  314 batch =  300 / 977 loss =  -24.020858999106586
epoch =  314 batch =  325 / 977 loss =  -24.02121837592565
epoch =  314 batch =  350 / 977 loss =  -24.023780681468832
epoch =  314 batch =  375 / 977 loss =  -24.02170548033208
epoch =  314 batch =  400 / 977 loss =  -24.02379423127211
epoch =  314 batch =  425 / 977 loss =  -24.02364039533016
epoch =  314 batch =  450 / 977 loss =  -24.023057569685648
epoch =  314 batch =  475 / 977 loss =  -24.024836936918636
epoch =  314 batch =  500 / 977 loss =  -24.025051592828753
epoch =  314 batch =  525 / 977 loss =  -24.02571265507108
epoch =  314 batch =  550 / 977 loss =  -24.0257487496101
epoch =  314 batch =  575 / 977 loss =  -24.025399658415065
epoch =  314 batch =  600 / 977 loss =  -24.024735849034585
epoch =  314 batch =  625 / 977 loss =  -24.027

epoch =  317 batch =  600 / 977 loss =  -24.02447093068659
epoch =  317 batch =  625 / 977 loss =  -24.024394772684968
epoch =  317 batch =  650 / 977 loss =  -24.025501503189926
epoch =  317 batch =  675 / 977 loss =  -24.025182774786376
epoch =  317 batch =  700 / 977 loss =  -24.025537308544603
epoch =  317 batch =  725 / 977 loss =  -24.026067074336915
epoch =  317 batch =  750 / 977 loss =  -24.025468110085765
epoch =  317 batch =  775 / 977 loss =  -24.02541097660651
epoch =  317 batch =  800 / 977 loss =  -24.0254602277472
epoch =  317 batch =  825 / 977 loss =  -24.025716492973828
epoch =  317 batch =  850 / 977 loss =  -24.025187222293592
epoch =  317 batch =  875 / 977 loss =  -24.02513150428525
epoch =  317 batch =  900 / 977 loss =  -24.024158549758592
epoch =  317 batch =  925 / 977 loss =  -24.024485940294667
epoch =  317 batch =  950 / 977 loss =  -24.02460667462752
epoch =  317 batch =  975 / 977 loss =  -24.023640278909998
Validation loss =  -23.977169036865234
Effecti

epoch =  320 batch =  950 / 977 loss =  -24.02305457218463
epoch =  320 batch =  975 / 977 loss =  -24.023619503271384
Validation loss =  -23.97989273071289
Effective sample size =  0.835667
epoch =  321 batch =  0 / 977 loss =  -24.103103637695312
epoch =  321 batch =  25 / 977 loss =  -24.02668417417086
epoch =  321 batch =  50 / 977 loss =  -24.004311168895047
epoch =  321 batch =  75 / 977 loss =  -24.015296283521153
epoch =  321 batch =  100 / 977 loss =  -24.012460746387447
epoch =  321 batch =  125 / 977 loss =  -24.009805058676104
epoch =  321 batch =  150 / 977 loss =  -24.015906453922092
epoch =  321 batch =  175 / 977 loss =  -24.019760825417258
epoch =  321 batch =  200 / 977 loss =  -24.022156549330376
epoch =  321 batch =  225 / 977 loss =  -24.02763821593428
epoch =  321 batch =  250 / 977 loss =  -24.028205742399056
epoch =  321 batch =  275 / 977 loss =  -24.030703793401305
epoch =  321 batch =  300 / 977 loss =  -24.029801555646216
epoch =  321 batch =  325 / 977 loss

epoch =  324 batch =  275 / 977 loss =  -24.022070235100347
epoch =  324 batch =  300 / 977 loss =  -24.0229694391802
epoch =  324 batch =  325 / 977 loss =  -24.02428414631476
epoch =  324 batch =  350 / 977 loss =  -24.02547669478631
epoch =  324 batch =  375 / 977 loss =  -24.02432509178811
epoch =  324 batch =  400 / 977 loss =  -24.024486641634134
epoch =  324 batch =  425 / 977 loss =  -24.0232814511223
epoch =  324 batch =  450 / 977 loss =  -24.023098524816813
epoch =  324 batch =  475 / 977 loss =  -24.02519273357231
epoch =  324 batch =  500 / 977 loss =  -24.02597298308047
epoch =  324 batch =  525 / 977 loss =  -24.0261078606087
epoch =  324 batch =  550 / 977 loss =  -24.024838920080946
epoch =  324 batch =  575 / 977 loss =  -24.02432588736217
epoch =  324 batch =  600 / 977 loss =  -24.024546935832053
epoch =  324 batch =  625 / 977 loss =  -24.02473673043541
epoch =  324 batch =  650 / 977 loss =  -24.024367260676563
epoch =  324 batch =  675 / 977 loss =  -24.023357470

epoch =  327 batch =  650 / 977 loss =  -24.023700283419682
epoch =  327 batch =  675 / 977 loss =  -24.023058727648117
epoch =  327 batch =  700 / 977 loss =  -24.023583135999377
epoch =  327 batch =  725 / 977 loss =  -24.022018561350073
epoch =  327 batch =  750 / 977 loss =  -24.022403122740634
epoch =  327 batch =  775 / 977 loss =  -24.02257165957971
epoch =  327 batch =  800 / 977 loss =  -24.02219644229807
epoch =  327 batch =  825 / 977 loss =  -24.02273852600023
epoch =  327 batch =  850 / 977 loss =  -24.021740439355575
epoch =  327 batch =  875 / 977 loss =  -24.022172738427976
epoch =  327 batch =  900 / 977 loss =  -24.02318992572407
epoch =  327 batch =  925 / 977 loss =  -24.024291178577656
epoch =  327 batch =  950 / 977 loss =  -24.022641672320916
epoch =  327 batch =  975 / 977 loss =  -24.022695672316623
Validation loss =  -23.936635971069336
Effective sample size =  0.748305
epoch =  328 batch =  0 / 977 loss =  -23.975574493408203
epoch =  328 batch =  25 / 977 lo