In [1]:
import os
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data
from model.pytorchtools import EarlyStopping
import model.net as models 
from model.dataset import SurfaceComplexationDataset
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import numpy as np

In [2]:
def build_optimizer(network, optimizer, learning_rate):
    if optimizer == "sgd":
        optimizer = optim.SGD(network.parameters(),
                              lr=learning_rate, momentum=0.9)
    elif optimizer == "adam":
        optimizer = optim.Adam(network.parameters(),
                               lr=learning_rate)
    
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    return optimizer, scheduler

In [3]:
def load_data(data_dir): 
    train_set = SurfaceComplexationDataset(root_dir=data_dir, split = 'train')
    test_set = SurfaceComplexationDataset(root_dir=data_dir, split='test')
    val_set = SurfaceComplexationDataset(root_dir=data_dir, split='val')

    return train_set, test_set, val_set

In [4]:
def train_epoch(train_loader, model, optimizer, device, epoch):
    """ Train the model on num_steps batches 
    Args: 
        train_loader: a torch.utils.data.DataLoader object that fetches the data
        model: the neural network 
        optimizer: adams 
    """
    model.train()
    running_loss = 0.0
    num_batch = len(train_loader)

    for i, (inputs, targets) in enumerate(train_loader): 
        inputs, targets = inputs.to(device), targets.to(device)
        # zero the paramter gradients 
        optimizer.zero_grad()

        # forward + backward + optimize 
        pred = model(inputs)
        loss = F.mse_loss(pred, targets)
        loss.backward()
        optimizer.step()

        # print statistics 
        running_loss += loss.item()
        # if i % 300 == 0: 
        #     print('[%d: %d/%d] train loss: %f ' % (epoch, i, num_batch, loss.item()))
        # if i % 300 == 0: 
        #     print('[%d: %d/%d] train loss: %f lr = %f' % (epoch, i, num_batch, loss.item(), optimizer.param_groups[0]["lr"]))

    return running_loss / num_batch 

In [5]:
def validate(val_dataloader, model, device): 
    model.eval()
    val_running_loss = 0.0 

    with torch.no_grad(): 
        for inputs, targets in val_dataloader:
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            loss = F.mse_loss(outputs, targets)

            val_running_loss += loss.item() * inputs.size(0)

    return val_running_loss / len(val_dataloader.dataset)

In [6]:
def plot_pramas(test_y, test_pred, foldername, outfile): 
    # print("R2 of training is: ", r2_score(train_y, train_pred))
    np.savetxt(f'{foldername}/test_pred_{outfile}.txt', test_pred)
    np.savetxt(f'{foldername}/test_y_{outfile}.txt', test_y)
    
    print("R2 of test is: ", r2_score(test_y, test_pred[:,4:]))

    test_mse = mean_squared_error(test_y, test_pred[:,4:])
    test_mae = mean_absolute_error(test_y, test_pred[:,4:])

    print('Test set results for %i samples:' % test_pred.shape[0])
    print('MSE:', test_mse)
    print('MAE:', test_mae)

In [7]:
def test_accuracy(net, testloader, foldername, outfile, device): 
    test_pred = []
    test_y = []
    running_loss = 0 

    with torch.no_grad():
        for data in testloader:
            inputs, targets = data
            sys = inputs[:, :4]
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = F.mse_loss(outputs, targets)

            running_loss += loss.item() 

            pred_val_numpy = outputs.data.cpu().numpy()
            target_val_numpy = targets.data.cpu().numpy()

            # test_pred.append(pred_val_numpy)
            test_pred.append(np.concatenate([sys.data.cpu().numpy(),pred_val_numpy],axis = 1))
            test_y.append(target_val_numpy)

    test_pred = np.concatenate(test_pred, axis=0)
    test_y = np.concatenate(test_y, axis=0)

    plot_pramas(test_y, test_pred, foldername, outfile)

    print('MSE loss on test set is:', running_loss / len(testloader.dataset)) 

In [8]:
def train_model(model, device, train_loader, val_loader, test_loader, optimizer, lr_scheduler, isSch, res_dir, name, patience = 20, n_epochs = 100): 
    # to track the average training loss per epoch as the model trains
    avg_train_losses = []
    # to track the average validation loss per epoch as the model trains
    avg_valid_losses = [] 

    blue = lambda x: '\033[94m' + x + '\033[0m'
    
    checkpoint_dir = os.path.join(res_dir, 'checkpoints')
    try:
        os.makedirs(res_dir)
        os.makedirs(checkpoint_dir)
    except OSError:
        pass

    checkpoint_path = os.path.join(checkpoint_dir, f'{name}.pt')
    # initialize the early_stopping object
    early_stopping = EarlyStopping(patience=patience, verbose=True, path = checkpoint_path)

    for epoch in tqdm(range(1, n_epochs + 1)):
        ###################
        # train the model #
        ###################
        train_epoch_loss = train_epoch(train_loader, model, optimizer, device, epoch)
        val_epoch_loss = validate(val_loader, model, device)

        # print loss every epoch 
        print('[%d] train loss: %f ' % (epoch, train_epoch_loss))
        print('[%d] %s loss: %f' % (epoch, blue('validate'), val_epoch_loss))

        avg_train_losses.append(train_epoch_loss)
        avg_valid_losses.append(val_epoch_loss)
        
        if isSch: 
            lr_scheduler.step(val_epoch_loss) 
        
        # add early stopping 
        # early_stopping(val_epoch_loss, model)
        early_stopping(train_epoch_loss, model)
        if early_stopping.early_stop: 
            print("Early stopping")
            break 

    np.savetxt(os.path.join(res_dir, f'train_loss_{name}.csv'), avg_train_losses)
    np.savetxt(os.path.join(res_dir, f'val_loss_{name}.csv'), avg_valid_losses) 

    # load the last checkpoint with the best model
    model.load_state_dict(torch.load(checkpoint_path)) 

    # test on test set 
    test_accuracy(model, test_loader, res_dir, name, device)
    # print(optimizer.state_dict())

In [9]:
def train_main(config): 
    data_dir = 'dataset/'

    # get dataset 
    train_set, val_set, test_set = load_data(data_dir)

    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=int(config["batch_size"]),
        shuffle=True,
        num_workers=4, 
        pin_memory=False)

    val_loader = torch.utils.data.DataLoader(
            val_set,
            batch_size=int(config["batch_size"]),
            shuffle=True,
            num_workers=4, 
            pin_memory=False)

    test_loader = torch.utils.data.DataLoader(
            test_set, 
            batch_size=int(config["batch_size"]), 
            shuffle=True,
            num_workers=4, 
            pin_memory=False)
            
    print("Creating model")
    Model = getattr(models, config['model'])
    print('created model is: ', Model)
    
    model = Model(config['input_dim'], config['batch_norm'], config['layer_norm'], config['constraint'],
                     config["l1"], config["l2"], config["l3"], config["l4"], config["l5"])
        
    name = "DNN"
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    optimizer, lr_scheduler = build_optimizer(model, config['optimizer'], config['lr'])
    res_dir = 'DNN_res'
    
    train_model(model, device, train_loader, val_loader, test_loader, optimizer, lr_scheduler, config['lr_scheduler'], res_dir, name, 20, 5000)

# train DNN 

In [12]:
config = {'input_dim': 664,'l1': 1024, 'l2': 1024, 'l3': 1024, 'l4': 1024, 'l5': 512, 
          'lr': 0.01, 'batch_size': 128, 'model': 'DeepNet6LayerTune', 'batch_norm': False, 
          'layer_norm': True, 'lr_scheduler': True, 'constraint': False, 'optimizer': 'adam'}

In [None]:
import time 
start_time = time.perf_counter()
train_main(config) 
end_time = time.perf_counter() 
print('time used to train model with 40/1000 patience is: ', (end_time - start_time)/60, 'mins')

Creating model
created model is:  <class 'model.net.DeepNet6LayerTune'>


  0%|          | 1/5000 [01:13<101:25:03, 73.04s/it]

[1] train loss: 0.018467 
[1] [94mvalidate[0m loss: 0.013492
Validation loss decreased (inf --> 0.018467).  Saving model ...


  0%|          | 2/5000 [02:23<99:33:27, 71.71s/it] 

[2] train loss: 0.009112 
[2] [94mvalidate[0m loss: 0.007725
Validation loss decreased (0.018467 --> 0.009112).  Saving model ...


  0%|          | 3/5000 [03:34<99:10:39, 71.45s/it]

[3] train loss: 0.008283 
[3] [94mvalidate[0m loss: 0.008790
Validation loss decreased (0.009112 --> 0.008283).  Saving model ...


  0%|          | 4/5000 [04:46<99:08:47, 71.44s/it]

[4] train loss: 0.007826 
[4] [94mvalidate[0m loss: 0.007265
Validation loss decreased (0.008283 --> 0.007826).  Saving model ...


  0%|          | 5/5000 [05:58<99:24:36, 71.65s/it]

[5] train loss: 0.007540 
[5] [94mvalidate[0m loss: 0.010297
Validation loss decreased (0.007826 --> 0.007540).  Saving model ...


  0%|          | 6/5000 [07:08<98:46:28, 71.20s/it]

[6] train loss: 0.007274 
[6] [94mvalidate[0m loss: 0.008533
Validation loss decreased (0.007540 --> 0.007274).  Saving model ...


  0%|          | 7/5000 [08:19<98:23:24, 70.94s/it]

[7] train loss: 0.007055 
[7] [94mvalidate[0m loss: 0.006664
Validation loss decreased (0.007274 --> 0.007055).  Saving model ...


  0%|          | 8/5000 [09:29<98:07:32, 70.76s/it]

[8] train loss: 0.006884 
[8] [94mvalidate[0m loss: 0.006576
Validation loss decreased (0.007055 --> 0.006884).  Saving model ...


  0%|          | 9/5000 [10:40<98:02:21, 70.72s/it]

[9] train loss: 0.006738 
[9] [94mvalidate[0m loss: 0.008136
Validation loss decreased (0.006884 --> 0.006738).  Saving model ...


  0%|          | 10/5000 [11:50<97:44:59, 70.52s/it]

[10] train loss: 0.006662 
[10] [94mvalidate[0m loss: 0.006269
Validation loss decreased (0.006738 --> 0.006662).  Saving model ...


  0%|          | 11/5000 [13:00<97:34:31, 70.41s/it]

[11] train loss: 0.006534 
[11] [94mvalidate[0m loss: 0.006456
Validation loss decreased (0.006662 --> 0.006534).  Saving model ...


  0%|          | 12/5000 [14:11<97:54:10, 70.66s/it]

[12] train loss: 0.006444 
[12] [94mvalidate[0m loss: 0.005918
Validation loss decreased (0.006534 --> 0.006444).  Saving model ...


  0%|          | 13/5000 [15:23<98:14:13, 70.92s/it]

[13] train loss: 0.006351 
[13] [94mvalidate[0m loss: 0.006179
Validation loss decreased (0.006444 --> 0.006351).  Saving model ...


  0%|          | 14/5000 [16:34<98:21:35, 71.02s/it]

[14] train loss: 0.006280 
[14] [94mvalidate[0m loss: 0.007443
Validation loss decreased (0.006351 --> 0.006280).  Saving model ...


  0%|          | 15/5000 [17:45<98:30:00, 71.13s/it]

[15] train loss: 0.006203 
[15] [94mvalidate[0m loss: 0.007002
Validation loss decreased (0.006280 --> 0.006203).  Saving model ...


  0%|          | 16/5000 [18:57<98:42:23, 71.30s/it]

[16] train loss: 0.006140 
[16] [94mvalidate[0m loss: 0.007369
Validation loss decreased (0.006203 --> 0.006140).  Saving model ...


  0%|          | 17/5000 [20:09<99:11:16, 71.66s/it]

[17] train loss: 0.006059 
[17] [94mvalidate[0m loss: 0.006082
Validation loss decreased (0.006140 --> 0.006059).  Saving model ...


  0%|          | 18/5000 [21:21<98:59:29, 71.53s/it]

[18] train loss: 0.005958 
[18] [94mvalidate[0m loss: 0.006168
Validation loss decreased (0.006059 --> 0.005958).  Saving model ...


  0%|          | 19/5000 [22:31<98:29:28, 71.18s/it]

[19] train loss: 0.005940 
[19] [94mvalidate[0m loss: 0.005265
Validation loss decreased (0.005958 --> 0.005940).  Saving model ...


  0%|          | 20/5000 [23:43<98:47:21, 71.41s/it]

[20] train loss: 0.005863 
[20] [94mvalidate[0m loss: 0.006274
Validation loss decreased (0.005940 --> 0.005863).  Saving model ...


  0%|          | 21/5000 [24:53<98:10:21, 70.98s/it]

[21] train loss: 0.005786 
[21] [94mvalidate[0m loss: 0.006291
Validation loss decreased (0.005863 --> 0.005786).  Saving model ...


  0%|          | 22/5000 [26:03<97:34:13, 70.56s/it]

[22] train loss: 0.005744 
[22] [94mvalidate[0m loss: 0.005165
Validation loss decreased (0.005786 --> 0.005744).  Saving model ...


  0%|          | 23/5000 [27:13<97:21:54, 70.43s/it]

[23] train loss: 0.005667 
[23] [94mvalidate[0m loss: 0.005659
Validation loss decreased (0.005744 --> 0.005667).  Saving model ...


  0%|          | 24/5000 [28:24<97:49:09, 70.77s/it]

[24] train loss: 0.005615 
[24] [94mvalidate[0m loss: 0.005294
Validation loss decreased (0.005667 --> 0.005615).  Saving model ...


  0%|          | 25/5000 [29:30<95:48:37, 69.33s/it]

[25] train loss: 0.005542 
[25] [94mvalidate[0m loss: 0.005722
Validation loss decreased (0.005615 --> 0.005542).  Saving model ...


  1%|          | 26/5000 [30:34<93:24:50, 67.61s/it]

[26] train loss: 0.005482 
[26] [94mvalidate[0m loss: 0.005938
Validation loss decreased (0.005542 --> 0.005482).  Saving model ...


  1%|          | 27/5000 [31:39<92:32:55, 67.00s/it]

[27] train loss: 0.005453 
[27] [94mvalidate[0m loss: 0.005541
Validation loss decreased (0.005482 --> 0.005453).  Saving model ...


  1%|          | 28/5000 [32:48<93:06:47, 67.42s/it]

[28] train loss: 0.005353 
[28] [94mvalidate[0m loss: 0.005673
Validation loss decreased (0.005453 --> 0.005353).  Saving model ...


  1%|          | 29/5000 [33:58<94:12:26, 68.23s/it]

[29] train loss: 0.005317 
[29] [94mvalidate[0m loss: 0.004989
Validation loss decreased (0.005353 --> 0.005317).  Saving model ...


  1%|          | 30/5000 [35:09<95:23:01, 69.09s/it]

[30] train loss: 0.005246 
[30] [94mvalidate[0m loss: 0.005713
Validation loss decreased (0.005317 --> 0.005246).  Saving model ...


  1%|          | 31/5000 [36:20<96:11:26, 69.69s/it]

[31] train loss: 0.005201 
[31] [94mvalidate[0m loss: 0.005474
Validation loss decreased (0.005246 --> 0.005201).  Saving model ...


  1%|          | 32/5000 [37:31<96:39:42, 70.04s/it]

[32] train loss: 0.005133 
[32] [94mvalidate[0m loss: 0.005299
Validation loss decreased (0.005201 --> 0.005133).  Saving model ...


  1%|          | 33/5000 [38:42<97:00:27, 70.31s/it]

[33] train loss: 0.005065 
[33] [94mvalidate[0m loss: 0.004832
Validation loss decreased (0.005133 --> 0.005065).  Saving model ...


  1%|          | 34/5000 [39:53<97:20:10, 70.56s/it]

[34] train loss: 0.005053 
[34] [94mvalidate[0m loss: 0.004880
Validation loss decreased (0.005065 --> 0.005053).  Saving model ...


  1%|          | 35/5000 [40:59<95:19:55, 69.12s/it]

[35] train loss: 0.004957 
[35] [94mvalidate[0m loss: 0.005009
Validation loss decreased (0.005053 --> 0.004957).  Saving model ...


  1%|          | 36/5000 [42:05<93:56:41, 68.13s/it]

[36] train loss: 0.004890 
[36] [94mvalidate[0m loss: 0.005955
Validation loss decreased (0.004957 --> 0.004890).  Saving model ...


  1%|          | 37/5000 [43:16<95:09:08, 69.02s/it]

[37] train loss: 0.004848 
[37] [94mvalidate[0m loss: 0.004798
Validation loss decreased (0.004890 --> 0.004848).  Saving model ...


  1%|          | 38/5000 [44:19<92:46:56, 67.31s/it]

[38] train loss: 0.004767 
[38] [94mvalidate[0m loss: 0.005036
Validation loss decreased (0.004848 --> 0.004767).  Saving model ...


  1%|          | 39/5000 [45:26<92:35:50, 67.19s/it]

[39] train loss: 0.004725 
[39] [94mvalidate[0m loss: 0.005297
Validation loss decreased (0.004767 --> 0.004725).  Saving model ...


  1%|          | 40/5000 [46:36<93:53:30, 68.15s/it]

[40] train loss: 0.004683 
[40] [94mvalidate[0m loss: 0.004780
Validation loss decreased (0.004725 --> 0.004683).  Saving model ...


  1%|          | 41/5000 [47:47<94:52:00, 68.87s/it]

[41] train loss: 0.004630 
[41] [94mvalidate[0m loss: 0.004825
Validation loss decreased (0.004683 --> 0.004630).  Saving model ...


  1%|          | 42/5000 [48:53<93:34:55, 67.95s/it]

[42] train loss: 0.004569 
[42] [94mvalidate[0m loss: 0.004653
Validation loss decreased (0.004630 --> 0.004569).  Saving model ...


  1%|          | 43/5000 [50:01<93:53:31, 68.19s/it]

[43] train loss: 0.004505 
[43] [94mvalidate[0m loss: 0.004415
Validation loss decreased (0.004569 --> 0.004505).  Saving model ...


  1%|          | 44/5000 [51:11<94:37:40, 68.74s/it]

[44] train loss: 0.004452 
[44] [94mvalidate[0m loss: 0.004154
Validation loss decreased (0.004505 --> 0.004452).  Saving model ...


  1%|          | 45/5000 [52:16<92:43:02, 67.36s/it]

[45] train loss: 0.004419 
[45] [94mvalidate[0m loss: 0.004636
Validation loss decreased (0.004452 --> 0.004419).  Saving model ...


  1%|          | 46/5000 [53:28<94:35:12, 68.73s/it]

[46] train loss: 0.004367 
[46] [94mvalidate[0m loss: 0.004445
Validation loss decreased (0.004419 --> 0.004367).  Saving model ...


  1%|          | 47/5000 [54:37<95:02:52, 69.08s/it]

[47] train loss: 0.004300 
[47] [94mvalidate[0m loss: 0.004499
Validation loss decreased (0.004367 --> 0.004300).  Saving model ...


  1%|          | 48/5000 [55:45<94:12:07, 68.48s/it]

[48] train loss: 0.004246 
[48] [94mvalidate[0m loss: 0.004785
Validation loss decreased (0.004300 --> 0.004246).  Saving model ...


  1%|          | 49/5000 [56:53<94:05:03, 68.41s/it]

[49] train loss: 0.004200 
[49] [94mvalidate[0m loss: 0.004432
Validation loss decreased (0.004246 --> 0.004200).  Saving model ...


  1%|          | 50/5000 [58:03<94:51:55, 68.99s/it]

[50] train loss: 0.004155 
[50] [94mvalidate[0m loss: 0.004951
Validation loss decreased (0.004200 --> 0.004155).  Saving model ...


  1%|          | 51/5000 [59:15<95:59:03, 69.82s/it]

[51] train loss: 0.004117 
[51] [94mvalidate[0m loss: 0.004223
Validation loss decreased (0.004155 --> 0.004117).  Saving model ...


  1%|          | 52/5000 [1:00:27<96:56:26, 70.53s/it]

[52] train loss: 0.004055 
[52] [94mvalidate[0m loss: 0.004654
Validation loss decreased (0.004117 --> 0.004055).  Saving model ...


  1%|          | 53/5000 [1:01:39<97:18:25, 70.81s/it]

[53] train loss: 0.004019 
[53] [94mvalidate[0m loss: 0.004195
Validation loss decreased (0.004055 --> 0.004019).  Saving model ...


  1%|          | 54/5000 [1:02:51<97:52:19, 71.24s/it]

[54] train loss: 0.003966 
[54] [94mvalidate[0m loss: 0.004351
Validation loss decreased (0.004019 --> 0.003966).  Saving model ...


  1%|          | 55/5000 [1:04:02<97:43:37, 71.15s/it]

[55] train loss: 0.003931 
[55] [94mvalidate[0m loss: 0.003989
Validation loss decreased (0.003966 --> 0.003931).  Saving model ...


  1%|          | 56/5000 [1:05:13<97:45:51, 71.19s/it]

[56] train loss: 0.003889 
[56] [94mvalidate[0m loss: 0.004047
Validation loss decreased (0.003931 --> 0.003889).  Saving model ...


  1%|          | 57/5000 [1:06:25<98:00:57, 71.39s/it]

[57] train loss: 0.003831 
[57] [94mvalidate[0m loss: 0.003821
Validation loss decreased (0.003889 --> 0.003831).  Saving model ...


  1%|          | 58/5000 [1:07:36<97:56:36, 71.35s/it]

[58] train loss: 0.003786 
[58] [94mvalidate[0m loss: 0.003806
Validation loss decreased (0.003831 --> 0.003786).  Saving model ...


  1%|          | 59/5000 [1:08:47<97:53:20, 71.32s/it]

[59] train loss: 0.003748 
[59] [94mvalidate[0m loss: 0.003875
Validation loss decreased (0.003786 --> 0.003748).  Saving model ...


  1%|          | 60/5000 [1:09:58<97:35:59, 71.13s/it]

[60] train loss: 0.003723 
[60] [94mvalidate[0m loss: 0.004106
Validation loss decreased (0.003748 --> 0.003723).  Saving model ...


  1%|          | 61/5000 [1:11:09<97:25:06, 71.01s/it]

[61] train loss: 0.003665 
[61] [94mvalidate[0m loss: 0.004301
Validation loss decreased (0.003723 --> 0.003665).  Saving model ...


  1%|▏         | 63/5000 [1:13:17<92:26:26, 67.41s/it]

[63] train loss: 0.003599 
[63] [94mvalidate[0m loss: 0.004651
Validation loss decreased (0.003637 --> 0.003599).  Saving model ...


  1%|▏         | 64/5000 [1:14:20<90:34:59, 66.07s/it]

[64] train loss: 0.003569 
[64] [94mvalidate[0m loss: 0.003786
Validation loss decreased (0.003599 --> 0.003569).  Saving model ...


  1%|▏         | 65/5000 [1:15:24<89:42:34, 65.44s/it]

[65] train loss: 0.003535 
[65] [94mvalidate[0m loss: 0.003268
Validation loss decreased (0.003569 --> 0.003535).  Saving model ...


  1%|▏         | 66/5000 [1:16:29<89:38:09, 65.40s/it]

[66] train loss: 0.003484 
[66] [94mvalidate[0m loss: 0.003876
Validation loss decreased (0.003535 --> 0.003484).  Saving model ...


  1%|▏         | 67/5000 [1:17:33<88:55:04, 64.89s/it]

[67] train loss: 0.003440 
[67] [94mvalidate[0m loss: 0.003492
Validation loss decreased (0.003484 --> 0.003440).  Saving model ...


  1%|▏         | 68/5000 [1:18:37<88:29:16, 64.59s/it]

[68] train loss: 0.003418 
[68] [94mvalidate[0m loss: 0.003417
Validation loss decreased (0.003440 --> 0.003418).  Saving model ...


  1%|▏         | 69/5000 [1:19:40<87:55:50, 64.20s/it]

[69] train loss: 0.003380 
[69] [94mvalidate[0m loss: 0.003988
Validation loss decreased (0.003418 --> 0.003380).  Saving model ...


  1%|▏         | 70/5000 [1:20:45<88:06:49, 64.34s/it]

[70] train loss: 0.003361 
[70] [94mvalidate[0m loss: 0.003275
Validation loss decreased (0.003380 --> 0.003361).  Saving model ...


  1%|▏         | 71/5000 [1:21:48<87:33:45, 63.95s/it]

[71] train loss: 0.003315 
[71] [94mvalidate[0m loss: 0.003916
Validation loss decreased (0.003361 --> 0.003315).  Saving model ...


  1%|▏         | 72/5000 [1:22:50<86:48:46, 63.42s/it]

[72] train loss: 0.003290 
[72] [94mvalidate[0m loss: 0.003383
Validation loss decreased (0.003315 --> 0.003290).  Saving model ...


  1%|▏         | 73/5000 [1:23:53<86:45:21, 63.39s/it]

[73] train loss: 0.003266 
[73] [94mvalidate[0m loss: 0.003102
Validation loss decreased (0.003290 --> 0.003266).  Saving model ...


  1%|▏         | 74/5000 [1:24:57<86:46:45, 63.42s/it]

[74] train loss: 0.003240 
[74] [94mvalidate[0m loss: 0.003322
Validation loss decreased (0.003266 --> 0.003240).  Saving model ...


  2%|▏         | 75/5000 [1:26:00<86:39:47, 63.35s/it]

[75] train loss: 0.003213 
[75] [94mvalidate[0m loss: 0.003305
Validation loss decreased (0.003240 --> 0.003213).  Saving model ...


  2%|▏         | 76/5000 [1:27:04<86:47:50, 63.46s/it]

[76] train loss: 0.003179 
[76] [94mvalidate[0m loss: 0.003602
Validation loss decreased (0.003213 --> 0.003179).  Saving model ...


  2%|▏         | 77/5000 [1:28:08<87:08:58, 63.73s/it]

[77] train loss: 0.003150 
[77] [94mvalidate[0m loss: 0.004089
Validation loss decreased (0.003179 --> 0.003150).  Saving model ...


  2%|▏         | 78/5000 [1:29:11<86:40:18, 63.39s/it]

[78] train loss: 0.003131 
[78] [94mvalidate[0m loss: 0.003320
Validation loss decreased (0.003150 --> 0.003131).  Saving model ...


  2%|▏         | 79/5000 [1:30:14<86:30:07, 63.28s/it]

[79] train loss: 0.003107 
[79] [94mvalidate[0m loss: 0.003007
Validation loss decreased (0.003131 --> 0.003107).  Saving model ...


  2%|▏         | 81/5000 [1:32:22<87:05:26, 63.74s/it]

[81] train loss: 0.003053 
[81] [94mvalidate[0m loss: 0.002965
Validation loss decreased (0.003058 --> 0.003053).  Saving model ...


  2%|▏         | 82/5000 [1:33:25<86:52:49, 63.60s/it]

[82] train loss: 0.003010 
[82] [94mvalidate[0m loss: 0.002860
Validation loss decreased (0.003053 --> 0.003010).  Saving model ...


  2%|▏         | 83/5000 [1:34:28<86:21:58, 63.23s/it]

[83] train loss: 0.002994 
[83] [94mvalidate[0m loss: 0.003364
Validation loss decreased (0.003010 --> 0.002994).  Saving model ...


  2%|▏         | 84/5000 [1:35:32<86:38:53, 63.45s/it]

[84] train loss: 0.002980 
[84] [94mvalidate[0m loss: 0.002945
Validation loss decreased (0.002994 --> 0.002980).  Saving model ...


  2%|▏         | 85/5000 [1:36:36<87:07:06, 63.81s/it]

[85] train loss: 0.002949 
[85] [94mvalidate[0m loss: 0.003091
Validation loss decreased (0.002980 --> 0.002949).  Saving model ...


  2%|▏         | 86/5000 [1:37:40<87:08:09, 63.84s/it]

[86] train loss: 0.002929 
[86] [94mvalidate[0m loss: 0.002727
Validation loss decreased (0.002949 --> 0.002929).  Saving model ...


  2%|▏         | 87/5000 [1:38:43<86:46:40, 63.59s/it]

[87] train loss: 0.002896 
[87] [94mvalidate[0m loss: 0.002838
Validation loss decreased (0.002929 --> 0.002896).  Saving model ...


  2%|▏         | 88/5000 [1:39:47<86:43:41, 63.56s/it]

[88] train loss: 0.002898 
[88] [94mvalidate[0m loss: 0.003202
EarlyStopping counter: 1 out of 20


  2%|▏         | 89/5000 [1:40:51<86:57:36, 63.75s/it]

[89] train loss: 0.002859 
[89] [94mvalidate[0m loss: 0.003146
Validation loss decreased (0.002896 --> 0.002859).  Saving model ...


  2%|▏         | 90/5000 [1:41:53<86:20:14, 63.30s/it]

[90] train loss: 0.002825 
[90] [94mvalidate[0m loss: 0.003389
Validation loss decreased (0.002859 --> 0.002825).  Saving model ...


  2%|▏         | 91/5000 [1:42:56<85:58:52, 63.05s/it]

[91] train loss: 0.002827 
[91] [94mvalidate[0m loss: 0.003124
EarlyStopping counter: 1 out of 20


  2%|▏         | 92/5000 [1:43:59<86:05:20, 63.15s/it]

[92] train loss: 0.002781 
[92] [94mvalidate[0m loss: 0.003162
Validation loss decreased (0.002825 --> 0.002781).  Saving model ...


  2%|▏         | 93/5000 [1:45:05<87:25:21, 64.14s/it]

[93] train loss: 0.002775 
[93] [94mvalidate[0m loss: 0.003459
Validation loss decreased (0.002781 --> 0.002775).  Saving model ...


  2%|▏         | 94/5000 [1:46:07<86:32:11, 63.50s/it]

[94] train loss: 0.002760 
[94] [94mvalidate[0m loss: 0.002969
Validation loss decreased (0.002775 --> 0.002760).  Saving model ...


  2%|▏         | 95/5000 [1:47:17<89:11:35, 65.46s/it]

[95] train loss: 0.002748 
[95] [94mvalidate[0m loss: 0.002948
Validation loss decreased (0.002760 --> 0.002748).  Saving model ...


  2%|▏         | 96/5000 [1:48:28<91:23:25, 67.09s/it]

[96] train loss: 0.002714 
[96] [94mvalidate[0m loss: 0.002991
Validation loss decreased (0.002748 --> 0.002714).  Saving model ...


  2%|▏         | 97/5000 [1:49:40<93:13:11, 68.45s/it]

[97] train loss: 0.002704 
[97] [94mvalidate[0m loss: 0.002715
Validation loss decreased (0.002714 --> 0.002704).  Saving model ...


  2%|▏         | 98/5000 [1:50:51<94:22:40, 69.31s/it]

[98] train loss: 0.002691 
[98] [94mvalidate[0m loss: 0.002551
Validation loss decreased (0.002704 --> 0.002691).  Saving model ...


  2%|▏         | 99/5000 [1:52:03<95:10:51, 69.91s/it]

[99] train loss: 0.002652 
[99] [94mvalidate[0m loss: 0.002554
Validation loss decreased (0.002691 --> 0.002652).  Saving model ...


  2%|▏         | 100/5000 [1:53:12<95:06:37, 69.88s/it]

[100] train loss: 0.002640 
[100] [94mvalidate[0m loss: 0.002955
Validation loss decreased (0.002652 --> 0.002640).  Saving model ...


  2%|▏         | 101/5000 [1:54:22<94:51:33, 69.71s/it]

[101] train loss: 0.002614 
[101] [94mvalidate[0m loss: 0.002657
Validation loss decreased (0.002640 --> 0.002614).  Saving model ...


  2%|▏         | 102/5000 [1:55:34<95:52:53, 70.47s/it]

[102] train loss: 0.002616 
[102] [94mvalidate[0m loss: 0.002574
EarlyStopping counter: 1 out of 20


  2%|▏         | 103/5000 [1:56:45<96:01:18, 70.59s/it]

[103] train loss: 0.002595 
[103] [94mvalidate[0m loss: 0.002811
Validation loss decreased (0.002614 --> 0.002595).  Saving model ...


  2%|▏         | 104/5000 [1:57:57<96:35:42, 71.03s/it]

[104] train loss: 0.002572 
[104] [94mvalidate[0m loss: 0.002929
Validation loss decreased (0.002595 --> 0.002572).  Saving model ...


  2%|▏         | 105/5000 [1:59:08<96:45:52, 71.17s/it]

[105] train loss: 0.002563 
[105] [94mvalidate[0m loss: 0.002826
Validation loss decreased (0.002572 --> 0.002563).  Saving model ...


  2%|▏         | 106/5000 [2:00:22<97:39:45, 71.84s/it]

[106] train loss: 0.002537 
[106] [94mvalidate[0m loss: 0.003518
Validation loss decreased (0.002563 --> 0.002537).  Saving model ...


  2%|▏         | 107/5000 [2:01:34<97:42:56, 71.89s/it]

[107] train loss: 0.002533 
[107] [94mvalidate[0m loss: 0.002612
Validation loss decreased (0.002537 --> 0.002533).  Saving model ...


  2%|▏         | 108/5000 [2:02:45<97:35:13, 71.81s/it]

[108] train loss: 0.002515 
[108] [94mvalidate[0m loss: 0.002767
Validation loss decreased (0.002533 --> 0.002515).  Saving model ...


  2%|▏         | 109/5000 [2:04:01<98:58:11, 72.85s/it]

[109] train loss: 0.002504 
[109] [94mvalidate[0m loss: 0.002976
Validation loss decreased (0.002515 --> 0.002504).  Saving model ...


  2%|▏         | 110/5000 [2:05:11<98:06:16, 72.22s/it]

[110] train loss: 0.001485 
[110] [94mvalidate[0m loss: 0.001606
Validation loss decreased (0.002504 --> 0.001485).  Saving model ...


  2%|▏         | 111/5000 [2:06:23<97:46:32, 72.00s/it]

[111] train loss: 0.001414 
[111] [94mvalidate[0m loss: 0.001578
Validation loss decreased (0.001485 --> 0.001414).  Saving model ...


  2%|▏         | 112/5000 [2:07:34<97:31:17, 71.82s/it]

[112] train loss: 0.001384 
[112] [94mvalidate[0m loss: 0.001628
Validation loss decreased (0.001414 --> 0.001384).  Saving model ...


  2%|▏         | 113/5000 [2:08:45<96:51:19, 71.35s/it]

[113] train loss: 0.001365 
[113] [94mvalidate[0m loss: 0.001510
Validation loss decreased (0.001384 --> 0.001365).  Saving model ...


  2%|▏         | 114/5000 [2:09:55<96:23:22, 71.02s/it]

[114] train loss: 0.001350 
[114] [94mvalidate[0m loss: 0.001504
Validation loss decreased (0.001365 --> 0.001350).  Saving model ...


  2%|▏         | 115/5000 [2:11:06<96:31:08, 71.13s/it]

[115] train loss: 0.001335 
[115] [94mvalidate[0m loss: 0.001556
Validation loss decreased (0.001350 --> 0.001335).  Saving model ...


  2%|▏         | 116/5000 [2:12:17<96:16:32, 70.96s/it]

[116] train loss: 0.001322 
[116] [94mvalidate[0m loss: 0.001444
Validation loss decreased (0.001335 --> 0.001322).  Saving model ...


  2%|▏         | 117/5000 [2:13:28<96:31:51, 71.17s/it]

[117] train loss: 0.001310 
[117] [94mvalidate[0m loss: 0.001464
Validation loss decreased (0.001322 --> 0.001310).  Saving model ...


  2%|▏         | 118/5000 [2:14:38<95:58:23, 70.77s/it]

[118] train loss: 0.001303 
[118] [94mvalidate[0m loss: 0.001498
Validation loss decreased (0.001310 --> 0.001303).  Saving model ...


  2%|▏         | 119/5000 [2:15:40<92:22:18, 68.13s/it]

[119] train loss: 0.001292 
[119] [94mvalidate[0m loss: 0.001460
Validation loss decreased (0.001303 --> 0.001292).  Saving model ...


  2%|▏         | 120/5000 [2:16:45<91:10:32, 67.26s/it]

[120] train loss: 0.001287 
[120] [94mvalidate[0m loss: 0.001414
Validation loss decreased (0.001292 --> 0.001287).  Saving model ...


  2%|▏         | 121/5000 [2:17:48<89:02:59, 65.71s/it]

[121] train loss: 0.001275 
[121] [94mvalidate[0m loss: 0.001466
Validation loss decreased (0.001287 --> 0.001275).  Saving model ...


  2%|▏         | 122/5000 [2:18:51<88:02:56, 64.98s/it]

[122] train loss: 0.001269 
[122] [94mvalidate[0m loss: 0.001470
Validation loss decreased (0.001275 --> 0.001269).  Saving model ...


  2%|▏         | 123/5000 [2:19:55<87:49:23, 64.83s/it]

[123] train loss: 0.001260 
[123] [94mvalidate[0m loss: 0.001449
Validation loss decreased (0.001269 --> 0.001260).  Saving model ...


  2%|▏         | 124/5000 [2:21:01<88:08:20, 65.07s/it]

[124] train loss: 0.001257 
[124] [94mvalidate[0m loss: 0.001411
Validation loss decreased (0.001260 --> 0.001257).  Saving model ...


  2%|▎         | 125/5000 [2:22:05<87:47:24, 64.83s/it]

[125] train loss: 0.001248 
[125] [94mvalidate[0m loss: 0.001394
Validation loss decreased (0.001257 --> 0.001248).  Saving model ...


  3%|▎         | 126/5000 [2:23:08<87:03:39, 64.30s/it]

[126] train loss: 0.001241 
[126] [94mvalidate[0m loss: 0.001490
Validation loss decreased (0.001248 --> 0.001241).  Saving model ...


  3%|▎         | 127/5000 [2:24:10<86:04:37, 63.59s/it]

[127] train loss: 0.001238 
[127] [94mvalidate[0m loss: 0.001378
Validation loss decreased (0.001241 --> 0.001238).  Saving model ...


  3%|▎         | 128/5000 [2:25:15<86:32:16, 63.94s/it]

[128] train loss: 0.001230 
[128] [94mvalidate[0m loss: 0.001429
Validation loss decreased (0.001238 --> 0.001230).  Saving model ...


  3%|▎         | 129/5000 [2:26:18<86:19:20, 63.80s/it]

[129] train loss: 0.001225 
[129] [94mvalidate[0m loss: 0.001378
Validation loss decreased (0.001230 --> 0.001225).  Saving model ...


  3%|▎         | 130/5000 [2:27:23<86:47:29, 64.16s/it]

[130] train loss: 0.001221 
[130] [94mvalidate[0m loss: 0.001399
Validation loss decreased (0.001225 --> 0.001221).  Saving model ...


  3%|▎         | 133/5000 [2:30:35<86:22:41, 63.89s/it]

[133] train loss: 0.001204 
[133] [94mvalidate[0m loss: 0.001393
Validation loss decreased (0.001209 --> 0.001204).  Saving model ...


  3%|▎         | 134/5000 [2:31:40<86:37:28, 64.09s/it]

[134] train loss: 0.001203 
[134] [94mvalidate[0m loss: 0.001587
Validation loss decreased (0.001204 --> 0.001203).  Saving model ...


  3%|▎         | 135/5000 [2:32:44<86:24:13, 63.94s/it]

[135] train loss: 0.001194 
[135] [94mvalidate[0m loss: 0.001338
Validation loss decreased (0.001203 --> 0.001194).  Saving model ...


  3%|▎         | 136/5000 [2:33:47<86:02:51, 63.69s/it]

[136] train loss: 0.001190 
[136] [94mvalidate[0m loss: 0.001363
Validation loss decreased (0.001194 --> 0.001190).  Saving model ...


  3%|▎         | 137/5000 [2:34:50<85:54:57, 63.60s/it]

[137] train loss: 0.001187 
[137] [94mvalidate[0m loss: 0.001410
Validation loss decreased (0.001190 --> 0.001187).  Saving model ...


  3%|▎         | 138/5000 [2:35:52<85:19:14, 63.17s/it]

[138] train loss: 0.001184 
[138] [94mvalidate[0m loss: 0.001332
Validation loss decreased (0.001187 --> 0.001184).  Saving model ...


  3%|▎         | 139/5000 [2:36:55<84:57:05, 62.91s/it]

[139] train loss: 0.001178 
[139] [94mvalidate[0m loss: 0.001295
Validation loss decreased (0.001184 --> 0.001178).  Saving model ...


  3%|▎         | 140/5000 [2:37:59<85:39:34, 63.45s/it]

[140] train loss: 0.001177 
[140] [94mvalidate[0m loss: 0.001355
Validation loss decreased (0.001178 --> 0.001177).  Saving model ...


  3%|▎         | 141/5000 [2:39:02<85:19:59, 63.22s/it]

[141] train loss: 0.001175 
[141] [94mvalidate[0m loss: 0.001315
Validation loss decreased (0.001177 --> 0.001175).  Saving model ...


  3%|▎         | 142/5000 [2:40:05<85:19:14, 63.23s/it]

[142] train loss: 0.001168 
[142] [94mvalidate[0m loss: 0.001290
Validation loss decreased (0.001175 --> 0.001168).  Saving model ...


  3%|▎         | 143/5000 [2:41:09<85:23:11, 63.29s/it]

[143] train loss: 0.001165 
[143] [94mvalidate[0m loss: 0.001294
Validation loss decreased (0.001168 --> 0.001165).  Saving model ...


  3%|▎         | 144/5000 [2:42:12<85:17:12, 63.23s/it]

[144] train loss: 0.001162 
[144] [94mvalidate[0m loss: 0.001292
Validation loss decreased (0.001165 --> 0.001162).  Saving model ...


  3%|▎         | 145/5000 [2:43:15<85:16:33, 63.23s/it]

[145] train loss: 0.001155 
[145] [94mvalidate[0m loss: 0.001311
Validation loss decreased (0.001162 --> 0.001155).  Saving model ...


  3%|▎         | 146/5000 [2:44:18<85:04:24, 63.10s/it]

[146] train loss: 0.001153 
[146] [94mvalidate[0m loss: 0.001278
Validation loss decreased (0.001155 --> 0.001153).  Saving model ...


  3%|▎         | 147/5000 [2:45:19<84:31:07, 62.70s/it]

[147] train loss: 0.001149 
[147] [94mvalidate[0m loss: 0.001373
Validation loss decreased (0.001153 --> 0.001149).  Saving model ...


  3%|▎         | 149/5000 [2:47:28<85:38:46, 63.56s/it]

[149] train loss: 0.001143 
[149] [94mvalidate[0m loss: 0.001296
Validation loss decreased (0.001144 --> 0.001143).  Saving model ...


  3%|▎         | 150/5000 [2:48:33<86:01:09, 63.85s/it]

[150] train loss: 0.001140 
[150] [94mvalidate[0m loss: 0.001268
Validation loss decreased (0.001143 --> 0.001140).  Saving model ...


  3%|▎         | 151/5000 [2:49:35<85:20:44, 63.36s/it]

[151] train loss: 0.001137 
[151] [94mvalidate[0m loss: 0.001425
Validation loss decreased (0.001140 --> 0.001137).  Saving model ...


  3%|▎         | 152/5000 [2:50:37<84:58:42, 63.10s/it]

[152] train loss: 0.001132 
[152] [94mvalidate[0m loss: 0.001260
Validation loss decreased (0.001137 --> 0.001132).  Saving model ...


  3%|▎         | 153/5000 [2:51:40<84:43:17, 62.92s/it]

[153] train loss: 0.001128 
[153] [94mvalidate[0m loss: 0.001313
Validation loss decreased (0.001132 --> 0.001128).  Saving model ...


  3%|▎         | 154/5000 [2:52:43<84:43:56, 62.95s/it]

[154] train loss: 0.001126 
[154] [94mvalidate[0m loss: 0.001260
Validation loss decreased (0.001128 --> 0.001126).  Saving model ...


  3%|▎         | 155/5000 [2:53:48<85:32:14, 63.56s/it]

[155] train loss: 0.001123 
[155] [94mvalidate[0m loss: 0.001309
Validation loss decreased (0.001126 --> 0.001123).  Saving model ...


  3%|▎         | 156/5000 [2:54:50<85:04:46, 63.23s/it]

[156] train loss: 0.001119 
[156] [94mvalidate[0m loss: 0.001248
Validation loss decreased (0.001123 --> 0.001119).  Saving model ...


  3%|▎         | 157/5000 [2:55:54<85:18:46, 63.42s/it]

[157] train loss: 0.001117 
[157] [94mvalidate[0m loss: 0.001228
Validation loss decreased (0.001119 --> 0.001117).  Saving model ...


  3%|▎         | 158/5000 [2:56:57<84:56:37, 63.16s/it]

[158] train loss: 0.001114 
[158] [94mvalidate[0m loss: 0.001352
Validation loss decreased (0.001117 --> 0.001114).  Saving model ...


  3%|▎         | 159/5000 [2:58:00<84:59:09, 63.20s/it]

[159] train loss: 0.001111 
[159] [94mvalidate[0m loss: 0.001222
Validation loss decreased (0.001114 --> 0.001111).  Saving model ...


  3%|▎         | 160/5000 [2:59:06<85:57:31, 63.94s/it]

[160] train loss: 0.001106 
[160] [94mvalidate[0m loss: 0.001237
Validation loss decreased (0.001111 --> 0.001106).  Saving model ...


  3%|▎         | 161/5000 [3:00:11<86:28:50, 64.34s/it]

[161] train loss: 0.001105 
[161] [94mvalidate[0m loss: 0.001244
Validation loss decreased (0.001106 --> 0.001105).  Saving model ...


  3%|▎         | 162/5000 [3:01:14<85:49:54, 63.87s/it]

[162] train loss: 0.001103 
[162] [94mvalidate[0m loss: 0.001307
Validation loss decreased (0.001105 --> 0.001103).  Saving model ...


  3%|▎         | 163/5000 [3:02:18<85:59:35, 64.00s/it]

[163] train loss: 0.001101 
[163] [94mvalidate[0m loss: 0.001298
Validation loss decreased (0.001103 --> 0.001101).  Saving model ...


  3%|▎         | 164/5000 [3:03:21<85:36:15, 63.73s/it]

[164] train loss: 0.001097 
[164] [94mvalidate[0m loss: 0.001300
Validation loss decreased (0.001101 --> 0.001097).  Saving model ...


  3%|▎         | 166/5000 [3:05:30<85:58:04, 64.02s/it]

[166] train loss: 0.001092 
[166] [94mvalidate[0m loss: 0.001265
Validation loss decreased (0.001096 --> 0.001092).  Saving model ...


  3%|▎         | 167/5000 [3:06:33<85:25:39, 63.63s/it]

[167] train loss: 0.001090 
[167] [94mvalidate[0m loss: 0.001304
Validation loss decreased (0.001092 --> 0.001090).  Saving model ...


  3%|▎         | 168/5000 [3:07:35<85:05:10, 63.39s/it]

[168] train loss: 0.001086 
[168] [94mvalidate[0m loss: 0.001233
Validation loss decreased (0.001090 --> 0.001086).  Saving model ...


  3%|▎         | 169/5000 [3:08:38<84:50:58, 63.23s/it]

[169] train loss: 0.001084 
[169] [94mvalidate[0m loss: 0.001343
Validation loss decreased (0.001086 --> 0.001084).  Saving model ...


  3%|▎         | 170/5000 [3:09:41<84:30:13, 62.98s/it]

[170] train loss: 0.001083 
[170] [94mvalidate[0m loss: 0.001281
Validation loss decreased (0.001084 --> 0.001083).  Saving model ...


  3%|▎         | 171/5000 [3:10:43<84:03:13, 62.66s/it]

[171] train loss: 0.000944 
[171] [94mvalidate[0m loss: 0.001112
Validation loss decreased (0.001083 --> 0.000944).  Saving model ...


  3%|▎         | 172/5000 [3:11:46<84:09:47, 62.76s/it]

[172] train loss: 0.000937 
[172] [94mvalidate[0m loss: 0.001108
Validation loss decreased (0.000944 --> 0.000937).  Saving model ...


  3%|▎         | 173/5000 [3:12:52<85:30:35, 63.77s/it]

[173] train loss: 0.000934 
[173] [94mvalidate[0m loss: 0.001112
Validation loss decreased (0.000937 --> 0.000934).  Saving model ...


  3%|▎         | 174/5000 [3:13:57<85:59:31, 64.15s/it]

[174] train loss: 0.000932 
[174] [94mvalidate[0m loss: 0.001109
Validation loss decreased (0.000934 --> 0.000932).  Saving model ...


  4%|▎         | 175/5000 [3:15:01<86:05:49, 64.24s/it]

[175] train loss: 0.000931 
[175] [94mvalidate[0m loss: 0.001108
Validation loss decreased (0.000932 --> 0.000931).  Saving model ...


  4%|▎         | 176/5000 [3:16:04<85:33:45, 63.85s/it]

[176] train loss: 0.000930 
[176] [94mvalidate[0m loss: 0.001104
Validation loss decreased (0.000931 --> 0.000930).  Saving model ...


  4%|▎         | 177/5000 [3:17:09<85:47:18, 64.03s/it]

[177] train loss: 0.000929 
[177] [94mvalidate[0m loss: 0.001104
Validation loss decreased (0.000930 --> 0.000929).  Saving model ...


  4%|▎         | 178/5000 [3:18:12<85:35:19, 63.90s/it]

[178] train loss: 0.000928 
[178] [94mvalidate[0m loss: 0.001101
Validation loss decreased (0.000929 --> 0.000928).  Saving model ...


  4%|▎         | 179/5000 [3:19:17<85:48:18, 64.07s/it]

[179] train loss: 0.000926 
[179] [94mvalidate[0m loss: 0.001094
Validation loss decreased (0.000928 --> 0.000926).  Saving model ...


  4%|▎         | 180/5000 [3:20:21<85:44:19, 64.04s/it]

[180] train loss: 0.000925 
[180] [94mvalidate[0m loss: 0.001092
Validation loss decreased (0.000926 --> 0.000925).  Saving model ...


  4%|▎         | 181/5000 [3:21:24<85:26:01, 63.82s/it]

[181] train loss: 0.000925 
[181] [94mvalidate[0m loss: 0.001098
Validation loss decreased (0.000925 --> 0.000925).  Saving model ...


  4%|▎         | 184/5000 [3:24:33<84:46:58, 63.38s/it]

[184] train loss: 0.000922 
[184] [94mvalidate[0m loss: 0.001094
Validation loss decreased (0.000923 --> 0.000922).  Saving model ...


  4%|▎         | 185/5000 [3:25:38<85:19:13, 63.79s/it]

[185] train loss: 0.000921 
[185] [94mvalidate[0m loss: 0.001101
Validation loss decreased (0.000922 --> 0.000921).  Saving model ...


  4%|▎         | 186/5000 [3:26:41<84:57:33, 63.53s/it]

[186] train loss: 0.000921 
[186] [94mvalidate[0m loss: 0.001090
Validation loss decreased (0.000921 --> 0.000921).  Saving model ...


  4%|▎         | 187/5000 [3:27:45<85:17:03, 63.79s/it]

[187] train loss: 0.000920 
[187] [94mvalidate[0m loss: 0.001089
Validation loss decreased (0.000921 --> 0.000920).  Saving model ...


  4%|▍         | 188/5000 [3:28:45<83:37:53, 62.57s/it]

[188] train loss: 0.000920 
[188] [94mvalidate[0m loss: 0.001088
Validation loss decreased (0.000920 --> 0.000920).  Saving model ...


  4%|▍         | 189/5000 [3:29:45<82:45:36, 61.93s/it]

[189] train loss: 0.000919 
[189] [94mvalidate[0m loss: 0.001091
Validation loss decreased (0.000920 --> 0.000919).  Saving model ...


  4%|▍         | 190/5000 [3:30:49<83:37:56, 62.59s/it]

[190] train loss: 0.000918 
[190] [94mvalidate[0m loss: 0.001097
Validation loss decreased (0.000919 --> 0.000918).  Saving model ...


  4%|▍         | 191/5000 [3:31:50<82:53:42, 62.06s/it]

[191] train loss: 0.000918 
[191] [94mvalidate[0m loss: 0.001128
EarlyStopping counter: 1 out of 20


  4%|▍         | 192/5000 [3:32:55<83:55:38, 62.84s/it]

[192] train loss: 0.000916 
[192] [94mvalidate[0m loss: 0.001101
Validation loss decreased (0.000918 --> 0.000916).  Saving model ...


  4%|▍         | 193/5000 [3:33:59<84:15:09, 63.10s/it]

[193] train loss: 0.000916 
[193] [94mvalidate[0m loss: 0.001128
Validation loss decreased (0.000916 --> 0.000916).  Saving model ...


  4%|▍         | 194/5000 [3:34:59<83:09:32, 62.29s/it]

[194] train loss: 0.000915 
[194] [94mvalidate[0m loss: 0.001095
Validation loss decreased (0.000916 --> 0.000915).  Saving model ...


  4%|▍         | 195/5000 [3:36:05<84:46:59, 63.52s/it]

[195] train loss: 0.000915 
[195] [94mvalidate[0m loss: 0.001083
Validation loss decreased (0.000915 --> 0.000915).  Saving model ...


  4%|▍         | 196/5000 [3:37:11<85:29:22, 64.06s/it]

[196] train loss: 0.000914 
[196] [94mvalidate[0m loss: 0.001096
Validation loss decreased (0.000915 --> 0.000914).  Saving model ...


  4%|▍         | 197/5000 [3:38:12<84:25:50, 63.28s/it]

[197] train loss: 0.000914 
[197] [94mvalidate[0m loss: 0.001090
Validation loss decreased (0.000914 --> 0.000914).  Saving model ...


  4%|▍         | 198/5000 [3:39:12<82:49:47, 62.10s/it]

[198] train loss: 0.000913 
[198] [94mvalidate[0m loss: 0.001084
Validation loss decreased (0.000914 --> 0.000913).  Saving model ...


  4%|▍         | 201/5000 [3:42:23<84:05:43, 63.08s/it]

[201] train loss: 0.000911 
[201] [94mvalidate[0m loss: 0.001083
Validation loss decreased (0.000912 --> 0.000911).  Saving model ...


  4%|▍         | 202/5000 [3:43:25<83:40:56, 62.79s/it]

[202] train loss: 0.000911 
[202] [94mvalidate[0m loss: 0.001083
Validation loss decreased (0.000911 --> 0.000911).  Saving model ...


  4%|▍         | 203/5000 [3:44:27<83:18:47, 62.52s/it]

[203] train loss: 0.000910 
[203] [94mvalidate[0m loss: 0.001082
Validation loss decreased (0.000911 --> 0.000910).  Saving model ...


  4%|▍         | 204/5000 [3:45:30<83:41:20, 62.82s/it]

[204] train loss: 0.000909 
[204] [94mvalidate[0m loss: 0.001085
Validation loss decreased (0.000910 --> 0.000909).  Saving model ...


  4%|▍         | 205/5000 [3:46:35<84:29:16, 63.43s/it]

[205] train loss: 0.000909 
[205] [94mvalidate[0m loss: 0.001076
Validation loss decreased (0.000909 --> 0.000909).  Saving model ...


  4%|▍         | 206/5000 [3:47:38<84:22:57, 63.37s/it]

[206] train loss: 0.000909 
[206] [94mvalidate[0m loss: 0.001092
EarlyStopping counter: 1 out of 20


  4%|▍         | 207/5000 [3:48:42<84:25:24, 63.41s/it]

[207] train loss: 0.000907 
[207] [94mvalidate[0m loss: 0.001106
Validation loss decreased (0.000909 --> 0.000907).  Saving model ...


  4%|▍         | 208/5000 [3:49:40<82:31:10, 61.99s/it]

[208] train loss: 0.000907 
[208] [94mvalidate[0m loss: 0.001103
Validation loss decreased (0.000907 --> 0.000907).  Saving model ...


  4%|▍         | 209/5000 [3:50:45<83:33:21, 62.78s/it]

[209] train loss: 0.000906 
[209] [94mvalidate[0m loss: 0.001091
Validation loss decreased (0.000907 --> 0.000906).  Saving model ...


  4%|▍         | 210/5000 [3:51:47<83:07:25, 62.47s/it]

[210] train loss: 0.000906 
[210] [94mvalidate[0m loss: 0.001086
Validation loss decreased (0.000906 --> 0.000906).  Saving model ...


  4%|▍         | 211/5000 [3:52:51<83:53:38, 63.07s/it]

[211] train loss: 0.000906 
[211] [94mvalidate[0m loss: 0.001089
Validation loss decreased (0.000906 --> 0.000906).  Saving model ...


  4%|▍         | 212/5000 [3:53:54<83:46:04, 62.98s/it]

[212] train loss: 0.000904 
[212] [94mvalidate[0m loss: 0.001075
Validation loss decreased (0.000906 --> 0.000904).  Saving model ...


  4%|▍         | 213/5000 [3:54:55<82:55:19, 62.36s/it]

[213] train loss: 0.000904 
[213] [94mvalidate[0m loss: 0.001089
Validation loss decreased (0.000904 --> 0.000904).  Saving model ...


  4%|▍         | 214/5000 [3:55:57<83:00:43, 62.44s/it]

[214] train loss: 0.000904 
[214] [94mvalidate[0m loss: 0.001081
Validation loss decreased (0.000904 --> 0.000904).  Saving model ...


  4%|▍         | 215/5000 [3:57:04<84:33:47, 63.62s/it]

[215] train loss: 0.000904 
[215] [94mvalidate[0m loss: 0.001083
Validation loss decreased (0.000904 --> 0.000904).  Saving model ...


  4%|▍         | 216/5000 [3:58:04<83:01:59, 62.48s/it]

[216] train loss: 0.000903 
[216] [94mvalidate[0m loss: 0.001087
Validation loss decreased (0.000904 --> 0.000903).  Saving model ...


  4%|▍         | 217/5000 [3:59:07<83:27:39, 62.82s/it]

[217] train loss: 0.000902 
[217] [94mvalidate[0m loss: 0.001095
Validation loss decreased (0.000903 --> 0.000902).  Saving model ...


  4%|▍         | 219/5000 [4:01:12<83:18:18, 62.73s/it]

[219] train loss: 0.000901 
[219] [94mvalidate[0m loss: 0.001072
Validation loss decreased (0.000901 --> 0.000901).  Saving model ...


  4%|▍         | 220/5000 [4:02:11<81:41:26, 61.52s/it]

[220] train loss: 0.000901 
[220] [94mvalidate[0m loss: 0.001082
Validation loss decreased (0.000901 --> 0.000901).  Saving model ...


  4%|▍         | 221/5000 [4:03:13<81:48:12, 61.62s/it]

[221] train loss: 0.000900 
[221] [94mvalidate[0m loss: 0.001080
Validation loss decreased (0.000901 --> 0.000900).  Saving model ...


  4%|▍         | 222/5000 [4:04:13<81:09:13, 61.15s/it]

[222] train loss: 0.000900 
[222] [94mvalidate[0m loss: 0.001070
Validation loss decreased (0.000900 --> 0.000900).  Saving model ...


  4%|▍         | 223/5000 [4:05:17<82:23:38, 62.09s/it]

[223] train loss: 0.000899 
[223] [94mvalidate[0m loss: 0.001066
Validation loss decreased (0.000900 --> 0.000899).  Saving model ...


  4%|▍         | 224/5000 [4:06:18<81:38:52, 61.54s/it]

[224] train loss: 0.000898 
[224] [94mvalidate[0m loss: 0.001074
Validation loss decreased (0.000899 --> 0.000898).  Saving model ...


  4%|▍         | 225/5000 [4:07:20<81:52:55, 61.73s/it]

[225] train loss: 0.000899 
[225] [94mvalidate[0m loss: 0.001073
EarlyStopping counter: 1 out of 20


  5%|▍         | 226/5000 [4:08:20<81:28:00, 61.43s/it]

[226] train loss: 0.000898 
[226] [94mvalidate[0m loss: 0.001073
Validation loss decreased (0.000898 --> 0.000898).  Saving model ...


  5%|▍         | 227/5000 [4:09:22<81:36:51, 61.56s/it]

[227] train loss: 0.000897 
[227] [94mvalidate[0m loss: 0.001069
Validation loss decreased (0.000898 --> 0.000897).  Saving model ...


  5%|▍         | 228/5000 [4:10:23<81:11:22, 61.25s/it]

[228] train loss: 0.000896 
[228] [94mvalidate[0m loss: 0.001075
Validation loss decreased (0.000897 --> 0.000896).  Saving model ...


  5%|▍         | 229/5000 [4:11:23<80:32:31, 60.77s/it]

[229] train loss: 0.000896 
[229] [94mvalidate[0m loss: 0.001072
Validation loss decreased (0.000896 --> 0.000896).  Saving model ...


  5%|▍         | 230/5000 [4:12:24<80:51:36, 61.03s/it]

[230] train loss: 0.000896 
[230] [94mvalidate[0m loss: 0.001061
Validation loss decreased (0.000896 --> 0.000896).  Saving model ...


  5%|▍         | 231/5000 [4:13:24<80:21:11, 60.66s/it]

[231] train loss: 0.000895 
[231] [94mvalidate[0m loss: 0.001092
Validation loss decreased (0.000896 --> 0.000895).  Saving model ...


  5%|▍         | 232/5000 [4:14:26<80:41:33, 60.93s/it]

[232] train loss: 0.000895 
[232] [94mvalidate[0m loss: 0.001069
Validation loss decreased (0.000895 --> 0.000895).  Saving model ...


  5%|▍         | 233/5000 [4:15:25<80:06:16, 60.49s/it]

[233] train loss: 0.000894 
[233] [94mvalidate[0m loss: 0.001064
Validation loss decreased (0.000895 --> 0.000894).  Saving model ...


  5%|▍         | 234/5000 [4:16:25<79:52:20, 60.33s/it]

[234] train loss: 0.000893 
[234] [94mvalidate[0m loss: 0.001068
Validation loss decreased (0.000894 --> 0.000893).  Saving model ...


  5%|▍         | 237/5000 [4:19:34<81:49:16, 61.84s/it]

[237] train loss: 0.000892 
[237] [94mvalidate[0m loss: 0.001061
EarlyStopping counter: 1 out of 20


  5%|▍         | 238/5000 [4:20:35<81:32:15, 61.64s/it]

[238] train loss: 0.000892 
[238] [94mvalidate[0m loss: 0.001062
Validation loss decreased (0.000892 --> 0.000892).  Saving model ...


  5%|▍         | 239/5000 [4:21:41<83:10:56, 62.90s/it]

[239] train loss: 0.000891 
[239] [94mvalidate[0m loss: 0.001058
Validation loss decreased (0.000892 --> 0.000891).  Saving model ...


  5%|▍         | 240/5000 [4:22:43<82:49:06, 62.64s/it]

[240] train loss: 0.000891 
[240] [94mvalidate[0m loss: 0.001059
Validation loss decreased (0.000891 --> 0.000891).  Saving model ...


  5%|▍         | 241/5000 [4:23:44<82:15:11, 62.22s/it]

[241] train loss: 0.000890 
[241] [94mvalidate[0m loss: 0.001067
Validation loss decreased (0.000891 --> 0.000890).  Saving model ...


  5%|▍         | 242/5000 [4:24:45<81:34:28, 61.72s/it]

[242] train loss: 0.000889 
[242] [94mvalidate[0m loss: 0.001097
Validation loss decreased (0.000890 --> 0.000889).  Saving model ...


  5%|▍         | 245/5000 [4:27:58<83:23:59, 63.14s/it]

[245] train loss: 0.000888 
[245] [94mvalidate[0m loss: 0.001052
Validation loss decreased (0.000889 --> 0.000888).  Saving model ...


  5%|▍         | 246/5000 [4:29:01<83:20:27, 63.11s/it]

[246] train loss: 0.000888 
[246] [94mvalidate[0m loss: 0.001066
Validation loss decreased (0.000888 --> 0.000888).  Saving model ...


  5%|▍         | 247/5000 [4:30:02<82:38:41, 62.60s/it]

[247] train loss: 0.000887 
[247] [94mvalidate[0m loss: 0.001070
Validation loss decreased (0.000888 --> 0.000887).  Saving model ...


  5%|▍         | 248/5000 [4:31:07<83:23:58, 63.18s/it]

[248] train loss: 0.000887 
[248] [94mvalidate[0m loss: 0.001076
Validation loss decreased (0.000887 --> 0.000887).  Saving model ...


  5%|▍         | 249/5000 [4:32:09<82:51:29, 62.78s/it]

[249] train loss: 0.000887 
[249] [94mvalidate[0m loss: 0.001064
Validation loss decreased (0.000887 --> 0.000887).  Saving model ...


  5%|▌         | 250/5000 [4:33:12<83:13:13, 63.07s/it]

[250] train loss: 0.000886 
[250] [94mvalidate[0m loss: 0.001057
Validation loss decreased (0.000887 --> 0.000886).  Saving model ...


  5%|▌         | 251/5000 [4:34:13<82:23:31, 62.46s/it]

[251] train loss: 0.000885 
[251] [94mvalidate[0m loss: 0.001068
Validation loss decreased (0.000886 --> 0.000885).  Saving model ...


  5%|▌         | 252/5000 [4:35:15<81:56:55, 62.13s/it]

[252] train loss: 0.000885 
[252] [94mvalidate[0m loss: 0.001076
Validation loss decreased (0.000885 --> 0.000885).  Saving model ...


  5%|▌         | 253/5000 [4:36:16<81:48:02, 62.04s/it]

[253] train loss: 0.000885 
[253] [94mvalidate[0m loss: 0.001049
Validation loss decreased (0.000885 --> 0.000885).  Saving model ...


  5%|▌         | 254/5000 [4:37:17<81:16:37, 61.65s/it]

[254] train loss: 0.000884 
[254] [94mvalidate[0m loss: 0.001053
Validation loss decreased (0.000885 --> 0.000884).  Saving model ...


  5%|▌         | 255/5000 [4:38:20<81:31:43, 61.86s/it]

[255] train loss: 0.000884 
[255] [94mvalidate[0m loss: 0.001048
Validation loss decreased (0.000884 --> 0.000884).  Saving model ...


  5%|▌         | 256/5000 [4:39:22<81:49:36, 62.09s/it]

[256] train loss: 0.000883 
[256] [94mvalidate[0m loss: 0.001053
Validation loss decreased (0.000884 --> 0.000883).  Saving model ...


  5%|▌         | 257/5000 [4:40:25<81:53:23, 62.16s/it]

[257] train loss: 0.000883 
[257] [94mvalidate[0m loss: 0.001071
Validation loss decreased (0.000883 --> 0.000883).  Saving model ...


  5%|▌         | 258/5000 [4:41:25<81:13:15, 61.66s/it]

[258] train loss: 0.000883 
[258] [94mvalidate[0m loss: 0.001056
EarlyStopping counter: 1 out of 20


  5%|▌         | 259/5000 [4:42:26<81:06:02, 61.58s/it]

[259] train loss: 0.000882 
[259] [94mvalidate[0m loss: 0.001067
Validation loss decreased (0.000883 --> 0.000882).  Saving model ...


  5%|▌         | 260/5000 [4:43:28<80:57:28, 61.49s/it]

[260] train loss: 0.000882 
[260] [94mvalidate[0m loss: 0.001057
Validation loss decreased (0.000882 --> 0.000882).  Saving model ...


  5%|▌         | 263/5000 [4:46:35<81:25:13, 61.88s/it]

[263] train loss: 0.000880 
[263] [94mvalidate[0m loss: 0.001064
Validation loss decreased (0.000880 --> 0.000880).  Saving model ...


  5%|▌         | 264/5000 [4:47:41<82:41:46, 62.86s/it]

[264] train loss: 0.000879 
[264] [94mvalidate[0m loss: 0.001058
Validation loss decreased (0.000880 --> 0.000879).  Saving model ...


  5%|▌         | 265/5000 [4:48:48<84:16:43, 64.08s/it]

[265] train loss: 0.000879 
[265] [94mvalidate[0m loss: 0.001056
Validation loss decreased (0.000879 --> 0.000879).  Saving model ...


  5%|▌         | 266/5000 [4:49:49<83:12:31, 63.28s/it]

[266] train loss: 0.000878 
[266] [94mvalidate[0m loss: 0.001045
Validation loss decreased (0.000879 --> 0.000878).  Saving model ...


  5%|▌         | 267/5000 [4:50:50<82:20:49, 62.63s/it]

[267] train loss: 0.000878 
[267] [94mvalidate[0m loss: 0.001044
Validation loss decreased (0.000878 --> 0.000878).  Saving model ...


  5%|▌         | 268/5000 [4:51:56<83:48:37, 63.76s/it]

[268] train loss: 0.000878 
[268] [94mvalidate[0m loss: 0.001046
Validation loss decreased (0.000878 --> 0.000878).  Saving model ...


  5%|▌         | 269/5000 [4:53:00<83:33:42, 63.59s/it]

[269] train loss: 0.000877 
[269] [94mvalidate[0m loss: 0.001046
Validation loss decreased (0.000878 --> 0.000877).  Saving model ...


  5%|▌         | 270/5000 [4:54:00<82:15:18, 62.60s/it]

[270] train loss: 0.000877 
[270] [94mvalidate[0m loss: 0.001042
Validation loss decreased (0.000877 --> 0.000877).  Saving model ...


  5%|▌         | 271/5000 [4:55:03<82:34:41, 62.86s/it]

[271] train loss: 0.000877 
[271] [94mvalidate[0m loss: 0.001049
Validation loss decreased (0.000877 --> 0.000877).  Saving model ...


  5%|▌         | 272/5000 [4:56:04<81:34:04, 62.11s/it]

[272] train loss: 0.000876 
[272] [94mvalidate[0m loss: 0.001044
Validation loss decreased (0.000877 --> 0.000876).  Saving model ...


  5%|▌         | 273/5000 [4:57:07<82:06:24, 62.53s/it]

[273] train loss: 0.000876 
[273] [94mvalidate[0m loss: 0.001045
Validation loss decreased (0.000876 --> 0.000876).  Saving model ...


  5%|▌         | 274/5000 [4:58:12<82:45:33, 63.04s/it]

[274] train loss: 0.000876 
[274] [94mvalidate[0m loss: 0.001056
Validation loss decreased (0.000876 --> 0.000876).  Saving model ...


  6%|▌         | 275/5000 [4:59:10<80:55:06, 61.65s/it]

[275] train loss: 0.000875 
[275] [94mvalidate[0m loss: 0.001050
Validation loss decreased (0.000876 --> 0.000875).  Saving model ...


  6%|▌         | 276/5000 [5:00:10<80:10:40, 61.10s/it]

[276] train loss: 0.000875 
[276] [94mvalidate[0m loss: 0.001065
Validation loss decreased (0.000875 --> 0.000875).  Saving model ...


  6%|▌         | 277/5000 [5:01:13<80:49:34, 61.61s/it]

[277] train loss: 0.000874 
[277] [94mvalidate[0m loss: 0.001064
Validation loss decreased (0.000875 --> 0.000874).  Saving model ...


  6%|▌         | 278/5000 [5:02:15<81:13:30, 61.93s/it]

[278] train loss: 0.000874 
[278] [94mvalidate[0m loss: 0.001066
Validation loss decreased (0.000874 --> 0.000874).  Saving model ...


  6%|▌         | 281/5000 [5:05:24<82:23:48, 62.86s/it]

[281] train loss: 0.000872 
[281] [94mvalidate[0m loss: 0.001042
Validation loss decreased (0.000873 --> 0.000872).  Saving model ...


  6%|▌         | 282/5000 [5:06:27<82:24:56, 62.89s/it]

[282] train loss: 0.000872 
[282] [94mvalidate[0m loss: 0.001072
Validation loss decreased (0.000872 --> 0.000872).  Saving model ...


  6%|▌         | 283/5000 [5:07:28<81:40:33, 62.33s/it]

[283] train loss: 0.000872 
[283] [94mvalidate[0m loss: 0.001048
Validation loss decreased (0.000872 --> 0.000872).  Saving model ...


  6%|▌         | 284/5000 [5:08:28<80:44:34, 61.64s/it]

[284] train loss: 0.000871 
[284] [94mvalidate[0m loss: 0.001055
Validation loss decreased (0.000872 --> 0.000871).  Saving model ...


  6%|▌         | 285/5000 [5:09:30<80:44:28, 61.65s/it]

[285] train loss: 0.000871 
[285] [94mvalidate[0m loss: 0.001050
Validation loss decreased (0.000871 --> 0.000871).  Saving model ...


  6%|▌         | 286/5000 [5:10:32<80:48:02, 61.71s/it]

[286] train loss: 0.000870 
[286] [94mvalidate[0m loss: 0.001048
Validation loss decreased (0.000871 --> 0.000870).  Saving model ...


  6%|▌         | 287/5000 [5:11:33<80:34:47, 61.55s/it]

[287] train loss: 0.000870 
[287] [94mvalidate[0m loss: 0.001042
Validation loss decreased (0.000870 --> 0.000870).  Saving model ...


  6%|▌         | 288/5000 [5:12:39<82:19:45, 62.90s/it]

[288] train loss: 0.000870 
[288] [94mvalidate[0m loss: 0.001037
Validation loss decreased (0.000870 --> 0.000870).  Saving model ...


  6%|▌         | 289/5000 [5:13:44<83:03:57, 63.48s/it]

[289] train loss: 0.000869 
[289] [94mvalidate[0m loss: 0.001047
Validation loss decreased (0.000870 --> 0.000869).  Saving model ...


  6%|▌         | 290/5000 [5:14:44<81:29:10, 62.28s/it]

[290] train loss: 0.000869 
[290] [94mvalidate[0m loss: 0.001050
Validation loss decreased (0.000869 --> 0.000869).  Saving model ...


  6%|▌         | 291/5000 [5:15:48<82:08:51, 62.80s/it]

[291] train loss: 0.000868 
[291] [94mvalidate[0m loss: 0.001030
Validation loss decreased (0.000869 --> 0.000868).  Saving model ...


  6%|▌         | 292/5000 [5:16:48<81:12:34, 62.10s/it]

[292] train loss: 0.000868 
[292] [94mvalidate[0m loss: 0.001038
Validation loss decreased (0.000868 --> 0.000868).  Saving model ...


  6%|▌         | 293/5000 [5:17:50<81:01:53, 61.97s/it]

[293] train loss: 0.000867 
[293] [94mvalidate[0m loss: 0.001046
Validation loss decreased (0.000868 --> 0.000867).  Saving model ...


  6%|▌         | 294/5000 [5:18:51<80:50:14, 61.84s/it]

[294] train loss: 0.000867 
[294] [94mvalidate[0m loss: 0.001041
Validation loss decreased (0.000867 --> 0.000867).  Saving model ...


  6%|▌         | 295/5000 [5:19:54<81:03:38, 62.02s/it]

[295] train loss: 0.000867 
[295] [94mvalidate[0m loss: 0.001050
Validation loss decreased (0.000867 --> 0.000867).  Saving model ...


  6%|▌         | 296/5000 [5:20:59<82:32:36, 63.17s/it]

[296] train loss: 0.000866 
[296] [94mvalidate[0m loss: 0.001035
Validation loss decreased (0.000867 --> 0.000866).  Saving model ...


  6%|▌         | 298/5000 [5:23:11<84:00:24, 64.32s/it]

[298] train loss: 0.000865 
[298] [94mvalidate[0m loss: 0.001028
Validation loss decreased (0.000866 --> 0.000865).  Saving model ...


  6%|▌         | 299/5000 [5:24:12<82:51:06, 63.45s/it]

[299] train loss: 0.000865 
[299] [94mvalidate[0m loss: 0.001049
Validation loss decreased (0.000865 --> 0.000865).  Saving model ...


  6%|▌         | 300/5000 [5:25:16<82:44:44, 63.38s/it]

[300] train loss: 0.000865 
[300] [94mvalidate[0m loss: 0.001038
Validation loss decreased (0.000865 --> 0.000865).  Saving model ...


  6%|▌         | 301/5000 [5:26:19<82:52:13, 63.49s/it]

[301] train loss: 0.000864 
[301] [94mvalidate[0m loss: 0.001030
Validation loss decreased (0.000865 --> 0.000864).  Saving model ...


  6%|▌         | 302/5000 [5:27:23<82:46:07, 63.42s/it]

[302] train loss: 0.000864 
[302] [94mvalidate[0m loss: 0.001035
Validation loss decreased (0.000864 --> 0.000864).  Saving model ...


  6%|▌         | 303/5000 [5:28:23<81:31:22, 62.48s/it]

[303] train loss: 0.000864 
[303] [94mvalidate[0m loss: 0.001027
EarlyStopping counter: 1 out of 20


  6%|▌         | 304/5000 [5:29:25<81:31:56, 62.50s/it]

[304] train loss: 0.000863 
[304] [94mvalidate[0m loss: 0.001026
Validation loss decreased (0.000864 --> 0.000863).  Saving model ...


  6%|▌         | 305/5000 [5:30:31<82:50:04, 63.52s/it]

[305] train loss: 0.000863 
[305] [94mvalidate[0m loss: 0.001040
Validation loss decreased (0.000863 --> 0.000863).  Saving model ...


  6%|▌         | 306/5000 [5:31:38<84:13:50, 64.60s/it]

[306] train loss: 0.000862 
[306] [94mvalidate[0m loss: 0.001033
Validation loss decreased (0.000863 --> 0.000862).  Saving model ...


  6%|▌         | 307/5000 [5:32:38<82:13:01, 63.07s/it]

[307] train loss: 0.000862 
[307] [94mvalidate[0m loss: 0.001029
Validation loss decreased (0.000862 --> 0.000862).  Saving model ...


  6%|▌         | 308/5000 [5:33:39<81:21:25, 62.42s/it]

[308] train loss: 0.000861 
[308] [94mvalidate[0m loss: 0.001037
Validation loss decreased (0.000862 --> 0.000861).  Saving model ...


  6%|▌         | 309/5000 [5:34:43<81:52:02, 62.83s/it]

[309] train loss: 0.000861 
[309] [94mvalidate[0m loss: 0.001050
Validation loss decreased (0.000861 --> 0.000861).  Saving model ...


  6%|▌         | 310/5000 [5:35:43<81:03:00, 62.21s/it]

[310] train loss: 0.000861 
[310] [94mvalidate[0m loss: 0.001042
Validation loss decreased (0.000861 --> 0.000861).  Saving model ...


  6%|▌         | 311/5000 [5:36:44<80:23:23, 61.72s/it]

[311] train loss: 0.000860 
[311] [94mvalidate[0m loss: 0.001097
Validation loss decreased (0.000861 --> 0.000860).  Saving model ...


  6%|▌         | 312/5000 [5:37:45<80:13:30, 61.61s/it]

[312] train loss: 0.000860 
[312] [94mvalidate[0m loss: 0.001024
Validation loss decreased (0.000860 --> 0.000860).  Saving model ...
