In [1]:
# Starter code for the PatRec project
#   Team 2 (Omada 2) -- Grokfast experiment

In [2]:
# Mount Google Drive at /content/drive so the later cells can read the shared
# project folder and write results there. Only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Sanity check: list the shared project folder (grokfast.py and requirements.txt
# are expected here; later cells import/install from this location).
!ls /content/drive/MyDrive/PatRec_Project_Shared_Folder/

 grokfast.py				 Grokking_mnist_v1.ipynb   requirements.txt
 Groking_algo_v1.ipynb			 Grokking_qm9_v1.ipynb	   results
'Grokking and how to avoid it.gslides'	 __pycache__


In [4]:
import sys
# Put the shared Drive folder on the module search path so that modules stored
# there (grokfast.py, imported in a later cell) can be imported by name.
sys.path.append('/content/drive/MyDrive/PatRec_Project_Shared_Folder')

In [5]:
# Install the project's dependencies from the shared requirements file.
# `%pip` (instead of `!pip`) installs into the environment of the running
# kernel, whereas `!pip` runs in a subshell that may resolve a different pip.
%pip install -r /content/drive/MyDrive/PatRec_Project_Shared_Folder/requirements.txt

Collecting torch_geometric (from -r /content/drive/MyDrive/PatRec_Project_Shared_Folder/requirements.txt (line 4))
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1


In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import random_split
from torch_geometric.datasets import QM9
from torch_geometric.loader import DataLoader
from torch_geometric.nn import NNConv, global_add_pool

from sklearn.model_selection import train_test_split
from torch.utils.data import Subset

import tqdm
import numpy as np
import matplotlib.pyplot as plt

from argparse import ArgumentParser

In [9]:
from grokfast import gradfilter_ma, gradfilter_ema

In [10]:
# The Graph Neural Network (GNN) model used on the QM9 dataset
class ExampleNet(torch.nn.Module):
    """Small graph network that predicts one scalar per input graph.

    Architecture (in order): two ``NNConv`` layers (node features -> 32 -> 16),
    each driven by its own MLP over the edge attributes, a ``global_add_pool``
    over the nodes of every graph, and a two-layer dense head (16 -> 32 -> 1).

    Args:
        num_node_features: size of each node feature vector (``data.x``).
        num_edge_features: size of each edge attribute vector
            (``data.edge_attr``).
    """

    def __init__(self, num_node_features, num_edge_features):
        super().__init__()

        def edge_mlp(n_outputs):
            # Maps an edge attribute vector to `n_outputs` values; NNConv
            # receives this network as its `nn` argument.
            return nn.Sequential(
                nn.Linear(num_edge_features, 32),
                nn.ReLU(),
                nn.Linear(32, n_outputs))

        # NOTE: the construction order below is kept fixed on purpose -- it
        # determines both the order in which the seeded RNG is consumed and
        # the parameter order returned by `parameters()`.
        edge_net_1 = edge_mlp(num_node_features * 32)
        edge_net_2 = edge_mlp(32 * 16)
        self.conv1 = NNConv(num_node_features, 32, edge_net_1)
        self.conv2 = NNConv(32, 16, edge_net_2)
        self.fc_1 = nn.Linear(16, 32)
        self.out = nn.Linear(32, 1)

    def forward(self, data):
        """Run the network on a (batched) graph object.

        Args:
            data: object exposing ``x``, ``edge_index``, ``edge_attr`` and
                ``batch`` attributes.

        Returns:
            The output of the final linear layer (one row per pooled graph,
            one column).
        """
        edge_index, edge_attr = data.edge_index, data.edge_attr
        # Two graph convolutions, each followed by a ReLU.
        hidden = F.relu(self.conv1(data.x, edge_index, edge_attr))
        hidden = F.relu(self.conv2(hidden, edge_index, edge_attr))
        # Collapse node embeddings into one vector per graph.
        pooled = global_add_pool(hidden, data.batch)
        return self.out(F.relu(self.fc_1(pooled)))



In [11]:
#  L2 norm
def L2(model):
    """Return the sum of squared entries over all parameters of `model`.

    This is the squared L2 norm of the parameters (no square root is taken).
    The result is a tensor when the model has at least one parameter and the
    float ``0.`` otherwise.
    """
    squared_sums = (torch.sum(param ** 2) for param in model.parameters())
    return sum(squared_sums, 0.)

# for rescaling the parameters
def rescale(model, alpha):
    """Multiply every parameter of `model` by `alpha`.

    The scaled values are assigned to each parameter's ``.data``; nothing is
    returned.
    """
    for param in model.parameters():
        scaled = alpha * param.data
        param.data = scaled



In [12]:
import os

# Specify the path to save in Google Drive
# `results_dir` is read as a module-level global by main(), which writes its
# loss plots and checkpoint files there.
# NOTE(review): this is MyDrive/results, not the `results` directory inside
# PatRec_Project_Shared_Folder listed earlier -- confirm which one is intended.
results_dir = "/content/drive/MyDrive/results"
# Create the directory up front (no error if it already exists).
os.makedirs(results_dir, exist_ok=True)

In [17]:
def _chunked_means(values, chunk=20):
    """Average `values` over consecutive chunks of `chunk` entries.

    Returns ``(positions, means)`` where ``positions`` holds the 1-based index
    of the first entry of every chunk. A trailing partial chunk is averaged
    over the entries it contains, so any input length (including 0) works.
    """
    arr = np.asarray(values, dtype=float)
    starts = np.arange(0, arr.size, chunk)
    means = np.array([arr[s:s + chunk].mean() for s in starts], dtype=float)
    return starts + 1, means


def _evaluate(net, loader, device, target_idx):
    """Return (graph-weighted average MSE, list of per-batch MSE) on `loader`."""
    net.eval()
    batch_losses = []
    weighted_sum = 0.0
    n_graphs = 0
    # No gradients are needed for evaluation; skipping the autograd graph
    # saves memory and time without changing the computed losses.
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            output = net(batch)
            batch_loss = F.mse_loss(output, batch.y[:, target_idx].unsqueeze(1)).item()
            weighted_sum += batch_loss * batch.num_graphs
            n_graphs += batch.num_graphs
            batch_losses.append(batch_loss)
    return weighted_sum / n_graphs, batch_losses


def main(args):
    """Train `ExampleNet` on a subset of QM9, optionally with a Grokfast filter.

    The first `args.size` molecules of QM9 are split in half into a train and a
    test set. The network is initialised, its weights are multiplied by
    `args.init_scale`, and it is trained with AdamW on MSE for
    ``int(100 * 50000 / args.size)`` epochs. Every 100 epochs (and after the
    last one) a loss plot and a checkpoint dict are written to the module-level
    `results_dir`; a final summary figure is written there as well.

    Args:
        args: namespace with the attributes `seed`, `init_scale`, `size`,
            `batch_size`, `lr`, `weight_decay`, `filter` ("none" | "ma" |
            "ema"), `window_size`, `alpha`, `lamb` and `label`.

    Raises:
        ValueError: if `args.size` is smaller than 2 or `args.filter` is not
            one of the supported filter names.
    """
    if args.size < 2:
        raise ValueError(f"`size` must be at least 2 to build a train/test split, got {args.size}")
    if args.filter not in ("none", "ma", "ema"):
        raise ValueError(f"Invalid gradient filter type `{args.filter}`")

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    alpha = args.init_scale

    # Fewer samples -> more epochs, so the number of sample visits stays fixed.
    epochs = int(100 * 50000 / args.size)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load the QM9 small molecule dataset and keep the first `size` molecules.
    dset = QM9('.')
    dset = dset[:args.size]

    # 50/50 split. Deriving the second length from the first keeps the lengths
    # summing to len(dset) for odd sizes as well.
    n_train = len(dset) // 2
    n_test = len(dset) - n_train
    train_set, test_set = random_split(dset, [n_train, n_test])

    trainloader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True)
    testloader = DataLoader(test_set, batch_size=args.batch_size, shuffle=True)

    # initialize a network
    qm9_node_feats, qm9_edge_feats = 11, 4
    net = ExampleNet(qm9_node_feats, qm9_edge_feats)

    # initialize an optimizer with some reasonable parameters
    optimizer = torch.optim.AdamW(net.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    target_idx = 1  # index position of the polarizability label
    net.to(device)

    # Scale the freshly initialised weights by `init_scale`.
    rescale(net, alpha)

    train_best = 1e10
    test_best = 1e10

    train_losses, test_losses, train_avg_losses, test_avg_losses = [], [], [], []
    grads = None  # running state of the Grokfast gradient filter

    for total_epochs in tqdm.trange(epochs):
        net.train()
        epoch_loss = 0.0
        total_graphs_train = 0

        for batch in trainloader:
            batch = batch.to(device)
            optimizer.zero_grad()
            output = net(batch)
            loss = F.mse_loss(output, batch.y[:, target_idx].unsqueeze(1))
            loss.backward()

            # Optionally filter the gradients between backward() and step().
            if args.filter == "ma":
                grads = gradfilter_ma(net, grads=grads, window_size=args.window_size, lamb=args.lamb, trigger=False)
            elif args.filter == "ema":
                grads = gradfilter_ema(net, grads=grads, alpha=args.alpha, lamb=args.lamb)

            optimizer.step()

            batch_loss = loss.item()
            epoch_loss += batch_loss * batch.num_graphs
            total_graphs_train += batch.num_graphs
            train_losses.append(batch_loss)

        train_avg_loss = epoch_loss / total_graphs_train
        train_best = min(train_best, train_avg_loss)
        train_avg_losses.append(train_avg_loss)

        test_avg_loss, epoch_test_losses = _evaluate(net, testloader, device, target_idx)
        test_losses.extend(epoch_test_losses)
        test_best = min(test_best, test_avg_loss)
        test_avg_losses.append(test_avg_loss)

        tqdm.tqdm.write(f"Epochs: {total_epochs} | epoch avg. loss: {train_avg_loss:.3f} | "
                        f"test avg. loss: {test_avg_loss:.3f}")

        if (total_epochs + 1) % 100 == 0 or total_epochs == epochs - 1:
            # Periodic snapshot: per-epoch loss curves plus a checkpoint of
            # every logged series.
            epoch_axis = np.arange(len(train_avg_losses))
            fig, ax = plt.subplots()
            ax.plot(epoch_axis, train_avg_losses, label="train")
            ax.plot(epoch_axis, test_avg_losses, label="val")
            ax.legend()
            ax.set_title("QM9 Molecule Isotropic Polarizability Prediction")
            ax.set_xlabel("Epochs")  # the curves hold one point per epoch
            ax.set_ylabel("MSE Loss")
            ax.set_yscale("log", base=10)
            ax.set_xscale("log", base=10)
            ax.set_ylim(1e-4, 100)
            ax.grid()
            fig.savefig(f"{results_dir}/qm9_loss_{args.label}.png", dpi=150)
            plt.close(fig)

            torch.save({
                'its': np.arange(len(train_losses)),
                'its_avg': np.arange(len(train_avg_losses)),
                'train_acc': None,
                'train_loss': train_losses,
                'train_avg_loss': train_avg_losses,
                'val_acc': None,
                'val_loss': test_losses,
                'val_avg_loss': test_avg_losses,
                'train_best': train_best,
                'val_best': test_best,
            }, f"{results_dir}/qm9_{args.label}.pt")

    # Final summary figure: per-batch losses averaged over chunks of 20.
    fig, ax = plt.subplots(1, 1, figsize=(4.2, 4.2))

    test_x, test_y = _chunked_means(test_losses, 20)
    train_x, train_y = _chunked_means(train_losses, 20)
    ax.plot(test_x, test_y, color='#ff7f0e')
    ax.plot(train_x, train_y, color='#1f77b4')
    ax.set_xscale('log')
    ax.set_yscale('log')

    ax.set_ylabel("MSE", fontsize=15)
    # Annotate with the init scale actually used for this run.
    ax.text(1, 0.003, rf"$\alpha={args.init_scale:g}$", fontsize=15)
    ax.set_ylim(1e-3, 1e2)
    ax.grid()

    # Save next to the other artefacts; `results_dir` is known to exist,
    # whereas a relative "results/" directory may not.
    fig.savefig(f"{results_dir}/qm9_grok_{args.label}.pdf", bbox_inches="tight")
    plt.close(fig)



In [None]:
# Entry point: parse the experiment options, derive a descriptive label used in
# all output file names, and start training.
if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--label", default="")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--weight_decay", type=float, default=0)
    parser.add_argument("--size", type=int, default=100)
    parser.add_argument("--init_scale", type=float, default=3.0) # init_scale 1.0 no grokking / init_scale 3.0 grokking

    # Grokfast
    # Only the filters that main() implements are offered, so an unsupported
    # name is rejected by argparse with a usage message.
    parser.add_argument("--filter", type=str, choices=["none", "ma", "ema"], default="none")
    parser.add_argument("--alpha", type=float, default=0.99)
    parser.add_argument("--window_size", type=int, default=100)
    parser.add_argument("--lamb", type=float, default=5.0)
    # parse_known_args: inside a notebook sys.argv carries the kernel's own
    # arguments, which must be ignored rather than treated as errors.
    args, _ = parser.parse_known_args()

    # NOTE(review): the '_' separator is only added when --label is non-empty,
    # so the default label reads "...alpha3.0000none". Left unchanged so that
    # existing result file names stay valid.
    filter_str = ('_' if args.label != '' else '') + args.filter
    window_size_str = f'_w{args.window_size}'
    alpha_str = f'_a{args.alpha:.3f}'.replace('.', '')
    lamb_str = f'_l{args.lamb:.2f}'.replace('.', '')

    model_suffix = f'size{args.size}_alpha{args.init_scale:.4f}'

    if args.filter == 'none':
        filter_suffix = ''
    elif args.filter == 'ma':
        filter_suffix = window_size_str + lamb_str
    elif args.filter == 'ema':
        filter_suffix = alpha_str + lamb_str
    else:
        raise ValueError(f"Unrecognized filter type {args.filter}")

    optim_suffix = ''
    if args.weight_decay != 0:
        optim_suffix = optim_suffix + f'_wd{args.weight_decay:.1e}'.replace('.', '')
    if args.lr != 1e-3:
        # Learning rate as a multiple of the 1e-3 default. `:g` prints whole
        # multiples exactly as before ("_lrx3") but no longer truncates
        # fractional ones (5e-4 -> "_lrx0.5" instead of "_lrx0").
        optim_suffix = optim_suffix + f'_lrx{args.lr / 1e-3:g}'

    args.label = args.label + model_suffix + filter_str + filter_suffix + optim_suffix
    print(f'Experiment results saved under name: {args.label}')

    main(args)

Experiment results saved under name: size100_alpha3.0000none


  0%|          | 4/50000 [00:00<57:07, 14.59it/s]  

Epochs: 0 | epoch avg. loss: 220266.062 | test avg. loss: 82704.569
Epochs: 1 | epoch avg. loss: 76948.586 | test avg. loss: 20190.835
Epochs: 2 | epoch avg. loss: 17366.363 | test avg. loss: 1461.363
Epochs: 3 | epoch avg. loss: 1378.538 | test avg. loss: 1765.475


  0%|          | 8/50000 [00:00<55:27, 15.02it/s]

Epochs: 4 | epoch avg. loss: 2429.001 | test avg. loss: 3573.469
Epochs: 5 | epoch avg. loss: 3384.013 | test avg. loss: 2812.512
Epochs: 6 | epoch avg. loss: 2010.870 | test avg. loss: 1348.973
Epochs: 7 | epoch avg. loss: 807.923 | test avg. loss: 754.060




Epochs: 8 | epoch avg. loss: 510.675 | test avg. loss: 560.165
Epochs: 9 | epoch avg. loss: 428.923 | test avg. loss: 569.708
Epochs: 10 | epoch avg. loss: 406.208 | test avg. loss: 619.245


  0%|          | 14/50000 [00:00<59:06, 14.09it/s]

Epochs: 11 | epoch avg. loss: 432.900 | test avg. loss: 618.662
Epochs: 12 | epoch avg. loss: 432.370 | test avg. loss: 570.649
Epochs: 13 | epoch avg. loss: 409.157 | test avg. loss: 502.958


  0%|          | 16/50000 [00:01<1:04:15, 12.96it/s]

Epochs: 14 | epoch avg. loss: 374.690 | test avg. loss: 478.038
Epochs: 15 | epoch avg. loss: 367.422 | test avg. loss: 472.068
Epochs: 16 | epoch avg. loss: 359.250 | test avg. loss: 458.916


  0%|          | 20/50000 [00:01<1:04:20, 12.95it/s]

Epochs: 17 | epoch avg. loss: 340.436 | test avg. loss: 439.214
Epochs: 18 | epoch avg. loss: 317.600 | test avg. loss: 425.156
Epochs: 19 | epoch avg. loss: 301.043 | test avg. loss: 411.268


  0%|          | 22/50000 [00:01<1:03:21, 13.15it/s]

Epochs: 20 | epoch avg. loss: 290.297 | test avg. loss: 391.177
Epochs: 21 | epoch avg. loss: 272.284 | test avg. loss: 362.223
Epochs: 22 | epoch avg. loss: 252.288 | test avg. loss: 336.776


  0%|          | 26/50000 [00:01<1:01:10, 13.61it/s]

Epochs: 23 | epoch avg. loss: 237.777 | test avg. loss: 317.181
Epochs: 24 | epoch avg. loss: 223.480 | test avg. loss: 299.990
Epochs: 25 | epoch avg. loss: 206.834 | test avg. loss: 284.959


  0%|          | 28/50000 [00:02<1:06:18, 12.56it/s]

Epochs: 26 | epoch avg. loss: 189.248 | test avg. loss: 266.560
Epochs: 27 | epoch avg. loss: 170.595 | test avg. loss: 249.187
Epochs: 28 | epoch avg. loss: 156.273 | test avg. loss: 234.031


  0%|          | 32/50000 [00:02<1:02:04, 13.42it/s]

Epochs: 29 | epoch avg. loss: 145.495 | test avg. loss: 221.696
Epochs: 30 | epoch avg. loss: 135.900 | test avg. loss: 211.839
Epochs: 31 | epoch avg. loss: 127.767 | test avg. loss: 203.393
Epochs: 32 | epoch avg. loss: 118.284 | test avg. loss: 195.512


  0%|          | 36/50000 [00:02<57:15, 14.54it/s]

Epochs: 33 | epoch avg. loss: 109.787 | test avg. loss: 190.388
Epochs: 34 | epoch avg. loss: 104.653 | test avg. loss: 185.140
Epochs: 35 | epoch avg. loss: 99.494 | test avg. loss: 177.707
Epochs: 36 | epoch avg. loss: 91.554 | test avg. loss: 172.204


  0%|          | 40/50000 [00:02<56:52, 14.64it/s]

Epochs: 37 | epoch avg. loss: 86.609 | test avg. loss: 169.309
Epochs: 38 | epoch avg. loss: 83.284 | test avg. loss: 165.622
Epochs: 39 | epoch avg. loss: 79.433 | test avg. loss: 161.401


  0%|          | 42/50000 [00:03<59:45, 13.93it/s]

Epochs: 40 | epoch avg. loss: 75.204 | test avg. loss: 156.902
Epochs: 41 | epoch avg. loss: 71.816 | test avg. loss: 153.363
Epochs: 42 | epoch avg. loss: 69.902 | test avg. loss: 150.122


  0%|          | 46/50000 [00:03<58:20, 14.27it/s]

Epochs: 43 | epoch avg. loss: 67.753 | test avg. loss: 147.908
Epochs: 44 | epoch avg. loss: 64.913 | test avg. loss: 147.155
Epochs: 45 | epoch avg. loss: 63.464 | test avg. loss: 146.935
Epochs: 46 | epoch avg. loss: 62.162 | test avg. loss: 146.265


  0%|          | 50/50000 [00:03<1:00:30, 13.76it/s]

Epochs: 47 | epoch avg. loss: 60.543 | test avg. loss: 144.772
Epochs: 48 | epoch avg. loss: 58.705 | test avg. loss: 143.331
Epochs: 49 | epoch avg. loss: 57.582 | test avg. loss: 142.329


  0%|          | 52/50000 [00:03<1:05:45, 12.66it/s]

Epochs: 50 | epoch avg. loss: 56.364 | test avg. loss: 141.557
Epochs: 51 | epoch avg. loss: 55.246 | test avg. loss: 141.444
Epochs: 52 | epoch avg. loss: 54.217 | test avg. loss: 140.520


  0%|          | 56/50000 [00:04<1:11:14, 11.68it/s]

Epochs: 53 | epoch avg. loss: 53.518 | test avg. loss: 139.083
Epochs: 54 | epoch avg. loss: 52.073 | test avg. loss: 138.189
Epochs: 55 | epoch avg. loss: 51.489 | test avg. loss: 137.242


  0%|          | 58/50000 [00:04<1:09:27, 11.98it/s]

Epochs: 56 | epoch avg. loss: 50.435 | test avg. loss: 136.266
Epochs: 57 | epoch avg. loss: 49.715 | test avg. loss: 135.568
Epochs: 58 | epoch avg. loss: 49.574 | test avg. loss: 135.067


  0%|          | 62/50000 [00:04<1:03:00, 13.21it/s]

Epochs: 59 | epoch avg. loss: 48.291 | test avg. loss: 135.732
Epochs: 60 | epoch avg. loss: 47.614 | test avg. loss: 135.845
Epochs: 61 | epoch avg. loss: 47.232 | test avg. loss: 134.763


  0%|          | 64/50000 [00:04<1:03:38, 13.08it/s]

Epochs: 62 | epoch avg. loss: 45.935 | test avg. loss: 134.535
Epochs: 63 | epoch avg. loss: 45.428 | test avg. loss: 134.094
Epochs: 64 | epoch avg. loss: 45.280 | test avg. loss: 133.755


  0%|          | 68/50000 [00:05<1:08:43, 12.11it/s]

Epochs: 65 | epoch avg. loss: 44.555 | test avg. loss: 133.279
Epochs: 66 | epoch avg. loss: 44.108 | test avg. loss: 132.978
Epochs: 67 | epoch avg. loss: 43.598 | test avg. loss: 131.773


  0%|          | 70/50000 [00:05<1:08:18, 12.18it/s]

Epochs: 68 | epoch avg. loss: 42.885 | test avg. loss: 130.959
Epochs: 69 | epoch avg. loss: 42.462 | test avg. loss: 130.314
Epochs: 70 | epoch avg. loss: 42.296 | test avg. loss: 129.773


                                                  

Epochs: 71 | epoch avg. loss: 41.547 | test avg. loss: 129.875
Epochs: 72 | epoch avg. loss: 41.084 | test avg. loss: 130.081
Epochs: 73 | epoch avg. loss: 41.076 | test avg. loss: 129.496
Epochs: 74 | epoch avg. loss: 41.042 | test avg. loss: 128.484


  0%|          | 78/50000 [00:05<51:36, 16.12it/s]

Epochs: 75 | epoch avg. loss: 39.837 | test avg. loss: 128.136
Epochs: 76 | epoch avg. loss: 39.697 | test avg. loss: 128.153
Epochs: 77 | epoch avg. loss: 39.366 | test avg. loss: 127.622
Epochs: 78 | epoch avg. loss: 38.494 | test avg. loss: 127.759


  0%|          | 82/50000 [00:06<52:38, 15.81it/s]

Epochs: 79 | epoch avg. loss: 38.364 | test avg. loss: 127.209
Epochs: 80 | epoch avg. loss: 38.165 | test avg. loss: 126.286
Epochs: 81 | epoch avg. loss: 37.492 | test avg. loss: 126.098
Epochs: 82 | epoch avg. loss: 37.245 | test avg. loss: 126.048


  0%|          | 87/50000 [00:06<49:01, 16.97it/s]

Epochs: 83 | epoch avg. loss: 36.643 | test avg. loss: 126.865
Epochs: 84 | epoch avg. loss: 36.493 | test avg. loss: 126.068
Epochs: 85 | epoch avg. loss: 36.356 | test avg. loss: 124.709
Epochs: 86 | epoch avg. loss: 35.785 | test avg. loss: 124.632
Epochs: 87 | epoch avg. loss: 34.758 | test avg. loss: 125.821


  0%|          | 92/50000 [00:06<44:11, 18.83it/s]

Epochs: 88 | epoch avg. loss: 35.189 | test avg. loss: 126.341
Epochs: 89 | epoch avg. loss: 34.616 | test avg. loss: 124.326
Epochs: 90 | epoch avg. loss: 33.541 | test avg. loss: 123.475
Epochs: 91 | epoch avg. loss: 35.083 | test avg. loss: 123.304
Epochs: 92 | epoch avg. loss: 33.182 | test avg. loss: 124.758


  0%|          | 96/50000 [00:06<44:33, 18.66it/s]

Epochs: 93 | epoch avg. loss: 33.205 | test avg. loss: 126.398
Epochs: 94 | epoch avg. loss: 34.110 | test avg. loss: 124.599
Epochs: 95 | epoch avg. loss: 33.554 | test avg. loss: 122.602
Epochs: 96 | epoch avg. loss: 32.837 | test avg. loss: 122.466


  0%|          | 98/50000 [00:06<46:12, 18.00it/s]

Epochs: 97 | epoch avg. loss: 33.032 | test avg. loss: 123.630
Epochs: 98 | epoch avg. loss: 31.551 | test avg. loss: 123.322
Epochs: 99 | epoch avg. loss: 30.857 | test avg. loss: 122.074


  0%|          | 104/50000 [00:07<1:26:31,  9.61it/s]

Epochs: 100 | epoch avg. loss: 30.793 | test avg. loss: 121.689
Epochs: 101 | epoch avg. loss: 31.068 | test avg. loss: 121.532
Epochs: 102 | epoch avg. loss: 29.947 | test avg. loss: 122.796
Epochs: 103 | epoch avg. loss: 29.956 | test avg. loss: 123.321


  0%|          | 108/50000 [00:08<1:04:41, 12.85it/s]

Epochs: 104 | epoch avg. loss: 29.984 | test avg. loss: 121.764
Epochs: 105 | epoch avg. loss: 29.253 | test avg. loss: 120.525
Epochs: 106 | epoch avg. loss: 29.328 | test avg. loss: 120.441
Epochs: 107 | epoch avg. loss: 28.971 | test avg. loss: 121.135


  0%|          | 112/50000 [00:08<54:20, 15.30it/s]

Epochs: 108 | epoch avg. loss: 28.658 | test avg. loss: 121.828
Epochs: 109 | epoch avg. loss: 28.454 | test avg. loss: 120.899
Epochs: 110 | epoch avg. loss: 28.024 | test avg. loss: 120.209
Epochs: 111 | epoch avg. loss: 27.961 | test avg. loss: 119.977


  0%|          | 117/50000 [00:08<47:05, 17.65it/s]

Epochs: 112 | epoch avg. loss: 28.066 | test avg. loss: 120.308
Epochs: 113 | epoch avg. loss: 27.545 | test avg. loss: 119.892
Epochs: 114 | epoch avg. loss: 27.227 | test avg. loss: 119.834
Epochs: 115 | epoch avg. loss: 27.105 | test avg. loss: 119.715
Epochs: 116 | epoch avg. loss: 26.977 | test avg. loss: 119.402


  0%|          | 119/50000 [00:08<46:23, 17.92it/s]

Epochs: 117 | epoch avg. loss: 26.761 | test avg. loss: 119.381
Epochs: 118 | epoch avg. loss: 26.538 | test avg. loss: 119.521
Epochs: 119 | epoch avg. loss: 26.393 | test avg. loss: 119.688
Epochs: 120 | epoch avg. loss: 26.315 | test avg. loss: 119.655


  0%|          | 124/50000 [00:08<49:15, 16.87it/s]

Epochs: 121 | epoch avg. loss: 26.201 | test avg. loss: 119.037
Epochs: 122 | epoch avg. loss: 25.821 | test avg. loss: 118.488
Epochs: 123 | epoch avg. loss: 26.125 | test avg. loss: 118.513


                                                   

Epochs: 124 | epoch avg. loss: 25.603 | test avg. loss: 118.701
Epochs: 125 | epoch avg. loss: 25.281 | test avg. loss: 119.411
Epochs: 126 | epoch avg. loss: 25.660 | test avg. loss: 120.078




Epochs: 127 | epoch avg. loss: 25.674 | test avg. loss: 118.574
Epochs: 128 | epoch avg. loss: 24.864 | test avg. loss: 118.451
Epochs: 129 | epoch avg. loss: 24.604 | test avg. loss: 118.892


  0%|          | 134/50000 [00:09<50:36, 16.42it/s]

Epochs: 130 | epoch avg. loss: 24.461 | test avg. loss: 118.853
Epochs: 131 | epoch avg. loss: 24.248 | test avg. loss: 118.644
Epochs: 132 | epoch avg. loss: 24.121 | test avg. loss: 118.223
Epochs: 133 | epoch avg. loss: 24.026 | test avg. loss: 117.965


  0%|          | 138/50000 [00:09<47:07, 17.64it/s]

Epochs: 134 | epoch avg. loss: 24.072 | test avg. loss: 118.327
Epochs: 135 | epoch avg. loss: 23.661 | test avg. loss: 119.347
Epochs: 136 | epoch avg. loss: 24.111 | test avg. loss: 118.360
Epochs: 137 | epoch avg. loss: 23.796 | test avg. loss: 117.211


  0%|          | 142/50000 [00:10<50:27, 16.47it/s]

Epochs: 138 | epoch avg. loss: 23.467 | test avg. loss: 116.859
Epochs: 139 | epoch avg. loss: 23.312 | test avg. loss: 116.476
Epochs: 140 | epoch avg. loss: 23.265 | test avg. loss: 116.631
Epochs: 141 | epoch avg. loss: 22.971 | test avg. loss: 116.649


  0%|          | 146/50000 [00:10<46:52, 17.72it/s]

Epochs: 142 | epoch avg. loss: 23.042 | test avg. loss: 116.452
Epochs: 143 | epoch avg. loss: 22.817 | test avg. loss: 116.063
Epochs: 144 | epoch avg. loss: 22.913 | test avg. loss: 115.646
Epochs: 145 | epoch avg. loss: 23.412 | test avg. loss: 115.615


  0%|          | 151/50000 [00:10<43:52, 18.93it/s]

Epochs: 146 | epoch avg. loss: 22.608 | test avg. loss: 114.794
Epochs: 147 | epoch avg. loss: 23.031 | test avg. loss: 114.594
Epochs: 148 | epoch avg. loss: 22.507 | test avg. loss: 115.531
Epochs: 149 | epoch avg. loss: 22.556 | test avg. loss: 115.522
Epochs: 150 | epoch avg. loss: 22.218 | test avg. loss: 114.113


  0%|          | 155/50000 [00:10<43:34, 19.06it/s]

Epochs: 151 | epoch avg. loss: 21.931 | test avg. loss: 113.777
Epochs: 152 | epoch avg. loss: 23.529 | test avg. loss: 113.511
Epochs: 153 | epoch avg. loss: 21.866 | test avg. loss: 115.216
Epochs: 154 | epoch avg. loss: 22.913 | test avg. loss: 117.787


  0%|          | 157/50000 [00:10<45:10, 18.39it/s]

Epochs: 155 | epoch avg. loss: 23.845 | test avg. loss: 113.594
Epochs: 156 | epoch avg. loss: 22.261 | test avg. loss: 113.475
Epochs: 157 | epoch avg. loss: 24.949 | test avg. loss: 112.437


                                                   

Epochs: 158 | epoch avg. loss: 21.665 | test avg. loss: 115.282
Epochs: 159 | epoch avg. loss: 23.768 | test avg. loss: 116.231
Epochs: 160 | epoch avg. loss: 24.449 | test avg. loss: 111.974


  0%|          | 165/50000 [00:11<49:20, 16.83it/s]

Epochs: 161 | epoch avg. loss: 21.203 | test avg. loss: 111.590
Epochs: 162 | epoch avg. loss: 21.576 | test avg. loss: 111.513
Epochs: 163 | epoch avg. loss: 21.279 | test avg. loss: 111.957
Epochs: 164 | epoch avg. loss: 21.022 | test avg. loss: 111.734


  0%|          | 169/50000 [00:11<47:04, 17.64it/s]

Epochs: 165 | epoch avg. loss: 20.881 | test avg. loss: 111.104
Epochs: 166 | epoch avg. loss: 20.868 | test avg. loss: 110.775
Epochs: 167 | epoch avg. loss: 20.992 | test avg. loss: 110.902
Epochs: 168 | epoch avg. loss: 20.663 | test avg. loss: 110.863


  0%|          | 173/50000 [00:11<46:03, 18.03it/s]

Epochs: 169 | epoch avg. loss: 20.501 | test avg. loss: 110.543
Epochs: 170 | epoch avg. loss: 20.481 | test avg. loss: 110.339
Epochs: 171 | epoch avg. loss: 20.466 | test avg. loss: 110.929
Epochs: 172 | epoch avg. loss: 20.103 | test avg. loss: 111.354


  0%|          | 175/50000 [00:12<50:13, 16.53it/s]

Epochs: 173 | epoch avg. loss: 20.486 | test avg. loss: 111.674
Epochs: 174 | epoch avg. loss: 19.831 | test avg. loss: 111.279
Epochs: 175 | epoch avg. loss: 20.767 | test avg. loss: 111.432
Epochs: 176 | epoch avg. loss: 19.939 | test avg. loss: 113.152


  0%|          | 181/50000 [00:12<49:56, 16.63it/s]

Epochs: 177 | epoch avg. loss: 20.236 | test avg. loss: 112.837
Epochs: 178 | epoch avg. loss: 20.024 | test avg. loss: 111.461
Epochs: 179 | epoch avg. loss: 19.602 | test avg. loss: 111.134
Epochs: 180 | epoch avg. loss: 19.744 | test avg. loss: 111.086


  0%|          | 185/50000 [00:12<48:01, 17.29it/s]

Epochs: 181 | epoch avg. loss: 19.835 | test avg. loss: 111.147
Epochs: 182 | epoch avg. loss: 19.521 | test avg. loss: 112.145
Epochs: 183 | epoch avg. loss: 19.382 | test avg. loss: 111.357
Epochs: 184 | epoch avg. loss: 19.114 | test avg. loss: 110.767


  0%|          | 189/50000 [00:12<46:00, 18.04it/s]

Epochs: 185 | epoch avg. loss: 19.095 | test avg. loss: 110.767
Epochs: 186 | epoch avg. loss: 18.929 | test avg. loss: 110.991
Epochs: 187 | epoch avg. loss: 19.360 | test avg. loss: 111.228
Epochs: 188 | epoch avg. loss: 18.810 | test avg. loss: 110.254




Epochs: 189 | epoch avg. loss: 19.052 | test avg. loss: 109.870
Epochs: 190 | epoch avg. loss: 19.348 | test avg. loss: 109.740
Epochs: 191 | epoch avg. loss: 18.708 | test avg. loss: 109.610


  0%|          | 195/50000 [00:13<52:43, 15.74it/s]

Epochs: 192 | epoch avg. loss: 18.483 | test avg. loss: 108.978
Epochs: 193 | epoch avg. loss: 18.484 | test avg. loss: 108.688
Epochs: 194 | epoch avg. loss: 18.436 | test avg. loss: 108.865
Epochs: 195 | epoch avg. loss: 18.369 | test avg. loss: 109.335


  0%|          | 199/50000 [00:13<49:15, 16.85it/s]

Epochs: 196 | epoch avg. loss: 18.707 | test avg. loss: 108.500
Epochs: 197 | epoch avg. loss: 18.389 | test avg. loss: 107.931
Epochs: 198 | epoch avg. loss: 18.421 | test avg. loss: 107.711
Epochs: 199 | epoch avg. loss: 18.473 | test avg. loss: 107.142


  0%|          | 203/50000 [00:14<1:47:47,  7.70it/s]

Epochs: 200 | epoch avg. loss: 18.321 | test avg. loss: 107.350
Epochs: 201 | epoch avg. loss: 18.271 | test avg. loss: 107.147
Epochs: 202 | epoch avg. loss: 18.044 | test avg. loss: 106.478
Epochs: 203 | epoch avg. loss: 18.087 | test avg. loss: 106.400


  0%|          | 208/50000 [00:14<1:10:11, 11.82it/s]

Epochs: 204 | epoch avg. loss: 17.915 | test avg. loss: 106.292
Epochs: 205 | epoch avg. loss: 17.807 | test avg. loss: 106.085
Epochs: 206 | epoch avg. loss: 17.765 | test avg. loss: 105.985
Epochs: 207 | epoch avg. loss: 17.750 | test avg. loss: 105.842


  0%|          | 212/50000 [00:14<57:42, 14.38it/s]  

Epochs: 208 | epoch avg. loss: 17.693 | test avg. loss: 106.018
Epochs: 209 | epoch avg. loss: 17.889 | test avg. loss: 106.139
Epochs: 210 | epoch avg. loss: 17.631 | test avg. loss: 105.013
Epochs: 211 | epoch avg. loss: 17.900 | test avg. loss: 104.679


  0%|          | 214/50000 [00:14<56:38, 14.65it/s]

Epochs: 212 | epoch avg. loss: 17.992 | test avg. loss: 104.734
Epochs: 213 | epoch avg. loss: 17.455 | test avg. loss: 105.019
Epochs: 214 | epoch avg. loss: 17.609 | test avg. loss: 104.607


  0%|          | 218/50000 [00:15<58:12, 14.26it/s]

Epochs: 215 | epoch avg. loss: 17.464 | test avg. loss: 103.816
Epochs: 216 | epoch avg. loss: 17.323 | test avg. loss: 103.458
Epochs: 217 | epoch avg. loss: 17.371 | test avg. loss: 103.507
Epochs: 218 | epoch avg. loss: 17.464 | test avg. loss: 103.605


  0%|          | 223/50000 [00:15<48:55, 16.96it/s]

Epochs: 219 | epoch avg. loss: 17.733 | test avg. loss: 103.217
Epochs: 220 | epoch avg. loss: 17.138 | test avg. loss: 103.948
Epochs: 221 | epoch avg. loss: 17.522 | test avg. loss: 104.525
Epochs: 222 | epoch avg. loss: 17.517 | test avg. loss: 102.789


  0%|          | 225/50000 [00:15<50:50, 16.32it/s]

Epochs: 223 | epoch avg. loss: 17.864 | test avg. loss: 102.413
Epochs: 224 | epoch avg. loss: 17.084 | test avg. loss: 103.050
Epochs: 225 | epoch avg. loss: 17.536 | test avg. loss: 103.737


  0%|          | 229/50000 [00:15<59:20, 13.98it/s]

Epochs: 226 | epoch avg. loss: 17.332 | test avg. loss: 101.877
Epochs: 227 | epoch avg. loss: 17.336 | test avg. loss: 101.629
Epochs: 228 | epoch avg. loss: 17.669 | test avg. loss: 101.731


  0%|          | 231/50000 [00:16<1:02:22, 13.30it/s]

Epochs: 229 | epoch avg. loss: 16.796 | test avg. loss: 102.155
Epochs: 230 | epoch avg. loss: 16.987 | test avg. loss: 101.628
Epochs: 231 | epoch avg. loss: 16.757 | test avg. loss: 101.164


  0%|          | 235/50000 [00:16<1:02:13, 13.33it/s]

Epochs: 232 | epoch avg. loss: 16.737 | test avg. loss: 100.854
Epochs: 233 | epoch avg. loss: 16.608 | test avg. loss: 101.027
Epochs: 234 | epoch avg. loss: 16.654 | test avg. loss: 101.028
Epochs: 235 | epoch avg. loss: 16.599 | test avg. loss: 100.478


  0%|          | 239/50000 [00:16<59:49, 13.86it/s]  

Epochs: 236 | epoch avg. loss: 16.609 | test avg. loss: 100.246
Epochs: 237 | epoch avg. loss: 16.574 | test avg. loss: 100.489
Epochs: 238 | epoch avg. loss: 16.421 | test avg. loss: 101.007


  0%|          | 241/50000 [00:16<58:31, 14.17it/s]

Epochs: 239 | epoch avg. loss: 16.675 | test avg. loss: 100.316
Epochs: 240 | epoch avg. loss: 16.289 | test avg. loss: 99.606
Epochs: 241 | epoch avg. loss: 16.792 | test avg. loss: 99.524


  0%|          | 245/50000 [00:17<1:01:57, 13.38it/s]

Epochs: 242 | epoch avg. loss: 16.731 | test avg. loss: 100.012
Epochs: 243 | epoch avg. loss: 16.308 | test avg. loss: 101.862
Epochs: 244 | epoch avg. loss: 17.244 | test avg. loss: 100.174


  0%|          | 247/50000 [00:17<1:01:10, 13.56it/s]

Epochs: 245 | epoch avg. loss: 16.275 | test avg. loss: 99.080
Epochs: 246 | epoch avg. loss: 16.854 | test avg. loss: 98.939
Epochs: 247 | epoch avg. loss: 16.809 | test avg. loss: 98.950


  1%|          | 251/50000 [00:17<1:04:16, 12.90it/s]

Epochs: 248 | epoch avg. loss: 16.207 | test avg. loss: 99.190
Epochs: 249 | epoch avg. loss: 16.664 | test avg. loss: 99.653
Epochs: 250 | epoch avg. loss: 16.167 | test avg. loss: 98.511


  1%|          | 253/50000 [00:17<1:03:07, 13.14it/s]

Epochs: 251 | epoch avg. loss: 16.639 | test avg. loss: 98.401
Epochs: 252 | epoch avg. loss: 16.240 | test avg. loss: 99.589
Epochs: 253 | epoch avg. loss: 16.345 | test avg. loss: 99.209


  1%|          | 257/50000 [00:18<1:08:26, 12.11it/s]

Epochs: 254 | epoch avg. loss: 16.197 | test avg. loss: 98.321
Epochs: 255 | epoch avg. loss: 15.989 | test avg. loss: 98.044
Epochs: 256 | epoch avg. loss: 16.027 | test avg. loss: 98.139


  1%|          | 259/50000 [00:18<1:07:39, 12.25it/s]

Epochs: 257 | epoch avg. loss: 15.951 | test avg. loss: 97.723
Epochs: 258 | epoch avg. loss: 15.852 | test avg. loss: 97.853
Epochs: 259 | epoch avg. loss: 15.873 | test avg. loss: 98.156


  1%|          | 263/50000 [00:18<1:01:05, 13.57it/s]

Epochs: 260 | epoch avg. loss: 15.837 | test avg. loss: 97.412
Epochs: 261 | epoch avg. loss: 15.675 | test avg. loss: 97.025
Epochs: 262 | epoch avg. loss: 15.773 | test avg. loss: 96.876
Epochs: 263 | epoch avg. loss: 15.757 | test avg. loss: 97.168


  1%|          | 267/50000 [00:18<56:58, 14.55it/s]

Epochs: 264 | epoch avg. loss: 15.630 | test avg. loss: 97.685
Epochs: 265 | epoch avg. loss: 16.479 | test avg. loss: 97.337
Epochs: 266 | epoch avg. loss: 15.277 | test avg. loss: 96.369
Epochs: 267 | epoch avg. loss: 16.788 | test avg. loss: 96.165


  1%|          | 271/50000 [00:19<1:02:06, 13.35it/s]

Epochs: 268 | epoch avg. loss: 15.614 | test avg. loss: 98.160
Epochs: 269 | epoch avg. loss: 16.577 | test avg. loss: 101.626
Epochs: 270 | epoch avg. loss: 18.328 | test avg. loss: 96.094


  1%|          | 273/50000 [00:19<1:06:15, 12.51it/s]

Epochs: 271 | epoch avg. loss: 15.626 | test avg. loss: 96.548
Epochs: 272 | epoch avg. loss: 17.892 | test avg. loss: 95.566
Epochs: 273 | epoch avg. loss: 14.827 | test avg. loss: 99.640


  1%|          | 277/50000 [00:19<1:03:53, 12.97it/s]

Epochs: 274 | epoch avg. loss: 17.795 | test avg. loss: 98.150
Epochs: 275 | epoch avg. loss: 15.503 | test avg. loss: 95.368
Epochs: 276 | epoch avg. loss: 16.425 | test avg. loss: 97.016


  1%|          | 279/50000 [00:19<1:02:41, 13.22it/s]

Epochs: 277 | epoch avg. loss: 18.548 | test avg. loss: 95.196
Epochs: 278 | epoch avg. loss: 14.854 | test avg. loss: 100.559
Epochs: 279 | epoch avg. loss: 19.038 | test avg. loss: 96.946


  1%|          | 283/50000 [00:20<1:05:56, 12.57it/s]

Epochs: 280 | epoch avg. loss: 17.602 | test avg. loss: 94.745
Epochs: 281 | epoch avg. loss: 15.903 | test avg. loss: 94.694
Epochs: 282 | epoch avg. loss: 15.087 | test avg. loss: 96.485


  1%|          | 285/50000 [00:20<1:03:28, 13.05it/s]

Epochs: 283 | epoch avg. loss: 15.995 | test avg. loss: 95.523
Epochs: 284 | epoch avg. loss: 15.262 | test avg. loss: 94.593
Epochs: 285 | epoch avg. loss: 15.045 | test avg. loss: 94.438


  1%|          | 289/50000 [00:20<1:01:50, 13.40it/s]

Epochs: 286 | epoch avg. loss: 14.918 | test avg. loss: 94.685
Epochs: 287 | epoch avg. loss: 14.920 | test avg. loss: 94.688
Epochs: 288 | epoch avg. loss: 14.916 | test avg. loss: 94.632


  1%|          | 291/50000 [00:20<1:01:20, 13.51it/s]

Epochs: 289 | epoch avg. loss: 14.861 | test avg. loss: 94.411
Epochs: 290 | epoch avg. loss: 15.069 | test avg. loss: 93.964
Epochs: 291 | epoch avg. loss: 14.819 | test avg. loss: 93.764


  1%|          | 295/50000 [00:20<1:02:16, 13.30it/s]

Epochs: 292 | epoch avg. loss: 15.250 | test avg. loss: 93.897
Epochs: 293 | epoch avg. loss: 15.475 | test avg. loss: 94.480
Epochs: 294 | epoch avg. loss: 14.688 | test avg. loss: 93.629


  1%|          | 297/50000 [00:21<1:11:59, 11.51it/s]

Epochs: 295 | epoch avg. loss: 14.755 | test avg. loss: 93.519
Epochs: 296 | epoch avg. loss: 14.847 | test avg. loss: 93.643


  1%|          | 297/50000 [00:21<1:11:59, 11.51it/s]

Epochs: 297 | epoch avg. loss: 14.643 | test avg. loss: 94.106


  1%|          | 299/50000 [00:21<2:06:11,  6.56it/s]

Epochs: 298 | epoch avg. loss: 14.703 | test avg. loss: 94.311
Epochs: 299 | epoch avg. loss: 14.770 | test avg. loss: 93.565


  1%|          | 302/50000 [00:23<4:13:56,  3.26it/s]

Epochs: 300 | epoch avg. loss: 14.433 | test avg. loss: 93.054
Epochs: 301 | epoch avg. loss: 14.786 | test avg. loss: 92.974


  1%|          | 304/50000 [00:24<3:44:16,  3.69it/s]

Epochs: 302 | epoch avg. loss: 14.743 | test avg. loss: 93.479
Epochs: 303 | epoch avg. loss: 15.314 | test avg. loss: 94.539


  1%|          | 307/50000 [00:24<2:12:36,  6.25it/s]

Epochs: 304 | epoch avg. loss: 15.109 | test avg. loss: 92.772
Epochs: 305 | epoch avg. loss: 14.527 | test avg. loss: 92.652
Epochs: 306 | epoch avg. loss: 14.596 | test avg. loss: 93.110
Epochs: 307 | epoch avg. loss: 14.746 | test avg. loss: 93.409


  1%|          | 311/50000 [00:24<1:21:42, 10.14it/s]

Epochs: 308 | epoch avg. loss: 14.827 | test avg. loss: 92.501
Epochs: 309 | epoch avg. loss: 14.364 | test avg. loss: 92.817
Epochs: 310 | epoch avg. loss: 14.188 | test avg. loss: 93.664
Epochs: 311 | epoch avg. loss: 14.737 | test avg. loss: 93.052


  1%|          | 315/50000 [00:24<1:01:26, 13.48it/s]

Epochs: 312 | epoch avg. loss: 14.176 | test avg. loss: 92.137
Epochs: 313 | epoch avg. loss: 14.633 | test avg. loss: 92.081
Epochs: 314 | epoch avg. loss: 14.403 | test avg. loss: 92.448
Epochs: 315 | epoch avg. loss: 14.122 | test avg. loss: 93.356


  1%|          | 319/50000 [00:24<58:02, 14.26it/s]

Epochs: 316 | epoch avg. loss: 14.502 | test avg. loss: 93.116
Epochs: 317 | epoch avg. loss: 14.292 | test avg. loss: 91.980
Epochs: 318 | epoch avg. loss: 14.287 | test avg. loss: 92.027


  1%|          | 321/50000 [00:25<59:11, 13.99it/s]

Epochs: 319 | epoch avg. loss: 14.393 | test avg. loss: 92.718
Epochs: 320 | epoch avg. loss: 14.024 | test avg. loss: 92.280
Epochs: 321 | epoch avg. loss: 14.503 | test avg. loss: 92.142


  1%|          | 325/50000 [00:25<57:00, 14.52it/s]

Epochs: 322 | epoch avg. loss: 14.522 | test avg. loss: 92.664
Epochs: 323 | epoch avg. loss: 14.055 | test avg. loss: 91.725
Epochs: 324 | epoch avg. loss: 14.038 | test avg. loss: 91.622
Epochs: 325 | epoch avg. loss: 13.955 | test avg. loss: 91.832


  1%|          | 329/50000 [00:25<50:52, 16.27it/s]

Epochs: 326 | epoch avg. loss: 14.008 | test avg. loss: 92.161
Epochs: 327 | epoch avg. loss: 14.002 | test avg. loss: 91.357
Epochs: 328 | epoch avg. loss: 13.776 | test avg. loss: 91.265
Epochs: 329 | epoch avg. loss: 14.847 | test avg. loss: 91.152


  1%|          | 333/50000 [00:25<48:39, 17.01it/s]

Epochs: 330 | epoch avg. loss: 14.332 | test avg. loss: 93.139
Epochs: 331 | epoch avg. loss: 14.327 | test avg. loss: 91.618
Epochs: 332 | epoch avg. loss: 13.555 | test avg. loss: 90.806
Epochs: 333 | epoch avg. loss: 14.580 | test avg. loss: 90.828


  1%|          | 337/50000 [00:26<54:17, 15.24it/s]

Epochs: 334 | epoch avg. loss: 13.984 | test avg. loss: 92.865
Epochs: 335 | epoch avg. loss: 14.580 | test avg. loss: 91.445
Epochs: 336 | epoch avg. loss: 13.794 | test avg. loss: 90.844
Epochs: 337 | epoch avg. loss: 13.589 | test avg. loss: 91.120


  1%|          | 341/50000 [00:26<49:19, 16.78it/s]

Epochs: 338 | epoch avg. loss: 13.978 | test avg. loss: 91.482
Epochs: 339 | epoch avg. loss: 13.453 | test avg. loss: 90.535
Epochs: 340 | epoch avg. loss: 14.082 | test avg. loss: 90.507
Epochs: 341 | epoch avg. loss: 13.373 | test avg. loss: 92.045


  1%|          | 345/50000 [00:26<47:18, 17.49it/s]

Epochs: 342 | epoch avg. loss: 14.410 | test avg. loss: 91.838
Epochs: 343 | epoch avg. loss: 13.465 | test avg. loss: 90.478
Epochs: 344 | epoch avg. loss: 14.235 | test avg. loss: 90.372
Epochs: 345 | epoch avg. loss: 13.789 | test avg. loss: 91.521


  1%|          | 349/50000 [00:26<47:08, 17.56it/s]

Epochs: 346 | epoch avg. loss: 13.674 | test avg. loss: 90.834
Epochs: 347 | epoch avg. loss: 13.885 | test avg. loss: 90.075
Epochs: 348 | epoch avg. loss: 13.666 | test avg. loss: 90.438
Epochs: 349 | epoch avg. loss: 13.240 | test avg. loss: 90.712


  1%|          | 353/50000 [00:27<53:40, 15.41it/s]

Epochs: 350 | epoch avg. loss: 13.354 | test avg. loss: 90.398
Epochs: 351 | epoch avg. loss: 13.086 | test avg. loss: 89.675
Epochs: 352 | epoch avg. loss: 13.441 | test avg. loss: 89.581


  1%|          | 355/50000 [00:27<55:10, 14.99it/s]

Epochs: 353 | epoch avg. loss: 13.281 | test avg. loss: 90.487
Epochs: 354 | epoch avg. loss: 13.205 | test avg. loss: 90.119
Epochs: 355 | epoch avg. loss: 13.071 | test avg. loss: 89.521
Epochs: 356 | epoch avg. loss: 13.044 | test avg. loss: 89.372


  1%|          | 361/50000 [00:27<50:03, 16.53it/s]

Epochs: 357 | epoch avg. loss: 13.480 | test avg. loss: 89.536
Epochs: 358 | epoch avg. loss: 13.289 | test avg. loss: 90.946
Epochs: 359 | epoch avg. loss: 13.544 | test avg. loss: 89.761
Epochs: 360 | epoch avg. loss: 13.020 | test avg. loss: 89.502


  1%|          | 363/50000 [00:27<50:08, 16.50it/s]

Epochs: 361 | epoch avg. loss: 12.869 | test avg. loss: 89.705
Epochs: 362 | epoch avg. loss: 12.910 | test avg. loss: 89.427
Epochs: 363 | epoch avg. loss: 12.856 | test avg. loss: 89.087
Epochs: 364 | epoch avg. loss: 12.849 | test avg. loss: 89.324


  1%|          | 368/50000 [00:28<53:42, 15.40it/s]

Epochs: 365 | epoch avg. loss: 12.730 | test avg. loss: 89.661
Epochs: 366 | epoch avg. loss: 12.921 | test avg. loss: 89.048
Epochs: 367 | epoch avg. loss: 12.930 | test avg. loss: 88.605


  1%|          | 372/50000 [00:28<54:01, 15.31it/s]

Epochs: 368 | epoch avg. loss: 12.848 | test avg. loss: 89.116
Epochs: 369 | epoch avg. loss: 12.748 | test avg. loss: 90.045
Epochs: 370 | epoch avg. loss: 13.047 | test avg. loss: 88.779
Epochs: 371 | epoch avg. loss: 12.667 | test avg. loss: 88.437


  1%|          | 376/50000 [00:28<49:56, 16.56it/s]

Epochs: 372 | epoch avg. loss: 12.775 | test avg. loss: 88.827
Epochs: 373 | epoch avg. loss: 12.874 | test avg. loss: 89.256
Epochs: 374 | epoch avg. loss: 12.572 | test avg. loss: 88.521
Epochs: 375 | epoch avg. loss: 12.581 | test avg. loss: 88.460


  1%|          | 380/50000 [00:28<48:34, 17.02it/s]

Epochs: 376 | epoch avg. loss: 12.606 | test avg. loss: 88.747
Epochs: 377 | epoch avg. loss: 12.435 | test avg. loss: 89.040
Epochs: 378 | epoch avg. loss: 12.507 | test avg. loss: 88.742
Epochs: 379 | epoch avg. loss: 12.400 | test avg. loss: 88.357


  1%|          | 382/50000 [00:28<50:40, 16.32it/s]

Epochs: 380 | epoch avg. loss: 12.763 | test avg. loss: 88.362
Epochs: 381 | epoch avg. loss: 12.441 | test avg. loss: 88.227
Epochs: 382 | epoch avg. loss: 12.491 | test avg. loss: 88.414


  1%|          | 386/50000 [00:29<57:02, 14.50it/s]

Epochs: 383 | epoch avg. loss: 12.439 | test avg. loss: 89.263
Epochs: 384 | epoch avg. loss: 12.868 | test avg. loss: 89.484
Epochs: 385 | epoch avg. loss: 12.370 | test avg. loss: 87.866


  1%|          | 390/50000 [00:29<52:01, 15.89it/s]

Epochs: 386 | epoch avg. loss: 13.105 | test avg. loss: 87.840
Epochs: 387 | epoch avg. loss: 12.715 | test avg. loss: 89.297
Epochs: 388 | epoch avg. loss: 12.743 | test avg. loss: 89.146
Epochs: 389 | epoch avg. loss: 12.722 | test avg. loss: 87.642


  1%|          | 394/50000 [00:29<48:29, 17.05it/s]

Epochs: 390 | epoch avg. loss: 12.381 | test avg. loss: 87.830
Epochs: 391 | epoch avg. loss: 12.172 | test avg. loss: 88.597
Epochs: 392 | epoch avg. loss: 12.230 | test avg. loss: 88.222
Epochs: 393 | epoch avg. loss: 12.063 | test avg. loss: 87.887


  1%|          | 398/50000 [00:29<49:59, 16.54it/s]

Epochs: 394 | epoch avg. loss: 12.415 | test avg. loss: 88.006
Epochs: 395 | epoch avg. loss: 12.001 | test avg. loss: 89.252
Epochs: 396 | epoch avg. loss: 12.497 | test avg. loss: 88.417
Epochs: 397 | epoch avg. loss: 12.045 | test avg. loss: 87.704


  1%|          | 398/50000 [00:30<49:59, 16.54it/s]

Epochs: 398 | epoch avg. loss: 12.734 | test avg. loss: 88.091
Epochs: 399 | epoch avg. loss: 12.240 | test avg. loss: 89.907


  1%|          | 404/50000 [00:30<1:31:59,  8.99it/s]

Epochs: 400 | epoch avg. loss: 12.697 | test avg. loss: 87.824
Epochs: 401 | epoch avg. loss: 12.321 | test avg. loss: 87.460
Epochs: 402 | epoch avg. loss: 12.330 | test avg. loss: 88.344
Epochs: 403 | epoch avg. loss: 12.048 | test avg. loss: 88.000


  1%|          | 408/50000 [00:31<1:11:13, 11.61it/s]

Epochs: 404 | epoch avg. loss: 11.902 | test avg. loss: 87.667
Epochs: 405 | epoch avg. loss: 11.852 | test avg. loss: 87.420
Epochs: 406 | epoch avg. loss: 11.916 | test avg. loss: 87.983
Epochs: 407 | epoch avg. loss: 12.233 | test avg. loss: 88.471


  1%|          | 412/50000 [00:31<58:41, 14.08it/s]  

Epochs: 408 | epoch avg. loss: 11.939 | test avg. loss: 87.132
Epochs: 409 | epoch avg. loss: 11.994 | test avg. loss: 87.092
Epochs: 410 | epoch avg. loss: 12.114 | test avg. loss: 87.822
Epochs: 411 | epoch avg. loss: 11.825 | test avg. loss: 87.453


  1%|          | 416/50000 [00:31<53:50, 15.35it/s]

Epochs: 412 | epoch avg. loss: 11.871 | test avg. loss: 87.272
Epochs: 413 | epoch avg. loss: 11.686 | test avg. loss: 87.863
Epochs: 414 | epoch avg. loss: 11.775 | test avg. loss: 87.649
Epochs: 415 | epoch avg. loss: 11.787 | test avg. loss: 87.063


  1%|          | 418/50000 [00:31<53:13, 15.53it/s]

Epochs: 416 | epoch avg. loss: 12.201 | test avg. loss: 86.974
Epochs: 417 | epoch avg. loss: 11.750 | test avg. loss: 88.488
Epochs: 418 | epoch avg. loss: 12.343 | test avg. loss: 88.208


  1%|          | 422/50000 [00:32<59:47, 13.82it/s]

Epochs: 419 | epoch avg. loss: 11.502 | test avg. loss: 86.684
Epochs: 420 | epoch avg. loss: 12.303 | test avg. loss: 86.597
Epochs: 421 | epoch avg. loss: 12.110 | test avg. loss: 87.208


  1%|          | 424/50000 [00:32<1:04:14, 12.86it/s]

Epochs: 422 | epoch avg. loss: 11.688 | test avg. loss: 88.850
Epochs: 423 | epoch avg. loss: 12.143 | test avg. loss: 86.635
Epochs: 424 | epoch avg. loss: 12.973 | test avg. loss: 86.492


  1%|          | 426/50000 [00:32<1:04:42, 12.77it/s]

Epochs: 425 | epoch avg. loss: 11.806 | test avg. loss: 88.950
Epochs: 426 | epoch avg. loss: 12.447 | test avg. loss: 86.813
Epochs: 427 | epoch avg. loss: 11.576 | test avg. loss: 86.604


  1%|          | 430/50000 [00:32<1:10:51, 11.66it/s]

Epochs: 428 | epoch avg. loss: 12.575 | test avg. loss: 86.774
Epochs: 429 | epoch avg. loss: 11.947 | test avg. loss: 88.345
Epochs: 430 | epoch avg. loss: 12.050 | test avg. loss: 86.176


  1%|          | 434/50000 [00:33<1:04:53, 12.73it/s]

Epochs: 431 | epoch avg. loss: 12.002 | test avg. loss: 86.117
Epochs: 432 | epoch avg. loss: 11.826 | test avg. loss: 87.685
Epochs: 433 | epoch avg. loss: 11.801 | test avg. loss: 86.180


  1%|          | 436/50000 [00:33<1:04:48, 12.75it/s]

Epochs: 434 | epoch avg. loss: 11.959 | test avg. loss: 85.949
Epochs: 435 | epoch avg. loss: 11.685 | test avg. loss: 86.948
Epochs: 436 | epoch avg. loss: 11.714 | test avg. loss: 87.618


  1%|          | 440/50000 [00:33<1:05:46, 12.56it/s]

Epochs: 437 | epoch avg. loss: 11.558 | test avg. loss: 85.781
Epochs: 438 | epoch avg. loss: 11.836 | test avg. loss: 85.766
Epochs: 439 | epoch avg. loss: 11.308 | test avg. loss: 87.506


  1%|          | 442/50000 [00:33<1:04:04, 12.89it/s]

Epochs: 440 | epoch avg. loss: 11.837 | test avg. loss: 87.177
Epochs: 441 | epoch avg. loss: 11.977 | test avg. loss: 86.274
Epochs: 442 | epoch avg. loss: 11.322 | test avg. loss: 86.580


  1%|          | 446/50000 [00:34<1:07:10, 12.29it/s]

Epochs: 443 | epoch avg. loss: 11.386 | test avg. loss: 86.471
Epochs: 444 | epoch avg. loss: 10.975 | test avg. loss: 85.521
Epochs: 445 | epoch avg. loss: 11.532 | test avg. loss: 85.564


  1%|          | 448/50000 [00:34<1:11:06, 11.61it/s]

Epochs: 446 | epoch avg. loss: 11.424 | test avg. loss: 86.522
Epochs: 447 | epoch avg. loss: 11.088 | test avg. loss: 85.998
Epochs: 448 | epoch avg. loss: 10.915 | test avg. loss: 85.672


  1%|          | 452/50000 [00:34<1:08:51, 11.99it/s]

Epochs: 449 | epoch avg. loss: 11.143 | test avg. loss: 85.700
Epochs: 450 | epoch avg. loss: 11.206 | test avg. loss: 85.878
Epochs: 451 | epoch avg. loss: 11.413 | test avg. loss: 86.555


  1%|          | 454/50000 [00:34<1:07:28, 12.24it/s]

Epochs: 452 | epoch avg. loss: 10.887 | test avg. loss: 85.348
Epochs: 453 | epoch avg. loss: 11.148 | test avg. loss: 85.269
Epochs: 454 | epoch avg. loss: 11.355 | test avg. loss: 85.903


  1%|          | 458/50000 [00:35<1:10:51, 11.65it/s]

Epochs: 455 | epoch avg. loss: 10.931 | test avg. loss: 85.904
Epochs: 456 | epoch avg. loss: 10.744 | test avg. loss: 85.166
Epochs: 457 | epoch avg. loss: 11.064 | test avg. loss: 85.134


  1%|          | 460/50000 [00:35<1:11:03, 11.62it/s]

Epochs: 458 | epoch avg. loss: 10.819 | test avg. loss: 86.296
Epochs: 459 | epoch avg. loss: 11.215 | test avg. loss: 86.619
Epochs: 460 | epoch avg. loss: 10.923 | test avg. loss: 84.956


  1%|          | 464/50000 [00:35<1:05:01, 12.70it/s]

Epochs: 461 | epoch avg. loss: 11.363 | test avg. loss: 84.964
Epochs: 462 | epoch avg. loss: 11.530 | test avg. loss: 85.893
Epochs: 463 | epoch avg. loss: 10.906 | test avg. loss: 86.611


                                                     

Epochs: 464 | epoch avg. loss: 11.410 | test avg. loss: 85.024
Epochs: 465 | epoch avg. loss: 10.685 | test avg. loss: 84.824
Epochs: 466 | epoch avg. loss: 10.681 | test avg. loss: 85.259


  1%|          | 470/50000 [00:35<1:03:37, 12.98it/s]

Epochs: 467 | epoch avg. loss: 10.808 | test avg. loss: 85.623
Epochs: 468 | epoch avg. loss: 10.656 | test avg. loss: 86.128
Epochs: 469 | epoch avg. loss: 10.948 | test avg. loss: 85.302


  1%|          | 472/50000 [00:36<1:09:15, 11.92it/s]

Epochs: 470 | epoch avg. loss: 11.100 | test avg. loss: 85.384
Epochs: 471 | epoch avg. loss: 10.809 | test avg. loss: 86.129
Epochs: 472 | epoch avg. loss: 10.614 | test avg. loss: 84.671


  1%|          | 476/50000 [00:36<1:07:33, 12.22it/s]

Epochs: 473 | epoch avg. loss: 10.899 | test avg. loss: 84.606
Epochs: 474 | epoch avg. loss: 10.866 | test avg. loss: 85.611
Epochs: 475 | epoch avg. loss: 10.697 | test avg. loss: 85.882


  1%|          | 478/50000 [00:36<1:10:36, 11.69it/s]

Epochs: 476 | epoch avg. loss: 11.046 | test avg. loss: 84.788
Epochs: 477 | epoch avg. loss: 10.490 | test avg. loss: 85.195
Epochs: 478 | epoch avg. loss: 10.406 | test avg. loss: 85.327


  1%|          | 482/50000 [00:37<1:08:49, 11.99it/s]

Epochs: 479 | epoch avg. loss: 10.743 | test avg. loss: 84.690
Epochs: 480 | epoch avg. loss: 11.033 | test avg. loss: 84.293
Epochs: 481 | epoch avg. loss: 10.708 | test avg. loss: 85.590


  1%|          | 484/50000 [00:37<1:07:11, 12.28it/s]

Epochs: 482 | epoch avg. loss: 10.806 | test avg. loss: 85.338
Epochs: 483 | epoch avg. loss: 10.462 | test avg. loss: 84.200
Epochs: 484 | epoch avg. loss: 11.096 | test avg. loss: 84.685


  1%|          | 488/50000 [00:37<1:05:20, 12.63it/s]

Epochs: 485 | epoch avg. loss: 10.770 | test avg. loss: 86.688
Epochs: 486 | epoch avg. loss: 11.020 | test avg. loss: 84.215
Epochs: 487 | epoch avg. loss: 10.442 | test avg. loss: 84.147


  1%|          | 490/50000 [00:37<1:06:13, 12.46it/s]

Epochs: 488 | epoch avg. loss: 10.858 | test avg. loss: 84.756
Epochs: 489 | epoch avg. loss: 10.252 | test avg. loss: 84.582
Epochs: 490 | epoch avg. loss: 10.176 | test avg. loss: 84.229


  1%|          | 494/50000 [00:38<1:09:00, 11.96it/s]

Epochs: 491 | epoch avg. loss: 10.166 | test avg. loss: 84.161
Epochs: 492 | epoch avg. loss: 10.118 | test avg. loss: 84.503
Epochs: 493 | epoch avg. loss: 10.223 | test avg. loss: 84.642


                                                     

Epochs: 494 | epoch avg. loss: 10.292 | test avg. loss: 84.360
Epochs: 495 | epoch avg. loss: 10.002 | test avg. loss: 83.701


  1%|          | 498/50000 [00:38<1:15:20, 10.95it/s]

Epochs: 496 | epoch avg. loss: 10.436 | test avg. loss: 83.650
Epochs: 497 | epoch avg. loss: 10.104 | test avg. loss: 84.894
Epochs: 498 | epoch avg. loss: 10.422 | test avg. loss: 84.866


  1%|          | 498/50000 [00:38<1:15:20, 10.95it/s]

Epochs: 499 | epoch avg. loss: 9.983 | test avg. loss: 83.897


  1%|          | 504/50000 [00:39<1:42:41,  8.03it/s]

Epochs: 500 | epoch avg. loss: 11.057 | test avg. loss: 83.965
Epochs: 501 | epoch avg. loss: 9.725 | test avg. loss: 86.872
Epochs: 502 | epoch avg. loss: 11.578 | test avg. loss: 84.958
Epochs: 503 | epoch avg. loss: 10.850 | test avg. loss: 83.702


  1%|          | 508/50000 [00:39<1:14:33, 11.06it/s]

Epochs: 504 | epoch avg. loss: 10.676 | test avg. loss: 84.372
Epochs: 505 | epoch avg. loss: 10.047 | test avg. loss: 85.976
Epochs: 506 | epoch avg. loss: 10.762 | test avg. loss: 83.593
Epochs: 507 | epoch avg. loss: 11.030 | test avg. loss: 83.743


  1%|          | 510/50000 [00:40<1:07:32, 12.21it/s]

Epochs: 508 | epoch avg. loss: 10.794 | test avg. loss: 85.188
Epochs: 509 | epoch avg. loss: 10.615 | test avg. loss: 84.638
Epochs: 510 | epoch avg. loss: 9.842 | test avg. loss: 83.538
Epochs: 511 | epoch avg. loss: 10.456 | test avg. loss: 83.533


  1%|          | 516/50000 [00:40<54:58, 15.00it/s]

Epochs: 512 | epoch avg. loss: 9.736 | test avg. loss: 84.806
Epochs: 513 | epoch avg. loss: 10.175 | test avg. loss: 84.775
Epochs: 514 | epoch avg. loss: 10.777 | test avg. loss: 83.584
Epochs: 515 | epoch avg. loss: 9.925 | test avg. loss: 83.730


  1%|          | 520/50000 [00:40<50:11, 16.43it/s]

Epochs: 516 | epoch avg. loss: 10.177 | test avg. loss: 83.441
Epochs: 517 | epoch avg. loss: 9.670 | test avg. loss: 84.838
Epochs: 518 | epoch avg. loss: 10.496 | test avg. loss: 84.471
Epochs: 519 | epoch avg. loss: 9.799 | test avg. loss: 83.630


  1%|          | 524/50000 [00:40<48:00, 17.17it/s]

Epochs: 520 | epoch avg. loss: 10.614 | test avg. loss: 83.524
Epochs: 521 | epoch avg. loss: 9.634 | test avg. loss: 85.380
Epochs: 522 | epoch avg. loss: 10.558 | test avg. loss: 83.549
Epochs: 523 | epoch avg. loss: 9.532 | test avg. loss: 82.853


  1%|          | 526/50000 [00:40<49:34, 16.64it/s]

Epochs: 524 | epoch avg. loss: 9.950 | test avg. loss: 82.951
Epochs: 525 | epoch avg. loss: 9.493 | test avg. loss: 84.342
Epochs: 526 | epoch avg. loss: 10.024 | test avg. loss: 83.759


  1%|          | 530/50000 [00:41<55:24, 14.88it/s]

Epochs: 527 | epoch avg. loss: 9.752 | test avg. loss: 82.986
Epochs: 528 | epoch avg. loss: 9.769 | test avg. loss: 83.256
Epochs: 529 | epoch avg. loss: 9.515 | test avg. loss: 83.773


  1%|          | 534/50000 [00:41<54:03, 15.25it/s]

Epochs: 530 | epoch avg. loss: 9.662 | test avg. loss: 83.576
Epochs: 531 | epoch avg. loss: 9.469 | test avg. loss: 82.675
Epochs: 532 | epoch avg. loss: 9.688 | test avg. loss: 82.720
Epochs: 533 | epoch avg. loss: 9.386 | test avg. loss: 83.897


  1%|          | 538/50000 [00:41<51:43, 15.94it/s]

Epochs: 534 | epoch avg. loss: 9.751 | test avg. loss: 83.489
Epochs: 535 | epoch avg. loss: 9.689 | test avg. loss: 82.715
Epochs: 536 | epoch avg. loss: 9.546 | test avg. loss: 83.314
Epochs: 537 | epoch avg. loss: 9.357 | test avg. loss: 84.294


  1%|          | 542/50000 [00:41<51:52, 15.89it/s]

Epochs: 538 | epoch avg. loss: 9.942 | test avg. loss: 83.346
Epochs: 539 | epoch avg. loss: 9.524 | test avg. loss: 82.895
Epochs: 540 | epoch avg. loss: 9.382 | test avg. loss: 83.150
Epochs: 541 | epoch avg. loss: 9.343 | test avg. loss: 83.392


  1%|          | 546/50000 [00:42<53:01, 15.54it/s]

Epochs: 542 | epoch avg. loss: 9.439 | test avg. loss: 82.619
Epochs: 543 | epoch avg. loss: 9.464 | test avg. loss: 82.323
Epochs: 544 | epoch avg. loss: 9.240 | test avg. loss: 83.094
Epochs: 545 | epoch avg. loss: 9.429 | test avg. loss: 82.757


  1%|          | 550/50000 [00:42<49:59, 16.49it/s]

Epochs: 546 | epoch avg. loss: 9.262 | test avg. loss: 82.025
Epochs: 547 | epoch avg. loss: 9.345 | test avg. loss: 82.122
Epochs: 548 | epoch avg. loss: 9.219 | test avg. loss: 82.934
Epochs: 549 | epoch avg. loss: 9.409 | test avg. loss: 82.894


  1%|          | 554/50000 [00:42<47:24, 17.38it/s]

Epochs: 550 | epoch avg. loss: 9.171 | test avg. loss: 82.166
Epochs: 551 | epoch avg. loss: 9.428 | test avg. loss: 82.240
Epochs: 552 | epoch avg. loss: 9.287 | test avg. loss: 82.799
Epochs: 553 | epoch avg. loss: 9.193 | test avg. loss: 82.277


  1%|          | 558/50000 [00:42<45:16, 18.20it/s]

Epochs: 554 | epoch avg. loss: 9.138 | test avg. loss: 82.027
Epochs: 555 | epoch avg. loss: 9.141 | test avg. loss: 82.464
Epochs: 556 | epoch avg. loss: 9.220 | test avg. loss: 82.586
Epochs: 557 | epoch avg. loss: 9.092 | test avg. loss: 81.935


  1%|          | 560/50000 [00:42<49:23, 16.69it/s]

Epochs: 558 | epoch avg. loss: 9.186 | test avg. loss: 82.214
Epochs: 559 | epoch avg. loss: 9.065 | test avg. loss: 82.625
Epochs: 560 | epoch avg. loss: 9.133 | test avg. loss: 82.201


  1%|          | 564/50000 [00:43<55:29, 14.85it/s]

Epochs: 561 | epoch avg. loss: 9.060 | test avg. loss: 82.161
Epochs: 562 | epoch avg. loss: 9.498 | test avg. loss: 82.212
Epochs: 563 | epoch avg. loss: 9.369 | test avg. loss: 83.056


  1%|          | 568/50000 [00:43<51:33, 15.98it/s]

Epochs: 564 | epoch avg. loss: 8.997 | test avg. loss: 81.992
Epochs: 565 | epoch avg. loss: 9.582 | test avg. loss: 82.071
Epochs: 566 | epoch avg. loss: 8.988 | test avg. loss: 83.760
Epochs: 567 | epoch avg. loss: 9.521 | test avg. loss: 83.584


  1%|          | 572/50000 [00:43<47:55, 17.19it/s]

Epochs: 568 | epoch avg. loss: 9.080 | test avg. loss: 82.056
Epochs: 569 | epoch avg. loss: 9.389 | test avg. loss: 82.082
Epochs: 570 | epoch avg. loss: 9.494 | test avg. loss: 82.285
Epochs: 571 | epoch avg. loss: 9.615 | test avg. loss: 82.864


  1%|          | 576/50000 [00:43<51:12, 16.08it/s]

Epochs: 572 | epoch avg. loss: 9.389 | test avg. loss: 81.847
Epochs: 573 | epoch avg. loss: 9.040 | test avg. loss: 82.385
Epochs: 574 | epoch avg. loss: 9.245 | test avg. loss: 82.561
Epochs: 575 | epoch avg. loss: 8.737 | test avg. loss: 81.903


  1%|          | 580/50000 [00:44<51:39, 15.95it/s]

Epochs: 576 | epoch avg. loss: 10.588 | test avg. loss: 81.718
Epochs: 577 | epoch avg. loss: 8.410 | test avg. loss: 85.521
Epochs: 578 | epoch avg. loss: 10.739 | test avg. loss: 82.092
Epochs: 579 | epoch avg. loss: 9.054 | test avg. loss: 81.792


  1%|          | 584/50000 [00:44<49:02, 16.79it/s]

Epochs: 580 | epoch avg. loss: 9.462 | test avg. loss: 82.379
Epochs: 581 | epoch avg. loss: 8.826 | test avg. loss: 82.993
Epochs: 582 | epoch avg. loss: 8.895 | test avg. loss: 81.656
Epochs: 583 | epoch avg. loss: 8.840 | test avg. loss: 81.645


  1%|          | 588/50000 [00:44<47:27, 17.36it/s]

Epochs: 584 | epoch avg. loss: 8.935 | test avg. loss: 82.032
Epochs: 585 | epoch avg. loss: 9.066 | test avg. loss: 82.199
Epochs: 586 | epoch avg. loss: 8.704 | test avg. loss: 81.222
Epochs: 587 | epoch avg. loss: 8.887 | test avg. loss: 81.367


  1%|          | 592/50000 [00:44<48:40, 16.92it/s]

Epochs: 588 | epoch avg. loss: 8.582 | test avg. loss: 82.082
Epochs: 589 | epoch avg. loss: 8.773 | test avg. loss: 81.382
Epochs: 590 | epoch avg. loss: 8.575 | test avg. loss: 81.326
Epochs: 591 | epoch avg. loss: 8.561 | test avg. loss: 81.651


  1%|          | 594/50000 [00:45<53:48, 15.30it/s]

Epochs: 592 | epoch avg. loss: 8.646 | test avg. loss: 81.523
Epochs: 593 | epoch avg. loss: 8.473 | test avg. loss: 81.803
Epochs: 594 | epoch avg. loss: 8.626 | test avg. loss: 81.457


  1%|          | 598/50000 [00:45<54:59, 14.97it/s]

Epochs: 595 | epoch avg. loss: 8.483 | test avg. loss: 81.182
Epochs: 596 | epoch avg. loss: 8.599 | test avg. loss: 81.569
Epochs: 597 | epoch avg. loss: 8.449 | test avg. loss: 81.730
Epochs: 598 | epoch avg. loss: 8.679 | test avg. loss: 81.067


  1%|          | 598/50000 [00:45<54:59, 14.97it/s]

Epochs: 599 | epoch avg. loss: 8.396 | test avg. loss: 80.857


  1%|          | 604/50000 [00:46<1:31:07,  9.03it/s]

Epochs: 600 | epoch avg. loss: 9.541 | test avg. loss: 81.057
Epochs: 601 | epoch avg. loss: 8.460 | test avg. loss: 81.098
Epochs: 602 | epoch avg. loss: 8.452 | test avg. loss: 81.434
Epochs: 603 | epoch avg. loss: 8.630 | test avg. loss: 81.391


  1%|          | 608/50000 [00:46<1:07:59, 12.11it/s]

Epochs: 604 | epoch avg. loss: 8.543 | test avg. loss: 81.552
Epochs: 605 | epoch avg. loss: 8.374 | test avg. loss: 81.162
Epochs: 606 | epoch avg. loss: 8.490 | test avg. loss: 81.277
Epochs: 607 | epoch avg. loss: 8.293 | test avg. loss: 81.459


  1%|          | 612/50000 [00:46<56:58, 14.45it/s]  

Epochs: 608 | epoch avg. loss: 8.383 | test avg. loss: 80.812
Epochs: 609 | epoch avg. loss: 8.404 | test avg. loss: 80.628
Epochs: 610 | epoch avg. loss: 8.240 | test avg. loss: 81.407
Epochs: 611 | epoch avg. loss: 8.898 | test avg. loss: 80.849


  1%|          | 614/50000 [00:47<58:02, 14.18it/s]

Epochs: 612 | epoch avg. loss: 8.033 | test avg. loss: 80.686
Epochs: 613 | epoch avg. loss: 9.142 | test avg. loss: 80.750
Epochs: 614 | epoch avg. loss: 8.172 | test avg. loss: 82.389


  1%|          | 618/50000 [00:47<58:46, 14.00it/s]

Epochs: 615 | epoch avg. loss: 9.180 | test avg. loss: 80.489
Epochs: 616 | epoch avg. loss: 8.295 | test avg. loss: 80.358
Epochs: 617 | epoch avg. loss: 8.562 | test avg. loss: 80.844
Epochs: 618 | epoch avg. loss: 8.353 | test avg. loss: 80.369


  1%|          | 622/50000 [00:47<53:13, 15.46it/s]

Epochs: 619 | epoch avg. loss: 8.276 | test avg. loss: 80.558
Epochs: 620 | epoch avg. loss: 8.482 | test avg. loss: 80.566
Epochs: 621 | epoch avg. loss: 8.210 | test avg. loss: 80.894
Epochs: 622 | epoch avg. loss: 8.360 | test avg. loss: 80.057


  1%|▏         | 626/50000 [00:47<50:39, 16.24it/s]

Epochs: 623 | epoch avg. loss: 8.100 | test avg. loss: 80.058
Epochs: 624 | epoch avg. loss: 8.247 | test avg. loss: 80.193
Epochs: 625 | epoch avg. loss: 8.282 | test avg. loss: 80.275
Epochs: 626 | epoch avg. loss: 8.219 | test avg. loss: 79.745


  1%|▏         | 630/50000 [00:47<53:33, 15.37it/s]

Epochs: 627 | epoch avg. loss: 8.195 | test avg. loss: 80.183
Epochs: 628 | epoch avg. loss: 8.145 | test avg. loss: 80.559
Epochs: 629 | epoch avg. loss: 8.130 | test avg. loss: 80.392


  1%|▏         | 632/50000 [00:48<57:36, 14.28it/s]

Epochs: 630 | epoch avg. loss: 8.040 | test avg. loss: 79.732
Epochs: 631 | epoch avg. loss: 8.362 | test avg. loss: 79.618
Epochs: 632 | epoch avg. loss: 8.013 | test avg. loss: 80.807


  1%|▏         | 636/50000 [00:48<52:57, 15.53it/s]

Epochs: 633 | epoch avg. loss: 8.371 | test avg. loss: 79.225
Epochs: 634 | epoch avg. loss: 7.974 | test avg. loss: 79.063
Epochs: 635 | epoch avg. loss: 8.428 | test avg. loss: 79.344
Epochs: 636 | epoch avg. loss: 7.789 | test avg. loss: 80.983


  1%|▏         | 640/50000 [00:48<51:07, 16.09it/s]

Epochs: 637 | epoch avg. loss: 8.698 | test avg. loss: 80.075
Epochs: 638 | epoch avg. loss: 8.303 | test avg. loss: 79.358
Epochs: 639 | epoch avg. loss: 7.869 | test avg. loss: 79.506
Epochs: 640 | epoch avg. loss: 7.794 | test avg. loss: 79.142


  1%|▏         | 642/50000 [00:48<57:16, 14.36it/s]

Epochs: 641 | epoch avg. loss: 8.003 | test avg. loss: 79.206
Epochs: 642 | epoch avg. loss: 7.674 | test avg. loss: 80.323


  1%|▏         | 646/50000 [00:49<1:04:06, 12.83it/s]

Epochs: 643 | epoch avg. loss: 8.224 | test avg. loss: 79.123
Epochs: 644 | epoch avg. loss: 7.617 | test avg. loss: 78.945
Epochs: 645 | epoch avg. loss: 8.534 | test avg. loss: 78.994


  1%|▏         | 648/50000 [00:49<1:06:45, 12.32it/s]

Epochs: 646 | epoch avg. loss: 8.024 | test avg. loss: 79.902
Epochs: 647 | epoch avg. loss: 7.811 | test avg. loss: 78.866
Epochs: 648 | epoch avg. loss: 8.166 | test avg. loss: 78.858


  1%|▏         | 652/50000 [00:49<1:03:19, 12.99it/s]

Epochs: 649 | epoch avg. loss: 8.567 | test avg. loss: 79.710
Epochs: 650 | epoch avg. loss: 7.857 | test avg. loss: 79.035
Epochs: 651 | epoch avg. loss: 8.029 | test avg. loss: 79.229


  1%|▏         | 654/50000 [00:49<1:03:10, 13.02it/s]

Epochs: 652 | epoch avg. loss: 7.988 | test avg. loss: 79.588
Epochs: 653 | epoch avg. loss: 7.715 | test avg. loss: 78.587
Epochs: 654 | epoch avg. loss: 7.862 | test avg. loss: 78.765


  1%|▏         | 658/50000 [00:50<1:03:21, 12.98it/s]

Epochs: 655 | epoch avg. loss: 7.479 | test avg. loss: 79.694
Epochs: 656 | epoch avg. loss: 7.917 | test avg. loss: 78.655
Epochs: 657 | epoch avg. loss: 8.610 | test avg. loss: 78.503


  1%|▏         | 660/50000 [00:50<1:03:50, 12.88it/s]

Epochs: 658 | epoch avg. loss: 9.256 | test avg. loss: 79.252
Epochs: 659 | epoch avg. loss: 8.400 | test avg. loss: 78.334
Epochs: 660 | epoch avg. loss: 7.843 | test avg. loss: 79.246


  1%|▏         | 664/50000 [00:50<1:04:07, 12.82it/s]

Epochs: 661 | epoch avg. loss: 8.496 | test avg. loss: 78.338
Epochs: 662 | epoch avg. loss: 7.345 | test avg. loss: 78.597
Epochs: 663 | epoch avg. loss: 9.161 | test avg. loss: 78.756


  1%|▏         | 666/50000 [00:50<1:03:58, 12.85it/s]

Epochs: 664 | epoch avg. loss: 7.580 | test avg. loss: 79.002
Epochs: 665 | epoch avg. loss: 7.481 | test avg. loss: 78.194
Epochs: 666 | epoch avg. loss: 7.717 | test avg. loss: 78.359


  1%|▏         | 670/50000 [00:51<1:05:42, 12.51it/s]

Epochs: 667 | epoch avg. loss: 7.710 | test avg. loss: 78.999
Epochs: 668 | epoch avg. loss: 7.599 | test avg. loss: 78.135
Epochs: 669 | epoch avg. loss: 8.034 | test avg. loss: 78.094


  1%|▏         | 672/50000 [00:51<1:08:27, 12.01it/s]

Epochs: 670 | epoch avg. loss: 7.720 | test avg. loss: 79.684
Epochs: 671 | epoch avg. loss: 8.192 | test avg. loss: 78.327
Epochs: 672 | epoch avg. loss: 7.600 | test avg. loss: 77.767


  1%|▏         | 676/50000 [00:51<1:09:38, 11.81it/s]

Epochs: 673 | epoch avg. loss: 7.701 | test avg. loss: 78.427
Epochs: 674 | epoch avg. loss: 7.473 | test avg. loss: 78.229
Epochs: 675 | epoch avg. loss: 7.237 | test avg. loss: 77.498


  1%|▏         | 678/50000 [00:51<1:09:21, 11.85it/s]

Epochs: 676 | epoch avg. loss: 7.515 | test avg. loss: 77.613
Epochs: 677 | epoch avg. loss: 7.457 | test avg. loss: 78.403
Epochs: 678 | epoch avg. loss: 7.221 | test avg. loss: 77.485


  1%|▏         | 682/50000 [00:52<1:07:45, 12.13it/s]

Epochs: 679 | epoch avg. loss: 7.540 | test avg. loss: 77.520
Epochs: 680 | epoch avg. loss: 7.044 | test avg. loss: 78.808
Epochs: 681 | epoch avg. loss: 7.554 | test avg. loss: 77.301


  1%|▏         | 684/50000 [00:52<1:03:34, 12.93it/s]

Epochs: 682 | epoch avg. loss: 7.014 | test avg. loss: 77.164
Epochs: 683 | epoch avg. loss: 7.378 | test avg. loss: 77.488
Epochs: 684 | epoch avg. loss: 6.981 | test avg. loss: 77.826
Epochs: 685 | epoch avg. loss: 7.014 | test avg. loss: 77.202


  1%|▏         | 688/50000 [00:52<1:01:11, 13.43it/s]

Epochs: 686 | epoch avg. loss: 7.071 | test avg. loss: 77.167
Epochs: 687 | epoch avg. loss: 7.204 | test avg. loss: 77.428
Epochs: 688 | epoch avg. loss: 6.870 | test avg. loss: 76.751


  1%|▏         | 692/50000 [00:52<57:51, 14.21it/s]

Epochs: 689 | epoch avg. loss: 7.207 | test avg. loss: 76.993
Epochs: 690 | epoch avg. loss: 7.652 | test avg. loss: 77.530
Epochs: 691 | epoch avg. loss: 7.206 | test avg. loss: 76.442
Epochs: 692 | epoch avg. loss: 7.419 | test avg. loss: 76.655


  1%|▏         | 696/50000 [00:53<58:18, 14.09it/s]

Epochs: 693 | epoch avg. loss: 7.190 | test avg. loss: 76.939
Epochs: 694 | epoch avg. loss: 6.716 | test avg. loss: 76.480
Epochs: 695 | epoch avg. loss: 7.274 | test avg. loss: 76.745


  1%|▏         | 698/50000 [00:53<1:00:46, 13.52it/s]

Epochs: 696 | epoch avg. loss: 6.870 | test avg. loss: 77.840
Epochs: 697 | epoch avg. loss: 7.080 | test avg. loss: 76.494
Epochs: 698 | epoch avg. loss: 7.694 | test avg. loss: 76.415


  1%|▏         | 698/50000 [00:53<1:00:46, 13.52it/s]

Epochs: 699 | epoch avg. loss: 7.089 | test avg. loss: 78.036


  1%|▏         | 702/50000 [00:54<2:26:13,  5.62it/s]

Epochs: 700 | epoch avg. loss: 7.250 | test avg. loss: 76.033
Epochs: 701 | epoch avg. loss: 7.557 | test avg. loss: 75.883
Epochs: 702 | epoch avg. loss: 6.781 | test avg. loss: 77.941


  1%|▏         | 706/50000 [00:54<1:44:16,  7.88it/s]

Epochs: 703 | epoch avg. loss: 7.573 | test avg. loss: 76.320
Epochs: 704 | epoch avg. loss: 6.541 | test avg. loss: 76.303
Epochs: 705 | epoch avg. loss: 7.058 | test avg. loss: 76.295


  1%|▏         | 708/50000 [00:55<1:35:34,  8.60it/s]

Epochs: 706 | epoch avg. loss: 6.530 | test avg. loss: 76.884
Epochs: 707 | epoch avg. loss: 6.875 | test avg. loss: 75.447
Epochs: 708 | epoch avg. loss: 6.942 | test avg. loss: 75.337


  1%|▏         | 712/50000 [00:55<1:19:13, 10.37it/s]

Epochs: 709 | epoch avg. loss: 6.557 | test avg. loss: 76.812
Epochs: 710 | epoch avg. loss: 7.084 | test avg. loss: 75.700
Epochs: 711 | epoch avg. loss: 6.516 | test avg. loss: 75.755


  1%|▏         | 714/50000 [00:55<1:14:01, 11.10it/s]

Epochs: 712 | epoch avg. loss: 7.087 | test avg. loss: 75.733
Epochs: 713 | epoch avg. loss: 6.966 | test avg. loss: 76.548
Epochs: 714 | epoch avg. loss: 6.700 | test avg. loss: 75.286


  1%|▏         | 718/50000 [00:55<1:06:57, 12.27it/s]

Epochs: 715 | epoch avg. loss: 6.879 | test avg. loss: 75.255
Epochs: 716 | epoch avg. loss: 6.307 | test avg. loss: 76.212
Epochs: 717 | epoch avg. loss: 6.843 | test avg. loss: 75.186


  1%|▏         | 720/50000 [00:55<1:07:21, 12.19it/s]

Epochs: 718 | epoch avg. loss: 6.487 | test avg. loss: 75.198
Epochs: 719 | epoch avg. loss: 7.120 | test avg. loss: 75.464
Epochs: 720 | epoch avg. loss: 6.449 | test avg. loss: 74.954


  1%|▏         | 724/50000 [00:56<1:10:32, 11.64it/s]

Epochs: 721 | epoch avg. loss: 6.481 | test avg. loss: 74.725
Epochs: 722 | epoch avg. loss: 6.304 | test avg. loss: 75.725
Epochs: 723 | epoch avg. loss: 6.777 | test avg. loss: 75.165


  1%|▏         | 728/50000 [00:56<1:03:21, 12.96it/s]

Epochs: 724 | epoch avg. loss: 6.230 | test avg. loss: 74.892
Epochs: 725 | epoch avg. loss: 6.756 | test avg. loss: 75.114
Epochs: 726 | epoch avg. loss: 6.567 | test avg. loss: 75.413
Epochs: 727 | epoch avg. loss: 6.487 | test avg. loss: 74.590


  1%|▏         | 732/50000 [00:56<56:35, 14.51it/s]

Epochs: 728 | epoch avg. loss: 6.433 | test avg. loss: 75.077
Epochs: 729 | epoch avg. loss: 6.253 | test avg. loss: 75.461
Epochs: 730 | epoch avg. loss: 6.266 | test avg. loss: 74.799
Epochs: 731 | epoch avg. loss: 6.543 | test avg. loss: 74.849


  1%|▏         | 736/50000 [00:57<54:13, 15.14it/s]

Epochs: 732 | epoch avg. loss: 6.277 | test avg. loss: 75.692
Epochs: 733 | epoch avg. loss: 6.821 | test avg. loss: 74.461
Epochs: 734 | epoch avg. loss: 6.156 | test avg. loss: 74.551
Epochs: 735 | epoch avg. loss: 6.257 | test avg. loss: 74.361


  1%|▏         | 740/50000 [00:57<51:04, 16.07it/s]

Epochs: 736 | epoch avg. loss: 6.103 | test avg. loss: 74.245
Epochs: 737 | epoch avg. loss: 6.382 | test avg. loss: 74.382
Epochs: 738 | epoch avg. loss: 6.106 | test avg. loss: 74.704
Epochs: 739 | epoch avg. loss: 6.108 | test avg. loss: 74.341


  1%|▏         | 744/50000 [00:57<48:48, 16.82it/s]

Epochs: 740 | epoch avg. loss: 6.111 | test avg. loss: 74.240
Epochs: 741 | epoch avg. loss: 6.467 | test avg. loss: 74.274
Epochs: 742 | epoch avg. loss: 6.049 | test avg. loss: 74.010
Epochs: 743 | epoch avg. loss: 6.144 | test avg. loss: 74.360


  1%|▏         | 748/50000 [00:57<48:31, 16.92it/s]

Epochs: 744 | epoch avg. loss: 6.141 | test avg. loss: 73.994
Epochs: 745 | epoch avg. loss: 6.005 | test avg. loss: 73.711
Epochs: 746 | epoch avg. loss: 6.082 | test avg. loss: 74.204
Epochs: 747 | epoch avg. loss: 6.224 | test avg. loss: 74.011


  2%|▏         | 750/50000 [00:57<52:11, 15.73it/s]

Epochs: 748 | epoch avg. loss: 5.895 | test avg. loss: 73.788
Epochs: 749 | epoch avg. loss: 6.939 | test avg. loss: 73.729
Epochs: 750 | epoch avg. loss: 5.990 | test avg. loss: 75.136


  2%|▏         | 754/50000 [00:58<57:11, 14.35it/s]

Epochs: 751 | epoch avg. loss: 6.882 | test avg. loss: 73.577
Epochs: 752 | epoch avg. loss: 6.043 | test avg. loss: 73.713
Epochs: 753 | epoch avg. loss: 6.267 | test avg. loss: 73.717


  2%|▏         | 758/50000 [00:58<56:29, 14.53it/s]

Epochs: 754 | epoch avg. loss: 5.991 | test avg. loss: 73.704
Epochs: 755 | epoch avg. loss: 6.059 | test avg. loss: 74.376
Epochs: 756 | epoch avg. loss: 6.120 | test avg. loss: 74.567
Epochs: 757 | epoch avg. loss: 6.158 | test avg. loss: 73.550


  2%|▏         | 762/50000 [00:58<52:26, 15.65it/s]

Epochs: 758 | epoch avg. loss: 5.966 | test avg. loss: 73.491
Epochs: 759 | epoch avg. loss: 5.890 | test avg. loss: 74.101
Epochs: 760 | epoch avg. loss: 6.077 | test avg. loss: 73.745
Epochs: 761 | epoch avg. loss: 5.911 | test avg. loss: 73.385


  2%|▏         | 764/50000 [00:58<53:03, 15.47it/s]

Epochs: 762 | epoch avg. loss: 6.046 | test avg. loss: 73.192
Epochs: 763 | epoch avg. loss: 5.897 | test avg. loss: 73.069
Epochs: 764 | epoch avg. loss: 5.850 | test avg. loss: 73.636


  2%|▏         | 768/50000 [00:59<54:54, 14.94it/s]

Epochs: 765 | epoch avg. loss: 6.008 | test avg. loss: 73.468
Epochs: 766 | epoch avg. loss: 5.861 | test avg. loss: 73.058
Epochs: 767 | epoch avg. loss: 5.858 | test avg. loss: 72.957
Epochs: 768 | epoch avg. loss: 5.801 | test avg. loss: 73.492


  2%|▏         | 772/50000 [00:59<51:26, 15.95it/s]

Epochs: 769 | epoch avg. loss: 5.941 | test avg. loss: 73.196
Epochs: 770 | epoch avg. loss: 5.774 | test avg. loss: 73.365
Epochs: 771 | epoch avg. loss: 6.082 | test avg. loss: 73.209
Epochs: 772 | epoch avg. loss: 5.838 | test avg. loss: 73.132


  2%|▏         | 776/50000 [00:59<49:24, 16.60it/s]

Epochs: 773 | epoch avg. loss: 5.885 | test avg. loss: 73.914
Epochs: 774 | epoch avg. loss: 6.241 | test avg. loss: 73.185
Epochs: 775 | epoch avg. loss: 6.005 | test avg. loss: 72.818
Epochs: 776 | epoch avg. loss: 5.822 | test avg. loss: 73.948


  2%|▏         | 780/50000 [00:59<48:19, 16.98it/s]

Epochs: 777 | epoch avg. loss: 6.262 | test avg. loss: 73.120
Epochs: 778 | epoch avg. loss: 5.547 | test avg. loss: 73.020
Epochs: 779 | epoch avg. loss: 6.748 | test avg. loss: 72.960
Epochs: 780 | epoch avg. loss: 6.267 | test avg. loss: 74.328


  2%|▏         | 784/50000 [01:00<54:22, 15.09it/s]

Epochs: 781 | epoch avg. loss: 6.142 | test avg. loss: 72.914
Epochs: 782 | epoch avg. loss: 6.806 | test avg. loss: 72.532
Epochs: 783 | epoch avg. loss: 5.714 | test avg. loss: 73.596


  2%|▏         | 788/50000 [01:00<53:46, 15.25it/s]

Epochs: 784 | epoch avg. loss: 5.961 | test avg. loss: 72.821
Epochs: 785 | epoch avg. loss: 5.900 | test avg. loss: 73.142
Epochs: 786 | epoch avg. loss: 5.976 | test avg. loss: 73.575
Epochs: 787 | epoch avg. loss: 5.552 | test avg. loss: 73.104


  2%|▏         | 792/50000 [01:00<51:08, 16.04it/s]

Epochs: 788 | epoch avg. loss: 6.413 | test avg. loss: 72.870
Epochs: 789 | epoch avg. loss: 5.955 | test avg. loss: 74.039
Epochs: 790 | epoch avg. loss: 6.082 | test avg. loss: 72.419
Epochs: 791 | epoch avg. loss: 6.187 | test avg. loss: 72.436


  2%|▏         | 796/50000 [01:00<51:14, 16.00it/s]

Epochs: 792 | epoch avg. loss: 6.113 | test avg. loss: 73.171
Epochs: 793 | epoch avg. loss: 5.638 | test avg. loss: 72.779
Epochs: 794 | epoch avg. loss: 5.906 | test avg. loss: 72.858
Epochs: 795 | epoch avg. loss: 5.590 | test avg. loss: 72.945


  2%|▏         | 798/50000 [01:01<56:19, 14.56it/s]

Epochs: 796 | epoch avg. loss: 5.670 | test avg. loss: 72.481
Epochs: 797 | epoch avg. loss: 5.480 | test avg. loss: 72.009
Epochs: 798 | epoch avg. loss: 5.733 | test avg. loss: 72.145


  2%|▏         | 798/50000 [01:01<56:19, 14.56it/s]

Epochs: 799 | epoch avg. loss: 5.608 | test avg. loss: 72.193


  2%|▏         | 804/50000 [01:02<1:38:56,  8.29it/s]

Epochs: 800 | epoch avg. loss: 5.470 | test avg. loss: 71.827
Epochs: 801 | epoch avg. loss: 5.738 | test avg. loss: 71.975
Epochs: 802 | epoch avg. loss: 5.557 | test avg. loss: 72.443
Epochs: 803 | epoch avg. loss: 5.624 | test avg. loss: 71.705


  2%|▏         | 808/50000 [01:02<1:14:04, 11.07it/s]

Epochs: 804 | epoch avg. loss: 5.713 | test avg. loss: 71.553
Epochs: 805 | epoch avg. loss: 5.602 | test avg. loss: 71.826
Epochs: 806 | epoch avg. loss: 5.578 | test avg. loss: 72.043
Epochs: 807 | epoch avg. loss: 5.614 | test avg. loss: 71.368


  2%|▏         | 812/50000 [01:02<1:01:51, 13.25it/s]

Epochs: 808 | epoch avg. loss: 5.558 | test avg. loss: 71.611
Epochs: 809 | epoch avg. loss: 5.378 | test avg. loss: 71.783
Epochs: 810 | epoch avg. loss: 5.577 | test avg. loss: 71.627
Epochs: 811 | epoch avg. loss: 5.379 | test avg. loss: 71.553


  2%|▏         | 816/50000 [01:02<57:25, 14.27it/s]

Epochs: 812 | epoch avg. loss: 5.344 | test avg. loss: 71.163
Epochs: 813 | epoch avg. loss: 5.735 | test avg. loss: 71.532
Epochs: 814 | epoch avg. loss: 5.606 | test avg. loss: 72.109
Epochs: 815 | epoch avg. loss: 5.483 | test avg. loss: 71.338


  2%|▏         | 818/50000 [01:03<1:00:20, 13.58it/s]

Epochs: 816 | epoch avg. loss: 6.196 | test avg. loss: 71.517
Epochs: 817 | epoch avg. loss: 5.888 | test avg. loss: 72.240
Epochs: 818 | epoch avg. loss: 5.352 | test avg. loss: 71.751


  2%|▏         | 822/50000 [01:03<1:00:36, 13.52it/s]

Epochs: 819 | epoch avg. loss: 6.524 | test avg. loss: 71.344
Epochs: 820 | epoch avg. loss: 6.117 | test avg. loss: 71.439
Epochs: 821 | epoch avg. loss: 5.957 | test avg. loss: 71.053


  2%|▏         | 826/50000 [01:03<55:27, 14.78it/s]

Epochs: 822 | epoch avg. loss: 5.579 | test avg. loss: 72.424
Epochs: 823 | epoch avg. loss: 5.943 | test avg. loss: 71.083
Epochs: 824 | epoch avg. loss: 5.806 | test avg. loss: 70.972
Epochs: 825 | epoch avg. loss: 5.887 | test avg. loss: 72.021


  2%|▏         | 830/50000 [01:03<54:51, 14.94it/s]

Epochs: 826 | epoch avg. loss: 5.563 | test avg. loss: 70.744
Epochs: 827 | epoch avg. loss: 5.793 | test avg. loss: 71.120
Epochs: 828 | epoch avg. loss: 6.276 | test avg. loss: 71.411
Epochs: 829 | epoch avg. loss: 6.245 | test avg. loss: 70.849


  2%|▏         | 834/50000 [01:04<55:31, 14.76it/s]

Epochs: 830 | epoch avg. loss: 6.017 | test avg. loss: 71.776
Epochs: 831 | epoch avg. loss: 5.523 | test avg. loss: 70.773
Epochs: 832 | epoch avg. loss: 5.318 | test avg. loss: 70.920
Epochs: 833 | epoch avg. loss: 5.598 | test avg. loss: 70.990


  2%|▏         | 838/50000 [01:04<52:40, 15.56it/s]

Epochs: 834 | epoch avg. loss: 5.553 | test avg. loss: 70.724
Epochs: 835 | epoch avg. loss: 5.385 | test avg. loss: 71.339
Epochs: 836 | epoch avg. loss: 5.263 | test avg. loss: 70.450
Epochs: 837 | epoch avg. loss: 5.494 | test avg. loss: 70.443


  2%|▏         | 842/50000 [01:04<50:07, 16.35it/s]

Epochs: 838 | epoch avg. loss: 5.047 | test avg. loss: 71.638
Epochs: 839 | epoch avg. loss: 5.514 | test avg. loss: 70.447
Epochs: 840 | epoch avg. loss: 5.378 | test avg. loss: 70.731
Epochs: 841 | epoch avg. loss: 5.406 | test avg. loss: 71.489


  2%|▏         | 844/50000 [01:04<49:36, 16.51it/s]

Epochs: 842 | epoch avg. loss: 5.186 | test avg. loss: 70.908
Epochs: 843 | epoch avg. loss: 5.388 | test avg. loss: 70.903
Epochs: 844 | epoch avg. loss: 5.027 | test avg. loss: 71.073
Epochs: 845 | epoch avg. loss: 5.105 | test avg. loss: 70.439


  2%|▏         | 848/50000 [01:05<54:39, 14.99it/s]

Epochs: 846 | epoch avg. loss: 5.408 | test avg. loss: 70.498
Epochs: 847 | epoch avg. loss: 4.965 | test avg. loss: 71.866
Epochs: 848 | epoch avg. loss: 5.507 | test avg. loss: 70.809


  2%|▏         | 852/50000 [01:05<58:25, 14.02it/s]

Epochs: 849 | epoch avg. loss: 5.492 | test avg. loss: 70.873
Epochs: 850 | epoch avg. loss: 4.994 | test avg. loss: 72.209
Epochs: 851 | epoch avg. loss: 5.574 | test avg. loss: 70.759


  2%|▏         | 856/50000 [01:05<53:26, 15.32it/s]

Epochs: 852 | epoch avg. loss: 5.605 | test avg. loss: 70.610
Epochs: 853 | epoch avg. loss: 5.226 | test avg. loss: 71.948
Epochs: 854 | epoch avg. loss: 5.756 | test avg. loss: 70.231
Epochs: 855 | epoch avg. loss: 5.010 | test avg. loss: 70.242


  2%|▏         | 860/50000 [01:05<52:14, 15.68it/s]

Epochs: 856 | epoch avg. loss: 5.247 | test avg. loss: 70.371
Epochs: 857 | epoch avg. loss: 4.924 | test avg. loss: 70.252
Epochs: 858 | epoch avg. loss: 5.548 | test avg. loss: 70.353
Epochs: 859 | epoch avg. loss: 5.511 | test avg. loss: 70.812


  2%|▏         | 864/50000 [01:06<53:27, 15.32it/s]

Epochs: 860 | epoch avg. loss: 5.410 | test avg. loss: 70.170
Epochs: 861 | epoch avg. loss: 5.314 | test avg. loss: 71.182
Epochs: 862 | epoch avg. loss: 5.497 | test avg. loss: 70.736
Epochs: 863 | epoch avg. loss: 5.008 | test avg. loss: 70.590


  2%|▏         | 868/50000 [01:06<51:18, 15.96it/s]

Epochs: 864 | epoch avg. loss: 5.241 | test avg. loss: 71.037
Epochs: 865 | epoch avg. loss: 5.194 | test avg. loss: 70.447
Epochs: 866 | epoch avg. loss: 4.830 | test avg. loss: 70.123
Epochs: 867 | epoch avg. loss: 5.349 | test avg. loss: 70.212


  2%|▏         | 870/50000 [01:06<53:48, 15.22it/s]

Epochs: 868 | epoch avg. loss: 4.923 | test avg. loss: 70.211
Epochs: 869 | epoch avg. loss: 4.880 | test avg. loss: 69.817
Epochs: 870 | epoch avg. loss: 4.939 | test avg. loss: 70.126


  2%|▏         | 874/50000 [01:06<56:37, 14.46it/s]

Epochs: 871 | epoch avg. loss: 4.826 | test avg. loss: 70.230
Epochs: 872 | epoch avg. loss: 4.990 | test avg. loss: 70.230
Epochs: 873 | epoch avg. loss: 4.747 | test avg. loss: 71.331


  2%|▏         | 876/50000 [01:07<1:03:47, 12.84it/s]

Epochs: 874 | epoch avg. loss: 5.262 | test avg. loss: 70.467
Epochs: 875 | epoch avg. loss: 4.775 | test avg. loss: 70.358
Epochs: 876 | epoch avg. loss: 4.969 | test avg. loss: 71.013


  2%|▏         | 880/50000 [01:07<1:09:19, 11.81it/s]

Epochs: 877 | epoch avg. loss: 5.049 | test avg. loss: 70.486
Epochs: 878 | epoch avg. loss: 4.870 | test avg. loss: 70.222
Epochs: 879 | epoch avg. loss: 4.774 | test avg. loss: 70.781


  2%|▏         | 882/50000 [01:07<1:05:52, 12.43it/s]

Epochs: 880 | epoch avg. loss: 4.885 | test avg. loss: 70.380
Epochs: 881 | epoch avg. loss: 4.734 | test avg. loss: 70.066
Epochs: 882 | epoch avg. loss: 5.030 | test avg. loss: 69.875
Epochs: 883 | epoch avg. loss: 5.857 | test avg. loss: 70.219


  2%|▏         | 886/50000 [01:07<1:05:02, 12.58it/s]

Epochs: 884 | epoch avg. loss: 5.250 | test avg. loss: 71.629
Epochs: 885 | epoch avg. loss: 6.167 | test avg. loss: 70.038
Epochs: 886 | epoch avg. loss: 5.216 | test avg. loss: 70.515


  2%|▏         | 888/50000 [01:08<1:06:16, 12.35it/s]

Epochs: 887 | epoch avg. loss: 5.454 | test avg. loss: 69.687
Epochs: 888 | epoch avg. loss: 5.095 | test avg. loss: 69.136


  2%|▏         | 892/50000 [01:08<1:08:40, 11.92it/s]

Epochs: 889 | epoch avg. loss: 4.891 | test avg. loss: 70.507
Epochs: 890 | epoch avg. loss: 5.683 | test avg. loss: 69.203
Epochs: 891 | epoch avg. loss: 4.829 | test avg. loss: 69.842


  2%|▏         | 896/50000 [01:08<1:02:01, 13.19it/s]

Epochs: 892 | epoch avg. loss: 5.389 | test avg. loss: 71.024
Epochs: 893 | epoch avg. loss: 5.413 | test avg. loss: 69.747
Epochs: 894 | epoch avg. loss: 4.707 | test avg. loss: 69.866
Epochs: 895 | epoch avg. loss: 5.127 | test avg. loss: 70.051


  2%|▏         | 898/50000 [01:08<1:01:42, 13.26it/s]

Epochs: 896 | epoch avg. loss: 4.593 | test avg. loss: 69.823
Epochs: 897 | epoch avg. loss: 4.714 | test avg. loss: 69.848
Epochs: 898 | epoch avg. loss: 4.525 | test avg. loss: 70.346


  2%|▏         | 898/50000 [01:08<1:01:42, 13.26it/s]

Epochs: 899 | epoch avg. loss: 4.812 | test avg. loss: 69.733


  2%|▏         | 902/50000 [01:10<3:11:52,  4.26it/s]

Epochs: 900 | epoch avg. loss: 4.760 | test avg. loss: 69.707
Epochs: 901 | epoch avg. loss: 4.916 | test avg. loss: 69.957
Epochs: 902 | epoch avg. loss: 4.719 | test avg. loss: 69.424


  2%|▏         | 906/50000 [01:10<2:08:06,  6.39it/s]

Epochs: 903 | epoch avg. loss: 4.513 | test avg. loss: 69.376
Epochs: 904 | epoch avg. loss: 4.526 | test avg. loss: 69.375
Epochs: 905 | epoch avg. loss: 4.664 | test avg. loss: 69.228




Epochs: 906 | epoch avg. loss: 4.571 | test avg. loss: 69.184
Epochs: 907 | epoch avg. loss: 4.844 | test avg. loss: 69.235
Epochs: 908 | epoch avg. loss: 4.473 | test avg. loss: 69.457


  2%|▏         | 912/50000 [01:11<1:24:49,  9.65it/s]

Epochs: 909 | epoch avg. loss: 4.606 | test avg. loss: 69.302
Epochs: 910 | epoch avg. loss: 4.892 | test avg. loss: 69.505
Epochs: 911 | epoch avg. loss: 5.029 | test avg. loss: 69.797


  2%|▏         | 914/50000 [01:11<1:23:47,  9.76it/s]

Epochs: 912 | epoch avg. loss: 4.784 | test avg. loss: 69.514
Epochs: 913 | epoch avg. loss: 4.723 | test avg. loss: 70.042
Epochs: 914 | epoch avg. loss: 5.003 | test avg. loss: 69.467


  2%|▏         | 918/50000 [01:11<1:16:46, 10.65it/s]

Epochs: 915 | epoch avg. loss: 4.430 | test avg. loss: 69.786
Epochs: 916 | epoch avg. loss: 4.563 | test avg. loss: 69.478
Epochs: 917 | epoch avg. loss: 5.068 | test avg. loss: 69.629


  2%|▏         | 920/50000 [01:12<1:12:38, 11.26it/s]

Epochs: 918 | epoch avg. loss: 4.778 | test avg. loss: 70.529
Epochs: 919 | epoch avg. loss: 5.102 | test avg. loss: 69.547
Epochs: 920 | epoch avg. loss: 4.848 | test avg. loss: 69.732


  2%|▏         | 924/50000 [01:12<1:13:08, 11.18it/s]

Epochs: 921 | epoch avg. loss: 4.661 | test avg. loss: 69.082
Epochs: 922 | epoch avg. loss: 4.368 | test avg. loss: 68.923
Epochs: 923 | epoch avg. loss: 4.597 | test avg. loss: 69.120


  2%|▏         | 926/50000 [01:12<1:07:47, 12.06it/s]

Epochs: 924 | epoch avg. loss: 4.376 | test avg. loss: 69.103
Epochs: 925 | epoch avg. loss: 4.628 | test avg. loss: 69.089
Epochs: 926 | epoch avg. loss: 5.064 | test avg. loss: 69.095


  2%|▏         | 930/50000 [01:12<1:02:33, 13.07it/s]

Epochs: 927 | epoch avg. loss: 5.257 | test avg. loss: 69.272
Epochs: 928 | epoch avg. loss: 4.569 | test avg. loss: 69.148
Epochs: 929 | epoch avg. loss: 5.629 | test avg. loss: 69.241


  2%|▏         | 932/50000 [01:13<1:05:42, 12.45it/s]

Epochs: 930 | epoch avg. loss: 5.223 | test avg. loss: 68.848
Epochs: 931 | epoch avg. loss: 4.579 | test avg. loss: 68.966
Epochs: 932 | epoch avg. loss: 4.457 | test avg. loss: 68.639


  2%|▏         | 936/50000 [01:13<1:04:55, 12.59it/s]

Epochs: 933 | epoch avg. loss: 4.452 | test avg. loss: 69.366
Epochs: 934 | epoch avg. loss: 4.898 | test avg. loss: 68.754
Epochs: 935 | epoch avg. loss: 4.969 | test avg. loss: 68.778


  2%|▏         | 940/50000 [01:13<55:52, 14.64it/s]

Epochs: 936 | epoch avg. loss: 4.751 | test avg. loss: 69.880
Epochs: 937 | epoch avg. loss: 4.651 | test avg. loss: 69.000
Epochs: 938 | epoch avg. loss: 5.031 | test avg. loss: 68.684
Epochs: 939 | epoch avg. loss: 4.322 | test avg. loss: 69.855


  2%|▏         | 944/50000 [01:13<53:18, 15.34it/s]

Epochs: 940 | epoch avg. loss: 4.694 | test avg. loss: 68.945
Epochs: 941 | epoch avg. loss: 4.734 | test avg. loss: 68.722
Epochs: 942 | epoch avg. loss: 4.575 | test avg. loss: 69.022
Epochs: 943 | epoch avg. loss: 4.353 | test avg. loss: 68.459


  2%|▏         | 946/50000 [01:14<55:59, 14.60it/s]

Epochs: 944 | epoch avg. loss: 4.594 | test avg. loss: 68.479
Epochs: 945 | epoch avg. loss: 4.542 | test avg. loss: 68.149
Epochs: 946 | epoch avg. loss: 4.355 | test avg. loss: 68.267


  2%|▏         | 950/50000 [01:14<58:21, 14.01it/s]

Epochs: 947 | epoch avg. loss: 4.293 | test avg. loss: 69.622
Epochs: 948 | epoch avg. loss: 5.209 | test avg. loss: 68.599
Epochs: 949 | epoch avg. loss: 5.068 | test avg. loss: 68.635


  2%|▏         | 954/50000 [01:14<52:59, 15.43it/s]

Epochs: 950 | epoch avg. loss: 4.018 | test avg. loss: 71.476
Epochs: 951 | epoch avg. loss: 6.112 | test avg. loss: 68.665
Epochs: 952 | epoch avg. loss: 5.541 | test avg. loss: 68.002
Epochs: 953 | epoch avg. loss: 4.541 | test avg. loss: 70.658


  2%|▏         | 958/50000 [01:14<49:49, 16.41it/s]

Epochs: 954 | epoch avg. loss: 6.188 | test avg. loss: 67.923
Epochs: 955 | epoch avg. loss: 5.341 | test avg. loss: 67.960
Epochs: 956 | epoch avg. loss: 4.807 | test avg. loss: 68.248
Epochs: 957 | epoch avg. loss: 4.289 | test avg. loss: 68.688


  2%|▏         | 960/50000 [01:14<53:28, 15.28it/s]

Epochs: 958 | epoch avg. loss: 4.325 | test avg. loss: 69.510
Epochs: 959 | epoch avg. loss: 4.343 | test avg. loss: 69.486
Epochs: 960 | epoch avg. loss: 4.595 | test avg. loss: 69.133


  2%|▏         | 964/50000 [01:15<54:34, 14.98it/s]

Epochs: 961 | epoch avg. loss: 4.174 | test avg. loss: 69.252
Epochs: 962 | epoch avg. loss: 4.523 | test avg. loss: 68.132
Epochs: 963 | epoch avg. loss: 4.235 | test avg. loss: 68.032
Epochs: 964 | epoch avg. loss: 4.162 | test avg. loss: 68.429


  2%|▏         | 968/50000 [01:15<54:27, 15.01it/s]

Epochs: 965 | epoch avg. loss: 4.188 | test avg. loss: 68.157
Epochs: 966 | epoch avg. loss: 4.437 | test avg. loss: 68.487
Epochs: 967 | epoch avg. loss: 5.703 | test avg. loss: 68.375
Epochs: 968 | epoch avg. loss: 5.123 | test avg. loss: 68.750


  2%|▏         | 972/50000 [01:15<51:27, 15.88it/s]

Epochs: 969 | epoch avg. loss: 4.635 | test avg. loss: 71.150
Epochs: 970 | epoch avg. loss: 5.906 | test avg. loss: 68.369
Epochs: 971 | epoch avg. loss: 5.293 | test avg. loss: 68.241
Epochs: 972 | epoch avg. loss: 4.709 | test avg. loss: 69.882


  2%|▏         | 976/50000 [01:15<55:07, 14.82it/s]

Epochs: 973 | epoch avg. loss: 4.469 | test avg. loss: 69.173
Epochs: 974 | epoch avg. loss: 5.969 | test avg. loss: 68.621
Epochs: 975 | epoch avg. loss: 6.472 | test avg. loss: 68.873


  2%|▏         | 978/50000 [01:16<58:45, 13.90it/s]

Epochs: 976 | epoch avg. loss: 6.111 | test avg. loss: 68.539
Epochs: 977 | epoch avg. loss: 4.679 | test avg. loss: 71.115
Epochs: 978 | epoch avg. loss: 6.603 | test avg. loss: 67.458


  2%|▏         | 982/50000 [01:16<54:34, 14.97it/s]

Epochs: 979 | epoch avg. loss: 6.009 | test avg. loss: 67.582
Epochs: 980 | epoch avg. loss: 4.329 | test avg. loss: 70.108
Epochs: 981 | epoch avg. loss: 5.321 | test avg. loss: 69.314
Epochs: 982 | epoch avg. loss: 5.677 | test avg. loss: 68.308


  2%|▏         | 986/50000 [01:16<51:17, 15.93it/s]

Epochs: 983 | epoch avg. loss: 4.801 | test avg. loss: 68.326
Epochs: 984 | epoch avg. loss: 5.554 | test avg. loss: 67.598
Epochs: 985 | epoch avg. loss: 4.602 | test avg. loss: 68.208
Epochs: 986 | epoch avg. loss: 4.454 | test avg. loss: 67.647


  2%|▏         | 990/50000 [01:16<49:40, 16.45it/s]

Epochs: 987 | epoch avg. loss: 4.621 | test avg. loss: 67.541
Epochs: 988 | epoch avg. loss: 3.992 | test avg. loss: 69.117
Epochs: 989 | epoch avg. loss: 4.935 | test avg. loss: 68.164
Epochs: 990 | epoch avg. loss: 4.836 | test avg. loss: 68.236


  2%|▏         | 994/50000 [01:17<56:07, 14.55it/s]

Epochs: 991 | epoch avg. loss: 4.176 | test avg. loss: 70.100
Epochs: 992 | epoch avg. loss: 5.201 | test avg. loss: 68.250
Epochs: 993 | epoch avg. loss: 5.017 | test avg. loss: 67.831


  2%|▏         | 998/50000 [01:17<54:11, 15.07it/s]

Epochs: 994 | epoch avg. loss: 4.082 | test avg. loss: 69.734
Epochs: 995 | epoch avg. loss: 4.986 | test avg. loss: 67.902
Epochs: 996 | epoch avg. loss: 5.547 | test avg. loss: 67.793
Epochs: 997 | epoch avg. loss: 6.373 | test avg. loss: 68.311


  2%|▏         | 998/50000 [01:17<54:11, 15.07it/s]

Epochs: 998 | epoch avg. loss: 4.997 | test avg. loss: 68.733
Epochs: 999 | epoch avg. loss: 4.479 | test avg. loss: 70.606


  2%|▏         | 1004/50000 [01:18<1:35:12,  8.58it/s]

Epochs: 1000 | epoch avg. loss: 6.133 | test avg. loss: 67.394
Epochs: 1001 | epoch avg. loss: 4.597 | test avg. loss: 67.343
Epochs: 1002 | epoch avg. loss: 4.210 | test avg. loss: 68.817
Epochs: 1003 | epoch avg. loss: 4.915 | test avg. loss: 67.095


  2%|▏         | 1008/50000 [01:18<1:11:04, 11.49it/s]

Epochs: 1004 | epoch avg. loss: 5.255 | test avg. loss: 67.439
Epochs: 1005 | epoch avg. loss: 6.647 | test avg. loss: 67.144
Epochs: 1006 | epoch avg. loss: 5.289 | test avg. loss: 69.135
Epochs: 1007 | epoch avg. loss: 5.652 | test avg. loss: 70.396


  2%|▏         | 1010/50000 [01:18<1:06:47, 12.23it/s]

Epochs: 1008 | epoch avg. loss: 5.915 | test avg. loss: 67.616
Epochs: 1009 | epoch avg. loss: 5.256 | test avg. loss: 66.969
Epochs: 1010 | epoch avg. loss: 4.486 | test avg. loss: 67.959




Epochs: 1011 | epoch avg. loss: 4.141 | test avg. loss: 67.873
Epochs: 1012 | epoch avg. loss: 5.060 | test avg. loss: 67.788
Epochs: 1013 | epoch avg. loss: 4.510 | test avg. loss: 67.521


  2%|▏         | 1018/50000 [01:19<55:45, 14.64it/s]

Epochs: 1014 | epoch avg. loss: 3.710 | test avg. loss: 68.173
Epochs: 1015 | epoch avg. loss: 4.777 | test avg. loss: 68.168
Epochs: 1016 | epoch avg. loss: 4.427 | test avg. loss: 67.701
Epochs: 1017 | epoch avg. loss: 3.952 | test avg. loss: 67.978


  2%|▏         | 1022/50000 [01:19<50:57, 16.02it/s]

Epochs: 1018 | epoch avg. loss: 4.298 | test avg. loss: 68.287
Epochs: 1019 | epoch avg. loss: 4.512 | test avg. loss: 67.104
Epochs: 1020 | epoch avg. loss: 3.709 | test avg. loss: 67.138
Epochs: 1021 | epoch avg. loss: 4.139 | test avg. loss: 67.599


  2%|▏         | 1026/50000 [01:19<50:49, 16.06it/s]

Epochs: 1022 | epoch avg. loss: 4.422 | test avg. loss: 66.725
Epochs: 1023 | epoch avg. loss: 3.849 | test avg. loss: 66.590
Epochs: 1024 | epoch avg. loss: 3.938 | test avg. loss: 67.693
Epochs: 1025 | epoch avg. loss: 4.478 | test avg. loss: 66.693


  2%|▏         | 1028/50000 [01:20<54:25, 15.00it/s]

Epochs: 1026 | epoch avg. loss: 3.727 | test avg. loss: 67.203
Epochs: 1027 | epoch avg. loss: 4.169 | test avg. loss: 67.630
Epochs: 1028 | epoch avg. loss: 4.012 | test avg. loss: 67.217


  2%|▏         | 1032/50000 [01:20<53:45, 15.18it/s]

Epochs: 1029 | epoch avg. loss: 3.876 | test avg. loss: 67.087
Epochs: 1030 | epoch avg. loss: 4.201 | test avg. loss: 66.900
Epochs: 1031 | epoch avg. loss: 3.818 | test avg. loss: 66.688
Epochs: 1032 | epoch avg. loss: 3.754 | test avg. loss: 67.103


  2%|▏         | 1036/50000 [01:20<50:42, 16.09it/s]

Epochs: 1033 | epoch avg. loss: 4.264 | test avg. loss: 66.615
Epochs: 1034 | epoch avg. loss: 4.603 | test avg. loss: 66.417
Epochs: 1035 | epoch avg. loss: 3.815 | test avg. loss: 68.607
Epochs: 1036 | epoch avg. loss: 4.937 | test avg. loss: 66.910


  2%|▏         | 1040/50000 [01:20<48:49, 16.71it/s]

Epochs: 1037 | epoch avg. loss: 4.238 | test avg. loss: 66.865
Epochs: 1038 | epoch avg. loss: 3.762 | test avg. loss: 67.199
Epochs: 1039 | epoch avg. loss: 3.694 | test avg. loss: 67.075
Epochs: 1040 | epoch avg. loss: 3.732 | test avg. loss: 67.044


  2%|▏         | 1044/50000 [01:21<53:52, 15.15it/s]

Epochs: 1041 | epoch avg. loss: 3.684 | test avg. loss: 66.820
Epochs: 1042 | epoch avg. loss: 3.748 | test avg. loss: 66.504
Epochs: 1043 | epoch avg. loss: 3.675 | test avg. loss: 66.577


  2%|▏         | 1046/50000 [01:21<55:57, 14.58it/s]

Epochs: 1044 | epoch avg. loss: 3.678 | test avg. loss: 66.276
Epochs: 1045 | epoch avg. loss: 3.742 | test avg. loss: 66.384
Epochs: 1046 | epoch avg. loss: 3.757 | test avg. loss: 66.440


  2%|▏         | 1050/50000 [01:21<54:46, 14.90it/s]

Epochs: 1047 | epoch avg. loss: 3.810 | test avg. loss: 66.341
Epochs: 1048 | epoch avg. loss: 4.096 | test avg. loss: 66.426
Epochs: 1049 | epoch avg. loss: 3.658 | test avg. loss: 66.177
Epochs: 1050 | epoch avg. loss: 3.801 | test avg. loss: 66.270


  2%|▏         | 1054/50000 [01:21<51:43, 15.77it/s]

Epochs: 1051 | epoch avg. loss: 3.716 | test avg. loss: 66.142
Epochs: 1052 | epoch avg. loss: 3.598 | test avg. loss: 66.351
Epochs: 1053 | epoch avg. loss: 3.638 | test avg. loss: 66.365
Epochs: 1054 | epoch avg. loss: 3.914 | test avg. loss: 67.116


  2%|▏         | 1058/50000 [01:21<53:56, 15.12it/s]

Epochs: 1055 | epoch avg. loss: 4.175 | test avg. loss: 66.736
Epochs: 1056 | epoch avg. loss: 4.043 | test avg. loss: 66.614
Epochs: 1057 | epoch avg. loss: 3.556 | test avg. loss: 67.197


  2%|▏         | 1060/50000 [01:22<57:00, 14.31it/s]

Epochs: 1058 | epoch avg. loss: 4.014 | test avg. loss: 66.035
Epochs: 1059 | epoch avg. loss: 4.086 | test avg. loss: 65.795
Epochs: 1060 | epoch avg. loss: 4.118 | test avg. loss: 66.023


  2%|▏         | 1064/50000 [01:22<54:33, 14.95it/s]

Epochs: 1061 | epoch avg. loss: 3.525 | test avg. loss: 66.153
Epochs: 1062 | epoch avg. loss: 4.141 | test avg. loss: 66.495
Epochs: 1063 | epoch avg. loss: 4.136 | test avg. loss: 65.911
Epochs: 1064 | epoch avg. loss: 4.387 | test avg. loss: 66.118


  2%|▏         | 1068/50000 [01:22<50:38, 16.10it/s]

Epochs: 1065 | epoch avg. loss: 3.744 | test avg. loss: 67.154
Epochs: 1066 | epoch avg. loss: 3.895 | test avg. loss: 67.071
Epochs: 1067 | epoch avg. loss: 4.489 | test avg. loss: 66.382
Epochs: 1068 | epoch avg. loss: 4.188 | test avg. loss: 66.016




Epochs: 1069 | epoch avg. loss: 3.468 | test avg. loss: 66.446
Epochs: 1070 | epoch avg. loss: 4.507 | test avg. loss: 66.821
Epochs: 1071 | epoch avg. loss: 4.166 | test avg. loss: 66.026


  2%|▏         | 1074/50000 [01:23<54:20, 15.01it/s]

Epochs: 1072 | epoch avg. loss: 3.756 | test avg. loss: 66.335
Epochs: 1073 | epoch avg. loss: 3.919 | test avg. loss: 66.847
Epochs: 1074 | epoch avg. loss: 3.725 | test avg. loss: 66.471


  2%|▏         | 1078/50000 [01:23<55:00, 14.82it/s]

Epochs: 1075 | epoch avg. loss: 4.063 | test avg. loss: 66.964
Epochs: 1076 | epoch avg. loss: 4.266 | test avg. loss: 66.238
Epochs: 1077 | epoch avg. loss: 3.872 | test avg. loss: 65.692


  2%|▏         | 1080/50000 [01:23<57:03, 14.29it/s]

Epochs: 1078 | epoch avg. loss: 4.302 | test avg. loss: 65.882
Epochs: 1079 | epoch avg. loss: 3.472 | test avg. loss: 65.939
Epochs: 1080 | epoch avg. loss: 3.815 | test avg. loss: 66.487


  2%|▏         | 1084/50000 [01:23<57:50, 14.10it/s]

Epochs: 1081 | epoch avg. loss: 4.159 | test avg. loss: 65.826
Epochs: 1082 | epoch avg. loss: 3.784 | test avg. loss: 66.089
Epochs: 1083 | epoch avg. loss: 4.131 | test avg. loss: 66.428


  2%|▏         | 1086/50000 [01:23<58:39, 13.90it/s]

Epochs: 1084 | epoch avg. loss: 3.749 | test avg. loss: 65.591
Epochs: 1085 | epoch avg. loss: 3.679 | test avg. loss: 65.567
Epochs: 1086 | epoch avg. loss: 3.524 | test avg. loss: 65.549


  2%|▏         | 1090/50000 [01:24<1:02:33, 13.03it/s]

Epochs: 1087 | epoch avg. loss: 3.899 | test avg. loss: 65.491
Epochs: 1088 | epoch avg. loss: 3.636 | test avg. loss: 65.714
Epochs: 1089 | epoch avg. loss: 3.575 | test avg. loss: 65.553


  2%|▏         | 1092/50000 [01:24<1:02:03, 13.13it/s]

Epochs: 1090 | epoch avg. loss: 3.788 | test avg. loss: 65.794
Epochs: 1091 | epoch avg. loss: 3.534 | test avg. loss: 65.541
Epochs: 1092 | epoch avg. loss: 3.465 | test avg. loss: 65.625


  2%|▏         | 1096/50000 [01:24<59:09, 13.78it/s]

Epochs: 1093 | epoch avg. loss: 3.383 | test avg. loss: 65.990
Epochs: 1094 | epoch avg. loss: 3.493 | test avg. loss: 65.664
Epochs: 1095 | epoch avg. loss: 3.477 | test avg. loss: 65.741
Epochs: 1096 | epoch avg. loss: 3.485 | test avg. loss: 65.702


  2%|▏         | 1098/50000 [01:24<57:12, 14.25it/s]

Epochs: 1097 | epoch avg. loss: 3.355 | test avg. loss: 65.682
Epochs: 1098 | epoch avg. loss: 3.463 | test avg. loss: 65.762
Epochs: 1099 | epoch avg. loss: 3.424 | test avg. loss: 65.450


  2%|▏         | 1102/50000 [01:26<2:31:17,  5.39it/s]

Epochs: 1100 | epoch avg. loss: 3.495 | test avg. loss: 65.460
Epochs: 1101 | epoch avg. loss: 3.840 | test avg. loss: 65.375
Epochs: 1102 | epoch avg. loss: 3.785 | test avg. loss: 65.273


  2%|▏         | 1106/50000 [01:26<1:43:51,  7.85it/s]

Epochs: 1103 | epoch avg. loss: 3.517 | test avg. loss: 65.953
Epochs: 1104 | epoch avg. loss: 4.031 | test avg. loss: 65.335
Epochs: 1105 | epoch avg. loss: 3.360 | test avg. loss: 65.401


  2%|▏         | 1108/50000 [01:26<1:29:08,  9.14it/s]

Epochs: 1106 | epoch avg. loss: 3.326 | test avg. loss: 65.075
Epochs: 1107 | epoch avg. loss: 3.697 | test avg. loss: 65.198
Epochs: 1108 | epoch avg. loss: 3.675 | test avg. loss: 64.965


  2%|▏         | 1112/50000 [01:26<1:14:27, 10.94it/s]

Epochs: 1109 | epoch avg. loss: 3.176 | test avg. loss: 65.248
Epochs: 1110 | epoch avg. loss: 4.138 | test avg. loss: 64.965
Epochs: 1111 | epoch avg. loss: 3.329 | test avg. loss: 64.879


  2%|▏         | 1114/50000 [01:27<1:09:21, 11.75it/s]

Epochs: 1112 | epoch avg. loss: 3.271 | test avg. loss: 64.828
Epochs: 1113 | epoch avg. loss: 3.324 | test avg. loss: 65.023
Epochs: 1114 | epoch avg. loss: 3.392 | test avg. loss: 64.899
Epochs: 1115 | epoch avg. loss: 3.286 | test avg. loss: 65.020


                                                      

Epochs: 1116 | epoch avg. loss: 3.444 | test avg. loss: 65.123
Epochs: 1117 | epoch avg. loss: 3.331 | test avg. loss: 64.998
Epochs: 1118 | epoch avg. loss: 3.321 | test avg. loss: 65.069


  2%|▏         | 1122/50000 [01:27<1:00:13, 13.53it/s]

Epochs: 1119 | epoch avg. loss: 3.246 | test avg. loss: 65.315
Epochs: 1120 | epoch avg. loss: 3.362 | test avg. loss: 65.015
Epochs: 1121 | epoch avg. loss: 3.259 | test avg. loss: 65.155


  2%|▏         | 1124/50000 [01:27<59:56, 13.59it/s]

Epochs: 1122 | epoch avg. loss: 3.399 | test avg. loss: 64.835
Epochs: 1123 | epoch avg. loss: 3.228 | test avg. loss: 65.095
Epochs: 1124 | epoch avg. loss: 3.479 | test avg. loss: 65.231


  2%|▏         | 1128/50000 [01:28<1:01:07, 13.33it/s]

Epochs: 1125 | epoch avg. loss: 3.297 | test avg. loss: 65.269
Epochs: 1126 | epoch avg. loss: 3.347 | test avg. loss: 64.923
Epochs: 1127 | epoch avg. loss: 3.206 | test avg. loss: 64.945


  2%|▏         | 1132/50000 [01:28<57:36, 14.14it/s]  

Epochs: 1128 | epoch avg. loss: 3.268 | test avg. loss: 64.917
Epochs: 1129 | epoch avg. loss: 3.460 | test avg. loss: 64.972
Epochs: 1130 | epoch avg. loss: 3.708 | test avg. loss: 64.761
Epochs: 1131 | epoch avg. loss: 3.111 | test avg. loss: 64.889


  2%|▏         | 1136/50000 [01:28<54:42, 14.88it/s]

Epochs: 1132 | epoch avg. loss: 3.687 | test avg. loss: 65.326
Epochs: 1133 | epoch avg. loss: 3.745 | test avg. loss: 64.724
Epochs: 1134 | epoch avg. loss: 3.256 | test avg. loss: 64.807
Epochs: 1135 | epoch avg. loss: 3.158 | test avg. loss: 65.450


  2%|▏         | 1138/50000 [01:28<54:52, 14.84it/s]

Epochs: 1136 | epoch avg. loss: 3.375 | test avg. loss: 65.077
Epochs: 1137 | epoch avg. loss: 3.202 | test avg. loss: 64.961
Epochs: 1138 | epoch avg. loss: 3.150 | test avg. loss: 65.341


  2%|▏         | 1142/50000 [01:28<59:56, 13.58it/s]

Epochs: 1139 | epoch avg. loss: 3.536 | test avg. loss: 64.662
Epochs: 1140 | epoch avg. loss: 3.327 | test avg. loss: 64.605
Epochs: 1141 | epoch avg. loss: 3.325 | test avg. loss: 64.921


  2%|▏         | 1144/50000 [01:29<1:02:10, 13.10it/s]

Epochs: 1142 | epoch avg. loss: 3.296 | test avg. loss: 64.748
Epochs: 1143 | epoch avg. loss: 3.363 | test avg. loss: 65.003
Epochs: 1144 | epoch avg. loss: 3.312 | test avg. loss: 64.886


  2%|▏         | 1148/50000 [01:29<1:04:48, 12.56it/s]

Epochs: 1145 | epoch avg. loss: 3.466 | test avg. loss: 64.727
Epochs: 1146 | epoch avg. loss: 3.480 | test avg. loss: 64.708
Epochs: 1147 | epoch avg. loss: 3.481 | test avg. loss: 64.481


  2%|▏         | 1150/50000 [01:29<1:04:49, 12.56it/s]

Epochs: 1148 | epoch avg. loss: 3.468 | test avg. loss: 64.931
Epochs: 1149 | epoch avg. loss: 3.804 | test avg. loss: 64.486
Epochs: 1150 | epoch avg. loss: 3.129 | test avg. loss: 64.716


  2%|▏         | 1152/50000 [01:30<1:06:08, 12.31it/s]

Epochs: 1151 | epoch avg. loss: 3.079 | test avg. loss: 64.832
Epochs: 1152 | epoch avg. loss: 3.154 | test avg. loss: 64.883
Epochs: 1153 | epoch avg. loss: 3.104 | test avg. loss: 64.639


  2%|▏         | 1156/50000 [01:30<1:08:20, 11.91it/s]

Epochs: 1154 | epoch avg. loss: 3.109 | test avg. loss: 64.462
Epochs: 1155 | epoch avg. loss: 3.117 | test avg. loss: 64.377
Epochs: 1156 | epoch avg. loss: 3.152 | test avg. loss: 64.194


  2%|▏         | 1160/50000 [01:30<1:07:17, 12.10it/s]

Epochs: 1157 | epoch avg. loss: 3.098 | test avg. loss: 64.388
Epochs: 1158 | epoch avg. loss: 3.268 | test avg. loss: 64.504
Epochs: 1159 | epoch avg. loss: 3.043 | test avg. loss: 64.632


  2%|▏         | 1162/50000 [01:30<1:07:13, 12.11it/s]

Epochs: 1160 | epoch avg. loss: 3.053 | test avg. loss: 64.705
Epochs: 1161 | epoch avg. loss: 3.047 | test avg. loss: 64.796
Epochs: 1162 | epoch avg. loss: 3.028 | test avg. loss: 64.745


  2%|▏         | 1166/50000 [01:30<1:06:20, 12.27it/s]

Epochs: 1163 | epoch avg. loss: 3.450 | test avg. loss: 64.761
Epochs: 1164 | epoch avg. loss: 3.451 | test avg. loss: 64.628
Epochs: 1165 | epoch avg. loss: 3.155 | test avg. loss: 64.891


                                                      

Epochs: 1166 | epoch avg. loss: 3.322 | test avg. loss: 64.466
Epochs: 1167 | epoch avg. loss: 3.092 | test avg. loss: 64.819
Epochs: 1168 | epoch avg. loss: 3.374 | test avg. loss: 64.308


  2%|▏         | 1172/50000 [01:31<58:01, 14.02it/s]

Epochs: 1169 | epoch avg. loss: 3.558 | test avg. loss: 64.438
Epochs: 1170 | epoch avg. loss: 3.719 | test avg. loss: 64.752
Epochs: 1171 | epoch avg. loss: 3.147 | test avg. loss: 65.055
Epochs: 1172 | epoch avg. loss: 3.202 | test avg. loss: 66.167


  2%|▏         | 1176/50000 [01:31<55:07, 14.76it/s]

Epochs: 1173 | epoch avg. loss: 4.273 | test avg. loss: 64.805
Epochs: 1174 | epoch avg. loss: 4.313 | test avg. loss: 63.831
Epochs: 1175 | epoch avg. loss: 3.901 | test avg. loss: 64.406
Epochs: 1176 | epoch avg. loss: 4.304 | test avg. loss: 63.839


  2%|▏         | 1180/50000 [01:31<57:41, 14.10it/s]

Epochs: 1177 | epoch avg. loss: 3.864 | test avg. loss: 65.077
Epochs: 1178 | epoch avg. loss: 3.302 | test avg. loss: 65.406
Epochs: 1179 | epoch avg. loss: 3.894 | test avg. loss: 65.322


  2%|▏         | 1182/50000 [01:32<1:00:47, 13.38it/s]

Epochs: 1180 | epoch avg. loss: 3.601 | test avg. loss: 64.760
Epochs: 1181 | epoch avg. loss: 3.274 | test avg. loss: 64.707
Epochs: 1182 | epoch avg. loss: 3.590 | test avg. loss: 64.540


  2%|▏         | 1186/50000 [01:32<1:04:40, 12.58it/s]

Epochs: 1183 | epoch avg. loss: 3.107 | test avg. loss: 64.307
Epochs: 1184 | epoch avg. loss: 3.714 | test avg. loss: 63.650
Epochs: 1185 | epoch avg. loss: 3.032 | test avg. loss: 63.628


  2%|▏         | 1188/50000 [01:32<1:11:36, 11.36it/s]

Epochs: 1186 | epoch avg. loss: 2.903 | test avg. loss: 63.966
Epochs: 1187 | epoch avg. loss: 3.761 | test avg. loss: 63.663


  2%|▏         | 1190/50000 [01:32<1:13:06, 11.13it/s]

Epochs: 1188 | epoch avg. loss: 3.012 | test avg. loss: 63.938
Epochs: 1189 | epoch avg. loss: 2.981 | test avg. loss: 64.007
Epochs: 1190 | epoch avg. loss: 3.152 | test avg. loss: 64.158


  2%|▏         | 1194/50000 [01:33<1:13:42, 11.04it/s]

Epochs: 1191 | epoch avg. loss: 3.021 | test avg. loss: 63.968
Epochs: 1192 | epoch avg. loss: 2.896 | test avg. loss: 63.704
Epochs: 1193 | epoch avg. loss: 2.858 | test avg. loss: 63.434


  2%|▏         | 1198/50000 [01:33<1:03:17, 12.85it/s]

Epochs: 1194 | epoch avg. loss: 2.863 | test avg. loss: 63.392
Epochs: 1195 | epoch avg. loss: 3.175 | test avg. loss: 63.254
Epochs: 1196 | epoch avg. loss: 3.912 | test avg. loss: 63.484
Epochs: 1197 | epoch avg. loss: 5.067 | test avg. loss: 63.205


  2%|▏         | 1198/50000 [01:33<1:03:17, 12.85it/s]

Epochs: 1198 | epoch avg. loss: 3.728 | test avg. loss: 64.418
Epochs: 1199 | epoch avg. loss: 4.532 | test avg. loss: 64.331


  2%|▏         | 1204/50000 [01:34<1:43:10,  7.88it/s]

Epochs: 1200 | epoch avg. loss: 3.211 | test avg. loss: 64.404
Epochs: 1201 | epoch avg. loss: 3.821 | test avg. loss: 64.666
Epochs: 1202 | epoch avg. loss: 3.967 | test avg. loss: 63.550
Epochs: 1203 | epoch avg. loss: 3.709 | test avg. loss: 63.527


  2%|▏         | 1206/50000 [01:34<1:26:48,  9.37it/s]

Epochs: 1204 | epoch avg. loss: 3.435 | test avg. loss: 63.870
Epochs: 1205 | epoch avg. loss: 2.860 | test avg. loss: 64.417
Epochs: 1206 | epoch avg. loss: 4.102 | test avg. loss: 64.993




Epochs: 1207 | epoch avg. loss: 4.272 | test avg. loss: 63.422
Epochs: 1208 | epoch avg. loss: 2.795 | test avg. loss: 64.127
Epochs: 1209 | epoch avg. loss: 3.271 | test avg. loss: 65.014


  2%|▏         | 1212/50000 [01:35<1:06:57, 12.14it/s]

Epochs: 1210 | epoch avg. loss: 3.378 | test avg. loss: 64.399
Epochs: 1211 | epoch avg. loss: 3.111 | test avg. loss: 64.281
Epochs: 1212 | epoch avg. loss: 2.784 | test avg. loss: 64.357
Epochs: 1213 | epoch avg. loss: 3.069 | test avg. loss: 63.870


  2%|▏         | 1218/50000 [01:35<55:23, 14.68it/s]

Epochs: 1214 | epoch avg. loss: 2.958 | test avg. loss: 63.969
Epochs: 1215 | epoch avg. loss: 2.991 | test avg. loss: 63.754
Epochs: 1216 | epoch avg. loss: 2.815 | test avg. loss: 64.204
Epochs: 1217 | epoch avg. loss: 2.998 | test avg. loss: 64.181


  2%|▏         | 1222/50000 [01:35<53:28, 15.20it/s]

Epochs: 1218 | epoch avg. loss: 2.986 | test avg. loss: 64.187
Epochs: 1219 | epoch avg. loss: 2.829 | test avg. loss: 63.927
Epochs: 1220 | epoch avg. loss: 2.713 | test avg. loss: 63.857
Epochs: 1221 | epoch avg. loss: 3.105 | test avg. loss: 63.537


  2%|▏         | 1224/50000 [01:36<57:43, 14.08it/s]

Epochs: 1222 | epoch avg. loss: 2.726 | test avg. loss: 63.547
Epochs: 1223 | epoch avg. loss: 2.866 | test avg. loss: 63.509
Epochs: 1224 | epoch avg. loss: 2.898 | test avg. loss: 63.511


  2%|▏         | 1228/50000 [01:36<59:34, 13.64it/s]

Epochs: 1225 | epoch avg. loss: 2.858 | test avg. loss: 63.629
Epochs: 1226 | epoch avg. loss: 2.848 | test avg. loss: 63.803
Epochs: 1227 | epoch avg. loss: 3.182 | test avg. loss: 63.780


  2%|▏         | 1232/50000 [01:36<54:27, 14.93it/s]

Epochs: 1228 | epoch avg. loss: 3.816 | test avg. loss: 63.647
Epochs: 1229 | epoch avg. loss: 4.002 | test avg. loss: 63.342
Epochs: 1230 | epoch avg. loss: 3.562 | test avg. loss: 63.885
Epochs: 1231 | epoch avg. loss: 3.588 | test avg. loss: 63.367


  2%|▏         | 1236/50000 [01:36<51:12, 15.87it/s]

Epochs: 1232 | epoch avg. loss: 3.594 | test avg. loss: 63.831
Epochs: 1233 | epoch avg. loss: 2.970 | test avg. loss: 63.786
Epochs: 1234 | epoch avg. loss: 3.175 | test avg. loss: 64.002
Epochs: 1235 | epoch avg. loss: 2.787 | test avg. loss: 64.025


  2%|▏         | 1238/50000 [01:36<53:37, 15.15it/s]

Epochs: 1236 | epoch avg. loss: 2.876 | test avg. loss: 63.663
Epochs: 1237 | epoch avg. loss: 2.815 | test avg. loss: 63.408
Epochs: 1238 | epoch avg. loss: 3.022 | test avg. loss: 63.046


  2%|▏         | 1242/50000 [01:37<54:14, 14.98it/s]

Epochs: 1239 | epoch avg. loss: 3.327 | test avg. loss: 62.705
Epochs: 1240 | epoch avg. loss: 3.215 | test avg. loss: 62.653
Epochs: 1241 | epoch avg. loss: 2.694 | test avg. loss: 62.873
Epochs: 1242 | epoch avg. loss: 3.146 | test avg. loss: 63.613


  2%|▏         | 1246/50000 [01:37<52:15, 15.55it/s]

Epochs: 1243 | epoch avg. loss: 3.431 | test avg. loss: 63.295
Epochs: 1244 | epoch avg. loss: 3.790 | test avg. loss: 63.264
Epochs: 1245 | epoch avg. loss: 3.447 | test avg. loss: 63.729
Epochs: 1246 | epoch avg. loss: 3.545 | test avg. loss: 63.560


  2%|▎         | 1250/50000 [01:37<50:37, 16.05it/s]

Epochs: 1247 | epoch avg. loss: 3.046 | test avg. loss: 63.252
Epochs: 1248 | epoch avg. loss: 3.333 | test avg. loss: 62.814
Epochs: 1249 | epoch avg. loss: 2.854 | test avg. loss: 63.083
Epochs: 1250 | epoch avg. loss: 2.602 | test avg. loss: 63.293


  3%|▎         | 1254/50000 [01:37<54:13, 14.98it/s]

Epochs: 1251 | epoch avg. loss: 3.415 | test avg. loss: 63.892
Epochs: 1252 | epoch avg. loss: 3.819 | test avg. loss: 63.067
Epochs: 1253 | epoch avg. loss: 3.314 | test avg. loss: 63.170




Epochs: 1254 | epoch avg. loss: 2.974 | test avg. loss: 63.856
Epochs: 1255 | epoch avg. loss: 2.805 | test avg. loss: 63.507
Epochs: 1256 | epoch avg. loss: 3.524 | test avg. loss: 63.213




Epochs: 1257 | epoch avg. loss: 2.911 | test avg. loss: 62.739
Epochs: 1258 | epoch avg. loss: 2.784 | test avg. loss: 62.865
Epochs: 1259 | epoch avg. loss: 3.670 | test avg. loss: 63.322


  3%|▎         | 1264/50000 [01:38<51:35, 15.74it/s]

Epochs: 1260 | epoch avg. loss: 4.107 | test avg. loss: 63.132
Epochs: 1261 | epoch avg. loss: 3.879 | test avg. loss: 63.496
Epochs: 1262 | epoch avg. loss: 3.153 | test avg. loss: 65.123
Epochs: 1263 | epoch avg. loss: 5.593 | test avg. loss: 65.401


  3%|▎         | 1266/50000 [01:38<50:45, 16.00it/s]

Epochs: 1264 | epoch avg. loss: 4.185 | test avg. loss: 64.215
Epochs: 1265 | epoch avg. loss: 4.066 | test avg. loss: 63.448
Epochs: 1266 | epoch avg. loss: 3.158 | test avg. loss: 62.808
Epochs: 1267 | epoch avg. loss: 3.641 | test avg. loss: 63.132


  3%|▎         | 1272/50000 [01:39<55:17, 14.69it/s]

Epochs: 1268 | epoch avg. loss: 4.088 | test avg. loss: 63.404
Epochs: 1269 | epoch avg. loss: 3.743 | test avg. loss: 63.836
Epochs: 1270 | epoch avg. loss: 4.006 | test avg. loss: 63.220
Epochs: 1271 | epoch avg. loss: 3.069 | test avg. loss: 62.819


  3%|▎         | 1276/50000 [01:39<53:52, 15.07it/s]

Epochs: 1272 | epoch avg. loss: 2.736 | test avg. loss: 63.916
Epochs: 1273 | epoch avg. loss: 3.457 | test avg. loss: 62.659
Epochs: 1274 | epoch avg. loss: 2.969 | test avg. loss: 63.037
Epochs: 1275 | epoch avg. loss: 2.823 | test avg. loss: 63.298


  3%|▎         | 1280/50000 [01:39<51:13, 15.85it/s]

Epochs: 1276 | epoch avg. loss: 3.180 | test avg. loss: 63.539
Epochs: 1277 | epoch avg. loss: 2.883 | test avg. loss: 63.860
Epochs: 1278 | epoch avg. loss: 4.145 | test avg. loss: 63.935
Epochs: 1279 | epoch avg. loss: 2.932 | test avg. loss: 65.678


  3%|▎         | 1282/50000 [01:39<49:52, 16.28it/s]

Epochs: 1280 | epoch avg. loss: 3.500 | test avg. loss: 65.441
Epochs: 1281 | epoch avg. loss: 3.471 | test avg. loss: 65.253
Epochs: 1282 | epoch avg. loss: 3.385 | test avg. loss: 64.014


  3%|▎         | 1286/50000 [01:40<58:28, 13.88it/s]

Epochs: 1283 | epoch avg. loss: 3.559 | test avg. loss: 63.646
Epochs: 1284 | epoch avg. loss: 2.937 | test avg. loss: 63.930
Epochs: 1285 | epoch avg. loss: 2.475 | test avg. loss: 65.155


  3%|▎         | 1290/50000 [01:40<54:38, 14.86it/s]

Epochs: 1286 | epoch avg. loss: 3.675 | test avg. loss: 66.357
Epochs: 1287 | epoch avg. loss: 5.892 | test avg. loss: 65.555
Epochs: 1288 | epoch avg. loss: 7.704 | test avg. loss: 63.236
Epochs: 1289 | epoch avg. loss: 5.212 | test avg. loss: 63.669


  3%|▎         | 1294/50000 [01:40<52:50, 15.36it/s]

Epochs: 1290 | epoch avg. loss: 4.882 | test avg. loss: 63.595
Epochs: 1291 | epoch avg. loss: 4.400 | test avg. loss: 66.216
Epochs: 1292 | epoch avg. loss: 4.168 | test avg. loss: 65.454
Epochs: 1293 | epoch avg. loss: 4.799 | test avg. loss: 64.615


  3%|▎         | 1296/50000 [01:40<52:33, 15.45it/s]

Epochs: 1294 | epoch avg. loss: 3.359 | test avg. loss: 63.601
Epochs: 1295 | epoch avg. loss: 2.765 | test avg. loss: 63.580
Epochs: 1296 | epoch avg. loss: 2.651 | test avg. loss: 63.649


  3%|▎         | 1298/50000 [01:41<55:54, 14.52it/s]

Epochs: 1297 | epoch avg. loss: 2.444 | test avg. loss: 63.316
Epochs: 1298 | epoch avg. loss: 2.712 | test avg. loss: 63.554
Epochs: 1299 | epoch avg. loss: 2.744 | test avg. loss: 63.056


  3%|▎         | 1304/50000 [01:42<1:39:03,  8.19it/s]

Epochs: 1300 | epoch avg. loss: 2.703 | test avg. loss: 63.290
Epochs: 1301 | epoch avg. loss: 2.635 | test avg. loss: 63.434
Epochs: 1302 | epoch avg. loss: 2.510 | test avg. loss: 63.576
Epochs: 1303 | epoch avg. loss: 2.820 | test avg. loss: 63.833


  3%|▎         | 1308/50000 [01:42<1:14:46, 10.85it/s]

Epochs: 1304 | epoch avg. loss: 2.648 | test avg. loss: 63.401
Epochs: 1305 | epoch avg. loss: 2.979 | test avg. loss: 63.391
Epochs: 1306 | epoch avg. loss: 2.857 | test avg. loss: 63.036
Epochs: 1307 | epoch avg. loss: 2.386 | test avg. loss: 63.278


  3%|▎         | 1312/50000 [01:42<1:03:14, 12.83it/s]

Epochs: 1308 | epoch avg. loss: 2.915 | test avg. loss: 63.347
Epochs: 1309 | epoch avg. loss: 2.579 | test avg. loss: 63.269
Epochs: 1310 | epoch avg. loss: 2.474 | test avg. loss: 63.199
Epochs: 1311 | epoch avg. loss: 2.424 | test avg. loss: 63.313


                                                      

Epochs: 1312 | epoch avg. loss: 2.543 | test avg. loss: 62.762
Epochs: 1313 | epoch avg. loss: 2.407 | test avg. loss: 62.675
Epochs: 1314 | epoch avg. loss: 2.367 | test avg. loss: 63.029


  3%|▎         | 1318/50000 [01:43<1:04:47, 12.52it/s]

Epochs: 1315 | epoch avg. loss: 2.460 | test avg. loss: 62.911
Epochs: 1316 | epoch avg. loss: 2.485 | test avg. loss: 63.118
Epochs: 1317 | epoch avg. loss: 2.504 | test avg. loss: 63.042


  3%|▎         | 1320/50000 [01:43<1:06:59, 12.11it/s]

Epochs: 1318 | epoch avg. loss: 2.550 | test avg. loss: 62.880
Epochs: 1319 | epoch avg. loss: 2.390 | test avg. loss: 62.953
Epochs: 1320 | epoch avg. loss: 3.010 | test avg. loss: 62.565


  3%|▎         | 1324/50000 [01:43<1:07:43, 11.98it/s]

Epochs: 1321 | epoch avg. loss: 2.807 | test avg. loss: 62.609
Epochs: 1322 | epoch avg. loss: 2.615 | test avg. loss: 62.738
Epochs: 1323 | epoch avg. loss: 3.096 | test avg. loss: 63.074


  3%|▎         | 1326/50000 [01:43<1:08:40, 11.81it/s]

Epochs: 1324 | epoch avg. loss: 2.522 | test avg. loss: 63.282
Epochs: 1325 | epoch avg. loss: 2.579 | test avg. loss: 63.684
Epochs: 1326 | epoch avg. loss: 2.484 | test avg. loss: 63.793


  3%|▎         | 1330/50000 [01:44<1:10:52, 11.45it/s]

Epochs: 1327 | epoch avg. loss: 3.438 | test avg. loss: 62.941
Epochs: 1328 | epoch avg. loss: 2.345 | test avg. loss: 62.691
Epochs: 1329 | epoch avg. loss: 2.316 | test avg. loss: 62.911


  3%|▎         | 1332/50000 [01:44<1:09:13, 11.72it/s]

Epochs: 1330 | epoch avg. loss: 2.729 | test avg. loss: 62.767
Epochs: 1331 | epoch avg. loss: 2.290 | test avg. loss: 62.745
Epochs: 1332 | epoch avg. loss: 2.334 | test avg. loss: 62.649


  3%|▎         | 1336/50000 [01:44<1:10:20, 11.53it/s]

Epochs: 1333 | epoch avg. loss: 2.313 | test avg. loss: 62.574
Epochs: 1334 | epoch avg. loss: 2.417 | test avg. loss: 62.491
Epochs: 1335 | epoch avg. loss: 2.315 | test avg. loss: 62.277


  3%|▎         | 1338/50000 [01:44<1:08:03, 11.92it/s]

Epochs: 1336 | epoch avg. loss: 2.175 | test avg. loss: 62.452
Epochs: 1337 | epoch avg. loss: 2.560 | test avg. loss: 61.927
Epochs: 1338 | epoch avg. loss: 2.510 | test avg. loss: 61.964


  3%|▎         | 1342/50000 [01:45<1:04:42, 12.53it/s]

Epochs: 1339 | epoch avg. loss: 2.201 | test avg. loss: 62.194
Epochs: 1340 | epoch avg. loss: 2.232 | test avg. loss: 62.467
Epochs: 1341 | epoch avg. loss: 2.185 | test avg. loss: 62.688


  3%|▎         | 1344/50000 [01:45<1:02:36, 12.95it/s]

Epochs: 1342 | epoch avg. loss: 2.199 | test avg. loss: 62.820
Epochs: 1343 | epoch avg. loss: 2.162 | test avg. loss: 62.810
Epochs: 1344 | epoch avg. loss: 2.220 | test avg. loss: 62.756


  3%|▎         | 1348/50000 [01:45<1:09:12, 11.72it/s]

Epochs: 1345 | epoch avg. loss: 2.422 | test avg. loss: 62.716
Epochs: 1346 | epoch avg. loss: 2.520 | test avg. loss: 62.570
Epochs: 1347 | epoch avg. loss: 2.685 | test avg. loss: 62.738


  3%|▎         | 1350/50000 [01:45<1:10:58, 11.42it/s]

Epochs: 1348 | epoch avg. loss: 3.432 | test avg. loss: 62.864
Epochs: 1349 | epoch avg. loss: 3.034 | test avg. loss: 63.079
Epochs: 1350 | epoch avg. loss: 2.293 | test avg. loss: 66.421


  3%|▎         | 1354/50000 [01:46<1:12:18, 11.21it/s]

Epochs: 1351 | epoch avg. loss: 4.777 | test avg. loss: 63.884
Epochs: 1352 | epoch avg. loss: 3.621 | test avg. loss: 63.379
Epochs: 1353 | epoch avg. loss: 2.881 | test avg. loss: 62.941


  3%|▎         | 1356/50000 [01:46<1:09:55, 11.59it/s]

Epochs: 1354 | epoch avg. loss: 3.473 | test avg. loss: 63.374
Epochs: 1355 | epoch avg. loss: 2.867 | test avg. loss: 63.709
Epochs: 1356 | epoch avg. loss: 3.365 | test avg. loss: 64.131


  3%|▎         | 1360/50000 [01:46<1:08:30, 11.83it/s]

Epochs: 1357 | epoch avg. loss: 2.710 | test avg. loss: 65.146
Epochs: 1358 | epoch avg. loss: 3.291 | test avg. loss: 65.175
Epochs: 1359 | epoch avg. loss: 3.527 | test avg. loss: 64.346


  3%|▎         | 1362/50000 [01:46<1:07:25, 12.02it/s]

Epochs: 1360 | epoch avg. loss: 2.487 | test avg. loss: 63.418
Epochs: 1361 | epoch avg. loss: 2.684 | test avg. loss: 62.981
Epochs: 1362 | epoch avg. loss: 2.363 | test avg. loss: 62.673


  3%|▎         | 1366/50000 [01:47<1:10:07, 11.56it/s]

Epochs: 1363 | epoch avg. loss: 2.311 | test avg. loss: 62.869
Epochs: 1364 | epoch avg. loss: 2.548 | test avg. loss: 64.062
Epochs: 1365 | epoch avg. loss: 3.287 | test avg. loss: 64.034


  3%|▎         | 1368/50000 [01:47<1:05:58, 12.28it/s]

Epochs: 1366 | epoch avg. loss: 3.843 | test avg. loss: 64.105
Epochs: 1367 | epoch avg. loss: 4.880 | test avg. loss: 63.837
Epochs: 1368 | epoch avg. loss: 4.644 | test avg. loss: 62.786


  3%|▎         | 1372/50000 [01:47<1:02:01, 13.07it/s]

Epochs: 1369 | epoch avg. loss: 3.042 | test avg. loss: 64.273
Epochs: 1370 | epoch avg. loss: 4.587 | test avg. loss: 63.473
Epochs: 1371 | epoch avg. loss: 3.950 | test avg. loss: 65.498


  3%|▎         | 1374/50000 [01:47<1:00:09, 13.47it/s]

Epochs: 1372 | epoch avg. loss: 4.467 | test avg. loss: 63.527
Epochs: 1373 | epoch avg. loss: 4.688 | test avg. loss: 65.184
Epochs: 1374 | epoch avg. loss: 5.438 | test avg. loss: 62.844


  3%|▎         | 1376/50000 [01:48<1:03:17, 12.80it/s]

Epochs: 1375 | epoch avg. loss: 4.468 | test avg. loss: 63.302
Epochs: 1376 | epoch avg. loss: 4.580 | test avg. loss: 64.206


  3%|▎         | 1380/50000 [01:48<1:06:31, 12.18it/s]

Epochs: 1377 | epoch avg. loss: 3.888 | test avg. loss: 64.538
Epochs: 1378 | epoch avg. loss: 2.812 | test avg. loss: 66.950
Epochs: 1379 | epoch avg. loss: 4.424 | test avg. loss: 65.179




Epochs: 1380 | epoch avg. loss: 4.956 | test avg. loss: 65.191
Epochs: 1381 | epoch avg. loss: 4.836 | test avg. loss: 63.562
Epochs: 1382 | epoch avg. loss: 3.815 | test avg. loss: 63.835


  3%|▎         | 1386/50000 [01:48<57:19, 14.14it/s]

Epochs: 1383 | epoch avg. loss: 3.842 | test avg. loss: 65.308
Epochs: 1384 | epoch avg. loss: 2.982 | test avg. loss: 66.247
Epochs: 1385 | epoch avg. loss: 4.369 | test avg. loss: 66.122
Epochs: 1386 | epoch avg. loss: 4.522 | test avg. loss: 64.288


  3%|▎         | 1388/50000 [01:48<56:11, 14.42it/s]

Epochs: 1387 | epoch avg. loss: 5.051 | test avg. loss: 62.514
Epochs: 1388 | epoch avg. loss: 3.400 | test avg. loss: 62.552


  3%|▎         | 1392/50000 [01:49<1:06:36, 12.16it/s]

Epochs: 1389 | epoch avg. loss: 3.386 | test avg. loss: 63.218
Epochs: 1390 | epoch avg. loss: 2.972 | test avg. loss: 65.612
Epochs: 1391 | epoch avg. loss: 3.768 | test avg. loss: 64.949


  3%|▎         | 1394/50000 [01:49<1:07:01, 12.09it/s]

Epochs: 1392 | epoch avg. loss: 3.521 | test avg. loss: 64.672
Epochs: 1393 | epoch avg. loss: 3.458 | test avg. loss: 63.219
Epochs: 1394 | epoch avg. loss: 2.832 | test avg. loss: 62.865


  3%|▎         | 1398/50000 [01:49<1:04:39, 12.53it/s]

Epochs: 1395 | epoch avg. loss: 3.079 | test avg. loss: 62.898
Epochs: 1396 | epoch avg. loss: 2.208 | test avg. loss: 63.370
Epochs: 1397 | epoch avg. loss: 3.008 | test avg. loss: 63.375


  3%|▎         | 1398/50000 [01:49<1:04:39, 12.53it/s]

Epochs: 1398 | epoch avg. loss: 2.373 | test avg. loss: 63.043
Epochs: 1399 | epoch avg. loss: 1.983 | test avg. loss: 63.839


  3%|▎         | 1402/50000 [01:51<2:41:02,  5.03it/s]

Epochs: 1400 | epoch avg. loss: 2.241 | test avg. loss: 63.856
Epochs: 1401 | epoch avg. loss: 2.262 | test avg. loss: 63.575
Epochs: 1402 | epoch avg. loss: 2.029 | test avg. loss: 63.303


  3%|▎         | 1406/50000 [01:51<1:50:45,  7.31it/s]

Epochs: 1403 | epoch avg. loss: 1.814 | test avg. loss: 63.191
Epochs: 1404 | epoch avg. loss: 2.229 | test avg. loss: 63.329
Epochs: 1405 | epoch avg. loss: 2.650 | test avg. loss: 63.428
Epochs: 1406 | epoch avg. loss: 3.189 | test avg. loss: 62.839


  3%|▎         | 1410/50000 [01:51<1:20:08, 10.10it/s]

Epochs: 1407 | epoch avg. loss: 2.741 | test avg. loss: 62.872
Epochs: 1408 | epoch avg. loss: 2.347 | test avg. loss: 63.009
Epochs: 1409 | epoch avg. loss: 2.810 | test avg. loss: 63.969
Epochs: 1410 | epoch avg. loss: 2.900 | test avg. loss: 63.892


  3%|▎         | 1414/50000 [01:51<1:09:21, 11.68it/s]

Epochs: 1411 | epoch avg. loss: 3.006 | test avg. loss: 64.552
Epochs: 1412 | epoch avg. loss: 2.929 | test avg. loss: 64.187
Epochs: 1413 | epoch avg. loss: 3.836 | test avg. loss: 63.119


  3%|▎         | 1418/50000 [01:52<59:55, 13.51it/s]  

Epochs: 1414 | epoch avg. loss: 3.075 | test avg. loss: 62.138
Epochs: 1415 | epoch avg. loss: 2.358 | test avg. loss: 62.334
Epochs: 1416 | epoch avg. loss: 2.190 | test avg. loss: 64.374
Epochs: 1417 | epoch avg. loss: 3.220 | test avg. loss: 64.085


  3%|▎         | 1422/50000 [01:52<56:09, 14.42it/s]

Epochs: 1418 | epoch avg. loss: 3.322 | test avg. loss: 62.834
Epochs: 1419 | epoch avg. loss: 1.961 | test avg. loss: 62.514
Epochs: 1420 | epoch avg. loss: 1.797 | test avg. loss: 62.507
Epochs: 1421 | epoch avg. loss: 1.819 | test avg. loss: 62.795


  3%|▎         | 1426/50000 [01:52<53:35, 15.11it/s]

Epochs: 1422 | epoch avg. loss: 1.948 | test avg. loss: 62.826
Epochs: 1423 | epoch avg. loss: 1.759 | test avg. loss: 62.918
Epochs: 1424 | epoch avg. loss: 1.746 | test avg. loss: 63.164
Epochs: 1425 | epoch avg. loss: 1.784 | test avg. loss: 63.195


  3%|▎         | 1428/50000 [01:53<57:17, 14.13it/s]

Epochs: 1426 | epoch avg. loss: 1.871 | test avg. loss: 63.276
Epochs: 1427 | epoch avg. loss: 1.728 | test avg. loss: 63.550
Epochs: 1428 | epoch avg. loss: 1.900 | test avg. loss: 63.306


  3%|▎         | 1432/50000 [01:53<1:00:04, 13.48it/s]

Epochs: 1429 | epoch avg. loss: 2.040 | test avg. loss: 62.951
Epochs: 1430 | epoch avg. loss: 1.682 | test avg. loss: 62.792
Epochs: 1431 | epoch avg. loss: 1.646 | test avg. loss: 62.794


  3%|▎         | 1436/50000 [01:53<56:11, 14.41it/s]

Epochs: 1432 | epoch avg. loss: 1.851 | test avg. loss: 62.832
Epochs: 1433 | epoch avg. loss: 1.676 | test avg. loss: 62.863
Epochs: 1434 | epoch avg. loss: 1.678 | test avg. loss: 62.939
Epochs: 1435 | epoch avg. loss: 1.835 | test avg. loss: 62.952


  3%|▎         | 1440/50000 [01:53<53:06, 15.24it/s]

Epochs: 1436 | epoch avg. loss: 1.623 | test avg. loss: 63.030
Epochs: 1437 | epoch avg. loss: 1.811 | test avg. loss: 62.851
Epochs: 1438 | epoch avg. loss: 1.729 | test avg. loss: 62.966
Epochs: 1439 | epoch avg. loss: 1.629 | test avg. loss: 62.876


  3%|▎         | 1442/50000 [01:53<56:12, 14.40it/s]

Epochs: 1440 | epoch avg. loss: 1.748 | test avg. loss: 62.762
Epochs: 1441 | epoch avg. loss: 1.554 | test avg. loss: 62.820
Epochs: 1442 | epoch avg. loss: 1.815 | test avg. loss: 62.663


  3%|▎         | 1446/50000 [01:54<55:03, 14.70it/s]

Epochs: 1443 | epoch avg. loss: 1.884 | test avg. loss: 62.767
Epochs: 1444 | epoch avg. loss: 1.677 | test avg. loss: 62.776
Epochs: 1445 | epoch avg. loss: 1.502 | test avg. loss: 63.013
Epochs: 1446 | epoch avg. loss: 1.727 | test avg. loss: 63.154


  3%|▎         | 1450/50000 [01:54<54:29, 14.85it/s]

Epochs: 1447 | epoch avg. loss: 1.775 | test avg. loss: 62.668
Epochs: 1448 | epoch avg. loss: 1.721 | test avg. loss: 62.521
Epochs: 1449 | epoch avg. loss: 1.687 | test avg. loss: 62.503


  3%|▎         | 1452/50000 [01:54<54:08, 14.94it/s]

Epochs: 1450 | epoch avg. loss: 1.649 | test avg. loss: 62.520
Epochs: 1451 | epoch avg. loss: 1.580 | test avg. loss: 62.746
Epochs: 1452 | epoch avg. loss: 1.569 | test avg. loss: 62.921
Epochs: 1453 | epoch avg. loss: 2.098 | test avg. loss: 62.611


  3%|▎         | 1456/50000 [01:54<53:48, 15.03it/s]

Epochs: 1454 | epoch avg. loss: 1.520 | test avg. loss: 62.382
Epochs: 1455 | epoch avg. loss: 1.525 | test avg. loss: 62.210
Epochs: 1456 | epoch avg. loss: 1.510 | test avg. loss: 62.103


  3%|▎         | 1460/50000 [01:55<58:36, 13.80it/s]

Epochs: 1457 | epoch avg. loss: 1.592 | test avg. loss: 62.183
Epochs: 1458 | epoch avg. loss: 1.517 | test avg. loss: 62.376
Epochs: 1459 | epoch avg. loss: 1.561 | test avg. loss: 62.620


  3%|▎         | 1464/50000 [01:55<56:06, 14.42it/s]

Epochs: 1460 | epoch avg. loss: 1.615 | test avg. loss: 62.722
Epochs: 1461 | epoch avg. loss: 1.548 | test avg. loss: 62.771
Epochs: 1462 | epoch avg. loss: 1.524 | test avg. loss: 62.707
Epochs: 1463 | epoch avg. loss: 1.465 | test avg. loss: 63.351


  3%|▎         | 1468/50000 [01:55<55:24, 14.60it/s]

Epochs: 1464 | epoch avg. loss: 1.946 | test avg. loss: 62.930
Epochs: 1465 | epoch avg. loss: 2.082 | test avg. loss: 62.902
Epochs: 1466 | epoch avg. loss: 2.025 | test avg. loss: 62.730
Epochs: 1467 | epoch avg. loss: 2.825 | test avg. loss: 63.048


  3%|▎         | 1470/50000 [01:55<55:55, 14.46it/s]

Epochs: 1468 | epoch avg. loss: 3.149 | test avg. loss: 62.671
Epochs: 1469 | epoch avg. loss: 2.363 | test avg. loss: 62.676
Epochs: 1470 | epoch avg. loss: 2.382 | test avg. loss: 63.399




Epochs: 1471 | epoch avg. loss: 1.930 | test avg. loss: 63.169
Epochs: 1472 | epoch avg. loss: 2.187 | test avg. loss: 64.341
Epochs: 1473 | epoch avg. loss: 2.567 | test avg. loss: 63.496


  3%|▎         | 1478/50000 [01:56<56:14, 14.38it/s]

Epochs: 1474 | epoch avg. loss: 2.480 | test avg. loss: 63.276
Epochs: 1475 | epoch avg. loss: 1.588 | test avg. loss: 63.329
Epochs: 1476 | epoch avg. loss: 1.654 | test avg. loss: 63.223
Epochs: 1477 | epoch avg. loss: 1.674 | test avg. loss: 63.109


  3%|▎         | 1480/50000 [01:56<57:52, 13.97it/s]

Epochs: 1478 | epoch avg. loss: 1.427 | test avg. loss: 62.836
Epochs: 1479 | epoch avg. loss: 1.588 | test avg. loss: 63.267
Epochs: 1480 | epoch avg. loss: 1.980 | test avg. loss: 62.531


  3%|▎         | 1484/50000 [01:56<57:08, 14.15it/s]

Epochs: 1481 | epoch avg. loss: 1.884 | test avg. loss: 62.620
Epochs: 1482 | epoch avg. loss: 1.551 | test avg. loss: 62.923
Epochs: 1483 | epoch avg. loss: 1.547 | test avg. loss: 62.920


  3%|▎         | 1486/50000 [01:57<56:59, 14.19it/s]

Epochs: 1484 | epoch avg. loss: 1.518 | test avg. loss: 63.571
Epochs: 1485 | epoch avg. loss: 1.836 | test avg. loss: 63.131
Epochs: 1486 | epoch avg. loss: 1.487 | test avg. loss: 63.118


  3%|▎         | 1490/50000 [01:57<56:16, 14.37it/s]

Epochs: 1487 | epoch avg. loss: 1.414 | test avg. loss: 62.999
Epochs: 1488 | epoch avg. loss: 1.425 | test avg. loss: 62.769
Epochs: 1489 | epoch avg. loss: 1.396 | test avg. loss: 62.703
Epochs: 1490 | epoch avg. loss: 1.487 | test avg. loss: 62.698


  3%|▎         | 1494/50000 [01:57<55:33, 14.55it/s]

Epochs: 1491 | epoch avg. loss: 1.611 | test avg. loss: 62.602
Epochs: 1492 | epoch avg. loss: 1.391 | test avg. loss: 62.698
Epochs: 1493 | epoch avg. loss: 1.626 | test avg. loss: 62.723


  3%|▎         | 1498/50000 [01:57<53:37, 15.08it/s]

Epochs: 1494 | epoch avg. loss: 1.443 | test avg. loss: 62.895
Epochs: 1495 | epoch avg. loss: 1.473 | test avg. loss: 62.951
Epochs: 1496 | epoch avg. loss: 1.529 | test avg. loss: 62.968
Epochs: 1497 | epoch avg. loss: 1.472 | test avg. loss: 62.804


  3%|▎         | 1498/50000 [01:57<53:37, 15.08it/s]

Epochs: 1498 | epoch avg. loss: 1.394 | test avg. loss: 62.888
Epochs: 1499 | epoch avg. loss: 1.552 | test avg. loss: 62.724


  3%|▎         | 1504/50000 [01:58<1:36:57,  8.34it/s]

Epochs: 1500 | epoch avg. loss: 1.373 | test avg. loss: 63.058
Epochs: 1501 | epoch avg. loss: 1.472 | test avg. loss: 62.982
Epochs: 1502 | epoch avg. loss: 1.366 | test avg. loss: 63.100
Epochs: 1503 | epoch avg. loss: 1.384 | test avg. loss: 63.084


  3%|▎         | 1508/50000 [01:59<1:13:18, 11.02it/s]

Epochs: 1504 | epoch avg. loss: 1.430 | test avg. loss: 63.127
Epochs: 1505 | epoch avg. loss: 1.438 | test avg. loss: 63.076
Epochs: 1506 | epoch avg. loss: 1.441 | test avg. loss: 62.940
Epochs: 1507 | epoch avg. loss: 1.375 | test avg. loss: 62.822


  3%|▎         | 1512/50000 [01:59<1:02:43, 12.89it/s]

Epochs: 1508 | epoch avg. loss: 1.335 | test avg. loss: 62.841
Epochs: 1509 | epoch avg. loss: 1.697 | test avg. loss: 62.704
Epochs: 1510 | epoch avg. loss: 1.375 | test avg. loss: 62.751
Epochs: 1511 | epoch avg. loss: 1.534 | test avg. loss: 62.810


                                                    

Epochs: 1512 | epoch avg. loss: 1.427 | test avg. loss: 62.906
Epochs: 1513 | epoch avg. loss: 1.392 | test avg. loss: 63.032
Epochs: 1514 | epoch avg. loss: 1.543 | test avg. loss: 63.187


  3%|▎         | 1518/50000 [01:59<58:28, 13.82it/s]

Epochs: 1515 | epoch avg. loss: 1.460 | test avg. loss: 63.207
Epochs: 1516 | epoch avg. loss: 1.427 | test avg. loss: 63.127
Epochs: 1517 | epoch avg. loss: 1.337 | test avg. loss: 62.975


  3%|▎         | 1520/50000 [02:00<1:00:02, 13.46it/s]

Epochs: 1518 | epoch avg. loss: 1.415 | test avg. loss: 62.916
Epochs: 1519 | epoch avg. loss: 1.401 | test avg. loss: 62.993
Epochs: 1520 | epoch avg. loss: 1.755 | test avg. loss: 63.445


  3%|▎         | 1524/50000 [02:00<59:24, 13.60it/s]  

Epochs: 1521 | epoch avg. loss: 1.992 | test avg. loss: 63.159
Epochs: 1522 | epoch avg. loss: 2.427 | test avg. loss: 63.894
Epochs: 1523 | epoch avg. loss: 1.992 | test avg. loss: 63.683


  3%|▎         | 1528/50000 [02:00<55:39, 14.52it/s]

Epochs: 1524 | epoch avg. loss: 1.425 | test avg. loss: 64.534
Epochs: 1525 | epoch avg. loss: 2.785 | test avg. loss: 63.785
Epochs: 1526 | epoch avg. loss: 1.819 | test avg. loss: 63.116
Epochs: 1527 | epoch avg. loss: 1.347 | test avg. loss: 63.401


  3%|▎         | 1530/50000 [02:00<56:36, 14.27it/s]

Epochs: 1528 | epoch avg. loss: 1.635 | test avg. loss: 62.836
Epochs: 1529 | epoch avg. loss: 1.543 | test avg. loss: 62.974
Epochs: 1530 | epoch avg. loss: 1.520 | test avg. loss: 62.891
Epochs: 1531 | epoch avg. loss: 1.413 | test avg. loss: 62.927


  3%|▎         | 1534/50000 [02:01<57:52, 13.96it/s]

Epochs: 1532 | epoch avg. loss: 1.287 | test avg. loss: 63.173
Epochs: 1533 | epoch avg. loss: 1.473 | test avg. loss: 62.936
Epochs: 1534 | epoch avg. loss: 1.416 | test avg. loss: 62.978
Epochs: 1535 | epoch avg. loss: 1.455 | test avg. loss: 63.107


  3%|▎         | 1538/50000 [02:01<55:39, 14.51it/s]

Epochs: 1536 | epoch avg. loss: 1.410 | test avg. loss: 62.982
Epochs: 1537 | epoch avg. loss: 1.338 | test avg. loss: 63.039
Epochs: 1538 | epoch avg. loss: 1.333 | test avg. loss: 63.212


  3%|▎         | 1542/50000 [02:01<58:33, 13.79it/s]

Epochs: 1539 | epoch avg. loss: 1.310 | test avg. loss: 63.333
Epochs: 1540 | epoch avg. loss: 1.645 | test avg. loss: 64.218
Epochs: 1541 | epoch avg. loss: 2.210 | test avg. loss: 63.616


  3%|▎         | 1544/50000 [02:01<1:01:28, 13.14it/s]

Epochs: 1542 | epoch avg. loss: 1.948 | test avg. loss: 63.299
Epochs: 1543 | epoch avg. loss: 1.583 | test avg. loss: 63.320
Epochs: 1544 | epoch avg. loss: 2.059 | test avg. loss: 63.896


  3%|▎         | 1548/50000 [02:02<1:04:19, 12.55it/s]

Epochs: 1545 | epoch avg. loss: 3.166 | test avg. loss: 64.993
Epochs: 1546 | epoch avg. loss: 3.904 | test avg. loss: 63.634
Epochs: 1547 | epoch avg. loss: 2.762 | test avg. loss: 63.182


  3%|▎         | 1550/50000 [02:02<1:04:40, 12.49it/s]

Epochs: 1548 | epoch avg. loss: 2.600 | test avg. loss: 63.020
Epochs: 1549 | epoch avg. loss: 2.259 | test avg. loss: 62.929
Epochs: 1550 | epoch avg. loss: 2.426 | test avg. loss: 62.603


  3%|▎         | 1554/50000 [02:02<1:05:26, 12.34it/s]

Epochs: 1551 | epoch avg. loss: 1.459 | test avg. loss: 63.204
Epochs: 1552 | epoch avg. loss: 2.227 | test avg. loss: 62.847
Epochs: 1553 | epoch avg. loss: 2.097 | test avg. loss: 63.549


  3%|▎         | 1556/50000 [02:02<1:02:43, 12.87it/s]

Epochs: 1554 | epoch avg. loss: 1.759 | test avg. loss: 63.389
Epochs: 1555 | epoch avg. loss: 2.148 | test avg. loss: 64.245
Epochs: 1556 | epoch avg. loss: 2.219 | test avg. loss: 63.428


  3%|▎         | 1560/50000 [02:03<1:02:54, 12.83it/s]

Epochs: 1557 | epoch avg. loss: 1.822 | test avg. loss: 63.977
Epochs: 1558 | epoch avg. loss: 2.012 | test avg. loss: 63.892
Epochs: 1559 | epoch avg. loss: 2.033 | test avg. loss: 63.910


  3%|▎         | 1562/50000 [02:03<1:08:55, 11.71it/s]

Epochs: 1560 | epoch avg. loss: 1.923 | test avg. loss: 64.127
Epochs: 1561 | epoch avg. loss: 1.862 | test avg. loss: 63.735
Epochs: 1562 | epoch avg. loss: 2.170 | test avg. loss: 64.174


  3%|▎         | 1566/50000 [02:03<1:11:01, 11.37it/s]

Epochs: 1563 | epoch avg. loss: 2.017 | test avg. loss: 63.541
Epochs: 1564 | epoch avg. loss: 1.731 | test avg. loss: 63.782
Epochs: 1565 | epoch avg. loss: 1.824 | test avg. loss: 63.531


  3%|▎         | 1568/50000 [02:03<1:10:02, 11.52it/s]

Epochs: 1566 | epoch avg. loss: 1.827 | test avg. loss: 63.710
Epochs: 1567 | epoch avg. loss: 1.580 | test avg. loss: 63.502
Epochs: 1568 | epoch avg. loss: 1.494 | test avg. loss: 63.602


  3%|▎         | 1572/50000 [02:04<1:12:50, 11.08it/s]

Epochs: 1569 | epoch avg. loss: 1.287 | test avg. loss: 63.591
Epochs: 1570 | epoch avg. loss: 1.585 | test avg. loss: 63.480
Epochs: 1571 | epoch avg. loss: 1.408 | test avg. loss: 63.418


  3%|▎         | 1574/50000 [02:04<1:07:21, 11.98it/s]

Epochs: 1572 | epoch avg. loss: 1.401 | test avg. loss: 63.382
Epochs: 1573 | epoch avg. loss: 1.253 | test avg. loss: 63.517
Epochs: 1574 | epoch avg. loss: 1.477 | test avg. loss: 63.864


  3%|▎         | 1578/50000 [02:04<1:07:28, 11.96it/s]

Epochs: 1575 | epoch avg. loss: 1.352 | test avg. loss: 63.841
Epochs: 1576 | epoch avg. loss: 1.627 | test avg. loss: 64.885
Epochs: 1577 | epoch avg. loss: 2.201 | test avg. loss: 63.736


  3%|▎         | 1580/50000 [02:04<1:06:53, 12.06it/s]

Epochs: 1578 | epoch avg. loss: 1.448 | test avg. loss: 63.738
Epochs: 1579 | epoch avg. loss: 1.239 | test avg. loss: 63.970
Epochs: 1580 | epoch avg. loss: 1.318 | test avg. loss: 63.866


  3%|▎         | 1584/50000 [02:05<1:12:54, 11.07it/s]

Epochs: 1581 | epoch avg. loss: 1.538 | test avg. loss: 64.690
Epochs: 1582 | epoch avg. loss: 2.037 | test avg. loss: 63.794
Epochs: 1583 | epoch avg. loss: 1.316 | test avg. loss: 63.945


  3%|▎         | 1586/50000 [02:05<1:12:00, 11.21it/s]

Epochs: 1584 | epoch avg. loss: 1.318 | test avg. loss: 63.808
Epochs: 1585 | epoch avg. loss: 1.294 | test avg. loss: 63.926
Epochs: 1586 | epoch avg. loss: 1.584 | test avg. loss: 63.760


  3%|▎         | 1590/50000 [02:05<1:10:46, 11.40it/s]

Epochs: 1587 | epoch avg. loss: 1.268 | test avg. loss: 63.654
Epochs: 1588 | epoch avg. loss: 1.603 | test avg. loss: 63.563
Epochs: 1589 | epoch avg. loss: 1.276 | test avg. loss: 63.515


  3%|▎         | 1592/50000 [02:05<1:08:05, 11.85it/s]

Epochs: 1590 | epoch avg. loss: 1.241 | test avg. loss: 63.851
Epochs: 1591 | epoch avg. loss: 1.406 | test avg. loss: 63.734
Epochs: 1592 | epoch avg. loss: 1.460 | test avg. loss: 63.615


  3%|▎         | 1596/50000 [02:06<1:12:19, 11.15it/s]

Epochs: 1593 | epoch avg. loss: 1.341 | test avg. loss: 63.807
Epochs: 1594 | epoch avg. loss: 1.440 | test avg. loss: 63.369
Epochs: 1595 | epoch avg. loss: 1.316 | test avg. loss: 63.222


  3%|▎         | 1598/50000 [02:06<1:06:59, 12.04it/s]

Epochs: 1596 | epoch avg. loss: 1.369 | test avg. loss: 63.567
Epochs: 1597 | epoch avg. loss: 1.607 | test avg. loss: 63.133
Epochs: 1598 | epoch avg. loss: 1.850 | test avg. loss: 63.650


  3%|▎         | 1598/50000 [02:06<1:06:59, 12.04it/s]

Epochs: 1599 | epoch avg. loss: 2.201 | test avg. loss: 63.306


  3%|▎         | 1602/50000 [02:07<2:47:39,  4.81it/s]

Epochs: 1600 | epoch avg. loss: 1.702 | test avg. loss: 63.289
Epochs: 1601 | epoch avg. loss: 1.547 | test avg. loss: 63.348
Epochs: 1602 | epoch avg. loss: 1.166 | test avg. loss: 63.484


  3%|▎         | 1606/50000 [02:08<1:59:52,  6.73it/s]

Epochs: 1603 | epoch avg. loss: 1.500 | test avg. loss: 63.839
Epochs: 1604 | epoch avg. loss: 1.645 | test avg. loss: 63.478
Epochs: 1605 | epoch avg. loss: 2.158 | test avg. loss: 64.970


  3%|▎         | 1608/50000 [02:08<1:45:56,  7.61it/s]

Epochs: 1606 | epoch avg. loss: 2.999 | test avg. loss: 63.261
Epochs: 1607 | epoch avg. loss: 2.121 | test avg. loss: 63.140
Epochs: 1608 | epoch avg. loss: 2.149 | test avg. loss: 63.081


  3%|▎         | 1612/50000 [02:08<1:24:23,  9.56it/s]

Epochs: 1609 | epoch avg. loss: 1.941 | test avg. loss: 63.206
Epochs: 1610 | epoch avg. loss: 1.949 | test avg. loss: 63.560
Epochs: 1611 | epoch avg. loss: 2.313 | test avg. loss: 63.474


  3%|▎         | 1614/50000 [02:08<1:17:11, 10.45it/s]

Epochs: 1612 | epoch avg. loss: 1.549 | test avg. loss: 64.627
Epochs: 1613 | epoch avg. loss: 1.831 | test avg. loss: 64.322
Epochs: 1614 | epoch avg. loss: 2.180 | test avg. loss: 64.027


  3%|▎         | 1618/50000 [02:09<1:16:43, 10.51it/s]

Epochs: 1615 | epoch avg. loss: 1.841 | test avg. loss: 63.455
Epochs: 1616 | epoch avg. loss: 1.714 | test avg. loss: 63.265
Epochs: 1617 | epoch avg. loss: 1.193 | test avg. loss: 63.245


  3%|▎         | 1620/50000 [02:09<1:11:12, 11.32it/s]

Epochs: 1618 | epoch avg. loss: 1.331 | test avg. loss: 63.286
Epochs: 1619 | epoch avg. loss: 1.206 | test avg. loss: 63.197
Epochs: 1620 | epoch avg. loss: 1.313 | test avg. loss: 63.222


  3%|▎         | 1624/50000 [02:09<1:09:34, 11.59it/s]

Epochs: 1621 | epoch avg. loss: 1.237 | test avg. loss: 63.169
Epochs: 1622 | epoch avg. loss: 1.364 | test avg. loss: 63.446
Epochs: 1623 | epoch avg. loss: 1.515 | test avg. loss: 63.489


  3%|▎         | 1626/50000 [02:10<1:11:03, 11.34it/s]

Epochs: 1624 | epoch avg. loss: 1.401 | test avg. loss: 63.360
Epochs: 1625 | epoch avg. loss: 1.309 | test avg. loss: 63.330
Epochs: 1626 | epoch avg. loss: 1.717 | test avg. loss: 63.407


                                                      

Epochs: 1627 | epoch avg. loss: 1.850 | test avg. loss: 63.723
Epochs: 1628 | epoch avg. loss: 1.819 | test avg. loss: 63.117
Epochs: 1629 | epoch avg. loss: 1.558 | test avg. loss: 63.271


  3%|▎         | 1632/50000 [02:10<1:02:42, 12.86it/s]

Epochs: 1630 | epoch avg. loss: 1.227 | test avg. loss: 63.912
Epochs: 1631 | epoch avg. loss: 1.639 | test avg. loss: 63.657
Epochs: 1632 | epoch avg. loss: 1.762 | test avg. loss: 63.521


  3%|▎         | 1636/50000 [02:10<58:21, 13.81it/s]

Epochs: 1633 | epoch avg. loss: 1.217 | test avg. loss: 63.440
Epochs: 1634 | epoch avg. loss: 1.427 | test avg. loss: 63.228
Epochs: 1635 | epoch avg. loss: 1.421 | test avg. loss: 62.844
Epochs: 1636 | epoch avg. loss: 1.240 | test avg. loss: 62.869


  3%|▎         | 1640/50000 [02:10<59:49, 13.47it/s]

Epochs: 1637 | epoch avg. loss: 1.230 | test avg. loss: 62.909
Epochs: 1638 | epoch avg. loss: 1.172 | test avg. loss: 62.915
Epochs: 1639 | epoch avg. loss: 1.149 | test avg. loss: 62.956


  3%|▎         | 1642/50000 [02:11<1:01:01, 13.21it/s]

Epochs: 1640 | epoch avg. loss: 1.191 | test avg. loss: 62.980
Epochs: 1641 | epoch avg. loss: 1.158 | test avg. loss: 62.880
Epochs: 1642 | epoch avg. loss: 1.528 | test avg. loss: 64.369


  3%|▎         | 1646/50000 [02:11<57:36, 13.99it/s]

Epochs: 1643 | epoch avg. loss: 2.382 | test avg. loss: 63.521
Epochs: 1644 | epoch avg. loss: 2.013 | test avg. loss: 63.296
Epochs: 1645 | epoch avg. loss: 1.824 | test avg. loss: 62.925
Epochs: 1646 | epoch avg. loss: 1.632 | test avg. loss: 62.520


  3%|▎         | 1650/50000 [02:11<54:34, 14.77it/s]

Epochs: 1647 | epoch avg. loss: 1.202 | test avg. loss: 62.528
Epochs: 1648 | epoch avg. loss: 1.215 | test avg. loss: 62.539
Epochs: 1649 | epoch avg. loss: 1.211 | test avg. loss: 62.639
Epochs: 1650 | epoch avg. loss: 1.236 | test avg. loss: 62.562


  3%|▎         | 1654/50000 [02:11<56:59, 14.14it/s]

Epochs: 1651 | epoch avg. loss: 1.309 | test avg. loss: 62.590
Epochs: 1652 | epoch avg. loss: 1.152 | test avg. loss: 63.093
Epochs: 1653 | epoch avg. loss: 1.749 | test avg. loss: 62.721


  3%|▎         | 1656/50000 [02:12<59:01, 13.65it/s]

Epochs: 1654 | epoch avg. loss: 1.317 | test avg. loss: 62.438
Epochs: 1655 | epoch avg. loss: 1.309 | test avg. loss: 62.472
Epochs: 1656 | epoch avg. loss: 1.272 | test avg. loss: 62.267


  3%|▎         | 1660/50000 [02:12<55:44, 14.46it/s]

Epochs: 1657 | epoch avg. loss: 1.196 | test avg. loss: 62.590
Epochs: 1658 | epoch avg. loss: 1.320 | test avg. loss: 62.706
Epochs: 1659 | epoch avg. loss: 1.362 | test avg. loss: 62.741
Epochs: 1660 | epoch avg. loss: 1.210 | test avg. loss: 62.643




Epochs: 1661 | epoch avg. loss: 1.102 | test avg. loss: 62.700
Epochs: 1662 | epoch avg. loss: 1.217 | test avg. loss: 62.611
Epochs: 1663 | epoch avg. loss: 1.105 | test avg. loss: 62.524


  3%|▎         | 1666/50000 [02:12<54:33, 14.77it/s]

Epochs: 1664 | epoch avg. loss: 1.108 | test avg. loss: 62.814
Epochs: 1665 | epoch avg. loss: 1.298 | test avg. loss: 62.812
Epochs: 1666 | epoch avg. loss: 1.417 | test avg. loss: 62.584


  3%|▎         | 1670/50000 [02:13<56:51, 14.17it/s]

Epochs: 1667 | epoch avg. loss: 1.143 | test avg. loss: 62.538
Epochs: 1668 | epoch avg. loss: 1.153 | test avg. loss: 62.619
Epochs: 1669 | epoch avg. loss: 1.426 | test avg. loss: 63.262


  3%|▎         | 1672/50000 [02:13<59:05, 13.63it/s]

Epochs: 1670 | epoch avg. loss: 1.818 | test avg. loss: 63.327
Epochs: 1671 | epoch avg. loss: 1.745 | test avg. loss: 63.190
Epochs: 1672 | epoch avg. loss: 1.735 | test avg. loss: 62.740


  3%|▎         | 1676/50000 [02:13<56:17, 14.31it/s]

Epochs: 1673 | epoch avg. loss: 1.247 | test avg. loss: 62.624
Epochs: 1674 | epoch avg. loss: 1.103 | test avg. loss: 62.680
Epochs: 1675 | epoch avg. loss: 1.180 | test avg. loss: 62.813
Epochs: 1676 | epoch avg. loss: 1.164 | test avg. loss: 62.775


  3%|▎         | 1680/50000 [02:13<54:16, 14.84it/s]

Epochs: 1677 | epoch avg. loss: 1.304 | test avg. loss: 62.779
Epochs: 1678 | epoch avg. loss: 1.196 | test avg. loss: 62.629
Epochs: 1679 | epoch avg. loss: 1.303 | test avg. loss: 63.317
Epochs: 1680 | epoch avg. loss: 1.751 | test avg. loss: 62.718


  3%|▎         | 1684/50000 [02:14<58:00, 13.88it/s]

Epochs: 1681 | epoch avg. loss: 1.268 | test avg. loss: 62.740
Epochs: 1682 | epoch avg. loss: 1.158 | test avg. loss: 62.627
Epochs: 1683 | epoch avg. loss: 1.150 | test avg. loss: 62.605


  3%|▎         | 1686/50000 [02:14<58:32, 13.76it/s]

Epochs: 1684 | epoch avg. loss: 1.226 | test avg. loss: 62.568
Epochs: 1685 | epoch avg. loss: 1.656 | test avg. loss: 63.495
Epochs: 1686 | epoch avg. loss: 2.364 | test avg. loss: 63.511
Epochs: 1687 | epoch avg. loss: 2.635 | test avg. loss: 63.272


  3%|▎         | 1690/50000 [02:14<55:55, 14.40it/s]

Epochs: 1688 | epoch avg. loss: 2.216 | test avg. loss: 62.715
Epochs: 1689 | epoch avg. loss: 2.140 | test avg. loss: 63.159
Epochs: 1690 | epoch avg. loss: 2.499 | test avg. loss: 63.115
Epochs: 1691 | epoch avg. loss: 2.966 | test avg. loss: 64.194


  3%|▎         | 1696/50000 [02:14<55:56, 14.39it/s]

Epochs: 1692 | epoch avg. loss: 3.264 | test avg. loss: 63.127
Epochs: 1693 | epoch avg. loss: 2.130 | test avg. loss: 62.555
Epochs: 1694 | epoch avg. loss: 1.539 | test avg. loss: 62.509
Epochs: 1695 | epoch avg. loss: 1.371 | test avg. loss: 62.220


  3%|▎         | 1698/50000 [02:15<58:30, 13.76it/s]

Epochs: 1696 | epoch avg. loss: 1.064 | test avg. loss: 62.682
Epochs: 1697 | epoch avg. loss: 2.027 | test avg. loss: 61.981
Epochs: 1698 | epoch avg. loss: 1.268 | test avg. loss: 62.799


  3%|▎         | 1698/50000 [02:15<58:30, 13.76it/s]

Epochs: 1699 | epoch avg. loss: 1.929 | test avg. loss: 62.550


  3%|▎         | 1704/50000 [02:16<1:59:47,  6.72it/s]

Epochs: 1700 | epoch avg. loss: 1.508 | test avg. loss: 63.032
Epochs: 1701 | epoch avg. loss: 1.089 | test avg. loss: 63.588
Epochs: 1702 | epoch avg. loss: 1.496 | test avg. loss: 63.762
Epochs: 1703 | epoch avg. loss: 1.254 | test avg. loss: 63.758


  3%|▎         | 1708/50000 [02:16<1:25:50,  9.38it/s]

Epochs: 1704 | epoch avg. loss: 1.171 | test avg. loss: 63.742
Epochs: 1705 | epoch avg. loss: 1.163 | test avg. loss: 63.519
Epochs: 1706 | epoch avg. loss: 1.359 | test avg. loss: 63.287
Epochs: 1707 | epoch avg. loss: 1.238 | test avg. loss: 63.107


  3%|▎         | 1712/50000 [02:17<1:09:48, 11.53it/s]

Epochs: 1708 | epoch avg. loss: 1.040 | test avg. loss: 63.368
Epochs: 1709 | epoch avg. loss: 1.206 | test avg. loss: 63.370
Epochs: 1710 | epoch avg. loss: 1.110 | test avg. loss: 64.041
Epochs: 1711 | epoch avg. loss: 1.534 | test avg. loss: 64.687


  3%|▎         | 1716/50000 [02:17<59:51, 13.44it/s]  

Epochs: 1712 | epoch avg. loss: 2.293 | test avg. loss: 65.752
Epochs: 1713 | epoch avg. loss: 3.285 | test avg. loss: 65.863
Epochs: 1714 | epoch avg. loss: 4.589 | test avg. loss: 69.273
Epochs: 1715 | epoch avg. loss: 8.623 | test avg. loss: 73.950


  3%|▎         | 1720/50000 [02:17<55:12, 14.57it/s]

Epochs: 1716 | epoch avg. loss: 12.383 | test avg. loss: 65.629
Epochs: 1717 | epoch avg. loss: 4.834 | test avg. loss: 64.609
Epochs: 1718 | epoch avg. loss: 5.564 | test avg. loss: 65.633
Epochs: 1719 | epoch avg. loss: 5.076 | test avg. loss: 63.547


  3%|▎         | 1722/50000 [02:17<55:12, 14.58it/s]

Epochs: 1720 | epoch avg. loss: 4.421 | test avg. loss: 64.916
Epochs: 1721 | epoch avg. loss: 5.497 | test avg. loss: 65.188
Epochs: 1722 | epoch avg. loss: 3.602 | test avg. loss: 64.167


  3%|▎         | 1726/50000 [02:18<1:02:37, 12.85it/s]

Epochs: 1723 | epoch avg. loss: 2.397 | test avg. loss: 64.484
Epochs: 1724 | epoch avg. loss: 2.456 | test avg. loss: 65.356
Epochs: 1725 | epoch avg. loss: 3.264 | test avg. loss: 66.911


  3%|▎         | 1728/50000 [02:18<1:02:47, 12.81it/s]

Epochs: 1726 | epoch avg. loss: 3.769 | test avg. loss: 65.287
Epochs: 1727 | epoch avg. loss: 3.012 | test avg. loss: 66.013
Epochs: 1728 | epoch avg. loss: 2.212 | test avg. loss: 65.752


  3%|▎         | 1732/50000 [02:18<59:06, 13.61it/s]

Epochs: 1729 | epoch avg. loss: 2.933 | test avg. loss: 65.438
Epochs: 1730 | epoch avg. loss: 2.056 | test avg. loss: 65.378
Epochs: 1731 | epoch avg. loss: 2.579 | test avg. loss: 64.183
Epochs: 1732 | epoch avg. loss: 1.462 | test avg. loss: 63.682


  3%|▎         | 1736/50000 [02:18<55:34, 14.47it/s]

Epochs: 1733 | epoch avg. loss: 1.387 | test avg. loss: 63.364
Epochs: 1734 | epoch avg. loss: 1.096 | test avg. loss: 63.160
Epochs: 1735 | epoch avg. loss: 1.165 | test avg. loss: 63.233


  3%|▎         | 1738/50000 [02:18<58:39, 13.71it/s]

Epochs: 1736 | epoch avg. loss: 1.077 | test avg. loss: 63.350
Epochs: 1737 | epoch avg. loss: 1.090 | test avg. loss: 63.402
Epochs: 1738 | epoch avg. loss: 1.145 | test avg. loss: 63.609


  3%|▎         | 1742/50000 [02:19<57:07, 14.08it/s]

Epochs: 1739 | epoch avg. loss: 1.257 | test avg. loss: 63.532
Epochs: 1740 | epoch avg. loss: 1.226 | test avg. loss: 63.504
Epochs: 1741 | epoch avg. loss: 1.431 | test avg. loss: 63.818
Epochs: 1742 | epoch avg. loss: 1.431 | test avg. loss: 63.184


  3%|▎         | 1746/50000 [02:19<53:46, 14.96it/s]

Epochs: 1743 | epoch avg. loss: 1.117 | test avg. loss: 63.097
Epochs: 1744 | epoch avg. loss: 1.246 | test avg. loss: 63.118
Epochs: 1745 | epoch avg. loss: 1.385 | test avg. loss: 63.844
Epochs: 1746 | epoch avg. loss: 1.763 | test avg. loss: 63.740


  4%|▎         | 1750/50000 [02:19<52:37, 15.28it/s]

Epochs: 1747 | epoch avg. loss: 2.463 | test avg. loss: 66.019
Epochs: 1748 | epoch avg. loss: 3.577 | test avg. loss: 64.391
Epochs: 1749 | epoch avg. loss: 2.824 | test avg. loss: 64.553
Epochs: 1750 | epoch avg. loss: 2.581 | test avg. loss: 63.915


  4%|▎         | 1754/50000 [02:20<58:48, 13.67it/s]

Epochs: 1751 | epoch avg. loss: 2.040 | test avg. loss: 63.526
Epochs: 1752 | epoch avg. loss: 1.991 | test avg. loss: 63.560
Epochs: 1753 | epoch avg. loss: 1.572 | test avg. loss: 63.027


  4%|▎         | 1756/50000 [02:20<1:00:57, 13.19it/s]

Epochs: 1754 | epoch avg. loss: 1.820 | test avg. loss: 62.864
Epochs: 1755 | epoch avg. loss: 1.616 | test avg. loss: 62.759
Epochs: 1756 | epoch avg. loss: 1.178 | test avg. loss: 62.924


  4%|▎         | 1760/50000 [02:20<1:00:32, 13.28it/s]

Epochs: 1757 | epoch avg. loss: 1.141 | test avg. loss: 63.003
Epochs: 1758 | epoch avg. loss: 1.270 | test avg. loss: 63.316
Epochs: 1759 | epoch avg. loss: 1.110 | test avg. loss: 63.509


  4%|▎         | 1762/50000 [02:20<1:01:00, 13.18it/s]

Epochs: 1760 | epoch avg. loss: 0.991 | test avg. loss: 65.116
Epochs: 1761 | epoch avg. loss: 2.438 | test avg. loss: 63.514
Epochs: 1762 | epoch avg. loss: 1.289 | test avg. loss: 63.377


  4%|▎         | 1766/50000 [02:20<1:04:48, 12.40it/s]

Epochs: 1763 | epoch avg. loss: 1.115 | test avg. loss: 63.087
Epochs: 1764 | epoch avg. loss: 1.020 | test avg. loss: 63.459
Epochs: 1765 | epoch avg. loss: 1.204 | test avg. loss: 63.257


  4%|▎         | 1768/50000 [02:21<1:05:45, 12.22it/s]

Epochs: 1766 | epoch avg. loss: 1.189 | test avg. loss: 63.346
Epochs: 1767 | epoch avg. loss: 1.139 | test avg. loss: 63.306
Epochs: 1768 | epoch avg. loss: 1.229 | test avg. loss: 63.323


  4%|▎         | 1772/50000 [02:21<1:07:28, 11.91it/s]

Epochs: 1769 | epoch avg. loss: 1.123 | test avg. loss: 63.066
Epochs: 1770 | epoch avg. loss: 1.024 | test avg. loss: 63.072
Epochs: 1771 | epoch avg. loss: 0.995 | test avg. loss: 63.026


  4%|▎         | 1774/50000 [02:21<1:03:41, 12.62it/s]

Epochs: 1772 | epoch avg. loss: 1.010 | test avg. loss: 63.158
Epochs: 1773 | epoch avg. loss: 1.131 | test avg. loss: 63.368
Epochs: 1774 | epoch avg. loss: 1.157 | test avg. loss: 63.182


  4%|▎         | 1778/50000 [02:21<1:04:34, 12.45it/s]

Epochs: 1775 | epoch avg. loss: 1.145 | test avg. loss: 63.630
Epochs: 1776 | epoch avg. loss: 1.437 | test avg. loss: 64.079
Epochs: 1777 | epoch avg. loss: 2.134 | test avg. loss: 65.390


  4%|▎         | 1780/50000 [02:22<1:10:43, 11.36it/s]

Epochs: 1778 | epoch avg. loss: 2.710 | test avg. loss: 64.478
Epochs: 1779 | epoch avg. loss: 2.231 | test avg. loss: 64.272
Epochs: 1780 | epoch avg. loss: 1.771 | test avg. loss: 63.811


  4%|▎         | 1784/50000 [02:22<1:03:34, 12.64it/s]

Epochs: 1781 | epoch avg. loss: 1.866 | test avg. loss: 64.628
Epochs: 1782 | epoch avg. loss: 2.083 | test avg. loss: 63.813
Epochs: 1783 | epoch avg. loss: 1.555 | test avg. loss: 63.821


  4%|▎         | 1786/50000 [02:22<1:02:45, 12.80it/s]

Epochs: 1784 | epoch avg. loss: 1.233 | test avg. loss: 63.623
Epochs: 1785 | epoch avg. loss: 1.061 | test avg. loss: 63.625
Epochs: 1786 | epoch avg. loss: 1.539 | test avg. loss: 63.807


  4%|▎         | 1788/50000 [02:22<1:06:12, 12.14it/s]

Epochs: 1787 | epoch avg. loss: 1.479 | test avg. loss: 63.846
Epochs: 1788 | epoch avg. loss: 1.377 | test avg. loss: 63.549


  4%|▎         | 1790/50000 [02:23<1:15:23, 10.66it/s]

Epochs: 1789 | epoch avg. loss: 1.534 | test avg. loss: 63.881
Epochs: 1790 | epoch avg. loss: 1.964 | test avg. loss: 64.405


  4%|▎         | 1794/50000 [02:23<1:18:14, 10.27it/s]

Epochs: 1791 | epoch avg. loss: 2.680 | test avg. loss: 64.896
Epochs: 1792 | epoch avg. loss: 2.812 | test avg. loss: 63.913
Epochs: 1793 | epoch avg. loss: 2.694 | test avg. loss: 64.594


  4%|▎         | 1796/50000 [02:23<1:15:03, 10.70it/s]

Epochs: 1794 | epoch avg. loss: 2.569 | test avg. loss: 63.410
Epochs: 1795 | epoch avg. loss: 2.034 | test avg. loss: 63.995
Epochs: 1796 | epoch avg. loss: 2.320 | test avg. loss: 64.069


  4%|▎         | 1798/50000 [02:23<1:11:21, 11.26it/s]

Epochs: 1797 | epoch avg. loss: 3.537 | test avg. loss: 67.446
Epochs: 1798 | epoch avg. loss: 5.291 | test avg. loss: 64.757
Epochs: 1799 | epoch avg. loss: 3.639 | test avg. loss: 63.566


  4%|▎         | 1802/50000 [02:25<2:42:01,  4.96it/s]

Epochs: 1800 | epoch avg. loss: 2.199 | test avg. loss: 62.771
Epochs: 1801 | epoch avg. loss: 2.058 | test avg. loss: 63.474
Epochs: 1802 | epoch avg. loss: 1.798 | test avg. loss: 62.776


  4%|▎         | 1806/50000 [02:25<1:50:37,  7.26it/s]

Epochs: 1803 | epoch avg. loss: 1.032 | test avg. loss: 63.026
Epochs: 1804 | epoch avg. loss: 0.946 | test avg. loss: 63.816
Epochs: 1805 | epoch avg. loss: 1.338 | test avg. loss: 63.670


                                                      

Epochs: 1806 | epoch avg. loss: 1.581 | test avg. loss: 63.572
Epochs: 1807 | epoch avg. loss: 1.037 | test avg. loss: 63.499
Epochs: 1808 | epoch avg. loss: 1.018 | test avg. loss: 63.781


  4%|▎         | 1812/50000 [02:25<1:18:58, 10.17it/s]

Epochs: 1809 | epoch avg. loss: 1.275 | test avg. loss: 63.370
Epochs: 1810 | epoch avg. loss: 1.331 | test avg. loss: 63.220
Epochs: 1811 | epoch avg. loss: 0.949 | test avg. loss: 63.196


  4%|▎         | 1814/50000 [02:26<1:16:08, 10.55it/s]

Epochs: 1812 | epoch avg. loss: 1.040 | test avg. loss: 63.143
Epochs: 1813 | epoch avg. loss: 0.945 | test avg. loss: 63.130
Epochs: 1814 | epoch avg. loss: 0.991 | test avg. loss: 63.125


  4%|▎         | 1818/50000 [02:26<1:08:17, 11.76it/s]

Epochs: 1815 | epoch avg. loss: 0.939 | test avg. loss: 63.106
Epochs: 1816 | epoch avg. loss: 0.940 | test avg. loss: 63.204
Epochs: 1817 | epoch avg. loss: 0.979 | test avg. loss: 63.108


  4%|▎         | 1820/50000 [02:26<1:09:35, 11.54it/s]

Epochs: 1818 | epoch avg. loss: 1.062 | test avg. loss: 63.746
Epochs: 1819 | epoch avg. loss: 1.338 | test avg. loss: 63.526
Epochs: 1820 | epoch avg. loss: 1.375 | test avg. loss: 63.634


  4%|▎         | 1824/50000 [02:27<1:11:00, 11.31it/s]

Epochs: 1821 | epoch avg. loss: 1.229 | test avg. loss: 63.283
Epochs: 1822 | epoch avg. loss: 1.089 | test avg. loss: 63.314
Epochs: 1823 | epoch avg. loss: 1.038 | test avg. loss: 63.149


  4%|▎         | 1826/50000 [02:27<1:15:03, 10.70it/s]

Epochs: 1824 | epoch avg. loss: 0.927 | test avg. loss: 63.199
Epochs: 1825 | epoch avg. loss: 0.951 | test avg. loss: 63.216


  4%|▎         | 1828/50000 [02:27<1:13:33, 10.91it/s]

Epochs: 1826 | epoch avg. loss: 0.996 | test avg. loss: 63.267
Epochs: 1827 | epoch avg. loss: 1.032 | test avg. loss: 63.394
Epochs: 1828 | epoch avg. loss: 1.060 | test avg. loss: 63.135


  4%|▎         | 1832/50000 [02:27<1:12:04, 11.14it/s]

Epochs: 1829 | epoch avg. loss: 0.974 | test avg. loss: 63.175
Epochs: 1830 | epoch avg. loss: 0.966 | test avg. loss: 63.109
Epochs: 1831 | epoch avg. loss: 0.924 | test avg. loss: 63.156


  4%|▎         | 1834/50000 [02:28<1:12:13, 11.11it/s]

Epochs: 1832 | epoch avg. loss: 0.960 | test avg. loss: 63.224
Epochs: 1833 | epoch avg. loss: 0.954 | test avg. loss: 63.316
Epochs: 1834 | epoch avg. loss: 0.956 | test avg. loss: 63.412


  4%|▎         | 1838/50000 [02:28<1:11:16, 11.26it/s]

Epochs: 1835 | epoch avg. loss: 0.967 | test avg. loss: 63.345
Epochs: 1836 | epoch avg. loss: 0.995 | test avg. loss: 63.444
Epochs: 1837 | epoch avg. loss: 1.032 | test avg. loss: 63.335


  4%|▎         | 1840/50000 [02:28<1:07:52, 11.83it/s]

Epochs: 1838 | epoch avg. loss: 1.094 | test avg. loss: 63.721
Epochs: 1839 | epoch avg. loss: 1.329 | test avg. loss: 63.925
Epochs: 1840 | epoch avg. loss: 1.325 | test avg. loss: 63.329


  4%|▎         | 1842/50000 [02:28<1:07:35, 11.88it/s]

Epochs: 1841 | epoch avg. loss: 1.040 | test avg. loss: 63.439
Epochs: 1842 | epoch avg. loss: 1.003 | test avg. loss: 63.318
Epochs: 1843 | epoch avg. loss: 1.060 | test avg. loss: 63.985


  4%|▎         | 1846/50000 [02:29<1:11:52, 11.17it/s]

Epochs: 1844 | epoch avg. loss: 1.699 | test avg. loss: 65.782
Epochs: 1845 | epoch avg. loss: 3.167 | test avg. loss: 65.044
Epochs: 1846 | epoch avg. loss: 2.100 | test avg. loss: 64.985




Epochs: 1847 | epoch avg. loss: 2.518 | test avg. loss: 66.477
Epochs: 1848 | epoch avg. loss: 3.648 | test avg. loss: 67.273
Epochs: 1849 | epoch avg. loss: 4.908 | test avg. loss: 67.105


  4%|▎         | 1852/50000 [02:29<1:02:16, 12.89it/s]

Epochs: 1850 | epoch avg. loss: 4.161 | test avg. loss: 65.565
Epochs: 1851 | epoch avg. loss: 3.653 | test avg. loss: 65.208
Epochs: 1852 | epoch avg. loss: 2.664 | test avg. loss: 64.337


  4%|▎         | 1856/50000 [02:29<1:01:55, 12.96it/s]

Epochs: 1853 | epoch avg. loss: 2.294 | test avg. loss: 63.969
Epochs: 1854 | epoch avg. loss: 1.529 | test avg. loss: 63.600
Epochs: 1855 | epoch avg. loss: 1.332 | test avg. loss: 63.634


  4%|▎         | 1858/50000 [02:29<1:03:56, 12.55it/s]

Epochs: 1856 | epoch avg. loss: 1.188 | test avg. loss: 63.701
Epochs: 1857 | epoch avg. loss: 1.354 | test avg. loss: 64.069
Epochs: 1858 | epoch avg. loss: 1.391 | test avg. loss: 63.530


  4%|▎         | 1862/50000 [02:30<1:06:27, 12.07it/s]

Epochs: 1859 | epoch avg. loss: 1.163 | test avg. loss: 63.994
Epochs: 1860 | epoch avg. loss: 1.361 | test avg. loss: 63.571
Epochs: 1861 | epoch avg. loss: 1.106 | test avg. loss: 63.540


                                                      

Epochs: 1862 | epoch avg. loss: 0.940 | test avg. loss: 63.622
Epochs: 1863 | epoch avg. loss: 1.087 | test avg. loss: 64.262
Epochs: 1864 | epoch avg. loss: 1.465 | test avg. loss: 63.691


  4%|▎         | 1868/50000 [02:30<58:24, 13.74it/s]

Epochs: 1865 | epoch avg. loss: 1.242 | test avg. loss: 64.114
Epochs: 1866 | epoch avg. loss: 1.378 | test avg. loss: 64.451
Epochs: 1867 | epoch avg. loss: 1.920 | test avg. loss: 65.448
Epochs: 1868 | epoch avg. loss: 2.262 | test avg. loss: 64.785


  4%|▎         | 1872/50000 [02:30<1:01:28, 13.05it/s]

Epochs: 1869 | epoch avg. loss: 2.026 | test avg. loss: 64.316
Epochs: 1870 | epoch avg. loss: 1.201 | test avg. loss: 64.148
Epochs: 1871 | epoch avg. loss: 1.390 | test avg. loss: 63.915


  4%|▎         | 1874/50000 [02:31<1:03:00, 12.73it/s]

Epochs: 1872 | epoch avg. loss: 0.949 | test avg. loss: 64.206
Epochs: 1873 | epoch avg. loss: 1.382 | test avg. loss: 64.558
Epochs: 1874 | epoch avg. loss: 1.405 | test avg. loss: 64.249


  4%|▍         | 1878/50000 [02:31<59:32, 13.47it/s]

Epochs: 1875 | epoch avg. loss: 1.290 | test avg. loss: 65.272
Epochs: 1876 | epoch avg. loss: 1.984 | test avg. loss: 64.316
Epochs: 1877 | epoch avg. loss: 1.292 | test avg. loss: 64.841
Epochs: 1878 | epoch avg. loss: 1.686 | test avg. loss: 63.983


  4%|▍         | 1882/50000 [02:31<55:43, 14.39it/s]

Epochs: 1879 | epoch avg. loss: 1.187 | test avg. loss: 63.744
Epochs: 1880 | epoch avg. loss: 0.975 | test avg. loss: 63.494
Epochs: 1881 | epoch avg. loss: 0.955 | test avg. loss: 63.439
Epochs: 1882 | epoch avg. loss: 0.915 | test avg. loss: 63.538


  4%|▍         | 1886/50000 [02:31<1:00:52, 13.17it/s]

Epochs: 1883 | epoch avg. loss: 0.940 | test avg. loss: 63.501
Epochs: 1884 | epoch avg. loss: 1.015 | test avg. loss: 63.912
Epochs: 1885 | epoch avg. loss: 1.194 | test avg. loss: 63.973


  4%|▍         | 1888/50000 [02:32<1:03:55, 12.54it/s]

Epochs: 1886 | epoch avg. loss: 1.456 | test avg. loss: 64.937
Epochs: 1887 | epoch avg. loss: 1.986 | test avg. loss: 63.959
Epochs: 1888 | epoch avg. loss: 1.161 | test avg. loss: 64.471


  4%|▍         | 1892/50000 [02:32<59:05, 13.57it/s]

Epochs: 1889 | epoch avg. loss: 1.381 | test avg. loss: 64.111
Epochs: 1890 | epoch avg. loss: 1.165 | test avg. loss: 64.415
Epochs: 1891 | epoch avg. loss: 1.482 | test avg. loss: 63.677
Epochs: 1892 | epoch avg. loss: 0.930 | test avg. loss: 63.670


  4%|▍         | 1896/50000 [02:32<55:34, 14.43it/s]

Epochs: 1893 | epoch avg. loss: 0.915 | test avg. loss: 63.498
Epochs: 1894 | epoch avg. loss: 0.955 | test avg. loss: 63.544
Epochs: 1895 | epoch avg. loss: 0.987 | test avg. loss: 63.368
Epochs: 1896 | epoch avg. loss: 0.909 | test avg. loss: 63.973


  4%|▍         | 1898/50000 [02:33<55:11, 14.52it/s]

Epochs: 1897 | epoch avg. loss: 1.230 | test avg. loss: 64.932
Epochs: 1898 | epoch avg. loss: 2.357 | test avg. loss: 65.220
Epochs: 1899 | epoch avg. loss: 2.157 | test avg. loss: 64.313


  4%|▍         | 1904/50000 [02:34<1:42:48,  7.80it/s]

Epochs: 1900 | epoch avg. loss: 1.515 | test avg. loss: 64.228
Epochs: 1901 | epoch avg. loss: 1.301 | test avg. loss: 64.162
Epochs: 1902 | epoch avg. loss: 1.593 | test avg. loss: 64.498
Epochs: 1903 | epoch avg. loss: 1.993 | test avg. loss: 63.431


  4%|▍         | 1908/50000 [02:34<1:18:17, 10.24it/s]

Epochs: 1904 | epoch avg. loss: 0.910 | test avg. loss: 64.562
Epochs: 1905 | epoch avg. loss: 1.539 | test avg. loss: 64.336
Epochs: 1906 | epoch avg. loss: 1.806 | test avg. loss: 64.260
Epochs: 1907 | epoch avg. loss: 1.432 | test avg. loss: 63.804


  4%|▍         | 1912/50000 [02:34<1:04:57, 12.34it/s]

Epochs: 1908 | epoch avg. loss: 1.233 | test avg. loss: 63.613
Epochs: 1909 | epoch avg. loss: 1.055 | test avg. loss: 63.501
Epochs: 1910 | epoch avg. loss: 1.030 | test avg. loss: 63.409
Epochs: 1911 | epoch avg. loss: 1.303 | test avg. loss: 64.270


  4%|▍         | 1916/50000 [02:34<58:55, 13.60it/s]  

Epochs: 1912 | epoch avg. loss: 1.612 | test avg. loss: 64.275
Epochs: 1913 | epoch avg. loss: 1.750 | test avg. loss: 64.776
Epochs: 1914 | epoch avg. loss: 1.730 | test avg. loss: 64.894
Epochs: 1915 | epoch avg. loss: 2.142 | test avg. loss: 64.383


  4%|▍         | 1918/50000 [02:35<1:01:25, 13.04it/s]

Epochs: 1916 | epoch avg. loss: 1.307 | test avg. loss: 63.961
Epochs: 1917 | epoch avg. loss: 1.214 | test avg. loss: 64.185
Epochs: 1918 | epoch avg. loss: 1.296 | test avg. loss: 63.689


  4%|▍         | 1922/50000 [02:35<1:02:36, 12.80it/s]

Epochs: 1919 | epoch avg. loss: 1.251 | test avg. loss: 63.740
Epochs: 1920 | epoch avg. loss: 1.149 | test avg. loss: 64.412
Epochs: 1921 | epoch avg. loss: 1.955 | test avg. loss: 64.143


  4%|▍         | 1926/50000 [02:35<56:35, 14.16it/s]

Epochs: 1922 | epoch avg. loss: 1.482 | test avg. loss: 63.656
Epochs: 1923 | epoch avg. loss: 1.325 | test avg. loss: 64.435
Epochs: 1924 | epoch avg. loss: 1.866 | test avg. loss: 63.435
Epochs: 1925 | epoch avg. loss: 0.931 | test avg. loss: 64.010


  4%|▍         | 1928/50000 [02:35<55:56, 14.32it/s]

Epochs: 1926 | epoch avg. loss: 1.369 | test avg. loss: 63.544
Epochs: 1927 | epoch avg. loss: 1.314 | test avg. loss: 63.408
Epochs: 1928 | epoch avg. loss: 0.908 | test avg. loss: 63.460


  4%|▍         | 1932/50000 [02:36<1:00:04, 13.34it/s]

Epochs: 1929 | epoch avg. loss: 1.307 | test avg. loss: 63.243
Epochs: 1930 | epoch avg. loss: 1.244 | test avg. loss: 63.229
Epochs: 1931 | epoch avg. loss: 1.075 | test avg. loss: 63.299


  4%|▍         | 1934/50000 [02:36<58:04, 13.79it/s]

Epochs: 1932 | epoch avg. loss: 1.065 | test avg. loss: 63.517
Epochs: 1933 | epoch avg. loss: 0.940 | test avg. loss: 63.533
Epochs: 1934 | epoch avg. loss: 1.271 | test avg. loss: 63.790


  4%|▍         | 1938/50000 [02:36<56:31, 14.17it/s]

Epochs: 1935 | epoch avg. loss: 1.472 | test avg. loss: 64.239
Epochs: 1936 | epoch avg. loss: 2.109 | test avg. loss: 66.032
Epochs: 1937 | epoch avg. loss: 2.990 | test avg. loss: 65.681
Epochs: 1938 | epoch avg. loss: 3.539 | test avg. loss: 71.335


  4%|▍         | 1942/50000 [02:36<54:08, 14.79it/s]

Epochs: 1939 | epoch avg. loss: 6.833 | test avg. loss: 69.626
Epochs: 1940 | epoch avg. loss: 6.102 | test avg. loss: 70.853
Epochs: 1941 | epoch avg. loss: 6.358 | test avg. loss: 68.093
Epochs: 1942 | epoch avg. loss: 4.530 | test avg. loss: 69.175


  4%|▍         | 1946/50000 [02:37<57:23, 13.95it/s]

Epochs: 1943 | epoch avg. loss: 5.077 | test avg. loss: 68.913
Epochs: 1944 | epoch avg. loss: 4.944 | test avg. loss: 69.923
Epochs: 1945 | epoch avg. loss: 6.645 | test avg. loss: 67.612


  4%|▍         | 1948/50000 [02:37<1:00:25, 13.26it/s]

Epochs: 1946 | epoch avg. loss: 4.681 | test avg. loss: 64.470
Epochs: 1947 | epoch avg. loss: 2.447 | test avg. loss: 63.418
Epochs: 1948 | epoch avg. loss: 2.244 | test avg. loss: 63.476


  4%|▍         | 1952/50000 [02:37<57:05, 14.03it/s]

Epochs: 1949 | epoch avg. loss: 2.431 | test avg. loss: 64.820
Epochs: 1950 | epoch avg. loss: 2.417 | test avg. loss: 65.036
Epochs: 1951 | epoch avg. loss: 2.823 | test avg. loss: 67.052
Epochs: 1952 | epoch avg. loss: 3.358 | test avg. loss: 65.336


                                                    

Epochs: 1953 | epoch avg. loss: 2.321 | test avg. loss: 64.363
Epochs: 1954 | epoch avg. loss: 1.303 | test avg. loss: 64.358
Epochs: 1955 | epoch avg. loss: 1.753 | test avg. loss: 64.520


  4%|▍         | 1958/50000 [02:37<56:29, 14.17it/s]

Epochs: 1956 | epoch avg. loss: 1.756 | test avg. loss: 63.993
Epochs: 1957 | epoch avg. loss: 1.926 | test avg. loss: 64.959
Epochs: 1958 | epoch avg. loss: 2.231 | test avg. loss: 64.532


  4%|▍         | 1962/50000 [02:38<1:00:07, 13.32it/s]

Epochs: 1959 | epoch avg. loss: 2.422 | test avg. loss: 66.332
Epochs: 1960 | epoch avg. loss: 3.267 | test avg. loss: 65.723
Epochs: 1961 | epoch avg. loss: 3.146 | test avg. loss: 68.790


                                                      

Epochs: 1962 | epoch avg. loss: 4.825 | test avg. loss: 69.107
Epochs: 1963 | epoch avg. loss: 5.648 | test avg. loss: 69.903
Epochs: 1964 | epoch avg. loss: 6.059 | test avg. loss: 66.716


  4%|▍         | 1968/50000 [02:38<57:35, 13.90it/s]

Epochs: 1965 | epoch avg. loss: 2.822 | test avg. loss: 69.081
Epochs: 1966 | epoch avg. loss: 5.024 | test avg. loss: 68.076
Epochs: 1967 | epoch avg. loss: 4.131 | test avg. loss: 68.751


  4%|▍         | 1970/50000 [02:38<57:10, 14.00it/s]

Epochs: 1968 | epoch avg. loss: 4.059 | test avg. loss: 66.726
Epochs: 1969 | epoch avg. loss: 2.823 | test avg. loss: 69.820
Epochs: 1970 | epoch avg. loss: 4.840 | test avg. loss: 67.727


  4%|▍         | 1974/50000 [02:39<59:50, 13.38it/s]

Epochs: 1971 | epoch avg. loss: 4.303 | test avg. loss: 67.042
Epochs: 1972 | epoch avg. loss: 2.862 | test avg. loss: 65.374
Epochs: 1973 | epoch avg. loss: 2.219 | test avg. loss: 65.435


  4%|▍         | 1976/50000 [02:39<59:54, 13.36it/s]

Epochs: 1974 | epoch avg. loss: 2.016 | test avg. loss: 64.380
Epochs: 1975 | epoch avg. loss: 1.187 | test avg. loss: 64.514
Epochs: 1976 | epoch avg. loss: 1.020 | test avg. loss: 64.432


  4%|▍         | 1980/50000 [02:39<1:00:19, 13.27it/s]

Epochs: 1977 | epoch avg. loss: 0.867 | test avg. loss: 64.930
Epochs: 1978 | epoch avg. loss: 1.040 | test avg. loss: 65.289
Epochs: 1979 | epoch avg. loss: 1.627 | test avg. loss: 66.335


  4%|▍         | 1982/50000 [02:39<1:00:28, 13.23it/s]

Epochs: 1980 | epoch avg. loss: 2.020 | test avg. loss: 67.109
Epochs: 1981 | epoch avg. loss: 3.310 | test avg. loss: 67.898
Epochs: 1982 | epoch avg. loss: 3.340 | test avg. loss: 67.005


  4%|▍         | 1986/50000 [02:40<1:03:26, 12.62it/s]

Epochs: 1983 | epoch avg. loss: 4.011 | test avg. loss: 67.806
Epochs: 1984 | epoch avg. loss: 3.437 | test avg. loss: 67.881
Epochs: 1985 | epoch avg. loss: 3.449 | test avg. loss: 67.570


  4%|▍         | 1988/50000 [02:40<1:00:41, 13.18it/s]

Epochs: 1986 | epoch avg. loss: 2.816 | test avg. loss: 65.951
Epochs: 1987 | epoch avg. loss: 1.857 | test avg. loss: 66.333
Epochs: 1988 | epoch avg. loss: 1.793 | test avg. loss: 66.647


  4%|▍         | 1992/50000 [02:40<1:06:44, 11.99it/s]

Epochs: 1989 | epoch avg. loss: 3.073 | test avg. loss: 65.517
Epochs: 1990 | epoch avg. loss: 2.103 | test avg. loss: 64.213
Epochs: 1991 | epoch avg. loss: 0.898 | test avg. loss: 64.783


  4%|▍         | 1994/50000 [02:40<1:04:46, 12.35it/s]

Epochs: 1992 | epoch avg. loss: 1.153 | test avg. loss: 64.471
Epochs: 1993 | epoch avg. loss: 1.082 | test avg. loss: 64.643
Epochs: 1994 | epoch avg. loss: 0.889 | test avg. loss: 65.175


  4%|▍         | 1998/50000 [02:41<1:05:41, 12.18it/s]

Epochs: 1995 | epoch avg. loss: 1.701 | test avg. loss: 65.671
Epochs: 1996 | epoch avg. loss: 1.759 | test avg. loss: 64.681
Epochs: 1997 | epoch avg. loss: 1.705 | test avg. loss: 64.280


  4%|▍         | 1998/50000 [02:41<1:05:41, 12.18it/s]

Epochs: 1998 | epoch avg. loss: 0.944 | test avg. loss: 64.230
Epochs: 1999 | epoch avg. loss: 0.894 | test avg. loss: 64.278


  4%|▍         | 2002/50000 [02:42<2:51:08,  4.67it/s]

Epochs: 2000 | epoch avg. loss: 0.880 | test avg. loss: 64.409
Epochs: 2001 | epoch avg. loss: 0.930 | test avg. loss: 64.504
Epochs: 2002 | epoch avg. loss: 0.955 | test avg. loss: 64.768


  4%|▍         | 2006/50000 [02:42<1:55:31,  6.92it/s]

Epochs: 2003 | epoch avg. loss: 0.995 | test avg. loss: 64.775
Epochs: 2004 | epoch avg. loss: 1.119 | test avg. loss: 64.689
Epochs: 2005 | epoch avg. loss: 1.000 | test avg. loss: 64.401


  4%|▍         | 2008/50000 [02:43<1:39:32,  8.04it/s]

Epochs: 2006 | epoch avg. loss: 0.839 | test avg. loss: 64.232
Epochs: 2007 | epoch avg. loss: 0.897 | test avg. loss: 64.138
Epochs: 2008 | epoch avg. loss: 0.834 | test avg. loss: 64.055




Epochs: 2009 | epoch avg. loss: 0.906 | test avg. loss: 64.269
Epochs: 2010 | epoch avg. loss: 1.048 | test avg. loss: 64.798
Epochs: 2011 | epoch avg. loss: 1.743 | test avg. loss: 64.986


  4%|▍         | 2014/50000 [02:43<1:14:05, 10.79it/s]

Epochs: 2012 | epoch avg. loss: 1.465 | test avg. loss: 64.308
Epochs: 2013 | epoch avg. loss: 1.213 | test avg. loss: 64.385
Epochs: 2014 | epoch avg. loss: 0.920 | test avg. loss: 64.826


  4%|▍         | 2018/50000 [02:43<1:05:29, 12.21it/s]

Epochs: 2015 | epoch avg. loss: 1.426 | test avg. loss: 66.037
Epochs: 2016 | epoch avg. loss: 2.127 | test avg. loss: 65.677
Epochs: 2017 | epoch avg. loss: 2.144 | test avg. loss: 66.662


  4%|▍         | 2020/50000 [02:44<1:09:47, 11.46it/s]

Epochs: 2018 | epoch avg. loss: 2.324 | test avg. loss: 67.694
Epochs: 2019 | epoch avg. loss: 3.687 | test avg. loss: 70.035
Epochs: 2020 | epoch avg. loss: 5.629 | test avg. loss: 66.380


  4%|▍         | 2024/50000 [02:44<1:05:35, 12.19it/s]

Epochs: 2021 | epoch avg. loss: 3.598 | test avg. loss: 65.396
Epochs: 2022 | epoch avg. loss: 2.244 | test avg. loss: 65.208
Epochs: 2023 | epoch avg. loss: 2.109 | test avg. loss: 65.355


  4%|▍         | 2028/50000 [02:44<59:42, 13.39it/s]  

Epochs: 2024 | epoch avg. loss: 2.436 | test avg. loss: 64.473
Epochs: 2025 | epoch avg. loss: 1.765 | test avg. loss: 64.117
Epochs: 2026 | epoch avg. loss: 1.501 | test avg. loss: 64.063
Epochs: 2027 | epoch avg. loss: 2.084 | test avg. loss: 65.375


  4%|▍         | 2030/50000 [02:44<58:45, 13.61it/s]

Epochs: 2028 | epoch avg. loss: 3.609 | test avg. loss: 69.268
Epochs: 2029 | epoch avg. loss: 5.933 | test avg. loss: 71.925
Epochs: 2030 | epoch avg. loss: 8.687 | test avg. loss: 70.790


  4%|▍         | 2034/50000 [02:45<1:04:28, 12.40it/s]

Epochs: 2031 | epoch avg. loss: 6.224 | test avg. loss: 67.494
Epochs: 2032 | epoch avg. loss: 3.594 | test avg. loss: 68.216
Epochs: 2033 | epoch avg. loss: 3.199 | test avg. loss: 67.574


  4%|▍         | 2036/50000 [02:45<1:06:47, 11.97it/s]

Epochs: 2034 | epoch avg. loss: 3.175 | test avg. loss: 68.924
Epochs: 2035 | epoch avg. loss: 3.087 | test avg. loss: 69.156
Epochs: 2036 | epoch avg. loss: 3.335 | test avg. loss: 69.664




Epochs: 2037 | epoch avg. loss: 4.245 | test avg. loss: 66.756
Epochs: 2038 | epoch avg. loss: 2.982 | test avg. loss: 66.110
Epochs: 2039 | epoch avg. loss: 1.625 | test avg. loss: 65.968


  4%|▍         | 2042/50000 [02:45<58:38, 13.63it/s]

Epochs: 2040 | epoch avg. loss: 2.701 | test avg. loss: 65.941
Epochs: 2041 | epoch avg. loss: 2.068 | test avg. loss: 64.990
Epochs: 2042 | epoch avg. loss: 1.678 | test avg. loss: 65.006


  4%|▍         | 2046/50000 [02:46<1:04:59, 12.30it/s]

Epochs: 2043 | epoch avg. loss: 1.371 | test avg. loss: 64.881
Epochs: 2044 | epoch avg. loss: 1.142 | test avg. loss: 64.753
Epochs: 2045 | epoch avg. loss: 1.238 | test avg. loss: 64.390


  4%|▍         | 2048/50000 [02:46<1:03:42, 12.54it/s]

Epochs: 2046 | epoch avg. loss: 1.039 | test avg. loss: 63.963
Epochs: 2047 | epoch avg. loss: 0.919 | test avg. loss: 63.733
Epochs: 2048 | epoch avg. loss: 0.903 | test avg. loss: 63.713


  4%|▍         | 2052/50000 [02:46<1:04:23, 12.41it/s]

Epochs: 2049 | epoch avg. loss: 0.991 | test avg. loss: 63.957
Epochs: 2050 | epoch avg. loss: 0.930 | test avg. loss: 64.048
Epochs: 2051 | epoch avg. loss: 0.831 | test avg. loss: 64.370


  4%|▍         | 2054/50000 [02:46<1:03:16, 12.63it/s]

Epochs: 2052 | epoch avg. loss: 1.029 | test avg. loss: 65.241
Epochs: 2053 | epoch avg. loss: 1.558 | test avg. loss: 64.979
Epochs: 2054 | epoch avg. loss: 1.097 | test avg. loss: 65.151


  4%|▍         | 2058/50000 [02:46<1:02:44, 12.73it/s]

Epochs: 2055 | epoch avg. loss: 1.512 | test avg. loss: 65.852
Epochs: 2056 | epoch avg. loss: 1.789 | test avg. loss: 64.842
Epochs: 2057 | epoch avg. loss: 1.593 | test avg. loss: 64.368


  4%|▍         | 2060/50000 [02:47<1:02:22, 12.81it/s]

Epochs: 2058 | epoch avg. loss: 0.867 | test avg. loss: 64.386
Epochs: 2059 | epoch avg. loss: 1.134 | test avg. loss: 64.359
Epochs: 2060 | epoch avg. loss: 0.885 | test avg. loss: 64.260


  4%|▍         | 2064/50000 [02:47<57:52, 13.81it/s]

Epochs: 2061 | epoch avg. loss: 0.901 | test avg. loss: 64.314
Epochs: 2062 | epoch avg. loss: 0.941 | test avg. loss: 64.414
Epochs: 2063 | epoch avg. loss: 0.951 | test avg. loss: 64.578
Epochs: 2064 | epoch avg. loss: 1.181 | test avg. loss: 65.491


  4%|▍         | 2068/50000 [02:47<55:02, 14.51it/s]

Epochs: 2065 | epoch avg. loss: 1.614 | test avg. loss: 65.595
Epochs: 2066 | epoch avg. loss: 1.938 | test avg. loss: 66.509
Epochs: 2067 | epoch avg. loss: 2.409 | test avg. loss: 65.208


  4%|▍         | 2070/50000 [02:47<54:52, 14.56it/s]

Epochs: 2068 | epoch avg. loss: 1.855 | test avg. loss: 64.757
Epochs: 2069 | epoch avg. loss: 1.272 | test avg. loss: 64.187
Epochs: 2070 | epoch avg. loss: 1.677 | test avg. loss: 64.053


  4%|▍         | 2074/50000 [02:48<56:57, 14.02it/s]

Epochs: 2071 | epoch avg. loss: 1.116 | test avg. loss: 64.575
Epochs: 2072 | epoch avg. loss: 1.137 | test avg. loss: 64.493
Epochs: 2073 | epoch avg. loss: 1.050 | test avg. loss: 64.676


  4%|▍         | 2076/50000 [02:48<56:29, 14.14it/s]

Epochs: 2074 | epoch avg. loss: 0.996 | test avg. loss: 64.486
Epochs: 2075 | epoch avg. loss: 0.926 | test avg. loss: 64.528
Epochs: 2076 | epoch avg. loss: 0.996 | test avg. loss: 64.346


  4%|▍         | 2080/50000 [02:48<56:28, 14.14it/s]

Epochs: 2077 | epoch avg. loss: 0.784 | test avg. loss: 64.350
Epochs: 2078 | epoch avg. loss: 0.773 | test avg. loss: 64.388
Epochs: 2079 | epoch avg. loss: 0.870 | test avg. loss: 64.451


  4%|▍         | 2082/50000 [02:48<57:19, 13.93it/s]

Epochs: 2080 | epoch avg. loss: 0.860 | test avg. loss: 64.596
Epochs: 2081 | epoch avg. loss: 0.882 | test avg. loss: 64.379
Epochs: 2082 | epoch avg. loss: 0.782 | test avg. loss: 64.349


  4%|▍         | 2086/50000 [02:48<1:04:22, 12.41it/s]

Epochs: 2083 | epoch avg. loss: 0.788 | test avg. loss: 64.433
Epochs: 2084 | epoch avg. loss: 0.819 | test avg. loss: 64.313
Epochs: 2085 | epoch avg. loss: 0.824 | test avg. loss: 64.542


  4%|▍         | 2088/50000 [02:49<1:06:44, 11.96it/s]

Epochs: 2086 | epoch avg. loss: 1.057 | test avg. loss: 65.947
Epochs: 2087 | epoch avg. loss: 3.035 | test avg. loss: 65.228
Epochs: 2088 | epoch avg. loss: 1.399 | test avg. loss: 64.537


  4%|▍         | 2092/50000 [02:49<1:06:08, 12.07it/s]

Epochs: 2089 | epoch avg. loss: 1.123 | test avg. loss: 64.746
Epochs: 2090 | epoch avg. loss: 1.248 | test avg. loss: 64.135
Epochs: 2091 | epoch avg. loss: 0.816 | test avg. loss: 64.294


  4%|▍         | 2094/50000 [02:49<1:02:46, 12.72it/s]

Epochs: 2092 | epoch avg. loss: 1.167 | test avg. loss: 63.998
Epochs: 2093 | epoch avg. loss: 1.036 | test avg. loss: 63.913
Epochs: 2094 | epoch avg. loss: 1.454 | test avg. loss: 65.442


  4%|▍         | 2098/50000 [02:49<1:02:26, 12.79it/s]

Epochs: 2095 | epoch avg. loss: 1.905 | test avg. loss: 65.299
Epochs: 2096 | epoch avg. loss: 1.831 | test avg. loss: 66.305
Epochs: 2097 | epoch avg. loss: 2.244 | test avg. loss: 65.301


  4%|▍         | 2098/50000 [02:50<1:02:26, 12.79it/s]

Epochs: 2098 | epoch avg. loss: 1.734 | test avg. loss: 65.684
Epochs: 2099 | epoch avg. loss: 1.318 | test avg. loss: 66.330


  4%|▍         | 2102/50000 [02:51<2:35:31,  5.13it/s]

Epochs: 2100 | epoch avg. loss: 3.305 | test avg. loss: 65.413
Epochs: 2101 | epoch avg. loss: 1.379 | test avg. loss: 64.740
Epochs: 2102 | epoch avg. loss: 1.568 | test avg. loss: 64.690


  4%|▍         | 2106/50000 [02:51<1:46:14,  7.51it/s]

Epochs: 2103 | epoch avg. loss: 0.869 | test avg. loss: 64.789
Epochs: 2104 | epoch avg. loss: 1.153 | test avg. loss: 65.175
Epochs: 2105 | epoch avg. loss: 1.556 | test avg. loss: 64.709
Epochs: 2106 | epoch avg. loss: 0.950 | test avg. loss: 65.121


  4%|▍         | 2110/50000 [02:51<1:22:44,  9.65it/s]

Epochs: 2107 | epoch avg. loss: 0.816 | test avg. loss: 65.619
Epochs: 2108 | epoch avg. loss: 1.504 | test avg. loss: 65.469
Epochs: 2109 | epoch avg. loss: 0.847 | test avg. loss: 66.128


  4%|▍         | 2112/50000 [02:52<1:20:31,  9.91it/s]

Epochs: 2110 | epoch avg. loss: 1.649 | test avg. loss: 66.041
Epochs: 2111 | epoch avg. loss: 1.945 | test avg. loss: 64.720
Epochs: 2112 | epoch avg. loss: 1.186 | test avg. loss: 64.604


  4%|▍         | 2116/50000 [02:52<1:14:07, 10.77it/s]

Epochs: 2113 | epoch avg. loss: 0.939 | test avg. loss: 64.711
Epochs: 2114 | epoch avg. loss: 1.024 | test avg. loss: 64.937
Epochs: 2115 | epoch avg. loss: 1.060 | test avg. loss: 65.083


  4%|▍         | 2118/50000 [02:52<1:11:07, 11.22it/s]

Epochs: 2116 | epoch avg. loss: 1.038 | test avg. loss: 64.775
Epochs: 2117 | epoch avg. loss: 0.797 | test avg. loss: 64.820
Epochs: 2118 | epoch avg. loss: 0.903 | test avg. loss: 64.480


  4%|▍         | 2122/50000 [02:52<1:05:40, 12.15it/s]

Epochs: 2119 | epoch avg. loss: 0.760 | test avg. loss: 64.848
Epochs: 2120 | epoch avg. loss: 1.061 | test avg. loss: 64.432
Epochs: 2121 | epoch avg. loss: 0.777 | test avg. loss: 65.432


  4%|▍         | 2124/50000 [02:53<1:04:09, 12.44it/s]

Epochs: 2122 | epoch avg. loss: 1.287 | test avg. loss: 64.985
Epochs: 2123 | epoch avg. loss: 1.150 | test avg. loss: 65.338
Epochs: 2124 | epoch avg. loss: 1.040 | test avg. loss: 65.977


  4%|▍         | 2128/50000 [02:53<1:00:05, 13.28it/s]

Epochs: 2125 | epoch avg. loss: 1.957 | test avg. loss: 65.258
Epochs: 2126 | epoch avg. loss: 1.395 | test avg. loss: 64.549
Epochs: 2127 | epoch avg. loss: 0.817 | test avg. loss: 64.928


  4%|▍         | 2130/50000 [02:53<59:53, 13.32it/s]

Epochs: 2128 | epoch avg. loss: 0.885 | test avg. loss: 64.939
Epochs: 2129 | epoch avg. loss: 1.438 | test avg. loss: 64.880
Epochs: 2130 | epoch avg. loss: 1.235 | test avg. loss: 64.625


  4%|▍         | 2134/50000 [02:53<1:00:41, 13.14it/s]

Epochs: 2131 | epoch avg. loss: 0.881 | test avg. loss: 65.550
Epochs: 2132 | epoch avg. loss: 1.195 | test avg. loss: 66.082
Epochs: 2133 | epoch avg. loss: 2.075 | test avg. loss: 65.677


  4%|▍         | 2136/50000 [02:54<1:10:48, 11.27it/s]

Epochs: 2134 | epoch avg. loss: 2.264 | test avg. loss: 64.482
Epochs: 2135 | epoch avg. loss: 1.170 | test avg. loss: 64.367
Epochs: 2136 | epoch avg. loss: 1.711 | test avg. loss: 65.830


  4%|▍         | 2140/50000 [02:54<1:07:12, 11.87it/s]

Epochs: 2137 | epoch avg. loss: 1.940 | test avg. loss: 65.523
Epochs: 2138 | epoch avg. loss: 1.607 | test avg. loss: 65.842
Epochs: 2139 | epoch avg. loss: 1.467 | test avg. loss: 67.264




Epochs: 2140 | epoch avg. loss: 2.635 | test avg. loss: 67.006
Epochs: 2141 | epoch avg. loss: 2.393 | test avg. loss: 65.081
Epochs: 2142 | epoch avg. loss: 1.169 | test avg. loss: 64.775


  4%|▍         | 2146/50000 [02:54<1:00:38, 13.15it/s]

Epochs: 2143 | epoch avg. loss: 0.724 | test avg. loss: 64.748
Epochs: 2144 | epoch avg. loss: 0.817 | test avg. loss: 65.150
Epochs: 2145 | epoch avg. loss: 1.140 | test avg. loss: 64.811


  4%|▍         | 2148/50000 [02:55<1:02:04, 12.85it/s]

Epochs: 2146 | epoch avg. loss: 0.764 | test avg. loss: 65.718
Epochs: 2147 | epoch avg. loss: 1.508 | test avg. loss: 65.270
Epochs: 2148 | epoch avg. loss: 1.350 | test avg. loss: 65.311


  4%|▍         | 2152/50000 [02:55<58:46, 13.57it/s]  

Epochs: 2149 | epoch avg. loss: 0.998 | test avg. loss: 65.525
Epochs: 2150 | epoch avg. loss: 1.046 | test avg. loss: 65.647
Epochs: 2151 | epoch avg. loss: 1.228 | test avg. loss: 66.714


  4%|▍         | 2156/50000 [02:55<55:36, 14.34it/s]

Epochs: 2152 | epoch avg. loss: 1.711 | test avg. loss: 66.174
Epochs: 2153 | epoch avg. loss: 1.668 | test avg. loss: 66.951
Epochs: 2154 | epoch avg. loss: 1.724 | test avg. loss: 66.805
Epochs: 2155 | epoch avg. loss: 2.673 | test avg. loss: 66.593


  4%|▍         | 2158/50000 [02:55<56:07, 14.21it/s]

Epochs: 2156 | epoch avg. loss: 2.797 | test avg. loss: 65.159
Epochs: 2157 | epoch avg. loss: 1.558 | test avg. loss: 65.095
Epochs: 2158 | epoch avg. loss: 1.401 | test avg. loss: 64.719


  4%|▍         | 2162/50000 [02:56<59:33, 13.39it/s]

Epochs: 2159 | epoch avg. loss: 1.432 | test avg. loss: 64.425
Epochs: 2160 | epoch avg. loss: 1.154 | test avg. loss: 64.535
Epochs: 2161 | epoch avg. loss: 1.073 | test avg. loss: 64.595


  4%|▍         | 2164/50000 [02:56<1:02:05, 12.84it/s]

Epochs: 2162 | epoch avg. loss: 0.834 | test avg. loss: 64.677
Epochs: 2163 | epoch avg. loss: 0.873 | test avg. loss: 64.763
Epochs: 2164 | epoch avg. loss: 0.906 | test avg. loss: 64.725


  4%|▍         | 2168/50000 [02:56<57:30, 13.86it/s]

Epochs: 2165 | epoch avg. loss: 0.844 | test avg. loss: 65.122
Epochs: 2166 | epoch avg. loss: 1.569 | test avg. loss: 65.084
Epochs: 2167 | epoch avg. loss: 0.990 | test avg. loss: 64.971
Epochs: 2168 | epoch avg. loss: 0.829 | test avg. loss: 65.036


  4%|▍         | 2172/50000 [02:56<56:26, 14.12it/s]

Epochs: 2169 | epoch avg. loss: 0.838 | test avg. loss: 65.626
Epochs: 2170 | epoch avg. loss: 1.187 | test avg. loss: 65.296
Epochs: 2171 | epoch avg. loss: 1.367 | test avg. loss: 65.375


  4%|▍         | 2174/50000 [02:56<58:13, 13.69it/s]

Epochs: 2172 | epoch avg. loss: 1.374 | test avg. loss: 64.967
Epochs: 2173 | epoch avg. loss: 2.004 | test avg. loss: 64.795
Epochs: 2174 | epoch avg. loss: 1.057 | test avg. loss: 64.982


  4%|▍         | 2176/50000 [02:57<59:54, 13.30it/s]

Epochs: 2175 | epoch avg. loss: 0.867 | test avg. loss: 64.984
Epochs: 2176 | epoch avg. loss: 0.823 | test avg. loss: 65.202




Epochs: 2177 | epoch avg. loss: 0.796 | test avg. loss: 65.313
Epochs: 2178 | epoch avg. loss: 0.938 | test avg. loss: 65.169
Epochs: 2179 | epoch avg. loss: 0.939 | test avg. loss: 65.134


  4%|▍         | 2184/50000 [02:57<56:54, 14.00it/s]

Epochs: 2180 | epoch avg. loss: 0.839 | test avg. loss: 65.067
Epochs: 2181 | epoch avg. loss: 1.025 | test avg. loss: 64.827
Epochs: 2182 | epoch avg. loss: 0.832 | test avg. loss: 65.571
Epochs: 2183 | epoch avg. loss: 1.281 | test avg. loss: 65.050


  4%|▍         | 2186/50000 [02:57<55:00, 14.49it/s]

Epochs: 2184 | epoch avg. loss: 1.213 | test avg. loss: 65.305
Epochs: 2185 | epoch avg. loss: 1.114 | test avg. loss: 64.680
Epochs: 2186 | epoch avg. loss: 0.788 | test avg. loss: 64.660
Epochs: 2187 | epoch avg. loss: 0.724 | test avg. loss: 65.043


  4%|▍         | 2190/50000 [02:58<59:11, 13.46it/s]

Epochs: 2188 | epoch avg. loss: 1.606 | test avg. loss: 64.686
Epochs: 2189 | epoch avg. loss: 1.266 | test avg. loss: 64.980
Epochs: 2190 | epoch avg. loss: 1.185 | test avg. loss: 65.184


  4%|▍         | 2194/50000 [02:58<55:14, 14.42it/s]

Epochs: 2191 | epoch avg. loss: 1.528 | test avg. loss: 65.182
Epochs: 2192 | epoch avg. loss: 1.206 | test avg. loss: 64.792
Epochs: 2193 | epoch avg. loss: 1.508 | test avg. loss: 64.365
Epochs: 2194 | epoch avg. loss: 0.808 | test avg. loss: 64.344


  4%|▍         | 2198/50000 [02:58<56:44, 14.04it/s]

Epochs: 2195 | epoch avg. loss: 0.686 | test avg. loss: 64.997
Epochs: 2196 | epoch avg. loss: 1.109 | test avg. loss: 64.708
Epochs: 2197 | epoch avg. loss: 0.848 | test avg. loss: 65.218


  4%|▍         | 2198/50000 [02:58<56:44, 14.04it/s]

Epochs: 2198 | epoch avg. loss: 0.870 | test avg. loss: 65.682
Epochs: 2199 | epoch avg. loss: 1.488 | test avg. loss: 65.859


  4%|▍         | 2202/50000 [02:59<2:08:09,  6.22it/s]

Epochs: 2200 | epoch avg. loss: 1.463 | test avg. loss: 65.271
Epochs: 2201 | epoch avg. loss: 1.483 | test avg. loss: 64.961
Epochs: 2202 | epoch avg. loss: 0.851 | test avg. loss: 64.830


  4%|▍         | 2206/50000 [03:00<1:38:44,  8.07it/s]

Epochs: 2203 | epoch avg. loss: 1.140 | test avg. loss: 64.752
Epochs: 2204 | epoch avg. loss: 1.060 | test avg. loss: 65.105
Epochs: 2205 | epoch avg. loss: 1.041 | test avg. loss: 64.688


  4%|▍         | 2208/50000 [03:00<1:30:15,  8.83it/s]

Epochs: 2206 | epoch avg. loss: 0.770 | test avg. loss: 64.943
Epochs: 2207 | epoch avg. loss: 0.890 | test avg. loss: 65.812
Epochs: 2208 | epoch avg. loss: 1.544 | test avg. loss: 66.020


  4%|▍         | 2212/50000 [03:00<1:11:16, 11.18it/s]

Epochs: 2209 | epoch avg. loss: 1.838 | test avg. loss: 65.268
Epochs: 2210 | epoch avg. loss: 1.026 | test avg. loss: 65.145
Epochs: 2211 | epoch avg. loss: 0.964 | test avg. loss: 64.830
Epochs: 2212 | epoch avg. loss: 1.413 | test avg. loss: 64.507


  4%|▍         | 2216/50000 [03:00<1:04:27, 12.36it/s]

Epochs: 2213 | epoch avg. loss: 1.199 | test avg. loss: 65.311
Epochs: 2214 | epoch avg. loss: 1.408 | test avg. loss: 65.165
Epochs: 2215 | epoch avg. loss: 1.677 | test avg. loss: 64.852


  4%|▍         | 2218/50000 [03:01<1:03:38, 12.51it/s]

Epochs: 2216 | epoch avg. loss: 1.256 | test avg. loss: 64.935
Epochs: 2217 | epoch avg. loss: 0.903 | test avg. loss: 65.133
Epochs: 2218 | epoch avg. loss: 0.963 | test avg. loss: 65.995


  4%|▍         | 2222/50000 [03:01<1:00:55, 13.07it/s]

Epochs: 2219 | epoch avg. loss: 1.206 | test avg. loss: 66.502
Epochs: 2220 | epoch avg. loss: 1.776 | test avg. loss: 66.671
Epochs: 2221 | epoch avg. loss: 1.725 | test avg. loss: 66.176
Epochs: 2222 | epoch avg. loss: 2.612 | test avg. loss: 67.181


  4%|▍         | 2226/50000 [03:01<56:09, 14.18it/s]

Epochs: 2223 | epoch avg. loss: 2.702 | test avg. loss: 65.489
Epochs: 2224 | epoch avg. loss: 2.120 | test avg. loss: 65.211
Epochs: 2225 | epoch avg. loss: 1.401 | test avg. loss: 65.620
Epochs: 2226 | epoch avg. loss: 1.416 | test avg. loss: 66.435


  4%|▍         | 2230/50000 [03:01<53:23, 14.91it/s]

Epochs: 2227 | epoch avg. loss: 1.602 | test avg. loss: 66.627
Epochs: 2228 | epoch avg. loss: 1.774 | test avg. loss: 70.687
Epochs: 2229 | epoch avg. loss: 5.225 | test avg. loss: 71.007


  4%|▍         | 2234/50000 [03:02<53:12, 14.96it/s]

Epochs: 2230 | epoch avg. loss: 5.988 | test avg. loss: 66.174
Epochs: 2231 | epoch avg. loss: 2.323 | test avg. loss: 65.822
Epochs: 2232 | epoch avg. loss: 1.160 | test avg. loss: 65.843
Epochs: 2233 | epoch avg. loss: 0.802 | test avg. loss: 65.981


  4%|▍         | 2236/50000 [03:02<56:39, 14.05it/s]

Epochs: 2234 | epoch avg. loss: 0.836 | test avg. loss: 66.489
Epochs: 2235 | epoch avg. loss: 0.943 | test avg. loss: 66.780
Epochs: 2236 | epoch avg. loss: 1.402 | test avg. loss: 66.771


  4%|▍         | 2240/50000 [03:02<1:00:13, 13.22it/s]

Epochs: 2237 | epoch avg. loss: 0.874 | test avg. loss: 67.108
Epochs: 2238 | epoch avg. loss: 1.842 | test avg. loss: 66.667
Epochs: 2239 | epoch avg. loss: 1.494 | test avg. loss: 66.313


  4%|▍         | 2242/50000 [03:02<1:04:46, 12.29it/s]

Epochs: 2240 | epoch avg. loss: 0.957 | test avg. loss: 66.447
Epochs: 2241 | epoch avg. loss: 0.820 | test avg. loss: 66.353
Epochs: 2242 | epoch avg. loss: 0.814 | test avg. loss: 67.119


  4%|▍         | 2244/50000 [03:03<1:10:34, 11.28it/s]

Epochs: 2243 | epoch avg. loss: 1.196 | test avg. loss: 67.323
Epochs: 2244 | epoch avg. loss: 1.676 | test avg. loss: 67.004


  4%|▍         | 2248/50000 [03:03<1:13:18, 10.86it/s]

Epochs: 2245 | epoch avg. loss: 2.061 | test avg. loss: 66.649
Epochs: 2246 | epoch avg. loss: 1.489 | test avg. loss: 67.599
Epochs: 2247 | epoch avg. loss: 3.328 | test avg. loss: 66.099


  4%|▍         | 2250/50000 [03:03<1:09:06, 11.52it/s]

Epochs: 2248 | epoch avg. loss: 2.090 | test avg. loss: 67.381
Epochs: 2249 | epoch avg. loss: 2.683 | test avg. loss: 70.945
Epochs: 2250 | epoch avg. loss: 4.359 | test avg. loss: 68.748


  5%|▍         | 2254/50000 [03:03<1:07:00, 11.87it/s]

Epochs: 2251 | epoch avg. loss: 3.099 | test avg. loss: 69.281
Epochs: 2252 | epoch avg. loss: 4.062 | test avg. loss: 67.752
Epochs: 2253 | epoch avg. loss: 2.748 | test avg. loss: 67.373


  5%|▍         | 2256/50000 [03:04<1:05:59, 12.06it/s]

Epochs: 2254 | epoch avg. loss: 2.431 | test avg. loss: 66.491
Epochs: 2255 | epoch avg. loss: 1.686 | test avg. loss: 66.637
Epochs: 2256 | epoch avg. loss: 1.362 | test avg. loss: 66.909


  5%|▍         | 2260/50000 [03:04<1:03:56, 12.44it/s]

Epochs: 2257 | epoch avg. loss: 1.261 | test avg. loss: 66.923
Epochs: 2258 | epoch avg. loss: 1.195 | test avg. loss: 67.632
Epochs: 2259 | epoch avg. loss: 1.242 | test avg. loss: 67.329


  5%|▍         | 2262/50000 [03:04<1:04:26, 12.35it/s]

Epochs: 2260 | epoch avg. loss: 1.234 | test avg. loss: 68.347
Epochs: 2261 | epoch avg. loss: 1.559 | test avg. loss: 67.951
Epochs: 2262 | epoch avg. loss: 1.804 | test avg. loss: 67.858


  5%|▍         | 2266/50000 [03:04<1:01:54, 12.85it/s]

Epochs: 2263 | epoch avg. loss: 1.662 | test avg. loss: 66.994
Epochs: 2264 | epoch avg. loss: 0.683 | test avg. loss: 67.853
Epochs: 2265 | epoch avg. loss: 1.527 | test avg. loss: 67.180


  5%|▍         | 2268/50000 [03:05<1:02:10, 12.79it/s]

Epochs: 2266 | epoch avg. loss: 1.137 | test avg. loss: 66.968
Epochs: 2267 | epoch avg. loss: 1.305 | test avg. loss: 66.881
Epochs: 2268 | epoch avg. loss: 1.265 | test avg. loss: 67.104


  5%|▍         | 2272/50000 [03:05<1:01:07, 13.01it/s]

Epochs: 2269 | epoch avg. loss: 1.618 | test avg. loss: 67.799
Epochs: 2270 | epoch avg. loss: 1.827 | test avg. loss: 69.814
Epochs: 2271 | epoch avg. loss: 3.592 | test avg. loss: 73.077


  5%|▍         | 2274/50000 [03:05<1:02:23, 12.75it/s]

Epochs: 2272 | epoch avg. loss: 7.033 | test avg. loss: 69.168
Epochs: 2273 | epoch avg. loss: 5.217 | test avg. loss: 67.702
Epochs: 2274 | epoch avg. loss: 3.874 | test avg. loss: 66.345


  5%|▍         | 2278/50000 [03:05<1:00:28, 13.15it/s]

Epochs: 2275 | epoch avg. loss: 2.662 | test avg. loss: 66.765
Epochs: 2276 | epoch avg. loss: 1.806 | test avg. loss: 66.208
Epochs: 2277 | epoch avg. loss: 0.897 | test avg. loss: 66.113


  5%|▍         | 2280/50000 [03:05<1:03:11, 12.59it/s]

Epochs: 2278 | epoch avg. loss: 1.146 | test avg. loss: 66.957
Epochs: 2279 | epoch avg. loss: 1.280 | test avg. loss: 67.680
Epochs: 2280 | epoch avg. loss: 2.144 | test avg. loss: 67.188


  5%|▍         | 2284/50000 [03:06<1:08:50, 11.55it/s]

Epochs: 2281 | epoch avg. loss: 1.172 | test avg. loss: 67.002
Epochs: 2282 | epoch avg. loss: 0.793 | test avg. loss: 67.729
Epochs: 2283 | epoch avg. loss: 1.184 | test avg. loss: 67.237


  5%|▍         | 2286/50000 [03:06<1:04:18, 12.37it/s]

Epochs: 2284 | epoch avg. loss: 1.123 | test avg. loss: 67.464
Epochs: 2285 | epoch avg. loss: 0.850 | test avg. loss: 67.510
Epochs: 2286 | epoch avg. loss: 1.575 | test avg. loss: 67.432


  5%|▍         | 2290/50000 [03:06<1:07:40, 11.75it/s]

Epochs: 2287 | epoch avg. loss: 1.412 | test avg. loss: 67.411
Epochs: 2288 | epoch avg. loss: 0.843 | test avg. loss: 67.194
Epochs: 2289 | epoch avg. loss: 1.404 | test avg. loss: 68.379


  5%|▍         | 2292/50000 [03:06<1:06:34, 11.94it/s]

Epochs: 2290 | epoch avg. loss: 1.775 | test avg. loss: 67.239
Epochs: 2291 | epoch avg. loss: 1.355 | test avg. loss: 67.455
Epochs: 2292 | epoch avg. loss: 1.386 | test avg. loss: 66.905


  5%|▍         | 2296/50000 [03:07<1:05:10, 12.20it/s]

Epochs: 2293 | epoch avg. loss: 1.624 | test avg. loss: 66.863
Epochs: 2294 | epoch avg. loss: 1.713 | test avg. loss: 65.782
Epochs: 2295 | epoch avg. loss: 1.193 | test avg. loss: 66.171


  5%|▍         | 2298/50000 [03:07<1:02:35, 12.70it/s]

Epochs: 2296 | epoch avg. loss: 1.327 | test avg. loss: 66.974
Epochs: 2297 | epoch avg. loss: 1.367 | test avg. loss: 66.541
Epochs: 2298 | epoch avg. loss: 1.163 | test avg. loss: 68.060
Epochs: 2299 | epoch avg. loss: 1.975 | test avg. loss: 67.199


  5%|▍         | 2302/50000 [03:09<2:49:19,  4.70it/s]

Epochs: 2300 | epoch avg. loss: 1.632 | test avg. loss: 68.685
Epochs: 2301 | epoch avg. loss: 2.952 | test avg. loss: 66.362
Epochs: 2302 | epoch avg. loss: 1.397 | test avg. loss: 66.016


  5%|▍         | 2306/50000 [03:09<1:52:38,  7.06it/s]

Epochs: 2303 | epoch avg. loss: 1.348 | test avg. loss: 66.637
Epochs: 2304 | epoch avg. loss: 1.226 | test avg. loss: 66.372
Epochs: 2305 | epoch avg. loss: 1.234 | test avg. loss: 66.844
Epochs: 2306 | epoch avg. loss: 1.135 | test avg. loss: 66.374


  5%|▍         | 2310/50000 [03:09<1:22:11,  9.67it/s]

Epochs: 2307 | epoch avg. loss: 1.137 | test avg. loss: 67.090
Epochs: 2308 | epoch avg. loss: 2.173 | test avg. loss: 65.861
Epochs: 2309 | epoch avg. loss: 0.682 | test avg. loss: 66.165


  5%|▍         | 2314/50000 [03:09<1:06:38, 11.93it/s]

Epochs: 2310 | epoch avg. loss: 1.152 | test avg. loss: 65.816
Epochs: 2311 | epoch avg. loss: 0.940 | test avg. loss: 66.049
Epochs: 2312 | epoch avg. loss: 1.092 | test avg. loss: 67.752
Epochs: 2313 | epoch avg. loss: 1.751 | test avg. loss: 69.896


  5%|▍         | 2316/50000 [03:09<1:04:36, 12.30it/s]

Epochs: 2314 | epoch avg. loss: 3.449 | test avg. loss: 70.200
Epochs: 2315 | epoch avg. loss: 3.774 | test avg. loss: 69.613
Epochs: 2316 | epoch avg. loss: 4.508 | test avg. loss: 67.010


  5%|▍         | 2320/50000 [03:10<1:01:11, 12.99it/s]

Epochs: 2317 | epoch avg. loss: 3.409 | test avg. loss: 66.102
Epochs: 2318 | epoch avg. loss: 2.633 | test avg. loss: 66.499
Epochs: 2319 | epoch avg. loss: 2.154 | test avg. loss: 69.388


  5%|▍         | 2322/50000 [03:10<1:01:15, 12.97it/s]

Epochs: 2320 | epoch avg. loss: 2.754 | test avg. loss: 69.132
Epochs: 2321 | epoch avg. loss: 2.686 | test avg. loss: 67.996
Epochs: 2322 | epoch avg. loss: 1.140 | test avg. loss: 68.464
Epochs: 2323 | epoch avg. loss: 2.207 | test avg. loss: 68.168


  5%|▍         | 2326/50000 [03:10<57:27, 13.83it/s]

Epochs: 2324 | epoch avg. loss: 3.124 | test avg. loss: 66.134
Epochs: 2325 | epoch avg. loss: 2.194 | test avg. loss: 68.308
Epochs: 2326 | epoch avg. loss: 3.274 | test avg. loss: 67.723


  5%|▍         | 2330/50000 [03:10<59:17, 13.40it/s]

Epochs: 2327 | epoch avg. loss: 2.234 | test avg. loss: 66.322
Epochs: 2328 | epoch avg. loss: 1.206 | test avg. loss: 66.330
Epochs: 2329 | epoch avg. loss: 0.897 | test avg. loss: 66.468


  5%|▍         | 2332/50000 [03:11<59:41, 13.31it/s]

Epochs: 2330 | epoch avg. loss: 0.841 | test avg. loss: 66.283
Epochs: 2331 | epoch avg. loss: 0.790 | test avg. loss: 66.548
Epochs: 2332 | epoch avg. loss: 0.960 | test avg. loss: 66.212


  5%|▍         | 2336/50000 [03:11<58:15, 13.63it/s]  

Epochs: 2333 | epoch avg. loss: 0.707 | test avg. loss: 66.169
Epochs: 2334 | epoch avg. loss: 0.727 | test avg. loss: 66.276
Epochs: 2335 | epoch avg. loss: 0.743 | test avg. loss: 66.416


  5%|▍         | 2338/50000 [03:11<59:02, 13.46it/s]

Epochs: 2336 | epoch avg. loss: 1.023 | test avg. loss: 66.432
Epochs: 2337 | epoch avg. loss: 0.776 | test avg. loss: 66.371
Epochs: 2338 | epoch avg. loss: 0.679 | test avg. loss: 66.472


  5%|▍         | 2342/50000 [03:11<1:00:55, 13.04it/s]

Epochs: 2339 | epoch avg. loss: 0.676 | test avg. loss: 66.542
Epochs: 2340 | epoch avg. loss: 0.735 | test avg. loss: 67.640
Epochs: 2341 | epoch avg. loss: 1.220 | test avg. loss: 68.453


  5%|▍         | 2344/50000 [03:12<1:02:44, 12.66it/s]

Epochs: 2342 | epoch avg. loss: 2.853 | test avg. loss: 67.339
Epochs: 2343 | epoch avg. loss: 1.763 | test avg. loss: 66.327
Epochs: 2344 | epoch avg. loss: 2.719 | test avg. loss: 65.725




Epochs: 2345 | epoch avg. loss: 1.897 | test avg. loss: 67.692
Epochs: 2346 | epoch avg. loss: 2.137 | test avg. loss: 68.044
Epochs: 2347 | epoch avg. loss: 2.655 | test avg. loss: 68.847


  5%|▍         | 2352/50000 [03:12<55:24, 14.33it/s]

Epochs: 2348 | epoch avg. loss: 2.160 | test avg. loss: 68.637
Epochs: 2349 | epoch avg. loss: 2.339 | test avg. loss: 69.510
Epochs: 2350 | epoch avg. loss: 4.052 | test avg. loss: 66.960
Epochs: 2351 | epoch avg. loss: 3.747 | test avg. loss: 67.660


  5%|▍         | 2354/50000 [03:12<55:04, 14.42it/s]

Epochs: 2352 | epoch avg. loss: 2.712 | test avg. loss: 71.637
Epochs: 2353 | epoch avg. loss: 3.876 | test avg. loss: 70.537
Epochs: 2354 | epoch avg. loss: 4.468 | test avg. loss: 69.643


  5%|▍         | 2358/50000 [03:12<55:49, 14.22it/s]

Epochs: 2355 | epoch avg. loss: 2.496 | test avg. loss: 67.333
Epochs: 2356 | epoch avg. loss: 1.154 | test avg. loss: 67.337
Epochs: 2357 | epoch avg. loss: 1.143 | test avg. loss: 68.511


  5%|▍         | 2360/50000 [03:13<1:00:16, 13.17it/s]

Epochs: 2358 | epoch avg. loss: 1.641 | test avg. loss: 70.060
Epochs: 2359 | epoch avg. loss: 3.230 | test avg. loss: 73.399
Epochs: 2360 | epoch avg. loss: 5.989 | test avg. loss: 68.169


  5%|▍         | 2364/50000 [03:13<57:56, 13.70it/s]

Epochs: 2361 | epoch avg. loss: 2.900 | test avg. loss: 69.094
Epochs: 2362 | epoch avg. loss: 6.334 | test avg. loss: 66.689
Epochs: 2363 | epoch avg. loss: 2.545 | test avg. loss: 68.042


  5%|▍         | 2366/50000 [03:13<58:07, 13.66it/s]

Epochs: 2364 | epoch avg. loss: 2.599 | test avg. loss: 68.442
Epochs: 2365 | epoch avg. loss: 2.413 | test avg. loss: 67.200
Epochs: 2366 | epoch avg. loss: 1.375 | test avg. loss: 66.897


  5%|▍         | 2370/50000 [03:13<59:46, 13.28it/s]

Epochs: 2367 | epoch avg. loss: 1.672 | test avg. loss: 66.605
Epochs: 2368 | epoch avg. loss: 1.042 | test avg. loss: 66.657
Epochs: 2369 | epoch avg. loss: 1.159 | test avg. loss: 67.991




Epochs: 2370 | epoch avg. loss: 1.410 | test avg. loss: 68.141
Epochs: 2371 | epoch avg. loss: 2.358 | test avg. loss: 68.573


  5%|▍         | 2376/50000 [03:14<1:05:19, 12.15it/s]

Epochs: 2372 | epoch avg. loss: 1.830 | test avg. loss: 66.988
Epochs: 2373 | epoch avg. loss: 1.302 | test avg. loss: 67.008
Epochs: 2374 | epoch avg. loss: 1.030 | test avg. loss: 67.039
Epochs: 2375 | epoch avg. loss: 1.622 | test avg. loss: 68.682


  5%|▍         | 2378/50000 [03:14<1:03:35, 12.48it/s]

Epochs: 2376 | epoch avg. loss: 2.102 | test avg. loss: 67.561
Epochs: 2377 | epoch avg. loss: 0.994 | test avg. loss: 67.285
Epochs: 2378 | epoch avg. loss: 0.844 | test avg. loss: 67.272


  5%|▍         | 2382/50000 [03:14<1:01:45, 12.85it/s]

Epochs: 2379 | epoch avg. loss: 0.883 | test avg. loss: 68.199
Epochs: 2380 | epoch avg. loss: 1.365 | test avg. loss: 67.237
Epochs: 2381 | epoch avg. loss: 0.800 | test avg. loss: 68.290


  5%|▍         | 2384/50000 [03:15<1:02:51, 12.62it/s]

Epochs: 2382 | epoch avg. loss: 1.219 | test avg. loss: 68.018
Epochs: 2383 | epoch avg. loss: 1.921 | test avg. loss: 67.243
Epochs: 2384 | epoch avg. loss: 0.996 | test avg. loss: 66.748


  5%|▍         | 2388/50000 [03:15<1:02:16, 12.74it/s]

Epochs: 2385 | epoch avg. loss: 1.156 | test avg. loss: 66.693
Epochs: 2386 | epoch avg. loss: 1.075 | test avg. loss: 68.008
Epochs: 2387 | epoch avg. loss: 1.683 | test avg. loss: 66.710


  5%|▍         | 2392/50000 [03:15<56:08, 14.13it/s]

Epochs: 2388 | epoch avg. loss: 1.179 | test avg. loss: 66.698
Epochs: 2389 | epoch avg. loss: 0.900 | test avg. loss: 66.754
Epochs: 2390 | epoch avg. loss: 0.763 | test avg. loss: 66.663
Epochs: 2391 | epoch avg. loss: 1.203 | test avg. loss: 66.607


  5%|▍         | 2394/50000 [03:15<55:41, 14.25it/s]

Epochs: 2392 | epoch avg. loss: 0.777 | test avg. loss: 66.952
Epochs: 2393 | epoch avg. loss: 0.776 | test avg. loss: 67.041
Epochs: 2394 | epoch avg. loss: 0.812 | test avg. loss: 67.358


  5%|▍         | 2398/50000 [03:16<1:01:12, 12.96it/s]

Epochs: 2395 | epoch avg. loss: 0.795 | test avg. loss: 68.888
Epochs: 2396 | epoch avg. loss: 1.944 | test avg. loss: 69.638
Epochs: 2397 | epoch avg. loss: 2.392 | test avg. loss: 68.265


  5%|▍         | 2398/50000 [03:16<1:01:12, 12.96it/s]

Epochs: 2398 | epoch avg. loss: 2.241 | test avg. loss: 67.187
Epochs: 2399 | epoch avg. loss: 1.768 | test avg. loss: 66.727


  5%|▍         | 2402/50000 [03:17<2:38:25,  5.01it/s]

Epochs: 2400 | epoch avg. loss: 1.314 | test avg. loss: 67.259
Epochs: 2401 | epoch avg. loss: 1.249 | test avg. loss: 67.743
Epochs: 2402 | epoch avg. loss: 0.989 | test avg. loss: 67.590


  5%|▍         | 2406/50000 [03:17<1:52:31,  7.05it/s]

Epochs: 2403 | epoch avg. loss: 1.009 | test avg. loss: 67.825
Epochs: 2404 | epoch avg. loss: 1.469 | test avg. loss: 67.210
Epochs: 2405 | epoch avg. loss: 1.054 | test avg. loss: 66.928


  5%|▍         | 2408/50000 [03:18<1:37:33,  8.13it/s]

Epochs: 2406 | epoch avg. loss: 2.096 | test avg. loss: 70.752
Epochs: 2407 | epoch avg. loss: 3.638 | test avg. loss: 68.798
Epochs: 2408 | epoch avg. loss: 2.463 | test avg. loss: 68.053


  5%|▍         | 2412/50000 [03:18<1:17:02, 10.30it/s]

Epochs: 2409 | epoch avg. loss: 2.103 | test avg. loss: 66.513
Epochs: 2410 | epoch avg. loss: 1.611 | test avg. loss: 66.222
Epochs: 2411 | epoch avg. loss: 1.893 | test avg. loss: 66.603




Epochs: 2412 | epoch avg. loss: 1.191 | test avg. loss: 66.354
Epochs: 2413 | epoch avg. loss: 0.838 | test avg. loss: 67.265
Epochs: 2414 | epoch avg. loss: 1.148 | test avg. loss: 67.067


  5%|▍         | 2418/50000 [03:18<1:03:19, 12.52it/s]

Epochs: 2415 | epoch avg. loss: 1.299 | test avg. loss: 67.463
Epochs: 2416 | epoch avg. loss: 1.179 | test avg. loss: 67.075
Epochs: 2417 | epoch avg. loss: 0.762 | test avg. loss: 68.184


  5%|▍         | 2420/50000 [03:19<1:06:20, 11.95it/s]

Epochs: 2418 | epoch avg. loss: 1.978 | test avg. loss: 67.608
Epochs: 2419 | epoch avg. loss: 1.125 | test avg. loss: 67.365
Epochs: 2420 | epoch avg. loss: 0.744 | test avg. loss: 67.402


  5%|▍         | 2424/50000 [03:19<1:06:26, 11.93it/s]

Epochs: 2421 | epoch avg. loss: 0.679 | test avg. loss: 67.410
Epochs: 2422 | epoch avg. loss: 0.661 | test avg. loss: 67.466
Epochs: 2423 | epoch avg. loss: 0.856 | test avg. loss: 67.249


  5%|▍         | 2426/50000 [03:19<1:07:09, 11.81it/s]

Epochs: 2424 | epoch avg. loss: 0.759 | test avg. loss: 67.158
Epochs: 2425 | epoch avg. loss: 0.753 | test avg. loss: 67.178
Epochs: 2426 | epoch avg. loss: 0.775 | test avg. loss: 67.021


  5%|▍         | 2430/50000 [03:19<1:03:11, 12.55it/s]

Epochs: 2427 | epoch avg. loss: 0.914 | test avg. loss: 66.757
Epochs: 2428 | epoch avg. loss: 0.654 | test avg. loss: 66.640
Epochs: 2429 | epoch avg. loss: 0.705 | test avg. loss: 66.830


  5%|▍         | 2432/50000 [03:20<1:06:34, 11.91it/s]

Epochs: 2430 | epoch avg. loss: 0.670 | test avg. loss: 66.949
Epochs: 2431 | epoch avg. loss: 0.913 | test avg. loss: 67.181
Epochs: 2432 | epoch avg. loss: 0.869 | test avg. loss: 67.025


  5%|▍         | 2436/50000 [03:20<1:02:43, 12.64it/s]

Epochs: 2433 | epoch avg. loss: 0.602 | test avg. loss: 67.918
Epochs: 2434 | epoch avg. loss: 1.790 | test avg. loss: 67.265
Epochs: 2435 | epoch avg. loss: 0.661 | test avg. loss: 67.488


  5%|▍         | 2440/50000 [03:20<56:33, 14.02it/s]

Epochs: 2436 | epoch avg. loss: 0.911 | test avg. loss: 67.551
Epochs: 2437 | epoch avg. loss: 0.824 | test avg. loss: 67.651
Epochs: 2438 | epoch avg. loss: 1.126 | test avg. loss: 67.339
Epochs: 2439 | epoch avg. loss: 0.698 | test avg. loss: 67.148


  5%|▍         | 2442/50000 [03:20<58:28, 13.56it/s]

Epochs: 2440 | epoch avg. loss: 0.678 | test avg. loss: 67.190
Epochs: 2441 | epoch avg. loss: 0.650 | test avg. loss: 67.181
Epochs: 2442 | epoch avg. loss: 0.650 | test avg. loss: 67.162


  5%|▍         | 2446/50000 [03:21<1:02:13, 12.74it/s]

Epochs: 2443 | epoch avg. loss: 0.637 | test avg. loss: 67.136
Epochs: 2444 | epoch avg. loss: 0.636 | test avg. loss: 67.151
Epochs: 2445 | epoch avg. loss: 0.710 | test avg. loss: 67.054




Epochs: 2446 | epoch avg. loss: 0.674 | test avg. loss: 67.193
Epochs: 2447 | epoch avg. loss: 0.799 | test avg. loss: 68.559
Epochs: 2448 | epoch avg. loss: 2.020 | test avg. loss: 68.245


                                                    

Epochs: 2449 | epoch avg. loss: 1.110 | test avg. loss: 68.985
Epochs: 2450 | epoch avg. loss: 3.007 | test avg. loss: 67.906
Epochs: 2451 | epoch avg. loss: 1.730 | test avg. loss: 67.278


  5%|▍         | 2456/50000 [03:21<53:08, 14.91it/s]

Epochs: 2452 | epoch avg. loss: 1.402 | test avg. loss: 67.207
Epochs: 2453 | epoch avg. loss: 1.300 | test avg. loss: 67.898
Epochs: 2454 | epoch avg. loss: 1.527 | test avg. loss: 69.710
Epochs: 2455 | epoch avg. loss: 3.149 | test avg. loss: 68.866


  5%|▍         | 2458/50000 [03:21<57:06, 13.87it/s]

Epochs: 2456 | epoch avg. loss: 2.732 | test avg. loss: 68.359
Epochs: 2457 | epoch avg. loss: 3.616 | test avg. loss: 66.683
Epochs: 2458 | epoch avg. loss: 2.599 | test avg. loss: 71.673


  5%|▍         | 2462/50000 [03:22<1:02:23, 12.70it/s]

Epochs: 2459 | epoch avg. loss: 4.161 | test avg. loss: 70.464
Epochs: 2460 | epoch avg. loss: 4.724 | test avg. loss: 69.526
Epochs: 2461 | epoch avg. loss: 2.297 | test avg. loss: 67.873


  5%|▍         | 2464/50000 [03:22<1:04:13, 12.33it/s]

Epochs: 2462 | epoch avg. loss: 1.908 | test avg. loss: 69.983
Epochs: 2463 | epoch avg. loss: 4.346 | test avg. loss: 68.136
Epochs: 2464 | epoch avg. loss: 2.718 | test avg. loss: 69.631


  5%|▍         | 2468/50000 [03:22<1:08:21, 11.59it/s]

Epochs: 2465 | epoch avg. loss: 2.813 | test avg. loss: 73.155
Epochs: 2466 | epoch avg. loss: 4.698 | test avg. loss: 72.247
Epochs: 2467 | epoch avg. loss: 4.635 | test avg. loss: 70.346


  5%|▍         | 2470/50000 [03:22<1:07:52, 11.67it/s]

Epochs: 2468 | epoch avg. loss: 3.042 | test avg. loss: 67.712
Epochs: 2469 | epoch avg. loss: 1.528 | test avg. loss: 67.626
Epochs: 2470 | epoch avg. loss: 1.584 | test avg. loss: 68.124


  5%|▍         | 2474/50000 [03:23<1:04:58, 12.19it/s]

Epochs: 2471 | epoch avg. loss: 1.154 | test avg. loss: 67.720
Epochs: 2472 | epoch avg. loss: 0.883 | test avg. loss: 68.436
Epochs: 2473 | epoch avg. loss: 1.096 | test avg. loss: 68.201


  5%|▍         | 2478/50000 [03:23<59:33, 13.30it/s]  

Epochs: 2474 | epoch avg. loss: 1.020 | test avg. loss: 68.180
Epochs: 2475 | epoch avg. loss: 1.132 | test avg. loss: 68.990
Epochs: 2476 | epoch avg. loss: 1.302 | test avg. loss: 68.502
Epochs: 2477 | epoch avg. loss: 0.961 | test avg. loss: 68.514


  5%|▍         | 2482/50000 [03:23<55:43, 14.21it/s]

Epochs: 2478 | epoch avg. loss: 0.701 | test avg. loss: 68.563
Epochs: 2479 | epoch avg. loss: 0.746 | test avg. loss: 68.622
Epochs: 2480 | epoch avg. loss: 0.884 | test avg. loss: 69.746
Epochs: 2481 | epoch avg. loss: 1.837 | test avg. loss: 68.418


  5%|▍         | 2484/50000 [03:24<57:28, 13.78it/s]

Epochs: 2482 | epoch avg. loss: 1.155 | test avg. loss: 68.313
Epochs: 2483 | epoch avg. loss: 1.119 | test avg. loss: 68.168
Epochs: 2484 | epoch avg. loss: 1.108 | test avg. loss: 68.554


  5%|▍         | 2488/50000 [03:24<1:06:16, 11.95it/s]

Epochs: 2485 | epoch avg. loss: 1.130 | test avg. loss: 69.506
Epochs: 2486 | epoch avg. loss: 1.585 | test avg. loss: 73.835
Epochs: 2487 | epoch avg. loss: 4.855 | test avg. loss: 73.710


  5%|▍         | 2492/50000 [03:24<58:03, 13.64it/s]  

Epochs: 2488 | epoch avg. loss: 5.862 | test avg. loss: 70.678
Epochs: 2489 | epoch avg. loss: 6.809 | test avg. loss: 68.857
Epochs: 2490 | epoch avg. loss: 5.326 | test avg. loss: 70.138
Epochs: 2491 | epoch avg. loss: 10.394 | test avg. loss: 67.679


  5%|▍         | 2496/50000 [03:24<54:49, 14.44it/s]

Epochs: 2492 | epoch avg. loss: 7.779 | test avg. loss: 75.998
Epochs: 2493 | epoch avg. loss: 8.192 | test avg. loss: 76.431
Epochs: 2494 | epoch avg. loss: 7.818 | test avg. loss: 85.000
Epochs: 2495 | epoch avg. loss: 12.776 | test avg. loss: 80.881


  5%|▍         | 2498/50000 [03:25<59:43, 13.26it/s]

Epochs: 2496 | epoch avg. loss: 12.534 | test avg. loss: 79.880
Epochs: 2497 | epoch avg. loss: 9.203 | test avg. loss: 72.219
Epochs: 2498 | epoch avg. loss: 7.213 | test avg. loss: 73.056


  5%|▍         | 2498/50000 [03:25<59:43, 13.26it/s]

Epochs: 2499 | epoch avg. loss: 11.811 | test avg. loss: 69.350


  5%|▌         | 2502/50000 [03:26<2:48:13,  4.71it/s]

Epochs: 2500 | epoch avg. loss: 10.425 | test avg. loss: 70.005
Epochs: 2501 | epoch avg. loss: 9.717 | test avg. loss: 94.074
Epochs: 2502 | epoch avg. loss: 21.619 | test avg. loss: 82.072


  5%|▌         | 2506/50000 [03:26<1:57:12,  6.75it/s]

Epochs: 2503 | epoch avg. loss: 12.820 | test avg. loss: 78.596
Epochs: 2504 | epoch avg. loss: 11.183 | test avg. loss: 73.662
Epochs: 2505 | epoch avg. loss: 9.471 | test avg. loss: 67.077


  5%|▌         | 2508/50000 [03:27<1:41:46,  7.78it/s]

Epochs: 2506 | epoch avg. loss: 4.782 | test avg. loss: 65.462
Epochs: 2507 | epoch avg. loss: 3.543 | test avg. loss: 65.231
Epochs: 2508 | epoch avg. loss: 2.516 | test avg. loss: 64.945


  5%|▌         | 2512/50000 [03:27<1:21:39,  9.69it/s]

Epochs: 2509 | epoch avg. loss: 3.705 | test avg. loss: 66.631
Epochs: 2510 | epoch avg. loss: 3.215 | test avg. loss: 66.611
Epochs: 2511 | epoch avg. loss: 2.387 | test avg. loss: 67.015


  5%|▌         | 2514/50000 [03:27<1:14:35, 10.61it/s]

Epochs: 2512 | epoch avg. loss: 2.793 | test avg. loss: 68.962
Epochs: 2513 | epoch avg. loss: 3.593 | test avg. loss: 67.404
Epochs: 2514 | epoch avg. loss: 2.952 | test avg. loss: 67.625
Epochs: 2515 | epoch avg. loss: 2.175 | test avg. loss: 66.306


  5%|▌         | 2518/50000 [03:27<1:06:35, 11.88it/s]

Epochs: 2516 | epoch avg. loss: 2.068 | test avg. loss: 67.225
Epochs: 2517 | epoch avg. loss: 1.558 | test avg. loss: 66.971
Epochs: 2518 | epoch avg. loss: 2.156 | test avg. loss: 69.203


                                                      

Epochs: 2519 | epoch avg. loss: 2.018 | test avg. loss: 69.770
Epochs: 2520 | epoch avg. loss: 4.513 | test avg. loss: 72.353
Epochs: 2521 | epoch avg. loss: 6.316 | test avg. loss: 70.003


  5%|▌         | 2524/50000 [03:28<59:47, 13.23it/s]

Epochs: 2522 | epoch avg. loss: 4.779 | test avg. loss: 75.274
Epochs: 2523 | epoch avg. loss: 9.237 | test avg. loss: 74.361
Epochs: 2524 | epoch avg. loss: 8.060 | test avg. loss: 74.774


  5%|▌         | 2528/50000 [03:28<1:01:30, 12.86it/s]

Epochs: 2525 | epoch avg. loss: 10.937 | test avg. loss: 70.356
Epochs: 2526 | epoch avg. loss: 8.952 | test avg. loss: 69.637
Epochs: 2527 | epoch avg. loss: 7.006 | test avg. loss: 70.929


  5%|▌         | 2530/50000 [03:28<1:05:42, 12.04it/s]

Epochs: 2528 | epoch avg. loss: 5.451 | test avg. loss: 73.808
Epochs: 2529 | epoch avg. loss: 4.464 | test avg. loss: 73.793
Epochs: 2530 | epoch avg. loss: 3.273 | test avg. loss: 74.501


  5%|▌         | 2534/50000 [03:29<1:09:26, 11.39it/s]

Epochs: 2531 | epoch avg. loss: 3.116 | test avg. loss: 72.799
Epochs: 2532 | epoch avg. loss: 1.840 | test avg. loss: 72.645
Epochs: 2533 | epoch avg. loss: 2.057 | test avg. loss: 71.592


  5%|▌         | 2536/50000 [03:29<1:07:45, 11.68it/s]

Epochs: 2534 | epoch avg. loss: 1.500 | test avg. loss: 71.646
Epochs: 2535 | epoch avg. loss: 2.427 | test avg. loss: 71.216
Epochs: 2536 | epoch avg. loss: 2.500 | test avg. loss: 69.802


  5%|▌         | 2540/50000 [03:29<1:01:35, 12.84it/s]

Epochs: 2537 | epoch avg. loss: 1.443 | test avg. loss: 69.721
Epochs: 2538 | epoch avg. loss: 1.335 | test avg. loss: 69.730
Epochs: 2539 | epoch avg. loss: 2.169 | test avg. loss: 69.098
Epochs: 2540 | epoch avg. loss: 0.890 | test avg. loss: 69.223


  5%|▌         | 2544/50000 [03:29<57:22, 13.78it/s]

Epochs: 2541 | epoch avg. loss: 0.963 | test avg. loss: 69.814
Epochs: 2542 | epoch avg. loss: 1.852 | test avg. loss: 69.302
Epochs: 2543 | epoch avg. loss: 1.255 | test avg. loss: 70.221


  5%|▌         | 2546/50000 [03:30<1:01:44, 12.81it/s]

Epochs: 2544 | epoch avg. loss: 2.372 | test avg. loss: 69.295
Epochs: 2545 | epoch avg. loss: 1.707 | test avg. loss: 69.524
Epochs: 2546 | epoch avg. loss: 1.478 | test avg. loss: 70.408


  5%|▌         | 2550/50000 [03:30<1:00:46, 13.01it/s]

Epochs: 2547 | epoch avg. loss: 1.507 | test avg. loss: 70.169
Epochs: 2548 | epoch avg. loss: 1.202 | test avg. loss: 70.394
Epochs: 2549 | epoch avg. loss: 1.320 | test avg. loss: 71.423


  5%|▌         | 2552/50000 [03:30<1:02:20, 12.69it/s]

Epochs: 2550 | epoch avg. loss: 2.218 | test avg. loss: 70.810
Epochs: 2551 | epoch avg. loss: 2.227 | test avg. loss: 70.091
Epochs: 2552 | epoch avg. loss: 2.453 | test avg. loss: 69.968


  5%|▌         | 2556/50000 [03:30<1:04:13, 12.31it/s]

Epochs: 2553 | epoch avg. loss: 1.476 | test avg. loss: 70.377
Epochs: 2554 | epoch avg. loss: 3.206 | test avg. loss: 68.571
Epochs: 2555 | epoch avg. loss: 1.668 | test avg. loss: 68.378


  5%|▌         | 2558/50000 [03:31<1:04:57, 12.17it/s]

Epochs: 2556 | epoch avg. loss: 1.379 | test avg. loss: 68.718
Epochs: 2557 | epoch avg. loss: 1.664 | test avg. loss: 70.087
Epochs: 2558 | epoch avg. loss: 1.818 | test avg. loss: 70.038


  5%|▌         | 2562/50000 [03:31<1:02:32, 12.64it/s]

Epochs: 2559 | epoch avg. loss: 1.361 | test avg. loss: 69.841
Epochs: 2560 | epoch avg. loss: 1.137 | test avg. loss: 71.322
Epochs: 2561 | epoch avg. loss: 2.173 | test avg. loss: 72.819


  5%|▌         | 2564/50000 [03:31<1:00:22, 13.09it/s]

Epochs: 2562 | epoch avg. loss: 3.666 | test avg. loss: 71.500
Epochs: 2563 | epoch avg. loss: 2.676 | test avg. loss: 73.253
Epochs: 2564 | epoch avg. loss: 3.993 | test avg. loss: 70.222


  5%|▌         | 2568/50000 [03:31<58:27, 13.52it/s]

Epochs: 2565 | epoch avg. loss: 2.644 | test avg. loss: 69.319
Epochs: 2566 | epoch avg. loss: 1.137 | test avg. loss: 69.307
Epochs: 2567 | epoch avg. loss: 1.560 | test avg. loss: 70.402


  5%|▌         | 2570/50000 [03:32<1:00:18, 13.11it/s]

Epochs: 2568 | epoch avg. loss: 1.956 | test avg. loss: 70.715
Epochs: 2569 | epoch avg. loss: 1.939 | test avg. loss: 71.517
Epochs: 2570 | epoch avg. loss: 3.001 | test avg. loss: 70.648


  5%|▌         | 2574/50000 [03:32<1:05:48, 12.01it/s]

Epochs: 2571 | epoch avg. loss: 1.665 | test avg. loss: 70.573
Epochs: 2572 | epoch avg. loss: 1.663 | test avg. loss: 73.583
Epochs: 2573 | epoch avg. loss: 3.297 | test avg. loss: 74.364


  5%|▌         | 2576/50000 [03:32<1:03:23, 12.47it/s]

Epochs: 2574 | epoch avg. loss: 5.203 | test avg. loss: 73.159
Epochs: 2575 | epoch avg. loss: 3.534 | test avg. loss: 72.046
Epochs: 2576 | epoch avg. loss: 4.547 | test avg. loss: 70.799


  5%|▌         | 2580/50000 [03:32<1:00:32, 13.06it/s]

Epochs: 2577 | epoch avg. loss: 4.432 | test avg. loss: 68.714
Epochs: 2578 | epoch avg. loss: 2.191 | test avg. loss: 68.658
Epochs: 2579 | epoch avg. loss: 2.394 | test avg. loss: 70.535


  5%|▌         | 2582/50000 [03:32<1:03:28, 12.45it/s]

Epochs: 2580 | epoch avg. loss: 2.907 | test avg. loss: 69.365
Epochs: 2581 | epoch avg. loss: 2.293 | test avg. loss: 68.786
Epochs: 2582 | epoch avg. loss: 1.367 | test avg. loss: 69.253


                                                      

Epochs: 2583 | epoch avg. loss: 2.137 | test avg. loss: 68.607
Epochs: 2584 | epoch avg. loss: 1.507 | test avg. loss: 69.039
Epochs: 2585 | epoch avg. loss: 1.422 | test avg. loss: 70.708


  5%|▌         | 2588/50000 [03:33<59:18, 13.33it/s]

Epochs: 2586 | epoch avg. loss: 2.104 | test avg. loss: 72.585
Epochs: 2587 | epoch avg. loss: 2.489 | test avg. loss: 72.405
Epochs: 2588 | epoch avg. loss: 2.752 | test avg. loss: 72.772


                                                    

Epochs: 2589 | epoch avg. loss: 2.686 | test avg. loss: 71.928
Epochs: 2590 | epoch avg. loss: 2.539 | test avg. loss: 71.072
Epochs: 2591 | epoch avg. loss: 2.285 | test avg. loss: 70.737


  5%|▌         | 2594/50000 [03:33<56:05, 14.09it/s]

Epochs: 2592 | epoch avg. loss: 1.605 | test avg. loss: 71.126
Epochs: 2593 | epoch avg. loss: 2.202 | test avg. loss: 73.923
Epochs: 2594 | epoch avg. loss: 3.971 | test avg. loss: 74.986


  5%|▌         | 2596/50000 [03:34<59:11, 13.35it/s]

Epochs: 2595 | epoch avg. loss: 5.328 | test avg. loss: 75.321
Epochs: 2596 | epoch avg. loss: 5.962 | test avg. loss: 72.495
Epochs: 2597 | epoch avg. loss: 3.739 | test avg. loss: 71.513


  5%|▌         | 2598/50000 [03:34<1:05:06, 12.13it/s]

Epochs: 2598 | epoch avg. loss: 4.071 | test avg. loss: 69.598
Epochs: 2599 | epoch avg. loss: 2.408 | test avg. loss: 69.951


  5%|▌         | 2602/50000 [03:35<2:40:51,  4.91it/s]

Epochs: 2600 | epoch avg. loss: 3.578 | test avg. loss: 70.055
Epochs: 2601 | epoch avg. loss: 4.918 | test avg. loss: 67.961
Epochs: 2602 | epoch avg. loss: 2.715 | test avg. loss: 68.143


  5%|▌         | 2606/50000 [03:35<1:51:24,  7.09it/s]

Epochs: 2603 | epoch avg. loss: 1.567 | test avg. loss: 67.602
Epochs: 2604 | epoch avg. loss: 0.794 | test avg. loss: 67.848
Epochs: 2605 | epoch avg. loss: 1.309 | test avg. loss: 68.499


  5%|▌         | 2608/50000 [03:36<1:40:17,  7.88it/s]

Epochs: 2606 | epoch avg. loss: 2.013 | test avg. loss: 68.287
Epochs: 2607 | epoch avg. loss: 1.246 | test avg. loss: 68.599
Epochs: 2608 | epoch avg. loss: 1.802 | test avg. loss: 70.789


  5%|▌         | 2612/50000 [03:36<1:20:22,  9.83it/s]

Epochs: 2609 | epoch avg. loss: 2.200 | test avg. loss: 69.474
Epochs: 2610 | epoch avg. loss: 1.219 | test avg. loss: 69.166
Epochs: 2611 | epoch avg. loss: 0.682 | test avg. loss: 69.115


  5%|▌         | 2614/50000 [03:36<1:12:55, 10.83it/s]

Epochs: 2612 | epoch avg. loss: 0.731 | test avg. loss: 69.352
Epochs: 2613 | epoch avg. loss: 0.847 | test avg. loss: 69.426
Epochs: 2614 | epoch avg. loss: 0.773 | test avg. loss: 70.208


  5%|▌         | 2618/50000 [03:36<1:06:49, 11.82it/s]

Epochs: 2615 | epoch avg. loss: 1.169 | test avg. loss: 70.442
Epochs: 2616 | epoch avg. loss: 1.079 | test avg. loss: 70.478
Epochs: 2617 | epoch avg. loss: 1.116 | test avg. loss: 70.370


                                                      

Epochs: 2618 | epoch avg. loss: 1.389 | test avg. loss: 70.384
Epochs: 2619 | epoch avg. loss: 1.553 | test avg. loss: 72.607


  5%|▌         | 2622/50000 [03:37<1:13:20, 10.77it/s]

Epochs: 2620 | epoch avg. loss: 2.445 | test avg. loss: 72.230
Epochs: 2621 | epoch avg. loss: 2.127 | test avg. loss: 71.354
Epochs: 2622 | epoch avg. loss: 1.496 | test avg. loss: 71.662


                                                      

Epochs: 2623 | epoch avg. loss: 1.568 | test avg. loss: 70.796
Epochs: 2624 | epoch avg. loss: 0.778 | test avg. loss: 70.906
Epochs: 2625 | epoch avg. loss: 0.627 | test avg. loss: 70.883


  5%|▌         | 2630/50000 [03:37<57:53, 13.64it/s]  

Epochs: 2626 | epoch avg. loss: 1.083 | test avg. loss: 70.410
Epochs: 2627 | epoch avg. loss: 1.037 | test avg. loss: 70.592
Epochs: 2628 | epoch avg. loss: 0.989 | test avg. loss: 70.495
Epochs: 2629 | epoch avg. loss: 0.969 | test avg. loss: 70.797


  5%|▌         | 2632/50000 [03:38<57:34, 13.71it/s]

Epochs: 2630 | epoch avg. loss: 1.012 | test avg. loss: 70.806
Epochs: 2631 | epoch avg. loss: 1.234 | test avg. loss: 70.484
Epochs: 2632 | epoch avg. loss: 0.664 | test avg. loss: 70.478


  5%|▌         | 2636/50000 [03:38<58:42, 13.44it/s]  

Epochs: 2633 | epoch avg. loss: 0.610 | test avg. loss: 70.299
Epochs: 2634 | epoch avg. loss: 0.744 | test avg. loss: 70.317
Epochs: 2635 | epoch avg. loss: 0.815 | test avg. loss: 69.948


  5%|▌         | 2640/50000 [03:38<54:05, 14.59it/s]

Epochs: 2636 | epoch avg. loss: 0.719 | test avg. loss: 69.726
Epochs: 2637 | epoch avg. loss: 0.736 | test avg. loss: 69.481
Epochs: 2638 | epoch avg. loss: 0.595 | test avg. loss: 69.345
Epochs: 2639 | epoch avg. loss: 0.702 | test avg. loss: 69.490


  5%|▌         | 2642/50000 [03:38<53:57, 14.63it/s]

Epochs: 2640 | epoch avg. loss: 0.853 | test avg. loss: 70.342
Epochs: 2641 | epoch avg. loss: 1.594 | test avg. loss: 70.422
Epochs: 2642 | epoch avg. loss: 1.343 | test avg. loss: 70.820
Epochs: 2643 | epoch avg. loss: 3.076 | test avg. loss: 69.486


  5%|▌         | 2646/50000 [03:39<57:24, 13.75it/s]

Epochs: 2644 | epoch avg. loss: 0.835 | test avg. loss: 69.626
Epochs: 2645 | epoch avg. loss: 0.934 | test avg. loss: 70.269
Epochs: 2646 | epoch avg. loss: 1.336 | test avg. loss: 70.566


  5%|▌         | 2650/50000 [03:39<59:11, 13.33it/s]

Epochs: 2647 | epoch avg. loss: 2.392 | test avg. loss: 70.301
Epochs: 2648 | epoch avg. loss: 1.228 | test avg. loss: 69.979
Epochs: 2649 | epoch avg. loss: 1.527 | test avg. loss: 70.127


  5%|▌         | 2652/50000 [03:39<57:40, 13.68it/s]

Epochs: 2650 | epoch avg. loss: 1.522 | test avg. loss: 71.298
Epochs: 2651 | epoch avg. loss: 1.619 | test avg. loss: 71.441
Epochs: 2652 | epoch avg. loss: 1.588 | test avg. loss: 72.882


  5%|▌         | 2656/50000 [03:39<1:00:06, 13.13it/s]

Epochs: 2653 | epoch avg. loss: 2.457 | test avg. loss: 71.807
Epochs: 2654 | epoch avg. loss: 1.703 | test avg. loss: 71.610
Epochs: 2655 | epoch avg. loss: 1.135 | test avg. loss: 71.400


  5%|▌         | 2658/50000 [03:39<1:01:19, 12.87it/s]

Epochs: 2656 | epoch avg. loss: 0.905 | test avg. loss: 71.657
Epochs: 2657 | epoch avg. loss: 1.172 | test avg. loss: 71.005
Epochs: 2658 | epoch avg. loss: 0.965 | test avg. loss: 71.171


  5%|▌         | 2662/50000 [03:40<59:00, 13.37it/s]  

Epochs: 2659 | epoch avg. loss: 1.162 | test avg. loss: 72.106
Epochs: 2660 | epoch avg. loss: 1.769 | test avg. loss: 71.241
Epochs: 2661 | epoch avg. loss: 1.146 | test avg. loss: 71.219


  5%|▌         | 2666/50000 [03:40<56:35, 13.94it/s]

Epochs: 2662 | epoch avg. loss: 1.131 | test avg. loss: 70.583
Epochs: 2663 | epoch avg. loss: 0.826 | test avg. loss: 70.461
Epochs: 2664 | epoch avg. loss: 0.799 | test avg. loss: 70.390
Epochs: 2665 | epoch avg. loss: 0.764 | test avg. loss: 70.421


  5%|▌         | 2668/50000 [03:40<56:13, 14.03it/s]

Epochs: 2666 | epoch avg. loss: 0.844 | test avg. loss: 70.224
Epochs: 2667 | epoch avg. loss: 1.017 | test avg. loss: 69.578
Epochs: 2668 | epoch avg. loss: 0.655 | test avg. loss: 69.543


  5%|▌         | 2672/50000 [03:40<1:01:42, 12.78it/s]

Epochs: 2669 | epoch avg. loss: 0.653 | test avg. loss: 69.641
Epochs: 2670 | epoch avg. loss: 0.675 | test avg. loss: 69.889
Epochs: 2671 | epoch avg. loss: 0.705 | test avg. loss: 70.190


                                                      

Epochs: 2672 | epoch avg. loss: 0.761 | test avg. loss: 70.079
Epochs: 2673 | epoch avg. loss: 0.663 | test avg. loss: 70.133


  5%|▌         | 2676/50000 [03:41<1:12:26, 10.89it/s]

Epochs: 2674 | epoch avg. loss: 0.877 | test avg. loss: 69.843
Epochs: 2675 | epoch avg. loss: 0.705 | test avg. loss: 69.999
Epochs: 2676 | epoch avg. loss: 0.782 | test avg. loss: 70.136


  5%|▌         | 2680/50000 [03:41<1:03:54, 12.34it/s]

Epochs: 2677 | epoch avg. loss: 1.080 | test avg. loss: 69.501
Epochs: 2678 | epoch avg. loss: 0.670 | test avg. loss: 69.484
Epochs: 2679 | epoch avg. loss: 0.664 | test avg. loss: 69.788
Epochs: 2680 | epoch avg. loss: 0.654 | test avg. loss: 69.940


  5%|▌         | 2684/50000 [03:41<1:02:27, 12.62it/s]

Epochs: 2681 | epoch avg. loss: 0.612 | test avg. loss: 70.385
Epochs: 2682 | epoch avg. loss: 0.871 | test avg. loss: 70.298
Epochs: 2683 | epoch avg. loss: 0.662 | test avg. loss: 70.285


  5%|▌         | 2686/50000 [03:42<1:05:51, 11.97it/s]

Epochs: 2684 | epoch avg. loss: 0.777 | test avg. loss: 71.292
Epochs: 2685 | epoch avg. loss: 1.344 | test avg. loss: 72.099
Epochs: 2686 | epoch avg. loss: 1.639 | test avg. loss: 73.769


  5%|▌         | 2690/50000 [03:42<1:06:28, 11.86it/s]

Epochs: 2687 | epoch avg. loss: 3.310 | test avg. loss: 74.234
Epochs: 2688 | epoch avg. loss: 3.641 | test avg. loss: 70.411
Epochs: 2689 | epoch avg. loss: 1.937 | test avg. loss: 70.122


  5%|▌         | 2692/50000 [03:42<1:03:04, 12.50it/s]

Epochs: 2690 | epoch avg. loss: 1.795 | test avg. loss: 69.865
Epochs: 2691 | epoch avg. loss: 1.308 | test avg. loss: 70.124
Epochs: 2692 | epoch avg. loss: 0.917 | test avg. loss: 70.306


  5%|▌         | 2696/50000 [03:42<1:03:11, 12.48it/s]

Epochs: 2693 | epoch avg. loss: 0.778 | test avg. loss: 70.249
Epochs: 2694 | epoch avg. loss: 0.930 | test avg. loss: 70.599
Epochs: 2695 | epoch avg. loss: 1.054 | test avg. loss: 70.463


  5%|▌         | 2698/50000 [03:43<1:05:09, 12.10it/s]

Epochs: 2696 | epoch avg. loss: 1.276 | test avg. loss: 70.740
Epochs: 2697 | epoch avg. loss: 1.420 | test avg. loss: 70.062
Epochs: 2698 | epoch avg. loss: 1.672 | test avg. loss: 69.624


  5%|▌         | 2698/50000 [03:43<1:05:09, 12.10it/s]

Epochs: 2699 | epoch avg. loss: 0.661 | test avg. loss: 69.728


  5%|▌         | 2702/50000 [03:45<3:35:16,  3.66it/s]

Epochs: 2700 | epoch avg. loss: 0.579 | test avg. loss: 69.801
Epochs: 2701 | epoch avg. loss: 0.578 | test avg. loss: 70.277
Epochs: 2702 | epoch avg. loss: 0.940 | test avg. loss: 70.417


  5%|▌         | 2706/50000 [03:45<2:15:57,  5.80it/s]

Epochs: 2703 | epoch avg. loss: 1.058 | test avg. loss: 69.960
Epochs: 2704 | epoch avg. loss: 0.704 | test avg. loss: 69.964
Epochs: 2705 | epoch avg. loss: 1.099 | test avg. loss: 69.730


  5%|▌         | 2708/50000 [03:45<1:54:59,  6.85it/s]

Epochs: 2706 | epoch avg. loss: 0.628 | test avg. loss: 69.685
Epochs: 2707 | epoch avg. loss: 0.889 | test avg. loss: 69.759
Epochs: 2708 | epoch avg. loss: 0.737 | test avg. loss: 69.732


  5%|▌         | 2712/50000 [03:46<1:33:58,  8.39it/s]

Epochs: 2709 | epoch avg. loss: 0.717 | test avg. loss: 69.961
Epochs: 2710 | epoch avg. loss: 0.861 | test avg. loss: 69.774
Epochs: 2711 | epoch avg. loss: 0.750 | test avg. loss: 71.000


  5%|▌         | 2714/50000 [03:46<1:22:53,  9.51it/s]

Epochs: 2712 | epoch avg. loss: 1.945 | test avg. loss: 69.863
Epochs: 2713 | epoch avg. loss: 1.451 | test avg. loss: 69.262
Epochs: 2714 | epoch avg. loss: 0.939 | test avg. loss: 69.525


  5%|▌         | 2718/50000 [03:46<1:14:10, 10.62it/s]

Epochs: 2715 | epoch avg. loss: 0.945 | test avg. loss: 69.802
Epochs: 2716 | epoch avg. loss: 1.136 | test avg. loss: 69.884
Epochs: 2717 | epoch avg. loss: 0.718 | test avg. loss: 70.059


  5%|▌         | 2720/50000 [03:46<1:11:57, 10.95it/s]

Epochs: 2718 | epoch avg. loss: 1.109 | test avg. loss: 69.947
Epochs: 2719 | epoch avg. loss: 0.667 | test avg. loss: 70.214
Epochs: 2720 | epoch avg. loss: 0.735 | test avg. loss: 70.559


  5%|▌         | 2724/50000 [03:47<1:06:35, 11.83it/s]

Epochs: 2721 | epoch avg. loss: 0.956 | test avg. loss: 70.888
Epochs: 2722 | epoch avg. loss: 1.644 | test avg. loss: 70.266
Epochs: 2723 | epoch avg. loss: 0.978 | test avg. loss: 69.823


  5%|▌         | 2726/50000 [03:47<1:03:41, 12.37it/s]

Epochs: 2724 | epoch avg. loss: 0.539 | test avg. loss: 69.899
Epochs: 2725 | epoch avg. loss: 0.824 | test avg. loss: 69.896
Epochs: 2726 | epoch avg. loss: 0.750 | test avg. loss: 70.139


                                                    

Epochs: 2727 | epoch avg. loss: 0.697 | test avg. loss: 70.407
Epochs: 2728 | epoch avg. loss: 0.830 | test avg. loss: 70.282
Epochs: 2729 | epoch avg. loss: 0.635 | test avg. loss: 70.269


  5%|▌         | 2732/50000 [03:47<57:57, 13.59it/s]

Epochs: 2730 | epoch avg. loss: 0.674 | test avg. loss: 70.601
Epochs: 2731 | epoch avg. loss: 0.949 | test avg. loss: 70.753
Epochs: 2732 | epoch avg. loss: 2.293 | test avg. loss: 70.538
Epochs: 2733 | epoch avg. loss: 3.280 | test avg. loss: 70.399


  5%|▌         | 2736/50000 [03:48<58:11, 13.54it/s]

Epochs: 2734 | epoch avg. loss: 1.249 | test avg. loss: 70.459
Epochs: 2735 | epoch avg. loss: 1.296 | test avg. loss: 70.978
Epochs: 2736 | epoch avg. loss: 1.496 | test avg. loss: 71.155


  5%|▌         | 2740/50000 [03:48<58:49, 13.39it/s]

Epochs: 2737 | epoch avg. loss: 1.532 | test avg. loss: 71.610
Epochs: 2738 | epoch avg. loss: 3.158 | test avg. loss: 69.378
Epochs: 2739 | epoch avg. loss: 1.773 | test avg. loss: 68.818


  5%|▌         | 2744/50000 [03:48<54:59, 14.32it/s]

Epochs: 2740 | epoch avg. loss: 0.826 | test avg. loss: 69.007
Epochs: 2741 | epoch avg. loss: 0.933 | test avg. loss: 69.248
Epochs: 2742 | epoch avg. loss: 0.652 | test avg. loss: 69.609
Epochs: 2743 | epoch avg. loss: 0.677 | test avg. loss: 70.324


  5%|▌         | 2748/50000 [03:48<53:00, 14.86it/s]

Epochs: 2744 | epoch avg. loss: 1.109 | test avg. loss: 69.926
Epochs: 2745 | epoch avg. loss: 1.200 | test avg. loss: 69.700
Epochs: 2746 | epoch avg. loss: 0.750 | test avg. loss: 69.484
Epochs: 2747 | epoch avg. loss: 0.619 | test avg. loss: 69.226


  6%|▌         | 2750/50000 [03:49<1:02:36, 12.58it/s]

Epochs: 2748 | epoch avg. loss: 0.575 | test avg. loss: 69.167
Epochs: 2749 | epoch avg. loss: 0.573 | test avg. loss: 69.380


  6%|▌         | 2752/50000 [03:49<1:12:11, 10.91it/s]

Epochs: 2750 | epoch avg. loss: 0.638 | test avg. loss: 69.424
Epochs: 2751 | epoch avg. loss: 0.967 | test avg. loss: 69.652


  6%|▌         | 2754/50000 [03:49<1:18:14, 10.07it/s]

Epochs: 2752 | epoch avg. loss: 0.643 | test avg. loss: 70.114
Epochs: 2753 | epoch avg. loss: 0.650 | test avg. loss: 70.285


  6%|▌         | 2756/50000 [03:49<1:14:15, 10.60it/s]

Epochs: 2754 | epoch avg. loss: 0.667 | test avg. loss: 70.964
Epochs: 2755 | epoch avg. loss: 1.114 | test avg. loss: 70.467
Epochs: 2756 | epoch avg. loss: 0.604 | test avg. loss: 70.490


  6%|▌         | 2760/50000 [03:49<1:08:58, 11.41it/s]

Epochs: 2757 | epoch avg. loss: 0.567 | test avg. loss: 70.955
Epochs: 2758 | epoch avg. loss: 0.990 | test avg. loss: 70.921
Epochs: 2759 | epoch avg. loss: 1.146 | test avg. loss: 70.083


  6%|▌         | 2762/50000 [03:50<1:06:10, 11.90it/s]

Epochs: 2760 | epoch avg. loss: 0.652 | test avg. loss: 69.973
Epochs: 2761 | epoch avg. loss: 0.581 | test avg. loss: 69.993
Epochs: 2762 | epoch avg. loss: 0.595 | test avg. loss: 70.383


  6%|▌         | 2766/50000 [03:50<1:01:31, 12.79it/s]

Epochs: 2763 | epoch avg. loss: 1.271 | test avg. loss: 69.764
Epochs: 2764 | epoch avg. loss: 0.630 | test avg. loss: 69.541
Epochs: 2765 | epoch avg. loss: 0.569 | test avg. loss: 69.416


                                                    

Epochs: 2766 | epoch avg. loss: 0.657 | test avg. loss: 69.817
Epochs: 2767 | epoch avg. loss: 0.904 | test avg. loss: 70.702
Epochs: 2768 | epoch avg. loss: 1.605 | test avg. loss: 70.010


  6%|▌         | 2772/50000 [03:50<57:54, 13.59it/s]

Epochs: 2769 | epoch avg. loss: 1.290 | test avg. loss: 69.281
Epochs: 2770 | epoch avg. loss: 0.780 | test avg. loss: 69.322
Epochs: 2771 | epoch avg. loss: 0.822 | test avg. loss: 69.994


  6%|▌         | 2774/50000 [03:51<1:01:12, 12.86it/s]

Epochs: 2772 | epoch avg. loss: 1.340 | test avg. loss: 69.482
Epochs: 2773 | epoch avg. loss: 0.587 | test avg. loss: 69.510
Epochs: 2774 | epoch avg. loss: 0.625 | test avg. loss: 69.625


  6%|▌         | 2778/50000 [03:51<1:02:34, 12.58it/s]

Epochs: 2775 | epoch avg. loss: 0.572 | test avg. loss: 69.870
Epochs: 2776 | epoch avg. loss: 0.750 | test avg. loss: 71.508
Epochs: 2777 | epoch avg. loss: 2.125 | test avg. loss: 70.091


                                                      

Epochs: 2778 | epoch avg. loss: 1.534 | test avg. loss: 69.196
Epochs: 2779 | epoch avg. loss: 1.318 | test avg. loss: 69.195
Epochs: 2780 | epoch avg. loss: 1.164 | test avg. loss: 70.698


  6%|▌         | 2784/50000 [03:51<57:47, 13.62it/s]

Epochs: 2781 | epoch avg. loss: 1.713 | test avg. loss: 70.280
Epochs: 2782 | epoch avg. loss: 1.836 | test avg. loss: 69.571
Epochs: 2783 | epoch avg. loss: 1.080 | test avg. loss: 68.958


  6%|▌         | 2786/50000 [03:52<59:15, 13.28it/s]

Epochs: 2784 | epoch avg. loss: 0.682 | test avg. loss: 68.898
Epochs: 2785 | epoch avg. loss: 0.899 | test avg. loss: 68.823
Epochs: 2786 | epoch avg. loss: 0.843 | test avg. loss: 69.099


  6%|▌         | 2790/50000 [03:52<1:02:30, 12.59it/s]

Epochs: 2787 | epoch avg. loss: 0.914 | test avg. loss: 69.337
Epochs: 2788 | epoch avg. loss: 0.797 | test avg. loss: 70.435
Epochs: 2789 | epoch avg. loss: 1.655 | test avg. loss: 71.384


  6%|▌         | 2792/50000 [03:52<1:01:38, 12.76it/s]

Epochs: 2790 | epoch avg. loss: 3.472 | test avg. loss: 70.221
Epochs: 2791 | epoch avg. loss: 3.051 | test avg. loss: 69.488
Epochs: 2792 | epoch avg. loss: 1.813 | test avg. loss: 69.726


  6%|▌         | 2796/50000 [03:52<1:00:24, 13.03it/s]

Epochs: 2793 | epoch avg. loss: 1.499 | test avg. loss: 70.096
Epochs: 2794 | epoch avg. loss: 1.332 | test avg. loss: 70.981
Epochs: 2795 | epoch avg. loss: 1.459 | test avg. loss: 72.837


  6%|▌         | 2798/50000 [03:52<1:00:20, 13.04it/s]

Epochs: 2796 | epoch avg. loss: 3.189 | test avg. loss: 71.763
Epochs: 2797 | epoch avg. loss: 2.632 | test avg. loss: 71.746
Epochs: 2798 | epoch avg. loss: 3.822 | test avg. loss: 71.225


  6%|▌         | 2798/50000 [03:53<1:00:20, 13.04it/s]

Epochs: 2799 | epoch avg. loss: 1.554 | test avg. loss: 71.488


  6%|▌         | 2802/50000 [03:54<2:29:50,  5.25it/s]

Epochs: 2800 | epoch avg. loss: 1.180 | test avg. loss: 71.497
Epochs: 2801 | epoch avg. loss: 1.253 | test avg. loss: 72.055
Epochs: 2802 | epoch avg. loss: 1.349 | test avg. loss: 72.708


  6%|▌         | 2806/50000 [03:54<1:43:11,  7.62it/s]

Epochs: 2803 | epoch avg. loss: 2.300 | test avg. loss: 72.778
Epochs: 2804 | epoch avg. loss: 2.399 | test avg. loss: 71.099
Epochs: 2805 | epoch avg. loss: 1.988 | test avg. loss: 71.529


  6%|▌         | 2808/50000 [03:54<1:28:27,  8.89it/s]

Epochs: 2806 | epoch avg. loss: 1.390 | test avg. loss: 72.897
Epochs: 2807 | epoch avg. loss: 2.238 | test avg. loss: 71.263
Epochs: 2808 | epoch avg. loss: 1.803 | test avg. loss: 70.007
Epochs: 2809 | epoch avg. loss: 1.317 | test avg. loss: 72.476


  6%|▌         | 2812/50000 [03:55<1:12:14, 10.89it/s]

Epochs: 2810 | epoch avg. loss: 2.898 | test avg. loss: 70.736
Epochs: 2811 | epoch avg. loss: 2.030 | test avg. loss: 70.769
Epochs: 2812 | epoch avg. loss: 2.895 | test avg. loss: 69.044


  6%|▌         | 2816/50000 [03:55<1:02:30, 12.58it/s]

Epochs: 2813 | epoch avg. loss: 1.479 | test avg. loss: 70.365
Epochs: 2814 | epoch avg. loss: 1.662 | test avg. loss: 71.769
Epochs: 2815 | epoch avg. loss: 2.250 | test avg. loss: 70.778
Epochs: 2816 | epoch avg. loss: 1.759 | test avg. loss: 70.499


  6%|▌         | 2820/50000 [03:55<1:00:49, 12.93it/s]

Epochs: 2817 | epoch avg. loss: 1.939 | test avg. loss: 71.012
Epochs: 2818 | epoch avg. loss: 1.452 | test avg. loss: 71.964
Epochs: 2819 | epoch avg. loss: 1.865 | test avg. loss: 73.275


  6%|▌         | 2822/50000 [03:55<59:09, 13.29it/s]

Epochs: 2820 | epoch avg. loss: 2.342 | test avg. loss: 73.430
Epochs: 2821 | epoch avg. loss: 3.138 | test avg. loss: 72.576
Epochs: 2822 | epoch avg. loss: 2.842 | test avg. loss: 71.584


  6%|▌         | 2826/50000 [03:56<1:00:09, 13.07it/s]

Epochs: 2823 | epoch avg. loss: 3.353 | test avg. loss: 70.028
Epochs: 2824 | epoch avg. loss: 2.029 | test avg. loss: 71.025
Epochs: 2825 | epoch avg. loss: 2.473 | test avg. loss: 76.977


  6%|▌         | 2828/50000 [03:56<1:00:08, 13.07it/s]

Epochs: 2826 | epoch avg. loss: 5.961 | test avg. loss: 75.436
Epochs: 2827 | epoch avg. loss: 6.326 | test avg. loss: 70.111
Epochs: 2828 | epoch avg. loss: 3.342 | test avg. loss: 70.023
Epochs: 2829 | epoch avg. loss: 3.682 | test avg. loss: 69.962


  6%|▌         | 2832/50000 [03:56<55:32, 14.15it/s]

Epochs: 2830 | epoch avg. loss: 2.668 | test avg. loss: 71.220
Epochs: 2831 | epoch avg. loss: 3.686 | test avg. loss: 77.865
Epochs: 2832 | epoch avg. loss: 5.896 | test avg. loss: 81.673


  6%|▌         | 2836/50000 [03:56<58:28, 13.44it/s]

Epochs: 2833 | epoch avg. loss: 7.751 | test avg. loss: 80.403
Epochs: 2834 | epoch avg. loss: 8.911 | test avg. loss: 75.783
Epochs: 2835 | epoch avg. loss: 4.320 | test avg. loss: 72.675


  6%|▌         | 2838/50000 [03:56<57:45, 13.61it/s]

Epochs: 2836 | epoch avg. loss: 3.366 | test avg. loss: 73.312
Epochs: 2837 | epoch avg. loss: 2.643 | test avg. loss: 75.636
Epochs: 2838 | epoch avg. loss: 4.573 | test avg. loss: 75.026


  6%|▌         | 2842/50000 [03:57<1:04:16, 12.23it/s]

Epochs: 2839 | epoch avg. loss: 3.368 | test avg. loss: 75.799
Epochs: 2840 | epoch avg. loss: 6.945 | test avg. loss: 74.860
Epochs: 2841 | epoch avg. loss: 6.964 | test avg. loss: 75.089


  6%|▌         | 2844/50000 [03:57<1:02:42, 12.53it/s]

Epochs: 2842 | epoch avg. loss: 3.688 | test avg. loss: 76.810
Epochs: 2843 | epoch avg. loss: 3.928 | test avg. loss: 80.421
Epochs: 2844 | epoch avg. loss: 7.292 | test avg. loss: 77.437


  6%|▌         | 2848/50000 [03:57<59:47, 13.14it/s]

Epochs: 2845 | epoch avg. loss: 3.842 | test avg. loss: 75.021
Epochs: 2846 | epoch avg. loss: 2.953 | test avg. loss: 75.560
Epochs: 2847 | epoch avg. loss: 1.516 | test avg. loss: 76.706


  6%|▌         | 2850/50000 [03:57<57:37, 13.64it/s]

Epochs: 2848 | epoch avg. loss: 1.457 | test avg. loss: 77.508
Epochs: 2849 | epoch avg. loss: 1.723 | test avg. loss: 77.148
Epochs: 2850 | epoch avg. loss: 2.525 | test avg. loss: 77.107


  6%|▌         | 2854/50000 [03:58<1:02:53, 12.50it/s]

Epochs: 2851 | epoch avg. loss: 1.501 | test avg. loss: 77.003
Epochs: 2852 | epoch avg. loss: 1.393 | test avg. loss: 76.114
Epochs: 2853 | epoch avg. loss: 1.100 | test avg. loss: 76.519


  6%|▌         | 2856/50000 [03:58<1:01:34, 12.76it/s]

Epochs: 2854 | epoch avg. loss: 1.262 | test avg. loss: 77.142
Epochs: 2855 | epoch avg. loss: 2.614 | test avg. loss: 76.135
Epochs: 2856 | epoch avg. loss: 2.688 | test avg. loss: 74.852


  6%|▌         | 2860/50000 [03:58<57:38, 13.63it/s]

Epochs: 2857 | epoch avg. loss: 2.694 | test avg. loss: 73.951
Epochs: 2858 | epoch avg. loss: 2.053 | test avg. loss: 75.697
Epochs: 2859 | epoch avg. loss: 2.451 | test avg. loss: 76.745


  6%|▌         | 2862/50000 [03:58<57:36, 13.64it/s]

Epochs: 2860 | epoch avg. loss: 3.266 | test avg. loss: 75.630
Epochs: 2861 | epoch avg. loss: 2.031 | test avg. loss: 74.461
Epochs: 2862 | epoch avg. loss: 0.936 | test avg. loss: 74.966


  6%|▌         | 2866/50000 [03:59<1:02:54, 12.49it/s]

Epochs: 2863 | epoch avg. loss: 1.944 | test avg. loss: 74.008
Epochs: 2864 | epoch avg. loss: 1.667 | test avg. loss: 74.330
Epochs: 2865 | epoch avg. loss: 1.329 | test avg. loss: 74.921


  6%|▌         | 2868/50000 [03:59<1:04:00, 12.27it/s]

Epochs: 2866 | epoch avg. loss: 1.622 | test avg. loss: 74.511
Epochs: 2867 | epoch avg. loss: 1.442 | test avg. loss: 74.131
Epochs: 2868 | epoch avg. loss: 1.187 | test avg. loss: 74.446


  6%|▌         | 2872/50000 [03:59<59:45, 13.14it/s]  

Epochs: 2869 | epoch avg. loss: 3.407 | test avg. loss: 72.856
Epochs: 2870 | epoch avg. loss: 1.005 | test avg. loss: 73.774
Epochs: 2871 | epoch avg. loss: 1.045 | test avg. loss: 73.646


  6%|▌         | 2874/50000 [03:59<1:05:24, 12.01it/s]

Epochs: 2872 | epoch avg. loss: 0.744 | test avg. loss: 74.048
Epochs: 2873 | epoch avg. loss: 1.125 | test avg. loss: 73.665
Epochs: 2874 | epoch avg. loss: 0.920 | test avg. loss: 73.291


  6%|▌         | 2878/50000 [04:00<1:07:38, 11.61it/s]

Epochs: 2875 | epoch avg. loss: 0.718 | test avg. loss: 72.977
Epochs: 2876 | epoch avg. loss: 0.591 | test avg. loss: 72.746
Epochs: 2877 | epoch avg. loss: 0.544 | test avg. loss: 72.630


  6%|▌         | 2880/50000 [04:00<1:08:03, 11.54it/s]

Epochs: 2878 | epoch avg. loss: 0.615 | test avg. loss: 72.599
Epochs: 2879 | epoch avg. loss: 0.633 | test avg. loss: 72.932
Epochs: 2880 | epoch avg. loss: 0.869 | test avg. loss: 74.303


  6%|▌         | 2884/50000 [04:00<1:11:25, 10.99it/s]

Epochs: 2881 | epoch avg. loss: 1.971 | test avg. loss: 73.036
Epochs: 2882 | epoch avg. loss: 0.966 | test avg. loss: 73.009
Epochs: 2883 | epoch avg. loss: 0.979 | test avg. loss: 72.818


  6%|▌         | 2886/50000 [04:00<1:06:14, 11.85it/s]

Epochs: 2884 | epoch avg. loss: 1.410 | test avg. loss: 72.491
Epochs: 2885 | epoch avg. loss: 0.772 | test avg. loss: 72.629
Epochs: 2886 | epoch avg. loss: 0.886 | test avg. loss: 73.573


  6%|▌         | 2888/50000 [04:01<1:07:20, 11.66it/s]

Epochs: 2887 | epoch avg. loss: 1.280 | test avg. loss: 73.370
Epochs: 2888 | epoch avg. loss: 1.941 | test avg. loss: 72.414
Epochs: 2889 | epoch avg. loss: 0.695 | test avg. loss: 72.772


  6%|▌         | 2892/50000 [04:01<1:07:23, 11.65it/s]

Epochs: 2890 | epoch avg. loss: 0.720 | test avg. loss: 72.769
Epochs: 2891 | epoch avg. loss: 0.631 | test avg. loss: 72.887
Epochs: 2892 | epoch avg. loss: 0.517 | test avg. loss: 73.147


  6%|▌         | 2896/50000 [04:01<1:04:29, 12.17it/s]

Epochs: 2893 | epoch avg. loss: 0.614 | test avg. loss: 73.271
Epochs: 2894 | epoch avg. loss: 0.775 | test avg. loss: 73.137
Epochs: 2895 | epoch avg. loss: 0.820 | test avg. loss: 73.740


  6%|▌         | 2898/50000 [04:01<1:03:20, 12.39it/s]

Epochs: 2896 | epoch avg. loss: 1.069 | test avg. loss: 73.524
Epochs: 2897 | epoch avg. loss: 0.969 | test avg. loss: 73.290
Epochs: 2898 | epoch avg. loss: 0.518 | test avg. loss: 73.381


  6%|▌         | 2898/50000 [04:01<1:03:20, 12.39it/s]

Epochs: 2899 | epoch avg. loss: 0.573 | test avg. loss: 73.333


  6%|▌         | 2902/50000 [04:03<2:49:40,  4.63it/s]

Epochs: 2900 | epoch avg. loss: 0.720 | test avg. loss: 73.434
Epochs: 2901 | epoch avg. loss: 0.657 | test avg. loss: 73.675
Epochs: 2902 | epoch avg. loss: 0.789 | test avg. loss: 73.395


  6%|▌         | 2906/50000 [04:03<1:54:16,  6.87it/s]

Epochs: 2903 | epoch avg. loss: 0.758 | test avg. loss: 73.313
Epochs: 2904 | epoch avg. loss: 0.575 | test avg. loss: 73.390
Epochs: 2905 | epoch avg. loss: 0.635 | test avg. loss: 73.207


  6%|▌         | 2908/50000 [04:03<1:38:08,  8.00it/s]

Epochs: 2906 | epoch avg. loss: 0.563 | test avg. loss: 73.175
Epochs: 2907 | epoch avg. loss: 0.528 | test avg. loss: 73.195
Epochs: 2908 | epoch avg. loss: 0.524 | test avg. loss: 73.365


  6%|▌         | 2912/50000 [04:04<1:25:02,  9.23it/s]

Epochs: 2909 | epoch avg. loss: 0.899 | test avg. loss: 73.156
Epochs: 2910 | epoch avg. loss: 0.606 | test avg. loss: 73.022
Epochs: 2911 | epoch avg. loss: 0.611 | test avg. loss: 72.868


  6%|▌         | 2914/50000 [04:04<1:18:11, 10.04it/s]

Epochs: 2912 | epoch avg. loss: 0.580 | test avg. loss: 72.845
Epochs: 2913 | epoch avg. loss: 0.596 | test avg. loss: 73.204
Epochs: 2914 | epoch avg. loss: 1.276 | test avg. loss: 72.533


  6%|▌         | 2918/50000 [04:04<1:10:21, 11.15it/s]

Epochs: 2915 | epoch avg. loss: 0.571 | test avg. loss: 72.796
Epochs: 2916 | epoch avg. loss: 0.712 | test avg. loss: 72.638
Epochs: 2917 | epoch avg. loss: 0.739 | test avg. loss: 72.698


  6%|▌         | 2920/50000 [04:04<1:09:27, 11.30it/s]

Epochs: 2918 | epoch avg. loss: 0.834 | test avg. loss: 72.786
Epochs: 2919 | epoch avg. loss: 0.688 | test avg. loss: 73.695
Epochs: 2920 | epoch avg. loss: 2.071 | test avg. loss: 72.640


  6%|▌         | 2924/50000 [04:05<1:09:26, 11.30it/s]

Epochs: 2921 | epoch avg. loss: 1.710 | test avg. loss: 74.894
Epochs: 2922 | epoch avg. loss: 2.258 | test avg. loss: 74.676
Epochs: 2923 | epoch avg. loss: 2.587 | test avg. loss: 72.776


  6%|▌         | 2926/50000 [04:05<1:08:46, 11.41it/s]

Epochs: 2924 | epoch avg. loss: 0.936 | test avg. loss: 72.420
Epochs: 2925 | epoch avg. loss: 0.618 | test avg. loss: 72.156
Epochs: 2926 | epoch avg. loss: 0.688 | test avg. loss: 72.127


                                                      

Epochs: 2927 | epoch avg. loss: 0.578 | test avg. loss: 72.268
Epochs: 2928 | epoch avg. loss: 0.550 | test avg. loss: 72.440
Epochs: 2929 | epoch avg. loss: 0.537 | test avg. loss: 72.629


  6%|▌         | 2932/50000 [04:05<1:00:10, 13.04it/s]

Epochs: 2930 | epoch avg. loss: 0.590 | test avg. loss: 73.658
Epochs: 2931 | epoch avg. loss: 1.326 | test avg. loss: 73.720
Epochs: 2932 | epoch avg. loss: 1.704 | test avg. loss: 72.908


  6%|▌         | 2936/50000 [04:06<1:04:24, 12.18it/s]

Epochs: 2933 | epoch avg. loss: 1.635 | test avg. loss: 74.880
Epochs: 2934 | epoch avg. loss: 2.400 | test avg. loss: 75.161
Epochs: 2935 | epoch avg. loss: 3.136 | test avg. loss: 73.589


  6%|▌         | 2938/50000 [04:06<1:00:56, 12.87it/s]

Epochs: 2936 | epoch avg. loss: 4.045 | test avg. loss: 72.962
Epochs: 2937 | epoch avg. loss: 2.046 | test avg. loss: 73.622
Epochs: 2938 | epoch avg. loss: 2.202 | test avg. loss: 73.246


  6%|▌         | 2942/50000 [04:06<58:40, 13.37it/s]  

Epochs: 2939 | epoch avg. loss: 3.711 | test avg. loss: 71.209
Epochs: 2940 | epoch avg. loss: 1.682 | test avg. loss: 71.006
Epochs: 2941 | epoch avg. loss: 1.295 | test avg. loss: 72.661


  6%|▌         | 2944/50000 [04:06<57:29, 13.64it/s]

Epochs: 2942 | epoch avg. loss: 1.983 | test avg. loss: 76.529
Epochs: 2943 | epoch avg. loss: 4.414 | test avg. loss: 74.790
Epochs: 2944 | epoch avg. loss: 5.542 | test avg. loss: 72.461


  6%|▌         | 2948/50000 [04:07<1:04:42, 12.12it/s]

Epochs: 2945 | epoch avg. loss: 4.523 | test avg. loss: 70.883
Epochs: 2946 | epoch avg. loss: 4.285 | test avg. loss: 72.073
Epochs: 2947 | epoch avg. loss: 3.648 | test avg. loss: 77.867


  6%|▌         | 2950/50000 [04:07<1:03:26, 12.36it/s]

Epochs: 2948 | epoch avg. loss: 5.916 | test avg. loss: 77.502
Epochs: 2949 | epoch avg. loss: 7.531 | test avg. loss: 74.019
Epochs: 2950 | epoch avg. loss: 4.594 | test avg. loss: 72.190


  6%|▌         | 2954/50000 [04:07<58:58, 13.30it/s]  

Epochs: 2951 | epoch avg. loss: 6.245 | test avg. loss: 72.090
Epochs: 2952 | epoch avg. loss: 4.703 | test avg. loss: 81.653
Epochs: 2953 | epoch avg. loss: 7.426 | test avg. loss: 80.042


  6%|▌         | 2956/50000 [04:07<59:47, 13.11it/s]

Epochs: 2954 | epoch avg. loss: 5.332 | test avg. loss: 83.977
Epochs: 2955 | epoch avg. loss: 7.483 | test avg. loss: 79.449
Epochs: 2956 | epoch avg. loss: 5.461 | test avg. loss: 76.015


  6%|▌         | 2960/50000 [04:08<1:02:26, 12.55it/s]

Epochs: 2957 | epoch avg. loss: 4.508 | test avg. loss: 73.935
Epochs: 2958 | epoch avg. loss: 2.722 | test avg. loss: 73.822
Epochs: 2959 | epoch avg. loss: 1.497 | test avg. loss: 73.428


  6%|▌         | 2962/50000 [04:08<1:07:04, 11.69it/s]

Epochs: 2960 | epoch avg. loss: 1.165 | test avg. loss: 73.734
Epochs: 2961 | epoch avg. loss: 1.180 | test avg. loss: 74.726
Epochs: 2962 | epoch avg. loss: 1.298 | test avg. loss: 76.900


  6%|▌         | 2966/50000 [04:08<1:05:36, 11.95it/s]

Epochs: 2963 | epoch avg. loss: 2.812 | test avg. loss: 77.104
Epochs: 2964 | epoch avg. loss: 3.700 | test avg. loss: 75.141
Epochs: 2965 | epoch avg. loss: 1.395 | test avg. loss: 75.777


  6%|▌         | 2968/50000 [04:08<1:02:02, 12.63it/s]

Epochs: 2966 | epoch avg. loss: 2.168 | test avg. loss: 75.408
Epochs: 2967 | epoch avg. loss: 1.649 | test avg. loss: 76.489
Epochs: 2968 | epoch avg. loss: 1.658 | test avg. loss: 77.598


  6%|▌         | 2972/50000 [04:09<1:04:38, 12.13it/s]

Epochs: 2969 | epoch avg. loss: 1.685 | test avg. loss: 77.105
Epochs: 2970 | epoch avg. loss: 0.752 | test avg. loss: 77.558
Epochs: 2971 | epoch avg. loss: 1.101 | test avg. loss: 77.622


  6%|▌         | 2974/50000 [04:09<1:04:37, 12.13it/s]

Epochs: 2972 | epoch avg. loss: 0.717 | test avg. loss: 77.835
Epochs: 2973 | epoch avg. loss: 0.756 | test avg. loss: 77.628
Epochs: 2974 | epoch avg. loss: 0.722 | test avg. loss: 77.075


  6%|▌         | 2978/50000 [04:09<1:06:59, 11.70it/s]

Epochs: 2975 | epoch avg. loss: 0.663 | test avg. loss: 77.237
Epochs: 2976 | epoch avg. loss: 1.062 | test avg. loss: 76.675
Epochs: 2977 | epoch avg. loss: 0.853 | test avg. loss: 76.580


  6%|▌         | 2980/50000 [04:09<1:05:33, 11.96it/s]

Epochs: 2978 | epoch avg. loss: 1.032 | test avg. loss: 76.085
Epochs: 2979 | epoch avg. loss: 1.558 | test avg. loss: 75.250
Epochs: 2980 | epoch avg. loss: 0.871 | test avg. loss: 76.008


  6%|▌         | 2982/50000 [04:10<1:07:05, 11.68it/s]

Epochs: 2981 | epoch avg. loss: 1.084 | test avg. loss: 77.051
Epochs: 2982 | epoch avg. loss: 1.630 | test avg. loss: 79.035


  6%|▌         | 2986/50000 [04:10<1:11:07, 11.02it/s]

Epochs: 2983 | epoch avg. loss: 3.966 | test avg. loss: 79.450
Epochs: 2984 | epoch avg. loss: 4.621 | test avg. loss: 77.775
Epochs: 2985 | epoch avg. loss: 4.901 | test avg. loss: 75.436


  6%|▌         | 2988/50000 [04:10<1:10:18, 11.14it/s]

Epochs: 2986 | epoch avg. loss: 4.847 | test avg. loss: 75.626
Epochs: 2987 | epoch avg. loss: 3.730 | test avg. loss: 81.049
Epochs: 2988 | epoch avg. loss: 5.410 | test avg. loss: 86.447


  6%|▌         | 2992/50000 [04:10<1:04:52, 12.08it/s]

Epochs: 2989 | epoch avg. loss: 8.438 | test avg. loss: 88.786
Epochs: 2990 | epoch avg. loss: 9.740 | test avg. loss: 83.801
Epochs: 2991 | epoch avg. loss: 6.415 | test avg. loss: 78.893


  6%|▌         | 2994/50000 [04:11<1:07:42, 11.57it/s]

Epochs: 2992 | epoch avg. loss: 2.460 | test avg. loss: 78.641
Epochs: 2993 | epoch avg. loss: 3.614 | test avg. loss: 78.212
Epochs: 2994 | epoch avg. loss: 2.395 | test avg. loss: 77.082


  6%|▌         | 2998/50000 [04:11<1:05:37, 11.94it/s]

Epochs: 2995 | epoch avg. loss: 0.893 | test avg. loss: 77.632
Epochs: 2996 | epoch avg. loss: 1.022 | test avg. loss: 78.236
Epochs: 2997 | epoch avg. loss: 1.268 | test avg. loss: 79.279


  6%|▌         | 2998/50000 [04:11<1:05:37, 11.94it/s]

Epochs: 2998 | epoch avg. loss: 1.627 | test avg. loss: 79.797
Epochs: 2999 | epoch avg. loss: 1.297 | test avg. loss: 81.363


  6%|▌         | 3002/50000 [04:13<2:53:34,  4.51it/s]

Epochs: 3000 | epoch avg. loss: 2.956 | test avg. loss: 82.520
Epochs: 3001 | epoch avg. loss: 4.357 | test avg. loss: 79.610
Epochs: 3002 | epoch avg. loss: 2.915 | test avg. loss: 79.197


  6%|▌         | 3006/50000 [04:13<1:52:58,  6.93it/s]

Epochs: 3003 | epoch avg. loss: 2.840 | test avg. loss: 79.937
Epochs: 3004 | epoch avg. loss: 1.734 | test avg. loss: 80.801
Epochs: 3005 | epoch avg. loss: 1.779 | test avg. loss: 83.214


                                                      

Epochs: 3006 | epoch avg. loss: 3.413 | test avg. loss: 83.397
Epochs: 3007 | epoch avg. loss: 3.335 | test avg. loss: 81.473
Epochs: 3008 | epoch avg. loss: 3.187 | test avg. loss: 79.950




Epochs: 3009 | epoch avg. loss: 1.769 | test avg. loss: 80.005
Epochs: 3010 | epoch avg. loss: 1.967 | test avg. loss: 82.537
Epochs: 3011 | epoch avg. loss: 2.828 | test avg. loss: 83.889


  6%|▌         | 3014/50000 [04:14<1:08:54, 11.37it/s]

Epochs: 3012 | epoch avg. loss: 4.744 | test avg. loss: 81.477
Epochs: 3013 | epoch avg. loss: 1.637 | test avg. loss: 81.447
Epochs: 3014 | epoch avg. loss: 2.902 | test avg. loss: 82.056


  6%|▌         | 3016/50000 [04:14<1:13:15, 10.69it/s]

Epochs: 3015 | epoch avg. loss: 6.623 | test avg. loss: 81.072
Epochs: 3016 | epoch avg. loss: 4.858 | test avg. loss: 79.767
Epochs: 3017 | epoch avg. loss: 3.766 | test avg. loss: 83.291


  6%|▌         | 3020/50000 [04:14<1:08:48, 11.38it/s]

Epochs: 3018 | epoch avg. loss: 4.917 | test avg. loss: 85.703
Epochs: 3019 | epoch avg. loss: 6.289 | test avg. loss: 85.099
Epochs: 3020 | epoch avg. loss: 5.067 | test avg. loss: 81.973


  6%|▌         | 3024/50000 [04:14<1:01:16, 12.78it/s]

Epochs: 3021 | epoch avg. loss: 4.862 | test avg. loss: 80.235
Epochs: 3022 | epoch avg. loss: 1.738 | test avg. loss: 80.884
Epochs: 3023 | epoch avg. loss: 1.256 | test avg. loss: 80.936


  6%|▌         | 3026/50000 [04:14<1:02:53, 12.45it/s]

Epochs: 3024 | epoch avg. loss: 1.087 | test avg. loss: 81.329
Epochs: 3025 | epoch avg. loss: 1.278 | test avg. loss: 81.016
Epochs: 3026 | epoch avg. loss: 0.707 | test avg. loss: 81.248


  6%|▌         | 3030/50000 [04:15<1:03:14, 12.38it/s]

Epochs: 3027 | epoch avg. loss: 0.841 | test avg. loss: 81.260
Epochs: 3028 | epoch avg. loss: 0.539 | test avg. loss: 81.348
Epochs: 3029 | epoch avg. loss: 0.532 | test avg. loss: 81.419


  6%|▌         | 3032/50000 [04:15<1:02:33, 12.51it/s]

Epochs: 3030 | epoch avg. loss: 0.525 | test avg. loss: 81.395
Epochs: 3031 | epoch avg. loss: 0.487 | test avg. loss: 81.332
Epochs: 3032 | epoch avg. loss: 0.676 | test avg. loss: 82.287


  6%|▌         | 3036/50000 [04:15<57:38, 13.58it/s]

Epochs: 3033 | epoch avg. loss: 1.270 | test avg. loss: 84.455
Epochs: 3034 | epoch avg. loss: 3.227 | test avg. loss: 82.981
Epochs: 3035 | epoch avg. loss: 2.565 | test avg. loss: 81.599
Epochs: 3036 | epoch avg. loss: 1.020 | test avg. loss: 81.150


  6%|▌         | 3040/50000 [04:15<1:01:18, 12.76it/s]

Epochs: 3037 | epoch avg. loss: 0.587 | test avg. loss: 80.821
Epochs: 3038 | epoch avg. loss: 0.513 | test avg. loss: 80.856
Epochs: 3039 | epoch avg. loss: 0.475 | test avg. loss: 80.630


  6%|▌         | 3042/50000 [04:16<1:06:16, 11.81it/s]

Epochs: 3040 | epoch avg. loss: 0.411 | test avg. loss: 81.070
Epochs: 3041 | epoch avg. loss: 0.524 | test avg. loss: 81.430
Epochs: 3042 | epoch avg. loss: 0.722 | test avg. loss: 83.411


  6%|▌         | 3046/50000 [04:16<1:03:55, 12.24it/s]

Epochs: 3043 | epoch avg. loss: 1.957 | test avg. loss: 82.477
Epochs: 3044 | epoch avg. loss: 1.897 | test avg. loss: 81.584
Epochs: 3045 | epoch avg. loss: 0.639 | test avg. loss: 81.682


  6%|▌         | 3048/50000 [04:16<1:02:01, 12.62it/s]

Epochs: 3046 | epoch avg. loss: 0.822 | test avg. loss: 81.654
Epochs: 3047 | epoch avg. loss: 0.901 | test avg. loss: 83.204
Epochs: 3048 | epoch avg. loss: 1.620 | test avg. loss: 83.086


  6%|▌         | 3052/50000 [04:16<1:01:36, 12.70it/s]

Epochs: 3049 | epoch avg. loss: 1.791 | test avg. loss: 83.083
Epochs: 3050 | epoch avg. loss: 1.867 | test avg. loss: 81.467
Epochs: 3051 | epoch avg. loss: 1.481 | test avg. loss: 81.348


  6%|▌         | 3054/50000 [04:17<1:00:30, 12.93it/s]

Epochs: 3052 | epoch avg. loss: 1.904 | test avg. loss: 80.681
Epochs: 3053 | epoch avg. loss: 2.452 | test avg. loss: 79.971
Epochs: 3054 | epoch avg. loss: 2.173 | test avg. loss: 82.867


  6%|▌         | 3058/50000 [04:17<57:20, 13.64it/s]

Epochs: 3055 | epoch avg. loss: 2.906 | test avg. loss: 82.000
Epochs: 3056 | epoch avg. loss: 1.651 | test avg. loss: 81.854
Epochs: 3057 | epoch avg. loss: 1.345 | test avg. loss: 81.675


  6%|▌         | 3060/50000 [04:17<57:23, 13.63it/s]

Epochs: 3058 | epoch avg. loss: 1.083 | test avg. loss: 82.421
Epochs: 3059 | epoch avg. loss: 1.588 | test avg. loss: 81.612
Epochs: 3060 | epoch avg. loss: 1.086 | test avg. loss: 81.588


  6%|▌         | 3064/50000 [04:17<1:01:21, 12.75it/s]

Epochs: 3061 | epoch avg. loss: 1.018 | test avg. loss: 82.032
Epochs: 3062 | epoch avg. loss: 1.257 | test avg. loss: 84.222
Epochs: 3063 | epoch avg. loss: 2.305 | test avg. loss: 85.135


  6%|▌         | 3066/50000 [04:18<1:04:16, 12.17it/s]

Epochs: 3064 | epoch avg. loss: 2.421 | test avg. loss: 83.821
Epochs: 3065 | epoch avg. loss: 2.321 | test avg. loss: 82.958
Epochs: 3066 | epoch avg. loss: 1.734 | test avg. loss: 82.620


  6%|▌         | 3070/50000 [04:18<1:01:39, 12.69it/s]

Epochs: 3067 | epoch avg. loss: 1.132 | test avg. loss: 82.443
Epochs: 3068 | epoch avg. loss: 1.228 | test avg. loss: 84.242
Epochs: 3069 | epoch avg. loss: 1.648 | test avg. loss: 84.628


  6%|▌         | 3072/50000 [04:18<59:05, 13.24it/s]

Epochs: 3070 | epoch avg. loss: 2.302 | test avg. loss: 85.969
Epochs: 3071 | epoch avg. loss: 3.523 | test avg. loss: 84.375
Epochs: 3072 | epoch avg. loss: 4.965 | test avg. loss: 82.891


  6%|▌         | 3076/50000 [04:18<59:43, 13.10it/s]  

Epochs: 3073 | epoch avg. loss: 5.296 | test avg. loss: 80.763
Epochs: 3074 | epoch avg. loss: 3.590 | test avg. loss: 81.698
Epochs: 3075 | epoch avg. loss: 2.380 | test avg. loss: 83.353


  6%|▌         | 3078/50000 [04:19<59:59, 13.04it/s]

Epochs: 3076 | epoch avg. loss: 2.483 | test avg. loss: 83.537
Epochs: 3077 | epoch avg. loss: 2.259 | test avg. loss: 86.936
Epochs: 3078 | epoch avg. loss: 4.384 | test avg. loss: 85.096


                                                    

Epochs: 3079 | epoch avg. loss: 2.913 | test avg. loss: 84.887
Epochs: 3080 | epoch avg. loss: 3.619 | test avg. loss: 83.495
Epochs: 3081 | epoch avg. loss: 3.616 | test avg. loss: 85.005


  6%|▌         | 3084/50000 [04:19<56:45, 13.78it/s]

Epochs: 3082 | epoch avg. loss: 7.186 | test avg. loss: 80.854
Epochs: 3083 | epoch avg. loss: 4.280 | test avg. loss: 82.277
Epochs: 3084 | epoch avg. loss: 2.973 | test avg. loss: 82.259


  6%|▌         | 3088/50000 [04:19<57:10, 13.67it/s]

Epochs: 3085 | epoch avg. loss: 2.009 | test avg. loss: 82.399
Epochs: 3086 | epoch avg. loss: 2.132 | test avg. loss: 84.403
Epochs: 3087 | epoch avg. loss: 2.763 | test avg. loss: 82.971


  6%|▌         | 3090/50000 [04:19<55:44, 14.03it/s]

Epochs: 3088 | epoch avg. loss: 2.491 | test avg. loss: 82.262
Epochs: 3089 | epoch avg. loss: 2.029 | test avg. loss: 82.188
Epochs: 3090 | epoch avg. loss: 1.949 | test avg. loss: 80.666


  6%|▌         | 3094/50000 [04:20<57:37, 13.57it/s]

Epochs: 3091 | epoch avg. loss: 1.017 | test avg. loss: 80.783
Epochs: 3092 | epoch avg. loss: 0.865 | test avg. loss: 81.964
Epochs: 3093 | epoch avg. loss: 1.666 | test avg. loss: 85.102


  6%|▌         | 3096/50000 [04:20<55:46, 14.02it/s]

Epochs: 3094 | epoch avg. loss: 3.005 | test avg. loss: 83.853
Epochs: 3095 | epoch avg. loss: 2.416 | test avg. loss: 84.869
Epochs: 3096 | epoch avg. loss: 3.009 | test avg. loss: 83.163


  6%|▌         | 3098/50000 [04:20<55:55, 13.98it/s]

Epochs: 3097 | epoch avg. loss: 1.901 | test avg. loss: 82.908
Epochs: 3098 | epoch avg. loss: 0.895 | test avg. loss: 83.509
Epochs: 3099 | epoch avg. loss: 2.267 | test avg. loss: 83.710


  6%|▌         | 3102/50000 [04:21<2:34:29,  5.06it/s]

Epochs: 3100 | epoch avg. loss: 3.924 | test avg. loss: 83.307
Epochs: 3101 | epoch avg. loss: 3.269 | test avg. loss: 84.931
Epochs: 3102 | epoch avg. loss: 3.066 | test avg. loss: 88.503


  6%|▌         | 3106/50000 [04:22<1:44:09,  7.50it/s]

Epochs: 3103 | epoch avg. loss: 4.458 | test avg. loss: 88.689
Epochs: 3104 | epoch avg. loss: 4.468 | test avg. loss: 86.226
Epochs: 3105 | epoch avg. loss: 3.027 | test avg. loss: 85.402


  6%|▌         | 3110/50000 [04:22<1:17:44, 10.05it/s]

Epochs: 3106 | epoch avg. loss: 2.034 | test avg. loss: 84.509
Epochs: 3107 | epoch avg. loss: 1.328 | test avg. loss: 84.165
Epochs: 3108 | epoch avg. loss: 0.690 | test avg. loss: 84.124
Epochs: 3109 | epoch avg. loss: 0.622 | test avg. loss: 84.243




Epochs: 3110 | epoch avg. loss: 0.572 | test avg. loss: 83.875
Epochs: 3111 | epoch avg. loss: 0.553 | test avg. loss: 84.804
Epochs: 3112 | epoch avg. loss: 1.028 | test avg. loss: 84.651


  6%|▌         | 3116/50000 [04:22<1:03:37, 12.28it/s]

Epochs: 3113 | epoch avg. loss: 1.238 | test avg. loss: 84.335
Epochs: 3114 | epoch avg. loss: 0.801 | test avg. loss: 84.423
Epochs: 3115 | epoch avg. loss: 1.472 | test avg. loss: 83.842


  6%|▌         | 3118/50000 [04:23<1:04:36, 12.09it/s]

Epochs: 3116 | epoch avg. loss: 0.674 | test avg. loss: 83.837
Epochs: 3117 | epoch avg. loss: 1.323 | test avg. loss: 83.581
Epochs: 3118 | epoch avg. loss: 0.947 | test avg. loss: 84.101


  6%|▌         | 3122/50000 [04:23<1:07:33, 11.57it/s]

Epochs: 3119 | epoch avg. loss: 1.130 | test avg. loss: 84.790
Epochs: 3120 | epoch avg. loss: 1.600 | test avg. loss: 86.101
Epochs: 3121 | epoch avg. loss: 2.848 | test avg. loss: 83.700


  6%|▌         | 3124/50000 [04:23<1:08:39, 11.38it/s]

Epochs: 3122 | epoch avg. loss: 1.281 | test avg. loss: 83.770
Epochs: 3123 | epoch avg. loss: 1.946 | test avg. loss: 82.306
Epochs: 3124 | epoch avg. loss: 0.541 | test avg. loss: 82.604


  6%|▋         | 3128/50000 [04:23<1:07:36, 11.55it/s]

Epochs: 3125 | epoch avg. loss: 0.500 | test avg. loss: 82.915
Epochs: 3126 | epoch avg. loss: 0.483 | test avg. loss: 83.244
Epochs: 3127 | epoch avg. loss: 0.600 | test avg. loss: 84.104


  6%|▋         | 3130/50000 [04:24<1:08:34, 11.39it/s]

Epochs: 3128 | epoch avg. loss: 1.052 | test avg. loss: 85.273
Epochs: 3129 | epoch avg. loss: 1.825 | test avg. loss: 84.849
Epochs: 3130 | epoch avg. loss: 1.616 | test avg. loss: 83.727


  6%|▋         | 3134/50000 [04:24<1:07:45, 11.53it/s]

Epochs: 3131 | epoch avg. loss: 1.057 | test avg. loss: 83.689
Epochs: 3132 | epoch avg. loss: 1.080 | test avg. loss: 83.023
Epochs: 3133 | epoch avg. loss: 0.662 | test avg. loss: 83.342


  6%|▋         | 3136/50000 [04:24<1:05:56, 11.84it/s]

Epochs: 3134 | epoch avg. loss: 0.700 | test avg. loss: 84.382
Epochs: 3135 | epoch avg. loss: 1.020 | test avg. loss: 84.990
Epochs: 3136 | epoch avg. loss: 1.522 | test avg. loss: 84.406


  6%|▋         | 3140/50000 [04:24<1:02:06, 12.58it/s]

Epochs: 3137 | epoch avg. loss: 1.320 | test avg. loss: 83.919
Epochs: 3138 | epoch avg. loss: 0.432 | test avg. loss: 84.109
Epochs: 3139 | epoch avg. loss: 0.758 | test avg. loss: 83.655


  6%|▋         | 3142/50000 [04:25<1:03:57, 12.21it/s]

Epochs: 3140 | epoch avg. loss: 0.726 | test avg. loss: 83.419
Epochs: 3141 | epoch avg. loss: 0.986 | test avg. loss: 85.549
Epochs: 3142 | epoch avg. loss: 1.782 | test avg. loss: 86.140


  6%|▋         | 3146/50000 [04:25<57:42, 13.53it/s]

Epochs: 3143 | epoch avg. loss: 2.252 | test avg. loss: 85.795
Epochs: 3144 | epoch avg. loss: 2.169 | test avg. loss: 84.279
Epochs: 3145 | epoch avg. loss: 1.185 | test avg. loss: 83.439
Epochs: 3146 | epoch avg. loss: 0.883 | test avg. loss: 83.001


  6%|▋         | 3150/50000 [04:25<53:25, 14.62it/s]

Epochs: 3147 | epoch avg. loss: 0.558 | test avg. loss: 83.375
Epochs: 3148 | epoch avg. loss: 0.923 | test avg. loss: 84.469
Epochs: 3149 | epoch avg. loss: 1.723 | test avg. loss: 83.225


  6%|▋         | 3152/50000 [04:25<55:18, 14.12it/s]

Epochs: 3150 | epoch avg. loss: 1.395 | test avg. loss: 82.523
Epochs: 3151 | epoch avg. loss: 1.059 | test avg. loss: 81.992
Epochs: 3152 | epoch avg. loss: 1.008 | test avg. loss: 82.174


  6%|▋         | 3154/50000 [04:26<1:01:02, 12.79it/s]

Epochs: 3153 | epoch avg. loss: 0.605 | test avg. loss: 82.352
Epochs: 3154 | epoch avg. loss: 0.352 | test avg. loss: 82.591


  6%|▋         | 3158/50000 [04:26<1:06:54, 11.67it/s]

Epochs: 3155 | epoch avg. loss: 0.308 | test avg. loss: 82.950
Epochs: 3156 | epoch avg. loss: 0.265 | test avg. loss: 83.275
Epochs: 3157 | epoch avg. loss: 0.302 | test avg. loss: 83.412


  6%|▋         | 3160/50000 [04:26<1:04:43, 12.06it/s]

Epochs: 3158 | epoch avg. loss: 0.240 | test avg. loss: 83.494
Epochs: 3159 | epoch avg. loss: 0.335 | test avg. loss: 83.672
Epochs: 3160 | epoch avg. loss: 0.373 | test avg. loss: 83.606


  6%|▋         | 3164/50000 [04:26<1:00:47, 12.84it/s]

Epochs: 3161 | epoch avg. loss: 0.261 | test avg. loss: 83.547
Epochs: 3162 | epoch avg. loss: 0.267 | test avg. loss: 83.511
Epochs: 3163 | epoch avg. loss: 0.290 | test avg. loss: 83.391


  6%|▋         | 3166/50000 [04:27<1:04:17, 12.14it/s]

Epochs: 3164 | epoch avg. loss: 0.246 | test avg. loss: 83.272
Epochs: 3165 | epoch avg. loss: 0.230 | test avg. loss: 83.215
Epochs: 3166 | epoch avg. loss: 0.271 | test avg. loss: 83.220


  6%|▋         | 3170/50000 [04:27<59:33, 13.11it/s]  

Epochs: 3167 | epoch avg. loss: 0.254 | test avg. loss: 83.245
Epochs: 3168 | epoch avg. loss: 0.232 | test avg. loss: 83.209
Epochs: 3169 | epoch avg. loss: 0.264 | test avg. loss: 83.145


  6%|▋         | 3174/50000 [04:27<56:29, 13.82it/s]

Epochs: 3170 | epoch avg. loss: 0.230 | test avg. loss: 83.027
Epochs: 3171 | epoch avg. loss: 0.300 | test avg. loss: 83.011
Epochs: 3172 | epoch avg. loss: 0.286 | test avg. loss: 83.219
Epochs: 3173 | epoch avg. loss: 0.370 | test avg. loss: 83.056


  6%|▋         | 3176/50000 [04:27<55:41, 14.01it/s]

Epochs: 3174 | epoch avg. loss: 0.456 | test avg. loss: 82.894
Epochs: 3175 | epoch avg. loss: 0.306 | test avg. loss: 83.165
Epochs: 3176 | epoch avg. loss: 0.469 | test avg. loss: 82.791


  6%|▋         | 3180/50000 [04:27<1:00:04, 12.99it/s]

Epochs: 3177 | epoch avg. loss: 0.348 | test avg. loss: 82.758
Epochs: 3178 | epoch avg. loss: 0.422 | test avg. loss: 83.328
Epochs: 3179 | epoch avg. loss: 0.663 | test avg. loss: 82.762


  6%|▋         | 3182/50000 [04:28<1:00:37, 12.87it/s]

Epochs: 3180 | epoch avg. loss: 0.285 | test avg. loss: 83.070
Epochs: 3181 | epoch avg. loss: 0.496 | test avg. loss: 82.713
Epochs: 3182 | epoch avg. loss: 0.340 | test avg. loss: 82.756


  6%|▋         | 3186/50000 [04:28<1:04:45, 12.05it/s]

Epochs: 3183 | epoch avg. loss: 0.363 | test avg. loss: 82.323
Epochs: 3184 | epoch avg. loss: 0.462 | test avg. loss: 82.087
Epochs: 3185 | epoch avg. loss: 0.246 | test avg. loss: 81.947


  6%|▋         | 3188/50000 [04:28<1:04:33, 12.09it/s]

Epochs: 3186 | epoch avg. loss: 0.277 | test avg. loss: 82.015
Epochs: 3187 | epoch avg. loss: 0.548 | test avg. loss: 82.181
Epochs: 3188 | epoch avg. loss: 0.450 | test avg. loss: 82.026


  6%|▋         | 3192/50000 [04:28<1:03:53, 12.21it/s]

Epochs: 3189 | epoch avg. loss: 0.400 | test avg. loss: 82.369
Epochs: 3190 | epoch avg. loss: 0.445 | test avg. loss: 82.219
Epochs: 3191 | epoch avg. loss: 0.331 | test avg. loss: 82.781


  6%|▋         | 3194/50000 [04:29<1:01:03, 12.78it/s]

Epochs: 3192 | epoch avg. loss: 0.706 | test avg. loss: 82.506
Epochs: 3193 | epoch avg. loss: 1.138 | test avg. loss: 81.834
Epochs: 3194 | epoch avg. loss: 0.677 | test avg. loss: 81.579


  6%|▋         | 3198/50000 [04:29<1:06:43, 11.69it/s]

Epochs: 3195 | epoch avg. loss: 0.674 | test avg. loss: 82.137
Epochs: 3196 | epoch avg. loss: 1.054 | test avg. loss: 81.691
Epochs: 3197 | epoch avg. loss: 0.562 | test avg. loss: 81.736


  6%|▋         | 3198/50000 [04:29<1:06:43, 11.69it/s]

Epochs: 3198 | epoch avg. loss: 0.337 | test avg. loss: 82.174
Epochs: 3199 | epoch avg. loss: 0.582 | test avg. loss: 82.257


  6%|▋         | 3202/50000 [04:31<2:58:03,  4.38it/s]

Epochs: 3200 | epoch avg. loss: 0.499 | test avg. loss: 82.273
Epochs: 3201 | epoch avg. loss: 0.371 | test avg. loss: 82.526
Epochs: 3202 | epoch avg. loss: 0.289 | test avg. loss: 82.737


  6%|▋         | 3206/50000 [04:31<2:01:26,  6.42it/s]

Epochs: 3203 | epoch avg. loss: 0.312 | test avg. loss: 83.148
Epochs: 3204 | epoch avg. loss: 0.552 | test avg. loss: 83.058
Epochs: 3205 | epoch avg. loss: 0.435 | test avg. loss: 82.937


  6%|▋         | 3208/50000 [04:31<1:43:36,  7.53it/s]

Epochs: 3206 | epoch avg. loss: 0.308 | test avg. loss: 82.960
Epochs: 3207 | epoch avg. loss: 0.345 | test avg. loss: 82.979
Epochs: 3208 | epoch avg. loss: 0.534 | test avg. loss: 82.408


  6%|▋         | 3212/50000 [04:32<1:26:01,  9.06it/s]

Epochs: 3209 | epoch avg. loss: 0.240 | test avg. loss: 82.264
Epochs: 3210 | epoch avg. loss: 0.206 | test avg. loss: 82.081
Epochs: 3211 | epoch avg. loss: 0.422 | test avg. loss: 82.031


  6%|▋         | 3214/50000 [04:32<1:22:20,  9.47it/s]

Epochs: 3212 | epoch avg. loss: 0.287 | test avg. loss: 82.377
Epochs: 3213 | epoch avg. loss: 0.375 | test avg. loss: 82.214
Epochs: 3214 | epoch avg. loss: 0.245 | test avg. loss: 82.320


                                                      

Epochs: 3215 | epoch avg. loss: 0.206 | test avg. loss: 82.394
Epochs: 3216 | epoch avg. loss: 0.235 | test avg. loss: 82.478
Epochs: 3217 | epoch avg. loss: 0.239 | test avg. loss: 82.740


  6%|▋         | 3220/50000 [04:32<1:04:38, 12.06it/s]

Epochs: 3218 | epoch avg. loss: 0.365 | test avg. loss: 82.633
Epochs: 3219 | epoch avg. loss: 0.295 | test avg. loss: 82.789
Epochs: 3220 | epoch avg. loss: 0.251 | test avg. loss: 82.755


  6%|▋         | 3224/50000 [04:32<1:06:22, 11.74it/s]

Epochs: 3221 | epoch avg. loss: 0.437 | test avg. loss: 82.803
Epochs: 3222 | epoch avg. loss: 0.400 | test avg. loss: 82.896
Epochs: 3223 | epoch avg. loss: 0.578 | test avg. loss: 82.512


  6%|▋         | 3226/50000 [04:33<1:06:27, 11.73it/s]

Epochs: 3224 | epoch avg. loss: 0.322 | test avg. loss: 82.633
Epochs: 3225 | epoch avg. loss: 0.678 | test avg. loss: 82.849
Epochs: 3226 | epoch avg. loss: 0.857 | test avg. loss: 82.312


  6%|▋         | 3230/50000 [04:33<1:03:54, 12.20it/s]

Epochs: 3227 | epoch avg. loss: 0.665 | test avg. loss: 82.478
Epochs: 3228 | epoch avg. loss: 0.555 | test avg. loss: 84.040
Epochs: 3229 | epoch avg. loss: 1.198 | test avg. loss: 83.397


  6%|▋         | 3232/50000 [04:33<1:03:33, 12.26it/s]

Epochs: 3230 | epoch avg. loss: 0.694 | test avg. loss: 83.169
Epochs: 3231 | epoch avg. loss: 0.481 | test avg. loss: 82.832
Epochs: 3232 | epoch avg. loss: 0.342 | test avg. loss: 82.456


  6%|▋         | 3236/50000 [04:33<1:04:45, 12.03it/s]

Epochs: 3233 | epoch avg. loss: 0.254 | test avg. loss: 82.131
Epochs: 3234 | epoch avg. loss: 0.380 | test avg. loss: 82.327
Epochs: 3235 | epoch avg. loss: 0.624 | test avg. loss: 82.544


  6%|▋         | 3238/50000 [04:34<1:07:04, 11.62it/s]

Epochs: 3236 | epoch avg. loss: 1.116 | test avg. loss: 81.706
Epochs: 3237 | epoch avg. loss: 0.420 | test avg. loss: 82.166
Epochs: 3238 | epoch avg. loss: 0.623 | test avg. loss: 82.571


  6%|▋         | 3242/50000 [04:34<59:59, 12.99it/s]

Epochs: 3239 | epoch avg. loss: 0.938 | test avg. loss: 81.975
Epochs: 3240 | epoch avg. loss: 0.384 | test avg. loss: 81.967
Epochs: 3241 | epoch avg. loss: 0.335 | test avg. loss: 81.976
Epochs: 3242 | epoch avg. loss: 0.382 | test avg. loss: 81.958


  6%|▋         | 3246/50000 [04:34<56:34, 13.77it/s]

Epochs: 3243 | epoch avg. loss: 0.290 | test avg. loss: 82.039
Epochs: 3244 | epoch avg. loss: 0.243 | test avg. loss: 82.087
Epochs: 3245 | epoch avg. loss: 0.208 | test avg. loss: 82.152


  6%|▋         | 3248/50000 [04:34<57:10, 13.63it/s]

Epochs: 3246 | epoch avg. loss: 0.205 | test avg. loss: 82.264
Epochs: 3247 | epoch avg. loss: 0.270 | test avg. loss: 82.239
Epochs: 3248 | epoch avg. loss: 0.282 | test avg. loss: 82.159


  7%|▋         | 3252/50000 [04:35<1:01:11, 12.73it/s]

Epochs: 3249 | epoch avg. loss: 0.215 | test avg. loss: 82.176
Epochs: 3250 | epoch avg. loss: 0.265 | test avg. loss: 82.703
Epochs: 3251 | epoch avg. loss: 0.634 | test avg. loss: 82.453


  7%|▋         | 3254/50000 [04:35<1:00:38, 12.85it/s]

Epochs: 3252 | epoch avg. loss: 0.466 | test avg. loss: 82.283
Epochs: 3253 | epoch avg. loss: 0.425 | test avg. loss: 82.319
Epochs: 3254 | epoch avg. loss: 0.266 | test avg. loss: 82.391


  7%|▋         | 3258/50000 [04:35<57:29, 13.55it/s]

Epochs: 3255 | epoch avg. loss: 0.345 | test avg. loss: 83.142
Epochs: 3256 | epoch avg. loss: 0.720 | test avg. loss: 83.804
Epochs: 3257 | epoch avg. loss: 1.362 | test avg. loss: 83.215
Epochs: 3258 | epoch avg. loss: 0.465 | test avg. loss: 83.276


  7%|▋         | 3262/50000 [04:35<58:37, 13.29it/s]

Epochs: 3259 | epoch avg. loss: 0.792 | test avg. loss: 83.233
Epochs: 3260 | epoch avg. loss: 0.667 | test avg. loss: 85.486
Epochs: 3261 | epoch avg. loss: 2.138 | test avg. loss: 83.876


  7%|▋         | 3264/50000 [04:36<1:01:53, 12.59it/s]

Epochs: 3262 | epoch avg. loss: 1.497 | test avg. loss: 82.552
Epochs: 3263 | epoch avg. loss: 0.578 | test avg. loss: 82.179
Epochs: 3264 | epoch avg. loss: 0.567 | test avg. loss: 81.972


  7%|▋         | 3268/50000 [04:36<59:44, 13.04it/s]  

Epochs: 3265 | epoch avg. loss: 0.883 | test avg. loss: 83.791
Epochs: 3266 | epoch avg. loss: 1.805 | test avg. loss: 83.953
Epochs: 3267 | epoch avg. loss: 1.453 | test avg. loss: 83.275


  7%|▋         | 3270/50000 [04:36<58:38, 13.28it/s]

Epochs: 3268 | epoch avg. loss: 1.129 | test avg. loss: 82.803
Epochs: 3269 | epoch avg. loss: 1.446 | test avg. loss: 84.957
Epochs: 3270 | epoch avg. loss: 1.885 | test avg. loss: 85.162


  7%|▋         | 3274/50000 [04:36<57:28, 13.55it/s]

Epochs: 3271 | epoch avg. loss: 2.415 | test avg. loss: 84.248
Epochs: 3272 | epoch avg. loss: 2.006 | test avg. loss: 82.642
Epochs: 3273 | epoch avg. loss: 0.559 | test avg. loss: 82.385


  7%|▋         | 3276/50000 [04:37<1:01:17, 12.71it/s]

Epochs: 3274 | epoch avg. loss: 0.811 | test avg. loss: 83.761
Epochs: 3275 | epoch avg. loss: 1.238 | test avg. loss: 83.308
Epochs: 3276 | epoch avg. loss: 0.883 | test avg. loss: 83.573


  7%|▋         | 3280/50000 [04:37<1:00:10, 12.94it/s]

Epochs: 3277 | epoch avg. loss: 1.206 | test avg. loss: 83.129
Epochs: 3278 | epoch avg. loss: 1.232 | test avg. loss: 83.953
Epochs: 3279 | epoch avg. loss: 1.022 | test avg. loss: 85.869




Epochs: 3280 | epoch avg. loss: 2.468 | test avg. loss: 83.619
Epochs: 3281 | epoch avg. loss: 0.845 | test avg. loss: 83.404
Epochs: 3282 | epoch avg. loss: 0.887 | test avg. loss: 83.036


  7%|▋         | 3286/50000 [04:37<56:31, 13.78it/s]

Epochs: 3283 | epoch avg. loss: 0.474 | test avg. loss: 82.695
Epochs: 3284 | epoch avg. loss: 0.392 | test avg. loss: 82.581
Epochs: 3285 | epoch avg. loss: 0.465 | test avg. loss: 83.472


  7%|▋         | 3288/50000 [04:37<58:58, 13.20it/s]

Epochs: 3286 | epoch avg. loss: 1.115 | test avg. loss: 82.853
Epochs: 3287 | epoch avg. loss: 1.305 | test avg. loss: 82.138
Epochs: 3288 | epoch avg. loss: 0.450 | test avg. loss: 82.561


  7%|▋         | 3292/50000 [04:38<59:31, 13.08it/s]  

Epochs: 3289 | epoch avg. loss: 0.619 | test avg. loss: 82.451
Epochs: 3290 | epoch avg. loss: 0.861 | test avg. loss: 81.999
Epochs: 3291 | epoch avg. loss: 0.317 | test avg. loss: 82.169


  7%|▋         | 3294/50000 [04:38<1:01:25, 12.67it/s]

Epochs: 3292 | epoch avg. loss: 0.513 | test avg. loss: 81.964
Epochs: 3293 | epoch avg. loss: 0.222 | test avg. loss: 82.043
Epochs: 3294 | epoch avg. loss: 0.381 | test avg. loss: 82.053


  7%|▋         | 3298/50000 [04:38<59:58, 12.98it/s]  

Epochs: 3295 | epoch avg. loss: 0.360 | test avg. loss: 82.745
Epochs: 3296 | epoch avg. loss: 0.617 | test avg. loss: 82.796
Epochs: 3297 | epoch avg. loss: 0.937 | test avg. loss: 82.514


  7%|▋         | 3298/50000 [04:38<59:58, 12.98it/s]

Epochs: 3298 | epoch avg. loss: 0.405 | test avg. loss: 82.629
Epochs: 3299 | epoch avg. loss: 0.334 | test avg. loss: 82.599


  7%|▋         | 3302/50000 [04:40<2:36:17,  4.98it/s]

Epochs: 3300 | epoch avg. loss: 0.359 | test avg. loss: 82.487
Epochs: 3301 | epoch avg. loss: 0.313 | test avg. loss: 82.328
Epochs: 3302 | epoch avg. loss: 0.422 | test avg. loss: 82.130


  7%|▋         | 3306/50000 [04:40<1:46:44,  7.29it/s]

Epochs: 3303 | epoch avg. loss: 0.214 | test avg. loss: 82.091
Epochs: 3304 | epoch avg. loss: 0.246 | test avg. loss: 82.261
Epochs: 3305 | epoch avg. loss: 0.323 | test avg. loss: 82.406


  7%|▋         | 3308/50000 [04:40<1:32:11,  8.44it/s]

Epochs: 3306 | epoch avg. loss: 0.343 | test avg. loss: 82.504
Epochs: 3307 | epoch avg. loss: 0.312 | test avg. loss: 82.614
Epochs: 3308 | epoch avg. loss: 0.346 | test avg. loss: 82.592


  7%|▋         | 3312/50000 [04:40<1:14:24, 10.46it/s]

Epochs: 3309 | epoch avg. loss: 0.328 | test avg. loss: 82.905
Epochs: 3310 | epoch avg. loss: 0.328 | test avg. loss: 83.131
Epochs: 3311 | epoch avg. loss: 0.400 | test avg. loss: 83.112


  7%|▋         | 3314/50000 [04:41<1:12:04, 10.80it/s]

Epochs: 3312 | epoch avg. loss: 0.255 | test avg. loss: 83.674
Epochs: 3313 | epoch avg. loss: 0.604 | test avg. loss: 83.308
Epochs: 3314 | epoch avg. loss: 0.259 | test avg. loss: 83.219


  7%|▋         | 3318/50000 [04:41<1:04:22, 12.08it/s]

Epochs: 3315 | epoch avg. loss: 0.209 | test avg. loss: 83.182
Epochs: 3316 | epoch avg. loss: 0.247 | test avg. loss: 83.160
Epochs: 3317 | epoch avg. loss: 0.223 | test avg. loss: 83.217


  7%|▋         | 3322/50000 [04:41<56:27, 13.78it/s]  

Epochs: 3318 | epoch avg. loss: 0.270 | test avg. loss: 83.327
Epochs: 3319 | epoch avg. loss: 0.514 | test avg. loss: 83.161
Epochs: 3320 | epoch avg. loss: 0.264 | test avg. loss: 83.050
Epochs: 3321 | epoch avg. loss: 0.193 | test avg. loss: 83.287


  7%|▋         | 3324/50000 [04:41<55:26, 14.03it/s]

Epochs: 3322 | epoch avg. loss: 0.428 | test avg. loss: 82.959
Epochs: 3323 | epoch avg. loss: 0.184 | test avg. loss: 82.957
Epochs: 3324 | epoch avg. loss: 0.225 | test avg. loss: 82.978


  7%|▋         | 3328/50000 [04:42<58:46, 13.24it/s]

Epochs: 3325 | epoch avg. loss: 0.232 | test avg. loss: 82.904
Epochs: 3326 | epoch avg. loss: 0.317 | test avg. loss: 82.749
Epochs: 3327 | epoch avg. loss: 0.226 | test avg. loss: 82.772


  7%|▋         | 3330/50000 [04:42<59:10, 13.14it/s]

Epochs: 3328 | epoch avg. loss: 0.240 | test avg. loss: 82.962
Epochs: 3329 | epoch avg. loss: 0.371 | test avg. loss: 83.053
Epochs: 3330 | epoch avg. loss: 0.628 | test avg. loss: 82.929


  7%|▋         | 3334/50000 [04:42<1:01:28, 12.65it/s]

Epochs: 3331 | epoch avg. loss: 0.300 | test avg. loss: 82.896
Epochs: 3332 | epoch avg. loss: 0.212 | test avg. loss: 83.142
Epochs: 3333 | epoch avg. loss: 0.593 | test avg. loss: 82.962


  7%|▋         | 3336/50000 [04:42<1:00:03, 12.95it/s]

Epochs: 3334 | epoch avg. loss: 0.484 | test avg. loss: 83.639
Epochs: 3335 | epoch avg. loss: 0.626 | test avg. loss: 83.760
Epochs: 3336 | epoch avg. loss: 0.666 | test avg. loss: 83.271


  7%|▋         | 3340/50000 [04:43<1:07:27, 11.53it/s]

Epochs: 3337 | epoch avg. loss: 0.306 | test avg. loss: 83.401
Epochs: 3338 | epoch avg. loss: 0.294 | test avg. loss: 83.259
Epochs: 3339 | epoch avg. loss: 0.179 | test avg. loss: 83.133


  7%|▋         | 3342/50000 [04:43<1:10:01, 11.11it/s]

Epochs: 3340 | epoch avg. loss: 0.178 | test avg. loss: 82.985
Epochs: 3341 | epoch avg. loss: 0.209 | test avg. loss: 82.808
Epochs: 3342 | epoch avg. loss: 0.186 | test avg. loss: 82.659


                                                      

Epochs: 3343 | epoch avg. loss: 0.196 | test avg. loss: 82.944
Epochs: 3344 | epoch avg. loss: 0.580 | test avg. loss: 82.621
Epochs: 3345 | epoch avg. loss: 1.150 | test avg. loss: 82.527


  7%|▋         | 3348/50000 [04:43<58:57, 13.19it/s]

Epochs: 3346 | epoch avg. loss: 0.616 | test avg. loss: 83.411
Epochs: 3347 | epoch avg. loss: 1.154 | test avg. loss: 82.255
Epochs: 3348 | epoch avg. loss: 0.562 | test avg. loss: 81.898


  7%|▋         | 3352/50000 [04:44<59:38, 13.03it/s]

Epochs: 3349 | epoch avg. loss: 0.447 | test avg. loss: 83.313
Epochs: 3350 | epoch avg. loss: 1.263 | test avg. loss: 82.896
Epochs: 3351 | epoch avg. loss: 0.988 | test avg. loss: 82.216


  7%|▋         | 3356/50000 [04:44<56:43, 13.71it/s]

Epochs: 3352 | epoch avg. loss: 0.553 | test avg. loss: 83.324
Epochs: 3353 | epoch avg. loss: 0.997 | test avg. loss: 83.398
Epochs: 3354 | epoch avg. loss: 0.998 | test avg. loss: 82.343
Epochs: 3355 | epoch avg. loss: 0.410 | test avg. loss: 82.194


  7%|▋         | 3358/50000 [04:44<55:25, 14.02it/s]

Epochs: 3356 | epoch avg. loss: 0.688 | test avg. loss: 82.634
Epochs: 3357 | epoch avg. loss: 0.714 | test avg. loss: 82.669
Epochs: 3358 | epoch avg. loss: 0.625 | test avg. loss: 82.221


  7%|▋         | 3362/50000 [04:44<56:53, 13.66it/s]

Epochs: 3359 | epoch avg. loss: 0.391 | test avg. loss: 83.562
Epochs: 3360 | epoch avg. loss: 1.128 | test avg. loss: 83.140
Epochs: 3361 | epoch avg. loss: 1.111 | test avg. loss: 82.976




Epochs: 3362 | epoch avg. loss: 0.987 | test avg. loss: 83.888
Epochs: 3363 | epoch avg. loss: 0.944 | test avg. loss: 84.846


  7%|▋         | 3366/50000 [04:45<1:04:59, 11.96it/s]

Epochs: 3364 | epoch avg. loss: 1.351 | test avg. loss: 84.591
Epochs: 3365 | epoch avg. loss: 1.481 | test avg. loss: 83.024
Epochs: 3366 | epoch avg. loss: 0.537 | test avg. loss: 82.949


  7%|▋         | 3370/50000 [04:45<1:06:35, 11.67it/s]

Epochs: 3367 | epoch avg. loss: 0.472 | test avg. loss: 83.223
Epochs: 3368 | epoch avg. loss: 0.767 | test avg. loss: 82.545
Epochs: 3369 | epoch avg. loss: 0.500 | test avg. loss: 82.334


  7%|▋         | 3372/50000 [04:45<1:08:19, 11.37it/s]

Epochs: 3370 | epoch avg. loss: 0.239 | test avg. loss: 82.387
Epochs: 3371 | epoch avg. loss: 0.245 | test avg. loss: 82.503
Epochs: 3372 | epoch avg. loss: 0.325 | test avg. loss: 82.519


  7%|▋         | 3376/50000 [04:46<1:07:23, 11.53it/s]

Epochs: 3373 | epoch avg. loss: 0.412 | test avg. loss: 82.273
Epochs: 3374 | epoch avg. loss: 0.222 | test avg. loss: 82.380
Epochs: 3375 | epoch avg. loss: 0.318 | test avg. loss: 82.304


  7%|▋         | 3378/50000 [04:46<1:07:07, 11.58it/s]

Epochs: 3376 | epoch avg. loss: 0.171 | test avg. loss: 82.512
Epochs: 3377 | epoch avg. loss: 0.405 | test avg. loss: 82.576
Epochs: 3378 | epoch avg. loss: 0.269 | test avg. loss: 82.882


  7%|▋         | 3382/50000 [04:46<1:07:10, 11.56it/s]

Epochs: 3379 | epoch avg. loss: 0.722 | test avg. loss: 82.439
Epochs: 3380 | epoch avg. loss: 0.273 | test avg. loss: 82.965
Epochs: 3381 | epoch avg. loss: 0.536 | test avg. loss: 82.667


  7%|▋         | 3384/50000 [04:46<1:04:50, 11.98it/s]

Epochs: 3382 | epoch avg. loss: 0.306 | test avg. loss: 82.602
Epochs: 3383 | epoch avg. loss: 0.628 | test avg. loss: 83.958
Epochs: 3384 | epoch avg. loss: 1.291 | test avg. loss: 83.194


  7%|▋         | 3388/50000 [04:46<1:01:32, 12.62it/s]

Epochs: 3385 | epoch avg. loss: 0.841 | test avg. loss: 82.269
Epochs: 3386 | epoch avg. loss: 0.373 | test avg. loss: 81.883
Epochs: 3387 | epoch avg. loss: 0.511 | test avg. loss: 82.115


  7%|▋         | 3390/50000 [04:47<59:12, 13.12it/s]

Epochs: 3388 | epoch avg. loss: 0.692 | test avg. loss: 81.552
Epochs: 3389 | epoch avg. loss: 0.277 | test avg. loss: 81.675
Epochs: 3390 | epoch avg. loss: 0.244 | test avg. loss: 81.950


  7%|▋         | 3394/50000 [04:47<58:27, 13.29it/s]

Epochs: 3391 | epoch avg. loss: 0.529 | test avg. loss: 81.984
Epochs: 3392 | epoch avg. loss: 0.232 | test avg. loss: 82.528
Epochs: 3393 | epoch avg. loss: 0.357 | test avg. loss: 82.682




Epochs: 3394 | epoch avg. loss: 0.486 | test avg. loss: 82.575
Epochs: 3395 | epoch avg. loss: 0.248 | test avg. loss: 82.489
Epochs: 3396 | epoch avg. loss: 0.170 | test avg. loss: 82.537


  7%|▋         | 3398/50000 [04:47<56:14, 13.81it/s]

Epochs: 3397 | epoch avg. loss: 0.304 | test avg. loss: 82.484
Epochs: 3398 | epoch avg. loss: 0.192 | test avg. loss: 82.674
Epochs: 3399 | epoch avg. loss: 0.349 | test avg. loss: 82.497


  7%|▋         | 3402/50000 [04:49<2:53:29,  4.48it/s]

Epochs: 3400 | epoch avg. loss: 0.452 | test avg. loss: 82.441
Epochs: 3401 | epoch avg. loss: 0.278 | test avg. loss: 82.579
Epochs: 3402 | epoch avg. loss: 0.430 | test avg. loss: 82.211


  7%|▋         | 3406/50000 [04:49<1:58:20,  6.56it/s]

Epochs: 3403 | epoch avg. loss: 0.172 | test avg. loss: 82.186
Epochs: 3404 | epoch avg. loss: 0.295 | test avg. loss: 82.514
Epochs: 3405 | epoch avg. loss: 0.379 | test avg. loss: 82.320


  7%|▋         | 3408/50000 [04:49<1:42:16,  7.59it/s]

Epochs: 3406 | epoch avg. loss: 0.274 | test avg. loss: 82.293
Epochs: 3407 | epoch avg. loss: 0.321 | test avg. loss: 82.568
Epochs: 3408 | epoch avg. loss: 0.276 | test avg. loss: 82.789


  7%|▋         | 3412/50000 [04:50<1:18:47,  9.86it/s]

Epochs: 3409 | epoch avg. loss: 0.179 | test avg. loss: 83.181
Epochs: 3410 | epoch avg. loss: 0.147 | test avg. loss: 83.435
Epochs: 3411 | epoch avg. loss: 0.179 | test avg. loss: 83.537


  7%|▋         | 3416/50000 [04:50<1:04:39, 12.01it/s]

Epochs: 3412 | epoch avg. loss: 0.181 | test avg. loss: 83.617
Epochs: 3413 | epoch avg. loss: 0.197 | test avg. loss: 83.417
Epochs: 3414 | epoch avg. loss: 0.262 | test avg. loss: 83.211
Epochs: 3415 | epoch avg. loss: 0.187 | test avg. loss: 83.253




Epochs: 3416 | epoch avg. loss: 0.371 | test avg. loss: 82.892
Epochs: 3417 | epoch avg. loss: 0.138 | test avg. loss: 83.039
Epochs: 3418 | epoch avg. loss: 0.333 | test avg. loss: 82.983


  7%|▋         | 3422/50000 [04:50<59:46, 12.99it/s]

Epochs: 3419 | epoch avg. loss: 0.188 | test avg. loss: 83.185
Epochs: 3420 | epoch avg. loss: 0.239 | test avg. loss: 83.142
Epochs: 3421 | epoch avg. loss: 0.159 | test avg. loss: 83.121


  7%|▋         | 3424/50000 [04:51<59:42, 13.00it/s]

Epochs: 3422 | epoch avg. loss: 0.158 | test avg. loss: 83.177
Epochs: 3423 | epoch avg. loss: 0.353 | test avg. loss: 82.856
Epochs: 3424 | epoch avg. loss: 0.496 | test avg. loss: 82.950


  7%|▋         | 3428/50000 [04:51<1:03:16, 12.27it/s]

Epochs: 3425 | epoch avg. loss: 0.521 | test avg. loss: 82.999
Epochs: 3426 | epoch avg. loss: 1.069 | test avg. loss: 82.182
Epochs: 3427 | epoch avg. loss: 0.576 | test avg. loss: 81.843


  7%|▋         | 3430/50000 [04:51<1:04:44, 11.99it/s]

Epochs: 3428 | epoch avg. loss: 0.331 | test avg. loss: 82.329
Epochs: 3429 | epoch avg. loss: 0.716 | test avg. loss: 81.944
Epochs: 3430 | epoch avg. loss: 0.183 | test avg. loss: 82.072


  7%|▋         | 3434/50000 [04:51<1:06:08, 11.73it/s]

Epochs: 3431 | epoch avg. loss: 0.172 | test avg. loss: 82.201
Epochs: 3432 | epoch avg. loss: 0.165 | test avg. loss: 82.329
Epochs: 3433 | epoch avg. loss: 0.172 | test avg. loss: 82.398


  7%|▋         | 3436/50000 [04:52<1:04:14, 12.08it/s]

Epochs: 3434 | epoch avg. loss: 0.149 | test avg. loss: 82.475
Epochs: 3435 | epoch avg. loss: 0.179 | test avg. loss: 82.613
Epochs: 3436 | epoch avg. loss: 0.156 | test avg. loss: 82.839


  7%|▋         | 3440/50000 [04:52<1:02:01, 12.51it/s]

Epochs: 3437 | epoch avg. loss: 0.290 | test avg. loss: 82.842
Epochs: 3438 | epoch avg. loss: 0.160 | test avg. loss: 82.873
Epochs: 3439 | epoch avg. loss: 0.165 | test avg. loss: 83.031




Epochs: 3440 | epoch avg. loss: 0.253 | test avg. loss: 83.052
Epochs: 3441 | epoch avg. loss: 0.528 | test avg. loss: 82.895
Epochs: 3442 | epoch avg. loss: 0.300 | test avg. loss: 83.230


  7%|▋         | 3446/50000 [04:52<56:11, 13.81it/s]

Epochs: 3443 | epoch avg. loss: 0.461 | test avg. loss: 82.746
Epochs: 3444 | epoch avg. loss: 0.251 | test avg. loss: 82.727
Epochs: 3445 | epoch avg. loss: 0.187 | test avg. loss: 82.856


  7%|▋         | 3448/50000 [04:53<57:41, 13.45it/s]

Epochs: 3446 | epoch avg. loss: 0.413 | test avg. loss: 83.039
Epochs: 3447 | epoch avg. loss: 0.360 | test avg. loss: 83.003
Epochs: 3448 | epoch avg. loss: 0.560 | test avg. loss: 82.614


  7%|▋         | 3452/50000 [04:53<59:00, 13.15it/s]

Epochs: 3449 | epoch avg. loss: 0.204 | test avg. loss: 82.987
Epochs: 3450 | epoch avg. loss: 0.348 | test avg. loss: 82.821
Epochs: 3451 | epoch avg. loss: 0.375 | test avg. loss: 82.913


  7%|▋         | 3454/50000 [04:53<57:02, 13.60it/s]

Epochs: 3452 | epoch avg. loss: 0.345 | test avg. loss: 84.237
Epochs: 3453 | epoch avg. loss: 1.722 | test avg. loss: 82.570
Epochs: 3454 | epoch avg. loss: 0.358 | test avg. loss: 83.337
Epochs: 3455 | epoch avg. loss: 0.866 | test avg. loss: 83.358




Epochs: 3456 | epoch avg. loss: 1.535 | test avg. loss: 82.819
Epochs: 3457 | epoch avg. loss: 1.184 | test avg. loss: 84.555
Epochs: 3458 | epoch avg. loss: 1.619 | test avg. loss: 88.318


  7%|▋         | 3462/50000 [04:53<55:36, 13.95it/s]

Epochs: 3459 | epoch avg. loss: 7.140 | test avg. loss: 81.998
Epochs: 3460 | epoch avg. loss: 1.581 | test avg. loss: 84.515
Epochs: 3461 | epoch avg. loss: 2.780 | test avg. loss: 84.601


  7%|▋         | 3464/50000 [04:54<56:31, 13.72it/s]

Epochs: 3462 | epoch avg. loss: 3.890 | test avg. loss: 80.891
Epochs: 3463 | epoch avg. loss: 1.062 | test avg. loss: 81.658
Epochs: 3464 | epoch avg. loss: 1.147 | test avg. loss: 81.817


  7%|▋         | 3468/50000 [04:54<57:59, 13.37it/s]

Epochs: 3465 | epoch avg. loss: 1.072 | test avg. loss: 82.104
Epochs: 3466 | epoch avg. loss: 0.551 | test avg. loss: 82.669
Epochs: 3467 | epoch avg. loss: 0.883 | test avg. loss: 82.502


  7%|▋         | 3470/50000 [04:54<58:08, 13.34it/s]

Epochs: 3468 | epoch avg. loss: 0.962 | test avg. loss: 83.362
Epochs: 3469 | epoch avg. loss: 0.702 | test avg. loss: 83.619
Epochs: 3470 | epoch avg. loss: 0.834 | test avg. loss: 83.591


  7%|▋         | 3474/50000 [04:54<1:01:18, 12.65it/s]

Epochs: 3471 | epoch avg. loss: 0.532 | test avg. loss: 83.817
Epochs: 3472 | epoch avg. loss: 0.337 | test avg. loss: 84.501
Epochs: 3473 | epoch avg. loss: 0.532 | test avg. loss: 84.950


  7%|▋         | 3476/50000 [04:55<1:07:37, 11.46it/s]

Epochs: 3474 | epoch avg. loss: 1.688 | test avg. loss: 84.047
Epochs: 3475 | epoch avg. loss: 0.582 | test avg. loss: 84.748
Epochs: 3476 | epoch avg. loss: 1.300 | test avg. loss: 84.039


  7%|▋         | 3480/50000 [04:55<1:02:39, 12.37it/s]

Epochs: 3477 | epoch avg. loss: 1.074 | test avg. loss: 83.538
Epochs: 3478 | epoch avg. loss: 0.555 | test avg. loss: 82.664
Epochs: 3479 | epoch avg. loss: 0.213 | test avg. loss: 82.408


  7%|▋         | 3484/50000 [04:55<56:04, 13.82it/s]

Epochs: 3480 | epoch avg. loss: 0.200 | test avg. loss: 82.479
Epochs: 3481 | epoch avg. loss: 0.258 | test avg. loss: 82.494
Epochs: 3482 | epoch avg. loss: 0.342 | test avg. loss: 82.536
Epochs: 3483 | epoch avg. loss: 0.520 | test avg. loss: 82.875


  7%|▋         | 3486/50000 [04:55<54:52, 14.13it/s]

Epochs: 3484 | epoch avg. loss: 0.377 | test avg. loss: 83.876
Epochs: 3485 | epoch avg. loss: 1.110 | test avg. loss: 83.466
Epochs: 3486 | epoch avg. loss: 0.920 | test avg. loss: 84.566
Epochs: 3487 | epoch avg. loss: 1.052 | test avg. loss: 84.322


  7%|▋         | 3490/50000 [04:56<55:29, 13.97it/s]

Epochs: 3488 | epoch avg. loss: 0.766 | test avg. loss: 84.059
Epochs: 3489 | epoch avg. loss: 0.763 | test avg. loss: 83.365
Epochs: 3490 | epoch avg. loss: 0.673 | test avg. loss: 84.437


  7%|▋         | 3494/50000 [04:56<55:21, 14.00it/s]

Epochs: 3491 | epoch avg. loss: 1.305 | test avg. loss: 84.928
Epochs: 3492 | epoch avg. loss: 2.717 | test avg. loss: 83.123
Epochs: 3493 | epoch avg. loss: 0.743 | test avg. loss: 82.289


  7%|▋         | 3496/50000 [04:56<55:07, 14.06it/s]

Epochs: 3494 | epoch avg. loss: 0.305 | test avg. loss: 82.287
Epochs: 3495 | epoch avg. loss: 0.408 | test avg. loss: 83.144
Epochs: 3496 | epoch avg. loss: 0.993 | test avg. loss: 82.834


  7%|▋         | 3498/50000 [04:56<55:22, 14.00it/s]

Epochs: 3497 | epoch avg. loss: 0.338 | test avg. loss: 83.846
Epochs: 3498 | epoch avg. loss: 0.574 | test avg. loss: 84.954
Epochs: 3499 | epoch avg. loss: 1.931 | test avg. loss: 84.173


  7%|▋         | 3502/50000 [04:58<2:30:48,  5.14it/s]

Epochs: 3500 | epoch avg. loss: 0.464 | test avg. loss: 84.902
Epochs: 3501 | epoch avg. loss: 1.018 | test avg. loss: 84.861
Epochs: 3502 | epoch avg. loss: 0.577 | test avg. loss: 85.185


  7%|▋         | 3506/50000 [04:58<1:43:38,  7.48it/s]

Epochs: 3503 | epoch avg. loss: 0.403 | test avg. loss: 85.507
Epochs: 3504 | epoch avg. loss: 0.704 | test avg. loss: 85.465
Epochs: 3505 | epoch avg. loss: 0.471 | test avg. loss: 85.805


  7%|▋         | 3508/50000 [04:58<1:31:20,  8.48it/s]

Epochs: 3506 | epoch avg. loss: 0.397 | test avg. loss: 85.544
Epochs: 3507 | epoch avg. loss: 0.860 | test avg. loss: 85.128
Epochs: 3508 | epoch avg. loss: 0.588 | test avg. loss: 86.281


  7%|▋         | 3512/50000 [04:58<1:13:08, 10.59it/s]

Epochs: 3509 | epoch avg. loss: 1.392 | test avg. loss: 84.925
Epochs: 3510 | epoch avg. loss: 0.950 | test avg. loss: 84.774
Epochs: 3511 | epoch avg. loss: 0.634 | test avg. loss: 84.321


  7%|▋         | 3514/50000 [04:59<1:11:59, 10.76it/s]

Epochs: 3512 | epoch avg. loss: 0.374 | test avg. loss: 83.894
Epochs: 3513 | epoch avg. loss: 0.371 | test avg. loss: 83.850
Epochs: 3514 | epoch avg. loss: 0.250 | test avg. loss: 84.128


  7%|▋         | 3518/50000 [04:59<1:05:08, 11.89it/s]

Epochs: 3515 | epoch avg. loss: 0.506 | test avg. loss: 83.949
Epochs: 3516 | epoch avg. loss: 0.296 | test avg. loss: 84.812
Epochs: 3517 | epoch avg. loss: 0.764 | test avg. loss: 84.648


  7%|▋         | 3520/50000 [04:59<1:00:51, 12.73it/s]

Epochs: 3518 | epoch avg. loss: 0.272 | test avg. loss: 85.148
Epochs: 3519 | epoch avg. loss: 0.264 | test avg. loss: 85.562
Epochs: 3520 | epoch avg. loss: 0.542 | test avg. loss: 85.378


  7%|▋         | 3524/50000 [04:59<1:01:21, 12.63it/s]

Epochs: 3521 | epoch avg. loss: 0.262 | test avg. loss: 85.729
Epochs: 3522 | epoch avg. loss: 0.478 | test avg. loss: 85.320
Epochs: 3523 | epoch avg. loss: 0.673 | test avg. loss: 85.496


  7%|▋         | 3526/50000 [04:59<1:04:31, 12.00it/s]

Epochs: 3524 | epoch avg. loss: 0.565 | test avg. loss: 85.720
Epochs: 3525 | epoch avg. loss: 1.387 | test avg. loss: 84.889


  7%|▋         | 3528/50000 [05:00<1:10:22, 11.01it/s]

Epochs: 3526 | epoch avg. loss: 1.449 | test avg. loss: 87.178
Epochs: 3527 | epoch avg. loss: 2.857 | test avg. loss: 85.187
Epochs: 3528 | epoch avg. loss: 1.870 | test avg. loss: 84.627


  7%|▋         | 3532/50000 [05:00<1:02:49, 12.33it/s]

Epochs: 3529 | epoch avg. loss: 0.765 | test avg. loss: 84.968
Epochs: 3530 | epoch avg. loss: 1.384 | test avg. loss: 83.947
Epochs: 3531 | epoch avg. loss: 0.470 | test avg. loss: 84.440


  7%|▋         | 3534/50000 [05:00<59:14, 13.07it/s]

Epochs: 3532 | epoch avg. loss: 0.431 | test avg. loss: 85.585
Epochs: 3533 | epoch avg. loss: 1.354 | test avg. loss: 84.861
Epochs: 3534 | epoch avg. loss: 0.305 | test avg. loss: 85.256


  7%|▋         | 3538/50000 [05:00<1:00:02, 12.90it/s]

Epochs: 3535 | epoch avg. loss: 0.650 | test avg. loss: 84.851
Epochs: 3536 | epoch avg. loss: 0.192 | test avg. loss: 84.941
Epochs: 3537 | epoch avg. loss: 0.353 | test avg. loss: 85.269


  7%|▋         | 3540/50000 [05:01<1:04:45, 11.96it/s]

Epochs: 3538 | epoch avg. loss: 0.584 | test avg. loss: 84.959
Epochs: 3539 | epoch avg. loss: 0.387 | test avg. loss: 84.903
Epochs: 3540 | epoch avg. loss: 0.372 | test avg. loss: 86.123


                                                      

Epochs: 3541 | epoch avg. loss: 1.622 | test avg. loss: 84.280
Epochs: 3542 | epoch avg. loss: 0.422 | test avg. loss: 84.706
Epochs: 3543 | epoch avg. loss: 0.591 | test avg. loss: 84.088


  7%|▋         | 3548/50000 [05:01<54:59, 14.08it/s]

Epochs: 3544 | epoch avg. loss: 0.474 | test avg. loss: 84.276
Epochs: 3545 | epoch avg. loss: 0.522 | test avg. loss: 84.063
Epochs: 3546 | epoch avg. loss: 0.159 | test avg. loss: 84.258
Epochs: 3547 | epoch avg. loss: 0.161 | test avg. loss: 84.431


  7%|▋         | 3550/50000 [05:01<55:00, 14.08it/s]

Epochs: 3548 | epoch avg. loss: 0.133 | test avg. loss: 84.744
Epochs: 3549 | epoch avg. loss: 0.258 | test avg. loss: 84.756
Epochs: 3550 | epoch avg. loss: 0.382 | test avg. loss: 85.078




Epochs: 3551 | epoch avg. loss: 0.391 | test avg. loss: 85.088
Epochs: 3552 | epoch avg. loss: 0.622 | test avg. loss: 84.776


  7%|▋         | 3556/50000 [05:02<1:03:50, 12.12it/s]

Epochs: 3553 | epoch avg. loss: 0.313 | test avg. loss: 85.330
Epochs: 3554 | epoch avg. loss: 0.814 | test avg. loss: 84.694
Epochs: 3555 | epoch avg. loss: 0.423 | test avg. loss: 84.501


  7%|▋         | 3558/50000 [05:02<1:02:08, 12.46it/s]

Epochs: 3556 | epoch avg. loss: 0.291 | test avg. loss: 84.554
Epochs: 3557 | epoch avg. loss: 0.311 | test avg. loss: 84.531
Epochs: 3558 | epoch avg. loss: 0.149 | test avg. loss: 84.750


  7%|▋         | 3562/50000 [05:02<56:38, 13.67it/s]  

Epochs: 3559 | epoch avg. loss: 0.199 | test avg. loss: 84.677
Epochs: 3560 | epoch avg. loss: 0.193 | test avg. loss: 84.698
Epochs: 3561 | epoch avg. loss: 0.177 | test avg. loss: 84.832


  7%|▋         | 3564/50000 [05:03<1:03:00, 12.28it/s]

Epochs: 3562 | epoch avg. loss: 0.236 | test avg. loss: 84.927
Epochs: 3563 | epoch avg. loss: 0.627 | test avg. loss: 84.315
Epochs: 3564 | epoch avg. loss: 0.386 | test avg. loss: 85.941


  7%|▋         | 3568/50000 [05:03<1:01:49, 12.52it/s]

Epochs: 3565 | epoch avg. loss: 1.981 | test avg. loss: 84.292
Epochs: 3566 | epoch avg. loss: 1.006 | test avg. loss: 84.933
Epochs: 3567 | epoch avg. loss: 0.799 | test avg. loss: 86.333


  7%|▋         | 3570/50000 [05:03<1:00:22, 12.82it/s]

Epochs: 3568 | epoch avg. loss: 2.014 | test avg. loss: 84.393
Epochs: 3569 | epoch avg. loss: 2.513 | test avg. loss: 86.032
Epochs: 3570 | epoch avg. loss: 2.373 | test avg. loss: 85.516


  7%|▋         | 3574/50000 [05:03<57:21, 13.49it/s]

Epochs: 3571 | epoch avg. loss: 3.050 | test avg. loss: 83.143
Epochs: 3572 | epoch avg. loss: 2.444 | test avg. loss: 86.453
Epochs: 3573 | epoch avg. loss: 2.672 | test avg. loss: 88.858
Epochs: 3574 | epoch avg. loss: 5.514 | test avg. loss: 84.662


  7%|▋         | 3578/50000 [05:04<1:01:43, 12.54it/s]

Epochs: 3575 | epoch avg. loss: 0.826 | test avg. loss: 86.240
Epochs: 3576 | epoch avg. loss: 1.634 | test avg. loss: 85.764
Epochs: 3577 | epoch avg. loss: 1.400 | test avg. loss: 86.718


  7%|▋         | 3580/50000 [05:04<59:05, 13.09it/s]

Epochs: 3578 | epoch avg. loss: 1.116 | test avg. loss: 88.000
Epochs: 3579 | epoch avg. loss: 3.575 | test avg. loss: 86.247
Epochs: 3580 | epoch avg. loss: 3.882 | test avg. loss: 87.901


  7%|▋         | 3584/50000 [05:04<55:41, 13.89it/s]

Epochs: 3581 | epoch avg. loss: 2.693 | test avg. loss: 90.720
Epochs: 3582 | epoch avg. loss: 6.980 | test avg. loss: 86.695
Epochs: 3583 | epoch avg. loss: 6.239 | test avg. loss: 90.834
Epochs: 3584 | epoch avg. loss: 4.736 | test avg. loss: 92.206


  7%|▋         | 3588/50000 [05:04<53:14, 14.53it/s]

Epochs: 3585 | epoch avg. loss: 5.252 | test avg. loss: 88.638
Epochs: 3586 | epoch avg. loss: 4.979 | test avg. loss: 89.907
Epochs: 3587 | epoch avg. loss: 2.631 | test avg. loss: 90.729
Epochs: 3588 | epoch avg. loss: 2.405 | test avg. loss: 88.739


                                                    

Epochs: 3589 | epoch avg. loss: 1.663 | test avg. loss: 87.849
Epochs: 3590 | epoch avg. loss: 1.015 | test avg. loss: 89.584


  7%|▋         | 3594/50000 [05:05<1:01:30, 12.58it/s]

Epochs: 3591 | epoch avg. loss: 1.907 | test avg. loss: 88.078
Epochs: 3592 | epoch avg. loss: 0.706 | test avg. loss: 87.594
Epochs: 3593 | epoch avg. loss: 0.811 | test avg. loss: 88.102


  7%|▋         | 3596/50000 [05:05<1:02:27, 12.38it/s]

Epochs: 3594 | epoch avg. loss: 0.883 | test avg. loss: 88.781
Epochs: 3595 | epoch avg. loss: 2.237 | test avg. loss: 86.839
Epochs: 3596 | epoch avg. loss: 2.056 | test avg. loss: 87.042


  7%|▋         | 3598/50000 [05:05<1:01:19, 12.61it/s]

Epochs: 3597 | epoch avg. loss: 1.092 | test avg. loss: 86.853
Epochs: 3598 | epoch avg. loss: 1.029 | test avg. loss: 88.991
Epochs: 3599 | epoch avg. loss: 4.076 | test avg. loss: 86.729


  7%|▋         | 3602/50000 [05:07<2:50:32,  4.53it/s]

Epochs: 3600 | epoch avg. loss: 3.297 | test avg. loss: 90.117
Epochs: 3601 | epoch avg. loss: 3.918 | test avg. loss: 89.824
Epochs: 3602 | epoch avg. loss: 3.844 | test avg. loss: 87.055


                                                      

Epochs: 3603 | epoch avg. loss: 0.647 | test avg. loss: 87.431
Epochs: 3604 | epoch avg. loss: 0.703 | test avg. loss: 88.702
Epochs: 3605 | epoch avg. loss: 1.949 | test avg. loss: 87.606


  7%|▋         | 3610/50000 [05:07<1:20:57,  9.55it/s]

Epochs: 3606 | epoch avg. loss: 1.466 | test avg. loss: 87.909
Epochs: 3607 | epoch avg. loss: 0.684 | test avg. loss: 88.666
Epochs: 3608 | epoch avg. loss: 0.725 | test avg. loss: 89.811
Epochs: 3609 | epoch avg. loss: 3.129 | test avg. loss: 89.044


  7%|▋         | 3612/50000 [05:07<1:13:47, 10.48it/s]

Epochs: 3610 | epoch avg. loss: 2.844 | test avg. loss: 90.298
Epochs: 3611 | epoch avg. loss: 1.786 | test avg. loss: 91.154
Epochs: 3612 | epoch avg. loss: 2.019 | test avg. loss: 89.998


  7%|▋         | 3616/50000 [05:08<1:08:07, 11.35it/s]

Epochs: 3613 | epoch avg. loss: 1.691 | test avg. loss: 88.988
Epochs: 3614 | epoch avg. loss: 0.528 | test avg. loss: 88.926
Epochs: 3615 | epoch avg. loss: 0.634 | test avg. loss: 88.027


  7%|▋         | 3618/50000 [05:08<1:09:11, 11.17it/s]

Epochs: 3616 | epoch avg. loss: 0.321 | test avg. loss: 87.820
Epochs: 3617 | epoch avg. loss: 0.286 | test avg. loss: 87.800
Epochs: 3618 | epoch avg. loss: 0.283 | test avg. loss: 87.682


  7%|▋         | 3622/50000 [05:08<1:04:36, 11.96it/s]

Epochs: 3619 | epoch avg. loss: 0.346 | test avg. loss: 87.801
Epochs: 3620 | epoch avg. loss: 0.285 | test avg. loss: 88.025
Epochs: 3621 | epoch avg. loss: 0.517 | test avg. loss: 87.788


  7%|▋         | 3624/50000 [05:08<1:02:23, 12.39it/s]

Epochs: 3622 | epoch avg. loss: 0.308 | test avg. loss: 88.074
Epochs: 3623 | epoch avg. loss: 0.343 | test avg. loss: 88.328
Epochs: 3624 | epoch avg. loss: 0.399 | test avg. loss: 88.137


  7%|▋         | 3628/50000 [05:09<1:09:46, 11.08it/s]

Epochs: 3625 | epoch avg. loss: 0.495 | test avg. loss: 87.989
Epochs: 3626 | epoch avg. loss: 0.236 | test avg. loss: 88.231
Epochs: 3627 | epoch avg. loss: 0.363 | test avg. loss: 88.626


  7%|▋         | 3630/50000 [05:09<1:10:33, 10.95it/s]

Epochs: 3628 | epoch avg. loss: 0.937 | test avg. loss: 87.911
Epochs: 3629 | epoch avg. loss: 0.276 | test avg. loss: 88.023
Epochs: 3630 | epoch avg. loss: 0.280 | test avg. loss: 88.567


  7%|▋         | 3634/50000 [05:09<1:07:21, 11.47it/s]

Epochs: 3631 | epoch avg. loss: 0.507 | test avg. loss: 89.631
Epochs: 3632 | epoch avg. loss: 1.056 | test avg. loss: 88.574
Epochs: 3633 | epoch avg. loss: 1.742 | test avg. loss: 87.936


  7%|▋         | 3636/50000 [05:10<1:07:16, 11.49it/s]

Epochs: 3634 | epoch avg. loss: 0.911 | test avg. loss: 90.715
Epochs: 3635 | epoch avg. loss: 2.922 | test avg. loss: 88.215
Epochs: 3636 | epoch avg. loss: 2.507 | test avg. loss: 86.786


  7%|▋         | 3640/50000 [05:10<1:08:26, 11.29it/s]

Epochs: 3637 | epoch avg. loss: 2.006 | test avg. loss: 92.482
Epochs: 3638 | epoch avg. loss: 4.706 | test avg. loss: 89.953
Epochs: 3639 | epoch avg. loss: 2.735 | test avg. loss: 87.475


  7%|▋         | 3642/50000 [05:10<1:09:43, 11.08it/s]

Epochs: 3640 | epoch avg. loss: 0.550 | test avg. loss: 88.109
Epochs: 3641 | epoch avg. loss: 0.503 | test avg. loss: 88.870
Epochs: 3642 | epoch avg. loss: 0.936 | test avg. loss: 88.403


  7%|▋         | 3646/50000 [05:10<1:11:49, 10.76it/s]

Epochs: 3643 | epoch avg. loss: 0.137 | test avg. loss: 88.625
Epochs: 3644 | epoch avg. loss: 0.269 | test avg. loss: 88.842
Epochs: 3645 | epoch avg. loss: 0.281 | test avg. loss: 89.592


  7%|▋         | 3648/50000 [05:11<1:11:14, 10.84it/s]

Epochs: 3646 | epoch avg. loss: 0.587 | test avg. loss: 89.228
Epochs: 3647 | epoch avg. loss: 0.449 | test avg. loss: 89.387
Epochs: 3648 | epoch avg. loss: 0.330 | test avg. loss: 90.298


  7%|▋         | 3652/50000 [05:11<1:03:16, 12.21it/s]

Epochs: 3649 | epoch avg. loss: 0.996 | test avg. loss: 90.071
Epochs: 3650 | epoch avg. loss: 1.759 | test avg. loss: 89.668
Epochs: 3651 | epoch avg. loss: 1.227 | test avg. loss: 95.971


  7%|▋         | 3654/50000 [05:11<1:00:33, 12.76it/s]

Epochs: 3652 | epoch avg. loss: 6.270 | test avg. loss: 92.161
Epochs: 3653 | epoch avg. loss: 7.594 | test avg. loss: 87.720
Epochs: 3654 | epoch avg. loss: 4.332 | test avg. loss: 94.695


  7%|▋         | 3658/50000 [05:11<1:00:45, 12.71it/s]

Epochs: 3655 | epoch avg. loss: 5.994 | test avg. loss: 100.485
Epochs: 3656 | epoch avg. loss: 8.651 | test avg. loss: 96.517
Epochs: 3657 | epoch avg. loss: 8.715 | test avg. loss: 89.303


  7%|▋         | 3660/50000 [05:12<1:04:10, 12.04it/s]

Epochs: 3658 | epoch avg. loss: 2.620 | test avg. loss: 89.253
Epochs: 3659 | epoch avg. loss: 1.472 | test avg. loss: 90.043
Epochs: 3660 | epoch avg. loss: 1.907 | test avg. loss: 96.659


  7%|▋         | 3664/50000 [05:12<1:01:39, 12.52it/s]

Epochs: 3661 | epoch avg. loss: 4.987 | test avg. loss: 95.940
Epochs: 3662 | epoch avg. loss: 5.678 | test avg. loss: 92.158
Epochs: 3663 | epoch avg. loss: 1.940 | test avg. loss: 92.413


  7%|▋         | 3666/50000 [05:12<59:01, 13.08it/s]

Epochs: 3664 | epoch avg. loss: 1.164 | test avg. loss: 94.240
Epochs: 3665 | epoch avg. loss: 1.689 | test avg. loss: 97.742
Epochs: 3666 | epoch avg. loss: 3.383 | test avg. loss: 95.126
Epochs: 3667 | epoch avg. loss: 1.833 | test avg. loss: 94.174


  7%|▋         | 3670/50000 [05:12<59:30, 12.98it/s]

Epochs: 3668 | epoch avg. loss: 0.423 | test avg. loss: 94.559
Epochs: 3669 | epoch avg. loss: 0.426 | test avg. loss: 94.808
Epochs: 3670 | epoch avg. loss: 0.555 | test avg. loss: 94.292


  7%|▋         | 3674/50000 [05:13<1:01:05, 12.64it/s]

Epochs: 3671 | epoch avg. loss: 0.191 | test avg. loss: 94.241
Epochs: 3672 | epoch avg. loss: 0.348 | test avg. loss: 93.907
Epochs: 3673 | epoch avg. loss: 0.262 | test avg. loss: 93.665


  7%|▋         | 3676/50000 [05:13<59:21, 13.01it/s]

Epochs: 3674 | epoch avg. loss: 0.245 | test avg. loss: 93.385
Epochs: 3675 | epoch avg. loss: 0.250 | test avg. loss: 93.273
Epochs: 3676 | epoch avg. loss: 0.438 | test avg. loss: 92.611
Epochs: 3677 | epoch avg. loss: 0.192 | test avg. loss: 92.303


  7%|▋         | 3682/50000 [05:13<53:12, 14.51it/s]

Epochs: 3678 | epoch avg. loss: 0.166 | test avg. loss: 92.081
Epochs: 3679 | epoch avg. loss: 0.195 | test avg. loss: 91.896
Epochs: 3680 | epoch avg. loss: 0.140 | test avg. loss: 91.825
Epochs: 3681 | epoch avg. loss: 0.236 | test avg. loss: 91.656


  7%|▋         | 3684/50000 [05:13<55:23, 13.94it/s]

Epochs: 3682 | epoch avg. loss: 0.217 | test avg. loss: 91.767
Epochs: 3683 | epoch avg. loss: 0.259 | test avg. loss: 92.199
Epochs: 3684 | epoch avg. loss: 0.709 | test avg. loss: 91.830


  7%|▋         | 3688/50000 [05:14<57:55, 13.32it/s]

Epochs: 3685 | epoch avg. loss: 0.423 | test avg. loss: 91.456
Epochs: 3686 | epoch avg. loss: 0.211 | test avg. loss: 91.599
Epochs: 3687 | epoch avg. loss: 0.346 | test avg. loss: 91.781


                                                    

Epochs: 3688 | epoch avg. loss: 0.466 | test avg. loss: 91.275
Epochs: 3689 | epoch avg. loss: 0.358 | test avg. loss: 91.204
Epochs: 3690 | epoch avg. loss: 0.228 | test avg. loss: 91.174


  7%|▋         | 3694/50000 [05:14<53:50, 14.33it/s]

Epochs: 3691 | epoch avg. loss: 0.160 | test avg. loss: 91.239
Epochs: 3692 | epoch avg. loss: 0.265 | test avg. loss: 91.172
Epochs: 3693 | epoch avg. loss: 0.129 | test avg. loss: 91.214
Epochs: 3694 | epoch avg. loss: 0.201 | test avg. loss: 91.369


  7%|▋         | 3698/50000 [05:14<53:35, 14.40it/s]

Epochs: 3695 | epoch avg. loss: 0.201 | test avg. loss: 91.657
Epochs: 3696 | epoch avg. loss: 0.372 | test avg. loss: 91.454
Epochs: 3697 | epoch avg. loss: 0.159 | test avg. loss: 91.520


  7%|▋         | 3698/50000 [05:14<53:35, 14.40it/s]

Epochs: 3698 | epoch avg. loss: 0.251 | test avg. loss: 91.477
Epochs: 3699 | epoch avg. loss: 0.202 | test avg. loss: 91.623


                                                      

Epochs: 3700 | epoch avg. loss: 0.237 | test avg. loss: 92.310
Epochs: 3701 | epoch avg. loss: 0.639 | test avg. loss: 91.774
Epochs: 3702 | epoch avg. loss: 0.365 | test avg. loss: 91.855


  7%|▋         | 3706/50000 [05:16<1:42:17,  7.54it/s]

Epochs: 3703 | epoch avg. loss: 0.324 | test avg. loss: 92.425
Epochs: 3704 | epoch avg. loss: 0.384 | test avg. loss: 92.780
Epochs: 3705 | epoch avg. loss: 0.542 | test avg. loss: 92.690


  7%|▋         | 3708/50000 [05:16<1:27:48,  8.79it/s]

Epochs: 3706 | epoch avg. loss: 0.479 | test avg. loss: 92.900
Epochs: 3707 | epoch avg. loss: 0.644 | test avg. loss: 93.131
Epochs: 3708 | epoch avg. loss: 0.484 | test avg. loss: 94.442


  7%|▋         | 3712/50000 [05:17<1:18:20,  9.85it/s]

Epochs: 3709 | epoch avg. loss: 1.318 | test avg. loss: 93.790
Epochs: 3710 | epoch avg. loss: 2.155 | test avg. loss: 93.372
Epochs: 3711 | epoch avg. loss: 0.956 | test avg. loss: 95.692


  7%|▋         | 3714/50000 [05:17<1:12:51, 10.59it/s]

Epochs: 3712 | epoch avg. loss: 2.526 | test avg. loss: 94.626
Epochs: 3713 | epoch avg. loss: 2.674 | test avg. loss: 93.159
Epochs: 3714 | epoch avg. loss: 1.591 | test avg. loss: 102.078


  7%|▋         | 3718/50000 [05:17<1:04:46, 11.91it/s]

Epochs: 3715 | epoch avg. loss: 6.689 | test avg. loss: 96.759
Epochs: 3716 | epoch avg. loss: 5.580 | test avg. loss: 92.456
Epochs: 3717 | epoch avg. loss: 2.236 | test avg. loss: 94.246


  7%|▋         | 3720/50000 [05:17<1:01:07, 12.62it/s]

Epochs: 3718 | epoch avg. loss: 2.096 | test avg. loss: 94.411
Epochs: 3719 | epoch avg. loss: 1.560 | test avg. loss: 93.552
Epochs: 3720 | epoch avg. loss: 2.156 | test avg. loss: 92.402
Epochs: 3721 | epoch avg. loss: 0.669 | test avg. loss: 93.005


  7%|▋         | 3724/50000 [05:18<59:43, 12.91it/s]

Epochs: 3722 | epoch avg. loss: 0.796 | test avg. loss: 93.910
Epochs: 3723 | epoch avg. loss: 1.910 | test avg. loss: 94.008
Epochs: 3724 | epoch avg. loss: 2.863 | test avg. loss: 92.476


  7%|▋         | 3728/50000 [05:18<1:00:53, 12.67it/s]

Epochs: 3725 | epoch avg. loss: 1.774 | test avg. loss: 95.502
Epochs: 3726 | epoch avg. loss: 2.551 | test avg. loss: 97.244
Epochs: 3727 | epoch avg. loss: 4.167 | test avg. loss: 94.743




Epochs: 3728 | epoch avg. loss: 5.718 | test avg. loss: 91.523
Epochs: 3729 | epoch avg. loss: 2.268 | test avg. loss: 91.520
Epochs: 3730 | epoch avg. loss: 1.719 | test avg. loss: 93.921


  7%|▋         | 3734/50000 [05:18<54:32, 14.14it/s]

Epochs: 3731 | epoch avg. loss: 2.134 | test avg. loss: 93.570
Epochs: 3732 | epoch avg. loss: 1.198 | test avg. loss: 94.285
Epochs: 3733 | epoch avg. loss: 1.790 | test avg. loss: 93.826
Epochs: 3734 | epoch avg. loss: 1.680 | test avg. loss: 93.208


  7%|▋         | 3738/50000 [05:18<55:38, 13.86it/s]

Epochs: 3735 | epoch avg. loss: 1.245 | test avg. loss: 93.584
Epochs: 3736 | epoch avg. loss: 0.779 | test avg. loss: 93.782
Epochs: 3737 | epoch avg. loss: 0.615 | test avg. loss: 94.058


  7%|▋         | 3740/50000 [05:19<57:21, 13.44it/s]

Epochs: 3738 | epoch avg. loss: 0.923 | test avg. loss: 94.196
Epochs: 3739 | epoch avg. loss: 1.800 | test avg. loss: 92.914
Epochs: 3740 | epoch avg. loss: 0.643 | test avg. loss: 93.081




Epochs: 3741 | epoch avg. loss: 0.554 | test avg. loss: 93.335
Epochs: 3742 | epoch avg. loss: 0.617 | test avg. loss: 93.458
Epochs: 3743 | epoch avg. loss: 0.617 | test avg. loss: 93.339


  7%|▋         | 3746/50000 [05:19<56:14, 13.71it/s]

Epochs: 3744 | epoch avg. loss: 0.344 | test avg. loss: 93.598
Epochs: 3745 | epoch avg. loss: 0.347 | test avg. loss: 94.019
Epochs: 3746 | epoch avg. loss: 0.367 | test avg. loss: 94.297


  8%|▊         | 3750/50000 [05:19<57:16, 13.46it/s]

Epochs: 3747 | epoch avg. loss: 0.531 | test avg. loss: 94.160
Epochs: 3748 | epoch avg. loss: 0.319 | test avg. loss: 94.169
Epochs: 3749 | epoch avg. loss: 0.263 | test avg. loss: 94.252


  8%|▊         | 3752/50000 [05:20<1:01:43, 12.49it/s]

Epochs: 3750 | epoch avg. loss: 0.270 | test avg. loss: 94.311
Epochs: 3751 | epoch avg. loss: 0.256 | test avg. loss: 94.247
Epochs: 3752 | epoch avg. loss: 0.487 | test avg. loss: 94.075


  8%|▊         | 3756/50000 [05:20<58:17, 13.22it/s]

Epochs: 3753 | epoch avg. loss: 0.377 | test avg. loss: 94.066
Epochs: 3754 | epoch avg. loss: 0.304 | test avg. loss: 94.379
Epochs: 3755 | epoch avg. loss: 0.458 | test avg. loss: 94.363


  8%|▊         | 3758/50000 [05:20<58:35, 13.15it/s]

Epochs: 3756 | epoch avg. loss: 0.482 | test avg. loss: 93.964
Epochs: 3757 | epoch avg. loss: 0.185 | test avg. loss: 93.960
Epochs: 3758 | epoch avg. loss: 0.219 | test avg. loss: 94.384
Epochs: 3759 | epoch avg. loss: 0.664 | test avg. loss: 93.864


  8%|▊         | 3762/50000 [05:20<54:45, 14.07it/s]

Epochs: 3760 | epoch avg. loss: 0.375 | test avg. loss: 94.236
Epochs: 3761 | epoch avg. loss: 0.430 | test avg. loss: 94.752
Epochs: 3762 | epoch avg. loss: 0.648 | test avg. loss: 95.111


  8%|▊         | 3764/50000 [05:21<57:13, 13.46it/s]

Epochs: 3763 | epoch avg. loss: 1.122 | test avg. loss: 94.155
Epochs: 3764 | epoch avg. loss: 0.452 | test avg. loss: 94.009
Epochs: 3765 | epoch avg. loss: 0.711 | test avg. loss: 94.071


  8%|▊         | 3768/50000 [05:21<1:02:49, 12.26it/s]

Epochs: 3766 | epoch avg. loss: 0.693 | test avg. loss: 96.110
Epochs: 3767 | epoch avg. loss: 1.556 | test avg. loss: 96.236
Epochs: 3768 | epoch avg. loss: 2.955 | test avg. loss: 93.914


  8%|▊         | 3772/50000 [05:21<1:04:58, 11.86it/s]

Epochs: 3769 | epoch avg. loss: 1.782 | test avg. loss: 93.279
Epochs: 3770 | epoch avg. loss: 1.253 | test avg. loss: 95.464
Epochs: 3771 | epoch avg. loss: 2.345 | test avg. loss: 94.860


  8%|▊         | 3774/50000 [05:21<1:02:40, 12.29it/s]

Epochs: 3772 | epoch avg. loss: 2.733 | test avg. loss: 93.783
Epochs: 3773 | epoch avg. loss: 2.378 | test avg. loss: 93.803
Epochs: 3774 | epoch avg. loss: 1.169 | test avg. loss: 95.460


  8%|▊         | 3778/50000 [05:22<1:01:55, 12.44it/s]

Epochs: 3775 | epoch avg. loss: 1.583 | test avg. loss: 97.645
Epochs: 3776 | epoch avg. loss: 3.201 | test avg. loss: 96.131
Epochs: 3777 | epoch avg. loss: 3.167 | test avg. loss: 94.254


  8%|▊         | 3780/50000 [05:22<1:02:22, 12.35it/s]

Epochs: 3778 | epoch avg. loss: 1.128 | test avg. loss: 94.327
Epochs: 3779 | epoch avg. loss: 1.050 | test avg. loss: 95.402
Epochs: 3780 | epoch avg. loss: 1.288 | test avg. loss: 95.421


  8%|▊         | 3784/50000 [05:22<58:46, 13.11it/s]

Epochs: 3781 | epoch avg. loss: 0.934 | test avg. loss: 95.027
Epochs: 3782 | epoch avg. loss: 0.824 | test avg. loss: 94.419
Epochs: 3783 | epoch avg. loss: 0.784 | test avg. loss: 94.772


                                                    

Epochs: 3784 | epoch avg. loss: 0.588 | test avg. loss: 96.515
Epochs: 3785 | epoch avg. loss: 1.268 | test avg. loss: 96.073
Epochs: 3786 | epoch avg. loss: 0.982 | test avg. loss: 95.653


  8%|▊         | 3790/50000 [05:23<1:01:28, 12.53it/s]

Epochs: 3787 | epoch avg. loss: 0.407 | test avg. loss: 95.938
Epochs: 3788 | epoch avg. loss: 0.821 | test avg. loss: 96.014
Epochs: 3789 | epoch avg. loss: 0.516 | test avg. loss: 97.120


  8%|▊         | 3792/50000 [05:23<1:00:37, 12.70it/s]

Epochs: 3790 | epoch avg. loss: 1.045 | test avg. loss: 96.529
Epochs: 3791 | epoch avg. loss: 0.944 | test avg. loss: 95.852
Epochs: 3792 | epoch avg. loss: 0.368 | test avg. loss: 95.659


  8%|▊         | 3796/50000 [05:23<58:24, 13.19it/s]  

Epochs: 3793 | epoch avg. loss: 0.336 | test avg. loss: 95.585
Epochs: 3794 | epoch avg. loss: 0.570 | test avg. loss: 94.818
Epochs: 3795 | epoch avg. loss: 0.272 | test avg. loss: 94.517


  8%|▊         | 3798/50000 [05:23<59:59, 12.84it/s]

Epochs: 3796 | epoch avg. loss: 0.153 | test avg. loss: 94.328
Epochs: 3797 | epoch avg. loss: 0.137 | test avg. loss: 94.208
Epochs: 3798 | epoch avg. loss: 0.132 | test avg. loss: 94.126


  8%|▊         | 3798/50000 [05:23<59:59, 12.84it/s]

Epochs: 3799 | epoch avg. loss: 0.163 | test avg. loss: 94.198


  8%|▊         | 3803/50000 [05:26<3:09:34,  4.06it/s]

Epochs: 3800 | epoch avg. loss: 0.265 | test avg. loss: 94.227
Epochs: 3801 | epoch avg. loss: 0.141 | test avg. loss: 94.355
Epochs: 3802 | epoch avg. loss: 0.196 | test avg. loss: 94.361


  8%|▊         | 3805/50000 [05:26<2:27:18,  5.23it/s]

Epochs: 3803 | epoch avg. loss: 0.147 | test avg. loss: 94.499
Epochs: 3804 | epoch avg. loss: 0.201 | test avg. loss: 94.583
Epochs: 3805 | epoch avg. loss: 0.141 | test avg. loss: 94.746


  8%|▊         | 3809/50000 [05:26<1:37:08,  7.93it/s]

Epochs: 3806 | epoch avg. loss: 0.196 | test avg. loss: 94.746
Epochs: 3807 | epoch avg. loss: 0.113 | test avg. loss: 94.806
Epochs: 3808 | epoch avg. loss: 0.110 | test avg. loss: 94.840
Epochs: 3809 | epoch avg. loss: 0.118 | test avg. loss: 94.825


  8%|▊         | 3813/50000 [05:26<1:12:47, 10.57it/s]

Epochs: 3810 | epoch avg. loss: 0.163 | test avg. loss: 94.804
Epochs: 3811 | epoch avg. loss: 0.152 | test avg. loss: 94.948
Epochs: 3812 | epoch avg. loss: 0.345 | test avg. loss: 94.784


  8%|▊         | 3815/50000 [05:27<1:13:12, 10.52it/s]

Epochs: 3813 | epoch avg. loss: 0.228 | test avg. loss: 94.671
Epochs: 3814 | epoch avg. loss: 0.270 | test avg. loss: 94.759
Epochs: 3815 | epoch avg. loss: 0.275 | test avg. loss: 94.757


  8%|▊         | 3819/50000 [05:27<1:05:36, 11.73it/s]

Epochs: 3816 | epoch avg. loss: 0.301 | test avg. loss: 94.644
Epochs: 3817 | epoch avg. loss: 0.255 | test avg. loss: 94.599
Epochs: 3818 | epoch avg. loss: 0.210 | test avg. loss: 94.523


  8%|▊         | 3821/50000 [05:27<1:04:47, 11.88it/s]

Epochs: 3819 | epoch avg. loss: 0.153 | test avg. loss: 94.492
Epochs: 3820 | epoch avg. loss: 0.140 | test avg. loss: 94.392
Epochs: 3821 | epoch avg. loss: 0.250 | test avg. loss: 94.222


  8%|▊         | 3825/50000 [05:27<1:02:30, 12.31it/s]

Epochs: 3822 | epoch avg. loss: 0.130 | test avg. loss: 94.139
Epochs: 3823 | epoch avg. loss: 0.147 | test avg. loss: 94.130
Epochs: 3824 | epoch avg. loss: 0.181 | test avg. loss: 94.477


  8%|▊         | 3827/50000 [05:27<59:18, 12.97it/s]

Epochs: 3825 | epoch avg. loss: 0.290 | test avg. loss: 94.566
Epochs: 3826 | epoch avg. loss: 0.666 | test avg. loss: 94.829
Epochs: 3827 | epoch avg. loss: 0.395 | test avg. loss: 95.095


  8%|▊         | 3831/50000 [05:28<1:03:52, 12.05it/s]

Epochs: 3828 | epoch avg. loss: 0.392 | test avg. loss: 95.048
Epochs: 3829 | epoch avg. loss: 0.471 | test avg. loss: 94.898
Epochs: 3830 | epoch avg. loss: 0.177 | test avg. loss: 95.099


  8%|▊         | 3833/50000 [05:28<1:04:50, 11.87it/s]

Epochs: 3831 | epoch avg. loss: 0.211 | test avg. loss: 95.475
Epochs: 3832 | epoch avg. loss: 0.333 | test avg. loss: 95.423
Epochs: 3833 | epoch avg. loss: 0.157 | test avg. loss: 95.430


  8%|▊         | 3837/50000 [05:28<1:07:13, 11.45it/s]

Epochs: 3834 | epoch avg. loss: 0.133 | test avg. loss: 95.515
Epochs: 3835 | epoch avg. loss: 0.211 | test avg. loss: 95.269
Epochs: 3836 | epoch avg. loss: 0.198 | test avg. loss: 95.101


  8%|▊         | 3839/50000 [05:29<1:09:02, 11.14it/s]

Epochs: 3837 | epoch avg. loss: 0.277 | test avg. loss: 95.230
Epochs: 3838 | epoch avg. loss: 0.336 | test avg. loss: 95.247
Epochs: 3839 | epoch avg. loss: 0.532 | test avg. loss: 94.621


  8%|▊         | 3843/50000 [05:29<1:05:27, 11.75it/s]

Epochs: 3840 | epoch avg. loss: 0.284 | test avg. loss: 94.431
Epochs: 3841 | epoch avg. loss: 0.143 | test avg. loss: 94.393
Epochs: 3842 | epoch avg. loss: 0.139 | test avg. loss: 94.493


  8%|▊         | 3845/50000 [05:29<1:03:10, 12.18it/s]

Epochs: 3843 | epoch avg. loss: 0.204 | test avg. loss: 94.491
Epochs: 3844 | epoch avg. loss: 0.211 | test avg. loss: 94.745
Epochs: 3845 | epoch avg. loss: 0.171 | test avg. loss: 95.046


  8%|▊         | 3849/50000 [05:29<58:39, 13.11it/s]  

Epochs: 3846 | epoch avg. loss: 0.311 | test avg. loss: 94.931
Epochs: 3847 | epoch avg. loss: 0.149 | test avg. loss: 95.024
Epochs: 3848 | epoch avg. loss: 0.218 | test avg. loss: 94.841


  8%|▊         | 3851/50000 [05:29<56:48, 13.54it/s]

Epochs: 3849 | epoch avg. loss: 0.116 | test avg. loss: 95.083
Epochs: 3850 | epoch avg. loss: 0.550 | test avg. loss: 94.756
Epochs: 3851 | epoch avg. loss: 0.417 | test avg. loss: 94.497


  8%|▊         | 3855/50000 [05:30<1:01:51, 12.43it/s]

Epochs: 3852 | epoch avg. loss: 0.200 | test avg. loss: 94.359
Epochs: 3853 | epoch avg. loss: 0.158 | test avg. loss: 94.217
Epochs: 3854 | epoch avg. loss: 0.134 | test avg. loss: 94.197


  8%|▊         | 3857/50000 [05:30<1:07:55, 11.32it/s]

Epochs: 3855 | epoch avg. loss: 0.206 | test avg. loss: 94.212
Epochs: 3856 | epoch avg. loss: 0.198 | test avg. loss: 94.264
Epochs: 3857 | epoch avg. loss: 0.128 | test avg. loss: 94.389


  8%|▊         | 3861/50000 [05:30<1:07:43, 11.36it/s]

Epochs: 3858 | epoch avg. loss: 0.122 | test avg. loss: 94.489
Epochs: 3859 | epoch avg. loss: 0.119 | test avg. loss: 94.659
Epochs: 3860 | epoch avg. loss: 0.182 | test avg. loss: 94.654


  8%|▊         | 3863/50000 [05:31<1:10:43, 10.87it/s]

Epochs: 3861 | epoch avg. loss: 0.298 | test avg. loss: 94.349
Epochs: 3862 | epoch avg. loss: 0.224 | test avg. loss: 94.732
Epochs: 3863 | epoch avg. loss: 0.521 | test avg. loss: 94.670


  8%|▊         | 3867/50000 [05:31<1:03:21, 12.13it/s]

Epochs: 3864 | epoch avg. loss: 0.721 | test avg. loss: 94.012
Epochs: 3865 | epoch avg. loss: 0.379 | test avg. loss: 94.291
Epochs: 3866 | epoch avg. loss: 0.338 | test avg. loss: 94.786


  8%|▊         | 3869/50000 [05:31<1:01:45, 12.45it/s]

Epochs: 3867 | epoch avg. loss: 0.514 | test avg. loss: 95.051
Epochs: 3868 | epoch avg. loss: 0.875 | test avg. loss: 94.123
Epochs: 3869 | epoch avg. loss: 0.420 | test avg. loss: 95.014


  8%|▊         | 3873/50000 [05:31<57:48, 13.30it/s]  

Epochs: 3870 | epoch avg. loss: 0.806 | test avg. loss: 94.941
Epochs: 3871 | epoch avg. loss: 1.013 | test avg. loss: 93.814
Epochs: 3872 | epoch avg. loss: 0.317 | test avg. loss: 94.006


  8%|▊         | 3875/50000 [05:31<56:46, 13.54it/s]

Epochs: 3873 | epoch avg. loss: 0.295 | test avg. loss: 94.321
Epochs: 3874 | epoch avg. loss: 0.567 | test avg. loss: 94.310
Epochs: 3875 | epoch avg. loss: 0.573 | test avg. loss: 94.321


  8%|▊         | 3879/50000 [05:32<1:00:07, 12.78it/s]

Epochs: 3876 | epoch avg. loss: 0.287 | test avg. loss: 94.560
Epochs: 3877 | epoch avg. loss: 0.409 | test avg. loss: 95.785
Epochs: 3878 | epoch avg. loss: 1.012 | test avg. loss: 95.253


  8%|▊         | 3881/50000 [05:32<1:01:41, 12.46it/s]

Epochs: 3879 | epoch avg. loss: 0.864 | test avg. loss: 94.627
Epochs: 3880 | epoch avg. loss: 1.633 | test avg. loss: 95.035
Epochs: 3881 | epoch avg. loss: 0.930 | test avg. loss: 94.519


  8%|▊         | 3885/50000 [05:32<59:39, 12.88it/s]  

Epochs: 3882 | epoch avg. loss: 0.715 | test avg. loss: 93.845
Epochs: 3883 | epoch avg. loss: 0.400 | test avg. loss: 93.731
Epochs: 3884 | epoch avg. loss: 0.312 | test avg. loss: 94.261


  8%|▊         | 3887/50000 [05:32<59:09, 12.99it/s]

Epochs: 3885 | epoch avg. loss: 1.012 | test avg. loss: 93.740
Epochs: 3886 | epoch avg. loss: 0.695 | test avg. loss: 94.054
Epochs: 3887 | epoch avg. loss: 0.526 | test avg. loss: 94.409


  8%|▊         | 3891/50000 [05:33<1:02:38, 12.27it/s]

Epochs: 3888 | epoch avg. loss: 0.898 | test avg. loss: 94.130
Epochs: 3889 | epoch avg. loss: 1.240 | test avg. loss: 94.305
Epochs: 3890 | epoch avg. loss: 0.624 | test avg. loss: 94.784


  8%|▊         | 3893/50000 [05:33<1:00:41, 12.66it/s]

Epochs: 3891 | epoch avg. loss: 0.908 | test avg. loss: 94.592
Epochs: 3892 | epoch avg. loss: 1.293 | test avg. loss: 95.388
Epochs: 3893 | epoch avg. loss: 0.927 | test avg. loss: 97.179


  8%|▊         | 3897/50000 [05:33<58:48, 13.06it/s]  

Epochs: 3894 | epoch avg. loss: 2.264 | test avg. loss: 95.539
Epochs: 3895 | epoch avg. loss: 1.536 | test avg. loss: 95.431
Epochs: 3896 | epoch avg. loss: 0.832 | test avg. loss: 97.597


  8%|▊         | 3899/50000 [05:33<56:51, 13.51it/s]

Epochs: 3897 | epoch avg. loss: 2.214 | test avg. loss: 95.676
Epochs: 3898 | epoch avg. loss: 1.158 | test avg. loss: 94.950
Epochs: 3899 | epoch avg. loss: 0.793 | test avg. loss: 94.974


  8%|▊         | 3903/50000 [05:35<2:40:09,  4.80it/s]

Epochs: 3900 | epoch avg. loss: 0.671 | test avg. loss: 96.897
Epochs: 3901 | epoch avg. loss: 2.107 | test avg. loss: 94.999
Epochs: 3902 | epoch avg. loss: 0.932 | test avg. loss: 94.770


  8%|▊         | 3905/50000 [05:35<2:09:19,  5.94it/s]

Epochs: 3903 | epoch avg. loss: 0.374 | test avg. loss: 95.548
Epochs: 3904 | epoch avg. loss: 0.535 | test avg. loss: 96.242
Epochs: 3905 | epoch avg. loss: 1.554 | test avg. loss: 95.659


  8%|▊         | 3909/50000 [05:35<1:31:53,  8.36it/s]

Epochs: 3906 | epoch avg. loss: 1.247 | test avg. loss: 96.341
Epochs: 3907 | epoch avg. loss: 1.206 | test avg. loss: 103.326
Epochs: 3908 | epoch avg. loss: 6.474 | test avg. loss: 97.651


  8%|▊         | 3911/50000 [05:36<1:24:19,  9.11it/s]

Epochs: 3909 | epoch avg. loss: 4.148 | test avg. loss: 96.363
Epochs: 3910 | epoch avg. loss: 1.795 | test avg. loss: 100.037
Epochs: 3911 | epoch avg. loss: 4.312 | test avg. loss: 97.508


  8%|▊         | 3915/50000 [05:36<1:11:44, 10.71it/s]

Epochs: 3912 | epoch avg. loss: 3.557 | test avg. loss: 94.040
Epochs: 3913 | epoch avg. loss: 2.825 | test avg. loss: 94.861
Epochs: 3914 | epoch avg. loss: 1.874 | test avg. loss: 95.501


  8%|▊         | 3917/50000 [05:36<1:07:13, 11.42it/s]

Epochs: 3915 | epoch avg. loss: 2.415 | test avg. loss: 93.397
Epochs: 3916 | epoch avg. loss: 0.872 | test avg. loss: 93.554
Epochs: 3917 | epoch avg. loss: 0.728 | test avg. loss: 94.214


  8%|▊         | 3921/50000 [05:36<1:03:11, 12.15it/s]

Epochs: 3918 | epoch avg. loss: 0.518 | test avg. loss: 95.000
Epochs: 3919 | epoch avg. loss: 0.879 | test avg. loss: 94.962
Epochs: 3920 | epoch avg. loss: 0.281 | test avg. loss: 95.233


  8%|▊         | 3923/50000 [05:36<1:03:58, 12.00it/s]

Epochs: 3921 | epoch avg. loss: 0.188 | test avg. loss: 95.473
Epochs: 3922 | epoch avg. loss: 0.278 | test avg. loss: 95.644
Epochs: 3923 | epoch avg. loss: 0.198 | test avg. loss: 95.850


  8%|▊         | 3927/50000 [05:37<1:04:42, 11.87it/s]

Epochs: 3924 | epoch avg. loss: 0.209 | test avg. loss: 96.453
Epochs: 3925 | epoch avg. loss: 0.486 | test avg. loss: 96.408
Epochs: 3926 | epoch avg. loss: 0.890 | test avg. loss: 96.218


  8%|▊         | 3929/50000 [05:37<1:02:12, 12.34it/s]

Epochs: 3927 | epoch avg. loss: 0.317 | test avg. loss: 96.471
Epochs: 3928 | epoch avg. loss: 0.269 | test avg. loss: 97.047
Epochs: 3929 | epoch avg. loss: 1.023 | test avg. loss: 96.733


  8%|▊         | 3933/50000 [05:37<57:35, 13.33it/s]

Epochs: 3930 | epoch avg. loss: 0.780 | test avg. loss: 96.546
Epochs: 3931 | epoch avg. loss: 0.892 | test avg. loss: 96.676
Epochs: 3932 | epoch avg. loss: 0.517 | test avg. loss: 96.766


  8%|▊         | 3935/50000 [05:37<56:44, 13.53it/s]

Epochs: 3933 | epoch avg. loss: 0.493 | test avg. loss: 96.313
Epochs: 3934 | epoch avg. loss: 0.459 | test avg. loss: 96.118
Epochs: 3935 | epoch avg. loss: 0.256 | test avg. loss: 96.425


  8%|▊         | 3939/50000 [05:38<1:01:38, 12.46it/s]

Epochs: 3936 | epoch avg. loss: 0.366 | test avg. loss: 96.234
Epochs: 3937 | epoch avg. loss: 0.176 | test avg. loss: 96.340
Epochs: 3938 | epoch avg. loss: 0.257 | test avg. loss: 96.367


  8%|▊         | 3941/50000 [05:38<59:49, 12.83it/s]

Epochs: 3939 | epoch avg. loss: 0.209 | test avg. loss: 96.654
Epochs: 3940 | epoch avg. loss: 0.221 | test avg. loss: 97.326
Epochs: 3941 | epoch avg. loss: 0.578 | test avg. loss: 97.115
Epochs: 3942 | epoch avg. loss: 0.490 | test avg. loss: 97.145


  8%|▊         | 3945/50000 [05:38<57:08, 13.43it/s]

Epochs: 3943 | epoch avg. loss: 0.218 | test avg. loss: 97.222
Epochs: 3944 | epoch avg. loss: 0.204 | test avg. loss: 97.354
Epochs: 3945 | epoch avg. loss: 0.180 | test avg. loss: 97.405


  8%|▊         | 3949/50000 [05:38<1:00:48, 12.62it/s]

Epochs: 3946 | epoch avg. loss: 0.273 | test avg. loss: 97.406
Epochs: 3947 | epoch avg. loss: 0.186 | test avg. loss: 97.536
Epochs: 3948 | epoch avg. loss: 0.206 | test avg. loss: 97.578


  8%|▊         | 3951/50000 [05:39<1:04:37, 11.88it/s]

Epochs: 3949 | epoch avg. loss: 0.181 | test avg. loss: 97.764
Epochs: 3950 | epoch avg. loss: 0.501 | test avg. loss: 97.206
Epochs: 3951 | epoch avg. loss: 0.251 | test avg. loss: 96.972


  8%|▊         | 3955/50000 [05:39<1:00:58, 12.59it/s]

Epochs: 3952 | epoch avg. loss: 0.174 | test avg. loss: 96.734
Epochs: 3953 | epoch avg. loss: 0.131 | test avg. loss: 96.592
Epochs: 3954 | epoch avg. loss: 0.155 | test avg. loss: 96.540


  8%|▊         | 3957/50000 [05:39<58:52, 13.03it/s]

Epochs: 3955 | epoch avg. loss: 0.296 | test avg. loss: 96.416
Epochs: 3956 | epoch avg. loss: 0.169 | test avg. loss: 96.472
Epochs: 3957 | epoch avg. loss: 0.196 | test avg. loss: 96.524


  8%|▊         | 3961/50000 [05:39<57:11, 13.42it/s]

Epochs: 3958 | epoch avg. loss: 0.241 | test avg. loss: 96.337
Epochs: 3959 | epoch avg. loss: 0.156 | test avg. loss: 96.248
Epochs: 3960 | epoch avg. loss: 0.122 | test avg. loss: 96.248


  8%|▊         | 3963/50000 [05:40<1:00:12, 12.74it/s]

Epochs: 3961 | epoch avg. loss: 0.141 | test avg. loss: 96.309
Epochs: 3962 | epoch avg. loss: 0.150 | test avg. loss: 96.461
Epochs: 3963 | epoch avg. loss: 0.286 | test avg. loss: 96.419


  8%|▊         | 3967/50000 [05:40<55:34, 13.81it/s]

Epochs: 3964 | epoch avg. loss: 0.302 | test avg. loss: 96.429
Epochs: 3965 | epoch avg. loss: 0.182 | test avg. loss: 96.599
Epochs: 3966 | epoch avg. loss: 0.248 | test avg. loss: 96.545
Epochs: 3967 | epoch avg. loss: 0.314 | test avg. loss: 96.419


  8%|▊         | 3971/50000 [05:40<53:07, 14.44it/s]

Epochs: 3968 | epoch avg. loss: 0.199 | test avg. loss: 96.986
Epochs: 3969 | epoch avg. loss: 0.731 | test avg. loss: 96.385
Epochs: 3970 | epoch avg. loss: 0.300 | test avg. loss: 96.483
Epochs: 3971 | epoch avg. loss: 0.447 | test avg. loss: 98.395


  8%|▊         | 3975/50000 [05:40<51:19, 14.94it/s]

Epochs: 3972 | epoch avg. loss: 1.477 | test avg. loss: 97.447
Epochs: 3973 | epoch avg. loss: 0.791 | test avg. loss: 97.300
Epochs: 3974 | epoch avg. loss: 0.194 | test avg. loss: 97.475


  8%|▊         | 3977/50000 [05:41<56:18, 13.62it/s]

Epochs: 3975 | epoch avg. loss: 0.148 | test avg. loss: 97.624
Epochs: 3976 | epoch avg. loss: 0.198 | test avg. loss: 97.973
Epochs: 3977 | epoch avg. loss: 0.261 | test avg. loss: 98.310


  8%|▊         | 3981/50000 [05:41<1:01:14, 12.52it/s]

Epochs: 3978 | epoch avg. loss: 0.448 | test avg. loss: 98.253
Epochs: 3979 | epoch avg. loss: 0.369 | test avg. loss: 98.142
Epochs: 3980 | epoch avg. loss: 0.895 | test avg. loss: 98.528


  8%|▊         | 3983/50000 [05:41<1:04:15, 11.93it/s]

Epochs: 3981 | epoch avg. loss: 0.676 | test avg. loss: 98.849
Epochs: 3982 | epoch avg. loss: 0.952 | test avg. loss: 97.779
Epochs: 3983 | epoch avg. loss: 0.207 | test avg. loss: 97.721


  8%|▊         | 3987/50000 [05:41<1:03:04, 12.16it/s]

Epochs: 3984 | epoch avg. loss: 0.210 | test avg. loss: 97.455
Epochs: 3985 | epoch avg. loss: 0.134 | test avg. loss: 97.225
Epochs: 3986 | epoch avg. loss: 0.109 | test avg. loss: 97.141


  8%|▊         | 3989/50000 [05:42<1:04:27, 11.90it/s]

Epochs: 3987 | epoch avg. loss: 0.154 | test avg. loss: 97.051
Epochs: 3988 | epoch avg. loss: 0.364 | test avg. loss: 96.917
Epochs: 3989 | epoch avg. loss: 0.371 | test avg. loss: 97.963


  8%|▊         | 3993/50000 [05:42<1:02:10, 12.33it/s]

Epochs: 3990 | epoch avg. loss: 0.822 | test avg. loss: 97.795
Epochs: 3991 | epoch avg. loss: 1.408 | test avg. loss: 96.940
Epochs: 3992 | epoch avg. loss: 0.617 | test avg. loss: 97.253


  8%|▊         | 3995/50000 [05:42<1:01:21, 12.50it/s]

Epochs: 3993 | epoch avg. loss: 0.538 | test avg. loss: 97.647
Epochs: 3994 | epoch avg. loss: 0.838 | test avg. loss: 97.534
Epochs: 3995 | epoch avg. loss: 1.287 | test avg. loss: 97.015


  8%|▊         | 3999/50000 [05:42<57:02, 13.44it/s]

Epochs: 3996 | epoch avg. loss: 2.054 | test avg. loss: 96.990
Epochs: 3997 | epoch avg. loss: 0.764 | test avg. loss: 97.408
Epochs: 3998 | epoch avg. loss: 0.671 | test avg. loss: 97.093


  8%|▊         | 3999/50000 [05:42<57:02, 13.44it/s]

Epochs: 3999 | epoch avg. loss: 0.761 | test avg. loss: 96.723


  8%|▊         | 4003/50000 [05:44<2:46:27,  4.61it/s]

Epochs: 4000 | epoch avg. loss: 0.243 | test avg. loss: 97.283
Epochs: 4001 | epoch avg. loss: 0.412 | test avg. loss: 97.612
Epochs: 4002 | epoch avg. loss: 0.819 | test avg. loss: 96.926


  8%|▊         | 4005/50000 [05:44<2:15:52,  5.64it/s]

Epochs: 4003 | epoch avg. loss: 0.419 | test avg. loss: 96.689
Epochs: 4004 | epoch avg. loss: 0.383 | test avg. loss: 97.948
Epochs: 4005 | epoch avg. loss: 1.258 | test avg. loss: 96.713


  8%|▊         | 4009/50000 [05:44<1:39:34,  7.70it/s]

Epochs: 4006 | epoch avg. loss: 0.404 | test avg. loss: 96.626
Epochs: 4007 | epoch avg. loss: 0.781 | test avg. loss: 97.131
Epochs: 4008 | epoch avg. loss: 0.531 | test avg. loss: 98.410


  8%|▊         | 4011/50000 [05:45<1:30:45,  8.44it/s]

Epochs: 4009 | epoch avg. loss: 1.255 | test avg. loss: 97.842
Epochs: 4010 | epoch avg. loss: 0.440 | test avg. loss: 98.234
Epochs: 4011 | epoch avg. loss: 0.317 | test avg. loss: 99.130


  8%|▊         | 4015/50000 [05:45<1:13:19, 10.45it/s]

Epochs: 4012 | epoch avg. loss: 0.622 | test avg. loss: 99.032
Epochs: 4013 | epoch avg. loss: 0.848 | test avg. loss: 98.476
Epochs: 4014 | epoch avg. loss: 0.334 | test avg. loss: 98.410


  8%|▊         | 4019/50000 [05:45<1:01:50, 12.39it/s]

Epochs: 4015 | epoch avg. loss: 0.302 | test avg. loss: 98.082
Epochs: 4016 | epoch avg. loss: 0.274 | test avg. loss: 97.751
Epochs: 4017 | epoch avg. loss: 0.224 | test avg. loss: 97.446
Epochs: 4018 | epoch avg. loss: 0.153 | test avg. loss: 97.287


  8%|▊         | 4021/50000 [05:45<1:00:16, 12.71it/s]

Epochs: 4019 | epoch avg. loss: 0.206 | test avg. loss: 97.038
Epochs: 4020 | epoch avg. loss: 0.189 | test avg. loss: 96.838
Epochs: 4021 | epoch avg. loss: 0.225 | test avg. loss: 96.896


  8%|▊         | 4025/50000 [05:46<1:01:54, 12.38it/s]

Epochs: 4022 | epoch avg. loss: 0.325 | test avg. loss: 96.732
Epochs: 4023 | epoch avg. loss: 0.130 | test avg. loss: 96.824
Epochs: 4024 | epoch avg. loss: 0.147 | test avg. loss: 96.985


  8%|▊         | 4027/50000 [05:46<59:09, 12.95it/s]

Epochs: 4025 | epoch avg. loss: 0.224 | test avg. loss: 96.893
Epochs: 4026 | epoch avg. loss: 0.360 | test avg. loss: 96.692
Epochs: 4027 | epoch avg. loss: 0.157 | test avg. loss: 96.647


  8%|▊         | 4031/50000 [05:46<56:14, 13.62it/s]

Epochs: 4028 | epoch avg. loss: 0.170 | test avg. loss: 96.822
Epochs: 4029 | epoch avg. loss: 0.367 | test avg. loss: 96.581
Epochs: 4030 | epoch avg. loss: 0.305 | test avg. loss: 96.589
Epochs: 4031 | epoch avg. loss: 0.192 | test avg. loss: 96.705


  8%|▊         | 4035/50000 [05:46<51:46, 14.79it/s]

Epochs: 4032 | epoch avg. loss: 0.229 | test avg. loss: 96.789
Epochs: 4033 | epoch avg. loss: 0.557 | test avg. loss: 96.740
Epochs: 4034 | epoch avg. loss: 0.232 | test avg. loss: 97.227


  8%|▊         | 4037/50000 [05:47<58:46, 13.03it/s]

Epochs: 4035 | epoch avg. loss: 0.499 | test avg. loss: 97.171
Epochs: 4036 | epoch avg. loss: 0.472 | test avg. loss: 97.172
Epochs: 4037 | epoch avg. loss: 0.247 | test avg. loss: 97.585


  8%|▊         | 4041/50000 [05:47<1:01:40, 12.42it/s]

Epochs: 4038 | epoch avg. loss: 0.432 | test avg. loss: 97.249
Epochs: 4039 | epoch avg. loss: 0.128 | test avg. loss: 97.379
Epochs: 4040 | epoch avg. loss: 0.185 | test avg. loss: 97.358


  8%|▊         | 4043/50000 [05:47<1:00:45, 12.61it/s]

Epochs: 4041 | epoch avg. loss: 0.215 | test avg. loss: 97.357
Epochs: 4042 | epoch avg. loss: 0.204 | test avg. loss: 98.033
Epochs: 4043 | epoch avg. loss: 0.690 | test avg. loss: 97.649


  8%|▊         | 4047/50000 [05:47<58:22, 13.12it/s]

Epochs: 4044 | epoch avg. loss: 0.842 | test avg. loss: 97.590
Epochs: 4045 | epoch avg. loss: 0.385 | test avg. loss: 98.255
Epochs: 4046 | epoch avg. loss: 0.677 | test avg. loss: 98.287


  8%|▊         | 4049/50000 [05:48<1:01:29, 12.45it/s]

Epochs: 4047 | epoch avg. loss: 1.324 | test avg. loss: 97.836
Epochs: 4048 | epoch avg. loss: 0.536 | test avg. loss: 99.825
Epochs: 4049 | epoch avg. loss: 1.711 | test avg. loss: 98.353


  8%|▊         | 4053/50000 [05:48<1:01:46, 12.40it/s]

Epochs: 4050 | epoch avg. loss: 0.860 | test avg. loss: 98.156
Epochs: 4051 | epoch avg. loss: 0.365 | test avg. loss: 98.693
Epochs: 4052 | epoch avg. loss: 0.851 | test avg. loss: 98.100


  8%|▊         | 4055/50000 [05:48<1:01:38, 12.42it/s]

Epochs: 4053 | epoch avg. loss: 0.330 | test avg. loss: 98.538
Epochs: 4054 | epoch avg. loss: 0.342 | test avg. loss: 99.606
Epochs: 4055 | epoch avg. loss: 1.337 | test avg. loss: 98.308


  8%|▊         | 4059/50000 [05:48<59:43, 12.82it/s]  

Epochs: 4056 | epoch avg. loss: 0.343 | test avg. loss: 98.712
Epochs: 4057 | epoch avg. loss: 0.720 | test avg. loss: 98.088
Epochs: 4058 | epoch avg. loss: 0.109 | test avg. loss: 97.954


  8%|▊         | 4061/50000 [05:48<59:34, 12.85it/s]

Epochs: 4059 | epoch avg. loss: 0.108 | test avg. loss: 98.234
Epochs: 4060 | epoch avg. loss: 0.427 | test avg. loss: 97.715
Epochs: 4061 | epoch avg. loss: 0.263 | test avg. loss: 98.391


  8%|▊         | 4065/50000 [05:49<58:06, 13.17it/s]

Epochs: 4062 | epoch avg. loss: 0.595 | test avg. loss: 98.247
Epochs: 4063 | epoch avg. loss: 1.055 | test avg. loss: 97.825
Epochs: 4064 | epoch avg. loss: 0.398 | test avg. loss: 98.701


  8%|▊         | 4067/50000 [05:49<56:06, 13.65it/s]

Epochs: 4065 | epoch avg. loss: 0.685 | test avg. loss: 98.985
Epochs: 4066 | epoch avg. loss: 1.077 | test avg. loss: 98.389
Epochs: 4067 | epoch avg. loss: 0.337 | test avg. loss: 98.805


  8%|▊         | 4071/50000 [05:49<57:10, 13.39it/s]

Epochs: 4068 | epoch avg. loss: 0.366 | test avg. loss: 99.879
Epochs: 4069 | epoch avg. loss: 1.095 | test avg. loss: 99.010
Epochs: 4070 | epoch avg. loss: 0.278 | test avg. loss: 99.890


  8%|▊         | 4073/50000 [05:49<1:01:43, 12.40it/s]

Epochs: 4071 | epoch avg. loss: 0.799 | test avg. loss: 99.506
Epochs: 4072 | epoch avg. loss: 0.750 | test avg. loss: 99.208
Epochs: 4073 | epoch avg. loss: 0.549 | test avg. loss: 99.606


  8%|▊         | 4077/50000 [05:50<1:06:43, 11.47it/s]

Epochs: 4074 | epoch avg. loss: 0.600 | test avg. loss: 99.279
Epochs: 4075 | epoch avg. loss: 0.504 | test avg. loss: 98.731
Epochs: 4076 | epoch avg. loss: 0.469 | test avg. loss: 98.812


  8%|▊         | 4079/50000 [05:50<1:08:46, 11.13it/s]

Epochs: 4077 | epoch avg. loss: 0.362 | test avg. loss: 98.663
Epochs: 4078 | epoch avg. loss: 0.446 | test avg. loss: 98.581
Epochs: 4079 | epoch avg. loss: 0.172 | test avg. loss: 98.729


  8%|▊         | 4083/50000 [05:50<1:04:14, 11.91it/s]

Epochs: 4080 | epoch avg. loss: 0.264 | test avg. loss: 100.335
Epochs: 4081 | epoch avg. loss: 1.580 | test avg. loss: 99.051
Epochs: 4082 | epoch avg. loss: 0.711 | test avg. loss: 99.724


  8%|▊         | 4085/50000 [05:50<1:02:01, 12.34it/s]

Epochs: 4083 | epoch avg. loss: 0.778 | test avg. loss: 99.381
Epochs: 4084 | epoch avg. loss: 0.397 | test avg. loss: 99.183
Epochs: 4085 | epoch avg. loss: 0.228 | test avg. loss: 99.116


  8%|▊         | 4089/50000 [05:51<59:47, 12.80it/s]  

Epochs: 4086 | epoch avg. loss: 0.179 | test avg. loss: 99.074
Epochs: 4087 | epoch avg. loss: 0.217 | test avg. loss: 98.962
Epochs: 4088 | epoch avg. loss: 0.379 | test avg. loss: 98.779


                                                    

Epochs: 4089 | epoch avg. loss: 0.181 | test avg. loss: 98.885
Epochs: 4090 | epoch avg. loss: 0.173 | test avg. loss: 99.249
Epochs: 4091 | epoch avg. loss: 0.342 | test avg. loss: 99.225




Epochs: 4092 | epoch avg. loss: 0.571 | test avg. loss: 99.341
Epochs: 4093 | epoch avg. loss: 0.371 | test avg. loss: 100.758
Epochs: 4094 | epoch avg. loss: 0.900 | test avg. loss: 100.784
Epochs: 4095 | epoch avg. loss: 0.988 | test avg. loss: 99.956


  8%|▊         | 4099/50000 [05:51<55:43, 13.73it/s]

Epochs: 4096 | epoch avg. loss: 0.365 | test avg. loss: 100.704
Epochs: 4097 | epoch avg. loss: 0.745 | test avg. loss: 100.195
Epochs: 4098 | epoch avg. loss: 0.501 | test avg. loss: 99.992


  8%|▊         | 4099/50000 [05:51<55:43, 13.73it/s]

Epochs: 4099 | epoch avg. loss: 0.269 | test avg. loss: 100.600


  8%|▊         | 4103/50000 [05:53<2:29:45,  5.11it/s]

Epochs: 4100 | epoch avg. loss: 0.485 | test avg. loss: 100.871
Epochs: 4101 | epoch avg. loss: 0.782 | test avg. loss: 100.000
Epochs: 4102 | epoch avg. loss: 1.399 | test avg. loss: 100.243
Epochs: 4103 | epoch avg. loss: 1.127 | test avg. loss: 107.217


  8%|▊         | 4107/50000 [05:53<1:38:23,  7.77it/s]

Epochs: 4104 | epoch avg. loss: 7.244 | test avg. loss: 100.814
Epochs: 4105 | epoch avg. loss: 3.141 | test avg. loss: 101.606
Epochs: 4106 | epoch avg. loss: 1.721 | test avg. loss: 103.266
Epochs: 4107 | epoch avg. loss: 2.203 | test avg. loss: 104.287


  8%|▊         | 4111/50000 [05:53<1:15:46, 10.09it/s]

Epochs: 4108 | epoch avg. loss: 1.855 | test avg. loss: 102.600
Epochs: 4109 | epoch avg. loss: 2.011 | test avg. loss: 102.292
Epochs: 4110 | epoch avg. loss: 1.387 | test avg. loss: 104.814


                                                      

Epochs: 4111 | epoch avg. loss: 2.482 | test avg. loss: 103.164
Epochs: 4112 | epoch avg. loss: 1.609 | test avg. loss: 102.218
Epochs: 4113 | epoch avg. loss: 1.323 | test avg. loss: 102.758


  8%|▊         | 4117/50000 [05:54<1:00:40, 12.60it/s]

Epochs: 4114 | epoch avg. loss: 1.083 | test avg. loss: 102.389
Epochs: 4115 | epoch avg. loss: 0.380 | test avg. loss: 102.480
Epochs: 4116 | epoch avg. loss: 0.327 | test avg. loss: 102.991
Epochs: 4117 | epoch avg. loss: 0.462 | test avg. loss: 103.717


  8%|▊         | 4121/50000 [05:54<54:34, 14.01it/s]

Epochs: 4118 | epoch avg. loss: 1.147 | test avg. loss: 102.563
Epochs: 4119 | epoch avg. loss: 0.495 | test avg. loss: 103.220
Epochs: 4120 | epoch avg. loss: 1.038 | test avg. loss: 102.287
Epochs: 4121 | epoch avg. loss: 0.834 | test avg. loss: 101.524


  8%|▊         | 4125/50000 [05:54<52:00, 14.70it/s]

Epochs: 4122 | epoch avg. loss: 0.556 | test avg. loss: 101.550
Epochs: 4123 | epoch avg. loss: 0.395 | test avg. loss: 102.411
Epochs: 4124 | epoch avg. loss: 0.649 | test avg. loss: 102.325
Epochs: 4125 | epoch avg. loss: 0.862 | test avg. loss: 101.706


  8%|▊         | 4129/50000 [05:55<58:38, 13.04it/s]

Epochs: 4126 | epoch avg. loss: 0.445 | test avg. loss: 101.966
Epochs: 4127 | epoch avg. loss: 0.365 | test avg. loss: 102.420
Epochs: 4128 | epoch avg. loss: 0.745 | test avg. loss: 102.149


                                                    

Epochs: 4129 | epoch avg. loss: 0.559 | test avg. loss: 101.696
Epochs: 4130 | epoch avg. loss: 0.675 | test avg. loss: 102.240
Epochs: 4131 | epoch avg. loss: 0.788 | test avg. loss: 102.055


  8%|▊         | 4135/50000 [05:55<54:54, 13.92it/s]

Epochs: 4132 | epoch avg. loss: 0.703 | test avg. loss: 101.210
Epochs: 4133 | epoch avg. loss: 0.795 | test avg. loss: 100.967
Epochs: 4134 | epoch avg. loss: 0.531 | test avg. loss: 102.078
Epochs: 4135 | epoch avg. loss: 1.731 | test avg. loss: 100.160


  8%|▊         | 4139/50000 [05:55<51:18, 14.90it/s]

Epochs: 4136 | epoch avg. loss: 0.535 | test avg. loss: 100.434
Epochs: 4137 | epoch avg. loss: 0.446 | test avg. loss: 101.578
Epochs: 4138 | epoch avg. loss: 1.435 | test avg. loss: 100.404


  8%|▊         | 4141/50000 [05:56<55:57, 13.66it/s]

Epochs: 4139 | epoch avg. loss: 0.612 | test avg. loss: 100.963
Epochs: 4140 | epoch avg. loss: 0.712 | test avg. loss: 104.388
Epochs: 4141 | epoch avg. loss: 3.719 | test avg. loss: 101.378


  8%|▊         | 4145/50000 [05:56<55:07, 13.86it/s]

Epochs: 4142 | epoch avg. loss: 1.928 | test avg. loss: 102.400
Epochs: 4143 | epoch avg. loss: 1.605 | test avg. loss: 103.514
Epochs: 4144 | epoch avg. loss: 3.207 | test avg. loss: 101.337
Epochs: 4145 | epoch avg. loss: 0.994 | test avg. loss: 102.705


  8%|▊         | 4149/50000 [05:56<52:31, 14.55it/s]

Epochs: 4146 | epoch avg. loss: 1.401 | test avg. loss: 104.178
Epochs: 4147 | epoch avg. loss: 2.373 | test avg. loss: 101.859
Epochs: 4148 | epoch avg. loss: 1.050 | test avg. loss: 102.872
Epochs: 4149 | epoch avg. loss: 1.230 | test avg. loss: 103.355


  8%|▊         | 4153/50000 [05:56<50:27, 15.14it/s]

Epochs: 4150 | epoch avg. loss: 2.846 | test avg. loss: 101.743
Epochs: 4151 | epoch avg. loss: 2.012 | test avg. loss: 101.406
Epochs: 4152 | epoch avg. loss: 1.727 | test avg. loss: 107.782


  8%|▊         | 4155/50000 [05:57<53:46, 14.21it/s]

Epochs: 4153 | epoch avg. loss: 5.882 | test avg. loss: 102.996
Epochs: 4154 | epoch avg. loss: 3.251 | test avg. loss: 102.417
Epochs: 4155 | epoch avg. loss: 1.364 | test avg. loss: 105.150


  8%|▊         | 4159/50000 [05:57<54:54, 13.91it/s]

Epochs: 4156 | epoch avg. loss: 3.140 | test avg. loss: 102.731
Epochs: 4157 | epoch avg. loss: 1.446 | test avg. loss: 102.470
Epochs: 4158 | epoch avg. loss: 0.775 | test avg. loss: 105.899


  8%|▊         | 4163/50000 [05:57<51:46, 14.76it/s]

Epochs: 4159 | epoch avg. loss: 2.651 | test avg. loss: 103.853
Epochs: 4160 | epoch avg. loss: 1.839 | test avg. loss: 102.982
Epochs: 4161 | epoch avg. loss: 0.545 | test avg. loss: 104.282
Epochs: 4162 | epoch avg. loss: 0.647 | test avg. loss: 104.737


  8%|▊         | 4167/50000 [05:57<50:27, 15.14it/s]

Epochs: 4163 | epoch avg. loss: 0.516 | test avg. loss: 104.769
Epochs: 4164 | epoch avg. loss: 0.186 | test avg. loss: 104.989
Epochs: 4165 | epoch avg. loss: 0.264 | test avg. loss: 105.210
Epochs: 4166 | epoch avg. loss: 0.666 | test avg. loss: 105.390


  8%|▊         | 4169/50000 [05:58<52:23, 14.58it/s]

Epochs: 4167 | epoch avg. loss: 0.409 | test avg. loss: 105.314
Epochs: 4168 | epoch avg. loss: 0.631 | test avg. loss: 104.662
Epochs: 4169 | epoch avg. loss: 0.166 | test avg. loss: 104.345


  8%|▊         | 4173/50000 [05:58<57:51, 13.20it/s]

Epochs: 4170 | epoch avg. loss: 0.164 | test avg. loss: 104.025
Epochs: 4171 | epoch avg. loss: 0.165 | test avg. loss: 103.746
Epochs: 4172 | epoch avg. loss: 0.175 | test avg. loss: 103.722


  8%|▊         | 4177/50000 [05:58<54:12, 14.09it/s]

Epochs: 4173 | epoch avg. loss: 0.293 | test avg. loss: 103.515
Epochs: 4174 | epoch avg. loss: 0.129 | test avg. loss: 103.597
Epochs: 4175 | epoch avg. loss: 0.168 | test avg. loss: 103.837
Epochs: 4176 | epoch avg. loss: 0.170 | test avg. loss: 104.271




Epochs: 4177 | epoch avg. loss: 0.450 | test avg. loss: 104.194
Epochs: 4178 | epoch avg. loss: 0.312 | test avg. loss: 104.484
Epochs: 4179 | epoch avg. loss: 0.264 | test avg. loss: 104.836


  8%|▊         | 4183/50000 [05:58<57:59, 13.17it/s]

Epochs: 4180 | epoch avg. loss: 0.386 | test avg. loss: 104.577
Epochs: 4181 | epoch avg. loss: 0.144 | test avg. loss: 104.667
Epochs: 4182 | epoch avg. loss: 0.117 | test avg. loss: 104.790


  8%|▊         | 4185/50000 [05:59<59:40, 12.80it/s]

Epochs: 4183 | epoch avg. loss: 0.139 | test avg. loss: 104.821
Epochs: 4184 | epoch avg. loss: 0.140 | test avg. loss: 104.885
Epochs: 4185 | epoch avg. loss: 0.204 | test avg. loss: 104.804


  8%|▊         | 4189/50000 [05:59<57:17, 13.33it/s]

Epochs: 4186 | epoch avg. loss: 0.192 | test avg. loss: 104.718
Epochs: 4187 | epoch avg. loss: 0.126 | test avg. loss: 104.656
Epochs: 4188 | epoch avg. loss: 0.107 | test avg. loss: 104.559


                                                    

Epochs: 4189 | epoch avg. loss: 0.105 | test avg. loss: 104.459
Epochs: 4190 | epoch avg. loss: 0.154 | test avg. loss: 104.617
Epochs: 4191 | epoch avg. loss: 0.287 | test avg. loss: 104.249


  8%|▊         | 4195/50000 [05:59<52:36, 14.51it/s]

Epochs: 4192 | epoch avg. loss: 0.148 | test avg. loss: 104.103
Epochs: 4193 | epoch avg. loss: 0.114 | test avg. loss: 104.097
Epochs: 4194 | epoch avg. loss: 0.246 | test avg. loss: 103.836


  8%|▊         | 4197/50000 [06:00<58:41, 13.01it/s]

Epochs: 4195 | epoch avg. loss: 0.125 | test avg. loss: 103.733
Epochs: 4196 | epoch avg. loss: 0.160 | test avg. loss: 104.188
Epochs: 4197 | epoch avg. loss: 0.533 | test avg. loss: 103.888


  8%|▊         | 4199/50000 [06:00<1:02:20, 12.24it/s]

Epochs: 4198 | epoch avg. loss: 0.582 | test avg. loss: 103.617
Epochs: 4199 | epoch avg. loss: 0.247 | test avg. loss: 103.561


  8%|▊         | 4203/50000 [06:01<2:41:28,  4.73it/s]

Epochs: 4200 | epoch avg. loss: 0.183 | test avg. loss: 103.512
Epochs: 4201 | epoch avg. loss: 0.225 | test avg. loss: 103.457
Epochs: 4202 | epoch avg. loss: 0.312 | test avg. loss: 103.226


  8%|▊         | 4205/50000 [06:02<2:14:43,  5.67it/s]

Epochs: 4203 | epoch avg. loss: 0.108 | test avg. loss: 103.233
Epochs: 4204 | epoch avg. loss: 0.111 | test avg. loss: 103.292
Epochs: 4205 | epoch avg. loss: 0.172 | test avg. loss: 103.781


  8%|▊         | 4209/50000 [06:02<1:34:40,  8.06it/s]

Epochs: 4206 | epoch avg. loss: 0.514 | test avg. loss: 103.537
Epochs: 4207 | epoch avg. loss: 0.295 | test avg. loss: 103.957
Epochs: 4208 | epoch avg. loss: 0.375 | test avg. loss: 103.727


  8%|▊         | 4211/50000 [06:02<1:26:34,  8.82it/s]

Epochs: 4209 | epoch avg. loss: 0.119 | test avg. loss: 103.782
Epochs: 4210 | epoch avg. loss: 0.114 | test avg. loss: 103.943
Epochs: 4211 | epoch avg. loss: 0.168 | test avg. loss: 104.000


  8%|▊         | 4215/50000 [06:02<1:11:26, 10.68it/s]

Epochs: 4212 | epoch avg. loss: 0.267 | test avg. loss: 103.877
Epochs: 4213 | epoch avg. loss: 0.201 | test avg. loss: 104.506
Epochs: 4214 | epoch avg. loss: 0.565 | test avg. loss: 104.055


  8%|▊         | 4217/50000 [06:03<1:11:51, 10.62it/s]

Epochs: 4215 | epoch avg. loss: 0.388 | test avg. loss: 103.698
Epochs: 4216 | epoch avg. loss: 0.149 | test avg. loss: 103.567
Epochs: 4217 | epoch avg. loss: 0.110 | test avg. loss: 103.441


  8%|▊         | 4221/50000 [06:03<1:09:26, 10.99it/s]

Epochs: 4218 | epoch avg. loss: 0.104 | test avg. loss: 103.400
Epochs: 4219 | epoch avg. loss: 0.099 | test avg. loss: 103.446
Epochs: 4220 | epoch avg. loss: 0.110 | test avg. loss: 103.528


  8%|▊         | 4223/50000 [06:03<1:08:21, 11.16it/s]

Epochs: 4221 | epoch avg. loss: 0.117 | test avg. loss: 103.783
Epochs: 4222 | epoch avg. loss: 0.223 | test avg. loss: 103.768
Epochs: 4223 | epoch avg. loss: 0.335 | test avg. loss: 103.690


  8%|▊         | 4227/50000 [06:03<1:05:39, 11.62it/s]

Epochs: 4224 | epoch avg. loss: 0.157 | test avg. loss: 103.717
Epochs: 4225 | epoch avg. loss: 0.192 | test avg. loss: 103.597
Epochs: 4226 | epoch avg. loss: 0.088 | test avg. loss: 103.527


  8%|▊         | 4229/50000 [06:04<1:08:08, 11.19it/s]

Epochs: 4227 | epoch avg. loss: 0.092 | test avg. loss: 103.539
Epochs: 4228 | epoch avg. loss: 0.148 | test avg. loss: 103.523
Epochs: 4229 | epoch avg. loss: 0.133 | test avg. loss: 103.921


  8%|▊         | 4233/50000 [06:04<1:01:55, 12.32it/s]

Epochs: 4230 | epoch avg. loss: 0.491 | test avg. loss: 103.521
Epochs: 4231 | epoch avg. loss: 0.298 | test avg. loss: 103.541
Epochs: 4232 | epoch avg. loss: 0.206 | test avg. loss: 103.510


  8%|▊         | 4235/50000 [06:04<58:45, 12.98it/s]

Epochs: 4233 | epoch avg. loss: 0.372 | test avg. loss: 103.417
Epochs: 4234 | epoch avg. loss: 0.137 | test avg. loss: 103.658
Epochs: 4235 | epoch avg. loss: 0.195 | test avg. loss: 103.695


  8%|▊         | 4239/50000 [06:04<55:58, 13.62it/s]

Epochs: 4236 | epoch avg. loss: 0.126 | test avg. loss: 103.781
Epochs: 4237 | epoch avg. loss: 0.113 | test avg. loss: 103.799
Epochs: 4238 | epoch avg. loss: 0.101 | test avg. loss: 103.805


  8%|▊         | 4241/50000 [06:04<58:09, 13.11it/s]

Epochs: 4239 | epoch avg. loss: 0.093 | test avg. loss: 103.847
Epochs: 4240 | epoch avg. loss: 0.139 | test avg. loss: 103.830
Epochs: 4241 | epoch avg. loss: 0.133 | test avg. loss: 104.021


  8%|▊         | 4245/50000 [06:05<1:01:17, 12.44it/s]

Epochs: 4242 | epoch avg. loss: 0.217 | test avg. loss: 103.894
Epochs: 4243 | epoch avg. loss: 0.252 | test avg. loss: 103.595
Epochs: 4244 | epoch avg. loss: 0.141 | test avg. loss: 103.688


  8%|▊         | 4247/50000 [06:05<58:16, 13.09it/s]

Epochs: 4245 | epoch avg. loss: 0.240 | test avg. loss: 103.347
Epochs: 4246 | epoch avg. loss: 0.104 | test avg. loss: 103.309
Epochs: 4247 | epoch avg. loss: 0.182 | test avg. loss: 103.174
Epochs: 4248 | epoch avg. loss: 0.089 | test avg. loss: 103.122


  9%|▊         | 4251/50000 [06:05<55:31, 13.73it/s]

Epochs: 4249 | epoch avg. loss: 0.179 | test avg. loss: 103.265
Epochs: 4250 | epoch avg. loss: 0.268 | test avg. loss: 103.231
Epochs: 4251 | epoch avg. loss: 0.110 | test avg. loss: 103.378


                                                    

Epochs: 4252 | epoch avg. loss: 0.140 | test avg. loss: 103.452
Epochs: 4253 | epoch avg. loss: 0.103 | test avg. loss: 103.704


  9%|▊         | 4257/50000 [06:06<1:01:42, 12.35it/s]

Epochs: 4254 | epoch avg. loss: 0.453 | test avg. loss: 103.606
Epochs: 4255 | epoch avg. loss: 0.189 | test avg. loss: 103.849
Epochs: 4256 | epoch avg. loss: 0.394 | test avg. loss: 103.327


  9%|▊         | 4259/50000 [06:06<1:00:03, 12.69it/s]

Epochs: 4257 | epoch avg. loss: 0.415 | test avg. loss: 103.241
Epochs: 4258 | epoch avg. loss: 0.254 | test avg. loss: 103.693
Epochs: 4259 | epoch avg. loss: 1.029 | test avg. loss: 102.974


  9%|▊         | 4263/50000 [06:06<54:48, 13.91it/s]

Epochs: 4260 | epoch avg. loss: 0.324 | test avg. loss: 103.474
Epochs: 4261 | epoch avg. loss: 0.440 | test avg. loss: 103.835
Epochs: 4262 | epoch avg. loss: 1.028 | test avg. loss: 102.997
Epochs: 4263 | epoch avg. loss: 0.332 | test avg. loss: 103.348


  9%|▊         | 4267/50000 [06:06<51:49, 14.71it/s]

Epochs: 4264 | epoch avg. loss: 0.613 | test avg. loss: 102.832
Epochs: 4265 | epoch avg. loss: 0.236 | test avg. loss: 102.845
Epochs: 4266 | epoch avg. loss: 0.328 | test avg. loss: 102.447


  9%|▊         | 4269/50000 [06:07<58:34, 13.01it/s]

Epochs: 4267 | epoch avg. loss: 0.108 | test avg. loss: 102.352
Epochs: 4268 | epoch avg. loss: 0.139 | test avg. loss: 102.224
Epochs: 4269 | epoch avg. loss: 0.260 | test avg. loss: 102.394


  9%|▊         | 4273/50000 [06:07<1:03:43, 11.96it/s]

Epochs: 4270 | epoch avg. loss: 0.252 | test avg. loss: 102.810
Epochs: 4271 | epoch avg. loss: 0.544 | test avg. loss: 102.464
Epochs: 4272 | epoch avg. loss: 0.328 | test avg. loss: 103.437


  9%|▊         | 4275/50000 [06:07<1:00:26, 12.61it/s]

Epochs: 4273 | epoch avg. loss: 0.807 | test avg. loss: 103.053
Epochs: 4274 | epoch avg. loss: 0.647 | test avg. loss: 103.056
Epochs: 4275 | epoch avg. loss: 0.322 | test avg. loss: 104.164


  9%|▊         | 4279/50000 [06:07<59:23, 12.83it/s]

Epochs: 4276 | epoch avg. loss: 0.974 | test avg. loss: 103.504
Epochs: 4277 | epoch avg. loss: 0.418 | test avg. loss: 103.621
Epochs: 4278 | epoch avg. loss: 0.205 | test avg. loss: 103.759


  9%|▊         | 4281/50000 [06:08<1:03:54, 11.92it/s]

Epochs: 4279 | epoch avg. loss: 0.194 | test avg. loss: 103.847
Epochs: 4280 | epoch avg. loss: 0.148 | test avg. loss: 104.250
Epochs: 4281 | epoch avg. loss: 0.809 | test avg. loss: 103.684


  9%|▊         | 4285/50000 [06:08<1:05:00, 11.72it/s]

Epochs: 4282 | epoch avg. loss: 0.291 | test avg. loss: 104.231
Epochs: 4283 | epoch avg. loss: 0.716 | test avg. loss: 103.943
Epochs: 4284 | epoch avg. loss: 0.698 | test avg. loss: 103.686


  9%|▊         | 4287/50000 [06:08<1:04:59, 11.72it/s]

Epochs: 4285 | epoch avg. loss: 0.762 | test avg. loss: 105.590
Epochs: 4286 | epoch avg. loss: 2.137 | test avg. loss: 103.808
Epochs: 4287 | epoch avg. loss: 0.927 | test avg. loss: 104.108


  9%|▊         | 4291/50000 [06:08<1:02:54, 12.11it/s]

Epochs: 4288 | epoch avg. loss: 0.551 | test avg. loss: 104.826
Epochs: 4289 | epoch avg. loss: 0.866 | test avg. loss: 104.083
Epochs: 4290 | epoch avg. loss: 0.758 | test avg. loss: 105.247


  9%|▊         | 4293/50000 [06:09<1:02:32, 12.18it/s]

Epochs: 4291 | epoch avg. loss: 0.925 | test avg. loss: 105.327
Epochs: 4292 | epoch avg. loss: 0.791 | test avg. loss: 104.714
Epochs: 4293 | epoch avg. loss: 0.201 | test avg. loss: 104.900


  9%|▊         | 4297/50000 [06:09<1:01:48, 12.33it/s]

Epochs: 4294 | epoch avg. loss: 0.210 | test avg. loss: 105.235
Epochs: 4295 | epoch avg. loss: 0.276 | test avg. loss: 105.334
Epochs: 4296 | epoch avg. loss: 0.908 | test avg. loss: 105.782


  9%|▊         | 4299/50000 [06:09<59:00, 12.91it/s]

Epochs: 4297 | epoch avg. loss: 0.652 | test avg. loss: 105.865
Epochs: 4298 | epoch avg. loss: 1.125 | test avg. loss: 105.584
Epochs: 4299 | epoch avg. loss: 0.970 | test avg. loss: 105.789


  9%|▊         | 4303/50000 [06:11<2:42:04,  4.70it/s]

Epochs: 4300 | epoch avg. loss: 0.852 | test avg. loss: 106.818
Epochs: 4301 | epoch avg. loss: 0.761 | test avg. loss: 107.228
Epochs: 4302 | epoch avg. loss: 1.278 | test avg. loss: 105.717


  9%|▊         | 4305/50000 [06:11<2:09:50,  5.87it/s]

Epochs: 4303 | epoch avg. loss: 0.232 | test avg. loss: 105.527
Epochs: 4304 | epoch avg. loss: 0.427 | test avg. loss: 104.667
Epochs: 4305 | epoch avg. loss: 0.195 | test avg. loss: 104.164


  9%|▊         | 4309/50000 [06:11<1:32:23,  8.24it/s]

Epochs: 4306 | epoch avg. loss: 0.164 | test avg. loss: 103.847
Epochs: 4307 | epoch avg. loss: 0.124 | test avg. loss: 103.815
Epochs: 4308 | epoch avg. loss: 0.252 | test avg. loss: 103.559


  9%|▊         | 4311/50000 [06:11<1:21:43,  9.32it/s]

Epochs: 4309 | epoch avg. loss: 0.109 | test avg. loss: 103.631
Epochs: 4310 | epoch avg. loss: 0.181 | test avg. loss: 103.698
Epochs: 4311 | epoch avg. loss: 0.334 | test avg. loss: 103.683


  9%|▊         | 4315/50000 [06:11<1:09:41, 10.92it/s]

Epochs: 4312 | epoch avg. loss: 0.201 | test avg. loss: 103.776
Epochs: 4313 | epoch avg. loss: 0.348 | test avg. loss: 103.545
Epochs: 4314 | epoch avg. loss: 0.223 | test avg. loss: 103.586


  9%|▊         | 4317/50000 [06:12<1:09:53, 10.89it/s]

Epochs: 4315 | epoch avg. loss: 0.244 | test avg. loss: 104.301
Epochs: 4316 | epoch avg. loss: 0.623 | test avg. loss: 104.438
Epochs: 4317 | epoch avg. loss: 0.734 | test avg. loss: 103.864


  9%|▊         | 4321/50000 [06:12<1:06:21, 11.47it/s]

Epochs: 4318 | epoch avg. loss: 0.257 | test avg. loss: 104.477
Epochs: 4319 | epoch avg. loss: 0.571 | test avg. loss: 104.519
Epochs: 4320 | epoch avg. loss: 0.917 | test avg. loss: 105.160


  9%|▊         | 4323/50000 [06:12<1:02:16, 12.22it/s]

Epochs: 4321 | epoch avg. loss: 0.708 | test avg. loss: 105.314
Epochs: 4322 | epoch avg. loss: 0.878 | test avg. loss: 104.820
Epochs: 4323 | epoch avg. loss: 0.334 | test avg. loss: 105.646


  9%|▊         | 4327/50000 [06:12<1:00:14, 12.64it/s]

Epochs: 4324 | epoch avg. loss: 0.906 | test avg. loss: 105.012
Epochs: 4325 | epoch avg. loss: 0.323 | test avg. loss: 105.021
Epochs: 4326 | epoch avg. loss: 0.313 | test avg. loss: 106.338


  9%|▊         | 4329/50000 [06:13<1:00:14, 12.63it/s]

Epochs: 4327 | epoch avg. loss: 1.237 | test avg. loss: 104.816
Epochs: 4328 | epoch avg. loss: 0.266 | test avg. loss: 105.363
Epochs: 4329 | epoch avg. loss: 1.038 | test avg. loss: 104.334


  9%|▊         | 4333/50000 [06:13<56:20, 13.51it/s]

Epochs: 4330 | epoch avg. loss: 0.454 | test avg. loss: 104.305
Epochs: 4331 | epoch avg. loss: 0.340 | test avg. loss: 104.128
Epochs: 4332 | epoch avg. loss: 0.387 | test avg. loss: 103.812
Epochs: 4333 | epoch avg. loss: 0.177 | test avg. loss: 103.678


  9%|▊         | 4337/50000 [06:13<55:25, 13.73it/s]

Epochs: 4334 | epoch avg. loss: 0.181 | test avg. loss: 103.587
Epochs: 4335 | epoch avg. loss: 0.145 | test avg. loss: 103.716
Epochs: 4336 | epoch avg. loss: 0.320 | test avg. loss: 103.735


  9%|▊         | 4339/50000 [06:13<54:35, 13.94it/s]

Epochs: 4337 | epoch avg. loss: 0.147 | test avg. loss: 104.152
Epochs: 4338 | epoch avg. loss: 0.234 | test avg. loss: 104.454
Epochs: 4339 | epoch avg. loss: 0.510 | test avg. loss: 104.838


  9%|▊         | 4343/50000 [06:14<55:45, 13.65it/s]

Epochs: 4340 | epoch avg. loss: 0.260 | test avg. loss: 105.757
Epochs: 4341 | epoch avg. loss: 0.770 | test avg. loss: 105.578
Epochs: 4342 | epoch avg. loss: 0.220 | test avg. loss: 106.513


  9%|▊         | 4345/50000 [06:14<56:10, 13.55it/s]

Epochs: 4343 | epoch avg. loss: 0.645 | test avg. loss: 106.276
Epochs: 4344 | epoch avg. loss: 0.344 | test avg. loss: 106.944
Epochs: 4345 | epoch avg. loss: 0.793 | test avg. loss: 106.478


  9%|▊         | 4349/50000 [06:14<53:57, 14.10it/s]

Epochs: 4346 | epoch avg. loss: 0.673 | test avg. loss: 106.362
Epochs: 4347 | epoch avg. loss: 0.399 | test avg. loss: 108.178
Epochs: 4348 | epoch avg. loss: 1.881 | test avg. loss: 106.837


  9%|▊         | 4351/50000 [06:14<56:19, 13.51it/s]

Epochs: 4349 | epoch avg. loss: 1.423 | test avg. loss: 111.524
Epochs: 4350 | epoch avg. loss: 5.262 | test avg. loss: 107.238
Epochs: 4351 | epoch avg. loss: 3.805 | test avg. loss: 106.985


  9%|▊         | 4355/50000 [06:14<54:15, 14.02it/s]

Epochs: 4352 | epoch avg. loss: 1.499 | test avg. loss: 105.655
Epochs: 4353 | epoch avg. loss: 0.854 | test avg. loss: 105.178
Epochs: 4354 | epoch avg. loss: 0.660 | test avg. loss: 104.661


  9%|▊         | 4357/50000 [06:15<54:17, 14.01it/s]

Epochs: 4355 | epoch avg. loss: 1.110 | test avg. loss: 104.346
Epochs: 4356 | epoch avg. loss: 0.780 | test avg. loss: 107.276
Epochs: 4357 | epoch avg. loss: 4.176 | test avg. loss: 104.439


  9%|▊         | 4361/50000 [06:15<55:20, 13.75it/s]

Epochs: 4358 | epoch avg. loss: 0.590 | test avg. loss: 105.165
Epochs: 4359 | epoch avg. loss: 0.470 | test avg. loss: 106.449
Epochs: 4360 | epoch avg. loss: 0.692 | test avg. loss: 106.778


  9%|▊         | 4363/50000 [06:15<53:53, 14.11it/s]

Epochs: 4361 | epoch avg. loss: 0.779 | test avg. loss: 107.084
Epochs: 4362 | epoch avg. loss: 1.333 | test avg. loss: 108.458
Epochs: 4363 | epoch avg. loss: 1.156 | test avg. loss: 112.065


  9%|▊         | 4367/50000 [06:15<53:09, 14.31it/s]

Epochs: 4364 | epoch avg. loss: 4.590 | test avg. loss: 107.328
Epochs: 4365 | epoch avg. loss: 0.832 | test avg. loss: 108.120
Epochs: 4366 | epoch avg. loss: 0.801 | test avg. loss: 108.282
Epochs: 4367 | epoch avg. loss: 1.575 | test avg. loss: 107.092


  9%|▊         | 4371/50000 [06:16<56:58, 13.35it/s]

Epochs: 4368 | epoch avg. loss: 0.385 | test avg. loss: 107.366
Epochs: 4369 | epoch avg. loss: 0.700 | test avg. loss: 107.513
Epochs: 4370 | epoch avg. loss: 0.989 | test avg. loss: 106.575


  9%|▊         | 4373/50000 [06:16<57:06, 13.31it/s]

Epochs: 4371 | epoch avg. loss: 0.205 | test avg. loss: 106.414
Epochs: 4372 | epoch avg. loss: 0.210 | test avg. loss: 106.418
Epochs: 4373 | epoch avg. loss: 0.242 | test avg. loss: 106.248


  9%|▉         | 4377/50000 [06:16<55:10, 13.78it/s]

Epochs: 4374 | epoch avg. loss: 0.238 | test avg. loss: 106.217
Epochs: 4375 | epoch avg. loss: 0.216 | test avg. loss: 106.147
Epochs: 4376 | epoch avg. loss: 0.393 | test avg. loss: 105.685


  9%|▉         | 4379/50000 [06:16<55:39, 13.66it/s]

Epochs: 4377 | epoch avg. loss: 0.234 | test avg. loss: 105.788
Epochs: 4378 | epoch avg. loss: 0.269 | test avg. loss: 106.739
Epochs: 4379 | epoch avg. loss: 1.334 | test avg. loss: 105.848


  9%|▉         | 4383/50000 [06:17<59:53, 12.70it/s]

Epochs: 4380 | epoch avg. loss: 0.386 | test avg. loss: 107.077
Epochs: 4381 | epoch avg. loss: 0.854 | test avg. loss: 106.914
Epochs: 4382 | epoch avg. loss: 1.035 | test avg. loss: 106.489


  9%|▉         | 4385/50000 [06:17<1:01:44, 12.31it/s]

Epochs: 4383 | epoch avg. loss: 0.272 | test avg. loss: 106.738
Epochs: 4384 | epoch avg. loss: 0.311 | test avg. loss: 107.025
Epochs: 4385 | epoch avg. loss: 0.518 | test avg. loss: 106.476


  9%|▉         | 4389/50000 [06:17<57:44, 13.17it/s]  

Epochs: 4386 | epoch avg. loss: 0.225 | test avg. loss: 106.716
Epochs: 4387 | epoch avg. loss: 0.349 | test avg. loss: 106.638
Epochs: 4388 | epoch avg. loss: 0.246 | test avg. loss: 106.684


  9%|▉         | 4391/50000 [06:17<56:16, 13.51it/s]

Epochs: 4389 | epoch avg. loss: 0.152 | test avg. loss: 107.149
Epochs: 4390 | epoch avg. loss: 0.458 | test avg. loss: 107.027
Epochs: 4391 | epoch avg. loss: 0.387 | test avg. loss: 107.324


  9%|▉         | 4395/50000 [06:17<57:56, 13.12it/s]

Epochs: 4392 | epoch avg. loss: 0.382 | test avg. loss: 107.428
Epochs: 4393 | epoch avg. loss: 0.700 | test avg. loss: 106.709
Epochs: 4394 | epoch avg. loss: 0.181 | test avg. loss: 106.656


  9%|▉         | 4397/50000 [06:18<1:01:19, 12.39it/s]

Epochs: 4395 | epoch avg. loss: 0.200 | test avg. loss: 106.673
Epochs: 4396 | epoch avg. loss: 0.323 | test avg. loss: 106.675
Epochs: 4397 | epoch avg. loss: 0.270 | test avg. loss: 108.129


  9%|▉         | 4399/50000 [06:18<1:01:18, 12.40it/s]

Epochs: 4398 | epoch avg. loss: 1.324 | test avg. loss: 107.467
Epochs: 4399 | epoch avg. loss: 0.902 | test avg. loss: 107.664


  9%|▉         | 4403/50000 [06:19<2:38:33,  4.79it/s]

Epochs: 4400 | epoch avg. loss: 0.603 | test avg. loss: 110.757
Epochs: 4401 | epoch avg. loss: 3.543 | test avg. loss: 108.111
Epochs: 4402 | epoch avg. loss: 1.719 | test avg. loss: 110.968


  9%|▉         | 4405/50000 [06:20<2:10:48,  5.81it/s]

Epochs: 4403 | epoch avg. loss: 2.340 | test avg. loss: 115.135
Epochs: 4404 | epoch avg. loss: 5.749 | test avg. loss: 108.359
Epochs: 4405 | epoch avg. loss: 2.808 | test avg. loss: 110.820


  9%|▉         | 4409/50000 [06:20<1:30:34,  8.39it/s]

Epochs: 4406 | epoch avg. loss: 2.569 | test avg. loss: 109.038
Epochs: 4407 | epoch avg. loss: 1.321 | test avg. loss: 108.956
Epochs: 4408 | epoch avg. loss: 0.854 | test avg. loss: 108.470


  9%|▉         | 4411/50000 [06:20<1:20:01,  9.50it/s]

Epochs: 4409 | epoch avg. loss: 1.329 | test avg. loss: 107.664
Epochs: 4410 | epoch avg. loss: 0.699 | test avg. loss: 108.624
Epochs: 4411 | epoch avg. loss: 1.037 | test avg. loss: 109.831


  9%|▉         | 4415/50000 [06:20<1:05:48, 11.54it/s]

Epochs: 4412 | epoch avg. loss: 2.894 | test avg. loss: 110.317
Epochs: 4413 | epoch avg. loss: 5.633 | test avg. loss: 108.427
Epochs: 4414 | epoch avg. loss: 4.474 | test avg. loss: 110.392


  9%|▉         | 4417/50000 [06:20<1:07:43, 11.22it/s]

Epochs: 4415 | epoch avg. loss: 3.798 | test avg. loss: 120.891
Epochs: 4416 | epoch avg. loss: 7.371 | test avg. loss: 118.857
Epochs: 4417 | epoch avg. loss: 6.170 | test avg. loss: 116.936


  9%|▉         | 4419/50000 [06:21<1:09:03, 11.00it/s]

Epochs: 4418 | epoch avg. loss: 6.097 | test avg. loss: 111.886
Epochs: 4419 | epoch avg. loss: 2.059 | test avg. loss: 114.150


  9%|▉         | 4423/50000 [06:21<1:13:14, 10.37it/s]

Epochs: 4420 | epoch avg. loss: 2.676 | test avg. loss: 122.799
Epochs: 4421 | epoch avg. loss: 5.933 | test avg. loss: 123.114
Epochs: 4422 | epoch avg. loss: 7.886 | test avg. loss: 122.329


  9%|▉         | 4425/50000 [06:21<1:14:37, 10.18it/s]

Epochs: 4423 | epoch avg. loss: 9.054 | test avg. loss: 116.536
Epochs: 4424 | epoch avg. loss: 5.609 | test avg. loss: 114.208
Epochs: 4425 | epoch avg. loss: 3.443 | test avg. loss: 114.670


  9%|▉         | 4429/50000 [06:22<1:14:30, 10.19it/s]

Epochs: 4426 | epoch avg. loss: 3.331 | test avg. loss: 115.103
Epochs: 4427 | epoch avg. loss: 2.281 | test avg. loss: 115.988
Epochs: 4428 | epoch avg. loss: 2.630 | test avg. loss: 117.967


  9%|▉         | 4431/50000 [06:22<1:11:23, 10.64it/s]

Epochs: 4429 | epoch avg. loss: 2.494 | test avg. loss: 121.714
Epochs: 4430 | epoch avg. loss: 3.905 | test avg. loss: 130.099
Epochs: 4431 | epoch avg. loss: 9.411 | test avg. loss: 130.022


  9%|▉         | 4435/50000 [06:22<1:04:42, 11.74it/s]

Epochs: 4432 | epoch avg. loss: 9.482 | test avg. loss: 124.686
Epochs: 4433 | epoch avg. loss: 5.615 | test avg. loss: 122.523
Epochs: 4434 | epoch avg. loss: 3.102 | test avg. loss: 120.726


  9%|▉         | 4437/50000 [06:22<1:01:58, 12.25it/s]

Epochs: 4435 | epoch avg. loss: 2.491 | test avg. loss: 120.209
Epochs: 4436 | epoch avg. loss: 2.245 | test avg. loss: 122.729
Epochs: 4437 | epoch avg. loss: 2.344 | test avg. loss: 122.934


  9%|▉         | 4441/50000 [06:23<1:02:20, 12.18it/s]

Epochs: 4438 | epoch avg. loss: 1.779 | test avg. loss: 124.547
Epochs: 4439 | epoch avg. loss: 1.859 | test avg. loss: 125.937
Epochs: 4440 | epoch avg. loss: 2.441 | test avg. loss: 125.388


  9%|▉         | 4443/50000 [06:23<1:00:01, 12.65it/s]

Epochs: 4441 | epoch avg. loss: 0.995 | test avg. loss: 126.344
Epochs: 4442 | epoch avg. loss: 1.542 | test avg. loss: 127.797
Epochs: 4443 | epoch avg. loss: 2.096 | test avg. loss: 126.562


  9%|▉         | 4447/50000 [06:23<55:21, 13.72it/s]

Epochs: 4444 | epoch avg. loss: 1.457 | test avg. loss: 126.474
Epochs: 4445 | epoch avg. loss: 1.026 | test avg. loss: 125.688
Epochs: 4446 | epoch avg. loss: 0.648 | test avg. loss: 125.594
Epochs: 4447 | epoch avg. loss: 0.511 | test avg. loss: 124.708


  9%|▉         | 4451/50000 [06:23<52:16, 14.52it/s]

Epochs: 4448 | epoch avg. loss: 0.449 | test avg. loss: 123.771
Epochs: 4449 | epoch avg. loss: 0.354 | test avg. loss: 122.660
Epochs: 4450 | epoch avg. loss: 0.325 | test avg. loss: 122.076
Epochs: 4451 | epoch avg. loss: 0.252 | test avg. loss: 121.893


  9%|▉         | 4455/50000 [06:24<54:18, 13.98it/s]

Epochs: 4452 | epoch avg. loss: 0.311 | test avg. loss: 121.933
Epochs: 4453 | epoch avg. loss: 0.354 | test avg. loss: 122.479
Epochs: 4454 | epoch avg. loss: 0.518 | test avg. loss: 122.682


                                                      

Epochs: 4455 | epoch avg. loss: 0.574 | test avg. loss: 122.937
Epochs: 4456 | epoch avg. loss: 0.631 | test avg. loss: 122.990


  9%|▉         | 4459/50000 [06:24<1:05:01, 11.67it/s]

Epochs: 4457 | epoch avg. loss: 0.867 | test avg. loss: 122.859
Epochs: 4458 | epoch avg. loss: 0.729 | test avg. loss: 122.517
Epochs: 4459 | epoch avg. loss: 0.628 | test avg. loss: 122.427


  9%|▉         | 4463/50000 [06:24<1:05:30, 11.59it/s]

Epochs: 4460 | epoch avg. loss: 0.212 | test avg. loss: 122.848
Epochs: 4461 | epoch avg. loss: 0.601 | test avg. loss: 122.793
Epochs: 4462 | epoch avg. loss: 0.653 | test avg. loss: 122.295


  9%|▉         | 4465/50000 [06:25<1:05:09, 11.65it/s]

Epochs: 4463 | epoch avg. loss: 0.679 | test avg. loss: 121.516
Epochs: 4464 | epoch avg. loss: 0.343 | test avg. loss: 120.971
Epochs: 4465 | epoch avg. loss: 0.734 | test avg. loss: 120.850


  9%|▉         | 4469/50000 [06:25<1:00:36, 12.52it/s]

Epochs: 4466 | epoch avg. loss: 0.380 | test avg. loss: 121.115
Epochs: 4467 | epoch avg. loss: 0.335 | test avg. loss: 121.406
Epochs: 4468 | epoch avg. loss: 0.356 | test avg. loss: 121.604


  9%|▉         | 4471/50000 [06:25<59:10, 12.82it/s]

Epochs: 4469 | epoch avg. loss: 0.269 | test avg. loss: 121.724
Epochs: 4470 | epoch avg. loss: 0.254 | test avg. loss: 121.701
Epochs: 4471 | epoch avg. loss: 0.375 | test avg. loss: 121.558


  9%|▉         | 4475/50000 [06:25<54:33, 13.91it/s]

Epochs: 4472 | epoch avg. loss: 0.278 | test avg. loss: 121.616
Epochs: 4473 | epoch avg. loss: 0.139 | test avg. loss: 121.885
Epochs: 4474 | epoch avg. loss: 0.254 | test avg. loss: 122.002
Epochs: 4475 | epoch avg. loss: 0.264 | test avg. loss: 121.870


  9%|▉         | 4479/50000 [06:25<59:25, 12.77it/s]

Epochs: 4476 | epoch avg. loss: 0.277 | test avg. loss: 121.500
Epochs: 4477 | epoch avg. loss: 0.275 | test avg. loss: 121.263
Epochs: 4478 | epoch avg. loss: 0.288 | test avg. loss: 121.156


  9%|▉         | 4481/50000 [06:26<59:38, 12.72it/s]

Epochs: 4479 | epoch avg. loss: 0.350 | test avg. loss: 120.993
Epochs: 4480 | epoch avg. loss: 0.300 | test avg. loss: 120.919
Epochs: 4481 | epoch avg. loss: 0.173 | test avg. loss: 120.957
Epochs: 4482 | epoch avg. loss: 0.156 | test avg. loss: 121.018


  9%|▉         | 4485/50000 [06:26<55:46, 13.60it/s]

Epochs: 4483 | epoch avg. loss: 0.183 | test avg. loss: 121.012
Epochs: 4484 | epoch avg. loss: 0.137 | test avg. loss: 120.970
Epochs: 4485 | epoch avg. loss: 0.159 | test avg. loss: 120.870


  9%|▉         | 4489/50000 [06:26<53:15, 14.24it/s]

Epochs: 4486 | epoch avg. loss: 0.144 | test avg. loss: 120.739
Epochs: 4487 | epoch avg. loss: 0.136 | test avg. loss: 120.699
Epochs: 4488 | epoch avg. loss: 0.137 | test avg. loss: 120.763
Epochs: 4489 | epoch avg. loss: 0.189 | test avg. loss: 120.815




Epochs: 4490 | epoch avg. loss: 0.163 | test avg. loss: 120.817
Epochs: 4491 | epoch avg. loss: 0.175 | test avg. loss: 120.801


  9%|▉         | 4495/50000 [06:27<1:03:52, 11.87it/s]

Epochs: 4492 | epoch avg. loss: 0.204 | test avg. loss: 120.684
Epochs: 4493 | epoch avg. loss: 0.226 | test avg. loss: 120.631
Epochs: 4494 | epoch avg. loss: 0.173 | test avg. loss: 120.714


  9%|▉         | 4497/50000 [06:27<1:07:20, 11.26it/s]

Epochs: 4495 | epoch avg. loss: 0.160 | test avg. loss: 120.840
Epochs: 4496 | epoch avg. loss: 0.199 | test avg. loss: 121.032
Epochs: 4497 | epoch avg. loss: 0.301 | test avg. loss: 121.394


  9%|▉         | 4499/50000 [06:27<1:09:09, 10.97it/s]

Epochs: 4498 | epoch avg. loss: 0.610 | test avg. loss: 121.333
Epochs: 4499 | epoch avg. loss: 0.603 | test avg. loss: 121.443


  9%|▉         | 4503/50000 [06:29<2:59:14,  4.23it/s]

Epochs: 4500 | epoch avg. loss: 0.416 | test avg. loss: 121.556
Epochs: 4501 | epoch avg. loss: 0.261 | test avg. loss: 121.703
Epochs: 4502 | epoch avg. loss: 0.228 | test avg. loss: 121.557


  9%|▉         | 4505/50000 [06:29<2:22:53,  5.31it/s]

Epochs: 4503 | epoch avg. loss: 0.230 | test avg. loss: 121.615
Epochs: 4504 | epoch avg. loss: 0.214 | test avg. loss: 121.451
Epochs: 4505 | epoch avg. loss: 0.197 | test avg. loss: 121.857


  9%|▉         | 4509/50000 [06:29<1:42:18,  7.41it/s]

Epochs: 4506 | epoch avg. loss: 0.339 | test avg. loss: 121.619
Epochs: 4507 | epoch avg. loss: 0.362 | test avg. loss: 121.785
Epochs: 4508 | epoch avg. loss: 0.195 | test avg. loss: 122.031


  9%|▉         | 4511/50000 [06:30<1:38:10,  7.72it/s]

Epochs: 4509 | epoch avg. loss: 0.165 | test avg. loss: 121.881
Epochs: 4510 | epoch avg. loss: 0.161 | test avg. loss: 122.033


  9%|▉         | 4513/50000 [06:30<1:31:08,  8.32it/s]

Epochs: 4511 | epoch avg. loss: 0.213 | test avg. loss: 121.910
Epochs: 4512 | epoch avg. loss: 0.198 | test avg. loss: 122.162
Epochs: 4513 | epoch avg. loss: 0.291 | test avg. loss: 121.996


  9%|▉         | 4517/50000 [06:30<1:12:17, 10.49it/s]

Epochs: 4514 | epoch avg. loss: 0.439 | test avg. loss: 121.915
Epochs: 4515 | epoch avg. loss: 0.182 | test avg. loss: 121.934
Epochs: 4516 | epoch avg. loss: 0.204 | test avg. loss: 121.743


  9%|▉         | 4519/50000 [06:30<1:07:54, 11.16it/s]

Epochs: 4517 | epoch avg. loss: 0.176 | test avg. loss: 121.366
Epochs: 4518 | epoch avg. loss: 0.150 | test avg. loss: 120.801
Epochs: 4519 | epoch avg. loss: 0.133 | test avg. loss: 120.625


  9%|▉         | 4521/50000 [06:30<1:05:19, 11.60it/s]

Epochs: 4520 | epoch avg. loss: 0.122 | test avg. loss: 120.614
Epochs: 4521 | epoch avg. loss: 0.120 | test avg. loss: 120.761


  9%|▉         | 4525/50000 [06:31<1:08:15, 11.10it/s]

Epochs: 4522 | epoch avg. loss: 0.118 | test avg. loss: 120.975
Epochs: 4523 | epoch avg. loss: 0.121 | test avg. loss: 121.065
Epochs: 4524 | epoch avg. loss: 0.123 | test avg. loss: 120.954


  9%|▉         | 4527/50000 [06:31<1:04:59, 11.66it/s]

Epochs: 4525 | epoch avg. loss: 0.121 | test avg. loss: 120.818
Epochs: 4526 | epoch avg. loss: 0.132 | test avg. loss: 120.586
Epochs: 4527 | epoch avg. loss: 0.220 | test avg. loss: 120.521


  9%|▉         | 4531/50000 [06:31<1:03:38, 11.91it/s]

Epochs: 4528 | epoch avg. loss: 0.135 | test avg. loss: 120.646
Epochs: 4529 | epoch avg. loss: 0.396 | test avg. loss: 120.679
Epochs: 4530 | epoch avg. loss: 0.258 | test avg. loss: 120.767


  9%|▉         | 4533/50000 [06:32<1:07:03, 11.30it/s]

Epochs: 4531 | epoch avg. loss: 0.226 | test avg. loss: 120.916
Epochs: 4532 | epoch avg. loss: 0.349 | test avg. loss: 121.392
Epochs: 4533 | epoch avg. loss: 0.450 | test avg. loss: 121.037


  9%|▉         | 4537/50000 [06:32<1:05:29, 11.57it/s]

Epochs: 4534 | epoch avg. loss: 0.357 | test avg. loss: 123.061
Epochs: 4535 | epoch avg. loss: 1.673 | test avg. loss: 121.575
Epochs: 4536 | epoch avg. loss: 0.619 | test avg. loss: 122.209


  9%|▉         | 4539/50000 [06:32<1:03:00, 12.03it/s]

Epochs: 4537 | epoch avg. loss: 0.218 | test avg. loss: 122.598
Epochs: 4538 | epoch avg. loss: 0.217 | test avg. loss: 122.743
Epochs: 4539 | epoch avg. loss: 0.248 | test avg. loss: 123.508


  9%|▉         | 4543/50000 [06:32<1:01:22, 12.34it/s]

Epochs: 4540 | epoch avg. loss: 0.245 | test avg. loss: 123.614
Epochs: 4541 | epoch avg. loss: 0.214 | test avg. loss: 124.003
Epochs: 4542 | epoch avg. loss: 0.177 | test avg. loss: 122.863


  9%|▉         | 4545/50000 [06:33<1:05:17, 11.60it/s]

Epochs: 4543 | epoch avg. loss: 0.143 | test avg. loss: 122.591
Epochs: 4544 | epoch avg. loss: 0.206 | test avg. loss: 122.135
Epochs: 4545 | epoch avg. loss: 0.110 | test avg. loss: 122.164


  9%|▉         | 4549/50000 [06:33<1:06:39, 11.37it/s]

Epochs: 4546 | epoch avg. loss: 0.181 | test avg. loss: 121.827
Epochs: 4547 | epoch avg. loss: 0.192 | test avg. loss: 121.685
Epochs: 4548 | epoch avg. loss: 0.152 | test avg. loss: 121.762


  9%|▉         | 4551/50000 [06:33<1:03:37, 11.91it/s]

Epochs: 4549 | epoch avg. loss: 0.181 | test avg. loss: 121.384
Epochs: 4550 | epoch avg. loss: 0.146 | test avg. loss: 121.288
Epochs: 4551 | epoch avg. loss: 0.132 | test avg. loss: 121.398


  9%|▉         | 4555/50000 [06:33<58:03, 13.05it/s]  

Epochs: 4552 | epoch avg. loss: 0.112 | test avg. loss: 121.429
Epochs: 4553 | epoch avg. loss: 0.141 | test avg. loss: 122.255
Epochs: 4554 | epoch avg. loss: 0.359 | test avg. loss: 121.748


  9%|▉         | 4557/50000 [06:33<57:18, 13.22it/s]

Epochs: 4555 | epoch avg. loss: 0.276 | test avg. loss: 122.164
Epochs: 4556 | epoch avg. loss: 0.133 | test avg. loss: 122.691
Epochs: 4557 | epoch avg. loss: 0.203 | test avg. loss: 122.871


  9%|▉         | 4561/50000 [06:34<56:11, 13.48it/s]

Epochs: 4558 | epoch avg. loss: 0.420 | test avg. loss: 123.715
Epochs: 4559 | epoch avg. loss: 0.481 | test avg. loss: 123.492
Epochs: 4560 | epoch avg. loss: 0.632 | test avg. loss: 123.707


  9%|▉         | 4565/50000 [06:34<53:34, 14.13it/s]

Epochs: 4561 | epoch avg. loss: 0.291 | test avg. loss: 123.487
Epochs: 4562 | epoch avg. loss: 0.244 | test avg. loss: 122.110
Epochs: 4563 | epoch avg. loss: 0.317 | test avg. loss: 121.847
Epochs: 4564 | epoch avg. loss: 0.255 | test avg. loss: 121.428


  9%|▉         | 4569/50000 [06:34<51:40, 14.65it/s]

Epochs: 4565 | epoch avg. loss: 0.201 | test avg. loss: 120.860
Epochs: 4566 | epoch avg. loss: 0.174 | test avg. loss: 120.971
Epochs: 4567 | epoch avg. loss: 0.205 | test avg. loss: 120.731
Epochs: 4568 | epoch avg. loss: 0.141 | test avg. loss: 120.516


  9%|▉         | 4571/50000 [06:34<51:31, 14.70it/s]

Epochs: 4569 | epoch avg. loss: 0.171 | test avg. loss: 121.129
Epochs: 4570 | epoch avg. loss: 0.328 | test avg. loss: 120.103
Epochs: 4571 | epoch avg. loss: 0.365 | test avg. loss: 120.313


  9%|▉         | 4575/50000 [06:35<56:29, 13.40it/s]

Epochs: 4572 | epoch avg. loss: 0.452 | test avg. loss: 120.821
Epochs: 4573 | epoch avg. loss: 0.188 | test avg. loss: 121.577
Epochs: 4574 | epoch avg. loss: 0.249 | test avg. loss: 122.673


  9%|▉         | 4577/50000 [06:35<55:33, 13.63it/s]

Epochs: 4575 | epoch avg. loss: 0.355 | test avg. loss: 122.801
Epochs: 4576 | epoch avg. loss: 0.148 | test avg. loss: 123.081
Epochs: 4577 | epoch avg. loss: 0.208 | test avg. loss: 123.694


  9%|▉         | 4581/50000 [06:35<52:50, 14.32it/s]

Epochs: 4578 | epoch avg. loss: 0.112 | test avg. loss: 123.744
Epochs: 4579 | epoch avg. loss: 0.383 | test avg. loss: 124.189
Epochs: 4580 | epoch avg. loss: 0.256 | test avg. loss: 123.723
Epochs: 4581 | epoch avg. loss: 0.357 | test avg. loss: 123.406


  9%|▉         | 4585/50000 [06:35<52:58, 14.29it/s]

Epochs: 4582 | epoch avg. loss: 0.237 | test avg. loss: 123.897
Epochs: 4583 | epoch avg. loss: 0.239 | test avg. loss: 123.852
Epochs: 4584 | epoch avg. loss: 0.157 | test avg. loss: 125.133


  9%|▉         | 4587/50000 [06:36<56:34, 13.38it/s]

Epochs: 4585 | epoch avg. loss: 0.309 | test avg. loss: 125.047
Epochs: 4586 | epoch avg. loss: 0.430 | test avg. loss: 125.014
Epochs: 4587 | epoch avg. loss: 0.282 | test avg. loss: 125.609


  9%|▉         | 4591/50000 [06:36<57:00, 13.27it/s]

Epochs: 4588 | epoch avg. loss: 0.450 | test avg. loss: 124.467
Epochs: 4589 | epoch avg. loss: 0.532 | test avg. loss: 125.106
Epochs: 4590 | epoch avg. loss: 0.574 | test avg. loss: 124.763


                                                    

Epochs: 4591 | epoch avg. loss: 0.333 | test avg. loss: 124.606
Epochs: 4592 | epoch avg. loss: 0.307 | test avg. loss: 124.683
Epochs: 4593 | epoch avg. loss: 0.484 | test avg. loss: 122.895


  9%|▉         | 4597/50000 [06:36<53:31, 14.14it/s]

Epochs: 4594 | epoch avg. loss: 0.462 | test avg. loss: 123.232
Epochs: 4595 | epoch avg. loss: 0.529 | test avg. loss: 124.321
Epochs: 4596 | epoch avg. loss: 0.413 | test avg. loss: 123.940


  9%|▉         | 4599/50000 [06:37<57:11, 13.23it/s]

Epochs: 4597 | epoch avg. loss: 0.272 | test avg. loss: 123.866
Epochs: 4598 | epoch avg. loss: 0.148 | test avg. loss: 123.084
Epochs: 4599 | epoch avg. loss: 0.386 | test avg. loss: 123.532


  9%|▉         | 4603/50000 [06:38<2:25:32,  5.20it/s]

Epochs: 4600 | epoch avg. loss: 0.293 | test avg. loss: 123.616
Epochs: 4601 | epoch avg. loss: 0.144 | test avg. loss: 123.894
Epochs: 4602 | epoch avg. loss: 0.140 | test avg. loss: 123.902


  9%|▉         | 4605/50000 [06:38<1:58:37,  6.38it/s]

Epochs: 4603 | epoch avg. loss: 0.168 | test avg. loss: 123.770
Epochs: 4604 | epoch avg. loss: 0.232 | test avg. loss: 125.267
Epochs: 4605 | epoch avg. loss: 0.453 | test avg. loss: 124.686


  9%|▉         | 4609/50000 [06:38<1:25:04,  8.89it/s]

Epochs: 4606 | epoch avg. loss: 0.722 | test avg. loss: 125.421
Epochs: 4607 | epoch avg. loss: 0.642 | test avg. loss: 124.224
Epochs: 4608 | epoch avg. loss: 0.335 | test avg. loss: 123.937


  9%|▉         | 4611/50000 [06:38<1:17:07,  9.81it/s]

Epochs: 4609 | epoch avg. loss: 0.447 | test avg. loss: 125.513
Epochs: 4610 | epoch avg. loss: 0.727 | test avg. loss: 125.117
Epochs: 4611 | epoch avg. loss: 0.869 | test avg. loss: 126.136


                                                      

Epochs: 4612 | epoch avg. loss: 0.416 | test avg. loss: 125.603
Epochs: 4613 | epoch avg. loss: 0.270 | test avg. loss: 124.370
Epochs: 4614 | epoch avg. loss: 0.635 | test avg. loss: 123.907


  9%|▉         | 4617/50000 [06:39<1:03:09, 11.97it/s]

Epochs: 4615 | epoch avg. loss: 0.289 | test avg. loss: 123.135
Epochs: 4616 | epoch avg. loss: 0.509 | test avg. loss: 123.095
Epochs: 4617 | epoch avg. loss: 0.279 | test avg. loss: 123.771


  9%|▉         | 4621/50000 [06:39<56:47, 13.32it/s]

Epochs: 4618 | epoch avg. loss: 0.436 | test avg. loss: 123.598
Epochs: 4619 | epoch avg. loss: 1.243 | test avg. loss: 122.759
Epochs: 4620 | epoch avg. loss: 0.278 | test avg. loss: 122.463
Epochs: 4621 | epoch avg. loss: 0.276 | test avg. loss: 122.116


  9%|▉         | 4625/50000 [06:39<55:10, 13.70it/s]

Epochs: 4622 | epoch avg. loss: 0.371 | test avg. loss: 123.438
Epochs: 4623 | epoch avg. loss: 0.879 | test avg. loss: 122.253
Epochs: 4624 | epoch avg. loss: 0.360 | test avg. loss: 122.834


  9%|▉         | 4629/50000 [06:40<52:49, 14.32it/s]

Epochs: 4625 | epoch avg. loss: 0.200 | test avg. loss: 123.329
Epochs: 4626 | epoch avg. loss: 0.160 | test avg. loss: 123.226
Epochs: 4627 | epoch avg. loss: 0.229 | test avg. loss: 123.998
Epochs: 4628 | epoch avg. loss: 0.687 | test avg. loss: 122.712


  9%|▉         | 4631/50000 [06:40<52:23, 14.43it/s]

Epochs: 4629 | epoch avg. loss: 0.430 | test avg. loss: 122.808
Epochs: 4630 | epoch avg. loss: 0.326 | test avg. loss: 123.182
Epochs: 4631 | epoch avg. loss: 0.320 | test avg. loss: 123.490


  9%|▉         | 4635/50000 [06:40<54:58, 13.75it/s]

Epochs: 4632 | epoch avg. loss: 0.736 | test avg. loss: 124.194
Epochs: 4633 | epoch avg. loss: 0.553 | test avg. loss: 122.950
Epochs: 4634 | epoch avg. loss: 0.438 | test avg. loss: 122.791


  9%|▉         | 4637/50000 [06:40<55:35, 13.60it/s]

Epochs: 4635 | epoch avg. loss: 0.278 | test avg. loss: 123.250
Epochs: 4636 | epoch avg. loss: 0.283 | test avg. loss: 123.108
Epochs: 4637 | epoch avg. loss: 0.392 | test avg. loss: 125.137


  9%|▉         | 4641/50000 [06:41<58:57, 12.82it/s]

Epochs: 4638 | epoch avg. loss: 0.828 | test avg. loss: 123.662
Epochs: 4639 | epoch avg. loss: 1.305 | test avg. loss: 123.393
Epochs: 4640 | epoch avg. loss: 0.470 | test avg. loss: 122.924


  9%|▉         | 4643/50000 [06:41<57:43, 13.10it/s]

Epochs: 4641 | epoch avg. loss: 0.322 | test avg. loss: 122.140
Epochs: 4642 | epoch avg. loss: 0.433 | test avg. loss: 123.569
Epochs: 4643 | epoch avg. loss: 1.124 | test avg. loss: 122.613


  9%|▉         | 4647/50000 [06:41<58:43, 12.87it/s]

Epochs: 4644 | epoch avg. loss: 0.340 | test avg. loss: 123.630
Epochs: 4645 | epoch avg. loss: 0.337 | test avg. loss: 125.418
Epochs: 4646 | epoch avg. loss: 0.550 | test avg. loss: 124.740


  9%|▉         | 4651/50000 [06:41<53:46, 14.06it/s]

Epochs: 4647 | epoch avg. loss: 1.101 | test avg. loss: 125.562
Epochs: 4648 | epoch avg. loss: 1.569 | test avg. loss: 123.324
Epochs: 4649 | epoch avg. loss: 0.804 | test avg. loss: 122.887
Epochs: 4650 | epoch avg. loss: 0.472 | test avg. loss: 123.570


  9%|▉         | 4655/50000 [06:42<51:57, 14.55it/s]

Epochs: 4651 | epoch avg. loss: 0.492 | test avg. loss: 123.418
Epochs: 4652 | epoch avg. loss: 1.256 | test avg. loss: 124.407
Epochs: 4653 | epoch avg. loss: 0.859 | test avg. loss: 123.076
Epochs: 4654 | epoch avg. loss: 0.187 | test avg. loss: 122.676


  9%|▉         | 4659/50000 [06:42<50:22, 15.00it/s]

Epochs: 4655 | epoch avg. loss: 0.174 | test avg. loss: 122.778
Epochs: 4656 | epoch avg. loss: 0.168 | test avg. loss: 122.910
Epochs: 4657 | epoch avg. loss: 0.137 | test avg. loss: 123.117
Epochs: 4658 | epoch avg. loss: 0.219 | test avg. loss: 123.333


  9%|▉         | 4661/50000 [06:42<49:31, 15.26it/s]

Epochs: 4659 | epoch avg. loss: 0.142 | test avg. loss: 123.684
Epochs: 4660 | epoch avg. loss: 0.176 | test avg. loss: 123.205
Epochs: 4661 | epoch avg. loss: 0.587 | test avg. loss: 123.651


  9%|▉         | 4665/50000 [06:42<50:25, 14.99it/s]

Epochs: 4662 | epoch avg. loss: 0.394 | test avg. loss: 124.134
Epochs: 4663 | epoch avg. loss: 0.306 | test avg. loss: 124.462
Epochs: 4664 | epoch avg. loss: 0.565 | test avg. loss: 125.938
Epochs: 4665 | epoch avg. loss: 0.714 | test avg. loss: 125.068


                                                    

Epochs: 4666 | epoch avg. loss: 0.254 | test avg. loss: 124.718
Epochs: 4667 | epoch avg. loss: 0.290 | test avg. loss: 125.026
Epochs: 4668 | epoch avg. loss: 0.172 | test avg. loss: 124.153




Epochs: 4669 | epoch avg. loss: 0.267 | test avg. loss: 124.025
Epochs: 4670 | epoch avg. loss: 0.134 | test avg. loss: 124.057
Epochs: 4671 | epoch avg. loss: 0.151 | test avg. loss: 123.672


  9%|▉         | 4675/50000 [06:43<50:23, 14.99it/s]

Epochs: 4672 | epoch avg. loss: 0.487 | test avg. loss: 123.970
Epochs: 4673 | epoch avg. loss: 0.126 | test avg. loss: 124.447
Epochs: 4674 | epoch avg. loss: 0.174 | test avg. loss: 124.215
Epochs: 4675 | epoch avg. loss: 0.504 | test avg. loss: 125.536


  9%|▉         | 4679/50000 [06:43<56:30, 13.37it/s]

Epochs: 4676 | epoch avg. loss: 0.548 | test avg. loss: 124.345
Epochs: 4677 | epoch avg. loss: 0.336 | test avg. loss: 124.332
Epochs: 4678 | epoch avg. loss: 0.371 | test avg. loss: 126.116


  9%|▉         | 4681/50000 [06:43<58:57, 12.81it/s]

Epochs: 4679 | epoch avg. loss: 0.788 | test avg. loss: 124.236
Epochs: 4680 | epoch avg. loss: 0.854 | test avg. loss: 124.407
Epochs: 4681 | epoch avg. loss: 0.449 | test avg. loss: 125.575


  9%|▉         | 4685/50000 [06:44<59:28, 12.70it/s]

Epochs: 4682 | epoch avg. loss: 0.543 | test avg. loss: 125.199
Epochs: 4683 | epoch avg. loss: 1.254 | test avg. loss: 127.261
Epochs: 4684 | epoch avg. loss: 1.148 | test avg. loss: 124.424


  9%|▉         | 4687/50000 [06:44<58:59, 12.80it/s]

Epochs: 4685 | epoch avg. loss: 0.793 | test avg. loss: 124.763
Epochs: 4686 | epoch avg. loss: 1.313 | test avg. loss: 125.420
Epochs: 4687 | epoch avg. loss: 0.907 | test avg. loss: 124.636


  9%|▉         | 4691/50000 [06:44<59:25, 12.71it/s]

Epochs: 4688 | epoch avg. loss: 0.684 | test avg. loss: 126.657
Epochs: 4689 | epoch avg. loss: 0.608 | test avg. loss: 124.778
Epochs: 4690 | epoch avg. loss: 0.704 | test avg. loss: 124.227


  9%|▉         | 4693/50000 [06:44<59:25, 12.71it/s]

Epochs: 4691 | epoch avg. loss: 0.377 | test avg. loss: 124.869
Epochs: 4692 | epoch avg. loss: 0.357 | test avg. loss: 124.192
Epochs: 4693 | epoch avg. loss: 0.243 | test avg. loss: 125.484


  9%|▉         | 4697/50000 [06:45<57:06, 13.22it/s]

Epochs: 4694 | epoch avg. loss: 0.543 | test avg. loss: 124.382
Epochs: 4695 | epoch avg. loss: 0.480 | test avg. loss: 125.046
Epochs: 4696 | epoch avg. loss: 0.865 | test avg. loss: 126.313


  9%|▉         | 4699/50000 [06:45<56:53, 13.27it/s]

Epochs: 4697 | epoch avg. loss: 0.811 | test avg. loss: 127.541
Epochs: 4698 | epoch avg. loss: 2.191 | test avg. loss: 133.437
Epochs: 4699 | epoch avg. loss: 5.184 | test avg. loss: 129.119


  9%|▉         | 4703/50000 [06:46<2:42:01,  4.66it/s]

Epochs: 4700 | epoch avg. loss: 5.374 | test avg. loss: 126.914
Epochs: 4701 | epoch avg. loss: 2.190 | test avg. loss: 130.295
Epochs: 4702 | epoch avg. loss: 2.686 | test avg. loss: 126.624


  9%|▉         | 4705/50000 [06:47<2:11:46,  5.73it/s]

Epochs: 4703 | epoch avg. loss: 2.730 | test avg. loss: 128.343
Epochs: 4704 | epoch avg. loss: 3.036 | test avg. loss: 125.113
Epochs: 4705 | epoch avg. loss: 2.069 | test avg. loss: 125.964


  9%|▉         | 4709/50000 [06:47<1:40:26,  7.52it/s]

Epochs: 4706 | epoch avg. loss: 1.496 | test avg. loss: 130.523
Epochs: 4707 | epoch avg. loss: 2.580 | test avg. loss: 133.354
Epochs: 4708 | epoch avg. loss: 5.242 | test avg. loss: 131.450


  9%|▉         | 4711/50000 [06:47<1:30:08,  8.37it/s]

Epochs: 4709 | epoch avg. loss: 4.955 | test avg. loss: 126.531
Epochs: 4710 | epoch avg. loss: 3.357 | test avg. loss: 124.314
Epochs: 4711 | epoch avg. loss: 1.393 | test avg. loss: 124.983


  9%|▉         | 4713/50000 [06:47<1:22:25,  9.16it/s]

Epochs: 4712 | epoch avg. loss: 1.318 | test avg. loss: 125.818
Epochs: 4713 | epoch avg. loss: 1.366 | test avg. loss: 126.132


                                                      

Epochs: 4714 | epoch avg. loss: 1.010 | test avg. loss: 126.103
Epochs: 4715 | epoch avg. loss: 1.170 | test avg. loss: 126.115


  9%|▉         | 4719/50000 [06:48<1:13:34, 10.26it/s]

Epochs: 4716 | epoch avg. loss: 0.398 | test avg. loss: 126.180
Epochs: 4717 | epoch avg. loss: 0.523 | test avg. loss: 126.603
Epochs: 4718 | epoch avg. loss: 0.410 | test avg. loss: 127.820


  9%|▉         | 4723/50000 [06:48<1:01:31, 12.27it/s]

Epochs: 4719 | epoch avg. loss: 0.528 | test avg. loss: 128.833
Epochs: 4720 | epoch avg. loss: 0.964 | test avg. loss: 130.362
Epochs: 4721 | epoch avg. loss: 0.845 | test avg. loss: 130.351
Epochs: 4722 | epoch avg. loss: 0.776 | test avg. loss: 131.134


  9%|▉         | 4725/50000 [06:48<58:38, 12.87it/s]

Epochs: 4723 | epoch avg. loss: 0.791 | test avg. loss: 131.126
Epochs: 4724 | epoch avg. loss: 0.378 | test avg. loss: 131.386
Epochs: 4725 | epoch avg. loss: 0.491 | test avg. loss: 131.457


  9%|▉         | 4729/50000 [06:49<1:02:23, 12.09it/s]

Epochs: 4726 | epoch avg. loss: 0.406 | test avg. loss: 130.644
Epochs: 4727 | epoch avg. loss: 0.462 | test avg. loss: 131.943
Epochs: 4728 | epoch avg. loss: 0.977 | test avg. loss: 130.311




Epochs: 4729 | epoch avg. loss: 0.914 | test avg. loss: 131.078
Epochs: 4730 | epoch avg. loss: 0.394 | test avg. loss: 130.955
Epochs: 4731 | epoch avg. loss: 0.336 | test avg. loss: 131.291


  9%|▉         | 4735/50000 [06:49<55:55, 13.49it/s]

Epochs: 4732 | epoch avg. loss: 0.241 | test avg. loss: 131.374
Epochs: 4733 | epoch avg. loss: 0.175 | test avg. loss: 131.133
Epochs: 4734 | epoch avg. loss: 0.129 | test avg. loss: 131.054


  9%|▉         | 4737/50000 [06:49<53:55, 13.99it/s]

Epochs: 4735 | epoch avg. loss: 0.123 | test avg. loss: 130.838
Epochs: 4736 | epoch avg. loss: 0.126 | test avg. loss: 130.822
Epochs: 4737 | epoch avg. loss: 0.134 | test avg. loss: 130.623
Epochs: 4738 | epoch avg. loss: 0.134 | test avg. loss: 130.363


  9%|▉         | 4741/50000 [06:50<1:00:34, 12.45it/s]

Epochs: 4739 | epoch avg. loss: 0.116 | test avg. loss: 129.960
Epochs: 4740 | epoch avg. loss: 0.135 | test avg. loss: 130.511
Epochs: 4741 | epoch avg. loss: 0.244 | test avg. loss: 129.876


  9%|▉         | 4745/50000 [06:50<1:02:33, 12.06it/s]

Epochs: 4742 | epoch avg. loss: 0.681 | test avg. loss: 130.868
Epochs: 4743 | epoch avg. loss: 0.482 | test avg. loss: 130.149
Epochs: 4744 | epoch avg. loss: 0.823 | test avg. loss: 130.645


  9%|▉         | 4747/50000 [06:50<1:06:59, 11.26it/s]

Epochs: 4745 | epoch avg. loss: 0.407 | test avg. loss: 131.016
Epochs: 4746 | epoch avg. loss: 0.351 | test avg. loss: 130.701
Epochs: 4747 | epoch avg. loss: 0.484 | test avg. loss: 131.507


 10%|▉         | 4751/50000 [06:50<1:00:52, 12.39it/s]

Epochs: 4748 | epoch avg. loss: 0.619 | test avg. loss: 129.996
Epochs: 4749 | epoch avg. loss: 0.412 | test avg. loss: 130.839
Epochs: 4750 | epoch avg. loss: 0.392 | test avg. loss: 130.511


 10%|▉         | 4753/50000 [06:51<1:02:16, 12.11it/s]

Epochs: 4751 | epoch avg. loss: 0.289 | test avg. loss: 131.089
Epochs: 4752 | epoch avg. loss: 0.229 | test avg. loss: 131.395
Epochs: 4753 | epoch avg. loss: 0.232 | test avg. loss: 130.624


 10%|▉         | 4757/50000 [06:51<1:00:14, 12.52it/s]

Epochs: 4754 | epoch avg. loss: 0.371 | test avg. loss: 130.637
Epochs: 4755 | epoch avg. loss: 0.178 | test avg. loss: 130.573
Epochs: 4756 | epoch avg. loss: 0.172 | test avg. loss: 130.379


 10%|▉         | 4759/50000 [06:51<59:37, 12.65it/s]

Epochs: 4757 | epoch avg. loss: 0.307 | test avg. loss: 131.134
Epochs: 4758 | epoch avg. loss: 0.120 | test avg. loss: 130.642
Epochs: 4759 | epoch avg. loss: 0.277 | test avg. loss: 131.224


 10%|▉         | 4763/50000 [06:51<58:09, 12.96it/s]  

Epochs: 4760 | epoch avg. loss: 0.246 | test avg. loss: 130.893
Epochs: 4761 | epoch avg. loss: 0.272 | test avg. loss: 130.331
Epochs: 4762 | epoch avg. loss: 0.152 | test avg. loss: 129.709


 10%|▉         | 4765/50000 [06:52<59:03, 12.77it/s]

Epochs: 4763 | epoch avg. loss: 0.182 | test avg. loss: 129.381
Epochs: 4764 | epoch avg. loss: 0.251 | test avg. loss: 130.881
Epochs: 4765 | epoch avg. loss: 0.303 | test avg. loss: 130.704


 10%|▉         | 4769/50000 [06:52<56:39, 13.30it/s]

Epochs: 4766 | epoch avg. loss: 0.227 | test avg. loss: 131.094
Epochs: 4767 | epoch avg. loss: 0.233 | test avg. loss: 130.423
Epochs: 4768 | epoch avg. loss: 0.177 | test avg. loss: 130.109


 10%|▉         | 4771/50000 [06:52<56:17, 13.39it/s]

Epochs: 4769 | epoch avg. loss: 0.214 | test avg. loss: 131.293
Epochs: 4770 | epoch avg. loss: 0.423 | test avg. loss: 130.245
Epochs: 4771 | epoch avg. loss: 0.390 | test avg. loss: 130.930


 10%|▉         | 4775/50000 [06:52<53:31, 14.08it/s]

Epochs: 4772 | epoch avg. loss: 0.522 | test avg. loss: 130.992
Epochs: 4773 | epoch avg. loss: 0.354 | test avg. loss: 130.023
Epochs: 4774 | epoch avg. loss: 0.480 | test avg. loss: 132.009


 10%|▉         | 4777/50000 [06:52<53:26, 14.10it/s]

Epochs: 4775 | epoch avg. loss: 0.850 | test avg. loss: 130.384
Epochs: 4776 | epoch avg. loss: 0.867 | test avg. loss: 130.533
Epochs: 4777 | epoch avg. loss: 0.336 | test avg. loss: 130.321


 10%|▉         | 4781/50000 [06:53<58:52, 12.80it/s]

Epochs: 4778 | epoch avg. loss: 0.338 | test avg. loss: 130.313
Epochs: 4779 | epoch avg. loss: 0.529 | test avg. loss: 131.163
Epochs: 4780 | epoch avg. loss: 0.227 | test avg. loss: 130.587


 10%|▉         | 4783/50000 [06:53<57:29, 13.11it/s]

Epochs: 4781 | epoch avg. loss: 0.219 | test avg. loss: 131.272
Epochs: 4782 | epoch avg. loss: 0.363 | test avg. loss: 130.528
Epochs: 4783 | epoch avg. loss: 0.288 | test avg. loss: 130.472


 10%|▉         | 4787/50000 [06:53<57:14, 13.16it/s]  

Epochs: 4784 | epoch avg. loss: 0.248 | test avg. loss: 131.171
Epochs: 4785 | epoch avg. loss: 0.271 | test avg. loss: 130.980
Epochs: 4786 | epoch avg. loss: 0.423 | test avg. loss: 131.269


 10%|▉         | 4789/50000 [06:53<57:30, 13.10it/s]

Epochs: 4787 | epoch avg. loss: 0.398 | test avg. loss: 129.608
Epochs: 4788 | epoch avg. loss: 0.366 | test avg. loss: 128.788
Epochs: 4789 | epoch avg. loss: 0.229 | test avg. loss: 128.946


 10%|▉         | 4793/50000 [06:54<1:04:10, 11.74it/s]

Epochs: 4790 | epoch avg. loss: 0.255 | test avg. loss: 128.410
Epochs: 4791 | epoch avg. loss: 0.352 | test avg. loss: 129.608
Epochs: 4792 | epoch avg. loss: 0.428 | test avg. loss: 128.602


 10%|▉         | 4797/50000 [06:54<58:48, 12.81it/s]  

Epochs: 4793 | epoch avg. loss: 0.350 | test avg. loss: 128.400
Epochs: 4794 | epoch avg. loss: 0.328 | test avg. loss: 129.271
Epochs: 4795 | epoch avg. loss: 0.390 | test avg. loss: 127.647
Epochs: 4796 | epoch avg. loss: 0.404 | test avg. loss: 128.383


 10%|▉         | 4799/50000 [06:54<59:01, 12.76it/s]

Epochs: 4797 | epoch avg. loss: 0.109 | test avg. loss: 129.545
Epochs: 4798 | epoch avg. loss: 0.102 | test avg. loss: 130.036
Epochs: 4799 | epoch avg. loss: 0.092 | test avg. loss: 130.062


 10%|▉         | 4803/50000 [06:56<2:31:42,  4.97it/s]

Epochs: 4800 | epoch avg. loss: 0.105 | test avg. loss: 130.318
Epochs: 4801 | epoch avg. loss: 0.147 | test avg. loss: 130.607
Epochs: 4802 | epoch avg. loss: 0.154 | test avg. loss: 130.462


                                                      

Epochs: 4803 | epoch avg. loss: 0.086 | test avg. loss: 130.764
Epochs: 4804 | epoch avg. loss: 0.105 | test avg. loss: 131.009
Epochs: 4805 | epoch avg. loss: 0.096 | test avg. loss: 130.563


 10%|▉         | 4809/50000 [06:56<1:26:42,  8.69it/s]

Epochs: 4806 | epoch avg. loss: 0.137 | test avg. loss: 130.485
Epochs: 4807 | epoch avg. loss: 0.079 | test avg. loss: 130.444
Epochs: 4808 | epoch avg. loss: 0.085 | test avg. loss: 130.908


 10%|▉         | 4811/50000 [06:56<1:19:25,  9.48it/s]

Epochs: 4809 | epoch avg. loss: 0.098 | test avg. loss: 131.270
Epochs: 4810 | epoch avg. loss: 0.113 | test avg. loss: 131.070
Epochs: 4811 | epoch avg. loss: 0.090 | test avg. loss: 130.998


 10%|▉         | 4813/50000 [06:57<1:14:30, 10.11it/s]

Epochs: 4812 | epoch avg. loss: 0.075 | test avg. loss: 131.086
Epochs: 4813 | epoch avg. loss: 0.076 | test avg. loss: 131.407
Epochs: 4814 | epoch avg. loss: 0.102 | test avg. loss: 130.860


                                                      

Epochs: 4815 | epoch avg. loss: 0.114 | test avg. loss: 131.179
Epochs: 4816 | epoch avg. loss: 0.122 | test avg. loss: 130.896
Epochs: 4817 | epoch avg. loss: 0.109 | test avg. loss: 131.293


 10%|▉         | 4821/50000 [06:57<1:00:39, 12.41it/s]

Epochs: 4818 | epoch avg. loss: 0.130 | test avg. loss: 131.422
Epochs: 4819 | epoch avg. loss: 0.124 | test avg. loss: 130.723
Epochs: 4820 | epoch avg. loss: 0.214 | test avg. loss: 130.516


 10%|▉         | 4823/50000 [06:57<58:03, 12.97it/s]

Epochs: 4821 | epoch avg. loss: 0.089 | test avg. loss: 130.006
Epochs: 4822 | epoch avg. loss: 0.160 | test avg. loss: 130.612
Epochs: 4823 | epoch avg. loss: 0.103 | test avg. loss: 130.849


 10%|▉         | 4827/50000 [06:57<57:53, 13.00it/s]

Epochs: 4824 | epoch avg. loss: 0.180 | test avg. loss: 130.635
Epochs: 4825 | epoch avg. loss: 0.145 | test avg. loss: 131.120
Epochs: 4826 | epoch avg. loss: 0.262 | test avg. loss: 130.341


 10%|▉         | 4829/50000 [06:58<1:03:22, 11.88it/s]

Epochs: 4827 | epoch avg. loss: 0.121 | test avg. loss: 130.829
Epochs: 4828 | epoch avg. loss: 0.144 | test avg. loss: 131.030
Epochs: 4829 | epoch avg. loss: 0.117 | test avg. loss: 130.784


 10%|▉         | 4833/50000 [06:58<59:29, 12.65it/s]

Epochs: 4830 | epoch avg. loss: 0.127 | test avg. loss: 131.182
Epochs: 4831 | epoch avg. loss: 0.215 | test avg. loss: 130.586
Epochs: 4832 | epoch avg. loss: 0.114 | test avg. loss: 130.492
Epochs: 4833 | epoch avg. loss: 0.160 | test avg. loss: 131.170


 10%|▉         | 4837/50000 [06:58<53:48, 13.99it/s]

Epochs: 4834 | epoch avg. loss: 0.442 | test avg. loss: 130.055
Epochs: 4835 | epoch avg. loss: 0.097 | test avg. loss: 130.657
Epochs: 4836 | epoch avg. loss: 0.105 | test avg. loss: 130.583
Epochs: 4837 | epoch avg. loss: 0.172 | test avg. loss: 131.112


 10%|▉         | 4841/50000 [06:59<59:24, 12.67it/s]

Epochs: 4838 | epoch avg. loss: 0.147 | test avg. loss: 131.553
Epochs: 4839 | epoch avg. loss: 0.173 | test avg. loss: 131.952
Epochs: 4840 | epoch avg. loss: 0.280 | test avg. loss: 133.262




Epochs: 4841 | epoch avg. loss: 0.502 | test avg. loss: 131.716
Epochs: 4842 | epoch avg. loss: 0.235 | test avg. loss: 131.938
Epochs: 4843 | epoch avg. loss: 0.165 | test avg. loss: 131.982


 10%|▉         | 4847/50000 [06:59<55:22, 13.59it/s]

Epochs: 4844 | epoch avg. loss: 0.177 | test avg. loss: 131.371
Epochs: 4845 | epoch avg. loss: 0.205 | test avg. loss: 131.619
Epochs: 4846 | epoch avg. loss: 0.180 | test avg. loss: 131.245


 10%|▉         | 4849/50000 [06:59<54:30, 13.81it/s]

Epochs: 4847 | epoch avg. loss: 0.136 | test avg. loss: 132.575
Epochs: 4848 | epoch avg. loss: 0.190 | test avg. loss: 133.096
Epochs: 4849 | epoch avg. loss: 0.203 | test avg. loss: 133.144


 10%|▉         | 4853/50000 [06:59<56:26, 13.33it/s]

Epochs: 4850 | epoch avg. loss: 0.182 | test avg. loss: 132.561
Epochs: 4851 | epoch avg. loss: 0.279 | test avg. loss: 131.416
Epochs: 4852 | epoch avg. loss: 0.209 | test avg. loss: 131.932


 10%|▉         | 4855/50000 [07:00<59:11, 12.71it/s]

Epochs: 4853 | epoch avg. loss: 0.187 | test avg. loss: 132.163
Epochs: 4854 | epoch avg. loss: 0.128 | test avg. loss: 132.064
Epochs: 4855 | epoch avg. loss: 0.219 | test avg. loss: 132.032


 10%|▉         | 4859/50000 [07:00<56:33, 13.30it/s]

Epochs: 4856 | epoch avg. loss: 0.109 | test avg. loss: 130.490
Epochs: 4857 | epoch avg. loss: 0.249 | test avg. loss: 130.215
Epochs: 4858 | epoch avg. loss: 0.179 | test avg. loss: 131.464


 10%|▉         | 4861/50000 [07:00<56:58, 13.21it/s]

Epochs: 4859 | epoch avg. loss: 0.380 | test avg. loss: 130.219
Epochs: 4860 | epoch avg. loss: 0.288 | test avg. loss: 130.459
Epochs: 4861 | epoch avg. loss: 0.249 | test avg. loss: 131.310


 10%|▉         | 4865/50000 [07:00<56:56, 13.21it/s]

Epochs: 4862 | epoch avg. loss: 0.304 | test avg. loss: 130.999
Epochs: 4863 | epoch avg. loss: 0.169 | test avg. loss: 131.692
Epochs: 4864 | epoch avg. loss: 0.235 | test avg. loss: 131.391


 10%|▉         | 4867/50000 [07:01<59:20, 12.68it/s]

Epochs: 4865 | epoch avg. loss: 0.155 | test avg. loss: 130.828
Epochs: 4866 | epoch avg. loss: 0.172 | test avg. loss: 131.367
Epochs: 4867 | epoch avg. loss: 0.252 | test avg. loss: 130.638


 10%|▉         | 4871/50000 [07:01<59:28, 12.65it/s]

Epochs: 4868 | epoch avg. loss: 0.412 | test avg. loss: 131.679
Epochs: 4869 | epoch avg. loss: 0.158 | test avg. loss: 132.500
Epochs: 4870 | epoch avg. loss: 0.155 | test avg. loss: 131.901


 10%|▉         | 4873/50000 [07:01<58:16, 12.91it/s]

Epochs: 4871 | epoch avg. loss: 0.194 | test avg. loss: 132.156
Epochs: 4872 | epoch avg. loss: 0.188 | test avg. loss: 131.405
Epochs: 4873 | epoch avg. loss: 0.187 | test avg. loss: 132.026


 10%|▉         | 4877/50000 [07:01<1:02:44, 11.99it/s]

Epochs: 4874 | epoch avg. loss: 0.337 | test avg. loss: 132.566
Epochs: 4875 | epoch avg. loss: 0.231 | test avg. loss: 131.821
Epochs: 4876 | epoch avg. loss: 0.330 | test avg. loss: 132.652


 10%|▉         | 4879/50000 [07:02<1:00:58, 12.33it/s]

Epochs: 4877 | epoch avg. loss: 0.471 | test avg. loss: 131.283
Epochs: 4878 | epoch avg. loss: 0.784 | test avg. loss: 131.774
Epochs: 4879 | epoch avg. loss: 0.512 | test avg. loss: 133.738


 10%|▉         | 4883/50000 [07:02<1:05:57, 11.40it/s]

Epochs: 4880 | epoch avg. loss: 0.882 | test avg. loss: 130.898
Epochs: 4881 | epoch avg. loss: 1.144 | test avg. loss: 131.727
Epochs: 4882 | epoch avg. loss: 0.910 | test avg. loss: 132.954


 10%|▉         | 4885/50000 [07:02<1:09:00, 10.90it/s]

Epochs: 4883 | epoch avg. loss: 0.670 | test avg. loss: 132.811
Epochs: 4884 | epoch avg. loss: 1.391 | test avg. loss: 133.715


 10%|▉         | 4887/50000 [07:02<1:10:08, 10.72it/s]

Epochs: 4885 | epoch avg. loss: 1.732 | test avg. loss: 130.577
Epochs: 4886 | epoch avg. loss: 0.833 | test avg. loss: 130.419
Epochs: 4887 | epoch avg. loss: 0.329 | test avg. loss: 130.681


 10%|▉         | 4891/50000 [07:03<1:10:04, 10.73it/s]

Epochs: 4888 | epoch avg. loss: 0.563 | test avg. loss: 130.265
Epochs: 4889 | epoch avg. loss: 0.453 | test avg. loss: 132.501
Epochs: 4890 | epoch avg. loss: 0.807 | test avg. loss: 132.472


 10%|▉         | 4893/50000 [07:03<1:08:10, 11.03it/s]

Epochs: 4891 | epoch avg. loss: 1.129 | test avg. loss: 134.801
Epochs: 4892 | epoch avg. loss: 0.646 | test avg. loss: 134.763
Epochs: 4893 | epoch avg. loss: 0.549 | test avg. loss: 134.187


 10%|▉         | 4897/50000 [07:03<1:09:03, 10.89it/s]

Epochs: 4894 | epoch avg. loss: 0.457 | test avg. loss: 134.063
Epochs: 4895 | epoch avg. loss: 0.662 | test avg. loss: 130.885
Epochs: 4896 | epoch avg. loss: 0.338 | test avg. loss: 131.292


 10%|▉         | 4899/50000 [07:03<1:07:49, 11.08it/s]

Epochs: 4897 | epoch avg. loss: 0.557 | test avg. loss: 129.543
Epochs: 4898 | epoch avg. loss: 0.290 | test avg. loss: 128.968
Epochs: 4899 | epoch avg. loss: 0.270 | test avg. loss: 129.613


 10%|▉         | 4903/50000 [07:05<2:54:30,  4.31it/s]

Epochs: 4900 | epoch avg. loss: 0.117 | test avg. loss: 130.105
Epochs: 4901 | epoch avg. loss: 0.140 | test avg. loss: 130.792
Epochs: 4902 | epoch avg. loss: 0.118 | test avg. loss: 130.467


 10%|▉         | 4905/50000 [07:05<2:18:10,  5.44it/s]

Epochs: 4903 | epoch avg. loss: 0.123 | test avg. loss: 130.734
Epochs: 4904 | epoch avg. loss: 0.245 | test avg. loss: 129.813
Epochs: 4905 | epoch avg. loss: 0.156 | test avg. loss: 129.163


 10%|▉         | 4907/50000 [07:05<1:54:28,  6.57it/s]

Epochs: 4906 | epoch avg. loss: 0.186 | test avg. loss: 130.098
Epochs: 4907 | epoch avg. loss: 0.238 | test avg. loss: 129.464


 10%|▉         | 4911/50000 [07:06<1:29:07,  8.43it/s]

Epochs: 4908 | epoch avg. loss: 0.214 | test avg. loss: 130.154
Epochs: 4909 | epoch avg. loss: 0.169 | test avg. loss: 131.847
Epochs: 4910 | epoch avg. loss: 0.306 | test avg. loss: 130.799


 10%|▉         | 4913/50000 [07:06<1:19:38,  9.44it/s]

Epochs: 4911 | epoch avg. loss: 0.401 | test avg. loss: 131.166
Epochs: 4912 | epoch avg. loss: 0.143 | test avg. loss: 131.656
Epochs: 4913 | epoch avg. loss: 0.136 | test avg. loss: 131.107


 10%|▉         | 4917/50000 [07:06<1:07:44, 11.09it/s]

Epochs: 4914 | epoch avg. loss: 0.177 | test avg. loss: 131.601
Epochs: 4915 | epoch avg. loss: 0.140 | test avg. loss: 131.954
Epochs: 4916 | epoch avg. loss: 0.169 | test avg. loss: 131.627


 10%|▉         | 4919/50000 [07:06<1:06:44, 11.26it/s]

Epochs: 4917 | epoch avg. loss: 0.082 | test avg. loss: 132.084
Epochs: 4918 | epoch avg. loss: 0.077 | test avg. loss: 132.671
Epochs: 4919 | epoch avg. loss: 0.091 | test avg. loss: 132.978


 10%|▉         | 4923/50000 [07:07<1:08:57, 10.89it/s]

Epochs: 4920 | epoch avg. loss: 0.080 | test avg. loss: 132.951
Epochs: 4921 | epoch avg. loss: 0.084 | test avg. loss: 132.573
Epochs: 4922 | epoch avg. loss: 0.080 | test avg. loss: 133.330


 10%|▉         | 4925/50000 [07:07<1:09:42, 10.78it/s]

Epochs: 4923 | epoch avg. loss: 0.165 | test avg. loss: 132.624
Epochs: 4924 | epoch avg. loss: 0.190 | test avg. loss: 132.762
Epochs: 4925 | epoch avg. loss: 0.087 | test avg. loss: 132.360


 10%|▉         | 4929/50000 [07:07<1:03:48, 11.77it/s]

Epochs: 4926 | epoch avg. loss: 0.128 | test avg. loss: 132.314
Epochs: 4927 | epoch avg. loss: 0.119 | test avg. loss: 133.372
Epochs: 4928 | epoch avg. loss: 0.179 | test avg. loss: 132.535


 10%|▉         | 4931/50000 [07:07<1:07:04, 11.20it/s]

Epochs: 4929 | epoch avg. loss: 0.127 | test avg. loss: 132.208
Epochs: 4930 | epoch avg. loss: 0.136 | test avg. loss: 133.448


 10%|▉         | 4933/50000 [07:08<1:10:27, 10.66it/s]

Epochs: 4931 | epoch avg. loss: 0.419 | test avg. loss: 132.050
Epochs: 4932 | epoch avg. loss: 0.233 | test avg. loss: 132.990
Epochs: 4933 | epoch avg. loss: 0.110 | test avg. loss: 133.296


 10%|▉         | 4937/50000 [07:08<1:08:41, 10.93it/s]

Epochs: 4934 | epoch avg. loss: 0.086 | test avg. loss: 133.099
Epochs: 4935 | epoch avg. loss: 0.112 | test avg. loss: 133.480
Epochs: 4936 | epoch avg. loss: 0.161 | test avg. loss: 133.851


 10%|▉         | 4939/50000 [07:08<1:09:32, 10.80it/s]

Epochs: 4937 | epoch avg. loss: 0.088 | test avg. loss: 134.012
Epochs: 4938 | epoch avg. loss: 0.083 | test avg. loss: 134.667
Epochs: 4939 | epoch avg. loss: 0.089 | test avg. loss: 134.763


 10%|▉         | 4943/50000 [07:08<1:07:09, 11.18it/s]

Epochs: 4940 | epoch avg. loss: 0.092 | test avg. loss: 134.333
Epochs: 4941 | epoch avg. loss: 0.067 | test avg. loss: 134.348
Epochs: 4942 | epoch avg. loss: 0.087 | test avg. loss: 134.354


                                                      

Epochs: 4943 | epoch avg. loss: 0.063 | test avg. loss: 134.563
Epochs: 4944 | epoch avg. loss: 0.076 | test avg. loss: 135.026


 10%|▉         | 4947/50000 [07:09<1:12:21, 10.38it/s]

Epochs: 4945 | epoch avg. loss: 0.102 | test avg. loss: 134.475
Epochs: 4946 | epoch avg. loss: 0.267 | test avg. loss: 134.287
Epochs: 4947 | epoch avg. loss: 0.125 | test avg. loss: 135.144


 10%|▉         | 4951/50000 [07:09<1:05:02, 11.54it/s]

Epochs: 4948 | epoch avg. loss: 0.212 | test avg. loss: 134.156
Epochs: 4949 | epoch avg. loss: 0.240 | test avg. loss: 134.980
Epochs: 4950 | epoch avg. loss: 0.289 | test avg. loss: 134.867


 10%|▉         | 4953/50000 [07:09<1:07:17, 11.16it/s]

Epochs: 4951 | epoch avg. loss: 0.356 | test avg. loss: 134.774
Epochs: 4952 | epoch avg. loss: 0.429 | test avg. loss: 136.988


 10%|▉         | 4955/50000 [07:10<1:09:16, 10.84it/s]

Epochs: 4953 | epoch avg. loss: 0.946 | test avg. loss: 134.991
Epochs: 4954 | epoch avg. loss: 1.290 | test avg. loss: 134.989
Epochs: 4955 | epoch avg. loss: 0.521 | test avg. loss: 135.335


 10%|▉         | 4959/50000 [07:10<1:07:06, 11.19it/s]

Epochs: 4956 | epoch avg. loss: 0.463 | test avg. loss: 134.104
Epochs: 4957 | epoch avg. loss: 0.933 | test avg. loss: 135.247
Epochs: 4958 | epoch avg. loss: 0.958 | test avg. loss: 133.534


 10%|▉         | 4961/50000 [07:10<1:07:36, 11.10it/s]

Epochs: 4959 | epoch avg. loss: 1.628 | test avg. loss: 133.333
Epochs: 4960 | epoch avg. loss: 1.388 | test avg. loss: 138.187
Epochs: 4961 | epoch avg. loss: 2.446 | test avg. loss: 136.336


 10%|▉         | 4965/50000 [07:10<1:03:37, 11.80it/s]

Epochs: 4962 | epoch avg. loss: 4.396 | test avg. loss: 135.129
Epochs: 4963 | epoch avg. loss: 1.642 | test avg. loss: 137.154
Epochs: 4964 | epoch avg. loss: 1.104 | test avg. loss: 138.306


 10%|▉         | 4967/50000 [07:11<1:01:08, 12.28it/s]

Epochs: 4965 | epoch avg. loss: 1.237 | test avg. loss: 142.137
Epochs: 4966 | epoch avg. loss: 2.013 | test avg. loss: 140.025
Epochs: 4967 | epoch avg. loss: 2.346 | test avg. loss: 137.874


 10%|▉         | 4971/50000 [07:11<1:00:25, 12.42it/s]

Epochs: 4968 | epoch avg. loss: 1.698 | test avg. loss: 139.364
Epochs: 4969 | epoch avg. loss: 2.318 | test avg. loss: 135.004
Epochs: 4970 | epoch avg. loss: 2.396 | test avg. loss: 139.894


 10%|▉         | 4973/50000 [07:11<59:03, 12.71it/s]

Epochs: 4971 | epoch avg. loss: 2.541 | test avg. loss: 139.303
Epochs: 4972 | epoch avg. loss: 2.030 | test avg. loss: 142.935
Epochs: 4973 | epoch avg. loss: 1.089 | test avg. loss: 146.202


 10%|▉         | 4977/50000 [07:11<58:58, 12.72it/s]

Epochs: 4974 | epoch avg. loss: 1.304 | test avg. loss: 145.270
Epochs: 4975 | epoch avg. loss: 3.884 | test avg. loss: 146.701
Epochs: 4976 | epoch avg. loss: 2.456 | test avg. loss: 139.304


 10%|▉         | 4979/50000 [07:12<59:03, 12.70it/s]

Epochs: 4977 | epoch avg. loss: 2.732 | test avg. loss: 137.212
Epochs: 4978 | epoch avg. loss: 1.439 | test avg. loss: 140.713
Epochs: 4979 | epoch avg. loss: 1.319 | test avg. loss: 140.848


 10%|▉         | 4983/50000 [07:12<58:10, 12.90it/s]

Epochs: 4980 | epoch avg. loss: 1.245 | test avg. loss: 146.197
Epochs: 4981 | epoch avg. loss: 1.576 | test avg. loss: 143.560
Epochs: 4982 | epoch avg. loss: 1.792 | test avg. loss: 143.940


 10%|▉         | 4985/50000 [07:12<56:33, 13.27it/s]

Epochs: 4983 | epoch avg. loss: 0.690 | test avg. loss: 145.845
Epochs: 4984 | epoch avg. loss: 0.809 | test avg. loss: 143.426
Epochs: 4985 | epoch avg. loss: 1.058 | test avg. loss: 145.446


 10%|▉         | 4989/50000 [07:12<55:57, 13.41it/s]

Epochs: 4986 | epoch avg. loss: 1.041 | test avg. loss: 143.145
Epochs: 4987 | epoch avg. loss: 1.382 | test avg. loss: 143.022
Epochs: 4988 | epoch avg. loss: 0.380 | test avg. loss: 142.401


 10%|▉         | 4991/50000 [07:13<58:16, 12.87it/s]

Epochs: 4989 | epoch avg. loss: 0.417 | test avg. loss: 142.256
Epochs: 4990 | epoch avg. loss: 0.272 | test avg. loss: 141.707
Epochs: 4991 | epoch avg. loss: 0.249 | test avg. loss: 141.536


 10%|▉         | 4995/50000 [07:13<56:39, 13.24it/s]

Epochs: 4992 | epoch avg. loss: 0.278 | test avg. loss: 143.120
Epochs: 4993 | epoch avg. loss: 0.465 | test avg. loss: 142.237
Epochs: 4994 | epoch avg. loss: 0.184 | test avg. loss: 142.407


                                                    

Epochs: 4995 | epoch avg. loss: 0.135 | test avg. loss: 143.565
Epochs: 4996 | epoch avg. loss: 0.186 | test avg. loss: 144.283
Epochs: 4997 | epoch avg. loss: 0.138 | test avg. loss: 145.753


 10%|▉         | 4999/50000 [07:13<55:16, 13.57it/s]

Epochs: 4998 | epoch avg. loss: 0.192 | test avg. loss: 145.166
Epochs: 4999 | epoch avg. loss: 0.209 | test avg. loss: 144.510


 10%|█         | 5003/50000 [07:15<2:29:47,  5.01it/s]

Epochs: 5000 | epoch avg. loss: 0.200 | test avg. loss: 145.490
Epochs: 5001 | epoch avg. loss: 0.421 | test avg. loss: 144.142
Epochs: 5002 | epoch avg. loss: 0.841 | test avg. loss: 144.820


 10%|█         | 5005/50000 [07:15<2:03:39,  6.06it/s]

Epochs: 5003 | epoch avg. loss: 0.210 | test avg. loss: 143.535
Epochs: 5004 | epoch avg. loss: 0.509 | test avg. loss: 143.487
Epochs: 5005 | epoch avg. loss: 0.324 | test avg. loss: 145.251


 10%|█         | 5009/50000 [07:15<1:26:58,  8.62it/s]

Epochs: 5006 | epoch avg. loss: 0.493 | test avg. loss: 146.055
Epochs: 5007 | epoch avg. loss: 1.563 | test avg. loss: 148.595
Epochs: 5008 | epoch avg. loss: 1.747 | test avg. loss: 143.920


 10%|█         | 5013/50000 [07:15<1:07:57, 11.03it/s]

Epochs: 5009 | epoch avg. loss: 2.320 | test avg. loss: 150.336
Epochs: 5010 | epoch avg. loss: 6.663 | test avg. loss: 150.178
Epochs: 5011 | epoch avg. loss: 8.352 | test avg. loss: 151.597
Epochs: 5012 | epoch avg. loss: 4.805 | test avg. loss: 153.574


 10%|█         | 5015/50000 [07:15<1:03:52, 11.74it/s]

Epochs: 5013 | epoch avg. loss: 4.410 | test avg. loss: 146.811
Epochs: 5014 | epoch avg. loss: 4.480 | test avg. loss: 150.999
Epochs: 5015 | epoch avg. loss: 3.170 | test avg. loss: 150.983


 10%|█         | 5019/50000 [07:16<1:00:37, 12.37it/s]

Epochs: 5016 | epoch avg. loss: 3.220 | test avg. loss: 158.934
Epochs: 5017 | epoch avg. loss: 3.642 | test avg. loss: 155.842
Epochs: 5018 | epoch avg. loss: 2.569 | test avg. loss: 156.730


 10%|█         | 5021/50000 [07:16<1:00:00, 12.49it/s]

Epochs: 5019 | epoch avg. loss: 3.211 | test avg. loss: 150.951
Epochs: 5020 | epoch avg. loss: 2.690 | test avg. loss: 147.583
Epochs: 5021 | epoch avg. loss: 3.086 | test avg. loss: 144.423


 10%|█         | 5025/50000 [07:16<56:39, 13.23it/s]

Epochs: 5022 | epoch avg. loss: 2.751 | test avg. loss: 144.815
Epochs: 5023 | epoch avg. loss: 1.349 | test avg. loss: 146.739
Epochs: 5024 | epoch avg. loss: 1.064 | test avg. loss: 149.326


 10%|█         | 5027/50000 [07:16<54:38, 13.72it/s]

Epochs: 5025 | epoch avg. loss: 1.787 | test avg. loss: 154.196
Epochs: 5026 | epoch avg. loss: 1.150 | test avg. loss: 171.237
Epochs: 5027 | epoch avg. loss: 0.934 | test avg. loss: 179.020


 10%|█         | 5031/50000 [07:17<56:35, 13.24it/s]

Epochs: 5028 | epoch avg. loss: 0.926 | test avg. loss: 184.922
Epochs: 5029 | epoch avg. loss: 1.030 | test avg. loss: 182.920
Epochs: 5030 | epoch avg. loss: 0.867 | test avg. loss: 183.253


 10%|█         | 5033/50000 [07:17<56:46, 13.20it/s]

Epochs: 5031 | epoch avg. loss: 0.827 | test avg. loss: 182.708
Epochs: 5032 | epoch avg. loss: 0.938 | test avg. loss: 177.819
Epochs: 5033 | epoch avg. loss: 0.619 | test avg. loss: 175.007


 10%|█         | 5037/50000 [07:17<52:53, 14.17it/s]

Epochs: 5034 | epoch avg. loss: 1.496 | test avg. loss: 167.476
Epochs: 5035 | epoch avg. loss: 1.232 | test avg. loss: 169.709
Epochs: 5036 | epoch avg. loss: 0.876 | test avg. loss: 174.257
Epochs: 5037 | epoch avg. loss: 1.325 | test avg. loss: 173.541


 10%|█         | 5041/50000 [07:17<52:33, 14.26it/s]

Epochs: 5038 | epoch avg. loss: 1.053 | test avg. loss: 174.297
Epochs: 5039 | epoch avg. loss: 0.907 | test avg. loss: 172.331
Epochs: 5040 | epoch avg. loss: 0.366 | test avg. loss: 168.410


 10%|█         | 5043/50000 [07:18<55:04, 13.60it/s]

Epochs: 5041 | epoch avg. loss: 0.325 | test avg. loss: 165.780
Epochs: 5042 | epoch avg. loss: 0.321 | test avg. loss: 165.078
Epochs: 5043 | epoch avg. loss: 0.369 | test avg. loss: 165.809


 10%|█         | 5047/50000 [07:18<59:26, 12.60it/s]

Epochs: 5044 | epoch avg. loss: 0.386 | test avg. loss: 169.953
Epochs: 5045 | epoch avg. loss: 0.321 | test avg. loss: 172.212
Epochs: 5046 | epoch avg. loss: 0.357 | test avg. loss: 173.138


 10%|█         | 5049/50000 [07:18<58:23, 12.83it/s]

Epochs: 5047 | epoch avg. loss: 0.380 | test avg. loss: 172.361
Epochs: 5048 | epoch avg. loss: 0.457 | test avg. loss: 171.300
Epochs: 5049 | epoch avg. loss: 0.372 | test avg. loss: 170.600


 10%|█         | 5053/50000 [07:18<57:20, 13.06it/s]

Epochs: 5050 | epoch avg. loss: 0.401 | test avg. loss: 168.827
Epochs: 5051 | epoch avg. loss: 0.485 | test avg. loss: 165.315
Epochs: 5052 | epoch avg. loss: 0.385 | test avg. loss: 165.373


 10%|█         | 5055/50000 [07:18<59:49, 12.52it/s]

Epochs: 5053 | epoch avg. loss: 0.427 | test avg. loss: 162.671
Epochs: 5054 | epoch avg. loss: 0.764 | test avg. loss: 162.891
Epochs: 5055 | epoch avg. loss: 0.527 | test avg. loss: 160.593


 10%|█         | 5059/50000 [07:19<59:09, 12.66it/s]

Epochs: 5056 | epoch avg. loss: 0.813 | test avg. loss: 162.385
Epochs: 5057 | epoch avg. loss: 0.468 | test avg. loss: 162.768
Epochs: 5058 | epoch avg. loss: 0.515 | test avg. loss: 163.985


 10%|█         | 5061/50000 [07:19<1:01:26, 12.19it/s]

Epochs: 5059 | epoch avg. loss: 0.493 | test avg. loss: 163.589
Epochs: 5060 | epoch avg. loss: 0.369 | test avg. loss: 162.744
Epochs: 5061 | epoch avg. loss: 0.375 | test avg. loss: 165.990


 10%|█         | 5065/50000 [07:19<57:15, 13.08it/s]  

Epochs: 5062 | epoch avg. loss: 0.375 | test avg. loss: 165.627
Epochs: 5063 | epoch avg. loss: 0.312 | test avg. loss: 167.279
Epochs: 5064 | epoch avg. loss: 0.436 | test avg. loss: 165.228


 10%|█         | 5067/50000 [07:19<56:18, 13.30it/s]

Epochs: 5065 | epoch avg. loss: 0.274 | test avg. loss: 167.069
Epochs: 5066 | epoch avg. loss: 0.557 | test avg. loss: 165.105
Epochs: 5067 | epoch avg. loss: 0.430 | test avg. loss: 166.449


 10%|█         | 5071/50000 [07:20<1:02:26, 11.99it/s]

Epochs: 5068 | epoch avg. loss: 0.283 | test avg. loss: 165.353
Epochs: 5069 | epoch avg. loss: 0.465 | test avg. loss: 166.265
Epochs: 5070 | epoch avg. loss: 0.246 | test avg. loss: 165.739


 10%|█         | 5073/50000 [07:20<59:49, 12.52it/s]

Epochs: 5071 | epoch avg. loss: 0.186 | test avg. loss: 165.994
Epochs: 5072 | epoch avg. loss: 0.172 | test avg. loss: 165.446
Epochs: 5073 | epoch avg. loss: 0.189 | test avg. loss: 165.564


 10%|█         | 5077/50000 [07:20<57:53, 12.93it/s]

Epochs: 5074 | epoch avg. loss: 0.154 | test avg. loss: 165.535
Epochs: 5075 | epoch avg. loss: 0.212 | test avg. loss: 165.091
Epochs: 5076 | epoch avg. loss: 0.309 | test avg. loss: 167.114


 10%|█         | 5079/50000 [07:20<1:00:34, 12.36it/s]

Epochs: 5077 | epoch avg. loss: 0.542 | test avg. loss: 164.802
Epochs: 5078 | epoch avg. loss: 0.303 | test avg. loss: 166.499


                                                      

Epochs: 5079 | epoch avg. loss: 0.353 | test avg. loss: 164.719
Epochs: 5080 | epoch avg. loss: 0.337 | test avg. loss: 167.043


 10%|█         | 5083/50000 [07:21<1:09:57, 10.70it/s]

Epochs: 5081 | epoch avg. loss: 0.436 | test avg. loss: 166.094
Epochs: 5082 | epoch avg. loss: 0.364 | test avg. loss: 168.103
Epochs: 5083 | epoch avg. loss: 0.331 | test avg. loss: 165.449


 10%|█         | 5087/50000 [07:21<1:10:43, 10.58it/s]

Epochs: 5084 | epoch avg. loss: 0.407 | test avg. loss: 166.734
Epochs: 5085 | epoch avg. loss: 0.742 | test avg. loss: 164.320
Epochs: 5086 | epoch avg. loss: 0.564 | test avg. loss: 164.971


 10%|█         | 5089/50000 [07:21<1:05:23, 11.45it/s]

Epochs: 5087 | epoch avg. loss: 0.445 | test avg. loss: 162.978
Epochs: 5088 | epoch avg. loss: 1.085 | test avg. loss: 168.027
Epochs: 5089 | epoch avg. loss: 0.707 | test avg. loss: 172.279


 10%|█         | 5093/50000 [07:22<1:03:19, 11.82it/s]

Epochs: 5090 | epoch avg. loss: 0.686 | test avg. loss: 178.057
Epochs: 5091 | epoch avg. loss: 0.559 | test avg. loss: 181.963
Epochs: 5092 | epoch avg. loss: 0.523 | test avg. loss: 187.352


 10%|█         | 5097/50000 [07:22<55:47, 13.41it/s]

Epochs: 5093 | epoch avg. loss: 0.441 | test avg. loss: 193.777
Epochs: 5094 | epoch avg. loss: 0.610 | test avg. loss: 183.864
Epochs: 5095 | epoch avg. loss: 0.419 | test avg. loss: 173.194
Epochs: 5096 | epoch avg. loss: 0.292 | test avg. loss: 168.377


 10%|█         | 5099/50000 [07:22<55:08, 13.57it/s]

Epochs: 5097 | epoch avg. loss: 0.232 | test avg. loss: 168.796
Epochs: 5098 | epoch avg. loss: 0.191 | test avg. loss: 168.979
Epochs: 5099 | epoch avg. loss: 0.207 | test avg. loss: 170.402


 10%|█         | 5103/50000 [07:25<4:01:44,  3.10it/s]

Epochs: 5100 | epoch avg. loss: 0.207 | test avg. loss: 168.752
Epochs: 5101 | epoch avg. loss: 0.148 | test avg. loss: 168.695
Epochs: 5102 | epoch avg. loss: 0.235 | test avg. loss: 167.660


 10%|█         | 5105/50000 [07:25<3:09:02,  3.96it/s]

Epochs: 5103 | epoch avg. loss: 0.128 | test avg. loss: 169.934
Epochs: 5104 | epoch avg. loss: 0.298 | test avg. loss: 169.116
Epochs: 5105 | epoch avg. loss: 0.128 | test avg. loss: 168.933


 10%|█         | 5109/50000 [07:25<2:04:15,  6.02it/s]

Epochs: 5106 | epoch avg. loss: 0.095 | test avg. loss: 168.781
Epochs: 5107 | epoch avg. loss: 0.097 | test avg. loss: 169.680
Epochs: 5108 | epoch avg. loss: 0.112 | test avg. loss: 170.578


 10%|█         | 5111/50000 [07:25<1:44:12,  7.18it/s]

Epochs: 5109 | epoch avg. loss: 0.146 | test avg. loss: 170.020
Epochs: 5110 | epoch avg. loss: 0.242 | test avg. loss: 171.697
Epochs: 5111 | epoch avg. loss: 0.268 | test avg. loss: 169.529


 10%|█         | 5115/50000 [07:26<1:27:47,  8.52it/s]

Epochs: 5112 | epoch avg. loss: 0.177 | test avg. loss: 171.089
Epochs: 5113 | epoch avg. loss: 0.222 | test avg. loss: 171.102
Epochs: 5114 | epoch avg. loss: 0.108 | test avg. loss: 172.598


 10%|█         | 5117/50000 [07:26<1:18:48,  9.49it/s]

Epochs: 5115 | epoch avg. loss: 0.138 | test avg. loss: 173.574
Epochs: 5116 | epoch avg. loss: 0.193 | test avg. loss: 171.886
Epochs: 5117 | epoch avg. loss: 0.136 | test avg. loss: 172.202


 10%|█         | 5121/50000 [07:26<1:12:11, 10.36it/s]

Epochs: 5118 | epoch avg. loss: 0.144 | test avg. loss: 172.753
Epochs: 5119 | epoch avg. loss: 0.112 | test avg. loss: 173.851
Epochs: 5120 | epoch avg. loss: 0.084 | test avg. loss: 173.747




Epochs: 5121 | epoch avg. loss: 0.063 | test avg. loss: 173.728
Epochs: 5122 | epoch avg. loss: 0.105 | test avg. loss: 173.464


 10%|█         | 5125/50000 [07:27<1:12:15, 10.35it/s]

Epochs: 5123 | epoch avg. loss: 0.079 | test avg. loss: 173.769
Epochs: 5124 | epoch avg. loss: 0.070 | test avg. loss: 174.050
Epochs: 5125 | epoch avg. loss: 0.071 | test avg. loss: 174.270


 10%|█         | 5129/50000 [07:27<1:08:11, 10.97it/s]

Epochs: 5126 | epoch avg. loss: 0.081 | test avg. loss: 174.124
Epochs: 5127 | epoch avg. loss: 0.072 | test avg. loss: 173.857
Epochs: 5128 | epoch avg. loss: 0.073 | test avg. loss: 174.094


 10%|█         | 5131/50000 [07:27<1:08:38, 10.90it/s]

Epochs: 5129 | epoch avg. loss: 0.086 | test avg. loss: 175.505
Epochs: 5130 | epoch avg. loss: 0.216 | test avg. loss: 173.956
Epochs: 5131 | epoch avg. loss: 0.203 | test avg. loss: 174.937




Epochs: 5132 | epoch avg. loss: 0.133 | test avg. loss: 174.152
Epochs: 5133 | epoch avg. loss: 0.094 | test avg. loss: 175.220


 10%|█         | 5135/50000 [07:28<1:12:37, 10.30it/s]

Epochs: 5134 | epoch avg. loss: 0.082 | test avg. loss: 175.498
Epochs: 5135 | epoch avg. loss: 0.075 | test avg. loss: 175.005


 10%|█         | 5139/50000 [07:28<1:11:35, 10.44it/s]

Epochs: 5136 | epoch avg. loss: 0.083 | test avg. loss: 174.586
Epochs: 5137 | epoch avg. loss: 0.082 | test avg. loss: 174.078
Epochs: 5138 | epoch avg. loss: 0.082 | test avg. loss: 175.409


 10%|█         | 5141/50000 [07:28<1:09:34, 10.74it/s]

Epochs: 5139 | epoch avg. loss: 0.094 | test avg. loss: 174.612
Epochs: 5140 | epoch avg. loss: 0.141 | test avg. loss: 175.195
Epochs: 5141 | epoch avg. loss: 0.178 | test avg. loss: 174.308


 10%|█         | 5143/50000 [07:28<1:09:24, 10.77it/s]

Epochs: 5142 | epoch avg. loss: 0.110 | test avg. loss: 174.433
Epochs: 5143 | epoch avg. loss: 0.135 | test avg. loss: 176.277


 10%|█         | 5147/50000 [07:29<1:08:54, 10.85it/s]

Epochs: 5144 | epoch avg. loss: 0.131 | test avg. loss: 175.332
Epochs: 5145 | epoch avg. loss: 0.228 | test avg. loss: 176.451
Epochs: 5146 | epoch avg. loss: 0.203 | test avg. loss: 174.409


 10%|█         | 5149/50000 [07:29<1:08:15, 10.95it/s]

Epochs: 5147 | epoch avg. loss: 0.350 | test avg. loss: 174.847
Epochs: 5148 | epoch avg. loss: 0.267 | test avg. loss: 174.243
Epochs: 5149 | epoch avg. loss: 0.221 | test avg. loss: 173.273




Epochs: 5150 | epoch avg. loss: 0.188 | test avg. loss: 174.418
Epochs: 5151 | epoch avg. loss: 0.164 | test avg. loss: 173.557
Epochs: 5152 | epoch avg. loss: 0.120 | test avg. loss: 174.596


 10%|█         | 5155/50000 [07:29<55:58, 13.35it/s]

Epochs: 5153 | epoch avg. loss: 0.104 | test avg. loss: 174.642
Epochs: 5154 | epoch avg. loss: 0.113 | test avg. loss: 174.946
Epochs: 5155 | epoch avg. loss: 0.080 | test avg. loss: 175.001


 10%|█         | 5159/50000 [07:30<56:26, 13.24it/s]

Epochs: 5156 | epoch avg. loss: 0.083 | test avg. loss: 173.784
Epochs: 5157 | epoch avg. loss: 0.156 | test avg. loss: 176.062
Epochs: 5158 | epoch avg. loss: 0.302 | test avg. loss: 175.528


 10%|█         | 5161/50000 [07:30<57:44, 12.94it/s]

Epochs: 5159 | epoch avg. loss: 0.256 | test avg. loss: 176.731
Epochs: 5160 | epoch avg. loss: 0.205 | test avg. loss: 175.635
Epochs: 5161 | epoch avg. loss: 0.159 | test avg. loss: 177.025


 10%|█         | 5165/50000 [07:30<56:12, 13.30it/s]

Epochs: 5162 | epoch avg. loss: 0.118 | test avg. loss: 178.258
Epochs: 5163 | epoch avg. loss: 0.132 | test avg. loss: 176.477
Epochs: 5164 | epoch avg. loss: 0.153 | test avg. loss: 176.539


 10%|█         | 5167/50000 [07:30<55:34, 13.44it/s]

Epochs: 5165 | epoch avg. loss: 0.160 | test avg. loss: 174.947
Epochs: 5166 | epoch avg. loss: 0.247 | test avg. loss: 176.805
Epochs: 5167 | epoch avg. loss: 0.192 | test avg. loss: 176.482


 10%|█         | 5171/50000 [07:30<57:27, 13.00it/s]

Epochs: 5168 | epoch avg. loss: 0.097 | test avg. loss: 176.166
Epochs: 5169 | epoch avg. loss: 0.066 | test avg. loss: 176.247
Epochs: 5170 | epoch avg. loss: 0.058 | test avg. loss: 176.936


 10%|█         | 5173/50000 [07:31<1:00:13, 12.40it/s]

Epochs: 5171 | epoch avg. loss: 0.074 | test avg. loss: 177.174
Epochs: 5172 | epoch avg. loss: 0.061 | test avg. loss: 177.404
Epochs: 5173 | epoch avg. loss: 0.057 | test avg. loss: 176.600


 10%|█         | 5177/50000 [07:31<1:00:27, 12.36it/s]

Epochs: 5174 | epoch avg. loss: 0.073 | test avg. loss: 176.867
Epochs: 5175 | epoch avg. loss: 0.061 | test avg. loss: 177.254
Epochs: 5176 | epoch avg. loss: 0.065 | test avg. loss: 177.078


 10%|█         | 5179/50000 [07:31<59:08, 12.63it/s]

Epochs: 5177 | epoch avg. loss: 0.082 | test avg. loss: 177.740
Epochs: 5178 | epoch avg. loss: 0.210 | test avg. loss: 176.639
Epochs: 5179 | epoch avg. loss: 0.075 | test avg. loss: 177.919


 10%|█         | 5183/50000 [07:31<57:34, 12.97it/s]

Epochs: 5180 | epoch avg. loss: 0.103 | test avg. loss: 177.842
Epochs: 5181 | epoch avg. loss: 0.109 | test avg. loss: 177.212
Epochs: 5182 | epoch avg. loss: 0.075 | test avg. loss: 176.539


 10%|█         | 5185/50000 [07:32<1:00:02, 12.44it/s]

Epochs: 5183 | epoch avg. loss: 0.049 | test avg. loss: 175.836
Epochs: 5184 | epoch avg. loss: 0.055 | test avg. loss: 176.085
Epochs: 5185 | epoch avg. loss: 0.052 | test avg. loss: 176.438


                                                    

Epochs: 5186 | epoch avg. loss: 0.056 | test avg. loss: 176.631
Epochs: 5187 | epoch avg. loss: 0.049 | test avg. loss: 176.080
Epochs: 5188 | epoch avg. loss: 0.065 | test avg. loss: 176.595


 10%|█         | 5191/50000 [07:32<57:29, 12.99it/s]

Epochs: 5189 | epoch avg. loss: 0.059 | test avg. loss: 177.448
Epochs: 5190 | epoch avg. loss: 0.060 | test avg. loss: 177.180
Epochs: 5191 | epoch avg. loss: 0.077 | test avg. loss: 177.268


 10%|█         | 5195/50000 [07:32<54:15, 13.76it/s]

Epochs: 5192 | epoch avg. loss: 0.106 | test avg. loss: 177.140
Epochs: 5193 | epoch avg. loss: 0.064 | test avg. loss: 177.228
Epochs: 5194 | epoch avg. loss: 0.089 | test avg. loss: 177.207


 10%|█         | 5197/50000 [07:33<56:49, 13.14it/s]

Epochs: 5195 | epoch avg. loss: 0.053 | test avg. loss: 177.445
Epochs: 5196 | epoch avg. loss: 0.052 | test avg. loss: 177.078
Epochs: 5197 | epoch avg. loss: 0.068 | test avg. loss: 177.531


 10%|█         | 5199/50000 [07:33<58:56, 12.67it/s]

Epochs: 5198 | epoch avg. loss: 0.051 | test avg. loss: 177.131
Epochs: 5199 | epoch avg. loss: 0.091 | test avg. loss: 176.386


 10%|█         | 5203/50000 [07:34<2:32:21,  4.90it/s]

Epochs: 5200 | epoch avg. loss: 0.109 | test avg. loss: 177.846
Epochs: 5201 | epoch avg. loss: 0.199 | test avg. loss: 176.865
Epochs: 5202 | epoch avg. loss: 0.236 | test avg. loss: 179.208


 10%|█         | 5205/50000 [07:34<2:02:51,  6.08it/s]

Epochs: 5203 | epoch avg. loss: 0.382 | test avg. loss: 178.236
Epochs: 5204 | epoch avg. loss: 0.251 | test avg. loss: 177.833
Epochs: 5205 | epoch avg. loss: 0.170 | test avg. loss: 177.977


 10%|█         | 5209/50000 [07:35<1:29:17,  8.36it/s]

Epochs: 5206 | epoch avg. loss: 0.174 | test avg. loss: 175.868
Epochs: 5207 | epoch avg. loss: 0.405 | test avg. loss: 177.192
Epochs: 5208 | epoch avg. loss: 0.374 | test avg. loss: 177.401


 10%|█         | 5211/50000 [07:35<1:19:26,  9.40it/s]

Epochs: 5209 | epoch avg. loss: 0.152 | test avg. loss: 176.756
Epochs: 5210 | epoch avg. loss: 0.167 | test avg. loss: 176.741
Epochs: 5211 | epoch avg. loss: 0.337 | test avg. loss: 174.449




Epochs: 5212 | epoch avg. loss: 0.337 | test avg. loss: 176.818
Epochs: 5213 | epoch avg. loss: 0.234 | test avg. loss: 177.405
Epochs: 5214 | epoch avg. loss: 0.186 | test avg. loss: 177.815


 10%|█         | 5217/50000 [07:35<1:01:17, 12.18it/s]

Epochs: 5215 | epoch avg. loss: 0.191 | test avg. loss: 176.396
Epochs: 5216 | epoch avg. loss: 0.234 | test avg. loss: 175.263
Epochs: 5217 | epoch avg. loss: 0.226 | test avg. loss: 178.895


 10%|█         | 5219/50000 [07:35<1:02:08, 12.01it/s]

Epochs: 5218 | epoch avg. loss: 0.124 | test avg. loss: 177.792
Epochs: 5219 | epoch avg. loss: 0.124 | test avg. loss: 175.325


 10%|█         | 5223/50000 [07:36<1:06:48, 11.17it/s]

Epochs: 5220 | epoch avg. loss: 0.057 | test avg. loss: 173.607
Epochs: 5221 | epoch avg. loss: 0.118 | test avg. loss: 174.990
Epochs: 5222 | epoch avg. loss: 0.084 | test avg. loss: 176.597


                                                      

Epochs: 5223 | epoch avg. loss: 0.071 | test avg. loss: 177.376
Epochs: 5224 | epoch avg. loss: 0.080 | test avg. loss: 176.828
Epochs: 5225 | epoch avg. loss: 0.057 | test avg. loss: 175.571


 10%|█         | 5229/50000 [07:36<56:40, 13.16it/s]

Epochs: 5226 | epoch avg. loss: 0.100 | test avg. loss: 176.781
Epochs: 5227 | epoch avg. loss: 0.074 | test avg. loss: 177.792
Epochs: 5228 | epoch avg. loss: 0.073 | test avg. loss: 177.926


 10%|█         | 5231/50000 [07:36<55:35, 13.42it/s]

Epochs: 5229 | epoch avg. loss: 0.069 | test avg. loss: 177.872
Epochs: 5230 | epoch avg. loss: 0.100 | test avg. loss: 177.048
Epochs: 5231 | epoch avg. loss: 0.099 | test avg. loss: 178.305


 10%|█         | 5235/50000 [07:37<57:35, 12.95it/s]

Epochs: 5232 | epoch avg. loss: 0.070 | test avg. loss: 177.953
Epochs: 5233 | epoch avg. loss: 0.057 | test avg. loss: 177.734
Epochs: 5234 | epoch avg. loss: 0.052 | test avg. loss: 177.154


 10%|█         | 5237/50000 [07:37<56:40, 13.16it/s]

Epochs: 5235 | epoch avg. loss: 0.057 | test avg. loss: 177.043
Epochs: 5236 | epoch avg. loss: 0.064 | test avg. loss: 178.461
Epochs: 5237 | epoch avg. loss: 0.106 | test avg. loss: 177.458


 10%|█         | 5241/50000 [07:37<54:26, 13.70it/s]

Epochs: 5238 | epoch avg. loss: 0.056 | test avg. loss: 176.247
Epochs: 5239 | epoch avg. loss: 0.079 | test avg. loss: 177.608
Epochs: 5240 | epoch avg. loss: 0.078 | test avg. loss: 178.835


 10%|█         | 5243/50000 [07:37<54:28, 13.69it/s]

Epochs: 5241 | epoch avg. loss: 0.211 | test avg. loss: 180.827
Epochs: 5242 | epoch avg. loss: 0.824 | test avg. loss: 174.842
Epochs: 5243 | epoch avg. loss: 0.694 | test avg. loss: 173.931


 10%|█         | 5245/50000 [07:37<54:06, 13.78it/s]

Epochs: 5244 | epoch avg. loss: 0.240 | test avg. loss: 174.599
Epochs: 5245 | epoch avg. loss: 0.178 | test avg. loss: 176.549


 10%|█         | 5249/50000 [07:38<1:01:00, 12.22it/s]

Epochs: 5246 | epoch avg. loss: 0.157 | test avg. loss: 177.327
Epochs: 5247 | epoch avg. loss: 0.118 | test avg. loss: 175.995
Epochs: 5248 | epoch avg. loss: 0.115 | test avg. loss: 177.245


 11%|█         | 5251/50000 [07:38<59:11, 12.60it/s]

Epochs: 5249 | epoch avg. loss: 0.102 | test avg. loss: 178.841
Epochs: 5250 | epoch avg. loss: 0.080 | test avg. loss: 179.303
Epochs: 5251 | epoch avg. loss: 0.055 | test avg. loss: 177.870




Epochs: 5252 | epoch avg. loss: 0.072 | test avg. loss: 178.400
Epochs: 5253 | epoch avg. loss: 0.074 | test avg. loss: 179.235
Epochs: 5254 | epoch avg. loss: 0.084 | test avg. loss: 178.794


 11%|█         | 5257/50000 [07:38<53:26, 13.95it/s]

Epochs: 5255 | epoch avg. loss: 0.062 | test avg. loss: 177.471
Epochs: 5256 | epoch avg. loss: 0.080 | test avg. loss: 177.763
Epochs: 5257 | epoch avg. loss: 0.063 | test avg. loss: 179.703


 11%|█         | 5261/50000 [07:39<55:35, 13.41it/s]

Epochs: 5258 | epoch avg. loss: 0.064 | test avg. loss: 179.179
Epochs: 5259 | epoch avg. loss: 0.193 | test avg. loss: 178.896
Epochs: 5260 | epoch avg. loss: 0.267 | test avg. loss: 177.384


 11%|█         | 5263/50000 [07:39<54:26, 13.70it/s]

Epochs: 5261 | epoch avg. loss: 0.157 | test avg. loss: 177.901
Epochs: 5262 | epoch avg. loss: 0.127 | test avg. loss: 180.235
Epochs: 5263 | epoch avg. loss: 0.168 | test avg. loss: 178.479


 11%|█         | 5267/50000 [07:39<57:49, 12.89it/s]

Epochs: 5264 | epoch avg. loss: 0.209 | test avg. loss: 178.977
Epochs: 5265 | epoch avg. loss: 0.118 | test avg. loss: 179.044
Epochs: 5266 | epoch avg. loss: 0.158 | test avg. loss: 180.168


 11%|█         | 5269/50000 [07:39<59:17, 12.57it/s]

Epochs: 5267 | epoch avg. loss: 0.116 | test avg. loss: 181.794
Epochs: 5268 | epoch avg. loss: 0.209 | test avg. loss: 178.736
Epochs: 5269 | epoch avg. loss: 0.329 | test avg. loss: 179.959


 11%|█         | 5271/50000 [07:39<1:00:50, 12.25it/s]

Epochs: 5270 | epoch avg. loss: 0.308 | test avg. loss: 181.075
Epochs: 5271 | epoch avg. loss: 0.222 | test avg. loss: 179.459


 11%|█         | 5275/50000 [07:40<1:07:51, 10.99it/s]

Epochs: 5272 | epoch avg. loss: 0.322 | test avg. loss: 178.663
Epochs: 5273 | epoch avg. loss: 0.528 | test avg. loss: 176.619
Epochs: 5274 | epoch avg. loss: 0.666 | test avg. loss: 178.854


 11%|█         | 5277/50000 [07:40<1:05:24, 11.40it/s]

Epochs: 5275 | epoch avg. loss: 0.431 | test avg. loss: 180.690
Epochs: 5276 | epoch avg. loss: 0.323 | test avg. loss: 178.978
Epochs: 5277 | epoch avg. loss: 0.339 | test avg. loss: 179.497


                                                    

Epochs: 5278 | epoch avg. loss: 0.252 | test avg. loss: 179.272
Epochs: 5279 | epoch avg. loss: 0.230 | test avg. loss: 181.123
Epochs: 5280 | epoch avg. loss: 0.156 | test avg. loss: 181.677


 11%|█         | 5283/50000 [07:40<57:14, 13.02it/s]

Epochs: 5281 | epoch avg. loss: 0.243 | test avg. loss: 178.082
Epochs: 5282 | epoch avg. loss: 0.268 | test avg. loss: 179.185
Epochs: 5283 | epoch avg. loss: 0.363 | test avg. loss: 176.618


 11%|█         | 5287/50000 [07:41<1:03:12, 11.79it/s]

Epochs: 5284 | epoch avg. loss: 0.586 | test avg. loss: 177.753
Epochs: 5285 | epoch avg. loss: 0.188 | test avg. loss: 179.646
Epochs: 5286 | epoch avg. loss: 0.162 | test avg. loss: 180.030


 11%|█         | 5289/50000 [07:41<1:02:07, 11.99it/s]

Epochs: 5287 | epoch avg. loss: 0.204 | test avg. loss: 179.513
Epochs: 5288 | epoch avg. loss: 0.546 | test avg. loss: 176.738
Epochs: 5289 | epoch avg. loss: 0.667 | test avg. loss: 179.715




Epochs: 5290 | epoch avg. loss: 0.239 | test avg. loss: 178.342
Epochs: 5291 | epoch avg. loss: 0.879 | test avg. loss: 176.557
Epochs: 5292 | epoch avg. loss: 0.524 | test avg. loss: 178.167


 11%|█         | 5295/50000 [07:41<53:54, 13.82it/s]

Epochs: 5293 | epoch avg. loss: 0.954 | test avg. loss: 173.785
Epochs: 5294 | epoch avg. loss: 1.081 | test avg. loss: 175.762
Epochs: 5295 | epoch avg. loss: 1.517 | test avg. loss: 171.899


 11%|█         | 5299/50000 [07:42<1:00:09, 12.38it/s]

Epochs: 5296 | epoch avg. loss: 0.560 | test avg. loss: 171.918
Epochs: 5297 | epoch avg. loss: 0.868 | test avg. loss: 175.516
Epochs: 5298 | epoch avg. loss: 1.481 | test avg. loss: 171.014


 11%|█         | 5299/50000 [07:42<1:00:09, 12.38it/s]

Epochs: 5299 | epoch avg. loss: 1.220 | test avg. loss: 173.287


 11%|█         | 5303/50000 [07:43<2:49:06,  4.41it/s]

Epochs: 5300 | epoch avg. loss: 0.873 | test avg. loss: 171.368
Epochs: 5301 | epoch avg. loss: 0.512 | test avg. loss: 172.979
Epochs: 5302 | epoch avg. loss: 0.263 | test avg. loss: 174.910


 11%|█         | 5305/50000 [07:44<2:15:11,  5.51it/s]

Epochs: 5303 | epoch avg. loss: 0.263 | test avg. loss: 174.635
Epochs: 5304 | epoch avg. loss: 0.258 | test avg. loss: 175.741
Epochs: 5305 | epoch avg. loss: 0.301 | test avg. loss: 174.594


 11%|█         | 5309/50000 [07:44<1:35:04,  7.83it/s]

Epochs: 5306 | epoch avg. loss: 0.238 | test avg. loss: 174.926
Epochs: 5307 | epoch avg. loss: 0.151 | test avg. loss: 176.215
Epochs: 5308 | epoch avg. loss: 0.128 | test avg. loss: 176.594


 11%|█         | 5311/50000 [07:44<1:24:36,  8.80it/s]

Epochs: 5309 | epoch avg. loss: 0.188 | test avg. loss: 179.371
Epochs: 5310 | epoch avg. loss: 0.240 | test avg. loss: 179.399
Epochs: 5311 | epoch avg. loss: 0.134 | test avg. loss: 180.408


 11%|█         | 5315/50000 [07:44<1:07:14, 11.07it/s]

Epochs: 5312 | epoch avg. loss: 0.188 | test avg. loss: 180.679
Epochs: 5313 | epoch avg. loss: 0.218 | test avg. loss: 178.241
Epochs: 5314 | epoch avg. loss: 0.202 | test avg. loss: 178.750
Epochs: 5315 | epoch avg. loss: 0.145 | test avg. loss: 177.953


 11%|█         | 5319/50000 [07:45<1:03:13, 11.78it/s]

Epochs: 5316 | epoch avg. loss: 0.226 | test avg. loss: 178.098
Epochs: 5317 | epoch avg. loss: 0.175 | test avg. loss: 180.010
Epochs: 5318 | epoch avg. loss: 0.212 | test avg. loss: 178.534


 11%|█         | 5321/50000 [07:45<1:01:33, 12.10it/s]

Epochs: 5319 | epoch avg. loss: 0.456 | test avg. loss: 179.159
Epochs: 5320 | epoch avg. loss: 0.493 | test avg. loss: 179.463
Epochs: 5321 | epoch avg. loss: 0.366 | test avg. loss: 178.065


 11%|█         | 5325/50000 [07:45<1:06:03, 11.27it/s]

Epochs: 5322 | epoch avg. loss: 0.755 | test avg. loss: 181.886
Epochs: 5323 | epoch avg. loss: 0.629 | test avg. loss: 180.919
Epochs: 5324 | epoch avg. loss: 0.553 | test avg. loss: 180.599


 11%|█         | 5327/50000 [07:45<1:03:31, 11.72it/s]

Epochs: 5325 | epoch avg. loss: 0.161 | test avg. loss: 180.150
Epochs: 5326 | epoch avg. loss: 0.179 | test avg. loss: 180.416
Epochs: 5327 | epoch avg. loss: 0.161 | test avg. loss: 182.191


 11%|█         | 5331/50000 [07:46<1:06:35, 11.18it/s]

Epochs: 5328 | epoch avg. loss: 0.168 | test avg. loss: 181.020
Epochs: 5329 | epoch avg. loss: 0.111 | test avg. loss: 180.358
Epochs: 5330 | epoch avg. loss: 0.153 | test avg. loss: 179.216


 11%|█         | 5333/50000 [07:46<1:04:14, 11.59it/s]

Epochs: 5331 | epoch avg. loss: 0.111 | test avg. loss: 179.843
Epochs: 5332 | epoch avg. loss: 0.110 | test avg. loss: 180.986
Epochs: 5333 | epoch avg. loss: 0.111 | test avg. loss: 180.157


 11%|█         | 5337/50000 [07:46<1:01:27, 12.11it/s]

Epochs: 5334 | epoch avg. loss: 0.135 | test avg. loss: 181.514
Epochs: 5335 | epoch avg. loss: 0.138 | test avg. loss: 181.537
Epochs: 5336 | epoch avg. loss: 0.092 | test avg. loss: 182.421


 11%|█         | 5339/50000 [07:46<1:01:04, 12.19it/s]

Epochs: 5337 | epoch avg. loss: 0.067 | test avg. loss: 182.039
Epochs: 5338 | epoch avg. loss: 0.100 | test avg. loss: 180.879
Epochs: 5339 | epoch avg. loss: 0.087 | test avg. loss: 181.049


                                                      

Epochs: 5340 | epoch avg. loss: 0.063 | test avg. loss: 181.286
Epochs: 5341 | epoch avg. loss: 0.087 | test avg. loss: 182.125


 11%|█         | 5345/50000 [07:47<1:05:21, 11.39it/s]

Epochs: 5342 | epoch avg. loss: 0.082 | test avg. loss: 182.289
Epochs: 5343 | epoch avg. loss: 0.126 | test avg. loss: 181.208
Epochs: 5344 | epoch avg. loss: 0.064 | test avg. loss: 181.142


 11%|█         | 5347/50000 [07:47<1:01:36, 12.08it/s]

Epochs: 5345 | epoch avg. loss: 0.063 | test avg. loss: 181.775
Epochs: 5346 | epoch avg. loss: 0.056 | test avg. loss: 182.036
Epochs: 5347 | epoch avg. loss: 0.067 | test avg. loss: 182.112


 11%|█         | 5351/50000 [07:47<58:53, 12.64it/s]

Epochs: 5348 | epoch avg. loss: 0.076 | test avg. loss: 180.072
Epochs: 5349 | epoch avg. loss: 0.127 | test avg. loss: 180.880
Epochs: 5350 | epoch avg. loss: 0.072 | test avg. loss: 182.318


                                                      

Epochs: 5351 | epoch avg. loss: 0.088 | test avg. loss: 181.890
Epochs: 5352 | epoch avg. loss: 0.118 | test avg. loss: 181.916


 11%|█         | 5355/50000 [07:48<1:03:55, 11.64it/s]

Epochs: 5353 | epoch avg. loss: 0.106 | test avg. loss: 181.818
Epochs: 5354 | epoch avg. loss: 0.099 | test avg. loss: 183.040
Epochs: 5355 | epoch avg. loss: 0.136 | test avg. loss: 182.971


 11%|█         | 5359/50000 [07:48<1:03:38, 11.69it/s]

Epochs: 5356 | epoch avg. loss: 0.111 | test avg. loss: 180.008
Epochs: 5357 | epoch avg. loss: 0.151 | test avg. loss: 181.857
Epochs: 5358 | epoch avg. loss: 0.431 | test avg. loss: 181.188


 11%|█         | 5361/50000 [07:48<1:02:16, 11.95it/s]

Epochs: 5359 | epoch avg. loss: 0.232 | test avg. loss: 182.706
Epochs: 5360 | epoch avg. loss: 0.198 | test avg. loss: 185.044
Epochs: 5361 | epoch avg. loss: 0.547 | test avg. loss: 181.440


 11%|█         | 5365/50000 [07:48<1:01:00, 12.19it/s]

Epochs: 5362 | epoch avg. loss: 0.266 | test avg. loss: 181.533
Epochs: 5363 | epoch avg. loss: 0.297 | test avg. loss: 182.271
Epochs: 5364 | epoch avg. loss: 0.166 | test avg. loss: 182.198


 11%|█         | 5367/50000 [07:49<58:49, 12.65it/s]

Epochs: 5365 | epoch avg. loss: 0.204 | test avg. loss: 184.198
Epochs: 5366 | epoch avg. loss: 0.406 | test avg. loss: 181.080
Epochs: 5367 | epoch avg. loss: 0.232 | test avg. loss: 181.071


 11%|█         | 5371/50000 [07:49<55:29, 13.41it/s]

Epochs: 5368 | epoch avg. loss: 0.163 | test avg. loss: 182.094
Epochs: 5369 | epoch avg. loss: 0.152 | test avg. loss: 181.915
Epochs: 5370 | epoch avg. loss: 0.301 | test avg. loss: 182.777
Epochs: 5371 | epoch avg. loss: 0.370 | test avg. loss: 180.174


 11%|█         | 5375/50000 [07:49<51:48, 14.36it/s]

Epochs: 5372 | epoch avg. loss: 0.140 | test avg. loss: 178.829
Epochs: 5373 | epoch avg. loss: 0.140 | test avg. loss: 178.755
Epochs: 5374 | epoch avg. loss: 0.160 | test avg. loss: 177.687
Epochs: 5375 | epoch avg. loss: 0.110 | test avg. loss: 178.790


 11%|█         | 5379/50000 [07:49<57:32, 12.92it/s]

Epochs: 5376 | epoch avg. loss: 0.191 | test avg. loss: 178.191
Epochs: 5377 | epoch avg. loss: 0.179 | test avg. loss: 177.863
Epochs: 5378 | epoch avg. loss: 0.110 | test avg. loss: 178.156


 11%|█         | 5381/50000 [07:50<59:43, 12.45it/s]

Epochs: 5379 | epoch avg. loss: 0.096 | test avg. loss: 177.700
Epochs: 5380 | epoch avg. loss: 0.146 | test avg. loss: 179.186
Epochs: 5381 | epoch avg. loss: 0.161 | test avg. loss: 178.538


 11%|█         | 5385/50000 [07:50<54:12, 13.72it/s]

Epochs: 5382 | epoch avg. loss: 0.128 | test avg. loss: 178.466
Epochs: 5383 | epoch avg. loss: 0.102 | test avg. loss: 179.006
Epochs: 5384 | epoch avg. loss: 0.190 | test avg. loss: 177.761
Epochs: 5385 | epoch avg. loss: 0.088 | test avg. loss: 178.620


 11%|█         | 5389/50000 [07:50<51:26, 14.45it/s]

Epochs: 5386 | epoch avg. loss: 0.120 | test avg. loss: 178.445
Epochs: 5387 | epoch avg. loss: 0.102 | test avg. loss: 179.226
Epochs: 5388 | epoch avg. loss: 0.092 | test avg. loss: 179.506
Epochs: 5389 | epoch avg. loss: 0.103 | test avg. loss: 179.634


 11%|█         | 5393/50000 [07:51<57:36, 12.91it/s]

Epochs: 5390 | epoch avg. loss: 0.100 | test avg. loss: 180.764
Epochs: 5391 | epoch avg. loss: 0.147 | test avg. loss: 179.661
Epochs: 5392 | epoch avg. loss: 0.149 | test avg. loss: 181.364


 11%|█         | 5397/50000 [07:51<53:14, 13.96it/s]

Epochs: 5393 | epoch avg. loss: 0.346 | test avg. loss: 180.670
Epochs: 5394 | epoch avg. loss: 0.159 | test avg. loss: 180.478
Epochs: 5395 | epoch avg. loss: 0.138 | test avg. loss: 180.435
Epochs: 5396 | epoch avg. loss: 0.168 | test avg. loss: 178.508


 11%|█         | 5399/50000 [07:51<52:13, 14.23it/s]

Epochs: 5397 | epoch avg. loss: 0.207 | test avg. loss: 179.772
Epochs: 5398 | epoch avg. loss: 0.353 | test avg. loss: 179.376
Epochs: 5399 | epoch avg. loss: 0.109 | test avg. loss: 179.601


 11%|█         | 5403/50000 [07:52<2:28:40,  5.00it/s]

Epochs: 5400 | epoch avg. loss: 0.105 | test avg. loss: 180.352
Epochs: 5401 | epoch avg. loss: 0.135 | test avg. loss: 179.542
Epochs: 5402 | epoch avg. loss: 0.086 | test avg. loss: 179.288


 11%|█         | 5405/50000 [07:53<2:01:41,  6.11it/s]

Epochs: 5403 | epoch avg. loss: 0.081 | test avg. loss: 179.887
Epochs: 5404 | epoch avg. loss: 0.094 | test avg. loss: 179.841
Epochs: 5405 | epoch avg. loss: 0.067 | test avg. loss: 180.027


 11%|█         | 5409/50000 [07:53<1:28:00,  8.44it/s]

Epochs: 5406 | epoch avg. loss: 0.067 | test avg. loss: 179.817
Epochs: 5407 | epoch avg. loss: 0.110 | test avg. loss: 180.114
Epochs: 5408 | epoch avg. loss: 0.088 | test avg. loss: 181.582


 11%|█         | 5411/50000 [07:53<1:16:35,  9.70it/s]

Epochs: 5409 | epoch avg. loss: 0.177 | test avg. loss: 180.750
Epochs: 5410 | epoch avg. loss: 0.223 | test avg. loss: 181.228
Epochs: 5411 | epoch avg. loss: 0.158 | test avg. loss: 183.176


 11%|█         | 5415/50000 [07:53<1:05:50, 11.29it/s]

Epochs: 5412 | epoch avg. loss: 0.373 | test avg. loss: 180.572
Epochs: 5413 | epoch avg. loss: 0.475 | test avg. loss: 181.071
Epochs: 5414 | epoch avg. loss: 0.278 | test avg. loss: 179.425


 11%|█         | 5417/50000 [07:54<1:05:44, 11.30it/s]

Epochs: 5415 | epoch avg. loss: 0.236 | test avg. loss: 177.757
Epochs: 5416 | epoch avg. loss: 0.213 | test avg. loss: 178.805
Epochs: 5417 | epoch avg. loss: 0.211 | test avg. loss: 177.760


 11%|█         | 5421/50000 [07:54<1:00:10, 12.35it/s]

Epochs: 5418 | epoch avg. loss: 0.571 | test avg. loss: 180.690
Epochs: 5419 | epoch avg. loss: 0.247 | test avg. loss: 181.217
Epochs: 5420 | epoch avg. loss: 0.240 | test avg. loss: 180.240


 11%|█         | 5425/50000 [07:54<54:26, 13.65it/s]

Epochs: 5421 | epoch avg. loss: 0.610 | test avg. loss: 181.922
Epochs: 5422 | epoch avg. loss: 0.870 | test avg. loss: 179.880
Epochs: 5423 | epoch avg. loss: 0.579 | test avg. loss: 180.298
Epochs: 5424 | epoch avg. loss: 0.636 | test avg. loss: 183.231


 11%|█         | 5427/50000 [07:54<52:46, 14.07it/s]

Epochs: 5425 | epoch avg. loss: 0.690 | test avg. loss: 179.294
Epochs: 5426 | epoch avg. loss: 0.724 | test avg. loss: 178.473
Epochs: 5427 | epoch avg. loss: 0.263 | test avg. loss: 179.990
Epochs: 5428 | epoch avg. loss: 0.269 | test avg. loss: 178.390


 11%|█         | 5431/50000 [07:55<58:34, 12.68it/s]

Epochs: 5429 | epoch avg. loss: 0.379 | test avg. loss: 179.290
Epochs: 5430 | epoch avg. loss: 0.201 | test avg. loss: 181.410
Epochs: 5431 | epoch avg. loss: 0.409 | test avg. loss: 178.975


 11%|█         | 5435/50000 [07:55<56:33, 13.13it/s]

Epochs: 5432 | epoch avg. loss: 1.137 | test avg. loss: 178.667
Epochs: 5433 | epoch avg. loss: 0.327 | test avg. loss: 177.790
Epochs: 5434 | epoch avg. loss: 0.222 | test avg. loss: 176.217


 11%|█         | 5439/50000 [07:55<52:49, 14.06it/s]

Epochs: 5435 | epoch avg. loss: 0.172 | test avg. loss: 179.519
Epochs: 5436 | epoch avg. loss: 0.254 | test avg. loss: 180.746
Epochs: 5437 | epoch avg. loss: 0.581 | test avg. loss: 183.588
Epochs: 5438 | epoch avg. loss: 0.299 | test avg. loss: 185.424


 11%|█         | 5441/50000 [07:55<52:43, 14.09it/s]

Epochs: 5439 | epoch avg. loss: 0.302 | test avg. loss: 185.224
Epochs: 5440 | epoch avg. loss: 0.194 | test avg. loss: 187.101
Epochs: 5441 | epoch avg. loss: 0.076 | test avg. loss: 187.242


 11%|█         | 5445/50000 [07:56<56:31, 13.14it/s]

Epochs: 5442 | epoch avg. loss: 0.160 | test avg. loss: 188.203
Epochs: 5443 | epoch avg. loss: 0.275 | test avg. loss: 188.619
Epochs: 5444 | epoch avg. loss: 0.314 | test avg. loss: 186.753


 11%|█         | 5447/50000 [07:56<55:44, 13.32it/s]

Epochs: 5445 | epoch avg. loss: 0.242 | test avg. loss: 187.274
Epochs: 5446 | epoch avg. loss: 0.118 | test avg. loss: 187.703
Epochs: 5447 | epoch avg. loss: 0.174 | test avg. loss: 185.899


 11%|█         | 5451/50000 [07:56<55:24, 13.40it/s]

Epochs: 5448 | epoch avg. loss: 0.093 | test avg. loss: 185.750
Epochs: 5449 | epoch avg. loss: 0.104 | test avg. loss: 185.535
Epochs: 5450 | epoch avg. loss: 0.090 | test avg. loss: 185.483


 11%|█         | 5453/50000 [07:56<54:42, 13.57it/s]

Epochs: 5451 | epoch avg. loss: 0.094 | test avg. loss: 187.082
Epochs: 5452 | epoch avg. loss: 0.256 | test avg. loss: 185.190
Epochs: 5453 | epoch avg. loss: 0.075 | test avg. loss: 184.317


 11%|█         | 5457/50000 [07:56<56:02, 13.25it/s]

Epochs: 5454 | epoch avg. loss: 0.118 | test avg. loss: 184.758
Epochs: 5455 | epoch avg. loss: 0.195 | test avg. loss: 183.543
Epochs: 5456 | epoch avg. loss: 0.399 | test avg. loss: 184.196


 11%|█         | 5459/50000 [07:57<1:02:29, 11.88it/s]

Epochs: 5457 | epoch avg. loss: 0.143 | test avg. loss: 184.781
Epochs: 5458 | epoch avg. loss: 0.153 | test avg. loss: 183.540
Epochs: 5459 | epoch avg. loss: 0.269 | test avg. loss: 184.107


 11%|█         | 5463/50000 [07:57<58:36, 12.66it/s]

Epochs: 5460 | epoch avg. loss: 0.165 | test avg. loss: 183.220
Epochs: 5461 | epoch avg. loss: 0.109 | test avg. loss: 182.123
Epochs: 5462 | epoch avg. loss: 0.170 | test avg. loss: 183.093


                                                    

Epochs: 5463 | epoch avg. loss: 0.161 | test avg. loss: 182.724
Epochs: 5464 | epoch avg. loss: 0.101 | test avg. loss: 182.214
Epochs: 5465 | epoch avg. loss: 0.205 | test avg. loss: 182.972


 11%|█         | 5469/50000 [07:57<55:56, 13.27it/s]

Epochs: 5466 | epoch avg. loss: 0.186 | test avg. loss: 182.841
Epochs: 5467 | epoch avg. loss: 0.101 | test avg. loss: 182.506
Epochs: 5468 | epoch avg. loss: 0.145 | test avg. loss: 183.275


 11%|█         | 5471/50000 [07:58<57:35, 12.89it/s]

Epochs: 5469 | epoch avg. loss: 0.089 | test avg. loss: 183.821
Epochs: 5470 | epoch avg. loss: 0.108 | test avg. loss: 183.010
Epochs: 5471 | epoch avg. loss: 0.203 | test avg. loss: 183.669


 11%|█         | 5475/50000 [07:58<55:30, 13.37it/s]

Epochs: 5472 | epoch avg. loss: 0.192 | test avg. loss: 183.716
Epochs: 5473 | epoch avg. loss: 0.142 | test avg. loss: 182.562
Epochs: 5474 | epoch avg. loss: 0.143 | test avg. loss: 183.393


 11%|█         | 5477/50000 [07:58<54:57, 13.50it/s]

Epochs: 5475 | epoch avg. loss: 0.091 | test avg. loss: 183.892
Epochs: 5476 | epoch avg. loss: 0.078 | test avg. loss: 184.029
Epochs: 5477 | epoch avg. loss: 0.071 | test avg. loss: 184.857


 11%|█         | 5481/50000 [07:58<57:06, 12.99it/s]

Epochs: 5478 | epoch avg. loss: 0.148 | test avg. loss: 184.248
Epochs: 5479 | epoch avg. loss: 0.087 | test avg. loss: 184.071
Epochs: 5480 | epoch avg. loss: 0.082 | test avg. loss: 185.101


 11%|█         | 5483/50000 [07:59<1:01:02, 12.16it/s]

Epochs: 5481 | epoch avg. loss: 0.120 | test avg. loss: 185.081
Epochs: 5482 | epoch avg. loss: 0.139 | test avg. loss: 185.435
Epochs: 5483 | epoch avg. loss: 0.091 | test avg. loss: 186.035


 11%|█         | 5487/50000 [07:59<1:00:11, 12.33it/s]

Epochs: 5484 | epoch avg. loss: 0.086 | test avg. loss: 185.562
Epochs: 5485 | epoch avg. loss: 0.078 | test avg. loss: 186.163
Epochs: 5486 | epoch avg. loss: 0.084 | test avg. loss: 186.285


 11%|█         | 5489/50000 [07:59<58:46, 12.62it/s]

Epochs: 5487 | epoch avg. loss: 0.082 | test avg. loss: 186.074
Epochs: 5488 | epoch avg. loss: 0.091 | test avg. loss: 186.397
Epochs: 5489 | epoch avg. loss: 0.097 | test avg. loss: 186.105


 11%|█         | 5493/50000 [07:59<58:00, 12.79it/s]

Epochs: 5490 | epoch avg. loss: 0.067 | test avg. loss: 186.265
Epochs: 5491 | epoch avg. loss: 0.067 | test avg. loss: 186.819
Epochs: 5492 | epoch avg. loss: 0.082 | test avg. loss: 186.439


 11%|█         | 5495/50000 [07:59<1:00:39, 12.23it/s]

Epochs: 5493 | epoch avg. loss: 0.079 | test avg. loss: 186.515
Epochs: 5494 | epoch avg. loss: 0.073 | test avg. loss: 187.035


 11%|█         | 5497/50000 [08:00<1:03:19, 11.71it/s]

Epochs: 5495 | epoch avg. loss: 0.090 | test avg. loss: 187.057
Epochs: 5496 | epoch avg. loss: 0.074 | test avg. loss: 186.350
Epochs: 5497 | epoch avg. loss: 0.108 | test avg. loss: 187.571


 11%|█         | 5499/50000 [08:00<1:00:36, 12.24it/s]

Epochs: 5498 | epoch avg. loss: 0.124 | test avg. loss: 186.733
Epochs: 5499 | epoch avg. loss: 0.214 | test avg. loss: 186.980


 11%|█         | 5503/50000 [08:01<2:34:51,  4.79it/s]

Epochs: 5500 | epoch avg. loss: 0.113 | test avg. loss: 186.963
Epochs: 5501 | epoch avg. loss: 0.149 | test avg. loss: 185.282
Epochs: 5502 | epoch avg. loss: 0.666 | test avg. loss: 186.273


 11%|█         | 5506/50000 [08:02<1:53:36,  6.53it/s]

Epochs: 5503 | epoch avg. loss: 0.294 | test avg. loss: 187.142
Epochs: 5504 | epoch avg. loss: 0.214 | test avg. loss: 186.687
Epochs: 5505 | epoch avg. loss: 0.312 | test avg. loss: 189.302


 11%|█         | 5508/50000 [08:02<1:37:28,  7.61it/s]

Epochs: 5506 | epoch avg. loss: 0.641 | test avg. loss: 186.295
Epochs: 5507 | epoch avg. loss: 0.344 | test avg. loss: 185.466
Epochs: 5508 | epoch avg. loss: 0.289 | test avg. loss: 187.212


 11%|█         | 5512/50000 [08:02<1:12:19, 10.25it/s]

Epochs: 5509 | epoch avg. loss: 0.229 | test avg. loss: 185.996
Epochs: 5510 | epoch avg. loss: 0.187 | test avg. loss: 186.348
Epochs: 5511 | epoch avg. loss: 0.115 | test avg. loss: 186.227
Epochs: 5512 | epoch avg. loss: 0.149 | test avg. loss: 185.928


 11%|█         | 5516/50000 [08:02<1:00:11, 12.32it/s]

Epochs: 5513 | epoch avg. loss: 0.121 | test avg. loss: 186.175
Epochs: 5514 | epoch avg. loss: 0.088 | test avg. loss: 185.772
Epochs: 5515 | epoch avg. loss: 0.076 | test avg. loss: 186.166


                                                      

Epochs: 5516 | epoch avg. loss: 0.189 | test avg. loss: 184.645
Epochs: 5517 | epoch avg. loss: 0.162 | test avg. loss: 184.221


 11%|█         | 5520/50000 [08:03<1:03:17, 11.71it/s]

Epochs: 5518 | epoch avg. loss: 0.184 | test avg. loss: 186.313
Epochs: 5519 | epoch avg. loss: 0.505 | test avg. loss: 186.105
Epochs: 5520 | epoch avg. loss: 0.147 | test avg. loss: 185.562


 11%|█         | 5524/50000 [08:03<57:17, 12.94it/s]

Epochs: 5521 | epoch avg. loss: 0.255 | test avg. loss: 186.149
Epochs: 5522 | epoch avg. loss: 0.078 | test avg. loss: 185.950
Epochs: 5523 | epoch avg. loss: 0.062 | test avg. loss: 186.209
Epochs: 5524 | epoch avg. loss: 0.081 | test avg. loss: 186.099


 11%|█         | 5528/50000 [08:03<52:00, 14.25it/s]

Epochs: 5525 | epoch avg. loss: 0.070 | test avg. loss: 186.262
Epochs: 5526 | epoch avg. loss: 0.091 | test avg. loss: 185.401
Epochs: 5527 | epoch avg. loss: 0.060 | test avg. loss: 185.768
Epochs: 5528 | epoch avg. loss: 0.146 | test avg. loss: 185.340


 11%|█         | 5532/50000 [08:04<56:52, 13.03it/s]

Epochs: 5529 | epoch avg. loss: 0.116 | test avg. loss: 185.490
Epochs: 5530 | epoch avg. loss: 0.143 | test avg. loss: 187.080
Epochs: 5531 | epoch avg. loss: 0.205 | test avg. loss: 185.661


 11%|█         | 5534/50000 [08:04<58:29, 12.67it/s]

Epochs: 5532 | epoch avg. loss: 0.257 | test avg. loss: 186.185
Epochs: 5533 | epoch avg. loss: 0.226 | test avg. loss: 188.893
Epochs: 5534 | epoch avg. loss: 0.703 | test avg. loss: 184.859


 11%|█         | 5538/50000 [08:04<52:47, 14.04it/s]

Epochs: 5535 | epoch avg. loss: 0.146 | test avg. loss: 183.076
Epochs: 5536 | epoch avg. loss: 0.209 | test avg. loss: 184.088
Epochs: 5537 | epoch avg. loss: 0.268 | test avg. loss: 183.873
Epochs: 5538 | epoch avg. loss: 0.114 | test avg. loss: 184.263


 11%|█         | 5542/50000 [08:04<49:11, 15.06it/s]

Epochs: 5539 | epoch avg. loss: 0.142 | test avg. loss: 184.338
Epochs: 5540 | epoch avg. loss: 0.091 | test avg. loss: 184.128
Epochs: 5541 | epoch avg. loss: 0.106 | test avg. loss: 184.295
Epochs: 5542 | epoch avg. loss: 0.204 | test avg. loss: 183.027


 11%|█         | 5546/50000 [08:04<54:41, 13.55it/s]

Epochs: 5543 | epoch avg. loss: 0.139 | test avg. loss: 183.320
Epochs: 5544 | epoch avg. loss: 0.100 | test avg. loss: 183.974
Epochs: 5545 | epoch avg. loss: 0.096 | test avg. loss: 184.027


 11%|█         | 5548/50000 [08:05<52:32, 14.10it/s]

Epochs: 5546 | epoch avg. loss: 0.255 | test avg. loss: 184.872
Epochs: 5547 | epoch avg. loss: 0.131 | test avg. loss: 185.927
Epochs: 5548 | epoch avg. loss: 0.469 | test avg. loss: 184.699


 11%|█         | 5552/50000 [08:05<54:39, 13.55it/s]

Epochs: 5549 | epoch avg. loss: 0.194 | test avg. loss: 185.370
Epochs: 5550 | epoch avg. loss: 0.133 | test avg. loss: 186.140
Epochs: 5551 | epoch avg. loss: 0.083 | test avg. loss: 185.435


 11%|█         | 5554/50000 [08:05<53:54, 13.74it/s]

Epochs: 5552 | epoch avg. loss: 0.101 | test avg. loss: 185.956
Epochs: 5553 | epoch avg. loss: 0.155 | test avg. loss: 185.197
Epochs: 5554 | epoch avg. loss: 0.096 | test avg. loss: 184.248


 11%|█         | 5558/50000 [08:05<55:55, 13.24it/s]

Epochs: 5555 | epoch avg. loss: 0.159 | test avg. loss: 184.700
Epochs: 5556 | epoch avg. loss: 0.055 | test avg. loss: 184.086
Epochs: 5557 | epoch avg. loss: 0.118 | test avg. loss: 185.309


 11%|█         | 5560/50000 [08:06<58:59, 12.55it/s]

Epochs: 5558 | epoch avg. loss: 0.272 | test avg. loss: 183.834
Epochs: 5559 | epoch avg. loss: 0.658 | test avg. loss: 183.174
Epochs: 5560 | epoch avg. loss: 0.352 | test avg. loss: 185.626


 11%|█         | 5564/50000 [08:06<58:49, 12.59it/s]

Epochs: 5561 | epoch avg. loss: 0.584 | test avg. loss: 183.913
Epochs: 5562 | epoch avg. loss: 0.713 | test avg. loss: 184.761
Epochs: 5563 | epoch avg. loss: 0.293 | test avg. loss: 187.783


 11%|█         | 5566/50000 [08:06<59:05, 12.53it/s]

Epochs: 5564 | epoch avg. loss: 0.755 | test avg. loss: 183.318
Epochs: 5565 | epoch avg. loss: 0.495 | test avg. loss: 182.499
Epochs: 5566 | epoch avg. loss: 0.346 | test avg. loss: 185.263


 11%|█         | 5570/50000 [08:06<59:59, 12.34it/s]

Epochs: 5567 | epoch avg. loss: 1.005 | test avg. loss: 181.632
Epochs: 5568 | epoch avg. loss: 0.776 | test avg. loss: 182.128
Epochs: 5569 | epoch avg. loss: 0.550 | test avg. loss: 187.131


                                                      

Epochs: 5570 | epoch avg. loss: 1.852 | test avg. loss: 183.409
Epochs: 5571 | epoch avg. loss: 1.757 | test avg. loss: 183.490


 11%|█         | 5574/50000 [08:07<1:09:04, 10.72it/s]

Epochs: 5572 | epoch avg. loss: 0.555 | test avg. loss: 182.776
Epochs: 5573 | epoch avg. loss: 0.472 | test avg. loss: 181.306
Epochs: 5574 | epoch avg. loss: 0.694 | test avg. loss: 186.271


 11%|█         | 5578/50000 [08:07<1:02:17, 11.89it/s]

Epochs: 5575 | epoch avg. loss: 2.214 | test avg. loss: 182.190
Epochs: 5576 | epoch avg. loss: 1.268 | test avg. loss: 183.907
Epochs: 5577 | epoch avg. loss: 1.142 | test avg. loss: 190.127


 11%|█         | 5580/50000 [08:07<58:57, 12.56it/s]

Epochs: 5578 | epoch avg. loss: 1.426 | test avg. loss: 185.902
Epochs: 5579 | epoch avg. loss: 0.875 | test avg. loss: 188.251
Epochs: 5580 | epoch avg. loss: 0.887 | test avg. loss: 189.109


 11%|█         | 5584/50000 [08:08<58:48, 12.59it/s]

Epochs: 5581 | epoch avg. loss: 0.345 | test avg. loss: 188.559
Epochs: 5582 | epoch avg. loss: 0.273 | test avg. loss: 191.315
Epochs: 5583 | epoch avg. loss: 0.467 | test avg. loss: 189.127


 11%|█         | 5586/50000 [08:08<1:04:10, 11.53it/s]

Epochs: 5584 | epoch avg. loss: 0.416 | test avg. loss: 188.561
Epochs: 5585 | epoch avg. loss: 0.353 | test avg. loss: 191.716
Epochs: 5586 | epoch avg. loss: 1.089 | test avg. loss: 188.372


 11%|█         | 5590/50000 [08:08<1:02:04, 11.92it/s]

Epochs: 5587 | epoch avg. loss: 0.411 | test avg. loss: 189.020
Epochs: 5588 | epoch avg. loss: 0.205 | test avg. loss: 187.782
Epochs: 5589 | epoch avg. loss: 0.180 | test avg. loss: 184.087


 11%|█         | 5592/50000 [08:08<1:01:30, 12.03it/s]

Epochs: 5590 | epoch avg. loss: 0.223 | test avg. loss: 182.796
Epochs: 5591 | epoch avg. loss: 0.136 | test avg. loss: 181.330
Epochs: 5592 | epoch avg. loss: 0.273 | test avg. loss: 182.056


 11%|█         | 5596/50000 [08:09<1:03:34, 11.64it/s]

Epochs: 5593 | epoch avg. loss: 0.298 | test avg. loss: 186.349
Epochs: 5594 | epoch avg. loss: 0.564 | test avg. loss: 186.047
Epochs: 5595 | epoch avg. loss: 0.137 | test avg. loss: 185.768


 11%|█         | 5598/50000 [08:09<1:04:27, 11.48it/s]

Epochs: 5596 | epoch avg. loss: 0.137 | test avg. loss: 186.363
Epochs: 5597 | epoch avg. loss: 0.154 | test avg. loss: 185.291
Epochs: 5598 | epoch avg. loss: 0.352 | test avg. loss: 186.013


 11%|█         | 5598/50000 [08:09<1:04:27, 11.48it/s]

Epochs: 5599 | epoch avg. loss: 0.195 | test avg. loss: 187.981


 11%|█         | 5602/50000 [08:10<2:33:52,  4.81it/s]

Epochs: 5600 | epoch avg. loss: 0.388 | test avg. loss: 184.129
Epochs: 5601 | epoch avg. loss: 0.678 | test avg. loss: 183.501
Epochs: 5602 | epoch avg. loss: 0.391 | test avg. loss: 183.644


 11%|█         | 5606/50000 [08:11<1:43:42,  7.13it/s]

Epochs: 5603 | epoch avg. loss: 0.798 | test avg. loss: 181.143
Epochs: 5604 | epoch avg. loss: 0.318 | test avg. loss: 183.380
Epochs: 5605 | epoch avg. loss: 0.357 | test avg. loss: 187.114


 11%|█         | 5610/50000 [08:11<1:15:55,  9.74it/s]

Epochs: 5606 | epoch avg. loss: 0.794 | test avg. loss: 183.979
Epochs: 5607 | epoch avg. loss: 0.388 | test avg. loss: 184.073
Epochs: 5608 | epoch avg. loss: 0.190 | test avg. loss: 184.862
Epochs: 5609 | epoch avg. loss: 0.329 | test avg. loss: 182.885


 11%|█         | 5612/50000 [08:11<1:07:28, 10.96it/s]

Epochs: 5610 | epoch avg. loss: 0.155 | test avg. loss: 184.585
Epochs: 5611 | epoch avg. loss: 0.275 | test avg. loss: 183.593
Epochs: 5612 | epoch avg. loss: 0.899 | test avg. loss: 183.441
Epochs: 5613 | epoch avg. loss: 0.378 | test avg. loss: 184.109


 11%|█         | 5616/50000 [08:11<1:01:40, 12.00it/s]

Epochs: 5614 | epoch avg. loss: 0.542 | test avg. loss: 181.514
Epochs: 5615 | epoch avg. loss: 0.353 | test avg. loss: 182.602
Epochs: 5616 | epoch avg. loss: 0.770 | test avg. loss: 182.874


 11%|█         | 5620/50000 [08:12<1:01:36, 12.01it/s]

Epochs: 5617 | epoch avg. loss: 0.472 | test avg. loss: 182.363
Epochs: 5618 | epoch avg. loss: 1.262 | test avg. loss: 186.506
Epochs: 5619 | epoch avg. loss: 1.353 | test avg. loss: 185.135


                                                    

Epochs: 5620 | epoch avg. loss: 0.463 | test avg. loss: 183.961
Epochs: 5621 | epoch avg. loss: 0.818 | test avg. loss: 187.257
Epochs: 5622 | epoch avg. loss: 1.120 | test avg. loss: 186.008


 11%|█▏        | 5626/50000 [08:12<56:08, 13.17it/s]

Epochs: 5623 | epoch avg. loss: 0.440 | test avg. loss: 187.200
Epochs: 5624 | epoch avg. loss: 0.519 | test avg. loss: 190.843
Epochs: 5625 | epoch avg. loss: 0.629 | test avg. loss: 186.595


 11%|█▏        | 5628/50000 [08:12<55:08, 13.41it/s]

Epochs: 5626 | epoch avg. loss: 1.216 | test avg. loss: 185.762
Epochs: 5627 | epoch avg. loss: 0.448 | test avg. loss: 189.246
Epochs: 5628 | epoch avg. loss: 1.364 | test avg. loss: 184.279


 11%|█▏        | 5632/50000 [08:13<56:12, 13.16it/s]

Epochs: 5629 | epoch avg. loss: 1.367 | test avg. loss: 185.832
Epochs: 5630 | epoch avg. loss: 0.518 | test avg. loss: 190.727
Epochs: 5631 | epoch avg. loss: 0.996 | test avg. loss: 187.797


 11%|█▏        | 5634/50000 [08:13<57:14, 12.92it/s]

Epochs: 5632 | epoch avg. loss: 1.561 | test avg. loss: 187.577
Epochs: 5633 | epoch avg. loss: 0.541 | test avg. loss: 190.149
Epochs: 5634 | epoch avg. loss: 1.019 | test avg. loss: 186.273


                                                    

Epochs: 5635 | epoch avg. loss: 1.379 | test avg. loss: 187.463
Epochs: 5636 | epoch avg. loss: 0.466 | test avg. loss: 185.557
Epochs: 5637 | epoch avg. loss: 0.649 | test avg. loss: 183.424


 11%|█▏        | 5640/50000 [08:13<53:48, 13.74it/s]

Epochs: 5638 | epoch avg. loss: 0.765 | test avg. loss: 188.203
Epochs: 5639 | epoch avg. loss: 1.981 | test avg. loss: 179.336
Epochs: 5640 | epoch avg. loss: 2.355 | test avg. loss: 180.544
Epochs: 5641 | epoch avg. loss: 1.199 | test avg. loss: 184.969




Epochs: 5642 | epoch avg. loss: 1.225 | test avg. loss: 182.831
Epochs: 5643 | epoch avg. loss: 2.319 | test avg. loss: 189.595


 11%|█▏        | 5646/50000 [08:14<58:44, 12.58it/s]

Epochs: 5644 | epoch avg. loss: 3.203 | test avg. loss: 185.893
Epochs: 5645 | epoch avg. loss: 1.631 | test avg. loss: 185.896
Epochs: 5646 | epoch avg. loss: 0.926 | test avg. loss: 187.207


 11%|█▏        | 5650/50000 [08:14<56:30, 13.08it/s]

Epochs: 5647 | epoch avg. loss: 1.203 | test avg. loss: 184.278
Epochs: 5648 | epoch avg. loss: 4.101 | test avg. loss: 192.830
Epochs: 5649 | epoch avg. loss: 2.276 | test avg. loss: 191.877


                                                    

Epochs: 5650 | epoch avg. loss: 3.881 | test avg. loss: 191.239
Epochs: 5651 | epoch avg. loss: 2.822 | test avg. loss: 195.112
Epochs: 5652 | epoch avg. loss: 3.861 | test avg. loss: 190.983


 11%|█▏        | 5656/50000 [08:14<52:14, 14.15it/s]

Epochs: 5653 | epoch avg. loss: 5.255 | test avg. loss: 189.956
Epochs: 5654 | epoch avg. loss: 3.484 | test avg. loss: 181.727
Epochs: 5655 | epoch avg. loss: 2.271 | test avg. loss: 184.236
Epochs: 5656 | epoch avg. loss: 2.058 | test avg. loss: 182.312


 11%|█▏        | 5660/50000 [08:15<55:52, 13.23it/s]

Epochs: 5657 | epoch avg. loss: 2.928 | test avg. loss: 183.055
Epochs: 5658 | epoch avg. loss: 0.935 | test avg. loss: 183.645
Epochs: 5659 | epoch avg. loss: 1.382 | test avg. loss: 184.212


 11%|█▏        | 5662/50000 [08:15<55:24, 13.34it/s]

Epochs: 5660 | epoch avg. loss: 1.398 | test avg. loss: 188.416
Epochs: 5661 | epoch avg. loss: 2.033 | test avg. loss: 183.663
Epochs: 5662 | epoch avg. loss: 3.883 | test avg. loss: 190.070


 11%|█▏        | 5666/50000 [08:15<53:13, 13.88it/s]

Epochs: 5663 | epoch avg. loss: 4.328 | test avg. loss: 179.683
Epochs: 5664 | epoch avg. loss: 2.984 | test avg. loss: 183.313
Epochs: 5665 | epoch avg. loss: 2.486 | test avg. loss: 180.564


 11%|█▏        | 5668/50000 [08:15<52:15, 14.14it/s]

Epochs: 5666 | epoch avg. loss: 1.544 | test avg. loss: 185.369
Epochs: 5667 | epoch avg. loss: 1.658 | test avg. loss: 186.163
Epochs: 5668 | epoch avg. loss: 0.839 | test avg. loss: 185.496


 11%|█▏        | 5672/50000 [08:15<54:15, 13.62it/s]

Epochs: 5669 | epoch avg. loss: 0.684 | test avg. loss: 188.098
Epochs: 5670 | epoch avg. loss: 0.785 | test avg. loss: 186.794
Epochs: 5671 | epoch avg. loss: 1.143 | test avg. loss: 191.558


 11%|█▏        | 5674/50000 [08:16<57:39, 12.81it/s]

Epochs: 5672 | epoch avg. loss: 0.795 | test avg. loss: 189.031
Epochs: 5673 | epoch avg. loss: 1.295 | test avg. loss: 192.734
Epochs: 5674 | epoch avg. loss: 0.798 | test avg. loss: 188.945


 11%|█▏        | 5678/50000 [08:16<52:52, 13.97it/s]

Epochs: 5675 | epoch avg. loss: 0.548 | test avg. loss: 189.230
Epochs: 5676 | epoch avg. loss: 0.612 | test avg. loss: 186.527
Epochs: 5677 | epoch avg. loss: 0.491 | test avg. loss: 185.475
Epochs: 5678 | epoch avg. loss: 0.702 | test avg. loss: 188.541


 11%|█▏        | 5682/50000 [08:16<52:27, 14.08it/s]

Epochs: 5679 | epoch avg. loss: 1.038 | test avg. loss: 185.863
Epochs: 5680 | epoch avg. loss: 1.031 | test avg. loss: 190.214
Epochs: 5681 | epoch avg. loss: 1.160 | test avg. loss: 187.220


 11%|█▏        | 5684/50000 [08:16<51:19, 14.39it/s]

Epochs: 5682 | epoch avg. loss: 0.522 | test avg. loss: 189.536
Epochs: 5683 | epoch avg. loss: 0.442 | test avg. loss: 189.098
Epochs: 5684 | epoch avg. loss: 0.231 | test avg. loss: 189.577


 11%|█▏        | 5688/50000 [08:17<57:42, 12.80it/s]

Epochs: 5685 | epoch avg. loss: 0.146 | test avg. loss: 190.351
Epochs: 5686 | epoch avg. loss: 0.156 | test avg. loss: 191.183
Epochs: 5687 | epoch avg. loss: 0.171 | test avg. loss: 191.708


 11%|█▏        | 5690/50000 [08:17<58:36, 12.60it/s]

Epochs: 5688 | epoch avg. loss: 0.242 | test avg. loss: 190.964
Epochs: 5689 | epoch avg. loss: 0.376 | test avg. loss: 193.962
Epochs: 5690 | epoch avg. loss: 0.478 | test avg. loss: 191.762


 11%|█▏        | 5694/50000 [08:17<56:03, 13.17it/s]

Epochs: 5691 | epoch avg. loss: 0.434 | test avg. loss: 193.637
Epochs: 5692 | epoch avg. loss: 0.197 | test avg. loss: 193.600
Epochs: 5693 | epoch avg. loss: 0.145 | test avg. loss: 194.615


 11%|█▏        | 5696/50000 [08:17<55:20, 13.34it/s]

Epochs: 5694 | epoch avg. loss: 0.114 | test avg. loss: 195.140
Epochs: 5695 | epoch avg. loss: 0.113 | test avg. loss: 195.486
Epochs: 5696 | epoch avg. loss: 0.128 | test avg. loss: 196.298


 11%|█▏        | 5698/50000 [08:18<56:42, 13.02it/s]

Epochs: 5697 | epoch avg. loss: 0.091 | test avg. loss: 196.808
Epochs: 5698 | epoch avg. loss: 0.109 | test avg. loss: 198.311
Epochs: 5699 | epoch avg. loss: 0.211 | test avg. loss: 196.652


 11%|█▏        | 5702/50000 [08:19<2:26:58,  5.02it/s]

Epochs: 5700 | epoch avg. loss: 0.268 | test avg. loss: 198.052
Epochs: 5701 | epoch avg. loss: 0.279 | test avg. loss: 197.128
Epochs: 5702 | epoch avg. loss: 0.203 | test avg. loss: 197.961


 11%|█▏        | 5706/50000 [08:19<1:39:32,  7.42it/s]

Epochs: 5703 | epoch avg. loss: 0.179 | test avg. loss: 197.744
Epochs: 5704 | epoch avg. loss: 0.143 | test avg. loss: 196.600
Epochs: 5705 | epoch avg. loss: 0.130 | test avg. loss: 196.728


 11%|█▏        | 5708/50000 [08:19<1:27:27,  8.44it/s]

Epochs: 5706 | epoch avg. loss: 0.141 | test avg. loss: 195.427
Epochs: 5707 | epoch avg. loss: 0.213 | test avg. loss: 196.344
Epochs: 5708 | epoch avg. loss: 0.122 | test avg. loss: 195.041


 11%|█▏        | 5712/50000 [08:20<1:12:56, 10.12it/s]

Epochs: 5709 | epoch avg. loss: 0.213 | test avg. loss: 196.664
Epochs: 5710 | epoch avg. loss: 0.286 | test avg. loss: 194.781
Epochs: 5711 | epoch avg. loss: 0.426 | test avg. loss: 195.316


 11%|█▏        | 5714/50000 [08:20<1:09:09, 10.67it/s]

Epochs: 5712 | epoch avg. loss: 0.239 | test avg. loss: 195.040
Epochs: 5713 | epoch avg. loss: 0.385 | test avg. loss: 195.124
Epochs: 5714 | epoch avg. loss: 0.363 | test avg. loss: 198.418


 11%|█▏        | 5718/50000 [08:20<59:10, 12.47it/s]

Epochs: 5715 | epoch avg. loss: 0.571 | test avg. loss: 195.993
Epochs: 5716 | epoch avg. loss: 0.623 | test avg. loss: 200.661
Epochs: 5717 | epoch avg. loss: 1.124 | test avg. loss: 198.068
Epochs: 5718 | epoch avg. loss: 1.214 | test avg. loss: 200.996


 11%|█▏        | 5722/50000 [08:20<58:00, 12.72it/s]

Epochs: 5719 | epoch avg. loss: 0.933 | test avg. loss: 198.143
Epochs: 5720 | epoch avg. loss: 0.808 | test avg. loss: 196.386
Epochs: 5721 | epoch avg. loss: 0.503 | test avg. loss: 196.849


 11%|█▏        | 5724/50000 [08:21<1:01:45, 11.95it/s]

Epochs: 5722 | epoch avg. loss: 0.404 | test avg. loss: 194.888
Epochs: 5723 | epoch avg. loss: 0.699 | test avg. loss: 199.221
Epochs: 5724 | epoch avg. loss: 1.071 | test avg. loss: 196.135


 11%|█▏        | 5728/50000 [08:21<59:57, 12.30it/s]  

Epochs: 5725 | epoch avg. loss: 1.078 | test avg. loss: 198.848
Epochs: 5726 | epoch avg. loss: 0.849 | test avg. loss: 196.059
Epochs: 5727 | epoch avg. loss: 0.436 | test avg. loss: 195.142


 11%|█▏        | 5732/50000 [08:21<54:12, 13.61it/s]

Epochs: 5728 | epoch avg. loss: 0.592 | test avg. loss: 199.648
Epochs: 5729 | epoch avg. loss: 1.083 | test avg. loss: 195.986
Epochs: 5730 | epoch avg. loss: 0.985 | test avg. loss: 200.726
Epochs: 5731 | epoch avg. loss: 1.043 | test avg. loss: 196.317


 11%|█▏        | 5734/50000 [08:21<53:06, 13.89it/s]

Epochs: 5732 | epoch avg. loss: 0.594 | test avg. loss: 199.268
Epochs: 5733 | epoch avg. loss: 0.460 | test avg. loss: 198.899
Epochs: 5734 | epoch avg. loss: 0.356 | test avg. loss: 197.262


 11%|█▏        | 5738/50000 [08:22<59:31, 12.39it/s]

Epochs: 5735 | epoch avg. loss: 0.168 | test avg. loss: 196.393
Epochs: 5736 | epoch avg. loss: 0.161 | test avg. loss: 195.635
Epochs: 5737 | epoch avg. loss: 0.310 | test avg. loss: 196.289


 11%|█▏        | 5740/50000 [08:22<1:01:07, 12.07it/s]

Epochs: 5738 | epoch avg. loss: 0.370 | test avg. loss: 194.003
Epochs: 5739 | epoch avg. loss: 0.456 | test avg. loss: 196.930
Epochs: 5740 | epoch avg. loss: 0.528 | test avg. loss: 194.142


 11%|█▏        | 5744/50000 [08:22<58:45, 12.55it/s]  

Epochs: 5741 | epoch avg. loss: 0.670 | test avg. loss: 196.493
Epochs: 5742 | epoch avg. loss: 0.551 | test avg. loss: 194.967
Epochs: 5743 | epoch avg. loss: 0.372 | test avg. loss: 195.819


 11%|█▏        | 5746/50000 [08:22<57:50, 12.75it/s]

Epochs: 5744 | epoch avg. loss: 0.454 | test avg. loss: 194.724
Epochs: 5745 | epoch avg. loss: 0.509 | test avg. loss: 194.302
Epochs: 5746 | epoch avg. loss: 0.330 | test avg. loss: 195.603


 12%|█▏        | 5750/50000 [08:23<1:01:34, 11.98it/s]

Epochs: 5747 | epoch avg. loss: 0.317 | test avg. loss: 194.090
Epochs: 5748 | epoch avg. loss: 0.309 | test avg. loss: 196.254
Epochs: 5749 | epoch avg. loss: 0.239 | test avg. loss: 195.409


 12%|█▏        | 5752/50000 [08:23<1:01:00, 12.09it/s]

Epochs: 5750 | epoch avg. loss: 0.255 | test avg. loss: 197.324
Epochs: 5751 | epoch avg. loss: 0.186 | test avg. loss: 196.857
Epochs: 5752 | epoch avg. loss: 0.275 | test avg. loss: 197.770


 12%|█▏        | 5756/50000 [08:23<58:50, 12.53it/s]  

Epochs: 5753 | epoch avg. loss: 0.155 | test avg. loss: 198.043
Epochs: 5754 | epoch avg. loss: 0.177 | test avg. loss: 197.195
Epochs: 5755 | epoch avg. loss: 0.253 | test avg. loss: 199.311


 12%|█▏        | 5758/50000 [08:23<59:11, 12.46it/s]

Epochs: 5756 | epoch avg. loss: 0.357 | test avg. loss: 197.851
Epochs: 5757 | epoch avg. loss: 0.261 | test avg. loss: 199.685
Epochs: 5758 | epoch avg. loss: 0.245 | test avg. loss: 198.661


 12%|█▏        | 5762/50000 [08:24<1:02:18, 11.83it/s]

Epochs: 5759 | epoch avg. loss: 0.094 | test avg. loss: 198.863
Epochs: 5760 | epoch avg. loss: 0.083 | test avg. loss: 198.211
Epochs: 5761 | epoch avg. loss: 0.140 | test avg. loss: 199.164


 12%|█▏        | 5764/50000 [08:24<1:00:23, 12.21it/s]

Epochs: 5762 | epoch avg. loss: 0.208 | test avg. loss: 197.806
Epochs: 5763 | epoch avg. loss: 0.117 | test avg. loss: 197.804
Epochs: 5764 | epoch avg. loss: 0.141 | test avg. loss: 197.145


 12%|█▏        | 5768/50000 [08:24<55:44, 13.23it/s]

Epochs: 5765 | epoch avg. loss: 0.114 | test avg. loss: 195.589
Epochs: 5766 | epoch avg. loss: 0.093 | test avg. loss: 195.536
Epochs: 5767 | epoch avg. loss: 0.103 | test avg. loss: 194.629


 12%|█▏        | 5770/50000 [08:24<56:35, 13.03it/s]

Epochs: 5768 | epoch avg. loss: 0.185 | test avg. loss: 196.424
Epochs: 5769 | epoch avg. loss: 0.242 | test avg. loss: 195.576
Epochs: 5770 | epoch avg. loss: 0.246 | test avg. loss: 197.028


 12%|█▏        | 5774/50000 [08:25<56:05, 13.14it/s]

Epochs: 5771 | epoch avg. loss: 0.118 | test avg. loss: 197.232
Epochs: 5772 | epoch avg. loss: 0.141 | test avg. loss: 197.963
Epochs: 5773 | epoch avg. loss: 0.121 | test avg. loss: 198.692


 12%|█▏        | 5776/50000 [08:25<54:06, 13.62it/s]

Epochs: 5774 | epoch avg. loss: 0.135 | test avg. loss: 197.936
Epochs: 5775 | epoch avg. loss: 0.198 | test avg. loss: 199.859
Epochs: 5776 | epoch avg. loss: 0.321 | test avg. loss: 199.008


 12%|█▏        | 5780/50000 [08:25<51:42, 14.25it/s]

Epochs: 5777 | epoch avg. loss: 0.172 | test avg. loss: 199.758
Epochs: 5778 | epoch avg. loss: 0.120 | test avg. loss: 200.188
Epochs: 5779 | epoch avg. loss: 0.110 | test avg. loss: 199.941


 12%|█▏        | 5782/50000 [08:25<53:59, 13.65it/s]

Epochs: 5780 | epoch avg. loss: 0.155 | test avg. loss: 200.615
Epochs: 5781 | epoch avg. loss: 0.082 | test avg. loss: 199.574
Epochs: 5782 | epoch avg. loss: 0.077 | test avg. loss: 200.288


 12%|█▏        | 5786/50000 [08:25<58:38, 12.57it/s]

Epochs: 5783 | epoch avg. loss: 0.094 | test avg. loss: 200.762
Epochs: 5784 | epoch avg. loss: 0.136 | test avg. loss: 200.689
Epochs: 5785 | epoch avg. loss: 0.096 | test avg. loss: 200.381


 12%|█▏        | 5788/50000 [08:26<58:15, 12.65it/s]

Epochs: 5786 | epoch avg. loss: 0.110 | test avg. loss: 198.863
Epochs: 5787 | epoch avg. loss: 0.191 | test avg. loss: 199.988
Epochs: 5788 | epoch avg. loss: 0.230 | test avg. loss: 198.172


 12%|█▏        | 5792/50000 [08:26<1:02:15, 11.84it/s]

Epochs: 5789 | epoch avg. loss: 0.351 | test avg. loss: 199.690
Epochs: 5790 | epoch avg. loss: 0.221 | test avg. loss: 198.253
Epochs: 5791 | epoch avg. loss: 0.307 | test avg. loss: 199.436


 12%|█▏        | 5794/50000 [08:26<1:02:04, 11.87it/s]

Epochs: 5792 | epoch avg. loss: 0.216 | test avg. loss: 200.637
Epochs: 5793 | epoch avg. loss: 0.230 | test avg. loss: 199.304
Epochs: 5794 | epoch avg. loss: 0.334 | test avg. loss: 201.332


 12%|█▏        | 5798/50000 [08:27<1:03:20, 11.63it/s]

Epochs: 5795 | epoch avg. loss: 0.551 | test avg. loss: 198.474
Epochs: 5796 | epoch avg. loss: 0.414 | test avg. loss: 198.803
Epochs: 5797 | epoch avg. loss: 0.159 | test avg. loss: 198.830


 12%|█▏        | 5798/50000 [08:27<1:03:20, 11.63it/s]

Epochs: 5798 | epoch avg. loss: 0.152 | test avg. loss: 197.457
Epochs: 5799 | epoch avg. loss: 0.184 | test avg. loss: 198.686


 12%|█▏        | 5802/50000 [08:28<2:53:13,  4.25it/s]

Epochs: 5800 | epoch avg. loss: 0.134 | test avg. loss: 197.516
Epochs: 5801 | epoch avg. loss: 0.385 | test avg. loss: 198.692
Epochs: 5802 | epoch avg. loss: 0.157 | test avg. loss: 198.338


 12%|█▏        | 5806/50000 [08:29<1:53:15,  6.50it/s]

Epochs: 5803 | epoch avg. loss: 0.139 | test avg. loss: 199.378
Epochs: 5804 | epoch avg. loss: 0.131 | test avg. loss: 201.228
Epochs: 5805 | epoch avg. loss: 0.163 | test avg. loss: 200.403


 12%|█▏        | 5808/50000 [08:29<1:38:13,  7.50it/s]

Epochs: 5806 | epoch avg. loss: 0.275 | test avg. loss: 202.091
Epochs: 5807 | epoch avg. loss: 0.173 | test avg. loss: 201.309
Epochs: 5808 | epoch avg. loss: 0.137 | test avg. loss: 202.115


 12%|█▏        | 5812/50000 [08:29<1:17:16,  9.53it/s]

Epochs: 5809 | epoch avg. loss: 0.203 | test avg. loss: 200.896
Epochs: 5810 | epoch avg. loss: 0.118 | test avg. loss: 200.713
Epochs: 5811 | epoch avg. loss: 0.114 | test avg. loss: 202.103


 12%|█▏        | 5814/50000 [08:29<1:10:25, 10.46it/s]

Epochs: 5812 | epoch avg. loss: 0.176 | test avg. loss: 200.357
Epochs: 5813 | epoch avg. loss: 0.148 | test avg. loss: 199.775
Epochs: 5814 | epoch avg. loss: 0.085 | test avg. loss: 199.117


 12%|█▏        | 5818/50000 [08:30<1:04:27, 11.42it/s]

Epochs: 5815 | epoch avg. loss: 0.105 | test avg. loss: 199.804
Epochs: 5816 | epoch avg. loss: 0.081 | test avg. loss: 199.785
Epochs: 5817 | epoch avg. loss: 0.088 | test avg. loss: 200.731


 12%|█▏        | 5820/50000 [08:30<1:02:23, 11.80it/s]

Epochs: 5818 | epoch avg. loss: 0.071 | test avg. loss: 200.746
Epochs: 5819 | epoch avg. loss: 0.140 | test avg. loss: 200.725
Epochs: 5820 | epoch avg. loss: 0.077 | test avg. loss: 200.177


 12%|█▏        | 5824/50000 [08:30<55:06, 13.36it/s]

Epochs: 5821 | epoch avg. loss: 0.115 | test avg. loss: 199.042
Epochs: 5822 | epoch avg. loss: 0.204 | test avg. loss: 199.797
Epochs: 5823 | epoch avg. loss: 0.107 | test avg. loss: 198.263
Epochs: 5824 | epoch avg. loss: 0.357 | test avg. loss: 199.789


 12%|█▏        | 5828/50000 [08:30<53:04, 13.87it/s]

Epochs: 5825 | epoch avg. loss: 0.205 | test avg. loss: 198.708
Epochs: 5826 | epoch avg. loss: 0.105 | test avg. loss: 198.483
Epochs: 5827 | epoch avg. loss: 0.125 | test avg. loss: 200.306


 12%|█▏        | 5830/50000 [08:30<55:01, 13.38it/s]

Epochs: 5828 | epoch avg. loss: 0.282 | test avg. loss: 198.594
Epochs: 5829 | epoch avg. loss: 0.254 | test avg. loss: 199.135
Epochs: 5830 | epoch avg. loss: 0.065 | test avg. loss: 198.620


 12%|█▏        | 5834/50000 [08:31<56:01, 13.14it/s]

Epochs: 5831 | epoch avg. loss: 0.109 | test avg. loss: 198.344
Epochs: 5832 | epoch avg. loss: 0.104 | test avg. loss: 199.438
Epochs: 5833 | epoch avg. loss: 0.159 | test avg. loss: 198.189


 12%|█▏        | 5836/50000 [08:31<54:49, 13.42it/s]

Epochs: 5834 | epoch avg. loss: 0.132 | test avg. loss: 198.518
Epochs: 5835 | epoch avg. loss: 0.081 | test avg. loss: 198.148
Epochs: 5836 | epoch avg. loss: 0.103 | test avg. loss: 197.618


 12%|█▏        | 5840/50000 [08:31<56:17, 13.07it/s]

Epochs: 5837 | epoch avg. loss: 0.131 | test avg. loss: 199.039
Epochs: 5838 | epoch avg. loss: 0.324 | test avg. loss: 196.399
Epochs: 5839 | epoch avg. loss: 0.453 | test avg. loss: 197.599


 12%|█▏        | 5842/50000 [08:31<59:30, 12.37it/s]

Epochs: 5840 | epoch avg. loss: 0.400 | test avg. loss: 195.985
Epochs: 5841 | epoch avg. loss: 0.245 | test avg. loss: 195.449
Epochs: 5842 | epoch avg. loss: 0.124 | test avg. loss: 195.292


 12%|█▏        | 5846/50000 [08:32<1:01:47, 11.91it/s]

Epochs: 5843 | epoch avg. loss: 0.118 | test avg. loss: 194.797
Epochs: 5844 | epoch avg. loss: 0.136 | test avg. loss: 196.185
Epochs: 5845 | epoch avg. loss: 0.233 | test avg. loss: 194.852


 12%|█▏        | 5848/50000 [08:32<1:00:03, 12.25it/s]

Epochs: 5846 | epoch avg. loss: 0.106 | test avg. loss: 195.091
Epochs: 5847 | epoch avg. loss: 0.063 | test avg. loss: 195.361
Epochs: 5848 | epoch avg. loss: 0.062 | test avg. loss: 195.924


 12%|█▏        | 5852/50000 [08:32<53:56, 13.64it/s]

Epochs: 5849 | epoch avg. loss: 0.060 | test avg. loss: 196.237
Epochs: 5850 | epoch avg. loss: 0.092 | test avg. loss: 197.836
Epochs: 5851 | epoch avg. loss: 0.131 | test avg. loss: 198.213
Epochs: 5852 | epoch avg. loss: 0.097 | test avg. loss: 198.076


 12%|█▏        | 5856/50000 [08:32<55:11, 13.33it/s]

Epochs: 5853 | epoch avg. loss: 0.158 | test avg. loss: 198.961
Epochs: 5854 | epoch avg. loss: 0.064 | test avg. loss: 197.923
Epochs: 5855 | epoch avg. loss: 0.204 | test avg. loss: 198.213


 12%|█▏        | 5858/50000 [08:33<58:22, 12.60it/s]

Epochs: 5856 | epoch avg. loss: 0.143 | test avg. loss: 198.214
Epochs: 5857 | epoch avg. loss: 0.145 | test avg. loss: 196.946
Epochs: 5858 | epoch avg. loss: 0.186 | test avg. loss: 199.493


 12%|█▏        | 5862/50000 [08:33<57:33, 12.78it/s]

Epochs: 5859 | epoch avg. loss: 0.372 | test avg. loss: 198.211
Epochs: 5860 | epoch avg. loss: 0.321 | test avg. loss: 199.858
Epochs: 5861 | epoch avg. loss: 0.176 | test avg. loss: 198.537


 12%|█▏        | 5864/50000 [08:33<56:29, 13.02it/s]

Epochs: 5862 | epoch avg. loss: 0.569 | test avg. loss: 199.387
Epochs: 5863 | epoch avg. loss: 0.579 | test avg. loss: 201.602
Epochs: 5864 | epoch avg. loss: 0.312 | test avg. loss: 197.157


 12%|█▏        | 5868/50000 [08:33<55:27, 13.26it/s]

Epochs: 5865 | epoch avg. loss: 0.358 | test avg. loss: 196.940
Epochs: 5866 | epoch avg. loss: 0.402 | test avg. loss: 195.300
Epochs: 5867 | epoch avg. loss: 0.230 | test avg. loss: 195.431


 12%|█▏        | 5870/50000 [08:34<57:26, 12.80it/s]

Epochs: 5868 | epoch avg. loss: 0.274 | test avg. loss: 196.986
Epochs: 5869 | epoch avg. loss: 0.244 | test avg. loss: 196.175
Epochs: 5870 | epoch avg. loss: 0.243 | test avg. loss: 197.596


 12%|█▏        | 5874/50000 [08:34<1:00:27, 12.17it/s]

Epochs: 5871 | epoch avg. loss: 0.135 | test avg. loss: 196.455
Epochs: 5872 | epoch avg. loss: 0.118 | test avg. loss: 196.224
Epochs: 5873 | epoch avg. loss: 0.145 | test avg. loss: 196.633


 12%|█▏        | 5876/50000 [08:34<59:41, 12.32it/s]

Epochs: 5874 | epoch avg. loss: 0.249 | test avg. loss: 194.331
Epochs: 5875 | epoch avg. loss: 0.218 | test avg. loss: 195.480
Epochs: 5876 | epoch avg. loss: 0.319 | test avg. loss: 194.509


 12%|█▏        | 5880/50000 [08:34<59:00, 12.46it/s]  

Epochs: 5877 | epoch avg. loss: 0.220 | test avg. loss: 194.952
Epochs: 5878 | epoch avg. loss: 0.156 | test avg. loss: 196.661
Epochs: 5879 | epoch avg. loss: 0.134 | test avg. loss: 196.254


                                                      

Epochs: 5880 | epoch avg. loss: 0.093 | test avg. loss: 197.026
Epochs: 5881 | epoch avg. loss: 0.063 | test avg. loss: 197.209


 12%|█▏        | 5884/50000 [08:35<1:05:05, 11.30it/s]

Epochs: 5882 | epoch avg. loss: 0.079 | test avg. loss: 197.132
Epochs: 5883 | epoch avg. loss: 0.087 | test avg. loss: 198.348
Epochs: 5884 | epoch avg. loss: 0.123 | test avg. loss: 197.258


 12%|█▏        | 5888/50000 [08:35<58:05, 12.66it/s]  

Epochs: 5885 | epoch avg. loss: 0.074 | test avg. loss: 198.757
Epochs: 5886 | epoch avg. loss: 0.230 | test avg. loss: 197.723
Epochs: 5887 | epoch avg. loss: 0.197 | test avg. loss: 198.782


 12%|█▏        | 5892/50000 [08:35<53:49, 13.66it/s]

Epochs: 5888 | epoch avg. loss: 0.171 | test avg. loss: 200.883
Epochs: 5889 | epoch avg. loss: 0.328 | test avg. loss: 198.116
Epochs: 5890 | epoch avg. loss: 0.247 | test avg. loss: 198.972
Epochs: 5891 | epoch avg. loss: 0.155 | test avg. loss: 198.285


 12%|█▏        | 5894/50000 [08:35<56:52, 12.93it/s]

Epochs: 5892 | epoch avg. loss: 0.079 | test avg. loss: 197.444
Epochs: 5893 | epoch avg. loss: 0.058 | test avg. loss: 196.912


 12%|█▏        | 5896/50000 [08:36<1:00:10, 12.21it/s]

Epochs: 5894 | epoch avg. loss: 0.104 | test avg. loss: 196.802
Epochs: 5895 | epoch avg. loss: 0.083 | test avg. loss: 197.938
Epochs: 5896 | epoch avg. loss: 0.157 | test avg. loss: 196.774


 12%|█▏        | 5898/50000 [08:36<57:19, 12.82it/s]

Epochs: 5897 | epoch avg. loss: 0.101 | test avg. loss: 196.830
Epochs: 5898 | epoch avg. loss: 0.069 | test avg. loss: 197.189
Epochs: 5899 | epoch avg. loss: 0.070 | test avg. loss: 196.574


 12%|█▏        | 5902/50000 [08:37<2:35:18,  4.73it/s]

Epochs: 5900 | epoch avg. loss: 0.084 | test avg. loss: 198.085
Epochs: 5901 | epoch avg. loss: 0.239 | test avg. loss: 196.444
Epochs: 5902 | epoch avg. loss: 0.116 | test avg. loss: 197.069


 12%|█▏        | 5906/50000 [08:38<1:43:24,  7.11it/s]

Epochs: 5903 | epoch avg. loss: 0.174 | test avg. loss: 195.969
Epochs: 5904 | epoch avg. loss: 0.122 | test avg. loss: 194.510
Epochs: 5905 | epoch avg. loss: 0.188 | test avg. loss: 196.659
Epochs: 5906 | epoch avg. loss: 0.426 | test avg. loss: 194.638


 12%|█▏        | 5910/50000 [08:38<1:15:22,  9.75it/s]

Epochs: 5907 | epoch avg. loss: 0.373 | test avg. loss: 195.405
Epochs: 5908 | epoch avg. loss: 0.184 | test avg. loss: 197.651
Epochs: 5909 | epoch avg. loss: 0.126 | test avg. loss: 196.876
Epochs: 5910 | epoch avg. loss: 0.235 | test avg. loss: 198.286


 12%|█▏        | 5914/50000 [08:38<1:00:43, 12.10it/s]

Epochs: 5911 | epoch avg. loss: 0.111 | test avg. loss: 197.255
Epochs: 5912 | epoch avg. loss: 0.158 | test avg. loss: 197.799
Epochs: 5913 | epoch avg. loss: 0.420 | test avg. loss: 198.119
Epochs: 5914 | epoch avg. loss: 0.206 | test avg. loss: 197.590


 12%|█▏        | 5916/50000 [08:38<58:32, 12.55it/s]

Epochs: 5915 | epoch avg. loss: 0.213 | test avg. loss: 200.340
Epochs: 5916 | epoch avg. loss: 0.262 | test avg. loss: 199.996


 12%|█▏        | 5920/50000 [08:39<1:04:07, 11.46it/s]

Epochs: 5917 | epoch avg. loss: 0.378 | test avg. loss: 198.750
Epochs: 5918 | epoch avg. loss: 0.158 | test avg. loss: 197.871
Epochs: 5919 | epoch avg. loss: 0.103 | test avg. loss: 195.553


 12%|█▏        | 5922/50000 [08:39<1:05:43, 11.18it/s]

Epochs: 5920 | epoch avg. loss: 0.181 | test avg. loss: 195.959
Epochs: 5921 | epoch avg. loss: 0.183 | test avg. loss: 197.109
Epochs: 5922 | epoch avg. loss: 0.144 | test avg. loss: 197.006


 12%|█▏        | 5926/50000 [08:39<57:43, 12.72it/s]

Epochs: 5923 | epoch avg. loss: 0.182 | test avg. loss: 199.885
Epochs: 5924 | epoch avg. loss: 0.285 | test avg. loss: 198.919
Epochs: 5925 | epoch avg. loss: 0.294 | test avg. loss: 197.796
Epochs: 5926 | epoch avg. loss: 0.192 | test avg. loss: 197.938




Epochs: 5927 | epoch avg. loss: 0.166 | test avg. loss: 196.821
Epochs: 5928 | epoch avg. loss: 0.117 | test avg. loss: 197.196


 12%|█▏        | 5932/50000 [08:40<59:55, 12.26it/s]  

Epochs: 5929 | epoch avg. loss: 0.120 | test avg. loss: 199.814
Epochs: 5930 | epoch avg. loss: 0.261 | test avg. loss: 198.309
Epochs: 5931 | epoch avg. loss: 0.787 | test avg. loss: 199.004


 12%|█▏        | 5936/50000 [08:40<53:55, 13.62it/s]

Epochs: 5932 | epoch avg. loss: 0.159 | test avg. loss: 198.284
Epochs: 5933 | epoch avg. loss: 0.274 | test avg. loss: 196.719
Epochs: 5934 | epoch avg. loss: 0.413 | test avg. loss: 198.607
Epochs: 5935 | epoch avg. loss: 0.464 | test avg. loss: 196.336


 12%|█▏        | 5940/50000 [08:40<49:48, 14.74it/s]

Epochs: 5936 | epoch avg. loss: 0.249 | test avg. loss: 196.157
Epochs: 5937 | epoch avg. loss: 0.134 | test avg. loss: 196.893
Epochs: 5938 | epoch avg. loss: 0.166 | test avg. loss: 196.579
Epochs: 5939 | epoch avg. loss: 0.232 | test avg. loss: 197.335


 12%|█▏        | 5942/50000 [08:40<48:33, 15.12it/s]

Epochs: 5940 | epoch avg. loss: 0.258 | test avg. loss: 194.498
Epochs: 5941 | epoch avg. loss: 0.154 | test avg. loss: 193.717
Epochs: 5942 | epoch avg. loss: 0.141 | test avg. loss: 195.178


 12%|█▏        | 5946/50000 [08:41<56:27, 13.01it/s]

Epochs: 5943 | epoch avg. loss: 0.258 | test avg. loss: 195.738
Epochs: 5944 | epoch avg. loss: 0.089 | test avg. loss: 196.025
Epochs: 5945 | epoch avg. loss: 0.131 | test avg. loss: 198.286


 12%|█▏        | 5948/50000 [08:41<57:48, 12.70it/s]

Epochs: 5946 | epoch avg. loss: 0.336 | test avg. loss: 198.020
Epochs: 5947 | epoch avg. loss: 0.152 | test avg. loss: 196.539
Epochs: 5948 | epoch avg. loss: 0.325 | test avg. loss: 198.404


 12%|█▏        | 5952/50000 [08:41<56:30, 12.99it/s]

Epochs: 5949 | epoch avg. loss: 0.198 | test avg. loss: 196.871
Epochs: 5950 | epoch avg. loss: 0.233 | test avg. loss: 197.830
Epochs: 5951 | epoch avg. loss: 0.100 | test avg. loss: 198.629


 12%|█▏        | 5954/50000 [08:41<56:50, 12.91it/s]

Epochs: 5952 | epoch avg. loss: 0.076 | test avg. loss: 198.652
Epochs: 5953 | epoch avg. loss: 0.069 | test avg. loss: 199.375
Epochs: 5954 | epoch avg. loss: 0.086 | test avg. loss: 198.054


 12%|█▏        | 5958/50000 [08:42<1:05:08, 11.27it/s]

Epochs: 5955 | epoch avg. loss: 0.192 | test avg. loss: 198.572
Epochs: 5956 | epoch avg. loss: 0.104 | test avg. loss: 198.954
Epochs: 5957 | epoch avg. loss: 0.122 | test avg. loss: 197.971




Epochs: 5958 | epoch avg. loss: 0.074 | test avg. loss: 198.098
Epochs: 5959 | epoch avg. loss: 0.067 | test avg. loss: 198.280
Epochs: 5960 | epoch avg. loss: 0.073 | test avg. loss: 198.248


 12%|█▏        | 5964/50000 [08:42<55:45, 13.16it/s]

Epochs: 5961 | epoch avg. loss: 0.094 | test avg. loss: 198.881
Epochs: 5962 | epoch avg. loss: 0.128 | test avg. loss: 197.201
Epochs: 5963 | epoch avg. loss: 0.356 | test avg. loss: 197.358


 12%|█▏        | 5966/50000 [08:42<55:41, 13.18it/s]

Epochs: 5964 | epoch avg. loss: 0.126 | test avg. loss: 198.830
Epochs: 5965 | epoch avg. loss: 0.211 | test avg. loss: 197.385
Epochs: 5966 | epoch avg. loss: 0.084 | test avg. loss: 197.666


 12%|█▏        | 5968/50000 [08:43<58:10, 12.61it/s]

Epochs: 5967 | epoch avg. loss: 0.081 | test avg. loss: 197.268
Epochs: 5968 | epoch avg. loss: 0.069 | test avg. loss: 196.713


 12%|█▏        | 5972/50000 [08:43<1:06:05, 11.10it/s]

Epochs: 5969 | epoch avg. loss: 0.080 | test avg. loss: 196.602
Epochs: 5970 | epoch avg. loss: 0.060 | test avg. loss: 197.019
Epochs: 5971 | epoch avg. loss: 0.065 | test avg. loss: 196.302


 12%|█▏        | 5974/50000 [08:43<1:03:53, 11.48it/s]

Epochs: 5972 | epoch avg. loss: 0.122 | test avg. loss: 196.301
Epochs: 5973 | epoch avg. loss: 0.108 | test avg. loss: 198.356
Epochs: 5974 | epoch avg. loss: 0.286 | test avg. loss: 196.701


 12%|█▏        | 5978/50000 [08:43<59:58, 12.23it/s]  

Epochs: 5975 | epoch avg. loss: 0.277 | test avg. loss: 198.165
Epochs: 5976 | epoch avg. loss: 0.191 | test avg. loss: 198.780
Epochs: 5977 | epoch avg. loss: 0.169 | test avg. loss: 197.229


 12%|█▏        | 5980/50000 [08:44<1:05:06, 11.27it/s]

Epochs: 5978 | epoch avg. loss: 0.088 | test avg. loss: 197.663
Epochs: 5979 | epoch avg. loss: 0.117 | test avg. loss: 197.083
Epochs: 5980 | epoch avg. loss: 0.111 | test avg. loss: 195.939


 12%|█▏        | 5984/50000 [08:44<58:31, 12.53it/s]  

Epochs: 5981 | epoch avg. loss: 0.143 | test avg. loss: 197.330
Epochs: 5982 | epoch avg. loss: 0.314 | test avg. loss: 194.816
Epochs: 5983 | epoch avg. loss: 0.259 | test avg. loss: 195.501


 12%|█▏        | 5986/50000 [08:44<55:23, 13.24it/s]

Epochs: 5984 | epoch avg. loss: 0.172 | test avg. loss: 197.038
Epochs: 5985 | epoch avg. loss: 0.217 | test avg. loss: 195.327
Epochs: 5986 | epoch avg. loss: 0.582 | test avg. loss: 196.135


 12%|█▏        | 5990/50000 [08:44<55:53, 13.12it/s]

Epochs: 5987 | epoch avg. loss: 0.190 | test avg. loss: 194.834
Epochs: 5988 | epoch avg. loss: 0.152 | test avg. loss: 193.855
Epochs: 5989 | epoch avg. loss: 0.244 | test avg. loss: 195.473


 12%|█▏        | 5992/50000 [08:44<53:52, 13.61it/s]

Epochs: 5990 | epoch avg. loss: 0.111 | test avg. loss: 195.500
Epochs: 5991 | epoch avg. loss: 0.062 | test avg. loss: 196.030
Epochs: 5992 | epoch avg. loss: 0.121 | test avg. loss: 196.545


 12%|█▏        | 5996/50000 [08:45<59:17, 12.37it/s]

Epochs: 5993 | epoch avg. loss: 0.120 | test avg. loss: 195.887
Epochs: 5994 | epoch avg. loss: 0.103 | test avg. loss: 196.252
Epochs: 5995 | epoch avg. loss: 0.095 | test avg. loss: 197.942


 12%|█▏        | 5998/50000 [08:45<58:23, 12.56it/s]

Epochs: 5996 | epoch avg. loss: 0.133 | test avg. loss: 198.519
Epochs: 5997 | epoch avg. loss: 0.084 | test avg. loss: 198.935
Epochs: 5998 | epoch avg. loss: 0.091 | test avg. loss: 200.674


 12%|█▏        | 5998/50000 [08:45<58:23, 12.56it/s]

Epochs: 5999 | epoch avg. loss: 0.286 | test avg. loss: 199.519


 12%|█▏        | 6002/50000 [08:47<2:40:29,  4.57it/s]

Epochs: 6000 | epoch avg. loss: 0.079 | test avg. loss: 198.869
Epochs: 6001 | epoch avg. loss: 0.081 | test avg. loss: 199.632
Epochs: 6002 | epoch avg. loss: 0.119 | test avg. loss: 198.452


 12%|█▏        | 6006/50000 [08:47<1:47:48,  6.80it/s]

Epochs: 6003 | epoch avg. loss: 0.147 | test avg. loss: 198.837
Epochs: 6004 | epoch avg. loss: 0.081 | test avg. loss: 199.725
Epochs: 6005 | epoch avg. loss: 0.132 | test avg. loss: 198.765


 12%|█▏        | 6008/50000 [08:47<1:33:46,  7.82it/s]

Epochs: 6006 | epoch avg. loss: 0.098 | test avg. loss: 199.450
Epochs: 6007 | epoch avg. loss: 0.061 | test avg. loss: 200.035
Epochs: 6008 | epoch avg. loss: 0.063 | test avg. loss: 199.530


 12%|█▏        | 6012/50000 [08:47<1:13:38,  9.96it/s]

Epochs: 6009 | epoch avg. loss: 0.116 | test avg. loss: 199.827
Epochs: 6010 | epoch avg. loss: 0.070 | test avg. loss: 200.375
Epochs: 6011 | epoch avg. loss: 0.068 | test avg. loss: 199.896


 12%|█▏        | 6014/50000 [08:48<1:11:59, 10.18it/s]

Epochs: 6012 | epoch avg. loss: 0.068 | test avg. loss: 199.964
Epochs: 6013 | epoch avg. loss: 0.041 | test avg. loss: 199.407
Epochs: 6014 | epoch avg. loss: 0.090 | test avg. loss: 199.373


 12%|█▏        | 6018/50000 [08:48<1:10:16, 10.43it/s]

Epochs: 6015 | epoch avg. loss: 0.069 | test avg. loss: 200.957
Epochs: 6016 | epoch avg. loss: 0.274 | test avg. loss: 199.757
Epochs: 6017 | epoch avg. loss: 0.198 | test avg. loss: 198.991


 12%|█▏        | 6020/50000 [08:48<1:07:57, 10.79it/s]

Epochs: 6018 | epoch avg. loss: 0.121 | test avg. loss: 199.485
Epochs: 6019 | epoch avg. loss: 0.171 | test avg. loss: 198.615
Epochs: 6020 | epoch avg. loss: 0.197 | test avg. loss: 200.352


 12%|█▏        | 6024/50000 [08:48<1:06:10, 11.08it/s]

Epochs: 6021 | epoch avg. loss: 0.236 | test avg. loss: 198.978
Epochs: 6022 | epoch avg. loss: 0.141 | test avg. loss: 196.841
Epochs: 6023 | epoch avg. loss: 0.193 | test avg. loss: 198.647


 12%|█▏        | 6026/50000 [08:49<1:08:01, 10.77it/s]

Epochs: 6024 | epoch avg. loss: 0.415 | test avg. loss: 195.636
Epochs: 6025 | epoch avg. loss: 0.431 | test avg. loss: 196.014
Epochs: 6026 | epoch avg. loss: 0.196 | test avg. loss: 198.533


 12%|█▏        | 6030/50000 [08:49<59:50, 12.25it/s]  

Epochs: 6027 | epoch avg. loss: 0.411 | test avg. loss: 196.134
Epochs: 6028 | epoch avg. loss: 0.401 | test avg. loss: 197.392
Epochs: 6029 | epoch avg. loss: 0.134 | test avg. loss: 198.709


                                                    

Epochs: 6030 | epoch avg. loss: 0.159 | test avg. loss: 197.562
Epochs: 6031 | epoch avg. loss: 0.488 | test avg. loss: 199.349
Epochs: 6032 | epoch avg. loss: 0.098 | test avg. loss: 200.381


 12%|█▏        | 6036/50000 [08:49<53:13, 13.77it/s]

Epochs: 6033 | epoch avg. loss: 0.106 | test avg. loss: 200.057
Epochs: 6034 | epoch avg. loss: 0.168 | test avg. loss: 201.518
Epochs: 6035 | epoch avg. loss: 0.223 | test avg. loss: 200.656


 12%|█▏        | 6038/50000 [08:50<58:01, 12.63it/s]

Epochs: 6036 | epoch avg. loss: 0.086 | test avg. loss: 199.800
Epochs: 6037 | epoch avg. loss: 0.094 | test avg. loss: 201.076
Epochs: 6038 | epoch avg. loss: 0.229 | test avg. loss: 199.337


 12%|█▏        | 6042/50000 [08:50<1:02:49, 11.66it/s]

Epochs: 6039 | epoch avg. loss: 0.113 | test avg. loss: 199.630
Epochs: 6040 | epoch avg. loss: 0.247 | test avg. loss: 201.945
Epochs: 6041 | epoch avg. loss: 0.904 | test avg. loss: 197.625


 12%|█▏        | 6044/50000 [08:50<1:03:52, 11.47it/s]

Epochs: 6042 | epoch avg. loss: 0.592 | test avg. loss: 197.095
Epochs: 6043 | epoch avg. loss: 0.397 | test avg. loss: 200.956
Epochs: 6044 | epoch avg. loss: 0.919 | test avg. loss: 196.407


 12%|█▏        | 6048/50000 [08:50<1:05:49, 11.13it/s]

Epochs: 6045 | epoch avg. loss: 1.046 | test avg. loss: 196.329
Epochs: 6046 | epoch avg. loss: 0.397 | test avg. loss: 196.335
Epochs: 6047 | epoch avg. loss: 0.396 | test avg. loss: 193.385


 12%|█▏        | 6050/50000 [08:51<1:07:13, 10.89it/s]

Epochs: 6048 | epoch avg. loss: 0.563 | test avg. loss: 196.241
Epochs: 6049 | epoch avg. loss: 1.263 | test avg. loss: 196.619
Epochs: 6050 | epoch avg. loss: 0.459 | test avg. loss: 195.338


 12%|█▏        | 6054/50000 [08:51<1:01:46, 11.86it/s]

Epochs: 6051 | epoch avg. loss: 0.708 | test avg. loss: 200.954
Epochs: 6052 | epoch avg. loss: 0.694 | test avg. loss: 198.537
Epochs: 6053 | epoch avg. loss: 0.360 | test avg. loss: 194.644


 12%|█▏        | 6056/50000 [08:51<59:42, 12.27it/s]

Epochs: 6054 | epoch avg. loss: 0.997 | test avg. loss: 199.628
Epochs: 6055 | epoch avg. loss: 3.407 | test avg. loss: 194.018
Epochs: 6056 | epoch avg. loss: 3.827 | test avg. loss: 197.466


 12%|█▏        | 6060/50000 [08:51<56:46, 12.90it/s]

Epochs: 6057 | epoch avg. loss: 2.401 | test avg. loss: 212.625
Epochs: 6058 | epoch avg. loss: 6.497 | test avg. loss: 200.693
Epochs: 6059 | epoch avg. loss: 1.895 | test avg. loss: 198.293


 12%|█▏        | 6062/50000 [08:52<56:45, 12.90it/s]

Epochs: 6060 | epoch avg. loss: 1.035 | test avg. loss: 202.389
Epochs: 6061 | epoch avg. loss: 1.728 | test avg. loss: 193.895
Epochs: 6062 | epoch avg. loss: 1.158 | test avg. loss: 200.488


 12%|█▏        | 6066/50000 [08:52<58:17, 12.56it/s]

Epochs: 6063 | epoch avg. loss: 2.793 | test avg. loss: 195.947
Epochs: 6064 | epoch avg. loss: 1.252 | test avg. loss: 198.303
Epochs: 6065 | epoch avg. loss: 0.764 | test avg. loss: 204.272


 12%|█▏        | 6068/50000 [08:52<54:54, 13.33it/s]

Epochs: 6066 | epoch avg. loss: 0.595 | test avg. loss: 203.089
Epochs: 6067 | epoch avg. loss: 1.039 | test avg. loss: 205.927
Epochs: 6068 | epoch avg. loss: 0.331 | test avg. loss: 202.806


 12%|█▏        | 6072/50000 [08:52<54:05, 13.53it/s]

Epochs: 6069 | epoch avg. loss: 0.826 | test avg. loss: 200.457
Epochs: 6070 | epoch avg. loss: 0.483 | test avg. loss: 201.251
Epochs: 6071 | epoch avg. loss: 0.728 | test avg. loss: 194.813


 12%|█▏        | 6074/50000 [08:52<55:56, 13.09it/s]

Epochs: 6072 | epoch avg. loss: 1.077 | test avg. loss: 196.217
Epochs: 6073 | epoch avg. loss: 1.156 | test avg. loss: 193.172
Epochs: 6074 | epoch avg. loss: 0.517 | test avg. loss: 193.100


 12%|█▏        | 6078/50000 [08:53<54:45, 13.37it/s]

Epochs: 6075 | epoch avg. loss: 0.421 | test avg. loss: 196.190
Epochs: 6076 | epoch avg. loss: 0.513 | test avg. loss: 194.770
Epochs: 6077 | epoch avg. loss: 0.695 | test avg. loss: 200.011


 12%|█▏        | 6080/50000 [08:53<55:19, 13.23it/s]

Epochs: 6078 | epoch avg. loss: 0.782 | test avg. loss: 197.740
Epochs: 6079 | epoch avg. loss: 0.902 | test avg. loss: 200.638
Epochs: 6080 | epoch avg. loss: 0.705 | test avg. loss: 202.413


 12%|█▏        | 6084/50000 [08:53<51:53, 14.10it/s]

Epochs: 6081 | epoch avg. loss: 0.721 | test avg. loss: 197.099
Epochs: 6082 | epoch avg. loss: 0.994 | test avg. loss: 201.835
Epochs: 6083 | epoch avg. loss: 2.025 | test avg. loss: 196.188
Epochs: 6084 | epoch avg. loss: 2.152 | test avg. loss: 197.783


 12%|█▏        | 6088/50000 [08:53<50:45, 14.42it/s]

Epochs: 6085 | epoch avg. loss: 1.783 | test avg. loss: 197.252
Epochs: 6086 | epoch avg. loss: 0.931 | test avg. loss: 192.956
Epochs: 6087 | epoch avg. loss: 0.475 | test avg. loss: 195.374


 12%|█▏        | 6090/50000 [08:54<51:36, 14.18it/s]

Epochs: 6088 | epoch avg. loss: 0.519 | test avg. loss: 193.032
Epochs: 6089 | epoch avg. loss: 0.713 | test avg. loss: 197.014
Epochs: 6090 | epoch avg. loss: 0.581 | test avg. loss: 196.013
Epochs: 6091 | epoch avg. loss: 0.419 | test avg. loss: 196.823


                                                    

Epochs: 6092 | epoch avg. loss: 0.313 | test avg. loss: 199.449
Epochs: 6093 | epoch avg. loss: 0.416 | test avg. loss: 195.774
Epochs: 6094 | epoch avg. loss: 0.354 | test avg. loss: 197.873


 12%|█▏        | 6098/50000 [08:54<49:40, 14.73it/s]

Epochs: 6095 | epoch avg. loss: 0.331 | test avg. loss: 195.656
Epochs: 6096 | epoch avg. loss: 0.414 | test avg. loss: 196.345
Epochs: 6097 | epoch avg. loss: 0.202 | test avg. loss: 198.281
Epochs: 6098 | epoch avg. loss: 0.255 | test avg. loss: 197.238


 12%|█▏        | 6098/50000 [08:54<49:40, 14.73it/s]

Epochs: 6099 | epoch avg. loss: 0.383 | test avg. loss: 199.787


 12%|█▏        | 6102/50000 [08:56<2:23:55,  5.08it/s]

Epochs: 6100 | epoch avg. loss: 0.165 | test avg. loss: 199.371
Epochs: 6101 | epoch avg. loss: 0.239 | test avg. loss: 199.698
Epochs: 6102 | epoch avg. loss: 0.101 | test avg. loss: 199.966


 12%|█▏        | 6106/50000 [08:56<1:35:53,  7.63it/s]

Epochs: 6103 | epoch avg. loss: 0.085 | test avg. loss: 199.164
Epochs: 6104 | epoch avg. loss: 0.108 | test avg. loss: 200.835
Epochs: 6105 | epoch avg. loss: 0.261 | test avg. loss: 198.232
Epochs: 6106 | epoch avg. loss: 0.274 | test avg. loss: 200.561


 12%|█▏        | 6110/50000 [08:56<1:15:35,  9.68it/s]

Epochs: 6107 | epoch avg. loss: 0.402 | test avg. loss: 198.881
Epochs: 6108 | epoch avg. loss: 0.741 | test avg. loss: 201.498
Epochs: 6109 | epoch avg. loss: 0.288 | test avg. loss: 205.083


 12%|█▏        | 6112/50000 [08:56<1:09:59, 10.45it/s]

Epochs: 6110 | epoch avg. loss: 0.282 | test avg. loss: 203.665
Epochs: 6111 | epoch avg. loss: 0.464 | test avg. loss: 208.262
Epochs: 6112 | epoch avg. loss: 1.062 | test avg. loss: 202.087


 12%|█▏        | 6116/50000 [08:57<1:06:17, 11.03it/s]

Epochs: 6113 | epoch avg. loss: 1.150 | test avg. loss: 204.482
Epochs: 6114 | epoch avg. loss: 0.665 | test avg. loss: 202.977
Epochs: 6115 | epoch avg. loss: 0.390 | test avg. loss: 201.349


 12%|█▏        | 6118/50000 [08:57<1:02:59, 11.61it/s]

Epochs: 6116 | epoch avg. loss: 0.531 | test avg. loss: 204.507
Epochs: 6117 | epoch avg. loss: 1.132 | test avg. loss: 201.397
Epochs: 6118 | epoch avg. loss: 0.634 | test avg. loss: 204.122


 12%|█▏        | 6122/50000 [08:57<1:00:13, 12.14it/s]

Epochs: 6119 | epoch avg. loss: 0.695 | test avg. loss: 203.624
Epochs: 6120 | epoch avg. loss: 0.466 | test avg. loss: 203.655
Epochs: 6121 | epoch avg. loss: 0.473 | test avg. loss: 206.724


 12%|█▏        | 6124/50000 [08:57<56:35, 12.92it/s]

Epochs: 6122 | epoch avg. loss: 0.592 | test avg. loss: 203.981
Epochs: 6123 | epoch avg. loss: 0.565 | test avg. loss: 207.495
Epochs: 6124 | epoch avg. loss: 0.398 | test avg. loss: 204.695


 12%|█▏        | 6128/50000 [08:58<57:05, 12.81it/s]

Epochs: 6125 | epoch avg. loss: 0.500 | test avg. loss: 207.322
Epochs: 6126 | epoch avg. loss: 0.223 | test avg. loss: 207.266
Epochs: 6127 | epoch avg. loss: 0.242 | test avg. loss: 205.402


                                                    

Epochs: 6128 | epoch avg. loss: 0.523 | test avg. loss: 208.039
Epochs: 6129 | epoch avg. loss: 0.305 | test avg. loss: 204.121
Epochs: 6130 | epoch avg. loss: 0.084 | test avg. loss: 203.969


 12%|█▏        | 6134/50000 [08:58<51:37, 14.16it/s]

Epochs: 6131 | epoch avg. loss: 0.147 | test avg. loss: 202.189
Epochs: 6132 | epoch avg. loss: 0.139 | test avg. loss: 202.246
Epochs: 6133 | epoch avg. loss: 0.091 | test avg. loss: 202.778
Epochs: 6134 | epoch avg. loss: 0.088 | test avg. loss: 202.185


 12%|█▏        | 6138/50000 [08:58<52:08, 14.02it/s]

Epochs: 6135 | epoch avg. loss: 0.113 | test avg. loss: 204.133
Epochs: 6136 | epoch avg. loss: 0.178 | test avg. loss: 202.392
Epochs: 6137 | epoch avg. loss: 0.113 | test avg. loss: 202.141


 12%|█▏        | 6140/50000 [08:59<55:46, 13.11it/s]

Epochs: 6138 | epoch avg. loss: 0.085 | test avg. loss: 200.697
Epochs: 6139 | epoch avg. loss: 0.091 | test avg. loss: 200.060
Epochs: 6140 | epoch avg. loss: 0.094 | test avg. loss: 200.379


 12%|█▏        | 6144/50000 [08:59<58:00, 12.60it/s]  

Epochs: 6141 | epoch avg. loss: 0.091 | test avg. loss: 199.372
Epochs: 6142 | epoch avg. loss: 0.103 | test avg. loss: 200.322
Epochs: 6143 | epoch avg. loss: 0.078 | test avg. loss: 200.282


 12%|█▏        | 6148/50000 [08:59<52:35, 13.90it/s]

Epochs: 6144 | epoch avg. loss: 0.064 | test avg. loss: 200.442
Epochs: 6145 | epoch avg. loss: 0.059 | test avg. loss: 200.492
Epochs: 6146 | epoch avg. loss: 0.070 | test avg. loss: 202.048
Epochs: 6147 | epoch avg. loss: 0.093 | test avg. loss: 201.926


 12%|█▏        | 6150/50000 [08:59<50:52, 14.36it/s]

Epochs: 6148 | epoch avg. loss: 0.069 | test avg. loss: 202.291
Epochs: 6149 | epoch avg. loss: 0.063 | test avg. loss: 202.428
Epochs: 6150 | epoch avg. loss: 0.058 | test avg. loss: 203.050
Epochs: 6151 | epoch avg. loss: 0.058 | test avg. loss: 202.261


 12%|█▏        | 6154/50000 [09:00<57:21, 12.74it/s]

Epochs: 6152 | epoch avg. loss: 0.060 | test avg. loss: 202.578
Epochs: 6153 | epoch avg. loss: 0.065 | test avg. loss: 201.649


 12%|█▏        | 6156/50000 [09:00<57:43, 12.66it/s]

Epochs: 6154 | epoch avg. loss: 0.079 | test avg. loss: 202.663
Epochs: 6155 | epoch avg. loss: 0.107 | test avg. loss: 203.306
Epochs: 6156 | epoch avg. loss: 0.092 | test avg. loss: 202.410


 12%|█▏        | 6160/50000 [09:00<55:05, 13.26it/s]

Epochs: 6157 | epoch avg. loss: 0.138 | test avg. loss: 204.004
Epochs: 6158 | epoch avg. loss: 0.155 | test avg. loss: 202.367
Epochs: 6159 | epoch avg. loss: 0.179 | test avg. loss: 203.007


 12%|█▏        | 6162/50000 [09:00<54:40, 13.36it/s]

Epochs: 6160 | epoch avg. loss: 0.078 | test avg. loss: 201.749
Epochs: 6161 | epoch avg. loss: 0.178 | test avg. loss: 201.998
Epochs: 6162 | epoch avg. loss: 0.087 | test avg. loss: 202.388


 12%|█▏        | 6166/50000 [09:00<59:27, 12.29it/s]

Epochs: 6163 | epoch avg. loss: 0.107 | test avg. loss: 202.891
Epochs: 6164 | epoch avg. loss: 0.105 | test avg. loss: 204.399
Epochs: 6165 | epoch avg. loss: 0.197 | test avg. loss: 202.862


 12%|█▏        | 6168/50000 [09:01<1:04:29, 11.33it/s]

Epochs: 6166 | epoch avg. loss: 0.171 | test avg. loss: 203.831
Epochs: 6167 | epoch avg. loss: 0.105 | test avg. loss: 205.445
Epochs: 6168 | epoch avg. loss: 0.138 | test avg. loss: 204.433


 12%|█▏        | 6172/50000 [09:01<1:02:31, 11.68it/s]

Epochs: 6169 | epoch avg. loss: 0.122 | test avg. loss: 202.864
Epochs: 6170 | epoch avg. loss: 0.081 | test avg. loss: 200.514
Epochs: 6171 | epoch avg. loss: 0.215 | test avg. loss: 200.377


 12%|█▏        | 6174/50000 [09:01<1:04:14, 11.37it/s]

Epochs: 6172 | epoch avg. loss: 0.134 | test avg. loss: 200.853
Epochs: 6173 | epoch avg. loss: 0.094 | test avg. loss: 200.640
Epochs: 6174 | epoch avg. loss: 0.078 | test avg. loss: 202.443


 12%|█▏        | 6178/50000 [09:01<1:00:54, 11.99it/s]

Epochs: 6175 | epoch avg. loss: 0.159 | test avg. loss: 201.136
Epochs: 6176 | epoch avg. loss: 0.152 | test avg. loss: 200.894
Epochs: 6177 | epoch avg. loss: 0.107 | test avg. loss: 202.660


 12%|█▏        | 6180/50000 [09:02<1:02:39, 11.66it/s]

Epochs: 6178 | epoch avg. loss: 0.202 | test avg. loss: 201.635
Epochs: 6179 | epoch avg. loss: 0.097 | test avg. loss: 202.545
Epochs: 6180 | epoch avg. loss: 0.126 | test avg. loss: 203.537


 12%|█▏        | 6184/50000 [09:02<59:17, 12.32it/s]  

Epochs: 6181 | epoch avg. loss: 0.154 | test avg. loss: 201.709
Epochs: 6182 | epoch avg. loss: 0.143 | test avg. loss: 203.512
Epochs: 6183 | epoch avg. loss: 0.175 | test avg. loss: 202.120


 12%|█▏        | 6186/50000 [09:02<58:44, 12.43it/s]

Epochs: 6184 | epoch avg. loss: 0.108 | test avg. loss: 202.169
Epochs: 6185 | epoch avg. loss: 0.123 | test avg. loss: 202.774
Epochs: 6186 | epoch avg. loss: 0.151 | test avg. loss: 201.679


 12%|█▏        | 6190/50000 [09:02<1:00:15, 12.12it/s]

Epochs: 6187 | epoch avg. loss: 0.068 | test avg. loss: 202.662
Epochs: 6188 | epoch avg. loss: 0.115 | test avg. loss: 202.283
Epochs: 6189 | epoch avg. loss: 0.083 | test avg. loss: 201.805


 12%|█▏        | 6192/50000 [09:03<1:08:18, 10.69it/s]

Epochs: 6190 | epoch avg. loss: 0.146 | test avg. loss: 204.663
Epochs: 6191 | epoch avg. loss: 0.323 | test avg. loss: 201.890
Epochs: 6192 | epoch avg. loss: 0.318 | test avg. loss: 202.163


                                                    

Epochs: 6193 | epoch avg. loss: 0.158 | test avg. loss: 201.808
Epochs: 6194 | epoch avg. loss: 0.131 | test avg. loss: 200.521
Epochs: 6195 | epoch avg. loss: 0.108 | test avg. loss: 202.006


 12%|█▏        | 6198/50000 [09:03<58:40, 12.44it/s]

Epochs: 6196 | epoch avg. loss: 0.107 | test avg. loss: 201.550
Epochs: 6197 | epoch avg. loss: 0.076 | test avg. loss: 201.556
Epochs: 6198 | epoch avg. loss: 0.062 | test avg. loss: 201.895


 12%|█▏        | 6198/50000 [09:03<58:40, 12.44it/s]

Epochs: 6199 | epoch avg. loss: 0.054 | test avg. loss: 201.234


 12%|█▏        | 6202/50000 [09:05<2:45:35,  4.41it/s]

Epochs: 6200 | epoch avg. loss: 0.069 | test avg. loss: 201.156
Epochs: 6201 | epoch avg. loss: 0.049 | test avg. loss: 201.290
Epochs: 6202 | epoch avg. loss: 0.053 | test avg. loss: 201.152


 12%|█▏        | 6206/50000 [09:05<1:48:24,  6.73it/s]

Epochs: 6203 | epoch avg. loss: 0.065 | test avg. loss: 202.975
Epochs: 6204 | epoch avg. loss: 0.152 | test avg. loss: 201.872
Epochs: 6205 | epoch avg. loss: 0.160 | test avg. loss: 202.496


 12%|█▏        | 6208/50000 [09:05<1:31:09,  8.01it/s]

Epochs: 6206 | epoch avg. loss: 0.099 | test avg. loss: 204.702
Epochs: 6207 | epoch avg. loss: 0.210 | test avg. loss: 203.605
Epochs: 6208 | epoch avg. loss: 0.242 | test avg. loss: 204.531


 12%|█▏        | 6212/50000 [09:06<1:17:29,  9.42it/s]

Epochs: 6209 | epoch avg. loss: 0.124 | test avg. loss: 202.752
Epochs: 6210 | epoch avg. loss: 0.101 | test avg. loss: 200.690
Epochs: 6211 | epoch avg. loss: 0.172 | test avg. loss: 202.366


 12%|█▏        | 6214/50000 [09:06<1:13:37,  9.91it/s]

Epochs: 6212 | epoch avg. loss: 0.353 | test avg. loss: 200.769
Epochs: 6213 | epoch avg. loss: 0.104 | test avg. loss: 201.737
Epochs: 6214 | epoch avg. loss: 0.104 | test avg. loss: 202.747


 12%|█▏        | 6218/50000 [09:06<1:00:11, 12.12it/s]

Epochs: 6215 | epoch avg. loss: 0.109 | test avg. loss: 202.737
Epochs: 6216 | epoch avg. loss: 0.233 | test avg. loss: 204.215
Epochs: 6217 | epoch avg. loss: 0.152 | test avg. loss: 204.958
Epochs: 6218 | epoch avg. loss: 0.144 | test avg. loss: 204.013


 12%|█▏        | 6222/50000 [09:06<1:00:57, 11.97it/s]

Epochs: 6219 | epoch avg. loss: 0.048 | test avg. loss: 205.356
Epochs: 6220 | epoch avg. loss: 0.130 | test avg. loss: 204.990
Epochs: 6221 | epoch avg. loss: 0.128 | test avg. loss: 205.317


 12%|█▏        | 6224/50000 [09:07<1:02:52, 11.60it/s]

Epochs: 6222 | epoch avg. loss: 0.125 | test avg. loss: 207.894
Epochs: 6223 | epoch avg. loss: 0.351 | test avg. loss: 205.619
Epochs: 6224 | epoch avg. loss: 0.281 | test avg. loss: 205.455


 12%|█▏        | 6228/50000 [09:07<1:03:07, 11.56it/s]

Epochs: 6225 | epoch avg. loss: 0.144 | test avg. loss: 206.291
Epochs: 6226 | epoch avg. loss: 0.210 | test avg. loss: 205.197
Epochs: 6227 | epoch avg. loss: 0.083 | test avg. loss: 205.525


 12%|█▏        | 6230/50000 [09:07<1:04:33, 11.30it/s]

Epochs: 6228 | epoch avg. loss: 0.279 | test avg. loss: 204.994
Epochs: 6229 | epoch avg. loss: 0.109 | test avg. loss: 205.603
Epochs: 6230 | epoch avg. loss: 0.121 | test avg. loss: 208.490


 12%|█▏        | 6234/50000 [09:08<1:06:58, 10.89it/s]

Epochs: 6231 | epoch avg. loss: 0.278 | test avg. loss: 208.256
Epochs: 6232 | epoch avg. loss: 0.488 | test avg. loss: 203.644
Epochs: 6233 | epoch avg. loss: 0.180 | test avg. loss: 203.236


 12%|█▏        | 6236/50000 [09:08<1:04:09, 11.37it/s]

Epochs: 6234 | epoch avg. loss: 0.329 | test avg. loss: 198.670
Epochs: 6235 | epoch avg. loss: 0.684 | test avg. loss: 201.076
Epochs: 6236 | epoch avg. loss: 0.772 | test avg. loss: 199.677


 12%|█▏        | 6240/50000 [09:08<1:01:48, 11.80it/s]

Epochs: 6237 | epoch avg. loss: 0.676 | test avg. loss: 200.618
Epochs: 6238 | epoch avg. loss: 0.492 | test avg. loss: 202.105
Epochs: 6239 | epoch avg. loss: 0.451 | test avg. loss: 199.176




Epochs: 6240 | epoch avg. loss: 0.859 | test avg. loss: 203.850
Epochs: 6241 | epoch avg. loss: 1.305 | test avg. loss: 199.387
Epochs: 6242 | epoch avg. loss: 0.603 | test avg. loss: 198.449


                                                    

Epochs: 6243 | epoch avg. loss: 0.704 | test avg. loss: 204.189
Epochs: 6244 | epoch avg. loss: 1.159 | test avg. loss: 200.768


 12%|█▏        | 6248/50000 [09:09<1:01:11, 11.92it/s]

Epochs: 6245 | epoch avg. loss: 0.425 | test avg. loss: 202.406
Epochs: 6246 | epoch avg. loss: 0.493 | test avg. loss: 203.971
Epochs: 6247 | epoch avg. loss: 0.405 | test avg. loss: 200.930


 12%|█▎        | 6250/50000 [09:09<1:01:30, 11.85it/s]

Epochs: 6248 | epoch avg. loss: 0.774 | test avg. loss: 203.945
Epochs: 6249 | epoch avg. loss: 0.839 | test avg. loss: 200.695
Epochs: 6250 | epoch avg. loss: 0.214 | test avg. loss: 198.519


 13%|█▎        | 6254/50000 [09:09<1:00:44, 12.00it/s]

Epochs: 6251 | epoch avg. loss: 0.289 | test avg. loss: 199.880
Epochs: 6252 | epoch avg. loss: 0.236 | test avg. loss: 197.751
Epochs: 6253 | epoch avg. loss: 0.658 | test avg. loss: 200.025


 13%|█▎        | 6256/50000 [09:09<58:58, 12.36it/s]

Epochs: 6254 | epoch avg. loss: 0.181 | test avg. loss: 203.364
Epochs: 6255 | epoch avg. loss: 0.209 | test avg. loss: 203.877
Epochs: 6256 | epoch avg. loss: 0.233 | test avg. loss: 204.363


 13%|█▎        | 6260/50000 [09:10<59:38, 12.22it/s]  

Epochs: 6257 | epoch avg. loss: 0.174 | test avg. loss: 199.943
Epochs: 6258 | epoch avg. loss: 0.142 | test avg. loss: 196.764
Epochs: 6259 | epoch avg. loss: 0.320 | test avg. loss: 198.518


                                                    

Epochs: 6260 | epoch avg. loss: 0.507 | test avg. loss: 197.721
Epochs: 6261 | epoch avg. loss: 0.199 | test avg. loss: 198.006
Epochs: 6262 | epoch avg. loss: 0.228 | test avg. loss: 202.372


 13%|█▎        | 6266/50000 [09:10<54:17, 13.42it/s]

Epochs: 6263 | epoch avg. loss: 0.337 | test avg. loss: 200.460
Epochs: 6264 | epoch avg. loss: 0.409 | test avg. loss: 203.507
Epochs: 6265 | epoch avg. loss: 0.256 | test avg. loss: 206.494


 13%|█▎        | 6268/50000 [09:10<53:41, 13.58it/s]

Epochs: 6266 | epoch avg. loss: 0.392 | test avg. loss: 202.921
Epochs: 6267 | epoch avg. loss: 0.192 | test avg. loss: 203.474
Epochs: 6268 | epoch avg. loss: 0.223 | test avg. loss: 202.566


 13%|█▎        | 6272/50000 [09:11<55:00, 13.25it/s]

Epochs: 6269 | epoch avg. loss: 0.157 | test avg. loss: 200.009
Epochs: 6270 | epoch avg. loss: 0.155 | test avg. loss: 201.812
Epochs: 6271 | epoch avg. loss: 0.340 | test avg. loss: 200.171


 13%|█▎        | 6274/50000 [09:11<52:55, 13.77it/s]

Epochs: 6272 | epoch avg. loss: 0.232 | test avg. loss: 199.872
Epochs: 6273 | epoch avg. loss: 0.224 | test avg. loss: 203.967
Epochs: 6274 | epoch avg. loss: 0.490 | test avg. loss: 201.707


 13%|█▎        | 6278/50000 [09:11<51:04, 14.27it/s]

Epochs: 6275 | epoch avg. loss: 0.381 | test avg. loss: 203.136
Epochs: 6276 | epoch avg. loss: 0.152 | test avg. loss: 204.975
Epochs: 6277 | epoch avg. loss: 0.173 | test avg. loss: 202.748
Epochs: 6278 | epoch avg. loss: 0.210 | test avg. loss: 204.265


 13%|█▎        | 6282/50000 [09:11<50:25, 14.45it/s]

Epochs: 6279 | epoch avg. loss: 0.236 | test avg. loss: 203.494
Epochs: 6280 | epoch avg. loss: 0.131 | test avg. loss: 201.449
Epochs: 6281 | epoch avg. loss: 0.105 | test avg. loss: 203.383
Epochs: 6282 | epoch avg. loss: 0.149 | test avg. loss: 201.630


 13%|█▎        | 6286/50000 [09:12<53:11, 13.70it/s]

Epochs: 6283 | epoch avg. loss: 0.076 | test avg. loss: 203.102
Epochs: 6284 | epoch avg. loss: 0.363 | test avg. loss: 201.713
Epochs: 6285 | epoch avg. loss: 0.207 | test avg. loss: 199.947


 13%|█▎        | 6288/50000 [09:12<56:19, 12.93it/s]

Epochs: 6286 | epoch avg. loss: 0.278 | test avg. loss: 202.702
Epochs: 6287 | epoch avg. loss: 0.406 | test avg. loss: 199.790
Epochs: 6288 | epoch avg. loss: 0.336 | test avg. loss: 200.470


 13%|█▎        | 6292/50000 [09:12<52:38, 13.84it/s]

Epochs: 6289 | epoch avg. loss: 0.197 | test avg. loss: 204.568
Epochs: 6290 | epoch avg. loss: 0.365 | test avg. loss: 200.935
Epochs: 6291 | epoch avg. loss: 0.207 | test avg. loss: 203.447


 13%|█▎        | 6294/50000 [09:12<52:29, 13.88it/s]

Epochs: 6292 | epoch avg. loss: 0.193 | test avg. loss: 200.676
Epochs: 6293 | epoch avg. loss: 0.231 | test avg. loss: 198.379
Epochs: 6294 | epoch avg. loss: 0.181 | test avg. loss: 199.076


 13%|█▎        | 6298/50000 [09:12<55:44, 13.07it/s]

Epochs: 6295 | epoch avg. loss: 0.262 | test avg. loss: 197.215
Epochs: 6296 | epoch avg. loss: 0.204 | test avg. loss: 197.266
Epochs: 6297 | epoch avg. loss: 0.190 | test avg. loss: 199.509


 13%|█▎        | 6298/50000 [09:13<55:44, 13.07it/s]

Epochs: 6298 | epoch avg. loss: 0.134 | test avg. loss: 200.964
Epochs: 6299 | epoch avg. loss: 0.086 | test avg. loss: 201.863


 13%|█▎        | 6304/50000 [09:14<1:55:50,  6.29it/s]

Epochs: 6300 | epoch avg. loss: 0.063 | test avg. loss: 202.183
Epochs: 6301 | epoch avg. loss: 0.059 | test avg. loss: 201.870
Epochs: 6302 | epoch avg. loss: 0.055 | test avg. loss: 200.664
Epochs: 6303 | epoch avg. loss: 0.080 | test avg. loss: 200.925




Epochs: 6304 | epoch avg. loss: 0.063 | test avg. loss: 201.535
Epochs: 6305 | epoch avg. loss: 0.072 | test avg. loss: 200.325
Epochs: 6306 | epoch avg. loss: 0.206 | test avg. loss: 201.186


 13%|█▎        | 6310/50000 [09:15<1:18:20,  9.30it/s]

Epochs: 6307 | epoch avg. loss: 0.065 | test avg. loss: 202.289
Epochs: 6308 | epoch avg. loss: 0.067 | test avg. loss: 201.152
Epochs: 6309 | epoch avg. loss: 0.100 | test avg. loss: 202.713


                                                      

Epochs: 6310 | epoch avg. loss: 0.148 | test avg. loss: 201.838
Epochs: 6311 | epoch avg. loss: 0.098 | test avg. loss: 200.548
Epochs: 6312 | epoch avg. loss: 0.182 | test avg. loss: 201.435


 13%|█▎        | 6316/50000 [09:15<1:02:14, 11.70it/s]

Epochs: 6313 | epoch avg. loss: 0.098 | test avg. loss: 201.057
Epochs: 6314 | epoch avg. loss: 0.071 | test avg. loss: 200.450
Epochs: 6315 | epoch avg. loss: 0.111 | test avg. loss: 202.309


 13%|█▎        | 6318/50000 [09:15<58:48, 12.38it/s]

Epochs: 6316 | epoch avg. loss: 0.065 | test avg. loss: 201.430
Epochs: 6317 | epoch avg. loss: 0.158 | test avg. loss: 202.215
Epochs: 6318 | epoch avg. loss: 0.093 | test avg. loss: 203.805


 13%|█▎        | 6322/50000 [09:15<58:46, 12.39it/s]

Epochs: 6319 | epoch avg. loss: 0.059 | test avg. loss: 201.686
Epochs: 6320 | epoch avg. loss: 0.332 | test avg. loss: 202.178
Epochs: 6321 | epoch avg. loss: 0.104 | test avg. loss: 202.975


                                                    

Epochs: 6322 | epoch avg. loss: 0.160 | test avg. loss: 201.172
Epochs: 6323 | epoch avg. loss: 0.084 | test avg. loss: 202.556
Epochs: 6324 | epoch avg. loss: 0.085 | test avg. loss: 201.925


 13%|█▎        | 6328/50000 [09:16<53:44, 13.54it/s]

Epochs: 6325 | epoch avg. loss: 0.050 | test avg. loss: 203.519
Epochs: 6326 | epoch avg. loss: 0.146 | test avg. loss: 204.320
Epochs: 6327 | epoch avg. loss: 0.086 | test avg. loss: 203.651


 13%|█▎        | 6330/50000 [09:16<54:14, 13.42it/s]

Epochs: 6328 | epoch avg. loss: 0.233 | test avg. loss: 206.053
Epochs: 6329 | epoch avg. loss: 0.260 | test avg. loss: 204.436
Epochs: 6330 | epoch avg. loss: 0.133 | test avg. loss: 202.278


 13%|█▎        | 6334/50000 [09:16<51:15, 14.20it/s]

Epochs: 6331 | epoch avg. loss: 0.155 | test avg. loss: 204.393
Epochs: 6332 | epoch avg. loss: 0.416 | test avg. loss: 200.940
Epochs: 6333 | epoch avg. loss: 0.446 | test avg. loss: 202.176




Epochs: 6334 | epoch avg. loss: 0.480 | test avg. loss: 202.012
Epochs: 6335 | epoch avg. loss: 0.375 | test avg. loss: 201.758


 13%|█▎        | 6338/50000 [09:17<1:03:05, 11.53it/s]

Epochs: 6336 | epoch avg. loss: 0.614 | test avg. loss: 208.444
Epochs: 6337 | epoch avg. loss: 1.564 | test avg. loss: 203.659
Epochs: 6338 | epoch avg. loss: 0.602 | test avg. loss: 206.191


 13%|█▎        | 6342/50000 [09:17<56:49, 12.81it/s]

Epochs: 6339 | epoch avg. loss: 0.454 | test avg. loss: 205.484
Epochs: 6340 | epoch avg. loss: 0.592 | test avg. loss: 199.072
Epochs: 6341 | epoch avg. loss: 0.577 | test avg. loss: 199.681


 13%|█▎        | 6346/50000 [09:17<52:22, 13.89it/s]

Epochs: 6342 | epoch avg. loss: 0.320 | test avg. loss: 199.491
Epochs: 6343 | epoch avg. loss: 0.264 | test avg. loss: 199.518
Epochs: 6344 | epoch avg. loss: 0.276 | test avg. loss: 203.297
Epochs: 6345 | epoch avg. loss: 0.621 | test avg. loss: 201.216


 13%|█▎        | 6348/50000 [09:17<51:49, 14.04it/s]

Epochs: 6346 | epoch avg. loss: 0.285 | test avg. loss: 200.282
Epochs: 6347 | epoch avg. loss: 0.285 | test avg. loss: 204.785
Epochs: 6348 | epoch avg. loss: 0.574 | test avg. loss: 200.315


 13%|█▎        | 6352/50000 [09:18<55:37, 13.08it/s]

Epochs: 6349 | epoch avg. loss: 0.874 | test avg. loss: 201.532
Epochs: 6350 | epoch avg. loss: 0.271 | test avg. loss: 202.654
Epochs: 6351 | epoch avg. loss: 0.380 | test avg. loss: 201.969


 13%|█▎        | 6354/50000 [09:18<55:02, 13.21it/s]

Epochs: 6352 | epoch avg. loss: 1.017 | test avg. loss: 210.754
Epochs: 6353 | epoch avg. loss: 1.476 | test avg. loss: 206.774
Epochs: 6354 | epoch avg. loss: 0.662 | test avg. loss: 205.438
Epochs: 6355 | epoch avg. loss: 0.476 | test avg. loss: 207.759


 13%|█▎        | 6358/50000 [09:18<51:48, 14.04it/s]

Epochs: 6356 | epoch avg. loss: 0.674 | test avg. loss: 199.434
Epochs: 6357 | epoch avg. loss: 1.382 | test avg. loss: 201.315
Epochs: 6358 | epoch avg. loss: 0.672 | test avg. loss: 207.811


 13%|█▎        | 6360/50000 [09:18<52:21, 13.89it/s]

Epochs: 6359 | epoch avg. loss: 1.396 | test avg. loss: 202.353
Epochs: 6360 | epoch avg. loss: 1.241 | test avg. loss: 204.446
Epochs: 6361 | epoch avg. loss: 0.618 | test avg. loss: 206.169


 13%|█▎        | 6364/50000 [09:19<1:03:10, 11.51it/s]

Epochs: 6362 | epoch avg. loss: 0.599 | test avg. loss: 201.875
Epochs: 6363 | epoch avg. loss: 0.873 | test avg. loss: 205.494
Epochs: 6364 | epoch avg. loss: 0.685 | test avg. loss: 204.817


 13%|█▎        | 6368/50000 [09:19<57:32, 12.64it/s]  

Epochs: 6365 | epoch avg. loss: 0.675 | test avg. loss: 212.196
Epochs: 6366 | epoch avg. loss: 0.485 | test avg. loss: 210.387
Epochs: 6367 | epoch avg. loss: 0.685 | test avg. loss: 205.220


 13%|█▎        | 6372/50000 [09:19<52:33, 13.83it/s]

Epochs: 6368 | epoch avg. loss: 0.836 | test avg. loss: 206.893
Epochs: 6369 | epoch avg. loss: 0.427 | test avg. loss: 207.112
Epochs: 6370 | epoch avg. loss: 0.219 | test avg. loss: 207.770
Epochs: 6371 | epoch avg. loss: 0.168 | test avg. loss: 208.413


 13%|█▎        | 6374/50000 [09:19<53:49, 13.51it/s]

Epochs: 6372 | epoch avg. loss: 0.136 | test avg. loss: 207.019
Epochs: 6373 | epoch avg. loss: 0.333 | test avg. loss: 207.907
Epochs: 6374 | epoch avg. loss: 0.145 | test avg. loss: 206.576


 13%|█▎        | 6378/50000 [09:20<53:10, 13.67it/s]

Epochs: 6375 | epoch avg. loss: 0.122 | test avg. loss: 206.639
Epochs: 6376 | epoch avg. loss: 0.115 | test avg. loss: 208.685
Epochs: 6377 | epoch avg. loss: 0.178 | test avg. loss: 208.271


 13%|█▎        | 6380/50000 [09:20<55:35, 13.08it/s]

Epochs: 6378 | epoch avg. loss: 0.110 | test avg. loss: 209.522
Epochs: 6379 | epoch avg. loss: 0.119 | test avg. loss: 208.903
Epochs: 6380 | epoch avg. loss: 0.090 | test avg. loss: 208.045


 13%|█▎        | 6384/50000 [09:20<55:52, 13.01it/s]

Epochs: 6381 | epoch avg. loss: 0.079 | test avg. loss: 207.616
Epochs: 6382 | epoch avg. loss: 0.070 | test avg. loss: 206.921
Epochs: 6383 | epoch avg. loss: 0.088 | test avg. loss: 208.141


 13%|█▎        | 6386/50000 [09:20<56:50, 12.79it/s]

Epochs: 6384 | epoch avg. loss: 0.118 | test avg. loss: 206.582
Epochs: 6385 | epoch avg. loss: 0.259 | test avg. loss: 207.775
Epochs: 6386 | epoch avg. loss: 0.063 | test avg. loss: 207.894


 13%|█▎        | 6390/50000 [09:21<1:00:40, 11.98it/s]

Epochs: 6387 | epoch avg. loss: 0.187 | test avg. loss: 207.927
Epochs: 6388 | epoch avg. loss: 0.116 | test avg. loss: 209.305
Epochs: 6389 | epoch avg. loss: 0.147 | test avg. loss: 206.891


 13%|█▎        | 6392/50000 [09:21<1:02:00, 11.72it/s]

Epochs: 6390 | epoch avg. loss: 0.084 | test avg. loss: 208.429
Epochs: 6391 | epoch avg. loss: 0.194 | test avg. loss: 206.010
Epochs: 6392 | epoch avg. loss: 0.316 | test avg. loss: 205.582


 13%|█▎        | 6396/50000 [09:21<58:23, 12.44it/s]  

Epochs: 6393 | epoch avg. loss: 0.169 | test avg. loss: 207.400
Epochs: 6394 | epoch avg. loss: 0.218 | test avg. loss: 206.362
Epochs: 6395 | epoch avg. loss: 0.146 | test avg. loss: 207.317


 13%|█▎        | 6398/50000 [09:21<58:22, 12.45it/s]

Epochs: 6396 | epoch avg. loss: 0.083 | test avg. loss: 207.991
Epochs: 6397 | epoch avg. loss: 0.120 | test avg. loss: 207.197
Epochs: 6398 | epoch avg. loss: 0.136 | test avg. loss: 207.353


 13%|█▎        | 6398/50000 [09:21<58:22, 12.45it/s]

Epochs: 6399 | epoch avg. loss: 0.087 | test avg. loss: 207.952


 13%|█▎        | 6402/50000 [09:23<2:50:04,  4.27it/s]

Epochs: 6400 | epoch avg. loss: 0.155 | test avg. loss: 206.504
Epochs: 6401 | epoch avg. loss: 0.117 | test avg. loss: 207.080
Epochs: 6402 | epoch avg. loss: 0.133 | test avg. loss: 206.395


 13%|█▎        | 6406/50000 [09:23<1:52:21,  6.47it/s]

Epochs: 6403 | epoch avg. loss: 0.157 | test avg. loss: 206.506
Epochs: 6404 | epoch avg. loss: 0.108 | test avg. loss: 208.681
Epochs: 6405 | epoch avg. loss: 0.124 | test avg. loss: 207.879


 13%|█▎        | 6408/50000 [09:24<1:38:34,  7.37it/s]

Epochs: 6406 | epoch avg. loss: 0.173 | test avg. loss: 209.404
Epochs: 6407 | epoch avg. loss: 0.368 | test avg. loss: 210.506
Epochs: 6408 | epoch avg. loss: 0.555 | test avg. loss: 204.708


 13%|█▎        | 6412/50000 [09:24<1:14:06,  9.80it/s]

Epochs: 6409 | epoch avg. loss: 0.714 | test avg. loss: 204.813
Epochs: 6410 | epoch avg. loss: 0.386 | test avg. loss: 208.222
Epochs: 6411 | epoch avg. loss: 0.765 | test avg. loss: 203.156
Epochs: 6412 | epoch avg. loss: 1.963 | test avg. loss: 207.363


 13%|█▎        | 6416/50000 [09:24<1:04:21, 11.29it/s]

Epochs: 6413 | epoch avg. loss: 1.280 | test avg. loss: 209.070
Epochs: 6414 | epoch avg. loss: 0.711 | test avg. loss: 207.981
Epochs: 6415 | epoch avg. loss: 0.775 | test avg. loss: 213.480


 13%|█▎        | 6418/50000 [09:24<1:00:16, 12.05it/s]

Epochs: 6416 | epoch avg. loss: 0.798 | test avg. loss: 209.522
Epochs: 6417 | epoch avg. loss: 0.208 | test avg. loss: 207.795
Epochs: 6418 | epoch avg. loss: 0.297 | test avg. loss: 210.338


 13%|█▎        | 6422/50000 [09:25<1:02:37, 11.60it/s]

Epochs: 6419 | epoch avg. loss: 0.588 | test avg. loss: 210.027
Epochs: 6420 | epoch avg. loss: 0.200 | test avg. loss: 212.496
Epochs: 6421 | epoch avg. loss: 0.369 | test avg. loss: 218.453


 13%|█▎        | 6424/50000 [09:25<59:39, 12.17it/s]

Epochs: 6422 | epoch avg. loss: 0.710 | test avg. loss: 215.294
Epochs: 6423 | epoch avg. loss: 0.229 | test avg. loss: 212.012
Epochs: 6424 | epoch avg. loss: 0.258 | test avg. loss: 213.060


 13%|█▎        | 6428/50000 [09:25<1:00:03, 12.09it/s]

Epochs: 6425 | epoch avg. loss: 0.379 | test avg. loss: 211.000
Epochs: 6426 | epoch avg. loss: 0.218 | test avg. loss: 211.921
Epochs: 6427 | epoch avg. loss: 0.139 | test avg. loss: 214.459


 13%|█▎        | 6430/50000 [09:25<59:27, 12.21it/s]

Epochs: 6428 | epoch avg. loss: 0.294 | test avg. loss: 212.538
Epochs: 6429 | epoch avg. loss: 0.212 | test avg. loss: 212.703
Epochs: 6430 | epoch avg. loss: 0.208 | test avg. loss: 210.784


 13%|█▎        | 6434/50000 [09:26<1:01:36, 11.78it/s]

Epochs: 6431 | epoch avg. loss: 0.130 | test avg. loss: 208.673
Epochs: 6432 | epoch avg. loss: 0.193 | test avg. loss: 210.861
Epochs: 6433 | epoch avg. loss: 0.377 | test avg. loss: 209.464


 13%|█▎        | 6436/50000 [09:26<1:01:41, 11.77it/s]

Epochs: 6434 | epoch avg. loss: 0.073 | test avg. loss: 209.453
Epochs: 6435 | epoch avg. loss: 0.087 | test avg. loss: 211.049
Epochs: 6436 | epoch avg. loss: 0.137 | test avg. loss: 210.355


 13%|█▎        | 6440/50000 [09:26<58:12, 12.47it/s]  

Epochs: 6437 | epoch avg. loss: 0.184 | test avg. loss: 210.530
Epochs: 6438 | epoch avg. loss: 0.102 | test avg. loss: 210.335
Epochs: 6439 | epoch avg. loss: 0.096 | test avg. loss: 209.200


 13%|█▎        | 6442/50000 [09:26<56:40, 12.81it/s]

Epochs: 6440 | epoch avg. loss: 0.205 | test avg. loss: 210.759
Epochs: 6441 | epoch avg. loss: 0.125 | test avg. loss: 213.507
Epochs: 6442 | epoch avg. loss: 0.192 | test avg. loss: 212.620


 13%|█▎        | 6446/50000 [09:27<1:02:11, 11.67it/s]

Epochs: 6443 | epoch avg. loss: 0.396 | test avg. loss: 213.222
Epochs: 6444 | epoch avg. loss: 0.148 | test avg. loss: 212.449
Epochs: 6445 | epoch avg. loss: 0.097 | test avg. loss: 211.197


 13%|█▎        | 6448/50000 [09:27<1:01:27, 11.81it/s]

Epochs: 6446 | epoch avg. loss: 0.107 | test avg. loss: 211.155
Epochs: 6447 | epoch avg. loss: 0.057 | test avg. loss: 210.688
Epochs: 6448 | epoch avg. loss: 0.081 | test avg. loss: 211.146


 13%|█▎        | 6452/50000 [09:27<1:01:34, 11.79it/s]

Epochs: 6449 | epoch avg. loss: 0.050 | test avg. loss: 212.055
Epochs: 6450 | epoch avg. loss: 0.061 | test avg. loss: 212.842
Epochs: 6451 | epoch avg. loss: 0.048 | test avg. loss: 212.815


 13%|█▎        | 6454/50000 [09:27<1:00:38, 11.97it/s]

Epochs: 6452 | epoch avg. loss: 0.055 | test avg. loss: 214.014
Epochs: 6453 | epoch avg. loss: 0.110 | test avg. loss: 212.920
Epochs: 6454 | epoch avg. loss: 0.046 | test avg. loss: 212.593


 13%|█▎        | 6458/50000 [09:28<1:04:45, 11.21it/s]

Epochs: 6455 | epoch avg. loss: 0.065 | test avg. loss: 211.674
Epochs: 6456 | epoch avg. loss: 0.069 | test avg. loss: 210.527
Epochs: 6457 | epoch avg. loss: 0.150 | test avg. loss: 210.727


 13%|█▎        | 6460/50000 [09:28<1:04:55, 11.18it/s]

Epochs: 6458 | epoch avg. loss: 0.055 | test avg. loss: 210.891
Epochs: 6459 | epoch avg. loss: 0.056 | test avg. loss: 210.471
Epochs: 6460 | epoch avg. loss: 0.099 | test avg. loss: 211.212


 13%|█▎        | 6464/50000 [09:28<1:02:50, 11.55it/s]

Epochs: 6461 | epoch avg. loss: 0.074 | test avg. loss: 213.321
Epochs: 6462 | epoch avg. loss: 0.178 | test avg. loss: 211.549
Epochs: 6463 | epoch avg. loss: 0.360 | test avg. loss: 212.151


 13%|█▎        | 6466/50000 [09:28<1:02:41, 11.57it/s]

Epochs: 6464 | epoch avg. loss: 0.205 | test avg. loss: 212.384
Epochs: 6465 | epoch avg. loss: 0.158 | test avg. loss: 210.265
Epochs: 6466 | epoch avg. loss: 0.182 | test avg. loss: 210.384


 13%|█▎        | 6470/50000 [09:29<1:02:02, 11.69it/s]

Epochs: 6467 | epoch avg. loss: 0.169 | test avg. loss: 209.531
Epochs: 6468 | epoch avg. loss: 0.116 | test avg. loss: 208.376
Epochs: 6469 | epoch avg. loss: 0.324 | test avg. loss: 210.501


                                                    

Epochs: 6470 | epoch avg. loss: 0.130 | test avg. loss: 211.262
Epochs: 6471 | epoch avg. loss: 0.072 | test avg. loss: 211.217
Epochs: 6472 | epoch avg. loss: 0.088 | test avg. loss: 212.952


 13%|█▎        | 6476/50000 [09:29<56:01, 12.95it/s]

Epochs: 6473 | epoch avg. loss: 0.166 | test avg. loss: 213.045
Epochs: 6474 | epoch avg. loss: 0.081 | test avg. loss: 212.633
Epochs: 6475 | epoch avg. loss: 0.125 | test avg. loss: 213.276


 13%|█▎        | 6478/50000 [09:29<54:11, 13.38it/s]

Epochs: 6476 | epoch avg. loss: 0.115 | test avg. loss: 213.047
Epochs: 6477 | epoch avg. loss: 0.089 | test avg. loss: 211.887
Epochs: 6478 | epoch avg. loss: 0.071 | test avg. loss: 211.514


 13%|█▎        | 6482/50000 [09:30<58:45, 12.34it/s]  

Epochs: 6479 | epoch avg. loss: 0.055 | test avg. loss: 212.237
Epochs: 6480 | epoch avg. loss: 0.109 | test avg. loss: 211.510
Epochs: 6481 | epoch avg. loss: 0.064 | test avg. loss: 211.869


 13%|█▎        | 6484/50000 [09:30<58:22, 12.42it/s]

Epochs: 6482 | epoch avg. loss: 0.053 | test avg. loss: 212.618
Epochs: 6483 | epoch avg. loss: 0.064 | test avg. loss: 212.035
Epochs: 6484 | epoch avg. loss: 0.154 | test avg. loss: 210.774


 13%|█▎        | 6488/50000 [09:30<53:15, 13.62it/s]

Epochs: 6485 | epoch avg. loss: 0.064 | test avg. loss: 210.452
Epochs: 6486 | epoch avg. loss: 0.134 | test avg. loss: 210.153
Epochs: 6487 | epoch avg. loss: 0.071 | test avg. loss: 209.941


 13%|█▎        | 6490/50000 [09:30<53:55, 13.45it/s]

Epochs: 6488 | epoch avg. loss: 0.147 | test avg. loss: 212.627
Epochs: 6489 | epoch avg. loss: 0.351 | test avg. loss: 213.489
Epochs: 6490 | epoch avg. loss: 0.117 | test avg. loss: 213.209


 13%|█▎        | 6494/50000 [09:31<59:39, 12.15it/s]

Epochs: 6491 | epoch avg. loss: 0.203 | test avg. loss: 213.323
Epochs: 6492 | epoch avg. loss: 0.109 | test avg. loss: 211.286
Epochs: 6493 | epoch avg. loss: 0.094 | test avg. loss: 209.842


 13%|█▎        | 6496/50000 [09:31<58:47, 12.33it/s]

Epochs: 6494 | epoch avg. loss: 0.150 | test avg. loss: 209.535
Epochs: 6495 | epoch avg. loss: 0.057 | test avg. loss: 209.484
Epochs: 6496 | epoch avg. loss: 0.060 | test avg. loss: 210.224


 13%|█▎        | 6498/50000 [09:31<56:20, 12.87it/s]

Epochs: 6497 | epoch avg. loss: 0.046 | test avg. loss: 211.194
Epochs: 6498 | epoch avg. loss: 0.047 | test avg. loss: 211.625
Epochs: 6499 | epoch avg. loss: 0.061 | test avg. loss: 211.999


 13%|█▎        | 6502/50000 [09:33<2:33:53,  4.71it/s]

Epochs: 6500 | epoch avg. loss: 0.043 | test avg. loss: 211.222
Epochs: 6501 | epoch avg. loss: 0.045 | test avg. loss: 210.553
Epochs: 6502 | epoch avg. loss: 0.042 | test avg. loss: 209.883


 13%|█▎        | 6506/50000 [09:33<1:46:06,  6.83it/s]

Epochs: 6503 | epoch avg. loss: 0.039 | test avg. loss: 209.777
Epochs: 6504 | epoch avg. loss: 0.057 | test avg. loss: 210.540
Epochs: 6505 | epoch avg. loss: 0.101 | test avg. loss: 211.171


 13%|█▎        | 6508/50000 [09:33<1:29:13,  8.12it/s]

Epochs: 6506 | epoch avg. loss: 0.040 | test avg. loss: 211.789
Epochs: 6507 | epoch avg. loss: 0.120 | test avg. loss: 212.574
Epochs: 6508 | epoch avg. loss: 0.085 | test avg. loss: 213.404


 13%|█▎        | 6512/50000 [09:33<1:12:51,  9.95it/s]

Epochs: 6509 | epoch avg. loss: 0.177 | test avg. loss: 211.876
Epochs: 6510 | epoch avg. loss: 0.128 | test avg. loss: 209.726
Epochs: 6511 | epoch avg. loss: 0.154 | test avg. loss: 209.760


 13%|█▎        | 6514/50000 [09:33<1:06:32, 10.89it/s]

Epochs: 6512 | epoch avg. loss: 0.079 | test avg. loss: 210.052
Epochs: 6513 | epoch avg. loss: 0.071 | test avg. loss: 209.942
Epochs: 6514 | epoch avg. loss: 0.117 | test avg. loss: 210.721


 13%|█▎        | 6518/50000 [09:34<1:04:43, 11.20it/s]

Epochs: 6515 | epoch avg. loss: 0.112 | test avg. loss: 213.734
Epochs: 6516 | epoch avg. loss: 0.637 | test avg. loss: 212.372
Epochs: 6517 | epoch avg. loss: 0.105 | test avg. loss: 212.627


 13%|█▎        | 6520/50000 [09:34<1:01:44, 11.74it/s]

Epochs: 6518 | epoch avg. loss: 0.131 | test avg. loss: 212.472
Epochs: 6519 | epoch avg. loss: 0.047 | test avg. loss: 211.571
Epochs: 6520 | epoch avg. loss: 0.060 | test avg. loss: 211.609


 13%|█▎        | 6524/50000 [09:34<54:29, 13.30it/s]

Epochs: 6521 | epoch avg. loss: 0.041 | test avg. loss: 211.484
Epochs: 6522 | epoch avg. loss: 0.042 | test avg. loss: 211.060
Epochs: 6523 | epoch avg. loss: 0.039 | test avg. loss: 210.730
Epochs: 6524 | epoch avg. loss: 0.058 | test avg. loss: 211.405


 13%|█▎        | 6528/50000 [09:34<53:44, 13.48it/s]

Epochs: 6525 | epoch avg. loss: 0.039 | test avg. loss: 210.478
Epochs: 6526 | epoch avg. loss: 0.089 | test avg. loss: 209.110
Epochs: 6527 | epoch avg. loss: 0.157 | test avg. loss: 208.779




Epochs: 6528 | epoch avg. loss: 0.090 | test avg. loss: 209.391
Epochs: 6529 | epoch avg. loss: 0.046 | test avg. loss: 209.853
Epochs: 6530 | epoch avg. loss: 0.065 | test avg. loss: 210.990


 13%|█▎        | 6534/50000 [09:35<51:14, 14.14it/s]

Epochs: 6531 | epoch avg. loss: 0.038 | test avg. loss: 211.322
Epochs: 6532 | epoch avg. loss: 0.067 | test avg. loss: 212.125
Epochs: 6533 | epoch avg. loss: 0.067 | test avg. loss: 212.813


 13%|█▎        | 6536/50000 [09:35<53:29, 13.54it/s]

Epochs: 6534 | epoch avg. loss: 0.087 | test avg. loss: 211.424
Epochs: 6535 | epoch avg. loss: 0.163 | test avg. loss: 211.704
Epochs: 6536 | epoch avg. loss: 0.083 | test avg. loss: 213.633


 13%|█▎        | 6540/50000 [09:35<51:58, 13.94it/s]

Epochs: 6537 | epoch avg. loss: 0.230 | test avg. loss: 211.227
Epochs: 6538 | epoch avg. loss: 0.316 | test avg. loss: 209.779
Epochs: 6539 | epoch avg. loss: 0.170 | test avg. loss: 210.594


 13%|█▎        | 6542/50000 [09:36<55:26, 13.07it/s]

Epochs: 6540 | epoch avg. loss: 0.489 | test avg. loss: 208.774
Epochs: 6541 | epoch avg. loss: 0.165 | test avg. loss: 208.230
Epochs: 6542 | epoch avg. loss: 0.510 | test avg. loss: 211.543


 13%|█▎        | 6546/50000 [09:36<52:39, 13.75it/s]

Epochs: 6543 | epoch avg. loss: 0.505 | test avg. loss: 212.594
Epochs: 6544 | epoch avg. loss: 0.254 | test avg. loss: 210.910
Epochs: 6545 | epoch avg. loss: 0.685 | test avg. loss: 212.350
Epochs: 6546 | epoch avg. loss: 0.339 | test avg. loss: 211.656


 13%|█▎        | 6550/50000 [09:36<50:39, 14.29it/s]

Epochs: 6547 | epoch avg. loss: 0.213 | test avg. loss: 210.218
Epochs: 6548 | epoch avg. loss: 0.239 | test avg. loss: 210.692
Epochs: 6549 | epoch avg. loss: 0.088 | test avg. loss: 210.151


 13%|█▎        | 6554/50000 [09:36<49:19, 14.68it/s]

Epochs: 6550 | epoch avg. loss: 0.159 | test avg. loss: 210.522
Epochs: 6551 | epoch avg. loss: 0.107 | test avg. loss: 211.841
Epochs: 6552 | epoch avg. loss: 0.119 | test avg. loss: 211.059
Epochs: 6553 | epoch avg. loss: 0.293 | test avg. loss: 212.521


 13%|█▎        | 6556/50000 [09:36<50:30, 14.33it/s]

Epochs: 6554 | epoch avg. loss: 0.154 | test avg. loss: 214.499
Epochs: 6555 | epoch avg. loss: 0.313 | test avg. loss: 211.637
Epochs: 6556 | epoch avg. loss: 0.368 | test avg. loss: 210.934


 13%|█▎        | 6560/50000 [09:37<49:44, 14.56it/s]

Epochs: 6557 | epoch avg. loss: 0.090 | test avg. loss: 210.585
Epochs: 6558 | epoch avg. loss: 0.085 | test avg. loss: 209.927
Epochs: 6559 | epoch avg. loss: 0.093 | test avg. loss: 210.913
Epochs: 6560 | epoch avg. loss: 0.052 | test avg. loss: 211.288


 13%|█▎        | 6564/50000 [09:37<49:32, 14.61it/s]

Epochs: 6561 | epoch avg. loss: 0.043 | test avg. loss: 212.175
Epochs: 6562 | epoch avg. loss: 0.087 | test avg. loss: 212.641
Epochs: 6563 | epoch avg. loss: 0.105 | test avg. loss: 212.170


 13%|█▎        | 6566/50000 [09:37<51:26, 14.07it/s]

Epochs: 6564 | epoch avg. loss: 0.051 | test avg. loss: 212.572
Epochs: 6565 | epoch avg. loss: 0.053 | test avg. loss: 212.410
Epochs: 6566 | epoch avg. loss: 0.047 | test avg. loss: 211.821


 13%|█▎        | 6570/50000 [09:37<53:42, 13.48it/s]

Epochs: 6567 | epoch avg. loss: 0.052 | test avg. loss: 211.551
Epochs: 6568 | epoch avg. loss: 0.100 | test avg. loss: 210.566
Epochs: 6569 | epoch avg. loss: 0.044 | test avg. loss: 209.923


 13%|█▎        | 6572/50000 [09:38<58:03, 12.47it/s]

Epochs: 6570 | epoch avg. loss: 0.067 | test avg. loss: 210.015
Epochs: 6571 | epoch avg. loss: 0.046 | test avg. loss: 210.549
Epochs: 6572 | epoch avg. loss: 0.074 | test avg. loss: 210.562


 13%|█▎        | 6576/50000 [09:38<54:55, 13.18it/s]

Epochs: 6573 | epoch avg. loss: 0.045 | test avg. loss: 210.754
Epochs: 6574 | epoch avg. loss: 0.050 | test avg. loss: 211.671
Epochs: 6575 | epoch avg. loss: 0.161 | test avg. loss: 211.277


 13%|█▎        | 6578/50000 [09:38<53:41, 13.48it/s]

Epochs: 6576 | epoch avg. loss: 0.064 | test avg. loss: 211.070
Epochs: 6577 | epoch avg. loss: 0.042 | test avg. loss: 210.251
Epochs: 6578 | epoch avg. loss: 0.103 | test avg. loss: 210.178


 13%|█▎        | 6582/50000 [09:38<55:55, 12.94it/s]

Epochs: 6579 | epoch avg. loss: 0.050 | test avg. loss: 210.611
Epochs: 6580 | epoch avg. loss: 0.077 | test avg. loss: 210.585
Epochs: 6581 | epoch avg. loss: 0.041 | test avg. loss: 210.486


 13%|█▎        | 6584/50000 [09:39<1:00:24, 11.98it/s]

Epochs: 6582 | epoch avg. loss: 0.093 | test avg. loss: 210.674
Epochs: 6583 | epoch avg. loss: 0.048 | test avg. loss: 211.220
Epochs: 6584 | epoch avg. loss: 0.086 | test avg. loss: 209.931


 13%|█▎        | 6588/50000 [09:39<1:02:31, 11.57it/s]

Epochs: 6585 | epoch avg. loss: 0.058 | test avg. loss: 209.304
Epochs: 6586 | epoch avg. loss: 0.072 | test avg. loss: 209.831
Epochs: 6587 | epoch avg. loss: 0.099 | test avg. loss: 209.851


 13%|█▎        | 6590/50000 [09:39<1:02:42, 11.54it/s]

Epochs: 6588 | epoch avg. loss: 0.069 | test avg. loss: 208.802
Epochs: 6589 | epoch avg. loss: 0.130 | test avg. loss: 210.417
Epochs: 6590 | epoch avg. loss: 0.223 | test avg. loss: 211.235


 13%|█▎        | 6594/50000 [09:39<1:01:47, 11.71it/s]

Epochs: 6591 | epoch avg. loss: 0.129 | test avg. loss: 211.158
Epochs: 6592 | epoch avg. loss: 0.070 | test avg. loss: 211.527
Epochs: 6593 | epoch avg. loss: 0.064 | test avg. loss: 211.975


 13%|█▎        | 6596/50000 [09:40<1:00:43, 11.91it/s]

Epochs: 6594 | epoch avg. loss: 0.037 | test avg. loss: 211.296
Epochs: 6595 | epoch avg. loss: 0.097 | test avg. loss: 211.775
Epochs: 6596 | epoch avg. loss: 0.043 | test avg. loss: 212.292


 13%|█▎        | 6598/50000 [09:40<1:00:31, 11.95it/s]

Epochs: 6597 | epoch avg. loss: 0.035 | test avg. loss: 212.420
Epochs: 6598 | epoch avg. loss: 0.041 | test avg. loss: 211.859
Epochs: 6599 | epoch avg. loss: 0.035 | test avg. loss: 211.220


 13%|█▎        | 6602/50000 [09:41<2:40:00,  4.52it/s]

Epochs: 6600 | epoch avg. loss: 0.041 | test avg. loss: 209.990
Epochs: 6601 | epoch avg. loss: 0.166 | test avg. loss: 210.624
Epochs: 6602 | epoch avg. loss: 0.057 | test avg. loss: 211.655


 13%|█▎        | 6606/50000 [09:42<1:46:20,  6.80it/s]

Epochs: 6603 | epoch avg. loss: 0.045 | test avg. loss: 211.570
Epochs: 6604 | epoch avg. loss: 0.060 | test avg. loss: 211.323
Epochs: 6605 | epoch avg. loss: 0.060 | test avg. loss: 211.663


 13%|█▎        | 6608/50000 [09:42<1:31:27,  7.91it/s]

Epochs: 6606 | epoch avg. loss: 0.111 | test avg. loss: 210.946
Epochs: 6607 | epoch avg. loss: 0.053 | test avg. loss: 209.800
Epochs: 6608 | epoch avg. loss: 0.059 | test avg. loss: 209.954


 13%|█▎        | 6612/50000 [09:42<1:09:57, 10.34it/s]

Epochs: 6609 | epoch avg. loss: 0.038 | test avg. loss: 209.517
Epochs: 6610 | epoch avg. loss: 0.097 | test avg. loss: 209.485
Epochs: 6611 | epoch avg. loss: 0.064 | test avg. loss: 210.585


 13%|█▎        | 6614/50000 [09:42<1:03:47, 11.34it/s]

Epochs: 6612 | epoch avg. loss: 0.144 | test avg. loss: 209.582
Epochs: 6613 | epoch avg. loss: 0.361 | test avg. loss: 209.162
Epochs: 6614 | epoch avg. loss: 0.595 | test avg. loss: 212.738


 13%|█▎        | 6618/50000 [09:43<1:03:04, 11.46it/s]

Epochs: 6615 | epoch avg. loss: 1.279 | test avg. loss: 211.277
Epochs: 6616 | epoch avg. loss: 0.441 | test avg. loss: 210.919
Epochs: 6617 | epoch avg. loss: 0.793 | test avg. loss: 217.493


 13%|█▎        | 6620/50000 [09:43<1:02:54, 11.49it/s]

Epochs: 6618 | epoch avg. loss: 2.923 | test avg. loss: 213.632
Epochs: 6619 | epoch avg. loss: 1.689 | test avg. loss: 216.378
Epochs: 6620 | epoch avg. loss: 7.437 | test avg. loss: 214.362


 13%|█▎        | 6624/50000 [09:43<1:04:14, 11.25it/s]

Epochs: 6621 | epoch avg. loss: 5.644 | test avg. loss: 212.673
Epochs: 6622 | epoch avg. loss: 2.829 | test avg. loss: 221.641
Epochs: 6623 | epoch avg. loss: 8.869 | test avg. loss: 214.143


 13%|█▎        | 6626/50000 [09:43<1:03:04, 11.46it/s]

Epochs: 6624 | epoch avg. loss: 2.761 | test avg. loss: 208.469
Epochs: 6625 | epoch avg. loss: 3.760 | test avg. loss: 198.929
Epochs: 6626 | epoch avg. loss: 4.439 | test avg. loss: 216.223


 13%|█▎        | 6630/50000 [09:44<1:02:10, 11.63it/s]

Epochs: 6627 | epoch avg. loss: 10.134 | test avg. loss: 199.959
Epochs: 6628 | epoch avg. loss: 5.828 | test avg. loss: 205.598
Epochs: 6629 | epoch avg. loss: 3.124 | test avg. loss: 213.864


 13%|█▎        | 6632/50000 [09:44<1:03:19, 11.42it/s]

Epochs: 6630 | epoch avg. loss: 2.902 | test avg. loss: 216.517
Epochs: 6631 | epoch avg. loss: 3.363 | test avg. loss: 228.610
Epochs: 6632 | epoch avg. loss: 5.678 | test avg. loss: 217.604


 13%|█▎        | 6636/50000 [09:44<55:31, 13.02it/s]

Epochs: 6633 | epoch avg. loss: 1.058 | test avg. loss: 210.740
Epochs: 6634 | epoch avg. loss: 0.904 | test avg. loss: 211.283
Epochs: 6635 | epoch avg. loss: 0.775 | test avg. loss: 208.494
Epochs: 6636 | epoch avg. loss: 0.677 | test avg. loss: 207.939


 13%|█▎        | 6640/50000 [09:44<54:19, 13.30it/s]

Epochs: 6637 | epoch avg. loss: 0.498 | test avg. loss: 209.100
Epochs: 6638 | epoch avg. loss: 0.423 | test avg. loss: 209.546
Epochs: 6639 | epoch avg. loss: 0.370 | test avg. loss: 213.152


 13%|█▎        | 6642/50000 [09:45<1:01:08, 11.82it/s]

Epochs: 6640 | epoch avg. loss: 0.347 | test avg. loss: 214.911
Epochs: 6641 | epoch avg. loss: 0.248 | test avg. loss: 218.919
Epochs: 6642 | epoch avg. loss: 0.374 | test avg. loss: 220.296


 13%|█▎        | 6646/50000 [09:45<1:03:38, 11.35it/s]

Epochs: 6643 | epoch avg. loss: 0.894 | test avg. loss: 224.467
Epochs: 6644 | epoch avg. loss: 0.436 | test avg. loss: 230.787
Epochs: 6645 | epoch avg. loss: 0.545 | test avg. loss: 231.934


 13%|█▎        | 6648/50000 [09:45<1:04:25, 11.21it/s]

Epochs: 6646 | epoch avg. loss: 0.484 | test avg. loss: 236.535
Epochs: 6647 | epoch avg. loss: 0.584 | test avg. loss: 233.039
Epochs: 6648 | epoch avg. loss: 0.240 | test avg. loss: 223.795


 13%|█▎        | 6652/50000 [09:46<1:05:41, 11.00it/s]

Epochs: 6649 | epoch avg. loss: 0.248 | test avg. loss: 220.069
Epochs: 6650 | epoch avg. loss: 0.289 | test avg. loss: 216.715
Epochs: 6651 | epoch avg. loss: 0.236 | test avg. loss: 217.279


 13%|█▎        | 6654/50000 [09:46<1:04:19, 11.23it/s]

Epochs: 6652 | epoch avg. loss: 0.160 | test avg. loss: 219.419
Epochs: 6653 | epoch avg. loss: 0.142 | test avg. loss: 219.454
Epochs: 6654 | epoch avg. loss: 0.103 | test avg. loss: 218.824


 13%|█▎        | 6658/50000 [09:46<1:00:14, 11.99it/s]

Epochs: 6655 | epoch avg. loss: 0.098 | test avg. loss: 218.705
Epochs: 6656 | epoch avg. loss: 0.106 | test avg. loss: 217.432
Epochs: 6657 | epoch avg. loss: 0.076 | test avg. loss: 216.564


 13%|█▎        | 6660/50000 [09:46<57:39, 12.53it/s]

Epochs: 6658 | epoch avg. loss: 0.086 | test avg. loss: 217.023
Epochs: 6659 | epoch avg. loss: 0.085 | test avg. loss: 217.567
Epochs: 6660 | epoch avg. loss: 0.081 | test avg. loss: 217.155


 13%|█▎        | 6662/50000 [09:46<56:16, 12.84it/s]

Epochs: 6661 | epoch avg. loss: 0.103 | test avg. loss: 218.155
Epochs: 6662 | epoch avg. loss: 0.049 | test avg. loss: 217.484
Epochs: 6663 | epoch avg. loss: 0.129 | test avg. loss: 218.406


 13%|█▎        | 6666/50000 [09:47<59:00, 12.24it/s]

Epochs: 6664 | epoch avg. loss: 0.083 | test avg. loss: 217.493
Epochs: 6665 | epoch avg. loss: 0.101 | test avg. loss: 216.582
Epochs: 6666 | epoch avg. loss: 0.094 | test avg. loss: 217.579


 13%|█▎        | 6670/50000 [09:47<59:06, 12.22it/s]  

Epochs: 6667 | epoch avg. loss: 0.132 | test avg. loss: 216.064
Epochs: 6668 | epoch avg. loss: 0.042 | test avg. loss: 216.431
Epochs: 6669 | epoch avg. loss: 0.128 | test avg. loss: 215.908


 13%|█▎        | 6672/50000 [09:47<58:48, 12.28it/s]

Epochs: 6670 | epoch avg. loss: 0.072 | test avg. loss: 215.809
Epochs: 6671 | epoch avg. loss: 0.079 | test avg. loss: 217.179
Epochs: 6672 | epoch avg. loss: 0.135 | test avg. loss: 215.715


                                                    

Epochs: 6673 | epoch avg. loss: 0.189 | test avg. loss: 217.328
Epochs: 6674 | epoch avg. loss: 0.121 | test avg. loss: 217.571


 13%|█▎        | 6678/50000 [09:48<1:02:01, 11.64it/s]

Epochs: 6675 | epoch avg. loss: 0.093 | test avg. loss: 216.349
Epochs: 6676 | epoch avg. loss: 0.172 | test avg. loss: 218.300
Epochs: 6677 | epoch avg. loss: 0.141 | test avg. loss: 216.212


 13%|█▎        | 6680/50000 [09:48<1:00:51, 11.86it/s]

Epochs: 6678 | epoch avg. loss: 0.160 | test avg. loss: 217.745
Epochs: 6679 | epoch avg. loss: 0.179 | test avg. loss: 218.025
Epochs: 6680 | epoch avg. loss: 0.085 | test avg. loss: 217.471


 13%|█▎        | 6684/50000 [09:48<59:13, 12.19it/s]

Epochs: 6681 | epoch avg. loss: 0.098 | test avg. loss: 218.024
Epochs: 6682 | epoch avg. loss: 0.057 | test avg. loss: 218.154
Epochs: 6683 | epoch avg. loss: 0.056 | test avg. loss: 218.642


 13%|█▎        | 6686/50000 [09:48<56:27, 12.78it/s]

Epochs: 6684 | epoch avg. loss: 0.056 | test avg. loss: 218.656
Epochs: 6685 | epoch avg. loss: 0.054 | test avg. loss: 217.325
Epochs: 6686 | epoch avg. loss: 0.085 | test avg. loss: 217.628


 13%|█▎        | 6690/50000 [09:49<58:48, 12.28it/s]  

Epochs: 6687 | epoch avg. loss: 0.063 | test avg. loss: 217.453
Epochs: 6688 | epoch avg. loss: 0.065 | test avg. loss: 216.564
Epochs: 6689 | epoch avg. loss: 0.046 | test avg. loss: 217.165


 13%|█▎        | 6692/50000 [09:49<56:40, 12.74it/s]

Epochs: 6690 | epoch avg. loss: 0.049 | test avg. loss: 217.160
Epochs: 6691 | epoch avg. loss: 0.037 | test avg. loss: 217.765
Epochs: 6692 | epoch avg. loss: 0.074 | test avg. loss: 218.246


 13%|█▎        | 6696/50000 [09:49<55:07, 13.09it/s]

Epochs: 6693 | epoch avg. loss: 0.058 | test avg. loss: 217.240
Epochs: 6694 | epoch avg. loss: 0.113 | test avg. loss: 218.099
Epochs: 6695 | epoch avg. loss: 0.108 | test avg. loss: 217.962


 13%|█▎        | 6698/50000 [09:49<54:10, 13.32it/s]

Epochs: 6696 | epoch avg. loss: 0.079 | test avg. loss: 216.366
Epochs: 6697 | epoch avg. loss: 0.132 | test avg. loss: 218.461
Epochs: 6698 | epoch avg. loss: 0.213 | test avg. loss: 216.334


 13%|█▎        | 6698/50000 [09:49<54:10, 13.32it/s]

Epochs: 6699 | epoch avg. loss: 0.242 | test avg. loss: 216.323


 13%|█▎        | 6702/50000 [09:51<2:35:11,  4.65it/s]

Epochs: 6700 | epoch avg. loss: 0.131 | test avg. loss: 218.624
Epochs: 6701 | epoch avg. loss: 0.283 | test avg. loss: 215.500
Epochs: 6702 | epoch avg. loss: 0.230 | test avg. loss: 218.035


 13%|█▎        | 6706/50000 [09:51<1:43:02,  7.00it/s]

Epochs: 6703 | epoch avg. loss: 0.236 | test avg. loss: 216.921
Epochs: 6704 | epoch avg. loss: 0.093 | test avg. loss: 216.073
Epochs: 6705 | epoch avg. loss: 0.145 | test avg. loss: 218.602


 13%|█▎        | 6708/50000 [09:51<1:28:46,  8.13it/s]

Epochs: 6706 | epoch avg. loss: 0.503 | test avg. loss: 214.503
Epochs: 6707 | epoch avg. loss: 0.483 | test avg. loss: 215.808
Epochs: 6708 | epoch avg. loss: 0.768 | test avg. loss: 217.201


 13%|█▎        | 6712/50000 [09:52<1:13:07,  9.87it/s]

Epochs: 6709 | epoch avg. loss: 0.514 | test avg. loss: 214.805
Epochs: 6710 | epoch avg. loss: 1.431 | test avg. loss: 218.474
Epochs: 6711 | epoch avg. loss: 1.190 | test avg. loss: 215.859


 13%|█▎        | 6714/50000 [09:52<1:08:04, 10.60it/s]

Epochs: 6712 | epoch avg. loss: 1.195 | test avg. loss: 216.462
Epochs: 6713 | epoch avg. loss: 1.109 | test avg. loss: 225.519
Epochs: 6714 | epoch avg. loss: 1.837 | test avg. loss: 217.421


 13%|█▎        | 6718/50000 [09:52<58:42, 12.29it/s]  

Epochs: 6715 | epoch avg. loss: 1.936 | test avg. loss: 217.218
Epochs: 6716 | epoch avg. loss: 0.704 | test avg. loss: 215.850
Epochs: 6717 | epoch avg. loss: 0.619 | test avg. loss: 211.960


 13%|█▎        | 6720/50000 [09:52<56:13, 12.83it/s]

Epochs: 6718 | epoch avg. loss: 0.877 | test avg. loss: 213.994
Epochs: 6719 | epoch avg. loss: 0.412 | test avg. loss: 210.876
Epochs: 6720 | epoch avg. loss: 0.560 | test avg. loss: 213.957


 13%|█▎        | 6724/50000 [09:53<57:36, 12.52it/s]

Epochs: 6721 | epoch avg. loss: 0.373 | test avg. loss: 213.979
Epochs: 6722 | epoch avg. loss: 0.286 | test avg. loss: 213.582
Epochs: 6723 | epoch avg. loss: 0.423 | test avg. loss: 216.554


 13%|█▎        | 6728/50000 [09:53<52:44, 13.68it/s]

Epochs: 6724 | epoch avg. loss: 0.306 | test avg. loss: 213.668
Epochs: 6725 | epoch avg. loss: 0.189 | test avg. loss: 215.756
Epochs: 6726 | epoch avg. loss: 0.197 | test avg. loss: 213.421
Epochs: 6727 | epoch avg. loss: 0.313 | test avg. loss: 214.239




Epochs: 6728 | epoch avg. loss: 0.222 | test avg. loss: 214.062
Epochs: 6729 | epoch avg. loss: 0.314 | test avg. loss: 214.898
Epochs: 6730 | epoch avg. loss: 0.323 | test avg. loss: 220.534


 13%|█▎        | 6734/50000 [09:53<49:38, 14.52it/s]

Epochs: 6731 | epoch avg. loss: 0.558 | test avg. loss: 216.803
Epochs: 6732 | epoch avg. loss: 0.463 | test avg. loss: 218.542
Epochs: 6733 | epoch avg. loss: 0.245 | test avg. loss: 219.071
Epochs: 6734 | epoch avg. loss: 0.199 | test avg. loss: 219.989


 13%|█▎        | 6738/50000 [09:54<52:15, 13.80it/s]

Epochs: 6735 | epoch avg. loss: 0.170 | test avg. loss: 222.552
Epochs: 6736 | epoch avg. loss: 0.186 | test avg. loss: 220.900
Epochs: 6737 | epoch avg. loss: 0.458 | test avg. loss: 225.492


 13%|█▎        | 6742/50000 [09:54<50:25, 14.30it/s]

Epochs: 6738 | epoch avg. loss: 0.456 | test avg. loss: 226.243
Epochs: 6739 | epoch avg. loss: 0.181 | test avg. loss: 224.894
Epochs: 6740 | epoch avg. loss: 0.272 | test avg. loss: 227.846
Epochs: 6741 | epoch avg. loss: 0.424 | test avg. loss: 222.707


 13%|█▎        | 6744/50000 [09:54<51:59, 13.86it/s]

Epochs: 6742 | epoch avg. loss: 0.384 | test avg. loss: 221.168
Epochs: 6743 | epoch avg. loss: 0.278 | test avg. loss: 221.834
Epochs: 6744 | epoch avg. loss: 0.532 | test avg. loss: 218.452


 13%|█▎        | 6748/50000 [09:54<50:35, 14.25it/s]

Epochs: 6745 | epoch avg. loss: 0.372 | test avg. loss: 221.194
Epochs: 6746 | epoch avg. loss: 0.438 | test avg. loss: 220.727
Epochs: 6747 | epoch avg. loss: 0.319 | test avg. loss: 224.041


 14%|█▎        | 6750/50000 [09:54<51:38, 13.96it/s]

Epochs: 6748 | epoch avg. loss: 0.210 | test avg. loss: 223.297
Epochs: 6749 | epoch avg. loss: 0.227 | test avg. loss: 219.781
Epochs: 6750 | epoch avg. loss: 0.188 | test avg. loss: 220.773


                                                    

Epochs: 6751 | epoch avg. loss: 0.210 | test avg. loss: 217.653
Epochs: 6752 | epoch avg. loss: 0.239 | test avg. loss: 218.971
Epochs: 6753 | epoch avg. loss: 0.244 | test avg. loss: 215.884


 14%|█▎        | 6756/50000 [09:55<50:58, 14.14it/s]

Epochs: 6754 | epoch avg. loss: 0.256 | test avg. loss: 214.882
Epochs: 6755 | epoch avg. loss: 0.218 | test avg. loss: 217.860
Epochs: 6756 | epoch avg. loss: 0.195 | test avg. loss: 216.661
Epochs: 6757 | epoch avg. loss: 0.291 | test avg. loss: 219.839


 14%|█▎        | 6760/50000 [09:55<49:30, 14.55it/s]

Epochs: 6758 | epoch avg. loss: 0.169 | test avg. loss: 221.802
Epochs: 6759 | epoch avg. loss: 0.197 | test avg. loss: 219.674
Epochs: 6760 | epoch avg. loss: 0.225 | test avg. loss: 221.365


 14%|█▎        | 6762/50000 [09:55<51:57, 13.87it/s]

Epochs: 6761 | epoch avg. loss: 0.239 | test avg. loss: 219.705


 14%|█▎        | 6764/50000 [09:57<2:49:12,  4.26it/s]

Epochs: 6762 | epoch avg. loss: 0.092 | test avg. loss: 218.387
Epochs: 6763 | epoch avg. loss: 0.089 | test avg. loss: 219.823
Epochs: 6764 | epoch avg. loss: 0.086 | test avg. loss: 219.932


 14%|█▎        | 6768/50000 [09:57<1:51:43,  6.45it/s]

Epochs: 6765 | epoch avg. loss: 0.073 | test avg. loss: 222.371
Epochs: 6766 | epoch avg. loss: 0.069 | test avg. loss: 222.721
Epochs: 6767 | epoch avg. loss: 0.057 | test avg. loss: 224.259


 14%|█▎        | 6770/50000 [09:57<1:35:37,  7.53it/s]

Epochs: 6768 | epoch avg. loss: 0.109 | test avg. loss: 223.629
Epochs: 6769 | epoch avg. loss: 0.079 | test avg. loss: 222.111
Epochs: 6770 | epoch avg. loss: 0.134 | test avg. loss: 222.488


 14%|█▎        | 6774/50000 [09:57<1:13:42,  9.77it/s]

Epochs: 6771 | epoch avg. loss: 0.060 | test avg. loss: 222.008
Epochs: 6772 | epoch avg. loss: 0.050 | test avg. loss: 220.730
Epochs: 6773 | epoch avg. loss: 0.086 | test avg. loss: 221.368


 14%|█▎        | 6776/50000 [09:57<1:08:38, 10.49it/s]

Epochs: 6774 | epoch avg. loss: 0.067 | test avg. loss: 221.560
Epochs: 6775 | epoch avg. loss: 0.052 | test avg. loss: 221.194
Epochs: 6776 | epoch avg. loss: 0.080 | test avg. loss: 222.311


 14%|█▎        | 6780/50000 [09:58<1:03:49, 11.29it/s]

Epochs: 6777 | epoch avg. loss: 0.037 | test avg. loss: 222.599
Epochs: 6778 | epoch avg. loss: 0.049 | test avg. loss: 221.897
Epochs: 6779 | epoch avg. loss: 0.156 | test avg. loss: 221.616


 14%|█▎        | 6782/50000 [09:58<1:03:10, 11.40it/s]

Epochs: 6780 | epoch avg. loss: 0.066 | test avg. loss: 222.730
Epochs: 6781 | epoch avg. loss: 0.182 | test avg. loss: 220.546
Epochs: 6782 | epoch avg. loss: 0.118 | test avg. loss: 221.315


 14%|█▎        | 6786/50000 [09:58<59:14, 12.16it/s]  

Epochs: 6783 | epoch avg. loss: 0.091 | test avg. loss: 222.387
Epochs: 6784 | epoch avg. loss: 0.127 | test avg. loss: 220.678
Epochs: 6785 | epoch avg. loss: 0.209 | test avg. loss: 221.787


 14%|█▎        | 6788/50000 [09:58<58:10, 12.38it/s]

Epochs: 6786 | epoch avg. loss: 0.129 | test avg. loss: 222.652
Epochs: 6787 | epoch avg. loss: 0.122 | test avg. loss: 221.311
Epochs: 6788 | epoch avg. loss: 0.092 | test avg. loss: 221.667


 14%|█▎        | 6792/50000 [09:59<1:00:03, 11.99it/s]

Epochs: 6789 | epoch avg. loss: 0.040 | test avg. loss: 220.949
Epochs: 6790 | epoch avg. loss: 0.121 | test avg. loss: 221.204
Epochs: 6791 | epoch avg. loss: 0.073 | test avg. loss: 223.517


 14%|█▎        | 6794/50000 [09:59<1:02:36, 11.50it/s]

Epochs: 6792 | epoch avg. loss: 0.112 | test avg. loss: 221.907
Epochs: 6793 | epoch avg. loss: 0.358 | test avg. loss: 223.291
Epochs: 6794 | epoch avg. loss: 0.130 | test avg. loss: 223.633


 14%|█▎        | 6798/50000 [09:59<1:01:15, 11.75it/s]

Epochs: 6795 | epoch avg. loss: 0.126 | test avg. loss: 222.284
Epochs: 6796 | epoch avg. loss: 0.239 | test avg. loss: 226.042
Epochs: 6797 | epoch avg. loss: 0.314 | test avg. loss: 223.640


 14%|█▎        | 6798/50000 [09:59<1:01:15, 11.75it/s]

Epochs: 6798 | epoch avg. loss: 0.284 | test avg. loss: 224.329
Epochs: 6799 | epoch avg. loss: 0.200 | test avg. loss: 226.094


 14%|█▎        | 6803/50000 [10:01<2:28:07,  4.86it/s]

Epochs: 6800 | epoch avg. loss: 0.256 | test avg. loss: 222.845
Epochs: 6801 | epoch avg. loss: 0.399 | test avg. loss: 225.576
Epochs: 6802 | epoch avg. loss: 0.321 | test avg. loss: 224.529


 14%|█▎        | 6805/50000 [10:01<1:56:09,  6.20it/s]

Epochs: 6803 | epoch avg. loss: 0.107 | test avg. loss: 223.109
Epochs: 6804 | epoch avg. loss: 0.138 | test avg. loss: 224.940
Epochs: 6805 | epoch avg. loss: 0.105 | test avg. loss: 222.952


 14%|█▎        | 6809/50000 [10:02<1:28:08,  8.17it/s]

Epochs: 6806 | epoch avg. loss: 0.113 | test avg. loss: 223.060
Epochs: 6807 | epoch avg. loss: 0.074 | test avg. loss: 223.241
Epochs: 6808 | epoch avg. loss: 0.109 | test avg. loss: 221.115


 14%|█▎        | 6811/50000 [10:02<1:16:23,  9.42it/s]

Epochs: 6809 | epoch avg. loss: 0.118 | test avg. loss: 221.147
Epochs: 6810 | epoch avg. loss: 0.074 | test avg. loss: 221.858
Epochs: 6811 | epoch avg. loss: 0.097 | test avg. loss: 221.647


 14%|█▎        | 6815/50000 [10:02<1:07:32, 10.66it/s]

Epochs: 6812 | epoch avg. loss: 0.036 | test avg. loss: 222.503
Epochs: 6813 | epoch avg. loss: 0.051 | test avg. loss: 222.039
Epochs: 6814 | epoch avg. loss: 0.057 | test avg. loss: 222.612


                                                      

Epochs: 6815 | epoch avg. loss: 0.046 | test avg. loss: 223.067
Epochs: 6816 | epoch avg. loss: 0.054 | test avg. loss: 222.726
Epochs: 6817 | epoch avg. loss: 0.036 | test avg. loss: 223.023


 14%|█▎        | 6821/50000 [10:02<1:00:26, 11.91it/s]

Epochs: 6818 | epoch avg. loss: 0.044 | test avg. loss: 223.449
Epochs: 6819 | epoch avg. loss: 0.052 | test avg. loss: 221.675
Epochs: 6820 | epoch avg. loss: 0.197 | test avg. loss: 222.588


 14%|█▎        | 6823/50000 [10:03<1:01:11, 11.76it/s]

Epochs: 6821 | epoch avg. loss: 0.040 | test avg. loss: 222.902
Epochs: 6822 | epoch avg. loss: 0.042 | test avg. loss: 222.268
Epochs: 6823 | epoch avg. loss: 0.049 | test avg. loss: 222.586


 14%|█▎        | 6827/50000 [10:03<56:16, 12.78it/s]

Epochs: 6824 | epoch avg. loss: 0.042 | test avg. loss: 221.878
Epochs: 6825 | epoch avg. loss: 0.058 | test avg. loss: 220.532
Epochs: 6826 | epoch avg. loss: 0.195 | test avg. loss: 222.073


 14%|█▎        | 6829/50000 [10:03<53:45, 13.38it/s]

Epochs: 6827 | epoch avg. loss: 0.070 | test avg. loss: 222.195
Epochs: 6828 | epoch avg. loss: 0.058 | test avg. loss: 221.572
Epochs: 6829 | epoch avg. loss: 0.056 | test avg. loss: 223.047


 14%|█▎        | 6833/50000 [10:03<54:45, 13.14it/s]

Epochs: 6830 | epoch avg. loss: 0.092 | test avg. loss: 223.252
Epochs: 6831 | epoch avg. loss: 0.047 | test avg. loss: 223.058
Epochs: 6832 | epoch avg. loss: 0.064 | test avg. loss: 223.687


 14%|█▎        | 6835/50000 [10:04<59:23, 12.11it/s]

Epochs: 6833 | epoch avg. loss: 0.044 | test avg. loss: 221.642
Epochs: 6834 | epoch avg. loss: 0.134 | test avg. loss: 222.650
Epochs: 6835 | epoch avg. loss: 0.131 | test avg. loss: 220.793


 14%|█▎        | 6839/50000 [10:04<1:01:18, 11.73it/s]

Epochs: 6836 | epoch avg. loss: 0.146 | test avg. loss: 219.403
Epochs: 6837 | epoch avg. loss: 0.328 | test avg. loss: 221.466
Epochs: 6838 | epoch avg. loss: 0.377 | test avg. loss: 221.775


 14%|█▎        | 6841/50000 [10:04<58:35, 12.28it/s]

Epochs: 6839 | epoch avg. loss: 0.132 | test avg. loss: 221.839
Epochs: 6840 | epoch avg. loss: 0.183 | test avg. loss: 226.422
Epochs: 6841 | epoch avg. loss: 0.882 | test avg. loss: 220.990


 14%|█▎        | 6845/50000 [10:04<56:47, 12.66it/s]

Epochs: 6842 | epoch avg. loss: 0.295 | test avg. loss: 218.425
Epochs: 6843 | epoch avg. loss: 0.262 | test avg. loss: 220.250
Epochs: 6844 | epoch avg. loss: 0.283 | test avg. loss: 217.966


 14%|█▎        | 6847/50000 [10:05<1:01:33, 11.68it/s]

Epochs: 6845 | epoch avg. loss: 0.275 | test avg. loss: 219.329
Epochs: 6846 | epoch avg. loss: 0.083 | test avg. loss: 220.850
Epochs: 6847 | epoch avg. loss: 0.083 | test avg. loss: 221.880


 14%|█▎        | 6851/50000 [10:05<1:01:17, 11.73it/s]

Epochs: 6848 | epoch avg. loss: 0.103 | test avg. loss: 224.313
Epochs: 6849 | epoch avg. loss: 0.113 | test avg. loss: 222.287
Epochs: 6850 | epoch avg. loss: 0.093 | test avg. loss: 221.932


 14%|█▎        | 6853/50000 [10:05<58:46, 12.23it/s]

Epochs: 6851 | epoch avg. loss: 0.076 | test avg. loss: 222.399
Epochs: 6852 | epoch avg. loss: 0.114 | test avg. loss: 219.915
Epochs: 6853 | epoch avg. loss: 0.181 | test avg. loss: 221.543


 14%|█▎        | 6857/50000 [10:05<1:00:40, 11.85it/s]

Epochs: 6854 | epoch avg. loss: 0.207 | test avg. loss: 221.553
Epochs: 6855 | epoch avg. loss: 0.179 | test avg. loss: 220.125
Epochs: 6856 | epoch avg. loss: 0.498 | test avg. loss: 221.687


 14%|█▎        | 6859/50000 [10:06<1:04:22, 11.17it/s]

Epochs: 6857 | epoch avg. loss: 0.241 | test avg. loss: 221.104
Epochs: 6858 | epoch avg. loss: 0.167 | test avg. loss: 218.331
Epochs: 6859 | epoch avg. loss: 0.469 | test avg. loss: 220.746


 14%|█▎        | 6863/50000 [10:06<56:50, 12.65it/s]

Epochs: 6860 | epoch avg. loss: 0.553 | test avg. loss: 220.837
Epochs: 6861 | epoch avg. loss: 0.209 | test avg. loss: 219.886
Epochs: 6862 | epoch avg. loss: 0.365 | test avg. loss: 223.796
Epochs: 6863 | epoch avg. loss: 0.370 | test avg. loss: 220.508


 14%|█▎        | 6867/50000 [10:06<52:30, 13.69it/s]

Epochs: 6864 | epoch avg. loss: 0.240 | test avg. loss: 218.795
Epochs: 6865 | epoch avg. loss: 0.144 | test avg. loss: 220.032
Epochs: 6866 | epoch avg. loss: 0.131 | test avg. loss: 217.991


 14%|█▎        | 6869/50000 [10:06<52:55, 13.58it/s]

Epochs: 6867 | epoch avg. loss: 0.136 | test avg. loss: 219.072
Epochs: 6868 | epoch avg. loss: 0.074 | test avg. loss: 220.132
Epochs: 6869 | epoch avg. loss: 0.080 | test avg. loss: 219.742


 14%|█▎        | 6873/50000 [10:07<56:46, 12.66it/s]

Epochs: 6870 | epoch avg. loss: 0.050 | test avg. loss: 219.681
Epochs: 6871 | epoch avg. loss: 0.037 | test avg. loss: 219.740
Epochs: 6872 | epoch avg. loss: 0.047 | test avg. loss: 219.681


 14%|█▍        | 6875/50000 [10:07<56:22, 12.75it/s]

Epochs: 6873 | epoch avg. loss: 0.049 | test avg. loss: 219.093
Epochs: 6874 | epoch avg. loss: 0.065 | test avg. loss: 220.336
Epochs: 6875 | epoch avg. loss: 0.152 | test avg. loss: 220.555


 14%|█▍        | 6879/50000 [10:07<59:50, 12.01it/s]  

Epochs: 6876 | epoch avg. loss: 0.074 | test avg. loss: 219.586
Epochs: 6877 | epoch avg. loss: 0.245 | test avg. loss: 220.367
Epochs: 6878 | epoch avg. loss: 0.078 | test avg. loss: 221.228


 14%|█▍        | 6881/50000 [10:07<1:00:21, 11.91it/s]

Epochs: 6879 | epoch avg. loss: 0.112 | test avg. loss: 218.373
Epochs: 6880 | epoch avg. loss: 0.286 | test avg. loss: 219.635
Epochs: 6881 | epoch avg. loss: 0.246 | test avg. loss: 220.254


 14%|█▍        | 6885/50000 [10:08<1:02:23, 11.52it/s]

Epochs: 6882 | epoch avg. loss: 0.228 | test avg. loss: 218.013
Epochs: 6883 | epoch avg. loss: 0.795 | test avg. loss: 221.822
Epochs: 6884 | epoch avg. loss: 0.377 | test avg. loss: 220.439


 14%|█▍        | 6887/50000 [10:08<59:16, 12.12it/s]

Epochs: 6885 | epoch avg. loss: 0.180 | test avg. loss: 219.053
Epochs: 6886 | epoch avg. loss: 0.299 | test avg. loss: 222.932
Epochs: 6887 | epoch avg. loss: 0.333 | test avg. loss: 220.474


 14%|█▍        | 6891/50000 [10:08<56:35, 12.69it/s]

Epochs: 6888 | epoch avg. loss: 0.377 | test avg. loss: 221.808
Epochs: 6889 | epoch avg. loss: 0.197 | test avg. loss: 226.101
Epochs: 6890 | epoch avg. loss: 0.473 | test avg. loss: 221.344


 14%|█▍        | 6893/50000 [10:08<55:30, 12.94it/s]

Epochs: 6891 | epoch avg. loss: 0.137 | test avg. loss: 220.060
Epochs: 6892 | epoch avg. loss: 0.093 | test avg. loss: 220.020
Epochs: 6893 | epoch avg. loss: 0.119 | test avg. loss: 217.867


 14%|█▍        | 6897/50000 [10:09<1:01:14, 11.73it/s]

Epochs: 6894 | epoch avg. loss: 0.321 | test avg. loss: 220.220
Epochs: 6895 | epoch avg. loss: 0.097 | test avg. loss: 221.844
Epochs: 6896 | epoch avg. loss: 0.083 | test avg. loss: 221.280


 14%|█▍        | 6899/50000 [10:09<57:30, 12.49it/s]

Epochs: 6897 | epoch avg. loss: 0.138 | test avg. loss: 223.393
Epochs: 6898 | epoch avg. loss: 0.189 | test avg. loss: 221.236
Epochs: 6899 | epoch avg. loss: 0.116 | test avg. loss: 218.503


 14%|█▍        | 6903/50000 [10:10<2:29:00,  4.82it/s]

Epochs: 6900 | epoch avg. loss: 0.134 | test avg. loss: 219.733
Epochs: 6901 | epoch avg. loss: 0.519 | test avg. loss: 218.133
Epochs: 6902 | epoch avg. loss: 0.477 | test avg. loss: 219.819


 14%|█▍        | 6905/50000 [10:11<1:58:49,  6.04it/s]

Epochs: 6903 | epoch avg. loss: 0.319 | test avg. loss: 224.030
Epochs: 6904 | epoch avg. loss: 0.338 | test avg. loss: 221.111
Epochs: 6905 | epoch avg. loss: 0.161 | test avg. loss: 219.850


 14%|█▍        | 6909/50000 [10:11<1:28:34,  8.11it/s]

Epochs: 6906 | epoch avg. loss: 0.125 | test avg. loss: 217.748
Epochs: 6907 | epoch avg. loss: 0.112 | test avg. loss: 217.479
Epochs: 6908 | epoch avg. loss: 0.154 | test avg. loss: 218.682


 14%|█▍        | 6911/50000 [10:11<1:18:52,  9.10it/s]

Epochs: 6909 | epoch avg. loss: 0.105 | test avg. loss: 218.988
Epochs: 6910 | epoch avg. loss: 0.085 | test avg. loss: 220.899
Epochs: 6911 | epoch avg. loss: 0.095 | test avg. loss: 222.183


 14%|█▍        | 6915/50000 [10:11<1:03:44, 11.27it/s]

Epochs: 6912 | epoch avg. loss: 0.073 | test avg. loss: 223.074
Epochs: 6913 | epoch avg. loss: 0.183 | test avg. loss: 221.880
Epochs: 6914 | epoch avg. loss: 0.109 | test avg. loss: 219.635


 14%|█▍        | 6917/50000 [10:12<1:04:40, 11.10it/s]

Epochs: 6915 | epoch avg. loss: 0.208 | test avg. loss: 221.140
Epochs: 6916 | epoch avg. loss: 0.224 | test avg. loss: 218.701
Epochs: 6917 | epoch avg. loss: 0.144 | test avg. loss: 218.507


 14%|█▍        | 6921/50000 [10:12<1:00:00, 11.97it/s]

Epochs: 6918 | epoch avg. loss: 0.113 | test avg. loss: 220.489
Epochs: 6919 | epoch avg. loss: 0.438 | test avg. loss: 218.297
Epochs: 6920 | epoch avg. loss: 0.336 | test avg. loss: 218.573


 14%|█▍        | 6925/50000 [10:12<54:22, 13.20it/s]

Epochs: 6921 | epoch avg. loss: 0.269 | test avg. loss: 223.759
Epochs: 6922 | epoch avg. loss: 1.196 | test avg. loss: 219.770
Epochs: 6923 | epoch avg. loss: 0.506 | test avg. loss: 218.054
Epochs: 6924 | epoch avg. loss: 0.679 | test avg. loss: 221.310


 14%|█▍        | 6929/50000 [10:12<50:49, 14.12it/s]

Epochs: 6925 | epoch avg. loss: 0.657 | test avg. loss: 218.912
Epochs: 6926 | epoch avg. loss: 0.367 | test avg. loss: 219.342
Epochs: 6927 | epoch avg. loss: 0.671 | test avg. loss: 225.175
Epochs: 6928 | epoch avg. loss: 0.617 | test avg. loss: 224.587


 14%|█▍        | 6931/50000 [10:13<52:25, 13.69it/s]

Epochs: 6929 | epoch avg. loss: 0.372 | test avg. loss: 221.846
Epochs: 6930 | epoch avg. loss: 1.077 | test avg. loss: 224.569
Epochs: 6931 | epoch avg. loss: 0.987 | test avg. loss: 221.630


 14%|█▍        | 6935/50000 [10:13<53:49, 13.34it/s]

Epochs: 6932 | epoch avg. loss: 0.444 | test avg. loss: 216.730
Epochs: 6933 | epoch avg. loss: 0.918 | test avg. loss: 221.574
Epochs: 6934 | epoch avg. loss: 1.194 | test avg. loss: 218.102


 14%|█▍        | 6937/50000 [10:13<55:21, 12.97it/s]

Epochs: 6935 | epoch avg. loss: 1.603 | test avg. loss: 218.083
Epochs: 6936 | epoch avg. loss: 2.568 | test avg. loss: 227.551
Epochs: 6937 | epoch avg. loss: 3.389 | test avg. loss: 216.731


 14%|█▍        | 6941/50000 [10:13<54:27, 13.18it/s]

Epochs: 6938 | epoch avg. loss: 3.368 | test avg. loss: 215.144
Epochs: 6939 | epoch avg. loss: 2.571 | test avg. loss: 232.303
Epochs: 6940 | epoch avg. loss: 8.373 | test avg. loss: 214.655


 14%|█▍        | 6943/50000 [10:14<59:26, 12.07it/s]

Epochs: 6941 | epoch avg. loss: 6.325 | test avg. loss: 219.165
Epochs: 6942 | epoch avg. loss: 1.136 | test avg. loss: 224.389
Epochs: 6943 | epoch avg. loss: 1.443 | test avg. loss: 224.724


 14%|█▍        | 6947/50000 [10:14<55:58, 12.82it/s]

Epochs: 6944 | epoch avg. loss: 1.279 | test avg. loss: 226.594
Epochs: 6945 | epoch avg. loss: 1.674 | test avg. loss: 216.604
Epochs: 6946 | epoch avg. loss: 1.332 | test avg. loss: 223.592


 14%|█▍        | 6949/50000 [10:14<54:51, 13.08it/s]

Epochs: 6947 | epoch avg. loss: 3.553 | test avg. loss: 219.095
Epochs: 6948 | epoch avg. loss: 3.120 | test avg. loss: 225.070
Epochs: 6949 | epoch avg. loss: 1.472 | test avg. loss: 235.398


 14%|█▍        | 6953/50000 [10:14<56:04, 12.79it/s]

Epochs: 6950 | epoch avg. loss: 1.801 | test avg. loss: 226.394
Epochs: 6951 | epoch avg. loss: 1.582 | test avg. loss: 232.347
Epochs: 6952 | epoch avg. loss: 1.048 | test avg. loss: 222.536


 14%|█▍        | 6955/50000 [10:14<54:54, 13.06it/s]

Epochs: 6953 | epoch avg. loss: 1.114 | test avg. loss: 225.585
Epochs: 6954 | epoch avg. loss: 0.938 | test avg. loss: 221.815
Epochs: 6955 | epoch avg. loss: 0.602 | test avg. loss: 219.005


 14%|█▍        | 6959/50000 [10:15<57:10, 12.55it/s]

Epochs: 6956 | epoch avg. loss: 0.394 | test avg. loss: 219.246
Epochs: 6957 | epoch avg. loss: 0.285 | test avg. loss: 216.419
Epochs: 6958 | epoch avg. loss: 0.324 | test avg. loss: 220.066


 14%|█▍        | 6961/50000 [10:15<56:50, 12.62it/s]

Epochs: 6959 | epoch avg. loss: 0.241 | test avg. loss: 218.582
Epochs: 6960 | epoch avg. loss: 0.271 | test avg. loss: 217.969
Epochs: 6961 | epoch avg. loss: 0.171 | test avg. loss: 217.037


 14%|█▍        | 6965/50000 [10:15<54:03, 13.27it/s]

Epochs: 6962 | epoch avg. loss: 0.112 | test avg. loss: 215.670
Epochs: 6963 | epoch avg. loss: 0.112 | test avg. loss: 217.431
Epochs: 6964 | epoch avg. loss: 0.145 | test avg. loss: 215.717


 14%|█▍        | 6967/50000 [10:15<53:13, 13.48it/s]

Epochs: 6965 | epoch avg. loss: 0.204 | test avg. loss: 218.187
Epochs: 6966 | epoch avg. loss: 0.127 | test avg. loss: 216.478
Epochs: 6967 | epoch avg. loss: 0.321 | test avg. loss: 218.651


 14%|█▍        | 6971/50000 [10:16<55:45, 12.86it/s]

Epochs: 6968 | epoch avg. loss: 0.150 | test avg. loss: 218.253
Epochs: 6969 | epoch avg. loss: 0.138 | test avg. loss: 217.380
Epochs: 6970 | epoch avg. loss: 0.125 | test avg. loss: 217.275


 14%|█▍        | 6973/50000 [10:16<54:34, 13.14it/s]

Epochs: 6971 | epoch avg. loss: 0.125 | test avg. loss: 214.345
Epochs: 6972 | epoch avg. loss: 0.149 | test avg. loss: 216.579
Epochs: 6973 | epoch avg. loss: 0.204 | test avg. loss: 213.642


 14%|█▍        | 6977/50000 [10:16<52:13, 13.73it/s]

Epochs: 6974 | epoch avg. loss: 0.260 | test avg. loss: 216.305
Epochs: 6975 | epoch avg. loss: 0.128 | test avg. loss: 215.608
Epochs: 6976 | epoch avg. loss: 0.192 | test avg. loss: 218.232


 14%|█▍        | 6979/50000 [10:16<55:23, 12.95it/s]

Epochs: 6977 | epoch avg. loss: 0.099 | test avg. loss: 218.125
Epochs: 6978 | epoch avg. loss: 0.131 | test avg. loss: 218.610
Epochs: 6979 | epoch avg. loss: 0.071 | test avg. loss: 217.502


 14%|█▍        | 6983/50000 [10:17<55:28, 12.92it/s]

Epochs: 6980 | epoch avg. loss: 0.059 | test avg. loss: 216.673
Epochs: 6981 | epoch avg. loss: 0.050 | test avg. loss: 216.324
Epochs: 6982 | epoch avg. loss: 0.061 | test avg. loss: 216.339


 14%|█▍        | 6985/50000 [10:17<54:25, 13.17it/s]

Epochs: 6983 | epoch avg. loss: 0.060 | test avg. loss: 216.813
Epochs: 6984 | epoch avg. loss: 0.057 | test avg. loss: 216.709
Epochs: 6985 | epoch avg. loss: 0.054 | test avg. loss: 217.718


 14%|█▍        | 6989/50000 [10:17<56:26, 12.70it/s]

Epochs: 6986 | epoch avg. loss: 0.047 | test avg. loss: 217.591
Epochs: 6987 | epoch avg. loss: 0.072 | test avg. loss: 219.085
Epochs: 6988 | epoch avg. loss: 0.073 | test avg. loss: 218.185


 14%|█▍        | 6991/50000 [10:17<56:19, 12.73it/s]

Epochs: 6989 | epoch avg. loss: 0.076 | test avg. loss: 218.328
Epochs: 6990 | epoch avg. loss: 0.075 | test avg. loss: 217.636
Epochs: 6991 | epoch avg. loss: 0.079 | test avg. loss: 216.493


 14%|█▍        | 6995/50000 [10:17<56:23, 12.71it/s]

Epochs: 6992 | epoch avg. loss: 0.115 | test avg. loss: 218.437
Epochs: 6993 | epoch avg. loss: 0.180 | test avg. loss: 216.841
Epochs: 6994 | epoch avg. loss: 0.089 | test avg. loss: 218.066


 14%|█▍        | 6997/50000 [10:18<54:09, 13.23it/s]

Epochs: 6995 | epoch avg. loss: 0.112 | test avg. loss: 218.145
Epochs: 6996 | epoch avg. loss: 0.079 | test avg. loss: 217.496
Epochs: 6997 | epoch avg. loss: 0.089 | test avg. loss: 218.767


 14%|█▍        | 6999/50000 [10:18<55:35, 12.89it/s]

Epochs: 6998 | epoch avg. loss: 0.071 | test avg. loss: 217.136
Epochs: 6999 | epoch avg. loss: 0.076 | test avg. loss: 217.928


 14%|█▍        | 7003/50000 [10:20<2:47:23,  4.28it/s]

Epochs: 7000 | epoch avg. loss: 0.099 | test avg. loss: 217.978
Epochs: 7001 | epoch avg. loss: 0.072 | test avg. loss: 217.668
Epochs: 7002 | epoch avg. loss: 0.043 | test avg. loss: 217.655


 14%|█▍        | 7007/50000 [10:20<1:45:29,  6.79it/s]

Epochs: 7003 | epoch avg. loss: 0.039 | test avg. loss: 217.692
Epochs: 7004 | epoch avg. loss: 0.047 | test avg. loss: 218.318
Epochs: 7005 | epoch avg. loss: 0.108 | test avg. loss: 217.659
Epochs: 7006 | epoch avg. loss: 0.057 | test avg. loss: 216.712


 14%|█▍        | 7011/50000 [10:20<1:16:08,  9.41it/s]

Epochs: 7007 | epoch avg. loss: 0.108 | test avg. loss: 217.638
Epochs: 7008 | epoch avg. loss: 0.068 | test avg. loss: 217.055
Epochs: 7009 | epoch avg. loss: 0.065 | test avg. loss: 217.213
Epochs: 7010 | epoch avg. loss: 0.055 | test avg. loss: 218.243


 14%|█▍        | 7013/50000 [10:20<1:09:40, 10.28it/s]

Epochs: 7011 | epoch avg. loss: 0.062 | test avg. loss: 217.466
Epochs: 7012 | epoch avg. loss: 0.063 | test avg. loss: 218.048
Epochs: 7013 | epoch avg. loss: 0.077 | test avg. loss: 218.675


 14%|█▍        | 7017/50000 [10:21<1:04:53, 11.04it/s]

Epochs: 7014 | epoch avg. loss: 0.103 | test avg. loss: 216.079
Epochs: 7015 | epoch avg. loss: 0.317 | test avg. loss: 217.932
Epochs: 7016 | epoch avg. loss: 0.187 | test avg. loss: 217.712


 14%|█▍        | 7019/50000 [10:21<1:04:39, 11.08it/s]

Epochs: 7017 | epoch avg. loss: 0.103 | test avg. loss: 216.445
Epochs: 7018 | epoch avg. loss: 0.144 | test avg. loss: 218.305
Epochs: 7019 | epoch avg. loss: 0.109 | test avg. loss: 217.368


 14%|█▍        | 7023/50000 [10:21<1:02:22, 11.48it/s]

Epochs: 7020 | epoch avg. loss: 0.044 | test avg. loss: 218.074
Epochs: 7021 | epoch avg. loss: 0.126 | test avg. loss: 218.452
Epochs: 7022 | epoch avg. loss: 0.099 | test avg. loss: 216.491


 14%|█▍        | 7025/50000 [10:21<1:00:22, 11.86it/s]

Epochs: 7023 | epoch avg. loss: 0.172 | test avg. loss: 219.290
Epochs: 7024 | epoch avg. loss: 0.269 | test avg. loss: 217.720
Epochs: 7025 | epoch avg. loss: 0.083 | test avg. loss: 217.465


 14%|█▍        | 7029/50000 [10:22<1:02:54, 11.39it/s]

Epochs: 7026 | epoch avg. loss: 0.074 | test avg. loss: 218.649
Epochs: 7027 | epoch avg. loss: 0.077 | test avg. loss: 218.054
Epochs: 7028 | epoch avg. loss: 0.040 | test avg. loss: 217.441


 14%|█▍        | 7031/50000 [10:22<1:01:04, 11.73it/s]

Epochs: 7029 | epoch avg. loss: 0.040 | test avg. loss: 217.990
Epochs: 7030 | epoch avg. loss: 0.051 | test avg. loss: 217.227
Epochs: 7031 | epoch avg. loss: 0.080 | test avg. loss: 217.698


 14%|█▍        | 7035/50000 [10:22<55:13, 12.97it/s]

Epochs: 7032 | epoch avg. loss: 0.043 | test avg. loss: 218.373
Epochs: 7033 | epoch avg. loss: 0.047 | test avg. loss: 218.913
Epochs: 7034 | epoch avg. loss: 0.051 | test avg. loss: 218.599


 14%|█▍        | 7037/50000 [10:22<55:27, 12.91it/s]

Epochs: 7035 | epoch avg. loss: 0.050 | test avg. loss: 218.533
Epochs: 7036 | epoch avg. loss: 0.055 | test avg. loss: 220.123
Epochs: 7037 | epoch avg. loss: 0.135 | test avg. loss: 218.056


 14%|█▍        | 7041/50000 [10:23<59:17, 12.07it/s]

Epochs: 7038 | epoch avg. loss: 0.070 | test avg. loss: 219.316
Epochs: 7039 | epoch avg. loss: 0.182 | test avg. loss: 217.516
Epochs: 7040 | epoch avg. loss: 0.083 | test avg. loss: 216.811


 14%|█▍        | 7043/50000 [10:23<1:00:00, 11.93it/s]

Epochs: 7041 | epoch avg. loss: 0.058 | test avg. loss: 217.206
Epochs: 7042 | epoch avg. loss: 0.049 | test avg. loss: 217.175
Epochs: 7043 | epoch avg. loss: 0.046 | test avg. loss: 218.027


 14%|█▍        | 7047/50000 [10:23<54:39, 13.10it/s]

Epochs: 7044 | epoch avg. loss: 0.044 | test avg. loss: 218.017
Epochs: 7045 | epoch avg. loss: 0.088 | test avg. loss: 218.871
Epochs: 7046 | epoch avg. loss: 0.043 | test avg. loss: 219.350
Epochs: 7047 | epoch avg. loss: 0.041 | test avg. loss: 218.247


 14%|█▍        | 7051/50000 [10:23<51:44, 13.83it/s]

Epochs: 7048 | epoch avg. loss: 0.087 | test avg. loss: 219.055
Epochs: 7049 | epoch avg. loss: 0.091 | test avg. loss: 218.999
Epochs: 7050 | epoch avg. loss: 0.060 | test avg. loss: 218.009


 14%|█▍        | 7053/50000 [10:24<56:47, 12.60it/s]

Epochs: 7051 | epoch avg. loss: 0.043 | test avg. loss: 218.455
Epochs: 7052 | epoch avg. loss: 0.055 | test avg. loss: 217.861
Epochs: 7053 | epoch avg. loss: 0.047 | test avg. loss: 218.002


 14%|█▍        | 7057/50000 [10:24<1:02:04, 11.53it/s]

Epochs: 7054 | epoch avg. loss: 0.040 | test avg. loss: 218.493
Epochs: 7055 | epoch avg. loss: 0.050 | test avg. loss: 219.073
Epochs: 7056 | epoch avg. loss: 0.088 | test avg. loss: 218.294


 14%|█▍        | 7059/50000 [10:24<1:01:13, 11.69it/s]

Epochs: 7057 | epoch avg. loss: 0.032 | test avg. loss: 218.632
Epochs: 7058 | epoch avg. loss: 0.054 | test avg. loss: 217.849
Epochs: 7059 | epoch avg. loss: 0.058 | test avg. loss: 217.717


 14%|█▍        | 7063/50000 [10:24<1:02:48, 11.40it/s]

Epochs: 7060 | epoch avg. loss: 0.044 | test avg. loss: 218.288
Epochs: 7061 | epoch avg. loss: 0.044 | test avg. loss: 217.355
Epochs: 7062 | epoch avg. loss: 0.086 | test avg. loss: 217.387


 14%|█▍        | 7065/50000 [10:25<1:08:59, 10.37it/s]

Epochs: 7063 | epoch avg. loss: 0.047 | test avg. loss: 217.895
Epochs: 7064 | epoch avg. loss: 0.035 | test avg. loss: 217.805
Epochs: 7065 | epoch avg. loss: 0.041 | test avg. loss: 217.461


 14%|█▍        | 7069/50000 [10:25<1:04:21, 11.12it/s]

Epochs: 7066 | epoch avg. loss: 0.055 | test avg. loss: 218.896
Epochs: 7067 | epoch avg. loss: 0.091 | test avg. loss: 219.027
Epochs: 7068 | epoch avg. loss: 0.071 | test avg. loss: 217.500


 14%|█▍        | 7071/50000 [10:25<1:00:16, 11.87it/s]

Epochs: 7069 | epoch avg. loss: 0.100 | test avg. loss: 218.006
Epochs: 7070 | epoch avg. loss: 0.039 | test avg. loss: 218.387
Epochs: 7071 | epoch avg. loss: 0.037 | test avg. loss: 218.472


 14%|█▍        | 7075/50000 [10:25<1:00:10, 11.89it/s]

Epochs: 7072 | epoch avg. loss: 0.040 | test avg. loss: 218.086
Epochs: 7073 | epoch avg. loss: 0.034 | test avg. loss: 217.668
Epochs: 7074 | epoch avg. loss: 0.036 | test avg. loss: 217.574


 14%|█▍        | 7077/50000 [10:26<1:05:47, 10.87it/s]

Epochs: 7075 | epoch avg. loss: 0.037 | test avg. loss: 216.989
Epochs: 7076 | epoch avg. loss: 0.056 | test avg. loss: 218.135
Epochs: 7077 | epoch avg. loss: 0.066 | test avg. loss: 217.242


 14%|█▍        | 7081/50000 [10:26<1:02:21, 11.47it/s]

Epochs: 7078 | epoch avg. loss: 0.129 | test avg. loss: 217.381
Epochs: 7079 | epoch avg. loss: 0.086 | test avg. loss: 219.029
Epochs: 7080 | epoch avg. loss: 0.172 | test avg. loss: 215.877


 14%|█▍        | 7083/50000 [10:26<57:47, 12.38it/s]

Epochs: 7081 | epoch avg. loss: 0.214 | test avg. loss: 217.580
Epochs: 7082 | epoch avg. loss: 0.180 | test avg. loss: 216.429
Epochs: 7083 | epoch avg. loss: 0.124 | test avg. loss: 214.588


 14%|█▍        | 7087/50000 [10:26<56:55, 12.56it/s]

Epochs: 7084 | epoch avg. loss: 0.213 | test avg. loss: 218.237
Epochs: 7085 | epoch avg. loss: 0.678 | test avg. loss: 216.427
Epochs: 7086 | epoch avg. loss: 0.375 | test avg. loss: 216.932


 14%|█▍        | 7089/50000 [10:27<59:43, 11.97it/s]

Epochs: 7087 | epoch avg. loss: 0.279 | test avg. loss: 221.175
Epochs: 7088 | epoch avg. loss: 0.685 | test avg. loss: 216.385
Epochs: 7089 | epoch avg. loss: 0.500 | test avg. loss: 215.645


 14%|█▍        | 7093/50000 [10:27<58:43, 12.18it/s]

Epochs: 7090 | epoch avg. loss: 0.254 | test avg. loss: 219.411
Epochs: 7091 | epoch avg. loss: 0.703 | test avg. loss: 214.682
Epochs: 7092 | epoch avg. loss: 0.597 | test avg. loss: 217.668


 14%|█▍        | 7095/50000 [10:27<1:00:03, 11.91it/s]

Epochs: 7093 | epoch avg. loss: 0.387 | test avg. loss: 221.742
Epochs: 7094 | epoch avg. loss: 0.521 | test avg. loss: 216.981
Epochs: 7095 | epoch avg. loss: 0.641 | test avg. loss: 222.022


 14%|█▍        | 7099/50000 [10:27<1:02:19, 11.47it/s]

Epochs: 7096 | epoch avg. loss: 0.579 | test avg. loss: 219.954
Epochs: 7097 | epoch avg. loss: 0.404 | test avg. loss: 218.815
Epochs: 7098 | epoch avg. loss: 0.311 | test avg. loss: 220.329


 14%|█▍        | 7099/50000 [10:28<1:02:19, 11.47it/s]

Epochs: 7099 | epoch avg. loss: 0.414 | test avg. loss: 213.837


 14%|█▍        | 7103/50000 [10:29<2:32:00,  4.70it/s]

Epochs: 7100 | epoch avg. loss: 0.445 | test avg. loss: 215.700
Epochs: 7101 | epoch avg. loss: 0.329 | test avg. loss: 215.704
Epochs: 7102 | epoch avg. loss: 0.258 | test avg. loss: 214.571


 14%|█▍        | 7105/50000 [10:29<2:02:00,  5.86it/s]

Epochs: 7103 | epoch avg. loss: 0.723 | test avg. loss: 218.456
Epochs: 7104 | epoch avg. loss: 0.298 | test avg. loss: 218.433
Epochs: 7105 | epoch avg. loss: 0.238 | test avg. loss: 218.168


 14%|█▍        | 7109/50000 [10:29<1:26:58,  8.22it/s]

Epochs: 7106 | epoch avg. loss: 0.236 | test avg. loss: 221.105
Epochs: 7107 | epoch avg. loss: 0.468 | test avg. loss: 216.676
Epochs: 7108 | epoch avg. loss: 0.153 | test avg. loss: 217.803


 14%|█▍        | 7111/50000 [10:30<1:16:57,  9.29it/s]

Epochs: 7109 | epoch avg. loss: 0.279 | test avg. loss: 215.683
Epochs: 7110 | epoch avg. loss: 0.242 | test avg. loss: 215.015
Epochs: 7111 | epoch avg. loss: 0.221 | test avg. loss: 219.558


 14%|█▍        | 7115/50000 [10:30<1:00:56, 11.73it/s]

Epochs: 7112 | epoch avg. loss: 0.520 | test avg. loss: 215.224
Epochs: 7113 | epoch avg. loss: 0.938 | test avg. loss: 216.422
Epochs: 7114 | epoch avg. loss: 0.297 | test avg. loss: 217.887
Epochs: 7115 | epoch avg. loss: 0.364 | test avg. loss: 213.693


 14%|█▍        | 7119/50000 [10:30<56:01, 12.76it/s]

Epochs: 7116 | epoch avg. loss: 0.379 | test avg. loss: 215.471
Epochs: 7117 | epoch avg. loss: 0.313 | test avg. loss: 214.552
Epochs: 7118 | epoch avg. loss: 0.197 | test avg. loss: 214.152


 14%|█▍        | 7121/50000 [10:30<56:22, 12.68it/s]

Epochs: 7119 | epoch avg. loss: 0.251 | test avg. loss: 217.349
Epochs: 7120 | epoch avg. loss: 0.203 | test avg. loss: 215.885
Epochs: 7121 | epoch avg. loss: 0.233 | test avg. loss: 216.248


 14%|█▍        | 7125/50000 [10:31<58:59, 12.11it/s]

Epochs: 7122 | epoch avg. loss: 0.106 | test avg. loss: 215.019
Epochs: 7123 | epoch avg. loss: 0.061 | test avg. loss: 216.259
Epochs: 7124 | epoch avg. loss: 0.356 | test avg. loss: 217.760


 14%|█▍        | 7127/50000 [10:31<57:19, 12.46it/s]

Epochs: 7125 | epoch avg. loss: 0.310 | test avg. loss: 215.014
Epochs: 7126 | epoch avg. loss: 0.230 | test avg. loss: 217.033
Epochs: 7127 | epoch avg. loss: 0.087 | test avg. loss: 216.698


 14%|█▍        | 7131/50000 [10:31<53:34, 13.33it/s]

Epochs: 7128 | epoch avg. loss: 0.080 | test avg. loss: 215.096
Epochs: 7129 | epoch avg. loss: 0.130 | test avg. loss: 216.908
Epochs: 7130 | epoch avg. loss: 0.119 | test avg. loss: 214.865


 14%|█▍        | 7133/50000 [10:31<53:31, 13.35it/s]

Epochs: 7131 | epoch avg. loss: 0.148 | test avg. loss: 214.362
Epochs: 7132 | epoch avg. loss: 0.099 | test avg. loss: 216.505
Epochs: 7133 | epoch avg. loss: 0.179 | test avg. loss: 214.305


 14%|█▍        | 7137/50000 [10:32<53:33, 13.34it/s]

Epochs: 7134 | epoch avg. loss: 0.055 | test avg. loss: 214.442
Epochs: 7135 | epoch avg. loss: 0.054 | test avg. loss: 214.122
Epochs: 7136 | epoch avg. loss: 0.071 | test avg. loss: 213.975


 14%|█▍        | 7139/50000 [10:32<51:44, 13.81it/s]

Epochs: 7137 | epoch avg. loss: 0.070 | test avg. loss: 215.495
Epochs: 7138 | epoch avg. loss: 0.074 | test avg. loss: 215.666
Epochs: 7139 | epoch avg. loss: 0.063 | test avg. loss: 215.879


 14%|█▍        | 7143/50000 [10:32<51:45, 13.80it/s]

Epochs: 7140 | epoch avg. loss: 0.064 | test avg. loss: 218.008
Epochs: 7141 | epoch avg. loss: 0.088 | test avg. loss: 216.555
Epochs: 7142 | epoch avg. loss: 0.110 | test avg. loss: 215.248


 14%|█▍        | 7145/50000 [10:32<50:46, 14.07it/s]

Epochs: 7143 | epoch avg. loss: 0.110 | test avg. loss: 216.129
Epochs: 7144 | epoch avg. loss: 0.054 | test avg. loss: 214.275
Epochs: 7145 | epoch avg. loss: 0.126 | test avg. loss: 215.790


 14%|█▍        | 7149/50000 [10:32<53:16, 13.41it/s]

Epochs: 7146 | epoch avg. loss: 0.072 | test avg. loss: 216.071
Epochs: 7147 | epoch avg. loss: 0.052 | test avg. loss: 215.359
Epochs: 7148 | epoch avg. loss: 0.044 | test avg. loss: 216.581


 14%|█▍        | 7151/50000 [10:33<56:34, 12.62it/s]

Epochs: 7149 | epoch avg. loss: 0.061 | test avg. loss: 215.199
Epochs: 7150 | epoch avg. loss: 0.174 | test avg. loss: 214.608
Epochs: 7151 | epoch avg. loss: 0.159 | test avg. loss: 217.232


 14%|█▍        | 7155/50000 [10:33<53:08, 13.44it/s]

Epochs: 7152 | epoch avg. loss: 0.239 | test avg. loss: 215.727
Epochs: 7153 | epoch avg. loss: 0.079 | test avg. loss: 214.733
Epochs: 7154 | epoch avg. loss: 0.129 | test avg. loss: 216.708
Epochs: 7155 | epoch avg. loss: 0.108 | test avg. loss: 215.235


 14%|█▍        | 7159/50000 [10:33<50:51, 14.04it/s]

Epochs: 7156 | epoch avg. loss: 0.086 | test avg. loss: 214.727
Epochs: 7157 | epoch avg. loss: 0.080 | test avg. loss: 215.573
Epochs: 7158 | epoch avg. loss: 0.035 | test avg. loss: 214.880


 14%|█▍        | 7161/50000 [10:33<49:57, 14.29it/s]

Epochs: 7159 | epoch avg. loss: 0.057 | test avg. loss: 215.613
Epochs: 7160 | epoch avg. loss: 0.055 | test avg. loss: 215.897
Epochs: 7161 | epoch avg. loss: 0.055 | test avg. loss: 214.577


 14%|█▍        | 7165/50000 [10:34<58:23, 12.23it/s]

Epochs: 7162 | epoch avg. loss: 0.085 | test avg. loss: 214.835
Epochs: 7163 | epoch avg. loss: 0.037 | test avg. loss: 214.882
Epochs: 7164 | epoch avg. loss: 0.038 | test avg. loss: 214.858


 14%|█▍        | 7167/50000 [10:34<56:47, 12.57it/s]

Epochs: 7165 | epoch avg. loss: 0.037 | test avg. loss: 215.615
Epochs: 7166 | epoch avg. loss: 0.042 | test avg. loss: 215.541
Epochs: 7167 | epoch avg. loss: 0.043 | test avg. loss: 215.217


 14%|█▍        | 7171/50000 [10:34<53:48, 13.26it/s]

Epochs: 7168 | epoch avg. loss: 0.070 | test avg. loss: 216.105
Epochs: 7169 | epoch avg. loss: 0.041 | test avg. loss: 216.238
Epochs: 7170 | epoch avg. loss: 0.038 | test avg. loss: 216.597


 14%|█▍        | 7173/50000 [10:34<53:02, 13.46it/s]

Epochs: 7171 | epoch avg. loss: 0.038 | test avg. loss: 215.952
Epochs: 7172 | epoch avg. loss: 0.065 | test avg. loss: 216.073
Epochs: 7173 | epoch avg. loss: 0.047 | test avg. loss: 217.386


 14%|█▍        | 7177/50000 [10:35<56:01, 12.74it/s]

Epochs: 7174 | epoch avg. loss: 0.094 | test avg. loss: 215.099
Epochs: 7175 | epoch avg. loss: 0.141 | test avg. loss: 215.317
Epochs: 7176 | epoch avg. loss: 0.052 | test avg. loss: 216.298


 14%|█▍        | 7179/50000 [10:35<55:53, 12.77it/s]

Epochs: 7177 | epoch avg. loss: 0.067 | test avg. loss: 215.123
Epochs: 7178 | epoch avg. loss: 0.052 | test avg. loss: 216.129
Epochs: 7179 | epoch avg. loss: 0.043 | test avg. loss: 215.476
Epochs: 7180 | epoch avg. loss: 0.075 | test avg. loss: 215.329


 14%|█▍        | 7183/50000 [10:35<51:44, 13.79it/s]

Epochs: 7181 | epoch avg. loss: 0.081 | test avg. loss: 217.818
Epochs: 7182 | epoch avg. loss: 0.319 | test avg. loss: 214.217
Epochs: 7183 | epoch avg. loss: 0.415 | test avg. loss: 214.902


 14%|█▍        | 7187/50000 [10:35<54:16, 13.14it/s]

Epochs: 7184 | epoch avg. loss: 0.158 | test avg. loss: 216.802
Epochs: 7185 | epoch avg. loss: 0.082 | test avg. loss: 216.157
Epochs: 7186 | epoch avg. loss: 0.045 | test avg. loss: 215.838


 14%|█▍        | 7189/50000 [10:36<54:56, 12.98it/s]

Epochs: 7187 | epoch avg. loss: 0.046 | test avg. loss: 214.535
Epochs: 7188 | epoch avg. loss: 0.125 | test avg. loss: 214.678
Epochs: 7189 | epoch avg. loss: 0.057 | test avg. loss: 215.913


 14%|█▍        | 7193/50000 [10:36<56:16, 12.68it/s]

Epochs: 7190 | epoch avg. loss: 0.106 | test avg. loss: 215.089
Epochs: 7191 | epoch avg. loss: 0.082 | test avg. loss: 214.652
Epochs: 7192 | epoch avg. loss: 0.131 | test avg. loss: 216.632


 14%|█▍        | 7197/50000 [10:36<51:52, 13.75it/s]

Epochs: 7193 | epoch avg. loss: 0.146 | test avg. loss: 214.908
Epochs: 7194 | epoch avg. loss: 0.080 | test avg. loss: 212.703
Epochs: 7195 | epoch avg. loss: 0.198 | test avg. loss: 215.579
Epochs: 7196 | epoch avg. loss: 0.328 | test avg. loss: 213.026


 14%|█▍        | 7199/50000 [10:36<52:41, 13.54it/s]

Epochs: 7197 | epoch avg. loss: 0.158 | test avg. loss: 211.923
Epochs: 7198 | epoch avg. loss: 0.319 | test avg. loss: 215.669
Epochs: 7199 | epoch avg. loss: 0.415 | test avg. loss: 215.409


 14%|█▍        | 7203/50000 [10:38<2:35:19,  4.59it/s]

Epochs: 7200 | epoch avg. loss: 0.109 | test avg. loss: 215.003
Epochs: 7201 | epoch avg. loss: 0.102 | test avg. loss: 217.519
Epochs: 7202 | epoch avg. loss: 0.147 | test avg. loss: 217.135


 14%|█▍        | 7205/50000 [10:38<2:06:02,  5.66it/s]

Epochs: 7203 | epoch avg. loss: 0.060 | test avg. loss: 215.848
Epochs: 7204 | epoch avg. loss: 0.131 | test avg. loss: 215.927
Epochs: 7205 | epoch avg. loss: 0.083 | test avg. loss: 216.734


 14%|█▍        | 7209/50000 [10:38<1:32:41,  7.69it/s]

Epochs: 7206 | epoch avg. loss: 0.149 | test avg. loss: 214.192
Epochs: 7207 | epoch avg. loss: 0.119 | test avg. loss: 214.423
Epochs: 7208 | epoch avg. loss: 0.059 | test avg. loss: 215.462


 14%|█▍        | 7211/50000 [10:39<1:23:56,  8.50it/s]

Epochs: 7209 | epoch avg. loss: 0.037 | test avg. loss: 215.501
Epochs: 7210 | epoch avg. loss: 0.061 | test avg. loss: 215.976
Epochs: 7211 | epoch avg. loss: 0.035 | test avg. loss: 215.894


 14%|█▍        | 7215/50000 [10:39<1:06:25, 10.74it/s]

Epochs: 7212 | epoch avg. loss: 0.039 | test avg. loss: 215.408
Epochs: 7213 | epoch avg. loss: 0.051 | test avg. loss: 215.996
Epochs: 7214 | epoch avg. loss: 0.033 | test avg. loss: 215.118


 14%|█▍        | 7219/50000 [10:39<57:04, 12.49it/s]  

Epochs: 7215 | epoch avg. loss: 0.079 | test avg. loss: 215.786
Epochs: 7216 | epoch avg. loss: 0.046 | test avg. loss: 216.446
Epochs: 7217 | epoch avg. loss: 0.070 | test avg. loss: 215.819
Epochs: 7218 | epoch avg. loss: 0.034 | test avg. loss: 215.596


                                                    

Epochs: 7219 | epoch avg. loss: 0.033 | test avg. loss: 215.407
Epochs: 7220 | epoch avg. loss: 0.033 | test avg. loss: 215.420
Epochs: 7221 | epoch avg. loss: 0.032 | test avg. loss: 215.551


                                                    

Epochs: 7222 | epoch avg. loss: 0.032 | test avg. loss: 215.274
Epochs: 7223 | epoch avg. loss: 0.044 | test avg. loss: 215.925
Epochs: 7224 | epoch avg. loss: 0.036 | test avg. loss: 216.369


 14%|█▍        | 7229/50000 [10:40<49:01, 14.54it/s]

Epochs: 7225 | epoch avg. loss: 0.039 | test avg. loss: 215.512
Epochs: 7226 | epoch avg. loss: 0.192 | test avg. loss: 215.573
Epochs: 7227 | epoch avg. loss: 0.060 | test avg. loss: 216.302
Epochs: 7228 | epoch avg. loss: 0.085 | test avg. loss: 215.572


 14%|█▍        | 7231/50000 [10:40<47:35, 14.98it/s]

Epochs: 7229 | epoch avg. loss: 0.038 | test avg. loss: 215.535
Epochs: 7230 | epoch avg. loss: 0.041 | test avg. loss: 215.531
Epochs: 7231 | epoch avg. loss: 0.037 | test avg. loss: 215.810


 14%|█▍        | 7235/50000 [10:40<48:30, 14.69it/s]

Epochs: 7232 | epoch avg. loss: 0.041 | test avg. loss: 215.417
Epochs: 7233 | epoch avg. loss: 0.065 | test avg. loss: 214.770
Epochs: 7234 | epoch avg. loss: 0.164 | test avg. loss: 215.502
Epochs: 7235 | epoch avg. loss: 0.042 | test avg. loss: 216.265


 14%|█▍        | 7239/50000 [10:40<48:33, 14.68it/s]

Epochs: 7236 | epoch avg. loss: 0.045 | test avg. loss: 215.500
Epochs: 7237 | epoch avg. loss: 0.075 | test avg. loss: 215.094
Epochs: 7238 | epoch avg. loss: 0.048 | test avg. loss: 215.905


 14%|█▍        | 7241/50000 [10:41<48:03, 14.83it/s]

Epochs: 7239 | epoch avg. loss: 0.155 | test avg. loss: 214.603
Epochs: 7240 | epoch avg. loss: 0.038 | test avg. loss: 214.555
Epochs: 7241 | epoch avg. loss: 0.057 | test avg. loss: 216.128


 14%|█▍        | 7245/50000 [10:41<54:49, 13.00it/s]

Epochs: 7242 | epoch avg. loss: 0.178 | test avg. loss: 214.192
Epochs: 7243 | epoch avg. loss: 0.091 | test avg. loss: 213.908
Epochs: 7244 | epoch avg. loss: 0.089 | test avg. loss: 216.045


 14%|█▍        | 7247/50000 [10:41<55:32, 12.83it/s]

Epochs: 7245 | epoch avg. loss: 0.173 | test avg. loss: 215.257
Epochs: 7246 | epoch avg. loss: 0.141 | test avg. loss: 215.561
Epochs: 7247 | epoch avg. loss: 0.112 | test avg. loss: 216.708


 15%|█▍        | 7251/50000 [10:41<57:48, 12.33it/s]

Epochs: 7248 | epoch avg. loss: 0.050 | test avg. loss: 217.187
Epochs: 7249 | epoch avg. loss: 0.077 | test avg. loss: 217.115
Epochs: 7250 | epoch avg. loss: 0.063 | test avg. loss: 215.293


 15%|█▍        | 7253/50000 [10:42<58:56, 12.09it/s]

Epochs: 7251 | epoch avg. loss: 0.124 | test avg. loss: 215.762
Epochs: 7252 | epoch avg. loss: 0.065 | test avg. loss: 216.194
Epochs: 7253 | epoch avg. loss: 0.062 | test avg. loss: 215.215


 15%|█▍        | 7257/50000 [10:42<56:17, 12.65it/s]

Epochs: 7254 | epoch avg. loss: 0.057 | test avg. loss: 216.374
Epochs: 7255 | epoch avg. loss: 0.056 | test avg. loss: 215.532
Epochs: 7256 | epoch avg. loss: 0.136 | test avg. loss: 215.106


 15%|█▍        | 7259/50000 [10:42<53:24, 13.34it/s]

Epochs: 7257 | epoch avg. loss: 0.081 | test avg. loss: 216.279
Epochs: 7258 | epoch avg. loss: 0.114 | test avg. loss: 214.974
Epochs: 7259 | epoch avg. loss: 0.155 | test avg. loss: 214.744


 15%|█▍        | 7263/50000 [10:42<57:58, 12.29it/s]

Epochs: 7260 | epoch avg. loss: 0.229 | test avg. loss: 217.583
Epochs: 7261 | epoch avg. loss: 0.180 | test avg. loss: 217.374
Epochs: 7262 | epoch avg. loss: 0.123 | test avg. loss: 214.733


 15%|█▍        | 7265/50000 [10:43<59:55, 11.89it/s]

Epochs: 7263 | epoch avg. loss: 0.141 | test avg. loss: 214.455
Epochs: 7264 | epoch avg. loss: 0.060 | test avg. loss: 215.467
Epochs: 7265 | epoch avg. loss: 0.092 | test avg. loss: 214.056


 15%|█▍        | 7269/50000 [10:43<54:49, 12.99it/s]

Epochs: 7266 | epoch avg. loss: 0.130 | test avg. loss: 213.552
Epochs: 7267 | epoch avg. loss: 0.235 | test avg. loss: 214.890
Epochs: 7268 | epoch avg. loss: 0.086 | test avg. loss: 215.322
Epochs: 7269 | epoch avg. loss: 0.076 | test avg. loss: 213.861


 15%|█▍        | 7273/50000 [10:43<53:20, 13.35it/s]

Epochs: 7270 | epoch avg. loss: 0.219 | test avg. loss: 215.542
Epochs: 7271 | epoch avg. loss: 0.161 | test avg. loss: 215.612
Epochs: 7272 | epoch avg. loss: 0.147 | test avg. loss: 212.432


 15%|█▍        | 7275/50000 [10:43<50:51, 14.00it/s]

Epochs: 7273 | epoch avg. loss: 0.454 | test avg. loss: 212.495
Epochs: 7274 | epoch avg. loss: 0.143 | test avg. loss: 215.657
Epochs: 7275 | epoch avg. loss: 0.405 | test avg. loss: 214.374


 15%|█▍        | 7279/50000 [10:44<51:09, 13.92it/s]

Epochs: 7276 | epoch avg. loss: 0.080 | test avg. loss: 213.671
Epochs: 7277 | epoch avg. loss: 0.259 | test avg. loss: 215.918
Epochs: 7278 | epoch avg. loss: 0.126 | test avg. loss: 216.363
Epochs: 7279 | epoch avg. loss: 0.096 | test avg. loss: 213.966


 15%|█▍        | 7283/50000 [10:44<48:11, 14.77it/s]

Epochs: 7280 | epoch avg. loss: 0.321 | test avg. loss: 215.347
Epochs: 7281 | epoch avg. loss: 0.317 | test avg. loss: 216.809
Epochs: 7282 | epoch avg. loss: 0.338 | test avg. loss: 213.836
Epochs: 7283 | epoch avg. loss: 0.333 | test avg. loss: 214.854


 15%|█▍        | 7287/50000 [10:44<51:27, 13.83it/s]

Epochs: 7284 | epoch avg. loss: 0.160 | test avg. loss: 216.386
Epochs: 7285 | epoch avg. loss: 0.061 | test avg. loss: 216.626
Epochs: 7286 | epoch avg. loss: 0.073 | test avg. loss: 216.371


 15%|█▍        | 7289/50000 [10:44<55:39, 12.79it/s]

Epochs: 7287 | epoch avg. loss: 0.049 | test avg. loss: 216.470
Epochs: 7288 | epoch avg. loss: 0.073 | test avg. loss: 213.989
Epochs: 7289 | epoch avg. loss: 0.139 | test avg. loss: 213.510


 15%|█▍        | 7293/50000 [10:45<1:00:04, 11.85it/s]

Epochs: 7290 | epoch avg. loss: 0.081 | test avg. loss: 215.650
Epochs: 7291 | epoch avg. loss: 0.216 | test avg. loss: 214.612
Epochs: 7292 | epoch avg. loss: 0.059 | test avg. loss: 213.763


 15%|█▍        | 7295/50000 [10:45<59:05, 12.04it/s]

Epochs: 7293 | epoch avg. loss: 0.119 | test avg. loss: 216.903
Epochs: 7294 | epoch avg. loss: 0.250 | test avg. loss: 216.436
Epochs: 7295 | epoch avg. loss: 0.078 | test avg. loss: 214.908


 15%|█▍        | 7299/50000 [10:45<55:13, 12.89it/s]

Epochs: 7296 | epoch avg. loss: 0.225 | test avg. loss: 214.731
Epochs: 7297 | epoch avg. loss: 0.213 | test avg. loss: 215.987
Epochs: 7298 | epoch avg. loss: 0.183 | test avg. loss: 218.489


 15%|█▍        | 7299/50000 [10:45<55:13, 12.89it/s]

Epochs: 7299 | epoch avg. loss: 0.659 | test avg. loss: 214.225


 15%|█▍        | 7303/50000 [10:47<2:34:54,  4.59it/s]

Epochs: 7300 | epoch avg. loss: 0.156 | test avg. loss: 213.561
Epochs: 7301 | epoch avg. loss: 0.160 | test avg. loss: 214.724
Epochs: 7302 | epoch avg. loss: 0.198 | test avg. loss: 213.107


 15%|█▍        | 7307/50000 [10:47<1:39:06,  7.18it/s]

Epochs: 7303 | epoch avg. loss: 0.078 | test avg. loss: 213.494
Epochs: 7304 | epoch avg. loss: 0.041 | test avg. loss: 214.032
Epochs: 7305 | epoch avg. loss: 0.056 | test avg. loss: 215.041
Epochs: 7306 | epoch avg. loss: 0.081 | test avg. loss: 217.081


 15%|█▍        | 7311/50000 [10:47<1:12:29,  9.81it/s]

Epochs: 7307 | epoch avg. loss: 0.279 | test avg. loss: 215.069
Epochs: 7308 | epoch avg. loss: 0.071 | test avg. loss: 214.388
Epochs: 7309 | epoch avg. loss: 0.076 | test avg. loss: 216.600
Epochs: 7310 | epoch avg. loss: 0.286 | test avg. loss: 215.431


 15%|█▍        | 7313/50000 [10:47<1:09:16, 10.27it/s]

Epochs: 7311 | epoch avg. loss: 0.087 | test avg. loss: 213.523
Epochs: 7312 | epoch avg. loss: 0.492 | test avg. loss: 215.638
Epochs: 7313 | epoch avg. loss: 0.167 | test avg. loss: 218.560


 15%|█▍        | 7317/50000 [10:48<1:01:44, 11.52it/s]

Epochs: 7314 | epoch avg. loss: 0.556 | test avg. loss: 215.295
Epochs: 7315 | epoch avg. loss: 0.213 | test avg. loss: 212.828
Epochs: 7316 | epoch avg. loss: 0.323 | test avg. loss: 214.515


 15%|█▍        | 7319/50000 [10:48<1:02:21, 11.41it/s]

Epochs: 7317 | epoch avg. loss: 0.180 | test avg. loss: 211.863
Epochs: 7318 | epoch avg. loss: 0.170 | test avg. loss: 211.401
Epochs: 7319 | epoch avg. loss: 0.177 | test avg. loss: 213.855


 15%|█▍        | 7323/50000 [10:48<59:43, 11.91it/s]  

Epochs: 7320 | epoch avg. loss: 0.205 | test avg. loss: 214.907
Epochs: 7321 | epoch avg. loss: 0.117 | test avg. loss: 215.129
Epochs: 7322 | epoch avg. loss: 0.055 | test avg. loss: 216.299


 15%|█▍        | 7325/50000 [10:48<57:53, 12.29it/s]

Epochs: 7323 | epoch avg. loss: 0.163 | test avg. loss: 215.376
Epochs: 7324 | epoch avg. loss: 0.059 | test avg. loss: 213.532
Epochs: 7325 | epoch avg. loss: 0.083 | test avg. loss: 213.982


 15%|█▍        | 7329/50000 [10:49<57:53, 12.29it/s]

Epochs: 7326 | epoch avg. loss: 0.051 | test avg. loss: 215.203
Epochs: 7327 | epoch avg. loss: 0.146 | test avg. loss: 214.959
Epochs: 7328 | epoch avg. loss: 0.047 | test avg. loss: 214.904


 15%|█▍        | 7333/50000 [10:49<53:29, 13.29it/s]

Epochs: 7329 | epoch avg. loss: 0.034 | test avg. loss: 216.162
Epochs: 7330 | epoch avg. loss: 0.120 | test avg. loss: 216.896
Epochs: 7331 | epoch avg. loss: 0.118 | test avg. loss: 215.422
Epochs: 7332 | epoch avg. loss: 0.069 | test avg. loss: 216.008


                                                    

Epochs: 7333 | epoch avg. loss: 0.116 | test avg. loss: 216.373
Epochs: 7334 | epoch avg. loss: 0.242 | test avg. loss: 212.957
Epochs: 7335 | epoch avg. loss: 0.212 | test avg. loss: 212.007


 15%|█▍        | 7339/50000 [10:49<51:33, 13.79it/s]

Epochs: 7336 | epoch avg. loss: 0.273 | test avg. loss: 213.602
Epochs: 7337 | epoch avg. loss: 0.040 | test avg. loss: 213.982
Epochs: 7338 | epoch avg. loss: 0.033 | test avg. loss: 215.199


                                                    

Epochs: 7339 | epoch avg. loss: 0.060 | test avg. loss: 215.170
Epochs: 7340 | epoch avg. loss: 0.053 | test avg. loss: 213.899
Epochs: 7341 | epoch avg. loss: 0.206 | test avg. loss: 213.679


 15%|█▍        | 7345/50000 [10:50<50:56, 13.95it/s]

Epochs: 7342 | epoch avg. loss: 0.123 | test avg. loss: 214.524
Epochs: 7343 | epoch avg. loss: 0.053 | test avg. loss: 214.936
Epochs: 7344 | epoch avg. loss: 0.059 | test avg. loss: 214.289


 15%|█▍        | 7347/50000 [10:50<52:39, 13.50it/s]

Epochs: 7345 | epoch avg. loss: 0.073 | test avg. loss: 214.755
Epochs: 7346 | epoch avg. loss: 0.053 | test avg. loss: 215.283
Epochs: 7347 | epoch avg. loss: 0.038 | test avg. loss: 216.150


 15%|█▍        | 7351/50000 [10:50<51:35, 13.78it/s]

Epochs: 7348 | epoch avg. loss: 0.054 | test avg. loss: 215.917
Epochs: 7349 | epoch avg. loss: 0.036 | test avg. loss: 214.937
Epochs: 7350 | epoch avg. loss: 0.049 | test avg. loss: 215.198


 15%|█▍        | 7353/50000 [10:51<55:17, 12.86it/s]

Epochs: 7351 | epoch avg. loss: 0.060 | test avg. loss: 216.014
Epochs: 7352 | epoch avg. loss: 0.158 | test avg. loss: 214.807
Epochs: 7353 | epoch avg. loss: 0.200 | test avg. loss: 214.486


 15%|█▍        | 7357/50000 [10:51<53:21, 13.32it/s]

Epochs: 7354 | epoch avg. loss: 0.157 | test avg. loss: 218.791
Epochs: 7355 | epoch avg. loss: 0.916 | test avg. loss: 216.254
Epochs: 7356 | epoch avg. loss: 0.209 | test avg. loss: 214.979


 15%|█▍        | 7359/50000 [10:51<53:19, 13.33it/s]

Epochs: 7357 | epoch avg. loss: 0.979 | test avg. loss: 217.753
Epochs: 7358 | epoch avg. loss: 0.580 | test avg. loss: 217.128
Epochs: 7359 | epoch avg. loss: 0.321 | test avg. loss: 213.983


 15%|█▍        | 7363/50000 [10:51<54:23, 13.06it/s]

Epochs: 7360 | epoch avg. loss: 1.386 | test avg. loss: 215.996
Epochs: 7361 | epoch avg. loss: 0.840 | test avg. loss: 218.384
Epochs: 7362 | epoch avg. loss: 0.836 | test avg. loss: 215.413


 15%|█▍        | 7365/50000 [10:51<54:20, 13.08it/s]

Epochs: 7363 | epoch avg. loss: 0.693 | test avg. loss: 218.098
Epochs: 7364 | epoch avg. loss: 0.227 | test avg. loss: 223.668
Epochs: 7365 | epoch avg. loss: 0.657 | test avg. loss: 221.695


 15%|█▍        | 7369/50000 [10:52<53:09, 13.37it/s]

Epochs: 7366 | epoch avg. loss: 0.145 | test avg. loss: 218.333
Epochs: 7367 | epoch avg. loss: 0.320 | test avg. loss: 219.418
Epochs: 7368 | epoch avg. loss: 0.545 | test avg. loss: 222.526


 15%|█▍        | 7371/50000 [10:52<54:09, 13.12it/s]

Epochs: 7369 | epoch avg. loss: 1.173 | test avg. loss: 217.187
Epochs: 7370 | epoch avg. loss: 0.519 | test avg. loss: 216.201
Epochs: 7371 | epoch avg. loss: 0.512 | test avg. loss: 219.987


 15%|█▍        | 7375/50000 [10:52<53:35, 13.26it/s]

Epochs: 7372 | epoch avg. loss: 0.988 | test avg. loss: 215.489
Epochs: 7373 | epoch avg. loss: 0.180 | test avg. loss: 212.657
Epochs: 7374 | epoch avg. loss: 0.300 | test avg. loss: 214.121


 15%|█▍        | 7377/50000 [10:52<54:08, 13.12it/s]

Epochs: 7375 | epoch avg. loss: 0.089 | test avg. loss: 214.028
Epochs: 7376 | epoch avg. loss: 0.097 | test avg. loss: 215.038
Epochs: 7377 | epoch avg. loss: 0.083 | test avg. loss: 217.516


                                                    

Epochs: 7378 | epoch avg. loss: 0.202 | test avg. loss: 216.525
Epochs: 7379 | epoch avg. loss: 0.210 | test avg. loss: 216.282
Epochs: 7380 | epoch avg. loss: 0.290 | test avg. loss: 219.528


 15%|█▍        | 7383/50000 [10:53<52:13, 13.60it/s]

Epochs: 7381 | epoch avg. loss: 0.262 | test avg. loss: 215.848
Epochs: 7382 | epoch avg. loss: 0.235 | test avg. loss: 214.153
Epochs: 7383 | epoch avg. loss: 0.244 | test avg. loss: 215.224


 15%|█▍        | 7387/50000 [10:53<49:55, 14.23it/s]

Epochs: 7384 | epoch avg. loss: 0.251 | test avg. loss: 213.364
Epochs: 7385 | epoch avg. loss: 0.226 | test avg. loss: 212.082
Epochs: 7386 | epoch avg. loss: 0.457 | test avg. loss: 216.705


 15%|█▍        | 7389/50000 [10:53<52:44, 13.47it/s]

Epochs: 7387 | epoch avg. loss: 0.379 | test avg. loss: 216.109
Epochs: 7388 | epoch avg. loss: 0.110 | test avg. loss: 215.655
Epochs: 7389 | epoch avg. loss: 0.115 | test avg. loss: 216.464


 15%|█▍        | 7393/50000 [10:53<57:24, 12.37it/s]

Epochs: 7390 | epoch avg. loss: 0.126 | test avg. loss: 217.553
Epochs: 7391 | epoch avg. loss: 0.126 | test avg. loss: 215.654
Epochs: 7392 | epoch avg. loss: 0.260 | test avg. loss: 218.300


 15%|█▍        | 7395/50000 [10:54<59:07, 12.01it/s]

Epochs: 7393 | epoch avg. loss: 0.413 | test avg. loss: 219.140
Epochs: 7394 | epoch avg. loss: 0.337 | test avg. loss: 215.157
Epochs: 7395 | epoch avg. loss: 0.371 | test avg. loss: 214.892




Epochs: 7396 | epoch avg. loss: 0.108 | test avg. loss: 216.972
Epochs: 7397 | epoch avg. loss: 0.186 | test avg. loss: 215.617
Epochs: 7398 | epoch avg. loss: 0.095 | test avg. loss: 215.269


 15%|█▍        | 7399/50000 [10:54<54:25, 13.04it/s]

Epochs: 7399 | epoch avg. loss: 0.131 | test avg. loss: 217.727


 15%|█▍        | 7403/50000 [10:56<2:27:16,  4.82it/s]

Epochs: 7400 | epoch avg. loss: 0.144 | test avg. loss: 216.232
Epochs: 7401 | epoch avg. loss: 0.133 | test avg. loss: 215.092
Epochs: 7402 | epoch avg. loss: 0.146 | test avg. loss: 215.686




Epochs: 7403 | epoch avg. loss: 0.065 | test avg. loss: 214.492
Epochs: 7404 | epoch avg. loss: 0.049 | test avg. loss: 213.538
Epochs: 7405 | epoch avg. loss: 0.063 | test avg. loss: 213.986


 15%|█▍        | 7409/50000 [10:56<1:22:02,  8.65it/s]

Epochs: 7406 | epoch avg. loss: 0.048 | test avg. loss: 215.176
Epochs: 7407 | epoch avg. loss: 0.077 | test avg. loss: 214.234
Epochs: 7408 | epoch avg. loss: 0.074 | test avg. loss: 214.623
Epochs: 7409 | epoch avg. loss: 0.044 | test avg. loss: 215.498


                                                      

Epochs: 7410 | epoch avg. loss: 0.046 | test avg. loss: 215.140
Epochs: 7411 | epoch avg. loss: 0.036 | test avg. loss: 215.059
Epochs: 7412 | epoch avg. loss: 0.039 | test avg. loss: 215.549


 15%|█▍        | 7415/50000 [10:56<1:01:48, 11.48it/s]

Epochs: 7413 | epoch avg. loss: 0.031 | test avg. loss: 214.925
Epochs: 7414 | epoch avg. loss: 0.072 | test avg. loss: 214.866
Epochs: 7415 | epoch avg. loss: 0.062 | test avg. loss: 216.222


 15%|█▍        | 7419/50000 [10:57<1:04:04, 11.08it/s]

Epochs: 7416 | epoch avg. loss: 0.200 | test avg. loss: 214.808
Epochs: 7417 | epoch avg. loss: 0.042 | test avg. loss: 214.276
Epochs: 7418 | epoch avg. loss: 0.040 | test avg. loss: 215.610


 15%|█▍        | 7421/50000 [10:57<1:01:15, 11.58it/s]

Epochs: 7419 | epoch avg. loss: 0.114 | test avg. loss: 215.173
Epochs: 7420 | epoch avg. loss: 0.046 | test avg. loss: 214.750
Epochs: 7421 | epoch avg. loss: 0.076 | test avg. loss: 215.140


 15%|█▍        | 7425/50000 [10:57<53:21, 13.30it/s]

Epochs: 7422 | epoch avg. loss: 0.035 | test avg. loss: 215.689
Epochs: 7423 | epoch avg. loss: 0.035 | test avg. loss: 215.178
Epochs: 7424 | epoch avg. loss: 0.070 | test avg. loss: 214.782
Epochs: 7425 | epoch avg. loss: 0.111 | test avg. loss: 216.051


 15%|█▍        | 7429/50000 [10:57<54:27, 13.03it/s]

Epochs: 7426 | epoch avg. loss: 0.075 | test avg. loss: 216.736
Epochs: 7427 | epoch avg. loss: 0.076 | test avg. loss: 215.845
Epochs: 7428 | epoch avg. loss: 0.100 | test avg. loss: 215.214


 15%|█▍        | 7431/50000 [10:58<54:48, 12.95it/s]

Epochs: 7429 | epoch avg. loss: 0.114 | test avg. loss: 215.543
Epochs: 7430 | epoch avg. loss: 0.071 | test avg. loss: 215.480
Epochs: 7431 | epoch avg. loss: 0.061 | test avg. loss: 214.169




Epochs: 7432 | epoch avg. loss: 0.263 | test avg. loss: 214.418
Epochs: 7433 | epoch avg. loss: 0.096 | test avg. loss: 216.563
Epochs: 7434 | epoch avg. loss: 0.187 | test avg. loss: 215.546


 15%|█▍        | 7437/50000 [10:58<51:50, 13.68it/s]

Epochs: 7435 | epoch avg. loss: 0.391 | test avg. loss: 213.649
Epochs: 7436 | epoch avg. loss: 0.309 | test avg. loss: 218.633
Epochs: 7437 | epoch avg. loss: 1.493 | test avg. loss: 214.775


 15%|█▍        | 7441/50000 [10:58<54:22, 13.04it/s]

Epochs: 7438 | epoch avg. loss: 0.540 | test avg. loss: 211.553
Epochs: 7439 | epoch avg. loss: 1.095 | test avg. loss: 222.926
Epochs: 7440 | epoch avg. loss: 2.754 | test avg. loss: 219.851


 15%|█▍        | 7443/50000 [10:59<56:22, 12.58it/s]

Epochs: 7441 | epoch avg. loss: 0.730 | test avg. loss: 208.972
Epochs: 7442 | epoch avg. loss: 3.515 | test avg. loss: 205.960
Epochs: 7443 | epoch avg. loss: 1.104 | test avg. loss: 209.650


 15%|█▍        | 7447/50000 [10:59<59:15, 11.97it/s]

Epochs: 7444 | epoch avg. loss: 0.340 | test avg. loss: 213.004
Epochs: 7445 | epoch avg. loss: 0.218 | test avg. loss: 214.475
Epochs: 7446 | epoch avg. loss: 0.298 | test avg. loss: 214.761


 15%|█▍        | 7451/50000 [10:59<53:26, 13.27it/s]

Epochs: 7447 | epoch avg. loss: 0.177 | test avg. loss: 218.019
Epochs: 7448 | epoch avg. loss: 0.475 | test avg. loss: 212.375
Epochs: 7449 | epoch avg. loss: 0.556 | test avg. loss: 210.681
Epochs: 7450 | epoch avg. loss: 0.271 | test avg. loss: 210.701


 15%|█▍        | 7453/50000 [10:59<53:08, 13.34it/s]

Epochs: 7451 | epoch avg. loss: 0.393 | test avg. loss: 209.353
Epochs: 7452 | epoch avg. loss: 0.171 | test avg. loss: 209.185
Epochs: 7453 | epoch avg. loss: 0.105 | test avg. loss: 210.474


 15%|█▍        | 7457/50000 [11:00<56:47, 12.48it/s]

Epochs: 7454 | epoch avg. loss: 0.103 | test avg. loss: 211.395
Epochs: 7455 | epoch avg. loss: 0.093 | test avg. loss: 213.567
Epochs: 7456 | epoch avg. loss: 0.098 | test avg. loss: 213.408


 15%|█▍        | 7461/50000 [11:00<52:13, 13.58it/s]

Epochs: 7457 | epoch avg. loss: 0.085 | test avg. loss: 214.339
Epochs: 7458 | epoch avg. loss: 0.065 | test avg. loss: 216.324
Epochs: 7459 | epoch avg. loss: 0.188 | test avg. loss: 215.579
Epochs: 7460 | epoch avg. loss: 0.098 | test avg. loss: 213.912


 15%|█▍        | 7465/50000 [11:00<49:28, 14.33it/s]

Epochs: 7461 | epoch avg. loss: 0.128 | test avg. loss: 217.679
Epochs: 7462 | epoch avg. loss: 0.546 | test avg. loss: 217.911
Epochs: 7463 | epoch avg. loss: 0.262 | test avg. loss: 212.823
Epochs: 7464 | epoch avg. loss: 0.409 | test avg. loss: 211.520


 15%|█▍        | 7467/50000 [11:00<50:12, 14.12it/s]

Epochs: 7465 | epoch avg. loss: 0.144 | test avg. loss: 212.943
Epochs: 7466 | epoch avg. loss: 0.338 | test avg. loss: 209.710
Epochs: 7467 | epoch avg. loss: 0.214 | test avg. loss: 208.526


 15%|█▍        | 7471/50000 [11:01<56:14, 12.60it/s]

Epochs: 7468 | epoch avg. loss: 0.173 | test avg. loss: 210.017
Epochs: 7469 | epoch avg. loss: 0.085 | test avg. loss: 209.679
Epochs: 7470 | epoch avg. loss: 0.099 | test avg. loss: 210.843


 15%|█▍        | 7473/50000 [11:01<56:21, 12.58it/s]

Epochs: 7471 | epoch avg. loss: 0.128 | test avg. loss: 214.885
Epochs: 7472 | epoch avg. loss: 0.125 | test avg. loss: 215.790
Epochs: 7473 | epoch avg. loss: 0.097 | test avg. loss: 212.533


 15%|█▍        | 7477/50000 [11:01<58:54, 12.03it/s]

Epochs: 7474 | epoch avg. loss: 0.166 | test avg. loss: 211.993
Epochs: 7475 | epoch avg. loss: 0.236 | test avg. loss: 211.543
Epochs: 7476 | epoch avg. loss: 0.081 | test avg. loss: 212.701


 15%|█▍        | 7479/50000 [11:01<57:51, 12.25it/s]

Epochs: 7477 | epoch avg. loss: 0.127 | test avg. loss: 211.579
Epochs: 7478 | epoch avg. loss: 0.053 | test avg. loss: 211.650
Epochs: 7479 | epoch avg. loss: 0.075 | test avg. loss: 215.088


 15%|█▍        | 7483/50000 [11:02<57:58, 12.22it/s]  

Epochs: 7480 | epoch avg. loss: 0.440 | test avg. loss: 213.182
Epochs: 7481 | epoch avg. loss: 0.199 | test avg. loss: 212.959
Epochs: 7482 | epoch avg. loss: 0.173 | test avg. loss: 217.391


 15%|█▍        | 7485/50000 [11:02<57:16, 12.37it/s]

Epochs: 7483 | epoch avg. loss: 0.438 | test avg. loss: 212.658
Epochs: 7484 | epoch avg. loss: 0.263 | test avg. loss: 211.117
Epochs: 7485 | epoch avg. loss: 0.542 | test avg. loss: 214.758


 15%|█▍        | 7489/50000 [11:02<55:19, 12.81it/s]

Epochs: 7486 | epoch avg. loss: 0.170 | test avg. loss: 215.503
Epochs: 7487 | epoch avg. loss: 0.223 | test avg. loss: 214.346
Epochs: 7488 | epoch avg. loss: 0.321 | test avg. loss: 219.200


 15%|█▍        | 7491/50000 [11:02<52:41, 13.44it/s]

Epochs: 7489 | epoch avg. loss: 0.858 | test avg. loss: 218.389
Epochs: 7490 | epoch avg. loss: 0.258 | test avg. loss: 214.846
Epochs: 7491 | epoch avg. loss: 0.801 | test avg. loss: 217.456


 15%|█▍        | 7495/50000 [11:03<58:06, 12.19it/s]

Epochs: 7492 | epoch avg. loss: 0.183 | test avg. loss: 219.375
Epochs: 7493 | epoch avg. loss: 0.376 | test avg. loss: 216.082
Epochs: 7494 | epoch avg. loss: 0.264 | test avg. loss: 214.255


 15%|█▍        | 7497/50000 [11:03<57:21, 12.35it/s]

Epochs: 7495 | epoch avg. loss: 0.275 | test avg. loss: 217.748
Epochs: 7496 | epoch avg. loss: 0.482 | test avg. loss: 214.484
Epochs: 7497 | epoch avg. loss: 0.260 | test avg. loss: 212.103


 15%|█▍        | 7499/50000 [11:03<57:13, 12.38it/s]

Epochs: 7498 | epoch avg. loss: 0.601 | test avg. loss: 214.436
Epochs: 7499 | epoch avg. loss: 0.095 | test avg. loss: 215.587


 15%|█▌        | 7503/50000 [11:05<2:35:47,  4.55it/s]

Epochs: 7500 | epoch avg. loss: 0.108 | test avg. loss: 214.403
Epochs: 7501 | epoch avg. loss: 0.239 | test avg. loss: 216.262
Epochs: 7502 | epoch avg. loss: 0.151 | test avg. loss: 218.290


 15%|█▌        | 7505/50000 [11:05<2:06:17,  5.61it/s]

Epochs: 7503 | epoch avg. loss: 0.237 | test avg. loss: 213.952
Epochs: 7504 | epoch avg. loss: 0.362 | test avg. loss: 212.014
Epochs: 7505 | epoch avg. loss: 0.183 | test avg. loss: 212.675


 15%|█▌        | 7509/50000 [11:05<1:33:16,  7.59it/s]

Epochs: 7506 | epoch avg. loss: 0.434 | test avg. loss: 210.646
Epochs: 7507 | epoch avg. loss: 0.260 | test avg. loss: 212.933
Epochs: 7508 | epoch avg. loss: 0.144 | test avg. loss: 217.225


 15%|█▌        | 7511/50000 [11:05<1:26:13,  8.21it/s]

Epochs: 7509 | epoch avg. loss: 0.074 | test avg. loss: 217.083
Epochs: 7510 | epoch avg. loss: 0.070 | test avg. loss: 216.875
Epochs: 7511 | epoch avg. loss: 0.045 | test avg. loss: 216.212


 15%|█▌        | 7515/50000 [11:06<1:16:56,  9.20it/s]

Epochs: 7512 | epoch avg. loss: 0.056 | test avg. loss: 214.556
Epochs: 7513 | epoch avg. loss: 0.243 | test avg. loss: 215.227
Epochs: 7514 | epoch avg. loss: 0.070 | test avg. loss: 216.997


 15%|█▌        | 7519/50000 [11:06<1:00:56, 11.62it/s]

Epochs: 7515 | epoch avg. loss: 0.100 | test avg. loss: 215.469
Epochs: 7516 | epoch avg. loss: 0.258 | test avg. loss: 215.146
Epochs: 7517 | epoch avg. loss: 0.157 | test avg. loss: 216.763
Epochs: 7518 | epoch avg. loss: 0.122 | test avg. loss: 214.267


 15%|█▌        | 7521/50000 [11:06<56:33, 12.52it/s]

Epochs: 7519 | epoch avg. loss: 0.110 | test avg. loss: 212.774
Epochs: 7520 | epoch avg. loss: 0.233 | test avg. loss: 215.664
Epochs: 7521 | epoch avg. loss: 0.114 | test avg. loss: 215.692


 15%|█▌        | 7525/50000 [11:06<53:38, 13.20it/s]

Epochs: 7522 | epoch avg. loss: 0.074 | test avg. loss: 215.661
Epochs: 7523 | epoch avg. loss: 0.057 | test avg. loss: 216.756
Epochs: 7524 | epoch avg. loss: 0.113 | test avg. loss: 216.961


 15%|█▌        | 7527/50000 [11:07<1:00:11, 11.76it/s]

Epochs: 7525 | epoch avg. loss: 0.064 | test avg. loss: 215.386
Epochs: 7526 | epoch avg. loss: 0.148 | test avg. loss: 215.220
Epochs: 7527 | epoch avg. loss: 0.104 | test avg. loss: 216.710


 15%|█▌        | 7531/50000 [11:07<55:03, 12.86it/s]

Epochs: 7528 | epoch avg. loss: 0.117 | test avg. loss: 215.555
Epochs: 7529 | epoch avg. loss: 0.048 | test avg. loss: 214.383
Epochs: 7530 | epoch avg. loss: 0.071 | test avg. loss: 215.359


 15%|█▌        | 7535/50000 [11:07<50:58, 13.88it/s]

Epochs: 7531 | epoch avg. loss: 0.093 | test avg. loss: 216.183
Epochs: 7532 | epoch avg. loss: 0.106 | test avg. loss: 215.400
Epochs: 7533 | epoch avg. loss: 0.076 | test avg. loss: 216.010
Epochs: 7534 | epoch avg. loss: 0.053 | test avg. loss: 217.039


 15%|█▌        | 7537/50000 [11:07<51:00, 13.87it/s]

Epochs: 7535 | epoch avg. loss: 0.059 | test avg. loss: 216.913
Epochs: 7536 | epoch avg. loss: 0.047 | test avg. loss: 215.407
Epochs: 7537 | epoch avg. loss: 0.311 | test avg. loss: 216.051


 15%|█▌        | 7541/50000 [11:08<59:15, 11.94it/s]

Epochs: 7538 | epoch avg. loss: 0.094 | test avg. loss: 217.095
Epochs: 7539 | epoch avg. loss: 0.112 | test avg. loss: 215.150
Epochs: 7540 | epoch avg. loss: 0.265 | test avg. loss: 216.557


 15%|█▌        | 7543/50000 [11:08<1:01:33, 11.50it/s]

Epochs: 7541 | epoch avg. loss: 0.079 | test avg. loss: 216.949
Epochs: 7542 | epoch avg. loss: 0.091 | test avg. loss: 215.525
Epochs: 7543 | epoch avg. loss: 0.332 | test avg. loss: 216.996


 15%|█▌        | 7547/50000 [11:08<55:54, 12.65it/s]

Epochs: 7544 | epoch avg. loss: 0.069 | test avg. loss: 218.457
Epochs: 7545 | epoch avg. loss: 0.134 | test avg. loss: 217.900
Epochs: 7546 | epoch avg. loss: 0.058 | test avg. loss: 216.832


 15%|█▌        | 7549/50000 [11:08<54:33, 12.97it/s]

Epochs: 7547 | epoch avg. loss: 0.124 | test avg. loss: 217.948
Epochs: 7548 | epoch avg. loss: 0.105 | test avg. loss: 218.324
Epochs: 7549 | epoch avg. loss: 0.211 | test avg. loss: 215.307


 15%|█▌        | 7553/50000 [11:09<58:05, 12.18it/s]

Epochs: 7550 | epoch avg. loss: 0.085 | test avg. loss: 214.381
Epochs: 7551 | epoch avg. loss: 0.088 | test avg. loss: 217.529
Epochs: 7552 | epoch avg. loss: 0.509 | test avg. loss: 216.739


 15%|█▌        | 7555/50000 [11:09<55:54, 12.65it/s]

Epochs: 7553 | epoch avg. loss: 0.298 | test avg. loss: 217.474
Epochs: 7554 | epoch avg. loss: 0.557 | test avg. loss: 221.578
Epochs: 7555 | epoch avg. loss: 0.987 | test avg. loss: 218.874
Epochs: 7556 | epoch avg. loss: 1.730 | test avg. loss: 211.352


 15%|█▌        | 7559/50000 [11:09<51:05, 13.85it/s]

Epochs: 7557 | epoch avg. loss: 1.496 | test avg. loss: 223.026
Epochs: 7558 | epoch avg. loss: 13.714 | test avg. loss: 208.957
Epochs: 7559 | epoch avg. loss: 3.119 | test avg. loss: 235.713


 15%|█▌        | 7563/50000 [11:09<53:18, 13.27it/s]

Epochs: 7560 | epoch avg. loss: 12.312 | test avg. loss: 199.733
Epochs: 7561 | epoch avg. loss: 18.962 | test avg. loss: 194.338
Epochs: 7562 | epoch avg. loss: 6.911 | test avg. loss: 197.524


 15%|█▌        | 7565/50000 [11:10<59:26, 11.90it/s]

Epochs: 7563 | epoch avg. loss: 5.544 | test avg. loss: 202.569
Epochs: 7564 | epoch avg. loss: 21.997 | test avg. loss: 241.193
Epochs: 7565 | epoch avg. loss: 38.166 | test avg. loss: 127.452


 15%|█▌        | 7569/50000 [11:10<53:52, 13.13it/s]

Epochs: 7566 | epoch avg. loss: 11.831 | test avg. loss: 103.765
Epochs: 7567 | epoch avg. loss: 50.406 | test avg. loss: 96.661
Epochs: 7568 | epoch avg. loss: 37.132 | test avg. loss: 95.649
Epochs: 7569 | epoch avg. loss: 34.108 | test avg. loss: 42.290


 15%|█▌        | 7573/50000 [11:10<52:35, 13.44it/s]

Epochs: 7570 | epoch avg. loss: 22.812 | test avg. loss: 233.495
Epochs: 7571 | epoch avg. loss: 189.591 | test avg. loss: 60.805
Epochs: 7572 | epoch avg. loss: 89.336 | test avg. loss: 84.934


                                                    

Epochs: 7573 | epoch avg. loss: 91.601 | test avg. loss: 27.400
Epochs: 7574 | epoch avg. loss: 42.040 | test avg. loss: 20.389
Epochs: 7575 | epoch avg. loss: 27.692 | test avg. loss: 61.232


 15%|█▌        | 7579/50000 [11:11<54:19, 13.02it/s]

Epochs: 7576 | epoch avg. loss: 26.993 | test avg. loss: 53.140
Epochs: 7577 | epoch avg. loss: 41.506 | test avg. loss: 63.100
Epochs: 7578 | epoch avg. loss: 38.687 | test avg. loss: 30.351


 15%|█▌        | 7581/50000 [11:11<52:11, 13.54it/s]

Epochs: 7579 | epoch avg. loss: 18.001 | test avg. loss: 33.401
Epochs: 7580 | epoch avg. loss: 19.833 | test avg. loss: 46.743
Epochs: 7581 | epoch avg. loss: 24.224 | test avg. loss: 23.698
Epochs: 7582 | epoch avg. loss: 13.422 | test avg. loss: 27.011


 15%|█▌        | 7587/50000 [11:11<47:51, 14.77it/s]

Epochs: 7583 | epoch avg. loss: 10.947 | test avg. loss: 35.986
Epochs: 7584 | epoch avg. loss: 18.586 | test avg. loss: 20.627
Epochs: 7585 | epoch avg. loss: 11.060 | test avg. loss: 22.200
Epochs: 7586 | epoch avg. loss: 11.265 | test avg. loss: 27.454


 15%|█▌        | 7591/50000 [11:11<46:52, 15.08it/s]

Epochs: 7587 | epoch avg. loss: 12.423 | test avg. loss: 17.959
Epochs: 7588 | epoch avg. loss: 8.462 | test avg. loss: 17.554
Epochs: 7589 | epoch avg. loss: 8.933 | test avg. loss: 22.650
Epochs: 7590 | epoch avg. loss: 9.094 | test avg. loss: 16.015


 15%|█▌        | 7593/50000 [11:12<51:27, 13.74it/s]

Epochs: 7591 | epoch avg. loss: 6.285 | test avg. loss: 14.109
Epochs: 7592 | epoch avg. loss: 6.760 | test avg. loss: 16.361
Epochs: 7593 | epoch avg. loss: 5.751 | test avg. loss: 13.827


 15%|█▌        | 7597/50000 [11:12<50:43, 13.93it/s]

Epochs: 7594 | epoch avg. loss: 4.559 | test avg. loss: 12.172
Epochs: 7595 | epoch avg. loss: 5.709 | test avg. loss: 12.134
Epochs: 7596 | epoch avg. loss: 4.242 | test avg. loss: 12.082
Epochs: 7597 | epoch avg. loss: 3.913 | test avg. loss: 12.795


 15%|█▌        | 7599/50000 [11:12<48:53, 14.46it/s]

Epochs: 7598 | epoch avg. loss: 3.926 | test avg. loss: 11.670
Epochs: 7599 | epoch avg. loss: 3.678 | test avg. loss: 11.495


 15%|█▌        | 7603/50000 [11:13<2:21:46,  4.98it/s]

Epochs: 7600 | epoch avg. loss: 3.528 | test avg. loss: 12.418
Epochs: 7601 | epoch avg. loss: 3.484 | test avg. loss: 12.377
Epochs: 7602 | epoch avg. loss: 3.410 | test avg. loss: 11.600


 15%|█▌        | 7605/50000 [11:14<1:56:08,  6.08it/s]

Epochs: 7603 | epoch avg. loss: 3.076 | test avg. loss: 11.601
Epochs: 7604 | epoch avg. loss: 2.943 | test avg. loss: 11.500
Epochs: 7605 | epoch avg. loss: 2.852 | test avg. loss: 11.495


                                                      

Epochs: 7606 | epoch avg. loss: 2.766 | test avg. loss: 11.700
Epochs: 7607 | epoch avg. loss: 2.691 | test avg. loss: 11.545
Epochs: 7608 | epoch avg. loss: 2.558 | test avg. loss: 11.409


 15%|█▌        | 7613/50000 [11:14<1:03:39, 11.10it/s]

Epochs: 7609 | epoch avg. loss: 2.579 | test avg. loss: 11.819
Epochs: 7610 | epoch avg. loss: 2.429 | test avg. loss: 12.032
Epochs: 7611 | epoch avg. loss: 2.376 | test avg. loss: 11.203
Epochs: 7612 | epoch avg. loss: 2.887 | test avg. loss: 11.610


 15%|█▌        | 7615/50000 [11:14<58:33, 12.06it/s]

Epochs: 7613 | epoch avg. loss: 2.213 | test avg. loss: 13.200
Epochs: 7614 | epoch avg. loss: 2.825 | test avg. loss: 10.956
Epochs: 7615 | epoch avg. loss: 2.471 | test avg. loss: 10.962


 15%|█▌        | 7619/50000 [11:15<1:00:19, 11.71it/s]

Epochs: 7616 | epoch avg. loss: 2.227 | test avg. loss: 12.105
Epochs: 7617 | epoch avg. loss: 2.303 | test avg. loss: 10.819
Epochs: 7618 | epoch avg. loss: 2.126 | test avg. loss: 10.993


 15%|█▌        | 7621/50000 [11:15<1:00:33, 11.66it/s]

Epochs: 7619 | epoch avg. loss: 1.843 | test avg. loss: 11.897
Epochs: 7620 | epoch avg. loss: 2.184 | test avg. loss: 10.793
Epochs: 7621 | epoch avg. loss: 1.781 | test avg. loss: 10.552


 15%|█▌        | 7625/50000 [11:15<57:07, 12.36it/s]

Epochs: 7622 | epoch avg. loss: 1.950 | test avg. loss: 11.166
Epochs: 7623 | epoch avg. loss: 2.177 | test avg. loss: 10.808
Epochs: 7624 | epoch avg. loss: 1.761 | test avg. loss: 10.390


 15%|█▌        | 7627/50000 [11:15<55:24, 12.75it/s]

Epochs: 7625 | epoch avg. loss: 2.157 | test avg. loss: 11.087
Epochs: 7626 | epoch avg. loss: 2.047 | test avg. loss: 10.502
Epochs: 7627 | epoch avg. loss: 1.830 | test avg. loss: 10.089


 15%|█▌        | 7631/50000 [11:16<57:48, 12.22it/s]

Epochs: 7628 | epoch avg. loss: 1.822 | test avg. loss: 11.396
Epochs: 7629 | epoch avg. loss: 1.997 | test avg. loss: 10.410
Epochs: 7630 | epoch avg. loss: 1.594 | test avg. loss: 9.912


 15%|█▌        | 7633/50000 [11:16<56:46, 12.44it/s]

Epochs: 7631 | epoch avg. loss: 1.557 | test avg. loss: 10.508
Epochs: 7632 | epoch avg. loss: 1.645 | test avg. loss: 10.105
Epochs: 7633 | epoch avg. loss: 1.604 | test avg. loss: 9.804


 15%|█▌        | 7637/50000 [11:16<52:45, 13.38it/s]

Epochs: 7634 | epoch avg. loss: 1.454 | test avg. loss: 10.511
Epochs: 7635 | epoch avg. loss: 1.569 | test avg. loss: 9.755
Epochs: 7636 | epoch avg. loss: 1.446 | test avg. loss: 9.647
Epochs: 7637 | epoch avg. loss: 1.436 | test avg. loss: 10.533


 15%|█▌        | 7641/50000 [11:16<48:54, 14.43it/s]

Epochs: 7638 | epoch avg. loss: 1.612 | test avg. loss: 9.743
Epochs: 7639 | epoch avg. loss: 1.312 | test avg. loss: 9.516
Epochs: 7640 | epoch avg. loss: 1.441 | test avg. loss: 10.446
Epochs: 7641 | epoch avg. loss: 1.843 | test avg. loss: 9.691


 15%|█▌        | 7645/50000 [11:17<53:04, 13.30it/s]

Epochs: 7642 | epoch avg. loss: 1.241 | test avg. loss: 9.455
Epochs: 7643 | epoch avg. loss: 1.923 | test avg. loss: 10.169
Epochs: 7644 | epoch avg. loss: 1.399 | test avg. loss: 9.398


 15%|█▌        | 7647/50000 [11:17<52:22, 13.48it/s]

Epochs: 7645 | epoch avg. loss: 1.173 | test avg. loss: 9.244
Epochs: 7646 | epoch avg. loss: 1.384 | test avg. loss: 9.757
Epochs: 7647 | epoch avg. loss: 1.199 | test avg. loss: 9.332


 15%|█▌        | 7651/50000 [11:17<52:19, 13.49it/s]

Epochs: 7648 | epoch avg. loss: 1.085 | test avg. loss: 9.168
Epochs: 7649 | epoch avg. loss: 1.206 | test avg. loss: 9.244
Epochs: 7650 | epoch avg. loss: 1.093 | test avg. loss: 9.226


 15%|█▌        | 7655/50000 [11:17<48:44, 14.48it/s]

Epochs: 7651 | epoch avg. loss: 1.038 | test avg. loss: 9.442
Epochs: 7652 | epoch avg. loss: 1.041 | test avg. loss: 9.027
Epochs: 7653 | epoch avg. loss: 1.141 | test avg. loss: 9.238
Epochs: 7654 | epoch avg. loss: 1.072 | test avg. loss: 9.491


 15%|█▌        | 7657/50000 [11:18<52:24, 13.46it/s]

Epochs: 7655 | epoch avg. loss: 1.064 | test avg. loss: 8.954
Epochs: 7656 | epoch avg. loss: 1.194 | test avg. loss: 9.383
Epochs: 7657 | epoch avg. loss: 1.178 | test avg. loss: 9.589


 15%|█▌        | 7661/50000 [11:18<51:02, 13.82it/s]

Epochs: 7658 | epoch avg. loss: 1.132 | test avg. loss: 8.900
Epochs: 7659 | epoch avg. loss: 1.134 | test avg. loss: 9.224
Epochs: 7660 | epoch avg. loss: 0.968 | test avg. loss: 9.509
Epochs: 7661 | epoch avg. loss: 1.070 | test avg. loss: 8.914


 15%|█▌        | 7665/50000 [11:18<52:31, 13.44it/s]

Epochs: 7662 | epoch avg. loss: 0.877 | test avg. loss: 8.857
Epochs: 7663 | epoch avg. loss: 1.059 | test avg. loss: 9.613
Epochs: 7664 | epoch avg. loss: 1.136 | test avg. loss: 9.619


 15%|█▌        | 7667/50000 [11:18<52:35, 13.41it/s]

Epochs: 7665 | epoch avg. loss: 1.058 | test avg. loss: 8.829
Epochs: 7666 | epoch avg. loss: 1.065 | test avg. loss: 8.944
Epochs: 7667 | epoch avg. loss: 0.848 | test avg. loss: 9.115


 15%|█▌        | 7671/50000 [11:19<55:55, 12.62it/s]

Epochs: 7668 | epoch avg. loss: 0.880 | test avg. loss: 8.728
Epochs: 7669 | epoch avg. loss: 0.833 | test avg. loss: 8.870
Epochs: 7670 | epoch avg. loss: 0.873 | test avg. loss: 8.687


 15%|█▌        | 7673/50000 [11:19<57:34, 12.25it/s]

Epochs: 7671 | epoch avg. loss: 0.874 | test avg. loss: 8.644
Epochs: 7672 | epoch avg. loss: 0.841 | test avg. loss: 9.041
Epochs: 7673 | epoch avg. loss: 0.808 | test avg. loss: 8.508


 15%|█▌        | 7677/50000 [11:19<1:00:41, 11.62it/s]

Epochs: 7674 | epoch avg. loss: 0.846 | test avg. loss: 8.629
Epochs: 7675 | epoch avg. loss: 0.735 | test avg. loss: 9.032
Epochs: 7676 | epoch avg. loss: 0.849 | test avg. loss: 8.542


 15%|█▌        | 7679/50000 [11:19<1:00:14, 11.71it/s]

Epochs: 7677 | epoch avg. loss: 0.751 | test avg. loss: 8.657
Epochs: 7678 | epoch avg. loss: 0.688 | test avg. loss: 8.805
Epochs: 7679 | epoch avg. loss: 0.729 | test avg. loss: 8.608


 15%|█▌        | 7683/50000 [11:20<57:21, 12.30it/s]

Epochs: 7680 | epoch avg. loss: 0.728 | test avg. loss: 8.603
Epochs: 7681 | epoch avg. loss: 0.708 | test avg. loss: 8.965
Epochs: 7682 | epoch avg. loss: 0.754 | test avg. loss: 8.709


 15%|█▌        | 7685/50000 [11:20<57:50, 12.19it/s]

Epochs: 7683 | epoch avg. loss: 0.649 | test avg. loss: 8.602
Epochs: 7684 | epoch avg. loss: 0.687 | test avg. loss: 8.661
Epochs: 7685 | epoch avg. loss: 0.693 | test avg. loss: 8.658


 15%|█▌        | 7689/50000 [11:20<1:01:04, 11.55it/s]

Epochs: 7686 | epoch avg. loss: 0.598 | test avg. loss: 8.419
Epochs: 7687 | epoch avg. loss: 0.808 | test avg. loss: 8.636
Epochs: 7688 | epoch avg. loss: 0.659 | test avg. loss: 8.920


 15%|█▌        | 7691/50000 [11:20<1:00:14, 11.71it/s]

Epochs: 7689 | epoch avg. loss: 0.798 | test avg. loss: 8.432
Epochs: 7690 | epoch avg. loss: 0.649 | test avg. loss: 8.694
Epochs: 7691 | epoch avg. loss: 0.628 | test avg. loss: 8.623


 15%|█▌        | 7695/50000 [11:21<57:28, 12.27it/s]

Epochs: 7692 | epoch avg. loss: 0.614 | test avg. loss: 8.434
Epochs: 7693 | epoch avg. loss: 0.615 | test avg. loss: 8.575
Epochs: 7694 | epoch avg. loss: 0.573 | test avg. loss: 8.498


 15%|█▌        | 7697/50000 [11:21<54:07, 13.03it/s]

Epochs: 7695 | epoch avg. loss: 0.572 | test avg. loss: 8.555
Epochs: 7696 | epoch avg. loss: 0.554 | test avg. loss: 8.655
Epochs: 7697 | epoch avg. loss: 0.570 | test avg. loss: 8.485


 15%|█▌        | 7699/50000 [11:21<54:57, 12.83it/s]

Epochs: 7698 | epoch avg. loss: 0.565 | test avg. loss: 8.467
Epochs: 7699 | epoch avg. loss: 0.600 | test avg. loss: 8.533


 15%|█▌        | 7702/50000 [11:23<3:06:32,  3.78it/s]

Epochs: 7700 | epoch avg. loss: 0.527 | test avg. loss: 8.357
Epochs: 7701 | epoch avg. loss: 0.588 | test avg. loss: 8.383
Epochs: 7702 | epoch avg. loss: 0.530 | test avg. loss: 8.423


 15%|█▌        | 7706/50000 [11:23<1:53:33,  6.21it/s]

Epochs: 7703 | epoch avg. loss: 0.568 | test avg. loss: 8.279
Epochs: 7704 | epoch avg. loss: 0.670 | test avg. loss: 8.353
Epochs: 7705 | epoch avg. loss: 0.542 | test avg. loss: 8.781


 15%|█▌        | 7708/50000 [11:23<1:36:23,  7.31it/s]

Epochs: 7706 | epoch avg. loss: 0.703 | test avg. loss: 8.270
Epochs: 7707 | epoch avg. loss: 0.624 | test avg. loss: 8.325
Epochs: 7708 | epoch avg. loss: 0.522 | test avg. loss: 8.332


 15%|█▌        | 7712/50000 [11:23<1:11:28,  9.86it/s]

Epochs: 7709 | epoch avg. loss: 0.505 | test avg. loss: 8.435
Epochs: 7710 | epoch avg. loss: 0.516 | test avg. loss: 8.224
Epochs: 7711 | epoch avg. loss: 0.549 | test avg. loss: 8.237


 15%|█▌        | 7714/50000 [11:24<1:11:08,  9.91it/s]

Epochs: 7712 | epoch avg. loss: 0.491 | test avg. loss: 8.569
Epochs: 7713 | epoch avg. loss: 0.563 | test avg. loss: 8.225
Epochs: 7714 | epoch avg. loss: 0.530 | test avg. loss: 8.262


 15%|█▌        | 7718/50000 [11:24<1:01:05, 11.53it/s]

Epochs: 7715 | epoch avg. loss: 0.479 | test avg. loss: 8.497
Epochs: 7716 | epoch avg. loss: 0.495 | test avg. loss: 8.226
Epochs: 7717 | epoch avg. loss: 0.528 | test avg. loss: 8.253




Epochs: 7718 | epoch avg. loss: 0.467 | test avg. loss: 8.495
Epochs: 7719 | epoch avg. loss: 0.513 | test avg. loss: 8.153


 15%|█▌        | 7724/50000 [11:24<1:02:06, 11.34it/s]

Epochs: 7720 | epoch avg. loss: 0.521 | test avg. loss: 8.171
Epochs: 7721 | epoch avg. loss: 0.446 | test avg. loss: 8.435
Epochs: 7722 | epoch avg. loss: 0.517 | test avg. loss: 8.108
Epochs: 7723 | epoch avg. loss: 0.503 | test avg. loss: 8.196


 15%|█▌        | 7726/50000 [11:25<1:01:13, 11.51it/s]

Epochs: 7724 | epoch avg. loss: 0.545 | test avg. loss: 8.299
Epochs: 7725 | epoch avg. loss: 0.493 | test avg. loss: 8.158
Epochs: 7726 | epoch avg. loss: 0.513 | test avg. loss: 8.419


 15%|█▌        | 7730/50000 [11:25<1:02:55, 11.20it/s]

Epochs: 7727 | epoch avg. loss: 0.488 | test avg. loss: 8.248
Epochs: 7728 | epoch avg. loss: 0.430 | test avg. loss: 8.181
Epochs: 7729 | epoch avg. loss: 0.451 | test avg. loss: 8.429


 15%|█▌        | 7732/50000 [11:25<1:02:17, 11.31it/s]

Epochs: 7730 | epoch avg. loss: 0.481 | test avg. loss: 8.325
Epochs: 7731 | epoch avg. loss: 0.476 | test avg. loss: 8.210
Epochs: 7732 | epoch avg. loss: 0.490 | test avg. loss: 8.341


 15%|█▌        | 7736/50000 [11:25<57:26, 12.26it/s]

Epochs: 7733 | epoch avg. loss: 0.449 | test avg. loss: 8.247
Epochs: 7734 | epoch avg. loss: 0.419 | test avg. loss: 8.327
Epochs: 7735 | epoch avg. loss: 0.413 | test avg. loss: 8.251


 15%|█▌        | 7738/50000 [11:26<57:04, 12.34it/s]

Epochs: 7736 | epoch avg. loss: 0.425 | test avg. loss: 8.238
Epochs: 7737 | epoch avg. loss: 0.415 | test avg. loss: 8.276
Epochs: 7738 | epoch avg. loss: 0.450 | test avg. loss: 8.259


 15%|█▌        | 7742/50000 [11:26<55:56, 12.59it/s]

Epochs: 7739 | epoch avg. loss: 0.455 | test avg. loss: 8.190
Epochs: 7740 | epoch avg. loss: 0.417 | test avg. loss: 8.402
Epochs: 7741 | epoch avg. loss: 0.433 | test avg. loss: 8.140


 15%|█▌        | 7746/50000 [11:26<50:37, 13.91it/s]

Epochs: 7742 | epoch avg. loss: 0.449 | test avg. loss: 8.188
Epochs: 7743 | epoch avg. loss: 0.383 | test avg. loss: 8.401
Epochs: 7744 | epoch avg. loss: 0.453 | test avg. loss: 8.144
Epochs: 7745 | epoch avg. loss: 0.422 | test avg. loss: 8.145


 15%|█▌        | 7748/50000 [11:26<51:32, 13.66it/s]

Epochs: 7746 | epoch avg. loss: 0.382 | test avg. loss: 8.332
Epochs: 7747 | epoch avg. loss: 0.427 | test avg. loss: 8.088
Epochs: 7748 | epoch avg. loss: 0.419 | test avg. loss: 8.094


 16%|█▌        | 7752/50000 [11:27<52:54, 13.31it/s]

Epochs: 7749 | epoch avg. loss: 0.404 | test avg. loss: 8.317
Epochs: 7750 | epoch avg. loss: 0.408 | test avg. loss: 8.158
Epochs: 7751 | epoch avg. loss: 0.396 | test avg. loss: 8.195


 16%|█▌        | 7754/50000 [11:27<56:42, 12.42it/s]

Epochs: 7752 | epoch avg. loss: 0.388 | test avg. loss: 8.303
Epochs: 7753 | epoch avg. loss: 0.385 | test avg. loss: 8.135
Epochs: 7754 | epoch avg. loss: 0.404 | test avg. loss: 8.182


 16%|█▌        | 7758/50000 [11:27<1:00:20, 11.67it/s]

Epochs: 7755 | epoch avg. loss: 0.394 | test avg. loss: 8.200
Epochs: 7756 | epoch avg. loss: 0.367 | test avg. loss: 8.079
Epochs: 7757 | epoch avg. loss: 0.441 | test avg. loss: 8.185


 16%|█▌        | 7760/50000 [11:27<1:00:09, 11.70it/s]

Epochs: 7758 | epoch avg. loss: 0.364 | test avg. loss: 8.340
Epochs: 7759 | epoch avg. loss: 0.393 | test avg. loss: 8.198
Epochs: 7760 | epoch avg. loss: 0.395 | test avg. loss: 8.223


 16%|█▌        | 7764/50000 [11:28<59:24, 11.85it/s]  

Epochs: 7761 | epoch avg. loss: 0.362 | test avg. loss: 8.370
Epochs: 7762 | epoch avg. loss: 0.400 | test avg. loss: 8.139
Epochs: 7763 | epoch avg. loss: 0.405 | test avg. loss: 8.202


 16%|█▌        | 7766/50000 [11:28<1:03:23, 11.10it/s]

Epochs: 7764 | epoch avg. loss: 0.400 | test avg. loss: 8.229
Epochs: 7765 | epoch avg. loss: 0.398 | test avg. loss: 8.122
Epochs: 7766 | epoch avg. loss: 0.362 | test avg. loss: 8.295


 16%|█▌        | 7770/50000 [11:28<57:29, 12.24it/s]

Epochs: 7767 | epoch avg. loss: 0.397 | test avg. loss: 8.160
Epochs: 7768 | epoch avg. loss: 0.376 | test avg. loss: 8.150
Epochs: 7769 | epoch avg. loss: 0.354 | test avg. loss: 8.251


 16%|█▌        | 7772/50000 [11:28<55:31, 12.68it/s]

Epochs: 7770 | epoch avg. loss: 0.373 | test avg. loss: 8.157
Epochs: 7771 | epoch avg. loss: 0.358 | test avg. loss: 8.242
Epochs: 7772 | epoch avg. loss: 0.360 | test avg. loss: 8.246


 16%|█▌        | 7776/50000 [11:29<55:22, 12.71it/s]

Epochs: 7773 | epoch avg. loss: 0.353 | test avg. loss: 8.145
Epochs: 7774 | epoch avg. loss: 0.363 | test avg. loss: 8.200
Epochs: 7775 | epoch avg. loss: 0.354 | test avg. loss: 8.134


 16%|█▌        | 7778/50000 [11:29<55:02, 12.78it/s]

Epochs: 7776 | epoch avg. loss: 0.366 | test avg. loss: 8.145
Epochs: 7777 | epoch avg. loss: 0.344 | test avg. loss: 8.237
Epochs: 7778 | epoch avg. loss: 0.380 | test avg. loss: 8.134


                                                    

Epochs: 7779 | epoch avg. loss: 0.346 | test avg. loss: 8.176
Epochs: 7780 | epoch avg. loss: 0.357 | test avg. loss: 8.219
Epochs: 7781 | epoch avg. loss: 0.348 | test avg. loss: 8.221


 16%|█▌        | 7786/50000 [11:29<48:28, 14.51it/s]

Epochs: 7782 | epoch avg. loss: 0.345 | test avg. loss: 8.121
Epochs: 7783 | epoch avg. loss: 0.371 | test avg. loss: 8.158
Epochs: 7784 | epoch avg. loss: 0.393 | test avg. loss: 8.169
Epochs: 7785 | epoch avg. loss: 0.416 | test avg. loss: 8.166


 16%|█▌        | 7788/50000 [11:30<51:23, 13.69it/s]

Epochs: 7786 | epoch avg. loss: 0.432 | test avg. loss: 8.233
Epochs: 7787 | epoch avg. loss: 0.349 | test avg. loss: 8.023
Epochs: 7788 | epoch avg. loss: 0.448 | test avg. loss: 8.117


 16%|█▌        | 7792/50000 [11:30<55:00, 12.79it/s]

Epochs: 7789 | epoch avg. loss: 0.382 | test avg. loss: 8.067
Epochs: 7790 | epoch avg. loss: 0.427 | test avg. loss: 8.120
Epochs: 7791 | epoch avg. loss: 0.393 | test avg. loss: 8.247


 16%|█▌        | 7794/50000 [11:30<52:49, 13.32it/s]

Epochs: 7792 | epoch avg. loss: 0.348 | test avg. loss: 8.069
Epochs: 7793 | epoch avg. loss: 0.428 | test avg. loss: 8.258
Epochs: 7794 | epoch avg. loss: 0.384 | test avg. loss: 8.188


 16%|█▌        | 7798/50000 [11:30<51:19, 13.70it/s]

Epochs: 7795 | epoch avg. loss: 0.324 | test avg. loss: 8.046
Epochs: 7796 | epoch avg. loss: 0.405 | test avg. loss: 8.247
Epochs: 7797 | epoch avg. loss: 0.369 | test avg. loss: 8.267


 16%|█▌        | 7798/50000 [11:30<51:19, 13.70it/s]

Epochs: 7798 | epoch avg. loss: 0.346 | test avg. loss: 8.149
Epochs: 7799 | epoch avg. loss: 0.363 | test avg. loss: 8.330


 16%|█▌        | 7802/50000 [11:32<2:31:00,  4.66it/s]

Epochs: 7800 | epoch avg. loss: 0.432 | test avg. loss: 8.216
Epochs: 7801 | epoch avg. loss: 0.351 | test avg. loss: 8.121
Epochs: 7802 | epoch avg. loss: 0.388 | test avg. loss: 8.376


 16%|█▌        | 7806/50000 [11:32<1:39:27,  7.07it/s]

Epochs: 7803 | epoch avg. loss: 0.470 | test avg. loss: 8.029
Epochs: 7804 | epoch avg. loss: 0.511 | test avg. loss: 7.987
Epochs: 7805 | epoch avg. loss: 0.351 | test avg. loss: 8.454


 16%|█▌        | 7808/50000 [11:32<1:25:40,  8.21it/s]

Epochs: 7806 | epoch avg. loss: 0.480 | test avg. loss: 8.007
Epochs: 7807 | epoch avg. loss: 0.452 | test avg. loss: 8.080
Epochs: 7808 | epoch avg. loss: 0.326 | test avg. loss: 8.274


 16%|█▌        | 7812/50000 [11:33<1:10:42,  9.95it/s]

Epochs: 7809 | epoch avg. loss: 0.358 | test avg. loss: 8.133
Epochs: 7810 | epoch avg. loss: 0.340 | test avg. loss: 8.126
Epochs: 7811 | epoch avg. loss: 0.324 | test avg. loss: 8.109


 16%|█▌        | 7814/50000 [11:33<1:07:03, 10.49it/s]

Epochs: 7812 | epoch avg. loss: 0.328 | test avg. loss: 8.182
Epochs: 7813 | epoch avg. loss: 0.350 | test avg. loss: 8.159
Epochs: 7814 | epoch avg. loss: 0.375 | test avg. loss: 8.133




Epochs: 7815 | epoch avg. loss: 0.337 | test avg. loss: 8.192
Epochs: 7816 | epoch avg. loss: 0.338 | test avg. loss: 8.087
Epochs: 7817 | epoch avg. loss: 0.326 | test avg. loss: 8.078


 16%|█▌        | 7820/50000 [11:33<55:10, 12.74it/s]

Epochs: 7818 | epoch avg. loss: 0.396 | test avg. loss: 8.069
Epochs: 7819 | epoch avg. loss: 0.331 | test avg. loss: 8.043
Epochs: 7820 | epoch avg. loss: 0.340 | test avg. loss: 8.237


 16%|█▌        | 7822/50000 [11:33<55:58, 12.56it/s]

Epochs: 7821 | epoch avg. loss: 0.360 | test avg. loss: 8.084
Epochs: 7822 | epoch avg. loss: 0.320 | test avg. loss: 8.092


 16%|█▌        | 7826/50000 [11:34<57:41, 12.19it/s]  

Epochs: 7823 | epoch avg. loss: 0.313 | test avg. loss: 8.134
Epochs: 7824 | epoch avg. loss: 0.311 | test avg. loss: 8.159
Epochs: 7825 | epoch avg. loss: 0.314 | test avg. loss: 8.118


 16%|█▌        | 7830/50000 [11:34<51:52, 13.55it/s]

Epochs: 7826 | epoch avg. loss: 0.331 | test avg. loss: 8.156
Epochs: 7827 | epoch avg. loss: 0.312 | test avg. loss: 8.207
Epochs: 7828 | epoch avg. loss: 0.346 | test avg. loss: 8.032
Epochs: 7829 | epoch avg. loss: 0.316 | test avg. loss: 8.090


 16%|█▌        | 7832/50000 [11:34<50:07, 14.02it/s]

Epochs: 7830 | epoch avg. loss: 0.320 | test avg. loss: 8.071
Epochs: 7831 | epoch avg. loss: 0.311 | test avg. loss: 8.044
Epochs: 7832 | epoch avg. loss: 0.331 | test avg. loss: 8.262


 16%|█▌        | 7836/50000 [11:34<53:59, 13.01it/s]

Epochs: 7833 | epoch avg. loss: 0.447 | test avg. loss: 8.147
Epochs: 7834 | epoch avg. loss: 0.399 | test avg. loss: 8.140
Epochs: 7835 | epoch avg. loss: 0.468 | test avg. loss: 8.359


 16%|█▌        | 7838/50000 [11:35<52:47, 13.31it/s]

Epochs: 7836 | epoch avg. loss: 0.387 | test avg. loss: 8.132
Epochs: 7837 | epoch avg. loss: 0.355 | test avg. loss: 8.275
Epochs: 7838 | epoch avg. loss: 0.337 | test avg. loss: 8.110
Epochs: 7839 | epoch avg. loss: 0.309 | test avg. loss: 8.094


 16%|█▌        | 7842/50000 [11:35<52:16, 13.44it/s]

Epochs: 7840 | epoch avg. loss: 0.303 | test avg. loss: 8.129
Epochs: 7841 | epoch avg. loss: 0.311 | test avg. loss: 8.071
Epochs: 7842 | epoch avg. loss: 0.314 | test avg. loss: 8.035


 16%|█▌        | 7846/50000 [11:35<53:36, 13.11it/s]

Epochs: 7843 | epoch avg. loss: 0.352 | test avg. loss: 8.102
Epochs: 7844 | epoch avg. loss: 0.371 | test avg. loss: 8.070
Epochs: 7845 | epoch avg. loss: 0.318 | test avg. loss: 7.992


 16%|█▌        | 7848/50000 [11:35<53:57, 13.02it/s]

Epochs: 7846 | epoch avg. loss: 0.371 | test avg. loss: 8.189
Epochs: 7847 | epoch avg. loss: 0.326 | test avg. loss: 8.041
Epochs: 7848 | epoch avg. loss: 0.317 | test avg. loss: 8.064


 16%|█▌        | 7852/50000 [11:36<55:04, 12.76it/s]

Epochs: 7849 | epoch avg. loss: 0.302 | test avg. loss: 8.087
Epochs: 7850 | epoch avg. loss: 0.323 | test avg. loss: 8.055
Epochs: 7851 | epoch avg. loss: 0.296 | test avg. loss: 8.201


 16%|█▌        | 7854/50000 [11:36<55:05, 12.75it/s]

Epochs: 7852 | epoch avg. loss: 0.338 | test avg. loss: 8.051
Epochs: 7853 | epoch avg. loss: 0.295 | test avg. loss: 8.038
Epochs: 7854 | epoch avg. loss: 0.295 | test avg. loss: 8.132


 16%|█▌        | 7858/50000 [11:36<54:25, 12.90it/s]

Epochs: 7855 | epoch avg. loss: 0.309 | test avg. loss: 8.083
Epochs: 7856 | epoch avg. loss: 0.295 | test avg. loss: 8.086
Epochs: 7857 | epoch avg. loss: 0.299 | test avg. loss: 8.154


 16%|█▌        | 7860/50000 [11:36<53:57, 13.02it/s]

Epochs: 7858 | epoch avg. loss: 0.297 | test avg. loss: 8.195
Epochs: 7859 | epoch avg. loss: 0.288 | test avg. loss: 8.113
Epochs: 7860 | epoch avg. loss: 0.308 | test avg. loss: 8.157


 16%|█▌        | 7864/50000 [11:37<55:02, 12.76it/s]

Epochs: 7861 | epoch avg. loss: 0.297 | test avg. loss: 8.199
Epochs: 7862 | epoch avg. loss: 0.306 | test avg. loss: 8.078
Epochs: 7863 | epoch avg. loss: 0.287 | test avg. loss: 8.027


 16%|█▌        | 7866/50000 [11:37<52:28, 13.38it/s]

Epochs: 7864 | epoch avg. loss: 0.310 | test avg. loss: 8.218
Epochs: 7865 | epoch avg. loss: 0.348 | test avg. loss: 8.112
Epochs: 7866 | epoch avg. loss: 0.308 | test avg. loss: 8.092


 16%|█▌        | 7870/50000 [11:37<51:19, 13.68it/s]

Epochs: 7867 | epoch avg. loss: 0.312 | test avg. loss: 8.300
Epochs: 7868 | epoch avg. loss: 0.344 | test avg. loss: 8.139
Epochs: 7869 | epoch avg. loss: 0.312 | test avg. loss: 8.233


                                                    

Epochs: 7870 | epoch avg. loss: 0.320 | test avg. loss: 8.147
Epochs: 7871 | epoch avg. loss: 0.318 | test avg. loss: 7.993
Epochs: 7872 | epoch avg. loss: 0.367 | test avg. loss: 8.007


 16%|█▌        | 7876/50000 [11:37<54:42, 12.83it/s]

Epochs: 7873 | epoch avg. loss: 0.276 | test avg. loss: 7.972
Epochs: 7874 | epoch avg. loss: 0.348 | test avg. loss: 8.166
Epochs: 7875 | epoch avg. loss: 0.333 | test avg. loss: 8.140


 16%|█▌        | 7878/50000 [11:38<58:11, 12.07it/s]

Epochs: 7876 | epoch avg. loss: 0.310 | test avg. loss: 8.065
Epochs: 7877 | epoch avg. loss: 0.294 | test avg. loss: 8.274
Epochs: 7878 | epoch avg. loss: 0.332 | test avg. loss: 8.060


 16%|█▌        | 7882/50000 [11:38<56:33, 12.41it/s]

Epochs: 7879 | epoch avg. loss: 0.309 | test avg. loss: 8.057
Epochs: 7880 | epoch avg. loss: 0.290 | test avg. loss: 8.283
Epochs: 7881 | epoch avg. loss: 0.386 | test avg. loss: 8.071


 16%|█▌        | 7884/50000 [11:38<57:31, 12.20it/s]

Epochs: 7882 | epoch avg. loss: 0.416 | test avg. loss: 8.095
Epochs: 7883 | epoch avg. loss: 0.388 | test avg. loss: 8.312
Epochs: 7884 | epoch avg. loss: 0.377 | test avg. loss: 8.059


 16%|█▌        | 7888/50000 [11:38<1:01:27, 11.42it/s]

Epochs: 7885 | epoch avg. loss: 0.414 | test avg. loss: 8.123
Epochs: 7886 | epoch avg. loss: 0.312 | test avg. loss: 8.010
Epochs: 7887 | epoch avg. loss: 0.288 | test avg. loss: 8.017


 16%|█▌        | 7892/50000 [11:39<55:39, 12.61it/s]  

Epochs: 7888 | epoch avg. loss: 0.287 | test avg. loss: 8.204
Epochs: 7889 | epoch avg. loss: 0.323 | test avg. loss: 8.087
Epochs: 7890 | epoch avg. loss: 0.293 | test avg. loss: 8.092
Epochs: 7891 | epoch avg. loss: 0.289 | test avg. loss: 8.170


 16%|█▌        | 7894/50000 [11:39<55:19, 12.68it/s]

Epochs: 7892 | epoch avg. loss: 0.286 | test avg. loss: 8.119
Epochs: 7893 | epoch avg. loss: 0.287 | test avg. loss: 8.088
Epochs: 7894 | epoch avg. loss: 0.310 | test avg. loss: 8.103


 16%|█▌        | 7898/50000 [11:39<57:52, 12.13it/s]

Epochs: 7895 | epoch avg. loss: 0.283 | test avg. loss: 8.125
Epochs: 7896 | epoch avg. loss: 0.325 | test avg. loss: 8.179
Epochs: 7897 | epoch avg. loss: 0.310 | test avg. loss: 8.153


 16%|█▌        | 7898/50000 [11:39<57:52, 12.13it/s]

Epochs: 7898 | epoch avg. loss: 0.275 | test avg. loss: 8.262
Epochs: 7899 | epoch avg. loss: 0.309 | test avg. loss: 8.058


 16%|█▌        | 7902/50000 [11:41<2:47:59,  4.18it/s]

Epochs: 7900 | epoch avg. loss: 0.292 | test avg. loss: 8.120
Epochs: 7901 | epoch avg. loss: 0.279 | test avg. loss: 8.130
Epochs: 7902 | epoch avg. loss: 0.287 | test avg. loss: 8.088


 16%|█▌        | 7906/50000 [11:41<1:53:56,  6.16it/s]

Epochs: 7903 | epoch avg. loss: 0.271 | test avg. loss: 8.135
Epochs: 7904 | epoch avg. loss: 0.271 | test avg. loss: 8.125
Epochs: 7905 | epoch avg. loss: 0.271 | test avg. loss: 8.168


 16%|█▌        | 7908/50000 [11:42<1:35:32,  7.34it/s]

Epochs: 7906 | epoch avg. loss: 0.289 | test avg. loss: 8.107
Epochs: 7907 | epoch avg. loss: 0.324 | test avg. loss: 8.163
Epochs: 7908 | epoch avg. loss: 0.266 | test avg. loss: 8.310


 16%|█▌        | 7912/50000 [11:42<1:15:40,  9.27it/s]

Epochs: 7909 | epoch avg. loss: 0.312 | test avg. loss: 8.050
Epochs: 7910 | epoch avg. loss: 0.331 | test avg. loss: 8.045
Epochs: 7911 | epoch avg. loss: 0.274 | test avg. loss: 8.169


 16%|█▌        | 7914/50000 [11:42<1:07:29, 10.39it/s]

Epochs: 7912 | epoch avg. loss: 0.371 | test avg. loss: 8.008
Epochs: 7913 | epoch avg. loss: 0.420 | test avg. loss: 8.050
Epochs: 7914 | epoch avg. loss: 0.405 | test avg. loss: 8.258


 16%|█▌        | 7918/50000 [11:42<58:59, 11.89it/s]  

Epochs: 7915 | epoch avg. loss: 0.406 | test avg. loss: 8.012
Epochs: 7916 | epoch avg. loss: 0.383 | test avg. loss: 8.298
Epochs: 7917 | epoch avg. loss: 0.381 | test avg. loss: 7.982


 16%|█▌        | 7920/50000 [11:43<57:24, 12.22it/s]

Epochs: 7918 | epoch avg. loss: 0.360 | test avg. loss: 8.054
Epochs: 7919 | epoch avg. loss: 0.307 | test avg. loss: 8.284
Epochs: 7920 | epoch avg. loss: 0.317 | test avg. loss: 8.087


 16%|█▌        | 7924/50000 [11:43<56:50, 12.34it/s]

Epochs: 7921 | epoch avg. loss: 0.338 | test avg. loss: 8.362
Epochs: 7922 | epoch avg. loss: 0.361 | test avg. loss: 8.176
Epochs: 7923 | epoch avg. loss: 0.281 | test avg. loss: 8.133


 16%|█▌        | 7926/50000 [11:43<56:37, 12.38it/s]

Epochs: 7924 | epoch avg. loss: 0.365 | test avg. loss: 8.186
Epochs: 7925 | epoch avg. loss: 0.325 | test avg. loss: 8.023
Epochs: 7926 | epoch avg. loss: 0.295 | test avg. loss: 8.193


 16%|█▌        | 7930/50000 [11:43<56:52, 12.33it/s]

Epochs: 7927 | epoch avg. loss: 0.276 | test avg. loss: 8.084
Epochs: 7928 | epoch avg. loss: 0.334 | test avg. loss: 8.255
Epochs: 7929 | epoch avg. loss: 0.318 | test avg. loss: 8.224


 16%|█▌        | 7932/50000 [11:44<1:01:34, 11.39it/s]

Epochs: 7930 | epoch avg. loss: 0.241 | test avg. loss: 8.130
Epochs: 7931 | epoch avg. loss: 0.437 | test avg. loss: 8.193
Epochs: 7932 | epoch avg. loss: 0.297 | test avg. loss: 8.127


 16%|█▌        | 7936/50000 [11:44<58:42, 11.94it/s]

Epochs: 7933 | epoch avg. loss: 0.308 | test avg. loss: 8.099
Epochs: 7934 | epoch avg. loss: 0.270 | test avg. loss: 8.382
Epochs: 7935 | epoch avg. loss: 0.335 | test avg. loss: 8.152


 16%|█▌        | 7938/50000 [11:44<57:13, 12.25it/s]

Epochs: 7936 | epoch avg. loss: 0.279 | test avg. loss: 8.234
Epochs: 7937 | epoch avg. loss: 0.264 | test avg. loss: 8.175
Epochs: 7938 | epoch avg. loss: 0.283 | test avg. loss: 8.171


 16%|█▌        | 7942/50000 [11:44<55:37, 12.60it/s]

Epochs: 7939 | epoch avg. loss: 0.299 | test avg. loss: 8.144
Epochs: 7940 | epoch avg. loss: 0.243 | test avg. loss: 8.101
Epochs: 7941 | epoch avg. loss: 0.369 | test avg. loss: 8.174


 16%|█▌        | 7944/50000 [11:45<1:00:06, 11.66it/s]

Epochs: 7942 | epoch avg. loss: 0.278 | test avg. loss: 8.103
Epochs: 7943 | epoch avg. loss: 0.257 | test avg. loss: 8.064
Epochs: 7944 | epoch avg. loss: 0.258 | test avg. loss: 8.047


 16%|█▌        | 7948/50000 [11:45<58:29, 11.98it/s]

Epochs: 7945 | epoch avg. loss: 0.272 | test avg. loss: 8.030
Epochs: 7946 | epoch avg. loss: 0.249 | test avg. loss: 8.005
Epochs: 7947 | epoch avg. loss: 0.272 | test avg. loss: 8.206


 16%|█▌        | 7950/50000 [11:45<56:10, 12.47it/s]

Epochs: 7948 | epoch avg. loss: 0.293 | test avg. loss: 8.101
Epochs: 7949 | epoch avg. loss: 0.254 | test avg. loss: 8.068
Epochs: 7950 | epoch avg. loss: 0.268 | test avg. loss: 8.164


 16%|█▌        | 7954/50000 [11:45<52:59, 13.23it/s]

Epochs: 7951 | epoch avg. loss: 0.257 | test avg. loss: 8.088
Epochs: 7952 | epoch avg. loss: 0.264 | test avg. loss: 8.179
Epochs: 7953 | epoch avg. loss: 0.265 | test avg. loss: 8.103
Epochs: 7954 | epoch avg. loss: 0.253 | test avg. loss: 8.059


 16%|█▌        | 7958/50000 [11:46<57:22, 12.21it/s]

Epochs: 7955 | epoch avg. loss: 0.252 | test avg. loss: 8.244
Epochs: 7956 | epoch avg. loss: 0.311 | test avg. loss: 8.066
Epochs: 7957 | epoch avg. loss: 0.284 | test avg. loss: 8.080


 16%|█▌        | 7960/50000 [11:46<57:39, 12.15it/s]

Epochs: 7958 | epoch avg. loss: 0.270 | test avg. loss: 8.087
Epochs: 7959 | epoch avg. loss: 0.238 | test avg. loss: 8.021
Epochs: 7960 | epoch avg. loss: 0.306 | test avg. loss: 8.240


 16%|█▌        | 7964/50000 [11:46<58:41, 11.94it/s]

Epochs: 7961 | epoch avg. loss: 0.300 | test avg. loss: 8.144
Epochs: 7962 | epoch avg. loss: 0.257 | test avg. loss: 8.111
Epochs: 7963 | epoch avg. loss: 0.247 | test avg. loss: 8.187


 16%|█▌        | 7966/50000 [11:46<57:23, 12.21it/s]

Epochs: 7964 | epoch avg. loss: 0.273 | test avg. loss: 8.087
Epochs: 7965 | epoch avg. loss: 0.245 | test avg. loss: 8.070
Epochs: 7966 | epoch avg. loss: 0.249 | test avg. loss: 8.015


 16%|█▌        | 7970/50000 [11:47<57:36, 12.16it/s]

Epochs: 7967 | epoch avg. loss: 0.252 | test avg. loss: 8.078
Epochs: 7968 | epoch avg. loss: 0.258 | test avg. loss: 8.113
Epochs: 7969 | epoch avg. loss: 0.259 | test avg. loss: 8.141


 16%|█▌        | 7972/50000 [11:47<55:24, 12.64it/s]

Epochs: 7970 | epoch avg. loss: 0.261 | test avg. loss: 8.220
Epochs: 7971 | epoch avg. loss: 0.260 | test avg. loss: 8.107
Epochs: 7972 | epoch avg. loss: 0.247 | test avg. loss: 8.127


 16%|█▌        | 7976/50000 [11:47<54:56, 12.75it/s]

Epochs: 7973 | epoch avg. loss: 0.254 | test avg. loss: 8.053
Epochs: 7974 | epoch avg. loss: 0.255 | test avg. loss: 8.025
Epochs: 7975 | epoch avg. loss: 0.276 | test avg. loss: 8.112


 16%|█▌        | 7978/50000 [11:47<53:49, 13.01it/s]

Epochs: 7976 | epoch avg. loss: 0.266 | test avg. loss: 8.130
Epochs: 7977 | epoch avg. loss: 0.275 | test avg. loss: 8.081
Epochs: 7978 | epoch avg. loss: 0.262 | test avg. loss: 8.244


 16%|█▌        | 7982/50000 [11:48<55:33, 12.60it/s]

Epochs: 7979 | epoch avg. loss: 0.271 | test avg. loss: 8.042
Epochs: 7980 | epoch avg. loss: 0.401 | test avg. loss: 8.228
Epochs: 7981 | epoch avg. loss: 0.346 | test avg. loss: 8.123


 16%|█▌        | 7984/50000 [11:48<55:32, 12.61it/s]

Epochs: 7982 | epoch avg. loss: 0.379 | test avg. loss: 7.983
Epochs: 7983 | epoch avg. loss: 0.297 | test avg. loss: 8.410
Epochs: 7984 | epoch avg. loss: 0.384 | test avg. loss: 8.009


 16%|█▌        | 7988/50000 [11:48<53:25, 13.11it/s]

Epochs: 7985 | epoch avg. loss: 0.440 | test avg. loss: 8.242
Epochs: 7986 | epoch avg. loss: 0.358 | test avg. loss: 8.217
Epochs: 7987 | epoch avg. loss: 0.322 | test avg. loss: 8.017


 16%|█▌        | 7990/50000 [11:48<52:26, 13.35it/s]

Epochs: 7988 | epoch avg. loss: 0.302 | test avg. loss: 8.285
Epochs: 7989 | epoch avg. loss: 0.309 | test avg. loss: 8.016
Epochs: 7990 | epoch avg. loss: 0.303 | test avg. loss: 8.066


 16%|█▌        | 7994/50000 [11:48<52:05, 13.44it/s]

Epochs: 7991 | epoch avg. loss: 0.246 | test avg. loss: 8.020
Epochs: 7992 | epoch avg. loss: 0.238 | test avg. loss: 8.024
Epochs: 7993 | epoch avg. loss: 0.241 | test avg. loss: 8.183


 16%|█▌        | 7996/50000 [11:49<55:25, 12.63it/s]

Epochs: 7994 | epoch avg. loss: 0.275 | test avg. loss: 8.050
Epochs: 7995 | epoch avg. loss: 0.264 | test avg. loss: 8.090
Epochs: 7996 | epoch avg. loss: 0.257 | test avg. loss: 8.064


 16%|█▌        | 7998/50000 [11:49<54:05, 12.94it/s]

Epochs: 7997 | epoch avg. loss: 0.246 | test avg. loss: 8.028
Epochs: 7998 | epoch avg. loss: 0.316 | test avg. loss: 8.042
Epochs: 7999 | epoch avg. loss: 0.266 | test avg. loss: 8.042


 16%|█▌        | 8002/50000 [11:50<2:21:30,  4.95it/s]

Epochs: 8000 | epoch avg. loss: 0.233 | test avg. loss: 8.289
Epochs: 8001 | epoch avg. loss: 0.297 | test avg. loss: 8.031
Epochs: 8002 | epoch avg. loss: 0.318 | test avg. loss: 8.220




Epochs: 8003 | epoch avg. loss: 0.303 | test avg. loss: 8.113
Epochs: 8004 | epoch avg. loss: 0.259 | test avg. loss: 7.991
Epochs: 8005 | epoch avg. loss: 0.301 | test avg. loss: 8.208


                                                      

Epochs: 8006 | epoch avg. loss: 0.266 | test avg. loss: 8.096
Epochs: 8007 | epoch avg. loss: 0.264 | test avg. loss: 8.159
Epochs: 8008 | epoch avg. loss: 0.240 | test avg. loss: 8.169


                                                      

Epochs: 8009 | epoch avg. loss: 0.241 | test avg. loss: 8.068
Epochs: 8010 | epoch avg. loss: 0.241 | test avg. loss: 8.045
Epochs: 8011 | epoch avg. loss: 0.252 | test avg. loss: 8.029


                                                    

Epochs: 8012 | epoch avg. loss: 0.238 | test avg. loss: 8.020
Epochs: 8013 | epoch avg. loss: 0.229 | test avg. loss: 8.120
Epochs: 8014 | epoch avg. loss: 0.276 | test avg. loss: 8.078


 16%|█▌        | 8018/50000 [11:51<55:27, 12.62it/s]

Epochs: 8015 | epoch avg. loss: 0.238 | test avg. loss: 8.096
Epochs: 8016 | epoch avg. loss: 0.233 | test avg. loss: 8.045
Epochs: 8017 | epoch avg. loss: 0.317 | test avg. loss: 8.322


 16%|█▌        | 8020/50000 [11:52<58:30, 11.96it/s]

Epochs: 8018 | epoch avg. loss: 0.387 | test avg. loss: 8.085
Epochs: 8019 | epoch avg. loss: 0.265 | test avg. loss: 8.076
Epochs: 8020 | epoch avg. loss: 0.304 | test avg. loss: 8.481


 16%|█▌        | 8024/50000 [11:52<52:51, 13.23it/s]

Epochs: 8021 | epoch avg. loss: 0.368 | test avg. loss: 8.056
Epochs: 8022 | epoch avg. loss: 0.494 | test avg. loss: 8.165
Epochs: 8023 | epoch avg. loss: 0.350 | test avg. loss: 8.087
Epochs: 8024 | epoch avg. loss: 0.395 | test avg. loss: 7.912


 16%|█▌        | 8028/50000 [11:52<48:38, 14.38it/s]

Epochs: 8025 | epoch avg. loss: 0.287 | test avg. loss: 8.256
Epochs: 8026 | epoch avg. loss: 0.314 | test avg. loss: 8.018
Epochs: 8027 | epoch avg. loss: 0.432 | test avg. loss: 8.286
Epochs: 8028 | epoch avg. loss: 0.375 | test avg. loss: 7.983


 16%|█▌        | 8032/50000 [11:52<47:59, 14.58it/s]

Epochs: 8029 | epoch avg. loss: 0.258 | test avg. loss: 7.944
Epochs: 8030 | epoch avg. loss: 0.249 | test avg. loss: 8.242
Epochs: 8031 | epoch avg. loss: 0.320 | test avg. loss: 7.902


                                                    

Epochs: 8032 | epoch avg. loss: 0.371 | test avg. loss: 8.073
Epochs: 8033 | epoch avg. loss: 0.350 | test avg. loss: 7.891
Epochs: 8034 | epoch avg. loss: 0.292 | test avg. loss: 7.860


 16%|█▌        | 8038/50000 [11:53<49:02, 14.26it/s]

Epochs: 8035 | epoch avg. loss: 0.268 | test avg. loss: 8.082
Epochs: 8036 | epoch avg. loss: 0.290 | test avg. loss: 7.873
Epochs: 8037 | epoch avg. loss: 0.276 | test avg. loss: 8.255


                                                    

Epochs: 8038 | epoch avg. loss: 0.367 | test avg. loss: 7.981
Epochs: 8039 | epoch avg. loss: 0.384 | test avg. loss: 8.130
Epochs: 8040 | epoch avg. loss: 0.229 | test avg. loss: 8.521


 16%|█▌        | 8044/50000 [11:53<46:59, 14.88it/s]

Epochs: 8041 | epoch avg. loss: 0.354 | test avg. loss: 8.114
Epochs: 8042 | epoch avg. loss: 0.530 | test avg. loss: 8.518
Epochs: 8043 | epoch avg. loss: 0.804 | test avg. loss: 7.848
Epochs: 8044 | epoch avg. loss: 0.592 | test avg. loss: 7.870


 16%|█▌        | 8048/50000 [11:54<51:36, 13.55it/s]

Epochs: 8045 | epoch avg. loss: 0.523 | test avg. loss: 8.530
Epochs: 8046 | epoch avg. loss: 0.380 | test avg. loss: 8.311
Epochs: 8047 | epoch avg. loss: 0.625 | test avg. loss: 8.694


 16%|█▌        | 8050/50000 [11:54<52:18, 13.37it/s]

Epochs: 8048 | epoch avg. loss: 0.430 | test avg. loss: 8.032
Epochs: 8049 | epoch avg. loss: 0.298 | test avg. loss: 7.949
Epochs: 8050 | epoch avg. loss: 0.345 | test avg. loss: 7.795


 16%|█▌        | 8054/50000 [11:54<49:23, 14.15it/s]

Epochs: 8051 | epoch avg. loss: 0.249 | test avg. loss: 7.753
Epochs: 8052 | epoch avg. loss: 0.279 | test avg. loss: 8.022
Epochs: 8053 | epoch avg. loss: 0.318 | test avg. loss: 7.790
Epochs: 8054 | epoch avg. loss: 0.310 | test avg. loss: 7.901




Epochs: 8055 | epoch avg. loss: 0.227 | test avg. loss: 7.893
Epochs: 8056 | epoch avg. loss: 0.237 | test avg. loss: 8.014
Epochs: 8057 | epoch avg. loss: 0.244 | test avg. loss: 7.921


 16%|█▌        | 8060/50000 [11:54<48:34, 14.39it/s]

Epochs: 8058 | epoch avg. loss: 0.223 | test avg. loss: 7.895
Epochs: 8059 | epoch avg. loss: 0.228 | test avg. loss: 7.925
Epochs: 8060 | epoch avg. loss: 0.236 | test avg. loss: 7.874


 16%|█▌        | 8064/50000 [11:55<51:41, 13.52it/s]

Epochs: 8061 | epoch avg. loss: 0.231 | test avg. loss: 7.900
Epochs: 8062 | epoch avg. loss: 0.227 | test avg. loss: 7.971
Epochs: 8063 | epoch avg. loss: 0.232 | test avg. loss: 7.940


 16%|█▌        | 8068/50000 [11:55<50:08, 13.94it/s]

Epochs: 8064 | epoch avg. loss: 0.239 | test avg. loss: 7.987
Epochs: 8065 | epoch avg. loss: 0.225 | test avg. loss: 7.923
Epochs: 8066 | epoch avg. loss: 0.250 | test avg. loss: 8.032
Epochs: 8067 | epoch avg. loss: 0.252 | test avg. loss: 7.895


 16%|█▌        | 8072/50000 [11:55<48:01, 14.55it/s]

Epochs: 8068 | epoch avg. loss: 0.223 | test avg. loss: 7.822
Epochs: 8069 | epoch avg. loss: 0.246 | test avg. loss: 7.885
Epochs: 8070 | epoch avg. loss: 0.256 | test avg. loss: 7.876
Epochs: 8071 | epoch avg. loss: 0.229 | test avg. loss: 7.863


 16%|█▌        | 8074/50000 [11:55<50:01, 13.97it/s]

Epochs: 8072 | epoch avg. loss: 0.217 | test avg. loss: 7.801
Epochs: 8073 | epoch avg. loss: 0.298 | test avg. loss: 8.126
Epochs: 8074 | epoch avg. loss: 0.410 | test avg. loss: 7.805


 16%|█▌        | 8078/50000 [11:56<53:22, 13.09it/s]

Epochs: 8075 | epoch avg. loss: 0.388 | test avg. loss: 7.863
Epochs: 8076 | epoch avg. loss: 0.291 | test avg. loss: 8.077
Epochs: 8077 | epoch avg. loss: 0.223 | test avg. loss: 7.892


 16%|█▌        | 8080/50000 [11:56<53:27, 13.07it/s]

Epochs: 8078 | epoch avg. loss: 0.484 | test avg. loss: 8.144
Epochs: 8079 | epoch avg. loss: 0.393 | test avg. loss: 7.818
Epochs: 8080 | epoch avg. loss: 0.458 | test avg. loss: 7.931


 16%|█▌        | 8084/50000 [11:56<52:32, 13.29it/s]

Epochs: 8081 | epoch avg. loss: 0.332 | test avg. loss: 8.005
Epochs: 8082 | epoch avg. loss: 0.248 | test avg. loss: 7.835
Epochs: 8083 | epoch avg. loss: 0.344 | test avg. loss: 8.073


 16%|█▌        | 8086/50000 [11:56<54:13, 12.88it/s]

Epochs: 8084 | epoch avg. loss: 0.286 | test avg. loss: 7.835
Epochs: 8085 | epoch avg. loss: 0.399 | test avg. loss: 7.885
Epochs: 8086 | epoch avg. loss: 0.254 | test avg. loss: 7.901


 16%|█▌        | 8090/50000 [11:57<57:43, 12.10it/s]

Epochs: 8087 | epoch avg. loss: 0.227 | test avg. loss: 7.806
Epochs: 8088 | epoch avg. loss: 0.261 | test avg. loss: 8.018
Epochs: 8089 | epoch avg. loss: 0.265 | test avg. loss: 7.818


 16%|█▌        | 8092/50000 [11:57<57:55, 12.06it/s]

Epochs: 8090 | epoch avg. loss: 0.249 | test avg. loss: 7.852
Epochs: 8091 | epoch avg. loss: 0.261 | test avg. loss: 7.801
Epochs: 8092 | epoch avg. loss: 0.221 | test avg. loss: 7.758


 16%|█▌        | 8096/50000 [11:57<56:22, 12.39it/s]

Epochs: 8093 | epoch avg. loss: 0.244 | test avg. loss: 7.871
Epochs: 8094 | epoch avg. loss: 0.216 | test avg. loss: 7.814
Epochs: 8095 | epoch avg. loss: 0.243 | test avg. loss: 7.944


 16%|█▌        | 8098/50000 [11:57<59:00, 11.84it/s]

Epochs: 8096 | epoch avg. loss: 0.229 | test avg. loss: 7.881
Epochs: 8097 | epoch avg. loss: 0.216 | test avg. loss: 7.828


 16%|█▌        | 8098/50000 [11:58<59:00, 11.84it/s]

Epochs: 8098 | epoch avg. loss: 0.236 | test avg. loss: 7.775
Epochs: 8099 | epoch avg. loss: 0.232 | test avg. loss: 7.800


 16%|█▌        | 8102/50000 [11:59<2:42:34,  4.30it/s]

Epochs: 8100 | epoch avg. loss: 0.215 | test avg. loss: 7.848
Epochs: 8101 | epoch avg. loss: 0.220 | test avg. loss: 7.773
Epochs: 8102 | epoch avg. loss: 0.225 | test avg. loss: 7.915


 16%|█▌        | 8106/50000 [11:59<1:50:13,  6.33it/s]

Epochs: 8103 | epoch avg. loss: 0.241 | test avg. loss: 7.766
Epochs: 8104 | epoch avg. loss: 0.251 | test avg. loss: 7.885
Epochs: 8105 | epoch avg. loss: 0.244 | test avg. loss: 7.861


 16%|█▌        | 8108/50000 [12:00<1:36:26,  7.24it/s]

Epochs: 8106 | epoch avg. loss: 0.233 | test avg. loss: 7.712
Epochs: 8107 | epoch avg. loss: 0.240 | test avg. loss: 7.949
Epochs: 8108 | epoch avg. loss: 0.335 | test avg. loss: 7.724


 16%|█▌        | 8112/50000 [12:00<1:18:36,  8.88it/s]

Epochs: 8109 | epoch avg. loss: 0.297 | test avg. loss: 7.752
Epochs: 8110 | epoch avg. loss: 0.240 | test avg. loss: 8.042
Epochs: 8111 | epoch avg. loss: 0.275 | test avg. loss: 7.762


 16%|█▌        | 8114/50000 [12:00<1:09:26, 10.05it/s]

Epochs: 8112 | epoch avg. loss: 0.303 | test avg. loss: 7.745
Epochs: 8113 | epoch avg. loss: 0.220 | test avg. loss: 7.834
Epochs: 8114 | epoch avg. loss: 0.302 | test avg. loss: 7.660


 16%|█▌        | 8118/50000 [12:00<1:01:20, 11.38it/s]

Epochs: 8115 | epoch avg. loss: 0.384 | test avg. loss: 7.810
Epochs: 8116 | epoch avg. loss: 0.555 | test avg. loss: 7.819
Epochs: 8117 | epoch avg. loss: 0.677 | test avg. loss: 8.041


 16%|█▌        | 8120/50000 [12:01<56:57, 12.25it/s]

Epochs: 8118 | epoch avg. loss: 0.563 | test avg. loss: 8.207
Epochs: 8119 | epoch avg. loss: 0.894 | test avg. loss: 7.866
Epochs: 8120 | epoch avg. loss: 0.474 | test avg. loss: 8.153


 16%|█▌        | 8124/50000 [12:01<52:01, 13.42it/s]

Epochs: 8121 | epoch avg. loss: 0.586 | test avg. loss: 7.602
Epochs: 8122 | epoch avg. loss: 0.458 | test avg. loss: 7.867
Epochs: 8123 | epoch avg. loss: 0.389 | test avg. loss: 7.591
Epochs: 8124 | epoch avg. loss: 0.382 | test avg. loss: 8.163


 16%|█▋        | 8128/50000 [12:01<48:02, 14.53it/s]

Epochs: 8125 | epoch avg. loss: 0.407 | test avg. loss: 7.862
Epochs: 8126 | epoch avg. loss: 0.343 | test avg. loss: 8.000
Epochs: 8127 | epoch avg. loss: 0.327 | test avg. loss: 8.042
Epochs: 8128 | epoch avg. loss: 0.425 | test avg. loss: 7.805


 16%|█▋        | 8132/50000 [12:01<51:59, 13.42it/s]

Epochs: 8129 | epoch avg. loss: 0.273 | test avg. loss: 7.956
Epochs: 8130 | epoch avg. loss: 0.280 | test avg. loss: 7.720
Epochs: 8131 | epoch avg. loss: 0.379 | test avg. loss: 8.177


 16%|█▋        | 8134/50000 [12:02<59:06, 11.80it/s]

Epochs: 8132 | epoch avg. loss: 0.463 | test avg. loss: 7.678
Epochs: 8133 | epoch avg. loss: 0.377 | test avg. loss: 7.756
Epochs: 8134 | epoch avg. loss: 0.301 | test avg. loss: 7.812




Epochs: 8135 | epoch avg. loss: 0.259 | test avg. loss: 7.615
Epochs: 8136 | epoch avg. loss: 0.396 | test avg. loss: 7.956
Epochs: 8137 | epoch avg. loss: 0.412 | test avg. loss: 7.497


 16%|█▋        | 8140/50000 [12:02<50:57, 13.69it/s]

Epochs: 8138 | epoch avg. loss: 0.266 | test avg. loss: 7.658
Epochs: 8139 | epoch avg. loss: 0.239 | test avg. loss: 7.606
Epochs: 8140 | epoch avg. loss: 0.250 | test avg. loss: 7.747


 16%|█▋        | 8144/50000 [12:02<50:45, 13.74it/s]

Epochs: 8141 | epoch avg. loss: 0.216 | test avg. loss: 7.851
Epochs: 8142 | epoch avg. loss: 0.222 | test avg. loss: 7.772
Epochs: 8143 | epoch avg. loss: 0.221 | test avg. loss: 7.843


 16%|█▋        | 8146/50000 [12:03<55:26, 12.58it/s]

Epochs: 8144 | epoch avg. loss: 0.227 | test avg. loss: 7.768
Epochs: 8145 | epoch avg. loss: 0.206 | test avg. loss: 7.730
Epochs: 8146 | epoch avg. loss: 0.211 | test avg. loss: 7.693


 16%|█▋        | 8150/50000 [12:03<55:56, 12.47it/s]

Epochs: 8147 | epoch avg. loss: 0.210 | test avg. loss: 7.650
Epochs: 8148 | epoch avg. loss: 0.206 | test avg. loss: 7.637
Epochs: 8149 | epoch avg. loss: 0.204 | test avg. loss: 7.736


 16%|█▋        | 8152/50000 [12:03<53:48, 12.96it/s]

Epochs: 8150 | epoch avg. loss: 0.234 | test avg. loss: 7.609
Epochs: 8151 | epoch avg. loss: 0.300 | test avg. loss: 7.627
Epochs: 8152 | epoch avg. loss: 0.229 | test avg. loss: 7.739


 16%|█▋        | 8156/50000 [12:03<53:54, 12.94it/s]

Epochs: 8153 | epoch avg. loss: 0.213 | test avg. loss: 7.670
Epochs: 8154 | epoch avg. loss: 0.206 | test avg. loss: 7.668
Epochs: 8155 | epoch avg. loss: 0.203 | test avg. loss: 7.660




Epochs: 8156 | epoch avg. loss: 0.237 | test avg. loss: 7.652
Epochs: 8157 | epoch avg. loss: 0.211 | test avg. loss: 7.659


 16%|█▋        | 8160/50000 [12:04<1:01:00, 11.43it/s]

Epochs: 8158 | epoch avg. loss: 0.211 | test avg. loss: 7.647
Epochs: 8159 | epoch avg. loss: 0.200 | test avg. loss: 7.738
Epochs: 8160 | epoch avg. loss: 0.220 | test avg. loss: 7.620


 16%|█▋        | 8164/50000 [12:04<58:15, 11.97it/s]

Epochs: 8161 | epoch avg. loss: 0.244 | test avg. loss: 7.731
Epochs: 8162 | epoch avg. loss: 0.302 | test avg. loss: 7.596
Epochs: 8163 | epoch avg. loss: 0.262 | test avg. loss: 7.594


 16%|█▋        | 8166/50000 [12:04<56:38, 12.31it/s]

Epochs: 8164 | epoch avg. loss: 0.228 | test avg. loss: 7.706
Epochs: 8165 | epoch avg. loss: 0.204 | test avg. loss: 7.596
Epochs: 8166 | epoch avg. loss: 0.340 | test avg. loss: 7.838


 16%|█▋        | 8168/50000 [12:04<55:33, 12.55it/s]

Epochs: 8167 | epoch avg. loss: 0.276 | test avg. loss: 7.619
Epochs: 8168 | epoch avg. loss: 0.231 | test avg. loss: 7.850
Epochs: 8169 | epoch avg. loss: 0.268 | test avg. loss: 7.593


 16%|█▋        | 8172/50000 [12:05<1:00:04, 11.60it/s]

Epochs: 8170 | epoch avg. loss: 0.236 | test avg. loss: 7.653
Epochs: 8171 | epoch avg. loss: 0.209 | test avg. loss: 7.660
Epochs: 8172 | epoch avg. loss: 0.217 | test avg. loss: 7.576


 16%|█▋        | 8176/50000 [12:05<55:22, 12.59it/s]

Epochs: 8173 | epoch avg. loss: 0.215 | test avg. loss: 7.806
Epochs: 8174 | epoch avg. loss: 0.250 | test avg. loss: 7.602
Epochs: 8175 | epoch avg. loss: 0.240 | test avg. loss: 7.834
Epochs: 8176 | epoch avg. loss: 0.326 | test avg. loss: 7.585


 16%|█▋        | 8180/50000 [12:05<52:52, 13.18it/s]

Epochs: 8177 | epoch avg. loss: 0.435 | test avg. loss: 7.902
Epochs: 8178 | epoch avg. loss: 0.415 | test avg. loss: 7.613
Epochs: 8179 | epoch avg. loss: 0.406 | test avg. loss: 7.568


                                                    

Epochs: 8180 | epoch avg. loss: 0.351 | test avg. loss: 7.512
Epochs: 8181 | epoch avg. loss: 0.276 | test avg. loss: 7.482


 16%|█▋        | 8184/50000 [12:06<1:00:47, 11.46it/s]

Epochs: 8182 | epoch avg. loss: 0.311 | test avg. loss: 7.793
Epochs: 8183 | epoch avg. loss: 0.232 | test avg. loss: 7.631
Epochs: 8184 | epoch avg. loss: 0.411 | test avg. loss: 7.956


 16%|█▋        | 8188/50000 [12:06<57:34, 12.10it/s]

Epochs: 8185 | epoch avg. loss: 0.409 | test avg. loss: 7.690
Epochs: 8186 | epoch avg. loss: 0.555 | test avg. loss: 7.856
Epochs: 8187 | epoch avg. loss: 0.518 | test avg. loss: 7.597


 16%|█▋        | 8190/50000 [12:06<58:11, 11.98it/s]

Epochs: 8188 | epoch avg. loss: 0.532 | test avg. loss: 7.681
Epochs: 8189 | epoch avg. loss: 0.571 | test avg. loss: 7.693
Epochs: 8190 | epoch avg. loss: 0.581 | test avg. loss: 7.661


 16%|█▋        | 8194/50000 [12:06<55:47, 12.49it/s]

Epochs: 8191 | epoch avg. loss: 0.502 | test avg. loss: 8.243
Epochs: 8192 | epoch avg. loss: 0.633 | test avg. loss: 7.776
Epochs: 8193 | epoch avg. loss: 0.731 | test avg. loss: 8.369


 16%|█▋        | 8196/50000 [12:07<54:15, 12.84it/s]

Epochs: 8194 | epoch avg. loss: 0.564 | test avg. loss: 7.599
Epochs: 8195 | epoch avg. loss: 0.650 | test avg. loss: 7.800
Epochs: 8196 | epoch avg. loss: 0.391 | test avg. loss: 7.432


 16%|█▋        | 8198/50000 [12:07<55:15, 12.61it/s]

Epochs: 8197 | epoch avg. loss: 0.364 | test avg. loss: 7.579
Epochs: 8198 | epoch avg. loss: 0.242 | test avg. loss: 7.638
Epochs: 8199 | epoch avg. loss: 0.277 | test avg. loss: 7.478


 16%|█▋        | 8202/50000 [12:08<2:26:07,  4.77it/s]

Epochs: 8200 | epoch avg. loss: 0.237 | test avg. loss: 7.695
Epochs: 8201 | epoch avg. loss: 0.246 | test avg. loss: 7.498
Epochs: 8202 | epoch avg. loss: 0.299 | test avg. loss: 7.622


 16%|█▋        | 8206/50000 [12:09<1:39:29,  7.00it/s]

Epochs: 8203 | epoch avg. loss: 0.212 | test avg. loss: 7.576
Epochs: 8204 | epoch avg. loss: 0.209 | test avg. loss: 7.703
Epochs: 8205 | epoch avg. loss: 0.221 | test avg. loss: 7.558


 16%|█▋        | 8208/50000 [12:09<1:25:18,  8.16it/s]

Epochs: 8206 | epoch avg. loss: 0.218 | test avg. loss: 7.580
Epochs: 8207 | epoch avg. loss: 0.204 | test avg. loss: 7.546
Epochs: 8208 | epoch avg. loss: 0.192 | test avg. loss: 7.495


 16%|█▋        | 8212/50000 [12:09<1:06:59, 10.40it/s]

Epochs: 8209 | epoch avg. loss: 0.254 | test avg. loss: 7.894
Epochs: 8210 | epoch avg. loss: 0.374 | test avg. loss: 7.564
Epochs: 8211 | epoch avg. loss: 0.325 | test avg. loss: 7.645
Epochs: 8212 | epoch avg. loss: 0.336 | test avg. loss: 7.658


 16%|█▋        | 8216/50000 [12:09<1:00:32, 11.50it/s]

Epochs: 8213 | epoch avg. loss: 0.278 | test avg. loss: 7.661
Epochs: 8214 | epoch avg. loss: 0.389 | test avg. loss: 7.687
Epochs: 8215 | epoch avg. loss: 0.232 | test avg. loss: 7.506


                                                    

Epochs: 8216 | epoch avg. loss: 0.292 | test avg. loss: 7.597
Epochs: 8217 | epoch avg. loss: 0.248 | test avg. loss: 7.333
Epochs: 8218 | epoch avg. loss: 0.221 | test avg. loss: 7.552


 16%|█▋        | 8222/50000 [12:10<51:35, 13.49it/s]

Epochs: 8219 | epoch avg. loss: 0.257 | test avg. loss: 7.382
Epochs: 8220 | epoch avg. loss: 0.378 | test avg. loss: 7.977
Epochs: 8221 | epoch avg. loss: 0.507 | test avg. loss: 7.485
Epochs: 8222 | epoch avg. loss: 0.255 | test avg. loss: 7.542


 16%|█▋        | 8226/50000 [12:10<49:23, 14.09it/s]

Epochs: 8223 | epoch avg. loss: 0.291 | test avg. loss: 7.483
Epochs: 8224 | epoch avg. loss: 0.308 | test avg. loss: 7.548
Epochs: 8225 | epoch avg. loss: 0.271 | test avg. loss: 7.485
Epochs: 8226 | epoch avg. loss: 0.282 | test avg. loss: 7.354


 16%|█▋        | 8230/50000 [12:10<48:32, 14.34it/s]

Epochs: 8227 | epoch avg. loss: 0.340 | test avg. loss: 7.464
Epochs: 8228 | epoch avg. loss: 0.224 | test avg. loss: 7.364
Epochs: 8229 | epoch avg. loss: 0.306 | test avg. loss: 7.589


 16%|█▋        | 8232/50000 [12:11<52:26, 13.27it/s]

Epochs: 8230 | epoch avg. loss: 0.288 | test avg. loss: 7.386
Epochs: 8231 | epoch avg. loss: 0.236 | test avg. loss: 7.922
Epochs: 8232 | epoch avg. loss: 0.461 | test avg. loss: 7.465


 16%|█▋        | 8236/50000 [12:11<50:14, 13.86it/s]

Epochs: 8233 | epoch avg. loss: 0.219 | test avg. loss: 7.629
Epochs: 8234 | epoch avg. loss: 0.235 | test avg. loss: 7.446
Epochs: 8235 | epoch avg. loss: 0.201 | test avg. loss: 7.409


 16%|█▋        | 8238/50000 [12:11<49:55, 13.94it/s]

Epochs: 8236 | epoch avg. loss: 0.201 | test avg. loss: 7.600
Epochs: 8237 | epoch avg. loss: 0.244 | test avg. loss: 7.399
Epochs: 8238 | epoch avg. loss: 0.268 | test avg. loss: 7.580


 16%|█▋        | 8242/50000 [12:11<50:41, 13.73it/s]

Epochs: 8239 | epoch avg. loss: 0.242 | test avg. loss: 7.454
Epochs: 8240 | epoch avg. loss: 0.207 | test avg. loss: 7.409
Epochs: 8241 | epoch avg. loss: 0.202 | test avg. loss: 7.520


 16%|█▋        | 8244/50000 [12:11<49:32, 14.05it/s]

Epochs: 8242 | epoch avg. loss: 0.196 | test avg. loss: 7.382
Epochs: 8243 | epoch avg. loss: 0.305 | test avg. loss: 7.735
Epochs: 8244 | epoch avg. loss: 0.357 | test avg. loss: 7.371


 16%|█▋        | 8248/50000 [12:12<50:51, 13.68it/s]

Epochs: 8245 | epoch avg. loss: 0.391 | test avg. loss: 7.650
Epochs: 8246 | epoch avg. loss: 0.359 | test avg. loss: 7.389
Epochs: 8247 | epoch avg. loss: 0.297 | test avg. loss: 7.370


 17%|█▋        | 8252/50000 [12:12<48:37, 14.31it/s]

Epochs: 8248 | epoch avg. loss: 0.404 | test avg. loss: 7.442
Epochs: 8249 | epoch avg. loss: 0.322 | test avg. loss: 7.458
Epochs: 8250 | epoch avg. loss: 0.239 | test avg. loss: 7.629
Epochs: 8251 | epoch avg. loss: 0.366 | test avg. loss: 7.361


 17%|█▋        | 8254/50000 [12:12<50:06, 13.89it/s]

Epochs: 8252 | epoch avg. loss: 0.334 | test avg. loss: 7.467
Epochs: 8253 | epoch avg. loss: 0.196 | test avg. loss: 7.412
Epochs: 8254 | epoch avg. loss: 0.319 | test avg. loss: 7.661


 17%|█▋        | 8258/50000 [12:12<54:47, 12.70it/s]

Epochs: 8255 | epoch avg. loss: 0.232 | test avg. loss: 7.422
Epochs: 8256 | epoch avg. loss: 0.293 | test avg. loss: 7.637
Epochs: 8257 | epoch avg. loss: 0.321 | test avg. loss: 7.393


 17%|█▋        | 8260/50000 [12:13<55:48, 12.46it/s]

Epochs: 8258 | epoch avg. loss: 0.440 | test avg. loss: 7.629
Epochs: 8259 | epoch avg. loss: 0.401 | test avg. loss: 7.345
Epochs: 8260 | epoch avg. loss: 0.323 | test avg. loss: 7.375


 17%|█▋        | 8264/50000 [12:13<53:04, 13.11it/s]

Epochs: 8261 | epoch avg. loss: 0.422 | test avg. loss: 7.345
Epochs: 8262 | epoch avg. loss: 0.431 | test avg. loss: 7.511
Epochs: 8263 | epoch avg. loss: 0.301 | test avg. loss: 7.608


 17%|█▋        | 8266/50000 [12:13<52:29, 13.25it/s]

Epochs: 8264 | epoch avg. loss: 0.396 | test avg. loss: 7.340
Epochs: 8265 | epoch avg. loss: 0.382 | test avg. loss: 7.292
Epochs: 8266 | epoch avg. loss: 0.437 | test avg. loss: 7.438


 17%|█▋        | 8270/50000 [12:13<53:13, 13.07it/s]

Epochs: 8267 | epoch avg. loss: 0.312 | test avg. loss: 7.392
Epochs: 8268 | epoch avg. loss: 0.197 | test avg. loss: 7.364
Epochs: 8269 | epoch avg. loss: 0.313 | test avg. loss: 8.205


 17%|█▋        | 8272/50000 [12:14<57:33, 12.08it/s]

Epochs: 8270 | epoch avg. loss: 0.576 | test avg. loss: 7.449
Epochs: 8271 | epoch avg. loss: 0.450 | test avg. loss: 7.886
Epochs: 8272 | epoch avg. loss: 0.563 | test avg. loss: 7.386


 17%|█▋        | 8276/50000 [12:14<53:24, 13.02it/s]

Epochs: 8273 | epoch avg. loss: 0.661 | test avg. loss: 7.687
Epochs: 8274 | epoch avg. loss: 0.650 | test avg. loss: 7.284
Epochs: 8275 | epoch avg. loss: 0.700 | test avg. loss: 7.547


 17%|█▋        | 8278/50000 [12:14<52:47, 13.17it/s]

Epochs: 8276 | epoch avg. loss: 0.929 | test avg. loss: 7.181
Epochs: 8277 | epoch avg. loss: 1.096 | test avg. loss: 7.577
Epochs: 8278 | epoch avg. loss: 1.942 | test avg. loss: 7.612


 17%|█▋        | 8282/50000 [12:14<53:16, 13.05it/s]

Epochs: 8279 | epoch avg. loss: 2.218 | test avg. loss: 7.955
Epochs: 8280 | epoch avg. loss: 1.712 | test avg. loss: 7.499
Epochs: 8281 | epoch avg. loss: 1.162 | test avg. loss: 7.897


 17%|█▋        | 8284/50000 [12:14<53:41, 12.95it/s]

Epochs: 8282 | epoch avg. loss: 1.228 | test avg. loss: 10.364
Epochs: 8283 | epoch avg. loss: 1.771 | test avg. loss: 9.660
Epochs: 8284 | epoch avg. loss: 2.326 | test avg. loss: 10.906


 17%|█▋        | 8288/50000 [12:15<51:36, 13.47it/s]

Epochs: 8285 | epoch avg. loss: 2.286 | test avg. loss: 8.352
Epochs: 8286 | epoch avg. loss: 1.342 | test avg. loss: 8.987
Epochs: 8287 | epoch avg. loss: 1.368 | test avg. loss: 7.995


                                                    

Epochs: 8288 | epoch avg. loss: 1.375 | test avg. loss: 8.159
Epochs: 8289 | epoch avg. loss: 0.869 | test avg. loss: 8.410
Epochs: 8290 | epoch avg. loss: 0.952 | test avg. loss: 7.649


 17%|█▋        | 8294/50000 [12:15<49:57, 13.91it/s]

Epochs: 8291 | epoch avg. loss: 0.833 | test avg. loss: 8.770
Epochs: 8292 | epoch avg. loss: 0.998 | test avg. loss: 7.372
Epochs: 8293 | epoch avg. loss: 0.708 | test avg. loss: 8.128


 17%|█▋        | 8296/50000 [12:15<51:45, 13.43it/s]

Epochs: 8294 | epoch avg. loss: 0.697 | test avg. loss: 7.182
Epochs: 8295 | epoch avg. loss: 0.795 | test avg. loss: 7.420
Epochs: 8296 | epoch avg. loss: 0.519 | test avg. loss: 7.019


 17%|█▋        | 8298/50000 [12:16<53:10, 13.07it/s]

Epochs: 8297 | epoch avg. loss: 0.401 | test avg. loss: 7.003
Epochs: 8298 | epoch avg. loss: 0.559 | test avg. loss: 7.315
Epochs: 8299 | epoch avg. loss: 0.552 | test avg. loss: 6.869


 17%|█▋        | 8302/50000 [12:17<2:37:29,  4.41it/s]

Epochs: 8300 | epoch avg. loss: 0.277 | test avg. loss: 7.386
Epochs: 8301 | epoch avg. loss: 0.490 | test avg. loss: 6.782
Epochs: 8302 | epoch avg. loss: 0.367 | test avg. loss: 7.055


 17%|█▋        | 8304/50000 [12:18<2:05:37,  5.53it/s]

Epochs: 8303 | epoch avg. loss: 0.248 | test avg. loss: 6.873
Epochs: 8304 | epoch avg. loss: 0.249 | test avg. loss: 7.226
Epochs: 8305 | epoch avg. loss: 0.243 | test avg. loss: 7.086


 17%|█▋        | 8310/50000 [12:18<1:16:18,  9.11it/s]

Epochs: 8306 | epoch avg. loss: 0.234 | test avg. loss: 7.321
Epochs: 8307 | epoch avg. loss: 0.230 | test avg. loss: 7.198
Epochs: 8308 | epoch avg. loss: 0.187 | test avg. loss: 7.155
Epochs: 8309 | epoch avg. loss: 0.243 | test avg. loss: 7.220




Epochs: 8310 | epoch avg. loss: 0.263 | test avg. loss: 7.285
Epochs: 8311 | epoch avg. loss: 0.227 | test avg. loss: 7.240
Epochs: 8312 | epoch avg. loss: 0.207 | test avg. loss: 7.106
Epochs: 8313 | epoch avg. loss: 0.336 | test avg. loss: 7.384




Epochs: 8314 | epoch avg. loss: 0.351 | test avg. loss: 7.023
Epochs: 8315 | epoch avg. loss: 0.300 | test avg. loss: 7.075
Epochs: 8316 | epoch avg. loss: 0.214 | test avg. loss: 7.119


 17%|█▋        | 8320/50000 [12:19<57:31, 12.08it/s]

Epochs: 8317 | epoch avg. loss: 0.223 | test avg. loss: 6.987
Epochs: 8318 | epoch avg. loss: 0.263 | test avg. loss: 7.163
Epochs: 8319 | epoch avg. loss: 0.212 | test avg. loss: 7.020


 17%|█▋        | 8324/50000 [12:19<53:16, 13.04it/s]

Epochs: 8320 | epoch avg. loss: 0.193 | test avg. loss: 7.218
Epochs: 8321 | epoch avg. loss: 0.231 | test avg. loss: 6.978
Epochs: 8322 | epoch avg. loss: 0.210 | test avg. loss: 7.097
Epochs: 8323 | epoch avg. loss: 0.212 | test avg. loss: 6.984


 17%|█▋        | 8326/50000 [12:19<51:54, 13.38it/s]

Epochs: 8324 | epoch avg. loss: 0.180 | test avg. loss: 7.001
Epochs: 8325 | epoch avg. loss: 0.180 | test avg. loss: 7.023
Epochs: 8326 | epoch avg. loss: 0.190 | test avg. loss: 7.043


 17%|█▋        | 8330/50000 [12:19<52:54, 13.13it/s]

Epochs: 8327 | epoch avg. loss: 0.181 | test avg. loss: 7.028
Epochs: 8328 | epoch avg. loss: 0.178 | test avg. loss: 7.030
Epochs: 8329 | epoch avg. loss: 0.186 | test avg. loss: 7.181


 17%|█▋        | 8332/50000 [12:20<55:49, 12.44it/s]

Epochs: 8330 | epoch avg. loss: 0.235 | test avg. loss: 6.975
Epochs: 8331 | epoch avg. loss: 0.262 | test avg. loss: 7.079
Epochs: 8332 | epoch avg. loss: 0.194 | test avg. loss: 7.095


 17%|█▋        | 8336/50000 [12:20<55:19, 12.55it/s]

Epochs: 8333 | epoch avg. loss: 0.228 | test avg. loss: 7.019
Epochs: 8334 | epoch avg. loss: 0.201 | test avg. loss: 7.031
Epochs: 8335 | epoch avg. loss: 0.188 | test avg. loss: 7.045


 17%|█▋        | 8338/50000 [12:20<52:00, 13.35it/s]

Epochs: 8336 | epoch avg. loss: 0.254 | test avg. loss: 7.026
Epochs: 8337 | epoch avg. loss: 0.244 | test avg. loss: 7.129
Epochs: 8338 | epoch avg. loss: 0.193 | test avg. loss: 7.230


 17%|█▋        | 8342/50000 [12:20<54:33, 12.72it/s]

Epochs: 8339 | epoch avg. loss: 0.258 | test avg. loss: 7.081
Epochs: 8340 | epoch avg. loss: 0.222 | test avg. loss: 7.125
Epochs: 8341 | epoch avg. loss: 0.249 | test avg. loss: 6.971


 17%|█▋        | 8344/50000 [12:20<52:36, 13.20it/s]

Epochs: 8342 | epoch avg. loss: 0.249 | test avg. loss: 6.915
Epochs: 8343 | epoch avg. loss: 0.208 | test avg. loss: 6.886
Epochs: 8344 | epoch avg. loss: 0.226 | test avg. loss: 6.987


 17%|█▋        | 8348/50000 [12:21<1:02:06, 11.18it/s]

Epochs: 8345 | epoch avg. loss: 0.176 | test avg. loss: 6.937
Epochs: 8346 | epoch avg. loss: 0.224 | test avg. loss: 7.139
Epochs: 8347 | epoch avg. loss: 0.203 | test avg. loss: 7.052


 17%|█▋        | 8350/50000 [12:21<1:00:13, 11.53it/s]

Epochs: 8348 | epoch avg. loss: 0.187 | test avg. loss: 7.156
Epochs: 8349 | epoch avg. loss: 0.207 | test avg. loss: 6.955
Epochs: 8350 | epoch avg. loss: 0.198 | test avg. loss: 7.047


 17%|█▋        | 8354/50000 [12:21<57:19, 12.11it/s]

Epochs: 8351 | epoch avg. loss: 0.206 | test avg. loss: 6.932
Epochs: 8352 | epoch avg. loss: 0.187 | test avg. loss: 6.950
Epochs: 8353 | epoch avg. loss: 0.268 | test avg. loss: 6.932


 17%|█▋        | 8356/50000 [12:21<56:15, 12.34it/s]

Epochs: 8354 | epoch avg. loss: 0.285 | test avg. loss: 7.082
Epochs: 8355 | epoch avg. loss: 0.214 | test avg. loss: 7.055
Epochs: 8356 | epoch avg. loss: 0.182 | test avg. loss: 6.925




Epochs: 8357 | epoch avg. loss: 0.189 | test avg. loss: 7.197
Epochs: 8358 | epoch avg. loss: 0.256 | test avg. loss: 6.919
Epochs: 8359 | epoch avg. loss: 0.362 | test avg. loss: 7.527


 17%|█▋        | 8362/50000 [12:22<54:15, 12.79it/s]

Epochs: 8360 | epoch avg. loss: 0.689 | test avg. loss: 7.366
Epochs: 8361 | epoch avg. loss: 1.153 | test avg. loss: 8.244
Epochs: 8362 | epoch avg. loss: 1.216 | test avg. loss: 7.099


 17%|█▋        | 8366/50000 [12:22<49:26, 14.03it/s]

Epochs: 8363 | epoch avg. loss: 0.839 | test avg. loss: 7.111
Epochs: 8364 | epoch avg. loss: 0.373 | test avg. loss: 7.780
Epochs: 8365 | epoch avg. loss: 0.630 | test avg. loss: 7.166
Epochs: 8366 | epoch avg. loss: 0.719 | test avg. loss: 7.569


 17%|█▋        | 8370/50000 [12:22<50:02, 13.87it/s]

Epochs: 8367 | epoch avg. loss: 0.612 | test avg. loss: 7.079
Epochs: 8368 | epoch avg. loss: 0.345 | test avg. loss: 7.427
Epochs: 8369 | epoch avg. loss: 0.260 | test avg. loss: 7.242


 17%|█▋        | 8372/50000 [12:23<53:57, 12.86it/s]

Epochs: 8370 | epoch avg. loss: 0.336 | test avg. loss: 7.581
Epochs: 8371 | epoch avg. loss: 0.337 | test avg. loss: 7.185
Epochs: 8372 | epoch avg. loss: 0.399 | test avg. loss: 7.365


 17%|█▋        | 8376/50000 [12:23<55:20, 12.54it/s]

Epochs: 8373 | epoch avg. loss: 0.305 | test avg. loss: 7.051
Epochs: 8374 | epoch avg. loss: 0.228 | test avg. loss: 6.985
Epochs: 8375 | epoch avg. loss: 0.279 | test avg. loss: 7.030


 17%|█▋        | 8378/50000 [12:23<54:49, 12.65it/s]

Epochs: 8376 | epoch avg. loss: 0.257 | test avg. loss: 7.093
Epochs: 8377 | epoch avg. loss: 0.270 | test avg. loss: 7.066
Epochs: 8378 | epoch avg. loss: 0.238 | test avg. loss: 6.943


 17%|█▋        | 8382/50000 [12:23<52:33, 13.20it/s]

Epochs: 8379 | epoch avg. loss: 0.170 | test avg. loss: 7.317
Epochs: 8380 | epoch avg. loss: 0.321 | test avg. loss: 6.934
Epochs: 8381 | epoch avg. loss: 0.380 | test avg. loss: 7.149


 17%|█▋        | 8384/50000 [12:24<57:36, 12.04it/s]

Epochs: 8382 | epoch avg. loss: 0.279 | test avg. loss: 6.913
Epochs: 8383 | epoch avg. loss: 0.298 | test avg. loss: 7.223
Epochs: 8384 | epoch avg. loss: 0.244 | test avg. loss: 7.062


 17%|█▋        | 8388/50000 [12:24<54:36, 12.70it/s]

Epochs: 8385 | epoch avg. loss: 0.211 | test avg. loss: 7.092
Epochs: 8386 | epoch avg. loss: 0.182 | test avg. loss: 7.072
Epochs: 8387 | epoch avg. loss: 0.191 | test avg. loss: 6.955


 17%|█▋        | 8390/50000 [12:24<53:55, 12.86it/s]

Epochs: 8388 | epoch avg. loss: 0.180 | test avg. loss: 7.045
Epochs: 8389 | epoch avg. loss: 0.199 | test avg. loss: 6.946
Epochs: 8390 | epoch avg. loss: 0.184 | test avg. loss: 7.194


 17%|█▋        | 8394/50000 [12:24<53:00, 13.08it/s]

Epochs: 8391 | epoch avg. loss: 0.219 | test avg. loss: 7.016
Epochs: 8392 | epoch avg. loss: 0.232 | test avg. loss: 7.302
Epochs: 8393 | epoch avg. loss: 0.265 | test avg. loss: 7.012


 17%|█▋        | 8396/50000 [12:25<55:53, 12.41it/s]

Epochs: 8394 | epoch avg. loss: 0.176 | test avg. loss: 6.962
Epochs: 8395 | epoch avg. loss: 0.194 | test avg. loss: 6.922
Epochs: 8396 | epoch avg. loss: 0.182 | test avg. loss: 6.932


 17%|█▋        | 8398/50000 [12:25<57:06, 12.14it/s]

Epochs: 8397 | epoch avg. loss: 0.196 | test avg. loss: 6.933
Epochs: 8398 | epoch avg. loss: 0.187 | test avg. loss: 7.009
Epochs: 8399 | epoch avg. loss: 0.313 | test avg. loss: 6.980


 17%|█▋        | 8402/50000 [12:26<2:37:03,  4.41it/s]

Epochs: 8400 | epoch avg. loss: 0.432 | test avg. loss: 7.370
Epochs: 8401 | epoch avg. loss: 0.571 | test avg. loss: 7.049
Epochs: 8402 | epoch avg. loss: 0.742 | test avg. loss: 7.497
Epochs: 8403 | epoch avg. loss: 0.852 | test avg. loss: 7.059




Epochs: 8404 | epoch avg. loss: 1.069 | test avg. loss: 7.664
Epochs: 8405 | epoch avg. loss: 1.206 | test avg. loss: 7.011
Epochs: 8406 | epoch avg. loss: 1.130 | test avg. loss: 7.234


 17%|█▋        | 8410/50000 [12:27<1:12:10,  9.60it/s]

Epochs: 8407 | epoch avg. loss: 0.886 | test avg. loss: 7.042
Epochs: 8408 | epoch avg. loss: 0.613 | test avg. loss: 6.894
Epochs: 8409 | epoch avg. loss: 0.390 | test avg. loss: 8.360
Epochs: 8410 | epoch avg. loss: 1.286 | test avg. loss: 7.282


 17%|█▋        | 8414/50000 [12:27<58:54, 11.77it/s]  

Epochs: 8411 | epoch avg. loss: 0.974 | test avg. loss: 7.714
Epochs: 8412 | epoch avg. loss: 0.726 | test avg. loss: 7.103
Epochs: 8413 | epoch avg. loss: 0.602 | test avg. loss: 7.484


 17%|█▋        | 8416/50000 [12:27<55:21, 12.52it/s]

Epochs: 8414 | epoch avg. loss: 0.353 | test avg. loss: 7.052
Epochs: 8415 | epoch avg. loss: 0.243 | test avg. loss: 7.240
Epochs: 8416 | epoch avg. loss: 0.247 | test avg. loss: 6.942


 17%|█▋        | 8420/50000 [12:28<56:15, 12.32it/s]

Epochs: 8417 | epoch avg. loss: 0.315 | test avg. loss: 7.186
Epochs: 8418 | epoch avg. loss: 0.408 | test avg. loss: 6.860
Epochs: 8419 | epoch avg. loss: 0.535 | test avg. loss: 7.267


 17%|█▋        | 8422/50000 [12:28<56:05, 12.36it/s]

Epochs: 8420 | epoch avg. loss: 0.540 | test avg. loss: 6.789
Epochs: 8421 | epoch avg. loss: 0.459 | test avg. loss: 6.971
Epochs: 8422 | epoch avg. loss: 0.458 | test avg. loss: 6.875


 17%|█▋        | 8426/50000 [12:28<51:56, 13.34it/s]

Epochs: 8423 | epoch avg. loss: 0.370 | test avg. loss: 7.081
Epochs: 8424 | epoch avg. loss: 0.362 | test avg. loss: 7.187
Epochs: 8425 | epoch avg. loss: 0.589 | test avg. loss: 7.621
Epochs: 8426 | epoch avg. loss: 0.667 | test avg. loss: 7.056


 17%|█▋        | 8430/50000 [12:28<48:39, 14.24it/s]

Epochs: 8427 | epoch avg. loss: 0.710 | test avg. loss: 7.195
Epochs: 8428 | epoch avg. loss: 0.639 | test avg. loss: 6.595
Epochs: 8429 | epoch avg. loss: 0.292 | test avg. loss: 6.624


 17%|█▋        | 8432/50000 [12:29<53:03, 13.06it/s]

Epochs: 8430 | epoch avg. loss: 0.572 | test avg. loss: 7.542
Epochs: 8431 | epoch avg. loss: 0.616 | test avg. loss: 7.109
Epochs: 8432 | epoch avg. loss: 0.810 | test avg. loss: 8.218


 17%|█▋        | 8436/50000 [12:29<53:05, 13.05it/s]

Epochs: 8433 | epoch avg. loss: 0.813 | test avg. loss: 7.375
Epochs: 8434 | epoch avg. loss: 0.916 | test avg. loss: 8.457
Epochs: 8435 | epoch avg. loss: 0.976 | test avg. loss: 7.022


 17%|█▋        | 8440/50000 [12:29<48:00, 14.43it/s]

Epochs: 8436 | epoch avg. loss: 0.668 | test avg. loss: 7.378
Epochs: 8437 | epoch avg. loss: 0.610 | test avg. loss: 6.833
Epochs: 8438 | epoch avg. loss: 0.827 | test avg. loss: 7.477
Epochs: 8439 | epoch avg. loss: 0.892 | test avg. loss: 6.755


 17%|█▋        | 8444/50000 [12:29<46:03, 15.04it/s]

Epochs: 8440 | epoch avg. loss: 0.664 | test avg. loss: 6.937
Epochs: 8441 | epoch avg. loss: 0.564 | test avg. loss: 7.316
Epochs: 8442 | epoch avg. loss: 0.501 | test avg. loss: 7.142
Epochs: 8443 | epoch avg. loss: 0.510 | test avg. loss: 7.532


 17%|█▋        | 8446/50000 [12:30<47:57, 14.44it/s]

Epochs: 8444 | epoch avg. loss: 0.299 | test avg. loss: 7.240
Epochs: 8445 | epoch avg. loss: 0.574 | test avg. loss: 7.986
Epochs: 8446 | epoch avg. loss: 0.636 | test avg. loss: 7.061


 17%|█▋        | 8450/50000 [12:30<48:41, 14.22it/s]

Epochs: 8447 | epoch avg. loss: 0.619 | test avg. loss: 7.686
Epochs: 8448 | epoch avg. loss: 0.708 | test avg. loss: 6.954
Epochs: 8449 | epoch avg. loss: 0.537 | test avg. loss: 7.231
Epochs: 8450 | epoch avg. loss: 0.430 | test avg. loss: 6.956


 17%|█▋        | 8454/50000 [12:30<45:27, 15.23it/s]

Epochs: 8451 | epoch avg. loss: 0.382 | test avg. loss: 7.193
Epochs: 8452 | epoch avg. loss: 0.263 | test avg. loss: 7.290
Epochs: 8453 | epoch avg. loss: 0.177 | test avg. loss: 7.127
Epochs: 8454 | epoch avg. loss: 0.401 | test avg. loss: 7.506


 17%|█▋        | 8458/50000 [12:30<44:41, 15.49it/s]

Epochs: 8455 | epoch avg. loss: 0.430 | test avg. loss: 6.745
Epochs: 8456 | epoch avg. loss: 0.252 | test avg. loss: 6.907
Epochs: 8457 | epoch avg. loss: 0.245 | test avg. loss: 6.568
Epochs: 8458 | epoch avg. loss: 0.275 | test avg. loss: 6.841


 17%|█▋        | 8460/50000 [12:31<47:46, 14.49it/s]

Epochs: 8459 | epoch avg. loss: 0.248 | test avg. loss: 6.693
Epochs: 8460 | epoch avg. loss: 0.201 | test avg. loss: 6.881
Epochs: 8461 | epoch avg. loss: 0.212 | test avg. loss: 6.855


 17%|█▋        | 8466/50000 [12:31<49:32, 13.97it/s]

Epochs: 8462 | epoch avg. loss: 0.265 | test avg. loss: 6.991
Epochs: 8463 | epoch avg. loss: 0.258 | test avg. loss: 6.820
Epochs: 8464 | epoch avg. loss: 0.247 | test avg. loss: 6.839
Epochs: 8465 | epoch avg. loss: 0.216 | test avg. loss: 6.888


 17%|█▋        | 8468/50000 [12:31<48:07, 14.39it/s]

Epochs: 8466 | epoch avg. loss: 0.199 | test avg. loss: 6.873
Epochs: 8467 | epoch avg. loss: 0.272 | test avg. loss: 6.908
Epochs: 8468 | epoch avg. loss: 0.199 | test avg. loss: 6.862
Epochs: 8469 | epoch avg. loss: 0.319 | test avg. loss: 6.838


 17%|█▋        | 8472/50000 [12:31<46:20, 14.94it/s]

Epochs: 8470 | epoch avg. loss: 0.226 | test avg. loss: 6.847
Epochs: 8471 | epoch avg. loss: 0.192 | test avg. loss: 6.978
Epochs: 8472 | epoch avg. loss: 0.251 | test avg. loss: 6.780


 17%|█▋        | 8476/50000 [12:32<52:42, 13.13it/s]

Epochs: 8473 | epoch avg. loss: 0.186 | test avg. loss: 6.953
Epochs: 8474 | epoch avg. loss: 0.234 | test avg. loss: 6.777
Epochs: 8475 | epoch avg. loss: 0.275 | test avg. loss: 6.941




Epochs: 8476 | epoch avg. loss: 0.212 | test avg. loss: 6.802
Epochs: 8477 | epoch avg. loss: 0.308 | test avg. loss: 7.231
Epochs: 8478 | epoch avg. loss: 0.373 | test avg. loss: 6.784


 17%|█▋        | 8482/50000 [12:32<48:52, 14.16it/s]

Epochs: 8479 | epoch avg. loss: 0.372 | test avg. loss: 7.072
Epochs: 8480 | epoch avg. loss: 0.288 | test avg. loss: 6.792
Epochs: 8481 | epoch avg. loss: 0.195 | test avg. loss: 6.861
Epochs: 8482 | epoch avg. loss: 0.178 | test avg. loss: 6.881


 17%|█▋        | 8486/50000 [12:32<47:30, 14.56it/s]

Epochs: 8483 | epoch avg. loss: 0.183 | test avg. loss: 6.842
Epochs: 8484 | epoch avg. loss: 0.183 | test avg. loss: 6.859
Epochs: 8485 | epoch avg. loss: 0.180 | test avg. loss: 6.803


 17%|█▋        | 8488/50000 [12:33<47:34, 14.54it/s]

Epochs: 8486 | epoch avg. loss: 0.157 | test avg. loss: 6.913
Epochs: 8487 | epoch avg. loss: 0.192 | test avg. loss: 6.700
Epochs: 8488 | epoch avg. loss: 0.185 | test avg. loss: 6.893


 17%|█▋        | 8492/50000 [12:33<52:00, 13.30it/s]

Epochs: 8489 | epoch avg. loss: 0.207 | test avg. loss: 6.743
Epochs: 8490 | epoch avg. loss: 0.275 | test avg. loss: 7.000
Epochs: 8491 | epoch avg. loss: 0.240 | test avg. loss: 6.801


 17%|█▋        | 8494/50000 [12:33<49:54, 13.86it/s]

Epochs: 8492 | epoch avg. loss: 0.174 | test avg. loss: 6.859
Epochs: 8493 | epoch avg. loss: 0.227 | test avg. loss: 6.755
Epochs: 8494 | epoch avg. loss: 0.240 | test avg. loss: 6.830
Epochs: 8495 | epoch avg. loss: 0.194 | test avg. loss: 6.756


 17%|█▋        | 8498/50000 [12:33<47:51, 14.45it/s]

Epochs: 8496 | epoch avg. loss: 0.171 | test avg. loss: 6.754
Epochs: 8497 | epoch avg. loss: 0.262 | test avg. loss: 6.778
Epochs: 8498 | epoch avg. loss: 0.250 | test avg. loss: 7.059
Epochs: 8499 | epoch avg. loss: 0.269 | test avg. loss: 6.826


 17%|█▋        | 8502/50000 [12:35<2:22:58,  4.84it/s]

Epochs: 8500 | epoch avg. loss: 0.292 | test avg. loss: 7.051
Epochs: 8501 | epoch avg. loss: 0.416 | test avg. loss: 6.843
Epochs: 8502 | epoch avg. loss: 0.589 | test avg. loss: 7.297


 17%|█▋        | 8506/50000 [12:35<1:33:52,  7.37it/s]

Epochs: 8503 | epoch avg. loss: 0.637 | test avg. loss: 6.710
Epochs: 8504 | epoch avg. loss: 0.551 | test avg. loss: 6.981
Epochs: 8505 | epoch avg. loss: 0.380 | test avg. loss: 6.803
Epochs: 8506 | epoch avg. loss: 0.345 | test avg. loss: 6.750


 17%|█▋        | 8510/50000 [12:35<1:09:00, 10.02it/s]

Epochs: 8507 | epoch avg. loss: 0.188 | test avg. loss: 6.958
Epochs: 8508 | epoch avg. loss: 0.304 | test avg. loss: 6.724
Epochs: 8509 | epoch avg. loss: 0.281 | test avg. loss: 6.804


 17%|█▋        | 8512/50000 [12:36<1:05:01, 10.63it/s]

Epochs: 8510 | epoch avg. loss: 0.170 | test avg. loss: 6.711
Epochs: 8511 | epoch avg. loss: 0.322 | test avg. loss: 6.765
Epochs: 8512 | epoch avg. loss: 0.162 | test avg. loss: 6.599


 17%|█▋        | 8516/50000 [12:36<59:10, 11.68it/s]  

Epochs: 8513 | epoch avg. loss: 0.232 | test avg. loss: 6.679
Epochs: 8514 | epoch avg. loss: 0.250 | test avg. loss: 6.681
Epochs: 8515 | epoch avg. loss: 0.228 | test avg. loss: 6.781


 17%|█▋        | 8518/50000 [12:36<55:08, 12.54it/s]

Epochs: 8516 | epoch avg. loss: 0.268 | test avg. loss: 7.070
Epochs: 8517 | epoch avg. loss: 0.269 | test avg. loss: 6.834
Epochs: 8518 | epoch avg. loss: 0.218 | test avg. loss: 6.768


 17%|█▋        | 8522/50000 [12:36<53:19, 12.97it/s]

Epochs: 8519 | epoch avg. loss: 0.216 | test avg. loss: 6.581
Epochs: 8520 | epoch avg. loss: 0.222 | test avg. loss: 6.674
Epochs: 8521 | epoch avg. loss: 0.192 | test avg. loss: 6.732


 17%|█▋        | 8524/50000 [12:36<53:32, 12.91it/s]

Epochs: 8522 | epoch avg. loss: 0.180 | test avg. loss: 6.749
Epochs: 8523 | epoch avg. loss: 0.209 | test avg. loss: 6.783


 17%|█▋        | 8526/50000 [12:37<1:00:36, 11.41it/s]

Epochs: 8524 | epoch avg. loss: 0.270 | test avg. loss: 7.033
Epochs: 8525 | epoch avg. loss: 0.295 | test avg. loss: 6.641
Epochs: 8526 | epoch avg. loss: 0.251 | test avg. loss: 6.772


 17%|█▋        | 8530/50000 [12:37<58:52, 11.74it/s]

Epochs: 8527 | epoch avg. loss: 0.477 | test avg. loss: 6.917
Epochs: 8528 | epoch avg. loss: 0.812 | test avg. loss: 7.346
Epochs: 8529 | epoch avg. loss: 0.592 | test avg. loss: 6.692


 17%|█▋        | 8532/50000 [12:37<59:11, 11.68it/s]

Epochs: 8530 | epoch avg. loss: 0.368 | test avg. loss: 6.925
Epochs: 8531 | epoch avg. loss: 0.242 | test avg. loss: 6.805
Epochs: 8532 | epoch avg. loss: 0.156 | test avg. loss: 6.685


 17%|█▋        | 8536/50000 [12:37<58:31, 11.81it/s]

Epochs: 8533 | epoch avg. loss: 0.261 | test avg. loss: 6.894
Epochs: 8534 | epoch avg. loss: 0.201 | test avg. loss: 6.677
Epochs: 8535 | epoch avg. loss: 0.252 | test avg. loss: 7.238


 17%|█▋        | 8538/50000 [12:38<1:02:07, 11.12it/s]

Epochs: 8536 | epoch avg. loss: 0.347 | test avg. loss: 6.778
Epochs: 8537 | epoch avg. loss: 0.442 | test avg. loss: 7.264
Epochs: 8538 | epoch avg. loss: 0.475 | test avg. loss: 6.976


 17%|█▋        | 8542/50000 [12:38<56:13, 12.29it/s]

Epochs: 8539 | epoch avg. loss: 0.700 | test avg. loss: 7.347
Epochs: 8540 | epoch avg. loss: 0.658 | test avg. loss: 6.989
Epochs: 8541 | epoch avg. loss: 0.753 | test avg. loss: 7.429
Epochs: 8542 | epoch avg. loss: 0.603 | test avg. loss: 6.694


 17%|█▋        | 8546/50000 [12:38<52:53, 13.06it/s]

Epochs: 8543 | epoch avg. loss: 0.360 | test avg. loss: 6.787
Epochs: 8544 | epoch avg. loss: 0.294 | test avg. loss: 6.717
Epochs: 8545 | epoch avg. loss: 0.301 | test avg. loss: 6.729


 17%|█▋        | 8548/50000 [12:38<53:06, 13.01it/s]

Epochs: 8546 | epoch avg. loss: 0.253 | test avg. loss: 6.881
Epochs: 8547 | epoch avg. loss: 0.226 | test avg. loss: 6.709
Epochs: 8548 | epoch avg. loss: 0.295 | test avg. loss: 6.994


 17%|█▋        | 8552/50000 [12:39<55:52, 12.36it/s]

Epochs: 8549 | epoch avg. loss: 0.292 | test avg. loss: 6.848
Epochs: 8550 | epoch avg. loss: 0.381 | test avg. loss: 7.385
Epochs: 8551 | epoch avg. loss: 0.354 | test avg. loss: 7.013


 17%|█▋        | 8556/50000 [12:39<51:03, 13.53it/s]

Epochs: 8552 | epoch avg. loss: 0.584 | test avg. loss: 7.667
Epochs: 8553 | epoch avg. loss: 0.550 | test avg. loss: 7.008
Epochs: 8554 | epoch avg. loss: 0.564 | test avg. loss: 7.031
Epochs: 8555 | epoch avg. loss: 0.288 | test avg. loss: 6.744


 17%|█▋        | 8558/50000 [12:39<50:48, 13.59it/s]

Epochs: 8556 | epoch avg. loss: 0.306 | test avg. loss: 6.978
Epochs: 8557 | epoch avg. loss: 0.272 | test avg. loss: 6.851
Epochs: 8558 | epoch avg. loss: 0.155 | test avg. loss: 6.821


 17%|█▋        | 8562/50000 [12:39<54:54, 12.58it/s]

Epochs: 8559 | epoch avg. loss: 0.163 | test avg. loss: 7.192
Epochs: 8560 | epoch avg. loss: 0.274 | test avg. loss: 6.700
Epochs: 8561 | epoch avg. loss: 0.317 | test avg. loss: 6.718


 17%|█▋        | 8564/50000 [12:40<55:08, 12.52it/s]

Epochs: 8562 | epoch avg. loss: 0.160 | test avg. loss: 6.618
Epochs: 8563 | epoch avg. loss: 0.167 | test avg. loss: 6.692
Epochs: 8564 | epoch avg. loss: 0.165 | test avg. loss: 6.702


 17%|█▋        | 8568/50000 [12:40<55:32, 12.43it/s]

Epochs: 8565 | epoch avg. loss: 0.171 | test avg. loss: 6.870
Epochs: 8566 | epoch avg. loss: 0.192 | test avg. loss: 6.716
Epochs: 8567 | epoch avg. loss: 0.173 | test avg. loss: 6.846




Epochs: 8568 | epoch avg. loss: 0.202 | test avg. loss: 6.668
Epochs: 8569 | epoch avg. loss: 0.249 | test avg. loss: 6.882
Epochs: 8570 | epoch avg. loss: 0.213 | test avg. loss: 6.694


 17%|█▋        | 8574/50000 [12:40<55:35, 12.42it/s]

Epochs: 8571 | epoch avg. loss: 0.201 | test avg. loss: 6.881
Epochs: 8572 | epoch avg. loss: 0.233 | test avg. loss: 6.669
Epochs: 8573 | epoch avg. loss: 0.218 | test avg. loss: 6.677


 17%|█▋        | 8576/50000 [12:41<57:56, 11.92it/s]

Epochs: 8574 | epoch avg. loss: 0.157 | test avg. loss: 6.712
Epochs: 8575 | epoch avg. loss: 0.175 | test avg. loss: 6.787
Epochs: 8576 | epoch avg. loss: 0.185 | test avg. loss: 6.747


 17%|█▋        | 8580/50000 [12:41<53:50, 12.82it/s]

Epochs: 8577 | epoch avg. loss: 0.196 | test avg. loss: 6.751
Epochs: 8578 | epoch avg. loss: 0.179 | test avg. loss: 6.689
Epochs: 8579 | epoch avg. loss: 0.204 | test avg. loss: 6.974




Epochs: 8580 | epoch avg. loss: 0.226 | test avg. loss: 6.744
Epochs: 8581 | epoch avg. loss: 0.174 | test avg. loss: 6.776
Epochs: 8582 | epoch avg. loss: 0.161 | test avg. loss: 6.654


 17%|█▋        | 8586/50000 [12:41<52:41, 13.10it/s]

Epochs: 8583 | epoch avg. loss: 0.178 | test avg. loss: 6.672
Epochs: 8584 | epoch avg. loss: 0.165 | test avg. loss: 6.610
Epochs: 8585 | epoch avg. loss: 0.160 | test avg. loss: 6.650


 17%|█▋        | 8588/50000 [12:42<55:27, 12.45it/s]

Epochs: 8586 | epoch avg. loss: 0.161 | test avg. loss: 6.767
Epochs: 8587 | epoch avg. loss: 0.194 | test avg. loss: 6.756
Epochs: 8588 | epoch avg. loss: 0.148 | test avg. loss: 6.910


 17%|█▋        | 8592/50000 [12:42<58:00, 11.90it/s]

Epochs: 8589 | epoch avg. loss: 0.179 | test avg. loss: 6.675
Epochs: 8590 | epoch avg. loss: 0.245 | test avg. loss: 6.636
Epochs: 8591 | epoch avg. loss: 0.187 | test avg. loss: 6.764


 17%|█▋        | 8594/50000 [12:42<57:26, 12.01it/s]

Epochs: 8592 | epoch avg. loss: 0.217 | test avg. loss: 6.587
Epochs: 8593 | epoch avg. loss: 0.266 | test avg. loss: 6.960
Epochs: 8594 | epoch avg. loss: 0.330 | test avg. loss: 6.620


 17%|█▋        | 8598/50000 [12:42<58:48, 11.73it/s]

Epochs: 8595 | epoch avg. loss: 0.343 | test avg. loss: 7.025
Epochs: 8596 | epoch avg. loss: 0.338 | test avg. loss: 6.685
Epochs: 8597 | epoch avg. loss: 0.246 | test avg. loss: 7.080


 17%|█▋        | 8598/50000 [12:43<58:48, 11.73it/s]

Epochs: 8598 | epoch avg. loss: 0.305 | test avg. loss: 6.735
Epochs: 8599 | epoch avg. loss: 0.245 | test avg. loss: 6.790


 17%|█▋        | 8602/50000 [12:45<4:10:15,  2.76it/s]

Epochs: 8600 | epoch avg. loss: 0.205 | test avg. loss: 6.610
Epochs: 8601 | epoch avg. loss: 0.260 | test avg. loss: 6.983


 17%|█▋        | 8604/50000 [12:46<3:01:51,  3.79it/s]

Epochs: 8602 | epoch avg. loss: 0.337 | test avg. loss: 6.581
Epochs: 8603 | epoch avg. loss: 0.334 | test avg. loss: 6.900
Epochs: 8604 | epoch avg. loss: 0.277 | test avg. loss: 6.568




Epochs: 8605 | epoch avg. loss: 0.207 | test avg. loss: 6.764
Epochs: 8606 | epoch avg. loss: 0.199 | test avg. loss: 6.653
Epochs: 8607 | epoch avg. loss: 0.186 | test avg. loss: 6.742


 17%|█▋        | 8610/50000 [12:46<1:27:35,  7.88it/s]

Epochs: 8608 | epoch avg. loss: 0.153 | test avg. loss: 6.744
Epochs: 8609 | epoch avg. loss: 0.185 | test avg. loss: 6.688
Epochs: 8610 | epoch avg. loss: 0.160 | test avg. loss: 6.597


 17%|█▋        | 8614/50000 [12:46<1:11:50,  9.60it/s]

Epochs: 8611 | epoch avg. loss: 0.162 | test avg. loss: 6.609
Epochs: 8612 | epoch avg. loss: 0.154 | test avg. loss: 6.638
Epochs: 8613 | epoch avg. loss: 0.146 | test avg. loss: 6.647


 17%|█▋        | 8616/50000 [12:47<1:09:49,  9.88it/s]

Epochs: 8614 | epoch avg. loss: 0.174 | test avg. loss: 6.780
Epochs: 8615 | epoch avg. loss: 0.155 | test avg. loss: 6.731
Epochs: 8616 | epoch avg. loss: 0.157 | test avg. loss: 6.723


 17%|█▋        | 8620/50000 [12:47<1:02:08, 11.10it/s]

Epochs: 8617 | epoch avg. loss: 0.150 | test avg. loss: 6.628
Epochs: 8618 | epoch avg. loss: 0.172 | test avg. loss: 6.669
Epochs: 8619 | epoch avg. loss: 0.152 | test avg. loss: 6.595


 17%|█▋        | 8622/50000 [12:47<57:31, 11.99it/s]

Epochs: 8620 | epoch avg. loss: 0.146 | test avg. loss: 6.667
Epochs: 8621 | epoch avg. loss: 0.163 | test avg. loss: 6.570
Epochs: 8622 | epoch avg. loss: 0.219 | test avg. loss: 7.008


 17%|█▋        | 8626/50000 [12:47<55:05, 12.52it/s]

Epochs: 8623 | epoch avg. loss: 0.319 | test avg. loss: 6.626
Epochs: 8624 | epoch avg. loss: 0.194 | test avg. loss: 6.824
Epochs: 8625 | epoch avg. loss: 0.201 | test avg. loss: 6.689




Epochs: 8626 | epoch avg. loss: 0.325 | test avg. loss: 7.023
Epochs: 8627 | epoch avg. loss: 0.306 | test avg. loss: 6.617


 17%|█▋        | 8630/50000 [12:48<57:46, 11.93it/s]

Epochs: 8628 | epoch avg. loss: 0.292 | test avg. loss: 6.868
Epochs: 8629 | epoch avg. loss: 0.290 | test avg. loss: 6.576
Epochs: 8630 | epoch avg. loss: 0.157 | test avg. loss: 6.662


 17%|█▋        | 8634/50000 [12:48<53:29, 12.89it/s]

Epochs: 8631 | epoch avg. loss: 0.148 | test avg. loss: 6.622
Epochs: 8632 | epoch avg. loss: 0.156 | test avg. loss: 6.785
Epochs: 8633 | epoch avg. loss: 0.174 | test avg. loss: 6.668


 17%|█▋        | 8636/50000 [12:48<52:19, 13.17it/s]

Epochs: 8634 | epoch avg. loss: 0.153 | test avg. loss: 6.668
Epochs: 8635 | epoch avg. loss: 0.167 | test avg. loss: 6.796
Epochs: 8636 | epoch avg. loss: 0.196 | test avg. loss: 6.607
Epochs: 8637 | epoch avg. loss: 0.158 | test avg. loss: 6.558


 17%|█▋        | 8640/50000 [12:49<54:34, 12.63it/s]

Epochs: 8638 | epoch avg. loss: 0.175 | test avg. loss: 6.827
Epochs: 8639 | epoch avg. loss: 0.233 | test avg. loss: 6.618
Epochs: 8640 | epoch avg. loss: 0.157 | test avg. loss: 6.619


 17%|█▋        | 8644/50000 [12:49<57:49, 11.92it/s]  

Epochs: 8641 | epoch avg. loss: 0.150 | test avg. loss: 6.636
Epochs: 8642 | epoch avg. loss: 0.162 | test avg. loss: 6.578
Epochs: 8643 | epoch avg. loss: 0.186 | test avg. loss: 6.790


 17%|█▋        | 8646/50000 [12:49<56:07, 12.28it/s]

Epochs: 8644 | epoch avg. loss: 0.202 | test avg. loss: 6.546
Epochs: 8645 | epoch avg. loss: 0.198 | test avg. loss: 6.665
Epochs: 8646 | epoch avg. loss: 0.157 | test avg. loss: 6.581


 17%|█▋        | 8650/50000 [12:49<53:06, 12.98it/s]

Epochs: 8647 | epoch avg. loss: 0.175 | test avg. loss: 6.780
Epochs: 8648 | epoch avg. loss: 0.183 | test avg. loss: 6.608
Epochs: 8649 | epoch avg. loss: 0.190 | test avg. loss: 6.587


 17%|█▋        | 8652/50000 [12:49<51:54, 13.28it/s]

Epochs: 8650 | epoch avg. loss: 0.161 | test avg. loss: 6.699
Epochs: 8651 | epoch avg. loss: 0.161 | test avg. loss: 6.576
Epochs: 8652 | epoch avg. loss: 0.197 | test avg. loss: 6.564


 17%|█▋        | 8656/50000 [12:50<51:10, 13.47it/s]

Epochs: 8653 | epoch avg. loss: 0.217 | test avg. loss: 7.000
Epochs: 8654 | epoch avg. loss: 0.296 | test avg. loss: 6.604
Epochs: 8655 | epoch avg. loss: 0.296 | test avg. loss: 7.084
Epochs: 8656 | epoch avg. loss: 0.405 | test avg. loss: 6.545


 17%|█▋        | 8660/50000 [12:50<48:54, 14.09it/s]

Epochs: 8657 | epoch avg. loss: 0.223 | test avg. loss: 6.721
Epochs: 8658 | epoch avg. loss: 0.202 | test avg. loss: 6.606
Epochs: 8659 | epoch avg. loss: 0.162 | test avg. loss: 6.640


 17%|█▋        | 8664/50000 [12:50<47:46, 14.42it/s]

Epochs: 8660 | epoch avg. loss: 0.145 | test avg. loss: 6.603
Epochs: 8661 | epoch avg. loss: 0.151 | test avg. loss: 6.713
Epochs: 8662 | epoch avg. loss: 0.166 | test avg. loss: 6.491
Epochs: 8663 | epoch avg. loss: 0.187 | test avg. loss: 6.542


 17%|█▋        | 8666/50000 [12:50<50:30, 13.64it/s]

Epochs: 8664 | epoch avg. loss: 0.180 | test avg. loss: 6.634
Epochs: 8665 | epoch avg. loss: 0.179 | test avg. loss: 6.552
Epochs: 8666 | epoch avg. loss: 0.195 | test avg. loss: 6.793


 17%|█▋        | 8670/50000 [12:51<53:45, 12.81it/s]

Epochs: 8667 | epoch avg. loss: 0.238 | test avg. loss: 6.522
Epochs: 8668 | epoch avg. loss: 0.240 | test avg. loss: 6.784
Epochs: 8669 | epoch avg. loss: 0.253 | test avg. loss: 6.576


 17%|█▋        | 8672/50000 [12:51<56:30, 12.19it/s]

Epochs: 8670 | epoch avg. loss: 0.167 | test avg. loss: 6.721
Epochs: 8671 | epoch avg. loss: 0.186 | test avg. loss: 6.678
Epochs: 8672 | epoch avg. loss: 0.144 | test avg. loss: 6.722


 17%|█▋        | 8676/50000 [12:51<55:24, 12.43it/s]

Epochs: 8673 | epoch avg. loss: 0.149 | test avg. loss: 6.623
Epochs: 8674 | epoch avg. loss: 0.202 | test avg. loss: 7.294
Epochs: 8675 | epoch avg. loss: 0.459 | test avg. loss: 6.907


 17%|█▋        | 8678/50000 [12:52<57:17, 12.02it/s]

Epochs: 8676 | epoch avg. loss: 0.669 | test avg. loss: 7.467
Epochs: 8677 | epoch avg. loss: 0.598 | test avg. loss: 6.635
Epochs: 8678 | epoch avg. loss: 0.313 | test avg. loss: 7.122


 17%|█▋        | 8682/50000 [12:52<53:25, 12.89it/s]

Epochs: 8679 | epoch avg. loss: 0.393 | test avg. loss: 6.585
Epochs: 8680 | epoch avg. loss: 0.270 | test avg. loss: 6.745
Epochs: 8681 | epoch avg. loss: 0.309 | test avg. loss: 6.633


 17%|█▋        | 8684/50000 [12:52<51:41, 13.32it/s]

Epochs: 8682 | epoch avg. loss: 0.226 | test avg. loss: 6.688
Epochs: 8683 | epoch avg. loss: 0.191 | test avg. loss: 6.739
Epochs: 8684 | epoch avg. loss: 0.187 | test avg. loss: 6.556


 17%|█▋        | 8688/50000 [12:52<50:55, 13.52it/s]

Epochs: 8685 | epoch avg. loss: 0.168 | test avg. loss: 6.488
Epochs: 8686 | epoch avg. loss: 0.172 | test avg. loss: 6.544
Epochs: 8687 | epoch avg. loss: 0.204 | test avg. loss: 6.402


 17%|█▋        | 8690/50000 [12:52<51:03, 13.48it/s]

Epochs: 8688 | epoch avg. loss: 0.191 | test avg. loss: 6.845
Epochs: 8689 | epoch avg. loss: 0.260 | test avg. loss: 6.562
Epochs: 8690 | epoch avg. loss: 0.249 | test avg. loss: 6.677


 17%|█▋        | 8694/50000 [12:53<56:44, 12.13it/s]

Epochs: 8691 | epoch avg. loss: 0.142 | test avg. loss: 6.651
Epochs: 8692 | epoch avg. loss: 0.143 | test avg. loss: 6.729
Epochs: 8693 | epoch avg. loss: 0.179 | test avg. loss: 6.509


 17%|█▋        | 8696/50000 [12:53<55:55, 12.31it/s]

Epochs: 8694 | epoch avg. loss: 0.203 | test avg. loss: 6.457
Epochs: 8695 | epoch avg. loss: 0.167 | test avg. loss: 6.506
Epochs: 8696 | epoch avg. loss: 0.145 | test avg. loss: 6.545


 17%|█▋        | 8698/50000 [12:53<54:55, 12.53it/s]

Epochs: 8697 | epoch avg. loss: 0.149 | test avg. loss: 6.624
Epochs: 8698 | epoch avg. loss: 0.165 | test avg. loss: 6.566
Epochs: 8699 | epoch avg. loss: 0.220 | test avg. loss: 6.686


 17%|█▋        | 8702/50000 [12:55<2:29:49,  4.59it/s]

Epochs: 8700 | epoch avg. loss: 0.165 | test avg. loss: 6.548
Epochs: 8701 | epoch avg. loss: 0.141 | test avg. loss: 6.587
Epochs: 8702 | epoch avg. loss: 0.161 | test avg. loss: 6.468


 17%|█▋        | 8706/50000 [12:55<1:38:47,  6.97it/s]

Epochs: 8703 | epoch avg. loss: 0.142 | test avg. loss: 6.546
Epochs: 8704 | epoch avg. loss: 0.184 | test avg. loss: 6.589
Epochs: 8705 | epoch avg. loss: 0.152 | test avg. loss: 6.560


 17%|█▋        | 8708/50000 [12:55<1:23:45,  8.22it/s]

Epochs: 8706 | epoch avg. loss: 0.140 | test avg. loss: 6.528
Epochs: 8707 | epoch avg. loss: 0.142 | test avg. loss: 6.567
Epochs: 8708 | epoch avg. loss: 0.155 | test avg. loss: 6.452


 17%|█▋        | 8712/50000 [12:55<1:06:13, 10.39it/s]

Epochs: 8709 | epoch avg. loss: 0.231 | test avg. loss: 6.639
Epochs: 8710 | epoch avg. loss: 0.203 | test avg. loss: 6.419
Epochs: 8711 | epoch avg. loss: 0.156 | test avg. loss: 6.709


 17%|█▋        | 8714/50000 [12:56<1:08:33, 10.04it/s]

Epochs: 8712 | epoch avg. loss: 0.258 | test avg. loss: 6.482
Epochs: 8713 | epoch avg. loss: 0.277 | test avg. loss: 6.522
Epochs: 8714 | epoch avg. loss: 0.193 | test avg. loss: 6.587


 17%|█▋        | 8718/50000 [12:56<59:56, 11.48it/s]  

Epochs: 8715 | epoch avg. loss: 0.206 | test avg. loss: 6.536
Epochs: 8716 | epoch avg. loss: 0.212 | test avg. loss: 6.755
Epochs: 8717 | epoch avg. loss: 0.219 | test avg. loss: 6.553


 17%|█▋        | 8720/50000 [12:56<57:42, 11.92it/s]

Epochs: 8718 | epoch avg. loss: 0.246 | test avg. loss: 6.889
Epochs: 8719 | epoch avg. loss: 0.281 | test avg. loss: 6.605
Epochs: 8720 | epoch avg. loss: 0.312 | test avg. loss: 7.147


 17%|█▋        | 8724/50000 [12:56<53:26, 12.87it/s]

Epochs: 8721 | epoch avg. loss: 0.441 | test avg. loss: 6.577
Epochs: 8722 | epoch avg. loss: 0.431 | test avg. loss: 6.797
Epochs: 8723 | epoch avg. loss: 0.281 | test avg. loss: 6.474




Epochs: 8724 | epoch avg. loss: 0.245 | test avg. loss: 6.714
Epochs: 8725 | epoch avg. loss: 0.240 | test avg. loss: 6.464


 17%|█▋        | 8728/50000 [12:57<57:31, 11.96it/s]

Epochs: 8726 | epoch avg. loss: 0.327 | test avg. loss: 6.488
Epochs: 8727 | epoch avg. loss: 0.303 | test avg. loss: 6.412
Epochs: 8728 | epoch avg. loss: 0.254 | test avg. loss: 6.180


 17%|█▋        | 8732/50000 [12:57<54:01, 12.73it/s]

Epochs: 8729 | epoch avg. loss: 0.338 | test avg. loss: 7.019
Epochs: 8730 | epoch avg. loss: 0.553 | test avg. loss: 6.802
Epochs: 8731 | epoch avg. loss: 0.766 | test avg. loss: 7.334
Epochs: 8732 | epoch avg. loss: 0.467 | test avg. loss: 7.027


 17%|█▋        | 8736/50000 [12:57<48:47, 14.10it/s]

Epochs: 8733 | epoch avg. loss: 0.802 | test avg. loss: 7.404
Epochs: 8734 | epoch avg. loss: 0.577 | test avg. loss: 6.558
Epochs: 8735 | epoch avg. loss: 0.462 | test avg. loss: 6.761
Epochs: 8736 | epoch avg. loss: 0.302 | test avg. loss: 6.346


 17%|█▋        | 8738/50000 [12:57<49:06, 14.01it/s]

Epochs: 8737 | epoch avg. loss: 0.375 | test avg. loss: 6.771
Epochs: 8738 | epoch avg. loss: 0.500 | test avg. loss: 6.639


 17%|█▋        | 8742/50000 [12:58<1:00:28, 11.37it/s]

Epochs: 8739 | epoch avg. loss: 0.267 | test avg. loss: 6.764
Epochs: 8740 | epoch avg. loss: 0.253 | test avg. loss: 6.630
Epochs: 8741 | epoch avg. loss: 0.270 | test avg. loss: 6.598


 17%|█▋        | 8744/50000 [12:58<1:00:00, 11.46it/s]

Epochs: 8742 | epoch avg. loss: 0.175 | test avg. loss: 6.392
Epochs: 8743 | epoch avg. loss: 0.172 | test avg. loss: 6.581
Epochs: 8744 | epoch avg. loss: 0.232 | test avg. loss: 6.388


 17%|█▋        | 8748/50000 [12:58<53:01, 12.97it/s]

Epochs: 8745 | epoch avg. loss: 0.248 | test avg. loss: 7.267
Epochs: 8746 | epoch avg. loss: 0.636 | test avg. loss: 6.581
Epochs: 8747 | epoch avg. loss: 0.412 | test avg. loss: 7.594
Epochs: 8748 | epoch avg. loss: 0.676 | test avg. loss: 6.338


 18%|█▊        | 8752/50000 [12:59<56:36, 12.14it/s]

Epochs: 8749 | epoch avg. loss: 0.596 | test avg. loss: 6.359
Epochs: 8750 | epoch avg. loss: 0.218 | test avg. loss: 6.274
Epochs: 8751 | epoch avg. loss: 0.315 | test avg. loss: 6.754


 18%|█▊        | 8754/50000 [12:59<59:23, 11.58it/s]

Epochs: 8752 | epoch avg. loss: 0.660 | test avg. loss: 6.545
Epochs: 8753 | epoch avg. loss: 0.361 | test avg. loss: 6.817
Epochs: 8754 | epoch avg. loss: 0.580 | test avg. loss: 6.754


 18%|█▊        | 8758/50000 [12:59<55:16, 12.43it/s]

Epochs: 8755 | epoch avg. loss: 0.307 | test avg. loss: 6.368
Epochs: 8756 | epoch avg. loss: 0.370 | test avg. loss: 6.729
Epochs: 8757 | epoch avg. loss: 0.495 | test avg. loss: 6.361


 18%|█▊        | 8760/50000 [12:59<56:11, 12.23it/s]

Epochs: 8758 | epoch avg. loss: 0.404 | test avg. loss: 6.637
Epochs: 8759 | epoch avg. loss: 0.381 | test avg. loss: 6.503
Epochs: 8760 | epoch avg. loss: 0.603 | test avg. loss: 7.355


 18%|█▊        | 8764/50000 [13:00<58:33, 11.74it/s]

Epochs: 8761 | epoch avg. loss: 0.665 | test avg. loss: 6.481
Epochs: 8762 | epoch avg. loss: 0.425 | test avg. loss: 7.737
Epochs: 8763 | epoch avg. loss: 0.690 | test avg. loss: 6.839


 18%|█▊        | 8766/50000 [13:00<57:54, 11.87it/s]

Epochs: 8764 | epoch avg. loss: 0.709 | test avg. loss: 8.160
Epochs: 8765 | epoch avg. loss: 1.201 | test avg. loss: 6.628
Epochs: 8766 | epoch avg. loss: 0.560 | test avg. loss: 7.291


 18%|█▊        | 8770/50000 [13:00<52:08, 13.18it/s]

Epochs: 8767 | epoch avg. loss: 0.558 | test avg. loss: 6.390
Epochs: 8768 | epoch avg. loss: 0.635 | test avg. loss: 6.600
Epochs: 8769 | epoch avg. loss: 0.505 | test avg. loss: 6.322
Epochs: 8770 | epoch avg. loss: 0.696 | test avg. loss: 7.000


 18%|█▊        | 8774/50000 [13:00<47:32, 14.45it/s]

Epochs: 8771 | epoch avg. loss: 0.496 | test avg. loss: 6.742
Epochs: 8772 | epoch avg. loss: 0.742 | test avg. loss: 7.135
Epochs: 8773 | epoch avg. loss: 0.581 | test avg. loss: 6.367


 18%|█▊        | 8776/50000 [13:01<52:29, 13.09it/s]

Epochs: 8774 | epoch avg. loss: 0.495 | test avg. loss: 6.332
Epochs: 8775 | epoch avg. loss: 0.320 | test avg. loss: 6.039
Epochs: 8776 | epoch avg. loss: 0.461 | test avg. loss: 6.156


 18%|█▊        | 8780/50000 [13:01<51:33, 13.33it/s]

Epochs: 8777 | epoch avg. loss: 0.288 | test avg. loss: 6.269
Epochs: 8778 | epoch avg. loss: 0.254 | test avg. loss: 6.631
Epochs: 8779 | epoch avg. loss: 0.158 | test avg. loss: 6.601


 18%|█▊        | 8784/50000 [13:01<49:39, 13.84it/s]

Epochs: 8780 | epoch avg. loss: 0.261 | test avg. loss: 6.950
Epochs: 8781 | epoch avg. loss: 0.514 | test avg. loss: 6.301
Epochs: 8782 | epoch avg. loss: 0.298 | test avg. loss: 6.234
Epochs: 8783 | epoch avg. loss: 0.234 | test avg. loss: 6.006


 18%|█▊        | 8788/50000 [13:01<47:16, 14.53it/s]

Epochs: 8784 | epoch avg. loss: 0.228 | test avg. loss: 6.186
Epochs: 8785 | epoch avg. loss: 0.209 | test avg. loss: 6.024
Epochs: 8786 | epoch avg. loss: 0.268 | test avg. loss: 6.090
Epochs: 8787 | epoch avg. loss: 0.262 | test avg. loss: 6.532


 18%|█▊        | 8790/50000 [13:02<53:24, 12.86it/s]

Epochs: 8788 | epoch avg. loss: 0.327 | test avg. loss: 6.236
Epochs: 8789 | epoch avg. loss: 0.287 | test avg. loss: 7.021
Epochs: 8790 | epoch avg. loss: 0.626 | test avg. loss: 6.127


                                                    

Epochs: 8791 | epoch avg. loss: 0.325 | test avg. loss: 6.386
Epochs: 8792 | epoch avg. loss: 0.258 | test avg. loss: 6.010
Epochs: 8793 | epoch avg. loss: 0.396 | test avg. loss: 6.497


 18%|█▊        | 8796/50000 [13:02<52:00, 13.20it/s]

Epochs: 8794 | epoch avg. loss: 0.377 | test avg. loss: 6.009
Epochs: 8795 | epoch avg. loss: 0.202 | test avg. loss: 6.160
Epochs: 8796 | epoch avg. loss: 0.253 | test avg. loss: 5.957


 18%|█▊        | 8798/50000 [13:02<56:19, 12.19it/s]

Epochs: 8797 | epoch avg. loss: 0.250 | test avg. loss: 6.066
Epochs: 8798 | epoch avg. loss: 0.240 | test avg. loss: 6.547
Epochs: 8799 | epoch avg. loss: 0.312 | test avg. loss: 6.323


 18%|█▊        | 8802/50000 [13:04<2:47:19,  4.10it/s]

Epochs: 8800 | epoch avg. loss: 0.451 | test avg. loss: 7.077
Epochs: 8801 | epoch avg. loss: 0.499 | test avg. loss: 6.348
Epochs: 8802 | epoch avg. loss: 0.521 | test avg. loss: 6.686


 18%|█▊        | 8806/50000 [13:04<1:50:14,  6.23it/s]

Epochs: 8803 | epoch avg. loss: 0.524 | test avg. loss: 5.895
Epochs: 8804 | epoch avg. loss: 0.204 | test avg. loss: 6.425
Epochs: 8805 | epoch avg. loss: 0.386 | test avg. loss: 5.789


 18%|█▊        | 8808/50000 [13:05<1:31:48,  7.48it/s]

Epochs: 8806 | epoch avg. loss: 0.325 | test avg. loss: 6.408
Epochs: 8807 | epoch avg. loss: 0.430 | test avg. loss: 5.754
Epochs: 8808 | epoch avg. loss: 0.418 | test avg. loss: 5.994


 18%|█▊        | 8812/50000 [13:05<1:11:10,  9.65it/s]

Epochs: 8809 | epoch avg. loss: 0.372 | test avg. loss: 5.812
Epochs: 8810 | epoch avg. loss: 0.204 | test avg. loss: 6.318
Epochs: 8811 | epoch avg. loss: 0.429 | test avg. loss: 5.865


 18%|█▊        | 8814/50000 [13:05<1:05:00, 10.56it/s]

Epochs: 8812 | epoch avg. loss: 0.324 | test avg. loss: 5.816
Epochs: 8813 | epoch avg. loss: 0.334 | test avg. loss: 6.026
Epochs: 8814 | epoch avg. loss: 0.270 | test avg. loss: 5.805


 18%|█▊        | 8818/50000 [13:05<1:01:30, 11.16it/s]

Epochs: 8815 | epoch avg. loss: 0.189 | test avg. loss: 5.925
Epochs: 8816 | epoch avg. loss: 0.209 | test avg. loss: 6.012
Epochs: 8817 | epoch avg. loss: 0.208 | test avg. loss: 6.380


 18%|█▊        | 8820/50000 [13:06<1:01:01, 11.25it/s]

Epochs: 8818 | epoch avg. loss: 0.237 | test avg. loss: 6.183
Epochs: 8819 | epoch avg. loss: 0.333 | test avg. loss: 6.792
Epochs: 8820 | epoch avg. loss: 0.513 | test avg. loss: 6.221


 18%|█▊        | 8824/50000 [13:06<1:02:11, 11.04it/s]

Epochs: 8821 | epoch avg. loss: 0.506 | test avg. loss: 7.266
Epochs: 8822 | epoch avg. loss: 0.869 | test avg. loss: 6.703
Epochs: 8823 | epoch avg. loss: 0.882 | test avg. loss: 8.274


 18%|█▊        | 8826/50000 [13:06<1:00:30, 11.34it/s]

Epochs: 8824 | epoch avg. loss: 1.190 | test avg. loss: 6.901
Epochs: 8825 | epoch avg. loss: 1.684 | test avg. loss: 6.812
Epochs: 8826 | epoch avg. loss: 0.601 | test avg. loss: 6.147


 18%|█▊        | 8830/50000 [13:06<1:00:46, 11.29it/s]

Epochs: 8827 | epoch avg. loss: 0.709 | test avg. loss: 6.572
Epochs: 8828 | epoch avg. loss: 1.049 | test avg. loss: 5.706
Epochs: 8829 | epoch avg. loss: 0.529 | test avg. loss: 6.169


 18%|█▊        | 8834/50000 [13:07<53:37, 12.79it/s]

Epochs: 8830 | epoch avg. loss: 0.378 | test avg. loss: 6.087
Epochs: 8831 | epoch avg. loss: 0.227 | test avg. loss: 6.379
Epochs: 8832 | epoch avg. loss: 0.290 | test avg. loss: 6.238
Epochs: 8833 | epoch avg. loss: 0.325 | test avg. loss: 6.318


 18%|█▊        | 8838/50000 [13:07<48:23, 14.18it/s]

Epochs: 8834 | epoch avg. loss: 0.212 | test avg. loss: 6.329
Epochs: 8835 | epoch avg. loss: 0.235 | test avg. loss: 6.266
Epochs: 8836 | epoch avg. loss: 0.192 | test avg. loss: 6.308
Epochs: 8837 | epoch avg. loss: 0.222 | test avg. loss: 6.195


 18%|█▊        | 8842/50000 [13:07<45:41, 15.01it/s]

Epochs: 8838 | epoch avg. loss: 0.241 | test avg. loss: 6.550
Epochs: 8839 | epoch avg. loss: 0.280 | test avg. loss: 6.315
Epochs: 8840 | epoch avg. loss: 0.399 | test avg. loss: 6.588
Epochs: 8841 | epoch avg. loss: 0.277 | test avg. loss: 6.439


 18%|█▊        | 8844/50000 [13:07<45:14, 15.16it/s]

Epochs: 8842 | epoch avg. loss: 0.327 | test avg. loss: 6.831
Epochs: 8843 | epoch avg. loss: 0.291 | test avg. loss: 6.408
Epochs: 8844 | epoch avg. loss: 0.327 | test avg. loss: 7.264


 18%|█▊        | 8848/50000 [13:08<48:58, 14.00it/s]

Epochs: 8845 | epoch avg. loss: 0.536 | test avg. loss: 6.497
Epochs: 8846 | epoch avg. loss: 0.503 | test avg. loss: 6.718
Epochs: 8847 | epoch avg. loss: 0.360 | test avg. loss: 6.376


 18%|█▊        | 8850/50000 [13:08<47:28, 14.45it/s]

Epochs: 8848 | epoch avg. loss: 0.277 | test avg. loss: 6.972
Epochs: 8849 | epoch avg. loss: 0.870 | test avg. loss: 6.248
Epochs: 8850 | epoch avg. loss: 0.638 | test avg. loss: 6.673


 18%|█▊        | 8854/50000 [13:08<48:43, 14.08it/s]

Epochs: 8851 | epoch avg. loss: 0.647 | test avg. loss: 6.360
Epochs: 8852 | epoch avg. loss: 0.278 | test avg. loss: 6.818
Epochs: 8853 | epoch avg. loss: 0.818 | test avg. loss: 6.410
Epochs: 8854 | epoch avg. loss: 0.429 | test avg. loss: 6.880


 18%|█▊        | 8858/50000 [13:08<46:51, 14.63it/s]

Epochs: 8855 | epoch avg. loss: 0.909 | test avg. loss: 6.605
Epochs: 8856 | epoch avg. loss: 0.654 | test avg. loss: 6.457
Epochs: 8857 | epoch avg. loss: 0.508 | test avg. loss: 6.495


 18%|█▊        | 8860/50000 [13:08<50:34, 13.56it/s]

Epochs: 8858 | epoch avg. loss: 0.493 | test avg. loss: 5.840
Epochs: 8859 | epoch avg. loss: 0.590 | test avg. loss: 6.990
Epochs: 8860 | epoch avg. loss: 0.799 | test avg. loss: 6.306


 18%|█▊        | 8864/50000 [13:09<54:07, 12.67it/s]

Epochs: 8861 | epoch avg. loss: 0.646 | test avg. loss: 6.926
Epochs: 8862 | epoch avg. loss: 0.506 | test avg. loss: 6.608
Epochs: 8863 | epoch avg. loss: 0.453 | test avg. loss: 6.669




Epochs: 8864 | epoch avg. loss: 0.189 | test avg. loss: 6.433
Epochs: 8865 | epoch avg. loss: 0.329 | test avg. loss: 6.425


 18%|█▊        | 8868/50000 [13:09<59:06, 11.60it/s]

Epochs: 8866 | epoch avg. loss: 0.285 | test avg. loss: 5.885
Epochs: 8867 | epoch avg. loss: 0.162 | test avg. loss: 6.092
Epochs: 8868 | epoch avg. loss: 0.234 | test avg. loss: 5.771


 18%|█▊        | 8870/50000 [13:09<57:56, 11.83it/s]

Epochs: 8869 | epoch avg. loss: 0.251 | test avg. loss: 6.024
Epochs: 8870 | epoch avg. loss: 0.163 | test avg. loss: 6.015


 18%|█▊        | 8874/50000 [13:10<1:03:06, 10.86it/s]

Epochs: 8871 | epoch avg. loss: 0.168 | test avg. loss: 6.893
Epochs: 8872 | epoch avg. loss: 0.430 | test avg. loss: 6.320
Epochs: 8873 | epoch avg. loss: 0.360 | test avg. loss: 6.720


 18%|█▊        | 8876/50000 [13:10<1:03:22, 10.82it/s]

Epochs: 8874 | epoch avg. loss: 0.432 | test avg. loss: 6.082
Epochs: 8875 | epoch avg. loss: 0.340 | test avg. loss: 5.965
Epochs: 8876 | epoch avg. loss: 0.176 | test avg. loss: 6.019


 18%|█▊        | 8880/50000 [13:10<1:00:22, 11.35it/s]

Epochs: 8877 | epoch avg. loss: 0.137 | test avg. loss: 6.207
Epochs: 8878 | epoch avg. loss: 0.187 | test avg. loss: 6.181
Epochs: 8879 | epoch avg. loss: 0.232 | test avg. loss: 6.232


 18%|█▊        | 8882/50000 [13:10<58:55, 11.63it/s]

Epochs: 8880 | epoch avg. loss: 0.219 | test avg. loss: 6.651
Epochs: 8881 | epoch avg. loss: 0.325 | test avg. loss: 6.699
Epochs: 8882 | epoch avg. loss: 0.603 | test avg. loss: 7.557


 18%|█▊        | 8886/50000 [13:11<1:00:41, 11.29it/s]

Epochs: 8883 | epoch avg. loss: 0.794 | test avg. loss: 6.636
Epochs: 8884 | epoch avg. loss: 0.625 | test avg. loss: 7.829
Epochs: 8885 | epoch avg. loss: 1.034 | test avg. loss: 6.696


 18%|█▊        | 8890/50000 [13:11<52:57, 12.94it/s]

Epochs: 8886 | epoch avg. loss: 0.881 | test avg. loss: 7.817
Epochs: 8887 | epoch avg. loss: 1.205 | test avg. loss: 6.567
Epochs: 8888 | epoch avg. loss: 1.106 | test avg. loss: 6.729
Epochs: 8889 | epoch avg. loss: 0.445 | test avg. loss: 6.343


 18%|█▊        | 8894/50000 [13:11<47:44, 14.35it/s]

Epochs: 8890 | epoch avg. loss: 0.664 | test avg. loss: 6.724
Epochs: 8891 | epoch avg. loss: 0.465 | test avg. loss: 6.685
Epochs: 8892 | epoch avg. loss: 0.665 | test avg. loss: 6.963
Epochs: 8893 | epoch avg. loss: 0.809 | test avg. loss: 6.170


 18%|█▊        | 8896/50000 [13:12<51:05, 13.41it/s]

Epochs: 8894 | epoch avg. loss: 0.576 | test avg. loss: 6.026
Epochs: 8895 | epoch avg. loss: 0.350 | test avg. loss: 6.160
Epochs: 8896 | epoch avg. loss: 0.320 | test avg. loss: 5.959


 18%|█▊        | 8898/50000 [13:12<54:30, 12.57it/s]

Epochs: 8897 | epoch avg. loss: 0.324 | test avg. loss: 6.351
Epochs: 8898 | epoch avg. loss: 0.333 | test avg. loss: 6.027
Epochs: 8899 | epoch avg. loss: 0.274 | test avg. loss: 6.745


 18%|█▊        | 8903/50000 [13:14<2:19:32,  4.91it/s]

Epochs: 8900 | epoch avg. loss: 0.370 | test avg. loss: 6.525
Epochs: 8901 | epoch avg. loss: 0.487 | test avg. loss: 7.531
Epochs: 8902 | epoch avg. loss: 0.599 | test avg. loss: 7.238


 18%|█▊        | 8905/50000 [13:14<1:48:59,  6.28it/s]

Epochs: 8903 | epoch avg. loss: 0.990 | test avg. loss: 8.348
Epochs: 8904 | epoch avg. loss: 1.118 | test avg. loss: 7.153
Epochs: 8905 | epoch avg. loss: 1.274 | test avg. loss: 8.747


 18%|█▊        | 8909/50000 [13:14<1:16:07,  9.00it/s]

Epochs: 8906 | epoch avg. loss: 1.877 | test avg. loss: 7.159
Epochs: 8907 | epoch avg. loss: 1.562 | test avg. loss: 9.194
Epochs: 8908 | epoch avg. loss: 2.812 | test avg. loss: 8.671


 18%|█▊        | 8911/50000 [13:14<1:10:41,  9.69it/s]

Epochs: 8909 | epoch avg. loss: 2.717 | test avg. loss: 12.544
Epochs: 8910 | epoch avg. loss: 4.061 | test avg. loss: 9.080
Epochs: 8911 | epoch avg. loss: 3.546 | test avg. loss: 10.433


                                                      

Epochs: 8912 | epoch avg. loss: 2.997 | test avg. loss: 8.522
Epochs: 8913 | epoch avg. loss: 4.213 | test avg. loss: 11.315


 18%|█▊        | 8917/50000 [13:15<1:02:33, 10.94it/s]

Epochs: 8914 | epoch avg. loss: 4.372 | test avg. loss: 8.951
Epochs: 8915 | epoch avg. loss: 4.428 | test avg. loss: 9.071
Epochs: 8916 | epoch avg. loss: 2.768 | test avg. loss: 6.779


                                                    

Epochs: 8917 | epoch avg. loss: 1.549 | test avg. loss: 8.939
Epochs: 8918 | epoch avg. loss: 1.774 | test avg. loss: 8.172
Epochs: 8919 | epoch avg. loss: 2.759 | test avg. loss: 9.018


 18%|█▊        | 8923/50000 [13:15<50:57, 13.44it/s]

Epochs: 8920 | epoch avg. loss: 2.294 | test avg. loss: 6.723
Epochs: 8921 | epoch avg. loss: 1.745 | test avg. loss: 9.387
Epochs: 8922 | epoch avg. loss: 2.771 | test avg. loss: 8.696
Epochs: 8923 | epoch avg. loss: 2.866 | test avg. loss: 10.245


 18%|█▊        | 8927/50000 [13:15<47:28, 14.42it/s]

Epochs: 8924 | epoch avg. loss: 2.784 | test avg. loss: 9.090
Epochs: 8925 | epoch avg. loss: 2.701 | test avg. loss: 9.936
Epochs: 8926 | epoch avg. loss: 2.671 | test avg. loss: 8.137
Epochs: 8927 | epoch avg. loss: 2.221 | test avg. loss: 9.633


 18%|█▊        | 8931/50000 [13:16<52:21, 13.07it/s]

Epochs: 8928 | epoch avg. loss: 2.373 | test avg. loss: 8.085
Epochs: 8929 | epoch avg. loss: 2.946 | test avg. loss: 9.742
Epochs: 8930 | epoch avg. loss: 2.575 | test avg. loss: 7.725


 18%|█▊        | 8935/50000 [13:16<48:37, 14.07it/s]

Epochs: 8931 | epoch avg. loss: 2.742 | test avg. loss: 9.374
Epochs: 8932 | epoch avg. loss: 2.400 | test avg. loss: 6.394
Epochs: 8933 | epoch avg. loss: 1.272 | test avg. loss: 7.885
Epochs: 8934 | epoch avg. loss: 1.117 | test avg. loss: 6.750


 18%|█▊        | 8937/50000 [13:16<48:18, 14.17it/s]

Epochs: 8935 | epoch avg. loss: 1.677 | test avg. loss: 7.928
Epochs: 8936 | epoch avg. loss: 1.474 | test avg. loss: 5.926
Epochs: 8937 | epoch avg. loss: 0.971 | test avg. loss: 6.446


 18%|█▊        | 8941/50000 [13:16<50:41, 13.50it/s]

Epochs: 8938 | epoch avg. loss: 0.698 | test avg. loss: 5.735
Epochs: 8939 | epoch avg. loss: 0.670 | test avg. loss: 6.161
Epochs: 8940 | epoch avg. loss: 0.414 | test avg. loss: 5.701


 18%|█▊        | 8943/50000 [13:17<56:56, 12.02it/s]

Epochs: 8941 | epoch avg. loss: 0.370 | test avg. loss: 5.874
Epochs: 8942 | epoch avg. loss: 0.261 | test avg. loss: 5.685
Epochs: 8943 | epoch avg. loss: 0.262 | test avg. loss: 5.533


 18%|█▊        | 8947/50000 [13:17<59:12, 11.56it/s]

Epochs: 8944 | epoch avg. loss: 0.162 | test avg. loss: 5.947
Epochs: 8945 | epoch avg. loss: 0.315 | test avg. loss: 5.609
Epochs: 8946 | epoch avg. loss: 0.214 | test avg. loss: 5.994


 18%|█▊        | 8949/50000 [13:17<1:00:04, 11.39it/s]

Epochs: 8947 | epoch avg. loss: 0.208 | test avg. loss: 5.751
Epochs: 8948 | epoch avg. loss: 0.278 | test avg. loss: 6.252
Epochs: 8949 | epoch avg. loss: 0.326 | test avg. loss: 5.740


 18%|█▊        | 8953/50000 [13:17<1:01:50, 11.06it/s]

Epochs: 8950 | epoch avg. loss: 0.429 | test avg. loss: 6.081
Epochs: 8951 | epoch avg. loss: 0.304 | test avg. loss: 5.564
Epochs: 8952 | epoch avg. loss: 0.346 | test avg. loss: 5.645


 18%|█▊        | 8955/50000 [13:18<1:03:55, 10.70it/s]

Epochs: 8953 | epoch avg. loss: 0.150 | test avg. loss: 5.575
Epochs: 8954 | epoch avg. loss: 0.162 | test avg. loss: 5.811
Epochs: 8955 | epoch avg. loss: 0.166 | test avg. loss: 5.679


 18%|█▊        | 8959/50000 [13:18<1:01:33, 11.11it/s]

Epochs: 8956 | epoch avg. loss: 0.150 | test avg. loss: 5.716
Epochs: 8957 | epoch avg. loss: 0.171 | test avg. loss: 5.690
Epochs: 8958 | epoch avg. loss: 0.178 | test avg. loss: 5.912


 18%|█▊        | 8961/50000 [13:18<59:38, 11.47it/s]

Epochs: 8959 | epoch avg. loss: 0.207 | test avg. loss: 5.683
Epochs: 8960 | epoch avg. loss: 0.129 | test avg. loss: 5.626
Epochs: 8961 | epoch avg. loss: 0.136 | test avg. loss: 5.718


 18%|█▊        | 8965/50000 [13:18<56:42, 12.06it/s]

Epochs: 8962 | epoch avg. loss: 0.138 | test avg. loss: 5.604
Epochs: 8963 | epoch avg. loss: 0.132 | test avg. loss: 5.756
Epochs: 8964 | epoch avg. loss: 0.153 | test avg. loss: 5.641


 18%|█▊        | 8967/50000 [13:19<54:02, 12.65it/s]

Epochs: 8965 | epoch avg. loss: 0.150 | test avg. loss: 5.726
Epochs: 8966 | epoch avg. loss: 0.139 | test avg. loss: 5.725
Epochs: 8967 | epoch avg. loss: 0.136 | test avg. loss: 5.710


 18%|█▊        | 8971/50000 [13:19<54:43, 12.50it/s]

Epochs: 8968 | epoch avg. loss: 0.137 | test avg. loss: 5.602
Epochs: 8969 | epoch avg. loss: 0.153 | test avg. loss: 5.830
Epochs: 8970 | epoch avg. loss: 0.197 | test avg. loss: 5.620


 18%|█▊        | 8973/50000 [13:19<56:19, 12.14it/s]

Epochs: 8971 | epoch avg. loss: 0.156 | test avg. loss: 5.785
Epochs: 8972 | epoch avg. loss: 0.148 | test avg. loss: 5.659
Epochs: 8973 | epoch avg. loss: 0.176 | test avg. loss: 6.013


 18%|█▊        | 8975/50000 [13:19<55:40, 12.28it/s]

Epochs: 8974 | epoch avg. loss: 0.260 | test avg. loss: 5.727
Epochs: 8975 | epoch avg. loss: 0.361 | test avg. loss: 6.096


 18%|█▊        | 8979/50000 [13:20<58:38, 11.66it/s]  

Epochs: 8976 | epoch avg. loss: 0.316 | test avg. loss: 5.686
Epochs: 8977 | epoch avg. loss: 0.372 | test avg. loss: 6.208
Epochs: 8978 | epoch avg. loss: 0.433 | test avg. loss: 5.719


 18%|█▊        | 8981/50000 [13:20<54:50, 12.47it/s]

Epochs: 8979 | epoch avg. loss: 0.467 | test avg. loss: 6.276
Epochs: 8980 | epoch avg. loss: 0.487 | test avg. loss: 5.578
Epochs: 8981 | epoch avg. loss: 0.300 | test avg. loss: 6.131


 18%|█▊        | 8985/50000 [13:20<58:12, 11.74it/s]

Epochs: 8982 | epoch avg. loss: 0.388 | test avg. loss: 5.717
Epochs: 8983 | epoch avg. loss: 0.399 | test avg. loss: 6.191
Epochs: 8984 | epoch avg. loss: 0.400 | test avg. loss: 5.698


 18%|█▊        | 8987/50000 [13:20<55:30, 12.32it/s]

Epochs: 8985 | epoch avg. loss: 0.327 | test avg. loss: 6.311
Epochs: 8986 | epoch avg. loss: 0.442 | test avg. loss: 5.788
Epochs: 8987 | epoch avg. loss: 0.422 | test avg. loss: 5.927


 18%|█▊        | 8991/50000 [13:21<55:13, 12.38it/s]

Epochs: 8988 | epoch avg. loss: 0.230 | test avg. loss: 5.598
Epochs: 8989 | epoch avg. loss: 0.163 | test avg. loss: 5.789
Epochs: 8990 | epoch avg. loss: 0.191 | test avg. loss: 5.621


 18%|█▊        | 8993/50000 [13:21<55:02, 12.42it/s]

Epochs: 8991 | epoch avg. loss: 0.229 | test avg. loss: 5.828
Epochs: 8992 | epoch avg. loss: 0.194 | test avg. loss: 5.645
Epochs: 8993 | epoch avg. loss: 0.212 | test avg. loss: 5.865


 18%|█▊        | 8997/50000 [13:21<57:06, 11.97it/s]

Epochs: 8994 | epoch avg. loss: 0.189 | test avg. loss: 5.669
Epochs: 8995 | epoch avg. loss: 0.174 | test avg. loss: 5.924
Epochs: 8996 | epoch avg. loss: 0.196 | test avg. loss: 5.666


 18%|█▊        | 8999/50000 [13:21<54:48, 12.47it/s]

Epochs: 8997 | epoch avg. loss: 0.177 | test avg. loss: 5.691
Epochs: 8998 | epoch avg. loss: 0.123 | test avg. loss: 5.645
Epochs: 8999 | epoch avg. loss: 0.131 | test avg. loss: 5.724


 18%|█▊        | 9003/50000 [13:23<2:33:27,  4.45it/s]

Epochs: 9000 | epoch avg. loss: 0.142 | test avg. loss: 5.600
Epochs: 9001 | epoch avg. loss: 0.133 | test avg. loss: 5.796
Epochs: 9002 | epoch avg. loss: 0.159 | test avg. loss: 5.632


 18%|█▊        | 9005/50000 [13:23<2:03:11,  5.55it/s]

Epochs: 9003 | epoch avg. loss: 0.142 | test avg. loss: 5.717
Epochs: 9004 | epoch avg. loss: 0.123 | test avg. loss: 5.642
Epochs: 9005 | epoch avg. loss: 0.133 | test avg. loss: 5.633


 18%|█▊        | 9009/50000 [13:23<1:31:19,  7.48it/s]

Epochs: 9006 | epoch avg. loss: 0.127 | test avg. loss: 5.698
Epochs: 9007 | epoch avg. loss: 0.127 | test avg. loss: 5.631
Epochs: 9008 | epoch avg. loss: 0.129 | test avg. loss: 5.651


 18%|█▊        | 9011/50000 [13:24<1:20:51,  8.45it/s]

Epochs: 9009 | epoch avg. loss: 0.120 | test avg. loss: 5.654
Epochs: 9010 | epoch avg. loss: 0.122 | test avg. loss: 5.702
Epochs: 9011 | epoch avg. loss: 0.127 | test avg. loss: 5.648


 18%|█▊        | 9015/50000 [13:24<1:04:15, 10.63it/s]

Epochs: 9012 | epoch avg. loss: 0.123 | test avg. loss: 5.626
Epochs: 9013 | epoch avg. loss: 0.128 | test avg. loss: 5.766
Epochs: 9014 | epoch avg. loss: 0.146 | test avg. loss: 5.598


 18%|█▊        | 9017/50000 [13:24<1:00:08, 11.36it/s]

Epochs: 9015 | epoch avg. loss: 0.174 | test avg. loss: 5.839
Epochs: 9016 | epoch avg. loss: 0.181 | test avg. loss: 5.637
Epochs: 9017 | epoch avg. loss: 0.237 | test avg. loss: 6.084


 18%|█▊        | 9021/50000 [13:24<54:31, 12.53it/s]

Epochs: 9018 | epoch avg. loss: 0.357 | test avg. loss: 5.599
Epochs: 9019 | epoch avg. loss: 0.166 | test avg. loss: 5.744
Epochs: 9020 | epoch avg. loss: 0.157 | test avg. loss: 5.655


 18%|█▊        | 9023/50000 [13:25<58:43, 11.63it/s]

Epochs: 9021 | epoch avg. loss: 0.138 | test avg. loss: 5.610
Epochs: 9022 | epoch avg. loss: 0.137 | test avg. loss: 5.665
Epochs: 9023 | epoch avg. loss: 0.132 | test avg. loss: 5.655


 18%|█▊        | 9027/50000 [13:25<58:10, 11.74it/s]

Epochs: 9024 | epoch avg. loss: 0.125 | test avg. loss: 5.584
Epochs: 9025 | epoch avg. loss: 0.118 | test avg. loss: 5.652
Epochs: 9026 | epoch avg. loss: 0.125 | test avg. loss: 5.581


 18%|█▊        | 9029/50000 [13:25<58:51, 11.60it/s]

Epochs: 9027 | epoch avg. loss: 0.162 | test avg. loss: 5.958
Epochs: 9028 | epoch avg. loss: 0.227 | test avg. loss: 5.665
Epochs: 9029 | epoch avg. loss: 0.284 | test avg. loss: 5.870


 18%|█▊        | 9033/50000 [13:25<51:09, 13.35it/s]

Epochs: 9030 | epoch avg. loss: 0.177 | test avg. loss: 5.593
Epochs: 9031 | epoch avg. loss: 0.145 | test avg. loss: 5.815
Epochs: 9032 | epoch avg. loss: 0.186 | test avg. loss: 5.572


 18%|█▊        | 9035/50000 [13:26<52:31, 13.00it/s]

Epochs: 9033 | epoch avg. loss: 0.156 | test avg. loss: 5.646
Epochs: 9034 | epoch avg. loss: 0.144 | test avg. loss: 5.647
Epochs: 9035 | epoch avg. loss: 0.116 | test avg. loss: 5.755


 18%|█▊        | 9039/50000 [13:26<52:09, 13.09it/s]

Epochs: 9036 | epoch avg. loss: 0.142 | test avg. loss: 5.644
Epochs: 9037 | epoch avg. loss: 0.119 | test avg. loss: 5.774
Epochs: 9038 | epoch avg. loss: 0.128 | test avg. loss: 5.629


 18%|█▊        | 9041/50000 [13:26<52:39, 12.96it/s]

Epochs: 9039 | epoch avg. loss: 0.166 | test avg. loss: 5.740
Epochs: 9040 | epoch avg. loss: 0.130 | test avg. loss: 5.661
Epochs: 9041 | epoch avg. loss: 0.119 | test avg. loss: 5.665


 18%|█▊        | 9045/50000 [13:26<50:59, 13.39it/s]

Epochs: 9042 | epoch avg. loss: 0.125 | test avg. loss: 5.625
Epochs: 9043 | epoch avg. loss: 0.136 | test avg. loss: 5.578
Epochs: 9044 | epoch avg. loss: 0.135 | test avg. loss: 5.647


 18%|█▊        | 9047/50000 [13:26<50:17, 13.57it/s]

Epochs: 9045 | epoch avg. loss: 0.122 | test avg. loss: 5.597
Epochs: 9046 | epoch avg. loss: 0.132 | test avg. loss: 5.674
Epochs: 9047 | epoch avg. loss: 0.126 | test avg. loss: 5.621


 18%|█▊        | 9051/50000 [13:27<50:49, 13.43it/s]

Epochs: 9048 | epoch avg. loss: 0.135 | test avg. loss: 5.639
Epochs: 9049 | epoch avg. loss: 0.142 | test avg. loss: 5.605
Epochs: 9050 | epoch avg. loss: 0.169 | test avg. loss: 5.649


 18%|█▊        | 9053/50000 [13:27<54:00, 12.64it/s]

Epochs: 9051 | epoch avg. loss: 0.126 | test avg. loss: 5.699
Epochs: 9052 | epoch avg. loss: 0.125 | test avg. loss: 5.647
Epochs: 9053 | epoch avg. loss: 0.118 | test avg. loss: 5.653


 18%|█▊        | 9057/50000 [13:27<52:11, 13.07it/s]

Epochs: 9054 | epoch avg. loss: 0.125 | test avg. loss: 5.570
Epochs: 9055 | epoch avg. loss: 0.150 | test avg. loss: 5.886
Epochs: 9056 | epoch avg. loss: 0.211 | test avg. loss: 5.594


 18%|█▊        | 9059/50000 [13:27<51:30, 13.25it/s]

Epochs: 9057 | epoch avg. loss: 0.200 | test avg. loss: 5.627
Epochs: 9058 | epoch avg. loss: 0.140 | test avg. loss: 5.722
Epochs: 9059 | epoch avg. loss: 0.180 | test avg. loss: 5.591


 18%|█▊        | 9063/50000 [13:28<58:01, 11.76it/s]

Epochs: 9060 | epoch avg. loss: 0.272 | test avg. loss: 6.092
Epochs: 9061 | epoch avg. loss: 0.356 | test avg. loss: 5.616
Epochs: 9062 | epoch avg. loss: 0.249 | test avg. loss: 6.043




Epochs: 9063 | epoch avg. loss: 0.276 | test avg. loss: 5.660
Epochs: 9064 | epoch avg. loss: 0.346 | test avg. loss: 5.794
Epochs: 9065 | epoch avg. loss: 0.238 | test avg. loss: 5.679


 18%|█▊        | 9069/50000 [13:28<52:38, 12.96it/s]

Epochs: 9066 | epoch avg. loss: 0.252 | test avg. loss: 5.643
Epochs: 9067 | epoch avg. loss: 0.189 | test avg. loss: 5.588
Epochs: 9068 | epoch avg. loss: 0.131 | test avg. loss: 5.514


 18%|█▊        | 9071/50000 [13:28<50:47, 13.43it/s]

Epochs: 9069 | epoch avg. loss: 0.195 | test avg. loss: 5.458
Epochs: 9070 | epoch avg. loss: 0.159 | test avg. loss: 5.403
Epochs: 9071 | epoch avg. loss: 0.142 | test avg. loss: 5.550


 18%|█▊        | 9075/50000 [13:29<54:21, 12.55it/s]

Epochs: 9072 | epoch avg. loss: 0.126 | test avg. loss: 5.663
Epochs: 9073 | epoch avg. loss: 0.119 | test avg. loss: 5.717
Epochs: 9074 | epoch avg. loss: 0.132 | test avg. loss: 5.957


 18%|█▊        | 9077/50000 [13:29<53:50, 12.67it/s]

Epochs: 9075 | epoch avg. loss: 0.169 | test avg. loss: 5.709
Epochs: 9076 | epoch avg. loss: 0.165 | test avg. loss: 5.938
Epochs: 9077 | epoch avg. loss: 0.184 | test avg. loss: 5.612


 18%|█▊        | 9081/50000 [13:29<50:27, 13.51it/s]

Epochs: 9078 | epoch avg. loss: 0.323 | test avg. loss: 5.942
Epochs: 9079 | epoch avg. loss: 0.393 | test avg. loss: 5.501
Epochs: 9080 | epoch avg. loss: 0.136 | test avg. loss: 5.556


 18%|█▊        | 9083/50000 [13:29<52:11, 13.07it/s]

Epochs: 9081 | epoch avg. loss: 0.156 | test avg. loss: 5.508
Epochs: 9082 | epoch avg. loss: 0.160 | test avg. loss: 5.548
Epochs: 9083 | epoch avg. loss: 0.146 | test avg. loss: 5.669


 18%|█▊        | 9087/50000 [13:29<51:40, 13.20it/s]

Epochs: 9084 | epoch avg. loss: 0.116 | test avg. loss: 5.712
Epochs: 9085 | epoch avg. loss: 0.119 | test avg. loss: 5.704
Epochs: 9086 | epoch avg. loss: 0.135 | test avg. loss: 5.635


 18%|█▊        | 9091/50000 [13:30<48:36, 14.03it/s]

Epochs: 9087 | epoch avg. loss: 0.143 | test avg. loss: 5.775
Epochs: 9088 | epoch avg. loss: 0.180 | test avg. loss: 5.572
Epochs: 9089 | epoch avg. loss: 0.304 | test avg. loss: 5.762
Epochs: 9090 | epoch avg. loss: 0.173 | test avg. loss: 5.550




Epochs: 9091 | epoch avg. loss: 0.227 | test avg. loss: 5.961
Epochs: 9092 | epoch avg. loss: 0.284 | test avg. loss: 5.516
Epochs: 9093 | epoch avg. loss: 0.174 | test avg. loss: 5.937


 18%|█▊        | 9097/50000 [13:30<51:01, 13.36it/s]

Epochs: 9094 | epoch avg. loss: 0.258 | test avg. loss: 5.502
Epochs: 9095 | epoch avg. loss: 0.199 | test avg. loss: 5.874
Epochs: 9096 | epoch avg. loss: 0.252 | test avg. loss: 5.543


 18%|█▊        | 9099/50000 [13:30<49:52, 13.67it/s]

Epochs: 9097 | epoch avg. loss: 0.213 | test avg. loss: 5.861
Epochs: 9098 | epoch avg. loss: 0.403 | test avg. loss: 5.485
Epochs: 9099 | epoch avg. loss: 0.371 | test avg. loss: 5.443


 18%|█▊        | 9103/50000 [13:32<2:14:46,  5.06it/s]

Epochs: 9100 | epoch avg. loss: 0.351 | test avg. loss: 6.163
Epochs: 9101 | epoch avg. loss: 0.467 | test avg. loss: 5.481
Epochs: 9102 | epoch avg. loss: 0.388 | test avg. loss: 6.273


 18%|█▊        | 9105/50000 [13:32<1:48:38,  6.27it/s]

Epochs: 9103 | epoch avg. loss: 0.487 | test avg. loss: 5.560
Epochs: 9104 | epoch avg. loss: 0.269 | test avg. loss: 5.789
Epochs: 9105 | epoch avg. loss: 0.140 | test avg. loss: 5.689


 18%|█▊        | 9109/50000 [13:32<1:19:48,  8.54it/s]

Epochs: 9106 | epoch avg. loss: 0.195 | test avg. loss: 5.836
Epochs: 9107 | epoch avg. loss: 0.127 | test avg. loss: 5.656
Epochs: 9108 | epoch avg. loss: 0.198 | test avg. loss: 5.635


 18%|█▊        | 9111/50000 [13:33<1:14:20,  9.17it/s]

Epochs: 9109 | epoch avg. loss: 0.144 | test avg. loss: 5.742
Epochs: 9110 | epoch avg. loss: 0.174 | test avg. loss: 5.617
Epochs: 9111 | epoch avg. loss: 0.266 | test avg. loss: 6.090


 18%|█▊        | 9115/50000 [13:33<1:07:53, 10.04it/s]

Epochs: 9112 | epoch avg. loss: 0.319 | test avg. loss: 5.662
Epochs: 9113 | epoch avg. loss: 0.384 | test avg. loss: 6.127
Epochs: 9114 | epoch avg. loss: 0.422 | test avg. loss: 5.526


 18%|█▊        | 9117/50000 [13:33<1:02:16, 10.94it/s]

Epochs: 9115 | epoch avg. loss: 0.185 | test avg. loss: 5.631
Epochs: 9116 | epoch avg. loss: 0.120 | test avg. loss: 5.548
Epochs: 9117 | epoch avg. loss: 0.140 | test avg. loss: 5.758


 18%|█▊        | 9121/50000 [13:33<54:33, 12.49it/s]

Epochs: 9118 | epoch avg. loss: 0.181 | test avg. loss: 5.569
Epochs: 9119 | epoch avg. loss: 0.156 | test avg. loss: 5.732
Epochs: 9120 | epoch avg. loss: 0.297 | test avg. loss: 5.655


 18%|█▊        | 9123/50000 [13:33<54:42, 12.45it/s]

Epochs: 9121 | epoch avg. loss: 0.215 | test avg. loss: 5.482
Epochs: 9122 | epoch avg. loss: 0.225 | test avg. loss: 5.533
Epochs: 9123 | epoch avg. loss: 0.163 | test avg. loss: 5.506


 18%|█▊        | 9127/50000 [13:34<54:43, 12.45it/s]

Epochs: 9124 | epoch avg. loss: 0.165 | test avg. loss: 5.587
Epochs: 9125 | epoch avg. loss: 0.123 | test avg. loss: 5.651
Epochs: 9126 | epoch avg. loss: 0.154 | test avg. loss: 5.733


 18%|█▊        | 9129/50000 [13:34<54:37, 12.47it/s]

Epochs: 9127 | epoch avg. loss: 0.159 | test avg. loss: 5.523
Epochs: 9128 | epoch avg. loss: 0.228 | test avg. loss: 5.860
Epochs: 9129 | epoch avg. loss: 0.223 | test avg. loss: 5.498


 18%|█▊        | 9133/50000 [13:34<54:29, 12.50it/s]

Epochs: 9130 | epoch avg. loss: 0.241 | test avg. loss: 5.948
Epochs: 9131 | epoch avg. loss: 0.288 | test avg. loss: 5.520
Epochs: 9132 | epoch avg. loss: 0.137 | test avg. loss: 6.005


 18%|█▊        | 9135/50000 [13:34<54:04, 12.60it/s]

Epochs: 9133 | epoch avg. loss: 0.313 | test avg. loss: 5.576
Epochs: 9134 | epoch avg. loss: 0.233 | test avg. loss: 5.997
Epochs: 9135 | epoch avg. loss: 0.300 | test avg. loss: 5.546


 18%|█▊        | 9139/50000 [13:35<55:57, 12.17it/s]

Epochs: 9136 | epoch avg. loss: 0.258 | test avg. loss: 5.738
Epochs: 9137 | epoch avg. loss: 0.238 | test avg. loss: 5.523
Epochs: 9138 | epoch avg. loss: 0.145 | test avg. loss: 5.809


 18%|█▊        | 9141/50000 [13:35<54:24, 12.52it/s]

Epochs: 9139 | epoch avg. loss: 0.160 | test avg. loss: 5.652
Epochs: 9140 | epoch avg. loss: 0.210 | test avg. loss: 6.288
Epochs: 9141 | epoch avg. loss: 0.408 | test avg. loss: 5.648


 18%|█▊        | 9145/50000 [13:35<51:54, 13.12it/s]

Epochs: 9142 | epoch avg. loss: 0.356 | test avg. loss: 5.957
Epochs: 9143 | epoch avg. loss: 0.331 | test avg. loss: 5.510
Epochs: 9144 | epoch avg. loss: 0.300 | test avg. loss: 5.599


 18%|█▊        | 9147/50000 [13:35<52:01, 13.09it/s]

Epochs: 9145 | epoch avg. loss: 0.131 | test avg. loss: 5.581
Epochs: 9146 | epoch avg. loss: 0.145 | test avg. loss: 5.631
Epochs: 9147 | epoch avg. loss: 0.131 | test avg. loss: 5.442


 18%|█▊        | 9149/50000 [13:36<57:37, 11.82it/s]

Epochs: 9148 | epoch avg. loss: 0.150 | test avg. loss: 5.644
Epochs: 9149 | epoch avg. loss: 0.166 | test avg. loss: 5.389


 18%|█▊        | 9153/50000 [13:36<1:02:22, 10.91it/s]

Epochs: 9150 | epoch avg. loss: 0.167 | test avg. loss: 5.667
Epochs: 9151 | epoch avg. loss: 0.272 | test avg. loss: 5.451
Epochs: 9152 | epoch avg. loss: 0.248 | test avg. loss: 5.514


 18%|█▊        | 9155/50000 [13:36<1:02:15, 10.93it/s]

Epochs: 9153 | epoch avg. loss: 0.180 | test avg. loss: 5.804
Epochs: 9154 | epoch avg. loss: 0.270 | test avg. loss: 5.577
Epochs: 9155 | epoch avg. loss: 0.379 | test avg. loss: 6.325


 18%|█▊        | 9159/50000 [13:36<1:00:50, 11.19it/s]

Epochs: 9156 | epoch avg. loss: 0.583 | test avg. loss: 5.528
Epochs: 9157 | epoch avg. loss: 0.343 | test avg. loss: 5.987
Epochs: 9158 | epoch avg. loss: 0.506 | test avg. loss: 5.485


 18%|█▊        | 9161/50000 [13:37<59:58, 11.35it/s]

Epochs: 9159 | epoch avg. loss: 0.314 | test avg. loss: 6.074
Epochs: 9160 | epoch avg. loss: 0.696 | test avg. loss: 5.479
Epochs: 9161 | epoch avg. loss: 0.301 | test avg. loss: 5.581


                                                    

Epochs: 9162 | epoch avg. loss: 0.290 | test avg. loss: 5.450
Epochs: 9163 | epoch avg. loss: 0.202 | test avg. loss: 5.323
Epochs: 9164 | epoch avg. loss: 0.325 | test avg. loss: 5.828


 18%|█▊        | 9167/50000 [13:37<51:41, 13.17it/s]

Epochs: 9165 | epoch avg. loss: 0.416 | test avg. loss: 5.662
Epochs: 9166 | epoch avg. loss: 0.451 | test avg. loss: 6.786
Epochs: 9167 | epoch avg. loss: 0.690 | test avg. loss: 5.905


 18%|█▊        | 9171/50000 [13:37<51:47, 13.14it/s]

Epochs: 9168 | epoch avg. loss: 0.763 | test avg. loss: 6.524
Epochs: 9169 | epoch avg. loss: 0.557 | test avg. loss: 5.867
Epochs: 9170 | epoch avg. loss: 0.660 | test avg. loss: 6.324


 18%|█▊        | 9173/50000 [13:38<55:37, 12.23it/s]

Epochs: 9171 | epoch avg. loss: 0.857 | test avg. loss: 5.669
Epochs: 9172 | epoch avg. loss: 0.641 | test avg. loss: 5.600
Epochs: 9173 | epoch avg. loss: 0.265 | test avg. loss: 5.524


 18%|█▊        | 9177/50000 [13:38<55:14, 12.32it/s]

Epochs: 9174 | epoch avg. loss: 0.338 | test avg. loss: 6.576
Epochs: 9175 | epoch avg. loss: 0.704 | test avg. loss: 5.659
Epochs: 9176 | epoch avg. loss: 0.609 | test avg. loss: 5.881


 18%|█▊        | 9179/50000 [13:38<52:46, 12.89it/s]

Epochs: 9177 | epoch avg. loss: 0.459 | test avg. loss: 5.630
Epochs: 9178 | epoch avg. loss: 0.394 | test avg. loss: 5.499
Epochs: 9179 | epoch avg. loss: 0.311 | test avg. loss: 5.359


 18%|█▊        | 9183/50000 [13:38<56:36, 12.02it/s]

Epochs: 9180 | epoch avg. loss: 0.153 | test avg. loss: 5.559
Epochs: 9181 | epoch avg. loss: 0.192 | test avg. loss: 5.362
Epochs: 9182 | epoch avg. loss: 0.274 | test avg. loss: 6.145


 18%|█▊        | 9185/50000 [13:39<59:17, 11.47it/s]

Epochs: 9183 | epoch avg. loss: 0.375 | test avg. loss: 5.614
Epochs: 9184 | epoch avg. loss: 0.331 | test avg. loss: 6.609
Epochs: 9185 | epoch avg. loss: 0.721 | test avg. loss: 5.857


 18%|█▊        | 9189/50000 [13:39<58:21, 11.65it/s]  

Epochs: 9186 | epoch avg. loss: 0.272 | test avg. loss: 6.035
Epochs: 9187 | epoch avg. loss: 0.396 | test avg. loss: 6.152
Epochs: 9188 | epoch avg. loss: 0.345 | test avg. loss: 5.586


 18%|█▊        | 9191/50000 [13:39<55:10, 12.33it/s]

Epochs: 9189 | epoch avg. loss: 0.380 | test avg. loss: 5.921
Epochs: 9190 | epoch avg. loss: 0.311 | test avg. loss: 5.308
Epochs: 9191 | epoch avg. loss: 0.206 | test avg. loss: 5.367


 18%|█▊        | 9195/50000 [13:39<51:49, 13.12it/s]

Epochs: 9192 | epoch avg. loss: 0.152 | test avg. loss: 5.356
Epochs: 9193 | epoch avg. loss: 0.177 | test avg. loss: 5.399
Epochs: 9194 | epoch avg. loss: 0.193 | test avg. loss: 5.995


 18%|█▊        | 9197/50000 [13:40<54:25, 12.50it/s]

Epochs: 9195 | epoch avg. loss: 0.262 | test avg. loss: 5.626
Epochs: 9196 | epoch avg. loss: 0.339 | test avg. loss: 6.501
Epochs: 9197 | epoch avg. loss: 0.599 | test avg. loss: 5.542


 18%|█▊        | 9199/50000 [13:40<54:55, 12.38it/s]

Epochs: 9198 | epoch avg. loss: 0.249 | test avg. loss: 5.891
Epochs: 9199 | epoch avg. loss: 0.353 | test avg. loss: 5.354


 18%|█▊        | 9203/50000 [13:41<2:33:36,  4.43it/s]

Epochs: 9200 | epoch avg. loss: 0.230 | test avg. loss: 5.744
Epochs: 9201 | epoch avg. loss: 0.319 | test avg. loss: 5.475
Epochs: 9202 | epoch avg. loss: 0.254 | test avg. loss: 5.694


 18%|█▊        | 9205/50000 [13:42<2:03:47,  5.49it/s]

Epochs: 9203 | epoch avg. loss: 0.221 | test avg. loss: 6.069
Epochs: 9204 | epoch avg. loss: 0.258 | test avg. loss: 5.831
Epochs: 9205 | epoch avg. loss: 0.349 | test avg. loss: 6.055


 18%|█▊        | 9209/50000 [13:42<1:28:39,  7.67it/s]

Epochs: 9206 | epoch avg. loss: 0.357 | test avg. loss: 5.500
Epochs: 9207 | epoch avg. loss: 0.251 | test avg. loss: 6.301
Epochs: 9208 | epoch avg. loss: 0.696 | test avg. loss: 5.355


 18%|█▊        | 9211/50000 [13:42<1:18:48,  8.63it/s]

Epochs: 9209 | epoch avg. loss: 0.544 | test avg. loss: 6.081
Epochs: 9210 | epoch avg. loss: 0.659 | test avg. loss: 5.317
Epochs: 9211 | epoch avg. loss: 0.827 | test avg. loss: 5.548


 18%|█▊        | 9215/50000 [13:42<1:05:25, 10.39it/s]

Epochs: 9212 | epoch avg. loss: 0.435 | test avg. loss: 6.094
Epochs: 9213 | epoch avg. loss: 0.434 | test avg. loss: 5.827
Epochs: 9214 | epoch avg. loss: 0.394 | test avg. loss: 5.970


 18%|█▊        | 9217/50000 [13:43<1:02:44, 10.83it/s]

Epochs: 9215 | epoch avg. loss: 0.308 | test avg. loss: 5.486
Epochs: 9216 | epoch avg. loss: 0.390 | test avg. loss: 6.545
Epochs: 9217 | epoch avg. loss: 0.650 | test avg. loss: 5.461


 18%|█▊        | 9221/50000 [13:43<1:01:59, 10.96it/s]

Epochs: 9218 | epoch avg. loss: 0.791 | test avg. loss: 5.855
Epochs: 9219 | epoch avg. loss: 0.425 | test avg. loss: 5.499
Epochs: 9220 | epoch avg. loss: 0.467 | test avg. loss: 5.896


                                                    

Epochs: 9221 | epoch avg. loss: 0.461 | test avg. loss: 5.582
Epochs: 9222 | epoch avg. loss: 0.297 | test avg. loss: 5.382
Epochs: 9223 | epoch avg. loss: 0.286 | test avg. loss: 5.665


 18%|█▊        | 9227/50000 [13:43<53:52, 12.61it/s]

Epochs: 9224 | epoch avg. loss: 0.339 | test avg. loss: 5.422
Epochs: 9225 | epoch avg. loss: 0.499 | test avg. loss: 6.147
Epochs: 9226 | epoch avg. loss: 0.427 | test avg. loss: 5.771


 18%|█▊        | 9229/50000 [13:44<57:17, 11.86it/s]

Epochs: 9227 | epoch avg. loss: 0.699 | test avg. loss: 6.445
Epochs: 9228 | epoch avg. loss: 0.789 | test avg. loss: 5.269
Epochs: 9229 | epoch avg. loss: 0.195 | test avg. loss: 5.260


 18%|█▊        | 9233/50000 [13:44<58:10, 11.68it/s]

Epochs: 9230 | epoch avg. loss: 0.172 | test avg. loss: 5.296
Epochs: 9231 | epoch avg. loss: 0.249 | test avg. loss: 5.328
Epochs: 9232 | epoch avg. loss: 0.374 | test avg. loss: 6.182


 18%|█▊        | 9235/50000 [13:44<58:29, 11.62it/s]

Epochs: 9233 | epoch avg. loss: 0.443 | test avg. loss: 6.029
Epochs: 9234 | epoch avg. loss: 0.899 | test avg. loss: 7.247
Epochs: 9235 | epoch avg. loss: 0.808 | test avg. loss: 6.694


 18%|█▊        | 9239/50000 [13:44<54:09, 12.55it/s]

Epochs: 9236 | epoch avg. loss: 1.320 | test avg. loss: 7.715
Epochs: 9237 | epoch avg. loss: 1.147 | test avg. loss: 6.238
Epochs: 9238 | epoch avg. loss: 1.563 | test avg. loss: 7.343


 18%|█▊        | 9241/50000 [13:45<56:44, 11.97it/s]

Epochs: 9239 | epoch avg. loss: 1.917 | test avg. loss: 5.511
Epochs: 9240 | epoch avg. loss: 0.861 | test avg. loss: 5.973
Epochs: 9241 | epoch avg. loss: 0.747 | test avg. loss: 5.914


 18%|█▊        | 9245/50000 [13:45<52:38, 12.91it/s]

Epochs: 9242 | epoch avg. loss: 0.750 | test avg. loss: 6.104
Epochs: 9243 | epoch avg. loss: 0.719 | test avg. loss: 6.441
Epochs: 9244 | epoch avg. loss: 0.585 | test avg. loss: 5.654


 18%|█▊        | 9249/50000 [13:45<48:26, 14.02it/s]

Epochs: 9245 | epoch avg. loss: 0.436 | test avg. loss: 5.479
Epochs: 9246 | epoch avg. loss: 0.522 | test avg. loss: 4.936
Epochs: 9247 | epoch avg. loss: 0.267 | test avg. loss: 5.260
Epochs: 9248 | epoch avg. loss: 0.171 | test avg. loss: 5.420


 19%|█▊        | 9253/50000 [13:45<46:35, 14.57it/s]

Epochs: 9249 | epoch avg. loss: 0.299 | test avg. loss: 6.021
Epochs: 9250 | epoch avg. loss: 0.226 | test avg. loss: 5.900
Epochs: 9251 | epoch avg. loss: 0.317 | test avg. loss: 5.798
Epochs: 9252 | epoch avg. loss: 0.227 | test avg. loss: 5.865


 19%|█▊        | 9255/50000 [13:45<53:17, 12.74it/s]

Epochs: 9253 | epoch avg. loss: 0.279 | test avg. loss: 5.332
Epochs: 9254 | epoch avg. loss: 0.228 | test avg. loss: 5.271


 19%|█▊        | 9257/50000 [13:46<54:29, 12.46it/s]

Epochs: 9255 | epoch avg. loss: 0.132 | test avg. loss: 5.189
Epochs: 9256 | epoch avg. loss: 0.201 | test avg. loss: 5.453
Epochs: 9257 | epoch avg. loss: 0.125 | test avg. loss: 5.453


 19%|█▊        | 9261/50000 [13:46<51:28, 13.19it/s]

Epochs: 9258 | epoch avg. loss: 0.163 | test avg. loss: 5.819
Epochs: 9259 | epoch avg. loss: 0.196 | test avg. loss: 5.524
Epochs: 9260 | epoch avg. loss: 0.398 | test avg. loss: 5.750


 19%|█▊        | 9263/50000 [13:46<50:59, 13.32it/s]

Epochs: 9261 | epoch avg. loss: 0.380 | test avg. loss: 5.344
Epochs: 9262 | epoch avg. loss: 0.300 | test avg. loss: 6.208
Epochs: 9263 | epoch avg. loss: 0.727 | test avg. loss: 5.377


 19%|█▊        | 9267/50000 [13:46<50:24, 13.47it/s]

Epochs: 9264 | epoch avg. loss: 0.218 | test avg. loss: 5.604
Epochs: 9265 | epoch avg. loss: 0.344 | test avg. loss: 5.465
Epochs: 9266 | epoch avg. loss: 0.594 | test avg. loss: 5.499


 19%|█▊        | 9269/50000 [13:47<52:38, 12.90it/s]

Epochs: 9267 | epoch avg. loss: 0.362 | test avg. loss: 5.405
Epochs: 9268 | epoch avg. loss: 0.236 | test avg. loss: 5.162
Epochs: 9269 | epoch avg. loss: 0.253 | test avg. loss: 5.618


 19%|█▊        | 9273/50000 [13:47<54:12, 12.52it/s]

Epochs: 9270 | epoch avg. loss: 0.365 | test avg. loss: 5.478
Epochs: 9271 | epoch avg. loss: 0.557 | test avg. loss: 6.370
Epochs: 9272 | epoch avg. loss: 0.739 | test avg. loss: 5.795


 19%|█▊        | 9275/50000 [13:47<52:54, 12.83it/s]

Epochs: 9273 | epoch avg. loss: 0.777 | test avg. loss: 5.945
Epochs: 9274 | epoch avg. loss: 0.455 | test avg. loss: 5.267
Epochs: 9275 | epoch avg. loss: 0.459 | test avg. loss: 5.310


 19%|█▊        | 9279/50000 [13:47<51:27, 13.19it/s]

Epochs: 9276 | epoch avg. loss: 0.252 | test avg. loss: 5.393
Epochs: 9277 | epoch avg. loss: 0.154 | test avg. loss: 5.552
Epochs: 9278 | epoch avg. loss: 0.126 | test avg. loss: 5.397


 19%|█▊        | 9281/50000 [13:48<55:10, 12.30it/s]

Epochs: 9279 | epoch avg. loss: 0.290 | test avg. loss: 5.327
Epochs: 9280 | epoch avg. loss: 0.183 | test avg. loss: 5.569
Epochs: 9281 | epoch avg. loss: 0.218 | test avg. loss: 5.220


 19%|█▊        | 9285/50000 [13:48<53:02, 12.79it/s]

Epochs: 9282 | epoch avg. loss: 0.151 | test avg. loss: 5.295
Epochs: 9283 | epoch avg. loss: 0.137 | test avg. loss: 5.365
Epochs: 9284 | epoch avg. loss: 0.131 | test avg. loss: 5.254




Epochs: 9285 | epoch avg. loss: 0.149 | test avg. loss: 5.498
Epochs: 9286 | epoch avg. loss: 0.144 | test avg. loss: 5.471
Epochs: 9287 | epoch avg. loss: 0.295 | test avg. loss: 5.533


 19%|█▊        | 9291/50000 [13:48<48:08, 14.09it/s]

Epochs: 9288 | epoch avg. loss: 0.125 | test avg. loss: 5.635
Epochs: 9289 | epoch avg. loss: 0.126 | test avg. loss: 5.432
Epochs: 9290 | epoch avg. loss: 0.137 | test avg. loss: 5.387
Epochs: 9291 | epoch avg. loss: 0.122 | test avg. loss: 5.189


 19%|█▊        | 9295/50000 [13:48<49:01, 13.84it/s]

Epochs: 9292 | epoch avg. loss: 0.182 | test avg. loss: 5.362
Epochs: 9293 | epoch avg. loss: 0.176 | test avg. loss: 5.236
Epochs: 9294 | epoch avg. loss: 0.175 | test avg. loss: 5.271


 19%|█▊        | 9297/50000 [13:49<49:52, 13.60it/s]

Epochs: 9295 | epoch avg. loss: 0.142 | test avg. loss: 5.467
Epochs: 9296 | epoch avg. loss: 0.137 | test avg. loss: 5.330
Epochs: 9297 | epoch avg. loss: 0.177 | test avg. loss: 5.966


 19%|█▊        | 9299/50000 [13:49<50:52, 13.33it/s]

Epochs: 9298 | epoch avg. loss: 0.409 | test avg. loss: 5.461
Epochs: 9299 | epoch avg. loss: 0.442 | test avg. loss: 5.934


 19%|█▊        | 9303/50000 [13:50<2:20:25,  4.83it/s]

Epochs: 9300 | epoch avg. loss: 0.487 | test avg. loss: 5.446
Epochs: 9301 | epoch avg. loss: 0.313 | test avg. loss: 6.617
Epochs: 9302 | epoch avg. loss: 0.961 | test avg. loss: 5.498


 19%|█▊        | 9305/50000 [13:51<1:52:53,  6.01it/s]

Epochs: 9303 | epoch avg. loss: 0.728 | test avg. loss: 5.969
Epochs: 9304 | epoch avg. loss: 1.113 | test avg. loss: 5.062
Epochs: 9305 | epoch avg. loss: 0.653 | test avg. loss: 5.339


 19%|█▊        | 9309/50000 [13:51<1:20:07,  8.46it/s]

Epochs: 9306 | epoch avg. loss: 0.687 | test avg. loss: 5.419
Epochs: 9307 | epoch avg. loss: 0.435 | test avg. loss: 5.348
Epochs: 9308 | epoch avg. loss: 0.668 | test avg. loss: 5.049


 19%|█▊        | 9311/50000 [13:51<1:13:03,  9.28it/s]

Epochs: 9309 | epoch avg. loss: 0.455 | test avg. loss: 4.761
Epochs: 9310 | epoch avg. loss: 0.250 | test avg. loss: 5.092
Epochs: 9311 | epoch avg. loss: 0.268 | test avg. loss: 5.248


 19%|█▊        | 9315/50000 [13:51<1:00:46, 11.16it/s]

Epochs: 9312 | epoch avg. loss: 0.284 | test avg. loss: 5.888
Epochs: 9313 | epoch avg. loss: 0.209 | test avg. loss: 5.759
Epochs: 9314 | epoch avg. loss: 0.278 | test avg. loss: 6.182


 19%|█▊        | 9317/50000 [13:52<59:59, 11.30it/s]

Epochs: 9315 | epoch avg. loss: 0.384 | test avg. loss: 5.433
Epochs: 9316 | epoch avg. loss: 0.270 | test avg. loss: 5.175
Epochs: 9317 | epoch avg. loss: 0.124 | test avg. loss: 5.061


 19%|█▊        | 9321/50000 [13:52<56:49, 11.93it/s]  

Epochs: 9318 | epoch avg. loss: 0.146 | test avg. loss: 5.142
Epochs: 9319 | epoch avg. loss: 0.146 | test avg. loss: 5.160
Epochs: 9320 | epoch avg. loss: 0.142 | test avg. loss: 5.654


 19%|█▊        | 9323/50000 [13:52<53:02, 12.78it/s]

Epochs: 9321 | epoch avg. loss: 0.133 | test avg. loss: 5.775
Epochs: 9322 | epoch avg. loss: 0.408 | test avg. loss: 5.946
Epochs: 9323 | epoch avg. loss: 0.263 | test avg. loss: 5.447


 19%|█▊        | 9327/50000 [13:52<50:22, 13.46it/s]

Epochs: 9324 | epoch avg. loss: 0.197 | test avg. loss: 5.426
Epochs: 9325 | epoch avg. loss: 0.174 | test avg. loss: 5.179
Epochs: 9326 | epoch avg. loss: 0.148 | test avg. loss: 5.457
Epochs: 9327 | epoch avg. loss: 0.217 | test avg. loss: 5.302


 19%|█▊        | 9331/50000 [13:52<51:30, 13.16it/s]

Epochs: 9328 | epoch avg. loss: 0.141 | test avg. loss: 5.502
Epochs: 9329 | epoch avg. loss: 0.101 | test avg. loss: 5.435
Epochs: 9330 | epoch avg. loss: 0.153 | test avg. loss: 5.478


 19%|█▊        | 9333/50000 [13:53<54:07, 12.52it/s]

Epochs: 9331 | epoch avg. loss: 0.178 | test avg. loss: 5.575
Epochs: 9332 | epoch avg. loss: 0.179 | test avg. loss: 5.259
Epochs: 9333 | epoch avg. loss: 0.154 | test avg. loss: 5.300


 19%|█▊        | 9337/50000 [13:53<49:49, 13.60it/s]

Epochs: 9334 | epoch avg. loss: 0.125 | test avg. loss: 5.189
Epochs: 9335 | epoch avg. loss: 0.102 | test avg. loss: 5.238
Epochs: 9336 | epoch avg. loss: 0.108 | test avg. loss: 5.231


 19%|█▊        | 9339/50000 [13:53<50:25, 13.44it/s]

Epochs: 9337 | epoch avg. loss: 0.120 | test avg. loss: 5.250
Epochs: 9338 | epoch avg. loss: 0.112 | test avg. loss: 5.340
Epochs: 9339 | epoch avg. loss: 0.116 | test avg. loss: 5.189


 19%|█▊        | 9343/50000 [13:53<51:03, 13.27it/s]

Epochs: 9340 | epoch avg. loss: 0.122 | test avg. loss: 5.430
Epochs: 9341 | epoch avg. loss: 0.181 | test avg. loss: 5.329
Epochs: 9342 | epoch avg. loss: 0.343 | test avg. loss: 6.287


 19%|█▊        | 9345/50000 [13:54<54:03, 12.53it/s]

Epochs: 9343 | epoch avg. loss: 0.584 | test avg. loss: 5.499
Epochs: 9344 | epoch avg. loss: 0.543 | test avg. loss: 6.145
Epochs: 9345 | epoch avg. loss: 0.742 | test avg. loss: 5.488


                                                    

Epochs: 9346 | epoch avg. loss: 0.622 | test avg. loss: 5.997
Epochs: 9347 | epoch avg. loss: 0.573 | test avg. loss: 5.289
Epochs: 9348 | epoch avg. loss: 0.484 | test avg. loss: 5.122


 19%|█▊        | 9351/50000 [13:54<48:40, 13.92it/s]

Epochs: 9349 | epoch avg. loss: 0.345 | test avg. loss: 5.100
Epochs: 9350 | epoch avg. loss: 0.275 | test avg. loss: 5.273
Epochs: 9351 | epoch avg. loss: 0.221 | test avg. loss: 5.332


 19%|█▊        | 9355/50000 [13:54<50:31, 13.41it/s]

Epochs: 9352 | epoch avg. loss: 0.263 | test avg. loss: 5.778
Epochs: 9353 | epoch avg. loss: 0.232 | test avg. loss: 5.581
Epochs: 9354 | epoch avg. loss: 0.304 | test avg. loss: 5.571


 19%|█▊        | 9357/50000 [13:55<54:09, 12.51it/s]

Epochs: 9355 | epoch avg. loss: 0.281 | test avg. loss: 5.635
Epochs: 9356 | epoch avg. loss: 0.312 | test avg. loss: 5.045
Epochs: 9357 | epoch avg. loss: 0.376 | test avg. loss: 5.407


 19%|█▊        | 9361/50000 [13:55<58:52, 11.50it/s]

Epochs: 9358 | epoch avg. loss: 0.259 | test avg. loss: 5.157
Epochs: 9359 | epoch avg. loss: 0.238 | test avg. loss: 5.898
Epochs: 9360 | epoch avg. loss: 0.467 | test avg. loss: 5.333


 19%|█▊        | 9363/50000 [13:55<58:20, 11.61it/s]

Epochs: 9361 | epoch avg. loss: 0.250 | test avg. loss: 5.345
Epochs: 9362 | epoch avg. loss: 0.196 | test avg. loss: 5.463
Epochs: 9363 | epoch avg. loss: 0.257 | test avg. loss: 5.453


 19%|█▊        | 9367/50000 [13:55<57:56, 11.69it/s]

Epochs: 9364 | epoch avg. loss: 0.454 | test avg. loss: 5.582
Epochs: 9365 | epoch avg. loss: 0.193 | test avg. loss: 5.599
Epochs: 9366 | epoch avg. loss: 0.140 | test avg. loss: 5.439


 19%|█▊        | 9369/50000 [13:56<59:28, 11.39it/s]

Epochs: 9367 | epoch avg. loss: 0.140 | test avg. loss: 5.587
Epochs: 9368 | epoch avg. loss: 0.169 | test avg. loss: 5.277
Epochs: 9369 | epoch avg. loss: 0.113 | test avg. loss: 5.341


 19%|█▊        | 9373/50000 [13:56<56:09, 12.06it/s]

Epochs: 9370 | epoch avg. loss: 0.134 | test avg. loss: 5.265
Epochs: 9371 | epoch avg. loss: 0.111 | test avg. loss: 5.554
Epochs: 9372 | epoch avg. loss: 0.210 | test avg. loss: 5.427


 19%|█▉        | 9375/50000 [13:56<54:06, 12.51it/s]

Epochs: 9373 | epoch avg. loss: 0.183 | test avg. loss: 5.434
Epochs: 9374 | epoch avg. loss: 0.165 | test avg. loss: 5.508
Epochs: 9375 | epoch avg. loss: 0.163 | test avg. loss: 5.299


 19%|█▉        | 9379/50000 [13:56<51:55, 13.04it/s]

Epochs: 9376 | epoch avg. loss: 0.114 | test avg. loss: 5.132
Epochs: 9377 | epoch avg. loss: 0.198 | test avg. loss: 5.459
Epochs: 9378 | epoch avg. loss: 0.221 | test avg. loss: 5.187


 19%|█▉        | 9381/50000 [13:57<54:27, 12.43it/s]

Epochs: 9379 | epoch avg. loss: 0.235 | test avg. loss: 5.257
Epochs: 9380 | epoch avg. loss: 0.220 | test avg. loss: 5.717
Epochs: 9381 | epoch avg. loss: 0.272 | test avg. loss: 5.276


 19%|█▉        | 9385/50000 [13:57<58:16, 11.62it/s]

Epochs: 9382 | epoch avg. loss: 0.163 | test avg. loss: 5.192
Epochs: 9383 | epoch avg. loss: 0.129 | test avg. loss: 5.266
Epochs: 9384 | epoch avg. loss: 0.139 | test avg. loss: 5.133


 19%|█▉        | 9387/50000 [13:57<56:59, 11.88it/s]

Epochs: 9385 | epoch avg. loss: 0.136 | test avg. loss: 5.513
Epochs: 9386 | epoch avg. loss: 0.250 | test avg. loss: 5.239
Epochs: 9387 | epoch avg. loss: 0.122 | test avg. loss: 5.339


 19%|█▉        | 9391/50000 [13:57<56:58, 11.88it/s]

Epochs: 9388 | epoch avg. loss: 0.143 | test avg. loss: 5.249
Epochs: 9389 | epoch avg. loss: 0.151 | test avg. loss: 5.185
Epochs: 9390 | epoch avg. loss: 0.160 | test avg. loss: 5.474


 19%|█▉        | 9393/50000 [13:58<1:02:26, 10.84it/s]

Epochs: 9391 | epoch avg. loss: 0.239 | test avg. loss: 5.191
Epochs: 9392 | epoch avg. loss: 0.201 | test avg. loss: 5.236
Epochs: 9393 | epoch avg. loss: 0.128 | test avg. loss: 5.282


 19%|█▉        | 9397/50000 [13:58<57:14, 11.82it/s]

Epochs: 9394 | epoch avg. loss: 0.146 | test avg. loss: 5.203
Epochs: 9395 | epoch avg. loss: 0.230 | test avg. loss: 5.211
Epochs: 9396 | epoch avg. loss: 0.100 | test avg. loss: 5.143


 19%|█▉        | 9399/50000 [13:58<55:10, 12.27it/s]

Epochs: 9397 | epoch avg. loss: 0.152 | test avg. loss: 5.062
Epochs: 9398 | epoch avg. loss: 0.121 | test avg. loss: 5.160
Epochs: 9399 | epoch avg. loss: 0.130 | test avg. loss: 5.005


 19%|█▉        | 9402/50000 [14:00<2:55:41,  3.85it/s]

Epochs: 9400 | epoch avg. loss: 0.198 | test avg. loss: 5.068
Epochs: 9401 | epoch avg. loss: 0.124 | test avg. loss: 5.393
Epochs: 9402 | epoch avg. loss: 0.181 | test avg. loss: 5.212


 19%|█▉        | 9406/50000 [14:00<1:47:45,  6.28it/s]

Epochs: 9403 | epoch avg. loss: 0.122 | test avg. loss: 5.244
Epochs: 9404 | epoch avg. loss: 0.159 | test avg. loss: 5.507
Epochs: 9405 | epoch avg. loss: 0.244 | test avg. loss: 5.127


 19%|█▉        | 9408/50000 [14:00<1:31:46,  7.37it/s]

Epochs: 9406 | epoch avg. loss: 0.145 | test avg. loss: 5.352
Epochs: 9407 | epoch avg. loss: 0.150 | test avg. loss: 5.119
Epochs: 9408 | epoch avg. loss: 0.122 | test avg. loss: 5.442


 19%|█▉        | 9410/50000 [14:01<1:21:03,  8.35it/s]

Epochs: 9409 | epoch avg. loss: 0.197 | test avg. loss: 5.299
Epochs: 9410 | epoch avg. loss: 0.356 | test avg. loss: 5.409
Epochs: 9411 | epoch avg. loss: 0.356 | test avg. loss: 5.221


 19%|█▉        | 9414/50000 [14:01<1:10:52,  9.54it/s]

Epochs: 9412 | epoch avg. loss: 0.181 | test avg. loss: 5.423
Epochs: 9413 | epoch avg. loss: 0.489 | test avg. loss: 5.765
Epochs: 9414 | epoch avg. loss: 0.355 | test avg. loss: 5.393


 19%|█▉        | 9418/50000 [14:01<1:02:40, 10.79it/s]

Epochs: 9415 | epoch avg. loss: 0.419 | test avg. loss: 6.219
Epochs: 9416 | epoch avg. loss: 1.325 | test avg. loss: 5.199
Epochs: 9417 | epoch avg. loss: 0.585 | test avg. loss: 5.341


 19%|█▉        | 9420/50000 [14:01<59:59, 11.27it/s]

Epochs: 9418 | epoch avg. loss: 0.513 | test avg. loss: 5.642
Epochs: 9419 | epoch avg. loss: 0.360 | test avg. loss: 5.324
Epochs: 9420 | epoch avg. loss: 0.226 | test avg. loss: 5.314


 19%|█▉        | 9424/50000 [14:02<58:53, 11.48it/s]  

Epochs: 9421 | epoch avg. loss: 0.305 | test avg. loss: 5.287
Epochs: 9422 | epoch avg. loss: 0.259 | test avg. loss: 5.645
Epochs: 9423 | epoch avg. loss: 0.295 | test avg. loss: 5.165


 19%|█▉        | 9426/50000 [14:02<58:17, 11.60it/s]

Epochs: 9424 | epoch avg. loss: 0.317 | test avg. loss: 5.817
Epochs: 9425 | epoch avg. loss: 0.432 | test avg. loss: 5.020
Epochs: 9426 | epoch avg. loss: 0.313 | test avg. loss: 6.032


 19%|█▉        | 9430/50000 [14:02<55:28, 12.19it/s]

Epochs: 9427 | epoch avg. loss: 0.646 | test avg. loss: 4.886
Epochs: 9428 | epoch avg. loss: 0.347 | test avg. loss: 4.903
Epochs: 9429 | epoch avg. loss: 0.215 | test avg. loss: 4.990


 19%|█▉        | 9432/50000 [14:02<53:06, 12.73it/s]

Epochs: 9430 | epoch avg. loss: 0.188 | test avg. loss: 5.004
Epochs: 9431 | epoch avg. loss: 0.170 | test avg. loss: 5.301
Epochs: 9432 | epoch avg. loss: 0.169 | test avg. loss: 5.154


 19%|█▉        | 9436/50000 [14:02<52:54, 12.78it/s]

Epochs: 9433 | epoch avg. loss: 0.157 | test avg. loss: 5.204
Epochs: 9434 | epoch avg. loss: 0.128 | test avg. loss: 4.988
Epochs: 9435 | epoch avg. loss: 0.148 | test avg. loss: 5.135


 19%|█▉        | 9438/50000 [14:03<54:19, 12.44it/s]

Epochs: 9436 | epoch avg. loss: 0.295 | test avg. loss: 5.210
Epochs: 9437 | epoch avg. loss: 0.189 | test avg. loss: 4.982
Epochs: 9438 | epoch avg. loss: 0.156 | test avg. loss: 5.165


 19%|█▉        | 9442/50000 [14:03<57:05, 11.84it/s]

Epochs: 9439 | epoch avg. loss: 0.110 | test avg. loss: 5.226
Epochs: 9440 | epoch avg. loss: 0.110 | test avg. loss: 5.299
Epochs: 9441 | epoch avg. loss: 0.125 | test avg. loss: 5.286


 19%|█▉        | 9444/50000 [14:03<56:52, 11.88it/s]

Epochs: 9442 | epoch avg. loss: 0.120 | test avg. loss: 5.158
Epochs: 9443 | epoch avg. loss: 0.181 | test avg. loss: 5.908
Epochs: 9444 | epoch avg. loss: 0.462 | test avg. loss: 5.134


 19%|█▉        | 9448/50000 [14:03<58:55, 11.47it/s]

Epochs: 9445 | epoch avg. loss: 0.340 | test avg. loss: 5.064
Epochs: 9446 | epoch avg. loss: 0.115 | test avg. loss: 5.200
Epochs: 9447 | epoch avg. loss: 0.110 | test avg. loss: 5.206


 19%|█▉        | 9450/50000 [14:04<59:46, 11.31it/s]

Epochs: 9448 | epoch avg. loss: 0.106 | test avg. loss: 5.204
Epochs: 9449 | epoch avg. loss: 0.105 | test avg. loss: 5.130
Epochs: 9450 | epoch avg. loss: 0.109 | test avg. loss: 5.225


 19%|█▉        | 9454/50000 [14:04<55:22, 12.20it/s]

Epochs: 9451 | epoch avg. loss: 0.136 | test avg. loss: 5.024
Epochs: 9452 | epoch avg. loss: 0.145 | test avg. loss: 5.238
Epochs: 9453 | epoch avg. loss: 0.185 | test avg. loss: 5.090


 19%|█▉        | 9458/50000 [14:04<50:04, 13.50it/s]

Epochs: 9454 | epoch avg. loss: 0.193 | test avg. loss: 5.102
Epochs: 9455 | epoch avg. loss: 0.231 | test avg. loss: 5.262
Epochs: 9456 | epoch avg. loss: 0.170 | test avg. loss: 5.048
Epochs: 9457 | epoch avg. loss: 0.114 | test avg. loss: 5.008


 19%|█▉        | 9460/50000 [14:04<49:51, 13.55it/s]

Epochs: 9458 | epoch avg. loss: 0.113 | test avg. loss: 5.074
Epochs: 9459 | epoch avg. loss: 0.112 | test avg. loss: 5.206
Epochs: 9460 | epoch avg. loss: 0.163 | test avg. loss: 5.268


 19%|█▉        | 9464/50000 [14:05<47:17, 14.29it/s]

Epochs: 9461 | epoch avg. loss: 0.131 | test avg. loss: 5.093
Epochs: 9462 | epoch avg. loss: 0.140 | test avg. loss: 5.057
Epochs: 9463 | epoch avg. loss: 0.140 | test avg. loss: 5.555
Epochs: 9464 | epoch avg. loss: 0.310 | test avg. loss: 5.233


 19%|█▉        | 9468/50000 [14:05<49:05, 13.76it/s]

Epochs: 9465 | epoch avg. loss: 0.326 | test avg. loss: 5.637
Epochs: 9466 | epoch avg. loss: 0.557 | test avg. loss: 5.252
Epochs: 9467 | epoch avg. loss: 0.327 | test avg. loss: 5.342


 19%|█▉        | 9472/50000 [14:05<46:41, 14.47it/s]

Epochs: 9468 | epoch avg. loss: 0.421 | test avg. loss: 6.312
Epochs: 9469 | epoch avg. loss: 0.628 | test avg. loss: 5.069
Epochs: 9470 | epoch avg. loss: 0.366 | test avg. loss: 5.076
Epochs: 9471 | epoch avg. loss: 0.228 | test avg. loss: 5.151


 19%|█▉        | 9474/50000 [14:05<47:19, 14.27it/s]

Epochs: 9472 | epoch avg. loss: 0.214 | test avg. loss: 5.118
Epochs: 9473 | epoch avg. loss: 0.205 | test avg. loss: 5.133
Epochs: 9474 | epoch avg. loss: 0.110 | test avg. loss: 5.126


 19%|█▉        | 9478/50000 [14:06<50:00, 13.51it/s]

Epochs: 9475 | epoch avg. loss: 0.129 | test avg. loss: 4.964
Epochs: 9476 | epoch avg. loss: 0.175 | test avg. loss: 4.991
Epochs: 9477 | epoch avg. loss: 0.101 | test avg. loss: 4.886


 19%|█▉        | 9482/50000 [14:06<47:16, 14.28it/s]

Epochs: 9478 | epoch avg. loss: 0.175 | test avg. loss: 5.041
Epochs: 9479 | epoch avg. loss: 0.106 | test avg. loss: 5.027
Epochs: 9480 | epoch avg. loss: 0.131 | test avg. loss: 5.202
Epochs: 9481 | epoch avg. loss: 0.099 | test avg. loss: 5.167


 19%|█▉        | 9486/50000 [14:06<46:52, 14.41it/s]

Epochs: 9482 | epoch avg. loss: 0.149 | test avg. loss: 5.132
Epochs: 9483 | epoch avg. loss: 0.121 | test avg. loss: 5.158
Epochs: 9484 | epoch avg. loss: 0.105 | test avg. loss: 5.051
Epochs: 9485 | epoch avg. loss: 0.105 | test avg. loss: 5.133


 19%|█▉        | 9488/50000 [14:06<46:28, 14.53it/s]

Epochs: 9486 | epoch avg. loss: 0.128 | test avg. loss: 5.083
Epochs: 9487 | epoch avg. loss: 0.147 | test avg. loss: 5.081
Epochs: 9488 | epoch avg. loss: 0.167 | test avg. loss: 5.622


 19%|█▉        | 9492/50000 [14:07<48:25, 13.94it/s]

Epochs: 9489 | epoch avg. loss: 0.264 | test avg. loss: 5.645
Epochs: 9490 | epoch avg. loss: 0.788 | test avg. loss: 5.959
Epochs: 9491 | epoch avg. loss: 0.635 | test avg. loss: 5.232


 19%|█▉        | 9496/50000 [14:07<45:48, 14.73it/s]

Epochs: 9492 | epoch avg. loss: 0.345 | test avg. loss: 5.265
Epochs: 9493 | epoch avg. loss: 0.325 | test avg. loss: 5.038
Epochs: 9494 | epoch avg. loss: 0.564 | test avg. loss: 5.521
Epochs: 9495 | epoch avg. loss: 0.679 | test avg. loss: 6.497


 19%|█▉        | 9498/50000 [14:07<45:23, 14.87it/s]

Epochs: 9496 | epoch avg. loss: 1.100 | test avg. loss: 5.534
Epochs: 9497 | epoch avg. loss: 0.733 | test avg. loss: 5.707
Epochs: 9498 | epoch avg. loss: 0.545 | test avg. loss: 5.155


 19%|█▉        | 9498/50000 [14:07<45:23, 14.87it/s]

Epochs: 9499 | epoch avg. loss: 0.188 | test avg. loss: 5.388


 19%|█▉        | 9502/50000 [14:09<2:20:24,  4.81it/s]

Epochs: 9500 | epoch avg. loss: 0.362 | test avg. loss: 4.933
Epochs: 9501 | epoch avg. loss: 0.305 | test avg. loss: 5.196
Epochs: 9502 | epoch avg. loss: 0.571 | test avg. loss: 5.993


 19%|█▉        | 9506/50000 [14:09<1:36:06,  7.02it/s]

Epochs: 9503 | epoch avg. loss: 0.571 | test avg. loss: 6.490
Epochs: 9504 | epoch avg. loss: 2.033 | test avg. loss: 5.984
Epochs: 9505 | epoch avg. loss: 0.451 | test avg. loss: 5.492


 19%|█▉        | 9508/50000 [14:09<1:21:33,  8.27it/s]

Epochs: 9506 | epoch avg. loss: 0.380 | test avg. loss: 5.323
Epochs: 9507 | epoch avg. loss: 0.286 | test avg. loss: 5.551
Epochs: 9508 | epoch avg. loss: 0.339 | test avg. loss: 5.056


 19%|█▉        | 9512/50000 [14:09<1:04:58, 10.39it/s]

Epochs: 9509 | epoch avg. loss: 0.399 | test avg. loss: 5.795
Epochs: 9510 | epoch avg. loss: 0.478 | test avg. loss: 5.161
Epochs: 9511 | epoch avg. loss: 0.306 | test avg. loss: 5.369


 19%|█▉        | 9514/50000 [14:10<1:01:00, 11.06it/s]

Epochs: 9512 | epoch avg. loss: 0.252 | test avg. loss: 5.781
Epochs: 9513 | epoch avg. loss: 0.313 | test avg. loss: 5.635
Epochs: 9514 | epoch avg. loss: 0.623 | test avg. loss: 6.671


 19%|█▉        | 9518/50000 [14:10<55:05, 12.25it/s]

Epochs: 9515 | epoch avg. loss: 0.785 | test avg. loss: 5.190
Epochs: 9516 | epoch avg. loss: 0.741 | test avg. loss: 5.244
Epochs: 9517 | epoch avg. loss: 0.405 | test avg. loss: 5.242


 19%|█▉        | 9522/50000 [14:10<49:08, 13.73it/s]

Epochs: 9518 | epoch avg. loss: 0.172 | test avg. loss: 5.504
Epochs: 9519 | epoch avg. loss: 0.249 | test avg. loss: 5.598
Epochs: 9520 | epoch avg. loss: 0.174 | test avg. loss: 5.250
Epochs: 9521 | epoch avg. loss: 0.130 | test avg. loss: 5.243


 19%|█▉        | 9524/50000 [14:10<47:53, 14.09it/s]

Epochs: 9522 | epoch avg. loss: 0.175 | test avg. loss: 4.878
Epochs: 9523 | epoch avg. loss: 0.281 | test avg. loss: 5.189
Epochs: 9524 | epoch avg. loss: 0.334 | test avg. loss: 5.011




Epochs: 9525 | epoch avg. loss: 0.490 | test avg. loss: 5.185
Epochs: 9526 | epoch avg. loss: 0.281 | test avg. loss: 5.402


 19%|█▉        | 9530/50000 [14:11<54:02, 12.48it/s]

Epochs: 9527 | epoch avg. loss: 0.183 | test avg. loss: 5.229
Epochs: 9528 | epoch avg. loss: 0.173 | test avg. loss: 5.304
Epochs: 9529 | epoch avg. loss: 0.179 | test avg. loss: 5.098


 19%|█▉        | 9534/50000 [14:11<48:52, 13.80it/s]

Epochs: 9530 | epoch avg. loss: 0.195 | test avg. loss: 5.802
Epochs: 9531 | epoch avg. loss: 0.514 | test avg. loss: 5.141
Epochs: 9532 | epoch avg. loss: 0.311 | test avg. loss: 5.289
Epochs: 9533 | epoch avg. loss: 0.212 | test avg. loss: 5.277


                                                    

Epochs: 9534 | epoch avg. loss: 0.181 | test avg. loss: 5.378
Epochs: 9535 | epoch avg. loss: 0.151 | test avg. loss: 5.249
Epochs: 9536 | epoch avg. loss: 0.132 | test avg. loss: 5.254


 19%|█▉        | 9540/50000 [14:11<50:35, 13.33it/s]

Epochs: 9537 | epoch avg. loss: 0.183 | test avg. loss: 5.066
Epochs: 9538 | epoch avg. loss: 0.095 | test avg. loss: 5.253
Epochs: 9539 | epoch avg. loss: 0.206 | test avg. loss: 4.941


 19%|█▉        | 9542/50000 [14:12<50:17, 13.41it/s]

Epochs: 9540 | epoch avg. loss: 0.133 | test avg. loss: 5.316
Epochs: 9541 | epoch avg. loss: 0.245 | test avg. loss: 4.966
Epochs: 9542 | epoch avg. loss: 0.195 | test avg. loss: 5.219


 19%|█▉        | 9546/50000 [14:12<47:45, 14.12it/s]

Epochs: 9543 | epoch avg. loss: 0.213 | test avg. loss: 5.144
Epochs: 9544 | epoch avg. loss: 0.145 | test avg. loss: 5.198
Epochs: 9545 | epoch avg. loss: 0.130 | test avg. loss: 5.186
Epochs: 9546 | epoch avg. loss: 0.102 | test avg. loss: 5.246


 19%|█▉        | 9550/50000 [14:12<45:45, 14.73it/s]

Epochs: 9547 | epoch avg. loss: 0.151 | test avg. loss: 5.195
Epochs: 9548 | epoch avg. loss: 0.145 | test avg. loss: 4.998
Epochs: 9549 | epoch avg. loss: 0.156 | test avg. loss: 5.135
Epochs: 9550 | epoch avg. loss: 0.137 | test avg. loss: 4.981


 19%|█▉        | 9554/50000 [14:12<49:19, 13.67it/s]

Epochs: 9551 | epoch avg. loss: 0.153 | test avg. loss: 5.070
Epochs: 9552 | epoch avg. loss: 0.127 | test avg. loss: 5.345
Epochs: 9553 | epoch avg. loss: 0.139 | test avg. loss: 5.164


 19%|█▉        | 9558/50000 [14:13<48:44, 13.83it/s]

Epochs: 9554 | epoch avg. loss: 0.309 | test avg. loss: 5.116
Epochs: 9555 | epoch avg. loss: 0.093 | test avg. loss: 4.995
Epochs: 9556 | epoch avg. loss: 0.177 | test avg. loss: 4.949
Epochs: 9557 | epoch avg. loss: 0.143 | test avg. loss: 5.077


 19%|█▉        | 9562/50000 [14:13<46:26, 14.51it/s]

Epochs: 9558 | epoch avg. loss: 0.140 | test avg. loss: 5.186
Epochs: 9559 | epoch avg. loss: 0.137 | test avg. loss: 5.081
Epochs: 9560 | epoch avg. loss: 0.095 | test avg. loss: 5.045
Epochs: 9561 | epoch avg. loss: 0.093 | test avg. loss: 5.099


 19%|█▉        | 9566/50000 [14:13<44:25, 15.17it/s]

Epochs: 9562 | epoch avg. loss: 0.099 | test avg. loss: 5.047
Epochs: 9563 | epoch avg. loss: 0.142 | test avg. loss: 5.154
Epochs: 9564 | epoch avg. loss: 0.133 | test avg. loss: 5.116
Epochs: 9565 | epoch avg. loss: 0.115 | test avg. loss: 5.026


 19%|█▉        | 9568/50000 [14:13<45:43, 14.74it/s]

Epochs: 9566 | epoch avg. loss: 0.127 | test avg. loss: 5.043
Epochs: 9567 | epoch avg. loss: 0.091 | test avg. loss: 5.015
Epochs: 9568 | epoch avg. loss: 0.102 | test avg. loss: 5.261


 19%|█▉        | 9572/50000 [14:14<49:10, 13.70it/s]

Epochs: 9569 | epoch avg. loss: 0.208 | test avg. loss: 5.108
Epochs: 9570 | epoch avg. loss: 0.100 | test avg. loss: 5.108
Epochs: 9571 | epoch avg. loss: 0.123 | test avg. loss: 5.229


 19%|█▉        | 9574/50000 [14:14<49:56, 13.49it/s]

Epochs: 9572 | epoch avg. loss: 0.125 | test avg. loss: 5.065
Epochs: 9573 | epoch avg. loss: 0.120 | test avg. loss: 5.717
Epochs: 9574 | epoch avg. loss: 0.369 | test avg. loss: 5.207


 19%|█▉        | 9578/50000 [14:14<48:42, 13.83it/s]

Epochs: 9575 | epoch avg. loss: 0.470 | test avg. loss: 5.271
Epochs: 9576 | epoch avg. loss: 0.154 | test avg. loss: 5.174
Epochs: 9577 | epoch avg. loss: 0.234 | test avg. loss: 5.094


 19%|█▉        | 9580/50000 [14:14<48:50, 13.79it/s]

Epochs: 9578 | epoch avg. loss: 0.177 | test avg. loss: 5.116
Epochs: 9579 | epoch avg. loss: 0.102 | test avg. loss: 5.037
Epochs: 9580 | epoch avg. loss: 0.178 | test avg. loss: 5.109


 19%|█▉        | 9584/50000 [14:15<55:47, 12.07it/s]

Epochs: 9581 | epoch avg. loss: 0.235 | test avg. loss: 5.534
Epochs: 9582 | epoch avg. loss: 0.497 | test avg. loss: 5.216
Epochs: 9583 | epoch avg. loss: 0.247 | test avg. loss: 5.100


 19%|█▉        | 9586/50000 [14:15<55:24, 12.16it/s]

Epochs: 9584 | epoch avg. loss: 0.199 | test avg. loss: 5.190
Epochs: 9585 | epoch avg. loss: 0.213 | test avg. loss: 4.945
Epochs: 9586 | epoch avg. loss: 0.244 | test avg. loss: 4.996


 19%|█▉        | 9590/50000 [14:15<49:52, 13.50it/s]

Epochs: 9587 | epoch avg. loss: 0.122 | test avg. loss: 5.153
Epochs: 9588 | epoch avg. loss: 0.135 | test avg. loss: 5.250
Epochs: 9589 | epoch avg. loss: 0.125 | test avg. loss: 5.264
Epochs: 9590 | epoch avg. loss: 0.109 | test avg. loss: 5.324


 19%|█▉        | 9594/50000 [14:15<47:23, 14.21it/s]

Epochs: 9591 | epoch avg. loss: 0.120 | test avg. loss: 5.195
Epochs: 9592 | epoch avg. loss: 0.263 | test avg. loss: 5.307
Epochs: 9593 | epoch avg. loss: 0.264 | test avg. loss: 4.939


 19%|█▉        | 9596/50000 [14:16<52:06, 12.92it/s]

Epochs: 9594 | epoch avg. loss: 0.174 | test avg. loss: 4.961
Epochs: 9595 | epoch avg. loss: 0.151 | test avg. loss: 5.036
Epochs: 9596 | epoch avg. loss: 0.141 | test avg. loss: 5.218


 19%|█▉        | 9598/50000 [14:16<50:59, 13.21it/s]

Epochs: 9597 | epoch avg. loss: 0.205 | test avg. loss: 5.496
Epochs: 9598 | epoch avg. loss: 0.111 | test avg. loss: 5.544
Epochs: 9599 | epoch avg. loss: 0.416 | test avg. loss: 5.462


 19%|█▉        | 9602/50000 [14:18<2:34:15,  4.36it/s]

Epochs: 9600 | epoch avg. loss: 0.175 | test avg. loss: 5.043
Epochs: 9601 | epoch avg. loss: 0.185 | test avg. loss: 4.935
Epochs: 9602 | epoch avg. loss: 0.217 | test avg. loss: 4.971


 19%|█▉        | 9606/50000 [14:18<1:46:12,  6.34it/s]

Epochs: 9603 | epoch avg. loss: 0.146 | test avg. loss: 5.037
Epochs: 9604 | epoch avg. loss: 0.145 | test avg. loss: 5.500
Epochs: 9605 | epoch avg. loss: 0.208 | test avg. loss: 5.269


 19%|█▉        | 9608/50000 [14:18<1:30:48,  7.41it/s]

Epochs: 9606 | epoch avg. loss: 0.268 | test avg. loss: 5.285
Epochs: 9607 | epoch avg. loss: 0.179 | test avg. loss: 5.196
Epochs: 9608 | epoch avg. loss: 0.148 | test avg. loss: 4.916


 19%|█▉        | 9612/50000 [14:18<1:09:32,  9.68it/s]

Epochs: 9609 | epoch avg. loss: 0.145 | test avg. loss: 5.640
Epochs: 9610 | epoch avg. loss: 0.495 | test avg. loss: 4.958
Epochs: 9611 | epoch avg. loss: 0.256 | test avg. loss: 5.088


 19%|█▉        | 9614/50000 [14:18<1:02:59, 10.69it/s]

Epochs: 9612 | epoch avg. loss: 0.211 | test avg. loss: 5.212
Epochs: 9613 | epoch avg. loss: 0.107 | test avg. loss: 5.212
Epochs: 9614 | epoch avg. loss: 0.112 | test avg. loss: 5.191


 19%|█▉        | 9618/50000 [14:19<59:49, 11.25it/s]

Epochs: 9615 | epoch avg. loss: 0.123 | test avg. loss: 5.285
Epochs: 9616 | epoch avg. loss: 0.122 | test avg. loss: 5.129
Epochs: 9617 | epoch avg. loss: 0.090 | test avg. loss: 5.256


 19%|█▉        | 9620/50000 [14:19<1:00:21, 11.15it/s]

Epochs: 9618 | epoch avg. loss: 0.173 | test avg. loss: 5.113
Epochs: 9619 | epoch avg. loss: 0.087 | test avg. loss: 5.203
Epochs: 9620 | epoch avg. loss: 0.171 | test avg. loss: 5.300


 19%|█▉        | 9624/50000 [14:19<57:13, 11.76it/s]  

Epochs: 9621 | epoch avg. loss: 0.168 | test avg. loss: 5.034
Epochs: 9622 | epoch avg. loss: 0.108 | test avg. loss: 5.289
Epochs: 9623 | epoch avg. loss: 0.157 | test avg. loss: 5.082


 19%|█▉        | 9626/50000 [14:19<54:10, 12.42it/s]

Epochs: 9624 | epoch avg. loss: 0.200 | test avg. loss: 5.234
Epochs: 9625 | epoch avg. loss: 0.166 | test avg. loss: 5.319
Epochs: 9626 | epoch avg. loss: 0.107 | test avg. loss: 5.267


 19%|█▉        | 9630/50000 [14:20<53:46, 12.51it/s]

Epochs: 9627 | epoch avg. loss: 0.210 | test avg. loss: 5.777
Epochs: 9628 | epoch avg. loss: 0.352 | test avg. loss: 5.437
Epochs: 9629 | epoch avg. loss: 0.797 | test avg. loss: 5.856


 19%|█▉        | 9632/50000 [14:20<53:21, 12.61it/s]

Epochs: 9630 | epoch avg. loss: 0.886 | test avg. loss: 5.104
Epochs: 9631 | epoch avg. loss: 0.438 | test avg. loss: 5.303
Epochs: 9632 | epoch avg. loss: 0.527 | test avg. loss: 7.661


 19%|█▉        | 9636/50000 [14:20<55:01, 12.22it/s]

Epochs: 9633 | epoch avg. loss: 1.522 | test avg. loss: 5.654
Epochs: 9634 | epoch avg. loss: 0.906 | test avg. loss: 5.355
Epochs: 9635 | epoch avg. loss: 0.511 | test avg. loss: 5.036


 19%|█▉        | 9638/50000 [14:20<57:10, 11.77it/s]

Epochs: 9636 | epoch avg. loss: 0.603 | test avg. loss: 5.196
Epochs: 9637 | epoch avg. loss: 0.549 | test avg. loss: 6.488
Epochs: 9638 | epoch avg. loss: 0.617 | test avg. loss: 6.385


 19%|█▉        | 9642/50000 [14:21<56:37, 11.88it/s]

Epochs: 9639 | epoch avg. loss: 1.557 | test avg. loss: 6.426
Epochs: 9640 | epoch avg. loss: 1.241 | test avg. loss: 5.561
Epochs: 9641 | epoch avg. loss: 0.531 | test avg. loss: 5.435


 19%|█▉        | 9644/50000 [14:21<55:57, 12.02it/s]

Epochs: 9642 | epoch avg. loss: 0.916 | test avg. loss: 9.668
Epochs: 9643 | epoch avg. loss: 2.700 | test avg. loss: 8.338
Epochs: 9644 | epoch avg. loss: 3.686 | test avg. loss: 6.921


 19%|█▉        | 9648/50000 [14:21<54:58, 12.23it/s]

Epochs: 9645 | epoch avg. loss: 1.747 | test avg. loss: 5.845
Epochs: 9646 | epoch avg. loss: 0.527 | test avg. loss: 5.522
Epochs: 9647 | epoch avg. loss: 0.481 | test avg. loss: 6.098


 19%|█▉        | 9650/50000 [14:21<55:08, 12.19it/s]

Epochs: 9648 | epoch avg. loss: 0.403 | test avg. loss: 5.768
Epochs: 9649 | epoch avg. loss: 1.011 | test avg. loss: 5.886
Epochs: 9650 | epoch avg. loss: 0.457 | test avg. loss: 5.571


 19%|█▉        | 9654/50000 [14:22<52:03, 12.92it/s]

Epochs: 9651 | epoch avg. loss: 0.468 | test avg. loss: 6.208
Epochs: 9652 | epoch avg. loss: 0.745 | test avg. loss: 5.690
Epochs: 9653 | epoch avg. loss: 0.344 | test avg. loss: 5.337


 19%|█▉        | 9656/50000 [14:22<54:09, 12.42it/s]

Epochs: 9654 | epoch avg. loss: 0.493 | test avg. loss: 6.485
Epochs: 9655 | epoch avg. loss: 0.817 | test avg. loss: 5.711
Epochs: 9656 | epoch avg. loss: 0.900 | test avg. loss: 6.236


 19%|█▉        | 9660/50000 [14:22<57:04, 11.78it/s]

Epochs: 9657 | epoch avg. loss: 0.949 | test avg. loss: 5.114
Epochs: 9658 | epoch avg. loss: 0.554 | test avg. loss: 5.414
Epochs: 9659 | epoch avg. loss: 0.303 | test avg. loss: 5.452


 19%|█▉        | 9662/50000 [14:22<57:16, 11.74it/s]

Epochs: 9660 | epoch avg. loss: 0.349 | test avg. loss: 5.262
Epochs: 9661 | epoch avg. loss: 0.305 | test avg. loss: 6.005
Epochs: 9662 | epoch avg. loss: 0.421 | test avg. loss: 5.382


 19%|█▉        | 9666/50000 [14:23<1:00:13, 11.16it/s]

Epochs: 9663 | epoch avg. loss: 0.407 | test avg. loss: 5.662
Epochs: 9664 | epoch avg. loss: 0.498 | test avg. loss: 5.144
Epochs: 9665 | epoch avg. loss: 0.279 | test avg. loss: 5.015


 19%|█▉        | 9668/50000 [14:23<59:27, 11.31it/s]

Epochs: 9666 | epoch avg. loss: 0.231 | test avg. loss: 5.319
Epochs: 9667 | epoch avg. loss: 0.251 | test avg. loss: 4.965
Epochs: 9668 | epoch avg. loss: 0.228 | test avg. loss: 5.442


 19%|█▉        | 9672/50000 [14:23<53:52, 12.47it/s]

Epochs: 9669 | epoch avg. loss: 0.175 | test avg. loss: 5.336
Epochs: 9670 | epoch avg. loss: 0.265 | test avg. loss: 5.517
Epochs: 9671 | epoch avg. loss: 0.199 | test avg. loss: 5.745
Epochs: 9672 | epoch avg. loss: 0.212 | test avg. loss: 5.341


 19%|█▉        | 9676/50000 [14:24<53:51, 12.48it/s]

Epochs: 9673 | epoch avg. loss: 0.135 | test avg. loss: 5.363
Epochs: 9674 | epoch avg. loss: 0.167 | test avg. loss: 4.959
Epochs: 9675 | epoch avg. loss: 0.221 | test avg. loss: 4.999


 19%|█▉        | 9678/50000 [14:24<52:26, 12.81it/s]

Epochs: 9676 | epoch avg. loss: 0.133 | test avg. loss: 5.141
Epochs: 9677 | epoch avg. loss: 0.179 | test avg. loss: 5.308
Epochs: 9678 | epoch avg. loss: 0.423 | test avg. loss: 5.855


 19%|█▉        | 9682/50000 [14:24<50:01, 13.43it/s]

Epochs: 9679 | epoch avg. loss: 0.560 | test avg. loss: 5.071
Epochs: 9680 | epoch avg. loss: 0.285 | test avg. loss: 5.112
Epochs: 9681 | epoch avg. loss: 0.281 | test avg. loss: 5.168


                                                    

Epochs: 9682 | epoch avg. loss: 0.220 | test avg. loss: 5.120
Epochs: 9683 | epoch avg. loss: 0.191 | test avg. loss: 5.283
Epochs: 9684 | epoch avg. loss: 0.152 | test avg. loss: 5.158


 19%|█▉        | 9688/50000 [14:24<47:57, 14.01it/s]

Epochs: 9685 | epoch avg. loss: 0.241 | test avg. loss: 5.499
Epochs: 9686 | epoch avg. loss: 0.300 | test avg. loss: 5.029
Epochs: 9687 | epoch avg. loss: 0.253 | test avg. loss: 5.088


 19%|█▉        | 9690/50000 [14:25<53:21, 12.59it/s]

Epochs: 9688 | epoch avg. loss: 0.180 | test avg. loss: 5.660
Epochs: 9689 | epoch avg. loss: 0.289 | test avg. loss: 5.229
Epochs: 9690 | epoch avg. loss: 0.376 | test avg. loss: 5.249


 19%|█▉        | 9694/50000 [14:25<47:56, 14.01it/s]

Epochs: 9691 | epoch avg. loss: 0.179 | test avg. loss: 5.436
Epochs: 9692 | epoch avg. loss: 0.208 | test avg. loss: 5.339
Epochs: 9693 | epoch avg. loss: 0.378 | test avg. loss: 5.907
Epochs: 9694 | epoch avg. loss: 0.743 | test avg. loss: 4.995


 19%|█▉        | 9698/50000 [14:25<45:30, 14.76it/s]

Epochs: 9695 | epoch avg. loss: 0.344 | test avg. loss: 5.094
Epochs: 9696 | epoch avg. loss: 0.333 | test avg. loss: 5.943
Epochs: 9697 | epoch avg. loss: 0.583 | test avg. loss: 5.169
Epochs: 9698 | epoch avg. loss: 0.346 | test avg. loss: 5.551


 19%|█▉        | 9698/50000 [14:25<45:30, 14.76it/s]

Epochs: 9699 | epoch avg. loss: 0.450 | test avg. loss: 5.070


 19%|█▉        | 9704/50000 [14:27<1:53:15,  5.93it/s]

Epochs: 9700 | epoch avg. loss: 0.326 | test avg. loss: 5.624
Epochs: 9701 | epoch avg. loss: 0.998 | test avg. loss: 5.218
Epochs: 9702 | epoch avg. loss: 0.457 | test avg. loss: 5.104
Epochs: 9703 | epoch avg. loss: 0.467 | test avg. loss: 6.315


 19%|█▉        | 9708/50000 [14:27<1:17:35,  8.65it/s]

Epochs: 9704 | epoch avg. loss: 0.723 | test avg. loss: 5.869
Epochs: 9705 | epoch avg. loss: 1.414 | test avg. loss: 7.260
Epochs: 9706 | epoch avg. loss: 1.213 | test avg. loss: 5.389
Epochs: 9707 | epoch avg. loss: 0.847 | test avg. loss: 6.425


 19%|█▉        | 9710/50000 [14:27<1:09:33,  9.65it/s]

Epochs: 9708 | epoch avg. loss: 1.851 | test avg. loss: 5.333
Epochs: 9709 | epoch avg. loss: 0.729 | test avg. loss: 5.589
Epochs: 9710 | epoch avg. loss: 0.811 | test avg. loss: 6.300


 19%|█▉        | 9714/50000 [14:28<1:04:53, 10.35it/s]

Epochs: 9711 | epoch avg. loss: 1.009 | test avg. loss: 5.558
Epochs: 9712 | epoch avg. loss: 0.955 | test avg. loss: 5.891
Epochs: 9713 | epoch avg. loss: 0.763 | test avg. loss: 4.813


                                                      

Epochs: 9714 | epoch avg. loss: 0.185 | test avg. loss: 5.008
Epochs: 9715 | epoch avg. loss: 0.146 | test avg. loss: 5.151
Epochs: 9716 | epoch avg. loss: 0.197 | test avg. loss: 5.271


 19%|█▉        | 9720/50000 [14:28<53:30, 12.55it/s]

Epochs: 9717 | epoch avg. loss: 0.215 | test avg. loss: 5.513
Epochs: 9718 | epoch avg. loss: 0.190 | test avg. loss: 5.118
Epochs: 9719 | epoch avg. loss: 0.136 | test avg. loss: 5.093


 19%|█▉        | 9722/50000 [14:28<51:59, 12.91it/s]

Epochs: 9720 | epoch avg. loss: 0.113 | test avg. loss: 4.924
Epochs: 9721 | epoch avg. loss: 0.349 | test avg. loss: 5.380
Epochs: 9722 | epoch avg. loss: 0.742 | test avg. loss: 4.735


 19%|█▉        | 9726/50000 [14:29<53:51, 12.46it/s]

Epochs: 9723 | epoch avg. loss: 0.385 | test avg. loss: 4.984
Epochs: 9724 | epoch avg. loss: 0.254 | test avg. loss: 5.382
Epochs: 9725 | epoch avg. loss: 0.152 | test avg. loss: 5.243


 19%|█▉        | 9728/50000 [14:29<52:29, 12.79it/s]

Epochs: 9726 | epoch avg. loss: 0.144 | test avg. loss: 5.584
Epochs: 9727 | epoch avg. loss: 0.211 | test avg. loss: 5.180
Epochs: 9728 | epoch avg. loss: 0.324 | test avg. loss: 5.130


 19%|█▉        | 9732/50000 [14:29<50:12, 13.37it/s]

Epochs: 9729 | epoch avg. loss: 0.229 | test avg. loss: 5.225
Epochs: 9730 | epoch avg. loss: 0.168 | test avg. loss: 5.085
Epochs: 9731 | epoch avg. loss: 0.096 | test avg. loss: 4.994
Epochs: 9732 | epoch avg. loss: 0.170 | test avg. loss: 5.001




Epochs: 9733 | epoch avg. loss: 0.169 | test avg. loss: 5.301
Epochs: 9734 | epoch avg. loss: 0.238 | test avg. loss: 5.144
Epochs: 9735 | epoch avg. loss: 0.587 | test avg. loss: 5.552


 19%|█▉        | 9738/50000 [14:29<46:46, 14.34it/s]

Epochs: 9736 | epoch avg. loss: 0.375 | test avg. loss: 5.102
Epochs: 9737 | epoch avg. loss: 0.699 | test avg. loss: 4.947
Epochs: 9738 | epoch avg. loss: 0.339 | test avg. loss: 5.342


 19%|█▉        | 9742/50000 [14:30<48:49, 13.74it/s]

Epochs: 9739 | epoch avg. loss: 0.334 | test avg. loss: 5.157
Epochs: 9740 | epoch avg. loss: 0.388 | test avg. loss: 6.091
Epochs: 9741 | epoch avg. loss: 0.534 | test avg. loss: 5.278


                                                    

Epochs: 9742 | epoch avg. loss: 0.588 | test avg. loss: 5.254
Epochs: 9743 | epoch avg. loss: 0.269 | test avg. loss: 5.098
Epochs: 9744 | epoch avg. loss: 0.223 | test avg. loss: 4.899


 19%|█▉        | 9748/50000 [14:30<46:24, 14.45it/s]

Epochs: 9745 | epoch avg. loss: 0.190 | test avg. loss: 5.167
Epochs: 9746 | epoch avg. loss: 0.215 | test avg. loss: 4.713
Epochs: 9747 | epoch avg. loss: 0.208 | test avg. loss: 5.309
Epochs: 9748 | epoch avg. loss: 0.303 | test avg. loss: 4.897


 20%|█▉        | 9752/50000 [14:30<46:37, 14.39it/s]

Epochs: 9749 | epoch avg. loss: 0.179 | test avg. loss: 5.239
Epochs: 9750 | epoch avg. loss: 0.164 | test avg. loss: 5.103
Epochs: 9751 | epoch avg. loss: 0.255 | test avg. loss: 5.144


 20%|█▉        | 9754/50000 [14:31<48:55, 13.71it/s]

Epochs: 9752 | epoch avg. loss: 0.240 | test avg. loss: 6.165
Epochs: 9753 | epoch avg. loss: 0.527 | test avg. loss: 6.099
Epochs: 9754 | epoch avg. loss: 1.364 | test avg. loss: 6.472


 20%|█▉        | 9758/50000 [14:31<47:40, 14.07it/s]

Epochs: 9755 | epoch avg. loss: 0.974 | test avg. loss: 5.033
Epochs: 9756 | epoch avg. loss: 0.597 | test avg. loss: 4.918
Epochs: 9757 | epoch avg. loss: 0.245 | test avg. loss: 5.256




Epochs: 9758 | epoch avg. loss: 0.204 | test avg. loss: 5.012
Epochs: 9759 | epoch avg. loss: 0.252 | test avg. loss: 5.380
Epochs: 9760 | epoch avg. loss: 0.328 | test avg. loss: 5.064


 20%|█▉        | 9764/50000 [14:31<46:05, 14.55it/s]

Epochs: 9761 | epoch avg. loss: 0.124 | test avg. loss: 4.885
Epochs: 9762 | epoch avg. loss: 0.202 | test avg. loss: 5.116
Epochs: 9763 | epoch avg. loss: 0.357 | test avg. loss: 4.923
Epochs: 9764 | epoch avg. loss: 0.183 | test avg. loss: 5.133


 20%|█▉        | 9768/50000 [14:32<47:53, 14.00it/s]

Epochs: 9765 | epoch avg. loss: 0.215 | test avg. loss: 5.496
Epochs: 9766 | epoch avg. loss: 0.222 | test avg. loss: 5.040
Epochs: 9767 | epoch avg. loss: 0.219 | test avg. loss: 5.113


 20%|█▉        | 9770/50000 [14:32<48:53, 13.71it/s]

Epochs: 9768 | epoch avg. loss: 0.165 | test avg. loss: 5.102
Epochs: 9769 | epoch avg. loss: 0.401 | test avg. loss: 5.001
Epochs: 9770 | epoch avg. loss: 0.114 | test avg. loss: 5.141


 20%|█▉        | 9774/50000 [14:32<48:58, 13.69it/s]

Epochs: 9771 | epoch avg. loss: 0.191 | test avg. loss: 4.944
Epochs: 9772 | epoch avg. loss: 0.251 | test avg. loss: 5.233
Epochs: 9773 | epoch avg. loss: 0.175 | test avg. loss: 4.961


 20%|█▉        | 9778/50000 [14:32<46:17, 14.48it/s]

Epochs: 9774 | epoch avg. loss: 0.123 | test avg. loss: 5.074
Epochs: 9775 | epoch avg. loss: 0.284 | test avg. loss: 5.096
Epochs: 9776 | epoch avg. loss: 0.127 | test avg. loss: 5.050
Epochs: 9777 | epoch avg. loss: 0.115 | test avg. loss: 5.253


 20%|█▉        | 9780/50000 [14:32<46:42, 14.35it/s]

Epochs: 9778 | epoch avg. loss: 0.145 | test avg. loss: 4.973
Epochs: 9779 | epoch avg. loss: 0.128 | test avg. loss: 5.160
Epochs: 9780 | epoch avg. loss: 0.206 | test avg. loss: 4.872


 20%|█▉        | 9784/50000 [14:33<50:47, 13.20it/s]

Epochs: 9781 | epoch avg. loss: 0.102 | test avg. loss: 4.831
Epochs: 9782 | epoch avg. loss: 0.105 | test avg. loss: 4.872
Epochs: 9783 | epoch avg. loss: 0.173 | test avg. loss: 5.093


 20%|█▉        | 9788/50000 [14:33<48:03, 13.94it/s]

Epochs: 9784 | epoch avg. loss: 0.130 | test avg. loss: 5.044
Epochs: 9785 | epoch avg. loss: 0.136 | test avg. loss: 5.029
Epochs: 9786 | epoch avg. loss: 0.087 | test avg. loss: 4.939
Epochs: 9787 | epoch avg. loss: 0.110 | test avg. loss: 4.914


 20%|█▉        | 9790/50000 [14:33<50:16, 13.33it/s]

Epochs: 9788 | epoch avg. loss: 0.091 | test avg. loss: 4.993
Epochs: 9789 | epoch avg. loss: 0.089 | test avg. loss: 5.060
Epochs: 9790 | epoch avg. loss: 0.095 | test avg. loss: 5.124


 20%|█▉        | 9794/50000 [14:33<50:17, 13.33it/s]

Epochs: 9791 | epoch avg. loss: 0.111 | test avg. loss: 5.397
Epochs: 9792 | epoch avg. loss: 0.171 | test avg. loss: 5.055
Epochs: 9793 | epoch avg. loss: 0.138 | test avg. loss: 5.280


 20%|█▉        | 9796/50000 [14:34<55:16, 12.12it/s]

Epochs: 9794 | epoch avg. loss: 0.218 | test avg. loss: 5.169
Epochs: 9795 | epoch avg. loss: 0.402 | test avg. loss: 5.519
Epochs: 9796 | epoch avg. loss: 0.522 | test avg. loss: 5.075


 20%|█▉        | 9798/50000 [14:34<55:36, 12.05it/s]

Epochs: 9797 | epoch avg. loss: 0.341 | test avg. loss: 5.096
Epochs: 9798 | epoch avg. loss: 0.345 | test avg. loss: 5.851
Epochs: 9799 | epoch avg. loss: 0.443 | test avg. loss: 5.044


 20%|█▉        | 9802/50000 [14:36<2:32:32,  4.39it/s]

Epochs: 9800 | epoch avg. loss: 0.372 | test avg. loss: 6.229
Epochs: 9801 | epoch avg. loss: 1.102 | test avg. loss: 5.220
Epochs: 9802 | epoch avg. loss: 0.749 | test avg. loss: 5.526


 20%|█▉        | 9806/50000 [14:36<1:39:33,  6.73it/s]

Epochs: 9803 | epoch avg. loss: 0.751 | test avg. loss: 7.610
Epochs: 9804 | epoch avg. loss: 1.242 | test avg. loss: 6.717
Epochs: 9805 | epoch avg. loss: 1.721 | test avg. loss: 9.004
Epochs: 9806 | epoch avg. loss: 2.272 | test avg. loss: 6.467


 20%|█▉        | 9810/50000 [14:36<1:14:06,  9.04it/s]

Epochs: 9807 | epoch avg. loss: 2.783 | test avg. loss: 5.357
Epochs: 9808 | epoch avg. loss: 1.096 | test avg. loss: 5.576
Epochs: 9809 | epoch avg. loss: 0.859 | test avg. loss: 5.264


 20%|█▉        | 9812/50000 [14:36<1:08:19,  9.80it/s]

Epochs: 9810 | epoch avg. loss: 0.917 | test avg. loss: 7.521
Epochs: 9811 | epoch avg. loss: 1.616 | test avg. loss: 6.528
Epochs: 9812 | epoch avg. loss: 1.819 | test avg. loss: 8.061


 20%|█▉        | 9814/50000 [14:37<1:07:11,  9.97it/s]

Epochs: 9813 | epoch avg. loss: 2.512 | test avg. loss: 5.358
Epochs: 9814 | epoch avg. loss: 1.167 | test avg. loss: 5.259
Epochs: 9815 | epoch avg. loss: 0.566 | test avg. loss: 5.337


 20%|█▉        | 9818/50000 [14:37<1:02:15, 10.76it/s]

Epochs: 9816 | epoch avg. loss: 0.422 | test avg. loss: 5.233
Epochs: 9817 | epoch avg. loss: 0.439 | test avg. loss: 5.716
Epochs: 9818 | epoch avg. loss: 0.395 | test avg. loss: 4.848


 20%|█▉        | 9822/50000 [14:37<53:46, 12.45it/s]

Epochs: 9819 | epoch avg. loss: 0.458 | test avg. loss: 5.018
Epochs: 9820 | epoch avg. loss: 0.317 | test avg. loss: 4.972
Epochs: 9821 | epoch avg. loss: 0.215 | test avg. loss: 5.068
Epochs: 9822 | epoch avg. loss: 0.244 | test avg. loss: 5.844


 20%|█▉        | 9826/50000 [14:37<53:13, 12.58it/s]

Epochs: 9823 | epoch avg. loss: 0.361 | test avg. loss: 5.451
Epochs: 9824 | epoch avg. loss: 0.643 | test avg. loss: 5.540
Epochs: 9825 | epoch avg. loss: 0.276 | test avg. loss: 4.896


 20%|█▉        | 9828/50000 [14:38<56:24, 11.87it/s]

Epochs: 9826 | epoch avg. loss: 0.565 | test avg. loss: 4.873
Epochs: 9827 | epoch avg. loss: 0.576 | test avg. loss: 4.897
Epochs: 9828 | epoch avg. loss: 0.407 | test avg. loss: 4.821


 20%|█▉        | 9832/50000 [14:38<55:35, 12.04it/s]

Epochs: 9829 | epoch avg. loss: 0.469 | test avg. loss: 5.438
Epochs: 9830 | epoch avg. loss: 0.310 | test avg. loss: 5.290
Epochs: 9831 | epoch avg. loss: 0.443 | test avg. loss: 6.452


 20%|█▉        | 9834/50000 [14:38<53:30, 12.51it/s]

Epochs: 9832 | epoch avg. loss: 0.928 | test avg. loss: 5.471
Epochs: 9833 | epoch avg. loss: 1.067 | test avg. loss: 6.013
Epochs: 9834 | epoch avg. loss: 1.240 | test avg. loss: 5.356


 20%|█▉        | 9838/50000 [14:38<53:02, 12.62it/s]

Epochs: 9835 | epoch avg. loss: 1.066 | test avg. loss: 6.833
Epochs: 9836 | epoch avg. loss: 2.121 | test avg. loss: 5.230
Epochs: 9837 | epoch avg. loss: 0.871 | test avg. loss: 4.851


 20%|█▉        | 9840/50000 [14:39<56:24, 11.87it/s]

Epochs: 9838 | epoch avg. loss: 1.191 | test avg. loss: 5.615
Epochs: 9839 | epoch avg. loss: 1.267 | test avg. loss: 5.092
Epochs: 9840 | epoch avg. loss: 1.069 | test avg. loss: 6.740


 20%|█▉        | 9844/50000 [14:39<53:45, 12.45it/s]

Epochs: 9841 | epoch avg. loss: 1.191 | test avg. loss: 5.834
Epochs: 9842 | epoch avg. loss: 0.998 | test avg. loss: 6.717
Epochs: 9843 | epoch avg. loss: 0.784 | test avg. loss: 5.759


 20%|█▉        | 9848/50000 [14:39<48:41, 13.74it/s]

Epochs: 9844 | epoch avg. loss: 0.499 | test avg. loss: 6.836
Epochs: 9845 | epoch avg. loss: 1.084 | test avg. loss: 4.745
Epochs: 9846 | epoch avg. loss: 0.598 | test avg. loss: 4.526
Epochs: 9847 | epoch avg. loss: 0.385 | test avg. loss: 4.495


 20%|█▉        | 9850/50000 [14:39<51:15, 13.05it/s]

Epochs: 9848 | epoch avg. loss: 0.425 | test avg. loss: 4.936
Epochs: 9849 | epoch avg. loss: 0.258 | test avg. loss: 5.588
Epochs: 9850 | epoch avg. loss: 0.324 | test avg. loss: 5.480


 20%|█▉        | 9854/50000 [14:40<56:35, 11.82it/s]

Epochs: 9851 | epoch avg. loss: 0.303 | test avg. loss: 5.111
Epochs: 9852 | epoch avg. loss: 0.152 | test avg. loss: 4.613
Epochs: 9853 | epoch avg. loss: 0.137 | test avg. loss: 4.629


 20%|█▉        | 9856/50000 [14:40<56:34, 11.83it/s]

Epochs: 9854 | epoch avg. loss: 0.190 | test avg. loss: 4.690
Epochs: 9855 | epoch avg. loss: 0.160 | test avg. loss: 5.365
Epochs: 9856 | epoch avg. loss: 0.246 | test avg. loss: 5.343


 20%|█▉        | 9860/50000 [14:40<55:21, 12.09it/s]

Epochs: 9857 | epoch avg. loss: 0.146 | test avg. loss: 5.624
Epochs: 9858 | epoch avg. loss: 0.290 | test avg. loss: 5.406
Epochs: 9859 | epoch avg. loss: 0.176 | test avg. loss: 4.858


 20%|█▉        | 9862/50000 [14:40<54:02, 12.38it/s]

Epochs: 9860 | epoch avg. loss: 0.111 | test avg. loss: 4.717
Epochs: 9861 | epoch avg. loss: 0.113 | test avg. loss: 4.689
Epochs: 9862 | epoch avg. loss: 0.110 | test avg. loss: 4.832


 20%|█▉        | 9866/50000 [14:41<53:56, 12.40it/s]

Epochs: 9863 | epoch avg. loss: 0.106 | test avg. loss: 5.038
Epochs: 9864 | epoch avg. loss: 0.100 | test avg. loss: 5.262
Epochs: 9865 | epoch avg. loss: 0.105 | test avg. loss: 5.148


 20%|█▉        | 9868/50000 [14:41<54:28, 12.28it/s]

Epochs: 9866 | epoch avg. loss: 0.225 | test avg. loss: 4.956
Epochs: 9867 | epoch avg. loss: 0.102 | test avg. loss: 4.857
Epochs: 9868 | epoch avg. loss: 0.115 | test avg. loss: 4.715


 20%|█▉        | 9872/50000 [14:41<52:27, 12.75it/s]

Epochs: 9869 | epoch avg. loss: 0.178 | test avg. loss: 5.078
Epochs: 9870 | epoch avg. loss: 0.212 | test avg. loss: 4.849
Epochs: 9871 | epoch avg. loss: 0.104 | test avg. loss: 5.473


 20%|█▉        | 9874/50000 [14:41<51:01, 13.11it/s]

Epochs: 9872 | epoch avg. loss: 0.322 | test avg. loss: 4.885
Epochs: 9873 | epoch avg. loss: 0.252 | test avg. loss: 5.040
Epochs: 9874 | epoch avg. loss: 0.267 | test avg. loss: 5.021


 20%|█▉        | 9878/50000 [14:42<49:52, 13.41it/s]

Epochs: 9875 | epoch avg. loss: 0.174 | test avg. loss: 4.981
Epochs: 9876 | epoch avg. loss: 0.240 | test avg. loss: 4.960
Epochs: 9877 | epoch avg. loss: 0.109 | test avg. loss: 4.911


 20%|█▉        | 9880/50000 [14:42<49:35, 13.48it/s]

Epochs: 9878 | epoch avg. loss: 0.090 | test avg. loss: 4.901
Epochs: 9879 | epoch avg. loss: 0.113 | test avg. loss: 4.815
Epochs: 9880 | epoch avg. loss: 0.110 | test avg. loss: 4.862


 20%|█▉        | 9884/50000 [14:42<52:32, 12.72it/s]

Epochs: 9881 | epoch avg. loss: 0.092 | test avg. loss: 4.773
Epochs: 9882 | epoch avg. loss: 0.110 | test avg. loss: 4.876
Epochs: 9883 | epoch avg. loss: 0.113 | test avg. loss: 4.814


 20%|█▉        | 9886/50000 [14:42<51:52, 12.89it/s]

Epochs: 9884 | epoch avg. loss: 0.081 | test avg. loss: 4.991
Epochs: 9885 | epoch avg. loss: 0.134 | test avg. loss: 4.845
Epochs: 9886 | epoch avg. loss: 0.113 | test avg. loss: 4.944


 20%|█▉        | 9890/50000 [14:43<53:18, 12.54it/s]

Epochs: 9887 | epoch avg. loss: 0.096 | test avg. loss: 4.910
Epochs: 9888 | epoch avg. loss: 0.133 | test avg. loss: 4.964
Epochs: 9889 | epoch avg. loss: 0.119 | test avg. loss: 5.192


 20%|█▉        | 9892/50000 [14:43<52:44, 12.68it/s]

Epochs: 9890 | epoch avg. loss: 0.175 | test avg. loss: 5.051
Epochs: 9891 | epoch avg. loss: 0.428 | test avg. loss: 5.374
Epochs: 9892 | epoch avg. loss: 0.370 | test avg. loss: 4.852
Epochs: 9893 | epoch avg. loss: 0.200 | test avg. loss: 4.976


                                                    

Epochs: 9894 | epoch avg. loss: 0.157 | test avg. loss: 4.856
Epochs: 9895 | epoch avg. loss: 0.096 | test avg. loss: 4.902
Epochs: 9896 | epoch avg. loss: 0.085 | test avg. loss: 4.836


 20%|█▉        | 9898/50000 [14:43<47:28, 14.08it/s]

Epochs: 9897 | epoch avg. loss: 0.168 | test avg. loss: 4.886
Epochs: 9898 | epoch avg. loss: 0.147 | test avg. loss: 5.301
Epochs: 9899 | epoch avg. loss: 0.221 | test avg. loss: 4.886


 20%|█▉        | 9902/50000 [14:45<2:18:00,  4.84it/s]

Epochs: 9900 | epoch avg. loss: 0.103 | test avg. loss: 4.939
Epochs: 9901 | epoch avg. loss: 0.086 | test avg. loss: 4.831
Epochs: 9902 | epoch avg. loss: 0.168 | test avg. loss: 4.965


 20%|█▉        | 9906/50000 [14:45<1:32:40,  7.21it/s]

Epochs: 9903 | epoch avg. loss: 0.097 | test avg. loss: 4.951
Epochs: 9904 | epoch avg. loss: 0.121 | test avg. loss: 5.002
Epochs: 9905 | epoch avg. loss: 0.114 | test avg. loss: 4.970


 20%|█▉        | 9908/50000 [14:45<1:19:32,  8.40it/s]

Epochs: 9906 | epoch avg. loss: 0.111 | test avg. loss: 4.725
Epochs: 9907 | epoch avg. loss: 0.124 | test avg. loss: 4.688
Epochs: 9908 | epoch avg. loss: 0.093 | test avg. loss: 4.891
Epochs: 9909 | epoch avg. loss: 0.129 | test avg. loss: 4.796


 20%|█▉        | 9912/50000 [14:45<1:05:12, 10.25it/s]

Epochs: 9910 | epoch avg. loss: 0.101 | test avg. loss: 4.900
Epochs: 9911 | epoch avg. loss: 0.081 | test avg. loss: 4.893
Epochs: 9912 | epoch avg. loss: 0.109 | test avg. loss: 4.916


 20%|█▉        | 9916/50000 [14:46<56:31, 11.82it/s]  

Epochs: 9913 | epoch avg. loss: 0.102 | test avg. loss: 4.964
Epochs: 9914 | epoch avg. loss: 0.102 | test avg. loss: 4.773
Epochs: 9915 | epoch avg. loss: 0.100 | test avg. loss: 4.739


 20%|█▉        | 9918/50000 [14:46<53:59, 12.37it/s]

Epochs: 9916 | epoch avg. loss: 0.106 | test avg. loss: 4.780
Epochs: 9917 | epoch avg. loss: 0.096 | test avg. loss: 4.828
Epochs: 9918 | epoch avg. loss: 0.088 | test avg. loss: 4.980


 20%|█▉        | 9922/50000 [14:46<51:10, 13.05it/s]

Epochs: 9919 | epoch avg. loss: 0.088 | test avg. loss: 4.891
Epochs: 9920 | epoch avg. loss: 0.097 | test avg. loss: 5.425
Epochs: 9921 | epoch avg. loss: 0.262 | test avg. loss: 4.983


 20%|█▉        | 9924/50000 [14:46<49:20, 13.54it/s]

Epochs: 9922 | epoch avg. loss: 0.318 | test avg. loss: 5.347
Epochs: 9923 | epoch avg. loss: 0.353 | test avg. loss: 4.797
Epochs: 9924 | epoch avg. loss: 0.225 | test avg. loss: 4.828


 20%|█▉        | 9928/50000 [14:47<54:11, 12.33it/s]

Epochs: 9925 | epoch avg. loss: 0.144 | test avg. loss: 4.968
Epochs: 9926 | epoch avg. loss: 0.115 | test avg. loss: 4.930
Epochs: 9927 | epoch avg. loss: 0.133 | test avg. loss: 5.229


 20%|█▉        | 9930/50000 [14:47<52:14, 12.78it/s]

Epochs: 9928 | epoch avg. loss: 0.162 | test avg. loss: 4.971
Epochs: 9929 | epoch avg. loss: 0.168 | test avg. loss: 5.036
Epochs: 9930 | epoch avg. loss: 0.210 | test avg. loss: 4.966
Epochs: 9931 | epoch avg. loss: 0.213 | test avg. loss: 4.956


 20%|█▉        | 9934/50000 [14:47<48:22, 13.81it/s]

Epochs: 9932 | epoch avg. loss: 0.314 | test avg. loss: 5.130
Epochs: 9933 | epoch avg. loss: 0.135 | test avg. loss: 4.931
Epochs: 9934 | epoch avg. loss: 0.099 | test avg. loss: 5.162


 20%|█▉        | 9938/50000 [14:47<47:46, 13.98it/s]

Epochs: 9935 | epoch avg. loss: 0.152 | test avg. loss: 4.904
Epochs: 9936 | epoch avg. loss: 0.191 | test avg. loss: 5.240
Epochs: 9937 | epoch avg. loss: 0.539 | test avg. loss: 4.782


 20%|█▉        | 9940/50000 [14:48<49:39, 13.45it/s]

Epochs: 9938 | epoch avg. loss: 0.439 | test avg. loss: 4.946
Epochs: 9939 | epoch avg. loss: 0.434 | test avg. loss: 6.160
Epochs: 9940 | epoch avg. loss: 0.592 | test avg. loss: 5.254


 20%|█▉        | 9944/50000 [14:48<53:57, 12.37it/s]

Epochs: 9941 | epoch avg. loss: 0.574 | test avg. loss: 5.173
Epochs: 9942 | epoch avg. loss: 0.353 | test avg. loss: 4.860
Epochs: 9943 | epoch avg. loss: 0.255 | test avg. loss: 4.813




Epochs: 9944 | epoch avg. loss: 0.220 | test avg. loss: 4.860
Epochs: 9945 | epoch avg. loss: 0.385 | test avg. loss: 4.837
Epochs: 9946 | epoch avg. loss: 0.381 | test avg. loss: 5.079


 20%|█▉        | 9950/50000 [14:48<50:13, 13.29it/s]

Epochs: 9947 | epoch avg. loss: 0.147 | test avg. loss: 4.890
Epochs: 9948 | epoch avg. loss: 0.181 | test avg. loss: 4.841
Epochs: 9949 | epoch avg. loss: 0.117 | test avg. loss: 4.973


 20%|█▉        | 9952/50000 [14:48<55:32, 12.02it/s]

Epochs: 9950 | epoch avg. loss: 0.111 | test avg. loss: 4.899
Epochs: 9951 | epoch avg. loss: 0.103 | test avg. loss: 4.934
Epochs: 9952 | epoch avg. loss: 0.096 | test avg. loss: 4.916


 20%|█▉        | 9956/50000 [14:49<51:54, 12.86it/s]

Epochs: 9953 | epoch avg. loss: 0.100 | test avg. loss: 4.867
Epochs: 9954 | epoch avg. loss: 0.115 | test avg. loss: 4.924
Epochs: 9955 | epoch avg. loss: 0.102 | test avg. loss: 5.085


 20%|█▉        | 9958/50000 [14:49<51:18, 13.01it/s]

Epochs: 9956 | epoch avg. loss: 0.145 | test avg. loss: 4.819
Epochs: 9957 | epoch avg. loss: 0.140 | test avg. loss: 4.838
Epochs: 9958 | epoch avg. loss: 0.090 | test avg. loss: 4.907


 20%|█▉        | 9962/50000 [14:49<49:00, 13.62it/s]

Epochs: 9959 | epoch avg. loss: 0.094 | test avg. loss: 4.933
Epochs: 9960 | epoch avg. loss: 0.104 | test avg. loss: 5.019
Epochs: 9961 | epoch avg. loss: 0.115 | test avg. loss: 4.944


 20%|█▉        | 9964/50000 [14:49<51:37, 12.92it/s]

Epochs: 9962 | epoch avg. loss: 0.292 | test avg. loss: 5.098
Epochs: 9963 | epoch avg. loss: 0.130 | test avg. loss: 5.085
Epochs: 9964 | epoch avg. loss: 0.418 | test avg. loss: 5.279


 20%|█▉        | 9968/50000 [14:50<49:50, 13.39it/s]

Epochs: 9965 | epoch avg. loss: 0.368 | test avg. loss: 5.064
Epochs: 9966 | epoch avg. loss: 0.181 | test avg. loss: 4.964
Epochs: 9967 | epoch avg. loss: 0.216 | test avg. loss: 5.305
Epochs: 9968 | epoch avg. loss: 0.303 | test avg. loss: 4.779


 20%|█▉        | 9972/50000 [14:50<48:10, 13.85it/s]

Epochs: 9969 | epoch avg. loss: 0.221 | test avg. loss: 4.836
Epochs: 9970 | epoch avg. loss: 0.164 | test avg. loss: 5.061
Epochs: 9971 | epoch avg. loss: 0.143 | test avg. loss: 4.868


 20%|█▉        | 9974/50000 [14:50<48:48, 13.67it/s]

Epochs: 9972 | epoch avg. loss: 0.102 | test avg. loss: 5.168
Epochs: 9973 | epoch avg. loss: 0.144 | test avg. loss: 4.996
Epochs: 9974 | epoch avg. loss: 0.452 | test avg. loss: 5.019


 20%|█▉        | 9978/50000 [14:50<50:45, 13.14it/s]

Epochs: 9975 | epoch avg. loss: 0.201 | test avg. loss: 5.082
Epochs: 9976 | epoch avg. loss: 0.200 | test avg. loss: 5.097
Epochs: 9977 | epoch avg. loss: 0.215 | test avg. loss: 5.210


 20%|█▉        | 9980/50000 [14:51<50:47, 13.13it/s]

Epochs: 9978 | epoch avg. loss: 0.140 | test avg. loss: 4.823
Epochs: 9979 | epoch avg. loss: 0.203 | test avg. loss: 4.809
Epochs: 9980 | epoch avg. loss: 0.083 | test avg. loss: 4.795


 20%|█▉        | 9984/50000 [14:51<48:56, 13.63it/s]

Epochs: 9981 | epoch avg. loss: 0.134 | test avg. loss: 4.851
Epochs: 9982 | epoch avg. loss: 0.093 | test avg. loss: 4.965
Epochs: 9983 | epoch avg. loss: 0.089 | test avg. loss: 5.003


 20%|█▉        | 9988/50000 [14:51<46:31, 14.34it/s]

Epochs: 9984 | epoch avg. loss: 0.093 | test avg. loss: 4.935
Epochs: 9985 | epoch avg. loss: 0.096 | test avg. loss: 4.840
Epochs: 9986 | epoch avg. loss: 0.108 | test avg. loss: 4.845
Epochs: 9987 | epoch avg. loss: 0.115 | test avg. loss: 5.030


 20%|█▉        | 9992/50000 [14:51<45:18, 14.72it/s]

Epochs: 9988 | epoch avg. loss: 0.144 | test avg. loss: 4.852
Epochs: 9989 | epoch avg. loss: 0.093 | test avg. loss: 5.022
Epochs: 9990 | epoch avg. loss: 0.090 | test avg. loss: 4.873
Epochs: 9991 | epoch avg. loss: 0.172 | test avg. loss: 5.166


 20%|█▉        | 9994/50000 [14:52<49:18, 13.52it/s]

Epochs: 9992 | epoch avg. loss: 0.349 | test avg. loss: 4.791
Epochs: 9993 | epoch avg. loss: 0.259 | test avg. loss: 4.708
Epochs: 9994 | epoch avg. loss: 0.237 | test avg. loss: 5.500


 20%|█▉        | 9998/50000 [14:52<49:04, 13.59it/s]

Epochs: 9995 | epoch avg. loss: 0.437 | test avg. loss: 5.238
Epochs: 9996 | epoch avg. loss: 0.506 | test avg. loss: 6.488
Epochs: 9997 | epoch avg. loss: 1.181 | test avg. loss: 4.982
Epochs: 9998 | epoch avg. loss: 1.201 | test avg. loss: 4.871


 20%|█▉        | 9998/50000 [14:52<49:04, 13.59it/s]

Epochs: 9999 | epoch avg. loss: 0.610 | test avg. loss: 6.696


 20%|██        | 10002/50000 [14:54<2:26:12,  4.56it/s]

Epochs: 10000 | epoch avg. loss: 1.093 | test avg. loss: 6.065
Epochs: 10001 | epoch avg. loss: 1.338 | test avg. loss: 5.480
Epochs: 10002 | epoch avg. loss: 0.702 | test avg. loss: 4.740


 20%|██        | 10006/50000 [14:54<1:37:25,  6.84it/s]

Epochs: 10003 | epoch avg. loss: 0.516 | test avg. loss: 4.708
Epochs: 10004 | epoch avg. loss: 0.732 | test avg. loss: 7.623
Epochs: 10005 | epoch avg. loss: 1.606 | test avg. loss: 7.482


 20%|██        | 10008/50000 [14:54<1:23:54,  7.94it/s]

Epochs: 10006 | epoch avg. loss: 2.665 | test avg. loss: 9.532
Epochs: 10007 | epoch avg. loss: 3.279 | test avg. loss: 5.575
Epochs: 10008 | epoch avg. loss: 1.220 | test avg. loss: 6.265
Epochs: 10009 | epoch avg. loss: 1.435 | test avg. loss: 5.153


 20%|██        | 10012/50000 [14:54<1:04:38, 10.31it/s]

Epochs: 10010 | epoch avg. loss: 1.094 | test avg. loss: 5.224
Epochs: 10011 | epoch avg. loss: 0.954 | test avg. loss: 5.585
Epochs: 10012 | epoch avg. loss: 0.843 | test avg. loss: 5.403


 20%|██        | 10016/50000 [14:55<1:00:52, 10.95it/s]

Epochs: 10013 | epoch avg. loss: 1.095 | test avg. loss: 8.762
Epochs: 10014 | epoch avg. loss: 2.324 | test avg. loss: 5.947
Epochs: 10015 | epoch avg. loss: 1.472 | test avg. loss: 6.223


 20%|██        | 10018/50000 [14:55<58:25, 11.41it/s]

Epochs: 10016 | epoch avg. loss: 1.251 | test avg. loss: 5.077
Epochs: 10017 | epoch avg. loss: 0.800 | test avg. loss: 5.070
Epochs: 10018 | epoch avg. loss: 0.483 | test avg. loss: 5.569


 20%|██        | 10022/50000 [14:55<56:29, 11.80it/s]

Epochs: 10019 | epoch avg. loss: 0.500 | test avg. loss: 4.736
Epochs: 10020 | epoch avg. loss: 0.491 | test avg. loss: 5.301
Epochs: 10021 | epoch avg. loss: 0.473 | test avg. loss: 4.553


 20%|██        | 10024/50000 [14:55<56:53, 11.71it/s]

Epochs: 10022 | epoch avg. loss: 0.192 | test avg. loss: 4.933
Epochs: 10023 | epoch avg. loss: 0.370 | test avg. loss: 4.872
Epochs: 10024 | epoch avg. loss: 0.307 | test avg. loss: 5.090


 20%|██        | 10028/50000 [14:56<55:42, 11.96it/s]

Epochs: 10025 | epoch avg. loss: 0.437 | test avg. loss: 6.624
Epochs: 10026 | epoch avg. loss: 0.841 | test avg. loss: 5.996
Epochs: 10027 | epoch avg. loss: 1.177 | test avg. loss: 7.685


 20%|██        | 10030/50000 [14:56<55:34, 11.99it/s]

Epochs: 10028 | epoch avg. loss: 1.332 | test avg. loss: 5.899
Epochs: 10029 | epoch avg. loss: 1.482 | test avg. loss: 5.267
Epochs: 10030 | epoch avg. loss: 0.225 | test avg. loss: 4.912


 20%|██        | 10034/50000 [14:56<55:19, 12.04it/s]

Epochs: 10031 | epoch avg. loss: 0.443 | test avg. loss: 5.431
Epochs: 10032 | epoch avg. loss: 0.628 | test avg. loss: 5.026
Epochs: 10033 | epoch avg. loss: 0.308 | test avg. loss: 4.907


 20%|██        | 10036/50000 [14:56<54:05, 12.31it/s]

Epochs: 10034 | epoch avg. loss: 0.292 | test avg. loss: 5.563
Epochs: 10035 | epoch avg. loss: 0.347 | test avg. loss: 5.456
Epochs: 10036 | epoch avg. loss: 0.661 | test avg. loss: 5.982


 20%|██        | 10040/50000 [14:56<52:36, 12.66it/s]

Epochs: 10037 | epoch avg. loss: 0.590 | test avg. loss: 5.412
Epochs: 10038 | epoch avg. loss: 0.392 | test avg. loss: 5.770
Epochs: 10039 | epoch avg. loss: 0.617 | test avg. loss: 5.779


 20%|██        | 10042/50000 [14:57<52:51, 12.60it/s]

Epochs: 10040 | epoch avg. loss: 0.473 | test avg. loss: 4.822
Epochs: 10041 | epoch avg. loss: 0.377 | test avg. loss: 5.220
Epochs: 10042 | epoch avg. loss: 0.283 | test avg. loss: 5.199


 20%|██        | 10046/50000 [14:57<50:57, 13.07it/s]

Epochs: 10043 | epoch avg. loss: 0.609 | test avg. loss: 5.880
Epochs: 10044 | epoch avg. loss: 0.354 | test avg. loss: 5.327
Epochs: 10045 | epoch avg. loss: 0.583 | test avg. loss: 5.209


 20%|██        | 10048/50000 [14:57<49:36, 13.42it/s]

Epochs: 10046 | epoch avg. loss: 0.167 | test avg. loss: 5.381
Epochs: 10047 | epoch avg. loss: 0.271 | test avg. loss: 5.357
Epochs: 10048 | epoch avg. loss: 0.392 | test avg. loss: 5.710


 20%|██        | 10052/50000 [14:57<52:05, 12.78it/s]

Epochs: 10049 | epoch avg. loss: 0.249 | test avg. loss: 5.137
Epochs: 10050 | epoch avg. loss: 0.318 | test avg. loss: 5.342
Epochs: 10051 | epoch avg. loss: 0.145 | test avg. loss: 5.090


 20%|██        | 10054/50000 [14:58<52:10, 12.76it/s]

Epochs: 10052 | epoch avg. loss: 0.261 | test avg. loss: 5.638
Epochs: 10053 | epoch avg. loss: 0.430 | test avg. loss: 5.144
Epochs: 10054 | epoch avg. loss: 0.209 | test avg. loss: 5.289


 20%|██        | 10058/50000 [14:58<50:19, 13.23it/s]

Epochs: 10055 | epoch avg. loss: 0.233 | test avg. loss: 5.406
Epochs: 10056 | epoch avg. loss: 0.247 | test avg. loss: 5.115
Epochs: 10057 | epoch avg. loss: 0.441 | test avg. loss: 5.560


 20%|██        | 10060/50000 [14:58<49:27, 13.46it/s]

Epochs: 10058 | epoch avg. loss: 0.471 | test avg. loss: 4.881
Epochs: 10059 | epoch avg. loss: 0.231 | test avg. loss: 4.974
Epochs: 10060 | epoch avg. loss: 0.149 | test avg. loss: 4.996


 20%|██        | 10064/50000 [14:58<48:38, 13.68it/s]

Epochs: 10061 | epoch avg. loss: 0.128 | test avg. loss: 4.885
Epochs: 10062 | epoch avg. loss: 0.136 | test avg. loss: 5.290
Epochs: 10063 | epoch avg. loss: 0.206 | test avg. loss: 5.221


 20%|██        | 10066/50000 [14:59<54:48, 12.14it/s]

Epochs: 10064 | epoch avg. loss: 0.462 | test avg. loss: 5.429
Epochs: 10065 | epoch avg. loss: 0.246 | test avg. loss: 5.058
Epochs: 10066 | epoch avg. loss: 0.089 | test avg. loss: 5.164


 20%|██        | 10070/50000 [14:59<56:05, 11.86it/s]

Epochs: 10067 | epoch avg. loss: 0.114 | test avg. loss: 4.977
Epochs: 10068 | epoch avg. loss: 0.188 | test avg. loss: 5.078
Epochs: 10069 | epoch avg. loss: 0.129 | test avg. loss: 5.084


 20%|██        | 10072/50000 [14:59<57:05, 11.66it/s]

Epochs: 10070 | epoch avg. loss: 0.104 | test avg. loss: 4.923
Epochs: 10071 | epoch avg. loss: 0.086 | test avg. loss: 5.002
Epochs: 10072 | epoch avg. loss: 0.094 | test avg. loss: 5.007


 20%|██        | 10076/50000 [14:59<52:55, 12.57it/s]

Epochs: 10073 | epoch avg. loss: 0.099 | test avg. loss: 5.311
Epochs: 10074 | epoch avg. loss: 0.130 | test avg. loss: 5.138
Epochs: 10075 | epoch avg. loss: 0.171 | test avg. loss: 5.320


 20%|██        | 10078/50000 [15:00<57:33, 11.56it/s]

Epochs: 10076 | epoch avg. loss: 0.200 | test avg. loss: 5.096
Epochs: 10077 | epoch avg. loss: 0.118 | test avg. loss: 5.030
Epochs: 10078 | epoch avg. loss: 0.120 | test avg. loss: 5.235


 20%|██        | 10082/50000 [15:00<56:07, 11.85it/s]

Epochs: 10079 | epoch avg. loss: 0.131 | test avg. loss: 5.033
Epochs: 10080 | epoch avg. loss: 0.087 | test avg. loss: 5.134
Epochs: 10081 | epoch avg. loss: 0.105 | test avg. loss: 4.992


 20%|██        | 10084/50000 [15:00<58:14, 11.42it/s]

Epochs: 10082 | epoch avg. loss: 0.104 | test avg. loss: 4.977
Epochs: 10083 | epoch avg. loss: 0.078 | test avg. loss: 4.986
Epochs: 10084 | epoch avg. loss: 0.081 | test avg. loss: 4.951


 20%|██        | 10088/50000 [15:00<54:16, 12.26it/s]

Epochs: 10085 | epoch avg. loss: 0.087 | test avg. loss: 5.029
Epochs: 10086 | epoch avg. loss: 0.092 | test avg. loss: 4.933
Epochs: 10087 | epoch avg. loss: 0.135 | test avg. loss: 5.099


 20%|██        | 10090/50000 [15:01<56:28, 11.78it/s]

Epochs: 10088 | epoch avg. loss: 0.101 | test avg. loss: 4.974
Epochs: 10089 | epoch avg. loss: 0.141 | test avg. loss: 4.986
Epochs: 10090 | epoch avg. loss: 0.089 | test avg. loss: 5.037




Epochs: 10091 | epoch avg. loss: 0.110 | test avg. loss: 4.951
Epochs: 10092 | epoch avg. loss: 0.159 | test avg. loss: 5.312
Epochs: 10093 | epoch avg. loss: 0.170 | test avg. loss: 5.014


 20%|██        | 10096/50000 [15:01<49:34, 13.41it/s]

Epochs: 10094 | epoch avg. loss: 0.274 | test avg. loss: 5.191
Epochs: 10095 | epoch avg. loss: 0.201 | test avg. loss: 4.879
Epochs: 10096 | epoch avg. loss: 0.298 | test avg. loss: 4.910
Epochs: 10097 | epoch avg. loss: 0.305 | test avg. loss: 5.779


 20%|██        | 10098/50000 [15:01<48:07, 13.82it/s]

Epochs: 10098 | epoch avg. loss: 0.534 | test avg. loss: 5.028
Epochs: 10099 | epoch avg. loss: 0.275 | test avg. loss: 5.404


 20%|██        | 10104/50000 [15:03<1:55:18,  5.77it/s]

Epochs: 10100 | epoch avg. loss: 0.508 | test avg. loss: 4.849
Epochs: 10101 | epoch avg. loss: 0.215 | test avg. loss: 5.217
Epochs: 10102 | epoch avg. loss: 0.336 | test avg. loss: 5.208
Epochs: 10103 | epoch avg. loss: 0.274 | test avg. loss: 5.318


 20%|██        | 10106/50000 [15:03<1:33:44,  7.09it/s]

Epochs: 10104 | epoch avg. loss: 0.293 | test avg. loss: 5.437
Epochs: 10105 | epoch avg. loss: 0.295 | test avg. loss: 5.209
Epochs: 10106 | epoch avg. loss: 0.386 | test avg. loss: 5.742


 20%|██        | 10110/50000 [15:03<1:09:41,  9.54it/s]

Epochs: 10107 | epoch avg. loss: 0.355 | test avg. loss: 5.232
Epochs: 10108 | epoch avg. loss: 0.741 | test avg. loss: 6.277
Epochs: 10109 | epoch avg. loss: 1.452 | test avg. loss: 5.831


 20%|██        | 10112/50000 [15:04<1:04:56, 10.24it/s]

Epochs: 10110 | epoch avg. loss: 1.880 | test avg. loss: 6.353
Epochs: 10111 | epoch avg. loss: 1.212 | test avg. loss: 5.514
Epochs: 10112 | epoch avg. loss: 1.042 | test avg. loss: 5.637


 20%|██        | 10116/50000 [15:04<55:31, 11.97it/s]

Epochs: 10113 | epoch avg. loss: 0.744 | test avg. loss: 5.573
Epochs: 10114 | epoch avg. loss: 0.491 | test avg. loss: 5.364
Epochs: 10115 | epoch avg. loss: 0.589 | test avg. loss: 5.619


 20%|██        | 10120/50000 [15:04<49:39, 13.39it/s]

Epochs: 10116 | epoch avg. loss: 0.481 | test avg. loss: 4.979
Epochs: 10117 | epoch avg. loss: 0.377 | test avg. loss: 5.515
Epochs: 10118 | epoch avg. loss: 0.440 | test avg. loss: 5.240
Epochs: 10119 | epoch avg. loss: 0.708 | test avg. loss: 8.625


 20%|██        | 10124/50000 [15:04<45:53, 14.48it/s]

Epochs: 10120 | epoch avg. loss: 2.007 | test avg. loss: 5.929
Epochs: 10121 | epoch avg. loss: 2.021 | test avg. loss: 6.792
Epochs: 10122 | epoch avg. loss: 1.577 | test avg. loss: 4.901
Epochs: 10123 | epoch avg. loss: 0.809 | test avg. loss: 5.242


 20%|██        | 10126/50000 [15:05<50:30, 13.16it/s]

Epochs: 10124 | epoch avg. loss: 0.636 | test avg. loss: 4.973
Epochs: 10125 | epoch avg. loss: 0.674 | test avg. loss: 4.652
Epochs: 10126 | epoch avg. loss: 0.786 | test avg. loss: 5.621


 20%|██        | 10130/50000 [15:05<47:23, 14.02it/s]

Epochs: 10127 | epoch avg. loss: 0.842 | test avg. loss: 4.892
Epochs: 10128 | epoch avg. loss: 0.688 | test avg. loss: 5.924
Epochs: 10129 | epoch avg. loss: 0.794 | test avg. loss: 5.448
Epochs: 10130 | epoch avg. loss: 1.165 | test avg. loss: 5.027


 20%|██        | 10134/50000 [15:05<44:45, 14.85it/s]

Epochs: 10131 | epoch avg. loss: 0.267 | test avg. loss: 4.980
Epochs: 10132 | epoch avg. loss: 0.205 | test avg. loss: 4.867
Epochs: 10133 | epoch avg. loss: 0.176 | test avg. loss: 4.629
Epochs: 10134 | epoch avg. loss: 0.157 | test avg. loss: 4.637


 20%|██        | 10138/50000 [15:05<45:09, 14.71it/s]

Epochs: 10135 | epoch avg. loss: 0.168 | test avg. loss: 5.001
Epochs: 10136 | epoch avg. loss: 0.135 | test avg. loss: 5.323
Epochs: 10137 | epoch avg. loss: 0.161 | test avg. loss: 5.644


 20%|██        | 10140/50000 [15:06<46:37, 14.25it/s]

Epochs: 10138 | epoch avg. loss: 0.189 | test avg. loss: 5.189
Epochs: 10139 | epoch avg. loss: 0.138 | test avg. loss: 4.968
Epochs: 10140 | epoch avg. loss: 0.109 | test avg. loss: 4.857


 20%|██        | 10144/50000 [15:06<48:52, 13.59it/s]

Epochs: 10141 | epoch avg. loss: 0.108 | test avg. loss: 4.775
Epochs: 10142 | epoch avg. loss: 0.090 | test avg. loss: 4.974
Epochs: 10143 | epoch avg. loss: 0.111 | test avg. loss: 5.002


 20%|██        | 10146/50000 [15:06<47:55, 13.86it/s]

Epochs: 10144 | epoch avg. loss: 0.091 | test avg. loss: 5.122
Epochs: 10145 | epoch avg. loss: 0.116 | test avg. loss: 5.050
Epochs: 10146 | epoch avg. loss: 0.086 | test avg. loss: 5.192


 20%|██        | 10150/50000 [15:06<47:57, 13.85it/s]

Epochs: 10147 | epoch avg. loss: 0.120 | test avg. loss: 4.931
Epochs: 10148 | epoch avg. loss: 0.340 | test avg. loss: 4.879
Epochs: 10149 | epoch avg. loss: 0.108 | test avg. loss: 4.969


 20%|██        | 10152/50000 [15:06<48:02, 13.82it/s]

Epochs: 10150 | epoch avg. loss: 0.101 | test avg. loss: 4.908
Epochs: 10151 | epoch avg. loss: 0.099 | test avg. loss: 5.063
Epochs: 10152 | epoch avg. loss: 0.116 | test avg. loss: 4.884


 20%|██        | 10156/50000 [15:07<52:13, 12.72it/s]

Epochs: 10153 | epoch avg. loss: 0.092 | test avg. loss: 4.858
Epochs: 10154 | epoch avg. loss: 0.084 | test avg. loss: 4.881
Epochs: 10155 | epoch avg. loss: 0.077 | test avg. loss: 4.860


 20%|██        | 10160/50000 [15:07<47:47, 13.89it/s]

Epochs: 10156 | epoch avg. loss: 0.079 | test avg. loss: 4.878
Epochs: 10157 | epoch avg. loss: 0.104 | test avg. loss: 5.093
Epochs: 10158 | epoch avg. loss: 0.157 | test avg. loss: 4.828
Epochs: 10159 | epoch avg. loss: 0.180 | test avg. loss: 4.856


 20%|██        | 10164/50000 [15:07<44:55, 14.78it/s]

Epochs: 10160 | epoch avg. loss: 0.089 | test avg. loss: 4.994
Epochs: 10161 | epoch avg. loss: 0.105 | test avg. loss: 4.888
Epochs: 10162 | epoch avg. loss: 0.164 | test avg. loss: 5.425
Epochs: 10163 | epoch avg. loss: 0.365 | test avg. loss: 4.831


 20%|██        | 10166/50000 [15:07<44:34, 14.90it/s]

Epochs: 10164 | epoch avg. loss: 0.156 | test avg. loss: 4.966
Epochs: 10165 | epoch avg. loss: 0.127 | test avg. loss: 4.775
Epochs: 10166 | epoch avg. loss: 0.134 | test avg. loss: 4.743


 20%|██        | 10168/50000 [15:08<51:35, 12.87it/s]

Epochs: 10167 | epoch avg. loss: 0.099 | test avg. loss: 4.825
Epochs: 10168 | epoch avg. loss: 0.081 | test avg. loss: 4.913


 20%|██        | 10172/50000 [15:08<54:22, 12.21it/s]

Epochs: 10169 | epoch avg. loss: 0.080 | test avg. loss: 4.831
Epochs: 10170 | epoch avg. loss: 0.143 | test avg. loss: 4.897
Epochs: 10171 | epoch avg. loss: 0.134 | test avg. loss: 4.912


 20%|██        | 10174/50000 [15:08<51:55, 12.78it/s]

Epochs: 10172 | epoch avg. loss: 0.131 | test avg. loss: 4.778
Epochs: 10173 | epoch avg. loss: 0.251 | test avg. loss: 4.969
Epochs: 10174 | epoch avg. loss: 0.092 | test avg. loss: 4.956
Epochs: 10175 | epoch avg. loss: 0.266 | test avg. loss: 5.173


 20%|██        | 10178/50000 [15:08<49:04, 13.52it/s]

Epochs: 10176 | epoch avg. loss: 0.230 | test avg. loss: 5.132
Epochs: 10177 | epoch avg. loss: 0.177 | test avg. loss: 4.944
Epochs: 10178 | epoch avg. loss: 0.273 | test avg. loss: 5.540


 20%|██        | 10182/50000 [15:09<52:14, 12.70it/s]

Epochs: 10179 | epoch avg. loss: 0.443 | test avg. loss: 5.026
Epochs: 10180 | epoch avg. loss: 0.786 | test avg. loss: 5.028
Epochs: 10181 | epoch avg. loss: 0.361 | test avg. loss: 4.855


 20%|██        | 10184/50000 [15:09<51:09, 12.97it/s]

Epochs: 10182 | epoch avg. loss: 0.519 | test avg. loss: 5.259
Epochs: 10183 | epoch avg. loss: 0.548 | test avg. loss: 4.739
Epochs: 10184 | epoch avg. loss: 0.423 | test avg. loss: 4.593


 20%|██        | 10188/50000 [15:09<47:13, 14.05it/s]

Epochs: 10185 | epoch avg. loss: 0.376 | test avg. loss: 5.339
Epochs: 10186 | epoch avg. loss: 0.519 | test avg. loss: 4.729
Epochs: 10187 | epoch avg. loss: 0.535 | test avg. loss: 5.634
Epochs: 10188 | epoch avg. loss: 0.597 | test avg. loss: 4.969


 20%|██        | 10192/50000 [15:09<44:25, 14.93it/s]

Epochs: 10189 | epoch avg. loss: 0.230 | test avg. loss: 5.691
Epochs: 10190 | epoch avg. loss: 0.335 | test avg. loss: 5.392
Epochs: 10191 | epoch avg. loss: 0.724 | test avg. loss: 5.102
Epochs: 10192 | epoch avg. loss: 0.324 | test avg. loss: 5.190


 20%|██        | 10196/50000 [15:10<48:00, 13.82it/s]

Epochs: 10193 | epoch avg. loss: 0.409 | test avg. loss: 5.084
Epochs: 10194 | epoch avg. loss: 0.724 | test avg. loss: 6.039
Epochs: 10195 | epoch avg. loss: 0.731 | test avg. loss: 5.132


 20%|██        | 10198/50000 [15:10<48:08, 13.78it/s]

Epochs: 10196 | epoch avg. loss: 0.907 | test avg. loss: 6.282
Epochs: 10197 | epoch avg. loss: 1.001 | test avg. loss: 5.521
Epochs: 10198 | epoch avg. loss: 1.631 | test avg. loss: 5.453


 20%|██        | 10198/50000 [15:10<48:08, 13.78it/s]

Epochs: 10199 | epoch avg. loss: 0.562 | test avg. loss: 5.895


 20%|██        | 10202/50000 [15:11<2:20:32,  4.72it/s]

Epochs: 10200 | epoch avg. loss: 0.515 | test avg. loss: 5.307
Epochs: 10201 | epoch avg. loss: 0.777 | test avg. loss: 8.177
Epochs: 10202 | epoch avg. loss: 2.175 | test avg. loss: 5.765


 20%|██        | 10206/50000 [15:12<1:34:09,  7.04it/s]

Epochs: 10203 | epoch avg. loss: 1.481 | test avg. loss: 6.642
Epochs: 10204 | epoch avg. loss: 1.839 | test avg. loss: 5.216
Epochs: 10205 | epoch avg. loss: 1.645 | test avg. loss: 5.187


                                                       

Epochs: 10206 | epoch avg. loss: 1.009 | test avg. loss: 6.088
Epochs: 10207 | epoch avg. loss: 0.881 | test avg. loss: 5.542
Epochs: 10208 | epoch avg. loss: 0.848 | test avg. loss: 7.137


 20%|██        | 10212/50000 [15:12<1:03:23, 10.46it/s]

Epochs: 10209 | epoch avg. loss: 1.138 | test avg. loss: 6.205
Epochs: 10210 | epoch avg. loss: 1.286 | test avg. loss: 7.584
Epochs: 10211 | epoch avg. loss: 1.738 | test avg. loss: 6.433


                                                     

Epochs: 10212 | epoch avg. loss: 1.613 | test avg. loss: 5.855
Epochs: 10213 | epoch avg. loss: 0.723 | test avg. loss: 4.859
Epochs: 10214 | epoch avg. loss: 0.569 | test avg. loss: 4.585


 20%|██        | 10216/50000 [15:13<54:25, 12.18it/s]

Epochs: 10215 | epoch avg. loss: 0.421 | test avg. loss: 4.729
Epochs: 10216 | epoch avg. loss: 0.208 | test avg. loss: 4.616
Epochs: 10217 | epoch avg. loss: 0.187 | test avg. loss: 5.017


 20%|██        | 10220/50000 [15:13<58:25, 11.35it/s]

Epochs: 10218 | epoch avg. loss: 0.236 | test avg. loss: 5.109
Epochs: 10219 | epoch avg. loss: 0.108 | test avg. loss: 5.249
Epochs: 10220 | epoch avg. loss: 0.095 | test avg. loss: 5.230


 20%|██        | 10224/50000 [15:13<55:33, 11.93it/s]

Epochs: 10221 | epoch avg. loss: 0.093 | test avg. loss: 4.970
Epochs: 10222 | epoch avg. loss: 0.091 | test avg. loss: 4.850
Epochs: 10223 | epoch avg. loss: 0.088 | test avg. loss: 4.724


 20%|██        | 10226/50000 [15:13<57:14, 11.58it/s]

Epochs: 10224 | epoch avg. loss: 0.084 | test avg. loss: 4.927
Epochs: 10225 | epoch avg. loss: 0.105 | test avg. loss: 4.941
Epochs: 10226 | epoch avg. loss: 0.185 | test avg. loss: 5.238


 20%|██        | 10230/50000 [15:14<57:03, 11.62it/s]

Epochs: 10227 | epoch avg. loss: 0.158 | test avg. loss: 4.967
Epochs: 10228 | epoch avg. loss: 0.288 | test avg. loss: 4.874
Epochs: 10229 | epoch avg. loss: 0.185 | test avg. loss: 5.170


 20%|██        | 10232/50000 [15:14<53:34, 12.37it/s]

Epochs: 10230 | epoch avg. loss: 0.244 | test avg. loss: 4.840
Epochs: 10231 | epoch avg. loss: 0.200 | test avg. loss: 5.347
Epochs: 10232 | epoch avg. loss: 0.252 | test avg. loss: 4.973


 20%|██        | 10236/50000 [15:14<50:45, 13.06it/s]

Epochs: 10233 | epoch avg. loss: 0.198 | test avg. loss: 5.480
Epochs: 10234 | epoch avg. loss: 0.311 | test avg. loss: 5.002
Epochs: 10235 | epoch avg. loss: 0.375 | test avg. loss: 5.151


 20%|██        | 10238/50000 [15:14<51:10, 12.95it/s]

Epochs: 10236 | epoch avg. loss: 0.325 | test avg. loss: 4.877
Epochs: 10237 | epoch avg. loss: 0.204 | test avg. loss: 5.171
Epochs: 10238 | epoch avg. loss: 0.487 | test avg. loss: 5.045


 20%|██        | 10242/50000 [15:14<51:59, 12.75it/s]

Epochs: 10239 | epoch avg. loss: 0.235 | test avg. loss: 4.911
Epochs: 10240 | epoch avg. loss: 0.165 | test avg. loss: 4.785
Epochs: 10241 | epoch avg. loss: 0.392 | test avg. loss: 4.644


 20%|██        | 10244/50000 [15:15<51:15, 12.93it/s]

Epochs: 10242 | epoch avg. loss: 0.296 | test avg. loss: 5.172
Epochs: 10243 | epoch avg. loss: 0.325 | test avg. loss: 4.833
Epochs: 10244 | epoch avg. loss: 0.250 | test avg. loss: 5.010


 20%|██        | 10248/50000 [15:15<47:57, 13.82it/s]

Epochs: 10245 | epoch avg. loss: 0.152 | test avg. loss: 5.020
Epochs: 10246 | epoch avg. loss: 0.102 | test avg. loss: 4.899
Epochs: 10247 | epoch avg. loss: 0.096 | test avg. loss: 4.855


 20%|██        | 10250/50000 [15:15<49:21, 13.42it/s]

Epochs: 10248 | epoch avg. loss: 0.093 | test avg. loss: 4.651
Epochs: 10249 | epoch avg. loss: 0.141 | test avg. loss: 4.869
Epochs: 10250 | epoch avg. loss: 0.127 | test avg. loss: 4.684


 21%|██        | 10254/50000 [15:15<53:39, 12.35it/s]

Epochs: 10251 | epoch avg. loss: 0.096 | test avg. loss: 5.097
Epochs: 10252 | epoch avg. loss: 0.211 | test avg. loss: 4.771
Epochs: 10253 | epoch avg. loss: 0.174 | test avg. loss: 4.906


 21%|██        | 10256/50000 [15:16<55:23, 11.96it/s]

Epochs: 10254 | epoch avg. loss: 0.103 | test avg. loss: 4.935
Epochs: 10255 | epoch avg. loss: 0.157 | test avg. loss: 4.902
Epochs: 10256 | epoch avg. loss: 0.142 | test avg. loss: 5.126


 21%|██        | 10260/50000 [15:16<53:32, 12.37it/s]

Epochs: 10257 | epoch avg. loss: 0.136 | test avg. loss: 4.839
Epochs: 10258 | epoch avg. loss: 0.117 | test avg. loss: 4.885
Epochs: 10259 | epoch avg. loss: 0.123 | test avg. loss: 4.738


 21%|██        | 10262/50000 [15:16<51:21, 12.90it/s]

Epochs: 10260 | epoch avg. loss: 0.183 | test avg. loss: 4.762
Epochs: 10261 | epoch avg. loss: 0.144 | test avg. loss: 4.932
Epochs: 10262 | epoch avg. loss: 0.087 | test avg. loss: 4.852


 21%|██        | 10266/50000 [15:16<51:43, 12.80it/s]

Epochs: 10263 | epoch avg. loss: 0.074 | test avg. loss: 4.833
Epochs: 10264 | epoch avg. loss: 0.070 | test avg. loss: 4.774
Epochs: 10265 | epoch avg. loss: 0.079 | test avg. loss: 4.808


 21%|██        | 10270/50000 [15:17<50:51, 13.02it/s]

Epochs: 10266 | epoch avg. loss: 0.074 | test avg. loss: 4.770
Epochs: 10267 | epoch avg. loss: 0.076 | test avg. loss: 5.042
Epochs: 10268 | epoch avg. loss: 0.187 | test avg. loss: 4.840
Epochs: 10269 | epoch avg. loss: 0.084 | test avg. loss: 4.777


 21%|██        | 10274/50000 [15:17<47:39, 13.89it/s]

Epochs: 10270 | epoch avg. loss: 0.086 | test avg. loss: 4.855
Epochs: 10271 | epoch avg. loss: 0.093 | test avg. loss: 4.761
Epochs: 10272 | epoch avg. loss: 0.130 | test avg. loss: 4.877
Epochs: 10273 | epoch avg. loss: 0.099 | test avg. loss: 4.999


 21%|██        | 10276/50000 [15:17<46:30, 14.24it/s]

Epochs: 10274 | epoch avg. loss: 0.100 | test avg. loss: 4.856
Epochs: 10275 | epoch avg. loss: 0.115 | test avg. loss: 5.130
Epochs: 10276 | epoch avg. loss: 0.155 | test avg. loss: 4.789
Epochs: 10277 | epoch avg. loss: 0.143 | test avg. loss: 4.981


 21%|██        | 10280/50000 [15:17<46:28, 14.25it/s]

Epochs: 10278 | epoch avg. loss: 0.143 | test avg. loss: 4.771
Epochs: 10279 | epoch avg. loss: 0.123 | test avg. loss: 4.843
Epochs: 10280 | epoch avg. loss: 0.071 | test avg. loss: 4.849


 21%|██        | 10284/50000 [15:18<51:32, 12.84it/s]

Epochs: 10281 | epoch avg. loss: 0.116 | test avg. loss: 4.910
Epochs: 10282 | epoch avg. loss: 0.107 | test avg. loss: 4.882
Epochs: 10283 | epoch avg. loss: 0.079 | test avg. loss: 4.839


 21%|██        | 10286/50000 [15:18<51:50, 12.77it/s]

Epochs: 10284 | epoch avg. loss: 0.110 | test avg. loss: 4.928
Epochs: 10285 | epoch avg. loss: 0.116 | test avg. loss: 4.770
Epochs: 10286 | epoch avg. loss: 0.088 | test avg. loss: 4.791


 21%|██        | 10290/50000 [15:18<55:01, 12.03it/s]

Epochs: 10287 | epoch avg. loss: 0.108 | test avg. loss: 5.314
Epochs: 10288 | epoch avg. loss: 0.207 | test avg. loss: 5.017
Epochs: 10289 | epoch avg. loss: 0.184 | test avg. loss: 5.093


 21%|██        | 10292/50000 [15:18<54:12, 12.21it/s]

Epochs: 10290 | epoch avg. loss: 0.087 | test avg. loss: 4.896
Epochs: 10291 | epoch avg. loss: 0.226 | test avg. loss: 4.737
Epochs: 10292 | epoch avg. loss: 0.132 | test avg. loss: 4.794


 21%|██        | 10296/50000 [15:19<56:01, 11.81it/s]

Epochs: 10293 | epoch avg. loss: 0.141 | test avg. loss: 4.544
Epochs: 10294 | epoch avg. loss: 0.123 | test avg. loss: 4.712
Epochs: 10295 | epoch avg. loss: 0.125 | test avg. loss: 4.890


 21%|██        | 10298/50000 [15:19<56:03, 11.80it/s]

Epochs: 10296 | epoch avg. loss: 0.081 | test avg. loss: 4.980
Epochs: 10297 | epoch avg. loss: 0.097 | test avg. loss: 5.018
Epochs: 10298 | epoch avg. loss: 0.113 | test avg. loss: 5.040


 21%|██        | 10298/50000 [15:19<56:03, 11.80it/s]

Epochs: 10299 | epoch avg. loss: 0.069 | test avg. loss: 4.806


 21%|██        | 10302/50000 [15:21<2:41:44,  4.09it/s]

Epochs: 10300 | epoch avg. loss: 0.122 | test avg. loss: 4.721
Epochs: 10301 | epoch avg. loss: 0.132 | test avg. loss: 5.051
Epochs: 10302 | epoch avg. loss: 0.138 | test avg. loss: 4.832


 21%|██        | 10306/50000 [15:21<1:48:35,  6.09it/s]

Epochs: 10303 | epoch avg. loss: 0.205 | test avg. loss: 5.151
Epochs: 10304 | epoch avg. loss: 0.257 | test avg. loss: 4.880
Epochs: 10305 | epoch avg. loss: 0.140 | test avg. loss: 4.899


 21%|██        | 10308/50000 [15:21<1:34:58,  6.97it/s]

Epochs: 10306 | epoch avg. loss: 0.107 | test avg. loss: 5.001
Epochs: 10307 | epoch avg. loss: 0.113 | test avg. loss: 4.966
Epochs: 10308 | epoch avg. loss: 0.142 | test avg. loss: 5.159


 21%|██        | 10312/50000 [15:22<1:15:06,  8.81it/s]

Epochs: 10309 | epoch avg. loss: 0.111 | test avg. loss: 4.871
Epochs: 10310 | epoch avg. loss: 0.077 | test avg. loss: 4.853
Epochs: 10311 | epoch avg. loss: 0.098 | test avg. loss: 4.691


 21%|██        | 10314/50000 [15:22<1:10:40,  9.36it/s]

Epochs: 10312 | epoch avg. loss: 0.127 | test avg. loss: 4.723
Epochs: 10313 | epoch avg. loss: 0.068 | test avg. loss: 4.839
Epochs: 10314 | epoch avg. loss: 0.078 | test avg. loss: 4.986


                                                     

Epochs: 10315 | epoch avg. loss: 0.084 | test avg. loss: 5.134
Epochs: 10316 | epoch avg. loss: 0.083 | test avg. loss: 5.024
Epochs: 10317 | epoch avg. loss: 0.075 | test avg. loss: 4.926


 21%|██        | 10320/50000 [15:22<54:12, 12.20it/s]

Epochs: 10318 | epoch avg. loss: 0.079 | test avg. loss: 4.875
Epochs: 10319 | epoch avg. loss: 0.090 | test avg. loss: 4.708
Epochs: 10320 | epoch avg. loss: 0.081 | test avg. loss: 4.879


 21%|██        | 10324/50000 [15:23<57:33, 11.49it/s]

Epochs: 10321 | epoch avg. loss: 0.083 | test avg. loss: 4.849
Epochs: 10322 | epoch avg. loss: 0.090 | test avg. loss: 4.908
Epochs: 10323 | epoch avg. loss: 0.068 | test avg. loss: 4.901


 21%|██        | 10326/50000 [15:23<58:23, 11.32it/s]

Epochs: 10324 | epoch avg. loss: 0.070 | test avg. loss: 4.885
Epochs: 10325 | epoch avg. loss: 0.068 | test avg. loss: 4.857
Epochs: 10326 | epoch avg. loss: 0.063 | test avg. loss: 4.855


 21%|██        | 10330/50000 [15:23<51:56, 12.73it/s]

Epochs: 10327 | epoch avg. loss: 0.069 | test avg. loss: 4.827
Epochs: 10328 | epoch avg. loss: 0.093 | test avg. loss: 4.992
Epochs: 10329 | epoch avg. loss: 0.133 | test avg. loss: 4.897
Epochs: 10330 | epoch avg. loss: 0.075 | test avg. loss: 4.928


                                                     

Epochs: 10331 | epoch avg. loss: 0.063 | test avg. loss: 4.885
Epochs: 10332 | epoch avg. loss: 0.081 | test avg. loss: 4.857
Epochs: 10333 | epoch avg. loss: 0.071 | test avg. loss: 4.940


 21%|██        | 10336/50000 [15:24<49:18, 13.41it/s]

Epochs: 10334 | epoch avg. loss: 0.079 | test avg. loss: 4.805
Epochs: 10335 | epoch avg. loss: 0.129 | test avg. loss: 4.788
Epochs: 10336 | epoch avg. loss: 0.067 | test avg. loss: 4.778


 21%|██        | 10340/50000 [15:24<47:10, 14.01it/s]

Epochs: 10337 | epoch avg. loss: 0.063 | test avg. loss: 4.855
Epochs: 10338 | epoch avg. loss: 0.112 | test avg. loss: 5.080
Epochs: 10339 | epoch avg. loss: 0.124 | test avg. loss: 4.913
Epochs: 10340 | epoch avg. loss: 0.067 | test avg. loss: 4.906


 21%|██        | 10344/50000 [15:24<45:54, 14.40it/s]

Epochs: 10341 | epoch avg. loss: 0.062 | test avg. loss: 4.787
Epochs: 10342 | epoch avg. loss: 0.091 | test avg. loss: 4.794
Epochs: 10343 | epoch avg. loss: 0.085 | test avg. loss: 5.088
Epochs: 10344 | epoch avg. loss: 0.138 | test avg. loss: 4.860


                                                     

Epochs: 10345 | epoch avg. loss: 0.192 | test avg. loss: 5.031
Epochs: 10346 | epoch avg. loss: 0.157 | test avg. loss: 4.841
Epochs: 10347 | epoch avg. loss: 0.105 | test avg. loss: 4.828


 21%|██        | 10350/50000 [15:24<45:37, 14.48it/s]

Epochs: 10348 | epoch avg. loss: 0.110 | test avg. loss: 5.027
Epochs: 10349 | epoch avg. loss: 0.142 | test avg. loss: 4.982
Epochs: 10350 | epoch avg. loss: 0.079 | test avg. loss: 4.895


 21%|██        | 10354/50000 [15:25<45:20, 14.57it/s]

Epochs: 10351 | epoch avg. loss: 0.092 | test avg. loss: 4.892
Epochs: 10352 | epoch avg. loss: 0.079 | test avg. loss: 4.737
Epochs: 10353 | epoch avg. loss: 0.092 | test avg. loss: 4.683
Epochs: 10354 | epoch avg. loss: 0.191 | test avg. loss: 4.985




Epochs: 10355 | epoch avg. loss: 0.177 | test avg. loss: 4.807
Epochs: 10356 | epoch avg. loss: 0.385 | test avg. loss: 4.753
Epochs: 10357 | epoch avg. loss: 0.177 | test avg. loss: 5.114


 21%|██        | 10362/50000 [15:25<43:18, 15.26it/s]

Epochs: 10358 | epoch avg. loss: 0.294 | test avg. loss: 5.237
Epochs: 10359 | epoch avg. loss: 0.872 | test avg. loss: 6.001
Epochs: 10360 | epoch avg. loss: 0.645 | test avg. loss: 5.175
Epochs: 10361 | epoch avg. loss: 0.932 | test avg. loss: 5.030


 21%|██        | 10364/50000 [15:25<43:16, 15.27it/s]

Epochs: 10362 | epoch avg. loss: 0.640 | test avg. loss: 4.754
Epochs: 10363 | epoch avg. loss: 0.513 | test avg. loss: 4.836
Epochs: 10364 | epoch avg. loss: 0.785 | test avg. loss: 7.376


 21%|██        | 10368/50000 [15:26<44:12, 14.94it/s]

Epochs: 10365 | epoch avg. loss: 1.415 | test avg. loss: 6.174
Epochs: 10366 | epoch avg. loss: 1.890 | test avg. loss: 5.759
Epochs: 10367 | epoch avg. loss: 0.772 | test avg. loss: 4.552


 21%|██        | 10372/50000 [15:26<44:05, 14.98it/s]

Epochs: 10368 | epoch avg. loss: 0.779 | test avg. loss: 4.395
Epochs: 10369 | epoch avg. loss: 0.654 | test avg. loss: 5.755
Epochs: 10370 | epoch avg. loss: 0.835 | test avg. loss: 4.677
Epochs: 10371 | epoch avg. loss: 0.240 | test avg. loss: 5.381


 21%|██        | 10374/50000 [15:26<45:14, 14.60it/s]

Epochs: 10372 | epoch avg. loss: 0.248 | test avg. loss: 5.077
Epochs: 10373 | epoch avg. loss: 0.241 | test avg. loss: 5.348
Epochs: 10374 | epoch avg. loss: 0.159 | test avg. loss: 5.026


 21%|██        | 10378/50000 [15:26<45:01, 14.67it/s]

Epochs: 10375 | epoch avg. loss: 0.348 | test avg. loss: 5.033
Epochs: 10376 | epoch avg. loss: 0.120 | test avg. loss: 4.739
Epochs: 10377 | epoch avg. loss: 0.228 | test avg. loss: 4.929


 21%|██        | 10380/50000 [15:27<51:50, 12.74it/s]

Epochs: 10378 | epoch avg. loss: 0.253 | test avg. loss: 4.938
Epochs: 10379 | epoch avg. loss: 0.162 | test avg. loss: 4.864
Epochs: 10380 | epoch avg. loss: 0.209 | test avg. loss: 4.958


 21%|██        | 10384/50000 [15:27<48:59, 13.48it/s]

Epochs: 10381 | epoch avg. loss: 0.152 | test avg. loss: 5.025
Epochs: 10382 | epoch avg. loss: 0.241 | test avg. loss: 5.891
Epochs: 10383 | epoch avg. loss: 0.489 | test avg. loss: 5.213


 21%|██        | 10388/50000 [15:27<45:15, 14.59it/s]

Epochs: 10384 | epoch avg. loss: 0.218 | test avg. loss: 5.319
Epochs: 10385 | epoch avg. loss: 0.300 | test avg. loss: 4.973
Epochs: 10386 | epoch avg. loss: 0.221 | test avg. loss: 4.703
Epochs: 10387 | epoch avg. loss: 0.241 | test avg. loss: 5.530


 21%|██        | 10390/50000 [15:27<48:30, 13.61it/s]

Epochs: 10388 | epoch avg. loss: 0.412 | test avg. loss: 4.999
Epochs: 10389 | epoch avg. loss: 0.359 | test avg. loss: 5.197
Epochs: 10390 | epoch avg. loss: 0.196 | test avg. loss: 4.927


 21%|██        | 10394/50000 [15:28<49:38, 13.30it/s]

Epochs: 10391 | epoch avg. loss: 0.162 | test avg. loss: 4.732
Epochs: 10392 | epoch avg. loss: 0.150 | test avg. loss: 4.893
Epochs: 10393 | epoch avg. loss: 0.145 | test avg. loss: 4.778


 21%|██        | 10396/50000 [15:28<48:19, 13.66it/s]

Epochs: 10394 | epoch avg. loss: 0.157 | test avg. loss: 4.902
Epochs: 10395 | epoch avg. loss: 0.149 | test avg. loss: 5.244
Epochs: 10396 | epoch avg. loss: 0.196 | test avg. loss: 4.958


 21%|██        | 10398/50000 [15:28<51:18, 12.86it/s]

Epochs: 10397 | epoch avg. loss: 0.280 | test avg. loss: 4.976
Epochs: 10398 | epoch avg. loss: 0.200 | test avg. loss: 4.755
Epochs: 10399 | epoch avg. loss: 0.122 | test avg. loss: 4.696


 21%|██        | 10402/50000 [15:30<2:21:54,  4.65it/s]

Epochs: 10400 | epoch avg. loss: 0.155 | test avg. loss: 4.875
Epochs: 10401 | epoch avg. loss: 0.088 | test avg. loss: 4.967
Epochs: 10402 | epoch avg. loss: 0.075 | test avg. loss: 5.041


 21%|██        | 10406/50000 [15:30<1:36:15,  6.86it/s]

Epochs: 10403 | epoch avg. loss: 0.092 | test avg. loss: 4.925
Epochs: 10404 | epoch avg. loss: 0.071 | test avg. loss: 4.840
Epochs: 10405 | epoch avg. loss: 0.074 | test avg. loss: 4.787




Epochs: 10406 | epoch avg. loss: 0.084 | test avg. loss: 4.857
Epochs: 10407 | epoch avg. loss: 0.084 | test avg. loss: 4.820
Epochs: 10408 | epoch avg. loss: 0.102 | test avg. loss: 5.114




Epochs: 10409 | epoch avg. loss: 0.202 | test avg. loss: 4.982
Epochs: 10410 | epoch avg. loss: 0.109 | test avg. loss: 4.760
Epochs: 10411 | epoch avg. loss: 0.079 | test avg. loss: 4.836


 21%|██        | 10414/50000 [15:30<58:47, 11.22it/s]

Epochs: 10412 | epoch avg. loss: 0.105 | test avg. loss: 4.686
Epochs: 10413 | epoch avg. loss: 0.152 | test avg. loss: 4.774
Epochs: 10414 | epoch avg. loss: 0.227 | test avg. loss: 4.970


 21%|██        | 10418/50000 [15:31<54:39, 12.07it/s]

Epochs: 10415 | epoch avg. loss: 0.131 | test avg. loss: 4.691
Epochs: 10416 | epoch avg. loss: 0.134 | test avg. loss: 4.678
Epochs: 10417 | epoch avg. loss: 0.077 | test avg. loss: 4.598


 21%|██        | 10422/50000 [15:31<49:37, 13.29it/s]

Epochs: 10418 | epoch avg. loss: 0.093 | test avg. loss: 4.647
Epochs: 10419 | epoch avg. loss: 0.086 | test avg. loss: 4.893
Epochs: 10420 | epoch avg. loss: 0.097 | test avg. loss: 4.779
Epochs: 10421 | epoch avg. loss: 0.118 | test avg. loss: 4.985


 21%|██        | 10426/50000 [15:31<45:45, 14.41it/s]

Epochs: 10422 | epoch avg. loss: 0.151 | test avg. loss: 4.755
Epochs: 10423 | epoch avg. loss: 0.150 | test avg. loss: 4.697
Epochs: 10424 | epoch avg. loss: 0.130 | test avg. loss: 5.032
Epochs: 10425 | epoch avg. loss: 0.187 | test avg. loss: 4.907


 21%|██        | 10428/50000 [15:31<46:56, 14.05it/s]

Epochs: 10426 | epoch avg. loss: 0.249 | test avg. loss: 5.454
Epochs: 10427 | epoch avg. loss: 0.407 | test avg. loss: 5.084
Epochs: 10428 | epoch avg. loss: 0.429 | test avg. loss: 4.952


 21%|██        | 10432/50000 [15:32<51:56, 12.70it/s]

Epochs: 10429 | epoch avg. loss: 0.209 | test avg. loss: 4.966
Epochs: 10430 | epoch avg. loss: 0.206 | test avg. loss: 4.415
Epochs: 10431 | epoch avg. loss: 0.278 | test avg. loss: 4.684


 21%|██        | 10434/50000 [15:32<49:58, 13.20it/s]

Epochs: 10432 | epoch avg. loss: 0.149 | test avg. loss: 4.946
Epochs: 10433 | epoch avg. loss: 0.184 | test avg. loss: 5.282
Epochs: 10434 | epoch avg. loss: 0.400 | test avg. loss: 5.645


 21%|██        | 10438/50000 [15:32<51:05, 12.91it/s]

Epochs: 10435 | epoch avg. loss: 0.324 | test avg. loss: 4.914
Epochs: 10436 | epoch avg. loss: 0.170 | test avg. loss: 4.771
Epochs: 10437 | epoch avg. loss: 0.116 | test avg. loss: 4.753


 21%|██        | 10440/50000 [15:32<51:13, 12.87it/s]

Epochs: 10438 | epoch avg. loss: 0.111 | test avg. loss: 4.742
Epochs: 10439 | epoch avg. loss: 0.112 | test avg. loss: 5.127
Epochs: 10440 | epoch avg. loss: 0.107 | test avg. loss: 5.039


 21%|██        | 10444/50000 [15:33<56:27, 11.68it/s]

Epochs: 10441 | epoch avg. loss: 0.074 | test avg. loss: 5.002
Epochs: 10442 | epoch avg. loss: 0.077 | test avg. loss: 5.004
Epochs: 10443 | epoch avg. loss: 0.089 | test avg. loss: 4.727


 21%|██        | 10446/50000 [15:33<55:39, 11.84it/s]

Epochs: 10444 | epoch avg. loss: 0.095 | test avg. loss: 4.730
Epochs: 10445 | epoch avg. loss: 0.100 | test avg. loss: 4.930
Epochs: 10446 | epoch avg. loss: 0.119 | test avg. loss: 4.750


 21%|██        | 10450/50000 [15:33<51:30, 12.80it/s]

Epochs: 10447 | epoch avg. loss: 0.134 | test avg. loss: 4.827
Epochs: 10448 | epoch avg. loss: 0.081 | test avg. loss: 4.983
Epochs: 10449 | epoch avg. loss: 0.085 | test avg. loss: 4.824


 21%|██        | 10452/50000 [15:33<49:31, 13.31it/s]

Epochs: 10450 | epoch avg. loss: 0.227 | test avg. loss: 4.787
Epochs: 10451 | epoch avg. loss: 0.069 | test avg. loss: 4.733
Epochs: 10452 | epoch avg. loss: 0.078 | test avg. loss: 4.672


 21%|██        | 10456/50000 [15:34<53:49, 12.24it/s]

Epochs: 10453 | epoch avg. loss: 0.074 | test avg. loss: 4.749
Epochs: 10454 | epoch avg. loss: 0.078 | test avg. loss: 4.686
Epochs: 10455 | epoch avg. loss: 0.070 | test avg. loss: 4.726


 21%|██        | 10460/50000 [15:34<48:16, 13.65it/s]

Epochs: 10456 | epoch avg. loss: 0.069 | test avg. loss: 4.865
Epochs: 10457 | epoch avg. loss: 0.077 | test avg. loss: 4.778
Epochs: 10458 | epoch avg. loss: 0.080 | test avg. loss: 5.085
Epochs: 10459 | epoch avg. loss: 0.164 | test avg. loss: 4.691


 21%|██        | 10464/50000 [15:34<43:44, 15.06it/s]

Epochs: 10460 | epoch avg. loss: 0.178 | test avg. loss: 4.622
Epochs: 10461 | epoch avg. loss: 0.082 | test avg. loss: 4.568
Epochs: 10462 | epoch avg. loss: 0.087 | test avg. loss: 4.572
Epochs: 10463 | epoch avg. loss: 0.080 | test avg. loss: 4.848


 21%|██        | 10466/50000 [15:34<44:13, 14.90it/s]

Epochs: 10464 | epoch avg. loss: 0.137 | test avg. loss: 4.846
Epochs: 10465 | epoch avg. loss: 0.071 | test avg. loss: 4.944
Epochs: 10466 | epoch avg. loss: 0.072 | test avg. loss: 4.908




Epochs: 10467 | epoch avg. loss: 0.065 | test avg. loss: 4.875
Epochs: 10468 | epoch avg. loss: 0.062 | test avg. loss: 4.759


 21%|██        | 10472/50000 [15:35<55:45, 11.81it/s]

Epochs: 10469 | epoch avg. loss: 0.065 | test avg. loss: 4.825
Epochs: 10470 | epoch avg. loss: 0.090 | test avg. loss: 4.666
Epochs: 10471 | epoch avg. loss: 0.079 | test avg. loss: 4.631


 21%|██        | 10474/50000 [15:35<56:06, 11.74it/s]

Epochs: 10472 | epoch avg. loss: 0.075 | test avg. loss: 4.747
Epochs: 10473 | epoch avg. loss: 0.064 | test avg. loss: 4.774
Epochs: 10474 | epoch avg. loss: 0.065 | test avg. loss: 4.981


 21%|██        | 10478/50000 [15:35<54:05, 12.18it/s]

Epochs: 10475 | epoch avg. loss: 0.155 | test avg. loss: 4.840
Epochs: 10476 | epoch avg. loss: 0.078 | test avg. loss: 4.738
Epochs: 10477 | epoch avg. loss: 0.093 | test avg. loss: 5.014


                                                     

Epochs: 10478 | epoch avg. loss: 0.136 | test avg. loss: 4.849
Epochs: 10479 | epoch avg. loss: 0.120 | test avg. loss: 4.892
Epochs: 10480 | epoch avg. loss: 0.123 | test avg. loss: 5.224


 21%|██        | 10484/50000 [15:36<51:12, 12.86it/s]

Epochs: 10481 | epoch avg. loss: 0.218 | test avg. loss: 4.834
Epochs: 10482 | epoch avg. loss: 0.223 | test avg. loss: 4.876
Epochs: 10483 | epoch avg. loss: 0.103 | test avg. loss: 4.943


 21%|██        | 10486/50000 [15:36<50:04, 13.15it/s]

Epochs: 10484 | epoch avg. loss: 0.126 | test avg. loss: 4.890
Epochs: 10485 | epoch avg. loss: 0.189 | test avg. loss: 5.190
Epochs: 10486 | epoch avg. loss: 0.260 | test avg. loss: 4.697


                                                     

Epochs: 10487 | epoch avg. loss: 0.143 | test avg. loss: 4.630
Epochs: 10488 | epoch avg. loss: 0.157 | test avg. loss: 4.980
Epochs: 10489 | epoch avg. loss: 0.147 | test avg. loss: 4.710


 21%|██        | 10492/50000 [15:36<49:05, 13.41it/s]

Epochs: 10490 | epoch avg. loss: 0.162 | test avg. loss: 4.676
Epochs: 10491 | epoch avg. loss: 0.162 | test avg. loss: 4.759
Epochs: 10492 | epoch avg. loss: 0.176 | test avg. loss: 4.629


 21%|██        | 10496/50000 [15:37<51:08, 12.87it/s]

Epochs: 10493 | epoch avg. loss: 0.243 | test avg. loss: 4.876
Epochs: 10494 | epoch avg. loss: 0.154 | test avg. loss: 5.019
Epochs: 10495 | epoch avg. loss: 0.104 | test avg. loss: 5.031


 21%|██        | 10498/50000 [15:37<52:11, 12.61it/s]

Epochs: 10496 | epoch avg. loss: 0.161 | test avg. loss: 5.260
Epochs: 10497 | epoch avg. loss: 0.199 | test avg. loss: 4.948
Epochs: 10498 | epoch avg. loss: 0.173 | test avg. loss: 4.972


 21%|██        | 10498/50000 [15:37<52:11, 12.61it/s]

Epochs: 10499 | epoch avg. loss: 0.446 | test avg. loss: 5.252


 21%|██        | 10502/50000 [15:39<2:29:53,  4.39it/s]

Epochs: 10500 | epoch avg. loss: 0.574 | test avg. loss: 4.884
Epochs: 10501 | epoch avg. loss: 0.224 | test avg. loss: 5.218
Epochs: 10502 | epoch avg. loss: 0.491 | test avg. loss: 6.325


 21%|██        | 10506/50000 [15:39<1:43:07,  6.38it/s]

Epochs: 10503 | epoch avg. loss: 0.799 | test avg. loss: 5.400
Epochs: 10504 | epoch avg. loss: 0.903 | test avg. loss: 5.416
Epochs: 10505 | epoch avg. loss: 0.442 | test avg. loss: 5.915


 21%|██        | 10508/50000 [15:39<1:28:30,  7.44it/s]

Epochs: 10506 | epoch avg. loss: 0.409 | test avg. loss: 5.015
Epochs: 10507 | epoch avg. loss: 0.416 | test avg. loss: 4.721
Epochs: 10508 | epoch avg. loss: 0.122 | test avg. loss: 4.406


 21%|██        | 10512/50000 [15:39<1:11:29,  9.21it/s]

Epochs: 10509 | epoch avg. loss: 0.286 | test avg. loss: 4.580
Epochs: 10510 | epoch avg. loss: 0.262 | test avg. loss: 5.298
Epochs: 10511 | epoch avg. loss: 0.385 | test avg. loss: 4.798


 21%|██        | 10514/50000 [15:40<1:05:46, 10.01it/s]

Epochs: 10512 | epoch avg. loss: 0.494 | test avg. loss: 4.694
Epochs: 10513 | epoch avg. loss: 0.243 | test avg. loss: 4.984
Epochs: 10514 | epoch avg. loss: 0.228 | test avg. loss: 4.648




Epochs: 10515 | epoch avg. loss: 0.416 | test avg. loss: 4.922
Epochs: 10516 | epoch avg. loss: 0.331 | test avg. loss: 4.886
Epochs: 10517 | epoch avg. loss: 0.164 | test avg. loss: 4.736


 21%|██        | 10522/50000 [15:40<49:31, 13.29it/s]

Epochs: 10518 | epoch avg. loss: 0.187 | test avg. loss: 5.092
Epochs: 10519 | epoch avg. loss: 0.176 | test avg. loss: 4.864
Epochs: 10520 | epoch avg. loss: 0.097 | test avg. loss: 4.772
Epochs: 10521 | epoch avg. loss: 0.138 | test avg. loss: 5.064


 21%|██        | 10524/50000 [15:40<50:41, 12.98it/s]

Epochs: 10522 | epoch avg. loss: 0.216 | test avg. loss: 4.733
Epochs: 10523 | epoch avg. loss: 0.106 | test avg. loss: 4.768
Epochs: 10524 | epoch avg. loss: 0.083 | test avg. loss: 4.891


 21%|██        | 10528/50000 [15:41<48:53, 13.45it/s]

Epochs: 10525 | epoch avg. loss: 0.089 | test avg. loss: 4.773
Epochs: 10526 | epoch avg. loss: 0.127 | test avg. loss: 5.054
Epochs: 10527 | epoch avg. loss: 0.157 | test avg. loss: 4.815


 21%|██        | 10530/50000 [15:41<49:02, 13.42it/s]

Epochs: 10528 | epoch avg. loss: 0.131 | test avg. loss: 4.873
Epochs: 10529 | epoch avg. loss: 0.128 | test avg. loss: 5.053
Epochs: 10530 | epoch avg. loss: 0.169 | test avg. loss: 4.669


 21%|██        | 10534/50000 [15:41<51:02, 12.89it/s]

Epochs: 10531 | epoch avg. loss: 0.133 | test avg. loss: 4.645
Epochs: 10532 | epoch avg. loss: 0.114 | test avg. loss: 4.795
Epochs: 10533 | epoch avg. loss: 0.065 | test avg. loss: 4.825


 21%|██        | 10536/50000 [15:41<52:10, 12.61it/s]

Epochs: 10534 | epoch avg. loss: 0.118 | test avg. loss: 4.762
Epochs: 10535 | epoch avg. loss: 0.092 | test avg. loss: 4.837
Epochs: 10536 | epoch avg. loss: 0.113 | test avg. loss: 4.553


 21%|██        | 10540/50000 [15:42<49:57, 13.17it/s]

Epochs: 10537 | epoch avg. loss: 0.094 | test avg. loss: 4.576
Epochs: 10538 | epoch avg. loss: 0.098 | test avg. loss: 5.005
Epochs: 10539 | epoch avg. loss: 0.168 | test avg. loss: 4.794
Epochs: 10540 | epoch avg. loss: 0.088 | test avg. loss: 4.933


 21%|██        | 10544/50000 [15:42<50:16, 13.08it/s]

Epochs: 10541 | epoch avg. loss: 0.083 | test avg. loss: 4.819
Epochs: 10542 | epoch avg. loss: 0.094 | test avg. loss: 4.755
Epochs: 10543 | epoch avg. loss: 0.072 | test avg. loss: 4.702


 21%|██        | 10548/50000 [15:42<47:26, 13.86it/s]

Epochs: 10544 | epoch avg. loss: 0.066 | test avg. loss: 4.627
Epochs: 10545 | epoch avg. loss: 0.069 | test avg. loss: 4.857
Epochs: 10546 | epoch avg. loss: 0.122 | test avg. loss: 4.752
Epochs: 10547 | epoch avg. loss: 0.113 | test avg. loss: 4.772


 21%|██        | 10550/50000 [15:42<46:46, 14.06it/s]

Epochs: 10548 | epoch avg. loss: 0.088 | test avg. loss: 5.007
Epochs: 10549 | epoch avg. loss: 0.133 | test avg. loss: 4.875
Epochs: 10550 | epoch avg. loss: 0.446 | test avg. loss: 4.719


 21%|██        | 10554/50000 [15:43<52:32, 12.51it/s]

Epochs: 10551 | epoch avg. loss: 0.080 | test avg. loss: 4.823
Epochs: 10552 | epoch avg. loss: 0.079 | test avg. loss: 4.742
Epochs: 10553 | epoch avg. loss: 0.066 | test avg. loss: 4.793


 21%|██        | 10558/50000 [15:43<46:58, 13.99it/s]

Epochs: 10554 | epoch avg. loss: 0.065 | test avg. loss: 4.940
Epochs: 10555 | epoch avg. loss: 0.096 | test avg. loss: 4.812
Epochs: 10556 | epoch avg. loss: 0.099 | test avg. loss: 4.821
Epochs: 10557 | epoch avg. loss: 0.108 | test avg. loss: 5.497


 21%|██        | 10562/50000 [15:43<44:47, 14.67it/s]

Epochs: 10558 | epoch avg. loss: 0.442 | test avg. loss: 4.765
Epochs: 10559 | epoch avg. loss: 0.285 | test avg. loss: 4.694
Epochs: 10560 | epoch avg. loss: 0.239 | test avg. loss: 5.307
Epochs: 10561 | epoch avg. loss: 0.342 | test avg. loss: 4.817


 21%|██        | 10564/50000 [15:43<45:48, 14.35it/s]

Epochs: 10562 | epoch avg. loss: 0.416 | test avg. loss: 5.375
Epochs: 10563 | epoch avg. loss: 0.508 | test avg. loss: 5.112
Epochs: 10564 | epoch avg. loss: 0.206 | test avg. loss: 4.808


 21%|██        | 10568/50000 [15:44<51:21, 12.80it/s]

Epochs: 10565 | epoch avg. loss: 0.275 | test avg. loss: 5.053
Epochs: 10566 | epoch avg. loss: 0.220 | test avg. loss: 4.719
Epochs: 10567 | epoch avg. loss: 0.145 | test avg. loss: 4.827


 21%|██        | 10570/50000 [15:44<52:55, 12.42it/s]

Epochs: 10568 | epoch avg. loss: 0.128 | test avg. loss: 5.254
Epochs: 10569 | epoch avg. loss: 0.222 | test avg. loss: 4.903
Epochs: 10570 | epoch avg. loss: 0.152 | test avg. loss: 4.792


 21%|██        | 10574/50000 [15:44<49:12, 13.35it/s]

Epochs: 10571 | epoch avg. loss: 0.123 | test avg. loss: 4.908
Epochs: 10572 | epoch avg. loss: 0.150 | test avg. loss: 4.743
Epochs: 10573 | epoch avg. loss: 0.074 | test avg. loss: 5.086
Epochs: 10574 | epoch avg. loss: 0.082 | test avg. loss: 4.996


 21%|██        | 10578/50000 [15:44<45:30, 14.44it/s]

Epochs: 10575 | epoch avg. loss: 0.224 | test avg. loss: 4.943
Epochs: 10576 | epoch avg. loss: 0.131 | test avg. loss: 5.124
Epochs: 10577 | epoch avg. loss: 0.164 | test avg. loss: 4.760


 21%|██        | 10580/50000 [15:45<48:03, 13.67it/s]

Epochs: 10578 | epoch avg. loss: 0.095 | test avg. loss: 4.816
Epochs: 10579 | epoch avg. loss: 0.083 | test avg. loss: 4.918
Epochs: 10580 | epoch avg. loss: 0.103 | test avg. loss: 4.899


 21%|██        | 10584/50000 [15:45<46:52, 14.02it/s]

Epochs: 10581 | epoch avg. loss: 0.076 | test avg. loss: 4.830
Epochs: 10582 | epoch avg. loss: 0.157 | test avg. loss: 5.475
Epochs: 10583 | epoch avg. loss: 0.596 | test avg. loss: 4.890


 21%|██        | 10588/50000 [15:45<45:32, 14.42it/s]

Epochs: 10584 | epoch avg. loss: 0.489 | test avg. loss: 5.157
Epochs: 10585 | epoch avg. loss: 0.547 | test avg. loss: 6.919
Epochs: 10586 | epoch avg. loss: 1.463 | test avg. loss: 5.004
Epochs: 10587 | epoch avg. loss: 1.067 | test avg. loss: 4.676


 21%|██        | 10590/50000 [15:45<47:17, 13.89it/s]

Epochs: 10588 | epoch avg. loss: 0.414 | test avg. loss: 5.526
Epochs: 10589 | epoch avg. loss: 0.533 | test avg. loss: 5.148
Epochs: 10590 | epoch avg. loss: 0.570 | test avg. loss: 5.402


 21%|██        | 10594/50000 [15:46<52:45, 12.45it/s]

Epochs: 10591 | epoch avg. loss: 0.700 | test avg. loss: 5.102
Epochs: 10592 | epoch avg. loss: 0.351 | test avg. loss: 5.689
Epochs: 10593 | epoch avg. loss: 0.723 | test avg. loss: 6.885


 21%|██        | 10596/50000 [15:46<50:30, 13.00it/s]

Epochs: 10594 | epoch avg. loss: 1.295 | test avg. loss: 5.101
Epochs: 10595 | epoch avg. loss: 0.860 | test avg. loss: 4.574
Epochs: 10596 | epoch avg. loss: 0.503 | test avg. loss: 6.810
Epochs: 10597 | epoch avg. loss: 1.207 | test avg. loss: 4.820


 21%|██        | 10598/50000 [15:46<48:20, 13.58it/s]

Epochs: 10598 | epoch avg. loss: 0.952 | test avg. loss: 5.236
Epochs: 10599 | epoch avg. loss: 0.901 | test avg. loss: 5.026


 21%|██        | 10602/50000 [15:47<2:18:14,  4.75it/s]

Epochs: 10600 | epoch avg. loss: 0.554 | test avg. loss: 5.667
Epochs: 10601 | epoch avg. loss: 1.025 | test avg. loss: 6.205
Epochs: 10602 | epoch avg. loss: 0.701 | test avg. loss: 4.859


 21%|██        | 10606/50000 [15:48<1:34:30,  6.95it/s]

Epochs: 10603 | epoch avg. loss: 0.839 | test avg. loss: 4.580
Epochs: 10604 | epoch avg. loss: 0.373 | test avg. loss: 5.190
Epochs: 10605 | epoch avg. loss: 0.487 | test avg. loss: 4.247


                                                       

Epochs: 10606 | epoch avg. loss: 0.176 | test avg. loss: 4.525
Epochs: 10607 | epoch avg. loss: 0.282 | test avg. loss: 4.853
Epochs: 10608 | epoch avg. loss: 0.289 | test avg. loss: 4.686


 21%|██        | 10612/50000 [15:48<1:02:31, 10.50it/s]

Epochs: 10609 | epoch avg. loss: 0.190 | test avg. loss: 5.166
Epochs: 10610 | epoch avg. loss: 0.370 | test avg. loss: 4.789
Epochs: 10611 | epoch avg. loss: 0.147 | test avg. loss: 4.572


 21%|██        | 10614/50000 [15:48<56:52, 11.54it/s]

Epochs: 10612 | epoch avg. loss: 0.170 | test avg. loss: 4.919
Epochs: 10613 | epoch avg. loss: 0.301 | test avg. loss: 4.531
Epochs: 10614 | epoch avg. loss: 0.238 | test avg. loss: 4.574


 21%|██        | 10616/50000 [15:49<58:40, 11.19it/s]

Epochs: 10615 | epoch avg. loss: 0.115 | test avg. loss: 4.842
Epochs: 10616 | epoch avg. loss: 0.105 | test avg. loss: 4.703
Epochs: 10617 | epoch avg. loss: 0.077 | test avg. loss: 4.768


 21%|██        | 10622/50000 [15:49<51:43, 12.69it/s]

Epochs: 10618 | epoch avg. loss: 0.074 | test avg. loss: 4.774
Epochs: 10619 | epoch avg. loss: 0.256 | test avg. loss: 4.810
Epochs: 10620 | epoch avg. loss: 0.103 | test avg. loss: 5.047
Epochs: 10621 | epoch avg. loss: 0.161 | test avg. loss: 4.701


                                                     

Epochs: 10622 | epoch avg. loss: 0.105 | test avg. loss: 4.699
Epochs: 10623 | epoch avg. loss: 0.079 | test avg. loss: 4.507
Epochs: 10624 | epoch avg. loss: 0.105 | test avg. loss: 4.544


 21%|██▏       | 10628/50000 [15:49<51:00, 12.86it/s]

Epochs: 10625 | epoch avg. loss: 0.079 | test avg. loss: 4.650
Epochs: 10626 | epoch avg. loss: 0.070 | test avg. loss: 5.177
Epochs: 10627 | epoch avg. loss: 0.167 | test avg. loss: 5.186


 21%|██▏       | 10630/50000 [15:50<51:38, 12.70it/s]

Epochs: 10628 | epoch avg. loss: 0.670 | test avg. loss: 5.632
Epochs: 10629 | epoch avg. loss: 1.426 | test avg. loss: 4.682
Epochs: 10630 | epoch avg. loss: 1.234 | test avg. loss: 4.545


 21%|██▏       | 10634/50000 [15:50<51:06, 12.84it/s]

Epochs: 10631 | epoch avg. loss: 1.385 | test avg. loss: 8.046
Epochs: 10632 | epoch avg. loss: 2.791 | test avg. loss: 7.555
Epochs: 10633 | epoch avg. loss: 3.588 | test avg. loss: 6.727


 21%|██▏       | 10636/50000 [15:50<51:25, 12.76it/s]

Epochs: 10634 | epoch avg. loss: 4.000 | test avg. loss: 5.041
Epochs: 10635 | epoch avg. loss: 1.962 | test avg. loss: 6.426
Epochs: 10636 | epoch avg. loss: 2.196 | test avg. loss: 9.905


 21%|██▏       | 10640/50000 [15:50<49:51, 13.16it/s]

Epochs: 10637 | epoch avg. loss: 2.361 | test avg. loss: 8.127
Epochs: 10638 | epoch avg. loss: 3.781 | test avg. loss: 9.853
Epochs: 10639 | epoch avg. loss: 3.600 | test avg. loss: 5.355


 21%|██▏       | 10642/50000 [15:51<50:37, 12.96it/s]

Epochs: 10640 | epoch avg. loss: 1.579 | test avg. loss: 5.666
Epochs: 10641 | epoch avg. loss: 1.209 | test avg. loss: 6.806
Epochs: 10642 | epoch avg. loss: 1.037 | test avg. loss: 5.361


 21%|██▏       | 10646/50000 [15:51<47:42, 13.75it/s]

Epochs: 10643 | epoch avg. loss: 0.703 | test avg. loss: 5.534
Epochs: 10644 | epoch avg. loss: 0.572 | test avg. loss: 4.109
Epochs: 10645 | epoch avg. loss: 0.740 | test avg. loss: 4.417


 21%|██▏       | 10650/50000 [15:51<46:16, 14.17it/s]

Epochs: 10646 | epoch avg. loss: 0.719 | test avg. loss: 3.986
Epochs: 10647 | epoch avg. loss: 0.332 | test avg. loss: 4.438
Epochs: 10648 | epoch avg. loss: 0.236 | test avg. loss: 5.084
Epochs: 10649 | epoch avg. loss: 0.182 | test avg. loss: 5.164


                                                     

Epochs: 10650 | epoch avg. loss: 0.269 | test avg. loss: 5.320
Epochs: 10651 | epoch avg. loss: 0.217 | test avg. loss: 4.651
Epochs: 10652 | epoch avg. loss: 0.156 | test avg. loss: 4.419


 21%|██▏       | 10656/50000 [15:51<48:09, 13.61it/s]

Epochs: 10653 | epoch avg. loss: 0.111 | test avg. loss: 4.132
Epochs: 10654 | epoch avg. loss: 0.113 | test avg. loss: 4.134
Epochs: 10655 | epoch avg. loss: 0.128 | test avg. loss: 4.610


 21%|██▏       | 10658/50000 [15:52<51:14, 12.80it/s]

Epochs: 10656 | epoch avg. loss: 0.152 | test avg. loss: 4.680
Epochs: 10657 | epoch avg. loss: 0.210 | test avg. loss: 5.409
Epochs: 10658 | epoch avg. loss: 0.283 | test avg. loss: 5.036


 21%|██▏       | 10662/50000 [15:52<52:14, 12.55it/s]

Epochs: 10659 | epoch avg. loss: 0.513 | test avg. loss: 5.932
Epochs: 10660 | epoch avg. loss: 0.807 | test avg. loss: 4.561
Epochs: 10661 | epoch avg. loss: 0.531 | test avg. loss: 4.407


 21%|██▏       | 10664/50000 [15:52<50:48, 12.91it/s]

Epochs: 10662 | epoch avg. loss: 0.240 | test avg. loss: 4.287
Epochs: 10663 | epoch avg. loss: 0.217 | test avg. loss: 4.085
Epochs: 10664 | epoch avg. loss: 0.186 | test avg. loss: 4.380


 21%|██▏       | 10666/50000 [15:52<50:08, 13.08it/s]

Epochs: 10665 | epoch avg. loss: 0.170 | test avg. loss: 4.372
Epochs: 10666 | epoch avg. loss: 0.111 | test avg. loss: 4.986
Epochs: 10667 | epoch avg. loss: 0.172 | test avg. loss: 4.741


 21%|██▏       | 10670/50000 [15:53<55:14, 11.86it/s]

Epochs: 10668 | epoch avg. loss: 0.207 | test avg. loss: 4.934
Epochs: 10669 | epoch avg. loss: 0.121 | test avg. loss: 4.521
Epochs: 10670 | epoch avg. loss: 0.111 | test avg. loss: 4.677


 21%|██▏       | 10674/50000 [15:53<57:03, 11.49it/s]

Epochs: 10671 | epoch avg. loss: 0.160 | test avg. loss: 4.375
Epochs: 10672 | epoch avg. loss: 0.173 | test avg. loss: 4.635
Epochs: 10673 | epoch avg. loss: 0.329 | test avg. loss: 4.538


 21%|██▏       | 10678/50000 [15:53<50:10, 13.06it/s]

Epochs: 10674 | epoch avg. loss: 0.201 | test avg. loss: 4.469
Epochs: 10675 | epoch avg. loss: 0.252 | test avg. loss: 4.957
Epochs: 10676 | epoch avg. loss: 0.220 | test avg. loss: 4.831
Epochs: 10677 | epoch avg. loss: 0.652 | test avg. loss: 5.348


 21%|██▏       | 10680/50000 [15:53<47:58, 13.66it/s]

Epochs: 10678 | epoch avg. loss: 0.457 | test avg. loss: 4.522
Epochs: 10679 | epoch avg. loss: 0.288 | test avg. loss: 4.631
Epochs: 10680 | epoch avg. loss: 0.232 | test avg. loss: 4.963


 21%|██▏       | 10684/50000 [15:54<53:25, 12.27it/s]

Epochs: 10681 | epoch avg. loss: 0.153 | test avg. loss: 4.918
Epochs: 10682 | epoch avg. loss: 0.174 | test avg. loss: 5.240
Epochs: 10683 | epoch avg. loss: 0.250 | test avg. loss: 4.738


 21%|██▏       | 10686/50000 [15:54<50:21, 13.01it/s]

Epochs: 10684 | epoch avg. loss: 0.119 | test avg. loss: 4.560
Epochs: 10685 | epoch avg. loss: 0.102 | test avg. loss: 4.489
Epochs: 10686 | epoch avg. loss: 0.094 | test avg. loss: 4.445


 21%|██▏       | 10690/50000 [15:54<54:37, 11.99it/s]

Epochs: 10687 | epoch avg. loss: 0.087 | test avg. loss: 4.711
Epochs: 10688 | epoch avg. loss: 0.129 | test avg. loss: 4.671
Epochs: 10689 | epoch avg. loss: 0.247 | test avg. loss: 4.669


 21%|██▏       | 10692/50000 [15:54<55:56, 11.71it/s]

Epochs: 10690 | epoch avg. loss: 0.087 | test avg. loss: 4.794
Epochs: 10691 | epoch avg. loss: 0.122 | test avg. loss: 4.538


 21%|██▏       | 10694/50000 [15:55<57:25, 11.41it/s]

Epochs: 10692 | epoch avg. loss: 0.199 | test avg. loss: 4.785
Epochs: 10693 | epoch avg. loss: 0.135 | test avg. loss: 4.569
Epochs: 10694 | epoch avg. loss: 0.288 | test avg. loss: 4.881


 21%|██▏       | 10698/50000 [15:55<52:47, 12.41it/s]

Epochs: 10695 | epoch avg. loss: 0.238 | test avg. loss: 4.749
Epochs: 10696 | epoch avg. loss: 0.132 | test avg. loss: 4.736
Epochs: 10697 | epoch avg. loss: 0.142 | test avg. loss: 4.853


 21%|██▏       | 10698/50000 [15:55<52:47, 12.41it/s]

Epochs: 10698 | epoch avg. loss: 0.117 | test avg. loss: 4.841
Epochs: 10699 | epoch avg. loss: 0.136 | test avg. loss: 4.618


 21%|██▏       | 10702/50000 [15:57<2:28:49,  4.40it/s]

Epochs: 10700 | epoch avg. loss: 0.094 | test avg. loss: 4.460
Epochs: 10701 | epoch avg. loss: 0.139 | test avg. loss: 4.713
Epochs: 10702 | epoch avg. loss: 0.176 | test avg. loss: 4.552


 21%|██▏       | 10706/50000 [15:57<1:40:26,  6.52it/s]

Epochs: 10703 | epoch avg. loss: 0.137 | test avg. loss: 4.724
Epochs: 10704 | epoch avg. loss: 0.094 | test avg. loss: 4.732
Epochs: 10705 | epoch avg. loss: 0.096 | test avg. loss: 4.751


 21%|██▏       | 10708/50000 [15:57<1:23:55,  7.80it/s]

Epochs: 10706 | epoch avg. loss: 0.078 | test avg. loss: 4.786
Epochs: 10707 | epoch avg. loss: 0.074 | test avg. loss: 4.636
Epochs: 10708 | epoch avg. loss: 0.079 | test avg. loss: 4.604


 21%|██▏       | 10712/50000 [15:57<1:10:20,  9.31it/s]

Epochs: 10709 | epoch avg. loss: 0.061 | test avg. loss: 4.528
Epochs: 10710 | epoch avg. loss: 0.063 | test avg. loss: 4.531
Epochs: 10711 | epoch avg. loss: 0.076 | test avg. loss: 4.544


 21%|██▏       | 10714/50000 [15:58<1:07:31,  9.70it/s]

Epochs: 10712 | epoch avg. loss: 0.102 | test avg. loss: 4.670
Epochs: 10713 | epoch avg. loss: 0.062 | test avg. loss: 4.671
Epochs: 10714 | epoch avg. loss: 0.165 | test avg. loss: 4.963


                                                     

Epochs: 10715 | epoch avg. loss: 0.154 | test avg. loss: 4.581
Epochs: 10716 | epoch avg. loss: 0.125 | test avg. loss: 4.721
Epochs: 10717 | epoch avg. loss: 0.182 | test avg. loss: 4.479


 21%|██▏       | 10720/50000 [15:58<56:53, 11.51it/s]

Epochs: 10718 | epoch avg. loss: 0.099 | test avg. loss: 4.371
Epochs: 10719 | epoch avg. loss: 0.098 | test avg. loss: 4.493
Epochs: 10720 | epoch avg. loss: 0.059 | test avg. loss: 4.675




Epochs: 10721 | epoch avg. loss: 0.060 | test avg. loss: 4.751
Epochs: 10722 | epoch avg. loss: 0.063 | test avg. loss: 4.781




Epochs: 10723 | epoch avg. loss: 0.075 | test avg. loss: 4.636
Epochs: 10724 | epoch avg. loss: 0.066 | test avg. loss: 4.450


 21%|██▏       | 10728/50000 [15:59<1:00:10, 10.88it/s]

Epochs: 10725 | epoch avg. loss: 0.066 | test avg. loss: 4.584
Epochs: 10726 | epoch avg. loss: 0.093 | test avg. loss: 4.581
Epochs: 10727 | epoch avg. loss: 0.331 | test avg. loss: 4.717


 21%|██▏       | 10730/50000 [15:59<59:13, 11.05it/s]

Epochs: 10728 | epoch avg. loss: 0.086 | test avg. loss: 4.678
Epochs: 10729 | epoch avg. loss: 0.083 | test avg. loss: 4.615
Epochs: 10730 | epoch avg. loss: 0.092 | test avg. loss: 4.695


 21%|██▏       | 10734/50000 [15:59<56:39, 11.55it/s]

Epochs: 10731 | epoch avg. loss: 0.117 | test avg. loss: 4.382
Epochs: 10732 | epoch avg. loss: 0.164 | test avg. loss: 4.687
Epochs: 10733 | epoch avg. loss: 0.200 | test avg. loss: 4.497


 21%|██▏       | 10736/50000 [16:00<57:17, 11.42it/s]

Epochs: 10734 | epoch avg. loss: 0.233 | test avg. loss: 4.610
Epochs: 10735 | epoch avg. loss: 0.357 | test avg. loss: 5.057
Epochs: 10736 | epoch avg. loss: 0.288 | test avg. loss: 4.618


 21%|██▏       | 10740/50000 [16:00<52:47, 12.39it/s]

Epochs: 10737 | epoch avg. loss: 0.542 | test avg. loss: 4.743
Epochs: 10738 | epoch avg. loss: 0.359 | test avg. loss: 4.504
Epochs: 10739 | epoch avg. loss: 0.124 | test avg. loss: 4.541


 21%|██▏       | 10742/50000 [16:00<49:59, 13.09it/s]

Epochs: 10740 | epoch avg. loss: 0.105 | test avg. loss: 4.765
Epochs: 10741 | epoch avg. loss: 0.122 | test avg. loss: 4.731
Epochs: 10742 | epoch avg. loss: 0.155 | test avg. loss: 4.946


 21%|██▏       | 10746/50000 [16:00<49:07, 13.32it/s]

Epochs: 10743 | epoch avg. loss: 0.107 | test avg. loss: 4.745
Epochs: 10744 | epoch avg. loss: 0.107 | test avg. loss: 4.601
Epochs: 10745 | epoch avg. loss: 0.143 | test avg. loss: 4.580


 21%|██▏       | 10748/50000 [16:01<52:31, 12.46it/s]

Epochs: 10746 | epoch avg. loss: 0.096 | test avg. loss: 4.603
Epochs: 10747 | epoch avg. loss: 0.085 | test avg. loss: 4.522
Epochs: 10748 | epoch avg. loss: 0.097 | test avg. loss: 4.798


 22%|██▏       | 10752/50000 [16:01<55:01, 11.89it/s]

Epochs: 10749 | epoch avg. loss: 0.122 | test avg. loss: 4.608
Epochs: 10750 | epoch avg. loss: 0.062 | test avg. loss: 4.799
Epochs: 10751 | epoch avg. loss: 0.105 | test avg. loss: 4.486


 22%|██▏       | 10756/50000 [16:01<48:48, 13.40it/s]

Epochs: 10752 | epoch avg. loss: 0.172 | test avg. loss: 4.546
Epochs: 10753 | epoch avg. loss: 0.126 | test avg. loss: 4.500
Epochs: 10754 | epoch avg. loss: 0.136 | test avg. loss: 4.567
Epochs: 10755 | epoch avg. loss: 0.106 | test avg. loss: 4.761


 22%|██▏       | 10758/50000 [16:01<47:17, 13.83it/s]

Epochs: 10756 | epoch avg. loss: 0.080 | test avg. loss: 4.607
Epochs: 10757 | epoch avg. loss: 0.073 | test avg. loss: 4.779
Epochs: 10758 | epoch avg. loss: 0.142 | test avg. loss: 4.541


 22%|██▏       | 10762/50000 [16:02<55:05, 11.87it/s]

Epochs: 10759 | epoch avg. loss: 0.074 | test avg. loss: 4.542
Epochs: 10760 | epoch avg. loss: 0.069 | test avg. loss: 4.634
Epochs: 10761 | epoch avg. loss: 0.060 | test avg. loss: 4.647


 22%|██▏       | 10764/50000 [16:02<52:39, 12.42it/s]

Epochs: 10762 | epoch avg. loss: 0.064 | test avg. loss: 4.772
Epochs: 10763 | epoch avg. loss: 0.098 | test avg. loss: 4.626
Epochs: 10764 | epoch avg. loss: 0.059 | test avg. loss: 4.597


 22%|██▏       | 10768/50000 [16:02<48:12, 13.56it/s]

Epochs: 10765 | epoch avg. loss: 0.056 | test avg. loss: 4.597
Epochs: 10766 | epoch avg. loss: 0.058 | test avg. loss: 4.585
Epochs: 10767 | epoch avg. loss: 0.061 | test avg. loss: 4.579
Epochs: 10768 | epoch avg. loss: 0.059 | test avg. loss: 4.676


 22%|██▏       | 10772/50000 [16:02<44:56, 14.55it/s]

Epochs: 10769 | epoch avg. loss: 0.079 | test avg. loss: 4.603
Epochs: 10770 | epoch avg. loss: 0.058 | test avg. loss: 4.539
Epochs: 10771 | epoch avg. loss: 0.055 | test avg. loss: 4.551
Epochs: 10772 | epoch avg. loss: 0.056 | test avg. loss: 4.514


 22%|██▏       | 10776/50000 [16:03<46:37, 14.02it/s]

Epochs: 10773 | epoch avg. loss: 0.084 | test avg. loss: 4.561
Epochs: 10774 | epoch avg. loss: 0.054 | test avg. loss: 4.543
Epochs: 10775 | epoch avg. loss: 0.057 | test avg. loss: 4.618


 22%|██▏       | 10778/50000 [16:03<48:45, 13.41it/s]

Epochs: 10776 | epoch avg. loss: 0.109 | test avg. loss: 4.648
Epochs: 10777 | epoch avg. loss: 0.089 | test avg. loss: 4.499
Epochs: 10778 | epoch avg. loss: 0.058 | test avg. loss: 4.667


 22%|██▏       | 10782/50000 [16:03<51:12, 12.76it/s]

Epochs: 10779 | epoch avg. loss: 0.098 | test avg. loss: 4.561
Epochs: 10780 | epoch avg. loss: 0.062 | test avg. loss: 4.613
Epochs: 10781 | epoch avg. loss: 0.057 | test avg. loss: 4.640




Epochs: 10782 | epoch avg. loss: 0.058 | test avg. loss: 4.554
Epochs: 10783 | epoch avg. loss: 0.071 | test avg. loss: 4.541
Epochs: 10784 | epoch avg. loss: 0.057 | test avg. loss: 4.630


 22%|██▏       | 10788/50000 [16:03<48:52, 13.37it/s]

Epochs: 10785 | epoch avg. loss: 0.066 | test avg. loss: 4.515
Epochs: 10786 | epoch avg. loss: 0.061 | test avg. loss: 4.888
Epochs: 10787 | epoch avg. loss: 0.124 | test avg. loss: 4.699


 22%|██▏       | 10790/50000 [16:04<49:28, 13.21it/s]

Epochs: 10788 | epoch avg. loss: 0.233 | test avg. loss: 5.250
Epochs: 10789 | epoch avg. loss: 0.283 | test avg. loss: 4.685
Epochs: 10790 | epoch avg. loss: 0.235 | test avg. loss: 4.758


 22%|██▏       | 10794/50000 [16:04<48:12, 13.55it/s]

Epochs: 10791 | epoch avg. loss: 0.130 | test avg. loss: 4.615
Epochs: 10792 | epoch avg. loss: 0.109 | test avg. loss: 4.569
Epochs: 10793 | epoch avg. loss: 0.119 | test avg. loss: 4.793
Epochs: 10794 | epoch avg. loss: 0.144 | test avg. loss: 4.602




Epochs: 10795 | epoch avg. loss: 0.087 | test avg. loss: 4.670
Epochs: 10796 | epoch avg. loss: 0.067 | test avg. loss: 4.637
Epochs: 10797 | epoch avg. loss: 0.070 | test avg. loss: 4.714


 22%|██▏       | 10798/50000 [16:04<45:09, 14.47it/s]

Epochs: 10798 | epoch avg. loss: 0.075 | test avg. loss: 4.620
Epochs: 10799 | epoch avg. loss: 0.066 | test avg. loss: 4.689


 22%|██▏       | 10802/50000 [16:07<4:01:13,  2.71it/s]

Epochs: 10800 | epoch avg. loss: 0.103 | test avg. loss: 4.817
Epochs: 10801 | epoch avg. loss: 0.112 | test avg. loss: 4.580
Epochs: 10802 | epoch avg. loss: 0.243 | test avg. loss: 4.799


 22%|██▏       | 10806/50000 [16:08<2:23:36,  4.55it/s]

Epochs: 10803 | epoch avg. loss: 0.220 | test avg. loss: 4.426
Epochs: 10804 | epoch avg. loss: 0.227 | test avg. loss: 4.806
Epochs: 10805 | epoch avg. loss: 0.387 | test avg. loss: 6.180


 22%|██▏       | 10808/50000 [16:08<1:58:39,  5.50it/s]

Epochs: 10806 | epoch avg. loss: 0.712 | test avg. loss: 5.162
Epochs: 10807 | epoch avg. loss: 1.029 | test avg. loss: 4.873
Epochs: 10808 | epoch avg. loss: 0.391 | test avg. loss: 4.799


 22%|██▏       | 10812/50000 [16:08<1:22:32,  7.91it/s]

Epochs: 10809 | epoch avg. loss: 0.474 | test avg. loss: 5.254
Epochs: 10810 | epoch avg. loss: 0.728 | test avg. loss: 6.337
Epochs: 10811 | epoch avg. loss: 1.114 | test avg. loss: 5.354


                                                       

Epochs: 10812 | epoch avg. loss: 1.029 | test avg. loss: 4.732
Epochs: 10813 | epoch avg. loss: 0.478 | test avg. loss: 4.528
Epochs: 10814 | epoch avg. loss: 0.377 | test avg. loss: 4.524


 22%|██▏       | 10818/50000 [16:09<1:00:46, 10.75it/s]

Epochs: 10815 | epoch avg. loss: 0.517 | test avg. loss: 7.371
Epochs: 10816 | epoch avg. loss: 1.420 | test avg. loss: 5.894
Epochs: 10817 | epoch avg. loss: 1.855 | test avg. loss: 5.344


 22%|██▏       | 10820/50000 [16:09<59:47, 10.92it/s]

Epochs: 10818 | epoch avg. loss: 0.572 | test avg. loss: 5.024
Epochs: 10819 | epoch avg. loss: 0.571 | test avg. loss: 4.962
Epochs: 10820 | epoch avg. loss: 0.612 | test avg. loss: 7.177


 22%|██▏       | 10824/50000 [16:09<52:37, 12.41it/s]

Epochs: 10821 | epoch avg. loss: 1.430 | test avg. loss: 5.107
Epochs: 10822 | epoch avg. loss: 1.002 | test avg. loss: 5.643
Epochs: 10823 | epoch avg. loss: 0.993 | test avg. loss: 4.286


 22%|██▏       | 10828/50000 [16:09<48:49, 13.37it/s]

Epochs: 10824 | epoch avg. loss: 0.481 | test avg. loss: 4.235
Epochs: 10825 | epoch avg. loss: 0.408 | test avg. loss: 5.233
Epochs: 10826 | epoch avg. loss: 0.543 | test avg. loss: 5.135
Epochs: 10827 | epoch avg. loss: 0.471 | test avg. loss: 5.562


 22%|██▏       | 10830/50000 [16:10<51:02, 12.79it/s]

Epochs: 10828 | epoch avg. loss: 0.329 | test avg. loss: 5.148
Epochs: 10829 | epoch avg. loss: 0.292 | test avg. loss: 5.010
Epochs: 10830 | epoch avg. loss: 0.129 | test avg. loss: 4.705


 22%|██▏       | 10834/50000 [16:10<48:52, 13.36it/s]

Epochs: 10831 | epoch avg. loss: 0.193 | test avg. loss: 4.504
Epochs: 10832 | epoch avg. loss: 0.256 | test avg. loss: 4.895
Epochs: 10833 | epoch avg. loss: 0.132 | test avg. loss: 4.844


 22%|██▏       | 10836/50000 [16:10<49:04, 13.30it/s]

Epochs: 10834 | epoch avg. loss: 0.297 | test avg. loss: 5.392
Epochs: 10835 | epoch avg. loss: 0.499 | test avg. loss: 4.986
Epochs: 10836 | epoch avg. loss: 0.374 | test avg. loss: 5.086


 22%|██▏       | 10840/50000 [16:10<47:31, 13.73it/s]

Epochs: 10837 | epoch avg. loss: 0.298 | test avg. loss: 5.790
Epochs: 10838 | epoch avg. loss: 0.435 | test avg. loss: 4.781
Epochs: 10839 | epoch avg. loss: 0.319 | test avg. loss: 4.933


 22%|██▏       | 10842/50000 [16:10<47:50, 13.64it/s]

Epochs: 10840 | epoch avg. loss: 0.243 | test avg. loss: 4.401
Epochs: 10841 | epoch avg. loss: 0.105 | test avg. loss: 4.491
Epochs: 10842 | epoch avg. loss: 0.124 | test avg. loss: 4.587




Epochs: 10843 | epoch avg. loss: 0.166 | test avg. loss: 4.759
Epochs: 10844 | epoch avg. loss: 0.095 | test avg. loss: 4.786
Epochs: 10845 | epoch avg. loss: 0.085 | test avg. loss: 4.713


 22%|██▏       | 10848/50000 [16:11<46:56, 13.90it/s]

Epochs: 10846 | epoch avg. loss: 0.081 | test avg. loss: 4.664
Epochs: 10847 | epoch avg. loss: 0.080 | test avg. loss: 4.568
Epochs: 10848 | epoch avg. loss: 0.066 | test avg. loss: 4.599


 22%|██▏       | 10852/50000 [16:11<48:34, 13.43it/s]

Epochs: 10849 | epoch avg. loss: 0.077 | test avg. loss: 4.742
Epochs: 10850 | epoch avg. loss: 0.064 | test avg. loss: 4.786
Epochs: 10851 | epoch avg. loss: 0.067 | test avg. loss: 4.691


 22%|██▏       | 10854/50000 [16:11<51:21, 12.70it/s]

Epochs: 10852 | epoch avg. loss: 0.073 | test avg. loss: 4.682
Epochs: 10853 | epoch avg. loss: 0.074 | test avg. loss: 4.480
Epochs: 10854 | epoch avg. loss: 0.128 | test avg. loss: 4.527


 22%|██▏       | 10856/50000 [16:12<54:49, 11.90it/s]

Epochs: 10855 | epoch avg. loss: 0.066 | test avg. loss: 4.537
Epochs: 10856 | epoch avg. loss: 0.075 | test avg. loss: 4.485


 22%|██▏       | 10860/50000 [16:12<57:26, 11.36it/s]

Epochs: 10857 | epoch avg. loss: 0.082 | test avg. loss: 4.634
Epochs: 10858 | epoch avg. loss: 0.080 | test avg. loss: 4.590
Epochs: 10859 | epoch avg. loss: 0.056 | test avg. loss: 4.592


 22%|██▏       | 10862/50000 [16:12<57:02, 11.44it/s]

Epochs: 10860 | epoch avg. loss: 0.059 | test avg. loss: 4.596
Epochs: 10861 | epoch avg. loss: 0.059 | test avg. loss: 4.508
Epochs: 10862 | epoch avg. loss: 0.062 | test avg. loss: 4.564


 22%|██▏       | 10866/50000 [16:12<50:30, 12.91it/s]

Epochs: 10863 | epoch avg. loss: 0.082 | test avg. loss: 4.529
Epochs: 10864 | epoch avg. loss: 0.065 | test avg. loss: 4.430
Epochs: 10865 | epoch avg. loss: 0.056 | test avg. loss: 4.550


 22%|██▏       | 10868/50000 [16:13<53:38, 12.16it/s]

Epochs: 10866 | epoch avg. loss: 0.097 | test avg. loss: 4.670
Epochs: 10867 | epoch avg. loss: 0.093 | test avg. loss: 4.532
Epochs: 10868 | epoch avg. loss: 0.075 | test avg. loss: 4.574


 22%|██▏       | 10872/50000 [16:13<51:47, 12.59it/s]

Epochs: 10869 | epoch avg. loss: 0.063 | test avg. loss: 4.730
Epochs: 10870 | epoch avg. loss: 0.065 | test avg. loss: 4.591
Epochs: 10871 | epoch avg. loss: 0.109 | test avg. loss: 4.699


 22%|██▏       | 10874/50000 [16:13<49:11, 13.26it/s]

Epochs: 10872 | epoch avg. loss: 0.083 | test avg. loss: 4.517
Epochs: 10873 | epoch avg. loss: 0.101 | test avg. loss: 4.480
Epochs: 10874 | epoch avg. loss: 0.102 | test avg. loss: 4.699


 22%|██▏       | 10878/50000 [16:13<47:15, 13.80it/s]

Epochs: 10875 | epoch avg. loss: 0.092 | test avg. loss: 4.691
Epochs: 10876 | epoch avg. loss: 0.347 | test avg. loss: 4.813
Epochs: 10877 | epoch avg. loss: 0.156 | test avg. loss: 4.608


 22%|██▏       | 10880/50000 [16:13<51:10, 12.74it/s]

Epochs: 10878 | epoch avg. loss: 0.206 | test avg. loss: 4.831
Epochs: 10879 | epoch avg. loss: 0.158 | test avg. loss: 4.527


 22%|██▏       | 10882/50000 [16:14<56:12, 11.60it/s]

Epochs: 10880 | epoch avg. loss: 0.146 | test avg. loss: 4.626
Epochs: 10881 | epoch avg. loss: 0.236 | test avg. loss: 4.816
Epochs: 10882 | epoch avg. loss: 0.273 | test avg. loss: 4.602


 22%|██▏       | 10886/50000 [16:14<53:59, 12.08it/s]

Epochs: 10883 | epoch avg. loss: 0.548 | test avg. loss: 5.469
Epochs: 10884 | epoch avg. loss: 0.607 | test avg. loss: 4.679
Epochs: 10885 | epoch avg. loss: 0.575 | test avg. loss: 4.738


 22%|██▏       | 10888/50000 [16:14<53:11, 12.25it/s]

Epochs: 10886 | epoch avg. loss: 0.326 | test avg. loss: 5.890
Epochs: 10887 | epoch avg. loss: 0.563 | test avg. loss: 5.009
Epochs: 10888 | epoch avg. loss: 0.794 | test avg. loss: 4.905


 22%|██▏       | 10892/50000 [16:14<48:53, 13.33it/s]

Epochs: 10889 | epoch avg. loss: 0.351 | test avg. loss: 4.543
Epochs: 10890 | epoch avg. loss: 0.316 | test avg. loss: 4.617
Epochs: 10891 | epoch avg. loss: 0.188 | test avg. loss: 4.853


 22%|██▏       | 10894/50000 [16:15<52:36, 12.39it/s]

Epochs: 10892 | epoch avg. loss: 0.105 | test avg. loss: 4.858
Epochs: 10893 | epoch avg. loss: 0.178 | test avg. loss: 5.537
Epochs: 10894 | epoch avg. loss: 0.323 | test avg. loss: 4.549


 22%|██▏       | 10898/50000 [16:15<54:47, 11.90it/s]

Epochs: 10895 | epoch avg. loss: 0.317 | test avg. loss: 4.428
Epochs: 10896 | epoch avg. loss: 0.212 | test avg. loss: 4.367
Epochs: 10897 | epoch avg. loss: 0.215 | test avg. loss: 4.673


 22%|██▏       | 10898/50000 [16:15<54:47, 11.90it/s]

Epochs: 10898 | epoch avg. loss: 0.207 | test avg. loss: 5.508
Epochs: 10899 | epoch avg. loss: 0.380 | test avg. loss: 4.796


 22%|██▏       | 10902/50000 [16:17<2:26:45,  4.44it/s]

Epochs: 10900 | epoch avg. loss: 0.367 | test avg. loss: 4.569
Epochs: 10901 | epoch avg. loss: 0.265 | test avg. loss: 4.506
Epochs: 10902 | epoch avg. loss: 0.237 | test avg. loss: 4.158


 22%|██▏       | 10906/50000 [16:17<1:40:43,  6.47it/s]

Epochs: 10903 | epoch avg. loss: 0.313 | test avg. loss: 4.725
Epochs: 10904 | epoch avg. loss: 0.154 | test avg. loss: 4.502
Epochs: 10905 | epoch avg. loss: 0.563 | test avg. loss: 4.709


 22%|██▏       | 10908/50000 [16:17<1:28:52,  7.33it/s]

Epochs: 10906 | epoch avg. loss: 0.436 | test avg. loss: 4.486
Epochs: 10907 | epoch avg. loss: 0.333 | test avg. loss: 4.405
Epochs: 10908 | epoch avg. loss: 0.473 | test avg. loss: 4.997


 22%|██▏       | 10912/50000 [16:17<1:15:17,  8.65it/s]

Epochs: 10909 | epoch avg. loss: 0.449 | test avg. loss: 4.461
Epochs: 10910 | epoch avg. loss: 0.302 | test avg. loss: 4.916
Epochs: 10911 | epoch avg. loss: 0.545 | test avg. loss: 4.505


 22%|██▏       | 10914/50000 [16:18<1:09:34,  9.36it/s]

Epochs: 10912 | epoch avg. loss: 0.239 | test avg. loss: 4.133
Epochs: 10913 | epoch avg. loss: 0.263 | test avg. loss: 4.772
Epochs: 10914 | epoch avg. loss: 0.342 | test avg. loss: 4.434


 22%|██▏       | 10918/50000 [16:18<1:02:54, 10.36it/s]

Epochs: 10915 | epoch avg. loss: 0.321 | test avg. loss: 4.808
Epochs: 10916 | epoch avg. loss: 0.101 | test avg. loss: 4.880
Epochs: 10917 | epoch avg. loss: 0.209 | test avg. loss: 4.802


 22%|██▏       | 10920/50000 [16:18<1:00:34, 10.75it/s]

Epochs: 10918 | epoch avg. loss: 0.202 | test avg. loss: 4.388
Epochs: 10919 | epoch avg. loss: 0.127 | test avg. loss: 4.118
Epochs: 10920 | epoch avg. loss: 0.153 | test avg. loss: 4.482


 22%|██▏       | 10924/50000 [16:18<56:36, 11.50it/s]

Epochs: 10921 | epoch avg. loss: 0.239 | test avg. loss: 4.304
Epochs: 10922 | epoch avg. loss: 0.157 | test avg. loss: 4.576
Epochs: 10923 | epoch avg. loss: 0.149 | test avg. loss: 5.255


 22%|██▏       | 10926/50000 [16:19<52:44, 12.35it/s]

Epochs: 10924 | epoch avg. loss: 0.266 | test avg. loss: 4.744
Epochs: 10925 | epoch avg. loss: 0.249 | test avg. loss: 4.740
Epochs: 10926 | epoch avg. loss: 0.297 | test avg. loss: 4.372


 22%|██▏       | 10930/50000 [16:19<52:09, 12.49it/s]

Epochs: 10927 | epoch avg. loss: 0.238 | test avg. loss: 4.363
Epochs: 10928 | epoch avg. loss: 0.435 | test avg. loss: 5.062
Epochs: 10929 | epoch avg. loss: 0.292 | test avg. loss: 4.574


                                                     

Epochs: 10930 | epoch avg. loss: 0.214 | test avg. loss: 4.865
Epochs: 10931 | epoch avg. loss: 0.281 | test avg. loss: 4.840
Epochs: 10932 | epoch avg. loss: 0.211 | test avg. loss: 4.383


 22%|██▏       | 10936/50000 [16:19<48:07, 13.53it/s]

Epochs: 10933 | epoch avg. loss: 0.430 | test avg. loss: 4.314
Epochs: 10934 | epoch avg. loss: 0.162 | test avg. loss: 4.515
Epochs: 10935 | epoch avg. loss: 0.147 | test avg. loss: 4.369


 22%|██▏       | 10938/50000 [16:20<48:14, 13.50it/s]

Epochs: 10936 | epoch avg. loss: 0.069 | test avg. loss: 4.839
Epochs: 10937 | epoch avg. loss: 0.174 | test avg. loss: 4.623
Epochs: 10938 | epoch avg. loss: 0.081 | test avg. loss: 4.542


 22%|██▏       | 10942/50000 [16:20<51:11, 12.72it/s]

Epochs: 10939 | epoch avg. loss: 0.071 | test avg. loss: 4.396
Epochs: 10940 | epoch avg. loss: 0.077 | test avg. loss: 4.205
Epochs: 10941 | epoch avg. loss: 0.109 | test avg. loss: 4.335


 22%|██▏       | 10944/50000 [16:20<52:11, 12.47it/s]

Epochs: 10942 | epoch avg. loss: 0.149 | test avg. loss: 4.507
Epochs: 10943 | epoch avg. loss: 0.055 | test avg. loss: 4.438
Epochs: 10944 | epoch avg. loss: 0.099 | test avg. loss: 4.441


 22%|██▏       | 10948/50000 [16:20<52:35, 12.38it/s]

Epochs: 10945 | epoch avg. loss: 0.099 | test avg. loss: 4.598
Epochs: 10946 | epoch avg. loss: 0.135 | test avg. loss: 4.285
Epochs: 10947 | epoch avg. loss: 0.190 | test avg. loss: 4.635


 22%|██▏       | 10950/50000 [16:21<54:54, 11.85it/s]

Epochs: 10948 | epoch avg. loss: 0.246 | test avg. loss: 4.497
Epochs: 10949 | epoch avg. loss: 0.195 | test avg. loss: 4.617
Epochs: 10950 | epoch avg. loss: 0.347 | test avg. loss: 5.118


 22%|██▏       | 10954/50000 [16:21<51:07, 12.73it/s]

Epochs: 10951 | epoch avg. loss: 0.227 | test avg. loss: 4.379
Epochs: 10952 | epoch avg. loss: 0.333 | test avg. loss: 4.410
Epochs: 10953 | epoch avg. loss: 0.339 | test avg. loss: 4.272


 22%|██▏       | 10956/50000 [16:21<50:31, 12.88it/s]

Epochs: 10954 | epoch avg. loss: 0.254 | test avg. loss: 4.237
Epochs: 10955 | epoch avg. loss: 0.442 | test avg. loss: 4.736
Epochs: 10956 | epoch avg. loss: 0.319 | test avg. loss: 4.356


 22%|██▏       | 10960/50000 [16:21<50:40, 12.84it/s]

Epochs: 10957 | epoch avg. loss: 0.143 | test avg. loss: 4.563
Epochs: 10958 | epoch avg. loss: 0.197 | test avg. loss: 4.881
Epochs: 10959 | epoch avg. loss: 0.227 | test avg. loss: 4.510


 22%|██▏       | 10962/50000 [16:22<55:25, 11.74it/s]

Epochs: 10960 | epoch avg. loss: 0.456 | test avg. loss: 4.748
Epochs: 10961 | epoch avg. loss: 0.410 | test avg. loss: 4.097
Epochs: 10962 | epoch avg. loss: 0.195 | test avg. loss: 4.081


 22%|██▏       | 10966/50000 [16:22<50:26, 12.90it/s]

Epochs: 10963 | epoch avg. loss: 0.114 | test avg. loss: 4.137
Epochs: 10964 | epoch avg. loss: 0.084 | test avg. loss: 4.325
Epochs: 10965 | epoch avg. loss: 0.096 | test avg. loss: 4.265


 22%|██▏       | 10968/50000 [16:22<50:18, 12.93it/s]

Epochs: 10966 | epoch avg. loss: 0.242 | test avg. loss: 4.364
Epochs: 10967 | epoch avg. loss: 0.156 | test avg. loss: 4.606
Epochs: 10968 | epoch avg. loss: 0.159 | test avg. loss: 4.305


 22%|██▏       | 10972/50000 [16:22<47:38, 13.65it/s]

Epochs: 10969 | epoch avg. loss: 0.121 | test avg. loss: 4.346
Epochs: 10970 | epoch avg. loss: 0.074 | test avg. loss: 4.273
Epochs: 10971 | epoch avg. loss: 0.083 | test avg. loss: 4.588


 22%|██▏       | 10974/50000 [16:22<48:06, 13.52it/s]

Epochs: 10972 | epoch avg. loss: 0.168 | test avg. loss: 4.360
Epochs: 10973 | epoch avg. loss: 0.183 | test avg. loss: 4.860
Epochs: 10974 | epoch avg. loss: 0.357 | test avg. loss: 4.475


 22%|██▏       | 10978/50000 [16:23<49:31, 13.13it/s]

Epochs: 10975 | epoch avg. loss: 0.481 | test avg. loss: 5.004
Epochs: 10976 | epoch avg. loss: 0.662 | test avg. loss: 5.683
Epochs: 10977 | epoch avg. loss: 0.411 | test avg. loss: 4.876


 22%|██▏       | 10982/50000 [16:23<45:54, 14.17it/s]

Epochs: 10978 | epoch avg. loss: 0.879 | test avg. loss: 5.171
Epochs: 10979 | epoch avg. loss: 1.309 | test avg. loss: 4.593
Epochs: 10980 | epoch avg. loss: 0.575 | test avg. loss: 4.423
Epochs: 10981 | epoch avg. loss: 0.740 | test avg. loss: 5.553


 22%|██▏       | 10984/50000 [16:23<45:00, 14.45it/s]

Epochs: 10982 | epoch avg. loss: 0.738 | test avg. loss: 4.713
Epochs: 10983 | epoch avg. loss: 0.615 | test avg. loss: 4.695
Epochs: 10984 | epoch avg. loss: 0.236 | test avg. loss: 4.776


 22%|██▏       | 10988/50000 [16:23<47:17, 13.75it/s]

Epochs: 10985 | epoch avg. loss: 0.320 | test avg. loss: 4.854
Epochs: 10986 | epoch avg. loss: 0.826 | test avg. loss: 5.106
Epochs: 10987 | epoch avg. loss: 0.478 | test avg. loss: 4.183


 22%|██▏       | 10990/50000 [16:24<52:23, 12.41it/s]

Epochs: 10988 | epoch avg. loss: 0.377 | test avg. loss: 4.169
Epochs: 10989 | epoch avg. loss: 0.242 | test avg. loss: 4.345
Epochs: 10990 | epoch avg. loss: 0.203 | test avg. loss: 4.714


 22%|██▏       | 10994/50000 [16:24<49:39, 13.09it/s]

Epochs: 10991 | epoch avg. loss: 0.368 | test avg. loss: 5.093
Epochs: 10992 | epoch avg. loss: 0.208 | test avg. loss: 4.393
Epochs: 10993 | epoch avg. loss: 0.218 | test avg. loss: 4.658


 22%|██▏       | 10996/50000 [16:24<48:58, 13.27it/s]

Epochs: 10994 | epoch avg. loss: 0.561 | test avg. loss: 4.065
Epochs: 10995 | epoch avg. loss: 0.393 | test avg. loss: 4.285
Epochs: 10996 | epoch avg. loss: 0.476 | test avg. loss: 6.443


 22%|██▏       | 10998/50000 [16:24<48:16, 13.47it/s]

Epochs: 10997 | epoch avg. loss: 1.158 | test avg. loss: 6.844
Epochs: 10998 | epoch avg. loss: 2.505 | test avg. loss: 5.732
Epochs: 10999 | epoch avg. loss: 1.610 | test avg. loss: 5.266


 22%|██▏       | 11002/50000 [16:26<2:18:08,  4.71it/s]

Epochs: 11000 | epoch avg. loss: 2.325 | test avg. loss: 4.264
Epochs: 11001 | epoch avg. loss: 1.519 | test avg. loss: 6.692
Epochs: 11002 | epoch avg. loss: 1.592 | test avg. loss: 4.431


 22%|██▏       | 11006/50000 [16:26<1:32:15,  7.04it/s]

Epochs: 11003 | epoch avg. loss: 0.870 | test avg. loss: 4.724
Epochs: 11004 | epoch avg. loss: 0.862 | test avg. loss: 4.393
Epochs: 11005 | epoch avg. loss: 0.273 | test avg. loss: 4.268




Epochs: 11006 | epoch avg. loss: 0.143 | test avg. loss: 4.505
Epochs: 11007 | epoch avg. loss: 0.184 | test avg. loss: 4.384
Epochs: 11008 | epoch avg. loss: 0.118 | test avg. loss: 4.363


 22%|██▏       | 11012/50000 [16:26<1:04:40, 10.05it/s]

Epochs: 11009 | epoch avg. loss: 0.146 | test avg. loss: 4.705
Epochs: 11010 | epoch avg. loss: 0.128 | test avg. loss: 4.528
Epochs: 11011 | epoch avg. loss: 0.139 | test avg. loss: 4.932


 22%|██▏       | 11014/50000 [16:27<1:00:48, 10.68it/s]

Epochs: 11012 | epoch avg. loss: 0.167 | test avg. loss: 4.828
Epochs: 11013 | epoch avg. loss: 0.099 | test avg. loss: 4.851
Epochs: 11014 | epoch avg. loss: 0.088 | test avg. loss: 4.539


 22%|██▏       | 11018/50000 [16:27<54:58, 11.82it/s]

Epochs: 11015 | epoch avg. loss: 0.195 | test avg. loss: 4.387
Epochs: 11016 | epoch avg. loss: 0.109 | test avg. loss: 4.467
Epochs: 11017 | epoch avg. loss: 0.118 | test avg. loss: 4.216


 22%|██▏       | 11020/50000 [16:27<51:49, 12.54it/s]

Epochs: 11018 | epoch avg. loss: 0.169 | test avg. loss: 4.460
Epochs: 11019 | epoch avg. loss: 0.182 | test avg. loss: 4.201
Epochs: 11020 | epoch avg. loss: 0.152 | test avg. loss: 4.383
Epochs: 11021 | epoch avg. loss: 0.113 | test avg. loss: 4.329


 22%|██▏       | 11024/50000 [16:27<50:08, 12.96it/s]

Epochs: 11022 | epoch avg. loss: 0.154 | test avg. loss: 4.473
Epochs: 11023 | epoch avg. loss: 0.159 | test avg. loss: 5.011
Epochs: 11024 | epoch avg. loss: 0.262 | test avg. loss: 4.661


 22%|██▏       | 11028/50000 [16:28<48:34, 13.37it/s]

Epochs: 11025 | epoch avg. loss: 0.368 | test avg. loss: 5.939
Epochs: 11026 | epoch avg. loss: 0.987 | test avg. loss: 4.763
Epochs: 11027 | epoch avg. loss: 1.078 | test avg. loss: 4.473
Epochs: 11028 | epoch avg. loss: 0.324 | test avg. loss: 5.310


 22%|██▏       | 11032/50000 [16:28<45:46, 14.19it/s]

Epochs: 11029 | epoch avg. loss: 0.473 | test avg. loss: 4.607
Epochs: 11030 | epoch avg. loss: 0.643 | test avg. loss: 5.071
Epochs: 11031 | epoch avg. loss: 0.353 | test avg. loss: 4.295
Epochs: 11032 | epoch avg. loss: 0.418 | test avg. loss: 4.351


 22%|██▏       | 11036/50000 [16:28<43:42, 14.86it/s]

Epochs: 11033 | epoch avg. loss: 0.217 | test avg. loss: 4.676
Epochs: 11034 | epoch avg. loss: 0.326 | test avg. loss: 4.720
Epochs: 11035 | epoch avg. loss: 0.699 | test avg. loss: 4.893
Epochs: 11036 | epoch avg. loss: 0.398 | test avg. loss: 4.400


 22%|██▏       | 11040/50000 [16:28<45:14, 14.35it/s]

Epochs: 11037 | epoch avg. loss: 0.178 | test avg. loss: 4.886
Epochs: 11038 | epoch avg. loss: 0.147 | test avg. loss: 4.732
Epochs: 11039 | epoch avg. loss: 0.181 | test avg. loss: 4.603


 22%|██▏       | 11042/50000 [16:29<47:21, 13.71it/s]

Epochs: 11040 | epoch avg. loss: 0.140 | test avg. loss: 4.529
Epochs: 11041 | epoch avg. loss: 0.132 | test avg. loss: 4.211
Epochs: 11042 | epoch avg. loss: 0.208 | test avg. loss: 4.630
Epochs: 11043 | epoch avg. loss: 0.248 | test avg. loss: 4.198


 22%|██▏       | 11048/50000 [16:29<44:05, 14.72it/s]

Epochs: 11044 | epoch avg. loss: 0.169 | test avg. loss: 4.185
Epochs: 11045 | epoch avg. loss: 0.130 | test avg. loss: 4.495
Epochs: 11046 | epoch avg. loss: 0.155 | test avg. loss: 4.271
Epochs: 11047 | epoch avg. loss: 0.082 | test avg. loss: 4.663


 22%|██▏       | 11052/50000 [16:29<42:42, 15.20it/s]

Epochs: 11048 | epoch avg. loss: 0.136 | test avg. loss: 4.442
Epochs: 11049 | epoch avg. loss: 0.343 | test avg. loss: 4.526
Epochs: 11050 | epoch avg. loss: 0.129 | test avg. loss: 4.493
Epochs: 11051 | epoch avg. loss: 0.141 | test avg. loss: 4.396


 22%|██▏       | 11054/50000 [16:30<43:45, 14.83it/s]

Epochs: 11052 | epoch avg. loss: 0.154 | test avg. loss: 4.709
Epochs: 11053 | epoch avg. loss: 0.169 | test avg. loss: 4.476
Epochs: 11054 | epoch avg. loss: 0.082 | test avg. loss: 4.493


 22%|██▏       | 11058/50000 [16:30<47:50, 13.57it/s]

Epochs: 11055 | epoch avg. loss: 0.081 | test avg. loss: 4.719
Epochs: 11056 | epoch avg. loss: 0.116 | test avg. loss: 4.529
Epochs: 11057 | epoch avg. loss: 0.155 | test avg. loss: 4.584


 22%|██▏       | 11060/50000 [16:30<48:15, 13.45it/s]

Epochs: 11058 | epoch avg. loss: 0.165 | test avg. loss: 5.266
Epochs: 11059 | epoch avg. loss: 0.266 | test avg. loss: 4.643
Epochs: 11060 | epoch avg. loss: 0.279 | test avg. loss: 4.643
Epochs: 11061 | epoch avg. loss: 0.138 | test avg. loss: 4.344


 22%|██▏       | 11064/50000 [16:30<44:58, 14.43it/s]

Epochs: 11062 | epoch avg. loss: 0.086 | test avg. loss: 4.380
Epochs: 11063 | epoch avg. loss: 0.126 | test avg. loss: 4.591
Epochs: 11064 | epoch avg. loss: 0.096 | test avg. loss: 4.533


 22%|██▏       | 11068/50000 [16:30<48:07, 13.48it/s]

Epochs: 11065 | epoch avg. loss: 0.090 | test avg. loss: 4.724
Epochs: 11066 | epoch avg. loss: 0.078 | test avg. loss: 4.589
Epochs: 11067 | epoch avg. loss: 0.080 | test avg. loss: 4.454


 22%|██▏       | 11070/50000 [16:31<49:04, 13.22it/s]

Epochs: 11068 | epoch avg. loss: 0.092 | test avg. loss: 4.366
Epochs: 11069 | epoch avg. loss: 0.065 | test avg. loss: 4.497
Epochs: 11070 | epoch avg. loss: 0.134 | test avg. loss: 4.315


 22%|██▏       | 11074/50000 [16:31<50:53, 12.75it/s]

Epochs: 11071 | epoch avg. loss: 0.073 | test avg. loss: 4.292
Epochs: 11072 | epoch avg. loss: 0.115 | test avg. loss: 5.079
Epochs: 11073 | epoch avg. loss: 0.340 | test avg. loss: 4.306


 22%|██▏       | 11076/50000 [16:31<51:52, 12.51it/s]

Epochs: 11074 | epoch avg. loss: 0.289 | test avg. loss: 4.253
Epochs: 11075 | epoch avg. loss: 0.095 | test avg. loss: 4.458
Epochs: 11076 | epoch avg. loss: 0.100 | test avg. loss: 4.306


 22%|██▏       | 11080/50000 [16:31<53:31, 12.12it/s]

Epochs: 11077 | epoch avg. loss: 0.116 | test avg. loss: 4.671
Epochs: 11078 | epoch avg. loss: 0.132 | test avg. loss: 4.353
Epochs: 11079 | epoch avg. loss: 0.337 | test avg. loss: 4.425


 22%|██▏       | 11082/50000 [16:32<55:04, 11.78it/s]

Epochs: 11080 | epoch avg. loss: 0.125 | test avg. loss: 4.499
Epochs: 11081 | epoch avg. loss: 0.081 | test avg. loss: 4.430
Epochs: 11082 | epoch avg. loss: 0.071 | test avg. loss: 4.431


 22%|██▏       | 11086/50000 [16:32<53:52, 12.04it/s]

Epochs: 11083 | epoch avg. loss: 0.068 | test avg. loss: 4.228
Epochs: 11084 | epoch avg. loss: 0.069 | test avg. loss: 4.436
Epochs: 11085 | epoch avg. loss: 0.098 | test avg. loss: 4.157


 22%|██▏       | 11088/50000 [16:32<54:25, 11.91it/s]

Epochs: 11086 | epoch avg. loss: 0.200 | test avg. loss: 4.281
Epochs: 11087 | epoch avg. loss: 0.097 | test avg. loss: 4.293
Epochs: 11088 | epoch avg. loss: 0.066 | test avg. loss: 4.343


 22%|██▏       | 11092/50000 [16:32<51:29, 12.59it/s]

Epochs: 11089 | epoch avg. loss: 0.068 | test avg. loss: 4.448
Epochs: 11090 | epoch avg. loss: 0.059 | test avg. loss: 4.423
Epochs: 11091 | epoch avg. loss: 0.052 | test avg. loss: 4.425


 22%|██▏       | 11094/50000 [16:33<53:04, 12.22it/s]

Epochs: 11092 | epoch avg. loss: 0.055 | test avg. loss: 4.226
Epochs: 11093 | epoch avg. loss: 0.060 | test avg. loss: 4.325
Epochs: 11094 | epoch avg. loss: 0.070 | test avg. loss: 4.231


 22%|██▏       | 11098/50000 [16:33<52:01, 12.46it/s]

Epochs: 11095 | epoch avg. loss: 0.144 | test avg. loss: 4.354
Epochs: 11096 | epoch avg. loss: 0.078 | test avg. loss: 4.542
Epochs: 11097 | epoch avg. loss: 0.089 | test avg. loss: 4.504


 22%|██▏       | 11098/50000 [16:33<52:01, 12.46it/s]

Epochs: 11098 | epoch avg. loss: 0.065 | test avg. loss: 4.260
Epochs: 11099 | epoch avg. loss: 0.111 | test avg. loss: 4.271


 22%|██▏       | 11102/50000 [16:35<2:39:00,  4.08it/s]

Epochs: 11100 | epoch avg. loss: 0.081 | test avg. loss: 4.779
Epochs: 11101 | epoch avg. loss: 0.229 | test avg. loss: 4.319
Epochs: 11102 | epoch avg. loss: 0.147 | test avg. loss: 4.322


 22%|██▏       | 11106/50000 [16:35<1:43:44,  6.25it/s]

Epochs: 11103 | epoch avg. loss: 0.107 | test avg. loss: 4.790
Epochs: 11104 | epoch avg. loss: 0.204 | test avg. loss: 4.432
Epochs: 11105 | epoch avg. loss: 0.417 | test avg. loss: 4.557


 22%|██▏       | 11108/50000 [16:35<1:24:54,  7.63it/s]

Epochs: 11106 | epoch avg. loss: 0.193 | test avg. loss: 4.685
Epochs: 11107 | epoch avg. loss: 0.180 | test avg. loss: 4.303
Epochs: 11108 | epoch avg. loss: 0.169 | test avg. loss: 4.480


 22%|██▏       | 11112/50000 [16:36<1:06:47,  9.70it/s]

Epochs: 11109 | epoch avg. loss: 0.243 | test avg. loss: 4.302
Epochs: 11110 | epoch avg. loss: 0.170 | test avg. loss: 4.517
Epochs: 11111 | epoch avg. loss: 0.385 | test avg. loss: 5.075


 22%|██▏       | 11114/50000 [16:36<1:02:49, 10.32it/s]

Epochs: 11112 | epoch avg. loss: 0.377 | test avg. loss: 4.502
Epochs: 11113 | epoch avg. loss: 0.225 | test avg. loss: 4.614
Epochs: 11114 | epoch avg. loss: 0.109 | test avg. loss: 4.660


 22%|██▏       | 11118/50000 [16:36<53:48, 12.04it/s]

Epochs: 11115 | epoch avg. loss: 0.118 | test avg. loss: 4.479
Epochs: 11116 | epoch avg. loss: 0.084 | test avg. loss: 4.567
Epochs: 11117 | epoch avg. loss: 0.083 | test avg. loss: 4.337


 22%|██▏       | 11120/50000 [16:36<52:32, 12.33it/s]

Epochs: 11118 | epoch avg. loss: 0.114 | test avg. loss: 4.391
Epochs: 11119 | epoch avg. loss: 0.096 | test avg. loss: 4.368
Epochs: 11120 | epoch avg. loss: 0.061 | test avg. loss: 4.306


 22%|██▏       | 11122/50000 [16:36<52:44, 12.29it/s]

Epochs: 11121 | epoch avg. loss: 0.065 | test avg. loss: 4.373
Epochs: 11122 | epoch avg. loss: 0.055 | test avg. loss: 4.340


 22%|██▏       | 11126/50000 [16:37<55:35, 11.65it/s]

Epochs: 11123 | epoch avg. loss: 0.048 | test avg. loss: 4.346
Epochs: 11124 | epoch avg. loss: 0.046 | test avg. loss: 4.348
Epochs: 11125 | epoch avg. loss: 0.051 | test avg. loss: 4.331


 22%|██▏       | 11128/50000 [16:37<54:12, 11.95it/s]

Epochs: 11126 | epoch avg. loss: 0.056 | test avg. loss: 4.362
Epochs: 11127 | epoch avg. loss: 0.047 | test avg. loss: 4.374
Epochs: 11128 | epoch avg. loss: 0.054 | test avg. loss: 4.383


 22%|██▏       | 11132/50000 [16:37<54:24, 11.91it/s]

Epochs: 11129 | epoch avg. loss: 0.056 | test avg. loss: 4.210
Epochs: 11130 | epoch avg. loss: 0.128 | test avg. loss: 4.184
Epochs: 11131 | epoch avg. loss: 0.098 | test avg. loss: 4.272


 22%|██▏       | 11134/50000 [16:37<55:27, 11.68it/s]

Epochs: 11132 | epoch avg. loss: 0.059 | test avg. loss: 4.369
Epochs: 11133 | epoch avg. loss: 0.056 | test avg. loss: 4.628
Epochs: 11134 | epoch avg. loss: 0.106 | test avg. loss: 4.305


 22%|██▏       | 11138/50000 [16:38<55:25, 11.69it/s]

Epochs: 11135 | epoch avg. loss: 0.055 | test avg. loss: 4.368
Epochs: 11136 | epoch avg. loss: 0.063 | test avg. loss: 4.228
Epochs: 11137 | epoch avg. loss: 0.078 | test avg. loss: 4.343


 22%|██▏       | 11140/50000 [16:38<53:48, 12.03it/s]

Epochs: 11138 | epoch avg. loss: 0.055 | test avg. loss: 4.359
Epochs: 11139 | epoch avg. loss: 0.066 | test avg. loss: 4.344
Epochs: 11140 | epoch avg. loss: 0.075 | test avg. loss: 4.592


 22%|██▏       | 11144/50000 [16:38<49:19, 13.13it/s]

Epochs: 11141 | epoch avg. loss: 0.104 | test avg. loss: 4.289
Epochs: 11142 | epoch avg. loss: 0.149 | test avg. loss: 4.306
Epochs: 11143 | epoch avg. loss: 0.098 | test avg. loss: 4.596


 22%|██▏       | 11146/50000 [16:38<50:27, 12.83it/s]

Epochs: 11144 | epoch avg. loss: 0.134 | test avg. loss: 4.465
Epochs: 11145 | epoch avg. loss: 0.334 | test avg. loss: 4.677
Epochs: 11146 | epoch avg. loss: 0.217 | test avg. loss: 4.532




Epochs: 11147 | epoch avg. loss: 0.160 | test avg. loss: 4.488
Epochs: 11148 | epoch avg. loss: 0.150 | test avg. loss: 4.714
Epochs: 11149 | epoch avg. loss: 0.115 | test avg. loss: 4.359


 22%|██▏       | 11152/50000 [16:39<48:35, 13.33it/s]

Epochs: 11150 | epoch avg. loss: 0.128 | test avg. loss: 4.490
Epochs: 11151 | epoch avg. loss: 0.110 | test avg. loss: 4.313
Epochs: 11152 | epoch avg. loss: 0.075 | test avg. loss: 4.270


 22%|██▏       | 11156/50000 [16:39<50:12, 12.89it/s]

Epochs: 11153 | epoch avg. loss: 0.075 | test avg. loss: 4.609
Epochs: 11154 | epoch avg. loss: 0.084 | test avg. loss: 4.485
Epochs: 11155 | epoch avg. loss: 0.209 | test avg. loss: 4.533


 22%|██▏       | 11158/50000 [16:39<51:19, 12.61it/s]

Epochs: 11156 | epoch avg. loss: 0.069 | test avg. loss: 4.516
Epochs: 11157 | epoch avg. loss: 0.070 | test avg. loss: 4.313
Epochs: 11158 | epoch avg. loss: 0.093 | test avg. loss: 4.470


 22%|██▏       | 11162/50000 [16:40<53:48, 12.03it/s]

Epochs: 11159 | epoch avg. loss: 0.086 | test avg. loss: 4.250
Epochs: 11160 | epoch avg. loss: 0.208 | test avg. loss: 4.347
Epochs: 11161 | epoch avg. loss: 0.150 | test avg. loss: 4.796


                                                     

Epochs: 11162 | epoch avg. loss: 0.227 | test avg. loss: 4.489
Epochs: 11163 | epoch avg. loss: 0.075 | test avg. loss: 4.355
Epochs: 11164 | epoch avg. loss: 0.117 | test avg. loss: 4.300


 22%|██▏       | 11168/50000 [16:40<53:12, 12.16it/s]

Epochs: 11165 | epoch avg. loss: 0.049 | test avg. loss: 4.349
Epochs: 11166 | epoch avg. loss: 0.054 | test avg. loss: 4.255
Epochs: 11167 | epoch avg. loss: 0.088 | test avg. loss: 4.472


 22%|██▏       | 11170/50000 [16:40<50:37, 12.78it/s]

Epochs: 11168 | epoch avg. loss: 0.129 | test avg. loss: 4.374
Epochs: 11169 | epoch avg. loss: 0.059 | test avg. loss: 4.280
Epochs: 11170 | epoch avg. loss: 0.107 | test avg. loss: 4.535


 22%|██▏       | 11174/50000 [16:41<52:42, 12.28it/s]

Epochs: 11171 | epoch avg. loss: 0.162 | test avg. loss: 4.671
Epochs: 11172 | epoch avg. loss: 0.744 | test avg. loss: 4.726
Epochs: 11173 | epoch avg. loss: 0.537 | test avg. loss: 5.885


 22%|██▏       | 11176/50000 [16:41<52:11, 12.40it/s]

Epochs: 11174 | epoch avg. loss: 0.788 | test avg. loss: 4.649
Epochs: 11175 | epoch avg. loss: 0.687 | test avg. loss: 5.693
Epochs: 11176 | epoch avg. loss: 1.209 | test avg. loss: 4.901


 22%|██▏       | 11180/50000 [16:41<48:53, 13.23it/s]

Epochs: 11177 | epoch avg. loss: 0.591 | test avg. loss: 5.334
Epochs: 11178 | epoch avg. loss: 1.290 | test avg. loss: 6.277
Epochs: 11179 | epoch avg. loss: 1.352 | test avg. loss: 4.509


 22%|██▏       | 11182/50000 [16:41<47:28, 13.63it/s]

Epochs: 11180 | epoch avg. loss: 1.210 | test avg. loss: 4.863
Epochs: 11181 | epoch avg. loss: 0.754 | test avg. loss: 6.903
Epochs: 11182 | epoch avg. loss: 1.174 | test avg. loss: 5.744
Epochs: 11183 | epoch avg. loss: 2.108 | test avg. loss: 5.198


 22%|██▏       | 11186/50000 [16:42<48:50, 13.25it/s]

Epochs: 11184 | epoch avg. loss: 1.319 | test avg. loss: 4.135
Epochs: 11185 | epoch avg. loss: 0.680 | test avg. loss: 4.742
Epochs: 11186 | epoch avg. loss: 0.842 | test avg. loss: 6.436


 22%|██▏       | 11190/50000 [16:42<49:15, 13.13it/s]

Epochs: 11187 | epoch avg. loss: 1.103 | test avg. loss: 5.149
Epochs: 11188 | epoch avg. loss: 1.380 | test avg. loss: 5.470
Epochs: 11189 | epoch avg. loss: 2.524 | test avg. loss: 4.412


 22%|██▏       | 11192/50000 [16:42<48:29, 13.34it/s]

Epochs: 11190 | epoch avg. loss: 1.857 | test avg. loss: 8.539
Epochs: 11191 | epoch avg. loss: 3.701 | test avg. loss: 10.279
Epochs: 11192 | epoch avg. loss: 2.935 | test avg. loss: 7.198


 22%|██▏       | 11196/50000 [16:42<46:54, 13.79it/s]

Epochs: 11193 | epoch avg. loss: 3.133 | test avg. loss: 6.578
Epochs: 11194 | epoch avg. loss: 2.622 | test avg. loss: 4.081
Epochs: 11195 | epoch avg. loss: 1.792 | test avg. loss: 4.622


 22%|██▏       | 11198/50000 [16:42<46:09, 14.01it/s]

Epochs: 11196 | epoch avg. loss: 2.174 | test avg. loss: 5.864
Epochs: 11197 | epoch avg. loss: 1.809 | test avg. loss: 5.480
Epochs: 11198 | epoch avg. loss: 2.174 | test avg. loss: 11.948


 22%|██▏       | 11198/50000 [16:42<46:09, 14.01it/s]

Epochs: 11199 | epoch avg. loss: 4.920 | test avg. loss: 10.930


 22%|██▏       | 11202/50000 [16:44<2:14:56,  4.79it/s]

Epochs: 11200 | epoch avg. loss: 6.125 | test avg. loss: 11.016
Epochs: 11201 | epoch avg. loss: 5.776 | test avg. loss: 8.076
Epochs: 11202 | epoch avg. loss: 4.974 | test avg. loss: 7.223


 22%|██▏       | 11206/50000 [16:44<1:28:38,  7.29it/s]

Epochs: 11203 | epoch avg. loss: 2.809 | test avg. loss: 5.030
Epochs: 11204 | epoch avg. loss: 2.702 | test avg. loss: 5.265
Epochs: 11205 | epoch avg. loss: 1.714 | test avg. loss: 5.532
Epochs: 11206 | epoch avg. loss: 1.202 | test avg. loss: 4.493


 22%|██▏       | 11210/50000 [16:44<1:06:58,  9.65it/s]

Epochs: 11207 | epoch avg. loss: 0.959 | test avg. loss: 5.515
Epochs: 11208 | epoch avg. loss: 1.499 | test avg. loss: 6.198
Epochs: 11209 | epoch avg. loss: 2.139 | test avg. loss: 8.450


                                                       

Epochs: 11210 | epoch avg. loss: 2.607 | test avg. loss: 7.788
Epochs: 11211 | epoch avg. loss: 3.761 | test avg. loss: 9.674
Epochs: 11212 | epoch avg. loss: 3.283 | test avg. loss: 5.524


 22%|██▏       | 11216/50000 [16:45<52:03, 12.42it/s]

Epochs: 11213 | epoch avg. loss: 2.117 | test avg. loss: 7.048
Epochs: 11214 | epoch avg. loss: 1.509 | test avg. loss: 5.246
Epochs: 11215 | epoch avg. loss: 2.073 | test avg. loss: 7.023
Epochs: 11216 | epoch avg. loss: 1.923 | test avg. loss: 5.211


 22%|██▏       | 11220/50000 [16:45<47:17, 13.67it/s]

Epochs: 11217 | epoch avg. loss: 1.892 | test avg. loss: 6.027
Epochs: 11218 | epoch avg. loss: 1.357 | test avg. loss: 6.179
Epochs: 11219 | epoch avg. loss: 2.188 | test avg. loss: 7.005
Epochs: 11220 | epoch avg. loss: 1.907 | test avg. loss: 6.527


 22%|██▏       | 11224/50000 [16:45<48:11, 13.41it/s]

Epochs: 11221 | epoch avg. loss: 2.830 | test avg. loss: 5.852
Epochs: 11222 | epoch avg. loss: 1.016 | test avg. loss: 4.349
Epochs: 11223 | epoch avg. loss: 1.069 | test avg. loss: 6.092




Epochs: 11224 | epoch avg. loss: 1.430 | test avg. loss: 4.318
Epochs: 11225 | epoch avg. loss: 1.015 | test avg. loss: 5.516
Epochs: 11226 | epoch avg. loss: 0.815 | test avg. loss: 4.949


 22%|██▏       | 11230/50000 [16:46<47:09, 13.70it/s]

Epochs: 11227 | epoch avg. loss: 0.881 | test avg. loss: 6.025
Epochs: 11228 | epoch avg. loss: 0.718 | test avg. loss: 5.228
Epochs: 11229 | epoch avg. loss: 0.381 | test avg. loss: 5.998


 22%|██▏       | 11232/50000 [16:46<47:25, 13.62it/s]

Epochs: 11230 | epoch avg. loss: 0.542 | test avg. loss: 4.760
Epochs: 11231 | epoch avg. loss: 0.711 | test avg. loss: 5.779
Epochs: 11232 | epoch avg. loss: 0.976 | test avg. loss: 3.986


 22%|██▏       | 11236/50000 [16:46<46:02, 14.03it/s]

Epochs: 11233 | epoch avg. loss: 0.617 | test avg. loss: 6.130
Epochs: 11234 | epoch avg. loss: 1.272 | test avg. loss: 4.736
Epochs: 11235 | epoch avg. loss: 1.363 | test avg. loss: 5.773


 22%|██▏       | 11238/50000 [16:47<52:34, 12.29it/s]

Epochs: 11236 | epoch avg. loss: 0.881 | test avg. loss: 4.808
Epochs: 11237 | epoch avg. loss: 1.063 | test avg. loss: 6.847
Epochs: 11238 | epoch avg. loss: 1.777 | test avg. loss: 4.368




Epochs: 11239 | epoch avg. loss: 0.941 | test avg. loss: 5.644
Epochs: 11240 | epoch avg. loss: 1.182 | test avg. loss: 4.754
Epochs: 11241 | epoch avg. loss: 1.676 | test avg. loss: 5.891


 22%|██▏       | 11244/50000 [16:47<48:13, 13.40it/s]

Epochs: 11242 | epoch avg. loss: 1.316 | test avg. loss: 4.862
Epochs: 11243 | epoch avg. loss: 1.461 | test avg. loss: 5.803
Epochs: 11244 | epoch avg. loss: 1.006 | test avg. loss: 5.009


 22%|██▏       | 11248/50000 [16:47<48:08, 13.42it/s]

Epochs: 11245 | epoch avg. loss: 1.242 | test avg. loss: 5.849
Epochs: 11246 | epoch avg. loss: 0.913 | test avg. loss: 4.641
Epochs: 11247 | epoch avg. loss: 0.644 | test avg. loss: 5.066


 22%|██▎       | 11250/50000 [16:47<47:13, 13.68it/s]

Epochs: 11248 | epoch avg. loss: 0.400 | test avg. loss: 4.189
Epochs: 11249 | epoch avg. loss: 0.318 | test avg. loss: 4.904
Epochs: 11250 | epoch avg. loss: 0.394 | test avg. loss: 4.227


 23%|██▎       | 11254/50000 [16:48<48:26, 13.33it/s]

Epochs: 11251 | epoch avg. loss: 0.385 | test avg. loss: 4.989
Epochs: 11252 | epoch avg. loss: 0.379 | test avg. loss: 4.412
Epochs: 11253 | epoch avg. loss: 0.163 | test avg. loss: 4.953


 23%|██▎       | 11256/50000 [16:48<53:04, 12.17it/s]

Epochs: 11254 | epoch avg. loss: 0.289 | test avg. loss: 4.346
Epochs: 11255 | epoch avg. loss: 0.110 | test avg. loss: 4.515
Epochs: 11256 | epoch avg. loss: 0.128 | test avg. loss: 4.141


 23%|██▎       | 11260/50000 [16:48<49:26, 13.06it/s]

Epochs: 11257 | epoch avg. loss: 0.208 | test avg. loss: 4.452
Epochs: 11258 | epoch avg. loss: 0.162 | test avg. loss: 4.127
Epochs: 11259 | epoch avg. loss: 0.270 | test avg. loss: 4.596
Epochs: 11260 | epoch avg. loss: 0.190 | test avg. loss: 4.281


 23%|██▎       | 11264/50000 [16:48<49:37, 13.01it/s]

Epochs: 11261 | epoch avg. loss: 0.112 | test avg. loss: 4.470
Epochs: 11262 | epoch avg. loss: 0.096 | test avg. loss: 4.292
Epochs: 11263 | epoch avg. loss: 0.077 | test avg. loss: 4.409


 23%|██▎       | 11266/50000 [16:49<49:02, 13.16it/s]

Epochs: 11264 | epoch avg. loss: 0.098 | test avg. loss: 4.155
Epochs: 11265 | epoch avg. loss: 0.113 | test avg. loss: 4.338
Epochs: 11266 | epoch avg. loss: 0.098 | test avg. loss: 4.315
Epochs: 11267 | epoch avg. loss: 0.073 | test avg. loss: 4.299


 23%|██▎       | 11272/50000 [16:49<44:51, 14.39it/s]

Epochs: 11268 | epoch avg. loss: 0.073 | test avg. loss: 4.332
Epochs: 11269 | epoch avg. loss: 0.072 | test avg. loss: 4.275
Epochs: 11270 | epoch avg. loss: 0.072 | test avg. loss: 4.266
Epochs: 11271 | epoch avg. loss: 0.073 | test avg. loss: 4.180


 23%|██▎       | 11274/50000 [16:49<46:43, 13.81it/s]

Epochs: 11272 | epoch avg. loss: 0.071 | test avg. loss: 4.208
Epochs: 11273 | epoch avg. loss: 0.072 | test avg. loss: 4.290
Epochs: 11274 | epoch avg. loss: 0.081 | test avg. loss: 4.403


 23%|██▎       | 11278/50000 [16:49<47:28, 13.60it/s]

Epochs: 11275 | epoch avg. loss: 0.084 | test avg. loss: 4.255
Epochs: 11276 | epoch avg. loss: 0.107 | test avg. loss: 4.358
Epochs: 11277 | epoch avg. loss: 0.073 | test avg. loss: 4.201


 23%|██▎       | 11280/50000 [16:50<45:32, 14.17it/s]

Epochs: 11278 | epoch avg. loss: 0.076 | test avg. loss: 4.296
Epochs: 11279 | epoch avg. loss: 0.084 | test avg. loss: 4.112
Epochs: 11280 | epoch avg. loss: 0.085 | test avg. loss: 4.148


 23%|██▎       | 11284/50000 [16:50<45:42, 14.12it/s]

Epochs: 11281 | epoch avg. loss: 0.068 | test avg. loss: 4.283
Epochs: 11282 | epoch avg. loss: 0.073 | test avg. loss: 4.243
Epochs: 11283 | epoch avg. loss: 0.093 | test avg. loss: 4.417
Epochs: 11284 | epoch avg. loss: 0.080 | test avg. loss: 4.231


 23%|██▎       | 11288/50000 [16:50<47:19, 13.63it/s]

Epochs: 11285 | epoch avg. loss: 0.092 | test avg. loss: 4.150
Epochs: 11286 | epoch avg. loss: 0.091 | test avg. loss: 4.371
Epochs: 11287 | epoch avg. loss: 0.113 | test avg. loss: 4.226


 23%|██▎       | 11290/50000 [16:50<48:22, 13.34it/s]

Epochs: 11288 | epoch avg. loss: 0.067 | test avg. loss: 4.343
Epochs: 11289 | epoch avg. loss: 0.085 | test avg. loss: 4.295
Epochs: 11290 | epoch avg. loss: 0.068 | test avg. loss: 4.231


 23%|██▎       | 11294/50000 [16:51<48:48, 13.22it/s]

Epochs: 11291 | epoch avg. loss: 0.066 | test avg. loss: 4.175
Epochs: 11292 | epoch avg. loss: 0.072 | test avg. loss: 4.350
Epochs: 11293 | epoch avg. loss: 0.088 | test avg. loss: 4.166


 23%|██▎       | 11296/50000 [16:51<49:08, 13.13it/s]

Epochs: 11294 | epoch avg. loss: 0.113 | test avg. loss: 4.383
Epochs: 11295 | epoch avg. loss: 0.093 | test avg. loss: 4.183
Epochs: 11296 | epoch avg. loss: 0.071 | test avg. loss: 4.384


 23%|██▎       | 11298/50000 [16:51<47:52, 13.47it/s]

Epochs: 11297 | epoch avg. loss: 0.094 | test avg. loss: 4.118
Epochs: 11298 | epoch avg. loss: 0.103 | test avg. loss: 4.355
Epochs: 11299 | epoch avg. loss: 0.102 | test avg. loss: 4.201


 23%|██▎       | 11302/50000 [16:53<2:08:18,  5.03it/s]

Epochs: 11300 | epoch avg. loss: 0.068 | test avg. loss: 4.247
Epochs: 11301 | epoch avg. loss: 0.063 | test avg. loss: 4.174
Epochs: 11302 | epoch avg. loss: 0.071 | test avg. loss: 4.314


 23%|██▎       | 11306/50000 [16:53<1:29:14,  7.23it/s]

Epochs: 11303 | epoch avg. loss: 0.074 | test avg. loss: 4.238
Epochs: 11304 | epoch avg. loss: 0.064 | test avg. loss: 4.382
Epochs: 11305 | epoch avg. loss: 0.095 | test avg. loss: 4.296


 23%|██▎       | 11308/50000 [16:53<1:16:39,  8.41it/s]

Epochs: 11306 | epoch avg. loss: 0.090 | test avg. loss: 4.172
Epochs: 11307 | epoch avg. loss: 0.133 | test avg. loss: 4.407
Epochs: 11308 | epoch avg. loss: 0.087 | test avg. loss: 4.204


 23%|██▎       | 11312/50000 [16:53<1:02:52, 10.26it/s]

Epochs: 11309 | epoch avg. loss: 0.089 | test avg. loss: 4.323
Epochs: 11310 | epoch avg. loss: 0.073 | test avg. loss: 4.305
Epochs: 11311 | epoch avg. loss: 0.067 | test avg. loss: 4.198


 23%|██▎       | 11314/50000 [16:53<58:32, 11.01it/s]

Epochs: 11312 | epoch avg. loss: 0.095 | test avg. loss: 4.148
Epochs: 11313 | epoch avg. loss: 0.095 | test avg. loss: 4.425
Epochs: 11314 | epoch avg. loss: 0.116 | test avg. loss: 4.168


 23%|██▎       | 11318/50000 [16:54<54:12, 11.89it/s]

Epochs: 11315 | epoch avg. loss: 0.069 | test avg. loss: 4.305
Epochs: 11316 | epoch avg. loss: 0.108 | test avg. loss: 4.223
Epochs: 11317 | epoch avg. loss: 0.062 | test avg. loss: 4.319


 23%|██▎       | 11320/50000 [16:54<55:06, 11.70it/s]

Epochs: 11318 | epoch avg. loss: 0.092 | test avg. loss: 4.218
Epochs: 11319 | epoch avg. loss: 0.077 | test avg. loss: 4.197
Epochs: 11320 | epoch avg. loss: 0.066 | test avg. loss: 4.193


 23%|██▎       | 11324/50000 [16:54<56:57, 11.32it/s]

Epochs: 11321 | epoch avg. loss: 0.077 | test avg. loss: 4.132
Epochs: 11322 | epoch avg. loss: 0.077 | test avg. loss: 4.272
Epochs: 11323 | epoch avg. loss: 0.075 | test avg. loss: 4.169


 23%|██▎       | 11326/50000 [16:54<59:46, 10.78it/s]

Epochs: 11324 | epoch avg. loss: 0.081 | test avg. loss: 4.390
Epochs: 11325 | epoch avg. loss: 0.118 | test avg. loss: 4.120


 23%|██▎       | 11328/50000 [16:55<58:44, 10.97it/s]

Epochs: 11326 | epoch avg. loss: 0.221 | test avg. loss: 4.603
Epochs: 11327 | epoch avg. loss: 0.201 | test avg. loss: 4.129
Epochs: 11328 | epoch avg. loss: 0.163 | test avg. loss: 4.581


 23%|██▎       | 11332/50000 [16:55<57:10, 11.27it/s]

Epochs: 11329 | epoch avg. loss: 0.179 | test avg. loss: 4.210
Epochs: 11330 | epoch avg. loss: 0.068 | test avg. loss: 4.425
Epochs: 11331 | epoch avg. loss: 0.106 | test avg. loss: 4.137




Epochs: 11332 | epoch avg. loss: 0.107 | test avg. loss: 4.324
Epochs: 11333 | epoch avg. loss: 0.084 | test avg. loss: 4.132
Epochs: 11334 | epoch avg. loss: 0.101 | test avg. loss: 4.229


 23%|██▎       | 11338/50000 [16:55<49:20, 13.06it/s]

Epochs: 11335 | epoch avg. loss: 0.072 | test avg. loss: 4.304
Epochs: 11336 | epoch avg. loss: 0.070 | test avg. loss: 4.358
Epochs: 11337 | epoch avg. loss: 0.079 | test avg. loss: 4.159


                                                     

Epochs: 11338 | epoch avg. loss: 0.066 | test avg. loss: 4.424
Epochs: 11339 | epoch avg. loss: 0.144 | test avg. loss: 4.072
Epochs: 11340 | epoch avg. loss: 0.127 | test avg. loss: 4.245


 23%|██▎       | 11344/50000 [16:56<48:02, 13.41it/s]

Epochs: 11341 | epoch avg. loss: 0.094 | test avg. loss: 4.259
Epochs: 11342 | epoch avg. loss: 0.072 | test avg. loss: 4.224
Epochs: 11343 | epoch avg. loss: 0.077 | test avg. loss: 4.404


 23%|██▎       | 11348/50000 [16:56<45:15, 14.23it/s]

Epochs: 11344 | epoch avg. loss: 0.091 | test avg. loss: 4.186
Epochs: 11345 | epoch avg. loss: 0.101 | test avg. loss: 4.235
Epochs: 11346 | epoch avg. loss: 0.063 | test avg. loss: 4.274
Epochs: 11347 | epoch avg. loss: 0.066 | test avg. loss: 4.168


 23%|██▎       | 11350/50000 [16:56<47:49, 13.47it/s]

Epochs: 11348 | epoch avg. loss: 0.067 | test avg. loss: 4.312
Epochs: 11349 | epoch avg. loss: 0.090 | test avg. loss: 4.088
Epochs: 11350 | epoch avg. loss: 0.125 | test avg. loss: 4.606


 23%|██▎       | 11354/50000 [16:57<53:31, 12.03it/s]

Epochs: 11351 | epoch avg. loss: 0.198 | test avg. loss: 4.168
Epochs: 11352 | epoch avg. loss: 0.272 | test avg. loss: 4.907
Epochs: 11353 | epoch avg. loss: 0.329 | test avg. loss: 4.205


 23%|██▎       | 11356/50000 [16:57<51:53, 12.41it/s]

Epochs: 11354 | epoch avg. loss: 0.382 | test avg. loss: 4.984
Epochs: 11355 | epoch avg. loss: 0.376 | test avg. loss: 4.365
Epochs: 11356 | epoch avg. loss: 0.310 | test avg. loss: 5.339


 23%|██▎       | 11360/50000 [16:57<51:18, 12.55it/s]

Epochs: 11357 | epoch avg. loss: 0.516 | test avg. loss: 4.182
Epochs: 11358 | epoch avg. loss: 0.227 | test avg. loss: 4.747
Epochs: 11359 | epoch avg. loss: 0.233 | test avg. loss: 4.154


 23%|██▎       | 11362/50000 [16:57<51:10, 12.58it/s]

Epochs: 11360 | epoch avg. loss: 0.578 | test avg. loss: 4.739
Epochs: 11361 | epoch avg. loss: 0.498 | test avg. loss: 4.140
Epochs: 11362 | epoch avg. loss: 0.328 | test avg. loss: 4.748


 23%|██▎       | 11364/50000 [16:58<50:59, 12.63it/s]

Epochs: 11363 | epoch avg. loss: 0.305 | test avg. loss: 4.216
Epochs: 11364 | epoch avg. loss: 0.391 | test avg. loss: 4.412
Epochs: 11365 | epoch avg. loss: 0.339 | test avg. loss: 3.839


 23%|██▎       | 11368/50000 [16:58<55:39, 11.57it/s]

Epochs: 11366 | epoch avg. loss: 0.406 | test avg. loss: 3.985
Epochs: 11367 | epoch avg. loss: 0.227 | test avg. loss: 4.094
Epochs: 11368 | epoch avg. loss: 0.113 | test avg. loss: 4.573


 23%|██▎       | 11372/50000 [16:58<56:42, 11.35it/s]

Epochs: 11369 | epoch avg. loss: 0.237 | test avg. loss: 4.524
Epochs: 11370 | epoch avg. loss: 0.085 | test avg. loss: 4.498
Epochs: 11371 | epoch avg. loss: 0.115 | test avg. loss: 4.288


 23%|██▎       | 11374/50000 [16:58<54:48, 11.74it/s]

Epochs: 11372 | epoch avg. loss: 0.069 | test avg. loss: 4.253
Epochs: 11373 | epoch avg. loss: 0.099 | test avg. loss: 4.175
Epochs: 11374 | epoch avg. loss: 0.092 | test avg. loss: 4.180


 23%|██▎       | 11378/50000 [16:59<50:43, 12.69it/s]

Epochs: 11375 | epoch avg. loss: 0.089 | test avg. loss: 4.334
Epochs: 11376 | epoch avg. loss: 0.077 | test avg. loss: 4.301
Epochs: 11377 | epoch avg. loss: 0.095 | test avg. loss: 4.589


 23%|██▎       | 11380/50000 [16:59<49:06, 13.11it/s]

Epochs: 11378 | epoch avg. loss: 0.161 | test avg. loss: 4.275
Epochs: 11379 | epoch avg. loss: 0.311 | test avg. loss: 4.717
Epochs: 11380 | epoch avg. loss: 0.247 | test avg. loss: 4.164


 23%|██▎       | 11384/50000 [16:59<48:16, 13.33it/s]

Epochs: 11381 | epoch avg. loss: 0.211 | test avg. loss: 4.925
Epochs: 11382 | epoch avg. loss: 0.333 | test avg. loss: 4.237
Epochs: 11383 | epoch avg. loss: 0.353 | test avg. loss: 4.696


 23%|██▎       | 11386/50000 [16:59<48:15, 13.33it/s]

Epochs: 11384 | epoch avg. loss: 0.315 | test avg. loss: 4.144
Epochs: 11385 | epoch avg. loss: 0.262 | test avg. loss: 4.806
Epochs: 11386 | epoch avg. loss: 0.269 | test avg. loss: 4.177


 23%|██▎       | 11390/50000 [17:00<52:39, 12.22it/s]

Epochs: 11387 | epoch avg. loss: 0.435 | test avg. loss: 4.560
Epochs: 11388 | epoch avg. loss: 0.410 | test avg. loss: 4.112
Epochs: 11389 | epoch avg. loss: 0.248 | test avg. loss: 4.157


 23%|██▎       | 11392/50000 [17:00<54:21, 11.84it/s]

Epochs: 11390 | epoch avg. loss: 0.180 | test avg. loss: 3.977
Epochs: 11391 | epoch avg. loss: 0.239 | test avg. loss: 3.870
Epochs: 11392 | epoch avg. loss: 0.171 | test avg. loss: 4.162


 23%|██▎       | 11396/50000 [17:00<51:17, 12.54it/s]

Epochs: 11393 | epoch avg. loss: 0.169 | test avg. loss: 4.028
Epochs: 11394 | epoch avg. loss: 0.212 | test avg. loss: 4.500
Epochs: 11395 | epoch avg. loss: 0.165 | test avg. loss: 4.123


 23%|██▎       | 11398/50000 [17:00<49:44, 12.93it/s]

Epochs: 11396 | epoch avg. loss: 0.152 | test avg. loss: 4.552
Epochs: 11397 | epoch avg. loss: 0.182 | test avg. loss: 4.196
Epochs: 11398 | epoch avg. loss: 0.192 | test avg. loss: 4.296


 23%|██▎       | 11398/50000 [17:00<49:44, 12.93it/s]

Epochs: 11399 | epoch avg. loss: 0.090 | test avg. loss: 4.404




Epochs: 11400 | epoch avg. loss: 0.095 | test avg. loss: 4.172
Epochs: 11401 | epoch avg. loss: 0.082 | test avg. loss: 4.061
Epochs: 11402 | epoch avg. loss: 0.067 | test avg. loss: 3.964


 23%|██▎       | 11406/50000 [17:02<1:31:15,  7.05it/s]

Epochs: 11403 | epoch avg. loss: 0.080 | test avg. loss: 4.197
Epochs: 11404 | epoch avg. loss: 0.124 | test avg. loss: 4.166
Epochs: 11405 | epoch avg. loss: 0.246 | test avg. loss: 4.733


 23%|██▎       | 11408/50000 [17:02<1:17:47,  8.27it/s]

Epochs: 11406 | epoch avg. loss: 0.199 | test avg. loss: 4.307
Epochs: 11407 | epoch avg. loss: 0.159 | test avg. loss: 4.804
Epochs: 11408 | epoch avg. loss: 0.207 | test avg. loss: 4.117


 23%|██▎       | 11412/50000 [17:03<1:05:56,  9.75it/s]

Epochs: 11409 | epoch avg. loss: 0.326 | test avg. loss: 4.439
Epochs: 11410 | epoch avg. loss: 0.197 | test avg. loss: 3.992
Epochs: 11411 | epoch avg. loss: 0.116 | test avg. loss: 4.429




Epochs: 11412 | epoch avg. loss: 0.236 | test avg. loss: 4.148
Epochs: 11413 | epoch avg. loss: 0.253 | test avg. loss: 4.412
Epochs: 11414 | epoch avg. loss: 0.106 | test avg. loss: 4.368


 23%|██▎       | 11418/50000 [17:03<50:20, 12.77it/s]

Epochs: 11415 | epoch avg. loss: 0.145 | test avg. loss: 4.384
Epochs: 11416 | epoch avg. loss: 0.112 | test avg. loss: 4.046
Epochs: 11417 | epoch avg. loss: 0.092 | test avg. loss: 4.280
Epochs: 11418 | epoch avg. loss: 0.107 | test avg. loss: 3.996


 23%|██▎       | 11422/50000 [17:03<46:04, 13.96it/s]

Epochs: 11419 | epoch avg. loss: 0.169 | test avg. loss: 4.326
Epochs: 11420 | epoch avg. loss: 0.077 | test avg. loss: 4.181
Epochs: 11421 | epoch avg. loss: 0.104 | test avg. loss: 4.516


 23%|██▎       | 11424/50000 [17:03<46:55, 13.70it/s]

Epochs: 11422 | epoch avg. loss: 0.091 | test avg. loss: 4.267
Epochs: 11423 | epoch avg. loss: 0.127 | test avg. loss: 4.551
Epochs: 11424 | epoch avg. loss: 0.126 | test avg. loss: 4.187


 23%|██▎       | 11428/50000 [17:04<50:59, 12.61it/s]

Epochs: 11425 | epoch avg. loss: 0.105 | test avg. loss: 4.126
Epochs: 11426 | epoch avg. loss: 0.089 | test avg. loss: 4.097
Epochs: 11427 | epoch avg. loss: 0.067 | test avg. loss: 4.234


 23%|██▎       | 11432/50000 [17:04<47:00, 13.67it/s]

Epochs: 11428 | epoch avg. loss: 0.079 | test avg. loss: 4.213
Epochs: 11429 | epoch avg. loss: 0.114 | test avg. loss: 4.212
Epochs: 11430 | epoch avg. loss: 0.119 | test avg. loss: 4.265
Epochs: 11431 | epoch avg. loss: 0.063 | test avg. loss: 4.203


 23%|██▎       | 11434/50000 [17:04<45:28, 14.14it/s]

Epochs: 11432 | epoch avg. loss: 0.062 | test avg. loss: 4.115
Epochs: 11433 | epoch avg. loss: 0.068 | test avg. loss: 4.275
Epochs: 11434 | epoch avg. loss: 0.069 | test avg. loss: 4.222


 23%|██▎       | 11438/50000 [17:04<49:40, 12.94it/s]

Epochs: 11435 | epoch avg. loss: 0.080 | test avg. loss: 4.260
Epochs: 11436 | epoch avg. loss: 0.082 | test avg. loss: 4.423
Epochs: 11437 | epoch avg. loss: 0.097 | test avg. loss: 4.171


 23%|██▎       | 11442/50000 [17:05<46:29, 13.82it/s]

Epochs: 11438 | epoch avg. loss: 0.067 | test avg. loss: 4.321
Epochs: 11439 | epoch avg. loss: 0.089 | test avg. loss: 4.023
Epochs: 11440 | epoch avg. loss: 0.109 | test avg. loss: 4.302
Epochs: 11441 | epoch avg. loss: 0.117 | test avg. loss: 4.015


 23%|██▎       | 11444/50000 [17:05<45:28, 14.13it/s]

Epochs: 11442 | epoch avg. loss: 0.131 | test avg. loss: 4.248
Epochs: 11443 | epoch avg. loss: 0.078 | test avg. loss: 4.117
Epochs: 11444 | epoch avg. loss: 0.091 | test avg. loss: 4.386


                                                     

Epochs: 11445 | epoch avg. loss: 0.122 | test avg. loss: 4.095
Epochs: 11446 | epoch avg. loss: 0.086 | test avg. loss: 4.152
Epochs: 11447 | epoch avg. loss: 0.072 | test avg. loss: 4.182


 23%|██▎       | 11450/50000 [17:05<46:17, 13.88it/s]

Epochs: 11448 | epoch avg. loss: 0.070 | test avg. loss: 4.111
Epochs: 11449 | epoch avg. loss: 0.077 | test avg. loss: 4.235
Epochs: 11450 | epoch avg. loss: 0.082 | test avg. loss: 4.100


 23%|██▎       | 11454/50000 [17:06<52:15, 12.29it/s]

Epochs: 11451 | epoch avg. loss: 0.091 | test avg. loss: 4.435
Epochs: 11452 | epoch avg. loss: 0.105 | test avg. loss: 4.144
Epochs: 11453 | epoch avg. loss: 0.175 | test avg. loss: 4.686


 23%|██▎       | 11458/50000 [17:06<46:12, 13.90it/s]

Epochs: 11454 | epoch avg. loss: 0.251 | test avg. loss: 4.074
Epochs: 11455 | epoch avg. loss: 0.134 | test avg. loss: 4.324
Epochs: 11456 | epoch avg. loss: 0.097 | test avg. loss: 4.116
Epochs: 11457 | epoch avg. loss: 0.096 | test avg. loss: 4.399


 23%|██▎       | 11462/50000 [17:06<43:09, 14.88it/s]

Epochs: 11458 | epoch avg. loss: 0.114 | test avg. loss: 4.184
Epochs: 11459 | epoch avg. loss: 0.137 | test avg. loss: 4.335
Epochs: 11460 | epoch avg. loss: 0.126 | test avg. loss: 4.532
Epochs: 11461 | epoch avg. loss: 0.153 | test avg. loss: 3.989


 23%|██▎       | 11464/50000 [17:06<44:50, 14.33it/s]

Epochs: 11462 | epoch avg. loss: 0.088 | test avg. loss: 4.025
Epochs: 11463 | epoch avg. loss: 0.080 | test avg. loss: 3.994
Epochs: 11464 | epoch avg. loss: 0.126 | test avg. loss: 4.574


 23%|██▎       | 11468/50000 [17:07<48:44, 13.17it/s]

Epochs: 11465 | epoch avg. loss: 0.146 | test avg. loss: 4.365
Epochs: 11466 | epoch avg. loss: 0.356 | test avg. loss: 4.931
Epochs: 11467 | epoch avg. loss: 0.324 | test avg. loss: 4.221


 23%|██▎       | 11470/50000 [17:07<47:20, 13.57it/s]

Epochs: 11468 | epoch avg. loss: 0.267 | test avg. loss: 4.434
Epochs: 11469 | epoch avg. loss: 0.081 | test avg. loss: 4.157
Epochs: 11470 | epoch avg. loss: 0.161 | test avg. loss: 4.298


 23%|██▎       | 11474/50000 [17:07<44:50, 14.32it/s]

Epochs: 11471 | epoch avg. loss: 0.103 | test avg. loss: 4.164
Epochs: 11472 | epoch avg. loss: 0.059 | test avg. loss: 4.333
Epochs: 11473 | epoch avg. loss: 0.088 | test avg. loss: 4.093
Epochs: 11474 | epoch avg. loss: 0.098 | test avg. loss: 4.568


 23%|██▎       | 11478/50000 [17:07<42:44, 15.02it/s]

Epochs: 11475 | epoch avg. loss: 0.137 | test avg. loss: 4.177
Epochs: 11476 | epoch avg. loss: 0.251 | test avg. loss: 4.762
Epochs: 11477 | epoch avg. loss: 0.224 | test avg. loss: 4.221


 23%|██▎       | 11480/50000 [17:08<46:08, 13.91it/s]

Epochs: 11478 | epoch avg. loss: 0.427 | test avg. loss: 4.786
Epochs: 11479 | epoch avg. loss: 0.427 | test avg. loss: 3.846
Epochs: 11480 | epoch avg. loss: 0.217 | test avg. loss: 3.934


 23%|██▎       | 11484/50000 [17:08<47:39, 13.47it/s]

Epochs: 11481 | epoch avg. loss: 0.212 | test avg. loss: 4.197
Epochs: 11482 | epoch avg. loss: 0.150 | test avg. loss: 4.297
Epochs: 11483 | epoch avg. loss: 0.106 | test avg. loss: 4.708


 23%|██▎       | 11486/50000 [17:08<51:37, 12.44it/s]

Epochs: 11484 | epoch avg. loss: 0.121 | test avg. loss: 4.504
Epochs: 11485 | epoch avg. loss: 0.080 | test avg. loss: 4.552
Epochs: 11486 | epoch avg. loss: 0.116 | test avg. loss: 4.051


 23%|██▎       | 11490/50000 [17:08<47:55, 13.39it/s]

Epochs: 11487 | epoch avg. loss: 0.082 | test avg. loss: 4.043
Epochs: 11488 | epoch avg. loss: 0.102 | test avg. loss: 4.029
Epochs: 11489 | epoch avg. loss: 0.089 | test avg. loss: 4.207
Epochs: 11490 | epoch avg. loss: 0.065 | test avg. loss: 4.363


                                                     

Epochs: 11491 | epoch avg. loss: 0.087 | test avg. loss: 4.423
Epochs: 11492 | epoch avg. loss: 0.082 | test avg. loss: 4.516


 23%|██▎       | 11496/50000 [17:09<51:56, 12.36it/s]

Epochs: 11493 | epoch avg. loss: 0.089 | test avg. loss: 4.203
Epochs: 11494 | epoch avg. loss: 0.060 | test avg. loss: 4.142
Epochs: 11495 | epoch avg. loss: 0.066 | test avg. loss: 3.932


 23%|██▎       | 11498/50000 [17:09<49:29, 12.97it/s]

Epochs: 11496 | epoch avg. loss: 0.086 | test avg. loss: 4.555
Epochs: 11497 | epoch avg. loss: 0.243 | test avg. loss: 4.124
Epochs: 11498 | epoch avg. loss: 0.191 | test avg. loss: 4.558


 23%|██▎       | 11498/50000 [17:09<49:29, 12.97it/s]

Epochs: 11499 | epoch avg. loss: 0.102 | test avg. loss: 4.343


 23%|██▎       | 11502/50000 [17:11<2:22:54,  4.49it/s]

Epochs: 11500 | epoch avg. loss: 0.204 | test avg. loss: 4.611
Epochs: 11501 | epoch avg. loss: 0.138 | test avg. loss: 4.062
Epochs: 11502 | epoch avg. loss: 0.310 | test avg. loss: 4.394


 23%|██▎       | 11506/50000 [17:11<1:37:08,  6.60it/s]

Epochs: 11503 | epoch avg. loss: 0.288 | test avg. loss: 4.035
Epochs: 11504 | epoch avg. loss: 0.224 | test avg. loss: 4.536
Epochs: 11505 | epoch avg. loss: 0.193 | test avg. loss: 4.207


 23%|██▎       | 11508/50000 [17:11<1:25:10,  7.53it/s]

Epochs: 11506 | epoch avg. loss: 0.126 | test avg. loss: 4.423
Epochs: 11507 | epoch avg. loss: 0.158 | test avg. loss: 4.415
Epochs: 11508 | epoch avg. loss: 0.133 | test avg. loss: 4.225


 23%|██▎       | 11512/50000 [17:11<1:08:41,  9.34it/s]

Epochs: 11509 | epoch avg. loss: 0.128 | test avg. loss: 4.477
Epochs: 11510 | epoch avg. loss: 0.139 | test avg. loss: 4.095
Epochs: 11511 | epoch avg. loss: 0.158 | test avg. loss: 4.228


 23%|██▎       | 11516/50000 [17:12<54:22, 11.80it/s]  

Epochs: 11512 | epoch avg. loss: 0.076 | test avg. loss: 4.075
Epochs: 11513 | epoch avg. loss: 0.073 | test avg. loss: 4.381
Epochs: 11514 | epoch avg. loss: 0.103 | test avg. loss: 4.051
Epochs: 11515 | epoch avg. loss: 0.208 | test avg. loss: 4.543


 23%|██▎       | 11520/50000 [17:12<49:06, 13.06it/s]

Epochs: 11516 | epoch avg. loss: 0.372 | test avg. loss: 3.921
Epochs: 11517 | epoch avg. loss: 0.169 | test avg. loss: 4.057
Epochs: 11518 | epoch avg. loss: 0.122 | test avg. loss: 4.183
Epochs: 11519 | epoch avg. loss: 0.081 | test avg. loss: 4.247


 23%|██▎       | 11522/50000 [17:12<49:08, 13.05it/s]

Epochs: 11520 | epoch avg. loss: 0.066 | test avg. loss: 4.158
Epochs: 11521 | epoch avg. loss: 0.086 | test avg. loss: 4.181
Epochs: 11522 | epoch avg. loss: 0.071 | test avg. loss: 4.180


 23%|██▎       | 11526/50000 [17:12<51:02, 12.56it/s]

Epochs: 11523 | epoch avg. loss: 0.115 | test avg. loss: 4.176
Epochs: 11524 | epoch avg. loss: 0.122 | test avg. loss: 4.819
Epochs: 11525 | epoch avg. loss: 0.253 | test avg. loss: 4.233


 23%|██▎       | 11528/50000 [17:13<51:31, 12.45it/s]

Epochs: 11526 | epoch avg. loss: 0.198 | test avg. loss: 4.632
Epochs: 11527 | epoch avg. loss: 0.278 | test avg. loss: 4.107
Epochs: 11528 | epoch avg. loss: 0.216 | test avg. loss: 4.077


 23%|██▎       | 11532/50000 [17:13<51:20, 12.49it/s]

Epochs: 11529 | epoch avg. loss: 0.143 | test avg. loss: 4.427
Epochs: 11530 | epoch avg. loss: 0.131 | test avg. loss: 4.142
Epochs: 11531 | epoch avg. loss: 0.120 | test avg. loss: 4.186


 23%|██▎       | 11534/50000 [17:13<52:52, 12.13it/s]

Epochs: 11532 | epoch avg. loss: 0.061 | test avg. loss: 4.239
Epochs: 11533 | epoch avg. loss: 0.078 | test avg. loss: 4.160
Epochs: 11534 | epoch avg. loss: 0.065 | test avg. loss: 4.026


 23%|██▎       | 11538/50000 [17:13<55:13, 11.61it/s]

Epochs: 11535 | epoch avg. loss: 0.065 | test avg. loss: 4.201
Epochs: 11536 | epoch avg. loss: 0.099 | test avg. loss: 3.994
Epochs: 11537 | epoch avg. loss: 0.068 | test avg. loss: 4.080


 23%|██▎       | 11540/50000 [17:14<54:40, 11.73it/s]

Epochs: 11538 | epoch avg. loss: 0.085 | test avg. loss: 4.156
Epochs: 11539 | epoch avg. loss: 0.071 | test avg. loss: 4.163
Epochs: 11540 | epoch avg. loss: 0.072 | test avg. loss: 4.310


 23%|██▎       | 11544/50000 [17:14<53:26, 11.99it/s]

Epochs: 11541 | epoch avg. loss: 0.089 | test avg. loss: 4.263
Epochs: 11542 | epoch avg. loss: 0.076 | test avg. loss: 4.067
Epochs: 11543 | epoch avg. loss: 0.124 | test avg. loss: 4.506


 23%|██▎       | 11546/50000 [17:14<54:04, 11.85it/s]

Epochs: 11544 | epoch avg. loss: 0.210 | test avg. loss: 4.046
Epochs: 11545 | epoch avg. loss: 0.167 | test avg. loss: 4.329
Epochs: 11546 | epoch avg. loss: 0.145 | test avg. loss: 4.308


 23%|██▎       | 11550/50000 [17:14<53:54, 11.89it/s]

Epochs: 11547 | epoch avg. loss: 0.085 | test avg. loss: 4.171
Epochs: 11548 | epoch avg. loss: 0.069 | test avg. loss: 4.147
Epochs: 11549 | epoch avg. loss: 0.066 | test avg. loss: 4.110


 23%|██▎       | 11552/50000 [17:15<53:44, 11.92it/s]

Epochs: 11550 | epoch avg. loss: 0.063 | test avg. loss: 4.191
Epochs: 11551 | epoch avg. loss: 0.059 | test avg. loss: 4.234
Epochs: 11552 | epoch avg. loss: 0.061 | test avg. loss: 4.285


 23%|██▎       | 11556/50000 [17:15<53:04, 12.07it/s]

Epochs: 11553 | epoch avg. loss: 0.082 | test avg. loss: 4.453
Epochs: 11554 | epoch avg. loss: 0.112 | test avg. loss: 4.073
Epochs: 11555 | epoch avg. loss: 0.169 | test avg. loss: 4.503


 23%|██▎       | 11558/50000 [17:15<54:14, 11.81it/s]

Epochs: 11556 | epoch avg. loss: 0.269 | test avg. loss: 4.103
Epochs: 11557 | epoch avg. loss: 0.087 | test avg. loss: 4.107
Epochs: 11558 | epoch avg. loss: 0.078 | test avg. loss: 4.260


 23%|██▎       | 11562/50000 [17:15<55:47, 11.48it/s]

Epochs: 11559 | epoch avg. loss: 0.075 | test avg. loss: 4.207
Epochs: 11560 | epoch avg. loss: 0.090 | test avg. loss: 4.217
Epochs: 11561 | epoch avg. loss: 0.090 | test avg. loss: 4.322


 23%|██▎       | 11564/50000 [17:16<52:39, 12.16it/s]

Epochs: 11562 | epoch avg. loss: 0.068 | test avg. loss: 4.103
Epochs: 11563 | epoch avg. loss: 0.099 | test avg. loss: 3.967
Epochs: 11564 | epoch avg. loss: 0.077 | test avg. loss: 4.096


 23%|██▎       | 11568/50000 [17:16<52:27, 12.21it/s]

Epochs: 11565 | epoch avg. loss: 0.074 | test avg. loss: 3.997
Epochs: 11566 | epoch avg. loss: 0.089 | test avg. loss: 4.287
Epochs: 11567 | epoch avg. loss: 0.082 | test avg. loss: 4.253


 23%|██▎       | 11570/50000 [17:16<58:38, 10.92it/s]

Epochs: 11568 | epoch avg. loss: 0.057 | test avg. loss: 4.333
Epochs: 11569 | epoch avg. loss: 0.073 | test avg. loss: 4.113
Epochs: 11570 | epoch avg. loss: 0.078 | test avg. loss: 4.097


 23%|██▎       | 11574/50000 [17:16<55:23, 11.56it/s]

Epochs: 11571 | epoch avg. loss: 0.094 | test avg. loss: 4.250
Epochs: 11572 | epoch avg. loss: 0.103 | test avg. loss: 4.000
Epochs: 11573 | epoch avg. loss: 0.118 | test avg. loss: 4.572


 23%|██▎       | 11576/50000 [17:17<53:47, 11.90it/s]

Epochs: 11574 | epoch avg. loss: 0.176 | test avg. loss: 4.110
Epochs: 11575 | epoch avg. loss: 0.294 | test avg. loss: 4.383
Epochs: 11576 | epoch avg. loss: 0.113 | test avg. loss: 4.056


 23%|██▎       | 11580/50000 [17:17<52:26, 12.21it/s]

Epochs: 11577 | epoch avg. loss: 0.107 | test avg. loss: 4.233
Epochs: 11578 | epoch avg. loss: 0.086 | test avg. loss: 4.192
Epochs: 11579 | epoch avg. loss: 0.080 | test avg. loss: 4.127


 23%|██▎       | 11582/50000 [17:17<51:59, 12.31it/s]

Epochs: 11580 | epoch avg. loss: 0.078 | test avg. loss: 4.139
Epochs: 11581 | epoch avg. loss: 0.061 | test avg. loss: 4.213
Epochs: 11582 | epoch avg. loss: 0.071 | test avg. loss: 4.206


 23%|██▎       | 11586/50000 [17:17<52:05, 12.29it/s]

Epochs: 11583 | epoch avg. loss: 0.063 | test avg. loss: 4.111
Epochs: 11584 | epoch avg. loss: 0.068 | test avg. loss: 4.340
Epochs: 11585 | epoch avg. loss: 0.113 | test avg. loss: 4.082


 23%|██▎       | 11588/50000 [17:18<53:44, 11.91it/s]

Epochs: 11586 | epoch avg. loss: 0.127 | test avg. loss: 4.048
Epochs: 11587 | epoch avg. loss: 0.141 | test avg. loss: 4.626
Epochs: 11588 | epoch avg. loss: 0.166 | test avg. loss: 4.195


 23%|██▎       | 11592/50000 [17:18<51:27, 12.44it/s]

Epochs: 11589 | epoch avg. loss: 0.320 | test avg. loss: 4.578
Epochs: 11590 | epoch avg. loss: 0.286 | test avg. loss: 4.221
Epochs: 11591 | epoch avg. loss: 0.068 | test avg. loss: 4.209


 23%|██▎       | 11594/50000 [17:18<51:05, 12.53it/s]

Epochs: 11592 | epoch avg. loss: 0.115 | test avg. loss: 4.152
Epochs: 11593 | epoch avg. loss: 0.115 | test avg. loss: 3.988
Epochs: 11594 | epoch avg. loss: 0.165 | test avg. loss: 4.475


 23%|██▎       | 11598/50000 [17:18<51:33, 12.41it/s]

Epochs: 11595 | epoch avg. loss: 0.209 | test avg. loss: 4.039
Epochs: 11596 | epoch avg. loss: 0.241 | test avg. loss: 4.273
Epochs: 11597 | epoch avg. loss: 0.196 | test avg. loss: 4.245


 23%|██▎       | 11598/50000 [17:19<51:33, 12.41it/s]

Epochs: 11598 | epoch avg. loss: 0.128 | test avg. loss: 3.903
Epochs: 11599 | epoch avg. loss: 0.136 | test avg. loss: 4.441


                                                       

Epochs: 11600 | epoch avg. loss: 0.208 | test avg. loss: 4.267
Epochs: 11601 | epoch avg. loss: 0.483 | test avg. loss: 5.570
Epochs: 11602 | epoch avg. loss: 0.580 | test avg. loss: 4.433


 23%|██▎       | 11606/50000 [17:20<1:33:47,  6.82it/s]

Epochs: 11603 | epoch avg. loss: 0.476 | test avg. loss: 4.520
Epochs: 11604 | epoch avg. loss: 0.430 | test avg. loss: 4.229
Epochs: 11605 | epoch avg. loss: 0.312 | test avg. loss: 4.021


 23%|██▎       | 11608/50000 [17:21<1:21:40,  7.83it/s]

Epochs: 11606 | epoch avg. loss: 0.384 | test avg. loss: 4.938
Epochs: 11607 | epoch avg. loss: 0.436 | test avg. loss: 4.195
Epochs: 11608 | epoch avg. loss: 0.430 | test avg. loss: 4.755


 23%|██▎       | 11612/50000 [17:21<1:05:42,  9.74it/s]

Epochs: 11609 | epoch avg. loss: 0.416 | test avg. loss: 4.554
Epochs: 11610 | epoch avg. loss: 0.374 | test avg. loss: 4.637
Epochs: 11611 | epoch avg. loss: 0.247 | test avg. loss: 4.719


 23%|██▎       | 11614/50000 [17:21<59:37, 10.73it/s]

Epochs: 11612 | epoch avg. loss: 0.193 | test avg. loss: 3.954
Epochs: 11613 | epoch avg. loss: 0.163 | test avg. loss: 4.296
Epochs: 11614 | epoch avg. loss: 0.256 | test avg. loss: 3.614


 23%|██▎       | 11618/50000 [17:21<53:51, 11.88it/s]

Epochs: 11615 | epoch avg. loss: 0.289 | test avg. loss: 4.020
Epochs: 11616 | epoch avg. loss: 0.118 | test avg. loss: 4.123
Epochs: 11617 | epoch avg. loss: 0.085 | test avg. loss: 4.322


 23%|██▎       | 11620/50000 [17:22<52:04, 12.29it/s]

Epochs: 11618 | epoch avg. loss: 0.089 | test avg. loss: 4.119
Epochs: 11619 | epoch avg. loss: 0.201 | test avg. loss: 3.902
Epochs: 11620 | epoch avg. loss: 0.086 | test avg. loss: 3.847


 23%|██▎       | 11624/50000 [17:22<50:09, 12.75it/s]

Epochs: 11621 | epoch avg. loss: 0.099 | test avg. loss: 3.689
Epochs: 11622 | epoch avg. loss: 0.101 | test avg. loss: 4.305
Epochs: 11623 | epoch avg. loss: 0.177 | test avg. loss: 4.184


 23%|██▎       | 11628/50000 [17:22<45:14, 14.13it/s]

Epochs: 11624 | epoch avg. loss: 0.056 | test avg. loss: 4.688
Epochs: 11625 | epoch avg. loss: 0.133 | test avg. loss: 4.324
Epochs: 11626 | epoch avg. loss: 0.260 | test avg. loss: 4.619
Epochs: 11627 | epoch avg. loss: 0.252 | test avg. loss: 4.241


                                                     

Epochs: 11628 | epoch avg. loss: 0.085 | test avg. loss: 4.342
Epochs: 11629 | epoch avg. loss: 0.209 | test avg. loss: 4.540
Epochs: 11630 | epoch avg. loss: 0.238 | test avg. loss: 4.359


 23%|██▎       | 11634/50000 [17:22<47:17, 13.52it/s]

Epochs: 11631 | epoch avg. loss: 0.634 | test avg. loss: 5.021
Epochs: 11632 | epoch avg. loss: 0.721 | test avg. loss: 4.340
Epochs: 11633 | epoch avg. loss: 0.902 | test avg. loss: 4.273


 23%|██▎       | 11636/50000 [17:23<47:47, 13.38it/s]

Epochs: 11634 | epoch avg. loss: 0.619 | test avg. loss: 5.716
Epochs: 11635 | epoch avg. loss: 0.885 | test avg. loss: 5.116
Epochs: 11636 | epoch avg. loss: 1.107 | test avg. loss: 5.405


 23%|██▎       | 11640/50000 [17:23<45:55, 13.92it/s]

Epochs: 11637 | epoch avg. loss: 1.067 | test avg. loss: 4.452
Epochs: 11638 | epoch avg. loss: 1.821 | test avg. loss: 4.663
Epochs: 11639 | epoch avg. loss: 1.150 | test avg. loss: 4.086
Epochs: 11640 | epoch avg. loss: 0.568 | test avg. loss: 4.413


 23%|██▎       | 11644/50000 [17:23<43:52, 14.57it/s]

Epochs: 11641 | epoch avg. loss: 0.959 | test avg. loss: 4.264
Epochs: 11642 | epoch avg. loss: 0.932 | test avg. loss: 4.625
Epochs: 11643 | epoch avg. loss: 0.595 | test avg. loss: 4.988


 23%|██▎       | 11646/50000 [17:23<44:42, 14.30it/s]

Epochs: 11644 | epoch avg. loss: 0.333 | test avg. loss: 4.262
Epochs: 11645 | epoch avg. loss: 0.444 | test avg. loss: 4.790
Epochs: 11646 | epoch avg. loss: 0.781 | test avg. loss: 3.486


 23%|██▎       | 11650/50000 [17:24<45:18, 14.11it/s]

Epochs: 11647 | epoch avg. loss: 0.184 | test avg. loss: 4.314
Epochs: 11648 | epoch avg. loss: 0.438 | test avg. loss: 3.766
Epochs: 11649 | epoch avg. loss: 0.465 | test avg. loss: 4.205


                                                     

Epochs: 11650 | epoch avg. loss: 0.485 | test avg. loss: 3.653
Epochs: 11651 | epoch avg. loss: 0.232 | test avg. loss: 3.613
Epochs: 11652 | epoch avg. loss: 0.171 | test avg. loss: 3.664


 23%|██▎       | 11656/50000 [17:24<42:27, 15.05it/s]

Epochs: 11653 | epoch avg. loss: 0.190 | test avg. loss: 3.993
Epochs: 11654 | epoch avg. loss: 0.110 | test avg. loss: 4.335
Epochs: 11655 | epoch avg. loss: 0.109 | test avg. loss: 4.377
Epochs: 11656 | epoch avg. loss: 0.110 | test avg. loss: 4.477


 23%|██▎       | 11660/50000 [17:24<42:20, 15.09it/s]

Epochs: 11657 | epoch avg. loss: 0.118 | test avg. loss: 4.127
Epochs: 11658 | epoch avg. loss: 0.072 | test avg. loss: 4.085
Epochs: 11659 | epoch avg. loss: 0.088 | test avg. loss: 3.986
Epochs: 11660 | epoch avg. loss: 0.074 | test avg. loss: 4.051


 23%|██▎       | 11664/50000 [17:25<44:38, 14.31it/s]

Epochs: 11661 | epoch avg. loss: 0.066 | test avg. loss: 4.168
Epochs: 11662 | epoch avg. loss: 0.085 | test avg. loss: 4.288
Epochs: 11663 | epoch avg. loss: 0.088 | test avg. loss: 4.481


 23%|██▎       | 11666/50000 [17:25<43:53, 14.56it/s]

Epochs: 11664 | epoch avg. loss: 0.086 | test avg. loss: 4.162
Epochs: 11665 | epoch avg. loss: 0.081 | test avg. loss: 4.089
Epochs: 11666 | epoch avg. loss: 0.076 | test avg. loss: 4.048


 23%|██▎       | 11670/50000 [17:25<45:37, 14.00it/s]

Epochs: 11667 | epoch avg. loss: 0.080 | test avg. loss: 3.966
Epochs: 11668 | epoch avg. loss: 0.115 | test avg. loss: 4.562
Epochs: 11669 | epoch avg. loss: 0.357 | test avg. loss: 4.048


 23%|██▎       | 11672/50000 [17:25<45:22, 14.08it/s]

Epochs: 11670 | epoch avg. loss: 0.136 | test avg. loss: 3.991
Epochs: 11671 | epoch avg. loss: 0.139 | test avg. loss: 4.183
Epochs: 11672 | epoch avg. loss: 0.087 | test avg. loss: 3.998


 23%|██▎       | 11676/50000 [17:25<46:27, 13.75it/s]

Epochs: 11673 | epoch avg. loss: 0.125 | test avg. loss: 4.221
Epochs: 11674 | epoch avg. loss: 0.108 | test avg. loss: 3.947
Epochs: 11675 | epoch avg. loss: 0.122 | test avg. loss: 3.934


 23%|██▎       | 11678/50000 [17:26<49:49, 12.82it/s]

Epochs: 11676 | epoch avg. loss: 0.080 | test avg. loss: 4.030
Epochs: 11677 | epoch avg. loss: 0.078 | test avg. loss: 3.875
Epochs: 11678 | epoch avg. loss: 0.101 | test avg. loss: 4.138


 23%|██▎       | 11682/50000 [17:26<50:10, 12.73it/s]

Epochs: 11679 | epoch avg. loss: 0.097 | test avg. loss: 3.993
Epochs: 11680 | epoch avg. loss: 0.085 | test avg. loss: 4.014
Epochs: 11681 | epoch avg. loss: 0.074 | test avg. loss: 4.008


 23%|██▎       | 11686/50000 [17:26<46:10, 13.83it/s]

Epochs: 11682 | epoch avg. loss: 0.100 | test avg. loss: 4.017
Epochs: 11683 | epoch avg. loss: 0.064 | test avg. loss: 4.126
Epochs: 11684 | epoch avg. loss: 0.072 | test avg. loss: 3.989
Epochs: 11685 | epoch avg. loss: 0.079 | test avg. loss: 4.341


 23%|██▎       | 11688/50000 [17:26<44:14, 14.43it/s]

Epochs: 11686 | epoch avg. loss: 0.163 | test avg. loss: 4.099
Epochs: 11687 | epoch avg. loss: 0.087 | test avg. loss: 4.050
Epochs: 11688 | epoch avg. loss: 0.095 | test avg. loss: 4.365


 23%|██▎       | 11692/50000 [17:27<45:20, 14.08it/s]

Epochs: 11689 | epoch avg. loss: 0.155 | test avg. loss: 4.011
Epochs: 11690 | epoch avg. loss: 0.257 | test avg. loss: 4.221
Epochs: 11691 | epoch avg. loss: 0.126 | test avg. loss: 4.160


 23%|██▎       | 11694/50000 [17:27<44:42, 14.28it/s]

Epochs: 11692 | epoch avg. loss: 0.161 | test avg. loss: 4.172
Epochs: 11693 | epoch avg. loss: 0.123 | test avg. loss: 4.460
Epochs: 11694 | epoch avg. loss: 0.129 | test avg. loss: 4.087


 23%|██▎       | 11698/50000 [17:27<45:13, 14.12it/s]

Epochs: 11695 | epoch avg. loss: 0.319 | test avg. loss: 4.327
Epochs: 11696 | epoch avg. loss: 0.282 | test avg. loss: 4.046
Epochs: 11697 | epoch avg. loss: 0.187 | test avg. loss: 4.078


 23%|██▎       | 11698/50000 [17:27<45:13, 14.12it/s]

Epochs: 11698 | epoch avg. loss: 0.154 | test avg. loss: 4.602
Epochs: 11699 | epoch avg. loss: 0.204 | test avg. loss: 4.001


 23%|██▎       | 11704/50000 [17:29<1:50:09,  5.79it/s]

Epochs: 11700 | epoch avg. loss: 0.212 | test avg. loss: 4.674
Epochs: 11701 | epoch avg. loss: 0.316 | test avg. loss: 4.038
Epochs: 11702 | epoch avg. loss: 0.535 | test avg. loss: 4.101
Epochs: 11703 | epoch avg. loss: 0.167 | test avg. loss: 4.428


 23%|██▎       | 11706/50000 [17:29<1:28:53,  7.18it/s]

Epochs: 11704 | epoch avg. loss: 0.121 | test avg. loss: 4.235
Epochs: 11705 | epoch avg. loss: 0.097 | test avg. loss: 4.495
Epochs: 11706 | epoch avg. loss: 0.144 | test avg. loss: 4.036
Epochs: 11707 | epoch avg. loss: 0.154 | test avg. loss: 4.300


 23%|██▎       | 11710/50000 [17:29<1:06:08,  9.65it/s]

Epochs: 11708 | epoch avg. loss: 0.168 | test avg. loss: 4.025
Epochs: 11709 | epoch avg. loss: 0.083 | test avg. loss: 4.369
Epochs: 11710 | epoch avg. loss: 0.169 | test avg. loss: 4.303


 23%|██▎       | 11714/50000 [17:30<59:09, 10.79it/s]  

Epochs: 11711 | epoch avg. loss: 0.084 | test avg. loss: 4.160
Epochs: 11712 | epoch avg. loss: 0.071 | test avg. loss: 4.127
Epochs: 11713 | epoch avg. loss: 0.070 | test avg. loss: 4.023


 23%|██▎       | 11716/50000 [17:30<55:56, 11.41it/s]

Epochs: 11714 | epoch avg. loss: 0.062 | test avg. loss: 4.083
Epochs: 11715 | epoch avg. loss: 0.063 | test avg. loss: 4.193
Epochs: 11716 | epoch avg. loss: 0.061 | test avg. loss: 4.319


 23%|██▎       | 11720/50000 [17:30<56:47, 11.23it/s]

Epochs: 11717 | epoch avg. loss: 0.067 | test avg. loss: 4.147
Epochs: 11718 | epoch avg. loss: 0.078 | test avg. loss: 4.087
Epochs: 11719 | epoch avg. loss: 0.062 | test avg. loss: 3.992


 23%|██▎       | 11722/50000 [17:30<53:41, 11.88it/s]

Epochs: 11720 | epoch avg. loss: 0.058 | test avg. loss: 3.929
Epochs: 11721 | epoch avg. loss: 0.058 | test avg. loss: 3.953
Epochs: 11722 | epoch avg. loss: 0.068 | test avg. loss: 3.976
Epochs: 11723 | epoch avg. loss: 0.081 | test avg. loss: 4.299


 23%|██▎       | 11726/50000 [17:31<53:38, 11.89it/s]

Epochs: 11724 | epoch avg. loss: 0.090 | test avg. loss: 4.033
Epochs: 11725 | epoch avg. loss: 0.109 | test avg. loss: 4.121
Epochs: 11726 | epoch avg. loss: 0.068 | test avg. loss: 3.984


 23%|██▎       | 11730/50000 [17:31<49:48, 12.81it/s]

Epochs: 11727 | epoch avg. loss: 0.051 | test avg. loss: 4.110
Epochs: 11728 | epoch avg. loss: 0.108 | test avg. loss: 3.994
Epochs: 11729 | epoch avg. loss: 0.133 | test avg. loss: 4.036


 23%|██▎       | 11732/50000 [17:31<48:38, 13.11it/s]

Epochs: 11730 | epoch avg. loss: 0.140 | test avg. loss: 4.402
Epochs: 11731 | epoch avg. loss: 0.154 | test avg. loss: 4.125
Epochs: 11732 | epoch avg. loss: 0.096 | test avg. loss: 4.225


 23%|██▎       | 11736/50000 [17:31<48:29, 13.15it/s]

Epochs: 11733 | epoch avg. loss: 0.143 | test avg. loss: 4.102
Epochs: 11734 | epoch avg. loss: 0.097 | test avg. loss: 3.848
Epochs: 11735 | epoch avg. loss: 0.116 | test avg. loss: 4.272


 23%|██▎       | 11738/50000 [17:32<51:58, 12.27it/s]

Epochs: 11736 | epoch avg. loss: 0.202 | test avg. loss: 3.965
Epochs: 11737 | epoch avg. loss: 0.216 | test avg. loss: 4.016
Epochs: 11738 | epoch avg. loss: 0.193 | test avg. loss: 4.699


 23%|██▎       | 11742/50000 [17:32<50:55, 12.52it/s]

Epochs: 11739 | epoch avg. loss: 0.307 | test avg. loss: 3.947
Epochs: 11740 | epoch avg. loss: 0.185 | test avg. loss: 4.348
Epochs: 11741 | epoch avg. loss: 0.164 | test avg. loss: 3.992


 23%|██▎       | 11744/50000 [17:32<51:43, 12.33it/s]

Epochs: 11742 | epoch avg. loss: 0.311 | test avg. loss: 4.456
Epochs: 11743 | epoch avg. loss: 0.233 | test avg. loss: 4.126
Epochs: 11744 | epoch avg. loss: 0.249 | test avg. loss: 4.059


 23%|██▎       | 11748/50000 [17:32<51:40, 12.34it/s]

Epochs: 11745 | epoch avg. loss: 0.171 | test avg. loss: 4.443
Epochs: 11746 | epoch avg. loss: 0.172 | test avg. loss: 4.102
Epochs: 11747 | epoch avg. loss: 0.326 | test avg. loss: 4.531


 24%|██▎       | 11750/50000 [17:33<52:58, 12.03it/s]

Epochs: 11748 | epoch avg. loss: 0.244 | test avg. loss: 4.123
Epochs: 11749 | epoch avg. loss: 0.235 | test avg. loss: 4.168
Epochs: 11750 | epoch avg. loss: 0.171 | test avg. loss: 4.627


 24%|██▎       | 11754/50000 [17:33<51:18, 12.43it/s]

Epochs: 11751 | epoch avg. loss: 0.262 | test avg. loss: 3.938
Epochs: 11752 | epoch avg. loss: 0.196 | test avg. loss: 4.150
Epochs: 11753 | epoch avg. loss: 0.300 | test avg. loss: 3.911


 24%|██▎       | 11756/50000 [17:33<50:11, 12.70it/s]

Epochs: 11754 | epoch avg. loss: 0.201 | test avg. loss: 4.008
Epochs: 11755 | epoch avg. loss: 0.340 | test avg. loss: 4.510
Epochs: 11756 | epoch avg. loss: 0.143 | test avg. loss: 4.198


 24%|██▎       | 11758/50000 [17:33<52:54, 12.05it/s]

Epochs: 11757 | epoch avg. loss: 0.195 | test avg. loss: 4.297
Epochs: 11758 | epoch avg. loss: 0.147 | test avg. loss: 4.261
Epochs: 11759 | epoch avg. loss: 0.113 | test avg. loss: 3.931


 24%|██▎       | 11762/50000 [17:34<1:00:34, 10.52it/s]

Epochs: 11760 | epoch avg. loss: 0.148 | test avg. loss: 4.238
Epochs: 11761 | epoch avg. loss: 0.185 | test avg. loss: 3.883
Epochs: 11762 | epoch avg. loss: 0.211 | test avg. loss: 4.128


 24%|██▎       | 11766/50000 [17:34<59:35, 10.69it/s]

Epochs: 11763 | epoch avg. loss: 0.322 | test avg. loss: 4.666
Epochs: 11764 | epoch avg. loss: 0.229 | test avg. loss: 4.286
Epochs: 11765 | epoch avg. loss: 0.112 | test avg. loss: 4.336


 24%|██▎       | 11768/50000 [17:34<59:57, 10.63it/s]

Epochs: 11766 | epoch avg. loss: 0.073 | test avg. loss: 4.000
Epochs: 11767 | epoch avg. loss: 0.130 | test avg. loss: 3.988
Epochs: 11768 | epoch avg. loss: 0.094 | test avg. loss: 4.305


 24%|██▎       | 11772/50000 [17:34<59:56, 10.63it/s]

Epochs: 11769 | epoch avg. loss: 0.102 | test avg. loss: 4.173
Epochs: 11770 | epoch avg. loss: 0.066 | test avg. loss: 4.492
Epochs: 11771 | epoch avg. loss: 0.157 | test avg. loss: 4.182


 24%|██▎       | 11774/50000 [17:35<59:42, 10.67it/s]

Epochs: 11772 | epoch avg. loss: 0.064 | test avg. loss: 4.017
Epochs: 11773 | epoch avg. loss: 0.061 | test avg. loss: 3.998
Epochs: 11774 | epoch avg. loss: 0.067 | test avg. loss: 3.981


 24%|██▎       | 11778/50000 [17:35<55:41, 11.44it/s]

Epochs: 11775 | epoch avg. loss: 0.098 | test avg. loss: 4.119
Epochs: 11776 | epoch avg. loss: 0.057 | test avg. loss: 4.176
Epochs: 11777 | epoch avg. loss: 0.069 | test avg. loss: 3.984


 24%|██▎       | 11780/50000 [17:35<55:37, 11.45it/s]

Epochs: 11778 | epoch avg. loss: 0.086 | test avg. loss: 4.245
Epochs: 11779 | epoch avg. loss: 0.162 | test avg. loss: 3.816
Epochs: 11780 | epoch avg. loss: 0.250 | test avg. loss: 4.063


                                                     

Epochs: 11781 | epoch avg. loss: 0.235 | test avg. loss: 4.809
Epochs: 11782 | epoch avg. loss: 0.298 | test avg. loss: 4.268


 24%|██▎       | 11786/50000 [17:36<55:37, 11.45it/s]

Epochs: 11783 | epoch avg. loss: 0.315 | test avg. loss: 4.357
Epochs: 11784 | epoch avg. loss: 0.217 | test avg. loss: 4.049
Epochs: 11785 | epoch avg. loss: 0.194 | test avg. loss: 3.841


 24%|██▎       | 11788/50000 [17:36<54:23, 11.71it/s]

Epochs: 11786 | epoch avg. loss: 0.317 | test avg. loss: 4.530
Epochs: 11787 | epoch avg. loss: 0.291 | test avg. loss: 3.998
Epochs: 11788 | epoch avg. loss: 0.185 | test avg. loss: 4.398


 24%|██▎       | 11792/50000 [17:36<49:11, 12.95it/s]

Epochs: 11789 | epoch avg. loss: 0.228 | test avg. loss: 4.154
Epochs: 11790 | epoch avg. loss: 0.206 | test avg. loss: 4.138
Epochs: 11791 | epoch avg. loss: 0.260 | test avg. loss: 4.858


 24%|██▎       | 11796/50000 [17:36<45:26, 14.01it/s]

Epochs: 11792 | epoch avg. loss: 0.364 | test avg. loss: 3.981
Epochs: 11793 | epoch avg. loss: 0.064 | test avg. loss: 3.891
Epochs: 11794 | epoch avg. loss: 0.067 | test avg. loss: 3.832
Epochs: 11795 | epoch avg. loss: 0.146 | test avg. loss: 3.901


 24%|██▎       | 11798/50000 [17:37<52:04, 12.23it/s]

Epochs: 11796 | epoch avg. loss: 0.089 | test avg. loss: 4.268
Epochs: 11797 | epoch avg. loss: 0.105 | test avg. loss: 4.065


 24%|██▎       | 11798/50000 [17:37<52:04, 12.23it/s]

Epochs: 11798 | epoch avg. loss: 0.318 | test avg. loss: 4.019
Epochs: 11799 | epoch avg. loss: 0.113 | test avg. loss: 4.059


 24%|██▎       | 11802/50000 [17:39<2:34:04,  4.13it/s]

Epochs: 11800 | epoch avg. loss: 0.122 | test avg. loss: 4.067
Epochs: 11801 | epoch avg. loss: 0.236 | test avg. loss: 4.617
Epochs: 11802 | epoch avg. loss: 0.292 | test avg. loss: 4.350


 24%|██▎       | 11806/50000 [17:39<1:40:20,  6.34it/s]

Epochs: 11803 | epoch avg. loss: 0.095 | test avg. loss: 4.076
Epochs: 11804 | epoch avg. loss: 0.106 | test avg. loss: 4.535
Epochs: 11805 | epoch avg. loss: 0.243 | test avg. loss: 3.845


 24%|██▎       | 11808/50000 [17:39<1:24:31,  7.53it/s]

Epochs: 11806 | epoch avg. loss: 0.218 | test avg. loss: 4.165
Epochs: 11807 | epoch avg. loss: 0.098 | test avg. loss: 4.177
Epochs: 11808 | epoch avg. loss: 0.127 | test avg. loss: 4.273


 24%|██▎       | 11812/50000 [17:39<1:08:13,  9.33it/s]

Epochs: 11809 | epoch avg. loss: 0.116 | test avg. loss: 4.694
Epochs: 11810 | epoch avg. loss: 0.182 | test avg. loss: 4.232
Epochs: 11811 | epoch avg. loss: 0.307 | test avg. loss: 4.126


 24%|██▎       | 11814/50000 [17:39<1:04:39,  9.84it/s]

Epochs: 11812 | epoch avg. loss: 0.117 | test avg. loss: 4.331
Epochs: 11813 | epoch avg. loss: 0.160 | test avg. loss: 3.871
Epochs: 11814 | epoch avg. loss: 0.164 | test avg. loss: 4.175


 24%|██▎       | 11818/50000 [17:40<1:00:21, 10.54it/s]

Epochs: 11815 | epoch avg. loss: 0.156 | test avg. loss: 4.187
Epochs: 11816 | epoch avg. loss: 0.078 | test avg. loss: 4.129
Epochs: 11817 | epoch avg. loss: 0.080 | test avg. loss: 4.394


 24%|██▎       | 11820/50000 [17:40<57:16, 11.11it/s]

Epochs: 11818 | epoch avg. loss: 0.141 | test avg. loss: 4.103
Epochs: 11819 | epoch avg. loss: 0.186 | test avg. loss: 4.153
Epochs: 11820 | epoch avg. loss: 0.114 | test avg. loss: 4.349


 24%|██▎       | 11824/50000 [17:40<54:36, 11.65it/s]

Epochs: 11821 | epoch avg. loss: 0.112 | test avg. loss: 4.101
Epochs: 11822 | epoch avg. loss: 0.123 | test avg. loss: 4.102
Epochs: 11823 | epoch avg. loss: 0.071 | test avg. loss: 4.126


 24%|██▎       | 11826/50000 [17:41<56:23, 11.28it/s]

Epochs: 11824 | epoch avg. loss: 0.056 | test avg. loss: 4.150
Epochs: 11825 | epoch avg. loss: 0.056 | test avg. loss: 4.138
Epochs: 11826 | epoch avg. loss: 0.052 | test avg. loss: 4.245


 24%|██▎       | 11830/50000 [17:41<50:12, 12.67it/s]

Epochs: 11827 | epoch avg. loss: 0.056 | test avg. loss: 4.186
Epochs: 11828 | epoch avg. loss: 0.070 | test avg. loss: 4.382
Epochs: 11829 | epoch avg. loss: 0.055 | test avg. loss: 4.231
Epochs: 11830 | epoch avg. loss: 0.168 | test avg. loss: 4.283


 24%|██▎       | 11834/50000 [17:41<45:27, 13.99it/s]

Epochs: 11831 | epoch avg. loss: 0.088 | test avg. loss: 4.228
Epochs: 11832 | epoch avg. loss: 0.093 | test avg. loss: 3.943
Epochs: 11833 | epoch avg. loss: 0.179 | test avg. loss: 4.214
Epochs: 11834 | epoch avg. loss: 0.113 | test avg. loss: 4.176


 24%|██▎       | 11838/50000 [17:41<45:28, 13.99it/s]

Epochs: 11835 | epoch avg. loss: 0.065 | test avg. loss: 4.513
Epochs: 11836 | epoch avg. loss: 0.096 | test avg. loss: 4.474
Epochs: 11837 | epoch avg. loss: 0.153 | test avg. loss: 4.390


 24%|██▎       | 11840/50000 [17:41<46:46, 13.60it/s]

Epochs: 11838 | epoch avg. loss: 0.111 | test avg. loss: 4.475
Epochs: 11839 | epoch avg. loss: 0.129 | test avg. loss: 4.019
Epochs: 11840 | epoch avg. loss: 0.126 | test avg. loss: 4.439


 24%|██▎       | 11844/50000 [17:42<50:47, 12.52it/s]

Epochs: 11841 | epoch avg. loss: 0.274 | test avg. loss: 4.257
Epochs: 11842 | epoch avg. loss: 0.218 | test avg. loss: 4.424
Epochs: 11843 | epoch avg. loss: 0.203 | test avg. loss: 4.597


 24%|██▎       | 11846/50000 [17:42<50:42, 12.54it/s]

Epochs: 11844 | epoch avg. loss: 0.229 | test avg. loss: 4.597
Epochs: 11845 | epoch avg. loss: 0.826 | test avg. loss: 4.702
Epochs: 11846 | epoch avg. loss: 0.675 | test avg. loss: 3.989


 24%|██▎       | 11850/50000 [17:42<47:52, 13.28it/s]

Epochs: 11847 | epoch avg. loss: 0.216 | test avg. loss: 4.068
Epochs: 11848 | epoch avg. loss: 0.389 | test avg. loss: 5.795
Epochs: 11849 | epoch avg. loss: 0.819 | test avg. loss: 4.918


 24%|██▎       | 11852/50000 [17:42<49:14, 12.91it/s]

Epochs: 11850 | epoch avg. loss: 1.269 | test avg. loss: 4.948
Epochs: 11851 | epoch avg. loss: 0.888 | test avg. loss: 3.940
Epochs: 11852 | epoch avg. loss: 0.310 | test avg. loss: 3.879


 24%|██▎       | 11856/50000 [17:43<52:27, 12.12it/s]

Epochs: 11853 | epoch avg. loss: 0.211 | test avg. loss: 4.239
Epochs: 11854 | epoch avg. loss: 0.164 | test avg. loss: 3.945
Epochs: 11855 | epoch avg. loss: 0.086 | test avg. loss: 4.121


 24%|██▎       | 11858/50000 [17:43<53:09, 11.96it/s]

Epochs: 11856 | epoch avg. loss: 0.114 | test avg. loss: 4.044
Epochs: 11857 | epoch avg. loss: 0.346 | test avg. loss: 4.139
Epochs: 11858 | epoch avg. loss: 0.227 | test avg. loss: 4.105


 24%|██▎       | 11862/50000 [17:43<55:36, 11.43it/s]

Epochs: 11859 | epoch avg. loss: 0.155 | test avg. loss: 4.107
Epochs: 11860 | epoch avg. loss: 0.196 | test avg. loss: 4.479
Epochs: 11861 | epoch avg. loss: 0.113 | test avg. loss: 4.347


 24%|██▎       | 11864/50000 [17:43<55:16, 11.50it/s]

Epochs: 11862 | epoch avg. loss: 0.222 | test avg. loss: 4.474
Epochs: 11863 | epoch avg. loss: 0.198 | test avg. loss: 4.509
Epochs: 11864 | epoch avg. loss: 0.193 | test avg. loss: 3.956


 24%|██▎       | 11868/50000 [17:44<49:10, 12.92it/s]

Epochs: 11865 | epoch avg. loss: 0.146 | test avg. loss: 3.979
Epochs: 11866 | epoch avg. loss: 0.109 | test avg. loss: 4.273
Epochs: 11867 | epoch avg. loss: 0.107 | test avg. loss: 4.353


 24%|██▎       | 11870/50000 [17:44<48:06, 13.21it/s]

Epochs: 11868 | epoch avg. loss: 0.241 | test avg. loss: 5.040
Epochs: 11869 | epoch avg. loss: 0.420 | test avg. loss: 4.247
Epochs: 11870 | epoch avg. loss: 0.184 | test avg. loss: 4.306


 24%|██▎       | 11874/50000 [17:44<52:04, 12.20it/s]

Epochs: 11871 | epoch avg. loss: 0.214 | test avg. loss: 4.083
Epochs: 11872 | epoch avg. loss: 0.136 | test avg. loss: 3.941
Epochs: 11873 | epoch avg. loss: 0.149 | test avg. loss: 4.628


 24%|██▍       | 11876/50000 [17:44<54:17, 11.70it/s]

Epochs: 11874 | epoch avg. loss: 0.283 | test avg. loss: 4.131
Epochs: 11875 | epoch avg. loss: 0.853 | test avg. loss: 4.105
Epochs: 11876 | epoch avg. loss: 0.322 | test avg. loss: 4.260


 24%|██▍       | 11880/50000 [17:45<57:48, 10.99it/s]

Epochs: 11877 | epoch avg. loss: 0.227 | test avg. loss: 4.219
Epochs: 11878 | epoch avg. loss: 0.321 | test avg. loss: 5.043
Epochs: 11879 | epoch avg. loss: 0.520 | test avg. loss: 4.188


 24%|██▍       | 11882/50000 [17:45<59:45, 10.63it/s]

Epochs: 11880 | epoch avg. loss: 0.297 | test avg. loss: 4.526
Epochs: 11881 | epoch avg. loss: 0.597 | test avg. loss: 4.215
Epochs: 11882 | epoch avg. loss: 0.314 | test avg. loss: 4.206


 24%|██▍       | 11886/50000 [17:45<51:41, 12.29it/s]

Epochs: 11883 | epoch avg. loss: 0.439 | test avg. loss: 5.564
Epochs: 11884 | epoch avg. loss: 0.692 | test avg. loss: 4.304
Epochs: 11885 | epoch avg. loss: 0.572 | test avg. loss: 4.334


 24%|██▍       | 11888/50000 [17:46<51:23, 12.36it/s]

Epochs: 11886 | epoch avg. loss: 0.529 | test avg. loss: 4.210
Epochs: 11887 | epoch avg. loss: 0.263 | test avg. loss: 3.983
Epochs: 11888 | epoch avg. loss: 0.322 | test avg. loss: 5.575


 24%|██▍       | 11892/50000 [17:46<50:52, 12.48it/s]

Epochs: 11889 | epoch avg. loss: 0.747 | test avg. loss: 4.492
Epochs: 11890 | epoch avg. loss: 0.925 | test avg. loss: 4.597
Epochs: 11891 | epoch avg. loss: 0.702 | test avg. loss: 3.866


 24%|██▍       | 11896/50000 [17:46<46:00, 13.80it/s]

Epochs: 11892 | epoch avg. loss: 0.204 | test avg. loss: 3.869
Epochs: 11893 | epoch avg. loss: 0.237 | test avg. loss: 4.482
Epochs: 11894 | epoch avg. loss: 0.306 | test avg. loss: 4.181
Epochs: 11895 | epoch avg. loss: 0.369 | test avg. loss: 4.396


                                                     

Epochs: 11896 | epoch avg. loss: 0.232 | test avg. loss: 4.086
Epochs: 11897 | epoch avg. loss: 0.117 | test avg. loss: 4.087
Epochs: 11898 | epoch avg. loss: 0.126 | test avg. loss: 4.334


 24%|██▍       | 11898/50000 [17:46<44:46, 14.18it/s]

Epochs: 11899 | epoch avg. loss: 0.074 | test avg. loss: 4.323


 24%|██▍       | 11904/50000 [17:48<1:45:25,  6.02it/s]

Epochs: 11900 | epoch avg. loss: 0.090 | test avg. loss: 4.321
Epochs: 11901 | epoch avg. loss: 0.065 | test avg. loss: 4.171
Epochs: 11902 | epoch avg. loss: 0.075 | test avg. loss: 4.410
Epochs: 11903 | epoch avg. loss: 0.300 | test avg. loss: 4.015


 24%|██▍       | 11908/50000 [17:48<1:12:04,  8.81it/s]

Epochs: 11904 | epoch avg. loss: 0.094 | test avg. loss: 3.942
Epochs: 11905 | epoch avg. loss: 0.138 | test avg. loss: 4.251
Epochs: 11906 | epoch avg. loss: 0.164 | test avg. loss: 4.000
Epochs: 11907 | epoch avg. loss: 0.187 | test avg. loss: 4.041


 24%|██▍       | 11910/50000 [17:48<1:03:06, 10.06it/s]

Epochs: 11908 | epoch avg. loss: 0.124 | test avg. loss: 4.399
Epochs: 11909 | epoch avg. loss: 0.140 | test avg. loss: 4.172
Epochs: 11910 | epoch avg. loss: 0.066 | test avg. loss: 4.106


 24%|██▍       | 11914/50000 [17:49<56:46, 11.18it/s]

Epochs: 11911 | epoch avg. loss: 0.063 | test avg. loss: 4.030
Epochs: 11912 | epoch avg. loss: 0.062 | test avg. loss: 4.196
Epochs: 11913 | epoch avg. loss: 0.076 | test avg. loss: 4.104


                                                     

Epochs: 11914 | epoch avg. loss: 0.083 | test avg. loss: 4.224
Epochs: 11915 | epoch avg. loss: 0.056 | test avg. loss: 4.096
Epochs: 11916 | epoch avg. loss: 0.238 | test avg. loss: 4.249


 24%|██▍       | 11920/50000 [17:49<47:40, 13.31it/s]

Epochs: 11917 | epoch avg. loss: 0.166 | test avg. loss: 4.644
Epochs: 11918 | epoch avg. loss: 0.257 | test avg. loss: 3.939
Epochs: 11919 | epoch avg. loss: 0.275 | test avg. loss: 4.117
Epochs: 11920 | epoch avg. loss: 0.235 | test avg. loss: 4.087


                                                     

Epochs: 11921 | epoch avg. loss: 0.209 | test avg. loss: 4.276
Epochs: 11922 | epoch avg. loss: 0.338 | test avg. loss: 5.054
Epochs: 11923 | epoch avg. loss: 0.563 | test avg. loss: 4.317


 24%|██▍       | 11926/50000 [17:49<45:27, 13.96it/s]

Epochs: 11924 | epoch avg. loss: 0.173 | test avg. loss: 4.171
Epochs: 11925 | epoch avg. loss: 0.124 | test avg. loss: 4.039
Epochs: 11926 | epoch avg. loss: 0.116 | test avg. loss: 3.834


 24%|██▍       | 11930/50000 [17:50<47:04, 13.48it/s]

Epochs: 11927 | epoch avg. loss: 0.217 | test avg. loss: 4.146
Epochs: 11928 | epoch avg. loss: 0.065 | test avg. loss: 4.156
Epochs: 11929 | epoch avg. loss: 0.232 | test avg. loss: 4.511


 24%|██▍       | 11934/50000 [17:50<45:05, 14.07it/s]

Epochs: 11930 | epoch avg. loss: 0.330 | test avg. loss: 4.245
Epochs: 11931 | epoch avg. loss: 0.205 | test avg. loss: 4.252
Epochs: 11932 | epoch avg. loss: 0.394 | test avg. loss: 5.286
Epochs: 11933 | epoch avg. loss: 0.664 | test avg. loss: 4.073


 24%|██▍       | 11936/50000 [17:50<44:10, 14.36it/s]

Epochs: 11934 | epoch avg. loss: 0.183 | test avg. loss: 4.383
Epochs: 11935 | epoch avg. loss: 0.347 | test avg. loss: 4.090
Epochs: 11936 | epoch avg. loss: 0.240 | test avg. loss: 4.221


 24%|██▍       | 11940/50000 [17:50<48:34, 13.06it/s]

Epochs: 11937 | epoch avg. loss: 0.253 | test avg. loss: 4.543
Epochs: 11938 | epoch avg. loss: 0.208 | test avg. loss: 4.099
Epochs: 11939 | epoch avg. loss: 0.118 | test avg. loss: 4.762


 24%|██▍       | 11942/50000 [17:51<51:41, 12.27it/s]

Epochs: 11940 | epoch avg. loss: 0.316 | test avg. loss: 4.121
Epochs: 11941 | epoch avg. loss: 0.552 | test avg. loss: 4.260
Epochs: 11942 | epoch avg. loss: 0.300 | test avg. loss: 4.060


 24%|██▍       | 11946/50000 [17:51<50:35, 12.54it/s]

Epochs: 11943 | epoch avg. loss: 0.141 | test avg. loss: 4.449
Epochs: 11944 | epoch avg. loss: 0.166 | test avg. loss: 4.767
Epochs: 11945 | epoch avg. loss: 0.173 | test avg. loss: 4.346


 24%|██▍       | 11948/50000 [17:51<50:41, 12.51it/s]

Epochs: 11946 | epoch avg. loss: 0.163 | test avg. loss: 4.750
Epochs: 11947 | epoch avg. loss: 0.451 | test avg. loss: 3.689
Epochs: 11948 | epoch avg. loss: 0.322 | test avg. loss: 3.622


 24%|██▍       | 11952/50000 [17:51<54:17, 11.68it/s]

Epochs: 11949 | epoch avg. loss: 0.214 | test avg. loss: 4.504
Epochs: 11950 | epoch avg. loss: 0.393 | test avg. loss: 4.240
Epochs: 11951 | epoch avg. loss: 0.407 | test avg. loss: 4.283


 24%|██▍       | 11954/50000 [17:52<56:06, 11.30it/s]

Epochs: 11952 | epoch avg. loss: 0.172 | test avg. loss: 4.536
Epochs: 11953 | epoch avg. loss: 0.233 | test avg. loss: 4.368
Epochs: 11954 | epoch avg. loss: 0.159 | test avg. loss: 4.039


 24%|██▍       | 11958/50000 [17:52<54:39, 11.60it/s]

Epochs: 11955 | epoch avg. loss: 0.155 | test avg. loss: 4.148
Epochs: 11956 | epoch avg. loss: 0.150 | test avg. loss: 4.766
Epochs: 11957 | epoch avg. loss: 0.398 | test avg. loss: 4.748


 24%|██▍       | 11960/50000 [17:52<53:09, 11.93it/s]

Epochs: 11958 | epoch avg. loss: 0.198 | test avg. loss: 4.487
Epochs: 11959 | epoch avg. loss: 0.319 | test avg. loss: 4.557
Epochs: 11960 | epoch avg. loss: 0.230 | test avg. loss: 3.943


 24%|██▍       | 11964/50000 [17:52<53:31, 11.84it/s]

Epochs: 11961 | epoch avg. loss: 0.163 | test avg. loss: 3.949
Epochs: 11962 | epoch avg. loss: 0.097 | test avg. loss: 3.986
Epochs: 11963 | epoch avg. loss: 0.087 | test avg. loss: 4.278


 24%|██▍       | 11966/50000 [17:53<51:52, 12.22it/s]

Epochs: 11964 | epoch avg. loss: 0.109 | test avg. loss: 4.048
Epochs: 11965 | epoch avg. loss: 0.153 | test avg. loss: 4.089
Epochs: 11966 | epoch avg. loss: 0.084 | test avg. loss: 4.155


 24%|██▍       | 11970/50000 [17:53<51:47, 12.24it/s]

Epochs: 11967 | epoch avg. loss: 0.080 | test avg. loss: 4.096
Epochs: 11968 | epoch avg. loss: 0.078 | test avg. loss: 4.374
Epochs: 11969 | epoch avg. loss: 0.174 | test avg. loss: 4.262


 24%|██▍       | 11972/50000 [17:53<52:32, 12.06it/s]

Epochs: 11970 | epoch avg. loss: 0.070 | test avg. loss: 4.126
Epochs: 11971 | epoch avg. loss: 0.057 | test avg. loss: 4.160
Epochs: 11972 | epoch avg. loss: 0.073 | test avg. loss: 4.221


 24%|██▍       | 11976/50000 [17:53<54:09, 11.70it/s]

Epochs: 11973 | epoch avg. loss: 0.081 | test avg. loss: 4.352
Epochs: 11974 | epoch avg. loss: 0.072 | test avg. loss: 4.233
Epochs: 11975 | epoch avg. loss: 0.076 | test avg. loss: 4.458


 24%|██▍       | 11978/50000 [17:54<53:03, 11.94it/s]

Epochs: 11976 | epoch avg. loss: 0.118 | test avg. loss: 4.374
Epochs: 11977 | epoch avg. loss: 0.106 | test avg. loss: 4.280
Epochs: 11978 | epoch avg. loss: 0.197 | test avg. loss: 4.737


 24%|██▍       | 11982/50000 [17:54<50:41, 12.50it/s]

Epochs: 11979 | epoch avg. loss: 0.291 | test avg. loss: 4.235
Epochs: 11980 | epoch avg. loss: 0.231 | test avg. loss: 4.273
Epochs: 11981 | epoch avg. loss: 0.187 | test avg. loss: 4.251


 24%|██▍       | 11984/50000 [17:54<47:37, 13.30it/s]

Epochs: 11982 | epoch avg. loss: 0.203 | test avg. loss: 4.164
Epochs: 11983 | epoch avg. loss: 0.456 | test avg. loss: 4.853
Epochs: 11984 | epoch avg. loss: 0.327 | test avg. loss: 4.268


 24%|██▍       | 11988/50000 [17:54<45:51, 13.82it/s]

Epochs: 11985 | epoch avg. loss: 0.176 | test avg. loss: 4.482
Epochs: 11986 | epoch avg. loss: 0.252 | test avg. loss: 4.555
Epochs: 11987 | epoch avg. loss: 0.222 | test avg. loss: 3.972


 24%|██▍       | 11990/50000 [17:55<48:36, 13.03it/s]

Epochs: 11988 | epoch avg. loss: 0.143 | test avg. loss: 3.985
Epochs: 11989 | epoch avg. loss: 0.064 | test avg. loss: 4.146
Epochs: 11990 | epoch avg. loss: 0.091 | test avg. loss: 4.262


 24%|██▍       | 11994/50000 [17:55<48:35, 13.04it/s]

Epochs: 11991 | epoch avg. loss: 0.301 | test avg. loss: 4.616
Epochs: 11992 | epoch avg. loss: 0.232 | test avg. loss: 4.175
Epochs: 11993 | epoch avg. loss: 0.134 | test avg. loss: 4.007


 24%|██▍       | 11998/50000 [17:55<43:08, 14.68it/s]

Epochs: 11994 | epoch avg. loss: 0.123 | test avg. loss: 4.353
Epochs: 11995 | epoch avg. loss: 0.190 | test avg. loss: 3.853
Epochs: 11996 | epoch avg. loss: 0.293 | test avg. loss: 3.913
Epochs: 11997 | epoch avg. loss: 0.113 | test avg. loss: 4.452


 24%|██▍       | 11998/50000 [17:55<43:08, 14.68it/s]

Epochs: 11998 | epoch avg. loss: 0.182 | test avg. loss: 4.141
Epochs: 11999 | epoch avg. loss: 0.475 | test avg. loss: 4.126


 24%|██▍       | 12002/50000 [17:57<2:27:23,  4.30it/s]

Epochs: 12000 | epoch avg. loss: 0.087 | test avg. loss: 4.309
Epochs: 12001 | epoch avg. loss: 0.094 | test avg. loss: 3.963
Epochs: 12002 | epoch avg. loss: 0.120 | test avg. loss: 4.059


 24%|██▍       | 12006/50000 [17:57<1:37:05,  6.52it/s]

Epochs: 12003 | epoch avg. loss: 0.125 | test avg. loss: 4.083
Epochs: 12004 | epoch avg. loss: 0.075 | test avg. loss: 4.052
Epochs: 12005 | epoch avg. loss: 0.075 | test avg. loss: 4.311


 24%|██▍       | 12008/50000 [17:57<1:24:17,  7.51it/s]

Epochs: 12006 | epoch avg. loss: 0.139 | test avg. loss: 4.318
Epochs: 12007 | epoch avg. loss: 0.095 | test avg. loss: 4.044
Epochs: 12008 | epoch avg. loss: 0.219 | test avg. loss: 4.263


 24%|██▍       | 12012/50000 [17:58<1:08:23,  9.26it/s]

Epochs: 12009 | epoch avg. loss: 0.268 | test avg. loss: 4.122
Epochs: 12010 | epoch avg. loss: 0.102 | test avg. loss: 4.092
Epochs: 12011 | epoch avg. loss: 0.123 | test avg. loss: 4.866


 24%|██▍       | 12016/50000 [17:58<56:38, 11.18it/s]  

Epochs: 12012 | epoch avg. loss: 0.278 | test avg. loss: 4.344
Epochs: 12013 | epoch avg. loss: 0.488 | test avg. loss: 4.584
Epochs: 12014 | epoch avg. loss: 0.603 | test avg. loss: 3.859
Epochs: 12015 | epoch avg. loss: 0.528 | test avg. loss: 4.959


 24%|██▍       | 12020/50000 [17:58<48:19, 13.10it/s]

Epochs: 12016 | epoch avg. loss: 1.402 | test avg. loss: 7.136
Epochs: 12017 | epoch avg. loss: 1.723 | test avg. loss: 4.413
Epochs: 12018 | epoch avg. loss: 1.375 | test avg. loss: 4.773
Epochs: 12019 | epoch avg. loss: 1.360 | test avg. loss: 3.773


 24%|██▍       | 12022/50000 [17:58<47:18, 13.38it/s]

Epochs: 12020 | epoch avg. loss: 1.205 | test avg. loss: 4.604
Epochs: 12021 | epoch avg. loss: 1.558 | test avg. loss: 5.175
Epochs: 12022 | epoch avg. loss: 0.688 | test avg. loss: 4.460


 24%|██▍       | 12026/50000 [17:59<48:23, 13.08it/s]

Epochs: 12023 | epoch avg. loss: 0.329 | test avg. loss: 4.366
Epochs: 12024 | epoch avg. loss: 0.205 | test avg. loss: 3.970
Epochs: 12025 | epoch avg. loss: 0.527 | test avg. loss: 3.654


 24%|██▍       | 12028/50000 [17:59<48:58, 12.92it/s]

Epochs: 12026 | epoch avg. loss: 0.200 | test avg. loss: 3.883
Epochs: 12027 | epoch avg. loss: 0.196 | test avg. loss: 3.782
Epochs: 12028 | epoch avg. loss: 0.216 | test avg. loss: 4.101


 24%|██▍       | 12032/50000 [17:59<49:12, 12.86it/s]

Epochs: 12029 | epoch avg. loss: 0.159 | test avg. loss: 4.269
Epochs: 12030 | epoch avg. loss: 0.133 | test avg. loss: 4.134
Epochs: 12031 | epoch avg. loss: 0.094 | test avg. loss: 4.281


 24%|██▍       | 12034/50000 [17:59<51:00, 12.40it/s]

Epochs: 12032 | epoch avg. loss: 0.145 | test avg. loss: 3.904
Epochs: 12033 | epoch avg. loss: 0.070 | test avg. loss: 3.942
Epochs: 12034 | epoch avg. loss: 0.117 | test avg. loss: 3.785


 24%|██▍       | 12038/50000 [18:00<48:57, 12.92it/s]

Epochs: 12035 | epoch avg. loss: 0.115 | test avg. loss: 3.924
Epochs: 12036 | epoch avg. loss: 0.081 | test avg. loss: 4.216
Epochs: 12037 | epoch avg. loss: 0.081 | test avg. loss: 4.224


 24%|██▍       | 12042/50000 [18:00<44:11, 14.32it/s]

Epochs: 12038 | epoch avg. loss: 0.063 | test avg. loss: 4.271
Epochs: 12039 | epoch avg. loss: 0.076 | test avg. loss: 4.245
Epochs: 12040 | epoch avg. loss: 0.084 | test avg. loss: 4.182
Epochs: 12041 | epoch avg. loss: 0.069 | test avg. loss: 3.990


 24%|██▍       | 12044/50000 [18:00<45:01, 14.05it/s]

Epochs: 12042 | epoch avg. loss: 0.063 | test avg. loss: 4.182
Epochs: 12043 | epoch avg. loss: 0.088 | test avg. loss: 3.972
Epochs: 12044 | epoch avg. loss: 0.098 | test avg. loss: 4.138


 24%|██▍       | 12048/50000 [18:00<45:28, 13.91it/s]

Epochs: 12045 | epoch avg. loss: 0.107 | test avg. loss: 4.190
Epochs: 12046 | epoch avg. loss: 0.111 | test avg. loss: 4.229
Epochs: 12047 | epoch avg. loss: 0.324 | test avg. loss: 4.591


 24%|██▍       | 12050/50000 [18:01<46:20, 13.65it/s]

Epochs: 12048 | epoch avg. loss: 0.293 | test avg. loss: 3.988
Epochs: 12049 | epoch avg. loss: 0.232 | test avg. loss: 3.915
Epochs: 12050 | epoch avg. loss: 0.187 | test avg. loss: 4.709


 24%|██▍       | 12054/50000 [18:01<46:25, 13.62it/s]

Epochs: 12051 | epoch avg. loss: 0.364 | test avg. loss: 4.034
Epochs: 12052 | epoch avg. loss: 0.140 | test avg. loss: 4.155
Epochs: 12053 | epoch avg. loss: 0.099 | test avg. loss: 4.095


 24%|██▍       | 12058/50000 [18:01<43:50, 14.43it/s]

Epochs: 12054 | epoch avg. loss: 0.082 | test avg. loss: 3.991
Epochs: 12055 | epoch avg. loss: 0.117 | test avg. loss: 4.111
Epochs: 12056 | epoch avg. loss: 0.130 | test avg. loss: 4.096
Epochs: 12057 | epoch avg. loss: 0.134 | test avg. loss: 3.991


 24%|██▍       | 12062/50000 [18:01<42:33, 14.86it/s]

Epochs: 12058 | epoch avg. loss: 0.056 | test avg. loss: 4.019
Epochs: 12059 | epoch avg. loss: 0.054 | test avg. loss: 4.103
Epochs: 12060 | epoch avg. loss: 0.058 | test avg. loss: 4.029
Epochs: 12061 | epoch avg. loss: 0.111 | test avg. loss: 4.143


 24%|██▍       | 12064/50000 [18:02<43:24, 14.56it/s]

Epochs: 12062 | epoch avg. loss: 0.067 | test avg. loss: 4.230
Epochs: 12063 | epoch avg. loss: 0.070 | test avg. loss: 4.046
Epochs: 12064 | epoch avg. loss: 0.057 | test avg. loss: 4.295
Epochs: 12065 | epoch avg. loss: 0.197 | test avg. loss: 3.964


 24%|██▍       | 12068/50000 [18:02<42:42, 14.80it/s]

Epochs: 12066 | epoch avg. loss: 0.053 | test avg. loss: 3.972
Epochs: 12067 | epoch avg. loss: 0.046 | test avg. loss: 3.933
Epochs: 12068 | epoch avg. loss: 0.163 | test avg. loss: 4.118


 24%|██▍       | 12072/50000 [18:02<41:59, 15.05it/s]

Epochs: 12069 | epoch avg. loss: 0.205 | test avg. loss: 4.341
Epochs: 12070 | epoch avg. loss: 0.159 | test avg. loss: 4.010
Epochs: 12071 | epoch avg. loss: 0.111 | test avg. loss: 4.442
Epochs: 12072 | epoch avg. loss: 0.303 | test avg. loss: 3.816




Epochs: 12073 | epoch avg. loss: 0.153 | test avg. loss: 3.757
Epochs: 12074 | epoch avg. loss: 0.223 | test avg. loss: 5.388
Epochs: 12075 | epoch avg. loss: 0.817 | test avg. loss: 4.145


 24%|██▍       | 12078/50000 [18:03<42:19, 14.93it/s]

Epochs: 12076 | epoch avg. loss: 0.775 | test avg. loss: 4.113
Epochs: 12077 | epoch avg. loss: 0.261 | test avg. loss: 4.285
Epochs: 12078 | epoch avg. loss: 0.236 | test avg. loss: 4.380


 24%|██▍       | 12082/50000 [18:03<46:48, 13.50it/s]

Epochs: 12079 | epoch avg. loss: 0.289 | test avg. loss: 5.062
Epochs: 12080 | epoch avg. loss: 0.504 | test avg. loss: 4.196
Epochs: 12081 | epoch avg. loss: 0.292 | test avg. loss: 4.073


 24%|██▍       | 12084/50000 [18:03<47:15, 13.37it/s]

Epochs: 12082 | epoch avg. loss: 0.154 | test avg. loss: 4.321
Epochs: 12083 | epoch avg. loss: 0.150 | test avg. loss: 3.987
Epochs: 12084 | epoch avg. loss: 0.128 | test avg. loss: 4.202


 24%|██▍       | 12088/50000 [18:03<46:34, 13.57it/s]

Epochs: 12085 | epoch avg. loss: 0.147 | test avg. loss: 4.073
Epochs: 12086 | epoch avg. loss: 0.110 | test avg. loss: 4.131
Epochs: 12087 | epoch avg. loss: 0.116 | test avg. loss: 4.536


 24%|██▍       | 12090/50000 [18:03<46:46, 13.51it/s]

Epochs: 12088 | epoch avg. loss: 0.147 | test avg. loss: 4.087
Epochs: 12089 | epoch avg. loss: 0.114 | test avg. loss: 4.562
Epochs: 12090 | epoch avg. loss: 0.527 | test avg. loss: 4.002


 24%|██▍       | 12094/50000 [18:04<45:43, 13.82it/s]

Epochs: 12091 | epoch avg. loss: 0.228 | test avg. loss: 3.983
Epochs: 12092 | epoch avg. loss: 0.234 | test avg. loss: 4.394
Epochs: 12093 | epoch avg. loss: 0.151 | test avg. loss: 4.087
Epochs: 12094 | epoch avg. loss: 0.207 | test avg. loss: 4.439




Epochs: 12095 | epoch avg. loss: 0.168 | test avg. loss: 4.320
Epochs: 12096 | epoch avg. loss: 0.098 | test avg. loss: 4.119
Epochs: 12097 | epoch avg. loss: 0.093 | test avg. loss: 3.992


 24%|██▍       | 12098/50000 [18:04<43:08, 14.64it/s]

Epochs: 12098 | epoch avg. loss: 0.122 | test avg. loss: 4.130
Epochs: 12099 | epoch avg. loss: 0.079 | test avg. loss: 3.953


 24%|██▍       | 12102/50000 [18:06<2:12:47,  4.76it/s]

Epochs: 12100 | epoch avg. loss: 0.113 | test avg. loss: 4.185
Epochs: 12101 | epoch avg. loss: 0.188 | test avg. loss: 3.952
Epochs: 12102 | epoch avg. loss: 0.077 | test avg. loss: 3.865


 24%|██▍       | 12106/50000 [18:06<1:27:36,  7.21it/s]

Epochs: 12103 | epoch avg. loss: 0.094 | test avg. loss: 3.916
Epochs: 12104 | epoch avg. loss: 0.112 | test avg. loss: 3.685
Epochs: 12105 | epoch avg. loss: 0.125 | test avg. loss: 4.263


 24%|██▍       | 12110/50000 [18:06<1:04:14,  9.83it/s]

Epochs: 12106 | epoch avg. loss: 0.229 | test avg. loss: 3.943
Epochs: 12107 | epoch avg. loss: 0.160 | test avg. loss: 3.995
Epochs: 12108 | epoch avg. loss: 0.301 | test avg. loss: 4.203
Epochs: 12109 | epoch avg. loss: 0.167 | test avg. loss: 3.856


 24%|██▍       | 12112/50000 [18:06<59:08, 10.68it/s]

Epochs: 12110 | epoch avg. loss: 0.117 | test avg. loss: 3.777
Epochs: 12111 | epoch avg. loss: 0.137 | test avg. loss: 3.953
Epochs: 12112 | epoch avg. loss: 0.064 | test avg. loss: 3.999


 24%|██▍       | 12116/50000 [18:07<57:34, 10.97it/s]

Epochs: 12113 | epoch avg. loss: 0.055 | test avg. loss: 4.122
Epochs: 12114 | epoch avg. loss: 0.068 | test avg. loss: 4.051
Epochs: 12115 | epoch avg. loss: 0.056 | test avg. loss: 3.952


                                                     

Epochs: 12116 | epoch avg. loss: 0.051 | test avg. loss: 3.882
Epochs: 12117 | epoch avg. loss: 0.061 | test avg. loss: 4.089
Epochs: 12118 | epoch avg. loss: 0.067 | test avg. loss: 3.994


 24%|██▍       | 12122/50000 [18:07<47:57, 13.16it/s]

Epochs: 12119 | epoch avg. loss: 0.119 | test avg. loss: 4.107
Epochs: 12120 | epoch avg. loss: 0.075 | test avg. loss: 4.243
Epochs: 12121 | epoch avg. loss: 0.087 | test avg. loss: 4.034
Epochs: 12122 | epoch avg. loss: 0.072 | test avg. loss: 3.969


 24%|██▍       | 12126/50000 [18:07<46:25, 13.60it/s]

Epochs: 12123 | epoch avg. loss: 0.071 | test avg. loss: 4.171
Epochs: 12124 | epoch avg. loss: 0.104 | test avg. loss: 3.964
Epochs: 12125 | epoch avg. loss: 0.292 | test avg. loss: 3.968


 24%|██▍       | 12128/50000 [18:07<47:48, 13.20it/s]

Epochs: 12126 | epoch avg. loss: 0.125 | test avg. loss: 4.681
Epochs: 12127 | epoch avg. loss: 0.348 | test avg. loss: 3.932
Epochs: 12128 | epoch avg. loss: 0.162 | test avg. loss: 4.150


 24%|██▍       | 12132/50000 [18:08<46:09, 13.67it/s]

Epochs: 12129 | epoch avg. loss: 0.289 | test avg. loss: 4.134
Epochs: 12130 | epoch avg. loss: 0.274 | test avg. loss: 4.767
Epochs: 12131 | epoch avg. loss: 0.956 | test avg. loss: 5.410
Epochs: 12132 | epoch avg. loss: 0.724 | test avg. loss: 4.512


 24%|██▍       | 12136/50000 [18:08<43:49, 14.40it/s]

Epochs: 12133 | epoch avg. loss: 0.910 | test avg. loss: 4.526
Epochs: 12134 | epoch avg. loss: 0.609 | test avg. loss: 6.270
Epochs: 12135 | epoch avg. loss: 1.537 | test avg. loss: 4.314
Epochs: 12136 | epoch avg. loss: 0.956 | test avg. loss: 4.080


 24%|██▍       | 12140/50000 [18:08<43:55, 14.37it/s]

Epochs: 12137 | epoch avg. loss: 0.202 | test avg. loss: 4.344
Epochs: 12138 | epoch avg. loss: 0.389 | test avg. loss: 4.296
Epochs: 12139 | epoch avg. loss: 0.397 | test avg. loss: 5.230


 24%|██▍       | 12142/50000 [18:08<44:18, 14.24it/s]

Epochs: 12140 | epoch avg. loss: 0.734 | test avg. loss: 3.842
Epochs: 12141 | epoch avg. loss: 0.555 | test avg. loss: 4.320
Epochs: 12142 | epoch avg. loss: 0.875 | test avg. loss: 4.059


 24%|██▍       | 12146/50000 [18:09<45:17, 13.93it/s]

Epochs: 12143 | epoch avg. loss: 0.318 | test avg. loss: 4.393
Epochs: 12144 | epoch avg. loss: 0.347 | test avg. loss: 5.564
Epochs: 12145 | epoch avg. loss: 0.607 | test avg. loss: 4.396


 24%|██▍       | 12150/50000 [18:09<42:56, 14.69it/s]

Epochs: 12146 | epoch avg. loss: 0.424 | test avg. loss: 4.070
Epochs: 12147 | epoch avg. loss: 0.178 | test avg. loss: 3.698
Epochs: 12148 | epoch avg. loss: 0.209 | test avg. loss: 3.728
Epochs: 12149 | epoch avg. loss: 0.314 | test avg. loss: 4.660


 24%|██▍       | 12154/50000 [18:09<41:53, 15.06it/s]

Epochs: 12150 | epoch avg. loss: 0.394 | test avg. loss: 4.031
Epochs: 12151 | epoch avg. loss: 0.521 | test avg. loss: 4.020
Epochs: 12152 | epoch avg. loss: 0.240 | test avg. loss: 4.459
Epochs: 12153 | epoch avg. loss: 0.293 | test avg. loss: 4.556


 24%|██▍       | 12156/50000 [18:09<45:08, 13.97it/s]

Epochs: 12154 | epoch avg. loss: 0.667 | test avg. loss: 4.581
Epochs: 12155 | epoch avg. loss: 0.244 | test avg. loss: 4.149
Epochs: 12156 | epoch avg. loss: 0.162 | test avg. loss: 4.065


 24%|██▍       | 12158/50000 [18:10<49:38, 12.70it/s]

Epochs: 12157 | epoch avg. loss: 0.243 | test avg. loss: 5.299
Epochs: 12158 | epoch avg. loss: 0.690 | test avg. loss: 4.303
Epochs: 12159 | epoch avg. loss: 0.625 | test avg. loss: 4.509


 24%|██▍       | 12162/50000 [18:10<52:14, 12.07it/s]

Epochs: 12160 | epoch avg. loss: 0.349 | test avg. loss: 4.359
Epochs: 12161 | epoch avg. loss: 0.178 | test avg. loss: 4.488
Epochs: 12162 | epoch avg. loss: 0.174 | test avg. loss: 4.870


 24%|██▍       | 12166/50000 [18:10<50:20, 12.52it/s]

Epochs: 12163 | epoch avg. loss: 0.253 | test avg. loss: 4.043
Epochs: 12164 | epoch avg. loss: 0.138 | test avg. loss: 3.950
Epochs: 12165 | epoch avg. loss: 0.067 | test avg. loss: 3.832


 24%|██▍       | 12168/50000 [18:10<52:38, 11.98it/s]

Epochs: 12166 | epoch avg. loss: 0.110 | test avg. loss: 3.985
Epochs: 12167 | epoch avg. loss: 0.075 | test avg. loss: 4.185
Epochs: 12168 | epoch avg. loss: 0.071 | test avg. loss: 4.111


 24%|██▍       | 12172/50000 [18:11<48:16, 13.06it/s]

Epochs: 12169 | epoch avg. loss: 0.081 | test avg. loss: 4.263
Epochs: 12170 | epoch avg. loss: 0.083 | test avg. loss: 3.948
Epochs: 12171 | epoch avg. loss: 0.095 | test avg. loss: 3.984
Epochs: 12172 | epoch avg. loss: 0.106 | test avg. loss: 4.162


 24%|██▍       | 12176/50000 [18:11<46:01, 13.70it/s]

Epochs: 12173 | epoch avg. loss: 0.139 | test avg. loss: 3.909
Epochs: 12174 | epoch avg. loss: 0.101 | test avg. loss: 4.012
Epochs: 12175 | epoch avg. loss: 0.068 | test avg. loss: 4.225


 24%|██▍       | 12178/50000 [18:11<45:43, 13.79it/s]

Epochs: 12176 | epoch avg. loss: 0.082 | test avg. loss: 4.052
Epochs: 12177 | epoch avg. loss: 0.056 | test avg. loss: 4.158
Epochs: 12178 | epoch avg. loss: 0.079 | test avg. loss: 3.981


 24%|██▍       | 12182/50000 [18:11<50:44, 12.42it/s]

Epochs: 12179 | epoch avg. loss: 0.129 | test avg. loss: 4.042
Epochs: 12180 | epoch avg. loss: 0.158 | test avg. loss: 4.500
Epochs: 12181 | epoch avg. loss: 0.191 | test avg. loss: 4.085


 24%|██▍       | 12186/50000 [18:12<45:17, 13.92it/s]

Epochs: 12182 | epoch avg. loss: 0.157 | test avg. loss: 4.043
Epochs: 12183 | epoch avg. loss: 0.110 | test avg. loss: 4.332
Epochs: 12184 | epoch avg. loss: 0.177 | test avg. loss: 3.903
Epochs: 12185 | epoch avg. loss: 0.225 | test avg. loss: 4.065


 24%|██▍       | 12190/50000 [18:12<41:53, 15.04it/s]

Epochs: 12186 | epoch avg. loss: 0.121 | test avg. loss: 4.044
Epochs: 12187 | epoch avg. loss: 0.089 | test avg. loss: 4.141
Epochs: 12188 | epoch avg. loss: 0.084 | test avg. loss: 4.312
Epochs: 12189 | epoch avg. loss: 0.074 | test avg. loss: 4.100


 24%|██▍       | 12192/50000 [18:12<41:51, 15.06it/s]

Epochs: 12190 | epoch avg. loss: 0.103 | test avg. loss: 4.802
Epochs: 12191 | epoch avg. loss: 0.340 | test avg. loss: 3.966
Epochs: 12192 | epoch avg. loss: 0.371 | test avg. loss: 3.971


 24%|██▍       | 12196/50000 [18:12<46:37, 13.52it/s]

Epochs: 12193 | epoch avg. loss: 0.146 | test avg. loss: 4.506
Epochs: 12194 | epoch avg. loss: 0.229 | test avg. loss: 3.999
Epochs: 12195 | epoch avg. loss: 0.235 | test avg. loss: 4.061


 24%|██▍       | 12198/50000 [18:13<48:29, 12.99it/s]

Epochs: 12196 | epoch avg. loss: 0.143 | test avg. loss: 4.174
Epochs: 12197 | epoch avg. loss: 0.137 | test avg. loss: 3.917
Epochs: 12198 | epoch avg. loss: 0.172 | test avg. loss: 3.949


 24%|██▍       | 12198/50000 [18:13<48:29, 12.99it/s]

Epochs: 12199 | epoch avg. loss: 0.109 | test avg. loss: 4.506


 24%|██▍       | 12202/50000 [18:14<2:24:22,  4.36it/s]

Epochs: 12200 | epoch avg. loss: 0.225 | test avg. loss: 4.030
Epochs: 12201 | epoch avg. loss: 0.060 | test avg. loss: 4.514
Epochs: 12202 | epoch avg. loss: 0.182 | test avg. loss: 4.015


 24%|██▍       | 12206/50000 [18:15<1:34:30,  6.67it/s]

Epochs: 12203 | epoch avg. loss: 0.210 | test avg. loss: 4.210
Epochs: 12204 | epoch avg. loss: 0.165 | test avg. loss: 4.062
Epochs: 12205 | epoch avg. loss: 0.087 | test avg. loss: 3.993


 24%|██▍       | 12208/50000 [18:15<1:21:51,  7.69it/s]

Epochs: 12206 | epoch avg. loss: 0.075 | test avg. loss: 4.117
Epochs: 12207 | epoch avg. loss: 0.070 | test avg. loss: 4.116
Epochs: 12208 | epoch avg. loss: 0.125 | test avg. loss: 4.955


 24%|██▍       | 12212/50000 [18:15<1:04:01,  9.84it/s]

Epochs: 12209 | epoch avg. loss: 0.566 | test avg. loss: 4.203
Epochs: 12210 | epoch avg. loss: 0.693 | test avg. loss: 4.113
Epochs: 12211 | epoch avg. loss: 0.283 | test avg. loss: 4.170


                                                     

Epochs: 12212 | epoch avg. loss: 0.254 | test avg. loss: 4.078
Epochs: 12213 | epoch avg. loss: 0.283 | test avg. loss: 4.496
Epochs: 12214 | epoch avg. loss: 0.271 | test avg. loss: 3.905


 24%|██▍       | 12218/50000 [18:15<50:31, 12.46it/s]

Epochs: 12215 | epoch avg. loss: 0.084 | test avg. loss: 4.196
Epochs: 12216 | epoch avg. loss: 0.158 | test avg. loss: 3.946
Epochs: 12217 | epoch avg. loss: 0.178 | test avg. loss: 3.962


 24%|██▍       | 12220/50000 [18:16<47:51, 13.15it/s]

Epochs: 12218 | epoch avg. loss: 0.201 | test avg. loss: 4.442
Epochs: 12219 | epoch avg. loss: 0.279 | test avg. loss: 3.787
Epochs: 12220 | epoch avg. loss: 0.149 | test avg. loss: 4.049


 24%|██▍       | 12224/50000 [18:16<49:02, 12.84it/s]

Epochs: 12221 | epoch avg. loss: 0.157 | test avg. loss: 3.810
Epochs: 12222 | epoch avg. loss: 0.076 | test avg. loss: 3.888
Epochs: 12223 | epoch avg. loss: 0.111 | test avg. loss: 4.340


 24%|██▍       | 12228/50000 [18:16<45:30, 13.83it/s]

Epochs: 12224 | epoch avg. loss: 0.155 | test avg. loss: 4.295
Epochs: 12225 | epoch avg. loss: 0.439 | test avg. loss: 4.493
Epochs: 12226 | epoch avg. loss: 0.172 | test avg. loss: 4.136
Epochs: 12227 | epoch avg. loss: 0.132 | test avg. loss: 3.872


 24%|██▍       | 12230/50000 [18:16<47:35, 13.23it/s]

Epochs: 12228 | epoch avg. loss: 0.102 | test avg. loss: 4.027
Epochs: 12229 | epoch avg. loss: 0.144 | test avg. loss: 3.748
Epochs: 12230 | epoch avg. loss: 0.120 | test avg. loss: 4.112


 24%|██▍       | 12234/50000 [18:17<49:42, 12.66it/s]

Epochs: 12231 | epoch avg. loss: 0.304 | test avg. loss: 4.314
Epochs: 12232 | epoch avg. loss: 0.134 | test avg. loss: 4.154
Epochs: 12233 | epoch avg. loss: 0.133 | test avg. loss: 4.121


 24%|██▍       | 12236/50000 [18:17<50:38, 12.43it/s]

Epochs: 12234 | epoch avg. loss: 0.058 | test avg. loss: 3.934
Epochs: 12235 | epoch avg. loss: 0.051 | test avg. loss: 3.815
Epochs: 12236 | epoch avg. loss: 0.053 | test avg. loss: 3.841


 24%|██▍       | 12240/50000 [18:17<46:55, 13.41it/s]

Epochs: 12237 | epoch avg. loss: 0.059 | test avg. loss: 3.883
Epochs: 12238 | epoch avg. loss: 0.080 | test avg. loss: 4.145
Epochs: 12239 | epoch avg. loss: 0.107 | test avg. loss: 4.245


 24%|██▍       | 12242/50000 [18:17<46:30, 13.53it/s]

Epochs: 12240 | epoch avg. loss: 0.087 | test avg. loss: 4.022
Epochs: 12241 | epoch avg. loss: 0.108 | test avg. loss: 4.765
Epochs: 12242 | epoch avg. loss: 0.435 | test avg. loss: 3.852


 24%|██▍       | 12246/50000 [18:18<44:44, 14.06it/s]

Epochs: 12243 | epoch avg. loss: 0.231 | test avg. loss: 3.818
Epochs: 12244 | epoch avg. loss: 0.283 | test avg. loss: 4.187
Epochs: 12245 | epoch avg. loss: 0.166 | test avg. loss: 4.177
Epochs: 12246 | epoch avg. loss: 0.061 | test avg. loss: 4.310


 24%|██▍       | 12248/50000 [18:18<44:04, 14.27it/s]

Epochs: 12247 | epoch avg. loss: 0.103 | test avg. loss: 4.108
Epochs: 12248 | epoch avg. loss: 0.104 | test avg. loss: 3.905
Epochs: 12249 | epoch avg. loss: 0.106 | test avg. loss: 3.988


 25%|██▍       | 12254/50000 [18:18<47:05, 13.36it/s]

Epochs: 12250 | epoch avg. loss: 0.204 | test avg. loss: 4.195
Epochs: 12251 | epoch avg. loss: 0.155 | test avg. loss: 3.972
Epochs: 12252 | epoch avg. loss: 0.087 | test avg. loss: 4.169
Epochs: 12253 | epoch avg. loss: 0.077 | test avg. loss: 4.143


 25%|██▍       | 12256/50000 [18:18<47:07, 13.35it/s]

Epochs: 12254 | epoch avg. loss: 0.059 | test avg. loss: 4.018
Epochs: 12255 | epoch avg. loss: 0.053 | test avg. loss: 4.036
Epochs: 12256 | epoch avg. loss: 0.048 | test avg. loss: 3.994


 25%|██▍       | 12260/50000 [18:19<52:12, 12.05it/s]

Epochs: 12257 | epoch avg. loss: 0.050 | test avg. loss: 4.156
Epochs: 12258 | epoch avg. loss: 0.114 | test avg. loss: 4.173
Epochs: 12259 | epoch avg. loss: 0.089 | test avg. loss: 3.917


 25%|██▍       | 12262/50000 [18:19<53:31, 11.75it/s]

Epochs: 12260 | epoch avg. loss: 0.061 | test avg. loss: 4.106
Epochs: 12261 | epoch avg. loss: 0.095 | test avg. loss: 4.057
Epochs: 12262 | epoch avg. loss: 0.050 | test avg. loss: 4.055


 25%|██▍       | 12266/50000 [18:19<51:57, 12.10it/s]

Epochs: 12263 | epoch avg. loss: 0.057 | test avg. loss: 4.275
Epochs: 12264 | epoch avg. loss: 0.103 | test avg. loss: 4.066
Epochs: 12265 | epoch avg. loss: 0.069 | test avg. loss: 3.997


 25%|██▍       | 12268/50000 [18:19<51:13, 12.28it/s]

Epochs: 12266 | epoch avg. loss: 0.066 | test avg. loss: 4.185
Epochs: 12267 | epoch avg. loss: 0.087 | test avg. loss: 3.948
Epochs: 12268 | epoch avg. loss: 0.065 | test avg. loss: 4.069


 25%|██▍       | 12272/50000 [18:20<50:15, 12.51it/s]

Epochs: 12269 | epoch avg. loss: 0.093 | test avg. loss: 4.158
Epochs: 12270 | epoch avg. loss: 0.081 | test avg. loss: 3.987
Epochs: 12271 | epoch avg. loss: 0.055 | test avg. loss: 4.355




Epochs: 12272 | epoch avg. loss: 0.307 | test avg. loss: 4.176
Epochs: 12273 | epoch avg. loss: 0.149 | test avg. loss: 4.125
Epochs: 12274 | epoch avg. loss: 0.530 | test avg. loss: 4.397


 25%|██▍       | 12278/50000 [18:20<46:10, 13.62it/s]

Epochs: 12275 | epoch avg. loss: 0.578 | test avg. loss: 4.133
Epochs: 12276 | epoch avg. loss: 0.189 | test avg. loss: 3.933
Epochs: 12277 | epoch avg. loss: 0.195 | test avg. loss: 4.825


 25%|██▍       | 12280/50000 [18:20<46:53, 13.41it/s]

Epochs: 12278 | epoch avg. loss: 0.538 | test avg. loss: 3.868
Epochs: 12279 | epoch avg. loss: 0.568 | test avg. loss: 3.878
Epochs: 12280 | epoch avg. loss: 0.277 | test avg. loss: 3.963


 25%|██▍       | 12284/50000 [18:21<48:17, 13.02it/s]

Epochs: 12281 | epoch avg. loss: 0.425 | test avg. loss: 4.204
Epochs: 12282 | epoch avg. loss: 0.399 | test avg. loss: 4.918
Epochs: 12283 | epoch avg. loss: 0.413 | test avg. loss: 3.919


                                                     

Epochs: 12284 | epoch avg. loss: 0.322 | test avg. loss: 3.902
Epochs: 12285 | epoch avg. loss: 0.238 | test avg. loss: 4.136
Epochs: 12286 | epoch avg. loss: 0.257 | test avg. loss: 3.747


 25%|██▍       | 12290/50000 [18:21<44:27, 14.14it/s]

Epochs: 12287 | epoch avg. loss: 0.191 | test avg. loss: 4.276
Epochs: 12288 | epoch avg. loss: 0.155 | test avg. loss: 4.115
Epochs: 12289 | epoch avg. loss: 0.179 | test avg. loss: 4.290


 25%|██▍       | 12292/50000 [18:21<45:24, 13.84it/s]

Epochs: 12290 | epoch avg. loss: 0.106 | test avg. loss: 4.382
Epochs: 12291 | epoch avg. loss: 0.113 | test avg. loss: 3.930
Epochs: 12292 | epoch avg. loss: 0.132 | test avg. loss: 3.801


 25%|██▍       | 12296/50000 [18:21<48:18, 13.01it/s]

Epochs: 12293 | epoch avg. loss: 0.072 | test avg. loss: 3.968
Epochs: 12294 | epoch avg. loss: 0.069 | test avg. loss: 4.022
Epochs: 12295 | epoch avg. loss: 0.081 | test avg. loss: 4.248


 25%|██▍       | 12298/50000 [18:22<48:29, 12.96it/s]

Epochs: 12296 | epoch avg. loss: 0.105 | test avg. loss: 4.116
Epochs: 12297 | epoch avg. loss: 0.070 | test avg. loss: 3.962
Epochs: 12298 | epoch avg. loss: 0.094 | test avg. loss: 3.925


 25%|██▍       | 12298/50000 [18:22<48:29, 12.96it/s]

Epochs: 12299 | epoch avg. loss: 0.065 | test avg. loss: 4.422


 25%|██▍       | 12302/50000 [18:23<2:11:59,  4.76it/s]

Epochs: 12300 | epoch avg. loss: 0.192 | test avg. loss: 4.037
Epochs: 12301 | epoch avg. loss: 0.229 | test avg. loss: 4.021
Epochs: 12302 | epoch avg. loss: 0.108 | test avg. loss: 4.353


 25%|██▍       | 12306/50000 [18:24<1:31:30,  6.87it/s]

Epochs: 12303 | epoch avg. loss: 0.170 | test avg. loss: 4.113
Epochs: 12304 | epoch avg. loss: 0.319 | test avg. loss: 4.253
Epochs: 12305 | epoch avg. loss: 0.158 | test avg. loss: 4.840


 25%|██▍       | 12310/50000 [18:24<1:06:35,  9.43it/s]

Epochs: 12306 | epoch avg. loss: 0.365 | test avg. loss: 4.145
Epochs: 12307 | epoch avg. loss: 0.351 | test avg. loss: 4.230
Epochs: 12308 | epoch avg. loss: 0.244 | test avg. loss: 5.452
Epochs: 12309 | epoch avg. loss: 0.635 | test avg. loss: 4.340


 25%|██▍       | 12314/50000 [18:24<53:29, 11.74it/s]

Epochs: 12310 | epoch avg. loss: 0.817 | test avg. loss: 3.968
Epochs: 12311 | epoch avg. loss: 0.255 | test avg. loss: 4.350
Epochs: 12312 | epoch avg. loss: 0.337 | test avg. loss: 3.942
Epochs: 12313 | epoch avg. loss: 0.241 | test avg. loss: 4.360


 25%|██▍       | 12316/50000 [18:24<49:24, 12.71it/s]

Epochs: 12314 | epoch avg. loss: 0.345 | test avg. loss: 4.137
Epochs: 12315 | epoch avg. loss: 0.190 | test avg. loss: 4.211
Epochs: 12316 | epoch avg. loss: 0.338 | test avg. loss: 4.247


 25%|██▍       | 12320/50000 [18:25<53:01, 11.85it/s]

Epochs: 12317 | epoch avg. loss: 0.193 | test avg. loss: 3.877
Epochs: 12318 | epoch avg. loss: 0.100 | test avg. loss: 4.035
Epochs: 12319 | epoch avg. loss: 0.217 | test avg. loss: 4.420


 25%|██▍       | 12324/50000 [18:25<46:49, 13.41it/s]

Epochs: 12320 | epoch avg. loss: 0.251 | test avg. loss: 3.979
Epochs: 12321 | epoch avg. loss: 0.192 | test avg. loss: 4.527
Epochs: 12322 | epoch avg. loss: 0.488 | test avg. loss: 4.384
Epochs: 12323 | epoch avg. loss: 0.344 | test avg. loss: 4.785


 25%|██▍       | 12328/50000 [18:25<43:31, 14.42it/s]

Epochs: 12324 | epoch avg. loss: 0.952 | test avg. loss: 5.012
Epochs: 12325 | epoch avg. loss: 0.531 | test avg. loss: 3.610
Epochs: 12326 | epoch avg. loss: 0.233 | test avg. loss: 3.297
Epochs: 12327 | epoch avg. loss: 0.290 | test avg. loss: 4.874


 25%|██▍       | 12332/50000 [18:25<42:20, 14.82it/s]

Epochs: 12328 | epoch avg. loss: 0.932 | test avg. loss: 3.951
Epochs: 12329 | epoch avg. loss: 0.736 | test avg. loss: 4.233
Epochs: 12330 | epoch avg. loss: 0.388 | test avg. loss: 6.221
Epochs: 12331 | epoch avg. loss: 1.291 | test avg. loss: 4.056


                                                     

Epochs: 12332 | epoch avg. loss: 0.569 | test avg. loss: 3.901
Epochs: 12333 | epoch avg. loss: 0.336 | test avg. loss: 4.474
Epochs: 12334 | epoch avg. loss: 0.363 | test avg. loss: 4.308


 25%|██▍       | 12338/50000 [18:26<43:57, 14.28it/s]

Epochs: 12335 | epoch avg. loss: 0.542 | test avg. loss: 5.006
Epochs: 12336 | epoch avg. loss: 0.561 | test avg. loss: 4.545
Epochs: 12337 | epoch avg. loss: 0.297 | test avg. loss: 4.680
Epochs: 12338 | epoch avg. loss: 0.483 | test avg. loss: 6.010


 25%|██▍       | 12342/50000 [18:26<42:16, 14.85it/s]

Epochs: 12339 | epoch avg. loss: 0.858 | test avg. loss: 4.505
Epochs: 12340 | epoch avg. loss: 1.043 | test avg. loss: 4.102
Epochs: 12341 | epoch avg. loss: 0.290 | test avg. loss: 4.726
Epochs: 12342 | epoch avg. loss: 0.334 | test avg. loss: 4.199


                                                     

Epochs: 12343 | epoch avg. loss: 0.354 | test avg. loss: 4.764
Epochs: 12344 | epoch avg. loss: 0.518 | test avg. loss: 4.032
Epochs: 12345 | epoch avg. loss: 0.185 | test avg. loss: 3.930


 25%|██▍       | 12348/50000 [18:27<43:32, 14.41it/s]

Epochs: 12346 | epoch avg. loss: 0.137 | test avg. loss: 4.141
Epochs: 12347 | epoch avg. loss: 0.171 | test avg. loss: 3.901
Epochs: 12348 | epoch avg. loss: 0.211 | test avg. loss: 4.227


 25%|██▍       | 12352/50000 [18:27<47:34, 13.19it/s]

Epochs: 12349 | epoch avg. loss: 0.185 | test avg. loss: 3.995
Epochs: 12350 | epoch avg. loss: 0.210 | test avg. loss: 4.166
Epochs: 12351 | epoch avg. loss: 0.110 | test avg. loss: 4.333


 25%|██▍       | 12354/50000 [18:27<46:20, 13.54it/s]

Epochs: 12352 | epoch avg. loss: 0.103 | test avg. loss: 4.242
Epochs: 12353 | epoch avg. loss: 0.124 | test avg. loss: 4.502
Epochs: 12354 | epoch avg. loss: 0.138 | test avg. loss: 4.210


 25%|██▍       | 12358/50000 [18:27<44:58, 13.95it/s]

Epochs: 12355 | epoch avg. loss: 0.110 | test avg. loss: 4.127
Epochs: 12356 | epoch avg. loss: 0.073 | test avg. loss: 3.949
Epochs: 12357 | epoch avg. loss: 0.137 | test avg. loss: 3.945
Epochs: 12358 | epoch avg. loss: 0.217 | test avg. loss: 5.212


 25%|██▍       | 12362/50000 [18:28<47:06, 13.32it/s]

Epochs: 12359 | epoch avg. loss: 0.559 | test avg. loss: 3.998
Epochs: 12360 | epoch avg. loss: 0.542 | test avg. loss: 3.965
Epochs: 12361 | epoch avg. loss: 0.176 | test avg. loss: 4.078


 25%|██▍       | 12364/50000 [18:28<47:17, 13.26it/s]

Epochs: 12362 | epoch avg. loss: 0.107 | test avg. loss: 3.924
Epochs: 12363 | epoch avg. loss: 0.093 | test avg. loss: 4.220
Epochs: 12364 | epoch avg. loss: 0.153 | test avg. loss: 3.791
Epochs: 12365 | epoch avg. loss: 0.186 | test avg. loss: 4.219


 25%|██▍       | 12370/50000 [18:28<41:51, 14.98it/s]

Epochs: 12366 | epoch avg. loss: 0.350 | test avg. loss: 4.011
Epochs: 12367 | epoch avg. loss: 0.162 | test avg. loss: 3.860
Epochs: 12368 | epoch avg. loss: 0.196 | test avg. loss: 4.242
Epochs: 12369 | epoch avg. loss: 0.176 | test avg. loss: 3.877


 25%|██▍       | 12374/50000 [18:28<41:37, 15.07it/s]

Epochs: 12370 | epoch avg. loss: 0.139 | test avg. loss: 4.061
Epochs: 12371 | epoch avg. loss: 0.122 | test avg. loss: 3.853
Epochs: 12372 | epoch avg. loss: 0.130 | test avg. loss: 3.962
Epochs: 12373 | epoch avg. loss: 0.066 | test avg. loss: 4.032


 25%|██▍       | 12376/50000 [18:29<43:12, 14.51it/s]

Epochs: 12374 | epoch avg. loss: 0.061 | test avg. loss: 3.988
Epochs: 12375 | epoch avg. loss: 0.070 | test avg. loss: 4.002
Epochs: 12376 | epoch avg. loss: 0.048 | test avg. loss: 4.048


 25%|██▍       | 12380/50000 [18:29<45:39, 13.73it/s]

Epochs: 12377 | epoch avg. loss: 0.115 | test avg. loss: 4.414
Epochs: 12378 | epoch avg. loss: 0.249 | test avg. loss: 3.959
Epochs: 12379 | epoch avg. loss: 0.164 | test avg. loss: 3.991


 25%|██▍       | 12382/50000 [18:29<45:00, 13.93it/s]

Epochs: 12380 | epoch avg. loss: 0.117 | test avg. loss: 4.288
Epochs: 12381 | epoch avg. loss: 0.196 | test avg. loss: 3.983
Epochs: 12382 | epoch avg. loss: 0.083 | test avg. loss: 4.097


 25%|██▍       | 12386/50000 [18:29<43:09, 14.53it/s]

Epochs: 12383 | epoch avg. loss: 0.065 | test avg. loss: 4.219
Epochs: 12384 | epoch avg. loss: 0.074 | test avg. loss: 3.990
Epochs: 12385 | epoch avg. loss: 0.082 | test avg. loss: 4.309


 25%|██▍       | 12388/50000 [18:29<46:26, 13.50it/s]

Epochs: 12386 | epoch avg. loss: 0.212 | test avg. loss: 3.962
Epochs: 12387 | epoch avg. loss: 0.082 | test avg. loss: 4.037
Epochs: 12388 | epoch avg. loss: 0.092 | test avg. loss: 4.367


 25%|██▍       | 12392/50000 [18:30<52:57, 11.83it/s]

Epochs: 12389 | epoch avg. loss: 0.161 | test avg. loss: 4.069
Epochs: 12390 | epoch avg. loss: 0.057 | test avg. loss: 4.138
Epochs: 12391 | epoch avg. loss: 0.130 | test avg. loss: 3.976


 25%|██▍       | 12394/50000 [18:30<52:37, 11.91it/s]

Epochs: 12392 | epoch avg. loss: 0.067 | test avg. loss: 3.796
Epochs: 12393 | epoch avg. loss: 0.055 | test avg. loss: 3.828
Epochs: 12394 | epoch avg. loss: 0.056 | test avg. loss: 4.058


 25%|██▍       | 12398/50000 [18:30<50:24, 12.43it/s]

Epochs: 12395 | epoch avg. loss: 0.071 | test avg. loss: 3.998
Epochs: 12396 | epoch avg. loss: 0.109 | test avg. loss: 4.261
Epochs: 12397 | epoch avg. loss: 0.142 | test avg. loss: 4.072


 25%|██▍       | 12398/50000 [18:30<50:24, 12.43it/s]

Epochs: 12398 | epoch avg. loss: 0.105 | test avg. loss: 4.048
Epochs: 12399 | epoch avg. loss: 0.287 | test avg. loss: 4.154


 25%|██▍       | 12402/50000 [18:32<2:23:18,  4.37it/s]

Epochs: 12400 | epoch avg. loss: 0.203 | test avg. loss: 4.163
Epochs: 12401 | epoch avg. loss: 0.114 | test avg. loss: 3.936
Epochs: 12402 | epoch avg. loss: 0.152 | test avg. loss: 4.499


 25%|██▍       | 12406/50000 [18:32<1:29:41,  6.99it/s]

Epochs: 12403 | epoch avg. loss: 0.242 | test avg. loss: 4.008
Epochs: 12404 | epoch avg. loss: 0.308 | test avg. loss: 3.953
Epochs: 12405 | epoch avg. loss: 0.185 | test avg. loss: 4.588
Epochs: 12406 | epoch avg. loss: 0.353 | test avg. loss: 3.882


 25%|██▍       | 12410/50000 [18:33<1:13:27,  8.53it/s]

Epochs: 12407 | epoch avg. loss: 0.219 | test avg. loss: 4.103
Epochs: 12408 | epoch avg. loss: 0.262 | test avg. loss: 4.054
Epochs: 12409 | epoch avg. loss: 0.146 | test avg. loss: 4.175


 25%|██▍       | 12412/50000 [18:33<1:08:43,  9.12it/s]

Epochs: 12410 | epoch avg. loss: 0.233 | test avg. loss: 4.610
Epochs: 12411 | epoch avg. loss: 0.214 | test avg. loss: 4.201
Epochs: 12412 | epoch avg. loss: 0.156 | test avg. loss: 4.014


 25%|██▍       | 12416/50000 [18:33<1:01:31, 10.18it/s]

Epochs: 12413 | epoch avg. loss: 0.144 | test avg. loss: 4.403
Epochs: 12414 | epoch avg. loss: 0.290 | test avg. loss: 3.865
Epochs: 12415 | epoch avg. loss: 0.199 | test avg. loss: 4.232


 25%|██▍       | 12418/50000 [18:33<59:33, 10.52it/s]

Epochs: 12416 | epoch avg. loss: 0.284 | test avg. loss: 4.344
Epochs: 12417 | epoch avg. loss: 0.149 | test avg. loss: 4.171
Epochs: 12418 | epoch avg. loss: 0.256 | test avg. loss: 4.241


 25%|██▍       | 12422/50000 [18:34<55:58, 11.19it/s]

Epochs: 12419 | epoch avg. loss: 0.253 | test avg. loss: 4.157
Epochs: 12420 | epoch avg. loss: 0.136 | test avg. loss: 4.239
Epochs: 12421 | epoch avg. loss: 0.469 | test avg. loss: 4.378


 25%|██▍       | 12424/50000 [18:34<52:20, 11.96it/s]

Epochs: 12422 | epoch avg. loss: 0.369 | test avg. loss: 3.920
Epochs: 12423 | epoch avg. loss: 0.292 | test avg. loss: 4.093
Epochs: 12424 | epoch avg. loss: 0.602 | test avg. loss: 4.432


 25%|██▍       | 12428/50000 [18:34<47:54, 13.07it/s]

Epochs: 12425 | epoch avg. loss: 0.370 | test avg. loss: 3.886
Epochs: 12426 | epoch avg. loss: 0.215 | test avg. loss: 3.859
Epochs: 12427 | epoch avg. loss: 0.145 | test avg. loss: 4.172
Epochs: 12428 | epoch avg. loss: 0.167 | test avg. loss: 3.891


 25%|██▍       | 12432/50000 [18:34<45:42, 13.70it/s]

Epochs: 12429 | epoch avg. loss: 0.137 | test avg. loss: 3.884
Epochs: 12430 | epoch avg. loss: 0.143 | test avg. loss: 4.221
Epochs: 12431 | epoch avg. loss: 0.220 | test avg. loss: 3.915
Epochs: 12432 | epoch avg. loss: 0.077 | test avg. loss: 3.760


 25%|██▍       | 12436/50000 [18:35<44:26, 14.09it/s]

Epochs: 12433 | epoch avg. loss: 0.074 | test avg. loss: 4.114
Epochs: 12434 | epoch avg. loss: 0.093 | test avg. loss: 4.033
Epochs: 12435 | epoch avg. loss: 0.114 | test avg. loss: 4.139


 25%|██▍       | 12438/50000 [18:35<47:12, 13.26it/s]

Epochs: 12436 | epoch avg. loss: 0.081 | test avg. loss: 4.288
Epochs: 12437 | epoch avg. loss: 0.073 | test avg. loss: 4.082
Epochs: 12438 | epoch avg. loss: 0.050 | test avg. loss: 4.014


 25%|██▍       | 12442/50000 [18:35<43:40, 14.33it/s]

Epochs: 12439 | epoch avg. loss: 0.058 | test avg. loss: 4.061
Epochs: 12440 | epoch avg. loss: 0.069 | test avg. loss: 4.026
Epochs: 12441 | epoch avg. loss: 0.055 | test avg. loss: 4.155
Epochs: 12442 | epoch avg. loss: 0.052 | test avg. loss: 4.254


 25%|██▍       | 12446/50000 [18:35<43:45, 14.30it/s]

Epochs: 12443 | epoch avg. loss: 0.053 | test avg. loss: 4.165
Epochs: 12444 | epoch avg. loss: 0.064 | test avg. loss: 4.098
Epochs: 12445 | epoch avg. loss: 0.053 | test avg. loss: 3.873


 25%|██▍       | 12450/50000 [18:36<43:18, 14.45it/s]

Epochs: 12446 | epoch avg. loss: 0.096 | test avg. loss: 3.900
Epochs: 12447 | epoch avg. loss: 0.082 | test avg. loss: 4.483
Epochs: 12448 | epoch avg. loss: 0.198 | test avg. loss: 4.031
Epochs: 12449 | epoch avg. loss: 0.264 | test avg. loss: 4.030


 25%|██▍       | 12452/50000 [18:36<43:02, 14.54it/s]

Epochs: 12450 | epoch avg. loss: 0.143 | test avg. loss: 4.321
Epochs: 12451 | epoch avg. loss: 0.158 | test avg. loss: 4.046
Epochs: 12452 | epoch avg. loss: 0.133 | test avg. loss: 4.026


 25%|██▍       | 12456/50000 [18:36<46:06, 13.57it/s]

Epochs: 12453 | epoch avg. loss: 0.073 | test avg. loss: 4.233
Epochs: 12454 | epoch avg. loss: 0.103 | test avg. loss: 3.877
Epochs: 12455 | epoch avg. loss: 0.093 | test avg. loss: 3.883


 25%|██▍       | 12458/50000 [18:36<45:47, 13.66it/s]

Epochs: 12456 | epoch avg. loss: 0.054 | test avg. loss: 4.006
Epochs: 12457 | epoch avg. loss: 0.045 | test avg. loss: 4.174
Epochs: 12458 | epoch avg. loss: 0.048 | test avg. loss: 4.187


 25%|██▍       | 12462/50000 [18:36<45:59, 13.60it/s]

Epochs: 12459 | epoch avg. loss: 0.108 | test avg. loss: 4.137
Epochs: 12460 | epoch avg. loss: 0.047 | test avg. loss: 3.953
Epochs: 12461 | epoch avg. loss: 0.046 | test avg. loss: 4.003


 25%|██▍       | 12464/50000 [18:37<47:31, 13.16it/s]

Epochs: 12462 | epoch avg. loss: 0.074 | test avg. loss: 3.879
Epochs: 12463 | epoch avg. loss: 0.081 | test avg. loss: 3.979
Epochs: 12464 | epoch avg. loss: 0.099 | test avg. loss: 4.468


 25%|██▍       | 12468/50000 [18:37<49:15, 12.70it/s]

Epochs: 12465 | epoch avg. loss: 0.157 | test avg. loss: 4.151
Epochs: 12466 | epoch avg. loss: 0.313 | test avg. loss: 4.109
Epochs: 12467 | epoch avg. loss: 0.174 | test avg. loss: 3.910


 25%|██▍       | 12470/50000 [18:37<48:24, 12.92it/s]

Epochs: 12468 | epoch avg. loss: 0.144 | test avg. loss: 3.961
Epochs: 12469 | epoch avg. loss: 0.145 | test avg. loss: 4.779
Epochs: 12470 | epoch avg. loss: 0.324 | test avg. loss: 4.151


 25%|██▍       | 12474/50000 [18:37<48:56, 12.78it/s]

Epochs: 12471 | epoch avg. loss: 0.151 | test avg. loss: 4.176
Epochs: 12472 | epoch avg. loss: 0.067 | test avg. loss: 4.241
Epochs: 12473 | epoch avg. loss: 0.073 | test avg. loss: 4.066


 25%|██▍       | 12476/50000 [18:38<49:58, 12.52it/s]

Epochs: 12474 | epoch avg. loss: 0.139 | test avg. loss: 4.117
Epochs: 12475 | epoch avg. loss: 0.049 | test avg. loss: 4.140
Epochs: 12476 | epoch avg. loss: 0.049 | test avg. loss: 4.313


                                                     

Epochs: 12477 | epoch avg. loss: 0.150 | test avg. loss: 4.175
Epochs: 12478 | epoch avg. loss: 0.075 | test avg. loss: 4.024
Epochs: 12479 | epoch avg. loss: 0.175 | test avg. loss: 4.126


 25%|██▍       | 12484/50000 [18:38<44:25, 14.08it/s]

Epochs: 12480 | epoch avg. loss: 0.134 | test avg. loss: 3.911
Epochs: 12481 | epoch avg. loss: 0.065 | test avg. loss: 3.935
Epochs: 12482 | epoch avg. loss: 0.062 | test avg. loss: 4.093
Epochs: 12483 | epoch avg. loss: 0.065 | test avg. loss: 3.996


 25%|██▍       | 12486/50000 [18:38<43:12, 14.47it/s]

Epochs: 12484 | epoch avg. loss: 0.047 | test avg. loss: 4.178
Epochs: 12485 | epoch avg. loss: 0.082 | test avg. loss: 3.950
Epochs: 12486 | epoch avg. loss: 0.101 | test avg. loss: 3.979


 25%|██▍       | 12490/50000 [18:39<48:35, 12.87it/s]

Epochs: 12487 | epoch avg. loss: 0.070 | test avg. loss: 4.206
Epochs: 12488 | epoch avg. loss: 0.084 | test avg. loss: 4.076
Epochs: 12489 | epoch avg. loss: 0.088 | test avg. loss: 4.033


 25%|██▍       | 12492/50000 [18:39<48:49, 12.80it/s]

Epochs: 12490 | epoch avg. loss: 0.085 | test avg. loss: 4.376
Epochs: 12491 | epoch avg. loss: 0.246 | test avg. loss: 3.998
Epochs: 12492 | epoch avg. loss: 0.072 | test avg. loss: 4.128


 25%|██▍       | 12496/50000 [18:39<50:56, 12.27it/s]

Epochs: 12493 | epoch avg. loss: 0.089 | test avg. loss: 4.275
Epochs: 12494 | epoch avg. loss: 0.103 | test avg. loss: 4.184
Epochs: 12495 | epoch avg. loss: 0.295 | test avg. loss: 4.229


 25%|██▍       | 12498/50000 [18:39<51:46, 12.07it/s]

Epochs: 12496 | epoch avg. loss: 0.179 | test avg. loss: 4.059
Epochs: 12497 | epoch avg. loss: 0.114 | test avg. loss: 3.974
Epochs: 12498 | epoch avg. loss: 0.305 | test avg. loss: 4.195


 25%|██▍       | 12498/50000 [18:39<51:46, 12.07it/s]

Epochs: 12499 | epoch avg. loss: 0.201 | test avg. loss: 3.960


 25%|██▌       | 12502/50000 [18:41<2:18:18,  4.52it/s]

Epochs: 12500 | epoch avg. loss: 0.123 | test avg. loss: 4.018
Epochs: 12501 | epoch avg. loss: 0.093 | test avg. loss: 4.322
Epochs: 12502 | epoch avg. loss: 0.115 | test avg. loss: 4.164


 25%|██▌       | 12506/50000 [18:41<1:31:53,  6.80it/s]

Epochs: 12503 | epoch avg. loss: 0.110 | test avg. loss: 4.055
Epochs: 12504 | epoch avg. loss: 0.084 | test avg. loss: 4.124
Epochs: 12505 | epoch avg. loss: 0.124 | test avg. loss: 3.762


 25%|██▌       | 12508/50000 [18:41<1:19:21,  7.87it/s]

Epochs: 12506 | epoch avg. loss: 0.062 | test avg. loss: 3.940
Epochs: 12507 | epoch avg. loss: 0.107 | test avg. loss: 3.957
Epochs: 12508 | epoch avg. loss: 0.074 | test avg. loss: 4.002


 25%|██▌       | 12512/50000 [18:42<1:00:25, 10.34it/s]

Epochs: 12509 | epoch avg. loss: 0.094 | test avg. loss: 4.447
Epochs: 12510 | epoch avg. loss: 0.200 | test avg. loss: 4.002
Epochs: 12511 | epoch avg. loss: 0.161 | test avg. loss: 3.997


 25%|██▌       | 12514/50000 [18:42<57:46, 10.81it/s]

Epochs: 12512 | epoch avg. loss: 0.116 | test avg. loss: 4.225
Epochs: 12513 | epoch avg. loss: 0.152 | test avg. loss: 4.002
Epochs: 12514 | epoch avg. loss: 0.048 | test avg. loss: 4.079


 25%|██▌       | 12518/50000 [18:42<52:20, 11.94it/s]

Epochs: 12515 | epoch avg. loss: 0.069 | test avg. loss: 4.270
Epochs: 12516 | epoch avg. loss: 0.102 | test avg. loss: 3.998
Epochs: 12517 | epoch avg. loss: 0.080 | test avg. loss: 3.976


 25%|██▌       | 12520/50000 [18:42<51:01, 12.24it/s]

Epochs: 12518 | epoch avg. loss: 0.054 | test avg. loss: 4.094
Epochs: 12519 | epoch avg. loss: 0.062 | test avg. loss: 3.933
Epochs: 12520 | epoch avg. loss: 0.098 | test avg. loss: 4.139


 25%|██▌       | 12524/50000 [18:43<50:33, 12.35it/s]

Epochs: 12521 | epoch avg. loss: 0.170 | test avg. loss: 4.180
Epochs: 12522 | epoch avg. loss: 0.118 | test avg. loss: 4.046
Epochs: 12523 | epoch avg. loss: 0.232 | test avg. loss: 4.378


 25%|██▌       | 12526/50000 [18:43<49:57, 12.50it/s]

Epochs: 12524 | epoch avg. loss: 0.231 | test avg. loss: 4.259
Epochs: 12525 | epoch avg. loss: 0.126 | test avg. loss: 4.139
Epochs: 12526 | epoch avg. loss: 0.322 | test avg. loss: 4.240


 25%|██▌       | 12530/50000 [18:43<45:46, 13.64it/s]

Epochs: 12527 | epoch avg. loss: 0.222 | test avg. loss: 3.890
Epochs: 12528 | epoch avg. loss: 0.092 | test avg. loss: 3.897
Epochs: 12529 | epoch avg. loss: 0.127 | test avg. loss: 4.199
Epochs: 12530 | epoch avg. loss: 0.117 | test avg. loss: 3.891


 25%|██▌       | 12534/50000 [18:43<44:58, 13.89it/s]

Epochs: 12531 | epoch avg. loss: 0.178 | test avg. loss: 3.837
Epochs: 12532 | epoch avg. loss: 0.120 | test avg. loss: 4.477
Epochs: 12533 | epoch avg. loss: 0.331 | test avg. loss: 4.041


 25%|██▌       | 12536/50000 [18:44<47:41, 13.09it/s]

Epochs: 12534 | epoch avg. loss: 0.560 | test avg. loss: 4.010
Epochs: 12535 | epoch avg. loss: 0.272 | test avg. loss: 4.662
Epochs: 12536 | epoch avg. loss: 0.331 | test avg. loss: 4.402


 25%|██▌       | 12540/50000 [18:44<44:43, 13.96it/s]

Epochs: 12537 | epoch avg. loss: 0.845 | test avg. loss: 4.233
Epochs: 12538 | epoch avg. loss: 0.433 | test avg. loss: 4.081
Epochs: 12539 | epoch avg. loss: 0.279 | test avg. loss: 4.321


 25%|██▌       | 12544/50000 [18:44<43:38, 14.30it/s]

Epochs: 12540 | epoch avg. loss: 0.393 | test avg. loss: 4.880
Epochs: 12541 | epoch avg. loss: 0.432 | test avg. loss: 4.084
Epochs: 12542 | epoch avg. loss: 0.267 | test avg. loss: 4.016
Epochs: 12543 | epoch avg. loss: 0.343 | test avg. loss: 5.088


 25%|██▌       | 12548/50000 [18:44<42:33, 14.67it/s]

Epochs: 12544 | epoch avg. loss: 0.657 | test avg. loss: 3.932
Epochs: 12545 | epoch avg. loss: 1.003 | test avg. loss: 3.870
Epochs: 12546 | epoch avg. loss: 0.368 | test avg. loss: 4.885
Epochs: 12547 | epoch avg. loss: 0.397 | test avg. loss: 4.495


 25%|██▌       | 12550/50000 [18:45<46:58, 13.29it/s]

Epochs: 12548 | epoch avg. loss: 0.451 | test avg. loss: 4.398
Epochs: 12549 | epoch avg. loss: 0.184 | test avg. loss: 4.140
Epochs: 12550 | epoch avg. loss: 0.120 | test avg. loss: 3.812


 25%|██▌       | 12554/50000 [18:45<44:11, 14.12it/s]

Epochs: 12551 | epoch avg. loss: 0.197 | test avg. loss: 4.086
Epochs: 12552 | epoch avg. loss: 0.211 | test avg. loss: 4.250
Epochs: 12553 | epoch avg. loss: 0.135 | test avg. loss: 4.033
Epochs: 12554 | epoch avg. loss: 0.160 | test avg. loss: 4.388


 25%|██▌       | 12558/50000 [18:45<44:22, 14.06it/s]

Epochs: 12555 | epoch avg. loss: 0.243 | test avg. loss: 3.979
Epochs: 12556 | epoch avg. loss: 0.259 | test avg. loss: 4.055
Epochs: 12557 | epoch avg. loss: 0.318 | test avg. loss: 4.555
Epochs: 12558 | epoch avg. loss: 0.390 | test avg. loss: 3.871


 25%|██▌       | 12562/50000 [18:45<42:28, 14.69it/s]

Epochs: 12559 | epoch avg. loss: 0.056 | test avg. loss: 3.857
Epochs: 12560 | epoch avg. loss: 0.077 | test avg. loss: 3.896
Epochs: 12561 | epoch avg. loss: 0.076 | test avg. loss: 3.818




Epochs: 12562 | epoch avg. loss: 0.113 | test avg. loss: 4.053
Epochs: 12563 | epoch avg. loss: 0.061 | test avg. loss: 4.070
Epochs: 12564 | epoch avg. loss: 0.068 | test avg. loss: 4.088




Epochs: 12565 | epoch avg. loss: 0.048 | test avg. loss: 3.990
Epochs: 12566 | epoch avg. loss: 0.091 | test avg. loss: 3.871
Epochs: 12567 | epoch avg. loss: 0.088 | test avg. loss: 4.047


 25%|██▌       | 12570/50000 [18:46<43:58, 14.19it/s]

Epochs: 12568 | epoch avg. loss: 0.112 | test avg. loss: 3.947
Epochs: 12569 | epoch avg. loss: 0.043 | test avg. loss: 4.164
Epochs: 12570 | epoch avg. loss: 0.081 | test avg. loss: 4.026


 25%|██▌       | 12574/50000 [18:46<47:10, 13.22it/s]

Epochs: 12571 | epoch avg. loss: 0.095 | test avg. loss: 4.053
Epochs: 12572 | epoch avg. loss: 0.098 | test avg. loss: 4.712
Epochs: 12573 | epoch avg. loss: 0.410 | test avg. loss: 3.921


 25%|██▌       | 12576/50000 [18:46<47:11, 13.22it/s]

Epochs: 12574 | epoch avg. loss: 0.518 | test avg. loss: 3.835
Epochs: 12575 | epoch avg. loss: 0.293 | test avg. loss: 5.265
Epochs: 12576 | epoch avg. loss: 1.041 | test avg. loss: 4.093


 25%|██▌       | 12580/50000 [18:47<48:00, 12.99it/s]

Epochs: 12577 | epoch avg. loss: 0.727 | test avg. loss: 3.914
Epochs: 12578 | epoch avg. loss: 0.655 | test avg. loss: 7.183
Epochs: 12579 | epoch avg. loss: 2.137 | test avg. loss: 4.489


 25%|██▌       | 12584/50000 [18:47<44:15, 14.09it/s]

Epochs: 12580 | epoch avg. loss: 1.314 | test avg. loss: 4.430
Epochs: 12581 | epoch avg. loss: 0.862 | test avg. loss: 4.304
Epochs: 12582 | epoch avg. loss: 0.906 | test avg. loss: 4.759
Epochs: 12583 | epoch avg. loss: 0.787 | test avg. loss: 5.255


 25%|██▌       | 12586/50000 [18:47<45:38, 13.66it/s]

Epochs: 12584 | epoch avg. loss: 0.859 | test avg. loss: 3.757
Epochs: 12585 | epoch avg. loss: 0.615 | test avg. loss: 3.802
Epochs: 12586 | epoch avg. loss: 0.505 | test avg. loss: 3.756


 25%|██▌       | 12590/50000 [18:47<46:28, 13.42it/s]

Epochs: 12587 | epoch avg. loss: 0.382 | test avg. loss: 3.929
Epochs: 12588 | epoch avg. loss: 0.613 | test avg. loss: 4.487
Epochs: 12589 | epoch avg. loss: 0.518 | test avg. loss: 4.002


 25%|██▌       | 12592/50000 [18:48<46:34, 13.38it/s]

Epochs: 12590 | epoch avg. loss: 0.179 | test avg. loss: 3.900
Epochs: 12591 | epoch avg. loss: 0.139 | test avg. loss: 4.187
Epochs: 12592 | epoch avg. loss: 0.193 | test avg. loss: 3.515


 25%|██▌       | 12596/50000 [18:48<44:11, 14.11it/s]

Epochs: 12593 | epoch avg. loss: 0.173 | test avg. loss: 4.239
Epochs: 12594 | epoch avg. loss: 0.499 | test avg. loss: 3.540
Epochs: 12595 | epoch avg. loss: 0.527 | test avg. loss: 4.250
Epochs: 12596 | epoch avg. loss: 0.663 | test avg. loss: 6.107


 25%|██▌       | 12598/50000 [18:48<43:05, 14.46it/s]

Epochs: 12597 | epoch avg. loss: 1.103 | test avg. loss: 4.534
Epochs: 12598 | epoch avg. loss: 0.910 | test avg. loss: 4.308
Epochs: 12599 | epoch avg. loss: 0.501 | test avg. loss: 4.392


 25%|██▌       | 12602/50000 [18:50<2:14:27,  4.64it/s]

Epochs: 12600 | epoch avg. loss: 0.384 | test avg. loss: 4.421
Epochs: 12601 | epoch avg. loss: 0.669 | test avg. loss: 5.514
Epochs: 12602 | epoch avg. loss: 0.987 | test avg. loss: 3.833


 25%|██▌       | 12606/50000 [18:50<1:30:17,  6.90it/s]

Epochs: 12603 | epoch avg. loss: 0.397 | test avg. loss: 3.807
Epochs: 12604 | epoch avg. loss: 0.175 | test avg. loss: 4.036
Epochs: 12605 | epoch avg. loss: 0.164 | test avg. loss: 3.902


 25%|██▌       | 12608/50000 [18:50<1:19:04,  7.88it/s]

Epochs: 12606 | epoch avg. loss: 0.182 | test avg. loss: 4.064
Epochs: 12607 | epoch avg. loss: 0.148 | test avg. loss: 3.924
Epochs: 12608 | epoch avg. loss: 0.081 | test avg. loss: 3.812


 25%|██▌       | 12612/50000 [18:50<1:08:13,  9.13it/s]

Epochs: 12609 | epoch avg. loss: 0.064 | test avg. loss: 3.713
Epochs: 12610 | epoch avg. loss: 0.100 | test avg. loss: 3.872
Epochs: 12611 | epoch avg. loss: 0.117 | test avg. loss: 3.810


 25%|██▌       | 12614/50000 [18:51<1:05:06,  9.57it/s]

Epochs: 12612 | epoch avg. loss: 0.079 | test avg. loss: 3.670
Epochs: 12613 | epoch avg. loss: 0.067 | test avg. loss: 3.661
Epochs: 12614 | epoch avg. loss: 0.066 | test avg. loss: 3.878


 25%|██▌       | 12618/50000 [18:51<54:29, 11.43it/s]  

Epochs: 12615 | epoch avg. loss: 0.098 | test avg. loss: 3.794
Epochs: 12616 | epoch avg. loss: 0.320 | test avg. loss: 3.826
Epochs: 12617 | epoch avg. loss: 0.120 | test avg. loss: 3.932


 25%|██▌       | 12620/50000 [18:51<52:05, 11.96it/s]

Epochs: 12618 | epoch avg. loss: 0.117 | test avg. loss: 3.749
Epochs: 12619 | epoch avg. loss: 0.259 | test avg. loss: 3.938
Epochs: 12620 | epoch avg. loss: 0.129 | test avg. loss: 3.987


 25%|██▌       | 12624/50000 [18:51<52:09, 11.94it/s]

Epochs: 12621 | epoch avg. loss: 0.083 | test avg. loss: 3.826
Epochs: 12622 | epoch avg. loss: 0.082 | test avg. loss: 3.963
Epochs: 12623 | epoch avg. loss: 0.068 | test avg. loss: 3.797


 25%|██▌       | 12626/50000 [18:52<50:40, 12.29it/s]

Epochs: 12624 | epoch avg. loss: 0.060 | test avg. loss: 3.845
Epochs: 12625 | epoch avg. loss: 0.057 | test avg. loss: 3.813
Epochs: 12626 | epoch avg. loss: 0.055 | test avg. loss: 3.790


 25%|██▌       | 12630/50000 [18:52<52:59, 11.75it/s]

Epochs: 12627 | epoch avg. loss: 0.057 | test avg. loss: 3.929
Epochs: 12628 | epoch avg. loss: 0.069 | test avg. loss: 3.799
Epochs: 12629 | epoch avg. loss: 0.047 | test avg. loss: 3.755


 25%|██▌       | 12632/50000 [18:52<51:54, 12.00it/s]

Epochs: 12630 | epoch avg. loss: 0.048 | test avg. loss: 3.786
Epochs: 12631 | epoch avg. loss: 0.085 | test avg. loss: 3.883
Epochs: 12632 | epoch avg. loss: 0.088 | test avg. loss: 3.708


 25%|██▌       | 12636/50000 [18:52<50:43, 12.28it/s]

Epochs: 12633 | epoch avg. loss: 0.078 | test avg. loss: 3.742
Epochs: 12634 | epoch avg. loss: 0.061 | test avg. loss: 4.015
Epochs: 12635 | epoch avg. loss: 0.092 | test avg. loss: 3.718


 25%|██▌       | 12638/50000 [18:53<50:40, 12.29it/s]

Epochs: 12636 | epoch avg. loss: 0.140 | test avg. loss: 3.742
Epochs: 12637 | epoch avg. loss: 0.094 | test avg. loss: 4.133
Epochs: 12638 | epoch avg. loss: 0.178 | test avg. loss: 3.788


 25%|██▌       | 12642/50000 [18:53<48:29, 12.84it/s]

Epochs: 12639 | epoch avg. loss: 0.155 | test avg. loss: 3.998
Epochs: 12640 | epoch avg. loss: 0.102 | test avg. loss: 4.019
Epochs: 12641 | epoch avg. loss: 0.079 | test avg. loss: 3.805


 25%|██▌       | 12644/50000 [18:53<48:23, 12.87it/s]

Epochs: 12642 | epoch avg. loss: 0.088 | test avg. loss: 4.233
Epochs: 12643 | epoch avg. loss: 0.186 | test avg. loss: 3.687
Epochs: 12644 | epoch avg. loss: 0.319 | test avg. loss: 3.788


 25%|██▌       | 12648/50000 [18:53<50:53, 12.23it/s]

Epochs: 12645 | epoch avg. loss: 0.233 | test avg. loss: 4.580
Epochs: 12646 | epoch avg. loss: 0.407 | test avg. loss: 3.879
Epochs: 12647 | epoch avg. loss: 0.212 | test avg. loss: 3.949


 25%|██▌       | 12650/50000 [18:54<53:46, 11.57it/s]

Epochs: 12648 | epoch avg. loss: 0.117 | test avg. loss: 4.065
Epochs: 12649 | epoch avg. loss: 0.117 | test avg. loss: 3.774
Epochs: 12650 | epoch avg. loss: 0.091 | test avg. loss: 4.262


 25%|██▌       | 12654/50000 [18:54<49:16, 12.63it/s]

Epochs: 12651 | epoch avg. loss: 0.266 | test avg. loss: 3.640
Epochs: 12652 | epoch avg. loss: 0.099 | test avg. loss: 3.731
Epochs: 12653 | epoch avg. loss: 0.135 | test avg. loss: 3.935
Epochs: 12654 | epoch avg. loss: 0.113 | test avg. loss: 3.904


 25%|██▌       | 12658/50000 [18:54<49:32, 12.56it/s]

Epochs: 12655 | epoch avg. loss: 0.202 | test avg. loss: 4.439
Epochs: 12656 | epoch avg. loss: 0.294 | test avg. loss: 3.824
Epochs: 12657 | epoch avg. loss: 0.152 | test avg. loss: 3.737


 25%|██▌       | 12660/50000 [18:54<48:04, 12.94it/s]

Epochs: 12658 | epoch avg. loss: 0.126 | test avg. loss: 4.122
Epochs: 12659 | epoch avg. loss: 0.139 | test avg. loss: 3.962
Epochs: 12660 | epoch avg. loss: 0.120 | test avg. loss: 4.338


 25%|██▌       | 12664/50000 [18:55<50:51, 12.24it/s]

Epochs: 12661 | epoch avg. loss: 0.323 | test avg. loss: 4.186
Epochs: 12662 | epoch avg. loss: 0.183 | test avg. loss: 3.778
Epochs: 12663 | epoch avg. loss: 0.341 | test avg. loss: 3.903


 25%|██▌       | 12666/50000 [18:55<49:26, 12.59it/s]

Epochs: 12664 | epoch avg. loss: 0.230 | test avg. loss: 3.984
Epochs: 12665 | epoch avg. loss: 0.131 | test avg. loss: 4.218
Epochs: 12666 | epoch avg. loss: 0.214 | test avg. loss: 4.783


 25%|██▌       | 12670/50000 [18:55<46:35, 13.35it/s]

Epochs: 12667 | epoch avg. loss: 0.265 | test avg. loss: 4.169
Epochs: 12668 | epoch avg. loss: 0.257 | test avg. loss: 4.077
Epochs: 12669 | epoch avg. loss: 0.142 | test avg. loss: 4.308


 25%|██▌       | 12672/50000 [18:55<45:59, 13.53it/s]

Epochs: 12670 | epoch avg. loss: 0.111 | test avg. loss: 3.901
Epochs: 12671 | epoch avg. loss: 0.220 | test avg. loss: 3.905
Epochs: 12672 | epoch avg. loss: 0.072 | test avg. loss: 4.130


 25%|██▌       | 12676/50000 [18:56<47:52, 12.99it/s]

Epochs: 12673 | epoch avg. loss: 0.088 | test avg. loss: 3.952
Epochs: 12674 | epoch avg. loss: 0.142 | test avg. loss: 4.107
Epochs: 12675 | epoch avg. loss: 0.124 | test avg. loss: 4.018


 25%|██▌       | 12678/50000 [18:56<50:52, 12.23it/s]

Epochs: 12676 | epoch avg. loss: 0.104 | test avg. loss: 4.054
Epochs: 12677 | epoch avg. loss: 0.116 | test avg. loss: 4.654
Epochs: 12678 | epoch avg. loss: 0.330 | test avg. loss: 3.862


 25%|██▌       | 12682/50000 [18:56<50:12, 12.39it/s]

Epochs: 12679 | epoch avg. loss: 0.129 | test avg. loss: 4.016
Epochs: 12680 | epoch avg. loss: 0.245 | test avg. loss: 3.800
Epochs: 12681 | epoch avg. loss: 0.134 | test avg. loss: 3.679


 25%|██▌       | 12684/50000 [18:56<50:08, 12.40it/s]

Epochs: 12682 | epoch avg. loss: 0.072 | test avg. loss: 3.949
Epochs: 12683 | epoch avg. loss: 0.045 | test avg. loss: 4.064
Epochs: 12684 | epoch avg. loss: 0.073 | test avg. loss: 4.383


 25%|██▌       | 12688/50000 [18:57<48:17, 12.88it/s]

Epochs: 12685 | epoch avg. loss: 0.183 | test avg. loss: 4.014
Epochs: 12686 | epoch avg. loss: 0.057 | test avg. loss: 3.801
Epochs: 12687 | epoch avg. loss: 0.054 | test avg. loss: 3.780


 25%|██▌       | 12690/50000 [18:57<47:55, 12.97it/s]

Epochs: 12688 | epoch avg. loss: 0.061 | test avg. loss: 4.087
Epochs: 12689 | epoch avg. loss: 0.097 | test avg. loss: 4.028
Epochs: 12690 | epoch avg. loss: 0.118 | test avg. loss: 4.132


 25%|██▌       | 12694/50000 [18:57<47:20, 13.13it/s]

Epochs: 12691 | epoch avg. loss: 0.076 | test avg. loss: 4.211
Epochs: 12692 | epoch avg. loss: 0.091 | test avg. loss: 3.887
Epochs: 12693 | epoch avg. loss: 0.059 | test avg. loss: 3.740


 25%|██▌       | 12696/50000 [18:57<47:07, 13.19it/s]

Epochs: 12694 | epoch avg. loss: 0.082 | test avg. loss: 3.895
Epochs: 12695 | epoch avg. loss: 0.049 | test avg. loss: 4.042
Epochs: 12696 | epoch avg. loss: 0.045 | test avg. loss: 4.144


 25%|██▌       | 12698/50000 [18:57<49:41, 12.51it/s]

Epochs: 12697 | epoch avg. loss: 0.060 | test avg. loss: 4.261
Epochs: 12698 | epoch avg. loss: 0.083 | test avg. loss: 3.897
Epochs: 12699 | epoch avg. loss: 0.105 | test avg. loss: 3.916


 25%|██▌       | 12704/50000 [18:59<1:49:14,  5.69it/s]

Epochs: 12700 | epoch avg. loss: 0.054 | test avg. loss: 4.054
Epochs: 12701 | epoch avg. loss: 0.057 | test avg. loss: 4.031
Epochs: 12702 | epoch avg. loss: 0.097 | test avg. loss: 4.509
Epochs: 12703 | epoch avg. loss: 0.169 | test avg. loss: 3.990


 25%|██▌       | 12708/50000 [18:59<1:14:04,  8.39it/s]

Epochs: 12704 | epoch avg. loss: 0.176 | test avg. loss: 3.978
Epochs: 12705 | epoch avg. loss: 0.218 | test avg. loss: 4.074
Epochs: 12706 | epoch avg. loss: 0.184 | test avg. loss: 3.823
Epochs: 12707 | epoch avg. loss: 0.231 | test avg. loss: 4.173


 25%|██▌       | 12712/50000 [19:00<55:48, 11.14it/s]  

Epochs: 12708 | epoch avg. loss: 0.143 | test avg. loss: 4.130
Epochs: 12709 | epoch avg. loss: 0.164 | test avg. loss: 4.251
Epochs: 12710 | epoch avg. loss: 0.116 | test avg. loss: 4.450
Epochs: 12711 | epoch avg. loss: 0.136 | test avg. loss: 3.879


 25%|██▌       | 12716/50000 [19:00<47:30, 13.08it/s]

Epochs: 12712 | epoch avg. loss: 0.112 | test avg. loss: 3.688
Epochs: 12713 | epoch avg. loss: 0.078 | test avg. loss: 3.851
Epochs: 12714 | epoch avg. loss: 0.066 | test avg. loss: 4.005
Epochs: 12715 | epoch avg. loss: 0.075 | test avg. loss: 4.316


 25%|██▌       | 12718/50000 [19:00<46:35, 13.34it/s]

Epochs: 12716 | epoch avg. loss: 0.075 | test avg. loss: 4.119
Epochs: 12717 | epoch avg. loss: 0.095 | test avg. loss: 3.978
Epochs: 12718 | epoch avg. loss: 0.118 | test avg. loss: 4.042


 25%|██▌       | 12722/50000 [19:00<45:42, 13.59it/s]

Epochs: 12719 | epoch avg. loss: 0.077 | test avg. loss: 3.935
Epochs: 12720 | epoch avg. loss: 0.041 | test avg. loss: 4.092
Epochs: 12721 | epoch avg. loss: 0.081 | test avg. loss: 3.994


 25%|██▌       | 12724/50000 [19:01<45:21, 13.70it/s]

Epochs: 12722 | epoch avg. loss: 0.040 | test avg. loss: 4.051
Epochs: 12723 | epoch avg. loss: 0.041 | test avg. loss: 3.989
Epochs: 12724 | epoch avg. loss: 0.064 | test avg. loss: 4.268
Epochs: 12725 | epoch avg. loss: 0.103 | test avg. loss: 4.098


 25%|██▌       | 12730/50000 [19:01<42:12, 14.71it/s]

Epochs: 12726 | epoch avg. loss: 0.061 | test avg. loss: 4.033
Epochs: 12727 | epoch avg. loss: 0.060 | test avg. loss: 4.082
Epochs: 12728 | epoch avg. loss: 0.082 | test avg. loss: 3.971
Epochs: 12729 | epoch avg. loss: 0.330 | test avg. loss: 4.215


 25%|██▌       | 12734/50000 [19:01<42:04, 14.76it/s]

Epochs: 12730 | epoch avg. loss: 0.234 | test avg. loss: 4.092
Epochs: 12731 | epoch avg. loss: 0.119 | test avg. loss: 4.103
Epochs: 12732 | epoch avg. loss: 0.103 | test avg. loss: 4.174
Epochs: 12733 | epoch avg. loss: 0.075 | test avg. loss: 3.946


 25%|██▌       | 12736/50000 [19:01<43:17, 14.35it/s]

Epochs: 12734 | epoch avg. loss: 0.060 | test avg. loss: 4.082
Epochs: 12735 | epoch avg. loss: 0.175 | test avg. loss: 3.857
Epochs: 12736 | epoch avg. loss: 0.066 | test avg. loss: 3.761


 25%|██▌       | 12740/50000 [19:02<46:29, 13.36it/s]

Epochs: 12737 | epoch avg. loss: 0.065 | test avg. loss: 3.874
Epochs: 12738 | epoch avg. loss: 0.186 | test avg. loss: 4.202
Epochs: 12739 | epoch avg. loss: 0.144 | test avg. loss: 4.431


                                                     

Epochs: 12740 | epoch avg. loss: 0.122 | test avg. loss: 4.188
Epochs: 12741 | epoch avg. loss: 0.274 | test avg. loss: 3.999


 25%|██▌       | 12744/50000 [19:02<52:27, 11.84it/s]

Epochs: 12742 | epoch avg. loss: 0.055 | test avg. loss: 3.821
Epochs: 12743 | epoch avg. loss: 0.084 | test avg. loss: 3.799
Epochs: 12744 | epoch avg. loss: 0.136 | test avg. loss: 4.224


 25%|██▌       | 12748/50000 [19:02<48:43, 12.74it/s]

Epochs: 12745 | epoch avg. loss: 0.210 | test avg. loss: 3.986
Epochs: 12746 | epoch avg. loss: 0.087 | test avg. loss: 3.972
Epochs: 12747 | epoch avg. loss: 0.188 | test avg. loss: 4.229


 26%|██▌       | 12750/50000 [19:03<51:19, 12.10it/s]

Epochs: 12748 | epoch avg. loss: 0.165 | test avg. loss: 3.982
Epochs: 12749 | epoch avg. loss: 0.079 | test avg. loss: 3.966
Epochs: 12750 | epoch avg. loss: 0.057 | test avg. loss: 3.798


 26%|██▌       | 12754/50000 [19:03<46:16, 13.42it/s]

Epochs: 12751 | epoch avg. loss: 0.059 | test avg. loss: 3.705
Epochs: 12752 | epoch avg. loss: 0.060 | test avg. loss: 3.874
Epochs: 12753 | epoch avg. loss: 0.064 | test avg. loss: 3.943
Epochs: 12754 | epoch avg. loss: 0.260 | test avg. loss: 4.066


 26%|██▌       | 12758/50000 [19:03<43:18, 14.33it/s]

Epochs: 12755 | epoch avg. loss: 0.095 | test avg. loss: 4.398
Epochs: 12756 | epoch avg. loss: 0.141 | test avg. loss: 4.060
Epochs: 12757 | epoch avg. loss: 0.243 | test avg. loss: 3.958
Epochs: 12758 | epoch avg. loss: 0.085 | test avg. loss: 3.699


 26%|██▌       | 12760/50000 [19:03<42:32, 14.59it/s]

Epochs: 12759 | epoch avg. loss: 0.214 | test avg. loss: 3.866
Epochs: 12760 | epoch avg. loss: 0.371 | test avg. loss: 4.384


 26%|██▌       | 12762/50000 [19:04<51:50, 11.97it/s]

Epochs: 12761 | epoch avg. loss: 0.321 | test avg. loss: 4.039
Epochs: 12762 | epoch avg. loss: 0.174 | test avg. loss: 4.312


 26%|██▌       | 12766/50000 [19:04<57:30, 10.79it/s]

Epochs: 12763 | epoch avg. loss: 0.114 | test avg. loss: 4.224
Epochs: 12764 | epoch avg. loss: 0.187 | test avg. loss: 4.038
Epochs: 12765 | epoch avg. loss: 0.152 | test avg. loss: 4.202


 26%|██▌       | 12768/50000 [19:04<56:18, 11.02it/s]

Epochs: 12766 | epoch avg. loss: 0.169 | test avg. loss: 3.701
Epochs: 12767 | epoch avg. loss: 0.139 | test avg. loss: 3.809
Epochs: 12768 | epoch avg. loss: 0.077 | test avg. loss: 3.822


 26%|██▌       | 12772/50000 [19:04<54:57, 11.29it/s]

Epochs: 12769 | epoch avg. loss: 0.121 | test avg. loss: 4.025
Epochs: 12770 | epoch avg. loss: 0.126 | test avg. loss: 4.221
Epochs: 12771 | epoch avg. loss: 0.057 | test avg. loss: 3.979


 26%|██▌       | 12774/50000 [19:05<53:03, 11.69it/s]

Epochs: 12772 | epoch avg. loss: 0.124 | test avg. loss: 4.064
Epochs: 12773 | epoch avg. loss: 0.183 | test avg. loss: 4.180
Epochs: 12774 | epoch avg. loss: 0.097 | test avg. loss: 4.202


 26%|██▌       | 12778/50000 [19:05<47:49, 12.97it/s]

Epochs: 12775 | epoch avg. loss: 0.126 | test avg. loss: 4.564
Epochs: 12776 | epoch avg. loss: 0.292 | test avg. loss: 4.285
Epochs: 12777 | epoch avg. loss: 0.127 | test avg. loss: 3.800
Epochs: 12778 | epoch avg. loss: 0.073 | test avg. loss: 3.767


 26%|██▌       | 12782/50000 [19:05<43:30, 14.26it/s]

Epochs: 12779 | epoch avg. loss: 0.084 | test avg. loss: 3.915
Epochs: 12780 | epoch avg. loss: 0.139 | test avg. loss: 4.163
Epochs: 12781 | epoch avg. loss: 0.076 | test avg. loss: 4.338
Epochs: 12782 | epoch avg. loss: 0.082 | test avg. loss: 4.161


 26%|██▌       | 12786/50000 [19:05<43:31, 14.25it/s]

Epochs: 12783 | epoch avg. loss: 0.053 | test avg. loss: 3.850
Epochs: 12784 | epoch avg. loss: 0.045 | test avg. loss: 3.838
Epochs: 12785 | epoch avg. loss: 0.087 | test avg. loss: 3.801


 26%|██▌       | 12788/50000 [19:06<45:33, 13.62it/s]

Epochs: 12786 | epoch avg. loss: 0.047 | test avg. loss: 3.935
Epochs: 12787 | epoch avg. loss: 0.048 | test avg. loss: 3.975
Epochs: 12788 | epoch avg. loss: 0.047 | test avg. loss: 4.094


 26%|██▌       | 12792/50000 [19:06<45:38, 13.59it/s]

Epochs: 12789 | epoch avg. loss: 0.070 | test avg. loss: 4.083
Epochs: 12790 | epoch avg. loss: 0.072 | test avg. loss: 3.974
Epochs: 12791 | epoch avg. loss: 0.286 | test avg. loss: 4.304


 26%|██▌       | 12794/50000 [19:06<43:27, 14.27it/s]

Epochs: 12792 | epoch avg. loss: 0.383 | test avg. loss: 4.089
Epochs: 12793 | epoch avg. loss: 0.285 | test avg. loss: 4.088
Epochs: 12794 | epoch avg. loss: 0.162 | test avg. loss: 4.227
Epochs: 12795 | epoch avg. loss: 0.145 | test avg. loss: 3.694


 26%|██▌       | 12798/50000 [19:06<43:01, 14.41it/s]

Epochs: 12796 | epoch avg. loss: 0.333 | test avg. loss: 3.538
Epochs: 12797 | epoch avg. loss: 0.190 | test avg. loss: 4.333
Epochs: 12798 | epoch avg. loss: 0.362 | test avg. loss: 3.870
Epochs: 12799 | epoch avg. loss: 0.318 | test avg. loss: 3.961


 26%|██▌       | 12804/50000 [19:08<1:40:22,  6.18it/s]

Epochs: 12800 | epoch avg. loss: 0.120 | test avg. loss: 4.265
Epochs: 12801 | epoch avg. loss: 0.118 | test avg. loss: 3.980
Epochs: 12802 | epoch avg. loss: 0.391 | test avg. loss: 4.051
Epochs: 12803 | epoch avg. loss: 0.420 | test avg. loss: 4.103


 26%|██▌       | 12808/50000 [19:08<1:08:28,  9.05it/s]

Epochs: 12804 | epoch avg. loss: 0.507 | test avg. loss: 6.541
Epochs: 12805 | epoch avg. loss: 3.410 | test avg. loss: 5.525
Epochs: 12806 | epoch avg. loss: 2.437 | test avg. loss: 5.348
Epochs: 12807 | epoch avg. loss: 1.161 | test avg. loss: 5.916


 26%|██▌       | 12810/50000 [19:08<1:01:50, 10.02it/s]

Epochs: 12808 | epoch avg. loss: 2.004 | test avg. loss: 6.515
Epochs: 12809 | epoch avg. loss: 2.064 | test avg. loss: 4.018
Epochs: 12810 | epoch avg. loss: 0.750 | test avg. loss: 4.269


 26%|██▌       | 12814/50000 [19:09<57:33, 10.77it/s]

Epochs: 12811 | epoch avg. loss: 0.944 | test avg. loss: 4.769
Epochs: 12812 | epoch avg. loss: 0.616 | test avg. loss: 4.565
Epochs: 12813 | epoch avg. loss: 0.479 | test avg. loss: 4.287




Epochs: 12814 | epoch avg. loss: 0.295 | test avg. loss: 3.716
Epochs: 12815 | epoch avg. loss: 0.706 | test avg. loss: 3.414
Epochs: 12816 | epoch avg. loss: 0.751 | test avg. loss: 4.224


 26%|██▌       | 12820/50000 [19:09<49:07, 12.61it/s]

Epochs: 12817 | epoch avg. loss: 0.513 | test avg. loss: 4.858
Epochs: 12818 | epoch avg. loss: 1.236 | test avg. loss: 5.504
Epochs: 12819 | epoch avg. loss: 0.987 | test avg. loss: 4.866


 26%|██▌       | 12822/50000 [19:09<46:40, 13.27it/s]

Epochs: 12820 | epoch avg. loss: 0.945 | test avg. loss: 4.517
Epochs: 12821 | epoch avg. loss: 0.773 | test avg. loss: 5.239
Epochs: 12822 | epoch avg. loss: 0.935 | test avg. loss: 5.317
Epochs: 12823 | epoch avg. loss: 2.917 | test avg. loss: 5.016


 26%|██▌       | 12826/50000 [19:09<46:44, 13.25it/s]

Epochs: 12824 | epoch avg. loss: 2.498 | test avg. loss: 3.356
Epochs: 12825 | epoch avg. loss: 0.826 | test avg. loss: 3.789
Epochs: 12826 | epoch avg. loss: 0.802 | test avg. loss: 5.776


 26%|██▌       | 12830/50000 [19:10<51:44, 11.97it/s]

Epochs: 12827 | epoch avg. loss: 1.211 | test avg. loss: 4.848
Epochs: 12828 | epoch avg. loss: 1.010 | test avg. loss: 5.649
Epochs: 12829 | epoch avg. loss: 1.348 | test avg. loss: 4.089


 26%|██▌       | 12832/50000 [19:10<51:34, 12.01it/s]

Epochs: 12830 | epoch avg. loss: 0.965 | test avg. loss: 4.215
Epochs: 12831 | epoch avg. loss: 0.646 | test avg. loss: 4.325
Epochs: 12832 | epoch avg. loss: 0.592 | test avg. loss: 4.643




Epochs: 12833 | epoch avg. loss: 0.751 | test avg. loss: 6.423
Epochs: 12834 | epoch avg. loss: 1.321 | test avg. loss: 4.838
Epochs: 12835 | epoch avg. loss: 0.952 | test avg. loss: 6.000
Epochs: 12836 | epoch avg. loss: 1.274 | test avg. loss: 4.026

 26%|██▌       | 12838/50000 [19:10<47:16, 13.10it/s]


Epochs: 12837 | epoch avg. loss: 1.379 | test avg. loss: 3.984
Epochs: 12838 | epoch avg. loss: 0.510 | test avg. loss: 4.339


 26%|██▌       | 12842/50000 [19:11<45:45, 13.53it/s]

Epochs: 12839 | epoch avg. loss: 0.398 | test avg. loss: 4.394
Epochs: 12840 | epoch avg. loss: 0.535 | test avg. loss: 5.188
Epochs: 12841 | epoch avg. loss: 0.628 | test avg. loss: 4.362
Epochs: 12842 | epoch avg. loss: 0.328 | test avg. loss: 4.299


 26%|██▌       | 12846/50000 [19:11<42:17, 14.64it/s]

Epochs: 12843 | epoch avg. loss: 0.170 | test avg. loss: 3.867
Epochs: 12844 | epoch avg. loss: 0.270 | test avg. loss: 3.788
Epochs: 12845 | epoch avg. loss: 0.174 | test avg. loss: 3.865


 26%|██▌       | 12848/50000 [19:11<44:24, 13.95it/s]

Epochs: 12846 | epoch avg. loss: 0.183 | test avg. loss: 4.009
Epochs: 12847 | epoch avg. loss: 0.252 | test avg. loss: 4.285
Epochs: 12848 | epoch avg. loss: 0.127 | test avg. loss: 4.178


 26%|██▌       | 12852/50000 [19:11<46:31, 13.31it/s]

Epochs: 12849 | epoch avg. loss: 0.167 | test avg. loss: 4.092
Epochs: 12850 | epoch avg. loss: 0.074 | test avg. loss: 3.834
Epochs: 12851 | epoch avg. loss: 0.090 | test avg. loss: 3.797


                                                     

Epochs: 12852 | epoch avg. loss: 0.091 | test avg. loss: 4.043
Epochs: 12853 | epoch avg. loss: 0.072 | test avg. loss: 4.168
Epochs: 12854 | epoch avg. loss: 0.059 | test avg. loss: 4.325


 26%|██▌       | 12858/50000 [19:12<46:34, 13.29it/s]

Epochs: 12855 | epoch avg. loss: 0.058 | test avg. loss: 4.252
Epochs: 12856 | epoch avg. loss: 0.068 | test avg. loss: 4.483
Epochs: 12857 | epoch avg. loss: 0.150 | test avg. loss: 4.082


 26%|██▌       | 12860/50000 [19:12<46:27, 13.33it/s]

Epochs: 12858 | epoch avg. loss: 0.084 | test avg. loss: 4.242
Epochs: 12859 | epoch avg. loss: 0.145 | test avg. loss: 3.957
Epochs: 12860 | epoch avg. loss: 0.081 | test avg. loss: 4.065


 26%|██▌       | 12864/50000 [19:12<45:41, 13.54it/s]

Epochs: 12861 | epoch avg. loss: 0.104 | test avg. loss: 4.043
Epochs: 12862 | epoch avg. loss: 0.095 | test avg. loss: 4.185
Epochs: 12863 | epoch avg. loss: 0.094 | test avg. loss: 4.318


 26%|██▌       | 12866/50000 [19:13<49:14, 12.57it/s]

Epochs: 12864 | epoch avg. loss: 0.102 | test avg. loss: 4.120
Epochs: 12865 | epoch avg. loss: 0.132 | test avg. loss: 4.486
Epochs: 12866 | epoch avg. loss: 0.156 | test avg. loss: 4.260


 26%|██▌       | 12870/50000 [19:13<49:39, 12.46it/s]

Epochs: 12867 | epoch avg. loss: 0.384 | test avg. loss: 4.866
Epochs: 12868 | epoch avg. loss: 0.452 | test avg. loss: 4.083
Epochs: 12869 | epoch avg. loss: 0.253 | test avg. loss: 4.222


 26%|██▌       | 12872/50000 [19:13<48:26, 12.77it/s]

Epochs: 12870 | epoch avg. loss: 0.206 | test avg. loss: 4.133
Epochs: 12871 | epoch avg. loss: 0.167 | test avg. loss: 3.999
Epochs: 12872 | epoch avg. loss: 0.251 | test avg. loss: 4.630


 26%|██▌       | 12876/50000 [19:13<46:53, 13.19it/s]

Epochs: 12873 | epoch avg. loss: 0.373 | test avg. loss: 4.030
Epochs: 12874 | epoch avg. loss: 0.357 | test avg. loss: 4.185
Epochs: 12875 | epoch avg. loss: 0.151 | test avg. loss: 3.986


 26%|██▌       | 12878/50000 [19:13<47:52, 12.92it/s]

Epochs: 12876 | epoch avg. loss: 0.157 | test avg. loss: 4.167
Epochs: 12877 | epoch avg. loss: 0.306 | test avg. loss: 4.085
Epochs: 12878 | epoch avg. loss: 0.142 | test avg. loss: 3.784




Epochs: 12879 | epoch avg. loss: 0.125 | test avg. loss: 4.263
Epochs: 12880 | epoch avg. loss: 0.189 | test avg. loss: 4.008
Epochs: 12881 | epoch avg. loss: 0.130 | test avg. loss: 4.537


 26%|██▌       | 12884/50000 [19:14<45:40, 13.54it/s]

Epochs: 12882 | epoch avg. loss: 0.216 | test avg. loss: 4.116
Epochs: 12883 | epoch avg. loss: 0.134 | test avg. loss: 4.187
Epochs: 12884 | epoch avg. loss: 0.139 | test avg. loss: 4.131




Epochs: 12885 | epoch avg. loss: 0.098 | test avg. loss: 3.906
Epochs: 12886 | epoch avg. loss: 0.083 | test avg. loss: 4.082
Epochs: 12887 | epoch avg. loss: 0.074 | test avg. loss: 4.032


 26%|██▌       | 12890/50000 [19:14<45:03, 13.73it/s]

Epochs: 12888 | epoch avg. loss: 0.081 | test avg. loss: 4.197
Epochs: 12889 | epoch avg. loss: 0.090 | test avg. loss: 4.243
Epochs: 12890 | epoch avg. loss: 0.072 | test avg. loss: 4.003


 26%|██▌       | 12894/50000 [19:15<45:41, 13.53it/s]

Epochs: 12891 | epoch avg. loss: 0.064 | test avg. loss: 4.028
Epochs: 12892 | epoch avg. loss: 0.046 | test avg. loss: 4.076
Epochs: 12893 | epoch avg. loss: 0.047 | test avg. loss: 4.120


 26%|██▌       | 12898/50000 [19:15<42:02, 14.71it/s]

Epochs: 12894 | epoch avg. loss: 0.041 | test avg. loss: 4.202
Epochs: 12895 | epoch avg. loss: 0.042 | test avg. loss: 4.139
Epochs: 12896 | epoch avg. loss: 0.044 | test avg. loss: 4.134
Epochs: 12897 | epoch avg. loss: 0.044 | test avg. loss: 3.969


 26%|██▌       | 12898/50000 [19:15<42:02, 14.71it/s]

Epochs: 12898 | epoch avg. loss: 0.069 | test avg. loss: 4.199
Epochs: 12899 | epoch avg. loss: 0.104 | test avg. loss: 3.967


 26%|██▌       | 12902/50000 [19:17<2:25:22,  4.25it/s]

Epochs: 12900 | epoch avg. loss: 0.126 | test avg. loss: 4.078
Epochs: 12901 | epoch avg. loss: 0.093 | test avg. loss: 4.255
Epochs: 12902 | epoch avg. loss: 0.102 | test avg. loss: 4.055


 26%|██▌       | 12906/50000 [19:17<1:36:01,  6.44it/s]

Epochs: 12903 | epoch avg. loss: 0.057 | test avg. loss: 4.137
Epochs: 12904 | epoch avg. loss: 0.044 | test avg. loss: 4.033
Epochs: 12905 | epoch avg. loss: 0.065 | test avg. loss: 4.131




Epochs: 12906 | epoch avg. loss: 0.052 | test avg. loss: 4.015
Epochs: 12907 | epoch avg. loss: 0.053 | test avg. loss: 4.107
Epochs: 12908 | epoch avg. loss: 0.059 | test avg. loss: 4.027


 26%|██▌       | 12912/50000 [19:17<1:02:23,  9.91it/s]

Epochs: 12909 | epoch avg. loss: 0.046 | test avg. loss: 4.094
Epochs: 12910 | epoch avg. loss: 0.059 | test avg. loss: 4.154
Epochs: 12911 | epoch avg. loss: 0.051 | test avg. loss: 4.135


 26%|██▌       | 12914/50000 [19:18<58:05, 10.64it/s]

Epochs: 12912 | epoch avg. loss: 0.063 | test avg. loss: 4.152
Epochs: 12913 | epoch avg. loss: 0.088 | test avg. loss: 4.106
Epochs: 12914 | epoch avg. loss: 0.264 | test avg. loss: 4.803


 26%|██▌       | 12918/50000 [19:18<55:54, 11.06it/s]

Epochs: 12915 | epoch avg. loss: 0.342 | test avg. loss: 4.092
Epochs: 12916 | epoch avg. loss: 0.386 | test avg. loss: 4.353
Epochs: 12917 | epoch avg. loss: 0.375 | test avg. loss: 3.974


 26%|██▌       | 12920/50000 [19:18<54:18, 11.38it/s]

Epochs: 12918 | epoch avg. loss: 0.527 | test avg. loss: 3.926
Epochs: 12919 | epoch avg. loss: 0.323 | test avg. loss: 4.714
Epochs: 12920 | epoch avg. loss: 0.411 | test avg. loss: 4.221


 26%|██▌       | 12924/50000 [19:18<51:41, 11.96it/s]

Epochs: 12921 | epoch avg. loss: 0.369 | test avg. loss: 4.798
Epochs: 12922 | epoch avg. loss: 0.402 | test avg. loss: 4.234
Epochs: 12923 | epoch avg. loss: 0.652 | test avg. loss: 4.157


 26%|██▌       | 12926/50000 [19:19<49:47, 12.41it/s]

Epochs: 12924 | epoch avg. loss: 0.205 | test avg. loss: 4.586
Epochs: 12925 | epoch avg. loss: 0.190 | test avg. loss: 4.202
Epochs: 12926 | epoch avg. loss: 0.132 | test avg. loss: 4.060


 26%|██▌       | 12930/50000 [19:19<46:50, 13.19it/s]

Epochs: 12927 | epoch avg. loss: 0.054 | test avg. loss: 3.837
Epochs: 12928 | epoch avg. loss: 0.087 | test avg. loss: 3.968
Epochs: 12929 | epoch avg. loss: 0.059 | test avg. loss: 4.012


 26%|██▌       | 12934/50000 [19:19<43:20, 14.25it/s]

Epochs: 12930 | epoch avg. loss: 0.083 | test avg. loss: 4.144
Epochs: 12931 | epoch avg. loss: 0.054 | test avg. loss: 4.219
Epochs: 12932 | epoch avg. loss: 0.060 | test avg. loss: 4.065
Epochs: 12933 | epoch avg. loss: 0.075 | test avg. loss: 4.115


 26%|██▌       | 12936/50000 [19:19<41:53, 14.74it/s]

Epochs: 12934 | epoch avg. loss: 0.052 | test avg. loss: 3.974
Epochs: 12935 | epoch avg. loss: 0.041 | test avg. loss: 4.164
Epochs: 12936 | epoch avg. loss: 0.064 | test avg. loss: 4.018


 26%|██▌       | 12940/50000 [19:20<44:24, 13.91it/s]

Epochs: 12937 | epoch avg. loss: 0.117 | test avg. loss: 4.364
Epochs: 12938 | epoch avg. loss: 0.155 | test avg. loss: 4.184
Epochs: 12939 | epoch avg. loss: 0.075 | test avg. loss: 4.072


 26%|██▌       | 12944/50000 [19:20<43:10, 14.30it/s]

Epochs: 12940 | epoch avg. loss: 0.082 | test avg. loss: 4.141
Epochs: 12941 | epoch avg. loss: 0.114 | test avg. loss: 3.851
Epochs: 12942 | epoch avg. loss: 0.077 | test avg. loss: 4.078
Epochs: 12943 | epoch avg. loss: 0.057 | test avg. loss: 4.159


 26%|██▌       | 12948/50000 [19:20<40:40, 15.18it/s]

Epochs: 12944 | epoch avg. loss: 0.065 | test avg. loss: 4.307
Epochs: 12945 | epoch avg. loss: 0.072 | test avg. loss: 4.305
Epochs: 12946 | epoch avg. loss: 0.065 | test avg. loss: 4.018
Epochs: 12947 | epoch avg. loss: 0.079 | test avg. loss: 4.027


 26%|██▌       | 12952/50000 [19:20<40:34, 15.22it/s]

Epochs: 12948 | epoch avg. loss: 0.054 | test avg. loss: 3.961
Epochs: 12949 | epoch avg. loss: 0.076 | test avg. loss: 4.069
Epochs: 12950 | epoch avg. loss: 0.067 | test avg. loss: 4.493
Epochs: 12951 | epoch avg. loss: 0.115 | test avg. loss: 4.207




Epochs: 12952 | epoch avg. loss: 0.239 | test avg. loss: 4.258
Epochs: 12953 | epoch avg. loss: 0.072 | test avg. loss: 4.095
Epochs: 12954 | epoch avg. loss: 0.122 | test avg. loss: 4.291


 26%|██▌       | 12958/50000 [19:21<42:58, 14.37it/s]

Epochs: 12955 | epoch avg. loss: 0.204 | test avg. loss: 4.077
Epochs: 12956 | epoch avg. loss: 0.153 | test avg. loss: 4.030
Epochs: 12957 | epoch avg. loss: 0.144 | test avg. loss: 4.231


 26%|██▌       | 12962/50000 [19:21<40:42, 15.17it/s]

Epochs: 12958 | epoch avg. loss: 0.086 | test avg. loss: 3.979
Epochs: 12959 | epoch avg. loss: 0.120 | test avg. loss: 4.280
Epochs: 12960 | epoch avg. loss: 0.148 | test avg. loss: 4.077
Epochs: 12961 | epoch avg. loss: 0.085 | test avg. loss: 4.104


 26%|██▌       | 12966/50000 [19:21<39:55, 15.46it/s]

Epochs: 12962 | epoch avg. loss: 0.083 | test avg. loss: 4.442
Epochs: 12963 | epoch avg. loss: 0.165 | test avg. loss: 4.044
Epochs: 12964 | epoch avg. loss: 0.131 | test avg. loss: 3.990
Epochs: 12965 | epoch avg. loss: 0.123 | test avg. loss: 4.337


 26%|██▌       | 12968/50000 [19:21<41:11, 14.99it/s]

Epochs: 12966 | epoch avg. loss: 0.133 | test avg. loss: 4.023
Epochs: 12967 | epoch avg. loss: 0.185 | test avg. loss: 4.332
Epochs: 12968 | epoch avg. loss: 0.111 | test avg. loss: 4.038


 26%|██▌       | 12972/50000 [19:22<42:14, 14.61it/s]

Epochs: 12969 | epoch avg. loss: 0.145 | test avg. loss: 4.065
Epochs: 12970 | epoch avg. loss: 0.076 | test avg. loss: 4.303
Epochs: 12971 | epoch avg. loss: 0.099 | test avg. loss: 4.077


 26%|██▌       | 12974/50000 [19:22<47:19, 13.04it/s]

Epochs: 12972 | epoch avg. loss: 0.056 | test avg. loss: 4.205
Epochs: 12973 | epoch avg. loss: 0.052 | test avg. loss: 3.991
Epochs: 12974 | epoch avg. loss: 0.138 | test avg. loss: 4.122


 26%|██▌       | 12978/50000 [19:22<46:11, 13.36it/s]

Epochs: 12975 | epoch avg. loss: 0.080 | test avg. loss: 4.207
Epochs: 12976 | epoch avg. loss: 0.065 | test avg. loss: 4.109
Epochs: 12977 | epoch avg. loss: 0.067 | test avg. loss: 4.334


 26%|██▌       | 12980/50000 [19:22<47:33, 12.97it/s]

Epochs: 12978 | epoch avg. loss: 0.083 | test avg. loss: 4.079
Epochs: 12979 | epoch avg. loss: 0.157 | test avg. loss: 4.128
Epochs: 12980 | epoch avg. loss: 0.080 | test avg. loss: 4.051


 26%|██▌       | 12984/50000 [19:23<45:53, 13.44it/s]

Epochs: 12981 | epoch avg. loss: 0.075 | test avg. loss: 3.944
Epochs: 12982 | epoch avg. loss: 0.070 | test avg. loss: 4.116
Epochs: 12983 | epoch avg. loss: 0.090 | test avg. loss: 4.084


 26%|██▌       | 12986/50000 [19:23<45:02, 13.69it/s]

Epochs: 12984 | epoch avg. loss: 0.046 | test avg. loss: 4.087
Epochs: 12985 | epoch avg. loss: 0.049 | test avg. loss: 4.174
Epochs: 12986 | epoch avg. loss: 0.039 | test avg. loss: 4.102


                                                     

Epochs: 12987 | epoch avg. loss: 0.086 | test avg. loss: 4.080
Epochs: 12988 | epoch avg. loss: 0.065 | test avg. loss: 4.390
Epochs: 12989 | epoch avg. loss: 0.107 | test avg. loss: 4.094


 26%|██▌       | 12992/50000 [19:23<44:15, 13.94it/s]

Epochs: 12990 | epoch avg. loss: 0.176 | test avg. loss: 4.854
Epochs: 12991 | epoch avg. loss: 0.363 | test avg. loss: 4.107
Epochs: 12992 | epoch avg. loss: 0.332 | test avg. loss: 4.020


 26%|██▌       | 12996/50000 [19:24<48:19, 12.76it/s]

Epochs: 12993 | epoch avg. loss: 0.162 | test avg. loss: 4.232
Epochs: 12994 | epoch avg. loss: 0.160 | test avg. loss: 3.984
Epochs: 12995 | epoch avg. loss: 0.058 | test avg. loss: 4.241


 26%|██▌       | 12998/50000 [19:24<46:40, 13.21it/s]

Epochs: 12996 | epoch avg. loss: 0.094 | test avg. loss: 3.981
Epochs: 12997 | epoch avg. loss: 0.067 | test avg. loss: 4.001
Epochs: 12998 | epoch avg. loss: 0.061 | test avg. loss: 4.141


 26%|██▌       | 12998/50000 [19:24<46:40, 13.21it/s]

Epochs: 12999 | epoch avg. loss: 0.070 | test avg. loss: 4.014


 26%|██▌       | 13002/50000 [19:25<2:16:41,  4.51it/s]

Epochs: 13000 | epoch avg. loss: 0.087 | test avg. loss: 4.393
Epochs: 13001 | epoch avg. loss: 0.209 | test avg. loss: 4.058
Epochs: 13002 | epoch avg. loss: 0.051 | test avg. loss: 4.050




Epochs: 13003 | epoch avg. loss: 0.072 | test avg. loss: 4.077
Epochs: 13004 | epoch avg. loss: 0.055 | test avg. loss: 4.114
Epochs: 13005 | epoch avg. loss: 0.062 | test avg. loss: 4.220


                                                       

Epochs: 13006 | epoch avg. loss: 0.046 | test avg. loss: 4.120
Epochs: 13007 | epoch avg. loss: 0.079 | test avg. loss: 4.254
Epochs: 13008 | epoch avg. loss: 0.097 | test avg. loss: 4.017


 26%|██▌       | 13012/50000 [19:26<58:17, 10.58it/s]  

Epochs: 13009 | epoch avg. loss: 0.092 | test avg. loss: 4.038
Epochs: 13010 | epoch avg. loss: 0.037 | test avg. loss: 4.022
Epochs: 13011 | epoch avg. loss: 0.063 | test avg. loss: 4.287


 26%|██▌       | 13014/50000 [19:26<54:05, 11.40it/s]

Epochs: 13012 | epoch avg. loss: 0.184 | test avg. loss: 4.072
Epochs: 13013 | epoch avg. loss: 0.207 | test avg. loss: 4.053
Epochs: 13014 | epoch avg. loss: 0.145 | test avg. loss: 4.448


 26%|██▌       | 13018/50000 [19:27<51:26, 11.98it/s]

Epochs: 13015 | epoch avg. loss: 0.199 | test avg. loss: 3.960
Epochs: 13016 | epoch avg. loss: 0.261 | test avg. loss: 4.239
Epochs: 13017 | epoch avg. loss: 0.395 | test avg. loss: 4.054


 26%|██▌       | 13020/50000 [19:27<49:08, 12.54it/s]

Epochs: 13018 | epoch avg. loss: 0.152 | test avg. loss: 4.114
Epochs: 13019 | epoch avg. loss: 0.158 | test avg. loss: 4.790
Epochs: 13020 | epoch avg. loss: 0.282 | test avg. loss: 4.334




Epochs: 13021 | epoch avg. loss: 0.288 | test avg. loss: 4.191
Epochs: 13022 | epoch avg. loss: 0.069 | test avg. loss: 4.128
Epochs: 13023 | epoch avg. loss: 0.079 | test avg. loss: 4.022


 26%|██▌       | 13026/50000 [19:27<44:20, 13.90it/s]

Epochs: 13024 | epoch avg. loss: 0.123 | test avg. loss: 4.272
Epochs: 13025 | epoch avg. loss: 0.093 | test avg. loss: 4.137
Epochs: 13026 | epoch avg. loss: 0.055 | test avg. loss: 4.566


 26%|██▌       | 13030/50000 [19:27<44:26, 13.87it/s]

Epochs: 13027 | epoch avg. loss: 0.190 | test avg. loss: 4.165
Epochs: 13028 | epoch avg. loss: 0.124 | test avg. loss: 4.120
Epochs: 13029 | epoch avg. loss: 0.100 | test avg. loss: 4.475


 26%|██▌       | 13034/50000 [19:28<42:53, 14.36it/s]

Epochs: 13030 | epoch avg. loss: 0.187 | test avg. loss: 4.140
Epochs: 13031 | epoch avg. loss: 0.149 | test avg. loss: 4.183
Epochs: 13032 | epoch avg. loss: 0.061 | test avg. loss: 4.203
Epochs: 13033 | epoch avg. loss: 0.048 | test avg. loss: 4.069


 26%|██▌       | 13038/50000 [19:28<40:34, 15.18it/s]

Epochs: 13034 | epoch avg. loss: 0.045 | test avg. loss: 4.026
Epochs: 13035 | epoch avg. loss: 0.053 | test avg. loss: 3.978
Epochs: 13036 | epoch avg. loss: 0.055 | test avg. loss: 4.037
Epochs: 13037 | epoch avg. loss: 0.059 | test avg. loss: 4.242


 26%|██▌       | 13042/50000 [19:28<39:14, 15.70it/s]

Epochs: 13038 | epoch avg. loss: 0.086 | test avg. loss: 4.226
Epochs: 13039 | epoch avg. loss: 0.061 | test avg. loss: 4.112
Epochs: 13040 | epoch avg. loss: 0.168 | test avg. loss: 4.117
Epochs: 13041 | epoch avg. loss: 0.079 | test avg. loss: 4.215


 26%|██▌       | 13044/50000 [19:28<41:46, 14.75it/s]

Epochs: 13042 | epoch avg. loss: 0.075 | test avg. loss: 4.060
Epochs: 13043 | epoch avg. loss: 0.081 | test avg. loss: 4.274
Epochs: 13044 | epoch avg. loss: 0.061 | test avg. loss: 4.053


 26%|██▌       | 13048/50000 [19:29<45:46, 13.45it/s]

Epochs: 13045 | epoch avg. loss: 0.102 | test avg. loss: 4.346
Epochs: 13046 | epoch avg. loss: 0.250 | test avg. loss: 4.056
Epochs: 13047 | epoch avg. loss: 0.164 | test avg. loss: 4.133


 26%|██▌       | 13050/50000 [19:29<48:06, 12.80it/s]

Epochs: 13048 | epoch avg. loss: 0.257 | test avg. loss: 4.506
Epochs: 13049 | epoch avg. loss: 0.151 | test avg. loss: 4.306
Epochs: 13050 | epoch avg. loss: 0.092 | test avg. loss: 4.293


 26%|██▌       | 13054/50000 [19:29<47:53, 12.86it/s]

Epochs: 13051 | epoch avg. loss: 0.116 | test avg. loss: 4.409
Epochs: 13052 | epoch avg. loss: 0.204 | test avg. loss: 3.949
Epochs: 13053 | epoch avg. loss: 0.108 | test avg. loss: 4.156


 26%|██▌       | 13056/50000 [19:29<47:54, 12.85it/s]

Epochs: 13054 | epoch avg. loss: 0.119 | test avg. loss: 4.115
Epochs: 13055 | epoch avg. loss: 0.122 | test avg. loss: 4.225
Epochs: 13056 | epoch avg. loss: 0.116 | test avg. loss: 4.713


 26%|██▌       | 13060/50000 [19:30<47:13, 13.04it/s]

Epochs: 13057 | epoch avg. loss: 0.229 | test avg. loss: 4.120
Epochs: 13058 | epoch avg. loss: 0.303 | test avg. loss: 4.101
Epochs: 13059 | epoch avg. loss: 0.113 | test avg. loss: 4.043
Epochs: 13060 | epoch avg. loss: 0.101 | test avg. loss: 4.115


 26%|██▌       | 13064/50000 [19:30<42:11, 14.59it/s]

Epochs: 13061 | epoch avg. loss: 0.111 | test avg. loss: 4.357
Epochs: 13062 | epoch avg. loss: 0.064 | test avg. loss: 4.228
Epochs: 13063 | epoch avg. loss: 0.131 | test avg. loss: 4.427
Epochs: 13064 | epoch avg. loss: 0.155 | test avg. loss: 3.973


 26%|██▌       | 13068/50000 [19:30<39:33, 15.56it/s]

Epochs: 13065 | epoch avg. loss: 0.163 | test avg. loss: 3.876
Epochs: 13066 | epoch avg. loss: 0.118 | test avg. loss: 4.244
Epochs: 13067 | epoch avg. loss: 0.119 | test avg. loss: 4.244
Epochs: 13068 | epoch avg. loss: 0.279 | test avg. loss: 4.491


 26%|██▌       | 13072/50000 [19:30<38:44, 15.88it/s]

Epochs: 13069 | epoch avg. loss: 0.201 | test avg. loss: 4.294
Epochs: 13070 | epoch avg. loss: 0.159 | test avg. loss: 4.116
Epochs: 13071 | epoch avg. loss: 0.150 | test avg. loss: 4.205
Epochs: 13072 | epoch avg. loss: 0.145 | test avg. loss: 3.848


 26%|██▌       | 13076/50000 [19:31<44:02, 13.97it/s]

Epochs: 13073 | epoch avg. loss: 0.202 | test avg. loss: 4.155
Epochs: 13074 | epoch avg. loss: 0.224 | test avg. loss: 4.062
Epochs: 13075 | epoch avg. loss: 0.102 | test avg. loss: 4.135


 26%|██▌       | 13078/50000 [19:31<45:56, 13.39it/s]

Epochs: 13076 | epoch avg. loss: 0.098 | test avg. loss: 4.375
Epochs: 13077 | epoch avg. loss: 0.091 | test avg. loss: 4.242
Epochs: 13078 | epoch avg. loss: 0.051 | test avg. loss: 4.098


 26%|██▌       | 13082/50000 [19:31<43:50, 14.03it/s]

Epochs: 13079 | epoch avg. loss: 0.054 | test avg. loss: 4.153
Epochs: 13080 | epoch avg. loss: 0.081 | test avg. loss: 3.982
Epochs: 13081 | epoch avg. loss: 0.074 | test avg. loss: 4.135
Epochs: 13082 | epoch avg. loss: 0.110 | test avg. loss: 4.506


 26%|██▌       | 13086/50000 [19:31<42:02, 14.64it/s]

Epochs: 13083 | epoch avg. loss: 0.224 | test avg. loss: 4.137
Epochs: 13084 | epoch avg. loss: 0.081 | test avg. loss: 4.136
Epochs: 13085 | epoch avg. loss: 0.064 | test avg. loss: 4.146


 26%|██▌       | 13088/50000 [19:32<44:43, 13.76it/s]

Epochs: 13086 | epoch avg. loss: 0.065 | test avg. loss: 4.084
Epochs: 13087 | epoch avg. loss: 0.096 | test avg. loss: 4.236
Epochs: 13088 | epoch avg. loss: 0.057 | test avg. loss: 4.191


 26%|██▌       | 13092/50000 [19:32<48:22, 12.72it/s]

Epochs: 13089 | epoch avg. loss: 0.047 | test avg. loss: 4.195
Epochs: 13090 | epoch avg. loss: 0.041 | test avg. loss: 4.100
Epochs: 13091 | epoch avg. loss: 0.064 | test avg. loss: 4.076


 26%|██▌       | 13094/50000 [19:32<48:52, 12.59it/s]

Epochs: 13092 | epoch avg. loss: 0.058 | test avg. loss: 4.272
Epochs: 13093 | epoch avg. loss: 0.081 | test avg. loss: 4.150
Epochs: 13094 | epoch avg. loss: 0.038 | test avg. loss: 4.416


 26%|██▌       | 13098/50000 [19:32<46:19, 13.27it/s]

Epochs: 13095 | epoch avg. loss: 0.108 | test avg. loss: 4.039
Epochs: 13096 | epoch avg. loss: 0.210 | test avg. loss: 3.988
Epochs: 13097 | epoch avg. loss: 0.091 | test avg. loss: 4.194


 26%|██▌       | 13098/50000 [19:32<46:19, 13.27it/s]

Epochs: 13098 | epoch avg. loss: 0.087 | test avg. loss: 4.126
Epochs: 13099 | epoch avg. loss: 0.137 | test avg. loss: 4.313


 26%|██▌       | 13104/50000 [19:34<1:55:36,  5.32it/s]

Epochs: 13100 | epoch avg. loss: 0.083 | test avg. loss: 4.245
Epochs: 13101 | epoch avg. loss: 0.064 | test avg. loss: 4.091
Epochs: 13102 | epoch avg. loss: 0.086 | test avg. loss: 4.057
Epochs: 13103 | epoch avg. loss: 0.056 | test avg. loss: 4.084


 26%|██▌       | 13106/50000 [19:35<1:37:00,  6.34it/s]

Epochs: 13104 | epoch avg. loss: 0.042 | test avg. loss: 4.135
Epochs: 13105 | epoch avg. loss: 0.043 | test avg. loss: 4.102
Epochs: 13106 | epoch avg. loss: 0.039 | test avg. loss: 4.128


 26%|██▌       | 13110/50000 [19:35<1:12:08,  8.52it/s]

Epochs: 13107 | epoch avg. loss: 0.042 | test avg. loss: 4.072
Epochs: 13108 | epoch avg. loss: 0.173 | test avg. loss: 4.051
Epochs: 13109 | epoch avg. loss: 0.067 | test avg. loss: 4.190


 26%|██▌       | 13112/50000 [19:35<1:05:25,  9.40it/s]

Epochs: 13110 | epoch avg. loss: 0.062 | test avg. loss: 4.118
Epochs: 13111 | epoch avg. loss: 0.042 | test avg. loss: 4.311
Epochs: 13112 | epoch avg. loss: 0.083 | test avg. loss: 4.084


 26%|██▌       | 13116/50000 [19:35<55:51, 11.01it/s]

Epochs: 13113 | epoch avg. loss: 0.045 | test avg. loss: 4.060
Epochs: 13114 | epoch avg. loss: 0.045 | test avg. loss: 4.127
Epochs: 13115 | epoch avg. loss: 0.047 | test avg. loss: 4.174


 26%|██▌       | 13118/50000 [19:35<52:06, 11.80it/s]

Epochs: 13116 | epoch avg. loss: 0.076 | test avg. loss: 4.238
Epochs: 13117 | epoch avg. loss: 0.036 | test avg. loss: 4.152
Epochs: 13118 | epoch avg. loss: 0.054 | test avg. loss: 4.118


 26%|██▌       | 13122/50000 [19:36<52:58, 11.60it/s]

Epochs: 13119 | epoch avg. loss: 0.040 | test avg. loss: 4.098
Epochs: 13120 | epoch avg. loss: 0.042 | test avg. loss: 4.005
Epochs: 13121 | epoch avg. loss: 0.069 | test avg. loss: 4.092


 26%|██▌       | 13124/50000 [19:36<55:46, 11.02it/s]

Epochs: 13122 | epoch avg. loss: 0.059 | test avg. loss: 4.163
Epochs: 13123 | epoch avg. loss: 0.050 | test avg. loss: 4.160
Epochs: 13124 | epoch avg. loss: 0.039 | test avg. loss: 4.337


 26%|██▋       | 13128/50000 [19:36<51:20, 11.97it/s]

Epochs: 13125 | epoch avg. loss: 0.082 | test avg. loss: 4.100
Epochs: 13126 | epoch avg. loss: 0.056 | test avg. loss: 4.029
Epochs: 13127 | epoch avg. loss: 0.064 | test avg. loss: 4.454


 26%|██▋       | 13130/50000 [19:37<54:42, 11.23it/s]

Epochs: 13128 | epoch avg. loss: 0.178 | test avg. loss: 4.101
Epochs: 13129 | epoch avg. loss: 0.185 | test avg. loss: 4.201
Epochs: 13130 | epoch avg. loss: 0.129 | test avg. loss: 4.231


 26%|██▋       | 13134/50000 [19:37<51:58, 11.82it/s]

Epochs: 13131 | epoch avg. loss: 0.086 | test avg. loss: 4.076
Epochs: 13132 | epoch avg. loss: 0.067 | test avg. loss: 4.322
Epochs: 13133 | epoch avg. loss: 0.077 | test avg. loss: 4.112


 26%|██▋       | 13136/50000 [19:37<50:47, 12.10it/s]

Epochs: 13134 | epoch avg. loss: 0.082 | test avg. loss: 4.205
Epochs: 13135 | epoch avg. loss: 0.094 | test avg. loss: 4.758
Epochs: 13136 | epoch avg. loss: 0.320 | test avg. loss: 4.151


 26%|██▋       | 13140/50000 [19:37<46:59, 13.07it/s]

Epochs: 13137 | epoch avg. loss: 0.380 | test avg. loss: 4.080
Epochs: 13138 | epoch avg. loss: 0.183 | test avg. loss: 4.888
Epochs: 13139 | epoch avg. loss: 0.345 | test avg. loss: 4.444
Epochs: 13140 | epoch avg. loss: 0.849 | test avg. loss: 4.171


 26%|██▋       | 13144/50000 [19:37<45:21, 13.54it/s]

Epochs: 13141 | epoch avg. loss: 0.518 | test avg. loss: 4.882
Epochs: 13142 | epoch avg. loss: 0.499 | test avg. loss: 4.882
Epochs: 13143 | epoch avg. loss: 0.969 | test avg. loss: 5.004


 26%|██▋       | 13146/50000 [19:38<45:01, 13.64it/s]

Epochs: 13144 | epoch avg. loss: 0.877 | test avg. loss: 4.361
Epochs: 13145 | epoch avg. loss: 0.330 | test avg. loss: 4.011
Epochs: 13146 | epoch avg. loss: 0.379 | test avg. loss: 5.390


 26%|██▋       | 13150/50000 [19:38<46:23, 13.24it/s]

Epochs: 13147 | epoch avg. loss: 0.857 | test avg. loss: 3.765
Epochs: 13148 | epoch avg. loss: 0.277 | test avg. loss: 4.102
Epochs: 13149 | epoch avg. loss: 0.429 | test avg. loss: 4.293


 26%|██▋       | 13154/50000 [19:38<44:37, 13.76it/s]

Epochs: 13150 | epoch avg. loss: 0.311 | test avg. loss: 5.245
Epochs: 13151 | epoch avg. loss: 0.701 | test avg. loss: 7.014
Epochs: 13152 | epoch avg. loss: 1.346 | test avg. loss: 5.195
Epochs: 13153 | epoch avg. loss: 0.882 | test avg. loss: 4.357


 26%|██▋       | 13156/50000 [19:38<43:59, 13.96it/s]

Epochs: 13154 | epoch avg. loss: 0.476 | test avg. loss: 4.061
Epochs: 13155 | epoch avg. loss: 0.520 | test avg. loss: 4.118
Epochs: 13156 | epoch avg. loss: 0.930 | test avg. loss: 5.123


 26%|██▋       | 13160/50000 [19:39<46:02, 13.34it/s]

Epochs: 13157 | epoch avg. loss: 0.826 | test avg. loss: 4.645
Epochs: 13158 | epoch avg. loss: 1.026 | test avg. loss: 5.011
Epochs: 13159 | epoch avg. loss: 0.837 | test avg. loss: 8.868




Epochs: 13160 | epoch avg. loss: 2.675 | test avg. loss: 7.019
Epochs: 13161 | epoch avg. loss: 4.147 | test avg. loss: 5.373
Epochs: 13162 | epoch avg. loss: 2.970 | test avg. loss: 4.145


 26%|██▋       | 13166/50000 [19:39<43:19, 14.17it/s]

Epochs: 13163 | epoch avg. loss: 1.500 | test avg. loss: 4.630
Epochs: 13164 | epoch avg. loss: 1.420 | test avg. loss: 7.795
Epochs: 13165 | epoch avg. loss: 2.091 | test avg. loss: 6.956
Epochs: 13166 | epoch avg. loss: 3.237 | test avg. loss: 6.310


 26%|██▋       | 13170/50000 [19:39<46:44, 13.13it/s]

Epochs: 13167 | epoch avg. loss: 2.169 | test avg. loss: 5.059
Epochs: 13168 | epoch avg. loss: 1.887 | test avg. loss: 4.484
Epochs: 13169 | epoch avg. loss: 1.264 | test avg. loss: 4.623


 26%|██▋       | 13172/50000 [19:40<46:07, 13.31it/s]

Epochs: 13170 | epoch avg. loss: 0.724 | test avg. loss: 4.330
Epochs: 13171 | epoch avg. loss: 0.521 | test avg. loss: 5.042
Epochs: 13172 | epoch avg. loss: 0.532 | test avg. loss: 4.258


 26%|██▋       | 13176/50000 [19:40<43:25, 14.13it/s]

Epochs: 13173 | epoch avg. loss: 0.238 | test avg. loss: 4.564
Epochs: 13174 | epoch avg. loss: 0.290 | test avg. loss: 4.008
Epochs: 13175 | epoch avg. loss: 0.229 | test avg. loss: 4.130
Epochs: 13176 | epoch avg. loss: 0.248 | test avg. loss: 4.494


 26%|██▋       | 13180/50000 [19:40<42:17, 14.51it/s]

Epochs: 13177 | epoch avg. loss: 0.121 | test avg. loss: 4.406
Epochs: 13178 | epoch avg. loss: 0.124 | test avg. loss: 4.472
Epochs: 13179 | epoch avg. loss: 0.074 | test avg. loss: 4.414


 26%|██▋       | 13184/50000 [19:40<42:46, 14.34it/s]

Epochs: 13180 | epoch avg. loss: 0.067 | test avg. loss: 4.187
Epochs: 13181 | epoch avg. loss: 0.082 | test avg. loss: 4.420
Epochs: 13182 | epoch avg. loss: 0.155 | test avg. loss: 4.122
Epochs: 13183 | epoch avg. loss: 0.242 | test avg. loss: 4.355


 26%|██▋       | 13186/50000 [19:41<43:51, 13.99it/s]

Epochs: 13184 | epoch avg. loss: 0.131 | test avg. loss: 4.305
Epochs: 13185 | epoch avg. loss: 0.121 | test avg. loss: 4.617
Epochs: 13186 | epoch avg. loss: 0.194 | test avg. loss: 4.541


 26%|██▋       | 13190/50000 [19:41<42:41, 14.37it/s]

Epochs: 13187 | epoch avg. loss: 0.080 | test avg. loss: 4.407
Epochs: 13188 | epoch avg. loss: 0.084 | test avg. loss: 4.586
Epochs: 13189 | epoch avg. loss: 0.136 | test avg. loss: 4.339


 26%|██▋       | 13194/50000 [19:41<41:07, 14.91it/s]

Epochs: 13190 | epoch avg. loss: 0.071 | test avg. loss: 4.342
Epochs: 13191 | epoch avg. loss: 0.058 | test avg. loss: 4.478
Epochs: 13192 | epoch avg. loss: 0.072 | test avg. loss: 4.289
Epochs: 13193 | epoch avg. loss: 0.082 | test avg. loss: 4.200


 26%|██▋       | 13196/50000 [19:41<40:56, 14.98it/s]

Epochs: 13194 | epoch avg. loss: 0.068 | test avg. loss: 4.290
Epochs: 13195 | epoch avg. loss: 0.061 | test avg. loss: 4.128
Epochs: 13196 | epoch avg. loss: 0.061 | test avg. loss: 4.315


 26%|██▋       | 13198/50000 [19:42<41:58, 14.61it/s]

Epochs: 13197 | epoch avg. loss: 0.082 | test avg. loss: 4.278
Epochs: 13198 | epoch avg. loss: 0.048 | test avg. loss: 4.274
Epochs: 13199 | epoch avg. loss: 0.040 | test avg. loss: 4.214


 26%|██▋       | 13204/50000 [19:43<1:40:53,  6.08it/s]

Epochs: 13200 | epoch avg. loss: 0.036 | test avg. loss: 4.287
Epochs: 13201 | epoch avg. loss: 0.060 | test avg. loss: 4.122
Epochs: 13202 | epoch avg. loss: 0.070 | test avg. loss: 4.251
Epochs: 13203 | epoch avg. loss: 0.054 | test avg. loss: 4.316


 26%|██▋       | 13206/50000 [19:43<1:23:55,  7.31it/s]

Epochs: 13204 | epoch avg. loss: 0.055 | test avg. loss: 4.223
Epochs: 13205 | epoch avg. loss: 0.085 | test avg. loss: 4.314
Epochs: 13206 | epoch avg. loss: 0.046 | test avg. loss: 4.227


 26%|██▋       | 13210/50000 [19:44<1:03:18,  9.68it/s]

Epochs: 13207 | epoch avg. loss: 0.196 | test avg. loss: 4.320
Epochs: 13208 | epoch avg. loss: 0.073 | test avg. loss: 4.210
Epochs: 13209 | epoch avg. loss: 0.106 | test avg. loss: 4.234


 26%|██▋       | 13212/50000 [19:44<55:44, 11.00it/s]

Epochs: 13210 | epoch avg. loss: 0.126 | test avg. loss: 4.546
Epochs: 13211 | epoch avg. loss: 0.120 | test avg. loss: 4.275
Epochs: 13212 | epoch avg. loss: 0.123 | test avg. loss: 4.395
Epochs: 13213 | epoch avg. loss: 0.127 | test avg. loss: 4.270


 26%|██▋       | 13218/50000 [19:44<45:48, 13.38it/s]

Epochs: 13214 | epoch avg. loss: 0.071 | test avg. loss: 4.253
Epochs: 13215 | epoch avg. loss: 0.064 | test avg. loss: 4.407
Epochs: 13216 | epoch avg. loss: 0.064 | test avg. loss: 4.217
Epochs: 13217 | epoch avg. loss: 0.086 | test avg. loss: 4.238


 26%|██▋       | 13222/50000 [19:44<41:25, 14.80it/s]

Epochs: 13218 | epoch avg. loss: 0.037 | test avg. loss: 4.252
Epochs: 13219 | epoch avg. loss: 0.037 | test avg. loss: 4.284
Epochs: 13220 | epoch avg. loss: 0.039 | test avg. loss: 4.244
Epochs: 13221 | epoch avg. loss: 0.052 | test avg. loss: 4.185


 26%|██▋       | 13224/50000 [19:44<41:50, 14.65it/s]

Epochs: 13222 | epoch avg. loss: 0.049 | test avg. loss: 4.281
Epochs: 13223 | epoch avg. loss: 0.047 | test avg. loss: 4.181
Epochs: 13224 | epoch avg. loss: 0.061 | test avg. loss: 4.304


 26%|██▋       | 13228/50000 [19:45<40:56, 14.97it/s]

Epochs: 13225 | epoch avg. loss: 0.045 | test avg. loss: 4.286
Epochs: 13226 | epoch avg. loss: 0.039 | test avg. loss: 4.269
Epochs: 13227 | epoch avg. loss: 0.043 | test avg. loss: 4.347
Epochs: 13228 | epoch avg. loss: 0.055 | test avg. loss: 4.164


 26%|██▋       | 13232/50000 [19:45<39:51, 15.37it/s]

Epochs: 13229 | epoch avg. loss: 0.048 | test avg. loss: 4.302
Epochs: 13230 | epoch avg. loss: 0.076 | test avg. loss: 4.185
Epochs: 13231 | epoch avg. loss: 0.051 | test avg. loss: 4.413
Epochs: 13232 | epoch avg. loss: 0.098 | test avg. loss: 4.303


 26%|██▋       | 13236/50000 [19:45<39:02, 15.69it/s]

Epochs: 13233 | epoch avg. loss: 0.059 | test avg. loss: 4.225
Epochs: 13234 | epoch avg. loss: 0.056 | test avg. loss: 4.244
Epochs: 13235 | epoch avg. loss: 0.040 | test avg. loss: 4.165
Epochs: 13236 | epoch avg. loss: 0.038 | test avg. loss: 4.316


 26%|██▋       | 13240/50000 [19:45<40:14, 15.23it/s]

Epochs: 13237 | epoch avg. loss: 0.075 | test avg. loss: 4.147
Epochs: 13238 | epoch avg. loss: 0.037 | test avg. loss: 4.229
Epochs: 13239 | epoch avg. loss: 0.057 | test avg. loss: 4.176


 26%|██▋       | 13242/50000 [19:46<42:46, 14.32it/s]

Epochs: 13240 | epoch avg. loss: 0.058 | test avg. loss: 4.173
Epochs: 13241 | epoch avg. loss: 0.061 | test avg. loss: 4.324
Epochs: 13242 | epoch avg. loss: 0.079 | test avg. loss: 4.338


 26%|██▋       | 13246/50000 [19:46<41:18, 14.83it/s]

Epochs: 13243 | epoch avg. loss: 0.060 | test avg. loss: 4.155
Epochs: 13244 | epoch avg. loss: 0.094 | test avg. loss: 4.478
Epochs: 13245 | epoch avg. loss: 0.143 | test avg. loss: 4.134
Epochs: 13246 | epoch avg. loss: 0.172 | test avg. loss: 4.311


 26%|██▋       | 13250/50000 [19:46<39:52, 15.36it/s]

Epochs: 13247 | epoch avg. loss: 0.073 | test avg. loss: 4.547
Epochs: 13248 | epoch avg. loss: 0.093 | test avg. loss: 4.376
Epochs: 13249 | epoch avg. loss: 0.183 | test avg. loss: 4.615
Epochs: 13250 | epoch avg. loss: 0.186 | test avg. loss: 4.098


 27%|██▋       | 13254/50000 [19:46<40:10, 15.24it/s]

Epochs: 13251 | epoch avg. loss: 0.072 | test avg. loss: 4.173
Epochs: 13252 | epoch avg. loss: 0.105 | test avg. loss: 4.180
Epochs: 13253 | epoch avg. loss: 0.068 | test avg. loss: 4.204
Epochs: 13254 | epoch avg. loss: 0.070 | test avg. loss: 4.394


 27%|██▋       | 13258/50000 [19:47<41:25, 14.78it/s]

Epochs: 13255 | epoch avg. loss: 0.050 | test avg. loss: 4.315
Epochs: 13256 | epoch avg. loss: 0.053 | test avg. loss: 4.257
Epochs: 13257 | epoch avg. loss: 0.040 | test avg. loss: 4.232


 27%|██▋       | 13262/50000 [19:47<40:19, 15.18it/s]

Epochs: 13258 | epoch avg. loss: 0.040 | test avg. loss: 4.117
Epochs: 13259 | epoch avg. loss: 0.082 | test avg. loss: 4.180
Epochs: 13260 | epoch avg. loss: 0.055 | test avg. loss: 4.266
Epochs: 13261 | epoch avg. loss: 0.060 | test avg. loss: 4.166


 27%|██▋       | 13264/50000 [19:47<39:47, 15.38it/s]

Epochs: 13262 | epoch avg. loss: 0.137 | test avg. loss: 4.326
Epochs: 13263 | epoch avg. loss: 0.079 | test avg. loss: 4.243
Epochs: 13264 | epoch avg. loss: 0.059 | test avg. loss: 4.213


 27%|██▋       | 13268/50000 [19:47<42:30, 14.40it/s]

Epochs: 13265 | epoch avg. loss: 0.070 | test avg. loss: 4.377
Epochs: 13266 | epoch avg. loss: 0.067 | test avg. loss: 4.373
Epochs: 13267 | epoch avg. loss: 0.049 | test avg. loss: 4.296


 27%|██▋       | 13270/50000 [19:48<44:18, 13.82it/s]

Epochs: 13268 | epoch avg. loss: 0.097 | test avg. loss: 4.414
Epochs: 13269 | epoch avg. loss: 0.078 | test avg. loss: 4.179
Epochs: 13270 | epoch avg. loss: 0.086 | test avg. loss: 4.288


 27%|██▋       | 13274/50000 [19:48<45:00, 13.60it/s]

Epochs: 13271 | epoch avg. loss: 0.091 | test avg. loss: 4.396
Epochs: 13272 | epoch avg. loss: 0.094 | test avg. loss: 4.340
Epochs: 13273 | epoch avg. loss: 0.203 | test avg. loss: 4.816


 27%|██▋       | 13276/50000 [19:48<44:54, 13.63it/s]

Epochs: 13274 | epoch avg. loss: 0.207 | test avg. loss: 4.360
Epochs: 13275 | epoch avg. loss: 0.407 | test avg. loss: 4.355
Epochs: 13276 | epoch avg. loss: 0.228 | test avg. loss: 4.500


 27%|██▋       | 13280/50000 [19:48<43:53, 13.94it/s]

Epochs: 13277 | epoch avg. loss: 0.145 | test avg. loss: 4.282
Epochs: 13278 | epoch avg. loss: 0.165 | test avg. loss: 4.443
Epochs: 13279 | epoch avg. loss: 0.129 | test avg. loss: 4.118


 27%|██▋       | 13282/50000 [19:48<48:53, 12.52it/s]

Epochs: 13280 | epoch avg. loss: 0.064 | test avg. loss: 4.186
Epochs: 13281 | epoch avg. loss: 0.046 | test avg. loss: 4.255


 27%|██▋       | 13284/50000 [19:49<52:07, 11.74it/s]

Epochs: 13282 | epoch avg. loss: 0.039 | test avg. loss: 4.368
Epochs: 13283 | epoch avg. loss: 0.050 | test avg. loss: 4.377
Epochs: 13284 | epoch avg. loss: 0.042 | test avg. loss: 4.375


 27%|██▋       | 13288/50000 [19:49<51:30, 11.88it/s]

Epochs: 13285 | epoch avg. loss: 0.050 | test avg. loss: 4.159
Epochs: 13286 | epoch avg. loss: 0.037 | test avg. loss: 4.194
Epochs: 13287 | epoch avg. loss: 0.065 | test avg. loss: 3.992


 27%|██▋       | 13290/50000 [19:49<52:40, 11.62it/s]

Epochs: 13288 | epoch avg. loss: 0.070 | test avg. loss: 4.144
Epochs: 13289 | epoch avg. loss: 0.057 | test avg. loss: 4.171
Epochs: 13290 | epoch avg. loss: 0.066 | test avg. loss: 4.206


 27%|██▋       | 13294/50000 [19:49<53:33, 11.42it/s]

Epochs: 13291 | epoch avg. loss: 0.072 | test avg. loss: 4.391
Epochs: 13292 | epoch avg. loss: 0.078 | test avg. loss: 4.175
Epochs: 13293 | epoch avg. loss: 0.064 | test avg. loss: 4.232


 27%|██▋       | 13296/50000 [19:50<50:44, 12.06it/s]

Epochs: 13294 | epoch avg. loss: 0.056 | test avg. loss: 4.185
Epochs: 13295 | epoch avg. loss: 0.066 | test avg. loss: 4.158
Epochs: 13296 | epoch avg. loss: 0.052 | test avg. loss: 4.251


 27%|██▋       | 13298/50000 [19:50<48:57, 12.50it/s]

Epochs: 13297 | epoch avg. loss: 0.051 | test avg. loss: 4.242
Epochs: 13298 | epoch avg. loss: 0.036 | test avg. loss: 4.214
Epochs: 13299 | epoch avg. loss: 0.056 | test avg. loss: 4.239


 27%|██▋       | 13304/50000 [19:52<1:53:35,  5.38it/s]

Epochs: 13300 | epoch avg. loss: 0.042 | test avg. loss: 4.313
Epochs: 13301 | epoch avg. loss: 0.039 | test avg. loss: 4.216
Epochs: 13302 | epoch avg. loss: 0.042 | test avg. loss: 4.379
Epochs: 13303 | epoch avg. loss: 0.085 | test avg. loss: 4.187


 27%|██▋       | 13306/50000 [19:52<1:32:17,  6.63it/s]

Epochs: 13304 | epoch avg. loss: 0.073 | test avg. loss: 4.210
Epochs: 13305 | epoch avg. loss: 0.110 | test avg. loss: 4.341
Epochs: 13306 | epoch avg. loss: 0.103 | test avg. loss: 4.307


 27%|██▋       | 13310/50000 [19:52<1:05:56,  9.27it/s]

Epochs: 13307 | epoch avg. loss: 0.193 | test avg. loss: 4.841
Epochs: 13308 | epoch avg. loss: 0.271 | test avg. loss: 4.323
Epochs: 13309 | epoch avg. loss: 0.189 | test avg. loss: 4.369
Epochs: 13310 | epoch avg. loss: 0.111 | test avg. loss: 4.440


 27%|██▋       | 13314/50000 [19:52<54:27, 11.23it/s]

Epochs: 13311 | epoch avg. loss: 0.093 | test avg. loss: 4.161
Epochs: 13312 | epoch avg. loss: 0.090 | test avg. loss: 4.428
Epochs: 13313 | epoch avg. loss: 0.251 | test avg. loss: 4.138


 27%|██▋       | 13316/50000 [19:53<53:06, 11.51it/s]

Epochs: 13314 | epoch avg. loss: 0.107 | test avg. loss: 4.227
Epochs: 13315 | epoch avg. loss: 0.079 | test avg. loss: 4.449
Epochs: 13316 | epoch avg. loss: 0.066 | test avg. loss: 4.259


 27%|██▋       | 13320/50000 [19:53<49:18, 12.40it/s]

Epochs: 13317 | epoch avg. loss: 0.113 | test avg. loss: 4.146
Epochs: 13318 | epoch avg. loss: 0.037 | test avg. loss: 4.008
Epochs: 13319 | epoch avg. loss: 0.047 | test avg. loss: 4.039


 27%|██▋       | 13322/50000 [19:53<47:57, 12.75it/s]

Epochs: 13320 | epoch avg. loss: 0.037 | test avg. loss: 4.067
Epochs: 13321 | epoch avg. loss: 0.063 | test avg. loss: 4.171
Epochs: 13322 | epoch avg. loss: 0.043 | test avg. loss: 4.328


 27%|██▋       | 13326/50000 [19:53<48:40, 12.56it/s]

Epochs: 13323 | epoch avg. loss: 0.055 | test avg. loss: 4.240
Epochs: 13324 | epoch avg. loss: 0.035 | test avg. loss: 4.181
Epochs: 13325 | epoch avg. loss: 0.035 | test avg. loss: 4.193


 27%|██▋       | 13328/50000 [19:54<51:38, 11.84it/s]

Epochs: 13326 | epoch avg. loss: 0.042 | test avg. loss: 4.136
Epochs: 13327 | epoch avg. loss: 0.039 | test avg. loss: 4.260
Epochs: 13328 | epoch avg. loss: 0.046 | test avg. loss: 4.323


 27%|██▋       | 13332/50000 [19:54<51:47, 11.80it/s]

Epochs: 13329 | epoch avg. loss: 0.042 | test avg. loss: 4.219
Epochs: 13330 | epoch avg. loss: 0.054 | test avg. loss: 4.229
Epochs: 13331 | epoch avg. loss: 0.036 | test avg. loss: 4.253


 27%|██▋       | 13334/50000 [19:54<51:49, 11.79it/s]

Epochs: 13332 | epoch avg. loss: 0.042 | test avg. loss: 4.185
Epochs: 13333 | epoch avg. loss: 0.070 | test avg. loss: 4.435
Epochs: 13334 | epoch avg. loss: 0.136 | test avg. loss: 4.148


 27%|██▋       | 13338/50000 [19:54<50:56, 12.00it/s]

Epochs: 13335 | epoch avg. loss: 0.076 | test avg. loss: 4.168
Epochs: 13336 | epoch avg. loss: 0.048 | test avg. loss: 4.244
Epochs: 13337 | epoch avg. loss: 0.053 | test avg. loss: 4.193


 27%|██▋       | 13340/50000 [19:55<53:34, 11.41it/s]

Epochs: 13338 | epoch avg. loss: 0.067 | test avg. loss: 4.365
Epochs: 13339 | epoch avg. loss: 0.068 | test avg. loss: 4.192
Epochs: 13340 | epoch avg. loss: 0.069 | test avg. loss: 4.204


 27%|██▋       | 13344/50000 [19:55<52:32, 11.63it/s]

Epochs: 13341 | epoch avg. loss: 0.055 | test avg. loss: 4.427
Epochs: 13342 | epoch avg. loss: 0.130 | test avg. loss: 4.165
Epochs: 13343 | epoch avg. loss: 0.057 | test avg. loss: 4.183


 27%|██▋       | 13346/50000 [19:55<53:26, 11.43it/s]

Epochs: 13344 | epoch avg. loss: 0.108 | test avg. loss: 4.503
Epochs: 13345 | epoch avg. loss: 0.238 | test avg. loss: 4.022
Epochs: 13346 | epoch avg. loss: 0.157 | test avg. loss: 4.115


 27%|██▋       | 13350/50000 [19:55<49:25, 12.36it/s]

Epochs: 13347 | epoch avg. loss: 0.087 | test avg. loss: 4.562
Epochs: 13348 | epoch avg. loss: 0.156 | test avg. loss: 4.276
Epochs: 13349 | epoch avg. loss: 0.172 | test avg. loss: 4.385


 27%|██▋       | 13352/50000 [19:56<50:01, 12.21it/s]

Epochs: 13350 | epoch avg. loss: 0.262 | test avg. loss: 4.320
Epochs: 13351 | epoch avg. loss: 0.112 | test avg. loss: 4.099
Epochs: 13352 | epoch avg. loss: 0.088 | test avg. loss: 4.120


 27%|██▋       | 13356/50000 [19:56<51:09, 11.94it/s]

Epochs: 13353 | epoch avg. loss: 0.043 | test avg. loss: 4.227
Epochs: 13354 | epoch avg. loss: 0.048 | test avg. loss: 4.404
Epochs: 13355 | epoch avg. loss: 0.056 | test avg. loss: 4.312


 27%|██▋       | 13358/50000 [19:56<50:46, 12.03it/s]

Epochs: 13356 | epoch avg. loss: 0.072 | test avg. loss: 4.360
Epochs: 13357 | epoch avg. loss: 0.056 | test avg. loss: 4.313
Epochs: 13358 | epoch avg. loss: 0.040 | test avg. loss: 4.289


 27%|██▋       | 13362/50000 [19:56<46:31, 13.13it/s]

Epochs: 13359 | epoch avg. loss: 0.037 | test avg. loss: 4.219
Epochs: 13360 | epoch avg. loss: 0.049 | test avg. loss: 4.384
Epochs: 13361 | epoch avg. loss: 0.092 | test avg. loss: 4.190


 27%|██▋       | 13364/50000 [19:57<49:05, 12.44it/s]

Epochs: 13362 | epoch avg. loss: 0.087 | test avg. loss: 4.174
Epochs: 13363 | epoch avg. loss: 0.080 | test avg. loss: 4.259
Epochs: 13364 | epoch avg. loss: 0.098 | test avg. loss: 4.140


 27%|██▋       | 13368/50000 [19:57<48:07, 12.69it/s]

Epochs: 13365 | epoch avg. loss: 0.040 | test avg. loss: 4.290
Epochs: 13366 | epoch avg. loss: 0.071 | test avg. loss: 4.349
Epochs: 13367 | epoch avg. loss: 0.058 | test avg. loss: 4.159


                                                     

Epochs: 13368 | epoch avg. loss: 0.109 | test avg. loss: 4.116
Epochs: 13369 | epoch avg. loss: 0.044 | test avg. loss: 4.198
Epochs: 13370 | epoch avg. loss: 0.075 | test avg. loss: 4.196


 27%|██▋       | 13374/50000 [19:57<43:11, 14.13it/s]

Epochs: 13371 | epoch avg. loss: 0.041 | test avg. loss: 4.220
Epochs: 13372 | epoch avg. loss: 0.052 | test avg. loss: 4.432
Epochs: 13373 | epoch avg. loss: 0.082 | test avg. loss: 4.243
Epochs: 13374 | epoch avg. loss: 0.092 | test avg. loss: 4.166


 27%|██▋       | 13378/50000 [19:58<45:18, 13.47it/s]

Epochs: 13375 | epoch avg. loss: 0.083 | test avg. loss: 4.587
Epochs: 13376 | epoch avg. loss: 0.264 | test avg. loss: 4.174
Epochs: 13377 | epoch avg. loss: 0.312 | test avg. loss: 4.276


 27%|██▋       | 13380/50000 [19:58<44:36, 13.68it/s]

Epochs: 13378 | epoch avg. loss: 0.234 | test avg. loss: 5.399
Epochs: 13379 | epoch avg. loss: 0.594 | test avg. loss: 4.923
Epochs: 13380 | epoch avg. loss: 1.157 | test avg. loss: 4.873


 27%|██▋       | 13384/50000 [19:58<42:45, 14.27it/s]

Epochs: 13381 | epoch avg. loss: 0.800 | test avg. loss: 5.277
Epochs: 13382 | epoch avg. loss: 0.537 | test avg. loss: 4.877
Epochs: 13383 | epoch avg. loss: 0.824 | test avg. loss: 4.919
Epochs: 13384 | epoch avg. loss: 0.672 | test avg. loss: 4.362


 27%|██▋       | 13388/50000 [19:58<41:37, 14.66it/s]

Epochs: 13385 | epoch avg. loss: 0.474 | test avg. loss: 4.728
Epochs: 13386 | epoch avg. loss: 0.629 | test avg. loss: 4.861
Epochs: 13387 | epoch avg. loss: 0.653 | test avg. loss: 4.276
Epochs: 13388 | epoch avg. loss: 0.239 | test avg. loss: 4.461


 27%|██▋       | 13392/50000 [19:59<43:25, 14.05it/s]

Epochs: 13389 | epoch avg. loss: 0.194 | test avg. loss: 4.893
Epochs: 13390 | epoch avg. loss: 0.221 | test avg. loss: 4.504
Epochs: 13391 | epoch avg. loss: 0.169 | test avg. loss: 4.704


 27%|██▋       | 13394/50000 [19:59<47:17, 12.90it/s]

Epochs: 13392 | epoch avg. loss: 0.283 | test avg. loss: 4.108
Epochs: 13393 | epoch avg. loss: 0.132 | test avg. loss: 4.142
Epochs: 13394 | epoch avg. loss: 0.229 | test avg. loss: 4.209


 27%|██▋       | 13398/50000 [19:59<44:28, 13.72it/s]

Epochs: 13395 | epoch avg. loss: 0.113 | test avg. loss: 4.197
Epochs: 13396 | epoch avg. loss: 0.097 | test avg. loss: 4.704
Epochs: 13397 | epoch avg. loss: 0.155 | test avg. loss: 4.306


 27%|██▋       | 13398/50000 [19:59<44:28, 13.72it/s]

Epochs: 13398 | epoch avg. loss: 0.185 | test avg. loss: 4.311
Epochs: 13399 | epoch avg. loss: 0.222 | test avg. loss: 4.168


 27%|██▋       | 13402/50000 [20:01<2:08:48,  4.74it/s]

Epochs: 13400 | epoch avg. loss: 0.176 | test avg. loss: 4.235
Epochs: 13401 | epoch avg. loss: 0.447 | test avg. loss: 4.616
Epochs: 13402 | epoch avg. loss: 0.292 | test avg. loss: 4.158


 27%|██▋       | 13406/50000 [20:01<1:25:02,  7.17it/s]

Epochs: 13403 | epoch avg. loss: 0.225 | test avg. loss: 4.223
Epochs: 13404 | epoch avg. loss: 0.264 | test avg. loss: 4.248
Epochs: 13405 | epoch avg. loss: 0.242 | test avg. loss: 4.074
Epochs: 13406 | epoch avg. loss: 0.315 | test avg. loss: 4.755


 27%|██▋       | 13410/50000 [20:01<1:01:25,  9.93it/s]

Epochs: 13407 | epoch avg. loss: 0.326 | test avg. loss: 4.544
Epochs: 13408 | epoch avg. loss: 0.650 | test avg. loss: 4.436
Epochs: 13409 | epoch avg. loss: 0.250 | test avg. loss: 4.800
Epochs: 13410 | epoch avg. loss: 0.244 | test avg. loss: 4.521


 27%|██▋       | 13414/50000 [20:01<50:49, 12.00it/s]

Epochs: 13411 | epoch avg. loss: 0.440 | test avg. loss: 4.616
Epochs: 13412 | epoch avg. loss: 0.416 | test avg. loss: 3.964
Epochs: 13413 | epoch avg. loss: 0.192 | test avg. loss: 4.041


 27%|██▋       | 13418/50000 [20:02<45:15, 13.47it/s]

Epochs: 13414 | epoch avg. loss: 0.213 | test avg. loss: 4.782
Epochs: 13415 | epoch avg. loss: 0.463 | test avg. loss: 4.290
Epochs: 13416 | epoch avg. loss: 0.274 | test avg. loss: 4.356
Epochs: 13417 | epoch avg. loss: 0.240 | test avg. loss: 4.320


                                                     

Epochs: 13418 | epoch avg. loss: 0.175 | test avg. loss: 4.147
Epochs: 13419 | epoch avg. loss: 0.288 | test avg. loss: 4.519
Epochs: 13420 | epoch avg. loss: 0.247 | test avg. loss: 4.075


 27%|██▋       | 13424/50000 [20:02<43:40, 13.96it/s]

Epochs: 13421 | epoch avg. loss: 0.196 | test avg. loss: 4.116
Epochs: 13422 | epoch avg. loss: 0.111 | test avg. loss: 4.553
Epochs: 13423 | epoch avg. loss: 0.162 | test avg. loss: 4.294


 27%|██▋       | 13426/50000 [20:02<45:11, 13.49it/s]

Epochs: 13424 | epoch avg. loss: 0.408 | test avg. loss: 4.314
Epochs: 13425 | epoch avg. loss: 0.094 | test avg. loss: 4.426
Epochs: 13426 | epoch avg. loss: 0.085 | test avg. loss: 4.243


 27%|██▋       | 13430/50000 [20:03<47:34, 12.81it/s]

Epochs: 13427 | epoch avg. loss: 0.047 | test avg. loss: 4.368
Epochs: 13428 | epoch avg. loss: 0.080 | test avg. loss: 4.140
Epochs: 13429 | epoch avg. loss: 0.092 | test avg. loss: 4.269


 27%|██▋       | 13432/50000 [20:03<45:30, 13.39it/s]

Epochs: 13430 | epoch avg. loss: 0.040 | test avg. loss: 4.184
Epochs: 13431 | epoch avg. loss: 0.060 | test avg. loss: 4.203
Epochs: 13432 | epoch avg. loss: 0.059 | test avg. loss: 4.262


 27%|██▋       | 13436/50000 [20:03<43:57, 13.86it/s]

Epochs: 13433 | epoch avg. loss: 0.065 | test avg. loss: 4.100
Epochs: 13434 | epoch avg. loss: 0.146 | test avg. loss: 4.239
Epochs: 13435 | epoch avg. loss: 0.086 | test avg. loss: 4.028
Epochs: 13436 | epoch avg. loss: 0.117 | test avg. loss: 4.092


 27%|██▋       | 13440/50000 [20:03<41:43, 14.60it/s]

Epochs: 13437 | epoch avg. loss: 0.100 | test avg. loss: 4.696
Epochs: 13438 | epoch avg. loss: 0.249 | test avg. loss: 4.788
Epochs: 13439 | epoch avg. loss: 0.993 | test avg. loss: 4.974


 27%|██▋       | 13442/50000 [20:03<44:42, 13.63it/s]

Epochs: 13440 | epoch avg. loss: 1.245 | test avg. loss: 3.896
Epochs: 13441 | epoch avg. loss: 0.837 | test avg. loss: 3.785
Epochs: 13442 | epoch avg. loss: 0.576 | test avg. loss: 5.011


 27%|██▋       | 13446/50000 [20:04<43:55, 13.87it/s]

Epochs: 13443 | epoch avg. loss: 0.685 | test avg. loss: 4.507
Epochs: 13444 | epoch avg. loss: 1.124 | test avg. loss: 4.749
Epochs: 13445 | epoch avg. loss: 0.859 | test avg. loss: 4.199


 27%|██▋       | 13450/50000 [20:04<43:03, 14.15it/s]

Epochs: 13446 | epoch avg. loss: 0.362 | test avg. loss: 4.314
Epochs: 13447 | epoch avg. loss: 0.328 | test avg. loss: 5.279
Epochs: 13448 | epoch avg. loss: 0.482 | test avg. loss: 4.713
Epochs: 13449 | epoch avg. loss: 0.759 | test avg. loss: 5.310


 27%|██▋       | 13452/50000 [20:04<45:14, 13.46it/s]

Epochs: 13450 | epoch avg. loss: 1.151 | test avg. loss: 4.100
Epochs: 13451 | epoch avg. loss: 0.950 | test avg. loss: 4.622
Epochs: 13452 | epoch avg. loss: 1.793 | test avg. loss: 4.046


 27%|██▋       | 13456/50000 [20:04<45:45, 13.31it/s]

Epochs: 13453 | epoch avg. loss: 0.815 | test avg. loss: 4.073
Epochs: 13454 | epoch avg. loss: 0.662 | test avg. loss: 5.762
Epochs: 13455 | epoch avg. loss: 1.030 | test avg. loss: 4.885


 27%|██▋       | 13458/50000 [20:05<43:53, 13.88it/s]

Epochs: 13456 | epoch avg. loss: 0.949 | test avg. loss: 4.824
Epochs: 13457 | epoch avg. loss: 0.402 | test avg. loss: 4.385
Epochs: 13458 | epoch avg. loss: 0.328 | test avg. loss: 4.370


 27%|██▋       | 13462/50000 [20:05<45:29, 13.39it/s]

Epochs: 13459 | epoch avg. loss: 0.259 | test avg. loss: 4.209
Epochs: 13460 | epoch avg. loss: 0.194 | test avg. loss: 3.852
Epochs: 13461 | epoch avg. loss: 0.206 | test avg. loss: 4.529


 27%|██▋       | 13466/50000 [20:05<43:16, 14.07it/s]

Epochs: 13462 | epoch avg. loss: 0.296 | test avg. loss: 4.252
Epochs: 13463 | epoch avg. loss: 0.216 | test avg. loss: 4.783
Epochs: 13464 | epoch avg. loss: 0.279 | test avg. loss: 4.435
Epochs: 13465 | epoch avg. loss: 0.147 | test avg. loss: 4.225


 27%|██▋       | 13468/50000 [20:05<43:10, 14.10it/s]

Epochs: 13466 | epoch avg. loss: 0.112 | test avg. loss: 4.307
Epochs: 13467 | epoch avg. loss: 0.096 | test avg. loss: 3.982
Epochs: 13468 | epoch avg. loss: 0.117 | test avg. loss: 4.176


 27%|██▋       | 13472/50000 [20:06<43:01, 14.15it/s]

Epochs: 13469 | epoch avg. loss: 0.066 | test avg. loss: 4.053
Epochs: 13470 | epoch avg. loss: 0.126 | test avg. loss: 4.312
Epochs: 13471 | epoch avg. loss: 0.068 | test avg. loss: 4.259
Epochs: 13472 | epoch avg. loss: 0.090 | test avg. loss: 4.219




Epochs: 13473 | epoch avg. loss: 0.064 | test avg. loss: 4.253
Epochs: 13474 | epoch avg. loss: 0.073 | test avg. loss: 4.149
Epochs: 13475 | epoch avg. loss: 0.181 | test avg. loss: 4.353


 27%|██▋       | 13478/50000 [20:06<42:54, 14.18it/s]

Epochs: 13476 | epoch avg. loss: 0.094 | test avg. loss: 4.088
Epochs: 13477 | epoch avg. loss: 0.211 | test avg. loss: 4.361
Epochs: 13478 | epoch avg. loss: 0.167 | test avg. loss: 4.196


 27%|██▋       | 13482/50000 [20:06<44:45, 13.60it/s]

Epochs: 13479 | epoch avg. loss: 0.088 | test avg. loss: 4.095
Epochs: 13480 | epoch avg. loss: 0.063 | test avg. loss: 4.103
Epochs: 13481 | epoch avg. loss: 0.087 | test avg. loss: 4.103


 27%|██▋       | 13484/50000 [20:07<47:03, 12.93it/s]

Epochs: 13482 | epoch avg. loss: 0.170 | test avg. loss: 4.622
Epochs: 13483 | epoch avg. loss: 0.222 | test avg. loss: 4.300
Epochs: 13484 | epoch avg. loss: 0.084 | test avg. loss: 4.468


 27%|██▋       | 13488/50000 [20:07<50:07, 12.14it/s]

Epochs: 13485 | epoch avg. loss: 0.066 | test avg. loss: 4.237
Epochs: 13486 | epoch avg. loss: 0.121 | test avg. loss: 4.156
Epochs: 13487 | epoch avg. loss: 0.061 | test avg. loss: 4.119


 27%|██▋       | 13490/50000 [20:07<53:28, 11.38it/s]

Epochs: 13488 | epoch avg. loss: 0.046 | test avg. loss: 4.110
Epochs: 13489 | epoch avg. loss: 0.047 | test avg. loss: 4.214
Epochs: 13490 | epoch avg. loss: 0.042 | test avg. loss: 4.248


 27%|██▋       | 13494/50000 [20:07<48:32, 12.54it/s]

Epochs: 13491 | epoch avg. loss: 0.047 | test avg. loss: 4.396
Epochs: 13492 | epoch avg. loss: 0.067 | test avg. loss: 4.176
Epochs: 13493 | epoch avg. loss: 0.040 | test avg. loss: 4.163


 27%|██▋       | 13496/50000 [20:08<51:03, 11.92it/s]

Epochs: 13494 | epoch avg. loss: 0.034 | test avg. loss: 4.101
Epochs: 13495 | epoch avg. loss: 0.055 | test avg. loss: 4.283
Epochs: 13496 | epoch avg. loss: 0.066 | test avg. loss: 4.175


 27%|██▋       | 13498/50000 [20:08<47:29, 12.81it/s]

Epochs: 13497 | epoch avg. loss: 0.037 | test avg. loss: 4.296
Epochs: 13498 | epoch avg. loss: 0.086 | test avg. loss: 4.325
Epochs: 13499 | epoch avg. loss: 0.068 | test avg. loss: 4.204


 27%|██▋       | 13502/50000 [20:11<4:21:51,  2.32it/s]

Epochs: 13500 | epoch avg. loss: 0.037 | test avg. loss: 4.240
Epochs: 13501 | epoch avg. loss: 0.043 | test avg. loss: 4.123
Epochs: 13502 | epoch avg. loss: 0.033 | test avg. loss: 4.284


 27%|██▋       | 13506/50000 [20:12<2:32:59,  3.98it/s]

Epochs: 13503 | epoch avg. loss: 0.083 | test avg. loss: 4.123
Epochs: 13504 | epoch avg. loss: 0.057 | test avg. loss: 4.150
Epochs: 13505 | epoch avg. loss: 0.050 | test avg. loss: 4.337


 27%|██▋       | 13508/50000 [20:12<1:59:23,  5.09it/s]

Epochs: 13506 | epoch avg. loss: 0.086 | test avg. loss: 4.071
Epochs: 13507 | epoch avg. loss: 0.106 | test avg. loss: 4.343
Epochs: 13508 | epoch avg. loss: 0.110 | test avg. loss: 4.147


 27%|██▋       | 13512/50000 [20:12<1:25:16,  7.13it/s]

Epochs: 13509 | epoch avg. loss: 0.142 | test avg. loss: 4.185
Epochs: 13510 | epoch avg. loss: 0.102 | test avg. loss: 4.306
Epochs: 13511 | epoch avg. loss: 0.094 | test avg. loss: 4.260


 27%|██▋       | 13514/50000 [20:12<1:13:41,  8.25it/s]

Epochs: 13512 | epoch avg. loss: 0.153 | test avg. loss: 4.713
Epochs: 13513 | epoch avg. loss: 0.181 | test avg. loss: 4.253
Epochs: 13514 | epoch avg. loss: 0.095 | test avg. loss: 4.383


 27%|██▋       | 13518/50000 [20:13<1:05:38,  9.26it/s]

Epochs: 13515 | epoch avg. loss: 0.118 | test avg. loss: 4.220
Epochs: 13516 | epoch avg. loss: 0.060 | test avg. loss: 4.123
Epochs: 13517 | epoch avg. loss: 0.061 | test avg. loss: 4.269


 27%|██▋       | 13520/50000 [20:13<1:02:53,  9.67it/s]

Epochs: 13518 | epoch avg. loss: 0.042 | test avg. loss: 4.192
Epochs: 13519 | epoch avg. loss: 0.079 | test avg. loss: 4.344
Epochs: 13520 | epoch avg. loss: 0.132 | test avg. loss: 4.275


 27%|██▋       | 13524/50000 [20:13<56:33, 10.75it/s]

Epochs: 13521 | epoch avg. loss: 0.073 | test avg. loss: 4.058
Epochs: 13522 | epoch avg. loss: 0.088 | test avg. loss: 4.424
Epochs: 13523 | epoch avg. loss: 0.191 | test avg. loss: 4.134


 27%|██▋       | 13526/50000 [20:13<54:37, 11.13it/s]

Epochs: 13524 | epoch avg. loss: 0.115 | test avg. loss: 4.309
Epochs: 13525 | epoch avg. loss: 0.089 | test avg. loss: 4.438
Epochs: 13526 | epoch avg. loss: 0.072 | test avg. loss: 4.269


 27%|██▋       | 13530/50000 [20:14<54:52, 11.08it/s]

Epochs: 13527 | epoch avg. loss: 0.061 | test avg. loss: 4.378
Epochs: 13528 | epoch avg. loss: 0.055 | test avg. loss: 4.118
Epochs: 13529 | epoch avg. loss: 0.093 | test avg. loss: 4.220


 27%|██▋       | 13532/50000 [20:14<55:09, 11.02it/s]

Epochs: 13530 | epoch avg. loss: 0.096 | test avg. loss: 4.228
Epochs: 13531 | epoch avg. loss: 0.084 | test avg. loss: 4.199
Epochs: 13532 | epoch avg. loss: 0.199 | test avg. loss: 4.495


 27%|██▋       | 13536/50000 [20:14<52:09, 11.65it/s]

Epochs: 13533 | epoch avg. loss: 0.185 | test avg. loss: 4.108
Epochs: 13534 | epoch avg. loss: 0.226 | test avg. loss: 4.091
Epochs: 13535 | epoch avg. loss: 0.145 | test avg. loss: 4.227


 27%|██▋       | 13538/50000 [20:14<50:45, 11.97it/s]

Epochs: 13536 | epoch avg. loss: 0.109 | test avg. loss: 4.097
Epochs: 13537 | epoch avg. loss: 0.058 | test avg. loss: 4.289
Epochs: 13538 | epoch avg. loss: 0.067 | test avg. loss: 4.148


 27%|██▋       | 13542/50000 [20:15<44:56, 13.52it/s]

Epochs: 13539 | epoch avg. loss: 0.129 | test avg. loss: 4.102
Epochs: 13540 | epoch avg. loss: 0.096 | test avg. loss: 4.547
Epochs: 13541 | epoch avg. loss: 0.188 | test avg. loss: 4.204


 27%|██▋       | 13544/50000 [20:15<46:37, 13.03it/s]

Epochs: 13542 | epoch avg. loss: 0.283 | test avg. loss: 4.485
Epochs: 13543 | epoch avg. loss: 0.182 | test avg. loss: 4.310
Epochs: 13544 | epoch avg. loss: 0.126 | test avg. loss: 4.379


 27%|██▋       | 13548/50000 [20:15<44:25, 13.67it/s]

Epochs: 13545 | epoch avg. loss: 0.079 | test avg. loss: 4.402
Epochs: 13546 | epoch avg. loss: 0.045 | test avg. loss: 4.241
Epochs: 13547 | epoch avg. loss: 0.040 | test avg. loss: 4.161


 27%|██▋       | 13550/50000 [20:15<45:10, 13.45it/s]

Epochs: 13548 | epoch avg. loss: 0.043 | test avg. loss: 3.933
Epochs: 13549 | epoch avg. loss: 0.088 | test avg. loss: 4.073
Epochs: 13550 | epoch avg. loss: 0.085 | test avg. loss: 4.144


 27%|██▋       | 13554/50000 [20:16<49:31, 12.27it/s]

Epochs: 13551 | epoch avg. loss: 0.052 | test avg. loss: 4.248
Epochs: 13552 | epoch avg. loss: 0.054 | test avg. loss: 4.431
Epochs: 13553 | epoch avg. loss: 0.079 | test avg. loss: 4.181


 27%|██▋       | 13556/50000 [20:16<49:56, 12.16it/s]

Epochs: 13554 | epoch avg. loss: 0.058 | test avg. loss: 4.124
Epochs: 13555 | epoch avg. loss: 0.047 | test avg. loss: 4.162
Epochs: 13556 | epoch avg. loss: 0.054 | test avg. loss: 4.025


 27%|██▋       | 13560/50000 [20:16<46:12, 13.14it/s]

Epochs: 13557 | epoch avg. loss: 0.077 | test avg. loss: 4.251
Epochs: 13558 | epoch avg. loss: 0.056 | test avg. loss: 4.265
Epochs: 13559 | epoch avg. loss: 0.032 | test avg. loss: 4.341
Epochs: 13560 | epoch avg. loss: 0.042 | test avg. loss: 4.177


 27%|██▋       | 13564/50000 [20:16<42:52, 14.16it/s]

Epochs: 13561 | epoch avg. loss: 0.088 | test avg. loss: 4.118
Epochs: 13562 | epoch avg. loss: 0.074 | test avg. loss: 4.281
Epochs: 13563 | epoch avg. loss: 0.088 | test avg. loss: 4.099


 27%|██▋       | 13566/50000 [20:17<43:57, 13.82it/s]

Epochs: 13564 | epoch avg. loss: 0.057 | test avg. loss: 4.222
Epochs: 13565 | epoch avg. loss: 0.070 | test avg. loss: 4.218
Epochs: 13566 | epoch avg. loss: 0.056 | test avg. loss: 4.187




Epochs: 13567 | epoch avg. loss: 0.067 | test avg. loss: 4.288
Epochs: 13568 | epoch avg. loss: 0.038 | test avg. loss: 4.189
Epochs: 13569 | epoch avg. loss: 0.065 | test avg. loss: 4.226
Epochs: 13570 | epoch avg. loss: 0.040 | test avg. loss: 4.239


 27%|██▋       | 13572/50000 [20:17<41:47, 14.53it/s]

Epochs: 13571 | epoch avg. loss: 0.041 | test avg. loss: 4.215
Epochs: 13572 | epoch avg. loss: 0.035 | test avg. loss: 4.190


 27%|██▋       | 13576/50000 [20:17<42:21, 14.33it/s]

Epochs: 13573 | epoch avg. loss: 0.035 | test avg. loss: 4.087
Epochs: 13574 | epoch avg. loss: 0.040 | test avg. loss: 4.160
Epochs: 13575 | epoch avg. loss: 0.050 | test avg. loss: 4.234
Epochs: 13576 | epoch avg. loss: 0.041 | test avg. loss: 4.206


 27%|██▋       | 13580/50000 [20:17<43:10, 14.06it/s]

Epochs: 13577 | epoch avg. loss: 0.034 | test avg. loss: 4.200
Epochs: 13578 | epoch avg. loss: 0.033 | test avg. loss: 4.220
Epochs: 13579 | epoch avg. loss: 0.032 | test avg. loss: 4.140


 27%|██▋       | 13582/50000 [20:18<44:18, 13.70it/s]

Epochs: 13580 | epoch avg. loss: 0.067 | test avg. loss: 4.227
Epochs: 13581 | epoch avg. loss: 0.086 | test avg. loss: 4.350
Epochs: 13582 | epoch avg. loss: 0.080 | test avg. loss: 4.102


 27%|██▋       | 13586/50000 [20:18<43:07, 14.07it/s]

Epochs: 13583 | epoch avg. loss: 0.115 | test avg. loss: 4.330
Epochs: 13584 | epoch avg. loss: 0.169 | test avg. loss: 4.165
Epochs: 13585 | epoch avg. loss: 0.086 | test avg. loss: 4.030
Epochs: 13586 | epoch avg. loss: 0.133 | test avg. loss: 4.231




Epochs: 13587 | epoch avg. loss: 0.126 | test avg. loss: 4.427
Epochs: 13588 | epoch avg. loss: 0.097 | test avg. loss: 4.233
Epochs: 13589 | epoch avg. loss: 0.125 | test avg. loss: 4.363


 27%|██▋       | 13592/50000 [20:18<42:30, 14.27it/s]

Epochs: 13590 | epoch avg. loss: 0.098 | test avg. loss: 3.954
Epochs: 13591 | epoch avg. loss: 0.054 | test avg. loss: 3.954
Epochs: 13592 | epoch avg. loss: 0.053 | test avg. loss: 4.118


 27%|██▋       | 13596/50000 [20:19<43:21, 13.99it/s]

Epochs: 13593 | epoch avg. loss: 0.045 | test avg. loss: 4.285
Epochs: 13594 | epoch avg. loss: 0.042 | test avg. loss: 4.400
Epochs: 13595 | epoch avg. loss: 0.040 | test avg. loss: 4.345


 27%|██▋       | 13598/50000 [20:19<42:56, 14.13it/s]

Epochs: 13596 | epoch avg. loss: 0.039 | test avg. loss: 4.361
Epochs: 13597 | epoch avg. loss: 0.044 | test avg. loss: 4.209
Epochs: 13598 | epoch avg. loss: 0.048 | test avg. loss: 4.199
Epochs: 13599 | epoch avg. loss: 0.032 | test avg. loss: 4.160


 27%|██▋       | 13602/50000 [20:20<2:08:01,  4.74it/s]

Epochs: 13600 | epoch avg. loss: 0.038 | test avg. loss: 4.198
Epochs: 13601 | epoch avg. loss: 0.039 | test avg. loss: 4.262
Epochs: 13602 | epoch avg. loss: 0.045 | test avg. loss: 4.241


 27%|██▋       | 13606/50000 [20:21<1:23:06,  7.30it/s]

Epochs: 13603 | epoch avg. loss: 0.042 | test avg. loss: 4.186
Epochs: 13604 | epoch avg. loss: 0.032 | test avg. loss: 4.151
Epochs: 13605 | epoch avg. loss: 0.040 | test avg. loss: 4.145
Epochs: 13606 | epoch avg. loss: 0.068 | test avg. loss: 4.403


 27%|██▋       | 13610/50000 [20:21<1:02:32,  9.70it/s]

Epochs: 13607 | epoch avg. loss: 0.107 | test avg. loss: 4.299
Epochs: 13608 | epoch avg. loss: 0.074 | test avg. loss: 4.191
Epochs: 13609 | epoch avg. loss: 0.181 | test avg. loss: 4.139


 27%|██▋       | 13612/50000 [20:21<57:59, 10.46it/s]

Epochs: 13610 | epoch avg. loss: 0.073 | test avg. loss: 4.137
Epochs: 13611 | epoch avg. loss: 0.060 | test avg. loss: 4.025
Epochs: 13612 | epoch avg. loss: 0.107 | test avg. loss: 4.152


 27%|██▋       | 13616/50000 [20:21<51:14, 11.84it/s]

Epochs: 13613 | epoch avg. loss: 0.069 | test avg. loss: 4.173
Epochs: 13614 | epoch avg. loss: 0.044 | test avg. loss: 4.102
Epochs: 13615 | epoch avg. loss: 0.042 | test avg. loss: 4.200


 27%|██▋       | 13618/50000 [20:22<48:55, 12.39it/s]

Epochs: 13616 | epoch avg. loss: 0.062 | test avg. loss: 4.100
Epochs: 13617 | epoch avg. loss: 0.047 | test avg. loss: 4.051
Epochs: 13618 | epoch avg. loss: 0.065 | test avg. loss: 4.294


 27%|██▋       | 13622/50000 [20:22<44:17, 13.69it/s]

Epochs: 13619 | epoch avg. loss: 0.058 | test avg. loss: 4.181
Epochs: 13620 | epoch avg. loss: 0.144 | test avg. loss: 4.391
Epochs: 13621 | epoch avg. loss: 0.087 | test avg. loss: 4.226
Epochs: 13622 | epoch avg. loss: 0.073 | test avg. loss: 4.155


 27%|██▋       | 13626/50000 [20:22<41:18, 14.67it/s]

Epochs: 13623 | epoch avg. loss: 0.053 | test avg. loss: 4.157
Epochs: 13624 | epoch avg. loss: 0.047 | test avg. loss: 3.973
Epochs: 13625 | epoch avg. loss: 0.048 | test avg. loss: 4.092
Epochs: 13626 | epoch avg. loss: 0.071 | test avg. loss: 4.101


 27%|██▋       | 13630/50000 [20:22<41:38, 14.56it/s]

Epochs: 13627 | epoch avg. loss: 0.046 | test avg. loss: 4.191
Epochs: 13628 | epoch avg. loss: 0.039 | test avg. loss: 4.241
Epochs: 13629 | epoch avg. loss: 0.038 | test avg. loss: 4.093


 27%|██▋       | 13632/50000 [20:23<44:35, 13.59it/s]

Epochs: 13630 | epoch avg. loss: 0.054 | test avg. loss: 4.060
Epochs: 13631 | epoch avg. loss: 0.036 | test avg. loss: 4.114
Epochs: 13632 | epoch avg. loss: 0.038 | test avg. loss: 4.141


 27%|██▋       | 13636/50000 [20:23<41:45, 14.51it/s]

Epochs: 13633 | epoch avg. loss: 0.060 | test avg. loss: 4.269
Epochs: 13634 | epoch avg. loss: 0.036 | test avg. loss: 4.385
Epochs: 13635 | epoch avg. loss: 0.045 | test avg. loss: 4.221
Epochs: 13636 | epoch avg. loss: 0.046 | test avg. loss: 4.229


 27%|██▋       | 13640/50000 [20:23<43:31, 13.92it/s]

Epochs: 13637 | epoch avg. loss: 0.042 | test avg. loss: 4.253
Epochs: 13638 | epoch avg. loss: 0.042 | test avg. loss: 4.123
Epochs: 13639 | epoch avg. loss: 0.070 | test avg. loss: 4.190


                                                     

Epochs: 13640 | epoch avg. loss: 0.048 | test avg. loss: 4.332
Epochs: 13641 | epoch avg. loss: 0.056 | test avg. loss: 4.210
Epochs: 13642 | epoch avg. loss: 0.127 | test avg. loss: 4.354


 27%|██▋       | 13646/50000 [20:23<45:39, 13.27it/s]

Epochs: 13643 | epoch avg. loss: 0.132 | test avg. loss: 4.173
Epochs: 13644 | epoch avg. loss: 0.077 | test avg. loss: 4.022
Epochs: 13645 | epoch avg. loss: 0.132 | test avg. loss: 4.248


 27%|██▋       | 13650/50000 [20:24<41:35, 14.57it/s]

Epochs: 13646 | epoch avg. loss: 0.103 | test avg. loss: 4.130
Epochs: 13647 | epoch avg. loss: 0.112 | test avg. loss: 4.264
Epochs: 13648 | epoch avg. loss: 0.092 | test avg. loss: 4.713
Epochs: 13649 | epoch avg. loss: 0.238 | test avg. loss: 4.262


 27%|██▋       | 13654/50000 [20:24<39:52, 15.19it/s]

Epochs: 13650 | epoch avg. loss: 0.135 | test avg. loss: 4.264
Epochs: 13651 | epoch avg. loss: 0.136 | test avg. loss: 4.395
Epochs: 13652 | epoch avg. loss: 0.075 | test avg. loss: 4.310
Epochs: 13653 | epoch avg. loss: 0.048 | test avg. loss: 4.266


 27%|██▋       | 13658/50000 [20:24<39:18, 15.41it/s]

Epochs: 13654 | epoch avg. loss: 0.040 | test avg. loss: 4.106
Epochs: 13655 | epoch avg. loss: 0.162 | test avg. loss: 4.136
Epochs: 13656 | epoch avg. loss: 0.087 | test avg. loss: 4.312
Epochs: 13657 | epoch avg. loss: 0.054 | test avg. loss: 4.152


 27%|██▋       | 13660/50000 [20:24<39:49, 15.21it/s]

Epochs: 13658 | epoch avg. loss: 0.057 | test avg. loss: 4.222
Epochs: 13659 | epoch avg. loss: 0.100 | test avg. loss: 4.379
Epochs: 13660 | epoch avg. loss: 0.111 | test avg. loss: 4.168


 27%|██▋       | 13664/50000 [20:25<42:12, 14.35it/s]

Epochs: 13661 | epoch avg. loss: 0.056 | test avg. loss: 4.210
Epochs: 13662 | epoch avg. loss: 0.043 | test avg. loss: 4.334
Epochs: 13663 | epoch avg. loss: 0.059 | test avg. loss: 4.294


 27%|██▋       | 13668/50000 [20:25<40:16, 15.04it/s]

Epochs: 13664 | epoch avg. loss: 0.033 | test avg. loss: 4.271
Epochs: 13665 | epoch avg. loss: 0.031 | test avg. loss: 4.166
Epochs: 13666 | epoch avg. loss: 0.033 | test avg. loss: 4.241
Epochs: 13667 | epoch avg. loss: 0.092 | test avg. loss: 4.268


 27%|██▋       | 13672/50000 [20:25<39:46, 15.22it/s]

Epochs: 13668 | epoch avg. loss: 0.066 | test avg. loss: 4.092
Epochs: 13669 | epoch avg. loss: 0.051 | test avg. loss: 4.114
Epochs: 13670 | epoch avg. loss: 0.038 | test avg. loss: 4.232
Epochs: 13671 | epoch avg. loss: 0.049 | test avg. loss: 4.167


 27%|██▋       | 13674/50000 [20:25<39:32, 15.31it/s]

Epochs: 13672 | epoch avg. loss: 0.035 | test avg. loss: 4.297
Epochs: 13673 | epoch avg. loss: 0.067 | test avg. loss: 4.254
Epochs: 13674 | epoch avg. loss: 0.037 | test avg. loss: 4.160


 27%|██▋       | 13678/50000 [20:26<42:27, 14.26it/s]

Epochs: 13675 | epoch avg. loss: 0.056 | test avg. loss: 4.089
Epochs: 13676 | epoch avg. loss: 0.103 | test avg. loss: 4.264
Epochs: 13677 | epoch avg. loss: 0.090 | test avg. loss: 4.168


 27%|██▋       | 13680/50000 [20:26<42:35, 14.21it/s]

Epochs: 13678 | epoch avg. loss: 0.075 | test avg. loss: 4.177
Epochs: 13679 | epoch avg. loss: 0.188 | test avg. loss: 4.216
Epochs: 13680 | epoch avg. loss: 0.030 | test avg. loss: 4.199


 27%|██▋       | 13684/50000 [20:26<44:21, 13.65it/s]

Epochs: 13681 | epoch avg. loss: 0.031 | test avg. loss: 4.303
Epochs: 13682 | epoch avg. loss: 0.032 | test avg. loss: 4.167
Epochs: 13683 | epoch avg. loss: 0.142 | test avg. loss: 4.195


 27%|██▋       | 13686/50000 [20:26<45:32, 13.29it/s]

Epochs: 13684 | epoch avg. loss: 0.090 | test avg. loss: 4.527
Epochs: 13685 | epoch avg. loss: 0.143 | test avg. loss: 4.174
Epochs: 13686 | epoch avg. loss: 0.265 | test avg. loss: 4.158


 27%|██▋       | 13690/50000 [20:27<49:35, 12.20it/s]

Epochs: 13687 | epoch avg. loss: 0.174 | test avg. loss: 4.970
Epochs: 13688 | epoch avg. loss: 0.449 | test avg. loss: 4.110
Epochs: 13689 | epoch avg. loss: 0.290 | test avg. loss: 4.225


 27%|██▋       | 13692/50000 [20:27<54:02, 11.20it/s]

Epochs: 13690 | epoch avg. loss: 0.195 | test avg. loss: 5.081
Epochs: 13691 | epoch avg. loss: 0.622 | test avg. loss: 4.120
Epochs: 13692 | epoch avg. loss: 0.301 | test avg. loss: 4.064


 27%|██▋       | 13696/50000 [20:27<55:09, 10.97it/s]

Epochs: 13693 | epoch avg. loss: 0.220 | test avg. loss: 5.315
Epochs: 13694 | epoch avg. loss: 0.802 | test avg. loss: 4.058
Epochs: 13695 | epoch avg. loss: 0.293 | test avg. loss: 4.045


 27%|██▋       | 13698/50000 [20:27<51:20, 11.79it/s]

Epochs: 13696 | epoch avg. loss: 0.199 | test avg. loss: 4.796
Epochs: 13697 | epoch avg. loss: 0.426 | test avg. loss: 4.249
Epochs: 13698 | epoch avg. loss: 0.517 | test avg. loss: 4.250


 27%|██▋       | 13698/50000 [20:27<51:20, 11.79it/s]

Epochs: 13699 | epoch avg. loss: 0.387 | test avg. loss: 5.203


 27%|██▋       | 13702/50000 [20:29<2:33:18,  3.95it/s]

Epochs: 13700 | epoch avg. loss: 0.616 | test avg. loss: 4.236
Epochs: 13701 | epoch avg. loss: 0.527 | test avg. loss: 4.362
Epochs: 13702 | epoch avg. loss: 0.313 | test avg. loss: 4.724


 27%|██▋       | 13706/50000 [20:30<1:37:44,  6.19it/s]

Epochs: 13703 | epoch avg. loss: 0.281 | test avg. loss: 5.117
Epochs: 13704 | epoch avg. loss: 1.176 | test avg. loss: 5.376
Epochs: 13705 | epoch avg. loss: 0.964 | test avg. loss: 4.869
Epochs: 13706 | epoch avg. loss: 0.519 | test avg. loss: 4.274


 27%|██▋       | 13710/50000 [20:30<1:11:48,  8.42it/s]

Epochs: 13707 | epoch avg. loss: 0.402 | test avg. loss: 4.412
Epochs: 13708 | epoch avg. loss: 0.565 | test avg. loss: 4.192
Epochs: 13709 | epoch avg. loss: 0.358 | test avg. loss: 4.344


 27%|██▋       | 13712/50000 [20:30<1:05:57,  9.17it/s]

Epochs: 13710 | epoch avg. loss: 0.622 | test avg. loss: 4.410
Epochs: 13711 | epoch avg. loss: 0.253 | test avg. loss: 4.096
Epochs: 13712 | epoch avg. loss: 0.269 | test avg. loss: 4.376


 27%|██▋       | 13716/50000 [20:30<1:01:27,  9.84it/s]

Epochs: 13713 | epoch avg. loss: 0.236 | test avg. loss: 4.804
Epochs: 13714 | epoch avg. loss: 0.214 | test avg. loss: 4.406
Epochs: 13715 | epoch avg. loss: 0.481 | test avg. loss: 4.086


 27%|██▋       | 13718/50000 [20:31<58:24, 10.35it/s]

Epochs: 13716 | epoch avg. loss: 0.159 | test avg. loss: 4.007
Epochs: 13717 | epoch avg. loss: 0.164 | test avg. loss: 4.249
Epochs: 13718 | epoch avg. loss: 0.298 | test avg. loss: 5.257


 27%|██▋       | 13722/50000 [20:31<52:56, 11.42it/s]

Epochs: 13719 | epoch avg. loss: 0.451 | test avg. loss: 4.618
Epochs: 13720 | epoch avg. loss: 0.209 | test avg. loss: 4.583
Epochs: 13721 | epoch avg. loss: 0.149 | test avg. loss: 4.519


 27%|██▋       | 13724/50000 [20:31<51:43, 11.69it/s]

Epochs: 13722 | epoch avg. loss: 0.131 | test avg. loss: 4.109
Epochs: 13723 | epoch avg. loss: 0.093 | test avg. loss: 4.079
Epochs: 13724 | epoch avg. loss: 0.056 | test avg. loss: 4.170


 27%|██▋       | 13728/50000 [20:31<45:14, 13.36it/s]

Epochs: 13725 | epoch avg. loss: 0.059 | test avg. loss: 4.166
Epochs: 13726 | epoch avg. loss: 0.110 | test avg. loss: 4.457
Epochs: 13727 | epoch avg. loss: 0.107 | test avg. loss: 4.334


 27%|██▋       | 13730/50000 [20:31<44:55, 13.45it/s]

Epochs: 13728 | epoch avg. loss: 0.070 | test avg. loss: 4.291
Epochs: 13729 | epoch avg. loss: 0.074 | test avg. loss: 4.385
Epochs: 13730 | epoch avg. loss: 0.104 | test avg. loss: 4.072


 27%|██▋       | 13734/50000 [20:32<43:53, 13.77it/s]

Epochs: 13731 | epoch avg. loss: 0.068 | test avg. loss: 4.171
Epochs: 13732 | epoch avg. loss: 0.096 | test avg. loss: 4.024
Epochs: 13733 | epoch avg. loss: 0.147 | test avg. loss: 4.151


 27%|██▋       | 13736/50000 [20:32<45:14, 13.36it/s]

Epochs: 13734 | epoch avg. loss: 0.172 | test avg. loss: 4.410
Epochs: 13735 | epoch avg. loss: 0.098 | test avg. loss: 4.208
Epochs: 13736 | epoch avg. loss: 0.113 | test avg. loss: 4.189


 27%|██▋       | 13740/50000 [20:32<47:28, 12.73it/s]

Epochs: 13737 | epoch avg. loss: 0.079 | test avg. loss: 4.323
Epochs: 13738 | epoch avg. loss: 0.080 | test avg. loss: 4.272
Epochs: 13739 | epoch avg. loss: 0.044 | test avg. loss: 4.209


 27%|██▋       | 13742/50000 [20:32<50:42, 11.92it/s]

Epochs: 13740 | epoch avg. loss: 0.042 | test avg. loss: 4.219
Epochs: 13741 | epoch avg. loss: 0.038 | test avg. loss: 4.288


 27%|██▋       | 13744/50000 [20:33<53:41, 11.26it/s]

Epochs: 13742 | epoch avg. loss: 0.042 | test avg. loss: 4.325
Epochs: 13743 | epoch avg. loss: 0.042 | test avg. loss: 4.171
Epochs: 13744 | epoch avg. loss: 0.045 | test avg. loss: 4.318


 27%|██▋       | 13748/50000 [20:33<50:35, 11.94it/s]

Epochs: 13745 | epoch avg. loss: 0.151 | test avg. loss: 4.095
Epochs: 13746 | epoch avg. loss: 0.091 | test avg. loss: 4.149
Epochs: 13747 | epoch avg. loss: 0.294 | test avg. loss: 4.503


 28%|██▊       | 13750/50000 [20:33<49:59, 12.08it/s]

Epochs: 13748 | epoch avg. loss: 0.272 | test avg. loss: 4.505
Epochs: 13749 | epoch avg. loss: 0.121 | test avg. loss: 4.296
Epochs: 13750 | epoch avg. loss: 0.162 | test avg. loss: 4.707


 28%|██▊       | 13754/50000 [20:33<49:31, 12.20it/s]

Epochs: 13751 | epoch avg. loss: 0.226 | test avg. loss: 4.136
Epochs: 13752 | epoch avg. loss: 0.262 | test avg. loss: 4.158
Epochs: 13753 | epoch avg. loss: 0.104 | test avg. loss: 4.478


 28%|██▊       | 13756/50000 [20:34<52:49, 11.43it/s]

Epochs: 13754 | epoch avg. loss: 0.110 | test avg. loss: 4.198
Epochs: 13755 | epoch avg. loss: 0.139 | test avg. loss: 4.471
Epochs: 13756 | epoch avg. loss: 0.232 | test avg. loss: 4.017


 28%|██▊       | 13760/50000 [20:34<53:16, 11.34it/s]

Epochs: 13757 | epoch avg. loss: 0.052 | test avg. loss: 4.039
Epochs: 13758 | epoch avg. loss: 0.108 | test avg. loss: 4.225
Epochs: 13759 | epoch avg. loss: 0.076 | test avg. loss: 4.305


 28%|██▊       | 13762/50000 [20:34<53:06, 11.37it/s]

Epochs: 13760 | epoch avg. loss: 0.109 | test avg. loss: 4.535
Epochs: 13761 | epoch avg. loss: 0.048 | test avg. loss: 4.382
Epochs: 13762 | epoch avg. loss: 0.050 | test avg. loss: 4.329




Epochs: 13763 | epoch avg. loss: 0.073 | test avg. loss: 4.039
Epochs: 13764 | epoch avg. loss: 0.078 | test avg. loss: 3.921


 28%|██▊       | 13768/50000 [20:35<53:06, 11.37it/s]

Epochs: 13765 | epoch avg. loss: 0.118 | test avg. loss: 4.634
Epochs: 13766 | epoch avg. loss: 0.297 | test avg. loss: 4.154
Epochs: 13767 | epoch avg. loss: 0.285 | test avg. loss: 4.181


                                                     

Epochs: 13768 | epoch avg. loss: 0.147 | test avg. loss: 4.713
Epochs: 13769 | epoch avg. loss: 0.267 | test avg. loss: 4.211
Epochs: 13770 | epoch avg. loss: 0.234 | test avg. loss: 4.548


 28%|██▊       | 13774/50000 [20:35<46:27, 13.00it/s]

Epochs: 13771 | epoch avg. loss: 0.463 | test avg. loss: 4.216
Epochs: 13772 | epoch avg. loss: 0.188 | test avg. loss: 4.243
Epochs: 13773 | epoch avg. loss: 0.140 | test avg. loss: 4.547


 28%|██▊       | 13776/50000 [20:35<44:37, 13.53it/s]

Epochs: 13774 | epoch avg. loss: 0.170 | test avg. loss: 4.045
Epochs: 13775 | epoch avg. loss: 0.235 | test avg. loss: 3.895
Epochs: 13776 | epoch avg. loss: 0.077 | test avg. loss: 4.061


 28%|██▊       | 13778/50000 [20:35<44:34, 13.54it/s]

Epochs: 13777 | epoch avg. loss: 0.094 | test avg. loss: 4.062
Epochs: 13778 | epoch avg. loss: 0.219 | test avg. loss: 4.709




Epochs: 13779 | epoch avg. loss: 0.362 | test avg. loss: 4.248
Epochs: 13780 | epoch avg. loss: 0.201 | test avg. loss: 4.443
Epochs: 13781 | epoch avg. loss: 0.420 | test avg. loss: 4.890


 28%|██▊       | 13786/50000 [20:36<43:40, 13.82it/s]

Epochs: 13782 | epoch avg. loss: 0.502 | test avg. loss: 4.254
Epochs: 13783 | epoch avg. loss: 0.135 | test avg. loss: 4.329
Epochs: 13784 | epoch avg. loss: 0.171 | test avg. loss: 5.104
Epochs: 13785 | epoch avg. loss: 0.402 | test avg. loss: 4.198


                                                     

Epochs: 13786 | epoch avg. loss: 0.456 | test avg. loss: 4.079
Epochs: 13787 | epoch avg. loss: 0.132 | test avg. loss: 4.446
Epochs: 13788 | epoch avg. loss: 0.144 | test avg. loss: 4.653


 28%|██▊       | 13790/50000 [20:36<42:44, 14.12it/s]

Epochs: 13789 | epoch avg. loss: 0.454 | test avg. loss: 4.877
Epochs: 13790 | epoch avg. loss: 0.605 | test avg. loss: 4.230


 28%|██▊       | 13794/50000 [20:37<47:26, 12.72it/s]

Epochs: 13791 | epoch avg. loss: 0.374 | test avg. loss: 4.468
Epochs: 13792 | epoch avg. loss: 0.670 | test avg. loss: 4.928
Epochs: 13793 | epoch avg. loss: 0.891 | test avg. loss: 4.257


 28%|██▊       | 13798/50000 [20:37<43:45, 13.79it/s]

Epochs: 13794 | epoch avg. loss: 1.260 | test avg. loss: 4.476
Epochs: 13795 | epoch avg. loss: 0.487 | test avg. loss: 5.406
Epochs: 13796 | epoch avg. loss: 0.470 | test avg. loss: 4.834
Epochs: 13797 | epoch avg. loss: 0.785 | test avg. loss: 4.493


 28%|██▊       | 13798/50000 [20:37<43:45, 13.79it/s]

Epochs: 13798 | epoch avg. loss: 0.326 | test avg. loss: 4.179
Epochs: 13799 | epoch avg. loss: 0.179 | test avg. loss: 4.290


 28%|██▊       | 13802/50000 [20:39<2:10:37,  4.62it/s]

Epochs: 13800 | epoch avg. loss: 0.142 | test avg. loss: 4.820
Epochs: 13801 | epoch avg. loss: 0.216 | test avg. loss: 4.394
Epochs: 13802 | epoch avg. loss: 0.475 | test avg. loss: 4.538


 28%|██▊       | 13806/50000 [20:39<1:27:41,  6.88it/s]

Epochs: 13803 | epoch avg. loss: 0.333 | test avg. loss: 4.231
Epochs: 13804 | epoch avg. loss: 0.235 | test avg. loss: 4.265
Epochs: 13805 | epoch avg. loss: 0.589 | test avg. loss: 4.691


 28%|██▊       | 13808/50000 [20:39<1:15:49,  7.95it/s]

Epochs: 13806 | epoch avg. loss: 0.553 | test avg. loss: 4.279
Epochs: 13807 | epoch avg. loss: 0.219 | test avg. loss: 4.467
Epochs: 13808 | epoch avg. loss: 0.326 | test avg. loss: 4.961


 28%|██▊       | 13812/50000 [20:39<57:31, 10.48it/s]  

Epochs: 13809 | epoch avg. loss: 0.309 | test avg. loss: 4.058
Epochs: 13810 | epoch avg. loss: 0.122 | test avg. loss: 4.127
Epochs: 13811 | epoch avg. loss: 0.249 | test avg. loss: 4.109


 28%|██▊       | 13816/50000 [20:40<49:03, 12.29it/s]

Epochs: 13812 | epoch avg. loss: 0.249 | test avg. loss: 4.644
Epochs: 13813 | epoch avg. loss: 0.535 | test avg. loss: 5.525
Epochs: 13814 | epoch avg. loss: 0.455 | test avg. loss: 4.613
Epochs: 13815 | epoch avg. loss: 0.253 | test avg. loss: 4.280


 28%|██▊       | 13818/50000 [20:40<46:43, 12.91it/s]

Epochs: 13816 | epoch avg. loss: 0.117 | test avg. loss: 4.258
Epochs: 13817 | epoch avg. loss: 0.173 | test avg. loss: 4.056
Epochs: 13818 | epoch avg. loss: 0.391 | test avg. loss: 4.410


 28%|██▊       | 13822/50000 [20:40<46:46, 12.89it/s]

Epochs: 13819 | epoch avg. loss: 0.324 | test avg. loss: 4.312
Epochs: 13820 | epoch avg. loss: 0.091 | test avg. loss: 4.356
Epochs: 13821 | epoch avg. loss: 0.076 | test avg. loss: 4.244


 28%|██▊       | 13826/50000 [20:40<42:42, 14.12it/s]

Epochs: 13822 | epoch avg. loss: 0.047 | test avg. loss: 4.041
Epochs: 13823 | epoch avg. loss: 0.039 | test avg. loss: 3.872
Epochs: 13824 | epoch avg. loss: 0.046 | test avg. loss: 3.956
Epochs: 13825 | epoch avg. loss: 0.055 | test avg. loss: 4.058


 28%|██▊       | 13828/50000 [20:40<42:37, 14.14it/s]

Epochs: 13826 | epoch avg. loss: 0.037 | test avg. loss: 4.235
Epochs: 13827 | epoch avg. loss: 0.044 | test avg. loss: 4.369
Epochs: 13828 | epoch avg. loss: 0.036 | test avg. loss: 4.228




Epochs: 13829 | epoch avg. loss: 0.036 | test avg. loss: 4.134
Epochs: 13830 | epoch avg. loss: 0.036 | test avg. loss: 4.181
Epochs: 13831 | epoch avg. loss: 0.040 | test avg. loss: 4.152
Epochs: 13832 | epoch avg. loss: 0.047 | test avg. loss: 4.218




Epochs: 13833 | epoch avg. loss: 0.048 | test avg. loss: 4.586
Epochs: 13834 | epoch avg. loss: 0.142 | test avg. loss: 4.347
Epochs: 13835 | epoch avg. loss: 0.394 | test avg. loss: 4.347


 28%|██▊       | 13840/50000 [20:41<40:47, 14.78it/s]

Epochs: 13836 | epoch avg. loss: 0.130 | test avg. loss: 4.692
Epochs: 13837 | epoch avg. loss: 0.153 | test avg. loss: 4.195
Epochs: 13838 | epoch avg. loss: 0.125 | test avg. loss: 4.303
Epochs: 13839 | epoch avg. loss: 0.226 | test avg. loss: 3.977


 28%|██▊       | 13842/50000 [20:41<40:50, 14.75it/s]

Epochs: 13840 | epoch avg. loss: 0.113 | test avg. loss: 3.942
Epochs: 13841 | epoch avg. loss: 0.157 | test avg. loss: 4.489
Epochs: 13842 | epoch avg. loss: 0.228 | test avg. loss: 4.075


 28%|██▊       | 13846/50000 [20:42<45:32, 13.23it/s]

Epochs: 13843 | epoch avg. loss: 0.159 | test avg. loss: 4.112
Epochs: 13844 | epoch avg. loss: 0.113 | test avg. loss: 4.478
Epochs: 13845 | epoch avg. loss: 0.185 | test avg. loss: 4.108


 28%|██▊       | 13848/50000 [20:42<45:25, 13.26it/s]

Epochs: 13846 | epoch avg. loss: 0.164 | test avg. loss: 4.079
Epochs: 13847 | epoch avg. loss: 0.124 | test avg. loss: 4.468
Epochs: 13848 | epoch avg. loss: 0.229 | test avg. loss: 3.916


 28%|██▊       | 13852/50000 [20:42<43:48, 13.75it/s]

Epochs: 13849 | epoch avg. loss: 0.207 | test avg. loss: 4.028
Epochs: 13850 | epoch avg. loss: 0.050 | test avg. loss: 4.284
Epochs: 13851 | epoch avg. loss: 0.066 | test avg. loss: 4.272


 28%|██▊       | 13854/50000 [20:42<43:29, 13.85it/s]

Epochs: 13852 | epoch avg. loss: 0.119 | test avg. loss: 4.540
Epochs: 13853 | epoch avg. loss: 0.129 | test avg. loss: 4.163
Epochs: 13854 | epoch avg. loss: 0.103 | test avg. loss: 4.109


 28%|██▊       | 13858/50000 [20:43<49:15, 12.23it/s]

Epochs: 13855 | epoch avg. loss: 0.120 | test avg. loss: 4.403
Epochs: 13856 | epoch avg. loss: 0.092 | test avg. loss: 4.226
Epochs: 13857 | epoch avg. loss: 0.203 | test avg. loss: 4.418


 28%|██▊       | 13860/50000 [20:43<47:22, 12.71it/s]

Epochs: 13858 | epoch avg. loss: 0.112 | test avg. loss: 4.249
Epochs: 13859 | epoch avg. loss: 0.145 | test avg. loss: 4.166
Epochs: 13860 | epoch avg. loss: 0.208 | test avg. loss: 4.518


 28%|██▊       | 13864/50000 [20:43<45:24, 13.26it/s]

Epochs: 13861 | epoch avg. loss: 0.176 | test avg. loss: 4.119
Epochs: 13862 | epoch avg. loss: 0.103 | test avg. loss: 4.176
Epochs: 13863 | epoch avg. loss: 0.067 | test avg. loss: 4.405


 28%|██▊       | 13866/50000 [20:43<44:31, 13.53it/s]

Epochs: 13864 | epoch avg. loss: 0.136 | test avg. loss: 4.190
Epochs: 13865 | epoch avg. loss: 0.043 | test avg. loss: 4.047
Epochs: 13866 | epoch avg. loss: 0.033 | test avg. loss: 4.045


 28%|██▊       | 13870/50000 [20:44<44:53, 13.41it/s]

Epochs: 13867 | epoch avg. loss: 0.036 | test avg. loss: 4.041
Epochs: 13868 | epoch avg. loss: 0.036 | test avg. loss: 4.075
Epochs: 13869 | epoch avg. loss: 0.038 | test avg. loss: 4.314




Epochs: 13870 | epoch avg. loss: 0.045 | test avg. loss: 4.230
Epochs: 13871 | epoch avg. loss: 0.173 | test avg. loss: 4.275
Epochs: 13872 | epoch avg. loss: 0.044 | test avg. loss: 4.254


 28%|██▊       | 13876/50000 [20:44<42:03, 14.32it/s]

Epochs: 13873 | epoch avg. loss: 0.035 | test avg. loss: 4.167
Epochs: 13874 | epoch avg. loss: 0.034 | test avg. loss: 4.187
Epochs: 13875 | epoch avg. loss: 0.033 | test avg. loss: 4.219


 28%|██▊       | 13878/50000 [20:44<43:47, 13.75it/s]

Epochs: 13876 | epoch avg. loss: 0.030 | test avg. loss: 4.295
Epochs: 13877 | epoch avg. loss: 0.034 | test avg. loss: 4.222
Epochs: 13878 | epoch avg. loss: 0.036 | test avg. loss: 4.237


 28%|██▊       | 13882/50000 [20:44<43:03, 13.98it/s]

Epochs: 13879 | epoch avg. loss: 0.029 | test avg. loss: 4.216
Epochs: 13880 | epoch avg. loss: 0.037 | test avg. loss: 4.177
Epochs: 13881 | epoch avg. loss: 0.057 | test avg. loss: 4.171


 28%|██▊       | 13884/50000 [20:45<44:48, 13.43it/s]

Epochs: 13882 | epoch avg. loss: 0.061 | test avg. loss: 4.501
Epochs: 13883 | epoch avg. loss: 0.109 | test avg. loss: 4.141
Epochs: 13884 | epoch avg. loss: 0.087 | test avg. loss: 4.316


 28%|██▊       | 13888/50000 [20:45<45:20, 13.27it/s]

Epochs: 13885 | epoch avg. loss: 0.221 | test avg. loss: 4.283
Epochs: 13886 | epoch avg. loss: 0.150 | test avg. loss: 4.250
Epochs: 13887 | epoch avg. loss: 0.424 | test avg. loss: 4.565


 28%|██▊       | 13890/50000 [20:45<47:27, 12.68it/s]

Epochs: 13888 | epoch avg. loss: 0.225 | test avg. loss: 4.583
Epochs: 13889 | epoch avg. loss: 0.120 | test avg. loss: 4.428
Epochs: 13890 | epoch avg. loss: 0.216 | test avg. loss: 4.548


 28%|██▊       | 13894/50000 [20:45<52:45, 11.41it/s]

Epochs: 13891 | epoch avg. loss: 0.063 | test avg. loss: 4.254
Epochs: 13892 | epoch avg. loss: 0.121 | test avg. loss: 4.206
Epochs: 13893 | epoch avg. loss: 0.052 | test avg. loss: 4.303


 28%|██▊       | 13896/50000 [20:46<53:55, 11.16it/s]

Epochs: 13894 | epoch avg. loss: 0.065 | test avg. loss: 4.188
Epochs: 13895 | epoch avg. loss: 0.037 | test avg. loss: 4.189
Epochs: 13896 | epoch avg. loss: 0.033 | test avg. loss: 4.223


 28%|██▊       | 13898/50000 [20:46<50:45, 11.85it/s]

Epochs: 13897 | epoch avg. loss: 0.033 | test avg. loss: 4.130
Epochs: 13898 | epoch avg. loss: 0.053 | test avg. loss: 4.231
Epochs: 13899 | epoch avg. loss: 0.037 | test avg. loss: 4.198


 28%|██▊       | 13902/50000 [20:48<2:28:12,  4.06it/s]

Epochs: 13900 | epoch avg. loss: 0.034 | test avg. loss: 4.190
Epochs: 13901 | epoch avg. loss: 0.037 | test avg. loss: 4.266
Epochs: 13902 | epoch avg. loss: 0.047 | test avg. loss: 4.138


 28%|██▊       | 13906/50000 [20:48<1:38:51,  6.09it/s]

Epochs: 13903 | epoch avg. loss: 0.046 | test avg. loss: 4.198
Epochs: 13904 | epoch avg. loss: 0.032 | test avg. loss: 4.198
Epochs: 13905 | epoch avg. loss: 0.029 | test avg. loss: 4.170


 28%|██▊       | 13908/50000 [20:48<1:25:17,  7.05it/s]

Epochs: 13906 | epoch avg. loss: 0.029 | test avg. loss: 4.156
Epochs: 13907 | epoch avg. loss: 0.030 | test avg. loss: 4.278
Epochs: 13908 | epoch avg. loss: 0.072 | test avg. loss: 4.306


 28%|██▊       | 13912/50000 [20:48<1:08:02,  8.84it/s]

Epochs: 13909 | epoch avg. loss: 0.044 | test avg. loss: 4.169
Epochs: 13910 | epoch avg. loss: 0.043 | test avg. loss: 4.272
Epochs: 13911 | epoch avg. loss: 0.087 | test avg. loss: 4.283


                                                       

Epochs: 13912 | epoch avg. loss: 0.066 | test avg. loss: 4.219
Epochs: 13913 | epoch avg. loss: 0.029 | test avg. loss: 4.193
Epochs: 13914 | epoch avg. loss: 0.032 | test avg. loss: 4.275


 28%|██▊       | 13918/50000 [20:49<50:37, 11.88it/s]

Epochs: 13915 | epoch avg. loss: 0.029 | test avg. loss: 4.204
Epochs: 13916 | epoch avg. loss: 0.040 | test avg. loss: 4.219
Epochs: 13917 | epoch avg. loss: 0.029 | test avg. loss: 4.246
Epochs: 13918 | epoch avg. loss: 0.035 | test avg. loss: 4.295


 28%|██▊       | 13922/50000 [20:49<45:17, 13.27it/s]

Epochs: 13919 | epoch avg. loss: 0.043 | test avg. loss: 4.153
Epochs: 13920 | epoch avg. loss: 0.037 | test avg. loss: 4.181
Epochs: 13921 | epoch avg. loss: 0.028 | test avg. loss: 4.183


 28%|██▊       | 13924/50000 [20:49<45:23, 13.24it/s]

Epochs: 13922 | epoch avg. loss: 0.036 | test avg. loss: 4.214
Epochs: 13923 | epoch avg. loss: 0.048 | test avg. loss: 4.336
Epochs: 13924 | epoch avg. loss: 0.032 | test avg. loss: 4.251


 28%|██▊       | 13928/50000 [20:50<47:52, 12.56it/s]

Epochs: 13925 | epoch avg. loss: 0.048 | test avg. loss: 4.190
Epochs: 13926 | epoch avg. loss: 0.052 | test avg. loss: 4.139
Epochs: 13927 | epoch avg. loss: 0.049 | test avg. loss: 4.174


 28%|██▊       | 13930/50000 [20:50<47:22, 12.69it/s]

Epochs: 13928 | epoch avg. loss: 0.028 | test avg. loss: 4.157
Epochs: 13929 | epoch avg. loss: 0.030 | test avg. loss: 4.156
Epochs: 13930 | epoch avg. loss: 0.033 | test avg. loss: 4.293


 28%|██▊       | 13934/50000 [20:50<43:25, 13.84it/s]

Epochs: 13931 | epoch avg. loss: 0.039 | test avg. loss: 4.146
Epochs: 13932 | epoch avg. loss: 0.057 | test avg. loss: 4.163
Epochs: 13933 | epoch avg. loss: 0.044 | test avg. loss: 4.213
Epochs: 13934 | epoch avg. loss: 0.044 | test avg. loss: 4.119


 28%|██▊       | 13938/50000 [20:50<43:58, 13.67it/s]

Epochs: 13935 | epoch avg. loss: 0.086 | test avg. loss: 4.345
Epochs: 13936 | epoch avg. loss: 0.070 | test avg. loss: 4.146
Epochs: 13937 | epoch avg. loss: 0.064 | test avg. loss: 4.061


 28%|██▊       | 13940/50000 [20:51<48:58, 12.27it/s]

Epochs: 13938 | epoch avg. loss: 0.065 | test avg. loss: 4.300
Epochs: 13939 | epoch avg. loss: 0.139 | test avg. loss: 4.289
Epochs: 13940 | epoch avg. loss: 0.040 | test avg. loss: 4.343


 28%|██▊       | 13944/50000 [20:51<48:29, 12.39it/s]

Epochs: 13941 | epoch avg. loss: 0.032 | test avg. loss: 4.207
Epochs: 13942 | epoch avg. loss: 0.075 | test avg. loss: 4.088
Epochs: 13943 | epoch avg. loss: 0.059 | test avg. loss: 4.362


 28%|██▊       | 13946/50000 [20:51<47:58, 12.53it/s]

Epochs: 13944 | epoch avg. loss: 0.149 | test avg. loss: 4.058
Epochs: 13945 | epoch avg. loss: 0.089 | test avg. loss: 4.201
Epochs: 13946 | epoch avg. loss: 0.040 | test avg. loss: 4.413


 28%|██▊       | 13950/50000 [20:51<50:12, 11.97it/s]

Epochs: 13947 | epoch avg. loss: 0.049 | test avg. loss: 4.321
Epochs: 13948 | epoch avg. loss: 0.077 | test avg. loss: 4.295
Epochs: 13949 | epoch avg. loss: 0.034 | test avg. loss: 4.231


 28%|██▊       | 13952/50000 [20:52<51:03, 11.77it/s]

Epochs: 13950 | epoch avg. loss: 0.033 | test avg. loss: 4.251
Epochs: 13951 | epoch avg. loss: 0.091 | test avg. loss: 4.547
Epochs: 13952 | epoch avg. loss: 0.134 | test avg. loss: 4.201


 28%|██▊       | 13956/50000 [20:52<51:50, 11.59it/s]

Epochs: 13953 | epoch avg. loss: 0.118 | test avg. loss: 4.311
Epochs: 13954 | epoch avg. loss: 0.103 | test avg. loss: 4.596
Epochs: 13955 | epoch avg. loss: 0.137 | test avg. loss: 4.283


 28%|██▊       | 13960/50000 [20:52<45:03, 13.33it/s]

Epochs: 13956 | epoch avg. loss: 0.114 | test avg. loss: 4.445
Epochs: 13957 | epoch avg. loss: 0.099 | test avg. loss: 4.364
Epochs: 13958 | epoch avg. loss: 0.087 | test avg. loss: 4.394
Epochs: 13959 | epoch avg. loss: 0.304 | test avg. loss: 4.522


 28%|██▊       | 13962/50000 [20:52<43:41, 13.75it/s]

Epochs: 13960 | epoch avg. loss: 0.277 | test avg. loss: 4.325
Epochs: 13961 | epoch avg. loss: 0.128 | test avg. loss: 4.416
Epochs: 13962 | epoch avg. loss: 0.294 | test avg. loss: 4.540


 28%|██▊       | 13966/50000 [20:53<45:34, 13.18it/s]

Epochs: 13963 | epoch avg. loss: 0.211 | test avg. loss: 4.508
Epochs: 13964 | epoch avg. loss: 0.105 | test avg. loss: 4.233
Epochs: 13965 | epoch avg. loss: 0.105 | test avg. loss: 4.350
Epochs: 13966 | epoch avg. loss: 0.068 | test avg. loss: 4.164


 28%|██▊       | 13970/50000 [20:53<45:38, 13.16it/s]

Epochs: 13967 | epoch avg. loss: 0.093 | test avg. loss: 4.236
Epochs: 13968 | epoch avg. loss: 0.107 | test avg. loss: 4.616
Epochs: 13969 | epoch avg. loss: 0.140 | test avg. loss: 4.381


 28%|██▊       | 13972/50000 [20:53<44:55, 13.36it/s]

Epochs: 13970 | epoch avg. loss: 0.044 | test avg. loss: 4.192
Epochs: 13971 | epoch avg. loss: 0.037 | test avg. loss: 4.181
Epochs: 13972 | epoch avg. loss: 0.032 | test avg. loss: 4.251


 28%|██▊       | 13976/50000 [20:53<44:53, 13.38it/s]

Epochs: 13973 | epoch avg. loss: 0.030 | test avg. loss: 4.211
Epochs: 13974 | epoch avg. loss: 0.050 | test avg. loss: 4.290
Epochs: 13975 | epoch avg. loss: 0.043 | test avg. loss: 4.430


 28%|██▊       | 13978/50000 [20:54<48:36, 12.35it/s]

Epochs: 13976 | epoch avg. loss: 0.083 | test avg. loss: 4.152
Epochs: 13977 | epoch avg. loss: 0.070 | test avg. loss: 4.245
Epochs: 13978 | epoch avg. loss: 0.064 | test avg. loss: 4.203


 28%|██▊       | 13982/50000 [20:54<51:53, 11.57it/s]

Epochs: 13979 | epoch avg. loss: 0.040 | test avg. loss: 4.220
Epochs: 13980 | epoch avg. loss: 0.039 | test avg. loss: 4.332
Epochs: 13981 | epoch avg. loss: 0.041 | test avg. loss: 4.242


 28%|██▊       | 13986/50000 [20:54<46:13, 12.98it/s]

Epochs: 13982 | epoch avg. loss: 0.064 | test avg. loss: 4.244
Epochs: 13983 | epoch avg. loss: 0.041 | test avg. loss: 4.478
Epochs: 13984 | epoch avg. loss: 0.121 | test avg. loss: 4.337
Epochs: 13985 | epoch avg. loss: 0.064 | test avg. loss: 4.399


 28%|██▊       | 13988/50000 [20:54<45:15, 13.26it/s]

Epochs: 13986 | epoch avg. loss: 0.070 | test avg. loss: 4.844
Epochs: 13987 | epoch avg. loss: 0.230 | test avg. loss: 4.130
Epochs: 13988 | epoch avg. loss: 0.119 | test avg. loss: 4.094


                                                     

Epochs: 13989 | epoch avg. loss: 0.262 | test avg. loss: 4.272
Epochs: 13990 | epoch avg. loss: 0.128 | test avg. loss: 4.271
Epochs: 13991 | epoch avg. loss: 0.059 | test avg. loss: 4.225


                                                     

Epochs: 13992 | epoch avg. loss: 0.078 | test avg. loss: 4.288
Epochs: 13993 | epoch avg. loss: 0.032 | test avg. loss: 4.264
Epochs: 13994 | epoch avg. loss: 0.035 | test avg. loss: 4.189


                                                     

Epochs: 13995 | epoch avg. loss: 0.038 | test avg. loss: 4.293
Epochs: 13996 | epoch avg. loss: 0.050 | test avg. loss: 4.217
Epochs: 13997 | epoch avg. loss: 0.044 | test avg. loss: 4.199


 28%|██▊       | 13998/50000 [20:55<42:52, 13.99it/s]

Epochs: 13998 | epoch avg. loss: 0.117 | test avg. loss: 4.303
Epochs: 13999 | epoch avg. loss: 0.071 | test avg. loss: 4.408


 28%|██▊       | 14002/50000 [20:57<2:12:06,  4.54it/s]

Epochs: 14000 | epoch avg. loss: 0.067 | test avg. loss: 4.275
Epochs: 14001 | epoch avg. loss: 0.030 | test avg. loss: 4.314
Epochs: 14002 | epoch avg. loss: 0.031 | test avg. loss: 4.190


 28%|██▊       | 14006/50000 [20:57<1:27:27,  6.86it/s]

Epochs: 14003 | epoch avg. loss: 0.056 | test avg. loss: 4.258
Epochs: 14004 | epoch avg. loss: 0.105 | test avg. loss: 4.260
Epochs: 14005 | epoch avg. loss: 0.073 | test avg. loss: 4.293


 28%|██▊       | 14010/50000 [20:57<1:03:20,  9.47it/s]

Epochs: 14006 | epoch avg. loss: 0.316 | test avg. loss: 4.472
Epochs: 14007 | epoch avg. loss: 0.287 | test avg. loss: 4.297
Epochs: 14008 | epoch avg. loss: 0.143 | test avg. loss: 4.367
Epochs: 14009 | epoch avg. loss: 0.288 | test avg. loss: 4.678


 28%|██▊       | 14012/50000 [20:58<1:02:39,  9.57it/s]

Epochs: 14010 | epoch avg. loss: 0.296 | test avg. loss: 4.312
Epochs: 14011 | epoch avg. loss: 0.171 | test avg. loss: 4.514
Epochs: 14012 | epoch avg. loss: 0.536 | test avg. loss: 4.980


 28%|██▊       | 14016/50000 [20:58<51:42, 11.60it/s]

Epochs: 14013 | epoch avg. loss: 0.917 | test avg. loss: 5.265
Epochs: 14014 | epoch avg. loss: 0.480 | test avg. loss: 4.704
Epochs: 14015 | epoch avg. loss: 0.655 | test avg. loss: 4.500


 28%|██▊       | 14020/50000 [20:58<45:13, 13.26it/s]

Epochs: 14016 | epoch avg. loss: 0.318 | test avg. loss: 4.385
Epochs: 14017 | epoch avg. loss: 0.241 | test avg. loss: 4.068
Epochs: 14018 | epoch avg. loss: 0.253 | test avg. loss: 4.299
Epochs: 14019 | epoch avg. loss: 0.064 | test avg. loss: 4.729


 28%|██▊       | 14022/50000 [20:58<44:38, 13.43it/s]

Epochs: 14020 | epoch avg. loss: 0.084 | test avg. loss: 4.668
Epochs: 14021 | epoch avg. loss: 0.089 | test avg. loss: 4.458
Epochs: 14022 | epoch avg. loss: 0.069 | test avg. loss: 4.363


 28%|██▊       | 14026/50000 [20:58<43:15, 13.86it/s]

Epochs: 14023 | epoch avg. loss: 0.066 | test avg. loss: 4.179
Epochs: 14024 | epoch avg. loss: 0.052 | test avg. loss: 4.246
Epochs: 14025 | epoch avg. loss: 0.035 | test avg. loss: 4.348


 28%|██▊       | 14028/50000 [20:59<45:26, 13.19it/s]

Epochs: 14026 | epoch avg. loss: 0.063 | test avg. loss: 4.367
Epochs: 14027 | epoch avg. loss: 0.057 | test avg. loss: 4.562
Epochs: 14028 | epoch avg. loss: 0.142 | test avg. loss: 4.245


                                                     

Epochs: 14029 | epoch avg. loss: 0.065 | test avg. loss: 4.185
Epochs: 14030 | epoch avg. loss: 0.100 | test avg. loss: 4.399
Epochs: 14031 | epoch avg. loss: 0.180 | test avg. loss: 4.392


 28%|██▊       | 14034/50000 [20:59<41:47, 14.34it/s]

Epochs: 14032 | epoch avg. loss: 0.078 | test avg. loss: 4.271
Epochs: 14033 | epoch avg. loss: 0.094 | test avg. loss: 4.488
Epochs: 14034 | epoch avg. loss: 0.160 | test avg. loss: 4.076


 28%|██▊       | 14038/50000 [20:59<42:19, 14.16it/s]

Epochs: 14035 | epoch avg. loss: 0.199 | test avg. loss: 4.175
Epochs: 14036 | epoch avg. loss: 0.141 | test avg. loss: 4.798
Epochs: 14037 | epoch avg. loss: 0.307 | test avg. loss: 4.398


 28%|██▊       | 14040/50000 [21:00<42:24, 14.14it/s]

Epochs: 14038 | epoch avg. loss: 0.195 | test avg. loss: 4.475
Epochs: 14039 | epoch avg. loss: 0.365 | test avg. loss: 4.495
Epochs: 14040 | epoch avg. loss: 0.187 | test avg. loss: 4.277


 28%|██▊       | 14044/50000 [21:00<42:23, 14.13it/s]

Epochs: 14041 | epoch avg. loss: 0.121 | test avg. loss: 4.004
Epochs: 14042 | epoch avg. loss: 0.104 | test avg. loss: 4.139
Epochs: 14043 | epoch avg. loss: 0.089 | test avg. loss: 4.381
Epochs: 14044 | epoch avg. loss: 0.096 | test avg. loss: 4.222


 28%|██▊       | 14048/50000 [21:00<40:45, 14.70it/s]

Epochs: 14045 | epoch avg. loss: 0.071 | test avg. loss: 4.294
Epochs: 14046 | epoch avg. loss: 0.060 | test avg. loss: 4.653
Epochs: 14047 | epoch avg. loss: 0.181 | test avg. loss: 4.143
Epochs: 14048 | epoch avg. loss: 0.053 | test avg. loss: 4.110


 28%|██▊       | 14052/50000 [21:00<40:35, 14.76it/s]

Epochs: 14049 | epoch avg. loss: 0.075 | test avg. loss: 4.339
Epochs: 14050 | epoch avg. loss: 0.070 | test avg. loss: 4.289
Epochs: 14051 | epoch avg. loss: 0.163 | test avg. loss: 4.488


                                                     

Epochs: 14052 | epoch avg. loss: 0.097 | test avg. loss: 4.486
Epochs: 14053 | epoch avg. loss: 0.070 | test avg. loss: 4.195
Epochs: 14054 | epoch avg. loss: 0.139 | test avg. loss: 4.188


 28%|██▊       | 14058/50000 [21:01<39:47, 15.06it/s]

Epochs: 14055 | epoch avg. loss: 0.065 | test avg. loss: 4.704
Epochs: 14056 | epoch avg. loss: 0.235 | test avg. loss: 4.284
Epochs: 14057 | epoch avg. loss: 0.041 | test avg. loss: 4.230
Epochs: 14058 | epoch avg. loss: 0.044 | test avg. loss: 4.256


 28%|██▊       | 14062/50000 [21:01<39:03, 15.33it/s]

Epochs: 14059 | epoch avg. loss: 0.036 | test avg. loss: 4.291
Epochs: 14060 | epoch avg. loss: 0.037 | test avg. loss: 4.450
Epochs: 14061 | epoch avg. loss: 0.061 | test avg. loss: 4.390
Epochs: 14062 | epoch avg. loss: 0.033 | test avg. loss: 4.290


 28%|██▊       | 14066/50000 [21:01<39:01, 15.35it/s]

Epochs: 14063 | epoch avg. loss: 0.031 | test avg. loss: 4.275
Epochs: 14064 | epoch avg. loss: 0.032 | test avg. loss: 4.162
Epochs: 14065 | epoch avg. loss: 0.094 | test avg. loss: 4.210
Epochs: 14066 | epoch avg. loss: 0.051 | test avg. loss: 4.445


 28%|██▊       | 14070/50000 [21:02<43:39, 13.72it/s]

Epochs: 14067 | epoch avg. loss: 0.073 | test avg. loss: 4.288
Epochs: 14068 | epoch avg. loss: 0.029 | test avg. loss: 4.317
Epochs: 14069 | epoch avg. loss: 0.057 | test avg. loss: 4.280


 28%|██▊       | 14072/50000 [21:02<44:51, 13.35it/s]

Epochs: 14070 | epoch avg. loss: 0.032 | test avg. loss: 4.313
Epochs: 14071 | epoch avg. loss: 0.037 | test avg. loss: 4.500
Epochs: 14072 | epoch avg. loss: 0.057 | test avg. loss: 4.297


 28%|██▊       | 14076/50000 [21:02<43:48, 13.66it/s]

Epochs: 14073 | epoch avg. loss: 0.144 | test avg. loss: 4.244
Epochs: 14074 | epoch avg. loss: 0.073 | test avg. loss: 4.331
Epochs: 14075 | epoch avg. loss: 0.063 | test avg. loss: 4.265


 28%|██▊       | 14080/50000 [21:02<40:59, 14.61it/s]

Epochs: 14076 | epoch avg. loss: 0.036 | test avg. loss: 4.207
Epochs: 14077 | epoch avg. loss: 0.102 | test avg. loss: 4.245
Epochs: 14078 | epoch avg. loss: 0.031 | test avg. loss: 4.278
Epochs: 14079 | epoch avg. loss: 0.028 | test avg. loss: 4.313


 28%|██▊       | 14082/50000 [21:02<41:28, 14.43it/s]

Epochs: 14080 | epoch avg. loss: 0.027 | test avg. loss: 4.254
Epochs: 14081 | epoch avg. loss: 0.042 | test avg. loss: 4.213
Epochs: 14082 | epoch avg. loss: 0.073 | test avg. loss: 4.261


 28%|██▊       | 14086/50000 [21:03<43:03, 13.90it/s]

Epochs: 14083 | epoch avg. loss: 0.030 | test avg. loss: 4.343
Epochs: 14084 | epoch avg. loss: 0.042 | test avg. loss: 4.399
Epochs: 14085 | epoch avg. loss: 0.064 | test avg. loss: 4.322
Epochs: 14086 | epoch avg. loss: 0.035 | test avg. loss: 4.213


 28%|██▊       | 14090/50000 [21:03<42:59, 13.92it/s]

Epochs: 14087 | epoch avg. loss: 0.082 | test avg. loss: 4.254
Epochs: 14088 | epoch avg. loss: 0.078 | test avg. loss: 4.678
Epochs: 14089 | epoch avg. loss: 0.206 | test avg. loss: 4.131


 28%|██▊       | 14092/50000 [21:03<43:10, 13.86it/s]

Epochs: 14090 | epoch avg. loss: 0.113 | test avg. loss: 4.220
Epochs: 14091 | epoch avg. loss: 0.128 | test avg. loss: 4.504
Epochs: 14092 | epoch avg. loss: 0.109 | test avg. loss: 4.647


 28%|██▊       | 14096/50000 [21:03<44:19, 13.50it/s]

Epochs: 14093 | epoch avg. loss: 0.132 | test avg. loss: 4.293
Epochs: 14094 | epoch avg. loss: 0.037 | test avg. loss: 4.241
Epochs: 14095 | epoch avg. loss: 0.039 | test avg. loss: 4.263


 28%|██▊       | 14098/50000 [21:04<45:19, 13.20it/s]

Epochs: 14096 | epoch avg. loss: 0.042 | test avg. loss: 4.249
Epochs: 14097 | epoch avg. loss: 0.178 | test avg. loss: 4.311
Epochs: 14098 | epoch avg. loss: 0.076 | test avg. loss: 4.744


 28%|██▊       | 14098/50000 [21:04<45:19, 13.20it/s]

Epochs: 14099 | epoch avg. loss: 0.237 | test avg. loss: 4.240


 28%|██▊       | 14103/50000 [21:05<1:57:07,  5.11it/s]

Epochs: 14100 | epoch avg. loss: 0.060 | test avg. loss: 4.024
Epochs: 14101 | epoch avg. loss: 0.112 | test avg. loss: 4.208
Epochs: 14102 | epoch avg. loss: 0.150 | test avg. loss: 4.375


 28%|██▊       | 14105/50000 [21:06<1:33:34,  6.39it/s]

Epochs: 14103 | epoch avg. loss: 0.085 | test avg. loss: 4.328
Epochs: 14104 | epoch avg. loss: 0.173 | test avg. loss: 4.528
Epochs: 14105 | epoch avg. loss: 0.105 | test avg. loss: 4.441


 28%|██▊       | 14109/50000 [21:06<1:08:25,  8.74it/s]

Epochs: 14106 | epoch avg. loss: 0.068 | test avg. loss: 4.250
Epochs: 14107 | epoch avg. loss: 0.267 | test avg. loss: 4.157
Epochs: 14108 | epoch avg. loss: 0.066 | test avg. loss: 4.250


 28%|██▊       | 14111/50000 [21:06<1:00:03,  9.96it/s]

Epochs: 14109 | epoch avg. loss: 0.037 | test avg. loss: 4.338
Epochs: 14110 | epoch avg. loss: 0.039 | test avg. loss: 4.344
Epochs: 14111 | epoch avg. loss: 0.033 | test avg. loss: 4.255


 28%|██▊       | 14115/50000 [21:06<49:40, 12.04it/s]

Epochs: 14112 | epoch avg. loss: 0.029 | test avg. loss: 4.269
Epochs: 14113 | epoch avg. loss: 0.033 | test avg. loss: 4.237
Epochs: 14114 | epoch avg. loss: 0.036 | test avg. loss: 4.280


 28%|██▊       | 14117/50000 [21:07<52:02, 11.49it/s]

Epochs: 14115 | epoch avg. loss: 0.034 | test avg. loss: 4.351
Epochs: 14116 | epoch avg. loss: 0.030 | test avg. loss: 4.436
Epochs: 14117 | epoch avg. loss: 0.051 | test avg. loss: 4.329


 28%|██▊       | 14121/50000 [21:07<51:45, 11.55it/s]

Epochs: 14118 | epoch avg. loss: 0.030 | test avg. loss: 4.225
Epochs: 14119 | epoch avg. loss: 0.033 | test avg. loss: 4.444
Epochs: 14120 | epoch avg. loss: 0.123 | test avg. loss: 4.314


 28%|██▊       | 14123/50000 [21:07<52:39, 11.35it/s]

Epochs: 14121 | epoch avg. loss: 0.049 | test avg. loss: 4.284
Epochs: 14122 | epoch avg. loss: 0.187 | test avg. loss: 4.393
Epochs: 14123 | epoch avg. loss: 0.074 | test avg. loss: 4.625


 28%|██▊       | 14127/50000 [21:07<52:27, 11.40it/s]

Epochs: 14124 | epoch avg. loss: 0.102 | test avg. loss: 4.280
Epochs: 14125 | epoch avg. loss: 0.114 | test avg. loss: 4.210
Epochs: 14126 | epoch avg. loss: 0.048 | test avg. loss: 4.364


 28%|██▊       | 14129/50000 [21:08<52:22, 11.41it/s]

Epochs: 14127 | epoch avg. loss: 0.056 | test avg. loss: 4.224
Epochs: 14128 | epoch avg. loss: 0.053 | test avg. loss: 4.289
Epochs: 14129 | epoch avg. loss: 0.080 | test avg. loss: 4.814


 28%|██▊       | 14133/50000 [21:08<48:15, 12.39it/s]

Epochs: 14130 | epoch avg. loss: 0.224 | test avg. loss: 4.284
Epochs: 14131 | epoch avg. loss: 0.309 | test avg. loss: 4.449
Epochs: 14132 | epoch avg. loss: 0.366 | test avg. loss: 4.697


 28%|██▊       | 14135/50000 [21:08<47:55, 12.47it/s]

Epochs: 14133 | epoch avg. loss: 0.165 | test avg. loss: 4.784
Epochs: 14134 | epoch avg. loss: 0.147 | test avg. loss: 4.207
Epochs: 14135 | epoch avg. loss: 0.201 | test avg. loss: 4.091


 28%|██▊       | 14139/50000 [21:08<42:58, 13.91it/s]

Epochs: 14136 | epoch avg. loss: 0.070 | test avg. loss: 4.335
Epochs: 14137 | epoch avg. loss: 0.083 | test avg. loss: 4.340
Epochs: 14138 | epoch avg. loss: 0.238 | test avg. loss: 4.453
Epochs: 14139 | epoch avg. loss: 0.078 | test avg. loss: 4.704




Epochs: 14140 | epoch avg. loss: 0.103 | test avg. loss: 4.349
Epochs: 14141 | epoch avg. loss: 0.068 | test avg. loss: 4.334


 28%|██▊       | 14145/50000 [21:09<46:09, 12.95it/s]

Epochs: 14142 | epoch avg. loss: 0.075 | test avg. loss: 4.384
Epochs: 14143 | epoch avg. loss: 0.077 | test avg. loss: 4.439
Epochs: 14144 | epoch avg. loss: 0.164 | test avg. loss: 5.134




Epochs: 14145 | epoch avg. loss: 0.475 | test avg. loss: 4.412
Epochs: 14146 | epoch avg. loss: 0.151 | test avg. loss: 4.554
Epochs: 14147 | epoch avg. loss: 0.764 | test avg. loss: 4.257


 28%|██▊       | 14151/50000 [21:09<43:29, 13.74it/s]

Epochs: 14148 | epoch avg. loss: 0.219 | test avg. loss: 5.359
Epochs: 14149 | epoch avg. loss: 0.511 | test avg. loss: 4.377
Epochs: 14150 | epoch avg. loss: 0.358 | test avg. loss: 4.370


 28%|██▊       | 14153/50000 [21:09<43:10, 13.84it/s]

Epochs: 14151 | epoch avg. loss: 0.168 | test avg. loss: 5.177
Epochs: 14152 | epoch avg. loss: 0.585 | test avg. loss: 4.343
Epochs: 14153 | epoch avg. loss: 0.438 | test avg. loss: 4.602


 28%|██▊       | 14157/50000 [21:10<49:08, 12.16it/s]

Epochs: 14154 | epoch avg. loss: 0.452 | test avg. loss: 5.281
Epochs: 14155 | epoch avg. loss: 0.612 | test avg. loss: 4.326
Epochs: 14156 | epoch avg. loss: 0.201 | test avg. loss: 4.245


 28%|██▊       | 14159/50000 [21:10<48:03, 12.43it/s]

Epochs: 14157 | epoch avg. loss: 0.187 | test avg. loss: 5.110
Epochs: 14158 | epoch avg. loss: 0.628 | test avg. loss: 4.248
Epochs: 14159 | epoch avg. loss: 0.151 | test avg. loss: 4.215


 28%|██▊       | 14163/50000 [21:10<47:02, 12.70it/s]

Epochs: 14160 | epoch avg. loss: 0.148 | test avg. loss: 5.076
Epochs: 14161 | epoch avg. loss: 0.462 | test avg. loss: 4.396
Epochs: 14162 | epoch avg. loss: 0.092 | test avg. loss: 4.350


 28%|██▊       | 14165/50000 [21:10<45:32, 13.12it/s]

Epochs: 14163 | epoch avg. loss: 0.074 | test avg. loss: 4.655
Epochs: 14164 | epoch avg. loss: 0.148 | test avg. loss: 4.428
Epochs: 14165 | epoch avg. loss: 0.088 | test avg. loss: 4.581


 28%|██▊       | 14169/50000 [21:11<49:51, 11.98it/s]

Epochs: 14166 | epoch avg. loss: 0.084 | test avg. loss: 4.434
Epochs: 14167 | epoch avg. loss: 0.062 | test avg. loss: 4.327
Epochs: 14168 | epoch avg. loss: 0.044 | test avg. loss: 4.171


 28%|██▊       | 14171/50000 [21:11<51:13, 11.66it/s]

Epochs: 14169 | epoch avg. loss: 0.036 | test avg. loss: 4.145
Epochs: 14170 | epoch avg. loss: 0.034 | test avg. loss: 4.148
Epochs: 14171 | epoch avg. loss: 0.069 | test avg. loss: 4.213


 28%|██▊       | 14175/50000 [21:11<51:01, 11.70it/s]

Epochs: 14172 | epoch avg. loss: 0.129 | test avg. loss: 4.414
Epochs: 14173 | epoch avg. loss: 0.072 | test avg. loss: 4.516
Epochs: 14174 | epoch avg. loss: 0.064 | test avg. loss: 4.250


 28%|██▊       | 14177/50000 [21:11<52:47, 11.31it/s]

Epochs: 14175 | epoch avg. loss: 0.202 | test avg. loss: 4.154
Epochs: 14176 | epoch avg. loss: 0.080 | test avg. loss: 4.430
Epochs: 14177 | epoch avg. loss: 0.099 | test avg. loss: 4.383


 28%|██▊       | 14181/50000 [21:12<53:24, 11.18it/s]

Epochs: 14178 | epoch avg. loss: 0.087 | test avg. loss: 4.478
Epochs: 14179 | epoch avg. loss: 0.096 | test avg. loss: 4.685
Epochs: 14180 | epoch avg. loss: 0.207 | test avg. loss: 4.444


 28%|██▊       | 14185/50000 [21:12<47:10, 12.66it/s]

Epochs: 14181 | epoch avg. loss: 0.113 | test avg. loss: 4.657
Epochs: 14182 | epoch avg. loss: 0.606 | test avg. loss: 4.503
Epochs: 14183 | epoch avg. loss: 0.310 | test avg. loss: 4.863
Epochs: 14184 | epoch avg. loss: 0.269 | test avg. loss: 4.311


 28%|██▊       | 14187/50000 [21:12<45:39, 13.07it/s]

Epochs: 14185 | epoch avg. loss: 0.191 | test avg. loss: 4.298
Epochs: 14186 | epoch avg. loss: 0.069 | test avg. loss: 4.492
Epochs: 14187 | epoch avg. loss: 0.082 | test avg. loss: 4.280


 28%|██▊       | 14189/50000 [21:12<48:20, 12.35it/s]

Epochs: 14188 | epoch avg. loss: 0.083 | test avg. loss: 4.369
Epochs: 14189 | epoch avg. loss: 0.050 | test avg. loss: 4.583


 28%|██▊       | 14193/50000 [21:13<51:39, 11.55it/s]

Epochs: 14190 | epoch avg. loss: 0.116 | test avg. loss: 4.288
Epochs: 14191 | epoch avg. loss: 0.046 | test avg. loss: 4.249
Epochs: 14192 | epoch avg. loss: 0.054 | test avg. loss: 4.465


 28%|██▊       | 14197/50000 [21:13<44:27, 13.42it/s]

Epochs: 14193 | epoch avg. loss: 0.110 | test avg. loss: 4.321
Epochs: 14194 | epoch avg. loss: 0.127 | test avg. loss: 4.333
Epochs: 14195 | epoch avg. loss: 0.105 | test avg. loss: 4.587
Epochs: 14196 | epoch avg. loss: 0.198 | test avg. loss: 4.468


 28%|██▊       | 14199/50000 [21:13<42:38, 13.99it/s]

Epochs: 14197 | epoch avg. loss: 0.127 | test avg. loss: 4.779
Epochs: 14198 | epoch avg. loss: 0.388 | test avg. loss: 5.080
Epochs: 14199 | epoch avg. loss: 0.433 | test avg. loss: 4.973


 28%|██▊       | 14203/50000 [21:15<2:01:36,  4.91it/s]

Epochs: 14200 | epoch avg. loss: 0.276 | test avg. loss: 4.179
Epochs: 14201 | epoch avg. loss: 0.225 | test avg. loss: 4.165
Epochs: 14202 | epoch avg. loss: 0.190 | test avg. loss: 5.362
Epochs: 14203 | epoch avg. loss: 0.663 | test avg. loss: 4.506


 28%|██▊       | 14207/50000 [21:15<1:18:48,  7.57it/s]

Epochs: 14204 | epoch avg. loss: 0.202 | test avg. loss: 4.500
Epochs: 14205 | epoch avg. loss: 0.175 | test avg. loss: 5.270
Epochs: 14206 | epoch avg. loss: 0.603 | test avg. loss: 4.322
Epochs: 14207 | epoch avg. loss: 0.120 | test avg. loss: 4.451


 28%|██▊       | 14211/50000 [21:15<57:49, 10.32it/s]

Epochs: 14208 | epoch avg. loss: 0.109 | test avg. loss: 5.088
Epochs: 14209 | epoch avg. loss: 0.266 | test avg. loss: 4.543
Epochs: 14210 | epoch avg. loss: 0.490 | test avg. loss: 4.449
Epochs: 14211 | epoch avg. loss: 0.238 | test avg. loss: 5.466


 28%|██▊       | 14215/50000 [21:15<48:43, 12.24it/s]

Epochs: 14212 | epoch avg. loss: 0.837 | test avg. loss: 4.367
Epochs: 14213 | epoch avg. loss: 0.152 | test avg. loss: 4.525
Epochs: 14214 | epoch avg. loss: 0.215 | test avg. loss: 4.739


 28%|██▊       | 14217/50000 [21:16<54:11, 11.01it/s]

Epochs: 14215 | epoch avg. loss: 0.134 | test avg. loss: 4.811
Epochs: 14216 | epoch avg. loss: 0.109 | test avg. loss: 4.343
Epochs: 14217 | epoch avg. loss: 0.319 | test avg. loss: 4.276


 28%|██▊       | 14221/50000 [21:16<46:36, 12.79it/s]

Epochs: 14218 | epoch avg. loss: 0.108 | test avg. loss: 5.288
Epochs: 14219 | epoch avg. loss: 0.401 | test avg. loss: 4.343
Epochs: 14220 | epoch avg. loss: 0.356 | test avg. loss: 4.159
Epochs: 14221 | epoch avg. loss: 0.181 | test avg. loss: 5.183


 28%|██▊       | 14225/50000 [21:16<42:20, 14.08it/s]

Epochs: 14222 | epoch avg. loss: 0.570 | test avg. loss: 4.187
Epochs: 14223 | epoch avg. loss: 0.384 | test avg. loss: 4.443
Epochs: 14224 | epoch avg. loss: 0.225 | test avg. loss: 5.919
Epochs: 14225 | epoch avg. loss: 0.589 | test avg. loss: 4.856


 28%|██▊       | 14229/50000 [21:16<43:01, 13.86it/s]

Epochs: 14226 | epoch avg. loss: 0.520 | test avg. loss: 4.585
Epochs: 14227 | epoch avg. loss: 0.183 | test avg. loss: 5.107
Epochs: 14228 | epoch avg. loss: 0.348 | test avg. loss: 4.564


 28%|██▊       | 14231/50000 [21:17<42:45, 13.94it/s]

Epochs: 14229 | epoch avg. loss: 0.539 | test avg. loss: 4.480
Epochs: 14230 | epoch avg. loss: 0.138 | test avg. loss: 4.974
Epochs: 14231 | epoch avg. loss: 0.181 | test avg. loss: 4.619


 28%|██▊       | 14235/50000 [21:17<41:41, 14.29it/s]

Epochs: 14232 | epoch avg. loss: 0.565 | test avg. loss: 4.576
Epochs: 14233 | epoch avg. loss: 0.432 | test avg. loss: 4.513
Epochs: 14234 | epoch avg. loss: 0.213 | test avg. loss: 4.084
Epochs: 14235 | epoch avg. loss: 0.119 | test avg. loss: 4.721


 28%|██▊       | 14239/50000 [21:17<39:51, 14.95it/s]

Epochs: 14236 | epoch avg. loss: 0.485 | test avg. loss: 4.378
Epochs: 14237 | epoch avg. loss: 0.174 | test avg. loss: 4.469
Epochs: 14238 | epoch avg. loss: 0.181 | test avg. loss: 5.009
Epochs: 14239 | epoch avg. loss: 0.334 | test avg. loss: 4.108


 28%|██▊       | 14243/50000 [21:17<39:58, 14.91it/s]

Epochs: 14240 | epoch avg. loss: 0.249 | test avg. loss: 4.113
Epochs: 14241 | epoch avg. loss: 0.192 | test avg. loss: 4.951
Epochs: 14242 | epoch avg. loss: 0.408 | test avg. loss: 4.534


 28%|██▊       | 14245/50000 [21:18<43:03, 13.84it/s]

Epochs: 14243 | epoch avg. loss: 0.625 | test avg. loss: 4.772
Epochs: 14244 | epoch avg. loss: 0.324 | test avg. loss: 5.262
Epochs: 14245 | epoch avg. loss: 0.368 | test avg. loss: 4.500


 28%|██▊       | 14249/50000 [21:18<43:08, 13.81it/s]

Epochs: 14246 | epoch avg. loss: 0.377 | test avg. loss: 4.399
Epochs: 14247 | epoch avg. loss: 0.291 | test avg. loss: 6.562
Epochs: 14248 | epoch avg. loss: 1.309 | test avg. loss: 4.607


 29%|██▊       | 14253/50000 [21:18<41:17, 14.43it/s]

Epochs: 14249 | epoch avg. loss: 0.427 | test avg. loss: 4.767
Epochs: 14250 | epoch avg. loss: 0.413 | test avg. loss: 5.438
Epochs: 14251 | epoch avg. loss: 0.393 | test avg. loss: 5.407
Epochs: 14252 | epoch avg. loss: 0.877 | test avg. loss: 5.538


 29%|██▊       | 14257/50000 [21:18<39:23, 15.12it/s]

Epochs: 14253 | epoch avg. loss: 0.656 | test avg. loss: 4.985
Epochs: 14254 | epoch avg. loss: 1.286 | test avg. loss: 4.737
Epochs: 14255 | epoch avg. loss: 0.624 | test avg. loss: 5.706
Epochs: 14256 | epoch avg. loss: 0.791 | test avg. loss: 5.764


 29%|██▊       | 14259/50000 [21:19<45:48, 13.00it/s]

Epochs: 14257 | epoch avg. loss: 2.065 | test avg. loss: 4.724
Epochs: 14258 | epoch avg. loss: 0.466 | test avg. loss: 5.185
Epochs: 14259 | epoch avg. loss: 0.381 | test avg. loss: 4.658


 29%|██▊       | 14263/50000 [21:19<45:21, 13.13it/s]

Epochs: 14260 | epoch avg. loss: 0.527 | test avg. loss: 4.829
Epochs: 14261 | epoch avg. loss: 0.604 | test avg. loss: 3.719
Epochs: 14262 | epoch avg. loss: 0.493 | test avg. loss: 3.657


 29%|██▊       | 14267/50000 [21:19<41:42, 14.28it/s]

Epochs: 14263 | epoch avg. loss: 0.478 | test avg. loss: 5.073
Epochs: 14264 | epoch avg. loss: 0.631 | test avg. loss: 5.348
Epochs: 14265 | epoch avg. loss: 1.555 | test avg. loss: 5.557
Epochs: 14266 | epoch avg. loss: 0.922 | test avg. loss: 4.983


 29%|██▊       | 14271/50000 [21:19<39:19, 15.14it/s]

Epochs: 14267 | epoch avg. loss: 1.553 | test avg. loss: 4.976
Epochs: 14268 | epoch avg. loss: 0.757 | test avg. loss: 5.991
Epochs: 14269 | epoch avg. loss: 0.775 | test avg. loss: 4.801
Epochs: 14270 | epoch avg. loss: 0.808 | test avg. loss: 4.753




Epochs: 14271 | epoch avg. loss: 0.565 | test avg. loss: 4.324
Epochs: 14272 | epoch avg. loss: 0.238 | test avg. loss: 4.513
Epochs: 14273 | epoch avg. loss: 0.247 | test avg. loss: 5.378


 29%|██▊       | 14277/50000 [21:20<40:55, 14.55it/s]

Epochs: 14274 | epoch avg. loss: 0.390 | test avg. loss: 4.667
Epochs: 14275 | epoch avg. loss: 0.659 | test avg. loss: 4.321
Epochs: 14276 | epoch avg. loss: 0.150 | test avg. loss: 3.954


 29%|██▊       | 14281/50000 [21:20<41:03, 14.50it/s]

Epochs: 14277 | epoch avg. loss: 0.182 | test avg. loss: 3.990
Epochs: 14278 | epoch avg. loss: 0.188 | test avg. loss: 4.798
Epochs: 14279 | epoch avg. loss: 0.307 | test avg. loss: 4.384
Epochs: 14280 | epoch avg. loss: 0.244 | test avg. loss: 4.550


 29%|██▊       | 14285/50000 [21:20<39:11, 15.19it/s]

Epochs: 14281 | epoch avg. loss: 0.096 | test avg. loss: 4.595
Epochs: 14282 | epoch avg. loss: 0.096 | test avg. loss: 4.299
Epochs: 14283 | epoch avg. loss: 0.139 | test avg. loss: 4.373
Epochs: 14284 | epoch avg. loss: 0.093 | test avg. loss: 4.326


 29%|██▊       | 14287/50000 [21:21<43:14, 13.77it/s]

Epochs: 14285 | epoch avg. loss: 0.080 | test avg. loss: 4.417
Epochs: 14286 | epoch avg. loss: 0.084 | test avg. loss: 4.918
Epochs: 14287 | epoch avg. loss: 0.183 | test avg. loss: 4.442


 29%|██▊       | 14291/50000 [21:21<43:16, 13.75it/s]

Epochs: 14288 | epoch avg. loss: 0.114 | test avg. loss: 4.484
Epochs: 14289 | epoch avg. loss: 0.097 | test avg. loss: 4.577
Epochs: 14290 | epoch avg. loss: 0.115 | test avg. loss: 4.560


 29%|██▊       | 14293/50000 [21:21<43:35, 13.65it/s]

Epochs: 14291 | epoch avg. loss: 0.291 | test avg. loss: 5.128
Epochs: 14292 | epoch avg. loss: 0.269 | test avg. loss: 4.544
Epochs: 14293 | epoch avg. loss: 0.269 | test avg. loss: 4.566


 29%|██▊       | 14297/50000 [21:21<41:46, 14.25it/s]

Epochs: 14294 | epoch avg. loss: 0.134 | test avg. loss: 5.031
Epochs: 14295 | epoch avg. loss: 0.243 | test avg. loss: 4.304
Epochs: 14296 | epoch avg. loss: 0.218 | test avg. loss: 4.204
Epochs: 14297 | epoch avg. loss: 0.054 | test avg. loss: 4.254


 29%|██▊       | 14299/50000 [21:21<42:30, 14.00it/s]

Epochs: 14298 | epoch avg. loss: 0.080 | test avg. loss: 4.320
Epochs: 14299 | epoch avg. loss: 0.174 | test avg. loss: 5.116


 29%|██▊       | 14303/50000 [21:23<2:07:36,  4.66it/s]

Epochs: 14300 | epoch avg. loss: 0.280 | test avg. loss: 4.432
Epochs: 14301 | epoch avg. loss: 0.247 | test avg. loss: 4.386
Epochs: 14302 | epoch avg. loss: 0.101 | test avg. loss: 4.749


 29%|██▊       | 14305/50000 [21:23<1:45:34,  5.64it/s]

Epochs: 14303 | epoch avg. loss: 0.151 | test avg. loss: 4.395
Epochs: 14304 | epoch avg. loss: 0.226 | test avg. loss: 4.720
Epochs: 14305 | epoch avg. loss: 0.228 | test avg. loss: 4.480


 29%|██▊       | 14309/50000 [21:24<1:19:38,  7.47it/s]

Epochs: 14306 | epoch avg. loss: 0.125 | test avg. loss: 4.591
Epochs: 14307 | epoch avg. loss: 0.166 | test avg. loss: 4.692
Epochs: 14308 | epoch avg. loss: 0.149 | test avg. loss: 4.414


 29%|██▊       | 14311/50000 [21:24<1:08:40,  8.66it/s]

Epochs: 14309 | epoch avg. loss: 0.243 | test avg. loss: 4.939
Epochs: 14310 | epoch avg. loss: 0.365 | test avg. loss: 4.338
Epochs: 14311 | epoch avg. loss: 0.217 | test avg. loss: 4.526


 29%|██▊       | 14315/50000 [21:24<54:02, 11.01it/s]

Epochs: 14312 | epoch avg. loss: 0.154 | test avg. loss: 5.079
Epochs: 14313 | epoch avg. loss: 0.244 | test avg. loss: 5.076
Epochs: 14314 | epoch avg. loss: 0.845 | test avg. loss: 5.201
Epochs: 14315 | epoch avg. loss: 0.583 | test avg. loss: 4.633


 29%|██▊       | 14319/50000 [21:24<48:38, 12.23it/s]

Epochs: 14316 | epoch avg. loss: 0.714 | test avg. loss: 4.630
Epochs: 14317 | epoch avg. loss: 0.634 | test avg. loss: 7.510
Epochs: 14318 | epoch avg. loss: 1.697 | test avg. loss: 6.521


 29%|██▊       | 14321/50000 [21:24<46:47, 12.71it/s]

Epochs: 14319 | epoch avg. loss: 2.817 | test avg. loss: 5.343
Epochs: 14320 | epoch avg. loss: 1.055 | test avg. loss: 4.463
Epochs: 14321 | epoch avg. loss: 0.418 | test avg. loss: 4.720


 29%|██▊       | 14325/50000 [21:25<43:43, 13.60it/s]

Epochs: 14322 | epoch avg. loss: 0.512 | test avg. loss: 6.343
Epochs: 14323 | epoch avg. loss: 0.849 | test avg. loss: 5.343
Epochs: 14324 | epoch avg. loss: 0.655 | test avg. loss: 5.414
Epochs: 14325 | epoch avg. loss: 0.439 | test avg. loss: 4.449


                                                     

Epochs: 14326 | epoch avg. loss: 0.171 | test avg. loss: 4.196
Epochs: 14327 | epoch avg. loss: 0.154 | test avg. loss: 4.628
Epochs: 14328 | epoch avg. loss: 0.216 | test avg. loss: 4.256


 29%|██▊       | 14333/50000 [21:25<40:24, 14.71it/s]

Epochs: 14329 | epoch avg. loss: 0.316 | test avg. loss: 4.383
Epochs: 14330 | epoch avg. loss: 0.083 | test avg. loss: 4.481
Epochs: 14331 | epoch avg. loss: 0.084 | test avg. loss: 4.333
Epochs: 14332 | epoch avg. loss: 0.096 | test avg. loss: 4.646


 29%|██▊       | 14335/50000 [21:25<41:08, 14.45it/s]

Epochs: 14333 | epoch avg. loss: 0.129 | test avg. loss: 4.323
Epochs: 14334 | epoch avg. loss: 0.248 | test avg. loss: 4.587
Epochs: 14335 | epoch avg. loss: 0.248 | test avg. loss: 4.425


 29%|██▊       | 14339/50000 [21:26<46:25, 12.80it/s]

Epochs: 14336 | epoch avg. loss: 0.255 | test avg. loss: 4.534
Epochs: 14337 | epoch avg. loss: 0.211 | test avg. loss: 5.067
Epochs: 14338 | epoch avg. loss: 0.261 | test avg. loss: 4.316


 29%|██▊       | 14341/50000 [21:26<46:15, 12.85it/s]

Epochs: 14339 | epoch avg. loss: 0.156 | test avg. loss: 4.621
Epochs: 14340 | epoch avg. loss: 0.341 | test avg. loss: 4.060
Epochs: 14341 | epoch avg. loss: 0.377 | test avg. loss: 4.237


 29%|██▊       | 14345/50000 [21:26<48:34, 12.24it/s]

Epochs: 14342 | epoch avg. loss: 0.176 | test avg. loss: 4.731
Epochs: 14343 | epoch avg. loss: 0.153 | test avg. loss: 4.575
Epochs: 14344 | epoch avg. loss: 0.144 | test avg. loss: 5.047


 29%|██▊       | 14347/50000 [21:26<50:26, 11.78it/s]

Epochs: 14345 | epoch avg. loss: 0.157 | test avg. loss: 4.613
Epochs: 14346 | epoch avg. loss: 0.222 | test avg. loss: 4.844
Epochs: 14347 | epoch avg. loss: 0.232 | test avg. loss: 4.245


 29%|██▊       | 14351/50000 [21:27<47:04, 12.62it/s]

Epochs: 14348 | epoch avg. loss: 0.130 | test avg. loss: 4.394
Epochs: 14349 | epoch avg. loss: 0.202 | test avg. loss: 4.293
Epochs: 14350 | epoch avg. loss: 0.125 | test avg. loss: 4.338


 29%|██▊       | 14355/50000 [21:27<42:04, 14.12it/s]

Epochs: 14351 | epoch avg. loss: 0.168 | test avg. loss: 4.909
Epochs: 14352 | epoch avg. loss: 0.182 | test avg. loss: 4.437
Epochs: 14353 | epoch avg. loss: 0.134 | test avg. loss: 4.559
Epochs: 14354 | epoch avg. loss: 0.148 | test avg. loss: 4.286


 29%|██▊       | 14359/50000 [21:27<40:02, 14.83it/s]

Epochs: 14355 | epoch avg. loss: 0.106 | test avg. loss: 4.186
Epochs: 14356 | epoch avg. loss: 0.120 | test avg. loss: 4.675
Epochs: 14357 | epoch avg. loss: 0.168 | test avg. loss: 4.436
Epochs: 14358 | epoch avg. loss: 0.120 | test avg. loss: 4.662


 29%|██▊       | 14361/50000 [21:27<41:42, 14.24it/s]

Epochs: 14359 | epoch avg. loss: 0.045 | test avg. loss: 4.582
Epochs: 14360 | epoch avg. loss: 0.055 | test avg. loss: 4.469
Epochs: 14361 | epoch avg. loss: 0.043 | test avg. loss: 4.485


 29%|██▊       | 14365/50000 [21:28<42:30, 13.97it/s]

Epochs: 14362 | epoch avg. loss: 0.062 | test avg. loss: 4.219
Epochs: 14363 | epoch avg. loss: 0.072 | test avg. loss: 4.314
Epochs: 14364 | epoch avg. loss: 0.038 | test avg. loss: 4.417


 29%|██▊       | 14367/50000 [21:28<44:24, 13.37it/s]

Epochs: 14365 | epoch avg. loss: 0.039 | test avg. loss: 4.447
Epochs: 14366 | epoch avg. loss: 0.056 | test avg. loss: 4.667
Epochs: 14367 | epoch avg. loss: 0.059 | test avg. loss: 4.386


 29%|██▊       | 14371/50000 [21:28<41:05, 14.45it/s]

Epochs: 14368 | epoch avg. loss: 0.071 | test avg. loss: 4.529
Epochs: 14369 | epoch avg. loss: 0.119 | test avg. loss: 4.313
Epochs: 14370 | epoch avg. loss: 0.049 | test avg. loss: 4.246
Epochs: 14371 | epoch avg. loss: 0.057 | test avg. loss: 4.440


 29%|██▉       | 14375/50000 [21:28<40:36, 14.62it/s]

Epochs: 14372 | epoch avg. loss: 0.060 | test avg. loss: 4.369
Epochs: 14373 | epoch avg. loss: 0.055 | test avg. loss: 4.478
Epochs: 14374 | epoch avg. loss: 0.038 | test avg. loss: 4.549


 29%|██▉       | 14377/50000 [21:29<48:11, 12.32it/s]

Epochs: 14375 | epoch avg. loss: 0.049 | test avg. loss: 4.368
Epochs: 14376 | epoch avg. loss: 0.138 | test avg. loss: 4.576




Epochs: 14377 | epoch avg. loss: 0.083 | test avg. loss: 4.345
Epochs: 14378 | epoch avg. loss: 0.116 | test avg. loss: 4.370


 29%|██▉       | 14383/50000 [21:29<52:10, 11.38it/s]

Epochs: 14379 | epoch avg. loss: 0.058 | test avg. loss: 4.541
Epochs: 14380 | epoch avg. loss: 0.060 | test avg. loss: 4.409
Epochs: 14381 | epoch avg. loss: 0.031 | test avg. loss: 4.473
Epochs: 14382 | epoch avg. loss: 0.035 | test avg. loss: 4.546


 29%|██▉       | 14385/50000 [21:29<54:29, 10.89it/s]

Epochs: 14383 | epoch avg. loss: 0.040 | test avg. loss: 4.390
Epochs: 14384 | epoch avg. loss: 0.076 | test avg. loss: 4.417
Epochs: 14385 | epoch avg. loss: 0.034 | test avg. loss: 4.564


 29%|██▉       | 14389/50000 [21:30<55:32, 10.69it/s]

Epochs: 14386 | epoch avg. loss: 0.050 | test avg. loss: 4.329
Epochs: 14387 | epoch avg. loss: 0.055 | test avg. loss: 4.368
Epochs: 14388 | epoch avg. loss: 0.024 | test avg. loss: 4.271


 29%|██▉       | 14391/50000 [21:30<53:33, 11.08it/s]

Epochs: 14389 | epoch avg. loss: 0.069 | test avg. loss: 4.309
Epochs: 14390 | epoch avg. loss: 0.045 | test avg. loss: 4.511
Epochs: 14391 | epoch avg. loss: 0.066 | test avg. loss: 4.350


 29%|██▉       | 14395/50000 [21:30<49:35, 11.97it/s]

Epochs: 14392 | epoch avg. loss: 0.032 | test avg. loss: 4.488
Epochs: 14393 | epoch avg. loss: 0.039 | test avg. loss: 4.374
Epochs: 14394 | epoch avg. loss: 0.055 | test avg. loss: 4.395


 29%|██▉       | 14397/50000 [21:30<48:23, 12.26it/s]

Epochs: 14395 | epoch avg. loss: 0.037 | test avg. loss: 4.437
Epochs: 14396 | epoch avg. loss: 0.040 | test avg. loss: 4.305
Epochs: 14397 | epoch avg. loss: 0.059 | test avg. loss: 4.721


 29%|██▉       | 14399/50000 [21:31<52:09, 11.38it/s]

Epochs: 14398 | epoch avg. loss: 0.128 | test avg. loss: 4.317
Epochs: 14399 | epoch avg. loss: 0.182 | test avg. loss: 4.500


 29%|██▉       | 14403/50000 [21:32<2:20:50,  4.21it/s]

Epochs: 14400 | epoch avg. loss: 0.085 | test avg. loss: 4.451
Epochs: 14401 | epoch avg. loss: 0.065 | test avg. loss: 4.361
Epochs: 14402 | epoch avg. loss: 0.067 | test avg. loss: 4.607


 29%|██▉       | 14407/50000 [21:33<1:29:05,  6.66it/s]

Epochs: 14403 | epoch avg. loss: 0.101 | test avg. loss: 4.350
Epochs: 14404 | epoch avg. loss: 0.068 | test avg. loss: 4.464
Epochs: 14405 | epoch avg. loss: 0.066 | test avg. loss: 4.431
Epochs: 14406 | epoch avg. loss: 0.056 | test avg. loss: 4.351


 29%|██▉       | 14409/50000 [21:33<1:13:52,  8.03it/s]

Epochs: 14407 | epoch avg. loss: 0.062 | test avg. loss: 4.636
Epochs: 14408 | epoch avg. loss: 0.071 | test avg. loss: 4.370
Epochs: 14409 | epoch avg. loss: 0.164 | test avg. loss: 4.573




Epochs: 14410 | epoch avg. loss: 0.189 | test avg. loss: 4.598
Epochs: 14411 | epoch avg. loss: 0.094 | test avg. loss: 4.421
Epochs: 14412 | epoch avg. loss: 0.131 | test avg. loss: 4.787


 29%|██▉       | 14417/50000 [21:33<47:44, 12.42it/s]

Epochs: 14413 | epoch avg. loss: 0.198 | test avg. loss: 4.407
Epochs: 14414 | epoch avg. loss: 0.125 | test avg. loss: 4.493
Epochs: 14415 | epoch avg. loss: 0.180 | test avg. loss: 4.833
Epochs: 14416 | epoch avg. loss: 0.216 | test avg. loss: 4.431


 29%|██▉       | 14419/50000 [21:34<49:28, 11.99it/s]

Epochs: 14417 | epoch avg. loss: 0.201 | test avg. loss: 4.707
Epochs: 14418 | epoch avg. loss: 0.097 | test avg. loss: 4.627
Epochs: 14419 | epoch avg. loss: 0.068 | test avg. loss: 4.564


 29%|██▉       | 14423/50000 [21:34<48:29, 12.23it/s]

Epochs: 14420 | epoch avg. loss: 0.051 | test avg. loss: 4.613
Epochs: 14421 | epoch avg. loss: 0.072 | test avg. loss: 4.316
Epochs: 14422 | epoch avg. loss: 0.052 | test avg. loss: 4.571


 29%|██▉       | 14425/50000 [21:34<45:51, 12.93it/s]

Epochs: 14423 | epoch avg. loss: 0.190 | test avg. loss: 4.448
Epochs: 14424 | epoch avg. loss: 0.256 | test avg. loss: 4.722
Epochs: 14425 | epoch avg. loss: 0.196 | test avg. loss: 5.075


 29%|██▉       | 14429/50000 [21:34<42:54, 13.82it/s]

Epochs: 14426 | epoch avg. loss: 0.200 | test avg. loss: 4.713
Epochs: 14427 | epoch avg. loss: 0.376 | test avg. loss: 4.740
Epochs: 14428 | epoch avg. loss: 0.154 | test avg. loss: 4.225
Epochs: 14429 | epoch avg. loss: 0.079 | test avg. loss: 4.417


 29%|██▉       | 14431/50000 [21:34<41:26, 14.30it/s]

Epochs: 14430 | epoch avg. loss: 0.109 | test avg. loss: 4.466
Epochs: 14431 | epoch avg. loss: 0.102 | test avg. loss: 4.596
Epochs: 14432 | epoch avg. loss: 0.139 | test avg. loss: 5.099


 29%|██▉       | 14437/50000 [21:35<43:41, 13.57it/s]

Epochs: 14433 | epoch avg. loss: 0.171 | test avg. loss: 4.552
Epochs: 14434 | epoch avg. loss: 0.129 | test avg. loss: 4.714
Epochs: 14435 | epoch avg. loss: 0.138 | test avg. loss: 4.289
Epochs: 14436 | epoch avg. loss: 0.131 | test avg. loss: 4.280




Epochs: 14437 | epoch avg. loss: 0.107 | test avg. loss: 4.681
Epochs: 14438 | epoch avg. loss: 0.111 | test avg. loss: 4.579
Epochs: 14439 | epoch avg. loss: 0.182 | test avg. loss: 5.201


 29%|██▉       | 14443/50000 [21:35<41:01, 14.45it/s]

Epochs: 14440 | epoch avg. loss: 0.243 | test avg. loss: 4.542
Epochs: 14441 | epoch avg. loss: 0.180 | test avg. loss: 4.450
Epochs: 14442 | epoch avg. loss: 0.076 | test avg. loss: 4.327
Epochs: 14443 | epoch avg. loss: 0.094 | test avg. loss: 4.268


 29%|██▉       | 14447/50000 [21:35<43:19, 13.68it/s]

Epochs: 14444 | epoch avg. loss: 0.079 | test avg. loss: 4.563
Epochs: 14445 | epoch avg. loss: 0.046 | test avg. loss: 4.529
Epochs: 14446 | epoch avg. loss: 0.042 | test avg. loss: 4.678


 29%|██▉       | 14449/50000 [21:36<43:13, 13.71it/s]

Epochs: 14447 | epoch avg. loss: 0.049 | test avg. loss: 4.690
Epochs: 14448 | epoch avg. loss: 0.041 | test avg. loss: 4.495
Epochs: 14449 | epoch avg. loss: 0.046 | test avg. loss: 4.545


 29%|██▉       | 14453/50000 [21:36<42:04, 14.08it/s]

Epochs: 14450 | epoch avg. loss: 0.049 | test avg. loss: 4.381
Epochs: 14451 | epoch avg. loss: 0.047 | test avg. loss: 4.366
Epochs: 14452 | epoch avg. loss: 0.101 | test avg. loss: 4.583
Epochs: 14453 | epoch avg. loss: 0.055 | test avg. loss: 4.405


                                                     

Epochs: 14454 | epoch avg. loss: 0.085 | test avg. loss: 4.416
Epochs: 14455 | epoch avg. loss: 0.040 | test avg. loss: 4.521
Epochs: 14456 | epoch avg. loss: 0.041 | test avg. loss: 4.389


 29%|██▉       | 14459/50000 [21:36<41:01, 14.44it/s]

Epochs: 14457 | epoch avg. loss: 0.045 | test avg. loss: 4.784
Epochs: 14458 | epoch avg. loss: 0.137 | test avg. loss: 4.405
Epochs: 14459 | epoch avg. loss: 0.045 | test avg. loss: 4.550


 29%|██▉       | 14463/50000 [21:37<41:38, 14.22it/s]

Epochs: 14460 | epoch avg. loss: 0.050 | test avg. loss: 4.392
Epochs: 14461 | epoch avg. loss: 0.085 | test avg. loss: 4.391
Epochs: 14462 | epoch avg. loss: 0.052 | test avg. loss: 4.564


 29%|██▉       | 14467/50000 [21:37<39:25, 15.02it/s]

Epochs: 14463 | epoch avg. loss: 0.050 | test avg. loss: 4.433
Epochs: 14464 | epoch avg. loss: 0.025 | test avg. loss: 4.557
Epochs: 14465 | epoch avg. loss: 0.040 | test avg. loss: 4.447
Epochs: 14466 | epoch avg. loss: 0.024 | test avg. loss: 4.440


 29%|██▉       | 14469/50000 [21:37<38:58, 15.20it/s]

Epochs: 14467 | epoch avg. loss: 0.026 | test avg. loss: 4.491
Epochs: 14468 | epoch avg. loss: 0.026 | test avg. loss: 4.464
Epochs: 14469 | epoch avg. loss: 0.026 | test avg. loss: 4.507


                                                     

Epochs: 14470 | epoch avg. loss: 0.026 | test avg. loss: 4.545
Epochs: 14471 | epoch avg. loss: 0.028 | test avg. loss: 4.452
Epochs: 14472 | epoch avg. loss: 0.024 | test avg. loss: 4.547


 29%|██▉       | 14475/50000 [21:37<40:33, 14.60it/s]

Epochs: 14473 | epoch avg. loss: 0.036 | test avg. loss: 4.429
Epochs: 14474 | epoch avg. loss: 0.023 | test avg. loss: 4.518
Epochs: 14475 | epoch avg. loss: 0.031 | test avg. loss: 4.435


 29%|██▉       | 14479/50000 [21:38<41:24, 14.29it/s]

Epochs: 14476 | epoch avg. loss: 0.049 | test avg. loss: 4.420
Epochs: 14477 | epoch avg. loss: 0.074 | test avg. loss: 4.544
Epochs: 14478 | epoch avg. loss: 0.042 | test avg. loss: 4.455
Epochs: 14479 | epoch avg. loss: 0.043 | test avg. loss: 4.462


 29%|██▉       | 14483/50000 [21:38<39:39, 14.92it/s]

Epochs: 14480 | epoch avg. loss: 0.035 | test avg. loss: 4.605
Epochs: 14481 | epoch avg. loss: 0.050 | test avg. loss: 4.501
Epochs: 14482 | epoch avg. loss: 0.031 | test avg. loss: 4.553
Epochs: 14483 | epoch avg. loss: 0.036 | test avg. loss: 4.672


 29%|██▉       | 14487/50000 [21:38<41:05, 14.40it/s]

Epochs: 14484 | epoch avg. loss: 0.048 | test avg. loss: 4.456
Epochs: 14485 | epoch avg. loss: 0.098 | test avg. loss: 4.598
Epochs: 14486 | epoch avg. loss: 0.109 | test avg. loss: 4.372


 29%|██▉       | 14489/50000 [21:38<41:15, 14.34it/s]

Epochs: 14487 | epoch avg. loss: 0.044 | test avg. loss: 4.353
Epochs: 14488 | epoch avg. loss: 0.042 | test avg. loss: 4.638
Epochs: 14489 | epoch avg. loss: 0.063 | test avg. loss: 4.494


 29%|██▉       | 14493/50000 [21:39<44:33, 13.28it/s]

Epochs: 14490 | epoch avg. loss: 0.083 | test avg. loss: 4.645
Epochs: 14491 | epoch avg. loss: 0.125 | test avg. loss: 4.809
Epochs: 14492 | epoch avg. loss: 0.125 | test avg. loss: 4.451


 29%|██▉       | 14497/50000 [21:39<41:42, 14.19it/s]

Epochs: 14493 | epoch avg. loss: 0.204 | test avg. loss: 4.684
Epochs: 14494 | epoch avg. loss: 0.185 | test avg. loss: 4.367
Epochs: 14495 | epoch avg. loss: 0.108 | test avg. loss: 4.448
Epochs: 14496 | epoch avg. loss: 0.085 | test avg. loss: 4.771


 29%|██▉       | 14499/50000 [21:39<41:17, 14.33it/s]

Epochs: 14497 | epoch avg. loss: 0.124 | test avg. loss: 4.443
Epochs: 14498 | epoch avg. loss: 0.078 | test avg. loss: 4.438
Epochs: 14499 | epoch avg. loss: 0.032 | test avg. loss: 4.461


 29%|██▉       | 14503/50000 [21:41<2:01:00,  4.89it/s]

Epochs: 14500 | epoch avg. loss: 0.033 | test avg. loss: 4.404
Epochs: 14501 | epoch avg. loss: 0.047 | test avg. loss: 4.750
Epochs: 14502 | epoch avg. loss: 0.091 | test avg. loss: 4.504


 29%|██▉       | 14505/50000 [21:41<1:36:38,  6.12it/s]

Epochs: 14503 | epoch avg. loss: 0.120 | test avg. loss: 4.568
Epochs: 14504 | epoch avg. loss: 0.061 | test avg. loss: 4.908
Epochs: 14505 | epoch avg. loss: 0.107 | test avg. loss: 4.483
Epochs: 14506 | epoch avg. loss: 0.123 | test avg. loss: 4.623


                                                       

Epochs: 14507 | epoch avg. loss: 0.112 | test avg. loss: 4.489
Epochs: 14508 | epoch avg. loss: 0.095 | test avg. loss: 4.570
Epochs: 14509 | epoch avg. loss: 0.181 | test avg. loss: 5.009


 29%|██▉       | 14513/50000 [21:41<54:43, 10.81it/s]

Epochs: 14510 | epoch avg. loss: 0.243 | test avg. loss: 4.684
Epochs: 14511 | epoch avg. loss: 0.215 | test avg. loss: 4.755
Epochs: 14512 | epoch avg. loss: 0.128 | test avg. loss: 5.200


 29%|██▉       | 14515/50000 [21:42<53:48, 10.99it/s]

Epochs: 14513 | epoch avg. loss: 0.253 | test avg. loss: 4.475
Epochs: 14514 | epoch avg. loss: 0.114 | test avg. loss: 4.401
Epochs: 14515 | epoch avg. loss: 0.065 | test avg. loss: 4.670


 29%|██▉       | 14519/50000 [21:42<47:22, 12.48it/s]

Epochs: 14516 | epoch avg. loss: 0.065 | test avg. loss: 4.479
Epochs: 14517 | epoch avg. loss: 0.150 | test avg. loss: 4.558
Epochs: 14518 | epoch avg. loss: 0.077 | test avg. loss: 5.059
Epochs: 14519 | epoch avg. loss: 0.243 | test avg. loss: 4.521


 29%|██▉       | 14523/50000 [21:42<42:01, 14.07it/s]

Epochs: 14520 | epoch avg. loss: 0.088 | test avg. loss: 4.470
Epochs: 14521 | epoch avg. loss: 0.125 | test avg. loss: 4.974
Epochs: 14522 | epoch avg. loss: 0.235 | test avg. loss: 4.448
Epochs: 14523 | epoch avg. loss: 0.380 | test avg. loss: 4.537


 29%|██▉       | 14527/50000 [21:42<43:53, 13.47it/s]

Epochs: 14524 | epoch avg. loss: 0.144 | test avg. loss: 4.844
Epochs: 14525 | epoch avg. loss: 0.178 | test avg. loss: 4.544
Epochs: 14526 | epoch avg. loss: 0.189 | test avg. loss: 4.593


 29%|██▉       | 14529/50000 [21:43<47:30, 12.44it/s]

Epochs: 14527 | epoch avg. loss: 0.039 | test avg. loss: 4.549
Epochs: 14528 | epoch avg. loss: 0.044 | test avg. loss: 4.491
Epochs: 14529 | epoch avg. loss: 0.055 | test avg. loss: 4.859


 29%|██▉       | 14533/50000 [21:43<44:40, 13.23it/s]

Epochs: 14530 | epoch avg. loss: 0.120 | test avg. loss: 4.503
Epochs: 14531 | epoch avg. loss: 0.152 | test avg. loss: 4.714
Epochs: 14532 | epoch avg. loss: 0.201 | test avg. loss: 4.798


 29%|██▉       | 14535/50000 [21:43<43:14, 13.67it/s]

Epochs: 14533 | epoch avg. loss: 0.102 | test avg. loss: 4.646
Epochs: 14534 | epoch avg. loss: 0.178 | test avg. loss: 4.707
Epochs: 14535 | epoch avg. loss: 0.065 | test avg. loss: 4.316


 29%|██▉       | 14539/50000 [21:43<43:24, 13.62it/s]

Epochs: 14536 | epoch avg. loss: 0.082 | test avg. loss: 4.313
Epochs: 14537 | epoch avg. loss: 0.104 | test avg. loss: 4.513
Epochs: 14538 | epoch avg. loss: 0.058 | test avg. loss: 4.664


 29%|██▉       | 14541/50000 [21:44<46:41, 12.66it/s]

Epochs: 14539 | epoch avg. loss: 0.037 | test avg. loss: 4.869
Epochs: 14540 | epoch avg. loss: 0.048 | test avg. loss: 4.651
Epochs: 14541 | epoch avg. loss: 0.087 | test avg. loss: 4.684


 29%|██▉       | 14545/50000 [21:44<44:41, 13.22it/s]

Epochs: 14542 | epoch avg. loss: 0.117 | test avg. loss: 4.490
Epochs: 14543 | epoch avg. loss: 0.091 | test avg. loss: 4.468
Epochs: 14544 | epoch avg. loss: 0.163 | test avg. loss: 4.805
Epochs: 14545 | epoch avg. loss: 0.197 | test avg. loss: 4.622


 29%|██▉       | 14549/50000 [21:44<40:33, 14.57it/s]

Epochs: 14546 | epoch avg. loss: 0.081 | test avg. loss: 4.525
Epochs: 14547 | epoch avg. loss: 0.190 | test avg. loss: 4.792
Epochs: 14548 | epoch avg. loss: 0.152 | test avg. loss: 4.683
Epochs: 14549 | epoch avg. loss: 0.046 | test avg. loss: 4.602


 29%|██▉       | 14553/50000 [21:44<40:37, 14.54it/s]

Epochs: 14550 | epoch avg. loss: 0.046 | test avg. loss: 4.608
Epochs: 14551 | epoch avg. loss: 0.028 | test avg. loss: 4.559
Epochs: 14552 | epoch avg. loss: 0.029 | test avg. loss: 4.570


 29%|██▉       | 14555/50000 [21:45<43:22, 13.62it/s]

Epochs: 14553 | epoch avg. loss: 0.035 | test avg. loss: 4.513
Epochs: 14554 | epoch avg. loss: 0.025 | test avg. loss: 4.499
Epochs: 14555 | epoch avg. loss: 0.026 | test avg. loss: 4.464


 29%|██▉       | 14559/50000 [21:45<44:31, 13.27it/s]

Epochs: 14556 | epoch avg. loss: 0.030 | test avg. loss: 4.718
Epochs: 14557 | epoch avg. loss: 0.095 | test avg. loss: 4.461
Epochs: 14558 | epoch avg. loss: 0.036 | test avg. loss: 4.533


 29%|██▉       | 14561/50000 [21:45<42:53, 13.77it/s]

Epochs: 14559 | epoch avg. loss: 0.051 | test avg. loss: 4.725
Epochs: 14560 | epoch avg. loss: 0.116 | test avg. loss: 4.404
Epochs: 14561 | epoch avg. loss: 0.090 | test avg. loss: 4.402


 29%|██▉       | 14565/50000 [21:45<44:13, 13.35it/s]

Epochs: 14562 | epoch avg. loss: 0.052 | test avg. loss: 4.717
Epochs: 14563 | epoch avg. loss: 0.105 | test avg. loss: 4.402
Epochs: 14564 | epoch avg. loss: 0.187 | test avg. loss: 4.459


 29%|██▉       | 14567/50000 [21:45<46:44, 12.63it/s]

Epochs: 14565 | epoch avg. loss: 0.133 | test avg. loss: 5.100
Epochs: 14566 | epoch avg. loss: 0.345 | test avg. loss: 4.471
Epochs: 14567 | epoch avg. loss: 0.315 | test avg. loss: 4.509


 29%|██▉       | 14571/50000 [21:46<47:41, 12.38it/s]

Epochs: 14568 | epoch avg. loss: 0.182 | test avg. loss: 4.755
Epochs: 14569 | epoch avg. loss: 0.146 | test avg. loss: 4.509
Epochs: 14570 | epoch avg. loss: 0.209 | test avg. loss: 4.641


 29%|██▉       | 14573/50000 [21:46<46:22, 12.73it/s]

Epochs: 14571 | epoch avg. loss: 0.262 | test avg. loss: 4.329
Epochs: 14572 | epoch avg. loss: 0.130 | test avg. loss: 4.310
Epochs: 14573 | epoch avg. loss: 0.124 | test avg. loss: 4.873


 29%|██▉       | 14577/50000 [21:46<49:15, 11.99it/s]

Epochs: 14574 | epoch avg. loss: 0.174 | test avg. loss: 4.489
Epochs: 14575 | epoch avg. loss: 0.210 | test avg. loss: 4.618
Epochs: 14576 | epoch avg. loss: 0.160 | test avg. loss: 4.907


 29%|██▉       | 14579/50000 [21:46<50:18, 11.74it/s]

Epochs: 14577 | epoch avg. loss: 0.188 | test avg. loss: 4.541
Epochs: 14578 | epoch avg. loss: 0.120 | test avg. loss: 4.670
Epochs: 14579 | epoch avg. loss: 0.115 | test avg. loss: 4.534


 29%|██▉       | 14583/50000 [21:47<47:45, 12.36it/s]

Epochs: 14580 | epoch avg. loss: 0.060 | test avg. loss: 4.417
Epochs: 14581 | epoch avg. loss: 0.052 | test avg. loss: 4.531
Epochs: 14582 | epoch avg. loss: 0.064 | test avg. loss: 4.327
Epochs: 14583 | epoch avg. loss: 0.039 | test avg. loss: 4.385


 29%|██▉       | 14587/50000 [21:47<44:41, 13.20it/s]

Epochs: 14584 | epoch avg. loss: 0.035 | test avg. loss: 4.567
Epochs: 14585 | epoch avg. loss: 0.058 | test avg. loss: 4.398
Epochs: 14586 | epoch avg. loss: 0.029 | test avg. loss: 4.513


 29%|██▉       | 14589/50000 [21:47<44:00, 13.41it/s]

Epochs: 14587 | epoch avg. loss: 0.062 | test avg. loss: 4.484
Epochs: 14588 | epoch avg. loss: 0.044 | test avg. loss: 4.440
Epochs: 14589 | epoch avg. loss: 0.138 | test avg. loss: 4.590


 29%|██▉       | 14593/50000 [21:47<45:19, 13.02it/s]

Epochs: 14590 | epoch avg. loss: 0.034 | test avg. loss: 4.605
Epochs: 14591 | epoch avg. loss: 0.045 | test avg. loss: 4.518
Epochs: 14592 | epoch avg. loss: 0.142 | test avg. loss: 4.562




Epochs: 14593 | epoch avg. loss: 0.047 | test avg. loss: 4.593
Epochs: 14594 | epoch avg. loss: 0.050 | test avg. loss: 4.404
Epochs: 14595 | epoch avg. loss: 0.089 | test avg. loss: 4.458


 29%|██▉       | 14599/50000 [21:48<42:04, 14.02it/s]

Epochs: 14596 | epoch avg. loss: 0.052 | test avg. loss: 4.823
Epochs: 14597 | epoch avg. loss: 0.155 | test avg. loss: 4.444
Epochs: 14598 | epoch avg. loss: 0.055 | test avg. loss: 4.499


 29%|██▉       | 14599/50000 [21:48<42:04, 14.02it/s]

Epochs: 14599 | epoch avg. loss: 0.050 | test avg. loss: 4.565


 29%|██▉       | 14603/50000 [21:50<2:22:56,  4.13it/s]

Epochs: 14600 | epoch avg. loss: 0.037 | test avg. loss: 4.531
Epochs: 14601 | epoch avg. loss: 0.033 | test avg. loss: 4.684
Epochs: 14602 | epoch avg. loss: 0.064 | test avg. loss: 4.692


 29%|██▉       | 14605/50000 [21:50<1:55:26,  5.11it/s]

Epochs: 14603 | epoch avg. loss: 0.044 | test avg. loss: 4.479
Epochs: 14604 | epoch avg. loss: 0.053 | test avg. loss: 4.462
Epochs: 14605 | epoch avg. loss: 0.026 | test avg. loss: 4.508


 29%|██▉       | 14609/50000 [21:50<1:22:54,  7.11it/s]

Epochs: 14606 | epoch avg. loss: 0.041 | test avg. loss: 4.621
Epochs: 14607 | epoch avg. loss: 0.052 | test avg. loss: 4.534
Epochs: 14608 | epoch avg. loss: 0.025 | test avg. loss: 4.592


 29%|██▉       | 14611/50000 [21:50<1:17:39,  7.60it/s]

Epochs: 14609 | epoch avg. loss: 0.048 | test avg. loss: 4.785
Epochs: 14610 | epoch avg. loss: 0.101 | test avg. loss: 4.532


 29%|██▉       | 14613/50000 [21:51<1:09:38,  8.47it/s]

Epochs: 14611 | epoch avg. loss: 0.078 | test avg. loss: 4.518
Epochs: 14612 | epoch avg. loss: 0.038 | test avg. loss: 4.597
Epochs: 14613 | epoch avg. loss: 0.052 | test avg. loss: 4.406


 29%|██▉       | 14617/50000 [21:51<1:00:46,  9.70it/s]

Epochs: 14614 | epoch avg. loss: 0.068 | test avg. loss: 4.917
Epochs: 14615 | epoch avg. loss: 0.207 | test avg. loss: 4.547
Epochs: 14616 | epoch avg. loss: 0.091 | test avg. loss: 4.628


 29%|██▉       | 14619/50000 [21:51<55:31, 10.62it/s]

Epochs: 14617 | epoch avg. loss: 0.138 | test avg. loss: 5.023
Epochs: 14618 | epoch avg. loss: 0.155 | test avg. loss: 4.694
Epochs: 14619 | epoch avg. loss: 0.042 | test avg. loss: 4.490


 29%|██▉       | 14623/50000 [21:51<52:14, 11.29it/s]

Epochs: 14620 | epoch avg. loss: 0.039 | test avg. loss: 4.631
Epochs: 14621 | epoch avg. loss: 0.117 | test avg. loss: 4.431
Epochs: 14622 | epoch avg. loss: 0.031 | test avg. loss: 4.539


 29%|██▉       | 14625/50000 [21:52<49:11, 11.98it/s]

Epochs: 14623 | epoch avg. loss: 0.028 | test avg. loss: 4.662
Epochs: 14624 | epoch avg. loss: 0.029 | test avg. loss: 4.634
Epochs: 14625 | epoch avg. loss: 0.030 | test avg. loss: 4.591


 29%|██▉       | 14629/50000 [21:52<44:33, 13.23it/s]

Epochs: 14626 | epoch avg. loss: 0.032 | test avg. loss: 4.831
Epochs: 14627 | epoch avg. loss: 0.161 | test avg. loss: 4.622
Epochs: 14628 | epoch avg. loss: 0.050 | test avg. loss: 4.511
Epochs: 14629 | epoch avg. loss: 0.025 | test avg. loss: 4.547


 29%|██▉       | 14633/50000 [21:52<41:26, 14.22it/s]

Epochs: 14630 | epoch avg. loss: 0.023 | test avg. loss: 4.620
Epochs: 14631 | epoch avg. loss: 0.024 | test avg. loss: 4.601
Epochs: 14632 | epoch avg. loss: 0.047 | test avg. loss: 4.570
Epochs: 14633 | epoch avg. loss: 0.030 | test avg. loss: 4.586


 29%|██▉       | 14637/50000 [21:52<43:06, 13.67it/s]

Epochs: 14634 | epoch avg. loss: 0.030 | test avg. loss: 4.592
Epochs: 14635 | epoch avg. loss: 0.028 | test avg. loss: 4.509
Epochs: 14636 | epoch avg. loss: 0.047 | test avg. loss: 4.778


 29%|██▉       | 14639/50000 [21:53<41:58, 14.04it/s]

Epochs: 14637 | epoch avg. loss: 0.078 | test avg. loss: 4.689
Epochs: 14638 | epoch avg. loss: 0.038 | test avg. loss: 4.584
Epochs: 14639 | epoch avg. loss: 0.063 | test avg. loss: 4.700


 29%|██▉       | 14643/50000 [21:53<40:36, 14.51it/s]

Epochs: 14640 | epoch avg. loss: 0.059 | test avg. loss: 4.599
Epochs: 14641 | epoch avg. loss: 0.033 | test avg. loss: 4.561
Epochs: 14642 | epoch avg. loss: 0.052 | test avg. loss: 4.867
Epochs: 14643 | epoch avg. loss: 0.119 | test avg. loss: 4.449


 29%|██▉       | 14647/50000 [21:53<38:49, 15.18it/s]

Epochs: 14644 | epoch avg. loss: 0.090 | test avg. loss: 4.612
Epochs: 14645 | epoch avg. loss: 0.186 | test avg. loss: 4.924
Epochs: 14646 | epoch avg. loss: 0.142 | test avg. loss: 4.719
Epochs: 14647 | epoch avg. loss: 0.312 | test avg. loss: 4.806


 29%|██▉       | 14651/50000 [21:53<41:13, 14.29it/s]

Epochs: 14648 | epoch avg. loss: 0.346 | test avg. loss: 5.184
Epochs: 14649 | epoch avg. loss: 0.325 | test avg. loss: 4.623
Epochs: 14650 | epoch avg. loss: 0.457 | test avg. loss: 4.562


 29%|██▉       | 14655/50000 [21:54<40:31, 14.54it/s]

Epochs: 14651 | epoch avg. loss: 0.183 | test avg. loss: 5.353
Epochs: 14652 | epoch avg. loss: 0.402 | test avg. loss: 4.824
Epochs: 14653 | epoch avg. loss: 1.038 | test avg. loss: 4.906
Epochs: 14654 | epoch avg. loss: 1.452 | test avg. loss: 5.492


 29%|██▉       | 14657/50000 [21:54<41:22, 14.24it/s]

Epochs: 14655 | epoch avg. loss: 0.769 | test avg. loss: 5.834
Epochs: 14656 | epoch avg. loss: 0.986 | test avg. loss: 5.826
Epochs: 14657 | epoch avg. loss: 0.431 | test avg. loss: 5.012


 29%|██▉       | 14661/50000 [21:54<40:15, 14.63it/s]

Epochs: 14658 | epoch avg. loss: 0.478 | test avg. loss: 4.923
Epochs: 14659 | epoch avg. loss: 0.696 | test avg. loss: 4.816
Epochs: 14660 | epoch avg. loss: 0.332 | test avg. loss: 4.420
Epochs: 14661 | epoch avg. loss: 0.376 | test avg. loss: 4.769


 29%|██▉       | 14665/50000 [21:54<41:36, 14.15it/s]

Epochs: 14662 | epoch avg. loss: 0.372 | test avg. loss: 7.481
Epochs: 14663 | epoch avg. loss: 1.679 | test avg. loss: 5.278
Epochs: 14664 | epoch avg. loss: 1.207 | test avg. loss: 4.718


 29%|██▉       | 14667/50000 [21:55<44:31, 13.23it/s]

Epochs: 14665 | epoch avg. loss: 0.700 | test avg. loss: 5.197
Epochs: 14666 | epoch avg. loss: 0.688 | test avg. loss: 4.920
Epochs: 14667 | epoch avg. loss: 1.391 | test avg. loss: 4.584


                                                     

Epochs: 14668 | epoch avg. loss: 0.134 | test avg. loss: 4.731
Epochs: 14669 | epoch avg. loss: 0.109 | test avg. loss: 4.818
Epochs: 14670 | epoch avg. loss: 0.160 | test avg. loss: 5.273


 29%|██▉       | 14675/50000 [21:55<40:56, 14.38it/s]

Epochs: 14671 | epoch avg. loss: 0.306 | test avg. loss: 4.453
Epochs: 14672 | epoch avg. loss: 0.144 | test avg. loss: 4.446
Epochs: 14673 | epoch avg. loss: 0.122 | test avg. loss: 4.166
Epochs: 14674 | epoch avg. loss: 0.128 | test avg. loss: 4.393


 29%|██▉       | 14677/50000 [21:55<39:51, 14.77it/s]

Epochs: 14675 | epoch avg. loss: 0.065 | test avg. loss: 4.429
Epochs: 14676 | epoch avg. loss: 0.067 | test avg. loss: 4.449
Epochs: 14677 | epoch avg. loss: 0.056 | test avg. loss: 4.509


 29%|██▉       | 14681/50000 [21:56<43:39, 13.48it/s]

Epochs: 14678 | epoch avg. loss: 0.046 | test avg. loss: 4.563
Epochs: 14679 | epoch avg. loss: 0.044 | test avg. loss: 4.454
Epochs: 14680 | epoch avg. loss: 0.082 | test avg. loss: 4.366


 29%|██▉       | 14683/50000 [21:56<43:33, 13.51it/s]

Epochs: 14681 | epoch avg. loss: 0.037 | test avg. loss: 4.386
Epochs: 14682 | epoch avg. loss: 0.042 | test avg. loss: 4.217
Epochs: 14683 | epoch avg. loss: 0.042 | test avg. loss: 4.362


 29%|██▉       | 14687/50000 [21:56<45:08, 13.04it/s]

Epochs: 14684 | epoch avg. loss: 0.094 | test avg. loss: 4.488
Epochs: 14685 | epoch avg. loss: 0.079 | test avg. loss: 4.339
Epochs: 14686 | epoch avg. loss: 0.073 | test avg. loss: 4.397


 29%|██▉       | 14689/50000 [21:56<43:58, 13.38it/s]

Epochs: 14687 | epoch avg. loss: 0.054 | test avg. loss: 4.692
Epochs: 14688 | epoch avg. loss: 0.156 | test avg. loss: 4.536
Epochs: 14689 | epoch avg. loss: 0.046 | test avg. loss: 4.483
Epochs: 14690 | epoch avg. loss: 0.050 | test avg. loss: 4.636


 29%|██▉       | 14693/50000 [21:57<42:04, 13.99it/s]

Epochs: 14691 | epoch avg. loss: 0.099 | test avg. loss: 4.277
Epochs: 14692 | epoch avg. loss: 0.142 | test avg. loss: 4.284
Epochs: 14693 | epoch avg. loss: 0.067 | test avg. loss: 4.703


 29%|██▉       | 14697/50000 [21:57<43:44, 13.45it/s]

Epochs: 14694 | epoch avg. loss: 0.127 | test avg. loss: 4.387
Epochs: 14695 | epoch avg. loss: 0.119 | test avg. loss: 4.440
Epochs: 14696 | epoch avg. loss: 0.063 | test avg. loss: 4.511


 29%|██▉       | 14699/50000 [21:57<46:47, 12.57it/s]

Epochs: 14697 | epoch avg. loss: 0.073 | test avg. loss: 4.346
Epochs: 14698 | epoch avg. loss: 0.153 | test avg. loss: 4.669
Epochs: 14699 | epoch avg. loss: 0.104 | test avg. loss: 4.431


 29%|██▉       | 14703/50000 [21:59<2:12:02,  4.46it/s]

Epochs: 14700 | epoch avg. loss: 0.151 | test avg. loss: 4.478
Epochs: 14701 | epoch avg. loss: 0.110 | test avg. loss: 5.166
Epochs: 14702 | epoch avg. loss: 0.299 | test avg. loss: 4.478


 29%|██▉       | 14705/50000 [21:59<1:46:27,  5.53it/s]

Epochs: 14703 | epoch avg. loss: 0.427 | test avg. loss: 4.616
Epochs: 14704 | epoch avg. loss: 0.437 | test avg. loss: 4.924
Epochs: 14705 | epoch avg. loss: 0.372 | test avg. loss: 4.371


                                                       

Epochs: 14706 | epoch avg. loss: 0.363 | test avg. loss: 4.965
Epochs: 14707 | epoch avg. loss: 0.444 | test avg. loss: 4.918
Epochs: 14708 | epoch avg. loss: 0.248 | test avg. loss: 5.138


 29%|██▉       | 14711/50000 [21:59<1:03:49,  9.22it/s]

Epochs: 14709 | epoch avg. loss: 0.761 | test avg. loss: 5.209
Epochs: 14710 | epoch avg. loss: 0.413 | test avg. loss: 4.515
Epochs: 14711 | epoch avg. loss: 0.105 | test avg. loss: 4.360


 29%|██▉       | 14715/50000 [22:00<51:55, 11.33it/s]

Epochs: 14712 | epoch avg. loss: 0.091 | test avg. loss: 4.421
Epochs: 14713 | epoch avg. loss: 0.065 | test avg. loss: 4.504
Epochs: 14714 | epoch avg. loss: 0.056 | test avg. loss: 4.653
Epochs: 14715 | epoch avg. loss: 0.078 | test avg. loss: 4.738




Epochs: 14716 | epoch avg. loss: 0.325 | test avg. loss: 4.817
Epochs: 14717 | epoch avg. loss: 0.189 | test avg. loss: 4.523
Epochs: 14718 | epoch avg. loss: 0.081 | test avg. loss: 4.369


 29%|██▉       | 14723/50000 [22:00<41:03, 14.32it/s]

Epochs: 14719 | epoch avg. loss: 0.065 | test avg. loss: 4.470
Epochs: 14720 | epoch avg. loss: 0.076 | test avg. loss: 4.350
Epochs: 14721 | epoch avg. loss: 0.185 | test avg. loss: 4.688
Epochs: 14722 | epoch avg. loss: 0.064 | test avg. loss: 4.649


 29%|██▉       | 14725/50000 [22:00<40:24, 14.55it/s]

Epochs: 14723 | epoch avg. loss: 0.068 | test avg. loss: 4.627
Epochs: 14724 | epoch avg. loss: 0.058 | test avg. loss: 4.823
Epochs: 14725 | epoch avg. loss: 0.097 | test avg. loss: 4.477


 29%|██▉       | 14729/50000 [22:00<42:04, 13.97it/s]

Epochs: 14726 | epoch avg. loss: 0.092 | test avg. loss: 4.491
Epochs: 14727 | epoch avg. loss: 0.032 | test avg. loss: 4.538
Epochs: 14728 | epoch avg. loss: 0.025 | test avg. loss: 4.694


 29%|██▉       | 14731/50000 [22:01<41:50, 14.05it/s]

Epochs: 14729 | epoch avg. loss: 0.057 | test avg. loss: 4.723
Epochs: 14730 | epoch avg. loss: 0.039 | test avg. loss: 4.565
Epochs: 14731 | epoch avg. loss: 0.055 | test avg. loss: 4.699
Epochs: 14732 | epoch avg. loss: 0.073 | test avg. loss: 4.476


 29%|██▉       | 14735/50000 [22:01<43:59, 13.36it/s]

Epochs: 14733 | epoch avg. loss: 0.068 | test avg. loss: 4.454
Epochs: 14734 | epoch avg. loss: 0.056 | test avg. loss: 4.773
Epochs: 14735 | epoch avg. loss: 0.089 | test avg. loss: 4.556


 29%|██▉       | 14739/50000 [22:01<46:46, 12.56it/s]

Epochs: 14736 | epoch avg. loss: 0.171 | test avg. loss: 4.664
Epochs: 14737 | epoch avg. loss: 0.152 | test avg. loss: 4.807
Epochs: 14738 | epoch avg. loss: 0.147 | test avg. loss: 4.598


 29%|██▉       | 14741/50000 [22:02<46:44, 12.57it/s]

Epochs: 14739 | epoch avg. loss: 0.372 | test avg. loss: 4.883
Epochs: 14740 | epoch avg. loss: 0.176 | test avg. loss: 4.789
Epochs: 14741 | epoch avg. loss: 0.167 | test avg. loss: 4.770


 29%|██▉       | 14745/50000 [22:02<46:31, 12.63it/s]

Epochs: 14742 | epoch avg. loss: 0.180 | test avg. loss: 5.108
Epochs: 14743 | epoch avg. loss: 0.227 | test avg. loss: 4.467
Epochs: 14744 | epoch avg. loss: 0.131 | test avg. loss: 4.421


 29%|██▉       | 14747/50000 [22:02<45:21, 12.95it/s]

Epochs: 14745 | epoch avg. loss: 0.166 | test avg. loss: 4.963
Epochs: 14746 | epoch avg. loss: 0.164 | test avg. loss: 4.648
Epochs: 14747 | epoch avg. loss: 0.087 | test avg. loss: 4.729


 30%|██▉       | 14751/50000 [22:02<47:59, 12.24it/s]

Epochs: 14748 | epoch avg. loss: 0.050 | test avg. loss: 4.878
Epochs: 14749 | epoch avg. loss: 0.062 | test avg. loss: 4.730
Epochs: 14750 | epoch avg. loss: 0.040 | test avg. loss: 4.715


 30%|██▉       | 14753/50000 [22:02<47:06, 12.47it/s]

Epochs: 14751 | epoch avg. loss: 0.040 | test avg. loss: 4.734
Epochs: 14752 | epoch avg. loss: 0.040 | test avg. loss: 4.584
Epochs: 14753 | epoch avg. loss: 0.108 | test avg. loss: 4.616


 30%|██▉       | 14757/50000 [22:03<48:25, 12.13it/s]

Epochs: 14754 | epoch avg. loss: 0.053 | test avg. loss: 4.874
Epochs: 14755 | epoch avg. loss: 0.107 | test avg. loss: 4.537
Epochs: 14756 | epoch avg. loss: 0.074 | test avg. loss: 4.701


 30%|██▉       | 14759/50000 [22:03<46:59, 12.50it/s]

Epochs: 14757 | epoch avg. loss: 0.057 | test avg. loss: 4.593
Epochs: 14758 | epoch avg. loss: 0.046 | test avg. loss: 4.532
Epochs: 14759 | epoch avg. loss: 0.084 | test avg. loss: 4.641


 30%|██▉       | 14763/50000 [22:03<46:18, 12.68it/s]

Epochs: 14760 | epoch avg. loss: 0.027 | test avg. loss: 4.562
Epochs: 14761 | epoch avg. loss: 0.058 | test avg. loss: 4.578
Epochs: 14762 | epoch avg. loss: 0.028 | test avg. loss: 4.628


 30%|██▉       | 14765/50000 [22:03<49:05, 11.96it/s]

Epochs: 14763 | epoch avg. loss: 0.027 | test avg. loss: 4.556
Epochs: 14764 | epoch avg. loss: 0.045 | test avg. loss: 4.628


 30%|██▉       | 14767/50000 [22:04<50:35, 11.61it/s]

Epochs: 14765 | epoch avg. loss: 0.021 | test avg. loss: 4.635
Epochs: 14766 | epoch avg. loss: 0.028 | test avg. loss: 4.589
Epochs: 14767 | epoch avg. loss: 0.053 | test avg. loss: 4.674


 30%|██▉       | 14771/50000 [22:04<49:36, 11.84it/s]

Epochs: 14768 | epoch avg. loss: 0.049 | test avg. loss: 4.600
Epochs: 14769 | epoch avg. loss: 0.026 | test avg. loss: 4.526
Epochs: 14770 | epoch avg. loss: 0.028 | test avg. loss: 4.668


 30%|██▉       | 14773/50000 [22:04<51:29, 11.40it/s]

Epochs: 14771 | epoch avg. loss: 0.047 | test avg. loss: 4.536
Epochs: 14772 | epoch avg. loss: 0.063 | test avg. loss: 4.600
Epochs: 14773 | epoch avg. loss: 0.049 | test avg. loss: 5.032


 30%|██▉       | 14777/50000 [22:04<52:14, 11.24it/s]

Epochs: 14774 | epoch avg. loss: 0.127 | test avg. loss: 4.589
Epochs: 14775 | epoch avg. loss: 0.180 | test avg. loss: 4.573
Epochs: 14776 | epoch avg. loss: 0.069 | test avg. loss: 4.926


 30%|██▉       | 14779/50000 [22:05<50:34, 11.61it/s]

Epochs: 14777 | epoch avg. loss: 0.139 | test avg. loss: 4.609
Epochs: 14778 | epoch avg. loss: 0.146 | test avg. loss: 4.627
Epochs: 14779 | epoch avg. loss: 0.063 | test avg. loss: 4.918


 30%|██▉       | 14783/50000 [22:05<44:26, 13.20it/s]

Epochs: 14780 | epoch avg. loss: 0.092 | test avg. loss: 4.633
Epochs: 14781 | epoch avg. loss: 0.058 | test avg. loss: 4.882
Epochs: 14782 | epoch avg. loss: 0.092 | test avg. loss: 4.744
Epochs: 14783 | epoch avg. loss: 0.065 | test avg. loss: 4.705


 30%|██▉       | 14787/50000 [22:05<41:30, 14.14it/s]

Epochs: 14784 | epoch avg. loss: 0.115 | test avg. loss: 5.244
Epochs: 14785 | epoch avg. loss: 0.177 | test avg. loss: 4.699
Epochs: 14786 | epoch avg. loss: 0.269 | test avg. loss: 4.699


 30%|██▉       | 14789/50000 [22:05<43:23, 13.53it/s]

Epochs: 14787 | epoch avg. loss: 0.106 | test avg. loss: 4.953
Epochs: 14788 | epoch avg. loss: 0.093 | test avg. loss: 4.690
Epochs: 14789 | epoch avg. loss: 0.071 | test avg. loss: 4.596


 30%|██▉       | 14793/50000 [22:06<46:49, 12.53it/s]

Epochs: 14790 | epoch avg. loss: 0.053 | test avg. loss: 4.705
Epochs: 14791 | epoch avg. loss: 0.101 | test avg. loss: 4.332
Epochs: 14792 | epoch avg. loss: 0.082 | test avg. loss: 4.371


 30%|██▉       | 14795/50000 [22:06<45:21, 12.94it/s]

Epochs: 14793 | epoch avg. loss: 0.042 | test avg. loss: 4.556
Epochs: 14794 | epoch avg. loss: 0.029 | test avg. loss: 4.581
Epochs: 14795 | epoch avg. loss: 0.061 | test avg. loss: 4.711


 30%|██▉       | 14799/50000 [22:06<45:27, 12.91it/s]

Epochs: 14796 | epoch avg. loss: 0.028 | test avg. loss: 4.619
Epochs: 14797 | epoch avg. loss: 0.022 | test avg. loss: 4.753
Epochs: 14798 | epoch avg. loss: 0.081 | test avg. loss: 4.612


 30%|██▉       | 14799/50000 [22:06<45:27, 12.91it/s]

Epochs: 14799 | epoch avg. loss: 0.032 | test avg. loss: 4.575


 30%|██▉       | 14803/50000 [22:08<2:23:13,  4.10it/s]

Epochs: 14800 | epoch avg. loss: 0.033 | test avg. loss: 4.751
Epochs: 14801 | epoch avg. loss: 0.068 | test avg. loss: 4.533
Epochs: 14802 | epoch avg. loss: 0.052 | test avg. loss: 4.485


 30%|██▉       | 14805/50000 [22:08<1:54:48,  5.11it/s]

Epochs: 14803 | epoch avg. loss: 0.055 | test avg. loss: 4.798
Epochs: 14804 | epoch avg. loss: 0.173 | test avg. loss: 4.634
Epochs: 14805 | epoch avg. loss: 0.118 | test avg. loss: 4.681




Epochs: 14806 | epoch avg. loss: 0.278 | test avg. loss: 5.195
Epochs: 14807 | epoch avg. loss: 0.310 | test avg. loss: 4.500
Epochs: 14808 | epoch avg. loss: 0.162 | test avg. loss: 4.513




Epochs: 14809 | epoch avg. loss: 0.159 | test avg. loss: 5.441
Epochs: 14810 | epoch avg. loss: 0.410 | test avg. loss: 4.862
Epochs: 14811 | epoch avg. loss: 0.391 | test avg. loss: 5.145


 30%|██▉       | 14815/50000 [22:09<56:28, 10.38it/s]  

Epochs: 14812 | epoch avg. loss: 0.461 | test avg. loss: 5.317
Epochs: 14813 | epoch avg. loss: 0.270 | test avg. loss: 4.922
Epochs: 14814 | epoch avg. loss: 0.141 | test avg. loss: 4.547


 30%|██▉       | 14817/50000 [22:09<52:35, 11.15it/s]

Epochs: 14815 | epoch avg. loss: 0.388 | test avg. loss: 4.817
Epochs: 14816 | epoch avg. loss: 0.681 | test avg. loss: 4.670
Epochs: 14817 | epoch avg. loss: 0.170 | test avg. loss: 4.702




Epochs: 14818 | epoch avg. loss: 0.168 | test avg. loss: 5.275
Epochs: 14819 | epoch avg. loss: 0.274 | test avg. loss: 4.754


 30%|██▉       | 14823/50000 [22:10<52:18, 11.21it/s]

Epochs: 14820 | epoch avg. loss: 0.160 | test avg. loss: 4.660
Epochs: 14821 | epoch avg. loss: 0.182 | test avg. loss: 5.332
Epochs: 14822 | epoch avg. loss: 0.462 | test avg. loss: 4.538


 30%|██▉       | 14825/50000 [22:10<50:14, 11.67it/s]

Epochs: 14823 | epoch avg. loss: 0.317 | test avg. loss: 4.775
Epochs: 14824 | epoch avg. loss: 0.294 | test avg. loss: 5.428
Epochs: 14825 | epoch avg. loss: 0.336 | test avg. loss: 4.778


 30%|██▉       | 14829/50000 [22:10<46:28, 12.61it/s]

Epochs: 14826 | epoch avg. loss: 0.497 | test avg. loss: 4.810
Epochs: 14827 | epoch avg. loss: 0.240 | test avg. loss: 4.784
Epochs: 14828 | epoch avg. loss: 0.206 | test avg. loss: 4.970


 30%|██▉       | 14831/50000 [22:10<45:27, 12.90it/s]

Epochs: 14829 | epoch avg. loss: 0.447 | test avg. loss: 5.551
Epochs: 14830 | epoch avg. loss: 0.452 | test avg. loss: 4.637
Epochs: 14831 | epoch avg. loss: 0.339 | test avg. loss: 4.583


 30%|██▉       | 14835/50000 [22:11<49:15, 11.90it/s]

Epochs: 14832 | epoch avg. loss: 0.268 | test avg. loss: 5.329
Epochs: 14833 | epoch avg. loss: 0.404 | test avg. loss: 4.744
Epochs: 14834 | epoch avg. loss: 0.544 | test avg. loss: 4.993


 30%|██▉       | 14837/50000 [22:11<50:20, 11.64it/s]

Epochs: 14835 | epoch avg. loss: 0.589 | test avg. loss: 4.784
Epochs: 14836 | epoch avg. loss: 0.349 | test avg. loss: 4.823
Epochs: 14837 | epoch avg. loss: 0.792 | test avg. loss: 5.021


 30%|██▉       | 14841/50000 [22:11<43:42, 13.41it/s]

Epochs: 14838 | epoch avg. loss: 0.584 | test avg. loss: 4.803
Epochs: 14839 | epoch avg. loss: 0.206 | test avg. loss: 5.045
Epochs: 14840 | epoch avg. loss: 0.277 | test avg. loss: 5.579


 30%|██▉       | 14843/50000 [22:11<44:28, 13.18it/s]

Epochs: 14841 | epoch avg. loss: 0.304 | test avg. loss: 4.840
Epochs: 14842 | epoch avg. loss: 0.326 | test avg. loss: 4.821
Epochs: 14843 | epoch avg. loss: 0.285 | test avg. loss: 4.324


 30%|██▉       | 14847/50000 [22:12<47:29, 12.34it/s]

Epochs: 14844 | epoch avg. loss: 0.125 | test avg. loss: 4.183
Epochs: 14845 | epoch avg. loss: 0.118 | test avg. loss: 4.587
Epochs: 14846 | epoch avg. loss: 0.174 | test avg. loss: 4.402


 30%|██▉       | 14849/50000 [22:12<45:27, 12.89it/s]

Epochs: 14847 | epoch avg. loss: 0.082 | test avg. loss: 4.505
Epochs: 14848 | epoch avg. loss: 0.047 | test avg. loss: 4.634
Epochs: 14849 | epoch avg. loss: 0.046 | test avg. loss: 4.576


 30%|██▉       | 14853/50000 [22:12<44:30, 13.16it/s]

Epochs: 14850 | epoch avg. loss: 0.042 | test avg. loss: 4.531
Epochs: 14851 | epoch avg. loss: 0.052 | test avg. loss: 4.606
Epochs: 14852 | epoch avg. loss: 0.055 | test avg. loss: 4.841


 30%|██▉       | 14855/50000 [22:12<43:34, 13.44it/s]

Epochs: 14853 | epoch avg. loss: 0.078 | test avg. loss: 4.569
Epochs: 14854 | epoch avg. loss: 0.062 | test avg. loss: 4.677
Epochs: 14855 | epoch avg. loss: 0.062 | test avg. loss: 4.792


 30%|██▉       | 14859/50000 [22:12<47:05, 12.44it/s]

Epochs: 14856 | epoch avg. loss: 0.080 | test avg. loss: 4.456
Epochs: 14857 | epoch avg. loss: 0.130 | test avg. loss: 4.817
Epochs: 14858 | epoch avg. loss: 0.355 | test avg. loss: 4.464


 30%|██▉       | 14861/50000 [22:13<48:23, 12.10it/s]

Epochs: 14859 | epoch avg. loss: 0.185 | test avg. loss: 4.674
Epochs: 14860 | epoch avg. loss: 0.183 | test avg. loss: 5.228
Epochs: 14861 | epoch avg. loss: 0.218 | test avg. loss: 4.721


 30%|██▉       | 14865/50000 [22:13<43:43, 13.39it/s]

Epochs: 14862 | epoch avg. loss: 0.124 | test avg. loss: 4.699
Epochs: 14863 | epoch avg. loss: 0.094 | test avg. loss: 4.923
Epochs: 14864 | epoch avg. loss: 0.127 | test avg. loss: 4.626


 30%|██▉       | 14867/50000 [22:13<45:22, 12.91it/s]

Epochs: 14865 | epoch avg. loss: 0.304 | test avg. loss: 4.761
Epochs: 14866 | epoch avg. loss: 0.055 | test avg. loss: 4.905
Epochs: 14867 | epoch avg. loss: 0.050 | test avg. loss: 4.778


 30%|██▉       | 14871/50000 [22:13<46:38, 12.55it/s]

Epochs: 14868 | epoch avg. loss: 0.057 | test avg. loss: 4.725
Epochs: 14869 | epoch avg. loss: 0.028 | test avg. loss: 4.628
Epochs: 14870 | epoch avg. loss: 0.027 | test avg. loss: 4.476


 30%|██▉       | 14873/50000 [22:14<45:39, 12.82it/s]

Epochs: 14871 | epoch avg. loss: 0.038 | test avg. loss: 4.599
Epochs: 14872 | epoch avg. loss: 0.050 | test avg. loss: 4.650
Epochs: 14873 | epoch avg. loss: 0.032 | test avg. loss: 4.690


 30%|██▉       | 14877/50000 [22:14<45:24, 12.89it/s]

Epochs: 14874 | epoch avg. loss: 0.048 | test avg. loss: 5.133
Epochs: 14875 | epoch avg. loss: 0.152 | test avg. loss: 4.631
Epochs: 14876 | epoch avg. loss: 0.257 | test avg. loss: 4.567


 30%|██▉       | 14879/50000 [22:14<46:11, 12.67it/s]

Epochs: 14877 | epoch avg. loss: 0.062 | test avg. loss: 4.698
Epochs: 14878 | epoch avg. loss: 0.061 | test avg. loss: 4.637
Epochs: 14879 | epoch avg. loss: 0.110 | test avg. loss: 4.740


 30%|██▉       | 14883/50000 [22:14<45:36, 12.83it/s]

Epochs: 14880 | epoch avg. loss: 0.067 | test avg. loss: 4.884
Epochs: 14881 | epoch avg. loss: 0.087 | test avg. loss: 4.504
Epochs: 14882 | epoch avg. loss: 0.164 | test avg. loss: 4.599


 30%|██▉       | 14885/50000 [22:15<47:00, 12.45it/s]

Epochs: 14883 | epoch avg. loss: 0.118 | test avg. loss: 5.069
Epochs: 14884 | epoch avg. loss: 0.154 | test avg. loss: 4.855
Epochs: 14885 | epoch avg. loss: 0.259 | test avg. loss: 5.254


 30%|██▉       | 14889/50000 [22:15<45:36, 12.83it/s]

Epochs: 14886 | epoch avg. loss: 0.201 | test avg. loss: 4.869
Epochs: 14887 | epoch avg. loss: 0.139 | test avg. loss: 4.709
Epochs: 14888 | epoch avg. loss: 0.087 | test avg. loss: 4.578


 30%|██▉       | 14891/50000 [22:15<45:51, 12.76it/s]

Epochs: 14889 | epoch avg. loss: 0.064 | test avg. loss: 4.383
Epochs: 14890 | epoch avg. loss: 0.084 | test avg. loss: 4.545
Epochs: 14891 | epoch avg. loss: 0.075 | test avg. loss: 4.414


 30%|██▉       | 14895/50000 [22:15<44:57, 13.01it/s]

Epochs: 14892 | epoch avg. loss: 0.079 | test avg. loss: 4.475
Epochs: 14893 | epoch avg. loss: 0.060 | test avg. loss: 4.741
Epochs: 14894 | epoch avg. loss: 0.083 | test avg. loss: 4.572


 30%|██▉       | 14897/50000 [22:16<45:19, 12.91it/s]

Epochs: 14895 | epoch avg. loss: 0.091 | test avg. loss: 4.561
Epochs: 14896 | epoch avg. loss: 0.051 | test avg. loss: 4.681
Epochs: 14897 | epoch avg. loss: 0.072 | test avg. loss: 4.441


 30%|██▉       | 14899/50000 [22:16<47:44, 12.25it/s]

Epochs: 14898 | epoch avg. loss: 0.070 | test avg. loss: 4.481
Epochs: 14899 | epoch avg. loss: 0.041 | test avg. loss: 4.648


 30%|██▉       | 14903/50000 [22:17<2:16:21,  4.29it/s]

Epochs: 14900 | epoch avg. loss: 0.035 | test avg. loss: 4.524
Epochs: 14901 | epoch avg. loss: 0.072 | test avg. loss: 4.649
Epochs: 14902 | epoch avg. loss: 0.026 | test avg. loss: 4.729


                                                       

Epochs: 14903 | epoch avg. loss: 0.030 | test avg. loss: 4.585
Epochs: 14904 | epoch avg. loss: 0.089 | test avg. loss: 4.583
Epochs: 14905 | epoch avg. loss: 0.034 | test avg. loss: 4.759


 30%|██▉       | 14909/50000 [22:18<1:13:41,  7.94it/s]

Epochs: 14906 | epoch avg. loss: 0.062 | test avg. loss: 4.609
Epochs: 14907 | epoch avg. loss: 0.034 | test avg. loss: 4.605
Epochs: 14908 | epoch avg. loss: 0.024 | test avg. loss: 4.632


 30%|██▉       | 14913/50000 [22:18<55:36, 10.52it/s]  

Epochs: 14909 | epoch avg. loss: 0.023 | test avg. loss: 4.529
Epochs: 14910 | epoch avg. loss: 0.029 | test avg. loss: 4.595
Epochs: 14911 | epoch avg. loss: 0.034 | test avg. loss: 4.529
Epochs: 14912 | epoch avg. loss: 0.031 | test avg. loss: 4.506


 30%|██▉       | 14915/50000 [22:18<50:42, 11.53it/s]

Epochs: 14913 | epoch avg. loss: 0.040 | test avg. loss: 4.648
Epochs: 14914 | epoch avg. loss: 0.060 | test avg. loss: 4.748
Epochs: 14915 | epoch avg. loss: 0.075 | test avg. loss: 4.504


 30%|██▉       | 14919/50000 [22:19<48:12, 12.13it/s]

Epochs: 14916 | epoch avg. loss: 0.078 | test avg. loss: 4.510
Epochs: 14917 | epoch avg. loss: 0.036 | test avg. loss: 4.611
Epochs: 14918 | epoch avg. loss: 0.035 | test avg. loss: 4.590


 30%|██▉       | 14921/50000 [22:19<45:58, 12.72it/s]

Epochs: 14919 | epoch avg. loss: 0.020 | test avg. loss: 4.583
Epochs: 14920 | epoch avg. loss: 0.020 | test avg. loss: 4.566
Epochs: 14921 | epoch avg. loss: 0.021 | test avg. loss: 4.512
Epochs: 14922 | epoch avg. loss: 0.022 | test avg. loss: 4.525


 30%|██▉       | 14927/50000 [22:19<39:24, 14.84it/s]

Epochs: 14923 | epoch avg. loss: 0.019 | test avg. loss: 4.493
Epochs: 14924 | epoch avg. loss: 0.032 | test avg. loss: 4.493
Epochs: 14925 | epoch avg. loss: 0.060 | test avg. loss: 4.539
Epochs: 14926 | epoch avg. loss: 0.028 | test avg. loss: 4.645


 30%|██▉       | 14931/50000 [22:19<38:33, 15.16it/s]

Epochs: 14927 | epoch avg. loss: 0.028 | test avg. loss: 4.631
Epochs: 14928 | epoch avg. loss: 0.025 | test avg. loss: 4.566
Epochs: 14929 | epoch avg. loss: 0.032 | test avg. loss: 4.570
Epochs: 14930 | epoch avg. loss: 0.019 | test avg. loss: 4.499


 30%|██▉       | 14933/50000 [22:20<40:43, 14.35it/s]

Epochs: 14931 | epoch avg. loss: 0.043 | test avg. loss: 4.520
Epochs: 14932 | epoch avg. loss: 0.035 | test avg. loss: 4.720
Epochs: 14933 | epoch avg. loss: 0.069 | test avg. loss: 4.712




Epochs: 14934 | epoch avg. loss: 0.040 | test avg. loss: 4.572
Epochs: 14935 | epoch avg. loss: 0.029 | test avg. loss: 4.619
Epochs: 14936 | epoch avg. loss: 0.018 | test avg. loss: 4.586
Epochs: 14937 | epoch avg. loss: 0.038 | test avg. loss: 4.570


 30%|██▉       | 14941/50000 [22:20<40:33, 14.41it/s]

Epochs: 14938 | epoch avg. loss: 0.046 | test avg. loss: 4.922
Epochs: 14939 | epoch avg. loss: 0.161 | test avg. loss: 4.623
Epochs: 14940 | epoch avg. loss: 0.130 | test avg. loss: 4.705


 30%|██▉       | 14943/50000 [22:20<41:07, 14.21it/s]

Epochs: 14941 | epoch avg. loss: 0.076 | test avg. loss: 4.807
Epochs: 14942 | epoch avg. loss: 0.071 | test avg. loss: 4.546
Epochs: 14943 | epoch avg. loss: 0.120 | test avg. loss: 4.645


 30%|██▉       | 14947/50000 [22:20<45:08, 12.94it/s]

Epochs: 14944 | epoch avg. loss: 0.053 | test avg. loss: 4.661
Epochs: 14945 | epoch avg. loss: 0.082 | test avg. loss: 4.696
Epochs: 14946 | epoch avg. loss: 0.057 | test avg. loss: 4.819


 30%|██▉       | 14949/50000 [22:21<46:33, 12.55it/s]

Epochs: 14947 | epoch avg. loss: 0.076 | test avg. loss: 4.495
Epochs: 14948 | epoch avg. loss: 0.055 | test avg. loss: 4.439
Epochs: 14949 | epoch avg. loss: 0.044 | test avg. loss: 4.531




Epochs: 14950 | epoch avg. loss: 0.029 | test avg. loss: 4.572
Epochs: 14951 | epoch avg. loss: 0.046 | test avg. loss: 4.623
Epochs: 14952 | epoch avg. loss: 0.048 | test avg. loss: 4.682




Epochs: 14953 | epoch avg. loss: 0.034 | test avg. loss: 4.798
Epochs: 14954 | epoch avg. loss: 0.067 | test avg. loss: 4.716
Epochs: 14955 | epoch avg. loss: 0.028 | test avg. loss: 4.680


 30%|██▉       | 14959/50000 [22:21<41:13, 14.16it/s]

Epochs: 14956 | epoch avg. loss: 0.045 | test avg. loss: 4.718
Epochs: 14957 | epoch avg. loss: 0.028 | test avg. loss: 4.694
Epochs: 14958 | epoch avg. loss: 0.033 | test avg. loss: 4.500


 30%|██▉       | 14961/50000 [22:22<43:05, 13.55it/s]

Epochs: 14959 | epoch avg. loss: 0.081 | test avg. loss: 4.677
Epochs: 14960 | epoch avg. loss: 0.037 | test avg. loss: 4.695
Epochs: 14961 | epoch avg. loss: 0.026 | test avg. loss: 4.833


 30%|██▉       | 14965/50000 [22:22<42:23, 13.77it/s]

Epochs: 14962 | epoch avg. loss: 0.084 | test avg. loss: 4.826
Epochs: 14963 | epoch avg. loss: 0.036 | test avg. loss: 4.699
Epochs: 14964 | epoch avg. loss: 0.054 | test avg. loss: 4.721
Epochs: 14965 | epoch avg. loss: 0.023 | test avg. loss: 4.596


 30%|██▉       | 14969/50000 [22:22<39:11, 14.90it/s]

Epochs: 14966 | epoch avg. loss: 0.040 | test avg. loss: 4.706
Epochs: 14967 | epoch avg. loss: 0.022 | test avg. loss: 4.645
Epochs: 14968 | epoch avg. loss: 0.061 | test avg. loss: 4.592
Epochs: 14969 | epoch avg. loss: 0.049 | test avg. loss: 4.836


 30%|██▉       | 14973/50000 [22:22<37:42, 15.48it/s]

Epochs: 14970 | epoch avg. loss: 0.140 | test avg. loss: 4.539
Epochs: 14971 | epoch avg. loss: 0.131 | test avg. loss: 4.604
Epochs: 14972 | epoch avg. loss: 0.167 | test avg. loss: 5.038
Epochs: 14973 | epoch avg. loss: 0.243 | test avg. loss: 4.677


 30%|██▉       | 14977/50000 [22:23<42:13, 13.83it/s]

Epochs: 14974 | epoch avg. loss: 0.094 | test avg. loss: 4.623
Epochs: 14975 | epoch avg. loss: 0.133 | test avg. loss: 5.003
Epochs: 14976 | epoch avg. loss: 0.185 | test avg. loss: 4.812


 30%|██▉       | 14979/50000 [22:23<44:05, 13.24it/s]

Epochs: 14977 | epoch avg. loss: 0.088 | test avg. loss: 4.800
Epochs: 14978 | epoch avg. loss: 0.108 | test avg. loss: 5.025
Epochs: 14979 | epoch avg. loss: 0.133 | test avg. loss: 4.593


 30%|██▉       | 14983/50000 [22:23<42:25, 13.75it/s]

Epochs: 14980 | epoch avg. loss: 0.059 | test avg. loss: 4.455
Epochs: 14981 | epoch avg. loss: 0.171 | test avg. loss: 4.607
Epochs: 14982 | epoch avg. loss: 0.055 | test avg. loss: 4.931


 30%|██▉       | 14985/50000 [22:23<42:54, 13.60it/s]

Epochs: 14983 | epoch avg. loss: 0.050 | test avg. loss: 4.876
Epochs: 14984 | epoch avg. loss: 0.096 | test avg. loss: 4.874
Epochs: 14985 | epoch avg. loss: 0.047 | test avg. loss: 4.856


                                                     

Epochs: 14986 | epoch avg. loss: 0.070 | test avg. loss: 4.520
Epochs: 14987 | epoch avg. loss: 0.112 | test avg. loss: 4.615


 30%|██▉       | 14991/50000 [22:24<48:54, 11.93it/s]

Epochs: 14988 | epoch avg. loss: 0.078 | test avg. loss: 4.907
Epochs: 14989 | epoch avg. loss: 0.080 | test avg. loss: 4.860
Epochs: 14990 | epoch avg. loss: 0.275 | test avg. loss: 5.020


 30%|██▉       | 14993/50000 [22:24<49:54, 11.69it/s]

Epochs: 14991 | epoch avg. loss: 0.090 | test avg. loss: 4.841
Epochs: 14992 | epoch avg. loss: 0.052 | test avg. loss: 4.631
Epochs: 14993 | epoch avg. loss: 0.064 | test avg. loss: 4.985


 30%|██▉       | 14997/50000 [22:24<50:35, 11.53it/s]

Epochs: 14994 | epoch avg. loss: 0.226 | test avg. loss: 4.898
Epochs: 14995 | epoch avg. loss: 0.075 | test avg. loss: 4.826
Epochs: 14996 | epoch avg. loss: 0.106 | test avg. loss: 5.531


 30%|██▉       | 14999/50000 [22:25<52:06, 11.19it/s]

Epochs: 14997 | epoch avg. loss: 0.395 | test avg. loss: 4.408
Epochs: 14998 | epoch avg. loss: 0.105 | test avg. loss: 4.221
Epochs: 14999 | epoch avg. loss: 0.095 | test avg. loss: 4.731


 30%|███       | 15003/50000 [22:26<2:17:53,  4.23it/s]

Epochs: 15000 | epoch avg. loss: 0.108 | test avg. loss: 4.685
Epochs: 15001 | epoch avg. loss: 0.311 | test avg. loss: 4.908
Epochs: 15002 | epoch avg. loss: 0.164 | test avg. loss: 5.388


 30%|███       | 15005/50000 [22:26<1:52:08,  5.20it/s]

Epochs: 15003 | epoch avg. loss: 0.196 | test avg. loss: 4.896
Epochs: 15004 | epoch avg. loss: 0.507 | test avg. loss: 4.954
Epochs: 15005 | epoch avg. loss: 0.726 | test avg. loss: 5.490


 30%|███       | 15009/50000 [22:27<1:17:47,  7.50it/s]

Epochs: 15006 | epoch avg. loss: 0.669 | test avg. loss: 4.711
Epochs: 15007 | epoch avg. loss: 0.651 | test avg. loss: 4.821
Epochs: 15008 | epoch avg. loss: 0.201 | test avg. loss: 5.631


 30%|███       | 15013/50000 [22:27<58:05, 10.04it/s]  

Epochs: 15009 | epoch avg. loss: 0.384 | test avg. loss: 4.849
Epochs: 15010 | epoch avg. loss: 0.678 | test avg. loss: 4.296
Epochs: 15011 | epoch avg. loss: 0.188 | test avg. loss: 4.663
Epochs: 15012 | epoch avg. loss: 0.300 | test avg. loss: 4.293


 30%|███       | 15015/50000 [22:27<53:10, 10.96it/s]

Epochs: 15013 | epoch avg. loss: 0.177 | test avg. loss: 4.770
Epochs: 15014 | epoch avg. loss: 0.145 | test avg. loss: 5.226
Epochs: 15015 | epoch avg. loss: 0.182 | test avg. loss: 4.729
Epochs: 15016 | epoch avg. loss: 0.404 | test avg. loss: 4.750


 30%|███       | 15019/50000 [22:28<50:05, 11.64it/s]

Epochs: 15017 | epoch avg. loss: 0.419 | test avg. loss: 4.679
Epochs: 15018 | epoch avg. loss: 0.257 | test avg. loss: 5.129
Epochs: 15019 | epoch avg. loss: 1.031 | test avg. loss: 5.300


 30%|███       | 15023/50000 [22:28<46:48, 12.46it/s]

Epochs: 15020 | epoch avg. loss: 0.579 | test avg. loss: 5.011
Epochs: 15021 | epoch avg. loss: 0.272 | test avg. loss: 4.281
Epochs: 15022 | epoch avg. loss: 0.268 | test avg. loss: 4.491
Epochs: 15023 | epoch avg. loss: 0.267 | test avg. loss: 4.317


 30%|███       | 15027/50000 [22:28<41:06, 14.18it/s]

Epochs: 15024 | epoch avg. loss: 0.275 | test avg. loss: 4.708
Epochs: 15025 | epoch avg. loss: 0.368 | test avg. loss: 7.734
Epochs: 15026 | epoch avg. loss: 1.868 | test avg. loss: 4.994
Epochs: 15027 | epoch avg. loss: 1.604 | test avg. loss: 4.369


                                                     

Epochs: 15028 | epoch avg. loss: 0.586 | test avg. loss: 6.341
Epochs: 15029 | epoch avg. loss: 1.305 | test avg. loss: 4.487
Epochs: 15030 | epoch avg. loss: 1.301 | test avg. loss: 4.266


 30%|███       | 15033/50000 [22:28<40:52, 14.26it/s]

Epochs: 15031 | epoch avg. loss: 0.352 | test avg. loss: 6.356
Epochs: 15032 | epoch avg. loss: 0.814 | test avg. loss: 5.348
Epochs: 15033 | epoch avg. loss: 1.196 | test avg. loss: 5.075


 30%|███       | 15037/50000 [22:29<46:29, 12.53it/s]

Epochs: 15034 | epoch avg. loss: 0.800 | test avg. loss: 4.879
Epochs: 15035 | epoch avg. loss: 0.536 | test avg. loss: 4.773
Epochs: 15036 | epoch avg. loss: 0.629 | test avg. loss: 5.384


 30%|███       | 15039/50000 [22:29<45:44, 12.74it/s]

Epochs: 15037 | epoch avg. loss: 0.648 | test avg. loss: 4.797
Epochs: 15038 | epoch avg. loss: 0.237 | test avg. loss: 4.973
Epochs: 15039 | epoch avg. loss: 0.263 | test avg. loss: 5.761


 30%|███       | 15043/50000 [22:29<44:16, 13.16it/s]

Epochs: 15040 | epoch avg. loss: 0.334 | test avg. loss: 4.897
Epochs: 15041 | epoch avg. loss: 0.462 | test avg. loss: 4.796
Epochs: 15042 | epoch avg. loss: 0.255 | test avg. loss: 4.959


 30%|███       | 15045/50000 [22:29<44:30, 13.09it/s]

Epochs: 15043 | epoch avg. loss: 0.177 | test avg. loss: 4.750
Epochs: 15044 | epoch avg. loss: 0.118 | test avg. loss: 5.147
Epochs: 15045 | epoch avg. loss: 0.089 | test avg. loss: 4.919


 30%|███       | 15049/50000 [22:30<46:31, 12.52it/s]

Epochs: 15046 | epoch avg. loss: 0.191 | test avg. loss: 4.906
Epochs: 15047 | epoch avg. loss: 0.084 | test avg. loss: 5.226
Epochs: 15048 | epoch avg. loss: 0.145 | test avg. loss: 4.744


 30%|███       | 15051/50000 [22:30<46:52, 12.43it/s]

Epochs: 15049 | epoch avg. loss: 0.255 | test avg. loss: 4.849
Epochs: 15050 | epoch avg. loss: 0.166 | test avg. loss: 4.721
Epochs: 15051 | epoch avg. loss: 0.173 | test avg. loss: 4.732


 30%|███       | 15055/50000 [22:30<44:54, 12.97it/s]

Epochs: 15052 | epoch avg. loss: 0.191 | test avg. loss: 5.928
Epochs: 15053 | epoch avg. loss: 0.618 | test avg. loss: 4.664
Epochs: 15054 | epoch avg. loss: 0.451 | test avg. loss: 4.621


 30%|███       | 15057/50000 [22:30<47:20, 12.30it/s]

Epochs: 15055 | epoch avg. loss: 0.241 | test avg. loss: 4.751
Epochs: 15056 | epoch avg. loss: 0.171 | test avg. loss: 4.868
Epochs: 15057 | epoch avg. loss: 0.205 | test avg. loss: 5.522


 30%|███       | 15061/50000 [22:31<45:14, 12.87it/s]

Epochs: 15058 | epoch avg. loss: 0.310 | test avg. loss: 4.755
Epochs: 15059 | epoch avg. loss: 0.293 | test avg. loss: 4.631
Epochs: 15060 | epoch avg. loss: 0.101 | test avg. loss: 4.784
Epochs: 15061 | epoch avg. loss: 0.094 | test avg. loss: 4.671


 30%|███       | 15065/50000 [22:31<41:05, 14.17it/s]

Epochs: 15062 | epoch avg. loss: 0.146 | test avg. loss: 4.928
Epochs: 15063 | epoch avg. loss: 0.102 | test avg. loss: 4.845
Epochs: 15064 | epoch avg. loss: 0.035 | test avg. loss: 4.799
Epochs: 15065 | epoch avg. loss: 0.039 | test avg. loss: 4.843


 30%|███       | 15069/50000 [22:31<39:03, 14.90it/s]

Epochs: 15066 | epoch avg. loss: 0.042 | test avg. loss: 4.648
Epochs: 15067 | epoch avg. loss: 0.041 | test avg. loss: 4.758
Epochs: 15068 | epoch avg. loss: 0.023 | test avg. loss: 4.651
Epochs: 15069 | epoch avg. loss: 0.065 | test avg. loss: 4.705


                                                     

Epochs: 15070 | epoch avg. loss: 0.030 | test avg. loss: 4.830
Epochs: 15071 | epoch avg. loss: 0.045 | test avg. loss: 4.633


 30%|███       | 15075/50000 [22:32<47:20, 12.30it/s]

Epochs: 15072 | epoch avg. loss: 0.027 | test avg. loss: 4.662
Epochs: 15073 | epoch avg. loss: 0.022 | test avg. loss: 4.513
Epochs: 15074 | epoch avg. loss: 0.038 | test avg. loss: 4.586


 30%|███       | 15077/50000 [22:32<49:03, 11.87it/s]

Epochs: 15075 | epoch avg. loss: 0.019 | test avg. loss: 4.589
Epochs: 15076 | epoch avg. loss: 0.033 | test avg. loss: 4.676
Epochs: 15077 | epoch avg. loss: 0.020 | test avg. loss: 4.629


 30%|███       | 15081/50000 [22:32<43:35, 13.35it/s]

Epochs: 15078 | epoch avg. loss: 0.021 | test avg. loss: 4.665
Epochs: 15079 | epoch avg. loss: 0.022 | test avg. loss: 4.539
Epochs: 15080 | epoch avg. loss: 0.036 | test avg. loss: 4.569
Epochs: 15081 | epoch avg. loss: 0.025 | test avg. loss: 4.722


 30%|███       | 15085/50000 [22:32<41:13, 14.12it/s]

Epochs: 15082 | epoch avg. loss: 0.024 | test avg. loss: 4.676
Epochs: 15083 | epoch avg. loss: 0.035 | test avg. loss: 4.771
Epochs: 15084 | epoch avg. loss: 0.030 | test avg. loss: 4.720


 30%|███       | 15087/50000 [22:33<43:24, 13.40it/s]

Epochs: 15085 | epoch avg. loss: 0.035 | test avg. loss: 4.596
Epochs: 15086 | epoch avg. loss: 0.107 | test avg. loss: 4.792
Epochs: 15087 | epoch avg. loss: 0.072 | test avg. loss: 4.669


 30%|███       | 15091/50000 [22:33<40:36, 14.33it/s]

Epochs: 15088 | epoch avg. loss: 0.059 | test avg. loss: 4.587
Epochs: 15089 | epoch avg. loss: 0.129 | test avg. loss: 5.036
Epochs: 15090 | epoch avg. loss: 0.178 | test avg. loss: 4.600
Epochs: 15091 | epoch avg. loss: 0.079 | test avg. loss: 4.584


 30%|███       | 15095/50000 [22:33<38:36, 15.07it/s]

Epochs: 15092 | epoch avg. loss: 0.093 | test avg. loss: 4.952
Epochs: 15093 | epoch avg. loss: 0.117 | test avg. loss: 4.604
Epochs: 15094 | epoch avg. loss: 0.077 | test avg. loss: 4.513
Epochs: 15095 | epoch avg. loss: 0.078 | test avg. loss: 4.964


 30%|███       | 15099/50000 [22:33<38:53, 14.96it/s]

Epochs: 15096 | epoch avg. loss: 0.148 | test avg. loss: 4.425
Epochs: 15097 | epoch avg. loss: 0.095 | test avg. loss: 4.534
Epochs: 15098 | epoch avg. loss: 0.066 | test avg. loss: 4.772


 30%|███       | 15099/50000 [22:33<38:53, 14.96it/s]

Epochs: 15099 | epoch avg. loss: 0.057 | test avg. loss: 4.651


 30%|███       | 15103/50000 [22:35<2:01:23,  4.79it/s]

Epochs: 15100 | epoch avg. loss: 0.047 | test avg. loss: 4.795
Epochs: 15101 | epoch avg. loss: 0.044 | test avg. loss: 4.587
Epochs: 15102 | epoch avg. loss: 0.031 | test avg. loss: 4.508


 30%|███       | 15105/50000 [22:35<1:37:55,  5.94it/s]

Epochs: 15103 | epoch avg. loss: 0.033 | test avg. loss: 4.650
Epochs: 15104 | epoch avg. loss: 0.026 | test avg. loss: 4.567
Epochs: 15105 | epoch avg. loss: 0.072 | test avg. loss: 4.757


 30%|███       | 15109/50000 [22:35<1:08:32,  8.48it/s]

Epochs: 15106 | epoch avg. loss: 0.046 | test avg. loss: 4.723
Epochs: 15107 | epoch avg. loss: 0.044 | test avg. loss: 4.620
Epochs: 15108 | epoch avg. loss: 0.040 | test avg. loss: 4.682


 30%|███       | 15113/50000 [22:36<53:09, 10.94it/s]  

Epochs: 15109 | epoch avg. loss: 0.033 | test avg. loss: 4.518
Epochs: 15110 | epoch avg. loss: 0.047 | test avg. loss: 4.515
Epochs: 15111 | epoch avg. loss: 0.035 | test avg. loss: 4.728
Epochs: 15112 | epoch avg. loss: 0.032 | test avg. loss: 4.657


 30%|███       | 15115/50000 [22:36<49:09, 11.83it/s]

Epochs: 15113 | epoch avg. loss: 0.046 | test avg. loss: 4.788
Epochs: 15114 | epoch avg. loss: 0.026 | test avg. loss: 4.734
Epochs: 15115 | epoch avg. loss: 0.026 | test avg. loss: 4.693


 30%|███       | 15119/50000 [22:36<45:20, 12.82it/s]

Epochs: 15116 | epoch avg. loss: 0.040 | test avg. loss: 4.731
Epochs: 15117 | epoch avg. loss: 0.049 | test avg. loss: 4.577
Epochs: 15118 | epoch avg. loss: 0.190 | test avg. loss: 4.829


 30%|███       | 15121/50000 [22:36<43:27, 13.38it/s]

Epochs: 15119 | epoch avg. loss: 0.111 | test avg. loss: 4.776
Epochs: 15120 | epoch avg. loss: 0.077 | test avg. loss: 4.683
Epochs: 15121 | epoch avg. loss: 0.064 | test avg. loss: 4.829


 30%|███       | 15125/50000 [22:37<43:47, 13.27it/s]

Epochs: 15122 | epoch avg. loss: 0.055 | test avg. loss: 4.528
Epochs: 15123 | epoch avg. loss: 0.125 | test avg. loss: 4.616
Epochs: 15124 | epoch avg. loss: 0.045 | test avg. loss: 4.801


 30%|███       | 15129/50000 [22:37<41:38, 13.96it/s]

Epochs: 15125 | epoch avg. loss: 0.049 | test avg. loss: 4.753
Epochs: 15126 | epoch avg. loss: 0.022 | test avg. loss: 4.752
Epochs: 15127 | epoch avg. loss: 0.028 | test avg. loss: 4.841
Epochs: 15128 | epoch avg. loss: 0.025 | test avg. loss: 4.708


 30%|███       | 15131/50000 [22:37<43:55, 13.23it/s]

Epochs: 15129 | epoch avg. loss: 0.033 | test avg. loss: 4.858
Epochs: 15130 | epoch avg. loss: 0.063 | test avg. loss: 4.688
Epochs: 15131 | epoch avg. loss: 0.047 | test avg. loss: 4.599


 30%|███       | 15135/50000 [22:37<42:03, 13.81it/s]

Epochs: 15132 | epoch avg. loss: 0.053 | test avg. loss: 4.810
Epochs: 15133 | epoch avg. loss: 0.060 | test avg. loss: 4.681
Epochs: 15134 | epoch avg. loss: 0.030 | test avg. loss: 4.579


 30%|███       | 15137/50000 [22:37<43:13, 13.44it/s]

Epochs: 15135 | epoch avg. loss: 0.074 | test avg. loss: 4.797
Epochs: 15136 | epoch avg. loss: 0.099 | test avg. loss: 4.776
Epochs: 15137 | epoch avg. loss: 0.040 | test avg. loss: 4.745


 30%|███       | 15141/50000 [22:38<42:35, 13.64it/s]

Epochs: 15138 | epoch avg. loss: 0.055 | test avg. loss: 5.171
Epochs: 15139 | epoch avg. loss: 0.144 | test avg. loss: 4.765
Epochs: 15140 | epoch avg. loss: 0.285 | test avg. loss: 4.831


                                                     

Epochs: 15141 | epoch avg. loss: 0.191 | test avg. loss: 4.966
Epochs: 15142 | epoch avg. loss: 0.110 | test avg. loss: 4.674
Epochs: 15143 | epoch avg. loss: 0.077 | test avg. loss: 4.929


 30%|███       | 15147/50000 [22:38<41:21, 14.05it/s]

Epochs: 15144 | epoch avg. loss: 0.139 | test avg. loss: 4.826
Epochs: 15145 | epoch avg. loss: 0.054 | test avg. loss: 4.651
Epochs: 15146 | epoch avg. loss: 0.151 | test avg. loss: 4.730




Epochs: 15147 | epoch avg. loss: 0.024 | test avg. loss: 4.754
Epochs: 15148 | epoch avg. loss: 0.025 | test avg. loss: 4.701
Epochs: 15149 | epoch avg. loss: 0.023 | test avg. loss: 4.721


 30%|███       | 15153/50000 [22:39<43:32, 13.34it/s]

Epochs: 15150 | epoch avg. loss: 0.037 | test avg. loss: 4.878
Epochs: 15151 | epoch avg. loss: 0.093 | test avg. loss: 4.701
Epochs: 15152 | epoch avg. loss: 0.040 | test avg. loss: 4.650
Epochs: 15153 | epoch avg. loss: 0.089 | test avg. loss: 4.889


 30%|███       | 15157/50000 [22:39<45:17, 12.82it/s]

Epochs: 15154 | epoch avg. loss: 0.108 | test avg. loss: 4.719
Epochs: 15155 | epoch avg. loss: 0.047 | test avg. loss: 4.514
Epochs: 15156 | epoch avg. loss: 0.091 | test avg. loss: 4.546


 30%|███       | 15159/50000 [22:39<44:57, 12.92it/s]

Epochs: 15157 | epoch avg. loss: 0.037 | test avg. loss: 4.718
Epochs: 15158 | epoch avg. loss: 0.061 | test avg. loss: 4.717
Epochs: 15159 | epoch avg. loss: 0.026 | test avg. loss: 4.667


 30%|███       | 15163/50000 [22:39<44:44, 12.98it/s]

Epochs: 15160 | epoch avg. loss: 0.056 | test avg. loss: 4.934
Epochs: 15161 | epoch avg. loss: 0.110 | test avg. loss: 4.791
Epochs: 15162 | epoch avg. loss: 0.038 | test avg. loss: 4.697


 30%|███       | 15167/50000 [22:40<42:42, 13.59it/s]

Epochs: 15163 | epoch avg. loss: 0.044 | test avg. loss: 4.765
Epochs: 15164 | epoch avg. loss: 0.026 | test avg. loss: 4.694
Epochs: 15165 | epoch avg. loss: 0.021 | test avg. loss: 4.719
Epochs: 15166 | epoch avg. loss: 0.020 | test avg. loss: 4.685


 30%|███       | 15171/50000 [22:40<40:13, 14.43it/s]

Epochs: 15167 | epoch avg. loss: 0.023 | test avg. loss: 4.603
Epochs: 15168 | epoch avg. loss: 0.040 | test avg. loss: 4.806
Epochs: 15169 | epoch avg. loss: 0.075 | test avg. loss: 4.948
Epochs: 15170 | epoch avg. loss: 0.071 | test avg. loss: 4.644


 30%|███       | 15173/50000 [22:40<39:27, 14.71it/s]

Epochs: 15171 | epoch avg. loss: 0.213 | test avg. loss: 4.812
Epochs: 15172 | epoch avg. loss: 0.200 | test avg. loss: 4.839
Epochs: 15173 | epoch avg. loss: 0.134 | test avg. loss: 4.570


 30%|███       | 15177/50000 [22:40<39:35, 14.66it/s]

Epochs: 15174 | epoch avg. loss: 0.283 | test avg. loss: 4.796
Epochs: 15175 | epoch avg. loss: 0.212 | test avg. loss: 5.032
Epochs: 15176 | epoch avg. loss: 0.161 | test avg. loss: 4.662


 30%|███       | 15179/50000 [22:41<43:30, 13.34it/s]

Epochs: 15177 | epoch avg. loss: 0.184 | test avg. loss: 4.768
Epochs: 15178 | epoch avg. loss: 0.158 | test avg. loss: 4.862
Epochs: 15179 | epoch avg. loss: 0.130 | test avg. loss: 4.641


 30%|███       | 15183/50000 [22:41<45:47, 12.67it/s]

Epochs: 15180 | epoch avg. loss: 0.374 | test avg. loss: 5.212
Epochs: 15181 | epoch avg. loss: 0.496 | test avg. loss: 4.977
Epochs: 15182 | epoch avg. loss: 0.209 | test avg. loss: 5.268


 30%|███       | 15187/50000 [22:41<41:03, 14.13it/s]

Epochs: 15183 | epoch avg. loss: 0.758 | test avg. loss: 5.467
Epochs: 15184 | epoch avg. loss: 0.470 | test avg. loss: 5.172
Epochs: 15185 | epoch avg. loss: 0.205 | test avg. loss: 5.017
Epochs: 15186 | epoch avg. loss: 0.283 | test avg. loss: 5.179


 30%|███       | 15189/50000 [22:41<41:05, 14.12it/s]

Epochs: 15187 | epoch avg. loss: 0.134 | test avg. loss: 4.684
Epochs: 15188 | epoch avg. loss: 0.113 | test avg. loss: 4.787
Epochs: 15189 | epoch avg. loss: 0.077 | test avg. loss: 5.071


 30%|███       | 15193/50000 [22:42<43:18, 13.39it/s]

Epochs: 15190 | epoch avg. loss: 0.089 | test avg. loss: 5.016
Epochs: 15191 | epoch avg. loss: 0.368 | test avg. loss: 5.258
Epochs: 15192 | epoch avg. loss: 0.227 | test avg. loss: 4.794


                                                     

Epochs: 15193 | epoch avg. loss: 0.136 | test avg. loss: 4.653
Epochs: 15194 | epoch avg. loss: 0.191 | test avg. loss: 5.696
Epochs: 15195 | epoch avg. loss: 0.540 | test avg. loss: 4.678


 30%|███       | 15199/50000 [22:42<40:55, 14.17it/s]

Epochs: 15196 | epoch avg. loss: 0.349 | test avg. loss: 4.699
Epochs: 15197 | epoch avg. loss: 0.202 | test avg. loss: 5.673
Epochs: 15198 | epoch avg. loss: 0.467 | test avg. loss: 4.683


 30%|███       | 15199/50000 [22:42<40:55, 14.17it/s]

Epochs: 15199 | epoch avg. loss: 0.410 | test avg. loss: 5.132


 30%|███       | 15203/50000 [22:44<2:27:01,  3.94it/s]

Epochs: 15200 | epoch avg. loss: 0.761 | test avg. loss: 4.693
Epochs: 15201 | epoch avg. loss: 0.332 | test avg. loss: 4.908
Epochs: 15202 | epoch avg. loss: 0.698 | test avg. loss: 5.567


 30%|███       | 15205/50000 [22:44<1:54:39,  5.06it/s]

Epochs: 15203 | epoch avg. loss: 0.579 | test avg. loss: 4.494
Epochs: 15204 | epoch avg. loss: 0.302 | test avg. loss: 4.322
Epochs: 15205 | epoch avg. loss: 0.242 | test avg. loss: 4.510


 30%|███       | 15209/50000 [22:44<1:19:28,  7.30it/s]

Epochs: 15206 | epoch avg. loss: 0.268 | test avg. loss: 4.465
Epochs: 15207 | epoch avg. loss: 0.314 | test avg. loss: 4.773
Epochs: 15208 | epoch avg. loss: 0.125 | test avg. loss: 4.609


 30%|███       | 15211/50000 [22:45<1:07:26,  8.60it/s]

Epochs: 15209 | epoch avg. loss: 0.152 | test avg. loss: 4.597
Epochs: 15210 | epoch avg. loss: 0.150 | test avg. loss: 5.167
Epochs: 15211 | epoch avg. loss: 0.311 | test avg. loss: 4.431
Epochs: 15212 | epoch avg. loss: 0.089 | test avg. loss: 4.393


 30%|███       | 15215/50000 [22:45<55:41, 10.41it/s]

Epochs: 15213 | epoch avg. loss: 0.071 | test avg. loss: 4.821
Epochs: 15214 | epoch avg. loss: 0.117 | test avg. loss: 4.618
Epochs: 15215 | epoch avg. loss: 0.193 | test avg. loss: 5.025


 30%|███       | 15219/50000 [22:45<49:57, 11.60it/s]

Epochs: 15216 | epoch avg. loss: 0.222 | test avg. loss: 5.040
Epochs: 15217 | epoch avg. loss: 0.144 | test avg. loss: 4.404
Epochs: 15218 | epoch avg. loss: 0.076 | test avg. loss: 4.434


 30%|███       | 15221/50000 [22:45<48:08, 12.04it/s]

Epochs: 15219 | epoch avg. loss: 0.050 | test avg. loss: 4.244
Epochs: 15220 | epoch avg. loss: 0.033 | test avg. loss: 4.647
Epochs: 15221 | epoch avg. loss: 0.187 | test avg. loss: 4.510


 30%|███       | 15225/50000 [22:46<49:32, 11.70it/s]

Epochs: 15222 | epoch avg. loss: 0.069 | test avg. loss: 4.432
Epochs: 15223 | epoch avg. loss: 0.080 | test avg. loss: 4.692
Epochs: 15224 | epoch avg. loss: 0.112 | test avg. loss: 4.290


 30%|███       | 15227/50000 [22:46<50:33, 11.46it/s]

Epochs: 15225 | epoch avg. loss: 0.078 | test avg. loss: 4.237
Epochs: 15226 | epoch avg. loss: 0.076 | test avg. loss: 4.677
Epochs: 15227 | epoch avg. loss: 0.114 | test avg. loss: 4.404


 30%|███       | 15231/50000 [22:46<43:40, 13.27it/s]

Epochs: 15228 | epoch avg. loss: 0.046 | test avg. loss: 4.559
Epochs: 15229 | epoch avg. loss: 0.042 | test avg. loss: 4.810
Epochs: 15230 | epoch avg. loss: 0.058 | test avg. loss: 4.572
Epochs: 15231 | epoch avg. loss: 0.071 | test avg. loss: 4.616


 30%|███       | 15235/50000 [22:46<41:23, 14.00it/s]

Epochs: 15232 | epoch avg. loss: 0.027 | test avg. loss: 4.631
Epochs: 15233 | epoch avg. loss: 0.026 | test avg. loss: 4.510
Epochs: 15234 | epoch avg. loss: 0.031 | test avg. loss: 4.521


 30%|███       | 15239/50000 [22:47<40:07, 14.44it/s]

Epochs: 15235 | epoch avg. loss: 0.033 | test avg. loss: 4.580
Epochs: 15236 | epoch avg. loss: 0.031 | test avg. loss: 4.668
Epochs: 15237 | epoch avg. loss: 0.021 | test avg. loss: 4.756
Epochs: 15238 | epoch avg. loss: 0.021 | test avg. loss: 4.750


 30%|███       | 15241/50000 [22:47<41:38, 13.91it/s]

Epochs: 15239 | epoch avg. loss: 0.023 | test avg. loss: 4.632
Epochs: 15240 | epoch avg. loss: 0.071 | test avg. loss: 4.757
Epochs: 15241 | epoch avg. loss: 0.059 | test avg. loss: 4.795


 30%|███       | 15245/50000 [22:47<42:00, 13.79it/s]

Epochs: 15242 | epoch avg. loss: 0.041 | test avg. loss: 4.683
Epochs: 15243 | epoch avg. loss: 0.063 | test avg. loss: 5.133
Epochs: 15244 | epoch avg. loss: 0.200 | test avg. loss: 4.772


 30%|███       | 15249/50000 [22:47<39:31, 14.65it/s]

Epochs: 15245 | epoch avg. loss: 0.195 | test avg. loss: 4.828
Epochs: 15246 | epoch avg. loss: 0.199 | test avg. loss: 5.067
Epochs: 15247 | epoch avg. loss: 0.201 | test avg. loss: 4.868
Epochs: 15248 | epoch avg. loss: 0.521 | test avg. loss: 5.070




Epochs: 15249 | epoch avg. loss: 0.163 | test avg. loss: 4.874
Epochs: 15250 | epoch avg. loss: 0.199 | test avg. loss: 4.995
Epochs: 15251 | epoch avg. loss: 0.264 | test avg. loss: 5.220


 31%|███       | 15255/50000 [22:48<42:48, 13.53it/s]

Epochs: 15252 | epoch avg. loss: 0.159 | test avg. loss: 4.737
Epochs: 15253 | epoch avg. loss: 0.116 | test avg. loss: 4.642
Epochs: 15254 | epoch avg. loss: 0.100 | test avg. loss: 5.147


 31%|███       | 15257/50000 [22:48<43:56, 13.18it/s]

Epochs: 15255 | epoch avg. loss: 0.167 | test avg. loss: 4.810
Epochs: 15256 | epoch avg. loss: 0.328 | test avg. loss: 5.144
Epochs: 15257 | epoch avg. loss: 0.202 | test avg. loss: 4.920


 31%|███       | 15261/50000 [22:48<41:31, 13.94it/s]

Epochs: 15258 | epoch avg. loss: 0.126 | test avg. loss: 4.814
Epochs: 15259 | epoch avg. loss: 0.113 | test avg. loss: 5.200
Epochs: 15260 | epoch avg. loss: 0.270 | test avg. loss: 4.497


 31%|███       | 15263/50000 [22:48<43:45, 13.23it/s]

Epochs: 15261 | epoch avg. loss: 0.095 | test avg. loss: 4.698
Epochs: 15262 | epoch avg. loss: 0.226 | test avg. loss: 5.128
Epochs: 15263 | epoch avg. loss: 0.235 | test avg. loss: 4.695


 31%|███       | 15267/50000 [22:49<47:05, 12.29it/s]

Epochs: 15264 | epoch avg. loss: 0.280 | test avg. loss: 4.911
Epochs: 15265 | epoch avg. loss: 0.203 | test avg. loss: 5.093
Epochs: 15266 | epoch avg. loss: 0.170 | test avg. loss: 4.842


 31%|███       | 15269/50000 [22:49<45:09, 12.82it/s]

Epochs: 15267 | epoch avg. loss: 0.472 | test avg. loss: 5.286
Epochs: 15268 | epoch avg. loss: 0.413 | test avg. loss: 4.739
Epochs: 15269 | epoch avg. loss: 0.160 | test avg. loss: 4.601


 31%|███       | 15273/50000 [22:49<42:39, 13.57it/s]

Epochs: 15270 | epoch avg. loss: 0.316 | test avg. loss: 5.086
Epochs: 15271 | epoch avg. loss: 0.259 | test avg. loss: 4.709
Epochs: 15272 | epoch avg. loss: 0.376 | test avg. loss: 4.716




Epochs: 15273 | epoch avg. loss: 0.218 | test avg. loss: 5.207
Epochs: 15274 | epoch avg. loss: 0.390 | test avg. loss: 4.344
Epochs: 15275 | epoch avg. loss: 0.169 | test avg. loss: 4.460


 31%|███       | 15279/50000 [22:50<48:13, 12.00it/s]

Epochs: 15276 | epoch avg. loss: 0.126 | test avg. loss: 4.710
Epochs: 15277 | epoch avg. loss: 0.103 | test avg. loss: 4.502
Epochs: 15278 | epoch avg. loss: 0.079 | test avg. loss: 4.669


 31%|███       | 15281/50000 [22:50<46:57, 12.32it/s]

Epochs: 15279 | epoch avg. loss: 0.072 | test avg. loss: 4.592
Epochs: 15280 | epoch avg. loss: 0.053 | test avg. loss: 4.382
Epochs: 15281 | epoch avg. loss: 0.123 | test avg. loss: 4.690


 31%|███       | 15285/50000 [22:50<44:46, 12.92it/s]

Epochs: 15282 | epoch avg. loss: 0.089 | test avg. loss: 4.687
Epochs: 15283 | epoch avg. loss: 0.085 | test avg. loss: 4.762
Epochs: 15284 | epoch avg. loss: 0.078 | test avg. loss: 5.184


 31%|███       | 15287/50000 [22:50<43:17, 13.36it/s]

Epochs: 15285 | epoch avg. loss: 0.116 | test avg. loss: 4.726
Epochs: 15286 | epoch avg. loss: 0.339 | test avg. loss: 4.824
Epochs: 15287 | epoch avg. loss: 0.254 | test avg. loss: 4.674


 31%|███       | 15291/50000 [22:51<47:34, 12.16it/s]

Epochs: 15288 | epoch avg. loss: 0.113 | test avg. loss: 4.700
Epochs: 15289 | epoch avg. loss: 0.102 | test avg. loss: 5.306
Epochs: 15290 | epoch avg. loss: 0.182 | test avg. loss: 4.858


 31%|███       | 15295/50000 [22:51<42:16, 13.68it/s]

Epochs: 15291 | epoch avg. loss: 0.230 | test avg. loss: 4.976
Epochs: 15292 | epoch avg. loss: 0.187 | test avg. loss: 4.780
Epochs: 15293 | epoch avg. loss: 0.137 | test avg. loss: 4.548
Epochs: 15294 | epoch avg. loss: 0.223 | test avg. loss: 5.158


 31%|███       | 15297/50000 [22:51<43:26, 13.32it/s]

Epochs: 15295 | epoch avg. loss: 0.362 | test avg. loss: 4.760
Epochs: 15296 | epoch avg. loss: 0.205 | test avg. loss: 4.983
Epochs: 15297 | epoch avg. loss: 0.223 | test avg. loss: 5.909


 31%|███       | 15299/50000 [22:51<45:25, 12.73it/s]

Epochs: 15298 | epoch avg. loss: 0.467 | test avg. loss: 5.050
Epochs: 15299 | epoch avg. loss: 0.549 | test avg. loss: 4.795


 31%|███       | 15303/50000 [22:53<2:05:44,  4.60it/s]

Epochs: 15300 | epoch avg. loss: 0.285 | test avg. loss: 5.343
Epochs: 15301 | epoch avg. loss: 0.380 | test avg. loss: 4.849
Epochs: 15302 | epoch avg. loss: 0.976 | test avg. loss: 5.000
Epochs: 15303 | epoch avg. loss: 0.389 | test avg. loss: 4.792


 31%|███       | 15307/50000 [22:53<1:19:56,  7.23it/s]

Epochs: 15304 | epoch avg. loss: 0.253 | test avg. loss: 4.901
Epochs: 15305 | epoch avg. loss: 0.272 | test avg. loss: 5.676
Epochs: 15306 | epoch avg. loss: 0.362 | test avg. loss: 4.652
Epochs: 15307 | epoch avg. loss: 0.418 | test avg. loss: 4.542


 31%|███       | 15311/50000 [22:53<59:47,  9.67it/s]  

Epochs: 15308 | epoch avg. loss: 0.174 | test avg. loss: 5.028
Epochs: 15309 | epoch avg. loss: 0.247 | test avg. loss: 4.601
Epochs: 15310 | epoch avg. loss: 0.436 | test avg. loss: 5.017


                                                     

Epochs: 15311 | epoch avg. loss: 0.504 | test avg. loss: 4.627
Epochs: 15312 | epoch avg. loss: 0.273 | test avg. loss: 4.933
Epochs: 15313 | epoch avg. loss: 0.593 | test avg. loss: 5.697


                                                     

Epochs: 15314 | epoch avg. loss: 0.769 | test avg. loss: 4.646
Epochs: 15315 | epoch avg. loss: 0.482 | test avg. loss: 4.898
Epochs: 15316 | epoch avg. loss: 0.759 | test avg. loss: 4.872




Epochs: 15317 | epoch avg. loss: 0.687 | test avg. loss: 5.947
Epochs: 15318 | epoch avg. loss: 1.632 | test avg. loss: 7.429
Epochs: 15319 | epoch avg. loss: 1.388 | test avg. loss: 5.698


 31%|███       | 15323/50000 [22:54<41:06, 14.06it/s]

Epochs: 15320 | epoch avg. loss: 1.818 | test avg. loss: 5.139
Epochs: 15321 | epoch avg. loss: 1.357 | test avg. loss: 4.747
Epochs: 15322 | epoch avg. loss: 0.986 | test avg. loss: 5.075
Epochs: 15323 | epoch avg. loss: 1.294 | test avg. loss: 6.463


 31%|███       | 15327/50000 [22:55<43:30, 13.28it/s]

Epochs: 15324 | epoch avg. loss: 1.622 | test avg. loss: 4.919
Epochs: 15325 | epoch avg. loss: 1.645 | test avg. loss: 5.318
Epochs: 15326 | epoch avg. loss: 1.200 | test avg. loss: 4.460


 31%|███       | 15331/50000 [22:55<39:50, 14.50it/s]

Epochs: 15327 | epoch avg. loss: 0.620 | test avg. loss: 4.296
Epochs: 15328 | epoch avg. loss: 0.548 | test avg. loss: 5.863
Epochs: 15329 | epoch avg. loss: 0.874 | test avg. loss: 4.588
Epochs: 15330 | epoch avg. loss: 0.502 | test avg. loss: 4.947


 31%|███       | 15335/50000 [22:55<37:49, 15.27it/s]

Epochs: 15331 | epoch avg. loss: 0.254 | test avg. loss: 4.332
Epochs: 15332 | epoch avg. loss: 0.175 | test avg. loss: 4.388
Epochs: 15333 | epoch avg. loss: 0.188 | test avg. loss: 4.456
Epochs: 15334 | epoch avg. loss: 0.135 | test avg. loss: 4.433


 31%|███       | 15337/50000 [22:55<37:16, 15.50it/s]

Epochs: 15335 | epoch avg. loss: 0.096 | test avg. loss: 4.234
Epochs: 15336 | epoch avg. loss: 0.181 | test avg. loss: 4.191
Epochs: 15337 | epoch avg. loss: 0.076 | test avg. loss: 4.428


 31%|███       | 15341/50000 [22:55<44:03, 13.11it/s]

Epochs: 15338 | epoch avg. loss: 0.105 | test avg. loss: 4.286
Epochs: 15339 | epoch avg. loss: 0.084 | test avg. loss: 4.554
Epochs: 15340 | epoch avg. loss: 0.053 | test avg. loss: 4.514


 31%|███       | 15345/50000 [22:56<40:40, 14.20it/s]

Epochs: 15341 | epoch avg. loss: 0.046 | test avg. loss: 4.566
Epochs: 15342 | epoch avg. loss: 0.052 | test avg. loss: 4.453
Epochs: 15343 | epoch avg. loss: 0.056 | test avg. loss: 4.230
Epochs: 15344 | epoch avg. loss: 0.096 | test avg. loss: 4.718


 31%|███       | 15347/50000 [22:56<39:48, 14.51it/s]

Epochs: 15345 | epoch avg. loss: 0.174 | test avg. loss: 4.313
Epochs: 15346 | epoch avg. loss: 0.142 | test avg. loss: 4.650
Epochs: 15347 | epoch avg. loss: 0.115 | test avg. loss: 4.574
Epochs: 15348 | epoch avg. loss: 0.144 | test avg. loss: 4.671


 31%|███       | 15351/50000 [22:56<39:52, 14.48it/s]

Epochs: 15349 | epoch avg. loss: 0.077 | test avg. loss: 4.684
Epochs: 15350 | epoch avg. loss: 0.060 | test avg. loss: 4.441
Epochs: 15351 | epoch avg. loss: 0.053 | test avg. loss: 4.535
Epochs: 15352 | epoch avg. loss: 0.052 | test avg. loss: 4.307


 31%|███       | 15355/50000 [22:57<43:29, 13.27it/s]

Epochs: 15353 | epoch avg. loss: 0.086 | test avg. loss: 4.485
Epochs: 15354 | epoch avg. loss: 0.042 | test avg. loss: 4.519
Epochs: 15355 | epoch avg. loss: 0.021 | test avg. loss: 4.598


 31%|███       | 15359/50000 [22:57<43:54, 13.15it/s]

Epochs: 15356 | epoch avg. loss: 0.025 | test avg. loss: 4.528
Epochs: 15357 | epoch avg. loss: 0.027 | test avg. loss: 4.528
Epochs: 15358 | epoch avg. loss: 0.027 | test avg. loss: 4.498


                                                     

Epochs: 15359 | epoch avg. loss: 0.024 | test avg. loss: 4.455
Epochs: 15360 | epoch avg. loss: 0.026 | test avg. loss: 4.645
Epochs: 15361 | epoch avg. loss: 0.035 | test avg. loss: 4.541


 31%|███       | 15365/50000 [22:57<41:42, 13.84it/s]

Epochs: 15362 | epoch avg. loss: 0.075 | test avg. loss: 4.688
Epochs: 15363 | epoch avg. loss: 0.032 | test avg. loss: 4.794
Epochs: 15364 | epoch avg. loss: 0.031 | test avg. loss: 4.667


 31%|███       | 15367/50000 [22:57<41:46, 13.81it/s]

Epochs: 15365 | epoch avg. loss: 0.035 | test avg. loss: 4.724
Epochs: 15366 | epoch avg. loss: 0.024 | test avg. loss: 4.585
Epochs: 15367 | epoch avg. loss: 0.032 | test avg. loss: 4.592


 31%|███       | 15371/50000 [22:58<44:41, 12.92it/s]

Epochs: 15368 | epoch avg. loss: 0.024 | test avg. loss: 4.640
Epochs: 15369 | epoch avg. loss: 0.028 | test avg. loss: 4.648
Epochs: 15370 | epoch avg. loss: 0.039 | test avg. loss: 4.809
Epochs: 15371 | epoch avg. loss: 0.021 | test avg. loss: 4.742


 31%|███       | 15375/50000 [22:58<41:15, 13.99it/s]

Epochs: 15372 | epoch avg. loss: 0.030 | test avg. loss: 4.738
Epochs: 15373 | epoch avg. loss: 0.022 | test avg. loss: 4.774
Epochs: 15374 | epoch avg. loss: 0.028 | test avg. loss: 4.635
Epochs: 15375 | epoch avg. loss: 0.028 | test avg. loss: 4.735


                                                     

Epochs: 15376 | epoch avg. loss: 0.023 | test avg. loss: 4.779
Epochs: 15377 | epoch avg. loss: 0.020 | test avg. loss: 4.755
Epochs: 15378 | epoch avg. loss: 0.029 | test avg. loss: 4.826


 31%|███       | 15381/50000 [22:58<44:12, 13.05it/s]

Epochs: 15379 | epoch avg. loss: 0.030 | test avg. loss: 4.854
Epochs: 15380 | epoch avg. loss: 0.029 | test avg. loss: 4.686


 31%|███       | 15383/50000 [22:59<46:24, 12.43it/s]

Epochs: 15381 | epoch avg. loss: 0.033 | test avg. loss: 4.769
Epochs: 15382 | epoch avg. loss: 0.034 | test avg. loss: 4.751
Epochs: 15383 | epoch avg. loss: 0.034 | test avg. loss: 4.695


 31%|███       | 15387/50000 [22:59<42:27, 13.59it/s]

Epochs: 15384 | epoch avg. loss: 0.063 | test avg. loss: 4.981
Epochs: 15385 | epoch avg. loss: 0.057 | test avg. loss: 4.739
Epochs: 15386 | epoch avg. loss: 0.052 | test avg. loss: 4.872
Epochs: 15387 | epoch avg. loss: 0.033 | test avg. loss: 4.802


 31%|███       | 15391/50000 [22:59<39:58, 14.43it/s]

Epochs: 15388 | epoch avg. loss: 0.022 | test avg. loss: 4.747
Epochs: 15389 | epoch avg. loss: 0.020 | test avg. loss: 4.748
Epochs: 15390 | epoch avg. loss: 0.020 | test avg. loss: 4.713
Epochs: 15391 | epoch avg. loss: 0.023 | test avg. loss: 4.933


 31%|███       | 15395/50000 [22:59<41:27, 13.91it/s]

Epochs: 15392 | epoch avg. loss: 0.064 | test avg. loss: 4.777
Epochs: 15393 | epoch avg. loss: 0.030 | test avg. loss: 4.798
Epochs: 15394 | epoch avg. loss: 0.022 | test avg. loss: 4.829


 31%|███       | 15397/50000 [23:00<42:56, 13.43it/s]

Epochs: 15395 | epoch avg. loss: 0.022 | test avg. loss: 4.757
Epochs: 15396 | epoch avg. loss: 0.017 | test avg. loss: 4.716
Epochs: 15397 | epoch avg. loss: 0.017 | test avg. loss: 4.692


 31%|███       | 15399/50000 [23:00<44:10, 13.06it/s]

Epochs: 15398 | epoch avg. loss: 0.018 | test avg. loss: 4.742
Epochs: 15399 | epoch avg. loss: 0.019 | test avg. loss: 4.804


 31%|███       | 15403/50000 [23:01<2:03:55,  4.65it/s]

Epochs: 15400 | epoch avg. loss: 0.020 | test avg. loss: 4.729
Epochs: 15401 | epoch avg. loss: 0.045 | test avg. loss: 4.902
Epochs: 15402 | epoch avg. loss: 0.034 | test avg. loss: 4.788


 31%|███       | 15405/50000 [23:02<1:39:44,  5.78it/s]

Epochs: 15403 | epoch avg. loss: 0.022 | test avg. loss: 4.818
Epochs: 15404 | epoch avg. loss: 0.032 | test avg. loss: 4.795
Epochs: 15405 | epoch avg. loss: 0.025 | test avg. loss: 4.705


 31%|███       | 15409/50000 [23:02<1:11:22,  8.08it/s]

Epochs: 15406 | epoch avg. loss: 0.026 | test avg. loss: 4.837
Epochs: 15407 | epoch avg. loss: 0.027 | test avg. loss: 4.697
Epochs: 15408 | epoch avg. loss: 0.056 | test avg. loss: 4.772


 31%|███       | 15411/50000 [23:02<1:03:34,  9.07it/s]

Epochs: 15409 | epoch avg. loss: 0.030 | test avg. loss: 4.794
Epochs: 15410 | epoch avg. loss: 0.030 | test avg. loss: 4.695
Epochs: 15411 | epoch avg. loss: 0.044 | test avg. loss: 4.970


 31%|███       | 15415/50000 [23:02<51:06, 11.28it/s]

Epochs: 15412 | epoch avg. loss: 0.067 | test avg. loss: 4.670
Epochs: 15413 | epoch avg. loss: 0.097 | test avg. loss: 4.822
Epochs: 15414 | epoch avg. loss: 0.090 | test avg. loss: 4.784


 31%|███       | 15417/50000 [23:03<49:50, 11.56it/s]

Epochs: 15415 | epoch avg. loss: 0.049 | test avg. loss: 4.707
Epochs: 15416 | epoch avg. loss: 0.063 | test avg. loss: 5.188
Epochs: 15417 | epoch avg. loss: 0.169 | test avg. loss: 4.755


 31%|███       | 15421/50000 [23:03<49:36, 11.62it/s]

Epochs: 15418 | epoch avg. loss: 0.081 | test avg. loss: 4.826
Epochs: 15419 | epoch avg. loss: 0.071 | test avg. loss: 5.059
Epochs: 15420 | epoch avg. loss: 0.107 | test avg. loss: 4.774


 31%|███       | 15423/50000 [23:03<48:49, 11.80it/s]

Epochs: 15421 | epoch avg. loss: 0.164 | test avg. loss: 5.222
Epochs: 15422 | epoch avg. loss: 0.313 | test avg. loss: 4.777
Epochs: 15423 | epoch avg. loss: 0.250 | test avg. loss: 4.882


 31%|███       | 15427/50000 [23:03<43:13, 13.33it/s]

Epochs: 15424 | epoch avg. loss: 0.132 | test avg. loss: 5.153
Epochs: 15425 | epoch avg. loss: 0.072 | test avg. loss: 4.852
Epochs: 15426 | epoch avg. loss: 0.049 | test avg. loss: 5.096
Epochs: 15427 | epoch avg. loss: 0.141 | test avg. loss: 4.678


 31%|███       | 15431/50000 [23:03<43:15, 13.32it/s]

Epochs: 15428 | epoch avg. loss: 0.132 | test avg. loss: 4.724
Epochs: 15429 | epoch avg. loss: 0.100 | test avg. loss: 5.234
Epochs: 15430 | epoch avg. loss: 0.171 | test avg. loss: 4.846


 31%|███       | 15435/50000 [23:04<41:10, 13.99it/s]

Epochs: 15431 | epoch avg. loss: 0.117 | test avg. loss: 5.175
Epochs: 15432 | epoch avg. loss: 0.194 | test avg. loss: 4.797
Epochs: 15433 | epoch avg. loss: 0.253 | test avg. loss: 4.769
Epochs: 15434 | epoch avg. loss: 0.081 | test avg. loss: 4.808


 31%|███       | 15437/50000 [23:04<39:38, 14.53it/s]

Epochs: 15435 | epoch avg. loss: 0.062 | test avg. loss: 4.601
Epochs: 15436 | epoch avg. loss: 0.065 | test avg. loss: 4.859
Epochs: 15437 | epoch avg. loss: 0.077 | test avg. loss: 4.633
Epochs: 15438 | epoch avg. loss: 0.073 | test avg. loss: 4.771


 31%|███       | 15441/50000 [23:04<41:33, 13.86it/s]

Epochs: 15439 | epoch avg. loss: 0.036 | test avg. loss: 4.855
Epochs: 15440 | epoch avg. loss: 0.032 | test avg. loss: 4.774
Epochs: 15441 | epoch avg. loss: 0.044 | test avg. loss: 5.089


 31%|███       | 15445/50000 [23:05<45:49, 12.57it/s]

Epochs: 15442 | epoch avg. loss: 0.106 | test avg. loss: 4.639
Epochs: 15443 | epoch avg. loss: 0.097 | test avg. loss: 4.814
Epochs: 15444 | epoch avg. loss: 0.111 | test avg. loss: 4.657


 31%|███       | 15447/50000 [23:05<45:50, 12.56it/s]

Epochs: 15445 | epoch avg. loss: 0.130 | test avg. loss: 4.879
Epochs: 15446 | epoch avg. loss: 0.281 | test avg. loss: 5.556
Epochs: 15447 | epoch avg. loss: 0.313 | test avg. loss: 4.907


 31%|███       | 15451/50000 [23:05<47:15, 12.18it/s]

Epochs: 15448 | epoch avg. loss: 0.374 | test avg. loss: 4.914
Epochs: 15449 | epoch avg. loss: 0.182 | test avg. loss: 4.625
Epochs: 15450 | epoch avg. loss: 0.342 | test avg. loss: 4.640


 31%|███       | 15453/50000 [23:05<48:03, 11.98it/s]

Epochs: 15451 | epoch avg. loss: 0.300 | test avg. loss: 5.360
Epochs: 15452 | epoch avg. loss: 0.342 | test avg. loss: 4.872
Epochs: 15453 | epoch avg. loss: 0.506 | test avg. loss: 5.104


 31%|███       | 15457/50000 [23:06<51:13, 11.24it/s]

Epochs: 15454 | epoch avg. loss: 0.314 | test avg. loss: 4.838
Epochs: 15455 | epoch avg. loss: 0.324 | test avg. loss: 4.963
Epochs: 15456 | epoch avg. loss: 0.279 | test avg. loss: 5.301


 31%|███       | 15459/50000 [23:06<49:05, 11.73it/s]

Epochs: 15457 | epoch avg. loss: 0.157 | test avg. loss: 4.774
Epochs: 15458 | epoch avg. loss: 0.239 | test avg. loss: 4.777
Epochs: 15459 | epoch avg. loss: 0.171 | test avg. loss: 4.983


 31%|███       | 15463/50000 [23:06<50:00, 11.51it/s]

Epochs: 15460 | epoch avg. loss: 0.194 | test avg. loss: 4.837
Epochs: 15461 | epoch avg. loss: 0.347 | test avg. loss: 5.303
Epochs: 15462 | epoch avg. loss: 0.481 | test avg. loss: 4.733


 31%|███       | 15465/50000 [23:06<48:26, 11.88it/s]

Epochs: 15463 | epoch avg. loss: 0.216 | test avg. loss: 4.652
Epochs: 15464 | epoch avg. loss: 0.119 | test avg. loss: 4.713
Epochs: 15465 | epoch avg. loss: 0.124 | test avg. loss: 4.456


 31%|███       | 15469/50000 [23:07<48:26, 11.88it/s]

Epochs: 15466 | epoch avg. loss: 0.154 | test avg. loss: 5.114
Epochs: 15467 | epoch avg. loss: 0.242 | test avg. loss: 4.698
Epochs: 15468 | epoch avg. loss: 0.365 | test avg. loss: 5.367


 31%|███       | 15471/50000 [23:07<47:42, 12.06it/s]

Epochs: 15469 | epoch avg. loss: 0.517 | test avg. loss: 5.141
Epochs: 15470 | epoch avg. loss: 0.217 | test avg. loss: 4.822
Epochs: 15471 | epoch avg. loss: 0.302 | test avg. loss: 5.359


 31%|███       | 15475/50000 [23:07<46:54, 12.27it/s]

Epochs: 15472 | epoch avg. loss: 0.429 | test avg. loss: 4.555
Epochs: 15473 | epoch avg. loss: 0.384 | test avg. loss: 4.640
Epochs: 15474 | epoch avg. loss: 0.152 | test avg. loss: 5.104


 31%|███       | 15477/50000 [23:07<47:57, 12.00it/s]

Epochs: 15475 | epoch avg. loss: 0.157 | test avg. loss: 4.952
Epochs: 15476 | epoch avg. loss: 0.253 | test avg. loss: 6.022
Epochs: 15477 | epoch avg. loss: 0.535 | test avg. loss: 4.749


 31%|███       | 15481/50000 [23:08<49:23, 11.65it/s]

Epochs: 15478 | epoch avg. loss: 0.409 | test avg. loss: 4.608
Epochs: 15479 | epoch avg. loss: 0.157 | test avg. loss: 5.317
Epochs: 15480 | epoch avg. loss: 0.338 | test avg. loss: 4.635


 31%|███       | 15483/50000 [23:08<48:56, 11.76it/s]

Epochs: 15481 | epoch avg. loss: 0.423 | test avg. loss: 4.805
Epochs: 15482 | epoch avg. loss: 0.083 | test avg. loss: 5.081
Epochs: 15483 | epoch avg. loss: 0.103 | test avg. loss: 4.890


 31%|███       | 15487/50000 [23:08<48:35, 11.84it/s]

Epochs: 15484 | epoch avg. loss: 0.215 | test avg. loss: 4.947
Epochs: 15485 | epoch avg. loss: 0.108 | test avg. loss: 4.747
Epochs: 15486 | epoch avg. loss: 0.060 | test avg. loss: 4.628


 31%|███       | 15489/50000 [23:08<46:15, 12.43it/s]

Epochs: 15487 | epoch avg. loss: 0.080 | test avg. loss: 4.867
Epochs: 15488 | epoch avg. loss: 0.039 | test avg. loss: 4.804
Epochs: 15489 | epoch avg. loss: 0.037 | test avg. loss: 4.962


 31%|███       | 15493/50000 [23:09<48:41, 11.81it/s]

Epochs: 15490 | epoch avg. loss: 0.033 | test avg. loss: 4.920
Epochs: 15491 | epoch avg. loss: 0.027 | test avg. loss: 4.818
Epochs: 15492 | epoch avg. loss: 0.026 | test avg. loss: 4.838


 31%|███       | 15495/50000 [23:09<48:47, 11.79it/s]

Epochs: 15493 | epoch avg. loss: 0.028 | test avg. loss: 4.718
Epochs: 15494 | epoch avg. loss: 0.022 | test avg. loss: 5.045
Epochs: 15495 | epoch avg. loss: 0.067 | test avg. loss: 4.792


 31%|███       | 15499/50000 [23:09<48:41, 11.81it/s]

Epochs: 15496 | epoch avg. loss: 0.080 | test avg. loss: 5.014
Epochs: 15497 | epoch avg. loss: 0.043 | test avg. loss: 4.959
Epochs: 15498 | epoch avg. loss: 0.027 | test avg. loss: 4.905


 31%|███       | 15499/50000 [23:09<48:41, 11.81it/s]

Epochs: 15499 | epoch avg. loss: 0.024 | test avg. loss: 4.758


 31%|███       | 15503/50000 [23:11<2:16:01,  4.23it/s]

Epochs: 15500 | epoch avg. loss: 0.034 | test avg. loss: 4.673
Epochs: 15501 | epoch avg. loss: 0.041 | test avg. loss: 4.950
Epochs: 15502 | epoch avg. loss: 0.085 | test avg. loss: 4.798


 31%|███       | 15505/50000 [23:11<1:51:13,  5.17it/s]

Epochs: 15503 | epoch avg. loss: 0.048 | test avg. loss: 4.897
Epochs: 15504 | epoch avg. loss: 0.032 | test avg. loss: 5.014
Epochs: 15505 | epoch avg. loss: 0.031 | test avg. loss: 4.871


 31%|███       | 15509/50000 [23:11<1:20:29,  7.14it/s]

Epochs: 15506 | epoch avg. loss: 0.039 | test avg. loss: 4.993
Epochs: 15507 | epoch avg. loss: 0.082 | test avg. loss: 4.652
Epochs: 15508 | epoch avg. loss: 0.070 | test avg. loss: 4.750


                                                       

Epochs: 15509 | epoch avg. loss: 0.078 | test avg. loss: 5.017
Epochs: 15510 | epoch avg. loss: 0.096 | test avg. loss: 4.892
Epochs: 15511 | epoch avg. loss: 0.173 | test avg. loss: 5.311


 31%|███       | 15515/50000 [23:12<55:39, 10.33it/s]  

Epochs: 15512 | epoch avg. loss: 0.142 | test avg. loss: 4.986
Epochs: 15513 | epoch avg. loss: 0.159 | test avg. loss: 5.038
Epochs: 15514 | epoch avg. loss: 0.166 | test avg. loss: 5.245


 31%|███       | 15519/50000 [23:12<46:10, 12.45it/s]

Epochs: 15515 | epoch avg. loss: 0.192 | test avg. loss: 5.101
Epochs: 15516 | epoch avg. loss: 0.404 | test avg. loss: 5.567
Epochs: 15517 | epoch avg. loss: 0.339 | test avg. loss: 4.884
Epochs: 15518 | epoch avg. loss: 0.136 | test avg. loss: 4.950


                                                     

Epochs: 15519 | epoch avg. loss: 0.088 | test avg. loss: 5.202
Epochs: 15520 | epoch avg. loss: 0.129 | test avg. loss: 4.894
Epochs: 15521 | epoch avg. loss: 0.040 | test avg. loss: 5.103


 31%|███       | 15525/50000 [23:13<40:57, 14.03it/s]

Epochs: 15522 | epoch avg. loss: 0.091 | test avg. loss: 4.813
Epochs: 15523 | epoch avg. loss: 0.021 | test avg. loss: 4.871
Epochs: 15524 | epoch avg. loss: 0.030 | test avg. loss: 4.818
Epochs: 15525 | epoch avg. loss: 0.035 | test avg. loss: 4.902


 31%|███       | 15529/50000 [23:13<38:51, 14.78it/s]

Epochs: 15526 | epoch avg. loss: 0.030 | test avg. loss: 4.934
Epochs: 15527 | epoch avg. loss: 0.026 | test avg. loss: 4.855
Epochs: 15528 | epoch avg. loss: 0.029 | test avg. loss: 4.862




Epochs: 15529 | epoch avg. loss: 0.023 | test avg. loss: 4.891
Epochs: 15530 | epoch avg. loss: 0.026 | test avg. loss: 4.768
Epochs: 15531 | epoch avg. loss: 0.045 | test avg. loss: 4.877
Epochs: 15532 | epoch avg. loss: 0.024 | test avg. loss: 4.839


                                                     

Epochs: 15533 | epoch avg. loss: 0.020 | test avg. loss: 4.811
Epochs: 15534 | epoch avg. loss: 0.019 | test avg. loss: 4.876
Epochs: 15535 | epoch avg. loss: 0.021 | test avg. loss: 4.809


 31%|███       | 15539/50000 [23:13<41:10, 13.95it/s]

Epochs: 15536 | epoch avg. loss: 0.026 | test avg. loss: 4.809
Epochs: 15537 | epoch avg. loss: 0.026 | test avg. loss: 4.962
Epochs: 15538 | epoch avg. loss: 0.035 | test avg. loss: 4.843


 31%|███       | 15541/50000 [23:14<41:53, 13.71it/s]

Epochs: 15539 | epoch avg. loss: 0.048 | test avg. loss: 4.833
Epochs: 15540 | epoch avg. loss: 0.041 | test avg. loss: 5.019
Epochs: 15541 | epoch avg. loss: 0.047 | test avg. loss: 4.799


 31%|███       | 15545/50000 [23:14<41:08, 13.96it/s]

Epochs: 15542 | epoch avg. loss: 0.037 | test avg. loss: 4.906
Epochs: 15543 | epoch avg. loss: 0.032 | test avg. loss: 4.753
Epochs: 15544 | epoch avg. loss: 0.069 | test avg. loss: 4.851


 31%|███       | 15549/50000 [23:14<40:02, 14.34it/s]

Epochs: 15545 | epoch avg. loss: 0.033 | test avg. loss: 4.999
Epochs: 15546 | epoch avg. loss: 0.032 | test avg. loss: 4.885
Epochs: 15547 | epoch avg. loss: 0.062 | test avg. loss: 4.918
Epochs: 15548 | epoch avg. loss: 0.050 | test avg. loss: 4.777


 31%|███       | 15551/50000 [23:14<39:45, 14.44it/s]

Epochs: 15549 | epoch avg. loss: 0.045 | test avg. loss: 4.670
Epochs: 15550 | epoch avg. loss: 0.045 | test avg. loss: 4.833
Epochs: 15551 | epoch avg. loss: 0.024 | test avg. loss: 4.788


 31%|███       | 15555/50000 [23:15<41:02, 13.99it/s]

Epochs: 15552 | epoch avg. loss: 0.033 | test avg. loss: 4.858
Epochs: 15553 | epoch avg. loss: 0.025 | test avg. loss: 4.997
Epochs: 15554 | epoch avg. loss: 0.037 | test avg. loss: 4.822
Epochs: 15555 | epoch avg. loss: 0.119 | test avg. loss: 5.002


 31%|███       | 15559/50000 [23:15<41:39, 13.78it/s]

Epochs: 15556 | epoch avg. loss: 0.129 | test avg. loss: 4.977
Epochs: 15557 | epoch avg. loss: 0.058 | test avg. loss: 4.882
Epochs: 15558 | epoch avg. loss: 0.083 | test avg. loss: 5.108


 31%|███       | 15563/50000 [23:15<39:34, 14.50it/s]

Epochs: 15559 | epoch avg. loss: 0.049 | test avg. loss: 4.912
Epochs: 15560 | epoch avg. loss: 0.073 | test avg. loss: 4.971
Epochs: 15561 | epoch avg. loss: 0.072 | test avg. loss: 5.073
Epochs: 15562 | epoch avg. loss: 0.079 | test avg. loss: 4.825


 31%|███       | 15565/50000 [23:15<40:15, 14.26it/s]

Epochs: 15563 | epoch avg. loss: 0.028 | test avg. loss: 4.898
Epochs: 15564 | epoch avg. loss: 0.018 | test avg. loss: 4.894
Epochs: 15565 | epoch avg. loss: 0.025 | test avg. loss: 4.956


 31%|███       | 15569/50000 [23:16<42:26, 13.52it/s]

Epochs: 15566 | epoch avg. loss: 0.022 | test avg. loss: 5.033
Epochs: 15567 | epoch avg. loss: 0.031 | test avg. loss: 4.849
Epochs: 15568 | epoch avg. loss: 0.035 | test avg. loss: 4.891


 31%|███       | 15573/50000 [23:16<40:00, 14.34it/s]

Epochs: 15569 | epoch avg. loss: 0.030 | test avg. loss: 4.900
Epochs: 15570 | epoch avg. loss: 0.027 | test avg. loss: 4.815
Epochs: 15571 | epoch avg. loss: 0.061 | test avg. loss: 5.001
Epochs: 15572 | epoch avg. loss: 0.031 | test avg. loss: 4.835


 31%|███       | 15575/50000 [23:16<41:13, 13.92it/s]

Epochs: 15573 | epoch avg. loss: 0.039 | test avg. loss: 4.926
Epochs: 15574 | epoch avg. loss: 0.077 | test avg. loss: 5.080
Epochs: 15575 | epoch avg. loss: 0.091 | test avg. loss: 4.777


 31%|███       | 15579/50000 [23:16<44:11, 12.98it/s]

Epochs: 15576 | epoch avg. loss: 0.085 | test avg. loss: 4.992
Epochs: 15577 | epoch avg. loss: 0.056 | test avg. loss: 4.934
Epochs: 15578 | epoch avg. loss: 0.031 | test avg. loss: 4.919


 31%|███       | 15581/50000 [23:17<44:52, 12.78it/s]

Epochs: 15579 | epoch avg. loss: 0.037 | test avg. loss: 5.149
Epochs: 15580 | epoch avg. loss: 0.076 | test avg. loss: 4.927
Epochs: 15581 | epoch avg. loss: 0.029 | test avg. loss: 4.851


 31%|███       | 15585/50000 [23:17<42:38, 13.45it/s]

Epochs: 15582 | epoch avg. loss: 0.021 | test avg. loss: 4.771
Epochs: 15583 | epoch avg. loss: 0.029 | test avg. loss: 4.761
Epochs: 15584 | epoch avg. loss: 0.026 | test avg. loss: 4.955


 31%|███       | 15587/50000 [23:17<41:18, 13.89it/s]

Epochs: 15585 | epoch avg. loss: 0.037 | test avg. loss: 4.846
Epochs: 15586 | epoch avg. loss: 0.043 | test avg. loss: 4.979
Epochs: 15587 | epoch avg. loss: 0.037 | test avg. loss: 4.954
Epochs: 15588 | epoch avg. loss: 0.027 | test avg. loss: 4.889


 31%|███       | 15591/50000 [23:17<41:28, 13.83it/s]

Epochs: 15589 | epoch avg. loss: 0.022 | test avg. loss: 4.849
Epochs: 15590 | epoch avg. loss: 0.036 | test avg. loss: 4.895
Epochs: 15591 | epoch avg. loss: 0.020 | test avg. loss: 5.013


 31%|███       | 15595/50000 [23:18<44:30, 12.88it/s]

Epochs: 15592 | epoch avg. loss: 0.026 | test avg. loss: 4.866
Epochs: 15593 | epoch avg. loss: 0.096 | test avg. loss: 4.864
Epochs: 15594 | epoch avg. loss: 0.033 | test avg. loss: 4.861


 31%|███       | 15597/50000 [23:18<43:37, 13.14it/s]

Epochs: 15595 | epoch avg. loss: 0.035 | test avg. loss: 4.762
Epochs: 15596 | epoch avg. loss: 0.079 | test avg. loss: 5.045
Epochs: 15597 | epoch avg. loss: 0.110 | test avg. loss: 4.949


 31%|███       | 15599/50000 [23:18<42:12, 13.58it/s]

Epochs: 15598 | epoch avg. loss: 0.042 | test avg. loss: 4.904
Epochs: 15599 | epoch avg. loss: 0.035 | test avg. loss: 5.040


 31%|███       | 15603/50000 [23:20<2:03:46,  4.63it/s]

Epochs: 15600 | epoch avg. loss: 0.045 | test avg. loss: 4.806
Epochs: 15601 | epoch avg. loss: 0.065 | test avg. loss: 5.072
Epochs: 15602 | epoch avg. loss: 0.135 | test avg. loss: 4.863


                                                       

Epochs: 15603 | epoch avg. loss: 0.049 | test avg. loss: 4.738
Epochs: 15604 | epoch avg. loss: 0.094 | test avg. loss: 4.914
Epochs: 15605 | epoch avg. loss: 0.053 | test avg. loss: 4.892




Epochs: 15606 | epoch avg. loss: 0.026 | test avg. loss: 4.836
Epochs: 15607 | epoch avg. loss: 0.025 | test avg. loss: 4.810
Epochs: 15608 | epoch avg. loss: 0.033 | test avg. loss: 4.843


 31%|███       | 15611/50000 [23:20<1:00:18,  9.50it/s]

Epochs: 15609 | epoch avg. loss: 0.018 | test avg. loss: 4.881
Epochs: 15610 | epoch avg. loss: 0.023 | test avg. loss: 4.784
Epochs: 15611 | epoch avg. loss: 0.020 | test avg. loss: 4.820


 31%|███       | 15615/50000 [23:20<51:04, 11.22it/s]

Epochs: 15612 | epoch avg. loss: 0.020 | test avg. loss: 4.828
Epochs: 15613 | epoch avg. loss: 0.021 | test avg. loss: 4.747
Epochs: 15614 | epoch avg. loss: 0.039 | test avg. loss: 5.044


 31%|███       | 15617/50000 [23:21<49:34, 11.56it/s]

Epochs: 15615 | epoch avg. loss: 0.062 | test avg. loss: 4.822
Epochs: 15616 | epoch avg. loss: 0.108 | test avg. loss: 4.911
Epochs: 15617 | epoch avg. loss: 0.101 | test avg. loss: 4.974


 31%|███       | 15621/50000 [23:21<43:39, 13.12it/s]

Epochs: 15618 | epoch avg. loss: 0.069 | test avg. loss: 4.851
Epochs: 15619 | epoch avg. loss: 0.075 | test avg. loss: 5.047
Epochs: 15620 | epoch avg. loss: 0.029 | test avg. loss: 4.964


 31%|███▏      | 15625/50000 [23:21<40:43, 14.07it/s]

Epochs: 15621 | epoch avg. loss: 0.030 | test avg. loss: 4.929
Epochs: 15622 | epoch avg. loss: 0.027 | test avg. loss: 5.081
Epochs: 15623 | epoch avg. loss: 0.056 | test avg. loss: 4.752
Epochs: 15624 | epoch avg. loss: 0.100 | test avg. loss: 4.827


 31%|███▏      | 15627/50000 [23:21<39:38, 14.45it/s]

Epochs: 15625 | epoch avg. loss: 0.045 | test avg. loss: 5.009
Epochs: 15626 | epoch avg. loss: 0.058 | test avg. loss: 4.817
Epochs: 15627 | epoch avg. loss: 0.129 | test avg. loss: 4.867


 31%|███▏      | 15629/50000 [23:22<41:10, 13.91it/s]

Epochs: 15628 | epoch avg. loss: 0.037 | test avg. loss: 5.080
Epochs: 15629 | epoch avg. loss: 0.050 | test avg. loss: 4.856
Epochs: 15630 | epoch avg. loss: 0.080 | test avg. loss: 4.972


 31%|███▏      | 15633/50000 [23:22<47:22, 12.09it/s]

Epochs: 15631 | epoch avg. loss: 0.047 | test avg. loss: 4.977
Epochs: 15632 | epoch avg. loss: 0.040 | test avg. loss: 4.855
Epochs: 15633 | epoch avg. loss: 0.083 | test avg. loss: 5.114


 31%|███▏      | 15637/50000 [23:22<45:40, 12.54it/s]

Epochs: 15634 | epoch avg. loss: 0.069 | test avg. loss: 4.830
Epochs: 15635 | epoch avg. loss: 0.055 | test avg. loss: 4.790
Epochs: 15636 | epoch avg. loss: 0.041 | test avg. loss: 4.846


 31%|███▏      | 15639/50000 [23:22<44:26, 12.89it/s]

Epochs: 15637 | epoch avg. loss: 0.028 | test avg. loss: 4.846
Epochs: 15638 | epoch avg. loss: 0.022 | test avg. loss: 4.782
Epochs: 15639 | epoch avg. loss: 0.048 | test avg. loss: 4.861


 31%|███▏      | 15643/50000 [23:23<47:31, 12.05it/s]

Epochs: 15640 | epoch avg. loss: 0.038 | test avg. loss: 4.952
Epochs: 15641 | epoch avg. loss: 0.041 | test avg. loss: 4.811
Epochs: 15642 | epoch avg. loss: 0.039 | test avg. loss: 4.811


 31%|███▏      | 15645/50000 [23:23<47:37, 12.02it/s]

Epochs: 15643 | epoch avg. loss: 0.022 | test avg. loss: 4.857
Epochs: 15644 | epoch avg. loss: 0.025 | test avg. loss: 4.764
Epochs: 15645 | epoch avg. loss: 0.052 | test avg. loss: 4.810


 31%|███▏      | 15649/50000 [23:23<48:30, 11.80it/s]

Epochs: 15646 | epoch avg. loss: 0.029 | test avg. loss: 5.003
Epochs: 15647 | epoch avg. loss: 0.033 | test avg. loss: 4.858
Epochs: 15648 | epoch avg. loss: 0.073 | test avg. loss: 4.858


 31%|███▏      | 15653/50000 [23:23<42:40, 13.41it/s]

Epochs: 15649 | epoch avg. loss: 0.036 | test avg. loss: 4.992
Epochs: 15650 | epoch avg. loss: 0.058 | test avg. loss: 4.740
Epochs: 15651 | epoch avg. loss: 0.088 | test avg. loss: 4.771
Epochs: 15652 | epoch avg. loss: 0.024 | test avg. loss: 4.909


 31%|███▏      | 15655/50000 [23:24<46:43, 12.25it/s]

Epochs: 15653 | epoch avg. loss: 0.030 | test avg. loss: 4.830
Epochs: 15654 | epoch avg. loss: 0.040 | test avg. loss: 4.875
Epochs: 15655 | epoch avg. loss: 0.045 | test avg. loss: 5.139


 31%|███▏      | 15659/50000 [23:24<43:32, 13.14it/s]

Epochs: 15656 | epoch avg. loss: 0.137 | test avg. loss: 4.758
Epochs: 15657 | epoch avg. loss: 0.078 | test avg. loss: 4.790
Epochs: 15658 | epoch avg. loss: 0.051 | test avg. loss: 5.196
Epochs: 15659 | epoch avg. loss: 0.111 | test avg. loss: 4.796


 31%|███▏      | 15663/50000 [23:24<40:32, 14.12it/s]

Epochs: 15660 | epoch avg. loss: 0.136 | test avg. loss: 4.891
Epochs: 15661 | epoch avg. loss: 0.041 | test avg. loss: 5.056
Epochs: 15662 | epoch avg. loss: 0.052 | test avg. loss: 4.829


 31%|███▏      | 15665/50000 [23:24<40:53, 13.99it/s]

Epochs: 15663 | epoch avg. loss: 0.148 | test avg. loss: 4.827
Epochs: 15664 | epoch avg. loss: 0.037 | test avg. loss: 4.954
Epochs: 15665 | epoch avg. loss: 0.032 | test avg. loss: 4.809


 31%|███▏      | 15669/50000 [23:25<45:39, 12.53it/s]

Epochs: 15666 | epoch avg. loss: 0.053 | test avg. loss: 4.866
Epochs: 15667 | epoch avg. loss: 0.016 | test avg. loss: 4.852
Epochs: 15668 | epoch avg. loss: 0.020 | test avg. loss: 4.868


 31%|███▏      | 15671/50000 [23:25<47:36, 12.02it/s]

Epochs: 15669 | epoch avg. loss: 0.019 | test avg. loss: 4.863
Epochs: 15670 | epoch avg. loss: 0.035 | test avg. loss: 4.867
Epochs: 15671 | epoch avg. loss: 0.019 | test avg. loss: 4.882


 31%|███▏      | 15675/50000 [23:25<46:31, 12.29it/s]

Epochs: 15672 | epoch avg. loss: 0.023 | test avg. loss: 4.908
Epochs: 15673 | epoch avg. loss: 0.029 | test avg. loss: 4.832
Epochs: 15674 | epoch avg. loss: 0.033 | test avg. loss: 4.783


 31%|███▏      | 15679/50000 [23:25<41:42, 13.72it/s]

Epochs: 15675 | epoch avg. loss: 0.131 | test avg. loss: 4.873
Epochs: 15676 | epoch avg. loss: 0.041 | test avg. loss: 4.848
Epochs: 15677 | epoch avg. loss: 0.042 | test avg. loss: 4.783
Epochs: 15678 | epoch avg. loss: 0.107 | test avg. loss: 5.056


 31%|███▏      | 15681/50000 [23:26<45:37, 12.54it/s]

Epochs: 15679 | epoch avg. loss: 0.157 | test avg. loss: 4.932
Epochs: 15680 | epoch avg. loss: 0.078 | test avg. loss: 4.740
Epochs: 15681 | epoch avg. loss: 0.178 | test avg. loss: 4.953


 31%|███▏      | 15685/50000 [23:26<43:20, 13.20it/s]

Epochs: 15682 | epoch avg. loss: 0.119 | test avg. loss: 4.910
Epochs: 15683 | epoch avg. loss: 0.071 | test avg. loss: 4.864
Epochs: 15684 | epoch avg. loss: 0.235 | test avg. loss: 5.243


 31%|███▏      | 15687/50000 [23:26<46:35, 12.28it/s]

Epochs: 15685 | epoch avg. loss: 0.127 | test avg. loss: 4.733
Epochs: 15686 | epoch avg. loss: 0.059 | test avg. loss: 4.731
Epochs: 15687 | epoch avg. loss: 0.062 | test avg. loss: 4.929


 31%|███▏      | 15691/50000 [23:26<46:44, 12.24it/s]

Epochs: 15688 | epoch avg. loss: 0.076 | test avg. loss: 4.774
Epochs: 15689 | epoch avg. loss: 0.055 | test avg. loss: 4.991
Epochs: 15690 | epoch avg. loss: 0.050 | test avg. loss: 5.112


 31%|███▏      | 15693/50000 [23:27<49:11, 11.62it/s]

Epochs: 15691 | epoch avg. loss: 0.060 | test avg. loss: 4.828
Epochs: 15692 | epoch avg. loss: 0.025 | test avg. loss: 5.017
Epochs: 15693 | epoch avg. loss: 0.109 | test avg. loss: 4.849


 31%|███▏      | 15697/50000 [23:27<43:58, 13.00it/s]

Epochs: 15694 | epoch avg. loss: 0.048 | test avg. loss: 4.800
Epochs: 15695 | epoch avg. loss: 0.159 | test avg. loss: 4.918
Epochs: 15696 | epoch avg. loss: 0.075 | test avg. loss: 5.033


 31%|███▏      | 15699/50000 [23:27<42:40, 13.39it/s]

Epochs: 15697 | epoch avg. loss: 0.062 | test avg. loss: 4.799
Epochs: 15698 | epoch avg. loss: 0.168 | test avg. loss: 4.902
Epochs: 15699 | epoch avg. loss: 0.113 | test avg. loss: 5.083


 31%|███▏      | 15703/50000 [23:29<2:21:11,  4.05it/s]

Epochs: 15700 | epoch avg. loss: 0.138 | test avg. loss: 4.726
Epochs: 15701 | epoch avg. loss: 0.171 | test avg. loss: 4.780
Epochs: 15702 | epoch avg. loss: 0.038 | test avg. loss: 4.978


 31%|███▏      | 15705/50000 [23:29<1:54:03,  5.01it/s]

Epochs: 15703 | epoch avg. loss: 0.038 | test avg. loss: 4.886
Epochs: 15704 | epoch avg. loss: 0.055 | test avg. loss: 4.999
Epochs: 15705 | epoch avg. loss: 0.046 | test avg. loss: 5.047


 31%|███▏      | 15709/50000 [23:29<1:17:46,  7.35it/s]

Epochs: 15706 | epoch avg. loss: 0.045 | test avg. loss: 4.866
Epochs: 15707 | epoch avg. loss: 0.028 | test avg. loss: 4.957
Epochs: 15708 | epoch avg. loss: 0.091 | test avg. loss: 5.123


 31%|███▏      | 15711/50000 [23:30<1:07:30,  8.47it/s]

Epochs: 15709 | epoch avg. loss: 0.107 | test avg. loss: 4.827
Epochs: 15710 | epoch avg. loss: 0.226 | test avg. loss: 5.218
Epochs: 15711 | epoch avg. loss: 0.525 | test avg. loss: 5.466


 31%|███▏      | 15715/50000 [23:30<55:40, 10.26it/s]  

Epochs: 15712 | epoch avg. loss: 0.244 | test avg. loss: 5.225
Epochs: 15713 | epoch avg. loss: 0.493 | test avg. loss: 5.131
Epochs: 15714 | epoch avg. loss: 0.086 | test avg. loss: 5.057


 31%|███▏      | 15717/50000 [23:30<51:32, 11.08it/s]

Epochs: 15715 | epoch avg. loss: 0.074 | test avg. loss: 4.880
Epochs: 15716 | epoch avg. loss: 0.087 | test avg. loss: 5.168
Epochs: 15717 | epoch avg. loss: 0.126 | test avg. loss: 4.999


 31%|███▏      | 15721/50000 [23:30<46:30, 12.28it/s]

Epochs: 15718 | epoch avg. loss: 0.030 | test avg. loss: 4.980
Epochs: 15719 | epoch avg. loss: 0.025 | test avg. loss: 5.000
Epochs: 15720 | epoch avg. loss: 0.024 | test avg. loss: 5.030


 31%|███▏      | 15723/50000 [23:30<46:35, 12.26it/s]

Epochs: 15721 | epoch avg. loss: 0.030 | test avg. loss: 4.829
Epochs: 15722 | epoch avg. loss: 0.029 | test avg. loss: 4.841
Epochs: 15723 | epoch avg. loss: 0.028 | test avg. loss: 4.788


 31%|███▏      | 15727/50000 [23:31<48:28, 11.78it/s]

Epochs: 15724 | epoch avg. loss: 0.027 | test avg. loss: 4.842
Epochs: 15725 | epoch avg. loss: 0.035 | test avg. loss: 4.968
Epochs: 15726 | epoch avg. loss: 0.024 | test avg. loss: 4.891


 31%|███▏      | 15731/50000 [23:31<44:26, 12.85it/s]

Epochs: 15727 | epoch avg. loss: 0.046 | test avg. loss: 4.820
Epochs: 15728 | epoch avg. loss: 0.030 | test avg. loss: 5.062
Epochs: 15729 | epoch avg. loss: 0.132 | test avg. loss: 4.748
Epochs: 15730 | epoch avg. loss: 0.132 | test avg. loss: 4.904


 31%|███▏      | 15733/50000 [23:31<44:46, 12.76it/s]

Epochs: 15731 | epoch avg. loss: 0.187 | test avg. loss: 5.340
Epochs: 15732 | epoch avg. loss: 0.141 | test avg. loss: 5.074
Epochs: 15733 | epoch avg. loss: 0.290 | test avg. loss: 4.891


 31%|███▏      | 15737/50000 [23:31<42:29, 13.44it/s]

Epochs: 15734 | epoch avg. loss: 0.266 | test avg. loss: 5.970
Epochs: 15735 | epoch avg. loss: 0.674 | test avg. loss: 4.834
Epochs: 15736 | epoch avg. loss: 0.830 | test avg. loss: 4.714


 31%|███▏      | 15741/50000 [23:32<39:59, 14.27it/s]

Epochs: 15737 | epoch avg. loss: 0.332 | test avg. loss: 5.666
Epochs: 15738 | epoch avg. loss: 0.483 | test avg. loss: 4.744
Epochs: 15739 | epoch avg. loss: 0.545 | test avg. loss: 4.734
Epochs: 15740 | epoch avg. loss: 0.148 | test avg. loss: 5.632


 31%|███▏      | 15745/50000 [23:32<38:07, 14.98it/s]

Epochs: 15741 | epoch avg. loss: 0.447 | test avg. loss: 4.801
Epochs: 15742 | epoch avg. loss: 0.287 | test avg. loss: 4.805
Epochs: 15743 | epoch avg. loss: 0.117 | test avg. loss: 5.140
Epochs: 15744 | epoch avg. loss: 0.146 | test avg. loss: 4.718


 31%|███▏      | 15747/50000 [23:32<38:03, 15.00it/s]

Epochs: 15745 | epoch avg. loss: 0.252 | test avg. loss: 4.914
Epochs: 15746 | epoch avg. loss: 0.169 | test avg. loss: 5.219
Epochs: 15747 | epoch avg. loss: 0.146 | test avg. loss: 5.058


 32%|███▏      | 15751/50000 [23:32<39:09, 14.57it/s]

Epochs: 15748 | epoch avg. loss: 0.503 | test avg. loss: 5.211
Epochs: 15749 | epoch avg. loss: 0.266 | test avg. loss: 5.249
Epochs: 15750 | epoch avg. loss: 0.195 | test avg. loss: 5.232


 32%|███▏      | 15753/50000 [23:33<39:52, 14.32it/s]

Epochs: 15751 | epoch avg. loss: 0.470 | test avg. loss: 5.560
Epochs: 15752 | epoch avg. loss: 0.216 | test avg. loss: 5.105
Epochs: 15753 | epoch avg. loss: 0.099 | test avg. loss: 4.912


 32%|███▏      | 15757/50000 [23:33<39:30, 14.44it/s]

Epochs: 15754 | epoch avg. loss: 0.216 | test avg. loss: 4.988
Epochs: 15755 | epoch avg. loss: 0.086 | test avg. loss: 4.963
Epochs: 15756 | epoch avg. loss: 0.048 | test avg. loss: 5.020
Epochs: 15757 | epoch avg. loss: 0.133 | test avg. loss: 5.273


                                                     

Epochs: 15758 | epoch avg. loss: 0.189 | test avg. loss: 5.330
Epochs: 15759 | epoch avg. loss: 0.088 | test avg. loss: 5.159
Epochs: 15760 | epoch avg. loss: 0.104 | test avg. loss: 5.424


 32%|███▏      | 15763/50000 [23:33<39:30, 14.44it/s]

Epochs: 15761 | epoch avg. loss: 0.178 | test avg. loss: 5.015
Epochs: 15762 | epoch avg. loss: 0.138 | test avg. loss: 5.140
Epochs: 15763 | epoch avg. loss: 0.361 | test avg. loss: 5.215


 32%|███▏      | 15767/50000 [23:34<41:27, 13.76it/s]

Epochs: 15764 | epoch avg. loss: 0.242 | test avg. loss: 5.636
Epochs: 15765 | epoch avg. loss: 0.312 | test avg. loss: 4.822
Epochs: 15766 | epoch avg. loss: 0.367 | test avg. loss: 5.265


 32%|███▏      | 15769/50000 [23:34<40:19, 14.15it/s]

Epochs: 15767 | epoch avg. loss: 0.654 | test avg. loss: 5.202
Epochs: 15768 | epoch avg. loss: 0.299 | test avg. loss: 5.762
Epochs: 15769 | epoch avg. loss: 0.960 | test avg. loss: 6.062
Epochs: 15770 | epoch avg. loss: 1.093 | test avg. loss: 5.557


 32%|███▏      | 15773/50000 [23:34<39:12, 14.55it/s]

Epochs: 15771 | epoch avg. loss: 0.355 | test avg. loss: 5.159
Epochs: 15772 | epoch avg. loss: 0.450 | test avg. loss: 5.405
Epochs: 15773 | epoch avg. loss: 0.436 | test avg. loss: 4.433


 32%|███▏      | 15777/50000 [23:34<39:16, 14.52it/s]

Epochs: 15774 | epoch avg. loss: 0.241 | test avg. loss: 4.588
Epochs: 15775 | epoch avg. loss: 0.127 | test avg. loss: 4.920
Epochs: 15776 | epoch avg. loss: 0.105 | test avg. loss: 4.833
Epochs: 15777 | epoch avg. loss: 0.175 | test avg. loss: 4.910


                                                     

Epochs: 15778 | epoch avg. loss: 0.071 | test avg. loss: 4.827
Epochs: 15779 | epoch avg. loss: 0.074 | test avg. loss: 4.494
Epochs: 15780 | epoch avg. loss: 0.185 | test avg. loss: 4.615


 32%|███▏      | 15785/50000 [23:35<38:46, 14.71it/s]

Epochs: 15781 | epoch avg. loss: 0.109 | test avg. loss: 4.537
Epochs: 15782 | epoch avg. loss: 0.076 | test avg. loss: 4.533
Epochs: 15783 | epoch avg. loss: 0.187 | test avg. loss: 4.843
Epochs: 15784 | epoch avg. loss: 0.090 | test avg. loss: 4.598


 32%|███▏      | 15789/50000 [23:35<38:06, 14.96it/s]

Epochs: 15785 | epoch avg. loss: 0.043 | test avg. loss: 4.491
Epochs: 15786 | epoch avg. loss: 0.029 | test avg. loss: 4.555
Epochs: 15787 | epoch avg. loss: 0.033 | test avg. loss: 4.487
Epochs: 15788 | epoch avg. loss: 0.024 | test avg. loss: 4.535


 32%|███▏      | 15791/50000 [23:35<37:59, 15.01it/s]

Epochs: 15789 | epoch avg. loss: 0.021 | test avg. loss: 4.635
Epochs: 15790 | epoch avg. loss: 0.021 | test avg. loss: 4.509
Epochs: 15791 | epoch avg. loss: 0.050 | test avg. loss: 4.646


 32%|███▏      | 15795/50000 [23:36<42:16, 13.48it/s]

Epochs: 15792 | epoch avg. loss: 0.025 | test avg. loss: 4.504
Epochs: 15793 | epoch avg. loss: 0.041 | test avg. loss: 4.458
Epochs: 15794 | epoch avg. loss: 0.035 | test avg. loss: 4.731


 32%|███▏      | 15799/50000 [23:36<39:02, 14.60it/s]

Epochs: 15795 | epoch avg. loss: 0.107 | test avg. loss: 4.487
Epochs: 15796 | epoch avg. loss: 0.057 | test avg. loss: 4.496
Epochs: 15797 | epoch avg. loss: 0.032 | test avg. loss: 4.612
Epochs: 15798 | epoch avg. loss: 0.030 | test avg. loss: 4.437


 32%|███▏      | 15799/50000 [23:36<39:02, 14.60it/s]

Epochs: 15799 | epoch avg. loss: 0.064 | test avg. loss: 4.521


 32%|███▏      | 15803/50000 [23:37<2:04:31,  4.58it/s]

Epochs: 15800 | epoch avg. loss: 0.028 | test avg. loss: 4.695
Epochs: 15801 | epoch avg. loss: 0.047 | test avg. loss: 4.475
Epochs: 15802 | epoch avg. loss: 0.092 | test avg. loss: 4.570


 32%|███▏      | 15805/50000 [23:38<1:40:50,  5.65it/s]

Epochs: 15803 | epoch avg. loss: 0.065 | test avg. loss: 4.625
Epochs: 15804 | epoch avg. loss: 0.045 | test avg. loss: 4.516
Epochs: 15805 | epoch avg. loss: 0.083 | test avg. loss: 4.895


 32%|███▏      | 15809/50000 [23:38<1:08:11,  8.36it/s]

Epochs: 15806 | epoch avg. loss: 0.150 | test avg. loss: 4.430
Epochs: 15807 | epoch avg. loss: 0.222 | test avg. loss: 4.421
Epochs: 15808 | epoch avg. loss: 0.159 | test avg. loss: 4.851
Epochs: 15809 | epoch avg. loss: 0.162 | test avg. loss: 4.422


 32%|███▏      | 15813/50000 [23:38<51:21, 11.10it/s]

Epochs: 15810 | epoch avg. loss: 0.090 | test avg. loss: 4.527
Epochs: 15811 | epoch avg. loss: 0.044 | test avg. loss: 4.615
Epochs: 15812 | epoch avg. loss: 0.037 | test avg. loss: 4.617
Epochs: 15813 | epoch avg. loss: 0.028 | test avg. loss: 4.634


 32%|███▏      | 15817/50000 [23:38<46:31, 12.24it/s]

Epochs: 15814 | epoch avg. loss: 0.022 | test avg. loss: 4.509
Epochs: 15815 | epoch avg. loss: 0.038 | test avg. loss: 4.659
Epochs: 15816 | epoch avg. loss: 0.045 | test avg. loss: 4.550


 32%|███▏      | 15819/50000 [23:39<44:52, 12.70it/s]

Epochs: 15817 | epoch avg. loss: 0.045 | test avg. loss: 4.534
Epochs: 15818 | epoch avg. loss: 0.037 | test avg. loss: 4.765
Epochs: 15819 | epoch avg. loss: 0.043 | test avg. loss: 4.520


 32%|███▏      | 15823/50000 [23:39<40:54, 13.92it/s]

Epochs: 15820 | epoch avg. loss: 0.109 | test avg. loss: 4.703
Epochs: 15821 | epoch avg. loss: 0.183 | test avg. loss: 4.771
Epochs: 15822 | epoch avg. loss: 0.095 | test avg. loss: 4.521
Epochs: 15823 | epoch avg. loss: 0.116 | test avg. loss: 4.694


 32%|███▏      | 15827/50000 [23:39<38:04, 14.96it/s]

Epochs: 15824 | epoch avg. loss: 0.032 | test avg. loss: 4.736
Epochs: 15825 | epoch avg. loss: 0.052 | test avg. loss: 4.658
Epochs: 15826 | epoch avg. loss: 0.137 | test avg. loss: 4.703
Epochs: 15827 | epoch avg. loss: 0.072 | test avg. loss: 4.508




Epochs: 15828 | epoch avg. loss: 0.057 | test avg. loss: 4.352
Epochs: 15829 | epoch avg. loss: 0.099 | test avg. loss: 4.578
Epochs: 15830 | epoch avg. loss: 0.051 | test avg. loss: 5.010


 32%|███▏      | 15835/50000 [23:40<37:02, 15.37it/s]

Epochs: 15831 | epoch avg. loss: 0.104 | test avg. loss: 4.788
Epochs: 15832 | epoch avg. loss: 0.061 | test avg. loss: 4.701
Epochs: 15833 | epoch avg. loss: 0.034 | test avg. loss: 4.784
Epochs: 15834 | epoch avg. loss: 0.060 | test avg. loss: 4.547


 32%|███▏      | 15837/50000 [23:40<38:04, 14.95it/s]

Epochs: 15835 | epoch avg. loss: 0.178 | test avg. loss: 4.855
Epochs: 15836 | epoch avg. loss: 0.190 | test avg. loss: 4.815
Epochs: 15837 | epoch avg. loss: 0.091 | test avg. loss: 4.794
Epochs: 15838 | epoch avg. loss: 0.159 | test avg. loss: 5.686


 32%|███▏      | 15843/50000 [23:40<36:54, 15.42it/s]

Epochs: 15839 | epoch avg. loss: 0.572 | test avg. loss: 4.727
Epochs: 15840 | epoch avg. loss: 0.099 | test avg. loss: 4.493
Epochs: 15841 | epoch avg. loss: 0.129 | test avg. loss: 5.271
Epochs: 15842 | epoch avg. loss: 0.478 | test avg. loss: 4.702


 32%|███▏      | 15847/50000 [23:40<37:06, 15.34it/s]

Epochs: 15843 | epoch avg. loss: 0.230 | test avg. loss: 4.859
Epochs: 15844 | epoch avg. loss: 0.211 | test avg. loss: 5.971
Epochs: 15845 | epoch avg. loss: 0.574 | test avg. loss: 4.316
Epochs: 15846 | epoch avg. loss: 0.355 | test avg. loss: 4.094


 32%|███▏      | 15849/50000 [23:41<37:58, 14.99it/s]

Epochs: 15847 | epoch avg. loss: 0.189 | test avg. loss: 4.452
Epochs: 15848 | epoch avg. loss: 0.231 | test avg. loss: 4.468
Epochs: 15849 | epoch avg. loss: 0.401 | test avg. loss: 4.836
Epochs: 15850 | epoch avg. loss: 0.097 | test avg. loss: 4.889


 32%|███▏      | 15853/50000 [23:41<39:12, 14.52it/s]

Epochs: 15851 | epoch avg. loss: 0.336 | test avg. loss: 4.769
Epochs: 15852 | epoch avg. loss: 0.273 | test avg. loss: 4.965
Epochs: 15853 | epoch avg. loss: 0.235 | test avg. loss: 4.150


 32%|███▏      | 15857/50000 [23:41<41:10, 13.82it/s]

Epochs: 15854 | epoch avg. loss: 0.312 | test avg. loss: 4.387
Epochs: 15855 | epoch avg. loss: 0.330 | test avg. loss: 5.245
Epochs: 15856 | epoch avg. loss: 0.476 | test avg. loss: 4.844


 32%|███▏      | 15859/50000 [23:41<41:30, 13.71it/s]

Epochs: 15857 | epoch avg. loss: 0.234 | test avg. loss: 4.863
Epochs: 15858 | epoch avg. loss: 0.358 | test avg. loss: 6.929
Epochs: 15859 | epoch avg. loss: 1.156 | test avg. loss: 4.722


 32%|███▏      | 15863/50000 [23:42<46:55, 12.13it/s]

Epochs: 15860 | epoch avg. loss: 0.870 | test avg. loss: 4.711
Epochs: 15861 | epoch avg. loss: 0.512 | test avg. loss: 5.371
Epochs: 15862 | epoch avg. loss: 0.688 | test avg. loss: 6.481


 32%|███▏      | 15865/50000 [23:42<47:10, 12.06it/s]

Epochs: 15863 | epoch avg. loss: 1.949 | test avg. loss: 6.664
Epochs: 15864 | epoch avg. loss: 1.387 | test avg. loss: 4.837
Epochs: 15865 | epoch avg. loss: 0.318 | test avg. loss: 4.730


 32%|███▏      | 15869/50000 [23:42<45:02, 12.63it/s]

Epochs: 15866 | epoch avg. loss: 0.235 | test avg. loss: 5.452
Epochs: 15867 | epoch avg. loss: 0.330 | test avg. loss: 4.802
Epochs: 15868 | epoch avg. loss: 0.600 | test avg. loss: 4.779


 32%|███▏      | 15871/50000 [23:42<44:00, 12.93it/s]

Epochs: 15869 | epoch avg. loss: 0.187 | test avg. loss: 4.637
Epochs: 15870 | epoch avg. loss: 0.180 | test avg. loss: 4.806
Epochs: 15871 | epoch avg. loss: 2.704 | test avg. loss: 4.750


 32%|███▏      | 15875/50000 [23:43<42:39, 13.34it/s]

Epochs: 15872 | epoch avg. loss: 0.665 | test avg. loss: 7.206
Epochs: 15873 | epoch avg. loss: 1.458 | test avg. loss: 6.278
Epochs: 15874 | epoch avg. loss: 1.519 | test avg. loss: 6.597


 32%|███▏      | 15877/50000 [23:43<41:33, 13.69it/s]

Epochs: 15875 | epoch avg. loss: 0.797 | test avg. loss: 4.954
Epochs: 15876 | epoch avg. loss: 0.452 | test avg. loss: 4.925
Epochs: 15877 | epoch avg. loss: 0.434 | test avg. loss: 4.708
Epochs: 15878 | epoch avg. loss: 0.663 | test avg. loss: 4.881


 32%|███▏      | 15881/50000 [23:43<41:47, 13.61it/s]

Epochs: 15879 | epoch avg. loss: 0.548 | test avg. loss: 6.231
Epochs: 15880 | epoch avg. loss: 0.789 | test avg. loss: 4.511
Epochs: 15881 | epoch avg. loss: 0.174 | test avg. loss: 4.519


 32%|███▏      | 15885/50000 [23:43<41:34, 13.68it/s]

Epochs: 15882 | epoch avg. loss: 0.354 | test avg. loss: 4.536
Epochs: 15883 | epoch avg. loss: 0.241 | test avg. loss: 4.591
Epochs: 15884 | epoch avg. loss: 0.396 | test avg. loss: 5.058
Epochs: 15885 | epoch avg. loss: 0.270 | test avg. loss: 4.630


 32%|███▏      | 15889/50000 [23:44<40:28, 14.04it/s]

Epochs: 15886 | epoch avg. loss: 0.203 | test avg. loss: 4.577
Epochs: 15887 | epoch avg. loss: 0.206 | test avg. loss: 4.218
Epochs: 15888 | epoch avg. loss: 0.122 | test avg. loss: 3.780


                                                     

Epochs: 15889 | epoch avg. loss: 0.154 | test avg. loss: 4.329
Epochs: 15890 | epoch avg. loss: 0.377 | test avg. loss: 4.130
Epochs: 15891 | epoch avg. loss: 0.166 | test avg. loss: 4.409


 32%|███▏      | 15895/50000 [23:44<38:48, 14.65it/s]

Epochs: 15892 | epoch avg. loss: 0.118 | test avg. loss: 4.768
Epochs: 15893 | epoch avg. loss: 0.132 | test avg. loss: 4.170
Epochs: 15894 | epoch avg. loss: 0.175 | test avg. loss: 4.163


 32%|███▏      | 15897/50000 [23:44<42:02, 13.52it/s]

Epochs: 15895 | epoch avg. loss: 0.107 | test avg. loss: 3.953
Epochs: 15896 | epoch avg. loss: 0.080 | test avg. loss: 3.936
Epochs: 15897 | epoch avg. loss: 0.078 | test avg. loss: 4.447


 32%|███▏      | 15899/50000 [23:44<43:53, 12.95it/s]

Epochs: 15898 | epoch avg. loss: 0.129 | test avg. loss: 4.233
Epochs: 15899 | epoch avg. loss: 0.088 | test avg. loss: 4.339


 32%|███▏      | 15903/50000 [23:46<2:24:32,  3.93it/s]

Epochs: 15900 | epoch avg. loss: 0.046 | test avg. loss: 4.231
Epochs: 15901 | epoch avg. loss: 0.048 | test avg. loss: 4.079
Epochs: 15902 | epoch avg. loss: 0.046 | test avg. loss: 4.095


 32%|███▏      | 15905/50000 [23:47<1:58:49,  4.78it/s]

Epochs: 15903 | epoch avg. loss: 0.034 | test avg. loss: 3.943
Epochs: 15904 | epoch avg. loss: 0.070 | test avg. loss: 4.093
Epochs: 15905 | epoch avg. loss: 0.035 | test avg. loss: 4.248


 32%|███▏      | 15909/50000 [23:47<1:21:01,  7.01it/s]

Epochs: 15906 | epoch avg. loss: 0.044 | test avg. loss: 4.102
Epochs: 15907 | epoch avg. loss: 0.032 | test avg. loss: 4.084
Epochs: 15908 | epoch avg. loss: 0.022 | test avg. loss: 4.092


 32%|███▏      | 15913/50000 [23:47<57:19,  9.91it/s]  

Epochs: 15909 | epoch avg. loss: 0.024 | test avg. loss: 4.010
Epochs: 15910 | epoch avg. loss: 0.033 | test avg. loss: 4.272
Epochs: 15911 | epoch avg. loss: 0.064 | test avg. loss: 4.026
Epochs: 15912 | epoch avg. loss: 0.059 | test avg. loss: 4.067


 32%|███▏      | 15917/50000 [23:47<46:25, 12.24it/s]

Epochs: 15913 | epoch avg. loss: 0.043 | test avg. loss: 4.211
Epochs: 15914 | epoch avg. loss: 0.045 | test avg. loss: 4.076
Epochs: 15915 | epoch avg. loss: 0.032 | test avg. loss: 4.161
Epochs: 15916 | epoch avg. loss: 0.019 | test avg. loss: 4.137


 32%|███▏      | 15919/50000 [23:48<47:53, 11.86it/s]

Epochs: 15917 | epoch avg. loss: 0.017 | test avg. loss: 4.144
Epochs: 15918 | epoch avg. loss: 0.021 | test avg. loss: 4.105
Epochs: 15919 | epoch avg. loss: 0.017 | test avg. loss: 4.144


 32%|███▏      | 15923/50000 [23:48<48:27, 11.72it/s]

Epochs: 15920 | epoch avg. loss: 0.028 | test avg. loss: 4.240
Epochs: 15921 | epoch avg. loss: 0.036 | test avg. loss: 4.093
Epochs: 15922 | epoch avg. loss: 0.159 | test avg. loss: 4.281


 32%|███▏      | 15925/50000 [23:48<48:36, 11.68it/s]

Epochs: 15923 | epoch avg. loss: 0.043 | test avg. loss: 4.168
Epochs: 15924 | epoch avg. loss: 0.074 | test avg. loss: 4.264
Epochs: 15925 | epoch avg. loss: 0.079 | test avg. loss: 4.430


 32%|███▏      | 15929/50000 [23:48<47:06, 12.05it/s]

Epochs: 15926 | epoch avg. loss: 0.109 | test avg. loss: 4.272
Epochs: 15927 | epoch avg. loss: 0.512 | test avg. loss: 4.261
Epochs: 15928 | epoch avg. loss: 0.197 | test avg. loss: 4.288


 32%|███▏      | 15931/50000 [23:49<49:49, 11.40it/s]

Epochs: 15929 | epoch avg. loss: 0.129 | test avg. loss: 4.073
Epochs: 15930 | epoch avg. loss: 0.245 | test avg. loss: 4.317
Epochs: 15931 | epoch avg. loss: 0.168 | test avg. loss: 4.222


 32%|███▏      | 15935/50000 [23:49<46:43, 12.15it/s]

Epochs: 15932 | epoch avg. loss: 0.064 | test avg. loss: 4.024
Epochs: 15933 | epoch avg. loss: 0.095 | test avg. loss: 4.241
Epochs: 15934 | epoch avg. loss: 0.111 | test avg. loss: 4.233


 32%|███▏      | 15937/50000 [23:49<45:59, 12.35it/s]

Epochs: 15935 | epoch avg. loss: 0.049 | test avg. loss: 4.195
Epochs: 15936 | epoch avg. loss: 0.081 | test avg. loss: 4.404
Epochs: 15937 | epoch avg. loss: 0.057 | test avg. loss: 4.233


 32%|███▏      | 15941/50000 [23:49<42:25, 13.38it/s]

Epochs: 15938 | epoch avg. loss: 0.066 | test avg. loss: 4.096
Epochs: 15939 | epoch avg. loss: 0.056 | test avg. loss: 4.230
Epochs: 15940 | epoch avg. loss: 0.070 | test avg. loss: 3.943


 32%|███▏      | 15943/50000 [23:49<43:21, 13.09it/s]

Epochs: 15941 | epoch avg. loss: 0.058 | test avg. loss: 3.982
Epochs: 15942 | epoch avg. loss: 0.028 | test avg. loss: 4.144
Epochs: 15943 | epoch avg. loss: 0.022 | test avg. loss: 4.199


 32%|███▏      | 15947/50000 [23:50<45:59, 12.34it/s]

Epochs: 15944 | epoch avg. loss: 0.020 | test avg. loss: 4.198
Epochs: 15945 | epoch avg. loss: 0.019 | test avg. loss: 4.165
Epochs: 15946 | epoch avg. loss: 0.017 | test avg. loss: 4.096


 32%|███▏      | 15949/50000 [23:50<46:25, 12.23it/s]

Epochs: 15947 | epoch avg. loss: 0.022 | test avg. loss: 3.999
Epochs: 15948 | epoch avg. loss: 0.044 | test avg. loss: 4.250
Epochs: 15949 | epoch avg. loss: 0.054 | test avg. loss: 4.189


 32%|███▏      | 15953/50000 [23:50<44:18, 12.81it/s]

Epochs: 15950 | epoch avg. loss: 0.027 | test avg. loss: 4.052
Epochs: 15951 | epoch avg. loss: 0.041 | test avg. loss: 4.307
Epochs: 15952 | epoch avg. loss: 0.054 | test avg. loss: 4.095


 32%|███▏      | 15955/50000 [23:50<44:22, 12.79it/s]

Epochs: 15953 | epoch avg. loss: 0.069 | test avg. loss: 4.164
Epochs: 15954 | epoch avg. loss: 0.039 | test avg. loss: 4.275
Epochs: 15955 | epoch avg. loss: 0.044 | test avg. loss: 4.104


 32%|███▏      | 15959/50000 [23:51<43:15, 13.11it/s]

Epochs: 15956 | epoch avg. loss: 0.032 | test avg. loss: 4.206
Epochs: 15957 | epoch avg. loss: 0.021 | test avg. loss: 4.060
Epochs: 15958 | epoch avg. loss: 0.030 | test avg. loss: 4.241


 32%|███▏      | 15963/50000 [23:51<39:52, 14.23it/s]

Epochs: 15959 | epoch avg. loss: 0.090 | test avg. loss: 4.135
Epochs: 15960 | epoch avg. loss: 0.048 | test avg. loss: 4.093
Epochs: 15961 | epoch avg. loss: 0.094 | test avg. loss: 4.364
Epochs: 15962 | epoch avg. loss: 0.048 | test avg. loss: 4.140


 32%|███▏      | 15967/50000 [23:51<37:56, 14.95it/s]

Epochs: 15963 | epoch avg. loss: 0.068 | test avg. loss: 4.232
Epochs: 15964 | epoch avg. loss: 0.054 | test avg. loss: 4.136
Epochs: 15965 | epoch avg. loss: 0.038 | test avg. loss: 3.929
Epochs: 15966 | epoch avg. loss: 0.045 | test avg. loss: 4.049


 32%|███▏      | 15969/50000 [23:51<40:52, 13.87it/s]

Epochs: 15967 | epoch avg. loss: 0.022 | test avg. loss: 4.179
Epochs: 15968 | epoch avg. loss: 0.026 | test avg. loss: 4.284
Epochs: 15969 | epoch avg. loss: 0.025 | test avg. loss: 4.213


 32%|███▏      | 15973/50000 [23:52<46:40, 12.15it/s]

Epochs: 15970 | epoch avg. loss: 0.071 | test avg. loss: 4.250
Epochs: 15971 | epoch avg. loss: 0.021 | test avg. loss: 4.196
Epochs: 15972 | epoch avg. loss: 0.019 | test avg. loss: 4.122


 32%|███▏      | 15977/50000 [23:52<42:19, 13.40it/s]

Epochs: 15973 | epoch avg. loss: 0.023 | test avg. loss: 4.207
Epochs: 15974 | epoch avg. loss: 0.025 | test avg. loss: 4.181
Epochs: 15975 | epoch avg. loss: 0.023 | test avg. loss: 4.170
Epochs: 15976 | epoch avg. loss: 0.033 | test avg. loss: 4.445


 32%|███▏      | 15981/50000 [23:52<39:56, 14.20it/s]

Epochs: 15977 | epoch avg. loss: 0.066 | test avg. loss: 4.155
Epochs: 15978 | epoch avg. loss: 0.074 | test avg. loss: 4.099
Epochs: 15979 | epoch avg. loss: 0.034 | test avg. loss: 4.173
Epochs: 15980 | epoch avg. loss: 0.030 | test avg. loss: 4.111


 32%|███▏      | 15983/50000 [23:53<43:17, 13.10it/s]

Epochs: 15981 | epoch avg. loss: 0.019 | test avg. loss: 4.120
Epochs: 15982 | epoch avg. loss: 0.019 | test avg. loss: 4.293
Epochs: 15983 | epoch avg. loss: 0.033 | test avg. loss: 4.111


 32%|███▏      | 15987/50000 [23:53<44:07, 12.85it/s]

Epochs: 15984 | epoch avg. loss: 0.031 | test avg. loss: 4.140
Epochs: 15985 | epoch avg. loss: 0.015 | test avg. loss: 4.133
Epochs: 15986 | epoch avg. loss: 0.020 | test avg. loss: 4.121


 32%|███▏      | 15991/50000 [23:53<40:49, 13.89it/s]

Epochs: 15987 | epoch avg. loss: 0.019 | test avg. loss: 4.272
Epochs: 15988 | epoch avg. loss: 0.054 | test avg. loss: 4.161
Epochs: 15989 | epoch avg. loss: 0.022 | test avg. loss: 4.113
Epochs: 15990 | epoch avg. loss: 0.032 | test avg. loss: 4.176


 32%|███▏      | 15995/50000 [23:53<38:31, 14.71it/s]

Epochs: 15991 | epoch avg. loss: 0.022 | test avg. loss: 4.290
Epochs: 15992 | epoch avg. loss: 0.047 | test avg. loss: 4.145
Epochs: 15993 | epoch avg. loss: 0.045 | test avg. loss: 4.116
Epochs: 15994 | epoch avg. loss: 0.039 | test avg. loss: 4.347


 32%|███▏      | 15997/50000 [23:54<41:30, 13.66it/s]

Epochs: 15995 | epoch avg. loss: 0.061 | test avg. loss: 4.085
Epochs: 15996 | epoch avg. loss: 0.035 | test avg. loss: 4.107
Epochs: 15997 | epoch avg. loss: 0.015 | test avg. loss: 4.093
Epochs: 15998 | epoch avg. loss: 0.035 | test avg. loss: 4.162


 32%|███▏      | 15999/50000 [23:54<39:45, 14.26it/s]

Epochs: 15999 | epoch avg. loss: 0.023 | test avg. loss: 4.354


 32%|███▏      | 16003/50000 [23:55<1:55:39,  4.90it/s]

Epochs: 16000 | epoch avg. loss: 0.045 | test avg. loss: 4.140
Epochs: 16001 | epoch avg. loss: 0.055 | test avg. loss: 4.222
Epochs: 16002 | epoch avg. loss: 0.060 | test avg. loss: 4.135
Epochs: 16003 | epoch avg. loss: 0.057 | test avg. loss: 4.075


 32%|███▏      | 16007/50000 [23:55<1:16:42,  7.39it/s]

Epochs: 16004 | epoch avg. loss: 0.193 | test avg. loss: 4.425
Epochs: 16005 | epoch avg. loss: 0.163 | test avg. loss: 4.328
Epochs: 16006 | epoch avg. loss: 0.051 | test avg. loss: 4.172


 32%|███▏      | 16011/50000 [23:56<54:53, 10.32it/s]  

Epochs: 16007 | epoch avg. loss: 0.061 | test avg. loss: 4.358
Epochs: 16008 | epoch avg. loss: 0.072 | test avg. loss: 4.074
Epochs: 16009 | epoch avg. loss: 0.038 | test avg. loss: 4.124
Epochs: 16010 | epoch avg. loss: 0.024 | test avg. loss: 4.212




Epochs: 16011 | epoch avg. loss: 0.024 | test avg. loss: 4.094
Epochs: 16012 | epoch avg. loss: 0.056 | test avg. loss: 4.201
Epochs: 16013 | epoch avg. loss: 0.049 | test avg. loss: 4.138


 32%|███▏      | 16017/50000 [23:56<43:53, 12.90it/s]

Epochs: 16014 | epoch avg. loss: 0.022 | test avg. loss: 4.097
Epochs: 16015 | epoch avg. loss: 0.021 | test avg. loss: 4.138
Epochs: 16016 | epoch avg. loss: 0.022 | test avg. loss: 4.100
Epochs: 16017 | epoch avg. loss: 0.016 | test avg. loss: 4.238


 32%|███▏      | 16021/50000 [23:56<40:26, 14.00it/s]

Epochs: 16018 | epoch avg. loss: 0.034 | test avg. loss: 4.184
Epochs: 16019 | epoch avg. loss: 0.021 | test avg. loss: 4.084
Epochs: 16020 | epoch avg. loss: 0.036 | test avg. loss: 4.290


 32%|███▏      | 16023/50000 [23:57<40:47, 13.89it/s]

Epochs: 16021 | epoch avg. loss: 0.074 | test avg. loss: 4.279
Epochs: 16022 | epoch avg. loss: 0.030 | test avg. loss: 4.184
Epochs: 16023 | epoch avg. loss: 0.055 | test avg. loss: 4.288
Epochs: 16024 | epoch avg. loss: 0.043 | test avg. loss: 4.177


 32%|███▏      | 16029/50000 [23:57<37:34, 15.07it/s]

Epochs: 16025 | epoch avg. loss: 0.031 | test avg. loss: 4.055
Epochs: 16026 | epoch avg. loss: 0.047 | test avg. loss: 4.395
Epochs: 16027 | epoch avg. loss: 0.142 | test avg. loss: 4.234
Epochs: 16028 | epoch avg. loss: 0.092 | test avg. loss: 4.148


 32%|███▏      | 16031/50000 [23:57<38:36, 14.66it/s]

Epochs: 16029 | epoch avg. loss: 0.064 | test avg. loss: 4.420
Epochs: 16030 | epoch avg. loss: 0.153 | test avg. loss: 4.004
Epochs: 16031 | epoch avg. loss: 0.136 | test avg. loss: 4.141




Epochs: 16032 | epoch avg. loss: 0.062 | test avg. loss: 4.619
Epochs: 16033 | epoch avg. loss: 0.123 | test avg. loss: 4.118
Epochs: 16034 | epoch avg. loss: 0.089 | test avg. loss: 4.196


 32%|███▏      | 16037/50000 [23:58<40:01, 14.14it/s]

Epochs: 16035 | epoch avg. loss: 0.029 | test avg. loss: 4.245
Epochs: 16036 | epoch avg. loss: 0.029 | test avg. loss: 4.154
Epochs: 16037 | epoch avg. loss: 0.070 | test avg. loss: 4.193


 32%|███▏      | 16041/50000 [23:58<39:52, 14.20it/s]

Epochs: 16038 | epoch avg. loss: 0.025 | test avg. loss: 4.352
Epochs: 16039 | epoch avg. loss: 0.049 | test avg. loss: 4.197
Epochs: 16040 | epoch avg. loss: 0.041 | test avg. loss: 4.117
Epochs: 16041 | epoch avg. loss: 0.042 | test avg. loss: 4.200


 32%|███▏      | 16045/50000 [23:58<39:29, 14.33it/s]

Epochs: 16042 | epoch avg. loss: 0.044 | test avg. loss: 4.133
Epochs: 16043 | epoch avg. loss: 0.024 | test avg. loss: 4.079
Epochs: 16044 | epoch avg. loss: 0.025 | test avg. loss: 4.145


 32%|███▏      | 16047/50000 [23:58<42:55, 13.18it/s]

Epochs: 16045 | epoch avg. loss: 0.023 | test avg. loss: 4.344
Epochs: 16046 | epoch avg. loss: 0.042 | test avg. loss: 4.273
Epochs: 16047 | epoch avg. loss: 0.022 | test avg. loss: 4.123


 32%|███▏      | 16051/50000 [23:58<43:19, 13.06it/s]

Epochs: 16048 | epoch avg. loss: 0.052 | test avg. loss: 4.103
Epochs: 16049 | epoch avg. loss: 0.019 | test avg. loss: 4.193
Epochs: 16050 | epoch avg. loss: 0.022 | test avg. loss: 4.113


 32%|███▏      | 16055/50000 [23:59<39:54, 14.18it/s]

Epochs: 16051 | epoch avg. loss: 0.058 | test avg. loss: 4.267
Epochs: 16052 | epoch avg. loss: 0.041 | test avg. loss: 4.488
Epochs: 16053 | epoch avg. loss: 0.061 | test avg. loss: 4.206
Epochs: 16054 | epoch avg. loss: 0.037 | test avg. loss: 4.203


 32%|███▏      | 16059/50000 [23:59<37:48, 14.96it/s]

Epochs: 16055 | epoch avg. loss: 0.022 | test avg. loss: 4.272
Epochs: 16056 | epoch avg. loss: 0.030 | test avg. loss: 4.165
Epochs: 16057 | epoch avg. loss: 0.052 | test avg. loss: 4.205
Epochs: 16058 | epoch avg. loss: 0.054 | test avg. loss: 4.336


 32%|███▏      | 16061/50000 [23:59<38:20, 14.75it/s]

Epochs: 16059 | epoch avg. loss: 0.026 | test avg. loss: 4.289
Epochs: 16060 | epoch avg. loss: 0.029 | test avg. loss: 4.144
Epochs: 16061 | epoch avg. loss: 0.040 | test avg. loss: 4.246


 32%|███▏      | 16065/50000 [23:59<42:09, 13.42it/s]

Epochs: 16062 | epoch avg. loss: 0.047 | test avg. loss: 4.118
Epochs: 16063 | epoch avg. loss: 0.055 | test avg. loss: 4.207
Epochs: 16064 | epoch avg. loss: 0.106 | test avg. loss: 4.612


 32%|███▏      | 16069/50000 [24:00<40:44, 13.88it/s]

Epochs: 16065 | epoch avg. loss: 0.148 | test avg. loss: 4.462
Epochs: 16066 | epoch avg. loss: 0.055 | test avg. loss: 4.174
Epochs: 16067 | epoch avg. loss: 0.155 | test avg. loss: 4.213
Epochs: 16068 | epoch avg. loss: 0.091 | test avg. loss: 4.199


 32%|███▏      | 16073/50000 [24:00<38:24, 14.72it/s]

Epochs: 16069 | epoch avg. loss: 0.054 | test avg. loss: 4.337
Epochs: 16070 | epoch avg. loss: 0.124 | test avg. loss: 4.540
Epochs: 16071 | epoch avg. loss: 0.142 | test avg. loss: 4.571
Epochs: 16072 | epoch avg. loss: 0.142 | test avg. loss: 4.043


 32%|███▏      | 16075/50000 [24:00<37:55, 14.91it/s]

Epochs: 16073 | epoch avg. loss: 0.451 | test avg. loss: 4.061
Epochs: 16074 | epoch avg. loss: 0.212 | test avg. loss: 4.615
Epochs: 16075 | epoch avg. loss: 0.225 | test avg. loss: 4.345


 32%|███▏      | 16079/50000 [24:00<43:55, 12.87it/s]

Epochs: 16076 | epoch avg. loss: 0.204 | test avg. loss: 4.548
Epochs: 16077 | epoch avg. loss: 0.139 | test avg. loss: 4.449
Epochs: 16078 | epoch avg. loss: 0.081 | test avg. loss: 4.026


 32%|███▏      | 16081/50000 [24:01<41:26, 13.64it/s]

Epochs: 16079 | epoch avg. loss: 0.127 | test avg. loss: 4.016
Epochs: 16080 | epoch avg. loss: 0.080 | test avg. loss: 4.431
Epochs: 16081 | epoch avg. loss: 0.134 | test avg. loss: 4.133


 32%|███▏      | 16085/50000 [24:01<41:06, 13.75it/s]

Epochs: 16082 | epoch avg. loss: 0.145 | test avg. loss: 4.284
Epochs: 16083 | epoch avg. loss: 0.068 | test avg. loss: 4.511
Epochs: 16084 | epoch avg. loss: 0.116 | test avg. loss: 4.110


 32%|███▏      | 16087/50000 [24:01<41:37, 13.58it/s]

Epochs: 16085 | epoch avg. loss: 0.121 | test avg. loss: 4.013
Epochs: 16086 | epoch avg. loss: 0.099 | test avg. loss: 4.527
Epochs: 16087 | epoch avg. loss: 0.276 | test avg. loss: 4.132


 32%|███▏      | 16091/50000 [24:01<44:11, 12.79it/s]

Epochs: 16088 | epoch avg. loss: 0.066 | test avg. loss: 4.069
Epochs: 16089 | epoch avg. loss: 0.090 | test avg. loss: 4.491
Epochs: 16090 | epoch avg. loss: 0.106 | test avg. loss: 4.348


 32%|███▏      | 16093/50000 [24:02<48:00, 11.77it/s]

Epochs: 16091 | epoch avg. loss: 0.091 | test avg. loss: 4.356
Epochs: 16092 | epoch avg. loss: 0.202 | test avg. loss: 4.640
Epochs: 16093 | epoch avg. loss: 0.134 | test avg. loss: 4.279


 32%|███▏      | 16097/50000 [24:02<48:55, 11.55it/s]

Epochs: 16094 | epoch avg. loss: 0.082 | test avg. loss: 4.169
Epochs: 16095 | epoch avg. loss: 0.117 | test avg. loss: 4.331
Epochs: 16096 | epoch avg. loss: 0.080 | test avg. loss: 4.237


 32%|███▏      | 16099/50000 [24:02<50:15, 11.24it/s]

Epochs: 16097 | epoch avg. loss: 0.041 | test avg. loss: 4.256
Epochs: 16098 | epoch avg. loss: 0.048 | test avg. loss: 4.770
Epochs: 16099 | epoch avg. loss: 0.214 | test avg. loss: 4.321


 32%|███▏      | 16103/50000 [24:04<2:21:37,  3.99it/s]

Epochs: 16100 | epoch avg. loss: 0.143 | test avg. loss: 4.175
Epochs: 16101 | epoch avg. loss: 0.078 | test avg. loss: 4.423
Epochs: 16102 | epoch avg. loss: 0.144 | test avg. loss: 3.985


 32%|███▏      | 16105/50000 [24:04<1:50:41,  5.10it/s]

Epochs: 16103 | epoch avg. loss: 0.137 | test avg. loss: 4.074
Epochs: 16104 | epoch avg. loss: 0.059 | test avg. loss: 4.432
Epochs: 16105 | epoch avg. loss: 0.074 | test avg. loss: 4.245


 32%|███▏      | 16109/50000 [24:04<1:18:09,  7.23it/s]

Epochs: 16106 | epoch avg. loss: 0.080 | test avg. loss: 4.263
Epochs: 16107 | epoch avg. loss: 0.080 | test avg. loss: 4.519
Epochs: 16108 | epoch avg. loss: 0.123 | test avg. loss: 4.056


 32%|███▏      | 16111/50000 [24:05<1:06:42,  8.47it/s]

Epochs: 16109 | epoch avg. loss: 0.121 | test avg. loss: 4.135
Epochs: 16110 | epoch avg. loss: 0.055 | test avg. loss: 4.637
Epochs: 16111 | epoch avg. loss: 0.150 | test avg. loss: 4.189


 32%|███▏      | 16115/50000 [24:05<58:50,  9.60it/s]  

Epochs: 16112 | epoch avg. loss: 0.079 | test avg. loss: 4.376
Epochs: 16113 | epoch avg. loss: 0.102 | test avg. loss: 4.226
Epochs: 16114 | epoch avg. loss: 0.075 | test avg. loss: 3.941


 32%|███▏      | 16117/50000 [24:05<52:56, 10.67it/s]

Epochs: 16115 | epoch avg. loss: 0.133 | test avg. loss: 4.191
Epochs: 16116 | epoch avg. loss: 0.033 | test avg. loss: 4.319
Epochs: 16117 | epoch avg. loss: 0.034 | test avg. loss: 4.364


 32%|███▏      | 16121/50000 [24:05<47:23, 11.92it/s]

Epochs: 16118 | epoch avg. loss: 0.029 | test avg. loss: 4.437
Epochs: 16119 | epoch avg. loss: 0.063 | test avg. loss: 4.266
Epochs: 16120 | epoch avg. loss: 0.034 | test avg. loss: 4.095


 32%|███▏      | 16123/50000 [24:06<45:20, 12.45it/s]

Epochs: 16121 | epoch avg. loss: 0.062 | test avg. loss: 4.172
Epochs: 16122 | epoch avg. loss: 0.021 | test avg. loss: 4.313
Epochs: 16123 | epoch avg. loss: 0.021 | test avg. loss: 4.284


 32%|███▏      | 16127/50000 [24:06<41:25, 13.63it/s]

Epochs: 16124 | epoch avg. loss: 0.034 | test avg. loss: 4.305
Epochs: 16125 | epoch avg. loss: 0.018 | test avg. loss: 4.300
Epochs: 16126 | epoch avg. loss: 0.018 | test avg. loss: 4.236
Epochs: 16127 | epoch avg. loss: 0.017 | test avg. loss: 4.228


 32%|███▏      | 16131/50000 [24:06<40:03, 14.09it/s]

Epochs: 16128 | epoch avg. loss: 0.015 | test avg. loss: 4.300
Epochs: 16129 | epoch avg. loss: 0.017 | test avg. loss: 4.242
Epochs: 16130 | epoch avg. loss: 0.014 | test avg. loss: 4.372


 32%|███▏      | 16133/50000 [24:06<38:03, 14.83it/s]

Epochs: 16131 | epoch avg. loss: 0.044 | test avg. loss: 4.228
Epochs: 16132 | epoch avg. loss: 0.043 | test avg. loss: 4.163
Epochs: 16133 | epoch avg. loss: 0.049 | test avg. loss: 4.395


 32%|███▏      | 16137/50000 [24:07<40:57, 13.78it/s]

Epochs: 16134 | epoch avg. loss: 0.094 | test avg. loss: 4.318
Epochs: 16135 | epoch avg. loss: 0.041 | test avg. loss: 4.233
Epochs: 16136 | epoch avg. loss: 0.156 | test avg. loss: 4.503


 32%|███▏      | 16139/50000 [24:07<42:19, 13.33it/s]

Epochs: 16137 | epoch avg. loss: 0.179 | test avg. loss: 4.790
Epochs: 16138 | epoch avg. loss: 0.151 | test avg. loss: 4.417
Epochs: 16139 | epoch avg. loss: 0.325 | test avg. loss: 4.411


 32%|███▏      | 16143/50000 [24:07<41:54, 13.47it/s]

Epochs: 16140 | epoch avg. loss: 0.110 | test avg. loss: 4.364
Epochs: 16141 | epoch avg. loss: 0.099 | test avg. loss: 4.412
Epochs: 16142 | epoch avg. loss: 0.479 | test avg. loss: 4.405


 32%|███▏      | 16145/50000 [24:07<41:50, 13.48it/s]

Epochs: 16143 | epoch avg. loss: 0.285 | test avg. loss: 4.887
Epochs: 16144 | epoch avg. loss: 0.312 | test avg. loss: 4.209
Epochs: 16145 | epoch avg. loss: 0.578 | test avg. loss: 4.259


 32%|███▏      | 16149/50000 [24:07<43:24, 13.00it/s]

Epochs: 16146 | epoch avg. loss: 0.124 | test avg. loss: 5.138
Epochs: 16147 | epoch avg. loss: 0.281 | test avg. loss: 4.276
Epochs: 16148 | epoch avg. loss: 0.165 | test avg. loss: 4.141


 32%|███▏      | 16151/50000 [24:08<43:55, 12.84it/s]

Epochs: 16149 | epoch avg. loss: 0.064 | test avg. loss: 4.251
Epochs: 16150 | epoch avg. loss: 0.051 | test avg. loss: 4.233
Epochs: 16151 | epoch avg. loss: 0.037 | test avg. loss: 4.362


 32%|███▏      | 16155/50000 [24:08<45:17, 12.45it/s]

Epochs: 16152 | epoch avg. loss: 0.038 | test avg. loss: 4.538
Epochs: 16153 | epoch avg. loss: 0.033 | test avg. loss: 4.361
Epochs: 16154 | epoch avg. loss: 0.027 | test avg. loss: 4.352


 32%|███▏      | 16157/50000 [24:08<46:02, 12.25it/s]

Epochs: 16155 | epoch avg. loss: 0.040 | test avg. loss: 4.070
Epochs: 16156 | epoch avg. loss: 0.051 | test avg. loss: 4.026
Epochs: 16157 | epoch avg. loss: 0.084 | test avg. loss: 4.240


 32%|███▏      | 16161/50000 [24:08<45:12, 12.48it/s]

Epochs: 16158 | epoch avg. loss: 0.035 | test avg. loss: 4.332
Epochs: 16159 | epoch avg. loss: 0.030 | test avg. loss: 4.260
Epochs: 16160 | epoch avg. loss: 0.052 | test avg. loss: 4.280


 32%|███▏      | 16165/50000 [24:09<40:40, 13.86it/s]

Epochs: 16161 | epoch avg. loss: 0.030 | test avg. loss: 4.191
Epochs: 16162 | epoch avg. loss: 0.029 | test avg. loss: 4.016
Epochs: 16163 | epoch avg. loss: 0.096 | test avg. loss: 4.169
Epochs: 16164 | epoch avg. loss: 0.035 | test avg. loss: 4.477


 32%|███▏      | 16167/50000 [24:09<41:10, 13.69it/s]

Epochs: 16165 | epoch avg. loss: 0.078 | test avg. loss: 4.247
Epochs: 16166 | epoch avg. loss: 0.023 | test avg. loss: 4.293
Epochs: 16167 | epoch avg. loss: 0.019 | test avg. loss: 4.197


 32%|███▏      | 16171/50000 [24:09<39:10, 14.39it/s]

Epochs: 16168 | epoch avg. loss: 0.036 | test avg. loss: 4.136
Epochs: 16169 | epoch avg. loss: 0.088 | test avg. loss: 4.266
Epochs: 16170 | epoch avg. loss: 0.042 | test avg. loss: 4.314
Epochs: 16171 | epoch avg. loss: 0.029 | test avg. loss: 4.224




Epochs: 16172 | epoch avg. loss: 0.027 | test avg. loss: 4.459
Epochs: 16173 | epoch avg. loss: 0.075 | test avg. loss: 4.285
Epochs: 16174 | epoch avg. loss: 0.029 | test avg. loss: 4.127


 32%|███▏      | 16179/50000 [24:10<37:37, 14.98it/s]

Epochs: 16175 | epoch avg. loss: 0.067 | test avg. loss: 4.226
Epochs: 16176 | epoch avg. loss: 0.020 | test avg. loss: 4.382
Epochs: 16177 | epoch avg. loss: 0.027 | test avg. loss: 4.347
Epochs: 16178 | epoch avg. loss: 0.022 | test avg. loss: 4.224


 32%|███▏      | 16181/50000 [24:10<38:56, 14.47it/s]

Epochs: 16179 | epoch avg. loss: 0.043 | test avg. loss: 4.213
Epochs: 16180 | epoch avg. loss: 0.016 | test avg. loss: 4.261
Epochs: 16181 | epoch avg. loss: 0.019 | test avg. loss: 4.155


 32%|███▏      | 16185/50000 [24:10<41:03, 13.73it/s]

Epochs: 16182 | epoch avg. loss: 0.047 | test avg. loss: 4.187
Epochs: 16183 | epoch avg. loss: 0.038 | test avg. loss: 4.304
Epochs: 16184 | epoch avg. loss: 0.016 | test avg. loss: 4.352


 32%|███▏      | 16187/50000 [24:10<41:23, 13.62it/s]

Epochs: 16185 | epoch avg. loss: 0.018 | test avg. loss: 4.217
Epochs: 16186 | epoch avg. loss: 0.051 | test avg. loss: 4.170
Epochs: 16187 | epoch avg. loss: 0.045 | test avg. loss: 4.191


 32%|███▏      | 16191/50000 [24:11<44:53, 12.55it/s]

Epochs: 16188 | epoch avg. loss: 0.016 | test avg. loss: 4.157
Epochs: 16189 | epoch avg. loss: 0.014 | test avg. loss: 4.251
Epochs: 16190 | epoch avg. loss: 0.020 | test avg. loss: 4.200


 32%|███▏      | 16193/50000 [24:11<45:10, 12.47it/s]

Epochs: 16191 | epoch avg. loss: 0.018 | test avg. loss: 4.256
Epochs: 16192 | epoch avg. loss: 0.015 | test avg. loss: 4.285
Epochs: 16193 | epoch avg. loss: 0.021 | test avg. loss: 4.204


 32%|███▏      | 16197/50000 [24:11<42:24, 13.28it/s]

Epochs: 16194 | epoch avg. loss: 0.160 | test avg. loss: 4.237
Epochs: 16195 | epoch avg. loss: 0.042 | test avg. loss: 4.594
Epochs: 16196 | epoch avg. loss: 0.163 | test avg. loss: 4.281


 32%|███▏      | 16199/50000 [24:11<41:13, 13.66it/s]

Epochs: 16197 | epoch avg. loss: 0.126 | test avg. loss: 4.347
Epochs: 16198 | epoch avg. loss: 0.132 | test avg. loss: 4.917
Epochs: 16199 | epoch avg. loss: 0.322 | test avg. loss: 4.515




Epochs: 16200 | epoch avg. loss: 0.118 | test avg. loss: 4.070
Epochs: 16201 | epoch avg. loss: 0.275 | test avg. loss: 4.113
Epochs: 16202 | epoch avg. loss: 0.109 | test avg. loss: 5.101


 32%|███▏      | 16205/50000 [24:13<1:36:56,  5.81it/s]

Epochs: 16203 | epoch avg. loss: 0.434 | test avg. loss: 4.453
Epochs: 16204 | epoch avg. loss: 0.211 | test avg. loss: 4.608
Epochs: 16205 | epoch avg. loss: 0.287 | test avg. loss: 6.488
Epochs: 16206 | epoch avg. loss: 1.218 | test avg. loss: 4.226


 32%|███▏      | 16211/50000 [24:13<58:22,  9.65it/s]  

Epochs: 16207 | epoch avg. loss: 0.434 | test avg. loss: 4.177
Epochs: 16208 | epoch avg. loss: 0.373 | test avg. loss: 5.119
Epochs: 16209 | epoch avg. loss: 0.559 | test avg. loss: 4.457
Epochs: 16210 | epoch avg. loss: 0.697 | test avg. loss: 4.719


 32%|███▏      | 16213/50000 [24:14<55:21, 10.17it/s]

Epochs: 16211 | epoch avg. loss: 0.346 | test avg. loss: 7.081
Epochs: 16212 | epoch avg. loss: 1.301 | test avg. loss: 4.579
Epochs: 16213 | epoch avg. loss: 0.709 | test avg. loss: 4.507


 32%|███▏      | 16217/50000 [24:14<47:39, 11.81it/s]

Epochs: 16214 | epoch avg. loss: 0.534 | test avg. loss: 5.404
Epochs: 16215 | epoch avg. loss: 0.749 | test avg. loss: 4.032
Epochs: 16216 | epoch avg. loss: 0.281 | test avg. loss: 4.206


 32%|███▏      | 16219/50000 [24:14<45:52, 12.27it/s]

Epochs: 16217 | epoch avg. loss: 0.254 | test avg. loss: 4.781
Epochs: 16218 | epoch avg. loss: 0.248 | test avg. loss: 4.422
Epochs: 16219 | epoch avg. loss: 0.448 | test avg. loss: 4.417


 32%|███▏      | 16223/50000 [24:14<44:14, 12.73it/s]

Epochs: 16220 | epoch avg. loss: 0.350 | test avg. loss: 4.528
Epochs: 16221 | epoch avg. loss: 0.259 | test avg. loss: 3.937
Epochs: 16222 | epoch avg. loss: 0.135 | test avg. loss: 3.953


 32%|███▏      | 16225/50000 [24:15<44:32, 12.64it/s]

Epochs: 16223 | epoch avg. loss: 0.104 | test avg. loss: 4.590
Epochs: 16224 | epoch avg. loss: 0.149 | test avg. loss: 4.364
Epochs: 16225 | epoch avg. loss: 0.163 | test avg. loss: 4.442


 32%|███▏      | 16229/50000 [24:15<42:16, 13.31it/s]

Epochs: 16226 | epoch avg. loss: 0.106 | test avg. loss: 4.971
Epochs: 16227 | epoch avg. loss: 0.260 | test avg. loss: 4.258
Epochs: 16228 | epoch avg. loss: 0.117 | test avg. loss: 4.210


                                                     

Epochs: 16229 | epoch avg. loss: 0.063 | test avg. loss: 4.446
Epochs: 16230 | epoch avg. loss: 0.087 | test avg. loss: 4.281
Epochs: 16231 | epoch avg. loss: 0.031 | test avg. loss: 4.229


 32%|███▏      | 16235/50000 [24:15<39:20, 14.30it/s]

Epochs: 16232 | epoch avg. loss: 0.050 | test avg. loss: 4.417
Epochs: 16233 | epoch avg. loss: 0.043 | test avg. loss: 4.199
Epochs: 16234 | epoch avg. loss: 0.070 | test avg. loss: 4.185
Epochs: 16235 | epoch avg. loss: 0.036 | test avg. loss: 4.419


 32%|███▏      | 16239/50000 [24:15<40:13, 13.99it/s]

Epochs: 16236 | epoch avg. loss: 0.103 | test avg. loss: 4.147
Epochs: 16237 | epoch avg. loss: 0.021 | test avg. loss: 4.029
Epochs: 16238 | epoch avg. loss: 0.025 | test avg. loss: 4.198




Epochs: 16239 | epoch avg. loss: 0.041 | test avg. loss: 4.145
Epochs: 16240 | epoch avg. loss: 0.022 | test avg. loss: 4.242
Epochs: 16241 | epoch avg. loss: 0.020 | test avg. loss: 4.295


 32%|███▏      | 16245/50000 [24:16<38:36, 14.57it/s]

Epochs: 16242 | epoch avg. loss: 0.019 | test avg. loss: 4.190
Epochs: 16243 | epoch avg. loss: 0.029 | test avg. loss: 4.210
Epochs: 16244 | epoch avg. loss: 0.015 | test avg. loss: 4.133
Epochs: 16245 | epoch avg. loss: 0.020 | test avg. loss: 4.233


 32%|███▏      | 16249/50000 [24:16<37:10, 15.13it/s]

Epochs: 16246 | epoch avg. loss: 0.030 | test avg. loss: 4.273
Epochs: 16247 | epoch avg. loss: 0.022 | test avg. loss: 4.170
Epochs: 16248 | epoch avg. loss: 0.042 | test avg. loss: 4.230
Epochs: 16249 | epoch avg. loss: 0.015 | test avg. loss: 4.211


 33%|███▎      | 16253/50000 [24:16<37:51, 14.86it/s]

Epochs: 16250 | epoch avg. loss: 0.017 | test avg. loss: 4.315
Epochs: 16251 | epoch avg. loss: 0.049 | test avg. loss: 4.150
Epochs: 16252 | epoch avg. loss: 0.015 | test avg. loss: 4.165


 33%|███▎      | 16255/50000 [24:17<41:24, 13.58it/s]

Epochs: 16253 | epoch avg. loss: 0.018 | test avg. loss: 4.183
Epochs: 16254 | epoch avg. loss: 0.017 | test avg. loss: 4.170
Epochs: 16255 | epoch avg. loss: 0.019 | test avg. loss: 4.385


 33%|███▎      | 16259/50000 [24:17<42:03, 13.37it/s]

Epochs: 16256 | epoch avg. loss: 0.055 | test avg. loss: 4.178
Epochs: 16257 | epoch avg. loss: 0.029 | test avg. loss: 4.152
Epochs: 16258 | epoch avg. loss: 0.022 | test avg. loss: 4.218


 33%|███▎      | 16261/50000 [24:17<40:39, 13.83it/s]

Epochs: 16259 | epoch avg. loss: 0.035 | test avg. loss: 4.078
Epochs: 16260 | epoch avg. loss: 0.026 | test avg. loss: 4.292
Epochs: 16261 | epoch avg. loss: 0.067 | test avg. loss: 4.281


 33%|███▎      | 16265/50000 [24:17<38:56, 14.44it/s]

Epochs: 16262 | epoch avg. loss: 0.039 | test avg. loss: 4.258
Epochs: 16263 | epoch avg. loss: 0.033 | test avg. loss: 4.368
Epochs: 16264 | epoch avg. loss: 0.053 | test avg. loss: 4.039
Epochs: 16265 | epoch avg. loss: 0.074 | test avg. loss: 4.049


 33%|███▎      | 16269/50000 [24:18<43:27, 12.93it/s]

Epochs: 16266 | epoch avg. loss: 0.043 | test avg. loss: 4.396
Epochs: 16267 | epoch avg. loss: 0.125 | test avg. loss: 4.156
Epochs: 16268 | epoch avg. loss: 0.051 | test avg. loss: 4.311


 33%|███▎      | 16273/50000 [24:18<39:42, 14.16it/s]

Epochs: 16269 | epoch avg. loss: 0.037 | test avg. loss: 4.411
Epochs: 16270 | epoch avg. loss: 0.036 | test avg. loss: 4.232
Epochs: 16271 | epoch avg. loss: 0.030 | test avg. loss: 4.171
Epochs: 16272 | epoch avg. loss: 0.027 | test avg. loss: 4.260


 33%|███▎      | 16277/50000 [24:18<38:11, 14.71it/s]

Epochs: 16273 | epoch avg. loss: 0.031 | test avg. loss: 4.269
Epochs: 16274 | epoch avg. loss: 0.023 | test avg. loss: 4.181
Epochs: 16275 | epoch avg. loss: 0.054 | test avg. loss: 4.261
Epochs: 16276 | epoch avg. loss: 0.014 | test avg. loss: 4.209


 33%|███▎      | 16279/50000 [24:18<39:49, 14.11it/s]

Epochs: 16277 | epoch avg. loss: 0.019 | test avg. loss: 4.154
Epochs: 16278 | epoch avg. loss: 0.021 | test avg. loss: 4.282
Epochs: 16279 | epoch avg. loss: 0.021 | test avg. loss: 4.152


 33%|███▎      | 16283/50000 [24:19<44:27, 12.64it/s]

Epochs: 16280 | epoch avg. loss: 0.057 | test avg. loss: 4.208
Epochs: 16281 | epoch avg. loss: 0.025 | test avg. loss: 4.361
Epochs: 16282 | epoch avg. loss: 0.045 | test avg. loss: 4.238


                                                     

Epochs: 16283 | epoch avg. loss: 0.022 | test avg. loss: 4.124
Epochs: 16284 | epoch avg. loss: 0.031 | test avg. loss: 4.287
Epochs: 16285 | epoch avg. loss: 0.051 | test avg. loss: 4.138


 33%|███▎      | 16289/50000 [24:19<41:28, 13.54it/s]

Epochs: 16286 | epoch avg. loss: 0.019 | test avg. loss: 4.162
Epochs: 16287 | epoch avg. loss: 0.021 | test avg. loss: 4.307
Epochs: 16288 | epoch avg. loss: 0.033 | test avg. loss: 4.140


 33%|███▎      | 16291/50000 [24:19<40:43, 13.80it/s]

Epochs: 16289 | epoch avg. loss: 0.088 | test avg. loss: 4.096
Epochs: 16290 | epoch avg. loss: 0.034 | test avg. loss: 4.331
Epochs: 16291 | epoch avg. loss: 0.093 | test avg. loss: 4.080


 33%|███▎      | 16295/50000 [24:19<40:30, 13.87it/s]

Epochs: 16292 | epoch avg. loss: 0.021 | test avg. loss: 4.258
Epochs: 16293 | epoch avg. loss: 0.053 | test avg. loss: 4.392
Epochs: 16294 | epoch avg. loss: 0.064 | test avg. loss: 4.172


 33%|███▎      | 16297/50000 [24:20<40:46, 13.77it/s]

Epochs: 16295 | epoch avg. loss: 0.020 | test avg. loss: 4.183
Epochs: 16296 | epoch avg. loss: 0.030 | test avg. loss: 4.354
Epochs: 16297 | epoch avg. loss: 0.070 | test avg. loss: 4.249


 33%|███▎      | 16299/50000 [24:20<41:34, 13.51it/s]

Epochs: 16298 | epoch avg. loss: 0.019 | test avg. loss: 4.204
Epochs: 16299 | epoch avg. loss: 0.022 | test avg. loss: 4.264


 33%|███▎      | 16303/50000 [24:22<2:07:53,  4.39it/s]

Epochs: 16300 | epoch avg. loss: 0.045 | test avg. loss: 4.369
Epochs: 16301 | epoch avg. loss: 0.086 | test avg. loss: 4.171
Epochs: 16302 | epoch avg. loss: 0.020 | test avg. loss: 4.180


 33%|███▎      | 16307/50000 [24:22<1:22:08,  6.84it/s]

Epochs: 16303 | epoch avg. loss: 0.028 | test avg. loss: 4.309
Epochs: 16304 | epoch avg. loss: 0.055 | test avg. loss: 4.102
Epochs: 16305 | epoch avg. loss: 0.032 | test avg. loss: 4.042
Epochs: 16306 | epoch avg. loss: 0.063 | test avg. loss: 4.156


 33%|███▎      | 16311/50000 [24:22<58:55,  9.53it/s]  

Epochs: 16307 | epoch avg. loss: 0.030 | test avg. loss: 4.373
Epochs: 16308 | epoch avg. loss: 0.058 | test avg. loss: 4.137
Epochs: 16309 | epoch avg. loss: 0.064 | test avg. loss: 4.150
Epochs: 16310 | epoch avg. loss: 0.041 | test avg. loss: 4.189


 33%|███▎      | 16315/50000 [24:22<46:42, 12.02it/s]

Epochs: 16311 | epoch avg. loss: 0.017 | test avg. loss: 4.170
Epochs: 16312 | epoch avg. loss: 0.017 | test avg. loss: 4.158
Epochs: 16313 | epoch avg. loss: 0.028 | test avg. loss: 4.217
Epochs: 16314 | epoch avg. loss: 0.039 | test avg. loss: 4.063


 33%|███▎      | 16317/50000 [24:23<47:59, 11.70it/s]

Epochs: 16315 | epoch avg. loss: 0.094 | test avg. loss: 4.116
Epochs: 16316 | epoch avg. loss: 0.134 | test avg. loss: 4.479
Epochs: 16317 | epoch avg. loss: 0.203 | test avg. loss: 4.510


 33%|███▎      | 16321/50000 [24:23<47:40, 11.77it/s]

Epochs: 16318 | epoch avg. loss: 0.160 | test avg. loss: 4.073
Epochs: 16319 | epoch avg. loss: 0.307 | test avg. loss: 4.081
Epochs: 16320 | epoch avg. loss: 0.070 | test avg. loss: 4.803


 33%|███▎      | 16323/50000 [24:23<48:21, 11.61it/s]

Epochs: 16321 | epoch avg. loss: 0.263 | test avg. loss: 4.351
Epochs: 16322 | epoch avg. loss: 0.025 | test avg. loss: 4.296
Epochs: 16323 | epoch avg. loss: 0.022 | test avg. loss: 4.249


 33%|███▎      | 16327/50000 [24:23<44:39, 12.56it/s]

Epochs: 16324 | epoch avg. loss: 0.016 | test avg. loss: 4.204
Epochs: 16325 | epoch avg. loss: 0.023 | test avg. loss: 4.369
Epochs: 16326 | epoch avg. loss: 0.074 | test avg. loss: 4.384


 33%|███▎      | 16329/50000 [24:24<48:55, 11.47it/s]

Epochs: 16327 | epoch avg. loss: 0.026 | test avg. loss: 4.329
Epochs: 16328 | epoch avg. loss: 0.018 | test avg. loss: 4.373
Epochs: 16329 | epoch avg. loss: 0.018 | test avg. loss: 4.194


 33%|███▎      | 16333/50000 [24:24<48:55, 11.47it/s]

Epochs: 16330 | epoch avg. loss: 0.042 | test avg. loss: 4.181
Epochs: 16331 | epoch avg. loss: 0.038 | test avg. loss: 4.330
Epochs: 16332 | epoch avg. loss: 0.059 | test avg. loss: 4.108


 33%|███▎      | 16335/50000 [24:24<48:35, 11.55it/s]

Epochs: 16333 | epoch avg. loss: 0.104 | test avg. loss: 4.169
Epochs: 16334 | epoch avg. loss: 0.066 | test avg. loss: 4.701
Epochs: 16335 | epoch avg. loss: 0.226 | test avg. loss: 4.238


 33%|███▎      | 16339/50000 [24:24<44:43, 12.54it/s]

Epochs: 16336 | epoch avg. loss: 0.033 | test avg. loss: 4.047
Epochs: 16337 | epoch avg. loss: 0.035 | test avg. loss: 4.131
Epochs: 16338 | epoch avg. loss: 0.041 | test avg. loss: 4.123


 33%|███▎      | 16341/50000 [24:25<46:01, 12.19it/s]

Epochs: 16339 | epoch avg. loss: 0.029 | test avg. loss: 4.211
Epochs: 16340 | epoch avg. loss: 0.028 | test avg. loss: 4.243
Epochs: 16341 | epoch avg. loss: 0.024 | test avg. loss: 4.208


 33%|███▎      | 16345/50000 [24:25<42:29, 13.20it/s]

Epochs: 16342 | epoch avg. loss: 0.030 | test avg. loss: 4.144
Epochs: 16343 | epoch avg. loss: 0.027 | test avg. loss: 4.230
Epochs: 16344 | epoch avg. loss: 0.025 | test avg. loss: 4.198


 33%|███▎      | 16347/50000 [24:25<41:49, 13.41it/s]

Epochs: 16345 | epoch avg. loss: 0.017 | test avg. loss: 4.213
Epochs: 16346 | epoch avg. loss: 0.013 | test avg. loss: 4.328
Epochs: 16347 | epoch avg. loss: 0.025 | test avg. loss: 4.287


 33%|███▎      | 16351/50000 [24:25<42:41, 13.13it/s]

Epochs: 16348 | epoch avg. loss: 0.030 | test avg. loss: 4.316
Epochs: 16349 | epoch avg. loss: 0.025 | test avg. loss: 4.382
Epochs: 16350 | epoch avg. loss: 0.052 | test avg. loss: 4.213
Epochs: 16351 | epoch avg. loss: 0.016 | test avg. loss: 4.156


 33%|███▎      | 16355/50000 [24:26<45:06, 12.43it/s]

Epochs: 16352 | epoch avg. loss: 0.019 | test avg. loss: 4.248
Epochs: 16353 | epoch avg. loss: 0.032 | test avg. loss: 4.321
Epochs: 16354 | epoch avg. loss: 0.035 | test avg. loss: 4.232


 33%|███▎      | 16359/50000 [24:26<41:32, 13.50it/s]

Epochs: 16355 | epoch avg. loss: 0.025 | test avg. loss: 4.250
Epochs: 16356 | epoch avg. loss: 0.018 | test avg. loss: 4.305
Epochs: 16357 | epoch avg. loss: 0.016 | test avg. loss: 4.179
Epochs: 16358 | epoch avg. loss: 0.044 | test avg. loss: 4.176


 33%|███▎      | 16361/50000 [24:26<40:33, 13.82it/s]

Epochs: 16359 | epoch avg. loss: 0.063 | test avg. loss: 4.268
Epochs: 16360 | epoch avg. loss: 0.046 | test avg. loss: 4.557
Epochs: 16361 | epoch avg. loss: 0.116 | test avg. loss: 4.214


 33%|███▎      | 16365/50000 [24:26<39:13, 14.29it/s]

Epochs: 16362 | epoch avg. loss: 0.057 | test avg. loss: 4.171
Epochs: 16363 | epoch avg. loss: 0.040 | test avg. loss: 4.266
Epochs: 16364 | epoch avg. loss: 0.024 | test avg. loss: 4.156
Epochs: 16365 | epoch avg. loss: 0.017 | test avg. loss: 4.200


 33%|███▎      | 16369/50000 [24:27<40:47, 13.74it/s]

Epochs: 16366 | epoch avg. loss: 0.035 | test avg. loss: 4.439
Epochs: 16367 | epoch avg. loss: 0.087 | test avg. loss: 4.208
Epochs: 16368 | epoch avg. loss: 0.038 | test avg. loss: 4.213


 33%|███▎      | 16371/50000 [24:27<40:45, 13.75it/s]

Epochs: 16369 | epoch avg. loss: 0.030 | test avg. loss: 4.212
Epochs: 16370 | epoch avg. loss: 0.019 | test avg. loss: 4.155
Epochs: 16371 | epoch avg. loss: 0.037 | test avg. loss: 4.174


 33%|███▎      | 16375/50000 [24:27<44:40, 12.54it/s]

Epochs: 16372 | epoch avg. loss: 0.018 | test avg. loss: 4.262
Epochs: 16373 | epoch avg. loss: 0.027 | test avg. loss: 4.280
Epochs: 16374 | epoch avg. loss: 0.021 | test avg. loss: 4.216


 33%|███▎      | 16377/50000 [24:27<44:38, 12.55it/s]

Epochs: 16375 | epoch avg. loss: 0.111 | test avg. loss: 4.312
Epochs: 16376 | epoch avg. loss: 0.038 | test avg. loss: 4.403
Epochs: 16377 | epoch avg. loss: 0.066 | test avg. loss: 4.056


 33%|███▎      | 16381/50000 [24:28<46:52, 11.95it/s]

Epochs: 16378 | epoch avg. loss: 0.060 | test avg. loss: 4.097
Epochs: 16379 | epoch avg. loss: 0.068 | test avg. loss: 4.658
Epochs: 16380 | epoch avg. loss: 0.198 | test avg. loss: 4.045


 33%|███▎      | 16383/50000 [24:28<49:19, 11.36it/s]

Epochs: 16381 | epoch avg. loss: 0.162 | test avg. loss: 4.090
Epochs: 16382 | epoch avg. loss: 0.081 | test avg. loss: 4.502
Epochs: 16383 | epoch avg. loss: 0.188 | test avg. loss: 4.421


 33%|███▎      | 16387/50000 [24:28<50:59, 10.99it/s]

Epochs: 16384 | epoch avg. loss: 0.060 | test avg. loss: 4.243
Epochs: 16385 | epoch avg. loss: 0.113 | test avg. loss: 4.253
Epochs: 16386 | epoch avg. loss: 0.066 | test avg. loss: 4.344


 33%|███▎      | 16389/50000 [24:28<47:56, 11.68it/s]

Epochs: 16387 | epoch avg. loss: 0.073 | test avg. loss: 4.034
Epochs: 16388 | epoch avg. loss: 0.307 | test avg. loss: 4.078
Epochs: 16389 | epoch avg. loss: 0.093 | test avg. loss: 4.778


 33%|███▎      | 16393/50000 [24:29<49:18, 11.36it/s]

Epochs: 16390 | epoch avg. loss: 0.317 | test avg. loss: 4.373
Epochs: 16391 | epoch avg. loss: 0.160 | test avg. loss: 4.830
Epochs: 16392 | epoch avg. loss: 0.736 | test avg. loss: 4.659


 33%|███▎      | 16395/50000 [24:29<48:08, 11.63it/s]

Epochs: 16393 | epoch avg. loss: 0.513 | test avg. loss: 5.631
Epochs: 16394 | epoch avg. loss: 0.778 | test avg. loss: 4.335
Epochs: 16395 | epoch avg. loss: 1.625 | test avg. loss: 4.711


 33%|███▎      | 16399/50000 [24:29<47:36, 11.76it/s]

Epochs: 16396 | epoch avg. loss: 0.873 | test avg. loss: 5.783
Epochs: 16397 | epoch avg. loss: 0.971 | test avg. loss: 4.909
Epochs: 16398 | epoch avg. loss: 0.216 | test avg. loss: 4.574


 33%|███▎      | 16399/50000 [24:29<47:36, 11.76it/s]

Epochs: 16399 | epoch avg. loss: 0.365 | test avg. loss: 4.257


 33%|███▎      | 16403/50000 [24:31<2:17:29,  4.07it/s]

Epochs: 16400 | epoch avg. loss: 0.132 | test avg. loss: 4.873
Epochs: 16401 | epoch avg. loss: 0.599 | test avg. loss: 4.051
Epochs: 16402 | epoch avg. loss: 0.094 | test avg. loss: 4.246


 33%|███▎      | 16407/50000 [24:31<1:25:39,  6.54it/s]

Epochs: 16403 | epoch avg. loss: 0.169 | test avg. loss: 5.344
Epochs: 16404 | epoch avg. loss: 0.699 | test avg. loss: 4.473
Epochs: 16405 | epoch avg. loss: 0.732 | test avg. loss: 4.703
Epochs: 16406 | epoch avg. loss: 0.528 | test avg. loss: 5.867


 33%|███▎      | 16409/50000 [24:32<1:13:28,  7.62it/s]

Epochs: 16407 | epoch avg. loss: 1.040 | test avg. loss: 4.196
Epochs: 16408 | epoch avg. loss: 0.546 | test avg. loss: 5.110
Epochs: 16409 | epoch avg. loss: 1.070 | test avg. loss: 5.543


 33%|███▎      | 16413/50000 [24:32<57:43,  9.70it/s]

Epochs: 16410 | epoch avg. loss: 0.897 | test avg. loss: 4.798
Epochs: 16411 | epoch avg. loss: 0.692 | test avg. loss: 5.491
Epochs: 16412 | epoch avg. loss: 1.002 | test avg. loss: 6.111
Epochs: 16413 | epoch avg. loss: 0.959 | test avg. loss: 4.497


 33%|███▎      | 16417/50000 [24:32<46:17, 12.09it/s]

Epochs: 16414 | epoch avg. loss: 0.223 | test avg. loss: 4.418
Epochs: 16415 | epoch avg. loss: 0.385 | test avg. loss: 4.940
Epochs: 16416 | epoch avg. loss: 0.358 | test avg. loss: 4.442
Epochs: 16417 | epoch avg. loss: 0.321 | test avg. loss: 4.646


                                                     

Epochs: 16418 | epoch avg. loss: 0.159 | test avg. loss: 5.014
Epochs: 16419 | epoch avg. loss: 0.258 | test avg. loss: 4.445
Epochs: 16420 | epoch avg. loss: 0.048 | test avg. loss: 4.078


 33%|███▎      | 16423/50000 [24:32<41:13, 13.58it/s]

Epochs: 16421 | epoch avg. loss: 0.083 | test avg. loss: 4.225
Epochs: 16422 | epoch avg. loss: 0.120 | test avg. loss: 4.088
Epochs: 16423 | epoch avg. loss: 0.077 | test avg. loss: 4.374


 33%|███▎      | 16427/50000 [24:33<42:29, 13.17it/s]

Epochs: 16424 | epoch avg. loss: 0.092 | test avg. loss: 4.693
Epochs: 16425 | epoch avg. loss: 0.092 | test avg. loss: 4.436
Epochs: 16426 | epoch avg. loss: 0.196 | test avg. loss: 4.590


 33%|███▎      | 16431/50000 [24:33<39:04, 14.32it/s]

Epochs: 16427 | epoch avg. loss: 0.122 | test avg. loss: 4.267
Epochs: 16428 | epoch avg. loss: 0.144 | test avg. loss: 4.138
Epochs: 16429 | epoch avg. loss: 0.095 | test avg. loss: 4.453
Epochs: 16430 | epoch avg. loss: 0.206 | test avg. loss: 4.097


 33%|███▎      | 16433/50000 [24:33<38:07, 14.68it/s]

Epochs: 16431 | epoch avg. loss: 0.067 | test avg. loss: 4.408
Epochs: 16432 | epoch avg. loss: 0.143 | test avg. loss: 4.434
Epochs: 16433 | epoch avg. loss: 0.076 | test avg. loss: 4.345
Epochs: 16434 | epoch avg. loss: 0.207 | test avg. loss: 4.628


 33%|███▎      | 16437/50000 [24:33<38:38, 14.47it/s]

Epochs: 16435 | epoch avg. loss: 0.130 | test avg. loss: 4.293
Epochs: 16436 | epoch avg. loss: 0.063 | test avg. loss: 4.232
Epochs: 16437 | epoch avg. loss: 0.042 | test avg. loss: 4.334


 33%|███▎      | 16441/50000 [24:34<40:04, 13.96it/s]

Epochs: 16438 | epoch avg. loss: 0.031 | test avg. loss: 4.271
Epochs: 16439 | epoch avg. loss: 0.037 | test avg. loss: 4.527
Epochs: 16440 | epoch avg. loss: 0.053 | test avg. loss: 4.301


 33%|███▎      | 16445/50000 [24:34<39:57, 14.00it/s]

Epochs: 16441 | epoch avg. loss: 0.091 | test avg. loss: 4.300
Epochs: 16442 | epoch avg. loss: 0.042 | test avg. loss: 4.521
Epochs: 16443 | epoch avg. loss: 0.069 | test avg. loss: 4.218
Epochs: 16444 | epoch avg. loss: 0.067 | test avg. loss: 4.252


 33%|███▎      | 16449/50000 [24:34<38:22, 14.57it/s]

Epochs: 16445 | epoch avg. loss: 0.072 | test avg. loss: 4.403
Epochs: 16446 | epoch avg. loss: 0.113 | test avg. loss: 4.120
Epochs: 16447 | epoch avg. loss: 0.114 | test avg. loss: 4.224
Epochs: 16448 | epoch avg. loss: 0.039 | test avg. loss: 4.512


 33%|███▎      | 16451/50000 [24:34<38:15, 14.62it/s]

Epochs: 16449 | epoch avg. loss: 0.060 | test avg. loss: 4.334
Epochs: 16450 | epoch avg. loss: 0.103 | test avg. loss: 4.280
Epochs: 16451 | epoch avg. loss: 0.038 | test avg. loss: 4.384


 33%|███▎      | 16455/50000 [24:35<39:02, 14.32it/s]

Epochs: 16452 | epoch avg. loss: 0.064 | test avg. loss: 4.126
Epochs: 16453 | epoch avg. loss: 0.178 | test avg. loss: 4.343
Epochs: 16454 | epoch avg. loss: 0.062 | test avg. loss: 4.444


 33%|███▎      | 16457/50000 [24:35<40:59, 13.64it/s]

Epochs: 16455 | epoch avg. loss: 0.054 | test avg. loss: 4.359
Epochs: 16456 | epoch avg. loss: 0.069 | test avg. loss: 4.486
Epochs: 16457 | epoch avg. loss: 0.052 | test avg. loss: 4.258


 33%|███▎      | 16461/50000 [24:35<41:33, 13.45it/s]

Epochs: 16458 | epoch avg. loss: 0.030 | test avg. loss: 4.154
Epochs: 16459 | epoch avg. loss: 0.041 | test avg. loss: 4.285
Epochs: 16460 | epoch avg. loss: 0.025 | test avg. loss: 4.249


                                                     

Epochs: 16461 | epoch avg. loss: 0.015 | test avg. loss: 4.428
Epochs: 16462 | epoch avg. loss: 0.043 | test avg. loss: 4.338
Epochs: 16463 | epoch avg. loss: 0.035 | test avg. loss: 4.293


 33%|███▎      | 16467/50000 [24:36<39:09, 14.27it/s]

Epochs: 16464 | epoch avg. loss: 0.023 | test avg. loss: 4.357
Epochs: 16465 | epoch avg. loss: 0.034 | test avg. loss: 4.155
Epochs: 16466 | epoch avg. loss: 0.052 | test avg. loss: 4.196
Epochs: 16467 | epoch avg. loss: 0.018 | test avg. loss: 4.304


 33%|███▎      | 16471/50000 [24:36<39:04, 14.30it/s]

Epochs: 16468 | epoch avg. loss: 0.021 | test avg. loss: 4.232
Epochs: 16469 | epoch avg. loss: 0.019 | test avg. loss: 4.373
Epochs: 16470 | epoch avg. loss: 0.027 | test avg. loss: 4.226


 33%|███▎      | 16475/50000 [24:36<37:51, 14.76it/s]

Epochs: 16471 | epoch avg. loss: 0.072 | test avg. loss: 4.202
Epochs: 16472 | epoch avg. loss: 0.033 | test avg. loss: 4.399
Epochs: 16473 | epoch avg. loss: 0.061 | test avg. loss: 4.122
Epochs: 16474 | epoch avg. loss: 0.042 | test avg. loss: 4.360


 33%|███▎      | 16479/50000 [24:36<36:27, 15.32it/s]

Epochs: 16475 | epoch avg. loss: 0.133 | test avg. loss: 4.267
Epochs: 16476 | epoch avg. loss: 0.049 | test avg. loss: 4.155
Epochs: 16477 | epoch avg. loss: 0.051 | test avg. loss: 4.418
Epochs: 16478 | epoch avg. loss: 0.087 | test avg. loss: 4.141


 33%|███▎      | 16481/50000 [24:37<37:33, 14.87it/s]

Epochs: 16479 | epoch avg. loss: 0.088 | test avg. loss: 4.165
Epochs: 16480 | epoch avg. loss: 0.055 | test avg. loss: 4.681
Epochs: 16481 | epoch avg. loss: 0.253 | test avg. loss: 4.130


                                                     

Epochs: 16482 | epoch avg. loss: 0.129 | test avg. loss: 4.188
Epochs: 16483 | epoch avg. loss: 0.068 | test avg. loss: 4.469
Epochs: 16484 | epoch avg. loss: 0.088 | test avg. loss: 4.264


 33%|███▎      | 16487/50000 [24:37<38:19, 14.58it/s]

Epochs: 16485 | epoch avg. loss: 0.066 | test avg. loss: 4.226
Epochs: 16486 | epoch avg. loss: 0.037 | test avg. loss: 4.458
Epochs: 16487 | epoch avg. loss: 0.080 | test avg. loss: 4.144


 33%|███▎      | 16491/50000 [24:37<37:50, 14.76it/s]

Epochs: 16488 | epoch avg. loss: 0.110 | test avg. loss: 4.221
Epochs: 16489 | epoch avg. loss: 0.034 | test avg. loss: 4.391
Epochs: 16490 | epoch avg. loss: 0.042 | test avg. loss: 4.304
Epochs: 16491 | epoch avg. loss: 0.036 | test avg. loss: 4.424


 33%|███▎      | 16495/50000 [24:37<38:15, 14.60it/s]

Epochs: 16492 | epoch avg. loss: 0.039 | test avg. loss: 4.388
Epochs: 16493 | epoch avg. loss: 0.025 | test avg. loss: 4.276
Epochs: 16494 | epoch avg. loss: 0.023 | test avg. loss: 4.417


 33%|███▎      | 16499/50000 [24:38<37:37, 14.84it/s]

Epochs: 16495 | epoch avg. loss: 0.062 | test avg. loss: 4.175
Epochs: 16496 | epoch avg. loss: 0.027 | test avg. loss: 4.278
Epochs: 16497 | epoch avg. loss: 0.046 | test avg. loss: 4.313
Epochs: 16498 | epoch avg. loss: 0.046 | test avg. loss: 4.245


 33%|███▎      | 16499/50000 [24:38<37:37, 14.84it/s]

Epochs: 16499 | epoch avg. loss: 0.134 | test avg. loss: 4.571


 33%|███▎      | 16503/50000 [24:39<1:49:09,  5.11it/s]

Epochs: 16500 | epoch avg. loss: 0.175 | test avg. loss: 4.326
Epochs: 16501 | epoch avg. loss: 0.082 | test avg. loss: 4.217
Epochs: 16502 | epoch avg. loss: 0.242 | test avg. loss: 4.964
Epochs: 16503 | epoch avg. loss: 0.608 | test avg. loss: 4.164


                                                       

Epochs: 16504 | epoch avg. loss: 0.486 | test avg. loss: 4.659
Epochs: 16505 | epoch avg. loss: 0.769 | test avg. loss: 4.829
Epochs: 16506 | epoch avg. loss: 0.517 | test avg. loss: 4.295


 33%|███▎      | 16511/50000 [24:40<53:24, 10.45it/s]  

Epochs: 16507 | epoch avg. loss: 0.218 | test avg. loss: 3.898
Epochs: 16508 | epoch avg. loss: 0.563 | test avg. loss: 4.092
Epochs: 16509 | epoch avg. loss: 0.181 | test avg. loss: 4.688
Epochs: 16510 | epoch avg. loss: 0.201 | test avg. loss: 5.027


 33%|███▎      | 16515/50000 [24:40<44:01, 12.68it/s]

Epochs: 16511 | epoch avg. loss: 0.765 | test avg. loss: 5.002
Epochs: 16512 | epoch avg. loss: 0.438 | test avg. loss: 4.897
Epochs: 16513 | epoch avg. loss: 0.275 | test avg. loss: 4.074
Epochs: 16514 | epoch avg. loss: 0.420 | test avg. loss: 4.011


 33%|███▎      | 16517/50000 [24:40<41:40, 13.39it/s]

Epochs: 16515 | epoch avg. loss: 0.145 | test avg. loss: 3.967
Epochs: 16516 | epoch avg. loss: 0.117 | test avg. loss: 4.233
Epochs: 16517 | epoch avg. loss: 0.136 | test avg. loss: 5.366


 33%|███▎      | 16521/50000 [24:40<41:15, 13.52it/s]

Epochs: 16518 | epoch avg. loss: 0.442 | test avg. loss: 4.619
Epochs: 16519 | epoch avg. loss: 0.288 | test avg. loss: 4.574
Epochs: 16520 | epoch avg. loss: 0.123 | test avg. loss: 4.690


 33%|███▎      | 16523/50000 [24:41<41:41, 13.38it/s]

Epochs: 16521 | epoch avg. loss: 0.131 | test avg. loss: 4.099
Epochs: 16522 | epoch avg. loss: 0.071 | test avg. loss: 4.081
Epochs: 16523 | epoch avg. loss: 0.056 | test avg. loss: 4.181


 33%|███▎      | 16527/50000 [24:41<42:10, 13.23it/s]

Epochs: 16524 | epoch avg. loss: 0.043 | test avg. loss: 4.363
Epochs: 16525 | epoch avg. loss: 0.035 | test avg. loss: 4.593
Epochs: 16526 | epoch avg. loss: 0.036 | test avg. loss: 4.451


 33%|███▎      | 16529/50000 [24:41<44:34, 12.51it/s]

Epochs: 16527 | epoch avg. loss: 0.045 | test avg. loss: 4.505
Epochs: 16528 | epoch avg. loss: 0.023 | test avg. loss: 4.279
Epochs: 16529 | epoch avg. loss: 0.096 | test avg. loss: 4.366


 33%|███▎      | 16533/50000 [24:41<43:46, 12.74it/s]

Epochs: 16530 | epoch avg. loss: 0.138 | test avg. loss: 4.600
Epochs: 16531 | epoch avg. loss: 0.142 | test avg. loss: 4.325
Epochs: 16532 | epoch avg. loss: 0.050 | test avg. loss: 4.559


 33%|███▎      | 16537/50000 [24:42<39:23, 14.16it/s]

Epochs: 16533 | epoch avg. loss: 0.061 | test avg. loss: 4.638
Epochs: 16534 | epoch avg. loss: 0.068 | test avg. loss: 4.302
Epochs: 16535 | epoch avg. loss: 0.053 | test avg. loss: 4.259
Epochs: 16536 | epoch avg. loss: 0.032 | test avg. loss: 4.495


 33%|███▎      | 16539/50000 [24:42<38:32, 14.47it/s]

Epochs: 16537 | epoch avg. loss: 0.077 | test avg. loss: 4.320
Epochs: 16538 | epoch avg. loss: 0.031 | test avg. loss: 4.336
Epochs: 16539 | epoch avg. loss: 0.028 | test avg. loss: 4.498


 33%|███▎      | 16543/50000 [24:42<41:52, 13.32it/s]

Epochs: 16540 | epoch avg. loss: 0.021 | test avg. loss: 4.415
Epochs: 16541 | epoch avg. loss: 0.028 | test avg. loss: 4.378
Epochs: 16542 | epoch avg. loss: 0.017 | test avg. loss: 4.409


 33%|███▎      | 16545/50000 [24:42<40:40, 13.71it/s]

Epochs: 16543 | epoch avg. loss: 0.021 | test avg. loss: 4.369
Epochs: 16544 | epoch avg. loss: 0.015 | test avg. loss: 4.310
Epochs: 16545 | epoch avg. loss: 0.028 | test avg. loss: 4.459


 33%|███▎      | 16549/50000 [24:43<44:50, 12.43it/s]

Epochs: 16546 | epoch avg. loss: 0.025 | test avg. loss: 4.380
Epochs: 16547 | epoch avg. loss: 0.024 | test avg. loss: 4.409
Epochs: 16548 | epoch avg. loss: 0.023 | test avg. loss: 4.545


 33%|███▎      | 16551/50000 [24:43<45:24, 12.28it/s]

Epochs: 16549 | epoch avg. loss: 0.021 | test avg. loss: 4.483
Epochs: 16550 | epoch avg. loss: 0.022 | test avg. loss: 4.426
Epochs: 16551 | epoch avg. loss: 0.034 | test avg. loss: 4.478


 33%|███▎      | 16555/50000 [24:43<48:52, 11.40it/s]

Epochs: 16552 | epoch avg. loss: 0.034 | test avg. loss: 4.425
Epochs: 16553 | epoch avg. loss: 0.030 | test avg. loss: 4.277
Epochs: 16554 | epoch avg. loss: 0.092 | test avg. loss: 4.420


 33%|███▎      | 16557/50000 [24:43<48:11, 11.57it/s]

Epochs: 16555 | epoch avg. loss: 0.054 | test avg. loss: 4.481
Epochs: 16556 | epoch avg. loss: 0.042 | test avg. loss: 4.303
Epochs: 16557 | epoch avg. loss: 0.122 | test avg. loss: 4.395


 33%|███▎      | 16561/50000 [24:44<47:59, 11.61it/s]

Epochs: 16558 | epoch avg. loss: 0.017 | test avg. loss: 4.371
Epochs: 16559 | epoch avg. loss: 0.032 | test avg. loss: 4.370
Epochs: 16560 | epoch avg. loss: 0.025 | test avg. loss: 4.488


 33%|███▎      | 16563/50000 [24:44<47:40, 11.69it/s]

Epochs: 16561 | epoch avg. loss: 0.027 | test avg. loss: 4.337
Epochs: 16562 | epoch avg. loss: 0.036 | test avg. loss: 4.375
Epochs: 16563 | epoch avg. loss: 0.024 | test avg. loss: 4.567


 33%|███▎      | 16567/50000 [24:44<45:24, 12.27it/s]

Epochs: 16564 | epoch avg. loss: 0.067 | test avg. loss: 4.499
Epochs: 16565 | epoch avg. loss: 0.034 | test avg. loss: 4.439
Epochs: 16566 | epoch avg. loss: 0.147 | test avg. loss: 4.549


 33%|███▎      | 16571/50000 [24:44<40:04, 13.90it/s]

Epochs: 16567 | epoch avg. loss: 0.091 | test avg. loss: 4.556
Epochs: 16568 | epoch avg. loss: 0.063 | test avg. loss: 4.323
Epochs: 16569 | epoch avg. loss: 0.107 | test avg. loss: 4.570
Epochs: 16570 | epoch avg. loss: 0.102 | test avg. loss: 4.450


 33%|███▎      | 16573/50000 [24:45<42:17, 13.17it/s]

Epochs: 16571 | epoch avg. loss: 0.043 | test avg. loss: 4.425
Epochs: 16572 | epoch avg. loss: 0.050 | test avg. loss: 4.686
Epochs: 16573 | epoch avg. loss: 0.105 | test avg. loss: 4.243


 33%|███▎      | 16577/50000 [24:45<43:32, 12.79it/s]

Epochs: 16574 | epoch avg. loss: 0.054 | test avg. loss: 4.209
Epochs: 16575 | epoch avg. loss: 0.057 | test avg. loss: 4.606
Epochs: 16576 | epoch avg. loss: 0.109 | test avg. loss: 4.333


 33%|███▎      | 16581/50000 [24:45<39:52, 13.97it/s]

Epochs: 16577 | epoch avg. loss: 0.316 | test avg. loss: 4.470
Epochs: 16578 | epoch avg. loss: 0.086 | test avg. loss: 5.152
Epochs: 16579 | epoch avg. loss: 0.231 | test avg. loss: 4.470
Epochs: 16580 | epoch avg. loss: 0.076 | test avg. loss: 4.252


 33%|███▎      | 16583/50000 [24:45<41:22, 13.46it/s]

Epochs: 16581 | epoch avg. loss: 0.042 | test avg. loss: 4.258
Epochs: 16582 | epoch avg. loss: 0.049 | test avg. loss: 4.221
Epochs: 16583 | epoch avg. loss: 0.151 | test avg. loss: 4.585


 33%|███▎      | 16587/50000 [24:46<46:49, 11.89it/s]

Epochs: 16584 | epoch avg. loss: 0.087 | test avg. loss: 4.482
Epochs: 16585 | epoch avg. loss: 0.183 | test avg. loss: 4.445
Epochs: 16586 | epoch avg. loss: 0.115 | test avg. loss: 4.754


 33%|███▎      | 16589/50000 [24:46<46:52, 11.88it/s]

Epochs: 16587 | epoch avg. loss: 0.131 | test avg. loss: 4.172
Epochs: 16588 | epoch avg. loss: 0.146 | test avg. loss: 4.194
Epochs: 16589 | epoch avg. loss: 0.129 | test avg. loss: 4.702


 33%|███▎      | 16593/50000 [24:46<42:44, 13.03it/s]

Epochs: 16590 | epoch avg. loss: 0.138 | test avg. loss: 4.568
Epochs: 16591 | epoch avg. loss: 0.477 | test avg. loss: 4.660
Epochs: 16592 | epoch avg. loss: 0.384 | test avg. loss: 5.193


 33%|███▎      | 16595/50000 [24:46<41:50, 13.31it/s]

Epochs: 16593 | epoch avg. loss: 0.372 | test avg. loss: 4.562
Epochs: 16594 | epoch avg. loss: 0.519 | test avg. loss: 4.552
Epochs: 16595 | epoch avg. loss: 0.145 | test avg. loss: 5.031


 33%|███▎      | 16597/50000 [24:46<41:59, 13.26it/s]

Epochs: 16596 | epoch avg. loss: 0.260 | test avg. loss: 4.481
Epochs: 16597 | epoch avg. loss: 0.296 | test avg. loss: 4.451


 33%|███▎      | 16599/50000 [24:47<47:14, 11.79it/s]

Epochs: 16598 | epoch avg. loss: 0.157 | test avg. loss: 4.834
Epochs: 16599 | epoch avg. loss: 0.236 | test avg. loss: 4.430


 33%|███▎      | 16603/50000 [24:48<2:17:05,  4.06it/s]

Epochs: 16600 | epoch avg. loss: 0.556 | test avg. loss: 4.344
Epochs: 16601 | epoch avg. loss: 0.166 | test avg. loss: 4.808
Epochs: 16602 | epoch avg. loss: 0.163 | test avg. loss: 4.798


 33%|███▎      | 16607/50000 [24:49<1:25:21,  6.52it/s]

Epochs: 16603 | epoch avg. loss: 0.513 | test avg. loss: 4.908
Epochs: 16604 | epoch avg. loss: 0.562 | test avg. loss: 4.971
Epochs: 16605 | epoch avg. loss: 0.340 | test avg. loss: 4.404
Epochs: 16606 | epoch avg. loss: 0.539 | test avg. loss: 4.344


 33%|███▎      | 16611/50000 [24:49<1:00:50,  9.15it/s]

Epochs: 16607 | epoch avg. loss: 0.188 | test avg. loss: 4.520
Epochs: 16608 | epoch avg. loss: 0.124 | test avg. loss: 4.370
Epochs: 16609 | epoch avg. loss: 0.251 | test avg. loss: 4.440
Epochs: 16610 | epoch avg. loss: 0.070 | test avg. loss: 4.294


 33%|███▎      | 16615/50000 [24:49<47:36, 11.69it/s]

Epochs: 16611 | epoch avg. loss: 0.040 | test avg. loss: 4.149
Epochs: 16612 | epoch avg. loss: 0.048 | test avg. loss: 4.181
Epochs: 16613 | epoch avg. loss: 0.043 | test avg. loss: 4.219
Epochs: 16614 | epoch avg. loss: 0.024 | test avg. loss: 4.305


 33%|███▎      | 16617/50000 [24:49<44:20, 12.55it/s]

Epochs: 16615 | epoch avg. loss: 0.023 | test avg. loss: 4.510
Epochs: 16616 | epoch avg. loss: 0.054 | test avg. loss: 4.350
Epochs: 16617 | epoch avg. loss: 0.021 | test avg. loss: 4.219


 33%|███▎      | 16621/50000 [24:50<43:29, 12.79it/s]

Epochs: 16618 | epoch avg. loss: 0.027 | test avg. loss: 4.227
Epochs: 16619 | epoch avg. loss: 0.017 | test avg. loss: 4.268
Epochs: 16620 | epoch avg. loss: 0.030 | test avg. loss: 4.351


 33%|███▎      | 16623/50000 [24:50<44:59, 12.37it/s]

Epochs: 16621 | epoch avg. loss: 0.036 | test avg. loss: 4.639
Epochs: 16622 | epoch avg. loss: 0.082 | test avg. loss: 4.395
Epochs: 16623 | epoch avg. loss: 0.064 | test avg. loss: 4.278


 33%|███▎      | 16627/50000 [24:50<45:13, 12.30it/s]

Epochs: 16624 | epoch avg. loss: 0.040 | test avg. loss: 4.383
Epochs: 16625 | epoch avg. loss: 0.073 | test avg. loss: 4.107
Epochs: 16626 | epoch avg. loss: 0.047 | test avg. loss: 4.144


 33%|███▎      | 16629/50000 [24:50<43:35, 12.76it/s]

Epochs: 16627 | epoch avg. loss: 0.026 | test avg. loss: 4.377
Epochs: 16628 | epoch avg. loss: 0.030 | test avg. loss: 4.320
Epochs: 16629 | epoch avg. loss: 0.040 | test avg. loss: 4.386




Epochs: 16630 | epoch avg. loss: 0.018 | test avg. loss: 4.442
Epochs: 16631 | epoch avg. loss: 0.019 | test avg. loss: 4.373
Epochs: 16632 | epoch avg. loss: 0.015 | test avg. loss: 4.385


 33%|███▎      | 16637/50000 [24:51<39:03, 14.24it/s]

Epochs: 16633 | epoch avg. loss: 0.024 | test avg. loss: 4.341
Epochs: 16634 | epoch avg. loss: 0.014 | test avg. loss: 4.303
Epochs: 16635 | epoch avg. loss: 0.015 | test avg. loss: 4.539
Epochs: 16636 | epoch avg. loss: 0.087 | test avg. loss: 4.466


 33%|███▎      | 16639/50000 [24:51<42:29, 13.08it/s]

Epochs: 16637 | epoch avg. loss: 0.030 | test avg. loss: 4.365
Epochs: 16638 | epoch avg. loss: 0.049 | test avg. loss: 4.541
Epochs: 16639 | epoch avg. loss: 0.087 | test avg. loss: 4.281


 33%|███▎      | 16643/50000 [24:51<42:56, 12.95it/s]

Epochs: 16640 | epoch avg. loss: 0.052 | test avg. loss: 4.256
Epochs: 16641 | epoch avg. loss: 0.052 | test avg. loss: 4.440
Epochs: 16642 | epoch avg. loss: 0.032 | test avg. loss: 4.377


 33%|███▎      | 16645/50000 [24:52<44:31, 12.48it/s]

Epochs: 16643 | epoch avg. loss: 0.029 | test avg. loss: 4.349
Epochs: 16644 | epoch avg. loss: 0.068 | test avg. loss: 4.509
Epochs: 16645 | epoch avg. loss: 0.067 | test avg. loss: 4.366




Epochs: 16646 | epoch avg. loss: 0.041 | test avg. loss: 4.295
Epochs: 16647 | epoch avg. loss: 0.045 | test avg. loss: 4.529
Epochs: 16648 | epoch avg. loss: 0.096 | test avg. loss: 4.272


 33%|███▎      | 16651/50000 [24:52<40:02, 13.88it/s]

Epochs: 16649 | epoch avg. loss: 0.098 | test avg. loss: 4.245
Epochs: 16650 | epoch avg. loss: 0.078 | test avg. loss: 4.695
Epochs: 16651 | epoch avg. loss: 0.141 | test avg. loss: 4.308




Epochs: 16652 | epoch avg. loss: 0.117 | test avg. loss: 4.337
Epochs: 16653 | epoch avg. loss: 0.149 | test avg. loss: 4.537
Epochs: 16654 | epoch avg. loss: 0.139 | test avg. loss: 4.362


 33%|███▎      | 16657/50000 [24:52<42:30, 13.07it/s]

Epochs: 16655 | epoch avg. loss: 0.524 | test avg. loss: 4.365
Epochs: 16656 | epoch avg. loss: 0.133 | test avg. loss: 5.037


 33%|███▎      | 16661/50000 [24:53<41:21, 13.43it/s]

Epochs: 16657 | epoch avg. loss: 0.262 | test avg. loss: 4.482
Epochs: 16658 | epoch avg. loss: 0.291 | test avg. loss: 4.521
Epochs: 16659 | epoch avg. loss: 0.258 | test avg. loss: 4.990
Epochs: 16660 | epoch avg. loss: 0.362 | test avg. loss: 4.698


 33%|███▎      | 16665/50000 [24:53<38:00, 14.62it/s]

Epochs: 16661 | epoch avg. loss: 0.987 | test avg. loss: 4.425
Epochs: 16662 | epoch avg. loss: 0.484 | test avg. loss: 4.845
Epochs: 16663 | epoch avg. loss: 0.322 | test avg. loss: 4.843
Epochs: 16664 | epoch avg. loss: 0.346 | test avg. loss: 5.418




Epochs: 16665 | epoch avg. loss: 0.365 | test avg. loss: 4.221
Epochs: 16666 | epoch avg. loss: 0.168 | test avg. loss: 4.186
Epochs: 16667 | epoch avg. loss: 0.117 | test avg. loss: 4.636


 33%|███▎      | 16671/50000 [24:53<40:34, 13.69it/s]

Epochs: 16668 | epoch avg. loss: 0.156 | test avg. loss: 4.526
Epochs: 16669 | epoch avg. loss: 0.268 | test avg. loss: 4.560
Epochs: 16670 | epoch avg. loss: 0.092 | test avg. loss: 4.703


 33%|███▎      | 16673/50000 [24:54<44:53, 12.37it/s]

Epochs: 16671 | epoch avg. loss: 0.093 | test avg. loss: 4.222
Epochs: 16672 | epoch avg. loss: 0.166 | test avg. loss: 4.237
Epochs: 16673 | epoch avg. loss: 0.091 | test avg. loss: 4.277


 33%|███▎      | 16677/50000 [24:54<40:34, 13.69it/s]

Epochs: 16674 | epoch avg. loss: 0.066 | test avg. loss: 4.264
Epochs: 16675 | epoch avg. loss: 0.094 | test avg. loss: 4.677
Epochs: 16676 | epoch avg. loss: 0.135 | test avg. loss: 4.998
Epochs: 16677 | epoch avg. loss: 0.164 | test avg. loss: 4.447


 33%|███▎      | 16681/50000 [24:54<39:28, 14.07it/s]

Epochs: 16678 | epoch avg. loss: 0.218 | test avg. loss: 4.320
Epochs: 16679 | epoch avg. loss: 0.042 | test avg. loss: 4.493
Epochs: 16680 | epoch avg. loss: 0.081 | test avg. loss: 4.269


 33%|███▎      | 16685/50000 [24:54<37:45, 14.70it/s]

Epochs: 16681 | epoch avg. loss: 0.046 | test avg. loss: 4.398
Epochs: 16682 | epoch avg. loss: 0.018 | test avg. loss: 4.429
Epochs: 16683 | epoch avg. loss: 0.024 | test avg. loss: 4.397
Epochs: 16684 | epoch avg. loss: 0.020 | test avg. loss: 4.472




Epochs: 16685 | epoch avg. loss: 0.031 | test avg. loss: 4.316
Epochs: 16686 | epoch avg. loss: 0.048 | test avg. loss: 4.327
Epochs: 16687 | epoch avg. loss: 0.023 | test avg. loss: 4.483


 33%|███▎      | 16691/50000 [24:55<38:44, 14.33it/s]

Epochs: 16688 | epoch avg. loss: 0.036 | test avg. loss: 4.297
Epochs: 16689 | epoch avg. loss: 0.067 | test avg. loss: 4.318
Epochs: 16690 | epoch avg. loss: 0.027 | test avg. loss: 4.539


                                                     

Epochs: 16691 | epoch avg. loss: 0.055 | test avg. loss: 4.250
Epochs: 16692 | epoch avg. loss: 0.064 | test avg. loss: 4.279
Epochs: 16693 | epoch avg. loss: 0.023 | test avg. loss: 4.400


 33%|███▎      | 16697/50000 [24:55<39:39, 14.00it/s]

Epochs: 16694 | epoch avg. loss: 0.021 | test avg. loss: 4.295
Epochs: 16695 | epoch avg. loss: 0.039 | test avg. loss: 4.317
Epochs: 16696 | epoch avg. loss: 0.022 | test avg. loss: 4.488


 33%|███▎      | 16699/50000 [24:55<39:56, 13.90it/s]

Epochs: 16697 | epoch avg. loss: 0.076 | test avg. loss: 4.262
Epochs: 16698 | epoch avg. loss: 0.030 | test avg. loss: 4.275
Epochs: 16699 | epoch avg. loss: 0.025 | test avg. loss: 4.492


 33%|███▎      | 16703/50000 [24:57<1:50:49,  5.01it/s]

Epochs: 16700 | epoch avg. loss: 0.051 | test avg. loss: 4.318
Epochs: 16701 | epoch avg. loss: 0.120 | test avg. loss: 4.341
Epochs: 16702 | epoch avg. loss: 0.053 | test avg. loss: 4.651


 33%|███▎      | 16707/50000 [24:57<1:13:16,  7.57it/s]

Epochs: 16703 | epoch avg. loss: 0.110 | test avg. loss: 4.294
Epochs: 16704 | epoch avg. loss: 0.127 | test avg. loss: 4.418
Epochs: 16705 | epoch avg. loss: 0.088 | test avg. loss: 4.800
Epochs: 16706 | epoch avg. loss: 0.162 | test avg. loss: 4.282


 33%|███▎      | 16709/50000 [24:57<1:04:44,  8.57it/s]

Epochs: 16707 | epoch avg. loss: 0.108 | test avg. loss: 4.304
Epochs: 16708 | epoch avg. loss: 0.023 | test avg. loss: 4.365
Epochs: 16709 | epoch avg. loss: 0.021 | test avg. loss: 4.389


                                                     

Epochs: 16710 | epoch avg. loss: 0.025 | test avg. loss: 4.562
Epochs: 16711 | epoch avg. loss: 0.037 | test avg. loss: 4.360
Epochs: 16712 | epoch avg. loss: 0.046 | test avg. loss: 4.329


 33%|███▎      | 16717/50000 [24:58<44:01, 12.60it/s]

Epochs: 16713 | epoch avg. loss: 0.034 | test avg. loss: 4.397
Epochs: 16714 | epoch avg. loss: 0.044 | test avg. loss: 4.225
Epochs: 16715 | epoch avg. loss: 0.076 | test avg. loss: 4.371
Epochs: 16716 | epoch avg. loss: 0.049 | test avg. loss: 4.580


 33%|███▎      | 16721/50000 [24:58<40:14, 13.79it/s]

Epochs: 16717 | epoch avg. loss: 0.067 | test avg. loss: 4.260
Epochs: 16718 | epoch avg. loss: 0.086 | test avg. loss: 4.371
Epochs: 16719 | epoch avg. loss: 0.107 | test avg. loss: 4.293
Epochs: 16720 | epoch avg. loss: 0.094 | test avg. loss: 4.376


 33%|███▎      | 16723/50000 [24:58<38:32, 14.39it/s]

Epochs: 16721 | epoch avg. loss: 0.445 | test avg. loss: 4.486
Epochs: 16722 | epoch avg. loss: 0.237 | test avg. loss: 4.762
Epochs: 16723 | epoch avg. loss: 0.194 | test avg. loss: 4.409


 33%|███▎      | 16727/50000 [24:59<39:00, 14.21it/s]

Epochs: 16724 | epoch avg. loss: 0.325 | test avg. loss: 4.450
Epochs: 16725 | epoch avg. loss: 0.100 | test avg. loss: 4.622
Epochs: 16726 | epoch avg. loss: 0.101 | test avg. loss: 4.238
Epochs: 16727 | epoch avg. loss: 0.199 | test avg. loss: 4.242


 33%|███▎      | 16731/50000 [24:59<36:45, 15.08it/s]

Epochs: 16728 | epoch avg. loss: 0.041 | test avg. loss: 4.641
Epochs: 16729 | epoch avg. loss: 0.099 | test avg. loss: 4.300
Epochs: 16730 | epoch avg. loss: 0.152 | test avg. loss: 4.317
Epochs: 16731 | epoch avg. loss: 0.057 | test avg. loss: 4.696


 33%|███▎      | 16735/50000 [24:59<35:32, 15.60it/s]

Epochs: 16732 | epoch avg. loss: 0.157 | test avg. loss: 4.401
Epochs: 16733 | epoch avg. loss: 0.061 | test avg. loss: 4.463
Epochs: 16734 | epoch avg. loss: 0.052 | test avg. loss: 4.577
Epochs: 16735 | epoch avg. loss: 0.071 | test avg. loss: 4.212


 33%|███▎      | 16739/50000 [24:59<35:38, 15.55it/s]

Epochs: 16736 | epoch avg. loss: 0.095 | test avg. loss: 4.232
Epochs: 16737 | epoch avg. loss: 0.044 | test avg. loss: 4.305
Epochs: 16738 | epoch avg. loss: 0.047 | test avg. loss: 4.193


 33%|███▎      | 16741/50000 [25:00<38:44, 14.31it/s]

Epochs: 16739 | epoch avg. loss: 0.129 | test avg. loss: 4.345
Epochs: 16740 | epoch avg. loss: 0.036 | test avg. loss: 4.384
Epochs: 16741 | epoch avg. loss: 0.040 | test avg. loss: 4.330


 33%|███▎      | 16745/50000 [25:00<42:12, 13.13it/s]

Epochs: 16742 | epoch avg. loss: 0.125 | test avg. loss: 4.479
Epochs: 16743 | epoch avg. loss: 0.057 | test avg. loss: 4.573
Epochs: 16744 | epoch avg. loss: 0.045 | test avg. loss: 4.396


 33%|███▎      | 16747/50000 [25:00<41:22, 13.39it/s]

Epochs: 16745 | epoch avg. loss: 0.082 | test avg. loss: 4.367
Epochs: 16746 | epoch avg. loss: 0.046 | test avg. loss: 4.503
Epochs: 16747 | epoch avg. loss: 0.101 | test avg. loss: 4.224


 34%|███▎      | 16751/50000 [25:00<40:27, 13.70it/s]

Epochs: 16748 | epoch avg. loss: 0.074 | test avg. loss: 4.353
Epochs: 16749 | epoch avg. loss: 0.038 | test avg. loss: 4.804
Epochs: 16750 | epoch avg. loss: 0.123 | test avg. loss: 4.303


 34%|███▎      | 16753/50000 [25:00<41:11, 13.45it/s]

Epochs: 16751 | epoch avg. loss: 0.103 | test avg. loss: 4.265
Epochs: 16752 | epoch avg. loss: 0.051 | test avg. loss: 4.737
Epochs: 16753 | epoch avg. loss: 0.176 | test avg. loss: 4.258


 34%|███▎      | 16757/50000 [25:01<41:03, 13.49it/s]

Epochs: 16754 | epoch avg. loss: 0.130 | test avg. loss: 4.248
Epochs: 16755 | epoch avg. loss: 0.072 | test avg. loss: 4.873
Epochs: 16756 | epoch avg. loss: 0.282 | test avg. loss: 4.193


 34%|███▎      | 16759/50000 [25:01<43:15, 12.81it/s]

Epochs: 16757 | epoch avg. loss: 0.256 | test avg. loss: 4.342
Epochs: 16758 | epoch avg. loss: 0.105 | test avg. loss: 4.779
Epochs: 16759 | epoch avg. loss: 0.098 | test avg. loss: 4.565


 34%|███▎      | 16763/50000 [25:01<45:19, 12.22it/s]

Epochs: 16760 | epoch avg. loss: 0.082 | test avg. loss: 4.489
Epochs: 16761 | epoch avg. loss: 0.054 | test avg. loss: 4.674
Epochs: 16762 | epoch avg. loss: 0.097 | test avg. loss: 4.289


 34%|███▎      | 16765/50000 [25:01<45:43, 12.11it/s]

Epochs: 16763 | epoch avg. loss: 0.040 | test avg. loss: 4.230
Epochs: 16764 | epoch avg. loss: 0.063 | test avg. loss: 4.414
Epochs: 16765 | epoch avg. loss: 0.031 | test avg. loss: 4.443


 34%|███▎      | 16769/50000 [25:02<44:25, 12.47it/s]

Epochs: 16766 | epoch avg. loss: 0.024 | test avg. loss: 4.489
Epochs: 16767 | epoch avg. loss: 0.031 | test avg. loss: 4.646
Epochs: 16768 | epoch avg. loss: 0.047 | test avg. loss: 4.532


 34%|███▎      | 16771/50000 [25:02<42:39, 12.98it/s]

Epochs: 16769 | epoch avg. loss: 0.028 | test avg. loss: 4.295
Epochs: 16770 | epoch avg. loss: 0.033 | test avg. loss: 4.461
Epochs: 16771 | epoch avg. loss: 0.114 | test avg. loss: 4.505


 34%|███▎      | 16775/50000 [25:02<40:56, 13.52it/s]

Epochs: 16772 | epoch avg. loss: 0.059 | test avg. loss: 4.454
Epochs: 16773 | epoch avg. loss: 0.159 | test avg. loss: 4.596
Epochs: 16774 | epoch avg. loss: 0.040 | test avg. loss: 4.696


 34%|███▎      | 16777/50000 [25:02<40:47, 13.57it/s]

Epochs: 16775 | epoch avg. loss: 0.044 | test avg. loss: 4.447
Epochs: 16776 | epoch avg. loss: 0.135 | test avg. loss: 4.449
Epochs: 16777 | epoch avg. loss: 0.031 | test avg. loss: 4.527


 34%|███▎      | 16781/50000 [25:03<45:19, 12.21it/s]

Epochs: 16778 | epoch avg. loss: 0.034 | test avg. loss: 4.423
Epochs: 16779 | epoch avg. loss: 0.063 | test avg. loss: 4.454
Epochs: 16780 | epoch avg. loss: 0.023 | test avg. loss: 4.502


 34%|███▎      | 16783/50000 [25:03<45:19, 12.21it/s]

Epochs: 16781 | epoch avg. loss: 0.017 | test avg. loss: 4.349
Epochs: 16782 | epoch avg. loss: 0.041 | test avg. loss: 4.422
Epochs: 16783 | epoch avg. loss: 0.038 | test avg. loss: 4.422


 34%|███▎      | 16787/50000 [25:03<44:22, 12.47it/s]

Epochs: 16784 | epoch avg. loss: 0.034 | test avg. loss: 4.302
Epochs: 16785 | epoch avg. loss: 0.031 | test avg. loss: 4.274
Epochs: 16786 | epoch avg. loss: 0.071 | test avg. loss: 4.454


 34%|███▎      | 16789/50000 [25:03<44:39, 12.39it/s]

Epochs: 16787 | epoch avg. loss: 0.149 | test avg. loss: 4.448
Epochs: 16788 | epoch avg. loss: 0.048 | test avg. loss: 4.367
Epochs: 16789 | epoch avg. loss: 0.060 | test avg. loss: 4.540


 34%|███▎      | 16793/50000 [25:04<47:27, 11.66it/s]

Epochs: 16790 | epoch avg. loss: 0.076 | test avg. loss: 4.356
Epochs: 16791 | epoch avg. loss: 0.046 | test avg. loss: 4.283
Epochs: 16792 | epoch avg. loss: 0.137 | test avg. loss: 4.531


 34%|███▎      | 16795/50000 [25:04<44:52, 12.33it/s]

Epochs: 16793 | epoch avg. loss: 0.098 | test avg. loss: 4.688
Epochs: 16794 | epoch avg. loss: 0.073 | test avg. loss: 4.540
Epochs: 16795 | epoch avg. loss: 0.199 | test avg. loss: 4.557


 34%|███▎      | 16799/50000 [25:04<44:06, 12.54it/s]

Epochs: 16796 | epoch avg. loss: 0.101 | test avg. loss: 4.629
Epochs: 16797 | epoch avg. loss: 0.084 | test avg. loss: 4.340
Epochs: 16798 | epoch avg. loss: 0.201 | test avg. loss: 4.363


 34%|███▎      | 16799/50000 [25:04<44:06, 12.54it/s]

Epochs: 16799 | epoch avg. loss: 0.090 | test avg. loss: 4.470


 34%|███▎      | 16803/50000 [25:08<4:26:29,  2.08it/s]

Epochs: 16800 | epoch avg. loss: 0.086 | test avg. loss: 4.258
Epochs: 16801 | epoch avg. loss: 0.075 | test avg. loss: 4.518
Epochs: 16802 | epoch avg. loss: 0.110 | test avg. loss: 4.725


 34%|███▎      | 16805/50000 [25:09<3:21:43,  2.74it/s]

Epochs: 16803 | epoch avg. loss: 0.091 | test avg. loss: 4.509
Epochs: 16804 | epoch avg. loss: 0.028 | test avg. loss: 4.609
Epochs: 16805 | epoch avg. loss: 0.076 | test avg. loss: 4.553


 34%|███▎      | 16809/50000 [25:09<2:00:45,  4.58it/s]

Epochs: 16806 | epoch avg. loss: 0.049 | test avg. loss: 4.267
Epochs: 16807 | epoch avg. loss: 0.096 | test avg. loss: 4.504
Epochs: 16808 | epoch avg. loss: 0.161 | test avg. loss: 4.698


 34%|███▎      | 16811/50000 [25:09<1:38:44,  5.60it/s]

Epochs: 16809 | epoch avg. loss: 0.076 | test avg. loss: 4.712
Epochs: 16810 | epoch avg. loss: 0.175 | test avg. loss: 4.710
Epochs: 16811 | epoch avg. loss: 0.082 | test avg. loss: 4.572


 34%|███▎      | 16815/50000 [25:09<1:12:24,  7.64it/s]

Epochs: 16812 | epoch avg. loss: 0.080 | test avg. loss: 4.350
Epochs: 16813 | epoch avg. loss: 0.035 | test avg. loss: 4.431
Epochs: 16814 | epoch avg. loss: 0.027 | test avg. loss: 4.594


 34%|███▎      | 16817/50000 [25:10<1:05:31,  8.44it/s]

Epochs: 16815 | epoch avg. loss: 0.028 | test avg. loss: 4.727
Epochs: 16816 | epoch avg. loss: 0.047 | test avg. loss: 4.574
Epochs: 16817 | epoch avg. loss: 0.029 | test avg. loss: 4.601


 34%|███▎      | 16821/50000 [25:10<52:55, 10.45it/s]

Epochs: 16818 | epoch avg. loss: 0.100 | test avg. loss: 4.751
Epochs: 16819 | epoch avg. loss: 0.151 | test avg. loss: 4.467
Epochs: 16820 | epoch avg. loss: 0.025 | test avg. loss: 4.554


 34%|███▎      | 16823/50000 [25:10<48:37, 11.37it/s]

Epochs: 16821 | epoch avg. loss: 0.050 | test avg. loss: 4.758
Epochs: 16822 | epoch avg. loss: 0.100 | test avg. loss: 4.312
Epochs: 16823 | epoch avg. loss: 0.103 | test avg. loss: 4.371


 34%|███▎      | 16827/50000 [25:10<43:05, 12.83it/s]

Epochs: 16824 | epoch avg. loss: 0.034 | test avg. loss: 4.554
Epochs: 16825 | epoch avg. loss: 0.066 | test avg. loss: 4.477
Epochs: 16826 | epoch avg. loss: 0.030 | test avg. loss: 4.375
Epochs: 16827 | epoch avg. loss: 0.111 | test avg. loss: 4.332


 34%|███▎      | 16831/50000 [25:10<41:06, 13.45it/s]

Epochs: 16828 | epoch avg. loss: 0.036 | test avg. loss: 4.594
Epochs: 16829 | epoch avg. loss: 0.116 | test avg. loss: 4.183
Epochs: 16830 | epoch avg. loss: 0.110 | test avg. loss: 4.362


 34%|███▎      | 16835/50000 [25:11<38:52, 14.22it/s]

Epochs: 16831 | epoch avg. loss: 0.056 | test avg. loss: 4.748
Epochs: 16832 | epoch avg. loss: 0.107 | test avg. loss: 4.610
Epochs: 16833 | epoch avg. loss: 0.024 | test avg. loss: 4.589
Epochs: 16834 | epoch avg. loss: 0.017 | test avg. loss: 4.496


 34%|███▎      | 16839/50000 [25:11<37:33, 14.72it/s]

Epochs: 16835 | epoch avg. loss: 0.013 | test avg. loss: 4.446
Epochs: 16836 | epoch avg. loss: 0.019 | test avg. loss: 4.413
Epochs: 16837 | epoch avg. loss: 0.016 | test avg. loss: 4.482
Epochs: 16838 | epoch avg. loss: 0.014 | test avg. loss: 4.576


 34%|███▎      | 16841/50000 [25:11<37:30, 14.73it/s]

Epochs: 16839 | epoch avg. loss: 0.018 | test avg. loss: 4.541
Epochs: 16840 | epoch avg. loss: 0.018 | test avg. loss: 4.678
Epochs: 16841 | epoch avg. loss: 0.084 | test avg. loss: 4.361
Epochs: 16842 | epoch avg. loss: 0.077 | test avg. loss: 4.380


 34%|███▎      | 16845/50000 [25:11<37:29, 14.74it/s]

Epochs: 16843 | epoch avg. loss: 0.075 | test avg. loss: 4.796
Epochs: 16844 | epoch avg. loss: 0.233 | test avg. loss: 4.678
Epochs: 16845 | epoch avg. loss: 0.080 | test avg. loss: 4.601




Epochs: 16846 | epoch avg. loss: 0.165 | test avg. loss: 4.801
Epochs: 16847 | epoch avg. loss: 0.148 | test avg. loss: 4.458
Epochs: 16848 | epoch avg. loss: 0.044 | test avg. loss: 4.271


 34%|███▎      | 16851/50000 [25:12<39:34, 13.96it/s]

Epochs: 16849 | epoch avg. loss: 0.028 | test avg. loss: 4.598
Epochs: 16850 | epoch avg. loss: 0.155 | test avg. loss: 4.511
Epochs: 16851 | epoch avg. loss: 0.039 | test avg. loss: 4.465


 34%|███▎      | 16855/50000 [25:12<39:17, 14.06it/s]

Epochs: 16852 | epoch avg. loss: 0.066 | test avg. loss: 4.511
Epochs: 16853 | epoch avg. loss: 0.027 | test avg. loss: 4.449
Epochs: 16854 | epoch avg. loss: 0.065 | test avg. loss: 4.470


 34%|███▎      | 16857/50000 [25:12<38:42, 14.27it/s]

Epochs: 16855 | epoch avg. loss: 0.037 | test avg. loss: 4.429
Epochs: 16856 | epoch avg. loss: 0.144 | test avg. loss: 4.546
Epochs: 16857 | epoch avg. loss: 0.041 | test avg. loss: 4.762


 34%|███▎      | 16861/50000 [25:13<44:15, 12.48it/s]

Epochs: 16858 | epoch avg. loss: 0.067 | test avg. loss: 4.651
Epochs: 16859 | epoch avg. loss: 0.021 | test avg. loss: 4.518
Epochs: 16860 | epoch avg. loss: 0.018 | test avg. loss: 4.461


 34%|███▎      | 16863/50000 [25:13<42:57, 12.85it/s]

Epochs: 16861 | epoch avg. loss: 0.014 | test avg. loss: 4.427
Epochs: 16862 | epoch avg. loss: 0.019 | test avg. loss: 4.451
Epochs: 16863 | epoch avg. loss: 0.026 | test avg. loss: 4.537


 34%|███▎      | 16867/50000 [25:13<45:07, 12.24it/s]

Epochs: 16864 | epoch avg. loss: 0.014 | test avg. loss: 4.622
Epochs: 16865 | epoch avg. loss: 0.016 | test avg. loss: 4.464
Epochs: 16866 | epoch avg. loss: 0.055 | test avg. loss: 4.448


 34%|███▎      | 16869/50000 [25:13<43:27, 12.71it/s]

Epochs: 16867 | epoch avg. loss: 0.051 | test avg. loss: 4.740
Epochs: 16868 | epoch avg. loss: 0.097 | test avg. loss: 4.502
Epochs: 16869 | epoch avg. loss: 0.215 | test avg. loss: 4.589


 34%|███▎      | 16873/50000 [25:14<45:23, 12.16it/s]

Epochs: 16870 | epoch avg. loss: 0.049 | test avg. loss: 4.857
Epochs: 16871 | epoch avg. loss: 0.077 | test avg. loss: 4.598
Epochs: 16872 | epoch avg. loss: 0.026 | test avg. loss: 4.476


                                                     

Epochs: 16873 | epoch avg. loss: 0.024 | test avg. loss: 4.517
Epochs: 16874 | epoch avg. loss: 0.024 | test avg. loss: 4.502
Epochs: 16875 | epoch avg. loss: 0.017 | test avg. loss: 4.574


 34%|███▍      | 16879/50000 [25:14<40:47, 13.53it/s]

Epochs: 16876 | epoch avg. loss: 0.017 | test avg. loss: 4.626
Epochs: 16877 | epoch avg. loss: 0.014 | test avg. loss: 4.578
Epochs: 16878 | epoch avg. loss: 0.019 | test avg. loss: 4.490


 34%|███▍      | 16883/50000 [25:14<38:18, 14.41it/s]

Epochs: 16879 | epoch avg. loss: 0.053 | test avg. loss: 4.464
Epochs: 16880 | epoch avg. loss: 0.018 | test avg. loss: 4.572
Epochs: 16881 | epoch avg. loss: 0.024 | test avg. loss: 4.443
Epochs: 16882 | epoch avg. loss: 0.105 | test avg. loss: 4.466


 34%|███▍      | 16885/50000 [25:14<39:44, 13.89it/s]

Epochs: 16883 | epoch avg. loss: 0.030 | test avg. loss: 4.630
Epochs: 16884 | epoch avg. loss: 0.073 | test avg. loss: 4.423
Epochs: 16885 | epoch avg. loss: 0.053 | test avg. loss: 4.428


 34%|███▍      | 16889/50000 [25:15<41:49, 13.19it/s]

Epochs: 16886 | epoch avg. loss: 0.098 | test avg. loss: 4.681
Epochs: 16887 | epoch avg. loss: 0.056 | test avg. loss: 4.620
Epochs: 16888 | epoch avg. loss: 0.027 | test avg. loss: 4.465


 34%|███▍      | 16893/50000 [25:15<39:05, 14.12it/s]

Epochs: 16889 | epoch avg. loss: 0.018 | test avg. loss: 4.404
Epochs: 16890 | epoch avg. loss: 0.014 | test avg. loss: 4.406
Epochs: 16891 | epoch avg. loss: 0.018 | test avg. loss: 4.486
Epochs: 16892 | epoch avg. loss: 0.018 | test avg. loss: 4.493


                                                     

Epochs: 16893 | epoch avg. loss: 0.019 | test avg. loss: 4.503
Epochs: 16894 | epoch avg. loss: 0.015 | test avg. loss: 4.565
Epochs: 16895 | epoch avg. loss: 0.029 | test avg. loss: 4.445


 34%|███▍      | 16899/50000 [25:15<38:13, 14.44it/s]

Epochs: 16896 | epoch avg. loss: 0.011 | test avg. loss: 4.507
Epochs: 16897 | epoch avg. loss: 0.026 | test avg. loss: 4.431
Epochs: 16898 | epoch avg. loss: 0.024 | test avg. loss: 4.397


 34%|███▍      | 16899/50000 [25:15<38:13, 14.44it/s]

Epochs: 16899 | epoch avg. loss: 0.038 | test avg. loss: 4.569


 34%|███▍      | 16903/50000 [25:17<1:59:40,  4.61it/s]

Epochs: 16900 | epoch avg. loss: 0.038 | test avg. loss: 4.465
Epochs: 16901 | epoch avg. loss: 0.028 | test avg. loss: 4.467
Epochs: 16902 | epoch avg. loss: 0.027 | test avg. loss: 4.651


 34%|███▍      | 16905/50000 [25:17<1:35:17,  5.79it/s]

Epochs: 16903 | epoch avg. loss: 0.062 | test avg. loss: 4.596
Epochs: 16904 | epoch avg. loss: 0.021 | test avg. loss: 4.529
Epochs: 16905 | epoch avg. loss: 0.052 | test avg. loss: 4.561


 34%|███▍      | 16909/50000 [25:17<1:06:58,  8.23it/s]

Epochs: 16906 | epoch avg. loss: 0.021 | test avg. loss: 4.595
Epochs: 16907 | epoch avg. loss: 0.013 | test avg. loss: 4.558
Epochs: 16908 | epoch avg. loss: 0.012 | test avg. loss: 4.461


 34%|███▍      | 16913/50000 [25:18<51:37, 10.68it/s]

Epochs: 16909 | epoch avg. loss: 0.023 | test avg. loss: 4.485
Epochs: 16910 | epoch avg. loss: 0.030 | test avg. loss: 4.600
Epochs: 16911 | epoch avg. loss: 0.044 | test avg. loss: 4.432
Epochs: 16912 | epoch avg. loss: 0.115 | test avg. loss: 4.490


 34%|███▍      | 16917/50000 [25:18<44:04, 12.51it/s]

Epochs: 16913 | epoch avg. loss: 0.053 | test avg. loss: 5.162
Epochs: 16914 | epoch avg. loss: 0.369 | test avg. loss: 4.542
Epochs: 16915 | epoch avg. loss: 0.082 | test avg. loss: 4.456
Epochs: 16916 | epoch avg. loss: 0.274 | test avg. loss: 4.510


 34%|███▍      | 16921/50000 [25:18<38:53, 14.18it/s]

Epochs: 16917 | epoch avg. loss: 0.144 | test avg. loss: 4.902
Epochs: 16918 | epoch avg. loss: 0.220 | test avg. loss: 4.351
Epochs: 16919 | epoch avg. loss: 0.122 | test avg. loss: 4.425
Epochs: 16920 | epoch avg. loss: 0.095 | test avg. loss: 5.054


 34%|███▍      | 16923/50000 [25:19<39:06, 14.10it/s]

Epochs: 16921 | epoch avg. loss: 0.324 | test avg. loss: 4.748
Epochs: 16922 | epoch avg. loss: 0.109 | test avg. loss: 4.352
Epochs: 16923 | epoch avg. loss: 0.091 | test avg. loss: 4.369


 34%|███▍      | 16927/50000 [25:19<38:51, 14.19it/s]

Epochs: 16924 | epoch avg. loss: 0.029 | test avg. loss: 4.572
Epochs: 16925 | epoch avg. loss: 0.020 | test avg. loss: 4.563
Epochs: 16926 | epoch avg. loss: 0.044 | test avg. loss: 4.657
Epochs: 16927 | epoch avg. loss: 0.032 | test avg. loss: 4.905


 34%|███▍      | 16931/50000 [25:19<41:08, 13.40it/s]

Epochs: 16928 | epoch avg. loss: 0.114 | test avg. loss: 4.581
Epochs: 16929 | epoch avg. loss: 0.040 | test avg. loss: 4.533
Epochs: 16930 | epoch avg. loss: 0.034 | test avg. loss: 4.716


 34%|███▍      | 16933/50000 [25:19<42:52, 12.85it/s]

Epochs: 16931 | epoch avg. loss: 0.039 | test avg. loss: 4.536
Epochs: 16932 | epoch avg. loss: 0.036 | test avg. loss: 4.502
Epochs: 16933 | epoch avg. loss: 0.037 | test avg. loss: 4.725


 34%|███▍      | 16937/50000 [25:20<46:07, 11.95it/s]

Epochs: 16934 | epoch avg. loss: 0.087 | test avg. loss: 4.549
Epochs: 16935 | epoch avg. loss: 0.026 | test avg. loss: 4.433
Epochs: 16936 | epoch avg. loss: 0.035 | test avg. loss: 4.616


 34%|███▍      | 16939/50000 [25:20<44:38, 12.34it/s]

Epochs: 16937 | epoch avg. loss: 0.074 | test avg. loss: 4.531
Epochs: 16938 | epoch avg. loss: 0.036 | test avg. loss: 4.387
Epochs: 16939 | epoch avg. loss: 0.034 | test avg. loss: 4.477


 34%|███▍      | 16943/50000 [25:20<42:20, 13.01it/s]

Epochs: 16940 | epoch avg. loss: 0.018 | test avg. loss: 4.717
Epochs: 16941 | epoch avg. loss: 0.035 | test avg. loss: 4.547
Epochs: 16942 | epoch avg. loss: 0.025 | test avg. loss: 4.606


 34%|███▍      | 16945/50000 [25:20<42:51, 12.85it/s]

Epochs: 16943 | epoch avg. loss: 0.042 | test avg. loss: 4.768
Epochs: 16944 | epoch avg. loss: 0.049 | test avg. loss: 4.536
Epochs: 16945 | epoch avg. loss: 0.216 | test avg. loss: 4.568


 34%|███▍      | 16949/50000 [25:20<43:25, 12.69it/s]

Epochs: 16946 | epoch avg. loss: 0.095 | test avg. loss: 5.353
Epochs: 16947 | epoch avg. loss: 0.464 | test avg. loss: 4.550
Epochs: 16948 | epoch avg. loss: 0.054 | test avg. loss: 4.335


 34%|███▍      | 16951/50000 [25:21<41:49, 13.17it/s]

Epochs: 16949 | epoch avg. loss: 0.091 | test avg. loss: 4.632
Epochs: 16950 | epoch avg. loss: 0.174 | test avg. loss: 4.597
Epochs: 16951 | epoch avg. loss: 0.057 | test avg. loss: 4.553


 34%|███▍      | 16955/50000 [25:21<42:27, 12.97it/s]

Epochs: 16952 | epoch avg. loss: 0.094 | test avg. loss: 4.650
Epochs: 16953 | epoch avg. loss: 0.051 | test avg. loss: 4.823
Epochs: 16954 | epoch avg. loss: 0.059 | test avg. loss: 4.595


 34%|███▍      | 16957/50000 [25:21<41:32, 13.26it/s]

Epochs: 16955 | epoch avg. loss: 0.086 | test avg. loss: 4.479
Epochs: 16956 | epoch avg. loss: 0.041 | test avg. loss: 4.521
Epochs: 16957 | epoch avg. loss: 0.038 | test avg. loss: 4.380


 34%|███▍      | 16961/50000 [25:21<41:37, 13.23it/s]

Epochs: 16958 | epoch avg. loss: 0.043 | test avg. loss: 4.572
Epochs: 16959 | epoch avg. loss: 0.046 | test avg. loss: 4.897
Epochs: 16960 | epoch avg. loss: 0.147 | test avg. loss: 4.642


 34%|███▍      | 16965/50000 [25:22<39:13, 14.04it/s]

Epochs: 16961 | epoch avg. loss: 0.046 | test avg. loss: 4.590
Epochs: 16962 | epoch avg. loss: 0.273 | test avg. loss: 4.509
Epochs: 16963 | epoch avg. loss: 0.164 | test avg. loss: 5.061
Epochs: 16964 | epoch avg. loss: 0.353 | test avg. loss: 4.675


 34%|███▍      | 16967/50000 [25:22<38:17, 14.38it/s]

Epochs: 16965 | epoch avg. loss: 0.300 | test avg. loss: 5.058
Epochs: 16966 | epoch avg. loss: 0.518 | test avg. loss: 4.929
Epochs: 16967 | epoch avg. loss: 0.238 | test avg. loss: 4.867
Epochs: 16968 | epoch avg. loss: 0.242 | test avg. loss: 4.650


 34%|███▍      | 16971/50000 [25:22<37:11, 14.80it/s]

Epochs: 16969 | epoch avg. loss: 1.257 | test avg. loss: 4.277
Epochs: 16970 | epoch avg. loss: 0.314 | test avg. loss: 6.399
Epochs: 16971 | epoch avg. loss: 1.271 | test avg. loss: 4.566


 34%|███▍      | 16975/50000 [25:22<43:08, 12.76it/s]

Epochs: 16972 | epoch avg. loss: 0.173 | test avg. loss: 4.536
Epochs: 16973 | epoch avg. loss: 0.309 | test avg. loss: 4.832
Epochs: 16974 | epoch avg. loss: 0.413 | test avg. loss: 4.648


 34%|███▍      | 16977/50000 [25:23<44:28, 12.38it/s]

Epochs: 16975 | epoch avg. loss: 0.177 | test avg. loss: 4.277
Epochs: 16976 | epoch avg. loss: 0.054 | test avg. loss: 4.527
Epochs: 16977 | epoch avg. loss: 0.045 | test avg. loss: 4.605


 34%|███▍      | 16981/50000 [25:23<42:59, 12.80it/s]

Epochs: 16978 | epoch avg. loss: 0.036 | test avg. loss: 4.520
Epochs: 16979 | epoch avg. loss: 0.034 | test avg. loss: 4.733
Epochs: 16980 | epoch avg. loss: 0.188 | test avg. loss: 4.355


                                                     

Epochs: 16981 | epoch avg. loss: 0.048 | test avg. loss: 4.217
Epochs: 16982 | epoch avg. loss: 0.077 | test avg. loss: 4.437
Epochs: 16983 | epoch avg. loss: 0.026 | test avg. loss: 4.633


 34%|███▍      | 16987/50000 [25:23<41:21, 13.30it/s]

Epochs: 16984 | epoch avg. loss: 0.025 | test avg. loss: 4.602
Epochs: 16985 | epoch avg. loss: 0.054 | test avg. loss: 4.650
Epochs: 16986 | epoch avg. loss: 0.037 | test avg. loss: 4.711


 34%|███▍      | 16989/50000 [25:24<40:43, 13.51it/s]

Epochs: 16987 | epoch avg. loss: 0.122 | test avg. loss: 4.430
Epochs: 16988 | epoch avg. loss: 0.079 | test avg. loss: 4.532
Epochs: 16989 | epoch avg. loss: 0.274 | test avg. loss: 4.676


 34%|███▍      | 16993/50000 [25:24<43:56, 12.52it/s]

Epochs: 16990 | epoch avg. loss: 0.231 | test avg. loss: 5.370
Epochs: 16991 | epoch avg. loss: 0.421 | test avg. loss: 4.368
Epochs: 16992 | epoch avg. loss: 0.166 | test avg. loss: 4.406


 34%|███▍      | 16995/50000 [25:24<46:30, 11.83it/s]

Epochs: 16993 | epoch avg. loss: 0.094 | test avg. loss: 4.778
Epochs: 16994 | epoch avg. loss: 0.175 | test avg. loss: 4.712
Epochs: 16995 | epoch avg. loss: 0.073 | test avg. loss: 4.656


 34%|███▍      | 16999/50000 [25:24<48:07, 11.43it/s]

Epochs: 16996 | epoch avg. loss: 0.533 | test avg. loss: 4.524
Epochs: 16997 | epoch avg. loss: 0.263 | test avg. loss: 5.469
Epochs: 16998 | epoch avg. loss: 0.581 | test avg. loss: 4.455


 34%|███▍      | 16999/50000 [25:24<48:07, 11.43it/s]

Epochs: 16999 | epoch avg. loss: 0.342 | test avg. loss: 4.508


 34%|███▍      | 17003/50000 [25:26<2:10:41,  4.21it/s]

Epochs: 17000 | epoch avg. loss: 0.284 | test avg. loss: 5.296
Epochs: 17001 | epoch avg. loss: 0.318 | test avg. loss: 4.822
Epochs: 17002 | epoch avg. loss: 0.377 | test avg. loss: 4.842


 34%|███▍      | 17005/50000 [25:26<1:45:43,  5.20it/s]

Epochs: 17003 | epoch avg. loss: 0.239 | test avg. loss: 5.950
Epochs: 17004 | epoch avg. loss: 0.823 | test avg. loss: 4.336
Epochs: 17005 | epoch avg. loss: 0.647 | test avg. loss: 4.620


 34%|███▍      | 17009/50000 [25:27<1:16:04,  7.23it/s]

Epochs: 17006 | epoch avg. loss: 0.419 | test avg. loss: 6.242
Epochs: 17007 | epoch avg. loss: 1.377 | test avg. loss: 4.884
Epochs: 17008 | epoch avg. loss: 0.319 | test avg. loss: 4.817


 34%|███▍      | 17011/50000 [25:27<1:06:59,  8.21it/s]

Epochs: 17009 | epoch avg. loss: 0.426 | test avg. loss: 5.048
Epochs: 17010 | epoch avg. loss: 0.666 | test avg. loss: 4.776
Epochs: 17011 | epoch avg. loss: 0.329 | test avg. loss: 5.230


 34%|███▍      | 17015/50000 [25:27<54:36, 10.07it/s]  

Epochs: 17012 | epoch avg. loss: 1.394 | test avg. loss: 4.579
Epochs: 17013 | epoch avg. loss: 0.820 | test avg. loss: 4.936
Epochs: 17014 | epoch avg. loss: 0.564 | test avg. loss: 5.975


 34%|███▍      | 17017/50000 [25:27<49:34, 11.09it/s]

Epochs: 17015 | epoch avg. loss: 1.604 | test avg. loss: 6.099
Epochs: 17016 | epoch avg. loss: 1.127 | test avg. loss: 4.752
Epochs: 17017 | epoch avg. loss: 0.732 | test avg. loss: 4.559


 34%|███▍      | 17021/50000 [25:28<46:13, 11.89it/s]

Epochs: 17018 | epoch avg. loss: 0.477 | test avg. loss: 5.069
Epochs: 17019 | epoch avg. loss: 0.504 | test avg. loss: 5.377
Epochs: 17020 | epoch avg. loss: 2.005 | test avg. loss: 5.610


 34%|███▍      | 17023/50000 [25:28<43:32, 12.62it/s]

Epochs: 17021 | epoch avg. loss: 1.765 | test avg. loss: 4.782
Epochs: 17022 | epoch avg. loss: 1.032 | test avg. loss: 6.077
Epochs: 17023 | epoch avg. loss: 2.729 | test avg. loss: 7.234
Epochs: 17024 | epoch avg. loss: 2.308 | test avg. loss: 4.271


 34%|███▍      | 17027/50000 [25:28<39:28, 13.92it/s]

Epochs: 17025 | epoch avg. loss: 0.952 | test avg. loss: 4.365
Epochs: 17026 | epoch avg. loss: 0.795 | test avg. loss: 6.234
Epochs: 17027 | epoch avg. loss: 1.064 | test avg. loss: 5.691
Epochs: 17028 | epoch avg. loss: 1.442 | test avg. loss: 6.066


 34%|███▍      | 17031/50000 [25:28<37:51, 14.51it/s]

Epochs: 17029 | epoch avg. loss: 1.103 | test avg. loss: 4.628
Epochs: 17030 | epoch avg. loss: 0.682 | test avg. loss: 4.700
Epochs: 17031 | epoch avg. loss: 0.421 | test avg. loss: 5.360


 34%|███▍      | 17035/50000 [25:29<38:47, 14.16it/s]

Epochs: 17032 | epoch avg. loss: 0.449 | test avg. loss: 5.653
Epochs: 17033 | epoch avg. loss: 0.923 | test avg. loss: 6.426
Epochs: 17034 | epoch avg. loss: 0.890 | test avg. loss: 4.884


 34%|███▍      | 17037/50000 [25:29<38:57, 14.10it/s]

Epochs: 17035 | epoch avg. loss: 0.586 | test avg. loss: 4.986
Epochs: 17036 | epoch avg. loss: 0.680 | test avg. loss: 5.109
Epochs: 17037 | epoch avg. loss: 0.581 | test avg. loss: 4.929


 34%|███▍      | 17041/50000 [25:29<41:55, 13.10it/s]

Epochs: 17038 | epoch avg. loss: 1.039 | test avg. loss: 5.640
Epochs: 17039 | epoch avg. loss: 1.055 | test avg. loss: 4.274
Epochs: 17040 | epoch avg. loss: 0.689 | test avg. loss: 4.368


 34%|███▍      | 17043/50000 [25:29<42:47, 12.83it/s]

Epochs: 17041 | epoch avg. loss: 0.433 | test avg. loss: 5.017
Epochs: 17042 | epoch avg. loss: 0.333 | test avg. loss: 4.609
Epochs: 17043 | epoch avg. loss: 0.250 | test avg. loss: 5.223


 34%|███▍      | 17047/50000 [25:30<41:57, 13.09it/s]

Epochs: 17044 | epoch avg. loss: 0.295 | test avg. loss: 4.490
Epochs: 17045 | epoch avg. loss: 0.349 | test avg. loss: 4.789
Epochs: 17046 | epoch avg. loss: 0.382 | test avg. loss: 4.221
Epochs: 17047 | epoch avg. loss: 0.263 | test avg. loss: 4.269


 34%|███▍      | 17051/50000 [25:30<40:22, 13.60it/s]

Epochs: 17048 | epoch avg. loss: 0.158 | test avg. loss: 4.844
Epochs: 17049 | epoch avg. loss: 0.208 | test avg. loss: 4.548
Epochs: 17050 | epoch avg. loss: 0.206 | test avg. loss: 4.880


 34%|███▍      | 17053/50000 [25:30<39:55, 13.75it/s]

Epochs: 17051 | epoch avg. loss: 0.163 | test avg. loss: 4.365
Epochs: 17052 | epoch avg. loss: 0.057 | test avg. loss: 4.414
Epochs: 17053 | epoch avg. loss: 0.064 | test avg. loss: 4.252


 34%|███▍      | 17057/50000 [25:30<37:33, 14.62it/s]

Epochs: 17054 | epoch avg. loss: 0.040 | test avg. loss: 4.243
Epochs: 17055 | epoch avg. loss: 0.035 | test avg. loss: 4.378
Epochs: 17056 | epoch avg. loss: 0.027 | test avg. loss: 4.382
Epochs: 17057 | epoch avg. loss: 0.033 | test avg. loss: 4.511


 34%|███▍      | 17061/50000 [25:30<38:26, 14.28it/s]

Epochs: 17058 | epoch avg. loss: 0.029 | test avg. loss: 4.453
Epochs: 17059 | epoch avg. loss: 0.017 | test avg. loss: 4.404
Epochs: 17060 | epoch avg. loss: 0.015 | test avg. loss: 4.333


 34%|███▍      | 17063/50000 [25:31<39:57, 13.74it/s]

Epochs: 17061 | epoch avg. loss: 0.018 | test avg. loss: 4.453
Epochs: 17062 | epoch avg. loss: 0.041 | test avg. loss: 4.355
Epochs: 17063 | epoch avg. loss: 0.035 | test avg. loss: 4.437


 34%|███▍      | 17067/50000 [25:31<40:30, 13.55it/s]

Epochs: 17064 | epoch avg. loss: 0.029 | test avg. loss: 4.576
Epochs: 17065 | epoch avg. loss: 0.030 | test avg. loss: 4.490
Epochs: 17066 | epoch avg. loss: 0.041 | test avg. loss: 4.653


 34%|███▍      | 17069/50000 [25:31<43:02, 12.75it/s]

Epochs: 17067 | epoch avg. loss: 0.035 | test avg. loss: 4.509
Epochs: 17068 | epoch avg. loss: 0.016 | test avg. loss: 4.557


                                                     

Epochs: 17069 | epoch avg. loss: 0.027 | test avg. loss: 4.490
Epochs: 17070 | epoch avg. loss: 0.018 | test avg. loss: 4.467
Epochs: 17071 | epoch avg. loss: 0.020 | test avg. loss: 4.602


 34%|███▍      | 17075/50000 [25:32<42:07, 13.03it/s]

Epochs: 17072 | epoch avg. loss: 0.027 | test avg. loss: 4.511
Epochs: 17073 | epoch avg. loss: 0.073 | test avg. loss: 4.662
Epochs: 17074 | epoch avg. loss: 0.027 | test avg. loss: 4.568


 34%|███▍      | 17079/50000 [25:32<38:33, 14.23it/s]

Epochs: 17075 | epoch avg. loss: 0.026 | test avg. loss: 4.564
Epochs: 17076 | epoch avg. loss: 0.029 | test avg. loss: 4.588
Epochs: 17077 | epoch avg. loss: 0.036 | test avg. loss: 4.399
Epochs: 17078 | epoch avg. loss: 0.058 | test avg. loss: 4.637


 34%|███▍      | 17081/50000 [25:32<38:56, 14.09it/s]

Epochs: 17079 | epoch avg. loss: 0.065 | test avg. loss: 4.510
Epochs: 17080 | epoch avg. loss: 0.065 | test avg. loss: 4.617
Epochs: 17081 | epoch avg. loss: 0.029 | test avg. loss: 4.680
Epochs: 17082 | epoch avg. loss: 0.023 | test avg. loss: 4.603


 34%|███▍      | 17085/50000 [25:32<37:32, 14.61it/s]

Epochs: 17083 | epoch avg. loss: 0.021 | test avg. loss: 4.610
Epochs: 17084 | epoch avg. loss: 0.026 | test avg. loss: 4.433
Epochs: 17085 | epoch avg. loss: 0.027 | test avg. loss: 4.631


 34%|███▍      | 17089/50000 [25:33<38:49, 14.13it/s]

Epochs: 17086 | epoch avg. loss: 0.061 | test avg. loss: 4.534
Epochs: 17087 | epoch avg. loss: 0.036 | test avg. loss: 4.632
Epochs: 17088 | epoch avg. loss: 0.015 | test avg. loss: 4.651
Epochs: 17089 | epoch avg. loss: 0.029 | test avg. loss: 4.650


 34%|███▍      | 17093/50000 [25:33<36:55, 14.86it/s]

Epochs: 17090 | epoch avg. loss: 0.022 | test avg. loss: 4.661
Epochs: 17091 | epoch avg. loss: 0.023 | test avg. loss: 4.500
Epochs: 17092 | epoch avg. loss: 0.020 | test avg. loss: 4.478
Epochs: 17093 | epoch avg. loss: 0.015 | test avg. loss: 4.566


 34%|███▍      | 17097/50000 [25:33<37:45, 14.52it/s]

Epochs: 17094 | epoch avg. loss: 0.014 | test avg. loss: 4.530
Epochs: 17095 | epoch avg. loss: 0.026 | test avg. loss: 4.603
Epochs: 17096 | epoch avg. loss: 0.013 | test avg. loss: 4.576


 34%|███▍      | 17099/50000 [25:33<37:55, 14.46it/s]

Epochs: 17097 | epoch avg. loss: 0.014 | test avg. loss: 4.557
Epochs: 17098 | epoch avg. loss: 0.012 | test avg. loss: 4.576
Epochs: 17099 | epoch avg. loss: 0.013 | test avg. loss: 4.544


 34%|███▍      | 17103/50000 [25:35<2:05:27,  4.37it/s]

Epochs: 17100 | epoch avg. loss: 0.016 | test avg. loss: 4.669
Epochs: 17101 | epoch avg. loss: 0.021 | test avg. loss: 4.606
Epochs: 17102 | epoch avg. loss: 0.013 | test avg. loss: 4.659
Epochs: 17103 | epoch avg. loss: 0.018 | test avg. loss: 4.624


 34%|███▍      | 17107/50000 [25:35<1:20:14,  6.83it/s]

Epochs: 17104 | epoch avg. loss: 0.011 | test avg. loss: 4.631
Epochs: 17105 | epoch avg. loss: 0.015 | test avg. loss: 4.641
Epochs: 17106 | epoch avg. loss: 0.015 | test avg. loss: 4.568


 34%|███▍      | 17109/50000 [25:35<1:09:16,  7.91it/s]

Epochs: 17107 | epoch avg. loss: 0.015 | test avg. loss: 4.717
Epochs: 17108 | epoch avg. loss: 0.060 | test avg. loss: 4.538
Epochs: 17109 | epoch avg. loss: 0.022 | test avg. loss: 4.669


                                                     

Epochs: 17110 | epoch avg. loss: 0.060 | test avg. loss: 4.573
Epochs: 17111 | epoch avg. loss: 0.030 | test avg. loss: 4.528
Epochs: 17112 | epoch avg. loss: 0.027 | test avg. loss: 4.653


 34%|███▍      | 17117/50000 [25:36<44:55, 12.20it/s]

Epochs: 17113 | epoch avg. loss: 0.022 | test avg. loss: 4.605
Epochs: 17114 | epoch avg. loss: 0.014 | test avg. loss: 4.632
Epochs: 17115 | epoch avg. loss: 0.020 | test avg. loss: 4.690
Epochs: 17116 | epoch avg. loss: 0.026 | test avg. loss: 4.621


 34%|███▍      | 17119/50000 [25:36<44:13, 12.39it/s]

Epochs: 17117 | epoch avg. loss: 0.011 | test avg. loss: 4.602
Epochs: 17118 | epoch avg. loss: 0.013 | test avg. loss: 4.726
Epochs: 17119 | epoch avg. loss: 0.029 | test avg. loss: 4.646


 34%|███▍      | 17123/50000 [25:36<43:25, 12.62it/s]

Epochs: 17120 | epoch avg. loss: 0.025 | test avg. loss: 4.617
Epochs: 17121 | epoch avg. loss: 0.020 | test avg. loss: 4.711
Epochs: 17122 | epoch avg. loss: 0.029 | test avg. loss: 4.605


 34%|███▍      | 17127/50000 [25:37<40:19, 13.58it/s]

Epochs: 17123 | epoch avg. loss: 0.031 | test avg. loss: 4.602
Epochs: 17124 | epoch avg. loss: 0.020 | test avg. loss: 4.713
Epochs: 17125 | epoch avg. loss: 0.033 | test avg. loss: 4.625
Epochs: 17126 | epoch avg. loss: 0.020 | test avg. loss: 4.658


 34%|███▍      | 17129/50000 [25:37<39:26, 13.89it/s]

Epochs: 17127 | epoch avg. loss: 0.019 | test avg. loss: 4.702
Epochs: 17128 | epoch avg. loss: 0.020 | test avg. loss: 4.616
Epochs: 17129 | epoch avg. loss: 0.015 | test avg. loss: 4.656
Epochs: 17130 | epoch avg. loss: 0.011 | test avg. loss: 4.599


 34%|███▍      | 17133/50000 [25:37<38:03, 14.39it/s]

Epochs: 17131 | epoch avg. loss: 0.027 | test avg. loss: 4.628
Epochs: 17132 | epoch avg. loss: 0.017 | test avg. loss: 4.624
Epochs: 17133 | epoch avg. loss: 0.017 | test avg. loss: 4.534


 34%|███▍      | 17137/50000 [25:37<38:37, 14.18it/s]

Epochs: 17134 | epoch avg. loss: 0.018 | test avg. loss: 4.703
Epochs: 17135 | epoch avg. loss: 0.026 | test avg. loss: 4.632
Epochs: 17136 | epoch avg. loss: 0.042 | test avg. loss: 4.716


 34%|███▍      | 17139/50000 [25:38<39:43, 13.79it/s]

Epochs: 17137 | epoch avg. loss: 0.019 | test avg. loss: 4.705
Epochs: 17138 | epoch avg. loss: 0.026 | test avg. loss: 4.567
Epochs: 17139 | epoch avg. loss: 0.059 | test avg. loss: 4.811


 34%|███▍      | 17143/50000 [25:38<39:16, 13.94it/s]

Epochs: 17140 | epoch avg. loss: 0.083 | test avg. loss: 4.575
Epochs: 17141 | epoch avg. loss: 0.026 | test avg. loss: 4.596
Epochs: 17142 | epoch avg. loss: 0.027 | test avg. loss: 4.775


 34%|███▍      | 17145/50000 [25:38<38:37, 14.18it/s]

Epochs: 17143 | epoch avg. loss: 0.037 | test avg. loss: 4.649
Epochs: 17144 | epoch avg. loss: 0.042 | test avg. loss: 4.969
Epochs: 17145 | epoch avg. loss: 0.105 | test avg. loss: 4.659


 34%|███▍      | 17149/50000 [25:38<39:44, 13.78it/s]

Epochs: 17146 | epoch avg. loss: 0.062 | test avg. loss: 4.622
Epochs: 17147 | epoch avg. loss: 0.039 | test avg. loss: 4.823
Epochs: 17148 | epoch avg. loss: 0.073 | test avg. loss: 4.543


 34%|███▍      | 17151/50000 [25:38<41:45, 13.11it/s]

Epochs: 17149 | epoch avg. loss: 0.063 | test avg. loss: 4.652
Epochs: 17150 | epoch avg. loss: 0.030 | test avg. loss: 4.686
Epochs: 17151 | epoch avg. loss: 0.029 | test avg. loss: 4.663


 34%|███▍      | 17155/50000 [25:39<38:38, 14.17it/s]

Epochs: 17152 | epoch avg. loss: 0.030 | test avg. loss: 4.785
Epochs: 17153 | epoch avg. loss: 0.045 | test avg. loss: 4.647
Epochs: 17154 | epoch avg. loss: 0.019 | test avg. loss: 4.550
Epochs: 17155 | epoch avg. loss: 0.040 | test avg. loss: 4.671


 34%|███▍      | 17159/50000 [25:39<36:18, 15.08it/s]

Epochs: 17156 | epoch avg. loss: 0.025 | test avg. loss: 4.676
Epochs: 17157 | epoch avg. loss: 0.017 | test avg. loss: 4.634
Epochs: 17158 | epoch avg. loss: 0.023 | test avg. loss: 4.869
Epochs: 17159 | epoch avg. loss: 0.067 | test avg. loss: 4.609


 34%|███▍      | 17163/50000 [25:39<35:30, 15.41it/s]

Epochs: 17160 | epoch avg. loss: 0.087 | test avg. loss: 4.675
Epochs: 17161 | epoch avg. loss: 0.031 | test avg. loss: 4.872
Epochs: 17162 | epoch avg. loss: 0.059 | test avg. loss: 4.572
Epochs: 17163 | epoch avg. loss: 0.048 | test avg. loss: 4.654


 34%|███▍      | 17167/50000 [25:39<38:06, 14.36it/s]

Epochs: 17164 | epoch avg. loss: 0.100 | test avg. loss: 4.578
Epochs: 17165 | epoch avg. loss: 0.022 | test avg. loss: 4.695
Epochs: 17166 | epoch avg. loss: 0.045 | test avg. loss: 4.748


 34%|███▍      | 17169/50000 [25:40<38:25, 14.24it/s]

Epochs: 17167 | epoch avg. loss: 0.031 | test avg. loss: 4.685
Epochs: 17168 | epoch avg. loss: 0.046 | test avg. loss: 4.869
Epochs: 17169 | epoch avg. loss: 0.068 | test avg. loss: 4.678


 34%|███▍      | 17173/50000 [25:40<39:45, 13.76it/s]

Epochs: 17170 | epoch avg. loss: 0.053 | test avg. loss: 4.657
Epochs: 17171 | epoch avg. loss: 0.032 | test avg. loss: 4.825
Epochs: 17172 | epoch avg. loss: 0.044 | test avg. loss: 4.580


 34%|███▍      | 17177/50000 [25:40<37:36, 14.55it/s]

Epochs: 17173 | epoch avg. loss: 0.044 | test avg. loss: 4.721
Epochs: 17174 | epoch avg. loss: 0.037 | test avg. loss: 4.635
Epochs: 17175 | epoch avg. loss: 0.034 | test avg. loss: 4.592
Epochs: 17176 | epoch avg. loss: 0.035 | test avg. loss: 4.823


 34%|███▍      | 17179/50000 [25:40<39:26, 13.87it/s]

Epochs: 17177 | epoch avg. loss: 0.044 | test avg. loss: 4.603
Epochs: 17178 | epoch avg. loss: 0.084 | test avg. loss: 4.666
Epochs: 17179 | epoch avg. loss: 0.033 | test avg. loss: 4.899


 34%|███▍      | 17183/50000 [25:41<43:34, 12.55it/s]

Epochs: 17180 | epoch avg. loss: 0.063 | test avg. loss: 4.636
Epochs: 17181 | epoch avg. loss: 0.088 | test avg. loss: 4.775
Epochs: 17182 | epoch avg. loss: 0.072 | test avg. loss: 4.621


 34%|███▍      | 17185/50000 [25:41<43:36, 12.54it/s]

Epochs: 17183 | epoch avg. loss: 0.065 | test avg. loss: 4.694
Epochs: 17184 | epoch avg. loss: 0.169 | test avg. loss: 5.226
Epochs: 17185 | epoch avg. loss: 0.257 | test avg. loss: 4.804


 34%|███▍      | 17189/50000 [25:41<42:30, 12.87it/s]

Epochs: 17186 | epoch avg. loss: 0.159 | test avg. loss: 4.783
Epochs: 17187 | epoch avg. loss: 0.123 | test avg. loss: 5.308
Epochs: 17188 | epoch avg. loss: 0.245 | test avg. loss: 4.758


 34%|███▍      | 17191/50000 [25:41<40:53, 13.37it/s]

Epochs: 17189 | epoch avg. loss: 0.309 | test avg. loss: 4.866
Epochs: 17190 | epoch avg. loss: 0.205 | test avg. loss: 4.838
Epochs: 17191 | epoch avg. loss: 0.116 | test avg. loss: 4.732


 34%|███▍      | 17195/50000 [25:42<43:28, 12.57it/s]

Epochs: 17192 | epoch avg. loss: 0.172 | test avg. loss: 5.078
Epochs: 17193 | epoch avg. loss: 0.287 | test avg. loss: 4.688
Epochs: 17194 | epoch avg. loss: 0.089 | test avg. loss: 4.667


 34%|███▍      | 17199/50000 [25:42<40:03, 13.64it/s]

Epochs: 17195 | epoch avg. loss: 0.093 | test avg. loss: 5.210
Epochs: 17196 | epoch avg. loss: 0.213 | test avg. loss: 4.702
Epochs: 17197 | epoch avg. loss: 0.320 | test avg. loss: 4.777
Epochs: 17198 | epoch avg. loss: 0.051 | test avg. loss: 4.734


 34%|███▍      | 17199/50000 [25:42<40:03, 13.64it/s]

Epochs: 17199 | epoch avg. loss: 0.056 | test avg. loss: 4.780


 34%|███▍      | 17203/50000 [25:44<2:15:37,  4.03it/s]

Epochs: 17200 | epoch avg. loss: 0.059 | test avg. loss: 4.897
Epochs: 17201 | epoch avg. loss: 0.061 | test avg. loss: 4.841
Epochs: 17202 | epoch avg. loss: 0.124 | test avg. loss: 5.176


 34%|███▍      | 17205/50000 [25:44<1:48:49,  5.02it/s]

Epochs: 17203 | epoch avg. loss: 0.247 | test avg. loss: 4.633
Epochs: 17204 | epoch avg. loss: 0.162 | test avg. loss: 4.629
Epochs: 17205 | epoch avg. loss: 0.214 | test avg. loss: 5.118


 34%|███▍      | 17209/50000 [25:44<1:17:23,  7.06it/s]

Epochs: 17206 | epoch avg. loss: 0.284 | test avg. loss: 4.530
Epochs: 17207 | epoch avg. loss: 0.056 | test avg. loss: 4.493
Epochs: 17208 | epoch avg. loss: 0.049 | test avg. loss: 4.822


 34%|███▍      | 17211/50000 [25:45<1:09:34,  7.85it/s]

Epochs: 17209 | epoch avg. loss: 0.070 | test avg. loss: 4.743
Epochs: 17210 | epoch avg. loss: 0.179 | test avg. loss: 4.925
Epochs: 17211 | epoch avg. loss: 0.090 | test avg. loss: 4.762


 34%|███▍      | 17215/50000 [25:45<58:52,  9.28it/s]  

Epochs: 17212 | epoch avg. loss: 0.027 | test avg. loss: 4.736
Epochs: 17213 | epoch avg. loss: 0.036 | test avg. loss: 4.656
Epochs: 17214 | epoch avg. loss: 0.033 | test avg. loss: 4.589


 34%|███▍      | 17219/50000 [25:45<47:47, 11.43it/s]

Epochs: 17215 | epoch avg. loss: 0.040 | test avg. loss: 4.781
Epochs: 17216 | epoch avg. loss: 0.047 | test avg. loss: 4.717
Epochs: 17217 | epoch avg. loss: 0.019 | test avg. loss: 4.712
Epochs: 17218 | epoch avg. loss: 0.029 | test avg. loss: 4.744


 34%|███▍      | 17221/50000 [25:45<43:47, 12.48it/s]

Epochs: 17219 | epoch avg. loss: 0.016 | test avg. loss: 4.841
Epochs: 17220 | epoch avg. loss: 0.014 | test avg. loss: 4.738
Epochs: 17221 | epoch avg. loss: 0.045 | test avg. loss: 4.746


 34%|███▍      | 17225/50000 [25:46<42:31, 12.84it/s]

Epochs: 17222 | epoch avg. loss: 0.017 | test avg. loss: 4.722
Epochs: 17223 | epoch avg. loss: 0.014 | test avg. loss: 4.717
Epochs: 17224 | epoch avg. loss: 0.011 | test avg. loss: 4.746


 34%|███▍      | 17227/50000 [25:46<45:40, 11.96it/s]

Epochs: 17225 | epoch avg. loss: 0.010 | test avg. loss: 4.828
Epochs: 17226 | epoch avg. loss: 0.014 | test avg. loss: 4.801
Epochs: 17227 | epoch avg. loss: 0.014 | test avg. loss: 4.750


 34%|███▍      | 17231/50000 [25:46<44:24, 12.30it/s]

Epochs: 17228 | epoch avg. loss: 0.012 | test avg. loss: 4.750
Epochs: 17229 | epoch avg. loss: 0.013 | test avg. loss: 4.741
Epochs: 17230 | epoch avg. loss: 0.012 | test avg. loss: 4.679


 34%|███▍      | 17233/50000 [25:46<44:48, 12.19it/s]

Epochs: 17231 | epoch avg. loss: 0.022 | test avg. loss: 4.789
Epochs: 17232 | epoch avg. loss: 0.017 | test avg. loss: 4.754
Epochs: 17233 | epoch avg. loss: 0.013 | test avg. loss: 4.734


 34%|███▍      | 17237/50000 [25:47<43:46, 12.48it/s]

Epochs: 17234 | epoch avg. loss: 0.016 | test avg. loss: 4.848
Epochs: 17235 | epoch avg. loss: 0.038 | test avg. loss: 4.632
Epochs: 17236 | epoch avg. loss: 0.072 | test avg. loss: 4.746


 34%|███▍      | 17239/50000 [25:47<42:52, 12.74it/s]

Epochs: 17237 | epoch avg. loss: 0.020 | test avg. loss: 4.897
Epochs: 17238 | epoch avg. loss: 0.024 | test avg. loss: 4.807
Epochs: 17239 | epoch avg. loss: 0.033 | test avg. loss: 4.783


 34%|███▍      | 17243/50000 [25:47<43:40, 12.50it/s]

Epochs: 17240 | epoch avg. loss: 0.013 | test avg. loss: 4.678
Epochs: 17241 | epoch avg. loss: 0.016 | test avg. loss: 4.655
Epochs: 17242 | epoch avg. loss: 0.015 | test avg. loss: 4.779


 34%|███▍      | 17245/50000 [25:47<42:31, 12.84it/s]

Epochs: 17243 | epoch avg. loss: 0.021 | test avg. loss: 4.774
Epochs: 17244 | epoch avg. loss: 0.013 | test avg. loss: 4.768
Epochs: 17245 | epoch avg. loss: 0.016 | test avg. loss: 4.958


                                                     

Epochs: 17246 | epoch avg. loss: 0.060 | test avg. loss: 4.725
Epochs: 17247 | epoch avg. loss: 0.022 | test avg. loss: 4.743
Epochs: 17248 | epoch avg. loss: 0.012 | test avg. loss: 4.785


 35%|███▍      | 17251/50000 [25:48<39:32, 13.80it/s]

Epochs: 17249 | epoch avg. loss: 0.012 | test avg. loss: 4.806
Epochs: 17250 | epoch avg. loss: 0.016 | test avg. loss: 4.703
Epochs: 17251 | epoch avg. loss: 0.024 | test avg. loss: 4.702


 35%|███▍      | 17255/50000 [25:48<39:30, 13.82it/s]

Epochs: 17252 | epoch avg. loss: 0.013 | test avg. loss: 4.769
Epochs: 17253 | epoch avg. loss: 0.021 | test avg. loss: 4.728
Epochs: 17254 | epoch avg. loss: 0.011 | test avg. loss: 4.725


 35%|███▍      | 17257/50000 [25:48<41:52, 13.03it/s]

Epochs: 17255 | epoch avg. loss: 0.014 | test avg. loss: 4.696
Epochs: 17256 | epoch avg. loss: 0.014 | test avg. loss: 4.768
Epochs: 17257 | epoch avg. loss: 0.024 | test avg. loss: 4.741


 35%|███▍      | 17261/50000 [25:48<43:34, 12.52it/s]

Epochs: 17258 | epoch avg. loss: 0.013 | test avg. loss: 4.681
Epochs: 17259 | epoch avg. loss: 0.030 | test avg. loss: 4.741
Epochs: 17260 | epoch avg. loss: 0.010 | test avg. loss: 4.750


 35%|███▍      | 17263/50000 [25:49<43:09, 12.64it/s]

Epochs: 17261 | epoch avg. loss: 0.011 | test avg. loss: 4.714
Epochs: 17262 | epoch avg. loss: 0.014 | test avg. loss: 4.758
Epochs: 17263 | epoch avg. loss: 0.011 | test avg. loss: 4.692


 35%|███▍      | 17267/50000 [25:49<40:12, 13.57it/s]

Epochs: 17264 | epoch avg. loss: 0.018 | test avg. loss: 4.707
Epochs: 17265 | epoch avg. loss: 0.011 | test avg. loss: 4.676
Epochs: 17266 | epoch avg. loss: 0.017 | test avg. loss: 4.637
Epochs: 17267 | epoch avg. loss: 0.047 | test avg. loss: 4.807


 35%|███▍      | 17271/50000 [25:49<38:06, 14.31it/s]

Epochs: 17268 | epoch avg. loss: 0.043 | test avg. loss: 4.919
Epochs: 17269 | epoch avg. loss: 0.044 | test avg. loss: 4.693
Epochs: 17270 | epoch avg. loss: 0.091 | test avg. loss: 4.785


                                                     

Epochs: 17271 | epoch avg. loss: 0.028 | test avg. loss: 4.739
Epochs: 17272 | epoch avg. loss: 0.030 | test avg. loss: 4.673
Epochs: 17273 | epoch avg. loss: 0.084 | test avg. loss: 4.784


 35%|███▍      | 17277/50000 [25:50<39:31, 13.80it/s]

Epochs: 17274 | epoch avg. loss: 0.047 | test avg. loss: 4.740
Epochs: 17275 | epoch avg. loss: 0.029 | test avg. loss: 4.660
Epochs: 17276 | epoch avg. loss: 0.015 | test avg. loss: 4.623


 35%|███▍      | 17281/50000 [25:50<36:58, 14.75it/s]

Epochs: 17277 | epoch avg. loss: 0.017 | test avg. loss: 4.734
Epochs: 17278 | epoch avg. loss: 0.020 | test avg. loss: 4.749
Epochs: 17279 | epoch avg. loss: 0.013 | test avg. loss: 4.742
Epochs: 17280 | epoch avg. loss: 0.011 | test avg. loss: 4.766


 35%|███▍      | 17285/50000 [25:50<35:55, 15.18it/s]

Epochs: 17281 | epoch avg. loss: 0.014 | test avg. loss: 4.745
Epochs: 17282 | epoch avg. loss: 0.011 | test avg. loss: 4.718
Epochs: 17283 | epoch avg. loss: 0.011 | test avg. loss: 4.704
Epochs: 17284 | epoch avg. loss: 0.025 | test avg. loss: 4.725


 35%|███▍      | 17287/50000 [25:50<35:53, 15.19it/s]

Epochs: 17285 | epoch avg. loss: 0.015 | test avg. loss: 4.800
Epochs: 17286 | epoch avg. loss: 0.015 | test avg. loss: 4.784
Epochs: 17287 | epoch avg. loss: 0.010 | test avg. loss: 4.782


 35%|███▍      | 17291/50000 [25:51<43:05, 12.65it/s]

Epochs: 17288 | epoch avg. loss: 0.012 | test avg. loss: 4.797
Epochs: 17289 | epoch avg. loss: 0.022 | test avg. loss: 4.766
Epochs: 17290 | epoch avg. loss: 0.013 | test avg. loss: 4.758


 35%|███▍      | 17295/50000 [25:51<39:02, 13.96it/s]

Epochs: 17291 | epoch avg. loss: 0.011 | test avg. loss: 4.815
Epochs: 17292 | epoch avg. loss: 0.012 | test avg. loss: 4.746
Epochs: 17293 | epoch avg. loss: 0.012 | test avg. loss: 4.856
Epochs: 17294 | epoch avg. loss: 0.039 | test avg. loss: 4.739


 35%|███▍      | 17299/50000 [25:51<37:29, 14.53it/s]

Epochs: 17295 | epoch avg. loss: 0.023 | test avg. loss: 4.686
Epochs: 17296 | epoch avg. loss: 0.041 | test avg. loss: 4.898
Epochs: 17297 | epoch avg. loss: 0.068 | test avg. loss: 4.769
Epochs: 17298 | epoch avg. loss: 0.024 | test avg. loss: 4.732


 35%|███▍      | 17299/50000 [25:51<37:29, 14.53it/s]

Epochs: 17299 | epoch avg. loss: 0.033 | test avg. loss: 4.879


 35%|███▍      | 17303/50000 [25:53<1:55:47,  4.71it/s]

Epochs: 17300 | epoch avg. loss: 0.038 | test avg. loss: 4.747
Epochs: 17301 | epoch avg. loss: 0.017 | test avg. loss: 4.610
Epochs: 17302 | epoch avg. loss: 0.024 | test avg. loss: 4.727
Epochs: 17303 | epoch avg. loss: 0.021 | test avg. loss: 4.733


 35%|███▍      | 17307/50000 [25:53<1:15:43,  7.20it/s]

Epochs: 17304 | epoch avg. loss: 0.009 | test avg. loss: 4.831
Epochs: 17305 | epoch avg. loss: 0.019 | test avg. loss: 4.779
Epochs: 17306 | epoch avg. loss: 0.019 | test avg. loss: 4.730


 35%|███▍      | 17309/50000 [25:53<1:03:51,  8.53it/s]

Epochs: 17307 | epoch avg. loss: 0.022 | test avg. loss: 4.761
Epochs: 17308 | epoch avg. loss: 0.020 | test avg. loss: 4.714
Epochs: 17309 | epoch avg. loss: 0.013 | test avg. loss: 4.674


 35%|███▍      | 17313/50000 [25:54<55:40,  9.78it/s]

Epochs: 17310 | epoch avg. loss: 0.010 | test avg. loss: 4.705
Epochs: 17311 | epoch avg. loss: 0.009 | test avg. loss: 4.722
Epochs: 17312 | epoch avg. loss: 0.010 | test avg. loss: 4.706


 35%|███▍      | 17317/50000 [25:54<44:25, 12.26it/s]

Epochs: 17313 | epoch avg. loss: 0.013 | test avg. loss: 4.716
Epochs: 17314 | epoch avg. loss: 0.010 | test avg. loss: 4.699
Epochs: 17315 | epoch avg. loss: 0.011 | test avg. loss: 4.712
Epochs: 17316 | epoch avg. loss: 0.009 | test avg. loss: 4.684


 35%|███▍      | 17319/50000 [25:54<41:52, 13.01it/s]

Epochs: 17317 | epoch avg. loss: 0.015 | test avg. loss: 4.731
Epochs: 17318 | epoch avg. loss: 0.010 | test avg. loss: 4.782
Epochs: 17319 | epoch avg. loss: 0.013 | test avg. loss: 4.806


 35%|███▍      | 17323/50000 [25:54<43:03, 12.65it/s]

Epochs: 17320 | epoch avg. loss: 0.022 | test avg. loss: 4.746
Epochs: 17321 | epoch avg. loss: 0.013 | test avg. loss: 4.689
Epochs: 17322 | epoch avg. loss: 0.017 | test avg. loss: 4.894


 35%|███▍      | 17325/50000 [25:54<43:15, 12.59it/s]

Epochs: 17323 | epoch avg. loss: 0.053 | test avg. loss: 4.705
Epochs: 17324 | epoch avg. loss: 0.063 | test avg. loss: 4.763
Epochs: 17325 | epoch avg. loss: 0.047 | test avg. loss: 5.110


 35%|███▍      | 17329/50000 [25:55<41:18, 13.18it/s]

Epochs: 17326 | epoch avg. loss: 0.126 | test avg. loss: 4.787
Epochs: 17327 | epoch avg. loss: 0.064 | test avg. loss: 4.741
Epochs: 17328 | epoch avg. loss: 0.086 | test avg. loss: 4.914


 35%|███▍      | 17333/50000 [25:55<37:29, 14.52it/s]

Epochs: 17329 | epoch avg. loss: 0.116 | test avg. loss: 4.830
Epochs: 17330 | epoch avg. loss: 0.039 | test avg. loss: 4.844
Epochs: 17331 | epoch avg. loss: 0.058 | test avg. loss: 5.420
Epochs: 17332 | epoch avg. loss: 0.285 | test avg. loss: 4.638


 35%|███▍      | 17337/50000 [25:55<36:18, 15.00it/s]

Epochs: 17333 | epoch avg. loss: 0.037 | test avg. loss: 4.601
Epochs: 17334 | epoch avg. loss: 0.032 | test avg. loss: 4.744
Epochs: 17335 | epoch avg. loss: 0.042 | test avg. loss: 4.892
Epochs: 17336 | epoch avg. loss: 0.367 | test avg. loss: 4.857


 35%|███▍      | 17339/50000 [25:55<35:53, 15.17it/s]

Epochs: 17337 | epoch avg. loss: 0.049 | test avg. loss: 4.882
Epochs: 17338 | epoch avg. loss: 0.038 | test avg. loss: 4.718
Epochs: 17339 | epoch avg. loss: 0.038 | test avg. loss: 4.710


 35%|███▍      | 17343/50000 [25:56<40:45, 13.35it/s]

Epochs: 17340 | epoch avg. loss: 0.024 | test avg. loss: 4.821
Epochs: 17341 | epoch avg. loss: 0.059 | test avg. loss: 4.821
Epochs: 17342 | epoch avg. loss: 0.024 | test avg. loss: 4.797


 35%|███▍      | 17347/50000 [25:56<37:57, 14.34it/s]

Epochs: 17343 | epoch avg. loss: 0.016 | test avg. loss: 4.883
Epochs: 17344 | epoch avg. loss: 0.044 | test avg. loss: 4.808
Epochs: 17345 | epoch avg. loss: 0.032 | test avg. loss: 4.595
Epochs: 17346 | epoch avg. loss: 0.046 | test avg. loss: 4.666


 35%|███▍      | 17351/50000 [25:56<36:25, 14.94it/s]

Epochs: 17347 | epoch avg. loss: 0.035 | test avg. loss: 4.845
Epochs: 17348 | epoch avg. loss: 0.050 | test avg. loss: 4.703
Epochs: 17349 | epoch avg. loss: 0.034 | test avg. loss: 4.949
Epochs: 17350 | epoch avg. loss: 0.088 | test avg. loss: 4.901


 35%|███▍      | 17353/50000 [25:56<36:23, 14.95it/s]

Epochs: 17351 | epoch avg. loss: 0.061 | test avg. loss: 4.816
Epochs: 17352 | epoch avg. loss: 0.126 | test avg. loss: 4.952
Epochs: 17353 | epoch avg. loss: 0.077 | test avg. loss: 4.709


 35%|███▍      | 17357/50000 [25:57<40:35, 13.40it/s]

Epochs: 17354 | epoch avg. loss: 0.098 | test avg. loss: 4.772
Epochs: 17355 | epoch avg. loss: 0.225 | test avg. loss: 5.002
Epochs: 17356 | epoch avg. loss: 0.177 | test avg. loss: 4.903


 35%|███▍      | 17359/50000 [25:57<42:08, 12.91it/s]

Epochs: 17357 | epoch avg. loss: 0.065 | test avg. loss: 4.748
Epochs: 17358 | epoch avg. loss: 0.071 | test avg. loss: 4.757
Epochs: 17359 | epoch avg. loss: 0.077 | test avg. loss: 4.872


 35%|███▍      | 17363/50000 [25:57<39:45, 13.68it/s]

Epochs: 17360 | epoch avg. loss: 0.079 | test avg. loss: 4.688
Epochs: 17361 | epoch avg. loss: 0.046 | test avg. loss: 4.767
Epochs: 17362 | epoch avg. loss: 0.068 | test avg. loss: 4.878


 35%|███▍      | 17365/50000 [25:57<40:00, 13.59it/s]

Epochs: 17363 | epoch avg. loss: 0.052 | test avg. loss: 4.736
Epochs: 17364 | epoch avg. loss: 0.130 | test avg. loss: 4.838
Epochs: 17365 | epoch avg. loss: 0.064 | test avg. loss: 5.174


 35%|███▍      | 17369/50000 [25:58<39:28, 13.78it/s]

Epochs: 17366 | epoch avg. loss: 0.141 | test avg. loss: 4.911
Epochs: 17367 | epoch avg. loss: 0.090 | test avg. loss: 4.914
Epochs: 17368 | epoch avg. loss: 0.037 | test avg. loss: 5.006


 35%|███▍      | 17371/50000 [25:58<38:13, 14.22it/s]

Epochs: 17369 | epoch avg. loss: 0.052 | test avg. loss: 4.764
Epochs: 17370 | epoch avg. loss: 0.106 | test avg. loss: 4.882
Epochs: 17371 | epoch avg. loss: 0.024 | test avg. loss: 5.192




Epochs: 17372 | epoch avg. loss: 0.065 | test avg. loss: 5.029
Epochs: 17373 | epoch avg. loss: 0.027 | test avg. loss: 4.905
Epochs: 17374 | epoch avg. loss: 0.052 | test avg. loss: 4.930


 35%|███▍      | 17377/50000 [25:58<38:15, 14.21it/s]

Epochs: 17375 | epoch avg. loss: 0.024 | test avg. loss: 5.055
Epochs: 17376 | epoch avg. loss: 0.044 | test avg. loss: 4.892
Epochs: 17377 | epoch avg. loss: 0.016 | test avg. loss: 4.924


 35%|███▍      | 17381/50000 [25:58<37:39, 14.44it/s]

Epochs: 17378 | epoch avg. loss: 0.026 | test avg. loss: 4.917
Epochs: 17379 | epoch avg. loss: 0.026 | test avg. loss: 4.772
Epochs: 17380 | epoch avg. loss: 0.023 | test avg. loss: 4.813


 35%|███▍      | 17383/50000 [25:59<39:22, 13.80it/s]

Epochs: 17381 | epoch avg. loss: 0.020 | test avg. loss: 4.866
Epochs: 17382 | epoch avg. loss: 0.018 | test avg. loss: 4.824
Epochs: 17383 | epoch avg. loss: 0.010 | test avg. loss: 4.942


 35%|███▍      | 17387/50000 [25:59<41:28, 13.10it/s]

Epochs: 17384 | epoch avg. loss: 0.032 | test avg. loss: 4.751
Epochs: 17385 | epoch avg. loss: 0.028 | test avg. loss: 4.740
Epochs: 17386 | epoch avg. loss: 0.023 | test avg. loss: 4.982


 35%|███▍      | 17389/50000 [25:59<43:17, 12.56it/s]

Epochs: 17387 | epoch avg. loss: 0.076 | test avg. loss: 4.862
Epochs: 17388 | epoch avg. loss: 0.016 | test avg. loss: 4.854
Epochs: 17389 | epoch avg. loss: 0.019 | test avg. loss: 4.836


 35%|███▍      | 17393/50000 [25:59<46:09, 11.77it/s]

Epochs: 17390 | epoch avg. loss: 0.017 | test avg. loss: 4.831
Epochs: 17391 | epoch avg. loss: 0.012 | test avg. loss: 4.860
Epochs: 17392 | epoch avg. loss: 0.019 | test avg. loss: 4.923


 35%|███▍      | 17395/50000 [26:00<47:54, 11.34it/s]

Epochs: 17393 | epoch avg. loss: 0.023 | test avg. loss: 4.892
Epochs: 17394 | epoch avg. loss: 0.037 | test avg. loss: 4.889
Epochs: 17395 | epoch avg. loss: 0.064 | test avg. loss: 4.976


 35%|███▍      | 17399/50000 [26:00<45:51, 11.85it/s]

Epochs: 17396 | epoch avg. loss: 0.056 | test avg. loss: 5.021
Epochs: 17397 | epoch avg. loss: 0.063 | test avg. loss: 4.808
Epochs: 17398 | epoch avg. loss: 0.017 | test avg. loss: 4.898


 35%|███▍      | 17399/50000 [26:00<45:51, 11.85it/s]

Epochs: 17399 | epoch avg. loss: 0.040 | test avg. loss: 4.947


 35%|███▍      | 17403/50000 [26:02<2:03:39,  4.39it/s]

Epochs: 17400 | epoch avg. loss: 0.033 | test avg. loss: 4.867
Epochs: 17401 | epoch avg. loss: 0.019 | test avg. loss: 4.974
Epochs: 17402 | epoch avg. loss: 0.012 | test avg. loss: 4.906


 35%|███▍      | 17405/50000 [26:02<1:40:34,  5.40it/s]

Epochs: 17403 | epoch avg. loss: 0.045 | test avg. loss: 4.862
Epochs: 17404 | epoch avg. loss: 0.051 | test avg. loss: 4.947
Epochs: 17405 | epoch avg. loss: 0.039 | test avg. loss: 4.978


 35%|███▍      | 17409/50000 [26:02<1:12:35,  7.48it/s]

Epochs: 17406 | epoch avg. loss: 0.038 | test avg. loss: 4.805
Epochs: 17407 | epoch avg. loss: 0.036 | test avg. loss: 4.859
Epochs: 17408 | epoch avg. loss: 0.015 | test avg. loss: 4.866


 35%|███▍      | 17411/50000 [26:02<1:03:23,  8.57it/s]

Epochs: 17409 | epoch avg. loss: 0.014 | test avg. loss: 4.852
Epochs: 17410 | epoch avg. loss: 0.017 | test avg. loss: 4.921
Epochs: 17411 | epoch avg. loss: 0.012 | test avg. loss: 4.926


 35%|███▍      | 17415/50000 [26:03<52:55, 10.26it/s]

Epochs: 17412 | epoch avg. loss: 0.015 | test avg. loss: 4.901
Epochs: 17413 | epoch avg. loss: 0.013 | test avg. loss: 4.910
Epochs: 17414 | epoch avg. loss: 0.018 | test avg. loss: 4.853


 35%|███▍      | 17417/50000 [26:03<53:38, 10.12it/s]

Epochs: 17415 | epoch avg. loss: 0.014 | test avg. loss: 4.794
Epochs: 17416 | epoch avg. loss: 0.043 | test avg. loss: 4.879
Epochs: 17417 | epoch avg. loss: 0.016 | test avg. loss: 5.016


 35%|███▍      | 17421/50000 [26:03<49:05, 11.06it/s]

Epochs: 17418 | epoch avg. loss: 0.031 | test avg. loss: 4.895
Epochs: 17419 | epoch avg. loss: 0.030 | test avg. loss: 4.805
Epochs: 17420 | epoch avg. loss: 0.032 | test avg. loss: 4.983


 35%|███▍      | 17423/50000 [26:03<47:02, 11.54it/s]

Epochs: 17421 | epoch avg. loss: 0.107 | test avg. loss: 4.951
Epochs: 17422 | epoch avg. loss: 0.044 | test avg. loss: 5.006
Epochs: 17423 | epoch avg. loss: 0.244 | test avg. loss: 4.927


 35%|███▍      | 17427/50000 [26:04<47:40, 11.39it/s]

Epochs: 17424 | epoch avg. loss: 0.048 | test avg. loss: 4.991
Epochs: 17425 | epoch avg. loss: 0.073 | test avg. loss: 4.633
Epochs: 17426 | epoch avg. loss: 0.244 | test avg. loss: 4.620


 35%|███▍      | 17429/50000 [26:04<45:58, 11.81it/s]

Epochs: 17427 | epoch avg. loss: 0.117 | test avg. loss: 5.518
Epochs: 17428 | epoch avg. loss: 0.452 | test avg. loss: 4.822
Epochs: 17429 | epoch avg. loss: 0.207 | test avg. loss: 5.114


 35%|███▍      | 17433/50000 [26:04<47:47, 11.36it/s]

Epochs: 17430 | epoch avg. loss: 0.216 | test avg. loss: 5.324
Epochs: 17431 | epoch avg. loss: 0.154 | test avg. loss: 5.215
Epochs: 17432 | epoch avg. loss: 0.089 | test avg. loss: 4.795


 35%|███▍      | 17435/50000 [26:04<45:20, 11.97it/s]

Epochs: 17433 | epoch avg. loss: 0.076 | test avg. loss: 4.699
Epochs: 17434 | epoch avg. loss: 0.064 | test avg. loss: 5.483
Epochs: 17435 | epoch avg. loss: 0.361 | test avg. loss: 4.740


 35%|███▍      | 17439/50000 [26:05<46:55, 11.56it/s]

Epochs: 17436 | epoch avg. loss: 0.150 | test avg. loss: 4.829
Epochs: 17437 | epoch avg. loss: 0.143 | test avg. loss: 5.503
Epochs: 17438 | epoch avg. loss: 0.364 | test avg. loss: 5.114


 35%|███▍      | 17441/50000 [26:05<46:06, 11.77it/s]

Epochs: 17439 | epoch avg. loss: 0.146 | test avg. loss: 5.428
Epochs: 17440 | epoch avg. loss: 0.760 | test avg. loss: 4.936
Epochs: 17441 | epoch avg. loss: 0.273 | test avg. loss: 5.875


 35%|███▍      | 17445/50000 [26:05<44:16, 12.26it/s]

Epochs: 17442 | epoch avg. loss: 0.898 | test avg. loss: 4.629
Epochs: 17443 | epoch avg. loss: 0.327 | test avg. loss: 4.940
Epochs: 17444 | epoch avg. loss: 0.247 | test avg. loss: 6.176


 35%|███▍      | 17447/50000 [26:05<44:57, 12.07it/s]

Epochs: 17445 | epoch avg. loss: 0.677 | test avg. loss: 4.837
Epochs: 17446 | epoch avg. loss: 0.113 | test avg. loss: 4.664
Epochs: 17447 | epoch avg. loss: 0.115 | test avg. loss: 5.091


 35%|███▍      | 17451/50000 [26:06<43:59, 12.33it/s]

Epochs: 17448 | epoch avg. loss: 0.223 | test avg. loss: 4.695
Epochs: 17449 | epoch avg. loss: 0.456 | test avg. loss: 4.834
Epochs: 17450 | epoch avg. loss: 0.599 | test avg. loss: 5.732


 35%|███▍      | 17453/50000 [26:06<43:00, 12.61it/s]

Epochs: 17451 | epoch avg. loss: 0.510 | test avg. loss: 5.242
Epochs: 17452 | epoch avg. loss: 0.645 | test avg. loss: 5.181
Epochs: 17453 | epoch avg. loss: 0.232 | test avg. loss: 5.668


 35%|███▍      | 17457/50000 [26:06<41:57, 12.93it/s]

Epochs: 17454 | epoch avg. loss: 0.308 | test avg. loss: 5.222
Epochs: 17455 | epoch avg. loss: 0.701 | test avg. loss: 5.185
Epochs: 17456 | epoch avg. loss: 0.476 | test avg. loss: 5.507


 35%|███▍      | 17459/50000 [26:06<40:33, 13.37it/s]

Epochs: 17457 | epoch avg. loss: 0.293 | test avg. loss: 5.276
Epochs: 17458 | epoch avg. loss: 0.710 | test avg. loss: 5.298
Epochs: 17459 | epoch avg. loss: 0.507 | test avg. loss: 5.843


 35%|███▍      | 17463/50000 [26:07<43:24, 12.49it/s]

Epochs: 17460 | epoch avg. loss: 0.422 | test avg. loss: 5.966
Epochs: 17461 | epoch avg. loss: 1.272 | test avg. loss: 5.238
Epochs: 17462 | epoch avg. loss: 0.716 | test avg. loss: 5.849


 35%|███▍      | 17465/50000 [26:07<42:24, 12.79it/s]

Epochs: 17463 | epoch avg. loss: 0.796 | test avg. loss: 4.882
Epochs: 17464 | epoch avg. loss: 0.868 | test avg. loss: 5.073
Epochs: 17465 | epoch avg. loss: 0.956 | test avg. loss: 5.560


 35%|███▍      | 17469/50000 [26:07<44:39, 12.14it/s]

Epochs: 17466 | epoch avg. loss: 0.639 | test avg. loss: 6.367
Epochs: 17467 | epoch avg. loss: 1.721 | test avg. loss: 6.430
Epochs: 17468 | epoch avg. loss: 1.444 | test avg. loss: 4.941


 35%|███▍      | 17471/50000 [26:07<45:41, 11.86it/s]

Epochs: 17469 | epoch avg. loss: 0.807 | test avg. loss: 6.273
Epochs: 17470 | epoch avg. loss: 1.867 | test avg. loss: 6.151
Epochs: 17471 | epoch avg. loss: 1.114 | test avg. loss: 4.525


 35%|███▍      | 17475/50000 [26:08<44:50, 12.09it/s]

Epochs: 17472 | epoch avg. loss: 0.794 | test avg. loss: 4.526
Epochs: 17473 | epoch avg. loss: 0.334 | test avg. loss: 5.194
Epochs: 17474 | epoch avg. loss: 0.349 | test avg. loss: 4.855


 35%|███▍      | 17477/50000 [26:08<42:46, 12.67it/s]

Epochs: 17475 | epoch avg. loss: 0.524 | test avg. loss: 5.641
Epochs: 17476 | epoch avg. loss: 0.807 | test avg. loss: 4.614
Epochs: 17477 | epoch avg. loss: 0.424 | test avg. loss: 4.689


 35%|███▍      | 17481/50000 [26:08<40:33, 13.36it/s]

Epochs: 17478 | epoch avg. loss: 0.328 | test avg. loss: 5.921
Epochs: 17479 | epoch avg. loss: 0.653 | test avg. loss: 5.129
Epochs: 17480 | epoch avg. loss: 0.471 | test avg. loss: 5.199
Epochs: 17481 | epoch avg. loss: 0.175 | test avg. loss: 4.937


 35%|███▍      | 17485/50000 [26:08<39:04, 13.87it/s]

Epochs: 17482 | epoch avg. loss: 0.284 | test avg. loss: 5.047
Epochs: 17483 | epoch avg. loss: 0.194 | test avg. loss: 5.362
Epochs: 17484 | epoch avg. loss: 0.210 | test avg. loss: 4.865


 35%|███▍      | 17487/50000 [26:09<39:58, 13.56it/s]

Epochs: 17485 | epoch avg. loss: 0.178 | test avg. loss: 5.044
Epochs: 17486 | epoch avg. loss: 0.227 | test avg. loss: 4.688
Epochs: 17487 | epoch avg. loss: 0.054 | test avg. loss: 4.694


 35%|███▍      | 17491/50000 [26:09<42:00, 12.90it/s]

Epochs: 17488 | epoch avg. loss: 0.041 | test avg. loss: 4.964
Epochs: 17489 | epoch avg. loss: 0.044 | test avg. loss: 4.886
Epochs: 17490 | epoch avg. loss: 0.066 | test avg. loss: 5.214


 35%|███▍      | 17493/50000 [26:09<40:19, 13.44it/s]

Epochs: 17491 | epoch avg. loss: 0.102 | test avg. loss: 4.824
Epochs: 17492 | epoch avg. loss: 0.077 | test avg. loss: 4.865
Epochs: 17493 | epoch avg. loss: 0.084 | test avg. loss: 4.916


                                                     

Epochs: 17494 | epoch avg. loss: 0.104 | test avg. loss: 4.648
Epochs: 17495 | epoch avg. loss: 0.201 | test avg. loss: 5.174
Epochs: 17496 | epoch avg. loss: 0.231 | test avg. loss: 4.891


 35%|███▍      | 17499/50000 [26:09<39:47, 13.61it/s]

Epochs: 17497 | epoch avg. loss: 0.418 | test avg. loss: 5.067
Epochs: 17498 | epoch avg. loss: 0.262 | test avg. loss: 5.062
Epochs: 17499 | epoch avg. loss: 0.198 | test avg. loss: 4.976


 35%|███▌      | 17503/50000 [26:11<1:56:37,  4.64it/s]

Epochs: 17500 | epoch avg. loss: 0.289 | test avg. loss: 5.728
Epochs: 17501 | epoch avg. loss: 0.439 | test avg. loss: 4.803
Epochs: 17502 | epoch avg. loss: 0.210 | test avg. loss: 4.790
Epochs: 17503 | epoch avg. loss: 0.046 | test avg. loss: 4.865


 35%|███▌      | 17507/50000 [26:11<1:14:19,  7.29it/s]

Epochs: 17504 | epoch avg. loss: 0.041 | test avg. loss: 4.732
Epochs: 17505 | epoch avg. loss: 0.050 | test avg. loss: 4.735
Epochs: 17506 | epoch avg. loss: 0.014 | test avg. loss: 4.691
Epochs: 17507 | epoch avg. loss: 0.022 | test avg. loss: 4.689


                                                     

Epochs: 17508 | epoch avg. loss: 0.026 | test avg. loss: 4.768
Epochs: 17509 | epoch avg. loss: 0.014 | test avg. loss: 4.785
Epochs: 17510 | epoch avg. loss: 0.016 | test avg. loss: 4.847


 35%|███▌      | 17515/50000 [26:12<45:23, 11.93it/s]

Epochs: 17511 | epoch avg. loss: 0.012 | test avg. loss: 4.766
Epochs: 17512 | epoch avg. loss: 0.018 | test avg. loss: 4.836
Epochs: 17513 | epoch avg. loss: 0.023 | test avg. loss: 4.728
Epochs: 17514 | epoch avg. loss: 0.025 | test avg. loss: 4.773


 35%|███▌      | 17519/50000 [26:12<40:09, 13.48it/s]

Epochs: 17515 | epoch avg. loss: 0.024 | test avg. loss: 4.859
Epochs: 17516 | epoch avg. loss: 0.025 | test avg. loss: 4.826
Epochs: 17517 | epoch avg. loss: 0.013 | test avg. loss: 4.874
Epochs: 17518 | epoch avg. loss: 0.014 | test avg. loss: 4.798


 35%|███▌      | 17523/50000 [26:12<37:40, 14.37it/s]

Epochs: 17519 | epoch avg. loss: 0.017 | test avg. loss: 4.773
Epochs: 17520 | epoch avg. loss: 0.018 | test avg. loss: 4.913
Epochs: 17521 | epoch avg. loss: 0.040 | test avg. loss: 4.775
Epochs: 17522 | epoch avg. loss: 0.031 | test avg. loss: 4.841


 35%|███▌      | 17527/50000 [26:13<36:40, 14.76it/s]

Epochs: 17523 | epoch avg. loss: 0.016 | test avg. loss: 4.881
Epochs: 17524 | epoch avg. loss: 0.015 | test avg. loss: 4.811
Epochs: 17525 | epoch avg. loss: 0.010 | test avg. loss: 4.835
Epochs: 17526 | epoch avg. loss: 0.013 | test avg. loss: 4.802


 35%|███▌      | 17531/50000 [26:13<35:13, 15.36it/s]

Epochs: 17527 | epoch avg. loss: 0.010 | test avg. loss: 4.856
Epochs: 17528 | epoch avg. loss: 0.014 | test avg. loss: 4.828
Epochs: 17529 | epoch avg. loss: 0.012 | test avg. loss: 4.859
Epochs: 17530 | epoch avg. loss: 0.013 | test avg. loss: 4.903


 35%|███▌      | 17535/50000 [26:13<35:25, 15.28it/s]

Epochs: 17531 | epoch avg. loss: 0.015 | test avg. loss: 4.854
Epochs: 17532 | epoch avg. loss: 0.009 | test avg. loss: 4.834
Epochs: 17533 | epoch avg. loss: 0.010 | test avg. loss: 4.823
Epochs: 17534 | epoch avg. loss: 0.013 | test avg. loss: 4.831


 35%|███▌      | 17539/50000 [26:13<35:21, 15.30it/s]

Epochs: 17535 | epoch avg. loss: 0.014 | test avg. loss: 4.776
Epochs: 17536 | epoch avg. loss: 0.013 | test avg. loss: 4.786
Epochs: 17537 | epoch avg. loss: 0.012 | test avg. loss: 4.891
Epochs: 17538 | epoch avg. loss: 0.017 | test avg. loss: 4.813


                                                     

Epochs: 17539 | epoch avg. loss: 0.018 | test avg. loss: 4.853
Epochs: 17540 | epoch avg. loss: 0.019 | test avg. loss: 4.892
Epochs: 17541 | epoch avg. loss: 0.021 | test avg. loss: 4.757




Epochs: 17542 | epoch avg. loss: 0.040 | test avg. loss: 4.807
Epochs: 17543 | epoch avg. loss: 0.013 | test avg. loss: 4.786
Epochs: 17544 | epoch avg. loss: 0.014 | test avg. loss: 4.814


 35%|███▌      | 17547/50000 [26:14<36:37, 14.77it/s]

Epochs: 17545 | epoch avg. loss: 0.017 | test avg. loss: 4.820
Epochs: 17546 | epoch avg. loss: 0.015 | test avg. loss: 4.769
Epochs: 17547 | epoch avg. loss: 0.020 | test avg. loss: 4.827
Epochs: 17548 | epoch avg. loss: 0.010 | test avg. loss: 4.782


 35%|███▌      | 17553/50000 [26:14<34:53, 15.50it/s]

Epochs: 17549 | epoch avg. loss: 0.018 | test avg. loss: 4.843
Epochs: 17550 | epoch avg. loss: 0.020 | test avg. loss: 4.944
Epochs: 17551 | epoch avg. loss: 0.030 | test avg. loss: 4.782
Epochs: 17552 | epoch avg. loss: 0.109 | test avg. loss: 4.955


                                                     

Epochs: 17553 | epoch avg. loss: 0.151 | test avg. loss: 4.833
Epochs: 17554 | epoch avg. loss: 0.090 | test avg. loss: 4.974
Epochs: 17555 | epoch avg. loss: 0.145 | test avg. loss: 5.510


 35%|███▌      | 17559/50000 [26:15<34:54, 15.49it/s]

Epochs: 17556 | epoch avg. loss: 0.202 | test avg. loss: 4.831
Epochs: 17557 | epoch avg. loss: 0.230 | test avg. loss: 4.718
Epochs: 17558 | epoch avg. loss: 0.242 | test avg. loss: 4.902
Epochs: 17559 | epoch avg. loss: 0.270 | test avg. loss: 4.792


                                                     

Epochs: 17560 | epoch avg. loss: 0.459 | test avg. loss: 5.308
Epochs: 17561 | epoch avg. loss: 0.345 | test avg. loss: 4.848
Epochs: 17562 | epoch avg. loss: 0.189 | test avg. loss: 4.969


 35%|███▌      | 17567/50000 [26:15<35:33, 15.20it/s]

Epochs: 17563 | epoch avg. loss: 0.156 | test avg. loss: 5.459
Epochs: 17564 | epoch avg. loss: 0.217 | test avg. loss: 4.743
Epochs: 17565 | epoch avg. loss: 0.098 | test avg. loss: 4.649
Epochs: 17566 | epoch avg. loss: 0.094 | test avg. loss: 4.730


 35%|███▌      | 17569/50000 [26:15<36:19, 14.88it/s]

Epochs: 17567 | epoch avg. loss: 0.047 | test avg. loss: 4.859
Epochs: 17568 | epoch avg. loss: 0.052 | test avg. loss: 5.098
Epochs: 17569 | epoch avg. loss: 0.057 | test avg. loss: 4.942


 35%|███▌      | 17573/50000 [26:16<35:54, 15.05it/s]

Epochs: 17570 | epoch avg. loss: 0.055 | test avg. loss: 4.895
Epochs: 17571 | epoch avg. loss: 0.054 | test avg. loss: 4.825
Epochs: 17572 | epoch avg. loss: 0.026 | test avg. loss: 4.784
Epochs: 17573 | epoch avg. loss: 0.019 | test avg. loss: 4.943


 35%|███▌      | 17577/50000 [26:16<34:56, 15.46it/s]

Epochs: 17574 | epoch avg. loss: 0.021 | test avg. loss: 4.850
Epochs: 17575 | epoch avg. loss: 0.038 | test avg. loss: 4.934
Epochs: 17576 | epoch avg. loss: 0.028 | test avg. loss: 4.918
Epochs: 17577 | epoch avg. loss: 0.019 | test avg. loss: 4.814


 35%|███▌      | 17581/50000 [26:16<35:55, 15.04it/s]

Epochs: 17578 | epoch avg. loss: 0.018 | test avg. loss: 4.874
Epochs: 17579 | epoch avg. loss: 0.022 | test avg. loss: 4.807
Epochs: 17580 | epoch avg. loss: 0.011 | test avg. loss: 4.830


 35%|███▌      | 17585/50000 [26:16<35:35, 15.18it/s]

Epochs: 17581 | epoch avg. loss: 0.010 | test avg. loss: 4.824
Epochs: 17582 | epoch avg. loss: 0.011 | test avg. loss: 4.893
Epochs: 17583 | epoch avg. loss: 0.020 | test avg. loss: 4.881
Epochs: 17584 | epoch avg. loss: 0.017 | test avg. loss: 4.794


 35%|███▌      | 17589/50000 [26:17<35:33, 15.19it/s]

Epochs: 17585 | epoch avg. loss: 0.035 | test avg. loss: 4.915
Epochs: 17586 | epoch avg. loss: 0.034 | test avg. loss: 4.835
Epochs: 17587 | epoch avg. loss: 0.023 | test avg. loss: 4.850
Epochs: 17588 | epoch avg. loss: 0.022 | test avg. loss: 4.821


 35%|███▌      | 17591/50000 [26:17<35:23, 15.26it/s]

Epochs: 17589 | epoch avg. loss: 0.018 | test avg. loss: 4.788
Epochs: 17590 | epoch avg. loss: 0.017 | test avg. loss: 4.839
Epochs: 17591 | epoch avg. loss: 0.018 | test avg. loss: 4.931


 35%|███▌      | 17595/50000 [26:17<36:02, 14.98it/s]

Epochs: 17592 | epoch avg. loss: 0.016 | test avg. loss: 4.960
Epochs: 17593 | epoch avg. loss: 0.014 | test avg. loss: 4.877
Epochs: 17594 | epoch avg. loss: 0.009 | test avg. loss: 4.784
Epochs: 17595 | epoch avg. loss: 0.012 | test avg. loss: 4.808


 35%|███▌      | 17599/50000 [26:17<40:44, 13.26it/s]

Epochs: 17596 | epoch avg. loss: 0.017 | test avg. loss: 4.803
Epochs: 17597 | epoch avg. loss: 0.013 | test avg. loss: 4.837
Epochs: 17598 | epoch avg. loss: 0.015 | test avg. loss: 4.886


 35%|███▌      | 17599/50000 [26:17<40:44, 13.26it/s]

Epochs: 17599 | epoch avg. loss: 0.013 | test avg. loss: 4.871


 35%|███▌      | 17603/50000 [26:19<2:05:55,  4.29it/s]

Epochs: 17600 | epoch avg. loss: 0.013 | test avg. loss: 4.906
Epochs: 17601 | epoch avg. loss: 0.014 | test avg. loss: 4.807
Epochs: 17602 | epoch avg. loss: 0.017 | test avg. loss: 4.780


 35%|███▌      | 17605/50000 [26:19<1:40:44,  5.36it/s]

Epochs: 17603 | epoch avg. loss: 0.015 | test avg. loss: 4.912
Epochs: 17604 | epoch avg. loss: 0.026 | test avg. loss: 4.792
Epochs: 17605 | epoch avg. loss: 0.026 | test avg. loss: 4.852


 35%|███▌      | 17609/50000 [26:20<1:08:26,  7.89it/s]

Epochs: 17606 | epoch avg. loss: 0.023 | test avg. loss: 4.998
Epochs: 17607 | epoch avg. loss: 0.043 | test avg. loss: 4.834
Epochs: 17608 | epoch avg. loss: 0.062 | test avg. loss: 4.995


 35%|███▌      | 17611/50000 [26:20<57:57,  9.31it/s]

Epochs: 17609 | epoch avg. loss: 0.089 | test avg. loss: 4.869
Epochs: 17610 | epoch avg. loss: 0.054 | test avg. loss: 4.792
Epochs: 17611 | epoch avg. loss: 0.057 | test avg. loss: 5.105
Epochs: 17612 | epoch avg. loss: 0.091 | test avg. loss: 4.764


 35%|███▌      | 17615/50000 [26:20<47:52, 11.27it/s]

Epochs: 17613 | epoch avg. loss: 0.099 | test avg. loss: 4.873
Epochs: 17614 | epoch avg. loss: 0.043 | test avg. loss: 5.048
Epochs: 17615 | epoch avg. loss: 0.062 | test avg. loss: 4.792


 35%|███▌      | 17619/50000 [26:20<45:54, 11.76it/s]

Epochs: 17616 | epoch avg. loss: 0.091 | test avg. loss: 4.889
Epochs: 17617 | epoch avg. loss: 0.047 | test avg. loss: 4.789
Epochs: 17618 | epoch avg. loss: 0.041 | test avg. loss: 4.737


 35%|███▌      | 17621/50000 [26:21<46:52, 11.51it/s]

Epochs: 17619 | epoch avg. loss: 0.039 | test avg. loss: 4.925
Epochs: 17620 | epoch avg. loss: 0.065 | test avg. loss: 4.809
Epochs: 17621 | epoch avg. loss: 0.082 | test avg. loss: 4.797


 35%|███▌      | 17625/50000 [26:21<43:42, 12.35it/s]

Epochs: 17622 | epoch avg. loss: 0.049 | test avg. loss: 5.020
Epochs: 17623 | epoch avg. loss: 0.114 | test avg. loss: 4.690
Epochs: 17624 | epoch avg. loss: 0.044 | test avg. loss: 4.785


 35%|███▌      | 17627/50000 [26:21<42:33, 12.68it/s]

Epochs: 17625 | epoch avg. loss: 0.039 | test avg. loss: 4.880
Epochs: 17626 | epoch avg. loss: 0.030 | test avg. loss: 4.856
Epochs: 17627 | epoch avg. loss: 0.042 | test avg. loss: 5.017


 35%|███▌      | 17631/50000 [26:21<39:34, 13.63it/s]

Epochs: 17628 | epoch avg. loss: 0.045 | test avg. loss: 4.843
Epochs: 17629 | epoch avg. loss: 0.028 | test avg. loss: 4.791
Epochs: 17630 | epoch avg. loss: 0.021 | test avg. loss: 4.806


 35%|███▌      | 17633/50000 [26:21<40:20, 13.37it/s]

Epochs: 17631 | epoch avg. loss: 0.016 | test avg. loss: 4.835
Epochs: 17632 | epoch avg. loss: 0.016 | test avg. loss: 4.816
Epochs: 17633 | epoch avg. loss: 0.013 | test avg. loss: 4.820


 35%|███▌      | 17637/50000 [26:22<38:21, 14.06it/s]

Epochs: 17634 | epoch avg. loss: 0.013 | test avg. loss: 4.841
Epochs: 17635 | epoch avg. loss: 0.010 | test avg. loss: 4.853
Epochs: 17636 | epoch avg. loss: 0.009 | test avg. loss: 4.807
Epochs: 17637 | epoch avg. loss: 0.012 | test avg. loss: 4.859


 35%|███▌      | 17641/50000 [26:22<35:37, 15.14it/s]

Epochs: 17638 | epoch avg. loss: 0.031 | test avg. loss: 4.915
Epochs: 17639 | epoch avg. loss: 0.023 | test avg. loss: 4.827
Epochs: 17640 | epoch avg. loss: 0.038 | test avg. loss: 4.992
Epochs: 17641 | epoch avg. loss: 0.047 | test avg. loss: 4.764


 35%|███▌      | 17645/50000 [26:22<34:39, 15.56it/s]

Epochs: 17642 | epoch avg. loss: 0.060 | test avg. loss: 4.732
Epochs: 17643 | epoch avg. loss: 0.042 | test avg. loss: 5.098
Epochs: 17644 | epoch avg. loss: 0.109 | test avg. loss: 4.835
Epochs: 17645 | epoch avg. loss: 0.269 | test avg. loss: 4.852


 35%|███▌      | 17649/50000 [26:22<35:13, 15.30it/s]

Epochs: 17646 | epoch avg. loss: 0.235 | test avg. loss: 4.990
Epochs: 17647 | epoch avg. loss: 0.183 | test avg. loss: 4.774
Epochs: 17648 | epoch avg. loss: 0.238 | test avg. loss: 5.139


 35%|███▌      | 17651/50000 [26:23<35:54, 15.02it/s]

Epochs: 17649 | epoch avg. loss: 0.311 | test avg. loss: 4.852
Epochs: 17650 | epoch avg. loss: 0.114 | test avg. loss: 4.755
Epochs: 17651 | epoch avg. loss: 0.147 | test avg. loss: 5.744


 35%|███▌      | 17655/50000 [26:23<35:46, 15.07it/s]

Epochs: 17652 | epoch avg. loss: 0.488 | test avg. loss: 4.850
Epochs: 17653 | epoch avg. loss: 0.248 | test avg. loss: 4.951
Epochs: 17654 | epoch avg. loss: 0.137 | test avg. loss: 5.278
Epochs: 17655 | epoch avg. loss: 0.153 | test avg. loss: 4.920


 35%|███▌      | 17659/50000 [26:23<34:05, 15.81it/s]

Epochs: 17656 | epoch avg. loss: 0.208 | test avg. loss: 4.989
Epochs: 17657 | epoch avg. loss: 0.103 | test avg. loss: 4.831
Epochs: 17658 | epoch avg. loss: 0.069 | test avg. loss: 4.670
Epochs: 17659 | epoch avg. loss: 0.142 | test avg. loss: 4.874




Epochs: 17660 | epoch avg. loss: 0.055 | test avg. loss: 4.988
Epochs: 17661 | epoch avg. loss: 0.045 | test avg. loss: 4.856
Epochs: 17662 | epoch avg. loss: 0.036 | test avg. loss: 4.972


 35%|███▌      | 17667/50000 [26:24<34:08, 15.78it/s]

Epochs: 17663 | epoch avg. loss: 0.018 | test avg. loss: 4.902
Epochs: 17664 | epoch avg. loss: 0.034 | test avg. loss: 4.985
Epochs: 17665 | epoch avg. loss: 0.039 | test avg. loss: 4.902
Epochs: 17666 | epoch avg. loss: 0.025 | test avg. loss: 4.760


 35%|███▌      | 17669/50000 [26:24<36:16, 14.85it/s]

Epochs: 17667 | epoch avg. loss: 0.046 | test avg. loss: 4.898
Epochs: 17668 | epoch avg. loss: 0.074 | test avg. loss: 4.817
Epochs: 17669 | epoch avg. loss: 0.024 | test avg. loss: 4.805


 35%|███▌      | 17673/50000 [26:24<40:46, 13.21it/s]

Epochs: 17670 | epoch avg. loss: 0.030 | test avg. loss: 4.980
Epochs: 17671 | epoch avg. loss: 0.043 | test avg. loss: 4.893
Epochs: 17672 | epoch avg. loss: 0.023 | test avg. loss: 4.866


 35%|███▌      | 17675/50000 [26:24<42:46, 12.59it/s]

Epochs: 17673 | epoch avg. loss: 0.021 | test avg. loss: 4.926
Epochs: 17674 | epoch avg. loss: 0.034 | test avg. loss: 4.748
Epochs: 17675 | epoch avg. loss: 0.069 | test avg. loss: 4.800


 35%|███▌      | 17679/50000 [26:25<44:51, 12.01it/s]

Epochs: 17676 | epoch avg. loss: 0.019 | test avg. loss: 4.960
Epochs: 17677 | epoch avg. loss: 0.022 | test avg. loss: 4.850
Epochs: 17678 | epoch avg. loss: 0.041 | test avg. loss: 4.847


 35%|███▌      | 17681/50000 [26:25<44:28, 12.11it/s]

Epochs: 17679 | epoch avg. loss: 0.028 | test avg. loss: 5.080
Epochs: 17680 | epoch avg. loss: 0.098 | test avg. loss: 4.723
Epochs: 17681 | epoch avg. loss: 0.053 | test avg. loss: 4.792


 35%|███▌      | 17685/50000 [26:25<45:04, 11.95it/s]

Epochs: 17682 | epoch avg. loss: 0.038 | test avg. loss: 4.977
Epochs: 17683 | epoch avg. loss: 0.061 | test avg. loss: 4.868
Epochs: 17684 | epoch avg. loss: 0.035 | test avg. loss: 5.096


 35%|███▌      | 17687/50000 [26:25<44:12, 12.18it/s]

Epochs: 17685 | epoch avg. loss: 0.082 | test avg. loss: 5.041
Epochs: 17686 | epoch avg. loss: 0.055 | test avg. loss: 4.748
Epochs: 17687 | epoch avg. loss: 0.045 | test avg. loss: 4.739


 35%|███▌      | 17691/50000 [26:26<44:10, 12.19it/s]

Epochs: 17688 | epoch avg. loss: 0.015 | test avg. loss: 4.874
Epochs: 17689 | epoch avg. loss: 0.028 | test avg. loss: 4.865
Epochs: 17690 | epoch avg. loss: 0.015 | test avg. loss: 4.832


 35%|███▌      | 17693/50000 [26:26<46:06, 11.68it/s]

Epochs: 17691 | epoch avg. loss: 0.021 | test avg. loss: 4.835
Epochs: 17692 | epoch avg. loss: 0.012 | test avg. loss: 4.924
Epochs: 17693 | epoch avg. loss: 0.030 | test avg. loss: 4.850


 35%|███▌      | 17697/50000 [26:26<47:31, 11.33it/s]

Epochs: 17694 | epoch avg. loss: 0.012 | test avg. loss: 4.834
Epochs: 17695 | epoch avg. loss: 0.013 | test avg. loss: 4.879
Epochs: 17696 | epoch avg. loss: 0.012 | test avg. loss: 4.895


 35%|███▌      | 17699/50000 [26:26<47:16, 11.39it/s]

Epochs: 17697 | epoch avg. loss: 0.011 | test avg. loss: 4.828
Epochs: 17698 | epoch avg. loss: 0.022 | test avg. loss: 4.837
Epochs: 17699 | epoch avg. loss: 0.011 | test avg. loss: 4.883


 35%|███▌      | 17703/50000 [26:28<1:59:38,  4.50it/s]

Epochs: 17700 | epoch avg. loss: 0.015 | test avg. loss: 4.788
Epochs: 17701 | epoch avg. loss: 0.041 | test avg. loss: 4.892
Epochs: 17702 | epoch avg. loss: 0.020 | test avg. loss: 4.919


 35%|███▌      | 17705/50000 [26:28<1:36:45,  5.56it/s]

Epochs: 17703 | epoch avg. loss: 0.023 | test avg. loss: 4.869
Epochs: 17704 | epoch avg. loss: 0.026 | test avg. loss: 4.951
Epochs: 17705 | epoch avg. loss: 0.043 | test avg. loss: 4.754


 35%|███▌      | 17709/50000 [26:28<1:07:04,  8.02it/s]

Epochs: 17706 | epoch avg. loss: 0.028 | test avg. loss: 4.820
Epochs: 17707 | epoch avg. loss: 0.023 | test avg. loss: 4.885
Epochs: 17708 | epoch avg. loss: 0.025 | test avg. loss: 4.749


 35%|███▌      | 17713/50000 [26:29<50:14, 10.71it/s]

Epochs: 17709 | epoch avg. loss: 0.054 | test avg. loss: 4.879
Epochs: 17710 | epoch avg. loss: 0.012 | test avg. loss: 4.919
Epochs: 17711 | epoch avg. loss: 0.012 | test avg. loss: 4.862
Epochs: 17712 | epoch avg. loss: 0.015 | test avg. loss: 4.834




Epochs: 17713 | epoch avg. loss: 0.012 | test avg. loss: 4.798
Epochs: 17714 | epoch avg. loss: 0.013 | test avg. loss: 4.866
Epochs: 17715 | epoch avg. loss: 0.026 | test avg. loss: 4.814


 35%|███▌      | 17719/50000 [26:29<41:39, 12.91it/s]

Epochs: 17716 | epoch avg. loss: 0.027 | test avg. loss: 4.823
Epochs: 17717 | epoch avg. loss: 0.081 | test avg. loss: 4.952
Epochs: 17718 | epoch avg. loss: 0.057 | test avg. loss: 5.049




Epochs: 17719 | epoch avg. loss: 0.093 | test avg. loss: 4.769
Epochs: 17720 | epoch avg. loss: 0.095 | test avg. loss: 4.898
Epochs: 17721 | epoch avg. loss: 0.041 | test avg. loss: 5.286


 35%|███▌      | 17725/50000 [26:30<39:29, 13.62it/s]

Epochs: 17722 | epoch avg. loss: 0.107 | test avg. loss: 4.884
Epochs: 17723 | epoch avg. loss: 0.070 | test avg. loss: 4.757
Epochs: 17724 | epoch avg. loss: 0.050 | test avg. loss: 4.845


 35%|███▌      | 17727/50000 [26:30<37:48, 14.22it/s]

Epochs: 17725 | epoch avg. loss: 0.064 | test avg. loss: 4.718
Epochs: 17726 | epoch avg. loss: 0.023 | test avg. loss: 4.748
Epochs: 17727 | epoch avg. loss: 0.022 | test avg. loss: 5.000


 35%|███▌      | 17731/50000 [26:30<37:50, 14.21it/s]

Epochs: 17728 | epoch avg. loss: 0.052 | test avg. loss: 4.790
Epochs: 17729 | epoch avg. loss: 0.053 | test avg. loss: 4.917
Epochs: 17730 | epoch avg. loss: 0.066 | test avg. loss: 5.036
Epochs: 17731 | epoch avg. loss: 0.072 | test avg. loss: 4.793


 35%|███▌      | 17735/50000 [26:30<36:19, 14.80it/s]

Epochs: 17732 | epoch avg. loss: 0.024 | test avg. loss: 4.766
Epochs: 17733 | epoch avg. loss: 0.014 | test avg. loss: 4.868
Epochs: 17734 | epoch avg. loss: 0.016 | test avg. loss: 4.794
Epochs: 17735 | epoch avg. loss: 0.041 | test avg. loss: 4.819


 35%|███▌      | 17739/50000 [26:30<38:29, 13.97it/s]

Epochs: 17736 | epoch avg. loss: 0.025 | test avg. loss: 5.017
Epochs: 17737 | epoch avg. loss: 0.058 | test avg. loss: 4.762
Epochs: 17738 | epoch avg. loss: 0.093 | test avg. loss: 4.858


 35%|███▌      | 17741/50000 [26:31<39:18, 13.68it/s]

Epochs: 17739 | epoch avg. loss: 0.096 | test avg. loss: 5.020
Epochs: 17740 | epoch avg. loss: 0.078 | test avg. loss: 5.002
Epochs: 17741 | epoch avg. loss: 0.396 | test avg. loss: 4.956


 35%|███▌      | 17745/50000 [26:31<38:17, 14.04it/s]

Epochs: 17742 | epoch avg. loss: 0.138 | test avg. loss: 5.043
Epochs: 17743 | epoch avg. loss: 0.118 | test avg. loss: 4.981
Epochs: 17744 | epoch avg. loss: 0.427 | test avg. loss: 4.948


 35%|███▌      | 17747/50000 [26:31<40:54, 13.14it/s]

Epochs: 17745 | epoch avg. loss: 0.217 | test avg. loss: 5.046
Epochs: 17746 | epoch avg. loss: 0.159 | test avg. loss: 5.354
Epochs: 17747 | epoch avg. loss: 0.423 | test avg. loss: 5.846


 36%|███▌      | 17751/50000 [26:31<41:58, 12.80it/s]

Epochs: 17748 | epoch avg. loss: 0.864 | test avg. loss: 4.872
Epochs: 17749 | epoch avg. loss: 0.269 | test avg. loss: 5.273
Epochs: 17750 | epoch avg. loss: 0.742 | test avg. loss: 6.069


 36%|███▌      | 17753/50000 [26:32<41:32, 12.94it/s]

Epochs: 17751 | epoch avg. loss: 0.834 | test avg. loss: 5.097
Epochs: 17752 | epoch avg. loss: 0.242 | test avg. loss: 4.907
Epochs: 17753 | epoch avg. loss: 0.181 | test avg. loss: 5.229


 36%|███▌      | 17757/50000 [26:32<39:25, 13.63it/s]

Epochs: 17754 | epoch avg. loss: 0.291 | test avg. loss: 4.696
Epochs: 17755 | epoch avg. loss: 0.320 | test avg. loss: 4.739
Epochs: 17756 | epoch avg. loss: 0.198 | test avg. loss: 5.202


 36%|███▌      | 17759/50000 [26:32<38:32, 13.94it/s]

Epochs: 17757 | epoch avg. loss: 0.265 | test avg. loss: 4.837
Epochs: 17758 | epoch avg. loss: 0.102 | test avg. loss: 4.694
Epochs: 17759 | epoch avg. loss: 0.095 | test avg. loss: 4.862


 36%|███▌      | 17763/50000 [26:32<39:38, 13.55it/s]

Epochs: 17760 | epoch avg. loss: 0.123 | test avg. loss: 4.678
Epochs: 17761 | epoch avg. loss: 0.054 | test avg. loss: 4.757
Epochs: 17762 | epoch avg. loss: 0.071 | test avg. loss: 5.035


 36%|███▌      | 17765/50000 [26:33<41:15, 13.02it/s]

Epochs: 17763 | epoch avg. loss: 0.076 | test avg. loss: 5.030
Epochs: 17764 | epoch avg. loss: 0.037 | test avg. loss: 4.836
Epochs: 17765 | epoch avg. loss: 0.037 | test avg. loss: 4.847


 36%|███▌      | 17769/50000 [26:33<39:13, 13.70it/s]

Epochs: 17766 | epoch avg. loss: 0.057 | test avg. loss: 4.786
Epochs: 17767 | epoch avg. loss: 0.035 | test avg. loss: 4.712
Epochs: 17768 | epoch avg. loss: 0.018 | test avg. loss: 4.845
Epochs: 17769 | epoch avg. loss: 0.026 | test avg. loss: 4.860


 36%|███▌      | 17773/50000 [26:33<37:24, 14.36it/s]

Epochs: 17770 | epoch avg. loss: 0.023 | test avg. loss: 4.916
Epochs: 17771 | epoch avg. loss: 0.036 | test avg. loss: 5.006
Epochs: 17772 | epoch avg. loss: 0.019 | test avg. loss: 4.849


 36%|███▌      | 17775/50000 [26:33<37:35, 14.28it/s]

Epochs: 17773 | epoch avg. loss: 0.014 | test avg. loss: 4.858
Epochs: 17774 | epoch avg. loss: 0.080 | test avg. loss: 4.966
Epochs: 17775 | epoch avg. loss: 0.056 | test avg. loss: 4.876


 36%|███▌      | 17777/50000 [26:33<37:53, 14.18it/s]

Epochs: 17776 | epoch avg. loss: 0.050 | test avg. loss: 4.925
Epochs: 17777 | epoch avg. loss: 0.019 | test avg. loss: 4.892


 36%|███▌      | 17781/50000 [26:34<42:06, 12.75it/s]

Epochs: 17778 | epoch avg. loss: 0.015 | test avg. loss: 4.720
Epochs: 17779 | epoch avg. loss: 0.019 | test avg. loss: 4.718
Epochs: 17780 | epoch avg. loss: 0.014 | test avg. loss: 4.750




Epochs: 17781 | epoch avg. loss: 0.012 | test avg. loss: 4.795
Epochs: 17782 | epoch avg. loss: 0.015 | test avg. loss: 4.855
Epochs: 17783 | epoch avg. loss: 0.015 | test avg. loss: 4.945


 36%|███▌      | 17787/50000 [26:34<37:15, 14.41it/s]

Epochs: 17784 | epoch avg. loss: 0.015 | test avg. loss: 4.801
Epochs: 17785 | epoch avg. loss: 0.019 | test avg. loss: 4.747
Epochs: 17786 | epoch avg. loss: 0.011 | test avg. loss: 4.742
Epochs: 17787 | epoch avg. loss: 0.009 | test avg. loss: 4.769


 36%|███▌      | 17791/50000 [26:34<35:25, 15.16it/s]

Epochs: 17788 | epoch avg. loss: 0.010 | test avg. loss: 4.848
Epochs: 17789 | epoch avg. loss: 0.015 | test avg. loss: 4.835
Epochs: 17790 | epoch avg. loss: 0.010 | test avg. loss: 4.776


                                                     

Epochs: 17791 | epoch avg. loss: 0.014 | test avg. loss: 4.861
Epochs: 17792 | epoch avg. loss: 0.023 | test avg. loss: 4.792


 36%|███▌      | 17795/50000 [26:35<42:53, 12.51it/s]

Epochs: 17793 | epoch avg. loss: 0.015 | test avg. loss: 4.802
Epochs: 17794 | epoch avg. loss: 0.011 | test avg. loss: 4.851
Epochs: 17795 | epoch avg. loss: 0.009 | test avg. loss: 4.823


 36%|███▌      | 17799/50000 [26:35<42:30, 12.63it/s]

Epochs: 17796 | epoch avg. loss: 0.012 | test avg. loss: 4.861
Epochs: 17797 | epoch avg. loss: 0.010 | test avg. loss: 4.815
Epochs: 17798 | epoch avg. loss: 0.011 | test avg. loss: 4.770


 36%|███▌      | 17799/50000 [26:35<42:30, 12.63it/s]

Epochs: 17799 | epoch avg. loss: 0.015 | test avg. loss: 4.881


 36%|███▌      | 17803/50000 [26:37<1:55:13,  4.66it/s]

Epochs: 17800 | epoch avg. loss: 0.032 | test avg. loss: 4.840
Epochs: 17801 | epoch avg. loss: 0.030 | test avg. loss: 4.873
Epochs: 17802 | epoch avg. loss: 0.183 | test avg. loss: 4.877


 36%|███▌      | 17805/50000 [26:37<1:34:05,  5.70it/s]

Epochs: 17803 | epoch avg. loss: 0.137 | test avg. loss: 5.155
Epochs: 17804 | epoch avg. loss: 0.204 | test avg. loss: 4.745
Epochs: 17805 | epoch avg. loss: 0.132 | test avg. loss: 4.855


 36%|███▌      | 17809/50000 [26:37<1:05:13,  8.22it/s]

Epochs: 17806 | epoch avg. loss: 0.049 | test avg. loss: 5.019
Epochs: 17807 | epoch avg. loss: 0.079 | test avg. loss: 4.776
Epochs: 17808 | epoch avg. loss: 0.052 | test avg. loss: 4.767


                                                     

Epochs: 17809 | epoch avg. loss: 0.051 | test avg. loss: 4.910
Epochs: 17810 | epoch avg. loss: 0.031 | test avg. loss: 4.901
Epochs: 17811 | epoch avg. loss: 0.047 | test avg. loss: 4.873


 36%|███▌      | 17813/50000 [26:38<51:22, 10.44it/s]

Epochs: 17812 | epoch avg. loss: 0.061 | test avg. loss: 4.930
Epochs: 17813 | epoch avg. loss: 0.060 | test avg. loss: 4.940
Epochs: 17814 | epoch avg. loss: 0.055 | test avg. loss: 4.686


 36%|███▌      | 17817/50000 [26:38<48:56, 10.96it/s]

Epochs: 17815 | epoch avg. loss: 0.116 | test avg. loss: 4.736
Epochs: 17816 | epoch avg. loss: 0.032 | test avg. loss: 5.052
Epochs: 17817 | epoch avg. loss: 0.072 | test avg. loss: 4.864


 36%|███▌      | 17821/50000 [26:38<44:10, 12.14it/s]

Epochs: 17818 | epoch avg. loss: 0.090 | test avg. loss: 4.861
Epochs: 17819 | epoch avg. loss: 0.044 | test avg. loss: 5.043
Epochs: 17820 | epoch avg. loss: 0.097 | test avg. loss: 4.662


 36%|███▌      | 17825/50000 [26:38<39:44, 13.50it/s]

Epochs: 17821 | epoch avg. loss: 0.120 | test avg. loss: 4.678
Epochs: 17822 | epoch avg. loss: 0.054 | test avg. loss: 5.136
Epochs: 17823 | epoch avg. loss: 0.190 | test avg. loss: 4.832
Epochs: 17824 | epoch avg. loss: 0.178 | test avg. loss: 4.987


 36%|███▌      | 17827/50000 [26:38<39:42, 13.50it/s]

Epochs: 17825 | epoch avg. loss: 0.137 | test avg. loss: 5.882
Epochs: 17826 | epoch avg. loss: 0.559 | test avg. loss: 4.774
Epochs: 17827 | epoch avg. loss: 0.112 | test avg. loss: 4.530


 36%|███▌      | 17831/50000 [26:39<39:45, 13.49it/s]

Epochs: 17828 | epoch avg. loss: 0.126 | test avg. loss: 5.238
Epochs: 17829 | epoch avg. loss: 0.434 | test avg. loss: 4.777
Epochs: 17830 | epoch avg. loss: 0.266 | test avg. loss: 5.066


 36%|███▌      | 17835/50000 [26:39<37:57, 14.12it/s]

Epochs: 17831 | epoch avg. loss: 0.178 | test avg. loss: 5.783
Epochs: 17832 | epoch avg. loss: 0.396 | test avg. loss: 4.695
Epochs: 17833 | epoch avg. loss: 0.574 | test avg. loss: 4.457
Epochs: 17834 | epoch avg. loss: 0.368 | test avg. loss: 6.796


 36%|███▌      | 17837/50000 [26:39<38:31, 13.92it/s]

Epochs: 17835 | epoch avg. loss: 1.760 | test avg. loss: 5.333
Epochs: 17836 | epoch avg. loss: 1.358 | test avg. loss: 5.677
Epochs: 17837 | epoch avg. loss: 0.719 | test avg. loss: 6.921


 36%|███▌      | 17841/50000 [26:39<39:27, 13.59it/s]

Epochs: 17838 | epoch avg. loss: 0.951 | test avg. loss: 4.906
Epochs: 17839 | epoch avg. loss: 0.739 | test avg. loss: 4.627
Epochs: 17840 | epoch avg. loss: 0.413 | test avg. loss: 5.785


 36%|███▌      | 17843/50000 [26:40<43:32, 12.31it/s]

Epochs: 17841 | epoch avg. loss: 0.658 | test avg. loss: 5.199
Epochs: 17842 | epoch avg. loss: 0.572 | test avg. loss: 5.569
Epochs: 17843 | epoch avg. loss: 0.943 | test avg. loss: 4.968


 36%|███▌      | 17847/50000 [26:40<43:36, 12.29it/s]

Epochs: 17844 | epoch avg. loss: 0.542 | test avg. loss: 5.243
Epochs: 17845 | epoch avg. loss: 0.861 | test avg. loss: 6.858
Epochs: 17846 | epoch avg. loss: 1.164 | test avg. loss: 4.442


 36%|███▌      | 17849/50000 [26:40<42:15, 12.68it/s]

Epochs: 17847 | epoch avg. loss: 0.567 | test avg. loss: 4.341
Epochs: 17848 | epoch avg. loss: 0.252 | test avg. loss: 5.149
Epochs: 17849 | epoch avg. loss: 0.378 | test avg. loss: 4.760


 36%|███▌      | 17853/50000 [26:40<43:43, 12.25it/s]

Epochs: 17850 | epoch avg. loss: 0.339 | test avg. loss: 5.088
Epochs: 17851 | epoch avg. loss: 0.214 | test avg. loss: 5.055
Epochs: 17852 | epoch avg. loss: 0.153 | test avg. loss: 4.750


 36%|███▌      | 17855/50000 [26:41<45:01, 11.90it/s]

Epochs: 17853 | epoch avg. loss: 0.164 | test avg. loss: 5.328
Epochs: 17854 | epoch avg. loss: 0.446 | test avg. loss: 4.520
Epochs: 17855 | epoch avg. loss: 0.343 | test avg. loss: 4.676


 36%|███▌      | 17859/50000 [26:41<39:40, 13.50it/s]

Epochs: 17856 | epoch avg. loss: 0.242 | test avg. loss: 4.789
Epochs: 17857 | epoch avg. loss: 0.203 | test avg. loss: 4.757
Epochs: 17858 | epoch avg. loss: 0.339 | test avg. loss: 5.189
Epochs: 17859 | epoch avg. loss: 0.276 | test avg. loss: 4.604


                                                     

Epochs: 17860 | epoch avg. loss: 0.296 | test avg. loss: 4.594
Epochs: 17861 | epoch avg. loss: 0.137 | test avg. loss: 5.219
Epochs: 17862 | epoch avg. loss: 0.220 | test avg. loss: 4.668


 36%|███▌      | 17865/50000 [26:41<38:06, 14.05it/s]

Epochs: 17863 | epoch avg. loss: 0.259 | test avg. loss: 4.721
Epochs: 17864 | epoch avg. loss: 0.073 | test avg. loss: 4.666
Epochs: 17865 | epoch avg. loss: 0.035 | test avg. loss: 4.608


 36%|███▌      | 17869/50000 [26:42<40:30, 13.22it/s]

Epochs: 17866 | epoch avg. loss: 0.045 | test avg. loss: 4.911
Epochs: 17867 | epoch avg. loss: 0.095 | test avg. loss: 4.680
Epochs: 17868 | epoch avg. loss: 0.066 | test avg. loss: 4.829
Epochs: 17869 | epoch avg. loss: 0.059 | test avg. loss: 4.846


 36%|███▌      | 17873/50000 [26:42<36:20, 14.73it/s]

Epochs: 17870 | epoch avg. loss: 0.038 | test avg. loss: 4.660
Epochs: 17871 | epoch avg. loss: 0.037 | test avg. loss: 4.774
Epochs: 17872 | epoch avg. loss: 0.069 | test avg. loss: 4.623
Epochs: 17873 | epoch avg. loss: 0.035 | test avg. loss: 4.597


 36%|███▌      | 17877/50000 [26:42<36:26, 14.69it/s]

Epochs: 17874 | epoch avg. loss: 0.045 | test avg. loss: 4.919
Epochs: 17875 | epoch avg. loss: 0.150 | test avg. loss: 4.701
Epochs: 17876 | epoch avg. loss: 0.182 | test avg. loss: 4.748


 36%|███▌      | 17879/50000 [26:42<37:26, 14.30it/s]

Epochs: 17877 | epoch avg. loss: 0.102 | test avg. loss: 5.119
Epochs: 17878 | epoch avg. loss: 0.151 | test avg. loss: 4.699
Epochs: 17879 | epoch avg. loss: 0.209 | test avg. loss: 4.790


 36%|███▌      | 17883/50000 [26:43<42:16, 12.66it/s]

Epochs: 17880 | epoch avg. loss: 0.122 | test avg. loss: 4.439
Epochs: 17881 | epoch avg. loss: 0.106 | test avg. loss: 4.400
Epochs: 17882 | epoch avg. loss: 0.092 | test avg. loss: 4.975


 36%|███▌      | 17885/50000 [26:43<44:26, 12.04it/s]

Epochs: 17883 | epoch avg. loss: 0.218 | test avg. loss: 4.560
Epochs: 17884 | epoch avg. loss: 0.166 | test avg. loss: 4.815
Epochs: 17885 | epoch avg. loss: 0.142 | test avg. loss: 4.543


 36%|███▌      | 17889/50000 [26:43<45:50, 11.67it/s]

Epochs: 17886 | epoch avg. loss: 0.103 | test avg. loss: 4.533
Epochs: 17887 | epoch avg. loss: 0.084 | test avg. loss: 4.840
Epochs: 17888 | epoch avg. loss: 0.137 | test avg. loss: 4.531


 36%|███▌      | 17891/50000 [26:43<47:40, 11.23it/s]

Epochs: 17889 | epoch avg. loss: 0.064 | test avg. loss: 4.566
Epochs: 17890 | epoch avg. loss: 0.030 | test avg. loss: 4.565
Epochs: 17891 | epoch avg. loss: 0.039 | test avg. loss: 4.666


 36%|███▌      | 17895/50000 [26:44<46:07, 11.60it/s]

Epochs: 17892 | epoch avg. loss: 0.063 | test avg. loss: 4.883
Epochs: 17893 | epoch avg. loss: 0.093 | test avg. loss: 4.587
Epochs: 17894 | epoch avg. loss: 0.040 | test avg. loss: 4.698


 36%|███▌      | 17897/50000 [26:44<46:08, 11.59it/s]

Epochs: 17895 | epoch avg. loss: 0.050 | test avg. loss: 4.523
Epochs: 17896 | epoch avg. loss: 0.049 | test avg. loss: 4.629
Epochs: 17897 | epoch avg. loss: 0.039 | test avg. loss: 4.743


 36%|███▌      | 17899/50000 [26:44<46:03, 11.62it/s]

Epochs: 17898 | epoch avg. loss: 0.029 | test avg. loss: 4.682
Epochs: 17899 | epoch avg. loss: 0.030 | test avg. loss: 4.761


 36%|███▌      | 17903/50000 [26:46<2:12:21,  4.04it/s]

Epochs: 17900 | epoch avg. loss: 0.027 | test avg. loss: 4.674
Epochs: 17901 | epoch avg. loss: 0.017 | test avg. loss: 4.592
Epochs: 17902 | epoch avg. loss: 0.020 | test avg. loss: 4.786


 36%|███▌      | 17905/50000 [26:46<1:44:03,  5.14it/s]

Epochs: 17903 | epoch avg. loss: 0.057 | test avg. loss: 4.679
Epochs: 17904 | epoch avg. loss: 0.017 | test avg. loss: 4.688
Epochs: 17905 | epoch avg. loss: 0.012 | test avg. loss: 4.706


 36%|███▌      | 17909/50000 [26:46<1:14:59,  7.13it/s]

Epochs: 17906 | epoch avg. loss: 0.013 | test avg. loss: 4.641
Epochs: 17907 | epoch avg. loss: 0.030 | test avg. loss: 4.744
Epochs: 17908 | epoch avg. loss: 0.014 | test avg. loss: 4.729


 36%|███▌      | 17911/50000 [26:47<1:07:43,  7.90it/s]

Epochs: 17909 | epoch avg. loss: 0.016 | test avg. loss: 4.661
Epochs: 17910 | epoch avg. loss: 0.024 | test avg. loss: 4.817
Epochs: 17911 | epoch avg. loss: 0.043 | test avg. loss: 4.640


 36%|███▌      | 17915/50000 [26:47<53:07, 10.07it/s]  

Epochs: 17912 | epoch avg. loss: 0.012 | test avg. loss: 4.682
Epochs: 17913 | epoch avg. loss: 0.035 | test avg. loss: 4.740
Epochs: 17914 | epoch avg. loss: 0.021 | test avg. loss: 4.680


 36%|███▌      | 17917/50000 [26:47<50:01, 10.69it/s]

Epochs: 17915 | epoch avg. loss: 0.013 | test avg. loss: 4.752
Epochs: 17916 | epoch avg. loss: 0.012 | test avg. loss: 4.686
Epochs: 17917 | epoch avg. loss: 0.010 | test avg. loss: 4.751


 36%|███▌      | 17921/50000 [26:47<50:05, 10.67it/s]

Epochs: 17918 | epoch avg. loss: 0.021 | test avg. loss: 4.685
Epochs: 17919 | epoch avg. loss: 0.015 | test avg. loss: 4.658
Epochs: 17920 | epoch avg. loss: 0.016 | test avg. loss: 4.716


 36%|███▌      | 17923/50000 [26:48<48:54, 10.93it/s]

Epochs: 17921 | epoch avg. loss: 0.010 | test avg. loss: 4.705
Epochs: 17922 | epoch avg. loss: 0.010 | test avg. loss: 4.877
Epochs: 17923 | epoch avg. loss: 0.047 | test avg. loss: 4.701


 36%|███▌      | 17927/50000 [26:48<44:59, 11.88it/s]

Epochs: 17924 | epoch avg. loss: 0.026 | test avg. loss: 4.682
Epochs: 17925 | epoch avg. loss: 0.018 | test avg. loss: 4.806
Epochs: 17926 | epoch avg. loss: 0.030 | test avg. loss: 4.688


 36%|███▌      | 17929/50000 [26:48<43:40, 12.24it/s]

Epochs: 17927 | epoch avg. loss: 0.013 | test avg. loss: 4.775
Epochs: 17928 | epoch avg. loss: 0.016 | test avg. loss: 4.736
Epochs: 17929 | epoch avg. loss: 0.019 | test avg. loss: 4.699


 36%|███▌      | 17933/50000 [26:48<42:51, 12.47it/s]

Epochs: 17930 | epoch avg. loss: 0.033 | test avg. loss: 4.898
Epochs: 17931 | epoch avg. loss: 0.056 | test avg. loss: 4.696
Epochs: 17932 | epoch avg. loss: 0.025 | test avg. loss: 4.691


 36%|███▌      | 17935/50000 [26:49<46:26, 11.51it/s]

Epochs: 17933 | epoch avg. loss: 0.017 | test avg. loss: 4.811
Epochs: 17934 | epoch avg. loss: 0.021 | test avg. loss: 4.734
Epochs: 17935 | epoch avg. loss: 0.011 | test avg. loss: 4.891


 36%|███▌      | 17939/50000 [26:49<44:30, 12.00it/s]

Epochs: 17936 | epoch avg. loss: 0.050 | test avg. loss: 4.777
Epochs: 17937 | epoch avg. loss: 0.013 | test avg. loss: 4.726
Epochs: 17938 | epoch avg. loss: 0.013 | test avg. loss: 4.797


 36%|███▌      | 17941/50000 [26:49<43:25, 12.31it/s]

Epochs: 17939 | epoch avg. loss: 0.020 | test avg. loss: 4.658
Epochs: 17940 | epoch avg. loss: 0.037 | test avg. loss: 4.673
Epochs: 17941 | epoch avg. loss: 0.019 | test avg. loss: 4.831


 36%|███▌      | 17945/50000 [26:49<45:40, 11.70it/s]

Epochs: 17942 | epoch avg. loss: 0.028 | test avg. loss: 4.718
Epochs: 17943 | epoch avg. loss: 0.014 | test avg. loss: 4.773
Epochs: 17944 | epoch avg. loss: 0.017 | test avg. loss: 4.833


 36%|███▌      | 17947/50000 [26:50<45:42, 11.69it/s]

Epochs: 17945 | epoch avg. loss: 0.015 | test avg. loss: 4.729
Epochs: 17946 | epoch avg. loss: 0.025 | test avg. loss: 4.772
Epochs: 17947 | epoch avg. loss: 0.017 | test avg. loss: 4.717


 36%|███▌      | 17951/50000 [26:50<40:55, 13.05it/s]

Epochs: 17948 | epoch avg. loss: 0.011 | test avg. loss: 4.657
Epochs: 17949 | epoch avg. loss: 0.013 | test avg. loss: 4.749
Epochs: 17950 | epoch avg. loss: 0.018 | test avg. loss: 4.678


 36%|███▌      | 17953/50000 [26:50<41:35, 12.84it/s]

Epochs: 17951 | epoch avg. loss: 0.035 | test avg. loss: 4.749
Epochs: 17952 | epoch avg. loss: 0.018 | test avg. loss: 4.873
Epochs: 17953 | epoch avg. loss: 0.037 | test avg. loss: 4.743


 36%|███▌      | 17957/50000 [26:50<38:58, 13.70it/s]

Epochs: 17954 | epoch avg. loss: 0.038 | test avg. loss: 4.728
Epochs: 17955 | epoch avg. loss: 0.020 | test avg. loss: 4.835
Epochs: 17956 | epoch avg. loss: 0.040 | test avg. loss: 4.640


 36%|███▌      | 17959/50000 [26:51<43:13, 12.36it/s]

Epochs: 17957 | epoch avg. loss: 0.049 | test avg. loss: 4.700
Epochs: 17958 | epoch avg. loss: 0.028 | test avg. loss: 4.873
Epochs: 17959 | epoch avg. loss: 0.045 | test avg. loss: 4.731


 36%|███▌      | 17963/50000 [26:51<44:08, 12.09it/s]

Epochs: 17960 | epoch avg. loss: 0.036 | test avg. loss: 4.849
Epochs: 17961 | epoch avg. loss: 0.042 | test avg. loss: 4.677
Epochs: 17962 | epoch avg. loss: 0.046 | test avg. loss: 4.690


 36%|███▌      | 17965/50000 [26:51<44:46, 11.93it/s]

Epochs: 17963 | epoch avg. loss: 0.100 | test avg. loss: 4.975
Epochs: 17964 | epoch avg. loss: 0.062 | test avg. loss: 4.723
Epochs: 17965 | epoch avg. loss: 0.027 | test avg. loss: 4.694


 36%|███▌      | 17969/50000 [26:51<43:22, 12.31it/s]

Epochs: 17966 | epoch avg. loss: 0.021 | test avg. loss: 4.843
Epochs: 17967 | epoch avg. loss: 0.036 | test avg. loss: 4.757
Epochs: 17968 | epoch avg. loss: 0.030 | test avg. loss: 4.762


 36%|███▌      | 17971/50000 [26:52<44:16, 12.06it/s]

Epochs: 17969 | epoch avg. loss: 0.021 | test avg. loss: 4.731
Epochs: 17970 | epoch avg. loss: 0.030 | test avg. loss: 4.658
Epochs: 17971 | epoch avg. loss: 0.111 | test avg. loss: 4.840




Epochs: 17972 | epoch avg. loss: 0.120 | test avg. loss: 4.787
Epochs: 17973 | epoch avg. loss: 0.044 | test avg. loss: 4.636
Epochs: 17974 | epoch avg. loss: 0.057 | test avg. loss: 4.730


 36%|███▌      | 17979/50000 [26:52<37:29, 14.24it/s]

Epochs: 17975 | epoch avg. loss: 0.013 | test avg. loss: 4.668
Epochs: 17976 | epoch avg. loss: 0.036 | test avg. loss: 4.652
Epochs: 17977 | epoch avg. loss: 0.030 | test avg. loss: 4.796
Epochs: 17978 | epoch avg. loss: 0.046 | test avg. loss: 4.644


 36%|███▌      | 17981/50000 [26:52<37:25, 14.26it/s]

Epochs: 17979 | epoch avg. loss: 0.051 | test avg. loss: 4.671
Epochs: 17980 | epoch avg. loss: 0.030 | test avg. loss: 4.855
Epochs: 17981 | epoch avg. loss: 0.039 | test avg. loss: 4.755
Epochs: 17982 | epoch avg. loss: 0.014 | test avg. loss: 4.685


 36%|███▌      | 17985/50000 [26:53<39:17, 13.58it/s]

Epochs: 17983 | epoch avg. loss: 0.039 | test avg. loss: 4.747
Epochs: 17984 | epoch avg. loss: 0.010 | test avg. loss: 4.786
Epochs: 17985 | epoch avg. loss: 0.010 | test avg. loss: 4.769


 36%|███▌      | 17989/50000 [26:53<37:20, 14.29it/s]

Epochs: 17986 | epoch avg. loss: 0.009 | test avg. loss: 4.751
Epochs: 17987 | epoch avg. loss: 0.008 | test avg. loss: 4.702
Epochs: 17988 | epoch avg. loss: 0.008 | test avg. loss: 4.770
Epochs: 17989 | epoch avg. loss: 0.016 | test avg. loss: 4.698


 36%|███▌      | 17993/50000 [26:53<34:43, 15.36it/s]

Epochs: 17990 | epoch avg. loss: 0.037 | test avg. loss: 4.757
Epochs: 17991 | epoch avg. loss: 0.014 | test avg. loss: 4.847
Epochs: 17992 | epoch avg. loss: 0.015 | test avg. loss: 4.741
Epochs: 17993 | epoch avg. loss: 0.024 | test avg. loss: 4.735


 36%|███▌      | 17997/50000 [26:53<34:43, 15.36it/s]

Epochs: 17994 | epoch avg. loss: 0.012 | test avg. loss: 4.768
Epochs: 17995 | epoch avg. loss: 0.016 | test avg. loss: 4.692
Epochs: 17996 | epoch avg. loss: 0.014 | test avg. loss: 4.768
Epochs: 17997 | epoch avg. loss: 0.011 | test avg. loss: 4.852


 36%|███▌      | 17999/50000 [26:53<35:47, 14.90it/s]

Epochs: 17998 | epoch avg. loss: 0.012 | test avg. loss: 4.797
Epochs: 17999 | epoch avg. loss: 0.013 | test avg. loss: 4.841


 36%|███▌      | 18003/50000 [26:55<1:51:08,  4.80it/s]

Epochs: 18000 | epoch avg. loss: 0.013 | test avg. loss: 4.704
Epochs: 18001 | epoch avg. loss: 0.030 | test avg. loss: 4.679
Epochs: 18002 | epoch avg. loss: 0.061 | test avg. loss: 4.874
Epochs: 18003 | epoch avg. loss: 0.047 | test avg. loss: 4.758


 36%|███▌      | 18007/50000 [26:55<1:13:19,  7.27it/s]

Epochs: 18004 | epoch avg. loss: 0.020 | test avg. loss: 4.671
Epochs: 18005 | epoch avg. loss: 0.064 | test avg. loss: 4.748
Epochs: 18006 | epoch avg. loss: 0.028 | test avg. loss: 4.839


 36%|███▌      | 18009/50000 [26:56<1:04:07,  8.31it/s]

Epochs: 18007 | epoch avg. loss: 0.029 | test avg. loss: 4.733
Epochs: 18008 | epoch avg. loss: 0.099 | test avg. loss: 4.747
Epochs: 18009 | epoch avg. loss: 0.020 | test avg. loss: 4.886
Epochs: 18010 | epoch avg. loss: 0.035 | test avg. loss: 4.706


 36%|███▌      | 18015/50000 [26:56<43:46, 12.18it/s]

Epochs: 18011 | epoch avg. loss: 0.125 | test avg. loss: 4.710
Epochs: 18012 | epoch avg. loss: 0.045 | test avg. loss: 5.096
Epochs: 18013 | epoch avg. loss: 0.183 | test avg. loss: 4.779
Epochs: 18014 | epoch avg. loss: 0.174 | test avg. loss: 4.815


 36%|███▌      | 18019/50000 [26:56<38:41, 13.78it/s]

Epochs: 18015 | epoch avg. loss: 0.086 | test avg. loss: 5.203
Epochs: 18016 | epoch avg. loss: 0.170 | test avg. loss: 4.690
Epochs: 18017 | epoch avg. loss: 0.049 | test avg. loss: 4.726
Epochs: 18018 | epoch avg. loss: 0.062 | test avg. loss: 4.774


 36%|███▌      | 18023/50000 [26:56<36:53, 14.45it/s]

Epochs: 18019 | epoch avg. loss: 0.039 | test avg. loss: 4.829
Epochs: 18020 | epoch avg. loss: 0.017 | test avg. loss: 4.937
Epochs: 18021 | epoch avg. loss: 0.019 | test avg. loss: 4.888
Epochs: 18022 | epoch avg. loss: 0.018 | test avg. loss: 4.764


 36%|███▌      | 18025/50000 [26:57<41:31, 12.83it/s]

Epochs: 18023 | epoch avg. loss: 0.014 | test avg. loss: 4.618
Epochs: 18024 | epoch avg. loss: 0.024 | test avg. loss: 4.639
Epochs: 18025 | epoch avg. loss: 0.016 | test avg. loss: 4.823


 36%|███▌      | 18029/50000 [26:57<37:12, 14.32it/s]

Epochs: 18026 | epoch avg. loss: 0.022 | test avg. loss: 4.820
Epochs: 18027 | epoch avg. loss: 0.019 | test avg. loss: 4.831
Epochs: 18028 | epoch avg. loss: 0.018 | test avg. loss: 4.852
Epochs: 18029 | epoch avg. loss: 0.018 | test avg. loss: 4.763


 36%|███▌      | 18033/50000 [26:57<35:27, 15.02it/s]

Epochs: 18030 | epoch avg. loss: 0.017 | test avg. loss: 4.716
Epochs: 18031 | epoch avg. loss: 0.023 | test avg. loss: 4.819
Epochs: 18032 | epoch avg. loss: 0.023 | test avg. loss: 4.773
Epochs: 18033 | epoch avg. loss: 0.007 | test avg. loss: 4.826


 36%|███▌      | 18037/50000 [26:57<35:31, 15.00it/s]

Epochs: 18034 | epoch avg. loss: 0.024 | test avg. loss: 4.853
Epochs: 18035 | epoch avg. loss: 0.019 | test avg. loss: 4.736
Epochs: 18036 | epoch avg. loss: 0.059 | test avg. loss: 4.747


 36%|███▌      | 18039/50000 [26:58<36:48, 14.47it/s]

Epochs: 18037 | epoch avg. loss: 0.024 | test avg. loss: 4.836
Epochs: 18038 | epoch avg. loss: 0.030 | test avg. loss: 4.717
Epochs: 18039 | epoch avg. loss: 0.048 | test avg. loss: 4.748


 36%|███▌      | 18043/50000 [26:58<36:48, 14.47it/s]

Epochs: 18040 | epoch avg. loss: 0.022 | test avg. loss: 4.813
Epochs: 18041 | epoch avg. loss: 0.033 | test avg. loss: 4.693
Epochs: 18042 | epoch avg. loss: 0.027 | test avg. loss: 4.881


 36%|███▌      | 18045/50000 [26:58<38:31, 13.82it/s]

Epochs: 18043 | epoch avg. loss: 0.155 | test avg. loss: 4.946
Epochs: 18044 | epoch avg. loss: 0.084 | test avg. loss: 4.673
Epochs: 18045 | epoch avg. loss: 0.109 | test avg. loss: 4.755


 36%|███▌      | 18049/50000 [26:58<38:38, 13.78it/s]

Epochs: 18046 | epoch avg. loss: 0.031 | test avg. loss: 4.843
Epochs: 18047 | epoch avg. loss: 0.031 | test avg. loss: 4.710
Epochs: 18048 | epoch avg. loss: 0.047 | test avg. loss: 4.761
Epochs: 18049 | epoch avg. loss: 0.032 | test avg. loss: 4.736


 36%|███▌      | 18053/50000 [26:59<40:53, 13.02it/s]

Epochs: 18050 | epoch avg. loss: 0.017 | test avg. loss: 4.681
Epochs: 18051 | epoch avg. loss: 0.038 | test avg. loss: 4.749
Epochs: 18052 | epoch avg. loss: 0.023 | test avg. loss: 4.741


                                                     

Epochs: 18053 | epoch avg. loss: 0.018 | test avg. loss: 4.679
Epochs: 18054 | epoch avg. loss: 0.012 | test avg. loss: 4.723
Epochs: 18055 | epoch avg. loss: 0.010 | test avg. loss: 4.714




Epochs: 18056 | epoch avg. loss: 0.020 | test avg. loss: 4.727
Epochs: 18057 | epoch avg. loss: 0.023 | test avg. loss: 4.822
Epochs: 18058 | epoch avg. loss: 0.031 | test avg. loss: 4.769


 36%|███▌      | 18061/50000 [26:59<38:22, 13.87it/s]

Epochs: 18059 | epoch avg. loss: 0.014 | test avg. loss: 4.680
Epochs: 18060 | epoch avg. loss: 0.034 | test avg. loss: 4.726
Epochs: 18061 | epoch avg. loss: 0.016 | test avg. loss: 4.817


 36%|███▌      | 18065/50000 [26:59<40:46, 13.05it/s]

Epochs: 18062 | epoch avg. loss: 0.019 | test avg. loss: 4.735
Epochs: 18063 | epoch avg. loss: 0.075 | test avg. loss: 4.777
Epochs: 18064 | epoch avg. loss: 0.013 | test avg. loss: 4.810


 36%|███▌      | 18067/50000 [27:00<40:48, 13.04it/s]

Epochs: 18065 | epoch avg. loss: 0.019 | test avg. loss: 4.704
Epochs: 18066 | epoch avg. loss: 0.105 | test avg. loss: 4.675
Epochs: 18067 | epoch avg. loss: 0.017 | test avg. loss: 4.765




Epochs: 18068 | epoch avg. loss: 0.026 | test avg. loss: 4.708
Epochs: 18069 | epoch avg. loss: 0.021 | test avg. loss: 4.776
Epochs: 18070 | epoch avg. loss: 0.013 | test avg. loss: 4.837


 36%|███▌      | 18073/50000 [27:00<39:46, 13.38it/s]

Epochs: 18071 | epoch avg. loss: 0.015 | test avg. loss: 4.735
Epochs: 18072 | epoch avg. loss: 0.022 | test avg. loss: 4.732
Epochs: 18073 | epoch avg. loss: 0.010 | test avg. loss: 4.783


 36%|███▌      | 18077/50000 [27:00<41:23, 12.86it/s]

Epochs: 18074 | epoch avg. loss: 0.019 | test avg. loss: 4.788
Epochs: 18075 | epoch avg. loss: 0.012 | test avg. loss: 4.733
Epochs: 18076 | epoch avg. loss: 0.053 | test avg. loss: 4.778


 36%|███▌      | 18079/50000 [27:01<44:33, 11.94it/s]

Epochs: 18077 | epoch avg. loss: 0.010 | test avg. loss: 4.724
Epochs: 18078 | epoch avg. loss: 0.010 | test avg. loss: 4.677


 36%|███▌      | 18081/50000 [27:01<46:55, 11.34it/s]

Epochs: 18079 | epoch avg. loss: 0.010 | test avg. loss: 4.717
Epochs: 18080 | epoch avg. loss: 0.012 | test avg. loss: 4.716
Epochs: 18081 | epoch avg. loss: 0.007 | test avg. loss: 4.760


 36%|███▌      | 18085/50000 [27:01<40:33, 13.11it/s]

Epochs: 18082 | epoch avg. loss: 0.012 | test avg. loss: 4.791
Epochs: 18083 | epoch avg. loss: 0.010 | test avg. loss: 4.750
Epochs: 18084 | epoch avg. loss: 0.012 | test avg. loss: 4.772
Epochs: 18085 | epoch avg. loss: 0.007 | test avg. loss: 4.756


 36%|███▌      | 18089/50000 [27:01<39:00, 13.63it/s]

Epochs: 18086 | epoch avg. loss: 0.009 | test avg. loss: 4.710
Epochs: 18087 | epoch avg. loss: 0.011 | test avg. loss: 4.800
Epochs: 18088 | epoch avg. loss: 0.032 | test avg. loss: 4.811


 36%|███▌      | 18091/50000 [27:02<42:21, 12.56it/s]

Epochs: 18089 | epoch avg. loss: 0.021 | test avg. loss: 4.728
Epochs: 18090 | epoch avg. loss: 0.105 | test avg. loss: 4.823
Epochs: 18091 | epoch avg. loss: 0.052 | test avg. loss: 4.991


 36%|███▌      | 18095/50000 [27:02<40:47, 13.03it/s]

Epochs: 18092 | epoch avg. loss: 0.063 | test avg. loss: 4.722
Epochs: 18093 | epoch avg. loss: 0.076 | test avg. loss: 4.719
Epochs: 18094 | epoch avg. loss: 0.018 | test avg. loss: 4.763


 36%|███▌      | 18097/50000 [27:02<40:25, 13.15it/s]

Epochs: 18095 | epoch avg. loss: 0.015 | test avg. loss: 4.724
Epochs: 18096 | epoch avg. loss: 0.021 | test avg. loss: 4.754
Epochs: 18097 | epoch avg. loss: 0.009 | test avg. loss: 4.781
Epochs: 18098 | epoch avg. loss: 0.020 | test avg. loss: 4.803


 36%|███▌      | 18099/50000 [27:02<38:26, 13.83it/s]

Epochs: 18099 | epoch avg. loss: 0.019 | test avg. loss: 4.726


 36%|███▌      | 18103/50000 [27:04<2:05:02,  4.25it/s]

Epochs: 18100 | epoch avg. loss: 0.018 | test avg. loss: 4.702
Epochs: 18101 | epoch avg. loss: 0.022 | test avg. loss: 4.882
Epochs: 18102 | epoch avg. loss: 0.060 | test avg. loss: 4.859


 36%|███▌      | 18107/50000 [27:04<1:18:30,  6.77it/s]

Epochs: 18103 | epoch avg. loss: 0.022 | test avg. loss: 4.771
Epochs: 18104 | epoch avg. loss: 0.044 | test avg. loss: 4.794
Epochs: 18105 | epoch avg. loss: 0.014 | test avg. loss: 4.848
Epochs: 18106 | epoch avg. loss: 0.019 | test avg. loss: 4.698


 36%|███▌      | 18109/50000 [27:04<1:05:50,  8.07it/s]

Epochs: 18107 | epoch avg. loss: 0.016 | test avg. loss: 4.719
Epochs: 18108 | epoch avg. loss: 0.025 | test avg. loss: 4.762
Epochs: 18109 | epoch avg. loss: 0.024 | test avg. loss: 4.753


 36%|███▌      | 18113/50000 [27:05<52:25, 10.14it/s]

Epochs: 18110 | epoch avg. loss: 0.123 | test avg. loss: 4.785
Epochs: 18111 | epoch avg. loss: 0.029 | test avg. loss: 4.904
Epochs: 18112 | epoch avg. loss: 0.042 | test avg. loss: 4.837


 36%|███▌      | 18115/50000 [27:05<49:19, 10.77it/s]

Epochs: 18113 | epoch avg. loss: 0.020 | test avg. loss: 4.681
Epochs: 18114 | epoch avg. loss: 0.044 | test avg. loss: 4.740
Epochs: 18115 | epoch avg. loss: 0.032 | test avg. loss: 4.887


 36%|███▌      | 18119/50000 [27:05<47:57, 11.08it/s]

Epochs: 18116 | epoch avg. loss: 0.046 | test avg. loss: 4.774
Epochs: 18117 | epoch avg. loss: 0.023 | test avg. loss: 4.724
Epochs: 18118 | epoch avg. loss: 0.019 | test avg. loss: 4.723


                                                     

Epochs: 18119 | epoch avg. loss: 0.013 | test avg. loss: 4.697
Epochs: 18120 | epoch avg. loss: 0.010 | test avg. loss: 4.657
Epochs: 18121 | epoch avg. loss: 0.017 | test avg. loss: 4.718


 36%|███▋      | 18125/50000 [27:06<45:24, 11.70it/s]

Epochs: 18122 | epoch avg. loss: 0.010 | test avg. loss: 4.750
Epochs: 18123 | epoch avg. loss: 0.010 | test avg. loss: 4.768
Epochs: 18124 | epoch avg. loss: 0.012 | test avg. loss: 4.752


                                                     

Epochs: 18125 | epoch avg. loss: 0.011 | test avg. loss: 4.813
Epochs: 18126 | epoch avg. loss: 0.022 | test avg. loss: 4.768
Epochs: 18127 | epoch avg. loss: 0.009 | test avg. loss: 4.740


 36%|███▋      | 18131/50000 [27:06<38:00, 13.98it/s]

Epochs: 18128 | epoch avg. loss: 0.013 | test avg. loss: 4.779
Epochs: 18129 | epoch avg. loss: 0.009 | test avg. loss: 4.724
Epochs: 18130 | epoch avg. loss: 0.021 | test avg. loss: 4.728
Epochs: 18131 | epoch avg. loss: 0.093 | test avg. loss: 4.733


 36%|███▋      | 18135/50000 [27:06<38:12, 13.90it/s]

Epochs: 18132 | epoch avg. loss: 0.016 | test avg. loss: 4.787
Epochs: 18133 | epoch avg. loss: 0.019 | test avg. loss: 4.637
Epochs: 18134 | epoch avg. loss: 0.034 | test avg. loss: 4.680


 36%|███▋      | 18137/50000 [27:06<42:29, 12.50it/s]

Epochs: 18135 | epoch avg. loss: 0.052 | test avg. loss: 4.936
Epochs: 18136 | epoch avg. loss: 0.108 | test avg. loss: 4.858


 36%|███▋      | 18139/50000 [27:07<43:38, 12.17it/s]

Epochs: 18137 | epoch avg. loss: 0.018 | test avg. loss: 4.891
Epochs: 18138 | epoch avg. loss: 0.025 | test avg. loss: 5.165
Epochs: 18139 | epoch avg. loss: 0.196 | test avg. loss: 4.909


 36%|███▋      | 18143/50000 [27:07<40:42, 13.04it/s]

Epochs: 18140 | epoch avg. loss: 0.057 | test avg. loss: 4.809
Epochs: 18141 | epoch avg. loss: 0.261 | test avg. loss: 4.711
Epochs: 18142 | epoch avg. loss: 0.040 | test avg. loss: 5.052


 36%|███▋      | 18145/50000 [27:07<41:27, 12.81it/s]

Epochs: 18143 | epoch avg. loss: 0.189 | test avg. loss: 4.718
Epochs: 18144 | epoch avg. loss: 0.052 | test avg. loss: 4.721
Epochs: 18145 | epoch avg. loss: 0.202 | test avg. loss: 4.876


 36%|███▋      | 18149/50000 [27:07<45:24, 11.69it/s]

Epochs: 18146 | epoch avg. loss: 0.146 | test avg. loss: 5.724
Epochs: 18147 | epoch avg. loss: 0.487 | test avg. loss: 4.668
Epochs: 18148 | epoch avg. loss: 0.405 | test avg. loss: 5.108


 36%|███▋      | 18151/50000 [27:08<45:00, 11.79it/s]

Epochs: 18149 | epoch avg. loss: 0.666 | test avg. loss: 4.942
Epochs: 18150 | epoch avg. loss: 0.322 | test avg. loss: 5.513
Epochs: 18151 | epoch avg. loss: 0.316 | test avg. loss: 4.952




Epochs: 18152 | epoch avg. loss: 0.562 | test avg. loss: 4.533
Epochs: 18153 | epoch avg. loss: 0.247 | test avg. loss: 6.325
Epochs: 18154 | epoch avg. loss: 1.204 | test avg. loss: 4.573


 36%|███▋      | 18157/50000 [27:08<40:14, 13.19it/s]

Epochs: 18155 | epoch avg. loss: 0.676 | test avg. loss: 4.788
Epochs: 18156 | epoch avg. loss: 0.344 | test avg. loss: 6.133
Epochs: 18157 | epoch avg. loss: 0.713 | test avg. loss: 5.040


 36%|███▋      | 18161/50000 [27:08<38:13, 13.88it/s]

Epochs: 18158 | epoch avg. loss: 0.526 | test avg. loss: 4.929
Epochs: 18159 | epoch avg. loss: 0.157 | test avg. loss: 5.091
Epochs: 18160 | epoch avg. loss: 0.130 | test avg. loss: 4.896


 36%|███▋      | 18163/50000 [27:09<39:22, 13.48it/s]

Epochs: 18161 | epoch avg. loss: 0.165 | test avg. loss: 4.784
Epochs: 18162 | epoch avg. loss: 0.123 | test avg. loss: 4.698
Epochs: 18163 | epoch avg. loss: 0.081 | test avg. loss: 4.803


 36%|███▋      | 18167/50000 [27:09<37:34, 14.12it/s]

Epochs: 18164 | epoch avg. loss: 0.200 | test avg. loss: 5.121
Epochs: 18165 | epoch avg. loss: 0.216 | test avg. loss: 4.874
Epochs: 18166 | epoch avg. loss: 0.120 | test avg. loss: 4.666
Epochs: 18167 | epoch avg. loss: 0.378 | test avg. loss: 4.469


 36%|███▋      | 18171/50000 [27:09<36:27, 14.55it/s]

Epochs: 18168 | epoch avg. loss: 0.046 | test avg. loss: 4.617
Epochs: 18169 | epoch avg. loss: 0.058 | test avg. loss: 4.612
Epochs: 18170 | epoch avg. loss: 0.047 | test avg. loss: 4.638


 36%|███▋      | 18173/50000 [27:09<38:22, 13.82it/s]

Epochs: 18171 | epoch avg. loss: 0.067 | test avg. loss: 4.724
Epochs: 18172 | epoch avg. loss: 0.054 | test avg. loss: 4.657
Epochs: 18173 | epoch avg. loss: 0.110 | test avg. loss: 4.688


 36%|███▋      | 18177/50000 [27:10<43:24, 12.22it/s]

Epochs: 18174 | epoch avg. loss: 0.096 | test avg. loss: 4.862
Epochs: 18175 | epoch avg. loss: 0.068 | test avg. loss: 4.712
Epochs: 18176 | epoch avg. loss: 0.049 | test avg. loss: 4.685


 36%|███▋      | 18179/50000 [27:10<44:03, 12.04it/s]

Epochs: 18177 | epoch avg. loss: 0.189 | test avg. loss: 4.888
Epochs: 18178 | epoch avg. loss: 0.098 | test avg. loss: 4.981
Epochs: 18179 | epoch avg. loss: 0.062 | test avg. loss: 4.875


 36%|███▋      | 18183/50000 [27:10<40:23, 13.13it/s]

Epochs: 18180 | epoch avg. loss: 0.268 | test avg. loss: 4.757
Epochs: 18181 | epoch avg. loss: 0.090 | test avg. loss: 4.933
Epochs: 18182 | epoch avg. loss: 0.113 | test avg. loss: 4.611




Epochs: 18183 | epoch avg. loss: 0.197 | test avg. loss: 4.607
Epochs: 18184 | epoch avg. loss: 0.062 | test avg. loss: 4.959
Epochs: 18185 | epoch avg. loss: 0.108 | test avg. loss: 4.610


 36%|███▋      | 18189/50000 [27:10<40:03, 13.24it/s]

Epochs: 18186 | epoch avg. loss: 0.048 | test avg. loss: 4.622
Epochs: 18187 | epoch avg. loss: 0.027 | test avg. loss: 4.791
Epochs: 18188 | epoch avg. loss: 0.030 | test avg. loss: 4.710


 36%|███▋      | 18191/50000 [27:11<42:39, 12.43it/s]

Epochs: 18189 | epoch avg. loss: 0.050 | test avg. loss: 4.720
Epochs: 18190 | epoch avg. loss: 0.041 | test avg. loss: 4.856
Epochs: 18191 | epoch avg. loss: 0.056 | test avg. loss: 4.668


 36%|███▋      | 18195/50000 [27:11<40:26, 13.11it/s]

Epochs: 18192 | epoch avg. loss: 0.021 | test avg. loss: 4.646
Epochs: 18193 | epoch avg. loss: 0.051 | test avg. loss: 4.718
Epochs: 18194 | epoch avg. loss: 0.011 | test avg. loss: 4.764


 36%|███▋      | 18197/50000 [27:11<39:36, 13.38it/s]

Epochs: 18195 | epoch avg. loss: 0.010 | test avg. loss: 4.764
Epochs: 18196 | epoch avg. loss: 0.008 | test avg. loss: 4.715
Epochs: 18197 | epoch avg. loss: 0.008 | test avg. loss: 4.722
Epochs: 18198 | epoch avg. loss: 0.009 | test avg. loss: 4.688


 36%|███▋      | 18199/50000 [27:11<38:06, 13.91it/s]

Epochs: 18199 | epoch avg. loss: 0.020 | test avg. loss: 4.759


 36%|███▋      | 18203/50000 [27:13<1:54:55,  4.61it/s]

Epochs: 18200 | epoch avg. loss: 0.012 | test avg. loss: 4.917
Epochs: 18201 | epoch avg. loss: 0.035 | test avg. loss: 4.813
Epochs: 18202 | epoch avg. loss: 0.016 | test avg. loss: 4.684
Epochs: 18203 | epoch avg. loss: 0.059 | test avg. loss: 4.717


 36%|███▋      | 18207/50000 [27:13<1:12:59,  7.26it/s]

Epochs: 18204 | epoch avg. loss: 0.022 | test avg. loss: 4.762
Epochs: 18205 | epoch avg. loss: 0.017 | test avg. loss: 4.707
Epochs: 18206 | epoch avg. loss: 0.018 | test avg. loss: 4.767
Epochs: 18207 | epoch avg. loss: 0.018 | test avg. loss: 4.912


 36%|███▋      | 18211/50000 [27:13<55:49,  9.49it/s]  

Epochs: 18208 | epoch avg. loss: 0.068 | test avg. loss: 4.679
Epochs: 18209 | epoch avg. loss: 0.057 | test avg. loss: 4.673
Epochs: 18210 | epoch avg. loss: 0.036 | test avg. loss: 5.025


 36%|███▋      | 18215/50000 [27:14<45:15, 11.71it/s]

Epochs: 18211 | epoch avg. loss: 0.186 | test avg. loss: 4.784
Epochs: 18212 | epoch avg. loss: 0.164 | test avg. loss: 4.881
Epochs: 18213 | epoch avg. loss: 0.296 | test avg. loss: 5.096
Epochs: 18214 | epoch avg. loss: 0.463 | test avg. loss: 4.792


 36%|███▋      | 18217/50000 [27:14<44:27, 11.92it/s]

Epochs: 18215 | epoch avg. loss: 0.152 | test avg. loss: 4.652
Epochs: 18216 | epoch avg. loss: 0.352 | test avg. loss: 4.733
Epochs: 18217 | epoch avg. loss: 0.168 | test avg. loss: 5.515


 36%|███▋      | 18221/50000 [27:14<39:03, 13.56it/s]

Epochs: 18218 | epoch avg. loss: 0.455 | test avg. loss: 4.950
Epochs: 18219 | epoch avg. loss: 0.337 | test avg. loss: 5.022
Epochs: 18220 | epoch avg. loss: 0.268 | test avg. loss: 6.869
Epochs: 18221 | epoch avg. loss: 1.472 | test avg. loss: 4.816


 36%|███▋      | 18225/50000 [27:14<41:06, 12.88it/s]

Epochs: 18222 | epoch avg. loss: 0.889 | test avg. loss: 4.610
Epochs: 18223 | epoch avg. loss: 0.674 | test avg. loss: 6.606
Epochs: 18224 | epoch avg. loss: 1.427 | test avg. loss: 5.143


 36%|███▋      | 18227/50000 [27:15<43:02, 12.30it/s]

Epochs: 18225 | epoch avg. loss: 0.711 | test avg. loss: 5.060
Epochs: 18226 | epoch avg. loss: 0.186 | test avg. loss: 5.629
Epochs: 18227 | epoch avg. loss: 0.279 | test avg. loss: 5.118
Epochs: 18228 | epoch avg. loss: 0.844 | test avg. loss: 4.733


                                                     

Epochs: 18229 | epoch avg. loss: 0.428 | test avg. loss: 5.972
Epochs: 18230 | epoch avg. loss: 0.990 | test avg. loss: 5.004
Epochs: 18231 | epoch avg. loss: 1.203 | test avg. loss: 4.802


 36%|███▋      | 18235/50000 [27:15<36:40, 14.43it/s]

Epochs: 18232 | epoch avg. loss: 0.361 | test avg. loss: 6.940
Epochs: 18233 | epoch avg. loss: 1.080 | test avg. loss: 5.465
Epochs: 18234 | epoch avg. loss: 1.388 | test avg. loss: 5.116
Epochs: 18235 | epoch avg. loss: 1.273 | test avg. loss: 4.583


 36%|███▋      | 18239/50000 [27:15<36:21, 14.56it/s]

Epochs: 18236 | epoch avg. loss: 0.919 | test avg. loss: 5.521
Epochs: 18237 | epoch avg. loss: 1.492 | test avg. loss: 6.004
Epochs: 18238 | epoch avg. loss: 1.309 | test avg. loss: 4.936


 36%|███▋      | 18243/50000 [27:16<35:42, 14.82it/s]

Epochs: 18239 | epoch avg. loss: 1.290 | test avg. loss: 5.107
Epochs: 18240 | epoch avg. loss: 0.949 | test avg. loss: 7.132
Epochs: 18241 | epoch avg. loss: 1.671 | test avg. loss: 4.667
Epochs: 18242 | epoch avg. loss: 1.051 | test avg. loss: 4.300


 36%|███▋      | 18245/50000 [27:16<36:07, 14.65it/s]

Epochs: 18243 | epoch avg. loss: 0.553 | test avg. loss: 4.419
Epochs: 18244 | epoch avg. loss: 0.376 | test avg. loss: 4.810
Epochs: 18245 | epoch avg. loss: 0.437 | test avg. loss: 5.584


 36%|███▋      | 18249/50000 [27:16<36:25, 14.53it/s]

Epochs: 18246 | epoch avg. loss: 0.306 | test avg. loss: 5.005
Epochs: 18247 | epoch avg. loss: 0.227 | test avg. loss: 5.035
Epochs: 18248 | epoch avg. loss: 0.140 | test avg. loss: 4.851
Epochs: 18249 | epoch avg. loss: 0.226 | test avg. loss: 4.974


 37%|███▋      | 18253/50000 [27:16<36:32, 14.48it/s]

Epochs: 18250 | epoch avg. loss: 0.426 | test avg. loss: 5.718
Epochs: 18251 | epoch avg. loss: 0.571 | test avg. loss: 5.156
Epochs: 18252 | epoch avg. loss: 0.728 | test avg. loss: 5.029


 37%|███▋      | 18255/50000 [27:17<38:26, 13.77it/s]

Epochs: 18253 | epoch avg. loss: 0.271 | test avg. loss: 5.256
Epochs: 18254 | epoch avg. loss: 0.268 | test avg. loss: 5.074
Epochs: 18255 | epoch avg. loss: 0.488 | test avg. loss: 5.259
Epochs: 18256 | epoch avg. loss: 0.402 | test avg. loss: 4.542


 37%|███▋      | 18259/50000 [27:17<38:43, 13.66it/s]

Epochs: 18257 | epoch avg. loss: 0.226 | test avg. loss: 4.663
Epochs: 18258 | epoch avg. loss: 0.142 | test avg. loss: 4.840
Epochs: 18259 | epoch avg. loss: 0.157 | test avg. loss: 4.772


 37%|███▋      | 18263/50000 [27:17<43:42, 12.10it/s]

Epochs: 18260 | epoch avg. loss: 0.313 | test avg. loss: 4.966
Epochs: 18261 | epoch avg. loss: 0.140 | test avg. loss: 4.597
Epochs: 18262 | epoch avg. loss: 0.171 | test avg. loss: 4.713


 37%|███▋      | 18265/50000 [27:17<45:35, 11.60it/s]

Epochs: 18263 | epoch avg. loss: 0.301 | test avg. loss: 4.456
Epochs: 18264 | epoch avg. loss: 0.212 | test avg. loss: 4.614
Epochs: 18265 | epoch avg. loss: 0.323 | test avg. loss: 5.546


 37%|███▋      | 18269/50000 [27:18<41:44, 12.67it/s]

Epochs: 18266 | epoch avg. loss: 0.426 | test avg. loss: 4.697
Epochs: 18267 | epoch avg. loss: 0.385 | test avg. loss: 4.907
Epochs: 18268 | epoch avg. loss: 0.245 | test avg. loss: 4.647
Epochs: 18269 | epoch avg. loss: 0.268 | test avg. loss: 4.744


 37%|███▋      | 18273/50000 [27:18<37:57, 13.93it/s]

Epochs: 18270 | epoch avg. loss: 0.301 | test avg. loss: 5.245
Epochs: 18271 | epoch avg. loss: 0.211 | test avg. loss: 4.836
Epochs: 18272 | epoch avg. loss: 0.264 | test avg. loss: 4.952
Epochs: 18273 | epoch avg. loss: 0.164 | test avg. loss: 4.541


 37%|███▋      | 18277/50000 [27:18<38:22, 13.78it/s]

Epochs: 18274 | epoch avg. loss: 0.106 | test avg. loss: 4.473
Epochs: 18275 | epoch avg. loss: 0.122 | test avg. loss: 4.432
Epochs: 18276 | epoch avg. loss: 0.099 | test avg. loss: 4.562


 37%|███▋      | 18279/50000 [27:18<37:58, 13.92it/s]

Epochs: 18277 | epoch avg. loss: 0.128 | test avg. loss: 5.456
Epochs: 18278 | epoch avg. loss: 0.330 | test avg. loss: 4.988
Epochs: 18279 | epoch avg. loss: 0.283 | test avg. loss: 5.000


 37%|███▋      | 18283/50000 [27:19<38:27, 13.75it/s]

Epochs: 18280 | epoch avg. loss: 0.164 | test avg. loss: 4.705
Epochs: 18281 | epoch avg. loss: 0.067 | test avg. loss: 4.394
Epochs: 18282 | epoch avg. loss: 0.063 | test avg. loss: 4.598
Epochs: 18283 | epoch avg. loss: 0.070 | test avg. loss: 4.483


 37%|███▋      | 18287/50000 [27:19<36:21, 14.53it/s]

Epochs: 18284 | epoch avg. loss: 0.077 | test avg. loss: 4.824
Epochs: 18285 | epoch avg. loss: 0.064 | test avg. loss: 4.800
Epochs: 18286 | epoch avg. loss: 0.035 | test avg. loss: 4.754
Epochs: 18287 | epoch avg. loss: 0.037 | test avg. loss: 4.789


 37%|███▋      | 18291/50000 [27:19<36:58, 14.29it/s]

Epochs: 18288 | epoch avg. loss: 0.056 | test avg. loss: 4.519
Epochs: 18289 | epoch avg. loss: 0.058 | test avg. loss: 4.592
Epochs: 18290 | epoch avg. loss: 0.024 | test avg. loss: 4.552


 37%|███▋      | 18293/50000 [27:19<38:29, 13.73it/s]

Epochs: 18291 | epoch avg. loss: 0.023 | test avg. loss: 4.686
Epochs: 18292 | epoch avg. loss: 0.041 | test avg. loss: 4.690
Epochs: 18293 | epoch avg. loss: 0.024 | test avg. loss: 4.590


                                                     

Epochs: 18294 | epoch avg. loss: 0.024 | test avg. loss: 4.701
Epochs: 18295 | epoch avg. loss: 0.031 | test avg. loss: 4.566
Epochs: 18296 | epoch avg. loss: 0.019 | test avg. loss: 4.589


 37%|███▋      | 18299/50000 [27:20<37:30, 14.09it/s]

Epochs: 18297 | epoch avg. loss: 0.015 | test avg. loss: 4.728
Epochs: 18298 | epoch avg. loss: 0.026 | test avg. loss: 4.598
Epochs: 18299 | epoch avg. loss: 0.068 | test avg. loss: 4.688


 37%|███▋      | 18303/50000 [27:22<2:04:35,  4.24it/s]

Epochs: 18300 | epoch avg. loss: 0.051 | test avg. loss: 4.623
Epochs: 18301 | epoch avg. loss: 0.038 | test avg. loss: 4.680
Epochs: 18302 | epoch avg. loss: 0.031 | test avg. loss: 4.780
Epochs: 18303 | epoch avg. loss: 0.038 | test avg. loss: 4.607




Epochs: 18304 | epoch avg. loss: 0.013 | test avg. loss: 4.655
Epochs: 18305 | epoch avg. loss: 0.033 | test avg. loss: 4.560
Epochs: 18306 | epoch avg. loss: 0.040 | test avg. loss: 4.643


 37%|███▋      | 18309/50000 [27:22<1:06:37,  7.93it/s]

Epochs: 18307 | epoch avg. loss: 0.033 | test avg. loss: 4.719
Epochs: 18308 | epoch avg. loss: 0.034 | test avg. loss: 4.585
Epochs: 18309 | epoch avg. loss: 0.062 | test avg. loss: 4.789


 37%|███▋      | 18313/50000 [27:22<51:09, 10.32it/s]

Epochs: 18310 | epoch avg. loss: 0.099 | test avg. loss: 4.515
Epochs: 18311 | epoch avg. loss: 0.083 | test avg. loss: 4.575
Epochs: 18312 | epoch avg. loss: 0.045 | test avg. loss: 4.939


 37%|███▋      | 18315/50000 [27:23<49:02, 10.77it/s]

Epochs: 18313 | epoch avg. loss: 0.093 | test avg. loss: 4.678
Epochs: 18314 | epoch avg. loss: 0.074 | test avg. loss: 4.810
Epochs: 18315 | epoch avg. loss: 0.043 | test avg. loss: 4.578


 37%|███▋      | 18319/50000 [27:23<44:17, 11.92it/s]

Epochs: 18316 | epoch avg. loss: 0.045 | test avg. loss: 4.547
Epochs: 18317 | epoch avg. loss: 0.035 | test avg. loss: 4.718
Epochs: 18318 | epoch avg. loss: 0.038 | test avg. loss: 4.572


 37%|███▋      | 18321/50000 [27:23<43:36, 12.11it/s]

Epochs: 18319 | epoch avg. loss: 0.021 | test avg. loss: 4.652
Epochs: 18320 | epoch avg. loss: 0.031 | test avg. loss: 4.583
Epochs: 18321 | epoch avg. loss: 0.093 | test avg. loss: 4.667


 37%|███▋      | 18325/50000 [27:23<42:37, 12.38it/s]

Epochs: 18322 | epoch avg. loss: 0.067 | test avg. loss: 4.982
Epochs: 18323 | epoch avg. loss: 0.094 | test avg. loss: 4.685
Epochs: 18324 | epoch avg. loss: 0.030 | test avg. loss: 4.711


 37%|███▋      | 18327/50000 [27:23<45:22, 11.63it/s]

Epochs: 18325 | epoch avg. loss: 0.038 | test avg. loss: 4.626
Epochs: 18326 | epoch avg. loss: 0.031 | test avg. loss: 4.555


 37%|███▋      | 18329/50000 [27:24<48:30, 10.88it/s]

Epochs: 18327 | epoch avg. loss: 0.051 | test avg. loss: 4.810
Epochs: 18328 | epoch avg. loss: 0.088 | test avg. loss: 4.645
Epochs: 18329 | epoch avg. loss: 0.036 | test avg. loss: 4.648


 37%|███▋      | 18333/50000 [27:24<44:25, 11.88it/s]

Epochs: 18330 | epoch avg. loss: 0.025 | test avg. loss: 4.754
Epochs: 18331 | epoch avg. loss: 0.034 | test avg. loss: 4.570
Epochs: 18332 | epoch avg. loss: 0.013 | test avg. loss: 4.662


 37%|███▋      | 18335/50000 [27:24<41:11, 12.81it/s]

Epochs: 18333 | epoch avg. loss: 0.039 | test avg. loss: 4.574
Epochs: 18334 | epoch avg. loss: 0.029 | test avg. loss: 4.635
Epochs: 18335 | epoch avg. loss: 0.024 | test avg. loss: 4.769


 37%|███▋      | 18339/50000 [27:24<39:50, 13.25it/s]

Epochs: 18336 | epoch avg. loss: 0.015 | test avg. loss: 4.679
Epochs: 18337 | epoch avg. loss: 0.026 | test avg. loss: 4.683
Epochs: 18338 | epoch avg. loss: 0.009 | test avg. loss: 4.600


 37%|███▋      | 18341/50000 [27:25<42:11, 12.51it/s]

Epochs: 18339 | epoch avg. loss: 0.012 | test avg. loss: 4.623
Epochs: 18340 | epoch avg. loss: 0.011 | test avg. loss: 4.593
Epochs: 18341 | epoch avg. loss: 0.017 | test avg. loss: 4.594


 37%|███▋      | 18345/50000 [27:25<43:27, 12.14it/s]

Epochs: 18342 | epoch avg. loss: 0.020 | test avg. loss: 4.688
Epochs: 18343 | epoch avg. loss: 0.012 | test avg. loss: 4.705
Epochs: 18344 | epoch avg. loss: 0.011 | test avg. loss: 4.643


 37%|███▋      | 18347/50000 [27:25<42:59, 12.27it/s]

Epochs: 18345 | epoch avg. loss: 0.021 | test avg. loss: 4.628
Epochs: 18346 | epoch avg. loss: 0.009 | test avg. loss: 4.639
Epochs: 18347 | epoch avg. loss: 0.013 | test avg. loss: 4.558


 37%|███▋      | 18351/50000 [27:25<43:23, 12.16it/s]

Epochs: 18348 | epoch avg. loss: 0.015 | test avg. loss: 4.604
Epochs: 18349 | epoch avg. loss: 0.012 | test avg. loss: 4.622
Epochs: 18350 | epoch avg. loss: 0.012 | test avg. loss: 4.574


 37%|███▋      | 18353/50000 [27:26<41:58, 12.57it/s]

Epochs: 18351 | epoch avg. loss: 0.026 | test avg. loss: 4.676
Epochs: 18352 | epoch avg. loss: 0.013 | test avg. loss: 4.624
Epochs: 18353 | epoch avg. loss: 0.031 | test avg. loss: 4.636


 37%|███▋      | 18357/50000 [27:26<42:23, 12.44it/s]

Epochs: 18354 | epoch avg. loss: 0.017 | test avg. loss: 4.648
Epochs: 18355 | epoch avg. loss: 0.020 | test avg. loss: 4.537
Epochs: 18356 | epoch avg. loss: 0.016 | test avg. loss: 4.644


 37%|███▋      | 18359/50000 [27:26<42:10, 12.50it/s]

Epochs: 18357 | epoch avg. loss: 0.042 | test avg. loss: 4.661
Epochs: 18358 | epoch avg. loss: 0.019 | test avg. loss: 4.588
Epochs: 18359 | epoch avg. loss: 0.028 | test avg. loss: 4.666


 37%|███▋      | 18363/50000 [27:26<41:09, 12.81it/s]

Epochs: 18360 | epoch avg. loss: 0.013 | test avg. loss: 4.608
Epochs: 18361 | epoch avg. loss: 0.007 | test avg. loss: 4.622
Epochs: 18362 | epoch avg. loss: 0.012 | test avg. loss: 4.557


 37%|███▋      | 18365/50000 [27:27<39:56, 13.20it/s]

Epochs: 18363 | epoch avg. loss: 0.017 | test avg. loss: 4.624
Epochs: 18364 | epoch avg. loss: 0.007 | test avg. loss: 4.624
Epochs: 18365 | epoch avg. loss: 0.011 | test avg. loss: 4.636


 37%|███▋      | 18369/50000 [27:27<43:52, 12.02it/s]

Epochs: 18366 | epoch avg. loss: 0.010 | test avg. loss: 4.666
Epochs: 18367 | epoch avg. loss: 0.013 | test avg. loss: 4.553
Epochs: 18368 | epoch avg. loss: 0.018 | test avg. loss: 4.630


 37%|███▋      | 18371/50000 [27:27<44:04, 11.96it/s]

Epochs: 18369 | epoch avg. loss: 0.023 | test avg. loss: 4.590
Epochs: 18370 | epoch avg. loss: 0.010 | test avg. loss: 4.610
Epochs: 18371 | epoch avg. loss: 0.013 | test avg. loss: 4.738


 37%|███▋      | 18375/50000 [27:27<42:26, 12.42it/s]

Epochs: 18372 | epoch avg. loss: 0.027 | test avg. loss: 4.583
Epochs: 18373 | epoch avg. loss: 0.052 | test avg. loss: 4.607
Epochs: 18374 | epoch avg. loss: 0.027 | test avg. loss: 4.691


 37%|███▋      | 18377/50000 [27:28<44:59, 11.71it/s]

Epochs: 18375 | epoch avg. loss: 0.032 | test avg. loss: 4.552
Epochs: 18376 | epoch avg. loss: 0.048 | test avg. loss: 4.722
Epochs: 18377 | epoch avg. loss: 0.023 | test avg. loss: 4.674


 37%|███▋      | 18381/50000 [27:28<44:19, 11.89it/s]

Epochs: 18378 | epoch avg. loss: 0.017 | test avg. loss: 4.633
Epochs: 18379 | epoch avg. loss: 0.018 | test avg. loss: 4.720
Epochs: 18380 | epoch avg. loss: 0.038 | test avg. loss: 4.525


 37%|███▋      | 18383/50000 [27:28<44:15, 11.90it/s]

Epochs: 18381 | epoch avg. loss: 0.056 | test avg. loss: 4.649
Epochs: 18382 | epoch avg. loss: 0.027 | test avg. loss: 4.693
Epochs: 18383 | epoch avg. loss: 0.021 | test avg. loss: 4.634


 37%|███▋      | 18387/50000 [27:28<46:12, 11.40it/s]

Epochs: 18384 | epoch avg. loss: 0.038 | test avg. loss: 4.762
Epochs: 18385 | epoch avg. loss: 0.044 | test avg. loss: 4.564
Epochs: 18386 | epoch avg. loss: 0.016 | test avg. loss: 4.562


 37%|███▋      | 18389/50000 [27:29<47:12, 11.16it/s]

Epochs: 18387 | epoch avg. loss: 0.014 | test avg. loss: 4.647
Epochs: 18388 | epoch avg. loss: 0.020 | test avg. loss: 4.587
Epochs: 18389 | epoch avg. loss: 0.015 | test avg. loss: 4.697


 37%|███▋      | 18393/50000 [27:29<44:42, 11.78it/s]

Epochs: 18390 | epoch avg. loss: 0.024 | test avg. loss: 4.640
Epochs: 18391 | epoch avg. loss: 0.024 | test avg. loss: 4.554
Epochs: 18392 | epoch avg. loss: 0.070 | test avg. loss: 4.661


 37%|███▋      | 18395/50000 [27:29<43:54, 12.00it/s]

Epochs: 18393 | epoch avg. loss: 0.056 | test avg. loss: 4.594
Epochs: 18394 | epoch avg. loss: 0.016 | test avg. loss: 4.597
Epochs: 18395 | epoch avg. loss: 0.016 | test avg. loss: 4.618


 37%|███▋      | 18399/50000 [27:29<42:55, 12.27it/s]

Epochs: 18396 | epoch avg. loss: 0.018 | test avg. loss: 4.572
Epochs: 18397 | epoch avg. loss: 0.032 | test avg. loss: 4.707
Epochs: 18398 | epoch avg. loss: 0.031 | test avg. loss: 4.591


 37%|███▋      | 18399/50000 [27:29<42:55, 12.27it/s]

Epochs: 18399 | epoch avg. loss: 0.034 | test avg. loss: 4.599


 37%|███▋      | 18403/50000 [27:31<1:58:47,  4.43it/s]

Epochs: 18400 | epoch avg. loss: 0.018 | test avg. loss: 4.665
Epochs: 18401 | epoch avg. loss: 0.012 | test avg. loss: 4.608
Epochs: 18402 | epoch avg. loss: 0.011 | test avg. loss: 4.764


 37%|███▋      | 18405/50000 [27:31<1:36:53,  5.43it/s]

Epochs: 18403 | epoch avg. loss: 0.044 | test avg. loss: 4.602
Epochs: 18404 | epoch avg. loss: 0.054 | test avg. loss: 4.663
Epochs: 18405 | epoch avg. loss: 0.060 | test avg. loss: 4.728


 37%|███▋      | 18409/50000 [27:31<1:08:04,  7.73it/s]

Epochs: 18406 | epoch avg. loss: 0.063 | test avg. loss: 4.559
Epochs: 18407 | epoch avg. loss: 0.153 | test avg. loss: 4.640
Epochs: 18408 | epoch avg. loss: 0.061 | test avg. loss: 4.671


 37%|███▋      | 18413/50000 [27:32<50:26, 10.44it/s]

Epochs: 18409 | epoch avg. loss: 0.047 | test avg. loss: 4.603
Epochs: 18410 | epoch avg. loss: 0.169 | test avg. loss: 4.836
Epochs: 18411 | epoch avg. loss: 0.173 | test avg. loss: 4.618
Epochs: 18412 | epoch avg. loss: 0.076 | test avg. loss: 4.655


 37%|███▋      | 18415/50000 [27:32<46:29, 11.32it/s]

Epochs: 18413 | epoch avg. loss: 0.111 | test avg. loss: 4.997
Epochs: 18414 | epoch avg. loss: 0.135 | test avg. loss: 4.608
Epochs: 18415 | epoch avg. loss: 0.087 | test avg. loss: 4.528


 37%|███▋      | 18419/50000 [27:32<41:58, 12.54it/s]

Epochs: 18416 | epoch avg. loss: 0.056 | test avg. loss: 4.736
Epochs: 18417 | epoch avg. loss: 0.071 | test avg. loss: 4.555
Epochs: 18418 | epoch avg. loss: 0.077 | test avg. loss: 4.621


 37%|███▋      | 18421/50000 [27:32<41:11, 12.77it/s]

Epochs: 18419 | epoch avg. loss: 0.038 | test avg. loss: 4.866
Epochs: 18420 | epoch avg. loss: 0.080 | test avg. loss: 4.587
Epochs: 18421 | epoch avg. loss: 0.026 | test avg. loss: 4.600


 37%|███▋      | 18425/50000 [27:33<42:11, 12.47it/s]

Epochs: 18422 | epoch avg. loss: 0.020 | test avg. loss: 4.573
Epochs: 18423 | epoch avg. loss: 0.014 | test avg. loss: 4.582
Epochs: 18424 | epoch avg. loss: 0.013 | test avg. loss: 4.680
Epochs: 18425 | epoch avg. loss: 0.014 | test avg. loss: 4.656


 37%|███▋      | 18429/50000 [27:33<37:01, 14.21it/s]

Epochs: 18426 | epoch avg. loss: 0.033 | test avg. loss: 4.740
Epochs: 18427 | epoch avg. loss: 0.017 | test avg. loss: 4.683
Epochs: 18428 | epoch avg. loss: 0.022 | test avg. loss: 4.666
Epochs: 18429 | epoch avg. loss: 0.009 | test avg. loss: 4.574




Epochs: 18430 | epoch avg. loss: 0.019 | test avg. loss: 4.549
Epochs: 18431 | epoch avg. loss: 0.014 | test avg. loss: 4.641
Epochs: 18432 | epoch avg. loss: 0.024 | test avg. loss: 4.718


 37%|███▋      | 18435/50000 [27:33<35:37, 14.77it/s]

Epochs: 18433 | epoch avg. loss: 0.016 | test avg. loss: 4.709
Epochs: 18434 | epoch avg. loss: 0.009 | test avg. loss: 4.724
Epochs: 18435 | epoch avg. loss: 0.013 | test avg. loss: 4.657


                                                     

Epochs: 18436 | epoch avg. loss: 0.009 | test avg. loss: 4.622
Epochs: 18437 | epoch avg. loss: 0.009 | test avg. loss: 4.595
Epochs: 18438 | epoch avg. loss: 0.008 | test avg. loss: 4.657


 37%|███▋      | 18443/50000 [27:34<35:19, 14.89it/s]

Epochs: 18439 | epoch avg. loss: 0.015 | test avg. loss: 4.609
Epochs: 18440 | epoch avg. loss: 0.015 | test avg. loss: 4.620
Epochs: 18441 | epoch avg. loss: 0.014 | test avg. loss: 4.692
Epochs: 18442 | epoch avg. loss: 0.021 | test avg. loss: 4.681


 37%|███▋      | 18447/50000 [27:34<34:11, 15.38it/s]

Epochs: 18443 | epoch avg. loss: 0.015 | test avg. loss: 4.602
Epochs: 18444 | epoch avg. loss: 0.017 | test avg. loss: 4.569
Epochs: 18445 | epoch avg. loss: 0.033 | test avg. loss: 4.651
Epochs: 18446 | epoch avg. loss: 0.008 | test avg. loss: 4.610


 37%|███▋      | 18449/50000 [27:34<34:28, 15.26it/s]

Epochs: 18447 | epoch avg. loss: 0.015 | test avg. loss: 4.611
Epochs: 18448 | epoch avg. loss: 0.017 | test avg. loss: 4.680
Epochs: 18449 | epoch avg. loss: 0.036 | test avg. loss: 4.518


 37%|███▋      | 18453/50000 [27:35<38:14, 13.75it/s]

Epochs: 18450 | epoch avg. loss: 0.026 | test avg. loss: 4.586
Epochs: 18451 | epoch avg. loss: 0.024 | test avg. loss: 4.626
Epochs: 18452 | epoch avg. loss: 0.018 | test avg. loss: 4.607


 37%|███▋      | 18455/50000 [27:35<38:18, 13.72it/s]

Epochs: 18453 | epoch avg. loss: 0.023 | test avg. loss: 4.732
Epochs: 18454 | epoch avg. loss: 0.035 | test avg. loss: 4.664
Epochs: 18455 | epoch avg. loss: 0.012 | test avg. loss: 4.605


 37%|███▋      | 18459/50000 [27:35<36:52, 14.26it/s]

Epochs: 18456 | epoch avg. loss: 0.012 | test avg. loss: 4.656
Epochs: 18457 | epoch avg. loss: 0.019 | test avg. loss: 4.580
Epochs: 18458 | epoch avg. loss: 0.019 | test avg. loss: 4.587
Epochs: 18459 | epoch avg. loss: 0.014 | test avg. loss: 4.653


 37%|███▋      | 18463/50000 [27:35<36:15, 14.49it/s]

Epochs: 18460 | epoch avg. loss: 0.010 | test avg. loss: 4.655
Epochs: 18461 | epoch avg. loss: 0.009 | test avg. loss: 4.617
Epochs: 18462 | epoch avg. loss: 0.017 | test avg. loss: 4.660


 37%|███▋      | 18465/50000 [27:36<38:58, 13.48it/s]

Epochs: 18463 | epoch avg. loss: 0.023 | test avg. loss: 4.668
Epochs: 18464 | epoch avg. loss: 0.022 | test avg. loss: 4.510
Epochs: 18465 | epoch avg. loss: 0.053 | test avg. loss: 4.559


 37%|███▋      | 18469/50000 [27:36<39:37, 13.26it/s]

Epochs: 18466 | epoch avg. loss: 0.017 | test avg. loss: 4.729
Epochs: 18467 | epoch avg. loss: 0.032 | test avg. loss: 4.596
Epochs: 18468 | epoch avg. loss: 0.021 | test avg. loss: 4.627
Epochs: 18469 | epoch avg. loss: 0.009 | test avg. loss: 4.588


 37%|███▋      | 18473/50000 [27:36<36:44, 14.30it/s]

Epochs: 18470 | epoch avg. loss: 0.013 | test avg. loss: 4.527
Epochs: 18471 | epoch avg. loss: 0.014 | test avg. loss: 4.569
Epochs: 18472 | epoch avg. loss: 0.013 | test avg. loss: 4.556
Epochs: 18473 | epoch avg. loss: 0.007 | test avg. loss: 4.624


 37%|███▋      | 18477/50000 [27:36<35:03, 14.99it/s]

Epochs: 18474 | epoch avg. loss: 0.014 | test avg. loss: 4.693
Epochs: 18475 | epoch avg. loss: 0.014 | test avg. loss: 4.600
Epochs: 18476 | epoch avg. loss: 0.027 | test avg. loss: 4.644
Epochs: 18477 | epoch avg. loss: 0.023 | test avg. loss: 4.657


 37%|███▋      | 18481/50000 [27:37<36:40, 14.32it/s]

Epochs: 18478 | epoch avg. loss: 0.024 | test avg. loss: 4.524
Epochs: 18479 | epoch avg. loss: 0.046 | test avg. loss: 4.736
Epochs: 18480 | epoch avg. loss: 0.037 | test avg. loss: 4.661


 37%|███▋      | 18483/50000 [27:37<36:28, 14.40it/s]

Epochs: 18481 | epoch avg. loss: 0.015 | test avg. loss: 4.641
Epochs: 18482 | epoch avg. loss: 0.021 | test avg. loss: 4.701
Epochs: 18483 | epoch avg. loss: 0.024 | test avg. loss: 4.644


 37%|███▋      | 18487/50000 [27:37<35:54, 14.63it/s]

Epochs: 18484 | epoch avg. loss: 0.014 | test avg. loss: 4.530
Epochs: 18485 | epoch avg. loss: 0.046 | test avg. loss: 4.623
Epochs: 18486 | epoch avg. loss: 0.029 | test avg. loss: 4.717
Epochs: 18487 | epoch avg. loss: 0.027 | test avg. loss: 4.640


 37%|███▋      | 18491/50000 [27:37<34:22, 15.28it/s]

Epochs: 18488 | epoch avg. loss: 0.054 | test avg. loss: 4.685
Epochs: 18489 | epoch avg. loss: 0.033 | test avg. loss: 4.580
Epochs: 18490 | epoch avg. loss: 0.014 | test avg. loss: 4.560
Epochs: 18491 | epoch avg. loss: 0.017 | test avg. loss: 4.621


 37%|███▋      | 18495/50000 [27:38<38:02, 13.80it/s]

Epochs: 18492 | epoch avg. loss: 0.017 | test avg. loss: 4.592
Epochs: 18493 | epoch avg. loss: 0.011 | test avg. loss: 4.660
Epochs: 18494 | epoch avg. loss: 0.018 | test avg. loss: 4.636


 37%|███▋      | 18499/50000 [27:38<36:37, 14.33it/s]

Epochs: 18495 | epoch avg. loss: 0.012 | test avg. loss: 4.533
Epochs: 18496 | epoch avg. loss: 0.014 | test avg. loss: 4.545
Epochs: 18497 | epoch avg. loss: 0.018 | test avg. loss: 4.592
Epochs: 18498 | epoch avg. loss: 0.018 | test avg. loss: 4.541


 37%|███▋      | 18499/50000 [27:38<36:37, 14.33it/s]

Epochs: 18499 | epoch avg. loss: 0.072 | test avg. loss: 4.631


 37%|███▋      | 18503/50000 [27:39<1:52:18,  4.67it/s]

Epochs: 18500 | epoch avg. loss: 0.024 | test avg. loss: 4.741
Epochs: 18501 | epoch avg. loss: 0.034 | test avg. loss: 4.593
Epochs: 18502 | epoch avg. loss: 0.055 | test avg. loss: 4.698


 37%|███▋      | 18505/50000 [27:40<1:30:11,  5.82it/s]

Epochs: 18503 | epoch avg. loss: 0.020 | test avg. loss: 4.720
Epochs: 18504 | epoch avg. loss: 0.018 | test avg. loss: 4.627
Epochs: 18505 | epoch avg. loss: 0.017 | test avg. loss: 4.579


 37%|███▋      | 18509/50000 [27:40<1:06:42,  7.87it/s]

Epochs: 18506 | epoch avg. loss: 0.015 | test avg. loss: 4.668
Epochs: 18507 | epoch avg. loss: 0.034 | test avg. loss: 4.713
Epochs: 18508 | epoch avg. loss: 0.023 | test avg. loss: 4.656


 37%|███▋      | 18511/50000 [27:40<58:41,  8.94it/s]

Epochs: 18509 | epoch avg. loss: 0.014 | test avg. loss: 4.653
Epochs: 18510 | epoch avg. loss: 0.018 | test avg. loss: 4.748
Epochs: 18511 | epoch avg. loss: 0.029 | test avg. loss: 4.660


 37%|███▋      | 18515/50000 [27:40<49:54, 10.51it/s]

Epochs: 18512 | epoch avg. loss: 0.019 | test avg. loss: 4.473
Epochs: 18513 | epoch avg. loss: 0.061 | test avg. loss: 4.545
Epochs: 18514 | epoch avg. loss: 0.014 | test avg. loss: 4.606


 37%|███▋      | 18517/50000 [27:41<51:05, 10.27it/s]

Epochs: 18515 | epoch avg. loss: 0.016 | test avg. loss: 4.566
Epochs: 18516 | epoch avg. loss: 0.038 | test avg. loss: 4.609
Epochs: 18517 | epoch avg. loss: 0.017 | test avg. loss: 4.637


 37%|███▋      | 18521/50000 [27:41<44:51, 11.70it/s]

Epochs: 18518 | epoch avg. loss: 0.023 | test avg. loss: 4.501
Epochs: 18519 | epoch avg. loss: 0.012 | test avg. loss: 4.610
Epochs: 18520 | epoch avg. loss: 0.051 | test avg. loss: 4.700


 37%|███▋      | 18523/50000 [27:41<41:33, 12.62it/s]

Epochs: 18521 | epoch avg. loss: 0.038 | test avg. loss: 4.613
Epochs: 18522 | epoch avg. loss: 0.015 | test avg. loss: 4.674
Epochs: 18523 | epoch avg. loss: 0.014 | test avg. loss: 4.703


 37%|███▋      | 18527/50000 [27:41<42:29, 12.35it/s]

Epochs: 18524 | epoch avg. loss: 0.015 | test avg. loss: 4.624
Epochs: 18525 | epoch avg. loss: 0.009 | test avg. loss: 4.569
Epochs: 18526 | epoch avg. loss: 0.009 | test avg. loss: 4.599


 37%|███▋      | 18529/50000 [27:42<42:40, 12.29it/s]

Epochs: 18527 | epoch avg. loss: 0.009 | test avg. loss: 4.564
Epochs: 18528 | epoch avg. loss: 0.011 | test avg. loss: 4.653
Epochs: 18529 | epoch avg. loss: 0.024 | test avg. loss: 4.684


 37%|███▋      | 18533/50000 [27:42<45:16, 11.58it/s]

Epochs: 18530 | epoch avg. loss: 0.014 | test avg. loss: 4.611
Epochs: 18531 | epoch avg. loss: 0.027 | test avg. loss: 4.703
Epochs: 18532 | epoch avg. loss: 0.019 | test avg. loss: 4.673


 37%|███▋      | 18535/50000 [27:42<48:19, 10.85it/s]

Epochs: 18533 | epoch avg. loss: 0.019 | test avg. loss: 4.564
Epochs: 18534 | epoch avg. loss: 0.088 | test avg. loss: 4.601


 37%|███▋      | 18537/50000 [27:42<48:39, 10.78it/s]

Epochs: 18535 | epoch avg. loss: 0.029 | test avg. loss: 4.557
Epochs: 18536 | epoch avg. loss: 0.024 | test avg. loss: 4.577
Epochs: 18537 | epoch avg. loss: 0.015 | test avg. loss: 4.693


 37%|███▋      | 18541/50000 [27:43<45:21, 11.56it/s]

Epochs: 18538 | epoch avg. loss: 0.018 | test avg. loss: 4.745
Epochs: 18539 | epoch avg. loss: 0.013 | test avg. loss: 4.678
Epochs: 18540 | epoch avg. loss: 0.014 | test avg. loss: 4.651


 37%|███▋      | 18543/50000 [27:43<43:13, 12.13it/s]

Epochs: 18541 | epoch avg. loss: 0.024 | test avg. loss: 4.525
Epochs: 18542 | epoch avg. loss: 0.012 | test avg. loss: 4.514
Epochs: 18543 | epoch avg. loss: 0.011 | test avg. loss: 4.582


 37%|███▋      | 18547/50000 [27:43<38:56, 13.46it/s]

Epochs: 18544 | epoch avg. loss: 0.008 | test avg. loss: 4.638
Epochs: 18545 | epoch avg. loss: 0.010 | test avg. loss: 4.657
Epochs: 18546 | epoch avg. loss: 0.008 | test avg. loss: 4.613
Epochs: 18547 | epoch avg. loss: 0.008 | test avg. loss: 4.573


 37%|███▋      | 18551/50000 [27:43<35:50, 14.62it/s]

Epochs: 18548 | epoch avg. loss: 0.010 | test avg. loss: 4.522
Epochs: 18549 | epoch avg. loss: 0.031 | test avg. loss: 4.559
Epochs: 18550 | epoch avg. loss: 0.011 | test avg. loss: 4.632
Epochs: 18551 | epoch avg. loss: 0.010 | test avg. loss: 4.653


 37%|███▋      | 18555/50000 [27:44<38:54, 13.47it/s]

Epochs: 18552 | epoch avg. loss: 0.010 | test avg. loss: 4.582
Epochs: 18553 | epoch avg. loss: 0.009 | test avg. loss: 4.586
Epochs: 18554 | epoch avg. loss: 0.007 | test avg. loss: 4.590


 37%|███▋      | 18557/50000 [27:44<38:15, 13.70it/s]

Epochs: 18555 | epoch avg. loss: 0.008 | test avg. loss: 4.590
Epochs: 18556 | epoch avg. loss: 0.016 | test avg. loss: 4.616
Epochs: 18557 | epoch avg. loss: 0.016 | test avg. loss: 4.621


 37%|███▋      | 18561/50000 [27:44<39:15, 13.35it/s]

Epochs: 18558 | epoch avg. loss: 0.014 | test avg. loss: 4.699
Epochs: 18559 | epoch avg. loss: 0.026 | test avg. loss: 4.579
Epochs: 18560 | epoch avg. loss: 0.015 | test avg. loss: 4.525


 37%|███▋      | 18565/50000 [27:44<37:17, 14.05it/s]

Epochs: 18561 | epoch avg. loss: 0.022 | test avg. loss: 4.596
Epochs: 18562 | epoch avg. loss: 0.013 | test avg. loss: 4.671
Epochs: 18563 | epoch avg. loss: 0.014 | test avg. loss: 4.634
Epochs: 18564 | epoch avg. loss: 0.024 | test avg. loss: 4.624


 37%|███▋      | 18567/50000 [27:45<37:31, 13.96it/s]

Epochs: 18565 | epoch avg. loss: 0.010 | test avg. loss: 4.574
Epochs: 18566 | epoch avg. loss: 0.008 | test avg. loss: 4.509
Epochs: 18567 | epoch avg. loss: 0.011 | test avg. loss: 4.582


 37%|███▋      | 18571/50000 [27:45<37:21, 14.02it/s]

Epochs: 18568 | epoch avg. loss: 0.026 | test avg. loss: 4.671
Epochs: 18569 | epoch avg. loss: 0.044 | test avg. loss: 4.605
Epochs: 18570 | epoch avg. loss: 0.013 | test avg. loss: 4.542


 37%|███▋      | 18573/50000 [27:45<39:15, 13.34it/s]

Epochs: 18571 | epoch avg. loss: 0.039 | test avg. loss: 4.687
Epochs: 18572 | epoch avg. loss: 0.061 | test avg. loss: 4.651
Epochs: 18573 | epoch avg. loss: 0.026 | test avg. loss: 4.559


 37%|███▋      | 18577/50000 [27:45<41:19, 12.67it/s]

Epochs: 18574 | epoch avg. loss: 0.043 | test avg. loss: 4.741
Epochs: 18575 | epoch avg. loss: 0.070 | test avg. loss: 4.729
Epochs: 18576 | epoch avg. loss: 0.031 | test avg. loss: 4.567


 37%|███▋      | 18579/50000 [27:45<40:34, 12.91it/s]

Epochs: 18577 | epoch avg. loss: 0.068 | test avg. loss: 4.585
Epochs: 18578 | epoch avg. loss: 0.017 | test avg. loss: 4.688
Epochs: 18579 | epoch avg. loss: 0.027 | test avg. loss: 4.606


 37%|███▋      | 18583/50000 [27:46<42:05, 12.44it/s]

Epochs: 18580 | epoch avg. loss: 0.039 | test avg. loss: 4.558
Epochs: 18581 | epoch avg. loss: 0.058 | test avg. loss: 4.559
Epochs: 18582 | epoch avg. loss: 0.045 | test avg. loss: 4.736


 37%|███▋      | 18585/50000 [27:46<43:43, 11.98it/s]

Epochs: 18583 | epoch avg. loss: 0.091 | test avg. loss: 4.520
Epochs: 18584 | epoch avg. loss: 0.025 | test avg. loss: 4.528
Epochs: 18585 | epoch avg. loss: 0.020 | test avg. loss: 4.703


 37%|███▋      | 18589/50000 [27:46<41:19, 12.67it/s]

Epochs: 18586 | epoch avg. loss: 0.051 | test avg. loss: 4.740
Epochs: 18587 | epoch avg. loss: 0.027 | test avg. loss: 4.614
Epochs: 18588 | epoch avg. loss: 0.105 | test avg. loss: 4.646


 37%|███▋      | 18591/50000 [27:46<39:31, 13.25it/s]

Epochs: 18589 | epoch avg. loss: 0.073 | test avg. loss: 4.815
Epochs: 18590 | epoch avg. loss: 0.073 | test avg. loss: 4.581
Epochs: 18591 | epoch avg. loss: 0.073 | test avg. loss: 4.563


 37%|███▋      | 18595/50000 [27:47<43:54, 11.92it/s]

Epochs: 18592 | epoch avg. loss: 0.020 | test avg. loss: 4.693
Epochs: 18593 | epoch avg. loss: 0.051 | test avg. loss: 4.588
Epochs: 18594 | epoch avg. loss: 0.014 | test avg. loss: 4.569


 37%|███▋      | 18597/50000 [27:47<47:31, 11.01it/s]

Epochs: 18595 | epoch avg. loss: 0.015 | test avg. loss: 4.595
Epochs: 18596 | epoch avg. loss: 0.053 | test avg. loss: 4.678
Epochs: 18597 | epoch avg. loss: 0.017 | test avg. loss: 4.673


 37%|███▋      | 18599/50000 [27:47<49:15, 10.62it/s]

Epochs: 18598 | epoch avg. loss: 0.017 | test avg. loss: 4.531
Epochs: 18599 | epoch avg. loss: 0.048 | test avg. loss: 4.546


 37%|███▋      | 18603/50000 [27:49<2:09:13,  4.05it/s]

Epochs: 18600 | epoch avg. loss: 0.019 | test avg. loss: 4.618
Epochs: 18601 | epoch avg. loss: 0.025 | test avg. loss: 4.559
Epochs: 18602 | epoch avg. loss: 0.041 | test avg. loss: 4.630


 37%|███▋      | 18605/50000 [27:49<1:40:54,  5.19it/s]

Epochs: 18603 | epoch avg. loss: 0.038 | test avg. loss: 4.814
Epochs: 18604 | epoch avg. loss: 0.065 | test avg. loss: 4.782
Epochs: 18605 | epoch avg. loss: 0.029 | test avg. loss: 4.610


 37%|███▋      | 18609/50000 [27:49<1:09:56,  7.48it/s]

Epochs: 18606 | epoch avg. loss: 0.080 | test avg. loss: 4.525
Epochs: 18607 | epoch avg. loss: 0.033 | test avg. loss: 4.664
Epochs: 18608 | epoch avg. loss: 0.082 | test avg. loss: 4.518


 37%|███▋      | 18611/50000 [27:50<1:00:37,  8.63it/s]

Epochs: 18609 | epoch avg. loss: 0.021 | test avg. loss: 4.572
Epochs: 18610 | epoch avg. loss: 0.034 | test avg. loss: 4.710
Epochs: 18611 | epoch avg. loss: 0.030 | test avg. loss: 4.900
Epochs: 18612 | epoch avg. loss: 0.062 | test avg. loss: 4.608


 37%|███▋      | 18617/50000 [27:50<43:41, 11.97it/s]

Epochs: 18613 | epoch avg. loss: 0.153 | test avg. loss: 4.520
Epochs: 18614 | epoch avg. loss: 0.112 | test avg. loss: 4.700
Epochs: 18615 | epoch avg. loss: 0.145 | test avg. loss: 4.577
Epochs: 18616 | epoch avg. loss: 0.087 | test avg. loss: 5.012


 37%|███▋      | 18621/50000 [27:50<38:43, 13.51it/s]

Epochs: 18617 | epoch avg. loss: 0.759 | test avg. loss: 4.948
Epochs: 18618 | epoch avg. loss: 0.223 | test avg. loss: 5.978
Epochs: 18619 | epoch avg. loss: 0.469 | test avg. loss: 5.068
Epochs: 18620 | epoch avg. loss: 0.750 | test avg. loss: 5.137


 37%|███▋      | 18623/50000 [27:50<39:39, 13.18it/s]

Epochs: 18621 | epoch avg. loss: 0.682 | test avg. loss: 5.167
Epochs: 18622 | epoch avg. loss: 0.589 | test avg. loss: 4.664
Epochs: 18623 | epoch avg. loss: 0.203 | test avg. loss: 4.421


 37%|███▋      | 18627/50000 [27:51<39:53, 13.11it/s]

Epochs: 18624 | epoch avg. loss: 0.691 | test avg. loss: 4.613
Epochs: 18625 | epoch avg. loss: 0.496 | test avg. loss: 4.929
Epochs: 18626 | epoch avg. loss: 0.271 | test avg. loss: 4.610
Epochs: 18627 | epoch avg. loss: 0.252 | test avg. loss: 6.355


 37%|███▋      | 18631/50000 [27:51<36:21, 14.38it/s]

Epochs: 18628 | epoch avg. loss: 1.128 | test avg. loss: 5.089
Epochs: 18629 | epoch avg. loss: 0.334 | test avg. loss: 4.836
Epochs: 18630 | epoch avg. loss: 1.834 | test avg. loss: 4.166
Epochs: 18631 | epoch avg. loss: 0.395 | test avg. loss: 6.929


 37%|███▋      | 18635/50000 [27:51<36:03, 14.50it/s]

Epochs: 18632 | epoch avg. loss: 1.434 | test avg. loss: 5.402
Epochs: 18633 | epoch avg. loss: 1.127 | test avg. loss: 5.092
Epochs: 18634 | epoch avg. loss: 0.526 | test avg. loss: 6.462
Epochs: 18635 | epoch avg. loss: 1.123 | test avg. loss: 4.982


 37%|███▋      | 18639/50000 [27:52<36:39, 14.26it/s]

Epochs: 18636 | epoch avg. loss: 0.802 | test avg. loss: 4.779
Epochs: 18637 | epoch avg. loss: 0.244 | test avg. loss: 5.200
Epochs: 18638 | epoch avg. loss: 0.233 | test avg. loss: 4.785




Epochs: 18639 | epoch avg. loss: 0.123 | test avg. loss: 5.109
Epochs: 18640 | epoch avg. loss: 0.150 | test avg. loss: 4.761
Epochs: 18641 | epoch avg. loss: 0.230 | test avg. loss: 4.853


 37%|███▋      | 18645/50000 [27:52<35:03, 14.91it/s]

Epochs: 18642 | epoch avg. loss: 0.163 | test avg. loss: 5.151
Epochs: 18643 | epoch avg. loss: 0.164 | test avg. loss: 4.937
Epochs: 18644 | epoch avg. loss: 0.522 | test avg. loss: 4.763
Epochs: 18645 | epoch avg. loss: 0.213 | test avg. loss: 4.644


 37%|███▋      | 18649/50000 [27:52<33:38, 15.53it/s]

Epochs: 18646 | epoch avg. loss: 0.138 | test avg. loss: 4.502
Epochs: 18647 | epoch avg. loss: 0.108 | test avg. loss: 4.613
Epochs: 18648 | epoch avg. loss: 0.064 | test avg. loss: 4.698
Epochs: 18649 | epoch avg. loss: 0.151 | test avg. loss: 4.986


 37%|███▋      | 18653/50000 [27:52<34:20, 15.22it/s]

Epochs: 18650 | epoch avg. loss: 0.172 | test avg. loss: 4.898
Epochs: 18651 | epoch avg. loss: 0.148 | test avg. loss: 4.775
Epochs: 18652 | epoch avg. loss: 0.084 | test avg. loss: 4.764


 37%|███▋      | 18655/50000 [27:53<35:29, 14.72it/s]

Epochs: 18653 | epoch avg. loss: 0.093 | test avg. loss: 4.422
Epochs: 18654 | epoch avg. loss: 0.046 | test avg. loss: 4.351
Epochs: 18655 | epoch avg. loss: 0.090 | test avg. loss: 4.603


                                                     

Epochs: 18656 | epoch avg. loss: 0.085 | test avg. loss: 4.534
Epochs: 18657 | epoch avg. loss: 0.025 | test avg. loss: 4.581
Epochs: 18658 | epoch avg. loss: 0.039 | test avg. loss: 4.834


 37%|███▋      | 18663/50000 [27:53<35:25, 14.74it/s]

Epochs: 18659 | epoch avg. loss: 0.068 | test avg. loss: 4.635
Epochs: 18660 | epoch avg. loss: 0.026 | test avg. loss: 4.649
Epochs: 18661 | epoch avg. loss: 0.058 | test avg. loss: 4.599
Epochs: 18662 | epoch avg. loss: 0.035 | test avg. loss: 4.492


 37%|███▋      | 18665/50000 [27:53<35:09, 14.86it/s]

Epochs: 18663 | epoch avg. loss: 0.036 | test avg. loss: 4.573
Epochs: 18664 | epoch avg. loss: 0.016 | test avg. loss: 4.559
Epochs: 18665 | epoch avg. loss: 0.015 | test avg. loss: 4.535


 37%|███▋      | 18669/50000 [27:54<37:39, 13.87it/s]

Epochs: 18666 | epoch avg. loss: 0.016 | test avg. loss: 4.574
Epochs: 18667 | epoch avg. loss: 0.012 | test avg. loss: 4.539
Epochs: 18668 | epoch avg. loss: 0.019 | test avg. loss: 4.552


 37%|███▋      | 18671/50000 [27:54<38:06, 13.70it/s]

Epochs: 18669 | epoch avg. loss: 0.024 | test avg. loss: 4.681
Epochs: 18670 | epoch avg. loss: 0.051 | test avg. loss: 4.489
Epochs: 18671 | epoch avg. loss: 0.037 | test avg. loss: 4.445




Epochs: 18672 | epoch avg. loss: 0.034 | test avg. loss: 4.531
Epochs: 18673 | epoch avg. loss: 0.024 | test avg. loss: 4.554
Epochs: 18674 | epoch avg. loss: 0.016 | test avg. loss: 4.510


                                                     

Epochs: 18675 | epoch avg. loss: 0.039 | test avg. loss: 4.539
Epochs: 18676 | epoch avg. loss: 0.013 | test avg. loss: 4.533
Epochs: 18677 | epoch avg. loss: 0.012 | test avg. loss: 4.449


 37%|███▋      | 18681/50000 [27:54<38:18, 13.63it/s]

Epochs: 18678 | epoch avg. loss: 0.031 | test avg. loss: 4.485
Epochs: 18679 | epoch avg. loss: 0.017 | test avg. loss: 4.640
Epochs: 18680 | epoch avg. loss: 0.037 | test avg. loss: 4.490


 37%|███▋      | 18685/50000 [27:55<37:47, 13.81it/s]

Epochs: 18681 | epoch avg. loss: 0.081 | test avg. loss: 4.503
Epochs: 18682 | epoch avg. loss: 0.025 | test avg. loss: 4.629
Epochs: 18683 | epoch avg. loss: 0.037 | test avg. loss: 4.542
Epochs: 18684 | epoch avg. loss: 0.046 | test avg. loss: 4.588


 37%|███▋      | 18689/50000 [27:55<35:58, 14.50it/s]

Epochs: 18685 | epoch avg. loss: 0.022 | test avg. loss: 4.681
Epochs: 18686 | epoch avg. loss: 0.033 | test avg. loss: 4.492
Epochs: 18687 | epoch avg. loss: 0.073 | test avg. loss: 4.526
Epochs: 18688 | epoch avg. loss: 0.017 | test avg. loss: 4.633


 37%|███▋      | 18693/50000 [27:55<34:21, 15.19it/s]

Epochs: 18689 | epoch avg. loss: 0.021 | test avg. loss: 4.519
Epochs: 18690 | epoch avg. loss: 0.056 | test avg. loss: 4.566
Epochs: 18691 | epoch avg. loss: 0.024 | test avg. loss: 4.420
Epochs: 18692 | epoch avg. loss: 0.023 | test avg. loss: 4.409


 37%|███▋      | 18695/50000 [27:55<35:01, 14.90it/s]

Epochs: 18693 | epoch avg. loss: 0.020 | test avg. loss: 4.575
Epochs: 18694 | epoch avg. loss: 0.053 | test avg. loss: 4.513
Epochs: 18695 | epoch avg. loss: 0.009 | test avg. loss: 4.544


 37%|███▋      | 18699/50000 [27:56<37:12, 14.02it/s]

Epochs: 18696 | epoch avg. loss: 0.010 | test avg. loss: 4.503
Epochs: 18697 | epoch avg. loss: 0.011 | test avg. loss: 4.479
Epochs: 18698 | epoch avg. loss: 0.009 | test avg. loss: 4.503


 37%|███▋      | 18699/50000 [27:56<37:12, 14.02it/s]

Epochs: 18699 | epoch avg. loss: 0.009 | test avg. loss: 4.500


 37%|███▋      | 18703/50000 [27:57<1:59:01,  4.38it/s]

Epochs: 18700 | epoch avg. loss: 0.009 | test avg. loss: 4.536
Epochs: 18701 | epoch avg. loss: 0.008 | test avg. loss: 4.543
Epochs: 18702 | epoch avg. loss: 0.011 | test avg. loss: 4.486


 37%|███▋      | 18705/50000 [27:58<1:35:06,  5.48it/s]

Epochs: 18703 | epoch avg. loss: 0.027 | test avg. loss: 4.586
Epochs: 18704 | epoch avg. loss: 0.060 | test avg. loss: 4.596
Epochs: 18705 | epoch avg. loss: 0.044 | test avg. loss: 4.482


 37%|███▋      | 18709/50000 [27:58<1:04:55,  8.03it/s]

Epochs: 18706 | epoch avg. loss: 0.031 | test avg. loss: 4.543
Epochs: 18707 | epoch avg. loss: 0.013 | test avg. loss: 4.647
Epochs: 18708 | epoch avg. loss: 0.023 | test avg. loss: 4.548


 37%|███▋      | 18711/50000 [27:58<57:24,  9.08it/s]

Epochs: 18709 | epoch avg. loss: 0.016 | test avg. loss: 4.538
Epochs: 18710 | epoch avg. loss: 0.010 | test avg. loss: 4.502
Epochs: 18711 | epoch avg. loss: 0.012 | test avg. loss: 4.455


 37%|███▋      | 18715/50000 [27:58<48:48, 10.68it/s]

Epochs: 18712 | epoch avg. loss: 0.013 | test avg. loss: 4.668
Epochs: 18713 | epoch avg. loss: 0.060 | test avg. loss: 4.476
Epochs: 18714 | epoch avg. loss: 0.021 | test avg. loss: 4.466


 37%|███▋      | 18717/50000 [27:59<50:19, 10.36it/s]

Epochs: 18715 | epoch avg. loss: 0.029 | test avg. loss: 4.705
Epochs: 18716 | epoch avg. loss: 0.065 | test avg. loss: 4.505
Epochs: 18717 | epoch avg. loss: 0.126 | test avg. loss: 4.611


 37%|███▋      | 18721/50000 [27:59<46:59, 11.09it/s]

Epochs: 18718 | epoch avg. loss: 0.035 | test avg. loss: 4.695
Epochs: 18719 | epoch avg. loss: 0.034 | test avg. loss: 4.552
Epochs: 18720 | epoch avg. loss: 0.047 | test avg. loss: 4.494


 37%|███▋      | 18723/50000 [27:59<45:01, 11.58it/s]

Epochs: 18721 | epoch avg. loss: 0.019 | test avg. loss: 4.621
Epochs: 18722 | epoch avg. loss: 0.081 | test avg. loss: 4.471
Epochs: 18723 | epoch avg. loss: 0.017 | test avg. loss: 4.455


 37%|███▋      | 18727/50000 [27:59<39:06, 13.32it/s]

Epochs: 18724 | epoch avg. loss: 0.028 | test avg. loss: 4.559
Epochs: 18725 | epoch avg. loss: 0.018 | test avg. loss: 4.589
Epochs: 18726 | epoch avg. loss: 0.011 | test avg. loss: 4.587
Epochs: 18727 | epoch avg. loss: 0.009 | test avg. loss: 4.501


 37%|███▋      | 18731/50000 [28:00<38:24, 13.57it/s]

Epochs: 18728 | epoch avg. loss: 0.014 | test avg. loss: 4.541
Epochs: 18729 | epoch avg. loss: 0.023 | test avg. loss: 4.490
Epochs: 18730 | epoch avg. loss: 0.014 | test avg. loss: 4.448
Epochs: 18731 | epoch avg. loss: 0.019 | test avg. loss: 4.538


 37%|███▋      | 18735/50000 [28:00<35:04, 14.86it/s]

Epochs: 18732 | epoch avg. loss: 0.009 | test avg. loss: 4.543
Epochs: 18733 | epoch avg. loss: 0.012 | test avg. loss: 4.587
Epochs: 18734 | epoch avg. loss: 0.011 | test avg. loss: 4.533
Epochs: 18735 | epoch avg. loss: 0.009 | test avg. loss: 4.489


                                                     

Epochs: 18736 | epoch avg. loss: 0.009 | test avg. loss: 4.483
Epochs: 18737 | epoch avg. loss: 0.014 | test avg. loss: 4.478
Epochs: 18738 | epoch avg. loss: 0.027 | test avg. loss: 4.561


 37%|███▋      | 18741/50000 [28:00<36:45, 14.18it/s]

Epochs: 18739 | epoch avg. loss: 0.017 | test avg. loss: 4.589
Epochs: 18740 | epoch avg. loss: 0.015 | test avg. loss: 4.535
Epochs: 18741 | epoch avg. loss: 0.058 | test avg. loss: 4.565


 37%|███▋      | 18745/50000 [28:01<38:33, 13.51it/s]

Epochs: 18742 | epoch avg. loss: 0.016 | test avg. loss: 4.618
Epochs: 18743 | epoch avg. loss: 0.035 | test avg. loss: 4.480
Epochs: 18744 | epoch avg. loss: 0.022 | test avg. loss: 4.440


 37%|███▋      | 18747/50000 [28:01<36:51, 14.13it/s]

Epochs: 18745 | epoch avg. loss: 0.019 | test avg. loss: 4.498
Epochs: 18746 | epoch avg. loss: 0.009 | test avg. loss: 4.459
Epochs: 18747 | epoch avg. loss: 0.012 | test avg. loss: 4.486


 38%|███▊      | 18751/50000 [28:01<39:51, 13.06it/s]

Epochs: 18748 | epoch avg. loss: 0.013 | test avg. loss: 4.509
Epochs: 18749 | epoch avg. loss: 0.012 | test avg. loss: 4.454
Epochs: 18750 | epoch avg. loss: 0.010 | test avg. loss: 4.468


 38%|███▊      | 18753/50000 [28:01<41:10, 12.65it/s]

Epochs: 18751 | epoch avg. loss: 0.008 | test avg. loss: 4.535
Epochs: 18752 | epoch avg. loss: 0.017 | test avg. loss: 4.536
Epochs: 18753 | epoch avg. loss: 0.012 | test avg. loss: 4.460


 38%|███▊      | 18757/50000 [28:02<45:11, 11.52it/s]

Epochs: 18754 | epoch avg. loss: 0.020 | test avg. loss: 4.471
Epochs: 18755 | epoch avg. loss: 0.010 | test avg. loss: 4.553
Epochs: 18756 | epoch avg. loss: 0.014 | test avg. loss: 4.515


 38%|███▊      | 18759/50000 [28:02<43:38, 11.93it/s]

Epochs: 18757 | epoch avg. loss: 0.014 | test avg. loss: 4.540
Epochs: 18758 | epoch avg. loss: 0.009 | test avg. loss: 4.526
Epochs: 18759 | epoch avg. loss: 0.012 | test avg. loss: 4.466


 38%|███▊      | 18763/50000 [28:02<43:51, 11.87it/s]

Epochs: 18760 | epoch avg. loss: 0.052 | test avg. loss: 4.492
Epochs: 18761 | epoch avg. loss: 0.020 | test avg. loss: 4.567
Epochs: 18762 | epoch avg. loss: 0.034 | test avg. loss: 4.465


 38%|███▊      | 18765/50000 [28:02<40:27, 12.87it/s]

Epochs: 18763 | epoch avg. loss: 0.021 | test avg. loss: 4.545
Epochs: 18764 | epoch avg. loss: 0.025 | test avg. loss: 4.675
Epochs: 18765 | epoch avg. loss: 0.043 | test avg. loss: 4.444


 38%|███▊      | 18769/50000 [28:03<41:55, 12.42it/s]

Epochs: 18766 | epoch avg. loss: 0.154 | test avg. loss: 4.405
Epochs: 18767 | epoch avg. loss: 0.048 | test avg. loss: 4.766
Epochs: 18768 | epoch avg. loss: 0.154 | test avg. loss: 4.463


 38%|███▊      | 18773/50000 [28:03<37:47, 13.77it/s]

Epochs: 18769 | epoch avg. loss: 0.062 | test avg. loss: 4.452
Epochs: 18770 | epoch avg. loss: 0.082 | test avg. loss: 4.714
Epochs: 18771 | epoch avg. loss: 0.155 | test avg. loss: 4.467
Epochs: 18772 | epoch avg. loss: 0.064 | test avg. loss: 4.524


 38%|███▊      | 18775/50000 [28:03<35:59, 14.46it/s]

Epochs: 18773 | epoch avg. loss: 0.300 | test avg. loss: 4.632
Epochs: 18774 | epoch avg. loss: 0.080 | test avg. loss: 4.831
Epochs: 18775 | epoch avg. loss: 0.072 | test avg. loss: 4.700




Epochs: 18776 | epoch avg. loss: 0.302 | test avg. loss: 4.605
Epochs: 18777 | epoch avg. loss: 0.130 | test avg. loss: 5.166
Epochs: 18778 | epoch avg. loss: 0.314 | test avg. loss: 4.567


 38%|███▊      | 18781/50000 [28:03<36:41, 14.18it/s]

Epochs: 18779 | epoch avg. loss: 0.220 | test avg. loss: 4.632
Epochs: 18780 | epoch avg. loss: 0.150 | test avg. loss: 5.113
Epochs: 18781 | epoch avg. loss: 0.266 | test avg. loss: 4.402


 38%|███▊      | 18785/50000 [28:04<40:36, 12.81it/s]

Epochs: 18782 | epoch avg. loss: 0.065 | test avg. loss: 4.252
Epochs: 18783 | epoch avg. loss: 0.171 | test avg. loss: 4.457
Epochs: 18784 | epoch avg. loss: 0.154 | test avg. loss: 4.728


 38%|███▊      | 18789/50000 [28:04<37:35, 13.84it/s]

Epochs: 18785 | epoch avg. loss: 0.074 | test avg. loss: 4.842
Epochs: 18786 | epoch avg. loss: 0.168 | test avg. loss: 4.914
Epochs: 18787 | epoch avg. loss: 0.071 | test avg. loss: 4.828
Epochs: 18788 | epoch avg. loss: 0.077 | test avg. loss: 4.267


 38%|███▊      | 18791/50000 [28:04<35:45, 14.55it/s]

Epochs: 18789 | epoch avg. loss: 0.064 | test avg. loss: 4.154
Epochs: 18790 | epoch avg. loss: 0.089 | test avg. loss: 4.295
Epochs: 18791 | epoch avg. loss: 0.043 | test avg. loss: 4.816


 38%|███▊      | 18795/50000 [28:04<38:10, 13.62it/s]

Epochs: 18792 | epoch avg. loss: 0.169 | test avg. loss: 4.720
Epochs: 18793 | epoch avg. loss: 0.037 | test avg. loss: 4.711
Epochs: 18794 | epoch avg. loss: 0.099 | test avg. loss: 4.603


 38%|███▊      | 18797/50000 [28:05<40:46, 12.75it/s]

Epochs: 18795 | epoch avg. loss: 0.047 | test avg. loss: 4.468
Epochs: 18796 | epoch avg. loss: 0.019 | test avg. loss: 4.394
Epochs: 18797 | epoch avg. loss: 0.015 | test avg. loss: 4.451


 38%|███▊      | 18799/50000 [28:05<40:25, 12.86it/s]

Epochs: 18798 | epoch avg. loss: 0.023 | test avg. loss: 4.479
Epochs: 18799 | epoch avg. loss: 0.017 | test avg. loss: 4.533


 38%|███▊      | 18803/50000 [28:07<2:02:14,  4.25it/s]

Epochs: 18800 | epoch avg. loss: 0.018 | test avg. loss: 4.482
Epochs: 18801 | epoch avg. loss: 0.020 | test avg. loss: 4.459
Epochs: 18802 | epoch avg. loss: 0.050 | test avg. loss: 4.516
Epochs: 18803 | epoch avg. loss: 0.014 | test avg. loss: 4.553


 38%|███▊      | 18807/50000 [28:07<1:18:22,  6.63it/s]

Epochs: 18804 | epoch avg. loss: 0.015 | test avg. loss: 4.495
Epochs: 18805 | epoch avg. loss: 0.011 | test avg. loss: 4.451
Epochs: 18806 | epoch avg. loss: 0.010 | test avg. loss: 4.489


 38%|███▊      | 18811/50000 [28:07<54:49,  9.48it/s]  

Epochs: 18807 | epoch avg. loss: 0.016 | test avg. loss: 4.489
Epochs: 18808 | epoch avg. loss: 0.013 | test avg. loss: 4.432
Epochs: 18809 | epoch avg. loss: 0.022 | test avg. loss: 4.431
Epochs: 18810 | epoch avg. loss: 0.044 | test avg. loss: 4.540


 38%|███▊      | 18813/50000 [28:07<49:09, 10.57it/s]

Epochs: 18811 | epoch avg. loss: 0.024 | test avg. loss: 4.552
Epochs: 18812 | epoch avg. loss: 0.016 | test avg. loss: 4.451
Epochs: 18813 | epoch avg. loss: 0.028 | test avg. loss: 4.488


 38%|███▊      | 18815/50000 [28:08<49:14, 10.55it/s]

Epochs: 18814 | epoch avg. loss: 0.079 | test avg. loss: 4.616
Epochs: 18815 | epoch avg. loss: 0.042 | test avg. loss: 4.566


 38%|███▊      | 18819/50000 [28:08<48:09, 10.79it/s]

Epochs: 18816 | epoch avg. loss: 0.136 | test avg. loss: 4.501
Epochs: 18817 | epoch avg. loss: 0.036 | test avg. loss: 4.574
Epochs: 18818 | epoch avg. loss: 0.066 | test avg. loss: 4.319


 38%|███▊      | 18823/50000 [28:08<42:07, 12.33it/s]

Epochs: 18819 | epoch avg. loss: 0.053 | test avg. loss: 4.354
Epochs: 18820 | epoch avg. loss: 0.074 | test avg. loss: 4.573
Epochs: 18821 | epoch avg. loss: 0.034 | test avg. loss: 4.601
Epochs: 18822 | epoch avg. loss: 0.020 | test avg. loss: 4.478


 38%|███▊      | 18825/50000 [28:08<39:46, 13.06it/s]

Epochs: 18823 | epoch avg. loss: 0.020 | test avg. loss: 4.432
Epochs: 18824 | epoch avg. loss: 0.012 | test avg. loss: 4.376
Epochs: 18825 | epoch avg. loss: 0.025 | test avg. loss: 4.390


 38%|███▊      | 18829/50000 [28:08<39:43, 13.08it/s]

Epochs: 18826 | epoch avg. loss: 0.056 | test avg. loss: 4.464
Epochs: 18827 | epoch avg. loss: 0.021 | test avg. loss: 4.562
Epochs: 18828 | epoch avg. loss: 0.031 | test avg. loss: 4.403


 38%|███▊      | 18831/50000 [28:09<39:16, 13.23it/s]

Epochs: 18829 | epoch avg. loss: 0.035 | test avg. loss: 4.421
Epochs: 18830 | epoch avg. loss: 0.017 | test avg. loss: 4.561
Epochs: 18831 | epoch avg. loss: 0.027 | test avg. loss: 4.478


 38%|███▊      | 18835/50000 [28:09<41:05, 12.64it/s]

Epochs: 18832 | epoch avg. loss: 0.024 | test avg. loss: 4.555
Epochs: 18833 | epoch avg. loss: 0.012 | test avg. loss: 4.624
Epochs: 18834 | epoch avg. loss: 0.014 | test avg. loss: 4.536


 38%|███▊      | 18839/50000 [28:09<39:37, 13.10it/s]

Epochs: 18835 | epoch avg. loss: 0.016 | test avg. loss: 4.493
Epochs: 18836 | epoch avg. loss: 0.013 | test avg. loss: 4.480
Epochs: 18837 | epoch avg. loss: 0.008 | test avg. loss: 4.477
Epochs: 18838 | epoch avg. loss: 0.008 | test avg. loss: 4.463


 38%|███▊      | 18841/50000 [28:09<39:48, 13.04it/s]

Epochs: 18839 | epoch avg. loss: 0.011 | test avg. loss: 4.509
Epochs: 18840 | epoch avg. loss: 0.008 | test avg. loss: 4.521
Epochs: 18841 | epoch avg. loss: 0.008 | test avg. loss: 4.549


                                                     

Epochs: 18842 | epoch avg. loss: 0.015 | test avg. loss: 4.510
Epochs: 18843 | epoch avg. loss: 0.008 | test avg. loss: 4.460
Epochs: 18844 | epoch avg. loss: 0.009 | test avg. loss: 4.569


 38%|███▊      | 18847/50000 [28:10<36:28, 14.23it/s]

Epochs: 18845 | epoch avg. loss: 0.052 | test avg. loss: 4.522
Epochs: 18846 | epoch avg. loss: 0.031 | test avg. loss: 4.547
Epochs: 18847 | epoch avg. loss: 0.133 | test avg. loss: 4.561


 38%|███▊      | 18851/50000 [28:10<35:36, 14.58it/s]

Epochs: 18848 | epoch avg. loss: 0.042 | test avg. loss: 4.740
Epochs: 18849 | epoch avg. loss: 0.082 | test avg. loss: 4.534
Epochs: 18850 | epoch avg. loss: 0.027 | test avg. loss: 4.550
Epochs: 18851 | epoch avg. loss: 0.020 | test avg. loss: 4.630


 38%|███▊      | 18855/50000 [28:10<38:06, 13.62it/s]

Epochs: 18852 | epoch avg. loss: 0.031 | test avg. loss: 4.507
Epochs: 18853 | epoch avg. loss: 0.009 | test avg. loss: 4.532
Epochs: 18854 | epoch avg. loss: 0.025 | test avg. loss: 4.565


 38%|███▊      | 18857/50000 [28:11<39:11, 13.24it/s]

Epochs: 18855 | epoch avg. loss: 0.027 | test avg. loss: 4.499
Epochs: 18856 | epoch avg. loss: 0.011 | test avg. loss: 4.551
Epochs: 18857 | epoch avg. loss: 0.018 | test avg. loss: 4.586


 38%|███▊      | 18861/50000 [28:11<38:48, 13.37it/s]

Epochs: 18858 | epoch avg. loss: 0.020 | test avg. loss: 4.468
Epochs: 18859 | epoch avg. loss: 0.049 | test avg. loss: 4.456
Epochs: 18860 | epoch avg. loss: 0.024 | test avg. loss: 4.542


 38%|███▊      | 18863/50000 [28:11<37:43, 13.76it/s]

Epochs: 18861 | epoch avg. loss: 0.025 | test avg. loss: 4.444
Epochs: 18862 | epoch avg. loss: 0.030 | test avg. loss: 4.414
Epochs: 18863 | epoch avg. loss: 0.066 | test avg. loss: 4.529


 38%|███▊      | 18867/50000 [28:11<36:06, 14.37it/s]

Epochs: 18864 | epoch avg. loss: 0.086 | test avg. loss: 4.772
Epochs: 18865 | epoch avg. loss: 0.146 | test avg. loss: 4.503
Epochs: 18866 | epoch avg. loss: 0.070 | test avg. loss: 4.633


 38%|███▊      | 18869/50000 [28:11<38:04, 13.63it/s]

Epochs: 18867 | epoch avg. loss: 0.220 | test avg. loss: 4.708
Epochs: 18868 | epoch avg. loss: 0.055 | test avg. loss: 4.872
Epochs: 18869 | epoch avg. loss: 0.099 | test avg. loss: 4.435


 38%|███▊      | 18873/50000 [28:12<38:40, 13.41it/s]

Epochs: 18870 | epoch avg. loss: 0.033 | test avg. loss: 4.397
Epochs: 18871 | epoch avg. loss: 0.092 | test avg. loss: 4.607
Epochs: 18872 | epoch avg. loss: 0.072 | test avg. loss: 4.535


 38%|███▊      | 18877/50000 [28:12<35:42, 14.53it/s]

Epochs: 18873 | epoch avg. loss: 0.218 | test avg. loss: 4.657
Epochs: 18874 | epoch avg. loss: 0.103 | test avg. loss: 5.115
Epochs: 18875 | epoch avg. loss: 0.222 | test avg. loss: 4.551
Epochs: 18876 | epoch avg. loss: 0.465 | test avg. loss: 4.401


 38%|███▊      | 18879/50000 [28:12<36:39, 14.15it/s]

Epochs: 18877 | epoch avg. loss: 0.572 | test avg. loss: 6.957
Epochs: 18878 | epoch avg. loss: 1.903 | test avg. loss: 4.361
Epochs: 18879 | epoch avg. loss: 0.847 | test avg. loss: 4.370
Epochs: 18880 | epoch avg. loss: 0.356 | test avg. loss: 6.894


 38%|███▊      | 18883/50000 [28:13<37:27, 13.84it/s]

Epochs: 18881 | epoch avg. loss: 1.572 | test avg. loss: 5.331
Epochs: 18882 | epoch avg. loss: 1.758 | test avg. loss: 5.184
Epochs: 18883 | epoch avg. loss: 0.870 | test avg. loss: 8.936


 38%|███▊      | 18887/50000 [28:13<38:40, 13.41it/s]

Epochs: 18884 | epoch avg. loss: 3.610 | test avg. loss: 5.296
Epochs: 18885 | epoch avg. loss: 3.347 | test avg. loss: 5.062
Epochs: 18886 | epoch avg. loss: 1.290 | test avg. loss: 8.691
Epochs: 18887 | epoch avg. loss: 2.350 | test avg. loss: 6.799


 38%|███▊      | 18891/50000 [28:13<35:37, 14.56it/s]

Epochs: 18888 | epoch avg. loss: 2.394 | test avg. loss: 5.266
Epochs: 18889 | epoch avg. loss: 1.176 | test avg. loss: 4.772
Epochs: 18890 | epoch avg. loss: 0.789 | test avg. loss: 5.497
Epochs: 18891 | epoch avg. loss: 1.217 | test avg. loss: 6.714


 38%|███▊      | 18895/50000 [28:13<35:22, 14.66it/s]

Epochs: 18892 | epoch avg. loss: 1.117 | test avg. loss: 5.571
Epochs: 18893 | epoch avg. loss: 1.760 | test avg. loss: 4.852
Epochs: 18894 | epoch avg. loss: 0.484 | test avg. loss: 4.331


 38%|███▊      | 18897/50000 [28:13<38:45, 13.38it/s]

Epochs: 18895 | epoch avg. loss: 0.729 | test avg. loss: 4.145
Epochs: 18896 | epoch avg. loss: 0.582 | test avg. loss: 5.123
Epochs: 18897 | epoch avg. loss: 0.606 | test avg. loss: 4.674


 38%|███▊      | 18899/50000 [28:14<37:13, 13.93it/s]

Epochs: 18898 | epoch avg. loss: 0.598 | test avg. loss: 5.692
Epochs: 18899 | epoch avg. loss: 0.581 | test avg. loss: 4.434


 38%|███▊      | 18903/50000 [28:15<1:51:50,  4.63it/s]

Epochs: 18900 | epoch avg. loss: 0.363 | test avg. loss: 4.686
Epochs: 18901 | epoch avg. loss: 0.206 | test avg. loss: 4.170
Epochs: 18902 | epoch avg. loss: 0.389 | test avg. loss: 4.255
Epochs: 18903 | epoch avg. loss: 0.292 | test avg. loss: 4.265


 38%|███▊      | 18907/50000 [28:16<1:13:28,  7.05it/s]

Epochs: 18904 | epoch avg. loss: 0.174 | test avg. loss: 4.260
Epochs: 18905 | epoch avg. loss: 0.169 | test avg. loss: 4.905
Epochs: 18906 | epoch avg. loss: 0.247 | test avg. loss: 4.488
Epochs: 18907 | epoch avg. loss: 0.275 | test avg. loss: 4.742


                                                     

Epochs: 18908 | epoch avg. loss: 0.117 | test avg. loss: 4.396
Epochs: 18909 | epoch avg. loss: 0.165 | test avg. loss: 4.597
Epochs: 18910 | epoch avg. loss: 0.179 | test avg. loss: 4.444


 38%|███▊      | 18915/50000 [28:16<42:36, 12.16it/s]

Epochs: 18911 | epoch avg. loss: 0.177 | test avg. loss: 4.500
Epochs: 18912 | epoch avg. loss: 0.086 | test avg. loss: 4.640
Epochs: 18913 | epoch avg. loss: 0.080 | test avg. loss: 4.454
Epochs: 18914 | epoch avg. loss: 0.101 | test avg. loss: 4.791


 38%|███▊      | 18919/50000 [28:16<37:17, 13.89it/s]

Epochs: 18915 | epoch avg. loss: 0.140 | test avg. loss: 4.461
Epochs: 18916 | epoch avg. loss: 0.182 | test avg. loss: 4.587
Epochs: 18917 | epoch avg. loss: 0.046 | test avg. loss: 4.572
Epochs: 18918 | epoch avg. loss: 0.024 | test avg. loss: 4.511


 38%|███▊      | 18921/50000 [28:17<39:07, 13.24it/s]

Epochs: 18919 | epoch avg. loss: 0.052 | test avg. loss: 4.652
Epochs: 18920 | epoch avg. loss: 0.087 | test avg. loss: 4.306
Epochs: 18921 | epoch avg. loss: 0.149 | test avg. loss: 4.461


 38%|███▊      | 18925/50000 [28:17<39:21, 13.16it/s]

Epochs: 18922 | epoch avg. loss: 0.120 | test avg. loss: 4.320
Epochs: 18923 | epoch avg. loss: 0.122 | test avg. loss: 4.449
Epochs: 18924 | epoch avg. loss: 0.117 | test avg. loss: 5.000


 38%|███▊      | 18929/50000 [28:17<36:22, 14.24it/s]

Epochs: 18925 | epoch avg. loss: 0.207 | test avg. loss: 4.579
Epochs: 18926 | epoch avg. loss: 0.258 | test avg. loss: 4.493
Epochs: 18927 | epoch avg. loss: 0.114 | test avg. loss: 4.373
Epochs: 18928 | epoch avg. loss: 0.058 | test avg. loss: 4.430


 38%|███▊      | 18933/50000 [28:17<34:17, 15.10it/s]

Epochs: 18929 | epoch avg. loss: 0.053 | test avg. loss: 4.624
Epochs: 18930 | epoch avg. loss: 0.049 | test avg. loss: 4.475
Epochs: 18931 | epoch avg. loss: 0.047 | test avg. loss: 4.664
Epochs: 18932 | epoch avg. loss: 0.069 | test avg. loss: 4.402


 38%|███▊      | 18935/50000 [28:18<36:23, 14.23it/s]

Epochs: 18933 | epoch avg. loss: 0.052 | test avg. loss: 4.512
Epochs: 18934 | epoch avg. loss: 0.060 | test avg. loss: 4.513
Epochs: 18935 | epoch avg. loss: 0.029 | test avg. loss: 4.510


 38%|███▊      | 18939/50000 [28:18<37:10, 13.93it/s]

Epochs: 18936 | epoch avg. loss: 0.025 | test avg. loss: 4.596
Epochs: 18937 | epoch avg. loss: 0.019 | test avg. loss: 4.507
Epochs: 18938 | epoch avg. loss: 0.017 | test avg. loss: 4.527


 38%|███▊      | 18941/50000 [28:18<36:47, 14.07it/s]

Epochs: 18939 | epoch avg. loss: 0.011 | test avg. loss: 4.559
Epochs: 18940 | epoch avg. loss: 0.013 | test avg. loss: 4.557
Epochs: 18941 | epoch avg. loss: 0.014 | test avg. loss: 4.554


 38%|███▊      | 18945/50000 [28:18<37:22, 13.85it/s]

Epochs: 18942 | epoch avg. loss: 0.013 | test avg. loss: 4.586
Epochs: 18943 | epoch avg. loss: 0.011 | test avg. loss: 4.506
Epochs: 18944 | epoch avg. loss: 0.009 | test avg. loss: 4.506


 38%|███▊      | 18947/50000 [28:18<37:25, 13.83it/s]

Epochs: 18945 | epoch avg. loss: 0.018 | test avg. loss: 4.454
Epochs: 18946 | epoch avg. loss: 0.010 | test avg. loss: 4.462
Epochs: 18947 | epoch avg. loss: 0.009 | test avg. loss: 4.514


 38%|███▊      | 18951/50000 [28:19<42:31, 12.17it/s]

Epochs: 18948 | epoch avg. loss: 0.011 | test avg. loss: 4.503
Epochs: 18949 | epoch avg. loss: 0.018 | test avg. loss: 4.631
Epochs: 18950 | epoch avg. loss: 0.020 | test avg. loss: 4.519


 38%|███▊      | 18953/50000 [28:19<42:37, 12.14it/s]

Epochs: 18951 | epoch avg. loss: 0.026 | test avg. loss: 4.554
Epochs: 18952 | epoch avg. loss: 0.019 | test avg. loss: 4.524
Epochs: 18953 | epoch avg. loss: 0.011 | test avg. loss: 4.470


 38%|███▊      | 18957/50000 [28:19<38:36, 13.40it/s]

Epochs: 18954 | epoch avg. loss: 0.008 | test avg. loss: 4.442
Epochs: 18955 | epoch avg. loss: 0.016 | test avg. loss: 4.488
Epochs: 18956 | epoch avg. loss: 0.008 | test avg. loss: 4.511
Epochs: 18957 | epoch avg. loss: 0.009 | test avg. loss: 4.539


 38%|███▊      | 18961/50000 [28:19<38:09, 13.56it/s]

Epochs: 18958 | epoch avg. loss: 0.008 | test avg. loss: 4.578
Epochs: 18959 | epoch avg. loss: 0.009 | test avg. loss: 4.528
Epochs: 18960 | epoch avg. loss: 0.009 | test avg. loss: 4.598


 38%|███▊      | 18963/50000 [28:20<41:29, 12.46it/s]

Epochs: 18961 | epoch avg. loss: 0.014 | test avg. loss: 4.502
Epochs: 18962 | epoch avg. loss: 0.030 | test avg. loss: 4.574
Epochs: 18963 | epoch avg. loss: 0.013 | test avg. loss: 4.569


 38%|███▊      | 18967/50000 [28:20<37:30, 13.79it/s]

Epochs: 18964 | epoch avg. loss: 0.014 | test avg. loss: 4.516
Epochs: 18965 | epoch avg. loss: 0.026 | test avg. loss: 4.716
Epochs: 18966 | epoch avg. loss: 0.046 | test avg. loss: 4.513
Epochs: 18967 | epoch avg. loss: 0.038 | test avg. loss: 4.555


 38%|███▊      | 18971/50000 [28:20<34:39, 14.92it/s]

Epochs: 18968 | epoch avg. loss: 0.029 | test avg. loss: 4.508
Epochs: 18969 | epoch avg. loss: 0.020 | test avg. loss: 4.528
Epochs: 18970 | epoch avg. loss: 0.023 | test avg. loss: 4.627
Epochs: 18971 | epoch avg. loss: 0.026 | test avg. loss: 4.517


 38%|███▊      | 18975/50000 [28:20<34:26, 15.01it/s]

Epochs: 18972 | epoch avg. loss: 0.040 | test avg. loss: 4.700
Epochs: 18973 | epoch avg. loss: 0.043 | test avg. loss: 4.499
Epochs: 18974 | epoch avg. loss: 0.049 | test avg. loss: 4.515


 38%|███▊      | 18977/50000 [28:21<38:02, 13.59it/s]

Epochs: 18975 | epoch avg. loss: 0.012 | test avg. loss: 4.519
Epochs: 18976 | epoch avg. loss: 0.011 | test avg. loss: 4.516
Epochs: 18977 | epoch avg. loss: 0.011 | test avg. loss: 4.596


 38%|███▊      | 18981/50000 [28:21<42:18, 12.22it/s]

Epochs: 18978 | epoch avg. loss: 0.012 | test avg. loss: 4.562
Epochs: 18979 | epoch avg. loss: 0.027 | test avg. loss: 4.570
Epochs: 18980 | epoch avg. loss: 0.029 | test avg. loss: 4.726


 38%|███▊      | 18983/50000 [28:21<40:48, 12.67it/s]

Epochs: 18981 | epoch avg. loss: 0.029 | test avg. loss: 4.566
Epochs: 18982 | epoch avg. loss: 0.043 | test avg. loss: 4.604
Epochs: 18983 | epoch avg. loss: 0.014 | test avg. loss: 4.652


 38%|███▊      | 18987/50000 [28:21<38:23, 13.46it/s]

Epochs: 18984 | epoch avg. loss: 0.019 | test avg. loss: 4.543
Epochs: 18985 | epoch avg. loss: 0.047 | test avg. loss: 4.643
Epochs: 18986 | epoch avg. loss: 0.023 | test avg. loss: 4.523


 38%|███▊      | 18989/50000 [28:22<41:49, 12.36it/s]

Epochs: 18987 | epoch avg. loss: 0.028 | test avg. loss: 4.496
Epochs: 18988 | epoch avg. loss: 0.025 | test avg. loss: 4.707
Epochs: 18989 | epoch avg. loss: 0.057 | test avg. loss: 4.513


 38%|███▊      | 18993/50000 [28:22<41:04, 12.58it/s]

Epochs: 18990 | epoch avg. loss: 0.057 | test avg. loss: 4.673
Epochs: 18991 | epoch avg. loss: 0.060 | test avg. loss: 4.566
Epochs: 18992 | epoch avg. loss: 0.040 | test avg. loss: 4.538


 38%|███▊      | 18995/50000 [28:22<38:43, 13.34it/s]

Epochs: 18993 | epoch avg. loss: 0.042 | test avg. loss: 4.781
Epochs: 18994 | epoch avg. loss: 0.086 | test avg. loss: 4.568
Epochs: 18995 | epoch avg. loss: 0.085 | test avg. loss: 4.695


 38%|███▊      | 18999/50000 [28:22<36:50, 14.02it/s]

Epochs: 18996 | epoch avg. loss: 0.059 | test avg. loss: 4.628
Epochs: 18997 | epoch avg. loss: 0.026 | test avg. loss: 4.533
Epochs: 18998 | epoch avg. loss: 0.024 | test avg. loss: 4.583
Epochs: 18999 | epoch avg. loss: 0.024 | test avg. loss: 4.471


 38%|███▊      | 19003/50000 [28:24<1:57:33,  4.39it/s]

Epochs: 19000 | epoch avg. loss: 0.019 | test avg. loss: 4.525
Epochs: 19001 | epoch avg. loss: 0.017 | test avg. loss: 4.561
Epochs: 19002 | epoch avg. loss: 0.014 | test avg. loss: 4.494


 38%|███▊      | 19005/50000 [28:24<1:34:39,  5.46it/s]

Epochs: 19003 | epoch avg. loss: 0.018 | test avg. loss: 4.566
Epochs: 19004 | epoch avg. loss: 0.017 | test avg. loss: 4.530
Epochs: 19005 | epoch avg. loss: 0.009 | test avg. loss: 4.510


 38%|███▊      | 19009/50000 [28:24<1:09:27,  7.44it/s]

Epochs: 19006 | epoch avg. loss: 0.011 | test avg. loss: 4.619
Epochs: 19007 | epoch avg. loss: 0.023 | test avg. loss: 4.485
Epochs: 19008 | epoch avg. loss: 0.017 | test avg. loss: 4.497


 38%|███▊      | 19011/50000 [28:25<58:25,  8.84it/s]

Epochs: 19009 | epoch avg. loss: 0.021 | test avg. loss: 4.508
Epochs: 19010 | epoch avg. loss: 0.013 | test avg. loss: 4.524
Epochs: 19011 | epoch avg. loss: 0.012 | test avg. loss: 4.585


 38%|███▊      | 19015/50000 [28:25<51:20, 10.06it/s]

Epochs: 19012 | epoch avg. loss: 0.012 | test avg. loss: 4.574
Epochs: 19013 | epoch avg. loss: 0.018 | test avg. loss: 4.682
Epochs: 19014 | epoch avg. loss: 0.046 | test avg. loss: 4.545


 38%|███▊      | 19017/50000 [28:25<49:58, 10.33it/s]

Epochs: 19015 | epoch avg. loss: 0.017 | test avg. loss: 4.501
Epochs: 19016 | epoch avg. loss: 0.013 | test avg. loss: 4.553
Epochs: 19017 | epoch avg. loss: 0.013 | test avg. loss: 4.530


 38%|███▊      | 19021/50000 [28:26<48:49, 10.58it/s]

Epochs: 19018 | epoch avg. loss: 0.008 | test avg. loss: 4.547
Epochs: 19019 | epoch avg. loss: 0.010 | test avg. loss: 4.589
Epochs: 19020 | epoch avg. loss: 0.013 | test avg. loss: 4.561


                                                     

Epochs: 19021 | epoch avg. loss: 0.010 | test avg. loss: 4.542
Epochs: 19022 | epoch avg. loss: 0.009 | test avg. loss: 4.560
Epochs: 19023 | epoch avg. loss: 0.007 | test avg. loss: 4.516


 38%|███▊      | 19027/50000 [28:26<41:22, 12.48it/s]

Epochs: 19024 | epoch avg. loss: 0.008 | test avg. loss: 4.521
Epochs: 19025 | epoch avg. loss: 0.007 | test avg. loss: 4.554
Epochs: 19026 | epoch avg. loss: 0.007 | test avg. loss: 4.557


 38%|███▊      | 19029/50000 [28:26<43:02, 11.99it/s]

Epochs: 19027 | epoch avg. loss: 0.006 | test avg. loss: 4.586
Epochs: 19028 | epoch avg. loss: 0.007 | test avg. loss: 4.550
Epochs: 19029 | epoch avg. loss: 0.006 | test avg. loss: 4.580


 38%|███▊      | 19033/50000 [28:26<44:18, 11.65it/s]

Epochs: 19030 | epoch avg. loss: 0.012 | test avg. loss: 4.532
Epochs: 19031 | epoch avg. loss: 0.007 | test avg. loss: 4.491
Epochs: 19032 | epoch avg. loss: 0.008 | test avg. loss: 4.563


 38%|███▊      | 19035/50000 [28:27<42:52, 12.04it/s]

Epochs: 19033 | epoch avg. loss: 0.015 | test avg. loss: 4.519
Epochs: 19034 | epoch avg. loss: 0.007 | test avg. loss: 4.526
Epochs: 19035 | epoch avg. loss: 0.007 | test avg. loss: 4.555


 38%|███▊      | 19039/50000 [28:27<41:23, 12.47it/s]

Epochs: 19036 | epoch avg. loss: 0.007 | test avg. loss: 4.523
Epochs: 19037 | epoch avg. loss: 0.015 | test avg. loss: 4.534
Epochs: 19038 | epoch avg. loss: 0.009 | test avg. loss: 4.588


 38%|███▊      | 19041/50000 [28:27<38:35, 13.37it/s]

Epochs: 19039 | epoch avg. loss: 0.012 | test avg. loss: 4.513
Epochs: 19040 | epoch avg. loss: 0.018 | test avg. loss: 4.546
Epochs: 19041 | epoch avg. loss: 0.010 | test avg. loss: 4.580
Epochs: 19042 | epoch avg. loss: 0.015 | test avg. loss: 4.536


 38%|███▊      | 19045/50000 [28:27<37:53, 13.62it/s]

Epochs: 19043 | epoch avg. loss: 0.007 | test avg. loss: 4.539
Epochs: 19044 | epoch avg. loss: 0.007 | test avg. loss: 4.518
Epochs: 19045 | epoch avg. loss: 0.007 | test avg. loss: 4.591


 38%|███▊      | 19049/50000 [28:28<39:28, 13.07it/s]

Epochs: 19046 | epoch avg. loss: 0.009 | test avg. loss: 4.522
Epochs: 19047 | epoch avg. loss: 0.021 | test avg. loss: 4.552
Epochs: 19048 | epoch avg. loss: 0.011 | test avg. loss: 4.591


 38%|███▊      | 19051/50000 [28:28<38:29, 13.40it/s]

Epochs: 19049 | epoch avg. loss: 0.013 | test avg. loss: 4.540
Epochs: 19050 | epoch avg. loss: 0.007 | test avg. loss: 4.522
Epochs: 19051 | epoch avg. loss: 0.008 | test avg. loss: 4.490
Epochs: 19052 | epoch avg. loss: 0.006 | test avg. loss: 4.530


                                                     

Epochs: 19053 | epoch avg. loss: 0.011 | test avg. loss: 4.525
Epochs: 19054 | epoch avg. loss: 0.015 | test avg. loss: 4.498
Epochs: 19055 | epoch avg. loss: 0.032 | test avg. loss: 4.608


 38%|███▊      | 19059/50000 [28:28<37:11, 13.87it/s]

Epochs: 19056 | epoch avg. loss: 0.032 | test avg. loss: 4.551
Epochs: 19057 | epoch avg. loss: 0.016 | test avg. loss: 4.433
Epochs: 19058 | epoch avg. loss: 0.035 | test avg. loss: 4.568


 38%|███▊      | 19061/50000 [28:29<38:46, 13.30it/s]

Epochs: 19059 | epoch avg. loss: 0.026 | test avg. loss: 4.555
Epochs: 19060 | epoch avg. loss: 0.017 | test avg. loss: 4.584
Epochs: 19061 | epoch avg. loss: 0.014 | test avg. loss: 4.639


 38%|███▊      | 19065/50000 [28:29<38:01, 13.56it/s]

Epochs: 19062 | epoch avg. loss: 0.017 | test avg. loss: 4.568
Epochs: 19063 | epoch avg. loss: 0.007 | test avg. loss: 4.602
Epochs: 19064 | epoch avg. loss: 0.027 | test avg. loss: 4.582


 38%|███▊      | 19069/50000 [28:29<35:32, 14.50it/s]

Epochs: 19065 | epoch avg. loss: 0.015 | test avg. loss: 4.492
Epochs: 19066 | epoch avg. loss: 0.015 | test avg. loss: 4.519
Epochs: 19067 | epoch avg. loss: 0.009 | test avg. loss: 4.578
Epochs: 19068 | epoch avg. loss: 0.016 | test avg. loss: 4.510


 38%|███▊      | 19071/50000 [28:29<35:21, 14.58it/s]

Epochs: 19069 | epoch avg. loss: 0.016 | test avg. loss: 4.488
Epochs: 19070 | epoch avg. loss: 0.016 | test avg. loss: 4.666
Epochs: 19071 | epoch avg. loss: 0.047 | test avg. loss: 4.512


 38%|███▊      | 19075/50000 [28:30<38:13, 13.48it/s]

Epochs: 19072 | epoch avg. loss: 0.072 | test avg. loss: 4.590
Epochs: 19073 | epoch avg. loss: 0.038 | test avg. loss: 4.943
Epochs: 19074 | epoch avg. loss: 0.123 | test avg. loss: 4.528


 38%|███▊      | 19079/50000 [28:30<35:57, 14.33it/s]

Epochs: 19075 | epoch avg. loss: 0.081 | test avg. loss: 4.413
Epochs: 19076 | epoch avg. loss: 0.037 | test avg. loss: 4.428
Epochs: 19077 | epoch avg. loss: 0.038 | test avg. loss: 4.409
Epochs: 19078 | epoch avg. loss: 0.076 | test avg. loss: 4.763


 38%|███▊      | 19083/50000 [28:30<34:24, 14.97it/s]

Epochs: 19079 | epoch avg. loss: 0.086 | test avg. loss: 4.721
Epochs: 19080 | epoch avg. loss: 0.026 | test avg. loss: 4.648
Epochs: 19081 | epoch avg. loss: 0.037 | test avg. loss: 4.659
Epochs: 19082 | epoch avg. loss: 0.023 | test avg. loss: 4.417


                                                     

Epochs: 19083 | epoch avg. loss: 0.041 | test avg. loss: 4.372
Epochs: 19084 | epoch avg. loss: 0.022 | test avg. loss: 4.467
Epochs: 19085 | epoch avg. loss: 0.016 | test avg. loss: 4.542


 38%|███▊      | 19089/50000 [28:31<38:31, 13.37it/s]

Epochs: 19086 | epoch avg. loss: 0.010 | test avg. loss: 4.541
Epochs: 19087 | epoch avg. loss: 0.028 | test avg. loss: 4.559
Epochs: 19088 | epoch avg. loss: 0.015 | test avg. loss: 4.681


 38%|███▊      | 19093/50000 [28:31<35:52, 14.36it/s]

Epochs: 19089 | epoch avg. loss: 0.052 | test avg. loss: 4.541
Epochs: 19090 | epoch avg. loss: 0.016 | test avg. loss: 4.569
Epochs: 19091 | epoch avg. loss: 0.026 | test avg. loss: 4.554
Epochs: 19092 | epoch avg. loss: 0.021 | test avg. loss: 4.422


 38%|███▊      | 19097/50000 [28:31<34:33, 14.90it/s]

Epochs: 19093 | epoch avg. loss: 0.034 | test avg. loss: 4.458
Epochs: 19094 | epoch avg. loss: 0.019 | test avg. loss: 4.653
Epochs: 19095 | epoch avg. loss: 0.057 | test avg. loss: 4.491
Epochs: 19096 | epoch avg. loss: 0.031 | test avg. loss: 4.533


 38%|███▊      | 19099/50000 [28:31<34:32, 14.91it/s]

Epochs: 19097 | epoch avg. loss: 0.034 | test avg. loss: 4.812
Epochs: 19098 | epoch avg. loss: 0.103 | test avg. loss: 4.548
Epochs: 19099 | epoch avg. loss: 0.097 | test avg. loss: 4.554


 38%|███▊      | 19103/50000 [28:33<1:51:16,  4.63it/s]

Epochs: 19100 | epoch avg. loss: 0.038 | test avg. loss: 4.732
Epochs: 19101 | epoch avg. loss: 0.054 | test avg. loss: 4.502
Epochs: 19102 | epoch avg. loss: 0.130 | test avg. loss: 4.513
Epochs: 19103 | epoch avg. loss: 0.041 | test avg. loss: 4.483


 38%|███▊      | 19107/50000 [28:33<1:11:08,  7.24it/s]

Epochs: 19104 | epoch avg. loss: 0.052 | test avg. loss: 4.436
Epochs: 19105 | epoch avg. loss: 0.101 | test avg. loss: 4.694
Epochs: 19106 | epoch avg. loss: 0.056 | test avg. loss: 4.488
Epochs: 19107 | epoch avg. loss: 0.036 | test avg. loss: 4.505


 38%|███▊      | 19111/50000 [28:33<55:31,  9.27it/s]

Epochs: 19108 | epoch avg. loss: 0.023 | test avg. loss: 4.710
Epochs: 19109 | epoch avg. loss: 0.042 | test avg. loss: 4.515
Epochs: 19110 | epoch avg. loss: 0.067 | test avg. loss: 4.496


 38%|███▊      | 19113/50000 [28:34<51:01, 10.09it/s]

Epochs: 19111 | epoch avg. loss: 0.035 | test avg. loss: 4.590
Epochs: 19112 | epoch avg. loss: 0.020 | test avg. loss: 4.478
Epochs: 19113 | epoch avg. loss: 0.042 | test avg. loss: 4.472




Epochs: 19114 | epoch avg. loss: 0.029 | test avg. loss: 4.609
Epochs: 19115 | epoch avg. loss: 0.042 | test avg. loss: 4.452
Epochs: 19116 | epoch avg. loss: 0.010 | test avg. loss: 4.460


 38%|███▊      | 19121/50000 [28:34<37:57, 13.56it/s]

Epochs: 19117 | epoch avg. loss: 0.028 | test avg. loss: 4.458
Epochs: 19118 | epoch avg. loss: 0.018 | test avg. loss: 4.493
Epochs: 19119 | epoch avg. loss: 0.017 | test avg. loss: 4.686
Epochs: 19120 | epoch avg. loss: 0.033 | test avg. loss: 4.630


 38%|███▊      | 19125/50000 [28:34<35:48, 14.37it/s]

Epochs: 19121 | epoch avg. loss: 0.017 | test avg. loss: 4.557
Epochs: 19122 | epoch avg. loss: 0.021 | test avg. loss: 4.623
Epochs: 19123 | epoch avg. loss: 0.031 | test avg. loss: 4.414
Epochs: 19124 | epoch avg. loss: 0.052 | test avg. loss: 4.399


 38%|███▊      | 19127/50000 [28:35<39:42, 12.96it/s]

Epochs: 19125 | epoch avg. loss: 0.029 | test avg. loss: 4.588
Epochs: 19126 | epoch avg. loss: 0.022 | test avg. loss: 4.531
Epochs: 19127 | epoch avg. loss: 0.065 | test avg. loss: 4.697


 38%|███▊      | 19131/50000 [28:35<39:42, 12.96it/s]

Epochs: 19128 | epoch avg. loss: 0.049 | test avg. loss: 4.700
Epochs: 19129 | epoch avg. loss: 0.032 | test avg. loss: 4.527
Epochs: 19130 | epoch avg. loss: 0.102 | test avg. loss: 4.513


 38%|███▊      | 19135/50000 [28:35<36:40, 14.03it/s]

Epochs: 19131 | epoch avg. loss: 0.032 | test avg. loss: 4.490
Epochs: 19132 | epoch avg. loss: 0.027 | test avg. loss: 4.432
Epochs: 19133 | epoch avg. loss: 0.065 | test avg. loss: 4.547
Epochs: 19134 | epoch avg. loss: 0.022 | test avg. loss: 4.742


 38%|███▊      | 19137/50000 [28:35<36:17, 14.17it/s]

Epochs: 19135 | epoch avg. loss: 0.041 | test avg. loss: 4.579
Epochs: 19136 | epoch avg. loss: 0.031 | test avg. loss: 4.526
Epochs: 19137 | epoch avg. loss: 0.022 | test avg. loss: 4.599


 38%|███▊      | 19141/50000 [28:36<38:03, 13.51it/s]

Epochs: 19138 | epoch avg. loss: 0.044 | test avg. loss: 4.472
Epochs: 19139 | epoch avg. loss: 0.020 | test avg. loss: 4.418
Epochs: 19140 | epoch avg. loss: 0.079 | test avg. loss: 4.526
Epochs: 19141 | epoch avg. loss: 0.019 | test avg. loss: 4.669


 38%|███▊      | 19145/50000 [28:36<36:19, 14.16it/s]

Epochs: 19142 | epoch avg. loss: 0.022 | test avg. loss: 4.573
Epochs: 19143 | epoch avg. loss: 0.025 | test avg. loss: 4.596
Epochs: 19144 | epoch avg. loss: 0.017 | test avg. loss: 4.658
Epochs: 19145 | epoch avg. loss: 0.033 | test avg. loss: 4.442


 38%|███▊      | 19149/50000 [28:36<36:24, 14.12it/s]

Epochs: 19146 | epoch avg. loss: 0.054 | test avg. loss: 4.400
Epochs: 19147 | epoch avg. loss: 0.021 | test avg. loss: 4.482
Epochs: 19148 | epoch avg. loss: 0.030 | test avg. loss: 4.406


 38%|███▊      | 19151/50000 [28:36<37:25, 13.74it/s]

Epochs: 19149 | epoch avg. loss: 0.105 | test avg. loss: 4.554
Epochs: 19150 | epoch avg. loss: 0.044 | test avg. loss: 4.660
Epochs: 19151 | epoch avg. loss: 0.039 | test avg. loss: 4.596


 38%|███▊      | 19155/50000 [28:37<38:50, 13.24it/s]

Epochs: 19152 | epoch avg. loss: 0.133 | test avg. loss: 4.480
Epochs: 19153 | epoch avg. loss: 0.035 | test avg. loss: 4.473
Epochs: 19154 | epoch avg. loss: 0.047 | test avg. loss: 4.349
Epochs: 19155 | epoch avg. loss: 0.055 | test avg. loss: 4.480


 38%|███▊      | 19159/50000 [28:37<38:56, 13.20it/s]

Epochs: 19156 | epoch avg. loss: 0.022 | test avg. loss: 4.605
Epochs: 19157 | epoch avg. loss: 0.028 | test avg. loss: 4.621
Epochs: 19158 | epoch avg. loss: 0.128 | test avg. loss: 4.680


 38%|███▊      | 19161/50000 [28:37<38:30, 13.35it/s]

Epochs: 19159 | epoch avg. loss: 0.115 | test avg. loss: 4.629
Epochs: 19160 | epoch avg. loss: 0.076 | test avg. loss: 4.615
Epochs: 19161 | epoch avg. loss: 0.322 | test avg. loss: 4.825


 38%|███▊      | 19165/50000 [28:37<39:01, 13.17it/s]

Epochs: 19162 | epoch avg. loss: 0.296 | test avg. loss: 4.695
Epochs: 19163 | epoch avg. loss: 0.117 | test avg. loss: 4.434
Epochs: 19164 | epoch avg. loss: 0.054 | test avg. loss: 4.548


 38%|███▊      | 19167/50000 [28:38<41:25, 12.40it/s]

Epochs: 19165 | epoch avg. loss: 0.021 | test avg. loss: 4.667
Epochs: 19166 | epoch avg. loss: 0.017 | test avg. loss: 4.661
Epochs: 19167 | epoch avg. loss: 0.016 | test avg. loss: 4.568


 38%|███▊      | 19171/50000 [28:38<42:59, 11.95it/s]

Epochs: 19168 | epoch avg. loss: 0.016 | test avg. loss: 4.525
Epochs: 19169 | epoch avg. loss: 0.017 | test avg. loss: 4.452
Epochs: 19170 | epoch avg. loss: 0.080 | test avg. loss: 4.498


 38%|███▊      | 19173/50000 [28:38<40:29, 12.69it/s]

Epochs: 19171 | epoch avg. loss: 0.022 | test avg. loss: 4.739
Epochs: 19172 | epoch avg. loss: 0.080 | test avg. loss: 4.475
Epochs: 19173 | epoch avg. loss: 0.050 | test avg. loss: 4.391


 38%|███▊      | 19177/50000 [28:38<45:35, 11.27it/s]

Epochs: 19174 | epoch avg. loss: 0.036 | test avg. loss: 4.667
Epochs: 19175 | epoch avg. loss: 0.104 | test avg. loss: 4.441
Epochs: 19176 | epoch avg. loss: 0.103 | test avg. loss: 4.553


 38%|███▊      | 19179/50000 [28:39<44:41, 11.50it/s]

Epochs: 19177 | epoch avg. loss: 0.086 | test avg. loss: 4.888
Epochs: 19178 | epoch avg. loss: 0.165 | test avg. loss: 4.569
Epochs: 19179 | epoch avg. loss: 0.084 | test avg. loss: 4.470


 38%|███▊      | 19183/50000 [28:39<39:05, 13.14it/s]

Epochs: 19180 | epoch avg. loss: 0.050 | test avg. loss: 4.749
Epochs: 19181 | epoch avg. loss: 0.203 | test avg. loss: 4.352
Epochs: 19182 | epoch avg. loss: 0.060 | test avg. loss: 4.408


 38%|███▊      | 19185/50000 [28:39<39:51, 12.89it/s]

Epochs: 19183 | epoch avg. loss: 0.039 | test avg. loss: 4.664
Epochs: 19184 | epoch avg. loss: 0.060 | test avg. loss: 4.595
Epochs: 19185 | epoch avg. loss: 0.049 | test avg. loss: 4.599


 38%|███▊      | 19189/50000 [28:39<37:37, 13.65it/s]

Epochs: 19186 | epoch avg. loss: 0.030 | test avg. loss: 4.801
Epochs: 19187 | epoch avg. loss: 0.107 | test avg. loss: 4.490
Epochs: 19188 | epoch avg. loss: 0.033 | test avg. loss: 4.545


 38%|███▊      | 19191/50000 [28:40<40:18, 12.74it/s]

Epochs: 19189 | epoch avg. loss: 0.018 | test avg. loss: 4.673
Epochs: 19190 | epoch avg. loss: 0.020 | test avg. loss: 4.566
Epochs: 19191 | epoch avg. loss: 0.053 | test avg. loss: 4.498


 38%|███▊      | 19195/50000 [28:40<40:27, 12.69it/s]

Epochs: 19192 | epoch avg. loss: 0.028 | test avg. loss: 4.516
Epochs: 19193 | epoch avg. loss: 0.022 | test avg. loss: 4.452
Epochs: 19194 | epoch avg. loss: 0.014 | test avg. loss: 4.483


 38%|███▊      | 19197/50000 [28:40<42:19, 12.13it/s]

Epochs: 19195 | epoch avg. loss: 0.022 | test avg. loss: 4.648
Epochs: 19196 | epoch avg. loss: 0.041 | test avg. loss: 4.532
Epochs: 19197 | epoch avg. loss: 0.017 | test avg. loss: 4.447


 38%|███▊      | 19199/50000 [28:40<41:51, 12.27it/s]

Epochs: 19198 | epoch avg. loss: 0.008 | test avg. loss: 4.417
Epochs: 19199 | epoch avg. loss: 0.014 | test avg. loss: 4.450


 38%|███▊      | 19203/50000 [28:42<2:01:56,  4.21it/s]

Epochs: 19200 | epoch avg. loss: 0.013 | test avg. loss: 4.470
Epochs: 19201 | epoch avg. loss: 0.019 | test avg. loss: 4.500
Epochs: 19202 | epoch avg. loss: 0.011 | test avg. loss: 4.580
Epochs: 19203 | epoch avg. loss: 0.022 | test avg. loss: 4.442


 38%|███▊      | 19207/50000 [28:42<1:17:26,  6.63it/s]

Epochs: 19204 | epoch avg. loss: 0.049 | test avg. loss: 4.413
Epochs: 19205 | epoch avg. loss: 0.037 | test avg. loss: 4.647
Epochs: 19206 | epoch avg. loss: 0.076 | test avg. loss: 4.458


 38%|███▊      | 19209/50000 [28:43<1:06:42,  7.69it/s]

Epochs: 19207 | epoch avg. loss: 0.043 | test avg. loss: 4.499
Epochs: 19208 | epoch avg. loss: 0.054 | test avg. loss: 4.747
Epochs: 19209 | epoch avg. loss: 0.109 | test avg. loss: 4.597


 38%|███▊      | 19213/50000 [28:43<52:31,  9.77it/s]

Epochs: 19210 | epoch avg. loss: 0.054 | test avg. loss: 4.593
Epochs: 19211 | epoch avg. loss: 0.141 | test avg. loss: 4.697
Epochs: 19212 | epoch avg. loss: 0.145 | test avg. loss: 4.548


 38%|███▊      | 19215/50000 [28:43<50:05, 10.24it/s]

Epochs: 19213 | epoch avg. loss: 0.115 | test avg. loss: 4.883
Epochs: 19214 | epoch avg. loss: 0.860 | test avg. loss: 4.625
Epochs: 19215 | epoch avg. loss: 0.294 | test avg. loss: 5.008


 38%|███▊      | 19219/50000 [28:43<42:47, 11.99it/s]

Epochs: 19216 | epoch avg. loss: 0.250 | test avg. loss: 4.659
Epochs: 19217 | epoch avg. loss: 0.519 | test avg. loss: 4.496
Epochs: 19218 | epoch avg. loss: 0.384 | test avg. loss: 4.689


 38%|███▊      | 19221/50000 [28:43<42:13, 12.15it/s]

Epochs: 19219 | epoch avg. loss: 0.271 | test avg. loss: 4.313
Epochs: 19220 | epoch avg. loss: 0.296 | test avg. loss: 4.519
Epochs: 19221 | epoch avg. loss: 0.123 | test avg. loss: 5.230


 38%|███▊      | 19225/50000 [28:44<44:24, 11.55it/s]

Epochs: 19222 | epoch avg. loss: 0.216 | test avg. loss: 4.750
Epochs: 19223 | epoch avg. loss: 0.469 | test avg. loss: 4.672
Epochs: 19224 | epoch avg. loss: 0.434 | test avg. loss: 4.555


 38%|███▊      | 19227/50000 [28:44<45:13, 11.34it/s]

Epochs: 19225 | epoch avg. loss: 0.302 | test avg. loss: 5.665
Epochs: 19226 | epoch avg. loss: 1.380 | test avg. loss: 5.164
Epochs: 19227 | epoch avg. loss: 0.628 | test avg. loss: 4.628


 38%|███▊      | 19231/50000 [28:44<42:09, 12.17it/s]

Epochs: 19228 | epoch avg. loss: 0.384 | test avg. loss: 5.483
Epochs: 19229 | epoch avg. loss: 1.425 | test avg. loss: 4.760
Epochs: 19230 | epoch avg. loss: 0.643 | test avg. loss: 4.740


 38%|███▊      | 19233/50000 [28:44<41:37, 12.32it/s]

Epochs: 19231 | epoch avg. loss: 0.617 | test avg. loss: 7.456
Epochs: 19232 | epoch avg. loss: 2.317 | test avg. loss: 6.862
Epochs: 19233 | epoch avg. loss: 1.667 | test avg. loss: 5.186


 38%|███▊      | 19237/50000 [28:45<38:26, 13.34it/s]

Epochs: 19234 | epoch avg. loss: 0.561 | test avg. loss: 4.796
Epochs: 19235 | epoch avg. loss: 0.766 | test avg. loss: 5.177
Epochs: 19236 | epoch avg. loss: 0.723 | test avg. loss: 4.711
Epochs: 19237 | epoch avg. loss: 0.316 | test avg. loss: 5.189


 38%|███▊      | 19241/50000 [28:45<36:02, 14.22it/s]

Epochs: 19238 | epoch avg. loss: 0.604 | test avg. loss: 5.706
Epochs: 19239 | epoch avg. loss: 0.672 | test avg. loss: 4.943
Epochs: 19240 | epoch avg. loss: 0.458 | test avg. loss: 4.626
Epochs: 19241 | epoch avg. loss: 0.634 | test avg. loss: 4.944


 38%|███▊      | 19245/50000 [28:45<34:13, 14.98it/s]

Epochs: 19242 | epoch avg. loss: 0.517 | test avg. loss: 4.102
Epochs: 19243 | epoch avg. loss: 0.507 | test avg. loss: 4.496
Epochs: 19244 | epoch avg. loss: 1.103 | test avg. loss: 4.492
Epochs: 19245 | epoch avg. loss: 0.420 | test avg. loss: 4.966


 38%|███▊      | 19249/50000 [28:45<35:11, 14.57it/s]

Epochs: 19246 | epoch avg. loss: 0.759 | test avg. loss: 7.178
Epochs: 19247 | epoch avg. loss: 1.674 | test avg. loss: 5.770
Epochs: 19248 | epoch avg. loss: 2.098 | test avg. loss: 5.009


 39%|███▊      | 19251/50000 [28:46<35:55, 14.27it/s]

Epochs: 19249 | epoch avg. loss: 1.586 | test avg. loss: 4.465
Epochs: 19250 | epoch avg. loss: 0.745 | test avg. loss: 4.327
Epochs: 19251 | epoch avg. loss: 0.676 | test avg. loss: 5.184


 39%|███▊      | 19255/50000 [28:46<39:07, 13.10it/s]

Epochs: 19252 | epoch avg. loss: 0.682 | test avg. loss: 4.665
Epochs: 19253 | epoch avg. loss: 0.660 | test avg. loss: 4.973
Epochs: 19254 | epoch avg. loss: 0.351 | test avg. loss: 4.513


 39%|███▊      | 19257/50000 [28:46<40:33, 12.63it/s]

Epochs: 19255 | epoch avg. loss: 0.458 | test avg. loss: 4.302
Epochs: 19256 | epoch avg. loss: 0.390 | test avg. loss: 5.028
Epochs: 19257 | epoch avg. loss: 0.785 | test avg. loss: 4.319


 39%|███▊      | 19261/50000 [28:46<42:44, 11.98it/s]

Epochs: 19258 | epoch avg. loss: 0.736 | test avg. loss: 4.519
Epochs: 19259 | epoch avg. loss: 0.651 | test avg. loss: 4.150
Epochs: 19260 | epoch avg. loss: 0.367 | test avg. loss: 4.504


 39%|███▊      | 19263/50000 [28:47<41:23, 12.38it/s]

Epochs: 19261 | epoch avg. loss: 0.459 | test avg. loss: 4.282
Epochs: 19262 | epoch avg. loss: 0.331 | test avg. loss: 4.306
Epochs: 19263 | epoch avg. loss: 0.436 | test avg. loss: 5.597


 39%|███▊      | 19267/50000 [28:47<37:40, 13.60it/s]

Epochs: 19264 | epoch avg. loss: 0.786 | test avg. loss: 5.264
Epochs: 19265 | epoch avg. loss: 1.197 | test avg. loss: 5.926
Epochs: 19266 | epoch avg. loss: 1.324 | test avg. loss: 4.951
Epochs: 19267 | epoch avg. loss: 1.441 | test avg. loss: 5.432


 39%|███▊      | 19271/50000 [28:47<35:00, 14.63it/s]

Epochs: 19268 | epoch avg. loss: 1.228 | test avg. loss: 4.992
Epochs: 19269 | epoch avg. loss: 1.463 | test avg. loss: 5.294
Epochs: 19270 | epoch avg. loss: 1.961 | test avg. loss: 6.769
Epochs: 19271 | epoch avg. loss: 2.266 | test avg. loss: 7.219


 39%|███▊      | 19275/50000 [28:47<34:27, 14.86it/s]

Epochs: 19272 | epoch avg. loss: 3.142 | test avg. loss: 8.630
Epochs: 19273 | epoch avg. loss: 2.964 | test avg. loss: 6.763
Epochs: 19274 | epoch avg. loss: 2.661 | test avg. loss: 7.341


 39%|███▊      | 19277/50000 [28:48<34:54, 14.67it/s]

Epochs: 19275 | epoch avg. loss: 2.056 | test avg. loss: 6.572
Epochs: 19276 | epoch avg. loss: 2.105 | test avg. loss: 7.524
Epochs: 19277 | epoch avg. loss: 1.992 | test avg. loss: 6.406


 39%|███▊      | 19281/50000 [28:48<34:40, 14.76it/s]

Epochs: 19278 | epoch avg. loss: 2.850 | test avg. loss: 7.437
Epochs: 19279 | epoch avg. loss: 1.968 | test avg. loss: 5.232
Epochs: 19280 | epoch avg. loss: 2.142 | test avg. loss: 5.250
Epochs: 19281 | epoch avg. loss: 1.133 | test avg. loss: 3.767


 39%|███▊      | 19285/50000 [28:48<33:39, 15.21it/s]

Epochs: 19282 | epoch avg. loss: 0.690 | test avg. loss: 4.368
Epochs: 19283 | epoch avg. loss: 0.715 | test avg. loss: 3.950
Epochs: 19284 | epoch avg. loss: 0.647 | test avg. loss: 5.114
Epochs: 19285 | epoch avg. loss: 0.815 | test avg. loss: 3.550


 39%|███▊      | 19289/50000 [28:48<32:43, 15.64it/s]

Epochs: 19286 | epoch avg. loss: 0.456 | test avg. loss: 3.897
Epochs: 19287 | epoch avg. loss: 0.330 | test avg. loss: 3.426
Epochs: 19288 | epoch avg. loss: 0.495 | test avg. loss: 4.172
Epochs: 19289 | epoch avg. loss: 0.407 | test avg. loss: 3.856


 39%|███▊      | 19293/50000 [28:49<33:18, 15.36it/s]

Epochs: 19290 | epoch avg. loss: 0.580 | test avg. loss: 5.161
Epochs: 19291 | epoch avg. loss: 0.883 | test avg. loss: 4.193
Epochs: 19292 | epoch avg. loss: 0.730 | test avg. loss: 5.022
Epochs: 19293 | epoch avg. loss: 0.739 | test avg. loss: 4.093


 39%|███▊      | 19297/50000 [28:49<34:13, 14.95it/s]

Epochs: 19294 | epoch avg. loss: 0.611 | test avg. loss: 4.707
Epochs: 19295 | epoch avg. loss: 0.440 | test avg. loss: 3.758
Epochs: 19296 | epoch avg. loss: 0.285 | test avg. loss: 4.178


 39%|███▊      | 19299/50000 [28:49<34:33, 14.81it/s]

Epochs: 19297 | epoch avg. loss: 0.317 | test avg. loss: 3.782
Epochs: 19298 | epoch avg. loss: 0.382 | test avg. loss: 4.472
Epochs: 19299 | epoch avg. loss: 0.371 | test avg. loss: 4.086


 39%|███▊      | 19303/50000 [28:51<1:44:03,  4.92it/s]

Epochs: 19300 | epoch avg. loss: 0.370 | test avg. loss: 4.703
Epochs: 19301 | epoch avg. loss: 0.292 | test avg. loss: 4.118
Epochs: 19302 | epoch avg. loss: 0.187 | test avg. loss: 4.469


 39%|███▊      | 19307/50000 [28:51<1:07:22,  7.59it/s]

Epochs: 19303 | epoch avg. loss: 0.197 | test avg. loss: 3.990
Epochs: 19304 | epoch avg. loss: 0.286 | test avg. loss: 4.225
Epochs: 19305 | epoch avg. loss: 0.197 | test avg. loss: 3.845
Epochs: 19306 | epoch avg. loss: 0.168 | test avg. loss: 4.284


 39%|███▊      | 19311/50000 [28:51<50:28, 10.13it/s]

Epochs: 19307 | epoch avg. loss: 0.230 | test avg. loss: 3.970
Epochs: 19308 | epoch avg. loss: 0.196 | test avg. loss: 4.518
Epochs: 19309 | epoch avg. loss: 0.300 | test avg. loss: 4.188
Epochs: 19310 | epoch avg. loss: 0.406 | test avg. loss: 4.358


 39%|███▊      | 19315/50000 [28:51<41:07, 12.43it/s]

Epochs: 19311 | epoch avg. loss: 0.205 | test avg. loss: 3.926
Epochs: 19312 | epoch avg. loss: 0.226 | test avg. loss: 4.168
Epochs: 19313 | epoch avg. loss: 0.133 | test avg. loss: 3.900
Epochs: 19314 | epoch avg. loss: 0.121 | test avg. loss: 4.206


 39%|███▊      | 19319/50000 [28:52<37:41, 13.57it/s]

Epochs: 19315 | epoch avg. loss: 0.108 | test avg. loss: 3.996
Epochs: 19316 | epoch avg. loss: 0.087 | test avg. loss: 4.321
Epochs: 19317 | epoch avg. loss: 0.117 | test avg. loss: 4.037
Epochs: 19318 | epoch avg. loss: 0.080 | test avg. loss: 4.208


 39%|███▊      | 19321/50000 [28:52<37:15, 13.72it/s]

Epochs: 19319 | epoch avg. loss: 0.065 | test avg. loss: 3.978
Epochs: 19320 | epoch avg. loss: 0.055 | test avg. loss: 4.101
Epochs: 19321 | epoch avg. loss: 0.032 | test avg. loss: 3.970


 39%|███▊      | 19325/50000 [28:52<36:43, 13.92it/s]

Epochs: 19322 | epoch avg. loss: 0.040 | test avg. loss: 4.101
Epochs: 19323 | epoch avg. loss: 0.029 | test avg. loss: 4.011
Epochs: 19324 | epoch avg. loss: 0.029 | test avg. loss: 4.127


 39%|███▊      | 19329/50000 [28:52<33:56, 15.06it/s]

Epochs: 19325 | epoch avg. loss: 0.022 | test avg. loss: 4.040
Epochs: 19326 | epoch avg. loss: 0.033 | test avg. loss: 4.184
Epochs: 19327 | epoch avg. loss: 0.035 | test avg. loss: 4.056
Epochs: 19328 | epoch avg. loss: 0.043 | test avg. loss: 4.192


 39%|███▊      | 19333/50000 [28:53<33:20, 15.33it/s]

Epochs: 19329 | epoch avg. loss: 0.043 | test avg. loss: 4.031
Epochs: 19330 | epoch avg. loss: 0.035 | test avg. loss: 4.115
Epochs: 19331 | epoch avg. loss: 0.019 | test avg. loss: 4.074
Epochs: 19332 | epoch avg. loss: 0.012 | test avg. loss: 4.142


 39%|███▊      | 19337/50000 [28:53<32:20, 15.80it/s]

Epochs: 19333 | epoch avg. loss: 0.010 | test avg. loss: 4.111
Epochs: 19334 | epoch avg. loss: 0.015 | test avg. loss: 4.150
Epochs: 19335 | epoch avg. loss: 0.009 | test avg. loss: 4.117
Epochs: 19336 | epoch avg. loss: 0.009 | test avg. loss: 4.142


                                                     

Epochs: 19337 | epoch avg. loss: 0.011 | test avg. loss: 4.082
Epochs: 19338 | epoch avg. loss: 0.021 | test avg. loss: 4.145
Epochs: 19339 | epoch avg. loss: 0.012 | test avg. loss: 4.090


 39%|███▊      | 19343/50000 [28:53<33:05, 15.44it/s]

Epochs: 19340 | epoch avg. loss: 0.019 | test avg. loss: 4.170
Epochs: 19341 | epoch avg. loss: 0.011 | test avg. loss: 4.140
Epochs: 19342 | epoch avg. loss: 0.013 | test avg. loss: 4.178
Epochs: 19343 | epoch avg. loss: 0.010 | test avg. loss: 4.148


                                                     

Epochs: 19344 | epoch avg. loss: 0.010 | test avg. loss: 4.159
Epochs: 19345 | epoch avg. loss: 0.008 | test avg. loss: 4.171
Epochs: 19346 | epoch avg. loss: 0.008 | test avg. loss: 4.161


 39%|███▊      | 19351/50000 [28:54<32:30, 15.71it/s]

Epochs: 19347 | epoch avg. loss: 0.010 | test avg. loss: 4.206
Epochs: 19348 | epoch avg. loss: 0.013 | test avg. loss: 4.146
Epochs: 19349 | epoch avg. loss: 0.014 | test avg. loss: 4.216
Epochs: 19350 | epoch avg. loss: 0.017 | test avg. loss: 4.128


 39%|███▊      | 19355/50000 [28:54<31:37, 16.15it/s]

Epochs: 19351 | epoch avg. loss: 0.025 | test avg. loss: 4.246
Epochs: 19352 | epoch avg. loss: 0.024 | test avg. loss: 4.172
Epochs: 19353 | epoch avg. loss: 0.015 | test avg. loss: 4.224
Epochs: 19354 | epoch avg. loss: 0.013 | test avg. loss: 4.202


 39%|███▊      | 19357/50000 [28:54<31:51, 16.03it/s]

Epochs: 19355 | epoch avg. loss: 0.010 | test avg. loss: 4.183
Epochs: 19356 | epoch avg. loss: 0.010 | test avg. loss: 4.213
Epochs: 19357 | epoch avg. loss: 0.010 | test avg. loss: 4.190


 39%|███▊      | 19361/50000 [28:54<32:38, 15.64it/s]

Epochs: 19358 | epoch avg. loss: 0.008 | test avg. loss: 4.197
Epochs: 19359 | epoch avg. loss: 0.009 | test avg. loss: 4.206
Epochs: 19360 | epoch avg. loss: 0.008 | test avg. loss: 4.220
Epochs: 19361 | epoch avg. loss: 0.008 | test avg. loss: 4.230


 39%|███▊      | 19365/50000 [28:55<34:37, 14.75it/s]

Epochs: 19362 | epoch avg. loss: 0.009 | test avg. loss: 4.272
Epochs: 19363 | epoch avg. loss: 0.010 | test avg. loss: 4.225
Epochs: 19364 | epoch avg. loss: 0.013 | test avg. loss: 4.294
Epochs: 19365 | epoch avg. loss: 0.016 | test avg. loss: 4.213


 39%|███▊      | 19369/50000 [28:55<32:54, 15.52it/s]

Epochs: 19366 | epoch avg. loss: 0.013 | test avg. loss: 4.273
Epochs: 19367 | epoch avg. loss: 0.012 | test avg. loss: 4.246
Epochs: 19368 | epoch avg. loss: 0.008 | test avg. loss: 4.279
Epochs: 19369 | epoch avg. loss: 0.009 | test avg. loss: 4.277


 39%|███▊      | 19373/50000 [28:55<32:32, 15.69it/s]

Epochs: 19370 | epoch avg. loss: 0.008 | test avg. loss: 4.270
Epochs: 19371 | epoch avg. loss: 0.008 | test avg. loss: 4.263
Epochs: 19372 | epoch avg. loss: 0.008 | test avg. loss: 4.245


 39%|███▉      | 19377/50000 [28:55<34:08, 14.95it/s]

Epochs: 19373 | epoch avg. loss: 0.008 | test avg. loss: 4.270
Epochs: 19374 | epoch avg. loss: 0.010 | test avg. loss: 4.237
Epochs: 19375 | epoch avg. loss: 0.012 | test avg. loss: 4.312
Epochs: 19376 | epoch avg. loss: 0.012 | test avg. loss: 4.270


 39%|███▉      | 19381/50000 [28:56<32:40, 15.62it/s]

Epochs: 19377 | epoch avg. loss: 0.011 | test avg. loss: 4.320
Epochs: 19378 | epoch avg. loss: 0.009 | test avg. loss: 4.258
Epochs: 19379 | epoch avg. loss: 0.017 | test avg. loss: 4.306
Epochs: 19380 | epoch avg. loss: 0.011 | test avg. loss: 4.278


 39%|███▉      | 19385/50000 [28:56<31:53, 16.00it/s]

Epochs: 19381 | epoch avg. loss: 0.007 | test avg. loss: 4.300
Epochs: 19382 | epoch avg. loss: 0.007 | test avg. loss: 4.276
Epochs: 19383 | epoch avg. loss: 0.013 | test avg. loss: 4.285
Epochs: 19384 | epoch avg. loss: 0.007 | test avg. loss: 4.289


 39%|███▉      | 19387/50000 [28:56<32:15, 15.81it/s]

Epochs: 19385 | epoch avg. loss: 0.008 | test avg. loss: 4.331
Epochs: 19386 | epoch avg. loss: 0.009 | test avg. loss: 4.303
Epochs: 19387 | epoch avg. loss: 0.008 | test avg. loss: 4.328
Epochs: 19388 | epoch avg. loss: 0.008 | test avg. loss: 4.275


 39%|███▉      | 19391/50000 [28:56<33:59, 15.01it/s]

Epochs: 19389 | epoch avg. loss: 0.011 | test avg. loss: 4.307
Epochs: 19390 | epoch avg. loss: 0.008 | test avg. loss: 4.272
Epochs: 19391 | epoch avg. loss: 0.012 | test avg. loss: 4.302


 39%|███▉      | 19395/50000 [28:57<35:25, 14.40it/s]

Epochs: 19392 | epoch avg. loss: 0.008 | test avg. loss: 4.357
Epochs: 19393 | epoch avg. loss: 0.008 | test avg. loss: 4.354
Epochs: 19394 | epoch avg. loss: 0.008 | test avg. loss: 4.340




Epochs: 19395 | epoch avg. loss: 0.008 | test avg. loss: 4.377
Epochs: 19396 | epoch avg. loss: 0.013 | test avg. loss: 4.313
Epochs: 19397 | epoch avg. loss: 0.010 | test avg. loss: 4.308


 39%|███▉      | 19399/50000 [28:57<35:13, 14.48it/s]

Epochs: 19398 | epoch avg. loss: 0.007 | test avg. loss: 4.325
Epochs: 19399 | epoch avg. loss: 0.007 | test avg. loss: 4.354


 39%|███▉      | 19403/50000 [28:59<1:51:51,  4.56it/s]

Epochs: 19400 | epoch avg. loss: 0.010 | test avg. loss: 4.345
Epochs: 19401 | epoch avg. loss: 0.007 | test avg. loss: 4.352
Epochs: 19402 | epoch avg. loss: 0.007 | test avg. loss: 4.350
Epochs: 19403 | epoch avg. loss: 0.009 | test avg. loss: 4.319


 39%|███▉      | 19407/50000 [28:59<1:10:38,  7.22it/s]

Epochs: 19404 | epoch avg. loss: 0.011 | test avg. loss: 4.378
Epochs: 19405 | epoch avg. loss: 0.012 | test avg. loss: 4.323
Epochs: 19406 | epoch avg. loss: 0.012 | test avg. loss: 4.379
Epochs: 19407 | epoch avg. loss: 0.008 | test avg. loss: 4.331


 39%|███▉      | 19411/50000 [28:59<50:24, 10.11it/s]

Epochs: 19408 | epoch avg. loss: 0.011 | test avg. loss: 4.357
Epochs: 19409 | epoch avg. loss: 0.007 | test avg. loss: 4.330
Epochs: 19410 | epoch avg. loss: 0.008 | test avg. loss: 4.375
Epochs: 19411 | epoch avg. loss: 0.012 | test avg. loss: 4.335


 39%|███▉      | 19415/50000 [28:59<43:18, 11.77it/s]

Epochs: 19412 | epoch avg. loss: 0.007 | test avg. loss: 4.420
Epochs: 19413 | epoch avg. loss: 0.014 | test avg. loss: 4.326
Epochs: 19414 | epoch avg. loss: 0.033 | test avg. loss: 4.423


 39%|███▉      | 19417/50000 [29:00<44:41, 11.41it/s]

Epochs: 19415 | epoch avg. loss: 0.025 | test avg. loss: 4.335
Epochs: 19416 | epoch avg. loss: 0.009 | test avg. loss: 4.328
Epochs: 19417 | epoch avg. loss: 0.008 | test avg. loss: 4.315


 39%|███▉      | 19421/50000 [29:00<43:18, 11.77it/s]

Epochs: 19418 | epoch avg. loss: 0.010 | test avg. loss: 4.336
Epochs: 19419 | epoch avg. loss: 0.010 | test avg. loss: 4.404
Epochs: 19420 | epoch avg. loss: 0.012 | test avg. loss: 4.364


 39%|███▉      | 19423/50000 [29:00<41:44, 12.21it/s]

Epochs: 19421 | epoch avg. loss: 0.012 | test avg. loss: 4.427
Epochs: 19422 | epoch avg. loss: 0.014 | test avg. loss: 4.333
Epochs: 19423 | epoch avg. loss: 0.016 | test avg. loss: 4.417


 39%|███▉      | 19427/50000 [29:00<38:36, 13.20it/s]

Epochs: 19424 | epoch avg. loss: 0.024 | test avg. loss: 4.323
Epochs: 19425 | epoch avg. loss: 0.018 | test avg. loss: 4.383
Epochs: 19426 | epoch avg. loss: 0.006 | test avg. loss: 4.351
Epochs: 19427 | epoch avg. loss: 0.015 | test avg. loss: 4.457


 39%|███▉      | 19431/50000 [29:01<37:55, 13.43it/s]

Epochs: 19428 | epoch avg. loss: 0.028 | test avg. loss: 4.317
Epochs: 19429 | epoch avg. loss: 0.026 | test avg. loss: 4.366
Epochs: 19430 | epoch avg. loss: 0.038 | test avg. loss: 4.278


 39%|███▉      | 19435/50000 [29:01<35:46, 14.24it/s]

Epochs: 19431 | epoch avg. loss: 0.025 | test avg. loss: 4.328
Epochs: 19432 | epoch avg. loss: 0.018 | test avg. loss: 4.394
Epochs: 19433 | epoch avg. loss: 0.015 | test avg. loss: 4.390
Epochs: 19434 | epoch avg. loss: 0.010 | test avg. loss: 4.426


 39%|███▉      | 19439/50000 [29:01<34:15, 14.87it/s]

Epochs: 19435 | epoch avg. loss: 0.008 | test avg. loss: 4.421
Epochs: 19436 | epoch avg. loss: 0.008 | test avg. loss: 4.426
Epochs: 19437 | epoch avg. loss: 0.009 | test avg. loss: 4.377
Epochs: 19438 | epoch avg. loss: 0.011 | test avg. loss: 4.423


 39%|███▉      | 19443/50000 [29:01<33:29, 15.21it/s]

Epochs: 19439 | epoch avg. loss: 0.010 | test avg. loss: 4.366
Epochs: 19440 | epoch avg. loss: 0.011 | test avg. loss: 4.403
Epochs: 19441 | epoch avg. loss: 0.008 | test avg. loss: 4.394
Epochs: 19442 | epoch avg. loss: 0.007 | test avg. loss: 4.408


 39%|███▉      | 19447/50000 [29:02<32:53, 15.48it/s]

Epochs: 19443 | epoch avg. loss: 0.007 | test avg. loss: 4.429
Epochs: 19444 | epoch avg. loss: 0.007 | test avg. loss: 4.411
Epochs: 19445 | epoch avg. loss: 0.007 | test avg. loss: 4.407
Epochs: 19446 | epoch avg. loss: 0.006 | test avg. loss: 4.384




Epochs: 19447 | epoch avg. loss: 0.007 | test avg. loss: 4.396
Epochs: 19448 | epoch avg. loss: 0.008 | test avg. loss: 4.360
Epochs: 19449 | epoch avg. loss: 0.011 | test avg. loss: 4.413




Epochs: 19450 | epoch avg. loss: 0.007 | test avg. loss: 4.390
Epochs: 19451 | epoch avg. loss: 0.011 | test avg. loss: 4.437
Epochs: 19452 | epoch avg. loss: 0.008 | test avg. loss: 4.382


 39%|███▉      | 19457/50000 [29:02<33:04, 15.39it/s]

Epochs: 19453 | epoch avg. loss: 0.012 | test avg. loss: 4.443
Epochs: 19454 | epoch avg. loss: 0.015 | test avg. loss: 4.370
Epochs: 19455 | epoch avg. loss: 0.016 | test avg. loss: 4.379
Epochs: 19456 | epoch avg. loss: 0.009 | test avg. loss: 4.408


 39%|███▉      | 19459/50000 [29:02<33:22, 15.25it/s]

Epochs: 19457 | epoch avg. loss: 0.008 | test avg. loss: 4.390
Epochs: 19458 | epoch avg. loss: 0.008 | test avg. loss: 4.417
Epochs: 19459 | epoch avg. loss: 0.008 | test avg. loss: 4.380


 39%|███▉      | 19463/50000 [29:03<34:18, 14.83it/s]

Epochs: 19460 | epoch avg. loss: 0.007 | test avg. loss: 4.370
Epochs: 19461 | epoch avg. loss: 0.007 | test avg. loss: 4.372
Epochs: 19462 | epoch avg. loss: 0.007 | test avg. loss: 4.383


 39%|███▉      | 19467/50000 [29:03<34:52, 14.59it/s]

Epochs: 19463 | epoch avg. loss: 0.006 | test avg. loss: 4.392
Epochs: 19464 | epoch avg. loss: 0.006 | test avg. loss: 4.400
Epochs: 19465 | epoch avg. loss: 0.006 | test avg. loss: 4.396
Epochs: 19466 | epoch avg. loss: 0.006 | test avg. loss: 4.389


 39%|███▉      | 19469/50000 [29:03<34:48, 14.62it/s]

Epochs: 19467 | epoch avg. loss: 0.007 | test avg. loss: 4.406
Epochs: 19468 | epoch avg. loss: 0.008 | test avg. loss: 4.378
Epochs: 19469 | epoch avg. loss: 0.007 | test avg. loss: 4.370


 39%|███▉      | 19473/50000 [29:03<37:56, 13.41it/s]

Epochs: 19470 | epoch avg. loss: 0.007 | test avg. loss: 4.405
Epochs: 19471 | epoch avg. loss: 0.008 | test avg. loss: 4.389
Epochs: 19472 | epoch avg. loss: 0.007 | test avg. loss: 4.420




Epochs: 19473 | epoch avg. loss: 0.007 | test avg. loss: 4.374
Epochs: 19474 | epoch avg. loss: 0.014 | test avg. loss: 4.391
Epochs: 19475 | epoch avg. loss: 0.007 | test avg. loss: 4.404


 39%|███▉      | 19477/50000 [29:04<37:09, 13.69it/s]

Epochs: 19476 | epoch avg. loss: 0.007 | test avg. loss: 4.382
Epochs: 19477 | epoch avg. loss: 0.007 | test avg. loss: 4.412
Epochs: 19478 | epoch avg. loss: 0.009 | test avg. loss: 4.357


 39%|███▉      | 19481/50000 [29:04<44:55, 11.32it/s]

Epochs: 19479 | epoch avg. loss: 0.012 | test avg. loss: 4.374
Epochs: 19480 | epoch avg. loss: 0.008 | test avg. loss: 4.399
Epochs: 19481 | epoch avg. loss: 0.007 | test avg. loss: 4.409


 39%|███▉      | 19485/50000 [29:05<46:41, 10.89it/s]

Epochs: 19482 | epoch avg. loss: 0.006 | test avg. loss: 4.413
Epochs: 19483 | epoch avg. loss: 0.006 | test avg. loss: 4.421
Epochs: 19484 | epoch avg. loss: 0.007 | test avg. loss: 4.371


 39%|███▉      | 19487/50000 [29:05<46:29, 10.94it/s]

Epochs: 19485 | epoch avg. loss: 0.009 | test avg. loss: 4.398
Epochs: 19486 | epoch avg. loss: 0.008 | test avg. loss: 4.344
Epochs: 19487 | epoch avg. loss: 0.012 | test avg. loss: 4.372


 39%|███▉      | 19491/50000 [29:05<42:59, 11.83it/s]

Epochs: 19488 | epoch avg. loss: 0.006 | test avg. loss: 4.351
Epochs: 19489 | epoch avg. loss: 0.014 | test avg. loss: 4.430
Epochs: 19490 | epoch avg. loss: 0.008 | test avg. loss: 4.432


 39%|███▉      | 19495/50000 [29:05<37:57, 13.39it/s]

Epochs: 19491 | epoch avg. loss: 0.008 | test avg. loss: 4.407
Epochs: 19492 | epoch avg. loss: 0.008 | test avg. loss: 4.411
Epochs: 19493 | epoch avg. loss: 0.008 | test avg. loss: 4.344
Epochs: 19494 | epoch avg. loss: 0.008 | test avg. loss: 4.328


 39%|███▉      | 19497/50000 [29:06<38:55, 13.06it/s]

Epochs: 19495 | epoch avg. loss: 0.007 | test avg. loss: 4.347
Epochs: 19496 | epoch avg. loss: 0.007 | test avg. loss: 4.373
Epochs: 19497 | epoch avg. loss: 0.007 | test avg. loss: 4.413


 39%|███▉      | 19499/50000 [29:06<41:22, 12.28it/s]

Epochs: 19498 | epoch avg. loss: 0.007 | test avg. loss: 4.395
Epochs: 19499 | epoch avg. loss: 0.009 | test avg. loss: 4.443


 39%|███▉      | 19503/50000 [29:07<1:59:40,  4.25it/s]

Epochs: 19500 | epoch avg. loss: 0.011 | test avg. loss: 4.351
Epochs: 19501 | epoch avg. loss: 0.015 | test avg. loss: 4.389
Epochs: 19502 | epoch avg. loss: 0.014 | test avg. loss: 4.356


 39%|███▉      | 19507/50000 [29:08<1:16:24,  6.65it/s]

Epochs: 19503 | epoch avg. loss: 0.008 | test avg. loss: 4.375
Epochs: 19504 | epoch avg. loss: 0.007 | test avg. loss: 4.398
Epochs: 19505 | epoch avg. loss: 0.008 | test avg. loss: 4.393
Epochs: 19506 | epoch avg. loss: 0.007 | test avg. loss: 4.402


 39%|███▉      | 19511/50000 [29:08<54:01,  9.41it/s]  

Epochs: 19507 | epoch avg. loss: 0.007 | test avg. loss: 4.386
Epochs: 19508 | epoch avg. loss: 0.006 | test avg. loss: 4.354
Epochs: 19509 | epoch avg. loss: 0.007 | test avg. loss: 4.393
Epochs: 19510 | epoch avg. loss: 0.008 | test avg. loss: 4.353


 39%|███▉      | 19513/50000 [29:08<48:25, 10.49it/s]

Epochs: 19511 | epoch avg. loss: 0.010 | test avg. loss: 4.409
Epochs: 19512 | epoch avg. loss: 0.016 | test avg. loss: 4.419
Epochs: 19513 | epoch avg. loss: 0.010 | test avg. loss: 4.412


 39%|███▉      | 19517/50000 [29:08<42:50, 11.86it/s]

Epochs: 19514 | epoch avg. loss: 0.010 | test avg. loss: 4.452
Epochs: 19515 | epoch avg. loss: 0.010 | test avg. loss: 4.372
Epochs: 19516 | epoch avg. loss: 0.014 | test avg. loss: 4.392


 39%|███▉      | 19519/50000 [29:09<40:56, 12.41it/s]

Epochs: 19517 | epoch avg. loss: 0.007 | test avg. loss: 4.357
Epochs: 19518 | epoch avg. loss: 0.010 | test avg. loss: 4.386
Epochs: 19519 | epoch avg. loss: 0.006 | test avg. loss: 4.374
Epochs: 19520 | epoch avg. loss: 0.011 | test avg. loss: 4.418


 39%|███▉      | 19525/50000 [29:09<35:04, 14.48it/s]

Epochs: 19521 | epoch avg. loss: 0.007 | test avg. loss: 4.394
Epochs: 19522 | epoch avg. loss: 0.010 | test avg. loss: 4.427
Epochs: 19523 | epoch avg. loss: 0.013 | test avg. loss: 4.360
Epochs: 19524 | epoch avg. loss: 0.008 | test avg. loss: 4.374


 39%|███▉      | 19527/50000 [29:09<34:32, 14.70it/s]

Epochs: 19525 | epoch avg. loss: 0.009 | test avg. loss: 4.401
Epochs: 19526 | epoch avg. loss: 0.008 | test avg. loss: 4.383
Epochs: 19527 | epoch avg. loss: 0.009 | test avg. loss: 4.434
Epochs: 19528 | epoch avg. loss: 0.013 | test avg. loss: 4.398




Epochs: 19529 | epoch avg. loss: 0.007 | test avg. loss: 4.377
Epochs: 19530 | epoch avg. loss: 0.006 | test avg. loss: 4.372
Epochs: 19531 | epoch avg. loss: 0.007 | test avg. loss: 4.368


 39%|███▉      | 19535/50000 [29:10<33:40, 15.08it/s]

Epochs: 19532 | epoch avg. loss: 0.007 | test avg. loss: 4.402
Epochs: 19533 | epoch avg. loss: 0.008 | test avg. loss: 4.382
Epochs: 19534 | epoch avg. loss: 0.007 | test avg. loss: 4.437
Epochs: 19535 | epoch avg. loss: 0.012 | test avg. loss: 4.358




Epochs: 19536 | epoch avg. loss: 0.007 | test avg. loss: 4.426
Epochs: 19537 | epoch avg. loss: 0.026 | test avg. loss: 4.338
Epochs: 19538 | epoch avg. loss: 0.012 | test avg. loss: 4.369


 39%|███▉      | 19543/50000 [29:10<33:33, 15.12it/s]

Epochs: 19539 | epoch avg. loss: 0.010 | test avg. loss: 4.359
Epochs: 19540 | epoch avg. loss: 0.008 | test avg. loss: 4.411
Epochs: 19541 | epoch avg. loss: 0.016 | test avg. loss: 4.403
Epochs: 19542 | epoch avg. loss: 0.015 | test avg. loss: 4.327


 39%|███▉      | 19545/50000 [29:10<33:32, 15.14it/s]

Epochs: 19543 | epoch avg. loss: 0.036 | test avg. loss: 4.436
Epochs: 19544 | epoch avg. loss: 0.017 | test avg. loss: 4.368
Epochs: 19545 | epoch avg. loss: 0.024 | test avg. loss: 4.416


 39%|███▉      | 19549/50000 [29:11<34:54, 14.54it/s]

Epochs: 19546 | epoch avg. loss: 0.023 | test avg. loss: 4.388
Epochs: 19547 | epoch avg. loss: 0.013 | test avg. loss: 4.346
Epochs: 19548 | epoch avg. loss: 0.010 | test avg. loss: 4.399


 39%|███▉      | 19553/50000 [29:11<33:16, 15.25it/s]

Epochs: 19549 | epoch avg. loss: 0.010 | test avg. loss: 4.366
Epochs: 19550 | epoch avg. loss: 0.014 | test avg. loss: 4.429
Epochs: 19551 | epoch avg. loss: 0.009 | test avg. loss: 4.392
Epochs: 19552 | epoch avg. loss: 0.011 | test avg. loss: 4.364


 39%|███▉      | 19557/50000 [29:11<32:00, 15.85it/s]

Epochs: 19553 | epoch avg. loss: 0.009 | test avg. loss: 4.381
Epochs: 19554 | epoch avg. loss: 0.007 | test avg. loss: 4.363
Epochs: 19555 | epoch avg. loss: 0.006 | test avg. loss: 4.376
Epochs: 19556 | epoch avg. loss: 0.007 | test avg. loss: 4.358


 39%|███▉      | 19561/50000 [29:11<32:33, 15.58it/s]

Epochs: 19557 | epoch avg. loss: 0.012 | test avg. loss: 4.437
Epochs: 19558 | epoch avg. loss: 0.014 | test avg. loss: 4.414
Epochs: 19559 | epoch avg. loss: 0.010 | test avg. loss: 4.380
Epochs: 19560 | epoch avg. loss: 0.011 | test avg. loss: 4.430


 39%|███▉      | 19563/50000 [29:12<35:00, 14.49it/s]

Epochs: 19561 | epoch avg. loss: 0.014 | test avg. loss: 4.352
Epochs: 19562 | epoch avg. loss: 0.009 | test avg. loss: 4.416
Epochs: 19563 | epoch avg. loss: 0.015 | test avg. loss: 4.400
Epochs: 19564 | epoch avg. loss: 0.013 | test avg. loss: 4.427


 39%|███▉      | 19569/50000 [29:12<33:40, 15.06it/s]

Epochs: 19565 | epoch avg. loss: 0.013 | test avg. loss: 4.527
Epochs: 19566 | epoch avg. loss: 0.027 | test avg. loss: 4.402
Epochs: 19567 | epoch avg. loss: 0.020 | test avg. loss: 4.410
Epochs: 19568 | epoch avg. loss: 0.012 | test avg. loss: 4.329


 39%|███▉      | 19573/50000 [29:12<32:41, 15.51it/s]

Epochs: 19569 | epoch avg. loss: 0.013 | test avg. loss: 4.352
Epochs: 19570 | epoch avg. loss: 0.014 | test avg. loss: 4.359
Epochs: 19571 | epoch avg. loss: 0.014 | test avg. loss: 4.406
Epochs: 19572 | epoch avg. loss: 0.012 | test avg. loss: 4.513


 39%|███▉      | 19577/50000 [29:12<32:27, 15.62it/s]

Epochs: 19573 | epoch avg. loss: 0.014 | test avg. loss: 4.454
Epochs: 19574 | epoch avg. loss: 0.015 | test avg. loss: 4.462
Epochs: 19575 | epoch avg. loss: 0.015 | test avg. loss: 4.388
Epochs: 19576 | epoch avg. loss: 0.010 | test avg. loss: 4.338




Epochs: 19577 | epoch avg. loss: 0.009 | test avg. loss: 4.379
Epochs: 19578 | epoch avg. loss: 0.006 | test avg. loss: 4.450
Epochs: 19579 | epoch avg. loss: 0.007 | test avg. loss: 4.474




Epochs: 19580 | epoch avg. loss: 0.011 | test avg. loss: 4.448
Epochs: 19581 | epoch avg. loss: 0.009 | test avg. loss: 4.442
Epochs: 19582 | epoch avg. loss: 0.009 | test avg. loss: 4.361


 39%|███▉      | 19587/50000 [29:13<32:48, 15.45it/s]

Epochs: 19583 | epoch avg. loss: 0.009 | test avg. loss: 4.451
Epochs: 19584 | epoch avg. loss: 0.040 | test avg. loss: 4.385
Epochs: 19585 | epoch avg. loss: 0.022 | test avg. loss: 4.394
Epochs: 19586 | epoch avg. loss: 0.025 | test avg. loss: 4.601


 39%|███▉      | 19591/50000 [29:13<32:06, 15.79it/s]

Epochs: 19587 | epoch avg. loss: 0.050 | test avg. loss: 4.409
Epochs: 19588 | epoch avg. loss: 0.078 | test avg. loss: 4.504
Epochs: 19589 | epoch avg. loss: 0.116 | test avg. loss: 4.373
Epochs: 19590 | epoch avg. loss: 0.032 | test avg. loss: 4.350


 39%|███▉      | 19593/50000 [29:13<32:08, 15.77it/s]

Epochs: 19591 | epoch avg. loss: 0.025 | test avg. loss: 4.482
Epochs: 19592 | epoch avg. loss: 0.028 | test avg. loss: 4.427
Epochs: 19593 | epoch avg. loss: 0.024 | test avg. loss: 4.463


 39%|███▉      | 19597/50000 [29:14<33:32, 15.11it/s]

Epochs: 19594 | epoch avg. loss: 0.011 | test avg. loss: 4.487
Epochs: 19595 | epoch avg. loss: 0.014 | test avg. loss: 4.374
Epochs: 19596 | epoch avg. loss: 0.027 | test avg. loss: 4.448
Epochs: 19597 | epoch avg. loss: 0.018 | test avg. loss: 4.369


 39%|███▉      | 19599/50000 [29:14<34:06, 14.86it/s]

Epochs: 19598 | epoch avg. loss: 0.034 | test avg. loss: 4.532
Epochs: 19599 | epoch avg. loss: 0.070 | test avg. loss: 4.465


 39%|███▉      | 19603/50000 [29:16<1:53:40,  4.46it/s]

Epochs: 19600 | epoch avg. loss: 0.031 | test avg. loss: 4.447
Epochs: 19601 | epoch avg. loss: 0.045 | test avg. loss: 4.603
Epochs: 19602 | epoch avg. loss: 0.036 | test avg. loss: 4.453


 39%|███▉      | 19605/50000 [29:16<1:31:31,  5.53it/s]

Epochs: 19603 | epoch avg. loss: 0.034 | test avg. loss: 4.438
Epochs: 19604 | epoch avg. loss: 0.014 | test avg. loss: 4.455
Epochs: 19605 | epoch avg. loss: 0.018 | test avg. loss: 4.363


                                                       

Epochs: 19606 | epoch avg. loss: 0.013 | test avg. loss: 4.431
Epochs: 19607 | epoch avg. loss: 0.033 | test avg. loss: 4.427
Epochs: 19608 | epoch avg. loss: 0.023 | test avg. loss: 4.461


 39%|███▉      | 19611/50000 [29:16<53:50,  9.41it/s]

Epochs: 19609 | epoch avg. loss: 0.028 | test avg. loss: 4.655
Epochs: 19610 | epoch avg. loss: 0.070 | test avg. loss: 4.392
Epochs: 19611 | epoch avg. loss: 0.131 | test avg. loss: 4.536


 39%|███▉      | 19615/50000 [29:16<47:10, 10.73it/s]

Epochs: 19612 | epoch avg. loss: 0.147 | test avg. loss: 4.225
Epochs: 19613 | epoch avg. loss: 0.095 | test avg. loss: 4.300
Epochs: 19614 | epoch avg. loss: 0.102 | test avg. loss: 4.746


 39%|███▉      | 19617/50000 [29:17<46:12, 10.96it/s]

Epochs: 19615 | epoch avg. loss: 0.112 | test avg. loss: 4.598
Epochs: 19616 | epoch avg. loss: 0.113 | test avg. loss: 4.727
Epochs: 19617 | epoch avg. loss: 0.078 | test avg. loss: 4.558


 39%|███▉      | 19621/50000 [29:17<45:14, 11.19it/s]

Epochs: 19618 | epoch avg. loss: 0.046 | test avg. loss: 4.381
Epochs: 19619 | epoch avg. loss: 0.116 | test avg. loss: 4.534
Epochs: 19620 | epoch avg. loss: 0.110 | test avg. loss: 4.309


 39%|███▉      | 19623/50000 [29:17<43:04, 11.76it/s]

Epochs: 19621 | epoch avg. loss: 0.058 | test avg. loss: 4.383
Epochs: 19622 | epoch avg. loss: 0.030 | test avg. loss: 4.461
Epochs: 19623 | epoch avg. loss: 0.029 | test avg. loss: 4.443


 39%|███▉      | 19627/50000 [29:17<42:47, 11.83it/s]

Epochs: 19624 | epoch avg. loss: 0.044 | test avg. loss: 4.510
Epochs: 19625 | epoch avg. loss: 0.032 | test avg. loss: 4.380
Epochs: 19626 | epoch avg. loss: 0.024 | test avg. loss: 4.365


 39%|███▉      | 19629/50000 [29:18<44:42, 11.32it/s]

Epochs: 19627 | epoch avg. loss: 0.017 | test avg. loss: 4.420
Epochs: 19628 | epoch avg. loss: 0.022 | test avg. loss: 4.320
Epochs: 19629 | epoch avg. loss: 0.041 | test avg. loss: 4.329


 39%|███▉      | 19633/50000 [29:18<43:32, 11.62it/s]

Epochs: 19630 | epoch avg. loss: 0.008 | test avg. loss: 4.319
Epochs: 19631 | epoch avg. loss: 0.007 | test avg. loss: 4.376
Epochs: 19632 | epoch avg. loss: 0.009 | test avg. loss: 4.352




Epochs: 19633 | epoch avg. loss: 0.023 | test avg. loss: 4.429
Epochs: 19634 | epoch avg. loss: 0.043 | test avg. loss: 4.431
Epochs: 19635 | epoch avg. loss: 0.047 | test avg. loss: 4.409
Epochs: 19636 | epoch avg. loss: 0.229 | test avg. loss: 4.631

 39%|███▉      | 19639/50000 [29:18<39:28, 12.82it/s]


Epochs: 19637 | epoch avg. loss: 0.230 | test avg. loss: 4.326
Epochs: 19638 | epoch avg. loss: 0.094 | test avg. loss: 4.394


 39%|███▉      | 19641/50000 [29:19<39:38, 12.76it/s]

Epochs: 19639 | epoch avg. loss: 0.102 | test avg. loss: 4.674
Epochs: 19640 | epoch avg. loss: 0.134 | test avg. loss: 4.430
Epochs: 19641 | epoch avg. loss: 0.069 | test avg. loss: 4.410


 39%|███▉      | 19645/50000 [29:19<40:59, 12.34it/s]

Epochs: 19642 | epoch avg. loss: 0.039 | test avg. loss: 4.516
Epochs: 19643 | epoch avg. loss: 0.045 | test avg. loss: 4.374
Epochs: 19644 | epoch avg. loss: 0.071 | test avg. loss: 4.417


 39%|███▉      | 19647/50000 [29:19<40:07, 12.61it/s]

Epochs: 19645 | epoch avg. loss: 0.040 | test avg. loss: 4.454
Epochs: 19646 | epoch avg. loss: 0.035 | test avg. loss: 4.318
Epochs: 19647 | epoch avg. loss: 0.060 | test avg. loss: 4.392


 39%|███▉      | 19651/50000 [29:19<40:24, 12.52it/s]

Epochs: 19648 | epoch avg. loss: 0.050 | test avg. loss: 4.359
Epochs: 19649 | epoch avg. loss: 0.022 | test avg. loss: 4.338
Epochs: 19650 | epoch avg. loss: 0.027 | test avg. loss: 4.413




Epochs: 19651 | epoch avg. loss: 0.041 | test avg. loss: 4.363
Epochs: 19652 | epoch avg. loss: 0.019 | test avg. loss: 4.281
Epochs: 19653 | epoch avg. loss: 0.018 | test avg. loss: 4.374


 39%|███▉      | 19657/50000 [29:20<37:43, 13.41it/s]

Epochs: 19654 | epoch avg. loss: 0.014 | test avg. loss: 4.392
Epochs: 19655 | epoch avg. loss: 0.019 | test avg. loss: 4.433
Epochs: 19656 | epoch avg. loss: 0.014 | test avg. loss: 4.515


 39%|███▉      | 19659/50000 [29:20<38:53, 13.00it/s]

Epochs: 19657 | epoch avg. loss: 0.031 | test avg. loss: 4.360
Epochs: 19658 | epoch avg. loss: 0.019 | test avg. loss: 4.381
Epochs: 19659 | epoch avg. loss: 0.017 | test avg. loss: 4.422


 39%|███▉      | 19663/50000 [29:20<35:57, 14.06it/s]

Epochs: 19660 | epoch avg. loss: 0.014 | test avg. loss: 4.405
Epochs: 19661 | epoch avg. loss: 0.023 | test avg. loss: 4.475
Epochs: 19662 | epoch avg. loss: 0.007 | test avg. loss: 4.450
Epochs: 19663 | epoch avg. loss: 0.006 | test avg. loss: 4.478


 39%|███▉      | 19667/50000 [29:21<35:27, 14.26it/s]

Epochs: 19664 | epoch avg. loss: 0.012 | test avg. loss: 4.374
Epochs: 19665 | epoch avg. loss: 0.017 | test avg. loss: 4.393
Epochs: 19666 | epoch avg. loss: 0.013 | test avg. loss: 4.411
Epochs: 19667 | epoch avg. loss: 0.008 | test avg. loss: 4.428


 39%|███▉      | 19671/50000 [29:21<37:14, 13.57it/s]

Epochs: 19668 | epoch avg. loss: 0.007 | test avg. loss: 4.485
Epochs: 19669 | epoch avg. loss: 0.008 | test avg. loss: 4.432
Epochs: 19670 | epoch avg. loss: 0.009 | test avg. loss: 4.447


 39%|███▉      | 19673/50000 [29:21<38:57, 12.97it/s]

Epochs: 19671 | epoch avg. loss: 0.016 | test avg. loss: 4.418
Epochs: 19672 | epoch avg. loss: 0.010 | test avg. loss: 4.365
Epochs: 19673 | epoch avg. loss: 0.014 | test avg. loss: 4.421


 39%|███▉      | 19677/50000 [29:21<39:21, 12.84it/s]

Epochs: 19674 | epoch avg. loss: 0.006 | test avg. loss: 4.405
Epochs: 19675 | epoch avg. loss: 0.021 | test avg. loss: 4.458
Epochs: 19676 | epoch avg. loss: 0.016 | test avg. loss: 4.555


 39%|███▉      | 19679/50000 [29:22<39:49, 12.69it/s]

Epochs: 19677 | epoch avg. loss: 0.034 | test avg. loss: 4.385
Epochs: 19678 | epoch avg. loss: 0.021 | test avg. loss: 4.405
Epochs: 19679 | epoch avg. loss: 0.028 | test avg. loss: 4.476


                                                     

Epochs: 19680 | epoch avg. loss: 0.034 | test avg. loss: 4.371
Epochs: 19681 | epoch avg. loss: 0.044 | test avg. loss: 4.546
Epochs: 19682 | epoch avg. loss: 0.063 | test avg. loss: 4.453


 39%|███▉      | 19687/50000 [29:22<35:11, 14.36it/s]

Epochs: 19683 | epoch avg. loss: 0.042 | test avg. loss: 4.367
Epochs: 19684 | epoch avg. loss: 0.033 | test avg. loss: 4.402
Epochs: 19685 | epoch avg. loss: 0.020 | test avg. loss: 4.350
Epochs: 19686 | epoch avg. loss: 0.041 | test avg. loss: 4.496


 39%|███▉      | 19689/50000 [29:22<37:16, 13.55it/s]

Epochs: 19687 | epoch avg. loss: 0.040 | test avg. loss: 4.491
Epochs: 19688 | epoch avg. loss: 0.091 | test avg. loss: 4.503
Epochs: 19689 | epoch avg. loss: 0.046 | test avg. loss: 4.665


 39%|███▉      | 19693/50000 [29:23<41:14, 12.25it/s]

Epochs: 19690 | epoch avg. loss: 0.085 | test avg. loss: 4.368
Epochs: 19691 | epoch avg. loss: 0.076 | test avg. loss: 4.477
Epochs: 19692 | epoch avg. loss: 0.095 | test avg. loss: 4.345


 39%|███▉      | 19695/50000 [29:23<40:02, 12.61it/s]

Epochs: 19693 | epoch avg. loss: 0.045 | test avg. loss: 4.329
Epochs: 19694 | epoch avg. loss: 0.070 | test avg. loss: 4.545
Epochs: 19695 | epoch avg. loss: 0.056 | test avg. loss: 4.439
Epochs: 19696 | epoch avg. loss: 0.031 | test avg. loss: 4.530


 39%|███▉      | 19699/50000 [29:23<38:02, 13.28it/s]

Epochs: 19697 | epoch avg. loss: 0.031 | test avg. loss: 4.448
Epochs: 19698 | epoch avg. loss: 0.018 | test avg. loss: 4.418
Epochs: 19699 | epoch avg. loss: 0.017 | test avg. loss: 4.471


 39%|███▉      | 19703/50000 [29:25<2:04:14,  4.06it/s]

Epochs: 19700 | epoch avg. loss: 0.019 | test avg. loss: 4.446
Epochs: 19701 | epoch avg. loss: 0.045 | test avg. loss: 4.585
Epochs: 19702 | epoch avg. loss: 0.043 | test avg. loss: 4.568


 39%|███▉      | 19705/50000 [29:25<1:38:42,  5.11it/s]

Epochs: 19703 | epoch avg. loss: 0.027 | test avg. loss: 4.434
Epochs: 19704 | epoch avg. loss: 0.010 | test avg. loss: 4.467
Epochs: 19705 | epoch avg. loss: 0.017 | test avg. loss: 4.406
Epochs: 19706 | epoch avg. loss: 0.012 | test avg. loss: 4.506


 39%|███▉      | 19709/50000 [29:25<1:04:20,  7.85it/s]

Epochs: 19707 | epoch avg. loss: 0.021 | test avg. loss: 4.476
Epochs: 19708 | epoch avg. loss: 0.016 | test avg. loss: 4.475
Epochs: 19709 | epoch avg. loss: 0.014 | test avg. loss: 4.527


                                                     

Epochs: 19710 | epoch avg. loss: 0.021 | test avg. loss: 4.424
Epochs: 19711 | epoch avg. loss: 0.009 | test avg. loss: 4.405
Epochs: 19712 | epoch avg. loss: 0.008 | test avg. loss: 4.324


 39%|███▉      | 19717/50000 [29:26<41:27, 12.17it/s]

Epochs: 19713 | epoch avg. loss: 0.020 | test avg. loss: 4.373
Epochs: 19714 | epoch avg. loss: 0.015 | test avg. loss: 4.456
Epochs: 19715 | epoch avg. loss: 0.014 | test avg. loss: 4.423
Epochs: 19716 | epoch avg. loss: 0.012 | test avg. loss: 4.446


 39%|███▉      | 19721/50000 [29:26<36:29, 13.83it/s]

Epochs: 19717 | epoch avg. loss: 0.006 | test avg. loss: 4.423
Epochs: 19718 | epoch avg. loss: 0.009 | test avg. loss: 4.460
Epochs: 19719 | epoch avg. loss: 0.007 | test avg. loss: 4.443
Epochs: 19720 | epoch avg. loss: 0.010 | test avg. loss: 4.479


 39%|███▉      | 19725/50000 [29:26<34:34, 14.59it/s]

Epochs: 19721 | epoch avg. loss: 0.009 | test avg. loss: 4.509
Epochs: 19722 | epoch avg. loss: 0.009 | test avg. loss: 4.445
Epochs: 19723 | epoch avg. loss: 0.006 | test avg. loss: 4.486
Epochs: 19724 | epoch avg. loss: 0.027 | test avg. loss: 4.435


 39%|███▉      | 19727/50000 [29:27<38:48, 13.00it/s]

Epochs: 19725 | epoch avg. loss: 0.018 | test avg. loss: 4.448
Epochs: 19726 | epoch avg. loss: 0.024 | test avg. loss: 4.633
Epochs: 19727 | epoch avg. loss: 0.060 | test avg. loss: 4.492


 39%|███▉      | 19731/50000 [29:27<37:43, 13.37it/s]

Epochs: 19728 | epoch avg. loss: 0.032 | test avg. loss: 4.450
Epochs: 19729 | epoch avg. loss: 0.041 | test avg. loss: 4.666
Epochs: 19730 | epoch avg. loss: 0.138 | test avg. loss: 4.442


 39%|███▉      | 19733/50000 [29:27<37:10, 13.57it/s]

Epochs: 19731 | epoch avg. loss: 0.046 | test avg. loss: 4.438
Epochs: 19732 | epoch avg. loss: 0.040 | test avg. loss: 4.589
Epochs: 19733 | epoch avg. loss: 0.045 | test avg. loss: 4.433
Epochs: 19734 | epoch avg. loss: 0.056 | test avg. loss: 4.490


 39%|███▉      | 19737/50000 [29:27<35:33, 14.19it/s]

Epochs: 19735 | epoch avg. loss: 0.063 | test avg. loss: 4.438
Epochs: 19736 | epoch avg. loss: 0.036 | test avg. loss: 4.390
Epochs: 19737 | epoch avg. loss: 0.045 | test avg. loss: 4.543


 39%|███▉      | 19741/50000 [29:28<38:31, 13.09it/s]

Epochs: 19738 | epoch avg. loss: 0.078 | test avg. loss: 4.443
Epochs: 19739 | epoch avg. loss: 0.048 | test avg. loss: 4.458
Epochs: 19740 | epoch avg. loss: 0.053 | test avg. loss: 4.951


 39%|███▉      | 19743/50000 [29:28<39:08, 12.89it/s]

Epochs: 19741 | epoch avg. loss: 0.257 | test avg. loss: 4.473
Epochs: 19742 | epoch avg. loss: 0.086 | test avg. loss: 4.463
Epochs: 19743 | epoch avg. loss: 0.078 | test avg. loss: 4.873


 39%|███▉      | 19747/50000 [29:28<38:55, 12.95it/s]

Epochs: 19744 | epoch avg. loss: 0.154 | test avg. loss: 4.492
Epochs: 19745 | epoch avg. loss: 0.116 | test avg. loss: 4.578
Epochs: 19746 | epoch avg. loss: 0.063 | test avg. loss: 4.789


                                                     

Epochs: 19747 | epoch avg. loss: 0.055 | test avg. loss: 4.653
Epochs: 19748 | epoch avg. loss: 0.050 | test avg. loss: 4.671
Epochs: 19749 | epoch avg. loss: 0.078 | test avg. loss: 4.511


 40%|███▉      | 19753/50000 [29:28<37:59, 13.27it/s]

Epochs: 19750 | epoch avg. loss: 0.025 | test avg. loss: 4.457
Epochs: 19751 | epoch avg. loss: 0.021 | test avg. loss: 4.628
Epochs: 19752 | epoch avg. loss: 0.028 | test avg. loss: 4.657


                                                     

Epochs: 19753 | epoch avg. loss: 0.016 | test avg. loss: 4.582
Epochs: 19754 | epoch avg. loss: 0.028 | test avg. loss: 4.616
Epochs: 19755 | epoch avg. loss: 0.041 | test avg. loss: 4.487


 40%|███▉      | 19759/50000 [29:29<34:57, 14.42it/s]

Epochs: 19756 | epoch avg. loss: 0.051 | test avg. loss: 4.464
Epochs: 19757 | epoch avg. loss: 0.059 | test avg. loss: 4.633
Epochs: 19758 | epoch avg. loss: 0.094 | test avg. loss: 4.523
Epochs: 19759 | epoch avg. loss: 0.091 | test avg. loss: 4.619


 40%|███▉      | 19763/50000 [29:29<33:55, 14.85it/s]

Epochs: 19760 | epoch avg. loss: 0.045 | test avg. loss: 4.438
Epochs: 19761 | epoch avg. loss: 0.052 | test avg. loss: 4.318
Epochs: 19762 | epoch avg. loss: 0.128 | test avg. loss: 4.365
Epochs: 19763 | epoch avg. loss: 0.068 | test avg. loss: 4.356


 40%|███▉      | 19767/50000 [29:29<34:34, 14.57it/s]

Epochs: 19764 | epoch avg. loss: 0.057 | test avg. loss: 4.383
Epochs: 19765 | epoch avg. loss: 0.061 | test avg. loss: 4.465
Epochs: 19766 | epoch avg. loss: 0.050 | test avg. loss: 4.309


 40%|███▉      | 19769/50000 [29:30<36:57, 13.63it/s]

Epochs: 19767 | epoch avg. loss: 0.029 | test avg. loss: 4.232
Epochs: 19768 | epoch avg. loss: 0.059 | test avg. loss: 4.277
Epochs: 19769 | epoch avg. loss: 0.018 | test avg. loss: 4.287


 40%|███▉      | 19773/50000 [29:30<36:02, 13.98it/s]

Epochs: 19770 | epoch avg. loss: 0.024 | test avg. loss: 4.315
Epochs: 19771 | epoch avg. loss: 0.027 | test avg. loss: 4.371
Epochs: 19772 | epoch avg. loss: 0.051 | test avg. loss: 4.179
Epochs: 19773 | epoch avg. loss: 0.017 | test avg. loss: 4.118


 40%|███▉      | 19777/50000 [29:30<34:01, 14.80it/s]

Epochs: 19774 | epoch avg. loss: 0.023 | test avg. loss: 4.159
Epochs: 19775 | epoch avg. loss: 0.012 | test avg. loss: 4.195
Epochs: 19776 | epoch avg. loss: 0.015 | test avg. loss: 4.277
Epochs: 19777 | epoch avg. loss: 0.011 | test avg. loss: 4.271


 40%|███▉      | 19781/50000 [29:30<33:58, 14.83it/s]

Epochs: 19778 | epoch avg. loss: 0.008 | test avg. loss: 4.208
Epochs: 19779 | epoch avg. loss: 0.007 | test avg. loss: 4.163
Epochs: 19780 | epoch avg. loss: 0.007 | test avg. loss: 4.216


 40%|███▉      | 19783/50000 [29:31<35:59, 13.99it/s]

Epochs: 19781 | epoch avg. loss: 0.014 | test avg. loss: 4.172
Epochs: 19782 | epoch avg. loss: 0.016 | test avg. loss: 4.207
Epochs: 19783 | epoch avg. loss: 0.010 | test avg. loss: 4.303


 40%|███▉      | 19787/50000 [29:31<36:26, 13.82it/s]

Epochs: 19784 | epoch avg. loss: 0.016 | test avg. loss: 4.239
Epochs: 19785 | epoch avg. loss: 0.015 | test avg. loss: 4.240
Epochs: 19786 | epoch avg. loss: 0.012 | test avg. loss: 4.350


 40%|███▉      | 19789/50000 [29:31<35:36, 14.14it/s]

Epochs: 19787 | epoch avg. loss: 0.039 | test avg. loss: 4.216
Epochs: 19788 | epoch avg. loss: 0.035 | test avg. loss: 4.240
Epochs: 19789 | epoch avg. loss: 0.013 | test avg. loss: 4.289
Epochs: 19790 | epoch avg. loss: 0.013 | test avg. loss: 4.213


 40%|███▉      | 19793/50000 [29:31<35:13, 14.30it/s]

Epochs: 19791 | epoch avg. loss: 0.021 | test avg. loss: 4.216
Epochs: 19792 | epoch avg. loss: 0.012 | test avg. loss: 4.223
Epochs: 19793 | epoch avg. loss: 0.013 | test avg. loss: 4.196


 40%|███▉      | 19797/50000 [29:32<36:10, 13.92it/s]

Epochs: 19794 | epoch avg. loss: 0.009 | test avg. loss: 4.197
Epochs: 19795 | epoch avg. loss: 0.015 | test avg. loss: 4.312
Epochs: 19796 | epoch avg. loss: 0.024 | test avg. loss: 4.306


 40%|███▉      | 19799/50000 [29:32<35:35, 14.14it/s]

Epochs: 19797 | epoch avg. loss: 0.023 | test avg. loss: 4.307
Epochs: 19798 | epoch avg. loss: 0.056 | test avg. loss: 4.339
Epochs: 19799 | epoch avg. loss: 0.032 | test avg. loss: 4.330




Epochs: 19800 | epoch avg. loss: 0.044 | test avg. loss: 4.160
Epochs: 19801 | epoch avg. loss: 0.029 | test avg. loss: 4.200
Epochs: 19802 | epoch avg. loss: 0.012 | test avg. loss: 4.292


 40%|███▉      | 19807/50000 [29:34<1:08:45,  7.32it/s]

Epochs: 19803 | epoch avg. loss: 0.010 | test avg. loss: 4.318
Epochs: 19804 | epoch avg. loss: 0.009 | test avg. loss: 4.289
Epochs: 19805 | epoch avg. loss: 0.007 | test avg. loss: 4.296
Epochs: 19806 | epoch avg. loss: 0.011 | test avg. loss: 4.217


 40%|███▉      | 19811/50000 [29:34<50:48,  9.90it/s]

Epochs: 19807 | epoch avg. loss: 0.027 | test avg. loss: 4.297
Epochs: 19808 | epoch avg. loss: 0.018 | test avg. loss: 4.312
Epochs: 19809 | epoch avg. loss: 0.013 | test avg. loss: 4.243
Epochs: 19810 | epoch avg. loss: 0.010 | test avg. loss: 4.282




Epochs: 19811 | epoch avg. loss: 0.007 | test avg. loss: 4.299
Epochs: 19812 | epoch avg. loss: 0.007 | test avg. loss: 4.300
Epochs: 19813 | epoch avg. loss: 0.006 | test avg. loss: 4.320


 40%|███▉      | 19817/50000 [29:34<39:08, 12.85it/s]

Epochs: 19814 | epoch avg. loss: 0.011 | test avg. loss: 4.297
Epochs: 19815 | epoch avg. loss: 0.008 | test avg. loss: 4.259
Epochs: 19816 | epoch avg. loss: 0.010 | test avg. loss: 4.274
Epochs: 19817 | epoch avg. loss: 0.011 | test avg. loss: 4.315


 40%|███▉      | 19821/50000 [29:35<37:40, 13.35it/s]

Epochs: 19818 | epoch avg. loss: 0.008 | test avg. loss: 4.351
Epochs: 19819 | epoch avg. loss: 0.009 | test avg. loss: 4.283
Epochs: 19820 | epoch avg. loss: 0.007 | test avg. loss: 4.220


 40%|███▉      | 19823/50000 [29:35<37:12, 13.51it/s]

Epochs: 19821 | epoch avg. loss: 0.007 | test avg. loss: 4.261
Epochs: 19822 | epoch avg. loss: 0.029 | test avg. loss: 4.265
Epochs: 19823 | epoch avg. loss: 0.013 | test avg. loss: 4.229


 40%|███▉      | 19827/50000 [29:35<35:06, 14.32it/s]

Epochs: 19824 | epoch avg. loss: 0.024 | test avg. loss: 4.402
Epochs: 19825 | epoch avg. loss: 0.043 | test avg. loss: 4.320
Epochs: 19826 | epoch avg. loss: 0.020 | test avg. loss: 4.260


 40%|███▉      | 19829/50000 [29:35<36:20, 13.84it/s]

Epochs: 19827 | epoch avg. loss: 0.058 | test avg. loss: 4.270
Epochs: 19828 | epoch avg. loss: 0.050 | test avg. loss: 4.332
Epochs: 19829 | epoch avg. loss: 0.033 | test avg. loss: 4.347


 40%|███▉      | 19833/50000 [29:35<39:23, 12.76it/s]

Epochs: 19830 | epoch avg. loss: 0.133 | test avg. loss: 4.377
Epochs: 19831 | epoch avg. loss: 0.025 | test avg. loss: 4.483
Epochs: 19832 | epoch avg. loss: 0.043 | test avg. loss: 4.308


 40%|███▉      | 19835/50000 [29:36<40:38, 12.37it/s]

Epochs: 19833 | epoch avg. loss: 0.054 | test avg. loss: 4.329
Epochs: 19834 | epoch avg. loss: 0.055 | test avg. loss: 4.476
Epochs: 19835 | epoch avg. loss: 0.071 | test avg. loss: 4.292


 40%|███▉      | 19839/50000 [29:36<39:42, 12.66it/s]

Epochs: 19836 | epoch avg. loss: 0.081 | test avg. loss: 4.320
Epochs: 19837 | epoch avg. loss: 0.070 | test avg. loss: 4.457
Epochs: 19838 | epoch avg. loss: 0.053 | test avg. loss: 4.326


 40%|███▉      | 19841/50000 [29:36<38:49, 12.95it/s]

Epochs: 19839 | epoch avg. loss: 0.020 | test avg. loss: 4.225
Epochs: 19840 | epoch avg. loss: 0.038 | test avg. loss: 4.337
Epochs: 19841 | epoch avg. loss: 0.046 | test avg. loss: 4.322


 40%|███▉      | 19845/50000 [29:36<36:24, 13.80it/s]

Epochs: 19842 | epoch avg. loss: 0.014 | test avg. loss: 4.300
Epochs: 19843 | epoch avg. loss: 0.021 | test avg. loss: 4.381
Epochs: 19844 | epoch avg. loss: 0.026 | test avg. loss: 4.315


 40%|███▉      | 19847/50000 [29:37<39:13, 12.81it/s]

Epochs: 19845 | epoch avg. loss: 0.014 | test avg. loss: 4.243
Epochs: 19846 | epoch avg. loss: 0.053 | test avg. loss: 4.274
Epochs: 19847 | epoch avg. loss: 0.018 | test avg. loss: 4.356


 40%|███▉      | 19851/50000 [29:37<38:49, 12.94it/s]

Epochs: 19848 | epoch avg. loss: 0.016 | test avg. loss: 4.354
Epochs: 19849 | epoch avg. loss: 0.010 | test avg. loss: 4.354
Epochs: 19850 | epoch avg. loss: 0.009 | test avg. loss: 4.287


 40%|███▉      | 19855/50000 [29:37<35:04, 14.32it/s]

Epochs: 19851 | epoch avg. loss: 0.008 | test avg. loss: 4.228
Epochs: 19852 | epoch avg. loss: 0.008 | test avg. loss: 4.217
Epochs: 19853 | epoch avg. loss: 0.007 | test avg. loss: 4.276
Epochs: 19854 | epoch avg. loss: 0.014 | test avg. loss: 4.304


 40%|███▉      | 19857/50000 [29:37<35:30, 14.15it/s]

Epochs: 19855 | epoch avg. loss: 0.009 | test avg. loss: 4.286
Epochs: 19856 | epoch avg. loss: 0.011 | test avg. loss: 4.325
Epochs: 19857 | epoch avg. loss: 0.016 | test avg. loss: 4.306


 40%|███▉      | 19861/50000 [29:38<38:06, 13.18it/s]

Epochs: 19858 | epoch avg. loss: 0.012 | test avg. loss: 4.252
Epochs: 19859 | epoch avg. loss: 0.009 | test avg. loss: 4.315
Epochs: 19860 | epoch avg. loss: 0.027 | test avg. loss: 4.393


 40%|███▉      | 19863/50000 [29:38<38:04, 13.19it/s]

Epochs: 19861 | epoch avg. loss: 0.019 | test avg. loss: 4.347
Epochs: 19862 | epoch avg. loss: 0.015 | test avg. loss: 4.383
Epochs: 19863 | epoch avg. loss: 0.020 | test avg. loss: 4.294


 40%|███▉      | 19867/50000 [29:38<41:04, 12.23it/s]

Epochs: 19864 | epoch avg. loss: 0.015 | test avg. loss: 4.246
Epochs: 19865 | epoch avg. loss: 0.030 | test avg. loss: 4.279
Epochs: 19866 | epoch avg. loss: 0.015 | test avg. loss: 4.354


 40%|███▉      | 19869/50000 [29:38<43:31, 11.54it/s]

Epochs: 19867 | epoch avg. loss: 0.038 | test avg. loss: 4.343
Epochs: 19868 | epoch avg. loss: 0.025 | test avg. loss: 4.353
Epochs: 19869 | epoch avg. loss: 0.038 | test avg. loss: 4.406


 40%|███▉      | 19873/50000 [29:39<44:30, 11.28it/s]

Epochs: 19870 | epoch avg. loss: 0.041 | test avg. loss: 4.261
Epochs: 19871 | epoch avg. loss: 0.018 | test avg. loss: 4.204
Epochs: 19872 | epoch avg. loss: 0.014 | test avg. loss: 4.298


 40%|███▉      | 19875/50000 [29:39<40:46, 12.31it/s]

Epochs: 19873 | epoch avg. loss: 0.019 | test avg. loss: 4.328
Epochs: 19874 | epoch avg. loss: 0.010 | test avg. loss: 4.278
Epochs: 19875 | epoch avg. loss: 0.036 | test avg. loss: 4.277


 40%|███▉      | 19879/50000 [29:39<41:29, 12.10it/s]

Epochs: 19876 | epoch avg. loss: 0.010 | test avg. loss: 4.264
Epochs: 19877 | epoch avg. loss: 0.012 | test avg. loss: 4.290
Epochs: 19878 | epoch avg. loss: 0.018 | test avg. loss: 4.227


 40%|███▉      | 19881/50000 [29:39<39:41, 12.65it/s]

Epochs: 19879 | epoch avg. loss: 0.033 | test avg. loss: 4.270
Epochs: 19880 | epoch avg. loss: 0.022 | test avg. loss: 4.416
Epochs: 19881 | epoch avg. loss: 0.022 | test avg. loss: 4.302


 40%|███▉      | 19885/50000 [29:40<42:00, 11.95it/s]

Epochs: 19882 | epoch avg. loss: 0.020 | test avg. loss: 4.266
Epochs: 19883 | epoch avg. loss: 0.072 | test avg. loss: 4.282
Epochs: 19884 | epoch avg. loss: 0.020 | test avg. loss: 4.507


 40%|███▉      | 19887/50000 [29:40<42:03, 11.93it/s]

Epochs: 19885 | epoch avg. loss: 0.100 | test avg. loss: 4.278
Epochs: 19886 | epoch avg. loss: 0.034 | test avg. loss: 4.302
Epochs: 19887 | epoch avg. loss: 0.075 | test avg. loss: 4.386


 40%|███▉      | 19891/50000 [29:40<42:06, 11.92it/s]

Epochs: 19888 | epoch avg. loss: 0.013 | test avg. loss: 4.343
Epochs: 19889 | epoch avg. loss: 0.060 | test avg. loss: 4.262
Epochs: 19890 | epoch avg. loss: 0.041 | test avg. loss: 4.341


 40%|███▉      | 19895/50000 [29:40<38:05, 13.17it/s]

Epochs: 19891 | epoch avg. loss: 0.154 | test avg. loss: 4.324
Epochs: 19892 | epoch avg. loss: 0.040 | test avg. loss: 4.413
Epochs: 19893 | epoch avg. loss: 0.062 | test avg. loss: 4.732
Epochs: 19894 | epoch avg. loss: 0.226 | test avg. loss: 4.348


 40%|███▉      | 19897/50000 [29:41<42:25, 11.83it/s]

Epochs: 19895 | epoch avg. loss: 0.096 | test avg. loss: 4.195
Epochs: 19896 | epoch avg. loss: 0.077 | test avg. loss: 4.426
Epochs: 19897 | epoch avg. loss: 0.212 | test avg. loss: 4.059


 40%|███▉      | 19899/50000 [29:41<40:23, 12.42it/s]

Epochs: 19898 | epoch avg. loss: 0.089 | test avg. loss: 4.336
Epochs: 19899 | epoch avg. loss: 0.371 | test avg. loss: 4.476


 40%|███▉      | 19902/50000 [29:43<2:21:38,  3.54it/s]

Epochs: 19900 | epoch avg. loss: 0.189 | test avg. loss: 4.822
Epochs: 19901 | epoch avg. loss: 0.220 | test avg. loss: 4.467
Epochs: 19902 | epoch avg. loss: 0.244 | test avg. loss: 4.476


 40%|███▉      | 19906/50000 [29:43<1:27:27,  5.73it/s]

Epochs: 19903 | epoch avg. loss: 0.221 | test avg. loss: 4.737
Epochs: 19904 | epoch avg. loss: 0.325 | test avg. loss: 4.113
Epochs: 19905 | epoch avg. loss: 0.193 | test avg. loss: 4.374


 40%|███▉      | 19908/50000 [29:43<1:11:36,  7.00it/s]

Epochs: 19906 | epoch avg. loss: 0.352 | test avg. loss: 4.565
Epochs: 19907 | epoch avg. loss: 0.214 | test avg. loss: 5.001
Epochs: 19908 | epoch avg. loss: 0.339 | test avg. loss: 6.559


 40%|███▉      | 19912/50000 [29:43<54:30,  9.20it/s]  

Epochs: 19909 | epoch avg. loss: 1.466 | test avg. loss: 4.539
Epochs: 19910 | epoch avg. loss: 0.259 | test avg. loss: 4.185
Epochs: 19911 | epoch avg. loss: 0.365 | test avg. loss: 5.659


 40%|███▉      | 19914/50000 [29:44<51:52,  9.66it/s]

Epochs: 19912 | epoch avg. loss: 1.627 | test avg. loss: 3.953
Epochs: 19913 | epoch avg. loss: 0.739 | test avg. loss: 5.590
Epochs: 19914 | epoch avg. loss: 1.170 | test avg. loss: 7.483


 40%|███▉      | 19918/50000 [29:44<46:38, 10.75it/s]

Epochs: 19915 | epoch avg. loss: 1.873 | test avg. loss: 4.606
Epochs: 19916 | epoch avg. loss: 0.322 | test avg. loss: 3.855
Epochs: 19917 | epoch avg. loss: 0.357 | test avg. loss: 3.944


 40%|███▉      | 19920/50000 [29:44<45:17, 11.07it/s]

Epochs: 19918 | epoch avg. loss: 0.643 | test avg. loss: 3.463
Epochs: 19919 | epoch avg. loss: 0.199 | test avg. loss: 4.155
Epochs: 19920 | epoch avg. loss: 0.222 | test avg. loss: 4.739


 40%|███▉      | 19924/50000 [29:44<42:18, 11.85it/s]

Epochs: 19921 | epoch avg. loss: 0.274 | test avg. loss: 4.349
Epochs: 19922 | epoch avg. loss: 0.218 | test avg. loss: 4.485
Epochs: 19923 | epoch avg. loss: 0.373 | test avg. loss: 3.909


 40%|███▉      | 19926/50000 [29:45<40:52, 12.26it/s]

Epochs: 19924 | epoch avg. loss: 0.199 | test avg. loss: 3.925
Epochs: 19925 | epoch avg. loss: 0.177 | test avg. loss: 4.657
Epochs: 19926 | epoch avg. loss: 0.343 | test avg. loss: 4.321


 40%|███▉      | 19930/50000 [29:45<38:00, 13.19it/s]

Epochs: 19927 | epoch avg. loss: 0.402 | test avg. loss: 4.369
Epochs: 19928 | epoch avg. loss: 0.144 | test avg. loss: 4.544
Epochs: 19929 | epoch avg. loss: 0.158 | test avg. loss: 4.188


 40%|███▉      | 19932/50000 [29:45<37:43, 13.29it/s]

Epochs: 19930 | epoch avg. loss: 0.089 | test avg. loss: 4.347
Epochs: 19931 | epoch avg. loss: 0.136 | test avg. loss: 4.432
Epochs: 19932 | epoch avg. loss: 0.047 | test avg. loss: 4.430


 40%|███▉      | 19936/50000 [29:45<36:04, 13.89it/s]

Epochs: 19933 | epoch avg. loss: 0.042 | test avg. loss: 4.453
Epochs: 19934 | epoch avg. loss: 0.052 | test avg. loss: 4.159
Epochs: 19935 | epoch avg. loss: 0.040 | test avg. loss: 4.027


 40%|███▉      | 19938/50000 [29:45<36:58, 13.55it/s]

Epochs: 19936 | epoch avg. loss: 0.034 | test avg. loss: 4.069
Epochs: 19937 | epoch avg. loss: 0.034 | test avg. loss: 4.120
Epochs: 19938 | epoch avg. loss: 0.115 | test avg. loss: 4.283


 40%|███▉      | 19942/50000 [29:46<40:35, 12.34it/s]

Epochs: 19939 | epoch avg. loss: 0.051 | test avg. loss: 4.329
Epochs: 19940 | epoch avg. loss: 0.058 | test avg. loss: 4.290
Epochs: 19941 | epoch avg. loss: 0.134 | test avg. loss: 4.340


 40%|███▉      | 19944/50000 [29:46<40:31, 12.36it/s]

Epochs: 19942 | epoch avg. loss: 0.072 | test avg. loss: 4.074
Epochs: 19943 | epoch avg. loss: 0.025 | test avg. loss: 4.022
Epochs: 19944 | epoch avg. loss: 0.025 | test avg. loss: 4.153


 40%|███▉      | 19948/50000 [29:46<37:06, 13.50it/s]

Epochs: 19945 | epoch avg. loss: 0.024 | test avg. loss: 4.192
Epochs: 19946 | epoch avg. loss: 0.016 | test avg. loss: 4.225
Epochs: 19947 | epoch avg. loss: 0.020 | test avg. loss: 4.166
Epochs: 19948 | epoch avg. loss: 0.015 | test avg. loss: 4.148


 40%|███▉      | 19952/50000 [29:46<37:07, 13.49it/s]

Epochs: 19949 | epoch avg. loss: 0.025 | test avg. loss: 4.106
Epochs: 19950 | epoch avg. loss: 0.017 | test avg. loss: 4.084
Epochs: 19951 | epoch avg. loss: 0.022 | test avg. loss: 4.169


 40%|███▉      | 19954/50000 [29:47<37:56, 13.20it/s]

Epochs: 19952 | epoch avg. loss: 0.012 | test avg. loss: 4.315
Epochs: 19953 | epoch avg. loss: 0.032 | test avg. loss: 4.248
Epochs: 19954 | epoch avg. loss: 0.013 | test avg. loss: 4.172


 40%|███▉      | 19958/50000 [29:47<40:17, 12.43it/s]

Epochs: 19955 | epoch avg. loss: 0.020 | test avg. loss: 4.197
Epochs: 19956 | epoch avg. loss: 0.011 | test avg. loss: 4.189
Epochs: 19957 | epoch avg. loss: 0.011 | test avg. loss: 4.212


 40%|███▉      | 19960/50000 [29:47<40:42, 12.30it/s]

Epochs: 19958 | epoch avg. loss: 0.011 | test avg. loss: 4.274
Epochs: 19959 | epoch avg. loss: 0.012 | test avg. loss: 4.182
Epochs: 19960 | epoch avg. loss: 0.018 | test avg. loss: 4.221


 40%|███▉      | 19964/50000 [29:47<38:31, 12.99it/s]

Epochs: 19961 | epoch avg. loss: 0.015 | test avg. loss: 4.158
Epochs: 19962 | epoch avg. loss: 0.012 | test avg. loss: 4.159
Epochs: 19963 | epoch avg. loss: 0.011 | test avg. loss: 4.238


 40%|███▉      | 19966/50000 [29:48<40:00, 12.51it/s]

Epochs: 19964 | epoch avg. loss: 0.018 | test avg. loss: 4.172
Epochs: 19965 | epoch avg. loss: 0.018 | test avg. loss: 4.174
Epochs: 19966 | epoch avg. loss: 0.012 | test avg. loss: 4.272


                                                     

Epochs: 19967 | epoch avg. loss: 0.024 | test avg. loss: 4.173
Epochs: 19968 | epoch avg. loss: 0.027 | test avg. loss: 4.194
Epochs: 19969 | epoch avg. loss: 0.018 | test avg. loss: 4.323


 40%|███▉      | 19972/50000 [29:48<36:43, 13.63it/s]

Epochs: 19970 | epoch avg. loss: 0.031 | test avg. loss: 4.177
Epochs: 19971 | epoch avg. loss: 0.045 | test avg. loss: 4.201
Epochs: 19972 | epoch avg. loss: 0.016 | test avg. loss: 4.295


 40%|███▉      | 19976/50000 [29:48<35:38, 14.04it/s]

Epochs: 19973 | epoch avg. loss: 0.023 | test avg. loss: 4.162
Epochs: 19974 | epoch avg. loss: 0.034 | test avg. loss: 4.123
Epochs: 19975 | epoch avg. loss: 0.024 | test avg. loss: 4.221


 40%|███▉      | 19978/50000 [29:48<38:33, 12.97it/s]

Epochs: 19976 | epoch avg. loss: 0.028 | test avg. loss: 4.224
Epochs: 19977 | epoch avg. loss: 0.015 | test avg. loss: 4.206


 40%|███▉      | 19980/50000 [29:49<41:50, 11.96it/s]

Epochs: 19978 | epoch avg. loss: 0.034 | test avg. loss: 4.328
Epochs: 19979 | epoch avg. loss: 0.032 | test avg. loss: 4.286
Epochs: 19980 | epoch avg. loss: 0.020 | test avg. loss: 4.203




Epochs: 19981 | epoch avg. loss: 0.021 | test avg. loss: 4.304
Epochs: 19982 | epoch avg. loss: 0.048 | test avg. loss: 4.111
Epochs: 19983 | epoch avg. loss: 0.049 | test avg. loss: 4.163


 40%|███▉      | 19986/50000 [29:49<37:50, 13.22it/s]

Epochs: 19984 | epoch avg. loss: 0.038 | test avg. loss: 4.584
Epochs: 19985 | epoch avg. loss: 0.177 | test avg. loss: 4.211
Epochs: 19986 | epoch avg. loss: 0.159 | test avg. loss: 4.216


 40%|███▉      | 19990/50000 [29:49<34:58, 14.30it/s]

Epochs: 19987 | epoch avg. loss: 0.156 | test avg. loss: 4.513
Epochs: 19988 | epoch avg. loss: 0.231 | test avg. loss: 4.227
Epochs: 19989 | epoch avg. loss: 0.078 | test avg. loss: 4.248


                                                     

Epochs: 19990 | epoch avg. loss: 0.098 | test avg. loss: 4.806
Epochs: 19991 | epoch avg. loss: 0.230 | test avg. loss: 4.222
Epochs: 19992 | epoch avg. loss: 0.156 | test avg. loss: 4.191


 40%|███▉      | 19996/50000 [29:50<34:29, 14.50it/s]

Epochs: 19993 | epoch avg. loss: 0.099 | test avg. loss: 4.556
Epochs: 19994 | epoch avg. loss: 0.279 | test avg. loss: 4.218
Epochs: 19995 | epoch avg. loss: 0.049 | test avg. loss: 4.239


 40%|███▉      | 19998/50000 [29:50<35:03, 14.26it/s]

Epochs: 19996 | epoch avg. loss: 0.048 | test avg. loss: 4.481
Epochs: 19997 | epoch avg. loss: 0.098 | test avg. loss: 4.058
Epochs: 19998 | epoch avg. loss: 0.087 | test avg. loss: 4.048


 40%|███▉      | 19998/50000 [29:50<35:03, 14.26it/s]

Epochs: 19999 | epoch avg. loss: 0.039 | test avg. loss: 4.168


 40%|████      | 20004/50000 [29:52<1:29:28,  5.59it/s]

Epochs: 20000 | epoch avg. loss: 0.059 | test avg. loss: 4.157
Epochs: 20001 | epoch avg. loss: 0.243 | test avg. loss: 4.334
Epochs: 20002 | epoch avg. loss: 0.160 | test avg. loss: 4.543
Epochs: 20003 | epoch avg. loss: 0.129 | test avg. loss: 4.221


 40%|████      | 20008/50000 [29:52<59:27,  8.41it/s]  

Epochs: 20004 | epoch avg. loss: 0.108 | test avg. loss: 4.141
Epochs: 20005 | epoch avg. loss: 0.056 | test avg. loss: 4.106
Epochs: 20006 | epoch avg. loss: 0.045 | test avg. loss: 4.040
Epochs: 20007 | epoch avg. loss: 0.024 | test avg. loss: 4.266


 40%|████      | 20010/50000 [29:52<52:01,  9.61it/s]

Epochs: 20008 | epoch avg. loss: 0.052 | test avg. loss: 4.233
Epochs: 20009 | epoch avg. loss: 0.025 | test avg. loss: 4.229
Epochs: 20010 | epoch avg. loss: 0.026 | test avg. loss: 4.357


 40%|████      | 20014/50000 [29:53<46:01, 10.86it/s]

Epochs: 20011 | epoch avg. loss: 0.055 | test avg. loss: 4.110
Epochs: 20012 | epoch avg. loss: 0.041 | test avg. loss: 4.130
Epochs: 20013 | epoch avg. loss: 0.028 | test avg. loss: 4.312


 40%|████      | 20016/50000 [29:53<43:46, 11.41it/s]

Epochs: 20014 | epoch avg. loss: 0.061 | test avg. loss: 4.166
Epochs: 20015 | epoch avg. loss: 0.013 | test avg. loss: 4.214
Epochs: 20016 | epoch avg. loss: 0.018 | test avg. loss: 4.238




Epochs: 20017 | epoch avg. loss: 0.019 | test avg. loss: 4.203
Epochs: 20018 | epoch avg. loss: 0.014 | test avg. loss: 4.168
Epochs: 20019 | epoch avg. loss: 0.017 | test avg. loss: 4.308


 40%|████      | 20024/50000 [29:53<36:12, 13.80it/s]

Epochs: 20020 | epoch avg. loss: 0.062 | test avg. loss: 4.125
Epochs: 20021 | epoch avg. loss: 0.054 | test avg. loss: 4.058
Epochs: 20022 | epoch avg. loss: 0.050 | test avg. loss: 4.318
Epochs: 20023 | epoch avg. loss: 0.107 | test avg. loss: 4.106


 40%|████      | 20026/50000 [29:53<38:17, 13.05it/s]

Epochs: 20024 | epoch avg. loss: 0.049 | test avg. loss: 4.215
Epochs: 20025 | epoch avg. loss: 0.078 | test avg. loss: 4.696
Epochs: 20026 | epoch avg. loss: 0.229 | test avg. loss: 4.112


 40%|████      | 20030/50000 [29:54<34:57, 14.29it/s]

Epochs: 20027 | epoch avg. loss: 0.211 | test avg. loss: 4.081
Epochs: 20028 | epoch avg. loss: 0.128 | test avg. loss: 4.776
Epochs: 20029 | epoch avg. loss: 0.303 | test avg. loss: 4.303
Epochs: 20030 | epoch avg. loss: 0.447 | test avg. loss: 4.409


 40%|████      | 20034/50000 [29:54<33:39, 14.84it/s]

Epochs: 20031 | epoch avg. loss: 0.280 | test avg. loss: 4.987
Epochs: 20032 | epoch avg. loss: 0.389 | test avg. loss: 4.539
Epochs: 20033 | epoch avg. loss: 0.464 | test avg. loss: 4.306
Epochs: 20034 | epoch avg. loss: 0.119 | test avg. loss: 4.616


 40%|████      | 20038/50000 [29:54<35:27, 14.08it/s]

Epochs: 20035 | epoch avg. loss: 0.164 | test avg. loss: 4.196
Epochs: 20036 | epoch avg. loss: 0.157 | test avg. loss: 4.170
Epochs: 20037 | epoch avg. loss: 0.044 | test avg. loss: 4.246


 40%|████      | 20040/50000 [29:54<37:20, 13.37it/s]

Epochs: 20038 | epoch avg. loss: 0.037 | test avg. loss: 4.138
Epochs: 20039 | epoch avg. loss: 0.031 | test avg. loss: 4.173
Epochs: 20040 | epoch avg. loss: 0.024 | test avg. loss: 4.274


 40%|████      | 20044/50000 [29:55<37:35, 13.28it/s]

Epochs: 20041 | epoch avg. loss: 0.026 | test avg. loss: 4.166
Epochs: 20042 | epoch avg. loss: 0.073 | test avg. loss: 4.209
Epochs: 20043 | epoch avg. loss: 0.040 | test avg. loss: 4.289


 40%|████      | 20046/50000 [29:55<36:56, 13.51it/s]

Epochs: 20044 | epoch avg. loss: 0.049 | test avg. loss: 4.160
Epochs: 20045 | epoch avg. loss: 0.139 | test avg. loss: 4.312
Epochs: 20046 | epoch avg. loss: 0.091 | test avg. loss: 4.272


 40%|████      | 20050/50000 [29:55<37:32, 13.30it/s]

Epochs: 20047 | epoch avg. loss: 0.041 | test avg. loss: 4.120
Epochs: 20048 | epoch avg. loss: 0.058 | test avg. loss: 4.309
Epochs: 20049 | epoch avg. loss: 0.077 | test avg. loss: 4.055
Epochs: 20050 | epoch avg. loss: 0.082 | test avg. loss: 4.112


 40%|████      | 20052/50000 [29:55<36:33, 13.65it/s]

Epochs: 20051 | epoch avg. loss: 0.194 | test avg. loss: 4.237
Epochs: 20052 | epoch avg. loss: 0.074 | test avg. loss: 4.584


 40%|████      | 20056/50000 [29:56<40:31, 12.31it/s]

Epochs: 20053 | epoch avg. loss: 0.144 | test avg. loss: 4.238
Epochs: 20054 | epoch avg. loss: 0.194 | test avg. loss: 4.187
Epochs: 20055 | epoch avg. loss: 0.065 | test avg. loss: 4.562


 40%|████      | 20058/50000 [29:56<40:34, 12.30it/s]

Epochs: 20056 | epoch avg. loss: 0.232 | test avg. loss: 4.111
Epochs: 20057 | epoch avg. loss: 0.197 | test avg. loss: 4.109
Epochs: 20058 | epoch avg. loss: 0.089 | test avg. loss: 4.368


 40%|████      | 20062/50000 [29:56<37:28, 13.31it/s]

Epochs: 20059 | epoch avg. loss: 0.186 | test avg. loss: 4.202
Epochs: 20060 | epoch avg. loss: 0.052 | test avg. loss: 4.260
Epochs: 20061 | epoch avg. loss: 0.060 | test avg. loss: 4.720
Epochs: 20062 | epoch avg. loss: 0.193 | test avg. loss: 4.268


 40%|████      | 20066/50000 [29:56<34:45, 14.35it/s]

Epochs: 20063 | epoch avg. loss: 0.081 | test avg. loss: 4.200
Epochs: 20064 | epoch avg. loss: 0.035 | test avg. loss: 4.277
Epochs: 20065 | epoch avg. loss: 0.040 | test avg. loss: 4.195


 40%|████      | 20068/50000 [29:57<38:13, 13.05it/s]

Epochs: 20066 | epoch avg. loss: 0.026 | test avg. loss: 4.187
Epochs: 20067 | epoch avg. loss: 0.055 | test avg. loss: 4.334
Epochs: 20068 | epoch avg. loss: 0.096 | test avg. loss: 4.251


 40%|████      | 20072/50000 [29:57<36:55, 13.51it/s]

Epochs: 20069 | epoch avg. loss: 0.038 | test avg. loss: 4.128
Epochs: 20070 | epoch avg. loss: 0.076 | test avg. loss: 4.258
Epochs: 20071 | epoch avg. loss: 0.049 | test avg. loss: 4.241


 40%|████      | 20074/50000 [29:57<38:06, 13.09it/s]

Epochs: 20072 | epoch avg. loss: 0.039 | test avg. loss: 4.237
Epochs: 20073 | epoch avg. loss: 0.187 | test avg. loss: 4.286
Epochs: 20074 | epoch avg. loss: 0.096 | test avg. loss: 4.176


 40%|████      | 20078/50000 [29:57<40:32, 12.30it/s]

Epochs: 20075 | epoch avg. loss: 0.133 | test avg. loss: 4.866
Epochs: 20076 | epoch avg. loss: 1.090 | test avg. loss: 4.650
Epochs: 20077 | epoch avg. loss: 0.631 | test avg. loss: 4.788


 40%|████      | 20080/50000 [29:58<43:20, 11.51it/s]

Epochs: 20078 | epoch avg. loss: 0.421 | test avg. loss: 5.153
Epochs: 20079 | epoch avg. loss: 1.115 | test avg. loss: 5.159
Epochs: 20080 | epoch avg. loss: 0.680 | test avg. loss: 4.218


 40%|████      | 20084/50000 [29:58<40:35, 12.29it/s]

Epochs: 20081 | epoch avg. loss: 0.193 | test avg. loss: 4.099
Epochs: 20082 | epoch avg. loss: 0.340 | test avg. loss: 4.158
Epochs: 20083 | epoch avg. loss: 0.207 | test avg. loss: 4.280


 40%|████      | 20088/50000 [29:58<36:10, 13.78it/s]

Epochs: 20084 | epoch avg. loss: 0.097 | test avg. loss: 4.209
Epochs: 20085 | epoch avg. loss: 0.102 | test avg. loss: 4.809
Epochs: 20086 | epoch avg. loss: 0.218 | test avg. loss: 4.463
Epochs: 20087 | epoch avg. loss: 0.231 | test avg. loss: 4.367


 40%|████      | 20090/50000 [29:58<34:44, 14.35it/s]

Epochs: 20088 | epoch avg. loss: 0.151 | test avg. loss: 5.066
Epochs: 20089 | epoch avg. loss: 0.557 | test avg. loss: 4.127
Epochs: 20090 | epoch avg. loss: 0.397 | test avg. loss: 4.270
Epochs: 20091 | epoch avg. loss: 0.154 | test avg. loss: 4.858


 40%|████      | 20094/50000 [29:59<37:52, 13.16it/s]

Epochs: 20092 | epoch avg. loss: 0.177 | test avg. loss: 4.409
Epochs: 20093 | epoch avg. loss: 0.107 | test avg. loss: 4.540
Epochs: 20094 | epoch avg. loss: 0.087 | test avg. loss: 4.207


 40%|████      | 20098/50000 [29:59<39:40, 12.56it/s]

Epochs: 20095 | epoch avg. loss: 0.057 | test avg. loss: 4.224
Epochs: 20096 | epoch avg. loss: 0.100 | test avg. loss: 4.402
Epochs: 20097 | epoch avg. loss: 0.121 | test avg. loss: 4.109


 40%|████      | 20098/50000 [29:59<39:40, 12.56it/s]

Epochs: 20098 | epoch avg. loss: 0.138 | test avg. loss: 4.310
Epochs: 20099 | epoch avg. loss: 0.103 | test avg. loss: 4.366


 40%|████      | 20102/50000 [30:01<1:55:28,  4.31it/s]

Epochs: 20100 | epoch avg. loss: 0.068 | test avg. loss: 4.368
Epochs: 20101 | epoch avg. loss: 0.169 | test avg. loss: 4.363
Epochs: 20102 | epoch avg. loss: 0.071 | test avg. loss: 4.114


 40%|████      | 20106/50000 [30:01<1:16:12,  6.54it/s]

Epochs: 20103 | epoch avg. loss: 0.050 | test avg. loss: 4.110
Epochs: 20104 | epoch avg. loss: 0.177 | test avg. loss: 4.192
Epochs: 20105 | epoch avg. loss: 0.078 | test avg. loss: 4.191


 40%|████      | 20108/50000 [30:01<1:04:57,  7.67it/s]

Epochs: 20106 | epoch avg. loss: 0.051 | test avg. loss: 4.252
Epochs: 20107 | epoch avg. loss: 0.148 | test avg. loss: 4.507
Epochs: 20108 | epoch avg. loss: 0.194 | test avg. loss: 4.263


 40%|████      | 20112/50000 [30:01<51:19,  9.70it/s]

Epochs: 20109 | epoch avg. loss: 0.121 | test avg. loss: 4.237
Epochs: 20110 | epoch avg. loss: 0.095 | test avg. loss: 4.631
Epochs: 20111 | epoch avg. loss: 0.210 | test avg. loss: 4.212


 40%|████      | 20114/50000 [30:02<49:42, 10.02it/s]

Epochs: 20112 | epoch avg. loss: 0.216 | test avg. loss: 4.332
Epochs: 20113 | epoch avg. loss: 0.357 | test avg. loss: 4.230
Epochs: 20114 | epoch avg. loss: 0.188 | test avg. loss: 4.609


 40%|████      | 20118/50000 [30:02<44:23, 11.22it/s]

Epochs: 20115 | epoch avg. loss: 0.349 | test avg. loss: 4.956
Epochs: 20116 | epoch avg. loss: 0.284 | test avg. loss: 4.248
Epochs: 20117 | epoch avg. loss: 0.093 | test avg. loss: 4.164


 40%|████      | 20120/50000 [30:02<44:20, 11.23it/s]

Epochs: 20118 | epoch avg. loss: 0.300 | test avg. loss: 4.248
Epochs: 20119 | epoch avg. loss: 0.259 | test avg. loss: 4.157
Epochs: 20120 | epoch avg. loss: 0.120 | test avg. loss: 4.380


 40%|████      | 20124/50000 [30:02<41:00, 12.14it/s]

Epochs: 20121 | epoch avg. loss: 0.209 | test avg. loss: 5.061
Epochs: 20122 | epoch avg. loss: 0.272 | test avg. loss: 4.537
Epochs: 20123 | epoch avg. loss: 0.212 | test avg. loss: 4.531


 40%|████      | 20126/50000 [30:03<42:58, 11.59it/s]

Epochs: 20124 | epoch avg. loss: 0.130 | test avg. loss: 4.699
Epochs: 20125 | epoch avg. loss: 0.143 | test avg. loss: 4.280
Epochs: 20126 | epoch avg. loss: 0.184 | test avg. loss: 4.283


 40%|████      | 20130/50000 [30:03<40:37, 12.26it/s]

Epochs: 20127 | epoch avg. loss: 0.021 | test avg. loss: 4.350
Epochs: 20128 | epoch avg. loss: 0.042 | test avg. loss: 4.453
Epochs: 20129 | epoch avg. loss: 0.041 | test avg. loss: 4.676


 40%|████      | 20132/50000 [30:03<39:56, 12.46it/s]

Epochs: 20130 | epoch avg. loss: 0.062 | test avg. loss: 4.375
Epochs: 20131 | epoch avg. loss: 0.061 | test avg. loss: 4.307
Epochs: 20132 | epoch avg. loss: 0.020 | test avg. loss: 4.336


 40%|████      | 20136/50000 [30:03<37:21, 13.32it/s]

Epochs: 20133 | epoch avg. loss: 0.022 | test avg. loss: 4.250
Epochs: 20134 | epoch avg. loss: 0.036 | test avg. loss: 4.368
Epochs: 20135 | epoch avg. loss: 0.023 | test avg. loss: 4.483
Epochs: 20136 | epoch avg. loss: 0.024 | test avg. loss: 4.414


 40%|████      | 20140/50000 [30:04<37:21, 13.32it/s]

Epochs: 20137 | epoch avg. loss: 0.059 | test avg. loss: 4.526
Epochs: 20138 | epoch avg. loss: 0.071 | test avg. loss: 4.373
Epochs: 20139 | epoch avg. loss: 0.028 | test avg. loss: 4.310
Epochs: 20140 | epoch avg. loss: 0.029 | test avg. loss: 4.457


 40%|████      | 20144/50000 [30:04<34:55, 14.25it/s]

Epochs: 20141 | epoch avg. loss: 0.045 | test avg. loss: 4.287
Epochs: 20142 | epoch avg. loss: 0.030 | test avg. loss: 4.356
Epochs: 20143 | epoch avg. loss: 0.080 | test avg. loss: 4.279
Epochs: 20144 | epoch avg. loss: 0.030 | test avg. loss: 4.276


 40%|████      | 20148/50000 [30:04<32:30, 15.31it/s]

Epochs: 20145 | epoch avg. loss: 0.040 | test avg. loss: 4.385
Epochs: 20146 | epoch avg. loss: 0.010 | test avg. loss: 4.438
Epochs: 20147 | epoch avg. loss: 0.011 | test avg. loss: 4.490
Epochs: 20148 | epoch avg. loss: 0.020 | test avg. loss: 4.323


 40%|████      | 20152/50000 [30:04<35:10, 14.14it/s]

Epochs: 20149 | epoch avg. loss: 0.018 | test avg. loss: 4.313
Epochs: 20150 | epoch avg. loss: 0.031 | test avg. loss: 4.264
Epochs: 20151 | epoch avg. loss: 0.012 | test avg. loss: 4.288


 40%|████      | 20154/50000 [30:05<37:40, 13.21it/s]

Epochs: 20152 | epoch avg. loss: 0.010 | test avg. loss: 4.296
Epochs: 20153 | epoch avg. loss: 0.015 | test avg. loss: 4.337
Epochs: 20154 | epoch avg. loss: 0.010 | test avg. loss: 4.420


 40%|████      | 20158/50000 [30:05<34:43, 14.32it/s]

Epochs: 20155 | epoch avg. loss: 0.010 | test avg. loss: 4.364
Epochs: 20156 | epoch avg. loss: 0.024 | test avg. loss: 4.338
Epochs: 20157 | epoch avg. loss: 0.014 | test avg. loss: 4.395
Epochs: 20158 | epoch avg. loss: 0.030 | test avg. loss: 4.364


 40%|████      | 20162/50000 [30:05<32:38, 15.23it/s]

Epochs: 20159 | epoch avg. loss: 0.009 | test avg. loss: 4.373
Epochs: 20160 | epoch avg. loss: 0.012 | test avg. loss: 4.393
Epochs: 20161 | epoch avg. loss: 0.008 | test avg. loss: 4.359
Epochs: 20162 | epoch avg. loss: 0.007 | test avg. loss: 4.311


 40%|████      | 20166/50000 [30:05<34:32, 14.39it/s]

Epochs: 20163 | epoch avg. loss: 0.008 | test avg. loss: 4.324
Epochs: 20164 | epoch avg. loss: 0.011 | test avg. loss: 4.337
Epochs: 20165 | epoch avg. loss: 0.009 | test avg. loss: 4.288




Epochs: 20166 | epoch avg. loss: 0.034 | test avg. loss: 4.336
Epochs: 20167 | epoch avg. loss: 0.024 | test avg. loss: 4.424
Epochs: 20168 | epoch avg. loss: 0.044 | test avg. loss: 4.291


 40%|████      | 20172/50000 [30:06<37:08, 13.38it/s]

Epochs: 20169 | epoch avg. loss: 0.018 | test avg. loss: 4.338
Epochs: 20170 | epoch avg. loss: 0.015 | test avg. loss: 4.409
Epochs: 20171 | epoch avg. loss: 0.018 | test avg. loss: 4.338


 40%|████      | 20174/50000 [30:06<40:09, 12.38it/s]

Epochs: 20172 | epoch avg. loss: 0.018 | test avg. loss: 4.430
Epochs: 20173 | epoch avg. loss: 0.036 | test avg. loss: 4.312
Epochs: 20174 | epoch avg. loss: 0.040 | test avg. loss: 4.319


 40%|████      | 20178/50000 [30:06<36:15, 13.71it/s]

Epochs: 20175 | epoch avg. loss: 0.099 | test avg. loss: 4.397
Epochs: 20176 | epoch avg. loss: 0.072 | test avg. loss: 4.349
Epochs: 20177 | epoch avg. loss: 0.030 | test avg. loss: 4.235


 40%|████      | 20180/50000 [30:07<38:13, 13.00it/s]

Epochs: 20178 | epoch avg. loss: 0.032 | test avg. loss: 4.334
Epochs: 20179 | epoch avg. loss: 0.041 | test avg. loss: 4.191
Epochs: 20180 | epoch avg. loss: 0.044 | test avg. loss: 4.250


 40%|████      | 20184/50000 [30:07<35:46, 13.89it/s]

Epochs: 20181 | epoch avg. loss: 0.037 | test avg. loss: 4.396
Epochs: 20182 | epoch avg. loss: 0.033 | test avg. loss: 4.381
Epochs: 20183 | epoch avg. loss: 0.060 | test avg. loss: 4.507
Epochs: 20184 | epoch avg. loss: 0.043 | test avg. loss: 4.483


 40%|████      | 20188/50000 [30:07<33:51, 14.68it/s]

Epochs: 20185 | epoch avg. loss: 0.035 | test avg. loss: 4.374
Epochs: 20186 | epoch avg. loss: 0.105 | test avg. loss: 4.459
Epochs: 20187 | epoch avg. loss: 0.075 | test avg. loss: 4.184
Epochs: 20188 | epoch avg. loss: 0.062 | test avg. loss: 4.224




Epochs: 20189 | epoch avg. loss: 0.110 | test avg. loss: 4.444
Epochs: 20190 | epoch avg. loss: 0.073 | test avg. loss: 4.443
Epochs: 20191 | epoch avg. loss: 0.018 | test avg. loss: 4.395


 40%|████      | 20194/50000 [30:08<35:13, 14.10it/s]

Epochs: 20192 | epoch avg. loss: 0.013 | test avg. loss: 4.346
Epochs: 20193 | epoch avg. loss: 0.016 | test avg. loss: 4.297
Epochs: 20194 | epoch avg. loss: 0.024 | test avg. loss: 4.359


 40%|████      | 20198/50000 [30:08<37:44, 13.16it/s]

Epochs: 20195 | epoch avg. loss: 0.008 | test avg. loss: 4.373
Epochs: 20196 | epoch avg. loss: 0.013 | test avg. loss: 4.415
Epochs: 20197 | epoch avg. loss: 0.009 | test avg. loss: 4.457


 40%|████      | 20198/50000 [30:08<37:44, 13.16it/s]

Epochs: 20198 | epoch avg. loss: 0.010 | test avg. loss: 4.378
Epochs: 20199 | epoch avg. loss: 0.013 | test avg. loss: 4.403


                                                       

Epochs: 20200 | epoch avg. loss: 0.010 | test avg. loss: 4.327
Epochs: 20201 | epoch avg. loss: 0.024 | test avg. loss: 4.308
Epochs: 20202 | epoch avg. loss: 0.019 | test avg. loss: 4.448


 40%|████      | 20206/50000 [30:10<1:08:46,  7.22it/s]

Epochs: 20203 | epoch avg. loss: 0.031 | test avg. loss: 4.349
Epochs: 20204 | epoch avg. loss: 0.020 | test avg. loss: 4.331
Epochs: 20205 | epoch avg. loss: 0.015 | test avg. loss: 4.414
Epochs: 20206 | epoch avg. loss: 0.019 | test avg. loss: 4.333


 40%|████      | 20210/50000 [30:10<50:40,  9.80it/s]

Epochs: 20207 | epoch avg. loss: 0.021 | test avg. loss: 4.361
Epochs: 20208 | epoch avg. loss: 0.014 | test avg. loss: 4.539
Epochs: 20209 | epoch avg. loss: 0.046 | test avg. loss: 4.407


 40%|████      | 20214/50000 [30:10<41:36, 11.93it/s]

Epochs: 20210 | epoch avg. loss: 0.007 | test avg. loss: 4.377
Epochs: 20211 | epoch avg. loss: 0.008 | test avg. loss: 4.414
Epochs: 20212 | epoch avg. loss: 0.017 | test avg. loss: 4.386
Epochs: 20213 | epoch avg. loss: 0.010 | test avg. loss: 4.412


 40%|████      | 20216/50000 [30:10<39:51, 12.46it/s]

Epochs: 20214 | epoch avg. loss: 0.009 | test avg. loss: 4.481
Epochs: 20215 | epoch avg. loss: 0.014 | test avg. loss: 4.397
Epochs: 20216 | epoch avg. loss: 0.026 | test avg. loss: 4.347


 40%|████      | 20220/50000 [30:11<37:48, 13.13it/s]

Epochs: 20217 | epoch avg. loss: 0.017 | test avg. loss: 4.443
Epochs: 20218 | epoch avg. loss: 0.032 | test avg. loss: 4.357
Epochs: 20219 | epoch avg. loss: 0.013 | test avg. loss: 4.353


                                                     

Epochs: 20220 | epoch avg. loss: 0.010 | test avg. loss: 4.396
Epochs: 20221 | epoch avg. loss: 0.013 | test avg. loss: 4.370
Epochs: 20222 | epoch avg. loss: 0.008 | test avg. loss: 4.391


 40%|████      | 20226/50000 [30:11<34:38, 14.33it/s]

Epochs: 20223 | epoch avg. loss: 0.007 | test avg. loss: 4.403
Epochs: 20224 | epoch avg. loss: 0.009 | test avg. loss: 4.358
Epochs: 20225 | epoch avg. loss: 0.026 | test avg. loss: 4.478
Epochs: 20226 | epoch avg. loss: 0.036 | test avg. loss: 4.405


 40%|████      | 20230/50000 [30:11<33:01, 15.02it/s]

Epochs: 20227 | epoch avg. loss: 0.017 | test avg. loss: 4.396
Epochs: 20228 | epoch avg. loss: 0.024 | test avg. loss: 4.511
Epochs: 20229 | epoch avg. loss: 0.033 | test avg. loss: 4.408
Epochs: 20230 | epoch avg. loss: 0.051 | test avg. loss: 4.407


 40%|████      | 20234/50000 [30:12<37:06, 13.37it/s]

Epochs: 20231 | epoch avg. loss: 0.036 | test avg. loss: 4.509
Epochs: 20232 | epoch avg. loss: 0.037 | test avg. loss: 4.342
Epochs: 20233 | epoch avg. loss: 0.022 | test avg. loss: 4.311


 40%|████      | 20236/50000 [30:12<40:15, 12.32it/s]

Epochs: 20234 | epoch avg. loss: 0.023 | test avg. loss: 4.576
Epochs: 20235 | epoch avg. loss: 0.115 | test avg. loss: 4.358
Epochs: 20236 | epoch avg. loss: 0.071 | test avg. loss: 4.505


 40%|████      | 20240/50000 [30:12<37:24, 13.26it/s]

Epochs: 20237 | epoch avg. loss: 0.050 | test avg. loss: 4.825
Epochs: 20238 | epoch avg. loss: 0.112 | test avg. loss: 4.480
Epochs: 20239 | epoch avg. loss: 0.034 | test avg. loss: 4.353
Epochs: 20240 | epoch avg. loss: 0.060 | test avg. loss: 4.607


 40%|████      | 20244/50000 [30:12<34:44, 14.28it/s]

Epochs: 20241 | epoch avg. loss: 0.128 | test avg. loss: 4.512
Epochs: 20242 | epoch avg. loss: 0.112 | test avg. loss: 4.643
Epochs: 20243 | epoch avg. loss: 0.087 | test avg. loss: 5.204


 40%|████      | 20248/50000 [30:13<34:44, 14.27it/s]

Epochs: 20244 | epoch avg. loss: 0.238 | test avg. loss: 4.436
Epochs: 20245 | epoch avg. loss: 0.221 | test avg. loss: 4.435
Epochs: 20246 | epoch avg. loss: 0.168 | test avg. loss: 4.847
Epochs: 20247 | epoch avg. loss: 0.321 | test avg. loss: 4.624


 41%|████      | 20252/50000 [30:13<33:47, 14.67it/s]

Epochs: 20248 | epoch avg. loss: 0.233 | test avg. loss: 4.693
Epochs: 20249 | epoch avg. loss: 0.291 | test avg. loss: 7.265
Epochs: 20250 | epoch avg. loss: 1.658 | test avg. loss: 4.558
Epochs: 20251 | epoch avg. loss: 1.336 | test avg. loss: 4.176


 41%|████      | 20254/50000 [30:13<34:33, 14.34it/s]

Epochs: 20252 | epoch avg. loss: 0.532 | test avg. loss: 5.761
Epochs: 20253 | epoch avg. loss: 1.176 | test avg. loss: 5.175
Epochs: 20254 | epoch avg. loss: 1.392 | test avg. loss: 4.869


 41%|████      | 20258/50000 [30:13<36:17, 13.66it/s]

Epochs: 20255 | epoch avg. loss: 0.921 | test avg. loss: 8.223
Epochs: 20256 | epoch avg. loss: 2.156 | test avg. loss: 6.082
Epochs: 20257 | epoch avg. loss: 1.937 | test avg. loss: 4.642


 41%|████      | 20260/50000 [30:14<35:45, 13.86it/s]

Epochs: 20258 | epoch avg. loss: 0.803 | test avg. loss: 4.672
Epochs: 20259 | epoch avg. loss: 0.457 | test avg. loss: 4.776
Epochs: 20260 | epoch avg. loss: 0.627 | test avg. loss: 5.807


 41%|████      | 20264/50000 [30:14<36:46, 13.48it/s]

Epochs: 20261 | epoch avg. loss: 0.728 | test avg. loss: 4.824
Epochs: 20262 | epoch avg. loss: 1.132 | test avg. loss: 4.439
Epochs: 20263 | epoch avg. loss: 0.766 | test avg. loss: 4.196


 41%|████      | 20266/50000 [30:14<37:41, 13.15it/s]

Epochs: 20264 | epoch avg. loss: 0.555 | test avg. loss: 4.472
Epochs: 20265 | epoch avg. loss: 0.910 | test avg. loss: 4.934
Epochs: 20266 | epoch avg. loss: 0.638 | test avg. loss: 4.474


 41%|████      | 20270/50000 [30:14<40:07, 12.35it/s]

Epochs: 20267 | epoch avg. loss: 0.755 | test avg. loss: 4.792
Epochs: 20268 | epoch avg. loss: 0.568 | test avg. loss: 4.866
Epochs: 20269 | epoch avg. loss: 0.350 | test avg. loss: 4.404


 41%|████      | 20272/50000 [30:15<43:28, 11.40it/s]

Epochs: 20270 | epoch avg. loss: 0.316 | test avg. loss: 4.952
Epochs: 20271 | epoch avg. loss: 0.669 | test avg. loss: 4.544
Epochs: 20272 | epoch avg. loss: 0.465 | test avg. loss: 4.515


 41%|████      | 20276/50000 [30:15<44:07, 11.23it/s]

Epochs: 20273 | epoch avg. loss: 0.231 | test avg. loss: 4.902
Epochs: 20274 | epoch avg. loss: 0.285 | test avg. loss: 4.547
Epochs: 20275 | epoch avg. loss: 0.379 | test avg. loss: 4.512


 41%|████      | 20278/50000 [30:15<43:45, 11.32it/s]

Epochs: 20276 | epoch avg. loss: 0.237 | test avg. loss: 4.088
Epochs: 20277 | epoch avg. loss: 0.098 | test avg. loss: 4.215


 41%|████      | 20280/50000 [30:15<46:37, 10.62it/s]

Epochs: 20278 | epoch avg. loss: 0.138 | test avg. loss: 4.312
Epochs: 20279 | epoch avg. loss: 0.132 | test avg. loss: 4.382
Epochs: 20280 | epoch avg. loss: 0.194 | test avg. loss: 4.721


 41%|████      | 20284/50000 [30:16<41:04, 12.06it/s]

Epochs: 20281 | epoch avg. loss: 0.187 | test avg. loss: 4.301
Epochs: 20282 | epoch avg. loss: 0.175 | test avg. loss: 4.474
Epochs: 20283 | epoch avg. loss: 0.384 | test avg. loss: 3.993
Epochs: 20284 | epoch avg. loss: 0.266 | test avg. loss: 4.163


 41%|████      | 20288/50000 [30:16<36:58, 13.39it/s]

Epochs: 20285 | epoch avg. loss: 0.311 | test avg. loss: 4.722
Epochs: 20286 | epoch avg. loss: 0.291 | test avg. loss: 4.581
Epochs: 20287 | epoch avg. loss: 0.233 | test avg. loss: 4.839


 41%|████      | 20290/50000 [30:16<38:20, 12.92it/s]

Epochs: 20288 | epoch avg. loss: 0.142 | test avg. loss: 4.534
Epochs: 20289 | epoch avg. loss: 0.064 | test avg. loss: 4.267
Epochs: 20290 | epoch avg. loss: 0.068 | test avg. loss: 4.379


 41%|████      | 20294/50000 [30:16<38:08, 12.98it/s]

Epochs: 20291 | epoch avg. loss: 0.111 | test avg. loss: 4.130
Epochs: 20292 | epoch avg. loss: 0.048 | test avg. loss: 4.451
Epochs: 20293 | epoch avg. loss: 0.104 | test avg. loss: 4.366


 41%|████      | 20296/50000 [30:17<38:18, 12.92it/s]

Epochs: 20294 | epoch avg. loss: 0.094 | test avg. loss: 4.588
Epochs: 20295 | epoch avg. loss: 0.050 | test avg. loss: 4.670
Epochs: 20296 | epoch avg. loss: 0.044 | test avg. loss: 4.518


 41%|████      | 20298/50000 [30:17<39:46, 12.44it/s]

Epochs: 20297 | epoch avg. loss: 0.035 | test avg. loss: 4.433
Epochs: 20298 | epoch avg. loss: 0.029 | test avg. loss: 4.296
Epochs: 20299 | epoch avg. loss: 0.068 | test avg. loss: 4.460


 41%|████      | 20304/50000 [30:19<1:32:51,  5.33it/s]

Epochs: 20300 | epoch avg. loss: 0.066 | test avg. loss: 4.352
Epochs: 20301 | epoch avg. loss: 0.040 | test avg. loss: 4.463
Epochs: 20302 | epoch avg. loss: 0.048 | test avg. loss: 4.523
Epochs: 20303 | epoch avg. loss: 0.038 | test avg. loss: 4.406


 41%|████      | 20308/50000 [30:19<1:01:06,  8.10it/s]

Epochs: 20304 | epoch avg. loss: 0.056 | test avg. loss: 4.522
Epochs: 20305 | epoch avg. loss: 0.033 | test avg. loss: 4.410
Epochs: 20306 | epoch avg. loss: 0.027 | test avg. loss: 4.441
Epochs: 20307 | epoch avg. loss: 0.023 | test avg. loss: 4.466


 41%|████      | 20310/50000 [30:19<53:23,  9.27it/s]

Epochs: 20308 | epoch avg. loss: 0.018 | test avg. loss: 4.434
Epochs: 20309 | epoch avg. loss: 0.020 | test avg. loss: 4.495
Epochs: 20310 | epoch avg. loss: 0.020 | test avg. loss: 4.366


 41%|████      | 20314/50000 [30:19<48:26, 10.21it/s]

Epochs: 20311 | epoch avg. loss: 0.016 | test avg. loss: 4.405
Epochs: 20312 | epoch avg. loss: 0.022 | test avg. loss: 4.310
Epochs: 20313 | epoch avg. loss: 0.029 | test avg. loss: 4.440


 41%|████      | 20316/50000 [30:20<48:31, 10.20it/s]

Epochs: 20314 | epoch avg. loss: 0.022 | test avg. loss: 4.464
Epochs: 20315 | epoch avg. loss: 0.023 | test avg. loss: 4.438
Epochs: 20316 | epoch avg. loss: 0.019 | test avg. loss: 4.487


 41%|████      | 20320/50000 [30:20<42:09, 11.73it/s]

Epochs: 20317 | epoch avg. loss: 0.018 | test avg. loss: 4.359
Epochs: 20318 | epoch avg. loss: 0.043 | test avg. loss: 4.462
Epochs: 20319 | epoch avg. loss: 0.055 | test avg. loss: 4.324


 41%|████      | 20322/50000 [30:20<41:09, 12.02it/s]

Epochs: 20320 | epoch avg. loss: 0.059 | test avg. loss: 4.392
Epochs: 20321 | epoch avg. loss: 0.053 | test avg. loss: 4.407
Epochs: 20322 | epoch avg. loss: 0.034 | test avg. loss: 4.362


 41%|████      | 20326/50000 [30:20<41:29, 11.92it/s]

Epochs: 20323 | epoch avg. loss: 0.026 | test avg. loss: 4.457
Epochs: 20324 | epoch avg. loss: 0.032 | test avg. loss: 4.320
Epochs: 20325 | epoch avg. loss: 0.061 | test avg. loss: 4.413


 41%|████      | 20328/50000 [30:21<41:20, 11.96it/s]

Epochs: 20326 | epoch avg. loss: 0.098 | test avg. loss: 4.375
Epochs: 20327 | epoch avg. loss: 0.031 | test avg. loss: 4.464
Epochs: 20328 | epoch avg. loss: 0.025 | test avg. loss: 4.600


 41%|████      | 20332/50000 [30:21<40:49, 12.11it/s]

Epochs: 20329 | epoch avg. loss: 0.018 | test avg. loss: 4.558
Epochs: 20330 | epoch avg. loss: 0.012 | test avg. loss: 4.477
Epochs: 20331 | epoch avg. loss: 0.011 | test avg. loss: 4.464


 41%|████      | 20334/50000 [30:21<38:06, 12.97it/s]

Epochs: 20332 | epoch avg. loss: 0.017 | test avg. loss: 4.333
Epochs: 20333 | epoch avg. loss: 0.015 | test avg. loss: 4.343
Epochs: 20334 | epoch avg. loss: 0.010 | test avg. loss: 4.391
Epochs: 20335 | epoch avg. loss: 0.009 | test avg. loss: 4.387


 41%|████      | 20338/50000 [30:21<36:03, 13.71it/s]

Epochs: 20336 | epoch avg. loss: 0.011 | test avg. loss: 4.496
Epochs: 20337 | epoch avg. loss: 0.013 | test avg. loss: 4.419
Epochs: 20338 | epoch avg. loss: 0.024 | test avg. loss: 4.405


 41%|████      | 20342/50000 [30:22<36:08, 13.68it/s]

Epochs: 20339 | epoch avg. loss: 0.009 | test avg. loss: 4.416
Epochs: 20340 | epoch avg. loss: 0.013 | test avg. loss: 4.338
Epochs: 20341 | epoch avg. loss: 0.035 | test avg. loss: 4.499




Epochs: 20342 | epoch avg. loss: 0.051 | test avg. loss: 4.404
Epochs: 20343 | epoch avg. loss: 0.025 | test avg. loss: 4.423
Epochs: 20344 | epoch avg. loss: 0.023 | test avg. loss: 4.511


 41%|████      | 20348/50000 [30:22<33:54, 14.57it/s]

Epochs: 20345 | epoch avg. loss: 0.018 | test avg. loss: 4.404
Epochs: 20346 | epoch avg. loss: 0.009 | test avg. loss: 4.394
Epochs: 20347 | epoch avg. loss: 0.007 | test avg. loss: 4.320


 41%|████      | 20350/50000 [30:22<34:45, 14.22it/s]

Epochs: 20348 | epoch avg. loss: 0.014 | test avg. loss: 4.371
Epochs: 20349 | epoch avg. loss: 0.009 | test avg. loss: 4.369
Epochs: 20350 | epoch avg. loss: 0.009 | test avg. loss: 4.444


 41%|████      | 20354/50000 [30:22<37:16, 13.25it/s]

Epochs: 20351 | epoch avg. loss: 0.008 | test avg. loss: 4.502
Epochs: 20352 | epoch avg. loss: 0.010 | test avg. loss: 4.448
Epochs: 20353 | epoch avg. loss: 0.013 | test avg. loss: 4.535


 41%|████      | 20356/50000 [30:23<38:56, 12.69it/s]

Epochs: 20354 | epoch avg. loss: 0.024 | test avg. loss: 4.397
Epochs: 20355 | epoch avg. loss: 0.028 | test avg. loss: 4.432
Epochs: 20356 | epoch avg. loss: 0.009 | test avg. loss: 4.457


 41%|████      | 20360/50000 [30:23<39:41, 12.45it/s]

Epochs: 20357 | epoch avg. loss: 0.010 | test avg. loss: 4.405
Epochs: 20358 | epoch avg. loss: 0.013 | test avg. loss: 4.512
Epochs: 20359 | epoch avg. loss: 0.024 | test avg. loss: 4.402


 41%|████      | 20364/50000 [30:23<34:51, 14.17it/s]

Epochs: 20360 | epoch avg. loss: 0.032 | test avg. loss: 4.478
Epochs: 20361 | epoch avg. loss: 0.011 | test avg. loss: 4.503
Epochs: 20362 | epoch avg. loss: 0.010 | test avg. loss: 4.486
Epochs: 20363 | epoch avg. loss: 0.010 | test avg. loss: 4.476


                                                     

Epochs: 20364 | epoch avg. loss: 0.011 | test avg. loss: 4.385
Epochs: 20365 | epoch avg. loss: 0.016 | test avg. loss: 4.427
Epochs: 20366 | epoch avg. loss: 0.009 | test avg. loss: 4.405


 41%|████      | 20370/50000 [30:24<35:52, 13.77it/s]

Epochs: 20367 | epoch avg. loss: 0.006 | test avg. loss: 4.385
Epochs: 20368 | epoch avg. loss: 0.007 | test avg. loss: 4.443
Epochs: 20369 | epoch avg. loss: 0.013 | test avg. loss: 4.386


 41%|████      | 20372/50000 [30:24<37:35, 13.14it/s]

Epochs: 20370 | epoch avg. loss: 0.006 | test avg. loss: 4.402
Epochs: 20371 | epoch avg. loss: 0.008 | test avg. loss: 4.431
Epochs: 20372 | epoch avg. loss: 0.008 | test avg. loss: 4.388


 41%|████      | 20376/50000 [30:24<35:14, 14.01it/s]

Epochs: 20373 | epoch avg. loss: 0.019 | test avg. loss: 4.451
Epochs: 20374 | epoch avg. loss: 0.006 | test avg. loss: 4.471
Epochs: 20375 | epoch avg. loss: 0.006 | test avg. loss: 4.483
Epochs: 20376 | epoch avg. loss: 0.006 | test avg. loss: 4.400


                                                     

Epochs: 20377 | epoch avg. loss: 0.019 | test avg. loss: 4.394
Epochs: 20378 | epoch avg. loss: 0.007 | test avg. loss: 4.418
Epochs: 20379 | epoch avg. loss: 0.006 | test avg. loss: 4.416


 41%|████      | 20382/50000 [30:24<35:27, 13.92it/s]

Epochs: 20380 | epoch avg. loss: 0.005 | test avg. loss: 4.434
Epochs: 20381 | epoch avg. loss: 0.006 | test avg. loss: 4.388
Epochs: 20382 | epoch avg. loss: 0.014 | test avg. loss: 4.431


 41%|████      | 20386/50000 [30:25<36:47, 13.42it/s]

Epochs: 20383 | epoch avg. loss: 0.012 | test avg. loss: 4.402
Epochs: 20384 | epoch avg. loss: 0.008 | test avg. loss: 4.381
Epochs: 20385 | epoch avg. loss: 0.007 | test avg. loss: 4.428


                                                     

Epochs: 20386 | epoch avg. loss: 0.008 | test avg. loss: 4.413
Epochs: 20387 | epoch avg. loss: 0.008 | test avg. loss: 4.417
Epochs: 20388 | epoch avg. loss: 0.008 | test avg. loss: 4.495


                                                     

Epochs: 20389 | epoch avg. loss: 0.018 | test avg. loss: 4.390
Epochs: 20390 | epoch avg. loss: 0.026 | test avg. loss: 4.455
Epochs: 20391 | epoch avg. loss: 0.023 | test avg. loss: 4.445


 41%|████      | 20394/50000 [30:25<34:09, 14.45it/s]

Epochs: 20392 | epoch avg. loss: 0.017 | test avg. loss: 4.362
Epochs: 20393 | epoch avg. loss: 0.027 | test avg. loss: 4.554
Epochs: 20394 | epoch avg. loss: 0.064 | test avg. loss: 4.404


 41%|████      | 20398/50000 [30:26<34:41, 14.22it/s]

Epochs: 20395 | epoch avg. loss: 0.039 | test avg. loss: 4.458
Epochs: 20396 | epoch avg. loss: 0.017 | test avg. loss: 4.550
Epochs: 20397 | epoch avg. loss: 0.021 | test avg. loss: 4.467


 41%|████      | 20398/50000 [30:26<34:41, 14.22it/s]

Epochs: 20398 | epoch avg. loss: 0.009 | test avg. loss: 4.457
Epochs: 20399 | epoch avg. loss: 0.007 | test avg. loss: 4.393




Epochs: 20400 | epoch avg. loss: 0.006 | test avg. loss: 4.356
Epochs: 20401 | epoch avg. loss: 0.007 | test avg. loss: 4.348
Epochs: 20402 | epoch avg. loss: 0.007 | test avg. loss: 4.380


 41%|████      | 20406/50000 [30:27<1:09:43,  7.07it/s]

Epochs: 20403 | epoch avg. loss: 0.006 | test avg. loss: 4.393
Epochs: 20404 | epoch avg. loss: 0.006 | test avg. loss: 4.453
Epochs: 20405 | epoch avg. loss: 0.010 | test avg. loss: 4.380


 41%|████      | 20408/50000 [30:28<59:17,  8.32it/s]

Epochs: 20406 | epoch avg. loss: 0.014 | test avg. loss: 4.394
Epochs: 20407 | epoch avg. loss: 0.016 | test avg. loss: 4.514
Epochs: 20408 | epoch avg. loss: 0.044 | test avg. loss: 4.314


 41%|████      | 20412/50000 [30:28<47:09, 10.46it/s]

Epochs: 20409 | epoch avg. loss: 0.049 | test avg. loss: 4.373
Epochs: 20410 | epoch avg. loss: 0.044 | test avg. loss: 4.431
Epochs: 20411 | epoch avg. loss: 0.030 | test avg. loss: 4.436


 41%|████      | 20416/50000 [30:28<38:50, 12.70it/s]

Epochs: 20412 | epoch avg. loss: 0.035 | test avg. loss: 4.515
Epochs: 20413 | epoch avg. loss: 0.019 | test avg. loss: 4.343
Epochs: 20414 | epoch avg. loss: 0.021 | test avg. loss: 4.384
Epochs: 20415 | epoch avg. loss: 0.024 | test avg. loss: 4.352


 41%|████      | 20418/50000 [30:28<36:20, 13.57it/s]

Epochs: 20416 | epoch avg. loss: 0.013 | test avg. loss: 4.339
Epochs: 20417 | epoch avg. loss: 0.013 | test avg. loss: 4.466
Epochs: 20418 | epoch avg. loss: 0.012 | test avg. loss: 4.450


 41%|████      | 20422/50000 [30:29<37:50, 13.03it/s]

Epochs: 20419 | epoch avg. loss: 0.009 | test avg. loss: 4.446
Epochs: 20420 | epoch avg. loss: 0.007 | test avg. loss: 4.401
Epochs: 20421 | epoch avg. loss: 0.008 | test avg. loss: 4.326


 41%|████      | 20424/50000 [30:29<36:54, 13.36it/s]

Epochs: 20422 | epoch avg. loss: 0.020 | test avg. loss: 4.401
Epochs: 20423 | epoch avg. loss: 0.011 | test avg. loss: 4.365
Epochs: 20424 | epoch avg. loss: 0.029 | test avg. loss: 4.438


 41%|████      | 20428/50000 [30:29<35:22, 13.93it/s]

Epochs: 20425 | epoch avg. loss: 0.022 | test avg. loss: 4.577
Epochs: 20426 | epoch avg. loss: 0.035 | test avg. loss: 4.398
Epochs: 20427 | epoch avg. loss: 0.055 | test avg. loss: 4.498
Epochs: 20428 | epoch avg. loss: 0.044 | test avg. loss: 4.498


 41%|████      | 20432/50000 [30:29<34:24, 14.32it/s]

Epochs: 20429 | epoch avg. loss: 0.032 | test avg. loss: 4.441
Epochs: 20430 | epoch avg. loss: 0.107 | test avg. loss: 4.686
Epochs: 20431 | epoch avg. loss: 0.098 | test avg. loss: 4.458


 41%|████      | 20434/50000 [30:30<34:45, 14.18it/s]

Epochs: 20432 | epoch avg. loss: 0.093 | test avg. loss: 4.440
Epochs: 20433 | epoch avg. loss: 0.077 | test avg. loss: 4.914
Epochs: 20434 | epoch avg. loss: 0.224 | test avg. loss: 4.302
Epochs: 20435 | epoch avg. loss: 0.153 | test avg. loss: 4.235


 41%|████      | 20438/50000 [30:30<37:24, 13.17it/s]

Epochs: 20436 | epoch avg. loss: 0.109 | test avg. loss: 4.534
Epochs: 20437 | epoch avg. loss: 0.155 | test avg. loss: 4.272
Epochs: 20438 | epoch avg. loss: 0.122 | test avg. loss: 4.544


 41%|████      | 20442/50000 [30:30<36:33, 13.47it/s]

Epochs: 20439 | epoch avg. loss: 0.058 | test avg. loss: 4.781
Epochs: 20440 | epoch avg. loss: 0.067 | test avg. loss: 4.646
Epochs: 20441 | epoch avg. loss: 0.138 | test avg. loss: 4.759
Epochs: 20442 | epoch avg. loss: 0.113 | test avg. loss: 4.499


 41%|████      | 20446/50000 [30:30<33:59, 14.49it/s]

Epochs: 20443 | epoch avg. loss: 0.088 | test avg. loss: 4.518
Epochs: 20444 | epoch avg. loss: 0.132 | test avg. loss: 4.981
Epochs: 20445 | epoch avg. loss: 0.252 | test avg. loss: 4.506


 41%|████      | 20448/50000 [30:31<37:05, 13.28it/s]

Epochs: 20446 | epoch avg. loss: 0.212 | test avg. loss: 4.547
Epochs: 20447 | epoch avg. loss: 0.121 | test avg. loss: 4.944
Epochs: 20448 | epoch avg. loss: 0.164 | test avg. loss: 4.593




Epochs: 20449 | epoch avg. loss: 0.209 | test avg. loss: 4.557
Epochs: 20450 | epoch avg. loss: 0.054 | test avg. loss: 4.573
Epochs: 20451 | epoch avg. loss: 0.050 | test avg. loss: 4.526


 41%|████      | 20456/50000 [30:31<33:22, 14.75it/s]

Epochs: 20452 | epoch avg. loss: 0.063 | test avg. loss: 4.717
Epochs: 20453 | epoch avg. loss: 0.098 | test avg. loss: 4.394
Epochs: 20454 | epoch avg. loss: 0.047 | test avg. loss: 4.367
Epochs: 20455 | epoch avg. loss: 0.036 | test avg. loss: 4.469


 41%|████      | 20460/50000 [30:31<32:02, 15.37it/s]

Epochs: 20456 | epoch avg. loss: 0.037 | test avg. loss: 4.422
Epochs: 20457 | epoch avg. loss: 0.084 | test avg. loss: 4.653
Epochs: 20458 | epoch avg. loss: 0.084 | test avg. loss: 4.610
Epochs: 20459 | epoch avg. loss: 0.033 | test avg. loss: 4.523


 41%|████      | 20462/50000 [30:32<35:40, 13.80it/s]

Epochs: 20460 | epoch avg. loss: 0.069 | test avg. loss: 4.696
Epochs: 20461 | epoch avg. loss: 0.049 | test avg. loss: 4.513
Epochs: 20462 | epoch avg. loss: 0.032 | test avg. loss: 4.538


 41%|████      | 20466/50000 [30:32<35:35, 13.83it/s]

Epochs: 20463 | epoch avg. loss: 0.037 | test avg. loss: 4.567
Epochs: 20464 | epoch avg. loss: 0.028 | test avg. loss: 4.457
Epochs: 20465 | epoch avg. loss: 0.056 | test avg. loss: 4.501


                                                     

Epochs: 20466 | epoch avg. loss: 0.012 | test avg. loss: 4.526
Epochs: 20467 | epoch avg. loss: 0.012 | test avg. loss: 4.481
Epochs: 20468 | epoch avg. loss: 0.035 | test avg. loss: 4.560


 41%|████      | 20472/50000 [30:32<35:48, 13.74it/s]

Epochs: 20469 | epoch avg. loss: 0.029 | test avg. loss: 4.514
Epochs: 20470 | epoch avg. loss: 0.027 | test avg. loss: 4.538
Epochs: 20471 | epoch avg. loss: 0.045 | test avg. loss: 4.740


 41%|████      | 20474/50000 [30:32<35:11, 13.99it/s]

Epochs: 20472 | epoch avg. loss: 0.081 | test avg. loss: 4.579
Epochs: 20473 | epoch avg. loss: 0.051 | test avg. loss: 4.578
Epochs: 20474 | epoch avg. loss: 0.035 | test avg. loss: 4.698


 41%|████      | 20478/50000 [30:33<40:01, 12.30it/s]

Epochs: 20475 | epoch avg. loss: 0.049 | test avg. loss: 4.453
Epochs: 20476 | epoch avg. loss: 0.040 | test avg. loss: 4.487
Epochs: 20477 | epoch avg. loss: 0.042 | test avg. loss: 4.424


 41%|████      | 20480/50000 [30:33<40:58, 12.01it/s]

Epochs: 20478 | epoch avg. loss: 0.029 | test avg. loss: 4.433
Epochs: 20479 | epoch avg. loss: 0.090 | test avg. loss: 4.664
Epochs: 20480 | epoch avg. loss: 0.040 | test avg. loss: 4.684


 41%|████      | 20484/50000 [30:33<43:53, 11.21it/s]

Epochs: 20481 | epoch avg. loss: 0.023 | test avg. loss: 4.616
Epochs: 20482 | epoch avg. loss: 0.024 | test avg. loss: 4.683
Epochs: 20483 | epoch avg. loss: 0.073 | test avg. loss: 4.416


 41%|████      | 20486/50000 [30:34<45:03, 10.92it/s]

Epochs: 20484 | epoch avg. loss: 0.045 | test avg. loss: 4.389
Epochs: 20485 | epoch avg. loss: 0.042 | test avg. loss: 4.489
Epochs: 20486 | epoch avg. loss: 0.026 | test avg. loss: 4.440


 41%|████      | 20490/50000 [30:34<41:43, 11.79it/s]

Epochs: 20487 | epoch avg. loss: 0.042 | test avg. loss: 4.512
Epochs: 20488 | epoch avg. loss: 0.028 | test avg. loss: 4.587
Epochs: 20489 | epoch avg. loss: 0.037 | test avg. loss: 4.463


 41%|████      | 20492/50000 [30:34<38:44, 12.69it/s]

Epochs: 20490 | epoch avg. loss: 0.043 | test avg. loss: 4.617
Epochs: 20491 | epoch avg. loss: 0.059 | test avg. loss: 4.600
Epochs: 20492 | epoch avg. loss: 0.035 | test avg. loss: 4.560
Epochs: 20493 | epoch avg. loss: 0.022 | test avg. loss: 4.499


 41%|████      | 20496/50000 [30:34<36:02, 13.64it/s]

Epochs: 20494 | epoch avg. loss: 0.016 | test avg. loss: 4.313
Epochs: 20495 | epoch avg. loss: 0.030 | test avg. loss: 4.322
Epochs: 20496 | epoch avg. loss: 0.015 | test avg. loss: 4.341


 41%|████      | 20498/50000 [30:34<36:54, 13.32it/s]

Epochs: 20497 | epoch avg. loss: 0.013 | test avg. loss: 4.311
Epochs: 20498 | epoch avg. loss: 0.007 | test avg. loss: 4.348
Epochs: 20499 | epoch avg. loss: 0.010 | test avg. loss: 4.305


 41%|████      | 20504/50000 [30:36<1:30:31,  5.43it/s]

Epochs: 20500 | epoch avg. loss: 0.008 | test avg. loss: 4.276
Epochs: 20501 | epoch avg. loss: 0.008 | test avg. loss: 4.349
Epochs: 20502 | epoch avg. loss: 0.021 | test avg. loss: 4.299
Epochs: 20503 | epoch avg. loss: 0.014 | test avg. loss: 4.245


 41%|████      | 20506/50000 [30:37<1:13:57,  6.65it/s]

Epochs: 20504 | epoch avg. loss: 0.025 | test avg. loss: 4.317
Epochs: 20505 | epoch avg. loss: 0.033 | test avg. loss: 4.202
Epochs: 20506 | epoch avg. loss: 0.015 | test avg. loss: 4.185


 41%|████      | 20510/50000 [30:37<54:28,  9.02it/s]  

Epochs: 20507 | epoch avg. loss: 0.021 | test avg. loss: 4.313
Epochs: 20508 | epoch avg. loss: 0.018 | test avg. loss: 4.277
Epochs: 20509 | epoch avg. loss: 0.043 | test avg. loss: 4.277


 41%|████      | 20512/50000 [30:37<49:52,  9.85it/s]

Epochs: 20510 | epoch avg. loss: 0.040 | test avg. loss: 4.544
Epochs: 20511 | epoch avg. loss: 0.086 | test avg. loss: 4.287
Epochs: 20512 | epoch avg. loss: 0.110 | test avg. loss: 4.324


 41%|████      | 20516/50000 [30:37<44:47, 10.97it/s]

Epochs: 20513 | epoch avg. loss: 0.095 | test avg. loss: 4.385
Epochs: 20514 | epoch avg. loss: 0.079 | test avg. loss: 4.187
Epochs: 20515 | epoch avg. loss: 0.105 | test avg. loss: 4.326


 41%|████      | 20518/50000 [30:37<44:46, 10.97it/s]

Epochs: 20516 | epoch avg. loss: 0.055 | test avg. loss: 4.479
Epochs: 20517 | epoch avg. loss: 0.064 | test avg. loss: 4.290
Epochs: 20518 | epoch avg. loss: 0.082 | test avg. loss: 4.409


 41%|████      | 20522/50000 [30:38<40:38, 12.09it/s]

Epochs: 20519 | epoch avg. loss: 0.087 | test avg. loss: 4.472
Epochs: 20520 | epoch avg. loss: 0.077 | test avg. loss: 4.233
Epochs: 20521 | epoch avg. loss: 0.057 | test avg. loss: 4.239
Epochs: 20522 | epoch avg. loss: 0.014 | test avg. loss: 4.271


 41%|████      | 20526/50000 [30:38<35:01, 14.03it/s]

Epochs: 20523 | epoch avg. loss: 0.015 | test avg. loss: 4.289
Epochs: 20524 | epoch avg. loss: 0.011 | test avg. loss: 4.353
Epochs: 20525 | epoch avg. loss: 0.012 | test avg. loss: 4.314
Epochs: 20526 | epoch avg. loss: 0.009 | test avg. loss: 4.257


 41%|████      | 20530/50000 [30:38<32:41, 15.02it/s]

Epochs: 20527 | epoch avg. loss: 0.007 | test avg. loss: 4.220
Epochs: 20528 | epoch avg. loss: 0.011 | test avg. loss: 4.197
Epochs: 20529 | epoch avg. loss: 0.014 | test avg. loss: 4.262
Epochs: 20530 | epoch avg. loss: 0.018 | test avg. loss: 4.287


 41%|████      | 20534/50000 [30:38<35:45, 13.73it/s]

Epochs: 20531 | epoch avg. loss: 0.013 | test avg. loss: 4.274
Epochs: 20532 | epoch avg. loss: 0.006 | test avg. loss: 4.246
Epochs: 20533 | epoch avg. loss: 0.006 | test avg. loss: 4.244


 41%|████      | 20536/50000 [30:39<37:31, 13.09it/s]

Epochs: 20534 | epoch avg. loss: 0.007 | test avg. loss: 4.225
Epochs: 20535 | epoch avg. loss: 0.014 | test avg. loss: 4.275
Epochs: 20536 | epoch avg. loss: 0.013 | test avg. loss: 4.323


 41%|████      | 20540/50000 [30:39<36:29, 13.46it/s]

Epochs: 20537 | epoch avg. loss: 0.011 | test avg. loss: 4.253
Epochs: 20538 | epoch avg. loss: 0.022 | test avg. loss: 4.320
Epochs: 20539 | epoch avg. loss: 0.024 | test avg. loss: 4.312
Epochs: 20540 | epoch avg. loss: 0.021 | test avg. loss: 4.166


 41%|████      | 20544/50000 [30:39<33:47, 14.53it/s]

Epochs: 20541 | epoch avg. loss: 0.035 | test avg. loss: 4.218
Epochs: 20542 | epoch avg. loss: 0.019 | test avg. loss: 4.308
Epochs: 20543 | epoch avg. loss: 0.017 | test avg. loss: 4.279
Epochs: 20544 | epoch avg. loss: 0.020 | test avg. loss: 4.367


 41%|████      | 20548/50000 [30:40<38:17, 12.82it/s]

Epochs: 20545 | epoch avg. loss: 0.023 | test avg. loss: 4.377
Epochs: 20546 | epoch avg. loss: 0.015 | test avg. loss: 4.254
Epochs: 20547 | epoch avg. loss: 0.029 | test avg. loss: 4.232


 41%|████      | 20550/50000 [30:40<38:32, 12.74it/s]

Epochs: 20548 | epoch avg. loss: 0.011 | test avg. loss: 4.330
Epochs: 20549 | epoch avg. loss: 0.028 | test avg. loss: 4.203
Epochs: 20550 | epoch avg. loss: 0.030 | test avg. loss: 4.263


 41%|████      | 20554/50000 [30:40<37:08, 13.21it/s]

Epochs: 20551 | epoch avg. loss: 0.021 | test avg. loss: 4.512
Epochs: 20552 | epoch avg. loss: 0.069 | test avg. loss: 4.296
Epochs: 20553 | epoch avg. loss: 0.093 | test avg. loss: 4.341


 41%|████      | 20556/50000 [30:40<37:13, 13.18it/s]

Epochs: 20554 | epoch avg. loss: 0.075 | test avg. loss: 4.314
Epochs: 20555 | epoch avg. loss: 0.047 | test avg. loss: 4.214
Epochs: 20556 | epoch avg. loss: 0.078 | test avg. loss: 4.317


 41%|████      | 20560/50000 [30:40<38:27, 12.76it/s]

Epochs: 20557 | epoch avg. loss: 0.035 | test avg. loss: 4.394
Epochs: 20558 | epoch avg. loss: 0.020 | test avg. loss: 4.338
Epochs: 20559 | epoch avg. loss: 0.036 | test avg. loss: 4.308


 41%|████      | 20562/50000 [30:41<37:38, 13.04it/s]

Epochs: 20560 | epoch avg. loss: 0.016 | test avg. loss: 4.365
Epochs: 20561 | epoch avg. loss: 0.037 | test avg. loss: 4.223
Epochs: 20562 | epoch avg. loss: 0.025 | test avg. loss: 4.237


 41%|████      | 20566/50000 [30:41<34:41, 14.14it/s]

Epochs: 20563 | epoch avg. loss: 0.020 | test avg. loss: 4.389
Epochs: 20564 | epoch avg. loss: 0.021 | test avg. loss: 4.362
Epochs: 20565 | epoch avg. loss: 0.025 | test avg. loss: 4.277
Epochs: 20566 | epoch avg. loss: 0.036 | test avg. loss: 4.329


 41%|████      | 20570/50000 [30:41<32:24, 15.14it/s]

Epochs: 20567 | epoch avg. loss: 0.021 | test avg. loss: 4.226
Epochs: 20568 | epoch avg. loss: 0.008 | test avg. loss: 4.232
Epochs: 20569 | epoch avg. loss: 0.010 | test avg. loss: 4.282
Epochs: 20570 | epoch avg. loss: 0.009 | test avg. loss: 4.328


 41%|████      | 20574/50000 [30:41<35:29, 13.82it/s]

Epochs: 20571 | epoch avg. loss: 0.007 | test avg. loss: 4.368
Epochs: 20572 | epoch avg. loss: 0.008 | test avg. loss: 4.320
Epochs: 20573 | epoch avg. loss: 0.006 | test avg. loss: 4.306


 41%|████      | 20576/50000 [30:42<38:04, 12.88it/s]

Epochs: 20574 | epoch avg. loss: 0.006 | test avg. loss: 4.280
Epochs: 20575 | epoch avg. loss: 0.005 | test avg. loss: 4.294
Epochs: 20576 | epoch avg. loss: 0.008 | test avg. loss: 4.352




Epochs: 20577 | epoch avg. loss: 0.011 | test avg. loss: 4.265
Epochs: 20578 | epoch avg. loss: 0.031 | test avg. loss: 4.285
Epochs: 20579 | epoch avg. loss: 0.011 | test avg. loss: 4.340


 41%|████      | 20582/50000 [30:42<35:34, 13.78it/s]

Epochs: 20580 | epoch avg. loss: 0.011 | test avg. loss: 4.289
Epochs: 20581 | epoch avg. loss: 0.006 | test avg. loss: 4.297
Epochs: 20582 | epoch avg. loss: 0.009 | test avg. loss: 4.227


 41%|████      | 20586/50000 [30:42<36:56, 13.27it/s]

Epochs: 20583 | epoch avg. loss: 0.030 | test avg. loss: 4.264
Epochs: 20584 | epoch avg. loss: 0.043 | test avg. loss: 4.386
Epochs: 20585 | epoch avg. loss: 0.053 | test avg. loss: 4.391


 41%|████      | 20588/50000 [30:43<40:57, 11.97it/s]

Epochs: 20586 | epoch avg. loss: 0.028 | test avg. loss: 4.241
Epochs: 20587 | epoch avg. loss: 0.084 | test avg. loss: 4.292
Epochs: 20588 | epoch avg. loss: 0.089 | test avg. loss: 4.336


 41%|████      | 20592/50000 [30:43<37:53, 12.93it/s]

Epochs: 20589 | epoch avg. loss: 0.042 | test avg. loss: 4.282
Epochs: 20590 | epoch avg. loss: 0.098 | test avg. loss: 4.360
Epochs: 20591 | epoch avg. loss: 0.095 | test avg. loss: 4.484


                                                     

Epochs: 20592 | epoch avg. loss: 0.087 | test avg. loss: 4.235
Epochs: 20593 | epoch avg. loss: 0.097 | test avg. loss: 4.424
Epochs: 20594 | epoch avg. loss: 0.049 | test avg. loss: 4.590


                                                     

Epochs: 20595 | epoch avg. loss: 0.048 | test avg. loss: 4.475
Epochs: 20596 | epoch avg. loss: 0.169 | test avg. loss: 4.442
Epochs: 20597 | epoch avg. loss: 0.296 | test avg. loss: 4.459


 41%|████      | 20598/50000 [30:43<34:39, 14.14it/s]

Epochs: 20598 | epoch avg. loss: 0.149 | test avg. loss: 4.345
Epochs: 20599 | epoch avg. loss: 0.298 | test avg. loss: 4.515


                                                       

Epochs: 20600 | epoch avg. loss: 0.389 | test avg. loss: 5.107
Epochs: 20601 | epoch avg. loss: 0.545 | test avg. loss: 4.312
Epochs: 20602 | epoch avg. loss: 0.659 | test avg. loss: 4.231




Epochs: 20603 | epoch avg. loss: 0.425 | test avg. loss: 5.608
Epochs: 20604 | epoch avg. loss: 0.753 | test avg. loss: 4.808
Epochs: 20605 | epoch avg. loss: 0.382 | test avg. loss: 4.681


 41%|████      | 20608/50000 [30:45<59:55,  8.17it/s]

Epochs: 20606 | epoch avg. loss: 0.140 | test avg. loss: 4.837
Epochs: 20607 | epoch avg. loss: 0.184 | test avg. loss: 4.519
Epochs: 20608 | epoch avg. loss: 0.683 | test avg. loss: 4.332


 41%|████      | 20612/50000 [30:46<45:38, 10.73it/s]

Epochs: 20609 | epoch avg. loss: 0.360 | test avg. loss: 5.680
Epochs: 20610 | epoch avg. loss: 0.716 | test avg. loss: 5.279
Epochs: 20611 | epoch avg. loss: 1.397 | test avg. loss: 5.136
Epochs: 20612 | epoch avg. loss: 0.942 | test avg. loss: 4.751


 41%|████      | 20616/50000 [30:46<37:57, 12.90it/s]

Epochs: 20613 | epoch avg. loss: 0.565 | test avg. loss: 4.232
Epochs: 20614 | epoch avg. loss: 0.498 | test avg. loss: 4.459
Epochs: 20615 | epoch avg. loss: 0.154 | test avg. loss: 4.701
Epochs: 20616 | epoch avg. loss: 0.351 | test avg. loss: 4.983


 41%|████      | 20620/50000 [30:46<35:04, 13.96it/s]

Epochs: 20617 | epoch avg. loss: 0.268 | test avg. loss: 5.406
Epochs: 20618 | epoch avg. loss: 0.343 | test avg. loss: 4.613
Epochs: 20619 | epoch avg. loss: 0.714 | test avg. loss: 4.700


 41%|████      | 20622/50000 [30:46<33:47, 14.49it/s]

Epochs: 20620 | epoch avg. loss: 0.411 | test avg. loss: 4.114
Epochs: 20621 | epoch avg. loss: 0.137 | test avg. loss: 4.607
Epochs: 20622 | epoch avg. loss: 0.399 | test avg. loss: 4.774


 41%|████▏     | 20626/50000 [30:47<37:06, 13.20it/s]

Epochs: 20623 | epoch avg. loss: 0.154 | test avg. loss: 4.841
Epochs: 20624 | epoch avg. loss: 0.225 | test avg. loss: 5.317
Epochs: 20625 | epoch avg. loss: 0.426 | test avg. loss: 4.478


 41%|████▏     | 20630/50000 [30:47<34:41, 14.11it/s]

Epochs: 20626 | epoch avg. loss: 0.073 | test avg. loss: 4.228
Epochs: 20627 | epoch avg. loss: 0.097 | test avg. loss: 4.358
Epochs: 20628 | epoch avg. loss: 0.136 | test avg. loss: 4.189
Epochs: 20629 | epoch avg. loss: 0.055 | test avg. loss: 4.402


 41%|████▏     | 20634/50000 [30:47<32:24, 15.10it/s]

Epochs: 20630 | epoch avg. loss: 0.112 | test avg. loss: 4.501
Epochs: 20631 | epoch avg. loss: 0.053 | test avg. loss: 4.501
Epochs: 20632 | epoch avg. loss: 0.041 | test avg. loss: 4.675
Epochs: 20633 | epoch avg. loss: 0.084 | test avg. loss: 4.330


 41%|████▏     | 20636/50000 [30:47<34:04, 14.36it/s]

Epochs: 20634 | epoch avg. loss: 0.052 | test avg. loss: 4.227
Epochs: 20635 | epoch avg. loss: 0.038 | test avg. loss: 4.315
Epochs: 20636 | epoch avg. loss: 0.034 | test avg. loss: 4.298


 41%|████▏     | 20640/50000 [30:48<33:46, 14.49it/s]

Epochs: 20637 | epoch avg. loss: 0.029 | test avg. loss: 4.405
Epochs: 20638 | epoch avg. loss: 0.020 | test avg. loss: 4.349
Epochs: 20639 | epoch avg. loss: 0.015 | test avg. loss: 4.373
Epochs: 20640 | epoch avg. loss: 0.028 | test avg. loss: 4.244


 41%|████▏     | 20644/50000 [30:48<33:42, 14.52it/s]

Epochs: 20641 | epoch avg. loss: 0.023 | test avg. loss: 4.205
Epochs: 20642 | epoch avg. loss: 0.019 | test avg. loss: 4.235
Epochs: 20643 | epoch avg. loss: 0.011 | test avg. loss: 4.330


 41%|████▏     | 20646/50000 [30:48<34:18, 14.26it/s]

Epochs: 20644 | epoch avg. loss: 0.015 | test avg. loss: 4.300
Epochs: 20645 | epoch avg. loss: 0.017 | test avg. loss: 4.302
Epochs: 20646 | epoch avg. loss: 0.013 | test avg. loss: 4.309


 41%|████▏     | 20650/50000 [30:48<34:18, 14.26it/s]

Epochs: 20647 | epoch avg. loss: 0.016 | test avg. loss: 4.219
Epochs: 20648 | epoch avg. loss: 0.022 | test avg. loss: 4.301
Epochs: 20649 | epoch avg. loss: 0.014 | test avg. loss: 4.303


 41%|████▏     | 20652/50000 [30:49<35:59, 13.59it/s]

Epochs: 20650 | epoch avg. loss: 0.012 | test avg. loss: 4.313
Epochs: 20651 | epoch avg. loss: 0.017 | test avg. loss: 4.429
Epochs: 20652 | epoch avg. loss: 0.052 | test avg. loss: 4.245
Epochs: 20653 | epoch avg. loss: 0.038 | test avg. loss: 4.249


 41%|████▏     | 20658/50000 [30:49<32:35, 15.00it/s]

Epochs: 20654 | epoch avg. loss: 0.037 | test avg. loss: 4.477
Epochs: 20655 | epoch avg. loss: 0.051 | test avg. loss: 4.310
Epochs: 20656 | epoch avg. loss: 0.060 | test avg. loss: 4.291
Epochs: 20657 | epoch avg. loss: 0.034 | test avg. loss: 4.540


 41%|████▏     | 20662/50000 [30:49<31:42, 15.42it/s]

Epochs: 20658 | epoch avg. loss: 0.120 | test avg. loss: 4.331
Epochs: 20659 | epoch avg. loss: 0.058 | test avg. loss: 4.464
Epochs: 20660 | epoch avg. loss: 0.059 | test avg. loss: 4.503
Epochs: 20661 | epoch avg. loss: 0.064 | test avg. loss: 4.283


 41%|████▏     | 20664/50000 [30:49<33:37, 14.54it/s]

Epochs: 20662 | epoch avg. loss: 0.237 | test avg. loss: 4.312
Epochs: 20663 | epoch avg. loss: 0.135 | test avg. loss: 4.344
Epochs: 20664 | epoch avg. loss: 0.073 | test avg. loss: 4.459


 41%|████▏     | 20668/50000 [30:50<36:46, 13.29it/s]

Epochs: 20665 | epoch avg. loss: 0.169 | test avg. loss: 4.764
Epochs: 20666 | epoch avg. loss: 0.144 | test avg. loss: 4.690
Epochs: 20667 | epoch avg. loss: 0.155 | test avg. loss: 4.713


 41%|████▏     | 20672/50000 [30:50<34:01, 14.36it/s]

Epochs: 20668 | epoch avg. loss: 0.099 | test avg. loss: 4.769
Epochs: 20669 | epoch avg. loss: 0.134 | test avg. loss: 4.316
Epochs: 20670 | epoch avg. loss: 0.034 | test avg. loss: 4.112
Epochs: 20671 | epoch avg. loss: 0.063 | test avg. loss: 4.319


 41%|████▏     | 20676/50000 [30:50<31:57, 15.30it/s]

Epochs: 20672 | epoch avg. loss: 0.071 | test avg. loss: 4.323
Epochs: 20673 | epoch avg. loss: 0.019 | test avg. loss: 4.530
Epochs: 20674 | epoch avg. loss: 0.030 | test avg. loss: 4.545
Epochs: 20675 | epoch avg. loss: 0.065 | test avg. loss: 4.492


 41%|████▏     | 20680/50000 [30:50<31:50, 15.35it/s]

Epochs: 20676 | epoch avg. loss: 0.081 | test avg. loss: 4.552
Epochs: 20677 | epoch avg. loss: 0.071 | test avg. loss: 4.319
Epochs: 20678 | epoch avg. loss: 0.030 | test avg. loss: 4.299
Epochs: 20679 | epoch avg. loss: 0.029 | test avg. loss: 4.456


 41%|████▏     | 20682/50000 [30:51<33:04, 14.77it/s]

Epochs: 20680 | epoch avg. loss: 0.023 | test avg. loss: 4.438
Epochs: 20681 | epoch avg. loss: 0.011 | test avg. loss: 4.472
Epochs: 20682 | epoch avg. loss: 0.011 | test avg. loss: 4.446


 41%|████▏     | 20686/50000 [30:51<33:22, 14.64it/s]

Epochs: 20683 | epoch avg. loss: 0.010 | test avg. loss: 4.355
Epochs: 20684 | epoch avg. loss: 0.008 | test avg. loss: 4.291
Epochs: 20685 | epoch avg. loss: 0.007 | test avg. loss: 4.264
Epochs: 20686 | epoch avg. loss: 0.008 | test avg. loss: 4.301


 41%|████▏     | 20690/50000 [30:51<33:22, 14.64it/s]

Epochs: 20687 | epoch avg. loss: 0.008 | test avg. loss: 4.363
Epochs: 20688 | epoch avg. loss: 0.006 | test avg. loss: 4.425
Epochs: 20689 | epoch avg. loss: 0.007 | test avg. loss: 4.491


 41%|████▏     | 20692/50000 [30:51<36:28, 13.39it/s]

Epochs: 20690 | epoch avg. loss: 0.010 | test avg. loss: 4.427
Epochs: 20691 | epoch avg. loss: 0.013 | test avg. loss: 4.361
Epochs: 20692 | epoch avg. loss: 0.011 | test avg. loss: 4.396


 41%|████▏     | 20696/50000 [30:52<39:46, 12.28it/s]

Epochs: 20693 | epoch avg. loss: 0.012 | test avg. loss: 4.322
Epochs: 20694 | epoch avg. loss: 0.013 | test avg. loss: 4.383
Epochs: 20695 | epoch avg. loss: 0.007 | test avg. loss: 4.461


 41%|████▏     | 20698/50000 [30:52<38:42, 12.61it/s]

Epochs: 20696 | epoch avg. loss: 0.011 | test avg. loss: 4.408
Epochs: 20697 | epoch avg. loss: 0.011 | test avg. loss: 4.410
Epochs: 20698 | epoch avg. loss: 0.006 | test avg. loss: 4.383
Epochs: 20699 | epoch avg. loss: 0.007 | test avg. loss: 4.343


 41%|████▏     | 20703/50000 [30:56<3:23:09,  2.40it/s]

Epochs: 20700 | epoch avg. loss: 0.019 | test avg. loss: 4.370
Epochs: 20701 | epoch avg. loss: 0.006 | test avg. loss: 4.377
Epochs: 20702 | epoch avg. loss: 0.006 | test avg. loss: 4.402


 41%|████▏     | 20705/50000 [30:56<2:29:45,  3.26it/s]

Epochs: 20703 | epoch avg. loss: 0.009 | test avg. loss: 4.437
Epochs: 20704 | epoch avg. loss: 0.010 | test avg. loss: 4.378
Epochs: 20705 | epoch avg. loss: 0.008 | test avg. loss: 4.417


 41%|████▏     | 20709/50000 [30:57<1:30:35,  5.39it/s]

Epochs: 20706 | epoch avg. loss: 0.007 | test avg. loss: 4.383
Epochs: 20707 | epoch avg. loss: 0.005 | test avg. loss: 4.388
Epochs: 20708 | epoch avg. loss: 0.005 | test avg. loss: 4.373


 41%|████▏     | 20711/50000 [30:57<1:15:46,  6.44it/s]

Epochs: 20709 | epoch avg. loss: 0.015 | test avg. loss: 4.426
Epochs: 20710 | epoch avg. loss: 0.013 | test avg. loss: 4.500
Epochs: 20711 | epoch avg. loss: 0.008 | test avg. loss: 4.434


 41%|████▏     | 20715/50000 [30:57<56:46,  8.60it/s]  

Epochs: 20712 | epoch avg. loss: 0.006 | test avg. loss: 4.369
Epochs: 20713 | epoch avg. loss: 0.007 | test avg. loss: 4.347
Epochs: 20714 | epoch avg. loss: 0.006 | test avg. loss: 4.359


 41%|████▏     | 20717/50000 [30:57<51:22,  9.50it/s]

Epochs: 20715 | epoch avg. loss: 0.008 | test avg. loss: 4.358
Epochs: 20716 | epoch avg. loss: 0.005 | test avg. loss: 4.389
Epochs: 20717 | epoch avg. loss: 0.005 | test avg. loss: 4.434


 41%|████▏     | 20721/50000 [30:58<45:45, 10.66it/s]

Epochs: 20718 | epoch avg. loss: 0.010 | test avg. loss: 4.391
Epochs: 20719 | epoch avg. loss: 0.006 | test avg. loss: 4.332
Epochs: 20720 | epoch avg. loss: 0.009 | test avg. loss: 4.433


 41%|████▏     | 20723/50000 [30:58<44:42, 10.92it/s]

Epochs: 20721 | epoch avg. loss: 0.024 | test avg. loss: 4.338
Epochs: 20722 | epoch avg. loss: 0.016 | test avg. loss: 4.369
Epochs: 20723 | epoch avg. loss: 0.013 | test avg. loss: 4.378


 41%|████▏     | 20727/50000 [30:58<43:17, 11.27it/s]

Epochs: 20724 | epoch avg. loss: 0.010 | test avg. loss: 4.401
Epochs: 20725 | epoch avg. loss: 0.018 | test avg. loss: 4.571
Epochs: 20726 | epoch avg. loss: 0.040 | test avg. loss: 4.361


 41%|████▏     | 20729/50000 [30:58<43:18, 11.26it/s]

Epochs: 20727 | epoch avg. loss: 0.082 | test avg. loss: 4.340
Epochs: 20728 | epoch avg. loss: 0.020 | test avg. loss: 4.487
Epochs: 20729 | epoch avg. loss: 0.040 | test avg. loss: 4.329


 41%|████▏     | 20733/50000 [30:59<41:01, 11.89it/s]

Epochs: 20730 | epoch avg. loss: 0.052 | test avg. loss: 4.370
Epochs: 20731 | epoch avg. loss: 0.011 | test avg. loss: 4.330
Epochs: 20732 | epoch avg. loss: 0.038 | test avg. loss: 4.355


 41%|████▏     | 20735/50000 [30:59<40:37, 12.00it/s]

Epochs: 20733 | epoch avg. loss: 0.054 | test avg. loss: 4.652
Epochs: 20734 | epoch avg. loss: 0.085 | test avg. loss: 4.533
Epochs: 20735 | epoch avg. loss: 0.019 | test avg. loss: 4.391


 41%|████▏     | 20739/50000 [30:59<39:46, 12.26it/s]

Epochs: 20736 | epoch avg. loss: 0.020 | test avg. loss: 4.526
Epochs: 20737 | epoch avg. loss: 0.040 | test avg. loss: 4.399
Epochs: 20738 | epoch avg. loss: 0.020 | test avg. loss: 4.348


 41%|████▏     | 20741/50000 [30:59<40:07, 12.15it/s]

Epochs: 20739 | epoch avg. loss: 0.025 | test avg. loss: 4.460
Epochs: 20740 | epoch avg. loss: 0.046 | test avg. loss: 4.315
Epochs: 20741 | epoch avg. loss: 0.010 | test avg. loss: 4.298




Epochs: 20742 | epoch avg. loss: 0.017 | test avg. loss: 4.434
Epochs: 20743 | epoch avg. loss: 0.056 | test avg. loss: 4.386
Epochs: 20744 | epoch avg. loss: 0.019 | test avg. loss: 4.439


 41%|████▏     | 20749/50000 [31:00<35:14, 13.83it/s]

Epochs: 20745 | epoch avg. loss: 0.017 | test avg. loss: 4.529
Epochs: 20746 | epoch avg. loss: 0.019 | test avg. loss: 4.474
Epochs: 20747 | epoch avg. loss: 0.008 | test avg. loss: 4.401
Epochs: 20748 | epoch avg. loss: 0.008 | test avg. loss: 4.442


 42%|████▏     | 20753/50000 [31:00<33:04, 14.74it/s]

Epochs: 20749 | epoch avg. loss: 0.024 | test avg. loss: 4.331
Epochs: 20750 | epoch avg. loss: 0.007 | test avg. loss: 4.385
Epochs: 20751 | epoch avg. loss: 0.017 | test avg. loss: 4.397
Epochs: 20752 | epoch avg. loss: 0.009 | test avg. loss: 4.381


 42%|████▏     | 20755/50000 [31:00<32:59, 14.77it/s]

Epochs: 20753 | epoch avg. loss: 0.011 | test avg. loss: 4.480
Epochs: 20754 | epoch avg. loss: 0.019 | test avg. loss: 4.459
Epochs: 20755 | epoch avg. loss: 0.009 | test avg. loss: 4.367


 42%|████▏     | 20759/50000 [31:01<37:20, 13.05it/s]

Epochs: 20756 | epoch avg. loss: 0.031 | test avg. loss: 4.387
Epochs: 20757 | epoch avg. loss: 0.013 | test avg. loss: 4.482
Epochs: 20758 | epoch avg. loss: 0.014 | test avg. loss: 4.467


 42%|████▏     | 20763/50000 [31:01<34:46, 14.01it/s]

Epochs: 20759 | epoch avg. loss: 0.010 | test avg. loss: 4.417
Epochs: 20760 | epoch avg. loss: 0.005 | test avg. loss: 4.407
Epochs: 20761 | epoch avg. loss: 0.007 | test avg. loss: 4.444
Epochs: 20762 | epoch avg. loss: 0.016 | test avg. loss: 4.389


 42%|████▏     | 20767/50000 [31:01<32:48, 14.85it/s]

Epochs: 20763 | epoch avg. loss: 0.006 | test avg. loss: 4.342
Epochs: 20764 | epoch avg. loss: 0.015 | test avg. loss: 4.367
Epochs: 20765 | epoch avg. loss: 0.007 | test avg. loss: 4.436
Epochs: 20766 | epoch avg. loss: 0.007 | test avg. loss: 4.446


 42%|████▏     | 20769/50000 [31:01<32:36, 14.94it/s]

Epochs: 20767 | epoch avg. loss: 0.010 | test avg. loss: 4.442
Epochs: 20768 | epoch avg. loss: 0.022 | test avg. loss: 4.483
Epochs: 20769 | epoch avg. loss: 0.012 | test avg. loss: 4.487


 42%|████▏     | 20773/50000 [31:02<38:18, 12.71it/s]

Epochs: 20770 | epoch avg. loss: 0.014 | test avg. loss: 4.386
Epochs: 20771 | epoch avg. loss: 0.018 | test avg. loss: 4.384
Epochs: 20772 | epoch avg. loss: 0.009 | test avg. loss: 4.492


 42%|████▏     | 20775/50000 [31:02<38:15, 12.73it/s]

Epochs: 20773 | epoch avg. loss: 0.026 | test avg. loss: 4.358
Epochs: 20774 | epoch avg. loss: 0.037 | test avg. loss: 4.339
Epochs: 20775 | epoch avg. loss: 0.021 | test avg. loss: 4.538


 42%|████▏     | 20779/50000 [31:02<37:22, 13.03it/s]

Epochs: 20776 | epoch avg. loss: 0.068 | test avg. loss: 4.401
Epochs: 20777 | epoch avg. loss: 0.024 | test avg. loss: 4.510
Epochs: 20778 | epoch avg. loss: 0.013 | test avg. loss: 4.602
Epochs: 20779 | epoch avg. loss: 0.013 | test avg. loss: 4.488


 42%|████▏     | 20783/50000 [31:02<34:33, 14.09it/s]

Epochs: 20780 | epoch avg. loss: 0.018 | test avg. loss: 4.375
Epochs: 20781 | epoch avg. loss: 0.014 | test avg. loss: 4.484
Epochs: 20782 | epoch avg. loss: 0.055 | test avg. loss: 4.322


 42%|████▏     | 20785/50000 [31:03<35:40, 13.65it/s]

Epochs: 20783 | epoch avg. loss: 0.041 | test avg. loss: 4.367
Epochs: 20784 | epoch avg. loss: 0.053 | test avg. loss: 4.602
Epochs: 20785 | epoch avg. loss: 0.101 | test avg. loss: 4.542


 42%|████▏     | 20789/50000 [31:03<34:41, 14.04it/s]

Epochs: 20786 | epoch avg. loss: 0.038 | test avg. loss: 4.437
Epochs: 20787 | epoch avg. loss: 0.125 | test avg. loss: 4.396
Epochs: 20788 | epoch avg. loss: 0.057 | test avg. loss: 4.446
Epochs: 20789 | epoch avg. loss: 0.070 | test avg. loss: 4.346


 42%|████▏     | 20793/50000 [31:03<33:55, 14.35it/s]

Epochs: 20790 | epoch avg. loss: 0.218 | test avg. loss: 4.420
Epochs: 20791 | epoch avg. loss: 0.083 | test avg. loss: 4.736
Epochs: 20792 | epoch avg. loss: 0.085 | test avg. loss: 4.645


 42%|████▏     | 20795/50000 [31:03<33:35, 14.49it/s]

Epochs: 20793 | epoch avg. loss: 0.055 | test avg. loss: 4.657
Epochs: 20794 | epoch avg. loss: 0.023 | test avg. loss: 4.615
Epochs: 20795 | epoch avg. loss: 0.036 | test avg. loss: 4.370


 42%|████▏     | 20799/50000 [31:04<35:21, 13.77it/s]

Epochs: 20796 | epoch avg. loss: 0.037 | test avg. loss: 4.316
Epochs: 20797 | epoch avg. loss: 0.041 | test avg. loss: 4.424
Epochs: 20798 | epoch avg. loss: 0.024 | test avg. loss: 4.498


 42%|████▏     | 20799/50000 [31:04<35:21, 13.77it/s]

Epochs: 20799 | epoch avg. loss: 0.013 | test avg. loss: 4.540


 42%|████▏     | 20803/50000 [31:05<1:58:37,  4.10it/s]

Epochs: 20800 | epoch avg. loss: 0.011 | test avg. loss: 4.522
Epochs: 20801 | epoch avg. loss: 0.013 | test avg. loss: 4.468
Epochs: 20802 | epoch avg. loss: 0.010 | test avg. loss: 4.416


 42%|████▏     | 20805/50000 [31:06<1:36:03,  5.07it/s]

Epochs: 20803 | epoch avg. loss: 0.016 | test avg. loss: 4.509
Epochs: 20804 | epoch avg. loss: 0.014 | test avg. loss: 4.548
Epochs: 20805 | epoch avg. loss: 0.008 | test avg. loss: 4.601


 42%|████▏     | 20809/50000 [31:06<1:05:53,  7.38it/s]

Epochs: 20806 | epoch avg. loss: 0.033 | test avg. loss: 4.725
Epochs: 20807 | epoch avg. loss: 0.093 | test avg. loss: 4.313
Epochs: 20808 | epoch avg. loss: 0.021 | test avg. loss: 4.241


 42%|████▏     | 20811/50000 [31:06<56:11,  8.66it/s]

Epochs: 20809 | epoch avg. loss: 0.026 | test avg. loss: 4.393
Epochs: 20810 | epoch avg. loss: 0.021 | test avg. loss: 4.521
Epochs: 20811 | epoch avg. loss: 0.084 | test avg. loss: 4.846
Epochs: 20812 | epoch avg. loss: 0.107 | test avg. loss: 4.999


 42%|████▏     | 20815/50000 [31:06<44:32, 10.92it/s]

Epochs: 20813 | epoch avg. loss: 0.103 | test avg. loss: 4.549
Epochs: 20814 | epoch avg. loss: 0.224 | test avg. loss: 4.311
Epochs: 20815 | epoch avg. loss: 0.054 | test avg. loss: 4.461


 42%|████▏     | 20819/50000 [31:07<41:29, 11.72it/s]

Epochs: 20816 | epoch avg. loss: 0.064 | test avg. loss: 4.271
Epochs: 20817 | epoch avg. loss: 0.061 | test avg. loss: 4.408
Epochs: 20818 | epoch avg. loss: 0.029 | test avg. loss: 4.661


 42%|████▏     | 20821/50000 [31:07<40:47, 11.92it/s]

Epochs: 20819 | epoch avg. loss: 0.057 | test avg. loss: 4.638
Epochs: 20820 | epoch avg. loss: 0.025 | test avg. loss: 4.537
Epochs: 20821 | epoch avg. loss: 0.067 | test avg. loss: 4.532


 42%|████▏     | 20825/50000 [31:07<35:59, 13.51it/s]

Epochs: 20822 | epoch avg. loss: 0.032 | test avg. loss: 4.684
Epochs: 20823 | epoch avg. loss: 0.065 | test avg. loss: 4.475
Epochs: 20824 | epoch avg. loss: 0.058 | test avg. loss: 4.533
Epochs: 20825 | epoch avg. loss: 0.029 | test avg. loss: 4.743


 42%|████▏     | 20829/50000 [31:07<33:57, 14.32it/s]

Epochs: 20826 | epoch avg. loss: 0.052 | test avg. loss: 4.648
Epochs: 20827 | epoch avg. loss: 0.030 | test avg. loss: 4.612
Epochs: 20828 | epoch avg. loss: 0.042 | test avg. loss: 4.673


 42%|████▏     | 20833/50000 [31:08<33:50, 14.36it/s]

Epochs: 20829 | epoch avg. loss: 0.029 | test avg. loss: 4.474
Epochs: 20830 | epoch avg. loss: 0.051 | test avg. loss: 4.426
Epochs: 20831 | epoch avg. loss: 0.057 | test avg. loss: 4.570
Epochs: 20832 | epoch avg. loss: 0.047 | test avg. loss: 4.603


 42%|████▏     | 20835/50000 [31:08<32:47, 14.82it/s]

Epochs: 20833 | epoch avg. loss: 0.017 | test avg. loss: 4.591
Epochs: 20834 | epoch avg. loss: 0.023 | test avg. loss: 4.628
Epochs: 20835 | epoch avg. loss: 0.027 | test avg. loss: 4.619


 42%|████▏     | 20839/50000 [31:08<32:44, 14.84it/s]

Epochs: 20836 | epoch avg. loss: 0.028 | test avg. loss: 4.443
Epochs: 20837 | epoch avg. loss: 0.038 | test avg. loss: 4.435
Epochs: 20838 | epoch avg. loss: 0.022 | test avg. loss: 4.603
Epochs: 20839 | epoch avg. loss: 0.042 | test avg. loss: 4.534


 42%|████▏     | 20843/50000 [31:08<32:16, 15.06it/s]

Epochs: 20840 | epoch avg. loss: 0.011 | test avg. loss: 4.542
Epochs: 20841 | epoch avg. loss: 0.011 | test avg. loss: 4.599
Epochs: 20842 | epoch avg. loss: 0.021 | test avg. loss: 4.428


 42%|████▏     | 20845/50000 [31:09<38:10, 12.73it/s]

Epochs: 20843 | epoch avg. loss: 0.038 | test avg. loss: 4.452
Epochs: 20844 | epoch avg. loss: 0.015 | test avg. loss: 4.578
Epochs: 20845 | epoch avg. loss: 0.015 | test avg. loss: 4.556


 42%|████▏     | 20849/50000 [31:09<34:42, 14.00it/s]

Epochs: 20846 | epoch avg. loss: 0.015 | test avg. loss: 4.492
Epochs: 20847 | epoch avg. loss: 0.045 | test avg. loss: 4.532
Epochs: 20848 | epoch avg. loss: 0.039 | test avg. loss: 4.581
Epochs: 20849 | epoch avg. loss: 0.041 | test avg. loss: 4.393


 42%|████▏     | 20853/50000 [31:09<34:27, 14.10it/s]

Epochs: 20850 | epoch avg. loss: 0.063 | test avg. loss: 4.412
Epochs: 20851 | epoch avg. loss: 0.021 | test avg. loss: 4.650
Epochs: 20852 | epoch avg. loss: 0.119 | test avg. loss: 4.502


 42%|████▏     | 20855/50000 [31:09<33:27, 14.52it/s]

Epochs: 20853 | epoch avg. loss: 0.048 | test avg. loss: 4.601
Epochs: 20854 | epoch avg. loss: 0.081 | test avg. loss: 4.898
Epochs: 20855 | epoch avg. loss: 0.198 | test avg. loss: 4.747


 42%|████▏     | 20859/50000 [31:10<37:42, 12.88it/s]

Epochs: 20856 | epoch avg. loss: 0.062 | test avg. loss: 4.514
Epochs: 20857 | epoch avg. loss: 0.182 | test avg. loss: 4.443
Epochs: 20858 | epoch avg. loss: 0.069 | test avg. loss: 4.558




Epochs: 20859 | epoch avg. loss: 0.061 | test avg. loss: 4.720
Epochs: 20860 | epoch avg. loss: 0.437 | test avg. loss: 4.550
Epochs: 20861 | epoch avg. loss: 0.140 | test avg. loss: 5.427


 42%|████▏     | 20865/50000 [31:10<34:33, 14.05it/s]

Epochs: 20862 | epoch avg. loss: 0.564 | test avg. loss: 4.370
Epochs: 20863 | epoch avg. loss: 0.147 | test avg. loss: 4.437
Epochs: 20864 | epoch avg. loss: 0.119 | test avg. loss: 4.794


 42%|████▏     | 20867/50000 [31:10<36:30, 13.30it/s]

Epochs: 20865 | epoch avg. loss: 0.160 | test avg. loss: 4.972
Epochs: 20866 | epoch avg. loss: 0.114 | test avg. loss: 4.649
Epochs: 20867 | epoch avg. loss: 0.199 | test avg. loss: 4.559


 42%|████▏     | 20871/50000 [31:10<38:57, 12.46it/s]

Epochs: 20868 | epoch avg. loss: 0.136 | test avg. loss: 4.665
Epochs: 20869 | epoch avg. loss: 0.150 | test avg. loss: 4.427
Epochs: 20870 | epoch avg. loss: 0.126 | test avg. loss: 4.599


 42%|████▏     | 20873/50000 [31:11<41:19, 11.75it/s]

Epochs: 20871 | epoch avg. loss: 0.211 | test avg. loss: 4.784
Epochs: 20872 | epoch avg. loss: 0.128 | test avg. loss: 4.818
Epochs: 20873 | epoch avg. loss: 0.124 | test avg. loss: 4.530


 42%|████▏     | 20877/50000 [31:11<45:01, 10.78it/s]

Epochs: 20874 | epoch avg. loss: 0.028 | test avg. loss: 4.541
Epochs: 20875 | epoch avg. loss: 0.031 | test avg. loss: 4.967
Epochs: 20876 | epoch avg. loss: 0.154 | test avg. loss: 4.707


 42%|████▏     | 20879/50000 [31:11<44:19, 10.95it/s]

Epochs: 20877 | epoch avg. loss: 0.061 | test avg. loss: 4.721
Epochs: 20878 | epoch avg. loss: 0.174 | test avg. loss: 4.903
Epochs: 20879 | epoch avg. loss: 0.055 | test avg. loss: 4.912


 42%|████▏     | 20883/50000 [31:12<43:51, 11.06it/s]

Epochs: 20880 | epoch avg. loss: 0.031 | test avg. loss: 4.644
Epochs: 20881 | epoch avg. loss: 0.012 | test avg. loss: 4.548
Epochs: 20882 | epoch avg. loss: 0.019 | test avg. loss: 4.441


 42%|████▏     | 20885/50000 [31:12<40:52, 11.87it/s]

Epochs: 20883 | epoch avg. loss: 0.013 | test avg. loss: 4.337
Epochs: 20884 | epoch avg. loss: 0.047 | test avg. loss: 4.324
Epochs: 20885 | epoch avg. loss: 0.020 | test avg. loss: 4.448


 42%|████▏     | 20889/50000 [31:12<36:35, 13.26it/s]

Epochs: 20886 | epoch avg. loss: 0.058 | test avg. loss: 4.252
Epochs: 20887 | epoch avg. loss: 0.041 | test avg. loss: 4.227
Epochs: 20888 | epoch avg. loss: 0.097 | test avg. loss: 4.325
Epochs: 20889 | epoch avg. loss: 0.016 | test avg. loss: 4.407


 42%|████▏     | 20893/50000 [31:12<35:55, 13.50it/s]

Epochs: 20890 | epoch avg. loss: 0.025 | test avg. loss: 4.373
Epochs: 20891 | epoch avg. loss: 0.041 | test avg. loss: 4.448
Epochs: 20892 | epoch avg. loss: 0.078 | test avg. loss: 4.505


 42%|████▏     | 20895/50000 [31:13<41:08, 11.79it/s]

Epochs: 20893 | epoch avg. loss: 0.107 | test avg. loss: 4.264
Epochs: 20894 | epoch avg. loss: 0.067 | test avg. loss: 4.332
Epochs: 20895 | epoch avg. loss: 0.127 | test avg. loss: 4.349


 42%|████▏     | 20899/50000 [31:13<39:24, 12.31it/s]

Epochs: 20896 | epoch avg. loss: 0.030 | test avg. loss: 4.448
Epochs: 20897 | epoch avg. loss: 0.075 | test avg. loss: 4.217
Epochs: 20898 | epoch avg. loss: 0.036 | test avg. loss: 4.306


 42%|████▏     | 20899/50000 [31:13<39:24, 12.31it/s]

Epochs: 20899 | epoch avg. loss: 0.045 | test avg. loss: 4.503


 42%|████▏     | 20903/50000 [31:15<1:59:38,  4.05it/s]

Epochs: 20900 | epoch avg. loss: 0.076 | test avg. loss: 4.653
Epochs: 20901 | epoch avg. loss: 0.144 | test avg. loss: 4.211
Epochs: 20902 | epoch avg. loss: 0.097 | test avg. loss: 4.147


 42%|████▏     | 20907/50000 [31:15<1:15:06,  6.46it/s]

Epochs: 20903 | epoch avg. loss: 0.071 | test avg. loss: 4.665
Epochs: 20904 | epoch avg. loss: 0.198 | test avg. loss: 4.183
Epochs: 20905 | epoch avg. loss: 0.095 | test avg. loss: 4.214
Epochs: 20906 | epoch avg. loss: 0.055 | test avg. loss: 4.509


 42%|████▏     | 20909/50000 [31:15<1:01:48,  7.84it/s]

Epochs: 20907 | epoch avg. loss: 0.142 | test avg. loss: 4.403
Epochs: 20908 | epoch avg. loss: 0.033 | test avg. loss: 4.265
Epochs: 20909 | epoch avg. loss: 0.057 | test avg. loss: 4.297
Epochs: 20910 | epoch avg. loss: 0.080 | test avg. loss: 4.504


 42%|████▏     | 20913/50000 [31:15<47:29, 10.21it/s]

Epochs: 20911 | epoch avg. loss: 0.178 | test avg. loss: 4.232
Epochs: 20912 | epoch avg. loss: 0.113 | test avg. loss: 4.445
Epochs: 20913 | epoch avg. loss: 0.086 | test avg. loss: 4.561


 42%|████▏     | 20917/50000 [31:16<45:33, 10.64it/s]

Epochs: 20914 | epoch avg. loss: 0.032 | test avg. loss: 4.329
Epochs: 20915 | epoch avg. loss: 0.022 | test avg. loss: 4.151
Epochs: 20916 | epoch avg. loss: 0.037 | test avg. loss: 4.134


 42%|████▏     | 20919/50000 [31:16<43:22, 11.17it/s]

Epochs: 20917 | epoch avg. loss: 0.037 | test avg. loss: 4.261
Epochs: 20918 | epoch avg. loss: 0.022 | test avg. loss: 4.381
Epochs: 20919 | epoch avg. loss: 0.016 | test avg. loss: 4.384


 42%|████▏     | 20923/50000 [31:16<42:57, 11.28it/s]

Epochs: 20920 | epoch avg. loss: 0.011 | test avg. loss: 4.285
Epochs: 20921 | epoch avg. loss: 0.031 | test avg. loss: 4.302
Epochs: 20922 | epoch avg. loss: 0.029 | test avg. loss: 4.372


 42%|████▏     | 20925/50000 [31:17<45:31, 10.64it/s]

Epochs: 20923 | epoch avg. loss: 0.037 | test avg. loss: 4.266
Epochs: 20924 | epoch avg. loss: 0.095 | test avg. loss: 4.341
Epochs: 20925 | epoch avg. loss: 0.093 | test avg. loss: 4.422


 42%|████▏     | 20929/50000 [31:17<42:03, 11.52it/s]

Epochs: 20926 | epoch avg. loss: 0.046 | test avg. loss: 4.416
Epochs: 20927 | epoch avg. loss: 0.024 | test avg. loss: 4.326
Epochs: 20928 | epoch avg. loss: 0.045 | test avg. loss: 4.314


 42%|████▏     | 20931/50000 [31:17<40:01, 12.11it/s]

Epochs: 20929 | epoch avg. loss: 0.019 | test avg. loss: 4.330
Epochs: 20930 | epoch avg. loss: 0.014 | test avg. loss: 4.364
Epochs: 20931 | epoch avg. loss: 0.018 | test avg. loss: 4.311


 42%|████▏     | 20935/50000 [31:17<40:04, 12.09it/s]

Epochs: 20932 | epoch avg. loss: 0.014 | test avg. loss: 4.300
Epochs: 20933 | epoch avg. loss: 0.030 | test avg. loss: 4.294
Epochs: 20934 | epoch avg. loss: 0.016 | test avg. loss: 4.385


 42%|████▏     | 20937/50000 [31:18<42:35, 11.37it/s]

Epochs: 20935 | epoch avg. loss: 0.026 | test avg. loss: 4.452
Epochs: 20936 | epoch avg. loss: 0.019 | test avg. loss: 4.357
Epochs: 20937 | epoch avg. loss: 0.019 | test avg. loss: 4.337


 42%|████▏     | 20941/50000 [31:18<40:16, 12.02it/s]

Epochs: 20938 | epoch avg. loss: 0.013 | test avg. loss: 4.433
Epochs: 20939 | epoch avg. loss: 0.032 | test avg. loss: 4.403
Epochs: 20940 | epoch avg. loss: 0.011 | test avg. loss: 4.377


 42%|████▏     | 20943/50000 [31:18<40:01, 12.10it/s]

Epochs: 20941 | epoch avg. loss: 0.010 | test avg. loss: 4.410
Epochs: 20942 | epoch avg. loss: 0.014 | test avg. loss: 4.418
Epochs: 20943 | epoch avg. loss: 0.014 | test avg. loss: 4.332


 42%|████▏     | 20947/50000 [31:18<41:05, 11.78it/s]

Epochs: 20944 | epoch avg. loss: 0.008 | test avg. loss: 4.283
Epochs: 20945 | epoch avg. loss: 0.009 | test avg. loss: 4.325
Epochs: 20946 | epoch avg. loss: 0.014 | test avg. loss: 4.356


 42%|████▏     | 20949/50000 [31:19<44:17, 10.93it/s]

Epochs: 20947 | epoch avg. loss: 0.010 | test avg. loss: 4.358
Epochs: 20948 | epoch avg. loss: 0.042 | test avg. loss: 4.434
Epochs: 20949 | epoch avg. loss: 0.019 | test avg. loss: 4.472


 42%|████▏     | 20953/50000 [31:19<40:38, 11.91it/s]

Epochs: 20950 | epoch avg. loss: 0.016 | test avg. loss: 4.331
Epochs: 20951 | epoch avg. loss: 0.054 | test avg. loss: 4.284
Epochs: 20952 | epoch avg. loss: 0.032 | test avg. loss: 4.434


 42%|████▏     | 20955/50000 [31:19<40:34, 11.93it/s]

Epochs: 20953 | epoch avg. loss: 0.074 | test avg. loss: 4.376
Epochs: 20954 | epoch avg. loss: 0.108 | test avg. loss: 4.639
Epochs: 20955 | epoch avg. loss: 0.105 | test avg. loss: 4.706


 42%|████▏     | 20959/50000 [31:19<38:12, 12.67it/s]

Epochs: 20956 | epoch avg. loss: 0.029 | test avg. loss: 4.429
Epochs: 20957 | epoch avg. loss: 0.023 | test avg. loss: 4.161
Epochs: 20958 | epoch avg. loss: 0.141 | test avg. loss: 4.229


 42%|████▏     | 20961/50000 [31:20<38:59, 12.41it/s]

Epochs: 20959 | epoch avg. loss: 0.071 | test avg. loss: 4.465
Epochs: 20960 | epoch avg. loss: 0.098 | test avg. loss: 4.640
Epochs: 20961 | epoch avg. loss: 0.063 | test avg. loss: 4.500


 42%|████▏     | 20965/50000 [31:20<38:18, 12.63it/s]

Epochs: 20962 | epoch avg. loss: 0.093 | test avg. loss: 4.454
Epochs: 20963 | epoch avg. loss: 0.121 | test avg. loss: 4.414
Epochs: 20964 | epoch avg. loss: 0.025 | test avg. loss: 4.493


 42%|████▏     | 20967/50000 [31:20<37:15, 12.99it/s]

Epochs: 20965 | epoch avg. loss: 0.032 | test avg. loss: 4.397
Epochs: 20966 | epoch avg. loss: 0.072 | test avg. loss: 4.541
Epochs: 20967 | epoch avg. loss: 0.041 | test avg. loss: 4.653


 42%|████▏     | 20971/50000 [31:20<35:23, 13.67it/s]

Epochs: 20968 | epoch avg. loss: 0.043 | test avg. loss: 4.511
Epochs: 20969 | epoch avg. loss: 0.222 | test avg. loss: 4.417
Epochs: 20970 | epoch avg. loss: 0.077 | test avg. loss: 4.549


 42%|████▏     | 20973/50000 [31:20<35:47, 13.52it/s]

Epochs: 20971 | epoch avg. loss: 0.082 | test avg. loss: 4.521
Epochs: 20972 | epoch avg. loss: 0.029 | test avg. loss: 4.416
Epochs: 20973 | epoch avg. loss: 0.051 | test avg. loss: 4.437


 42%|████▏     | 20977/50000 [31:21<37:48, 12.80it/s]

Epochs: 20974 | epoch avg. loss: 0.035 | test avg. loss: 4.592
Epochs: 20975 | epoch avg. loss: 0.115 | test avg. loss: 4.384
Epochs: 20976 | epoch avg. loss: 0.052 | test avg. loss: 4.487


 42%|████▏     | 20979/50000 [31:21<37:34, 12.87it/s]

Epochs: 20977 | epoch avg. loss: 0.187 | test avg. loss: 4.592
Epochs: 20978 | epoch avg. loss: 0.046 | test avg. loss: 4.839
Epochs: 20979 | epoch avg. loss: 0.114 | test avg. loss: 4.367


 42%|████▏     | 20983/50000 [31:21<38:27, 12.58it/s]

Epochs: 20980 | epoch avg. loss: 0.034 | test avg. loss: 4.231
Epochs: 20981 | epoch avg. loss: 0.035 | test avg. loss: 4.322
Epochs: 20982 | epoch avg. loss: 0.016 | test avg. loss: 4.370


 42%|████▏     | 20985/50000 [31:21<39:35, 12.21it/s]

Epochs: 20983 | epoch avg. loss: 0.012 | test avg. loss: 4.534
Epochs: 20984 | epoch avg. loss: 0.018 | test avg. loss: 4.605
Epochs: 20985 | epoch avg. loss: 0.037 | test avg. loss: 4.449


 42%|████▏     | 20989/50000 [31:22<41:51, 11.55it/s]

Epochs: 20986 | epoch avg. loss: 0.010 | test avg. loss: 4.331
Epochs: 20987 | epoch avg. loss: 0.018 | test avg. loss: 4.393
Epochs: 20988 | epoch avg. loss: 0.015 | test avg. loss: 4.459


 42%|████▏     | 20991/50000 [31:22<40:43, 11.87it/s]

Epochs: 20989 | epoch avg. loss: 0.010 | test avg. loss: 4.444
Epochs: 20990 | epoch avg. loss: 0.014 | test avg. loss: 4.504
Epochs: 20991 | epoch avg. loss: 0.012 | test avg. loss: 4.576


 42%|████▏     | 20995/50000 [31:22<41:08, 11.75it/s]

Epochs: 20992 | epoch avg. loss: 0.036 | test avg. loss: 4.445
Epochs: 20993 | epoch avg. loss: 0.009 | test avg. loss: 4.360
Epochs: 20994 | epoch avg. loss: 0.024 | test avg. loss: 4.386


 42%|████▏     | 20997/50000 [31:22<42:16, 11.43it/s]

Epochs: 20995 | epoch avg. loss: 0.028 | test avg. loss: 4.521
Epochs: 20996 | epoch avg. loss: 0.010 | test avg. loss: 4.595
Epochs: 20997 | epoch avg. loss: 0.022 | test avg. loss: 4.504


 42%|████▏     | 20999/50000 [31:23<41:22, 11.68it/s]

Epochs: 20998 | epoch avg. loss: 0.008 | test avg. loss: 4.359
Epochs: 20999 | epoch avg. loss: 0.015 | test avg. loss: 4.346


 42%|████▏     | 21003/50000 [31:24<1:52:29,  4.30it/s]

Epochs: 21000 | epoch avg. loss: 0.013 | test avg. loss: 4.371
Epochs: 21001 | epoch avg. loss: 0.018 | test avg. loss: 4.428
Epochs: 21002 | epoch avg. loss: 0.008 | test avg. loss: 4.485


 42%|████▏     | 21005/50000 [31:25<1:32:04,  5.25it/s]

Epochs: 21003 | epoch avg. loss: 0.006 | test avg. loss: 4.507
Epochs: 21004 | epoch avg. loss: 0.010 | test avg. loss: 4.479
Epochs: 21005 | epoch avg. loss: 0.011 | test avg. loss: 4.387


 42%|████▏     | 21009/50000 [31:25<1:05:32,  7.37it/s]

Epochs: 21006 | epoch avg. loss: 0.006 | test avg. loss: 4.350
Epochs: 21007 | epoch avg. loss: 0.006 | test avg. loss: 4.364
Epochs: 21008 | epoch avg. loss: 0.007 | test avg. loss: 4.440


 42%|████▏     | 21011/50000 [31:25<59:55,  8.06it/s]

Epochs: 21009 | epoch avg. loss: 0.006 | test avg. loss: 4.458
Epochs: 21010 | epoch avg. loss: 0.005 | test avg. loss: 4.458
Epochs: 21011 | epoch avg. loss: 0.005 | test avg. loss: 4.481


 42%|████▏     | 21015/50000 [31:25<48:16, 10.01it/s]

Epochs: 21012 | epoch avg. loss: 0.017 | test avg. loss: 4.457
Epochs: 21013 | epoch avg. loss: 0.021 | test avg. loss: 4.340
Epochs: 21014 | epoch avg. loss: 0.007 | test avg. loss: 4.375


 42%|████▏     | 21017/50000 [31:26<43:58, 10.98it/s]

Epochs: 21015 | epoch avg. loss: 0.006 | test avg. loss: 4.437
Epochs: 21016 | epoch avg. loss: 0.005 | test avg. loss: 4.479
Epochs: 21017 | epoch avg. loss: 0.005 | test avg. loss: 4.486


 42%|████▏     | 21021/50000 [31:26<39:49, 12.13it/s]

Epochs: 21018 | epoch avg. loss: 0.006 | test avg. loss: 4.475
Epochs: 21019 | epoch avg. loss: 0.009 | test avg. loss: 4.434
Epochs: 21020 | epoch avg. loss: 0.006 | test avg. loss: 4.369


                                                     

Epochs: 21021 | epoch avg. loss: 0.020 | test avg. loss: 4.372
Epochs: 21022 | epoch avg. loss: 0.012 | test avg. loss: 4.511
Epochs: 21023 | epoch avg. loss: 0.035 | test avg. loss: 4.502


 42%|████▏     | 21027/50000 [31:26<35:36, 13.56it/s]

Epochs: 21024 | epoch avg. loss: 0.027 | test avg. loss: 4.436
Epochs: 21025 | epoch avg. loss: 0.012 | test avg. loss: 4.426
Epochs: 21026 | epoch avg. loss: 0.060 | test avg. loss: 4.464


 42%|████▏     | 21029/50000 [31:26<35:04, 13.77it/s]

Epochs: 21027 | epoch avg. loss: 0.015 | test avg. loss: 4.578
Epochs: 21028 | epoch avg. loss: 0.031 | test avg. loss: 4.369
Epochs: 21029 | epoch avg. loss: 0.034 | test avg. loss: 4.325


 42%|████▏     | 21033/50000 [31:27<35:33, 13.58it/s]

Epochs: 21030 | epoch avg. loss: 0.016 | test avg. loss: 4.467
Epochs: 21031 | epoch avg. loss: 0.025 | test avg. loss: 4.374
Epochs: 21032 | epoch avg. loss: 0.066 | test avg. loss: 4.356


 42%|████▏     | 21035/50000 [31:27<36:00, 13.40it/s]

Epochs: 21033 | epoch avg. loss: 0.029 | test avg. loss: 4.527
Epochs: 21034 | epoch avg. loss: 0.077 | test avg. loss: 4.431
Epochs: 21035 | epoch avg. loss: 0.038 | test avg. loss: 4.482
Epochs: 21036 | epoch avg. loss: 0.097 | test avg. loss: 4.613


 42%|████▏     | 21039/50000 [31:27<33:40, 14.33it/s]

Epochs: 21037 | epoch avg. loss: 0.054 | test avg. loss: 4.858
Epochs: 21038 | epoch avg. loss: 0.172 | test avg. loss: 4.501
Epochs: 21039 | epoch avg. loss: 0.059 | test avg. loss: 4.381
Epochs: 21040 | epoch avg. loss: 0.057 | test avg. loss: 4.808


 42%|████▏     | 21043/50000 [31:27<35:23, 13.64it/s]

Epochs: 21041 | epoch avg. loss: 0.450 | test avg. loss: 4.523
Epochs: 21042 | epoch avg. loss: 0.103 | test avg. loss: 4.842
Epochs: 21043 | epoch avg. loss: 0.532 | test avg. loss: 4.606


 42%|████▏     | 21047/50000 [31:28<36:29, 13.22it/s]

Epochs: 21044 | epoch avg. loss: 0.082 | test avg. loss: 4.793
Epochs: 21045 | epoch avg. loss: 0.172 | test avg. loss: 4.445
Epochs: 21046 | epoch avg. loss: 0.038 | test avg. loss: 4.368


 42%|████▏     | 21049/50000 [31:28<36:53, 13.08it/s]

Epochs: 21047 | epoch avg. loss: 0.032 | test avg. loss: 4.310
Epochs: 21048 | epoch avg. loss: 0.040 | test avg. loss: 4.390
Epochs: 21049 | epoch avg. loss: 0.050 | test avg. loss: 4.512
Epochs: 21050 | epoch avg. loss: 0.030 | test avg. loss: 4.709


 42%|████▏     | 21055/50000 [31:28<32:22, 14.90it/s]

Epochs: 21051 | epoch avg. loss: 0.040 | test avg. loss: 4.569
Epochs: 21052 | epoch avg. loss: 0.146 | test avg. loss: 4.494
Epochs: 21053 | epoch avg. loss: 0.108 | test avg. loss: 4.506
Epochs: 21054 | epoch avg. loss: 0.027 | test avg. loss: 4.615


 42%|████▏     | 21057/50000 [31:28<33:23, 14.45it/s]

Epochs: 21055 | epoch avg. loss: 0.046 | test avg. loss: 4.505
Epochs: 21056 | epoch avg. loss: 0.058 | test avg. loss: 4.520
Epochs: 21057 | epoch avg. loss: 0.085 | test avg. loss: 4.606


 42%|████▏     | 21061/50000 [31:29<34:31, 13.97it/s]

Epochs: 21058 | epoch avg. loss: 0.019 | test avg. loss: 4.565
Epochs: 21059 | epoch avg. loss: 0.018 | test avg. loss: 4.474
Epochs: 21060 | epoch avg. loss: 0.020 | test avg. loss: 4.442
Epochs: 21061 | epoch avg. loss: 0.013 | test avg. loss: 4.484


 42%|████▏     | 21065/50000 [31:29<33:43, 14.30it/s]

Epochs: 21062 | epoch avg. loss: 0.017 | test avg. loss: 4.477
Epochs: 21063 | epoch avg. loss: 0.041 | test avg. loss: 4.508
Epochs: 21064 | epoch avg. loss: 0.049 | test avg. loss: 4.557


 42%|████▏     | 21069/50000 [31:29<32:09, 15.00it/s]

Epochs: 21065 | epoch avg. loss: 0.010 | test avg. loss: 4.563
Epochs: 21066 | epoch avg. loss: 0.015 | test avg. loss: 4.485
Epochs: 21067 | epoch avg. loss: 0.008 | test avg. loss: 4.469
Epochs: 21068 | epoch avg. loss: 0.008 | test avg. loss: 4.588


 42%|████▏     | 21071/50000 [31:29<33:16, 14.49it/s]

Epochs: 21069 | epoch avg. loss: 0.045 | test avg. loss: 4.610
Epochs: 21070 | epoch avg. loss: 0.024 | test avg. loss: 4.462
Epochs: 21071 | epoch avg. loss: 0.077 | test avg. loss: 4.423


 42%|████▏     | 21075/50000 [31:30<35:10, 13.70it/s]

Epochs: 21072 | epoch avg. loss: 0.038 | test avg. loss: 4.517
Epochs: 21073 | epoch avg. loss: 0.043 | test avg. loss: 4.697
Epochs: 21074 | epoch avg. loss: 0.133 | test avg. loss: 4.558


 42%|████▏     | 21077/50000 [31:30<34:27, 13.99it/s]

Epochs: 21075 | epoch avg. loss: 0.024 | test avg. loss: 4.492
Epochs: 21076 | epoch avg. loss: 0.038 | test avg. loss: 4.524
Epochs: 21077 | epoch avg. loss: 0.013 | test avg. loss: 4.593


 42%|████▏     | 21081/50000 [31:30<34:15, 14.07it/s]

Epochs: 21078 | epoch avg. loss: 0.028 | test avg. loss: 4.556
Epochs: 21079 | epoch avg. loss: 0.010 | test avg. loss: 4.548
Epochs: 21080 | epoch avg. loss: 0.014 | test avg. loss: 4.571


 42%|████▏     | 21083/50000 [31:30<35:14, 13.68it/s]

Epochs: 21081 | epoch avg. loss: 0.008 | test avg. loss: 4.618
Epochs: 21082 | epoch avg. loss: 0.010 | test avg. loss: 4.554
Epochs: 21083 | epoch avg. loss: 0.022 | test avg. loss: 4.537


 42%|████▏     | 21087/50000 [31:30<37:45, 12.76it/s]

Epochs: 21084 | epoch avg. loss: 0.025 | test avg. loss: 4.590
Epochs: 21085 | epoch avg. loss: 0.026 | test avg. loss: 4.650
Epochs: 21086 | epoch avg. loss: 0.052 | test avg. loss: 4.357


 42%|████▏     | 21089/50000 [31:31<37:06, 12.98it/s]

Epochs: 21087 | epoch avg. loss: 0.026 | test avg. loss: 4.400
Epochs: 21088 | epoch avg. loss: 0.111 | test avg. loss: 4.530
Epochs: 21089 | epoch avg. loss: 0.026 | test avg. loss: 4.751


 42%|████▏     | 21093/50000 [31:31<36:18, 13.27it/s]

Epochs: 21090 | epoch avg. loss: 0.057 | test avg. loss: 4.647
Epochs: 21091 | epoch avg. loss: 0.019 | test avg. loss: 4.505
Epochs: 21092 | epoch avg. loss: 0.113 | test avg. loss: 4.430


 42%|████▏     | 21095/50000 [31:31<36:07, 13.33it/s]

Epochs: 21093 | epoch avg. loss: 0.029 | test avg. loss: 4.525
Epochs: 21094 | epoch avg. loss: 0.045 | test avg. loss: 4.586
Epochs: 21095 | epoch avg. loss: 0.042 | test avg. loss: 4.563


 42%|████▏     | 21099/50000 [31:31<35:47, 13.46it/s]

Epochs: 21096 | epoch avg. loss: 0.012 | test avg. loss: 4.509
Epochs: 21097 | epoch avg. loss: 0.016 | test avg. loss: 4.497
Epochs: 21098 | epoch avg. loss: 0.009 | test avg. loss: 4.471


 42%|████▏     | 21099/50000 [31:31<35:47, 13.46it/s]

Epochs: 21099 | epoch avg. loss: 0.011 | test avg. loss: 4.454


 42%|████▏     | 21103/50000 [31:33<1:49:43,  4.39it/s]

Epochs: 21100 | epoch avg. loss: 0.008 | test avg. loss: 4.494
Epochs: 21101 | epoch avg. loss: 0.006 | test avg. loss: 4.486
Epochs: 21102 | epoch avg. loss: 0.010 | test avg. loss: 4.471


 42%|████▏     | 21105/50000 [31:33<1:26:24,  5.57it/s]

Epochs: 21103 | epoch avg. loss: 0.021 | test avg. loss: 4.504
Epochs: 21104 | epoch avg. loss: 0.011 | test avg. loss: 4.523
Epochs: 21105 | epoch avg. loss: 0.006 | test avg. loss: 4.535


                                                       

Epochs: 21106 | epoch avg. loss: 0.011 | test avg. loss: 4.447
Epochs: 21107 | epoch avg. loss: 0.008 | test avg. loss: 4.431
Epochs: 21108 | epoch avg. loss: 0.017 | test avg. loss: 4.492


                                                     

Epochs: 21109 | epoch avg. loss: 0.014 | test avg. loss: 4.674
Epochs: 21110 | epoch avg. loss: 0.070 | test avg. loss: 4.582
Epochs: 21111 | epoch avg. loss: 0.037 | test avg. loss: 4.550


 42%|████▏     | 21115/50000 [31:34<41:31, 11.60it/s]

Epochs: 21112 | epoch avg. loss: 0.174 | test avg. loss: 4.451
Epochs: 21113 | epoch avg. loss: 0.035 | test avg. loss: 4.538
Epochs: 21114 | epoch avg. loss: 0.093 | test avg. loss: 4.719
Epochs: 21115 | epoch avg. loss: 0.064 | test avg. loss: 4.652


 42%|████▏     | 21119/50000 [31:34<36:39, 13.13it/s]

Epochs: 21116 | epoch avg. loss: 0.109 | test avg. loss: 4.652
Epochs: 21117 | epoch avg. loss: 0.071 | test avg. loss: 4.560
Epochs: 21118 | epoch avg. loss: 0.028 | test avg. loss: 4.580


 42%|████▏     | 21121/50000 [31:34<38:30, 12.50it/s]

Epochs: 21119 | epoch avg. loss: 0.062 | test avg. loss: 4.498
Epochs: 21120 | epoch avg. loss: 0.033 | test avg. loss: 4.609
Epochs: 21121 | epoch avg. loss: 0.027 | test avg. loss: 4.877


 42%|████▏     | 21125/50000 [31:35<36:58, 13.02it/s]

Epochs: 21122 | epoch avg. loss: 0.146 | test avg. loss: 5.112
Epochs: 21123 | epoch avg. loss: 0.232 | test avg. loss: 4.409
Epochs: 21124 | epoch avg. loss: 0.052 | test avg. loss: 4.303


 42%|████▏     | 21127/50000 [31:35<36:24, 13.22it/s]

Epochs: 21125 | epoch avg. loss: 0.111 | test avg. loss: 4.572
Epochs: 21126 | epoch avg. loss: 0.273 | test avg. loss: 5.011
Epochs: 21127 | epoch avg. loss: 0.173 | test avg. loss: 4.937


 42%|████▏     | 21131/50000 [31:35<35:08, 13.69it/s]

Epochs: 21128 | epoch avg. loss: 0.373 | test avg. loss: 4.956
Epochs: 21129 | epoch avg. loss: 0.689 | test avg. loss: 4.406
Epochs: 21130 | epoch avg. loss: 0.614 | test avg. loss: 5.717


 42%|████▏     | 21133/50000 [31:35<35:40, 13.48it/s]

Epochs: 21131 | epoch avg. loss: 1.416 | test avg. loss: 4.217
Epochs: 21132 | epoch avg. loss: 0.626 | test avg. loss: 5.511
Epochs: 21133 | epoch avg. loss: 1.566 | test avg. loss: 5.287


 42%|████▏     | 21137/50000 [31:36<38:00, 12.66it/s]

Epochs: 21134 | epoch avg. loss: 0.257 | test avg. loss: 5.320
Epochs: 21135 | epoch avg. loss: 0.229 | test avg. loss: 4.358
Epochs: 21136 | epoch avg. loss: 0.183 | test avg. loss: 4.193


 42%|████▏     | 21139/50000 [31:36<38:52, 12.38it/s]

Epochs: 21137 | epoch avg. loss: 0.192 | test avg. loss: 4.464
Epochs: 21138 | epoch avg. loss: 0.183 | test avg. loss: 4.694
Epochs: 21139 | epoch avg. loss: 0.221 | test avg. loss: 4.482


 42%|████▏     | 21143/50000 [31:36<38:50, 12.38it/s]

Epochs: 21140 | epoch avg. loss: 0.151 | test avg. loss: 5.158
Epochs: 21141 | epoch avg. loss: 0.648 | test avg. loss: 4.943
Epochs: 21142 | epoch avg. loss: 0.361 | test avg. loss: 4.851


 42%|████▏     | 21145/50000 [31:36<39:45, 12.10it/s]

Epochs: 21143 | epoch avg. loss: 1.334 | test avg. loss: 4.412
Epochs: 21144 | epoch avg. loss: 0.338 | test avg. loss: 6.404
Epochs: 21145 | epoch avg. loss: 0.766 | test avg. loss: 5.317


                                                     

Epochs: 21146 | epoch avg. loss: 1.505 | test avg. loss: 4.722
Epochs: 21147 | epoch avg. loss: 0.696 | test avg. loss: 8.622
Epochs: 21148 | epoch avg. loss: 2.679 | test avg. loss: 5.203


 42%|████▏     | 21151/50000 [31:37<36:52, 13.04it/s]

Epochs: 21149 | epoch avg. loss: 2.522 | test avg. loss: 4.502
Epochs: 21150 | epoch avg. loss: 0.930 | test avg. loss: 8.824
Epochs: 21151 | epoch avg. loss: 2.144 | test avg. loss: 6.525


 42%|████▏     | 21155/50000 [31:37<35:44, 13.45it/s]

Epochs: 21152 | epoch avg. loss: 2.142 | test avg. loss: 6.112
Epochs: 21153 | epoch avg. loss: 1.465 | test avg. loss: 6.350
Epochs: 21154 | epoch avg. loss: 1.074 | test avg. loss: 5.600


 42%|████▏     | 21157/50000 [31:37<35:43, 13.46it/s]

Epochs: 21155 | epoch avg. loss: 1.299 | test avg. loss: 6.217
Epochs: 21156 | epoch avg. loss: 0.947 | test avg. loss: 5.241
Epochs: 21157 | epoch avg. loss: 0.692 | test avg. loss: 5.364


 42%|████▏     | 21161/50000 [31:38<37:46, 12.72it/s]

Epochs: 21158 | epoch avg. loss: 0.304 | test avg. loss: 5.857
Epochs: 21159 | epoch avg. loss: 0.320 | test avg. loss: 5.370
Epochs: 21160 | epoch avg. loss: 0.647 | test avg. loss: 4.896


 42%|████▏     | 21163/50000 [31:38<35:45, 13.44it/s]

Epochs: 21161 | epoch avg. loss: 0.295 | test avg. loss: 4.571
Epochs: 21162 | epoch avg. loss: 0.273 | test avg. loss: 4.590
Epochs: 21163 | epoch avg. loss: 0.218 | test avg. loss: 5.148


 42%|████▏     | 21167/50000 [31:38<38:25, 12.51it/s]

Epochs: 21164 | epoch avg. loss: 0.297 | test avg. loss: 4.707
Epochs: 21165 | epoch avg. loss: 0.329 | test avg. loss: 4.972
Epochs: 21166 | epoch avg. loss: 0.451 | test avg. loss: 4.466


 42%|████▏     | 21169/50000 [31:38<38:30, 12.48it/s]

Epochs: 21167 | epoch avg. loss: 0.404 | test avg. loss: 4.587
Epochs: 21168 | epoch avg. loss: 0.248 | test avg. loss: 5.040
Epochs: 21169 | epoch avg. loss: 0.291 | test avg. loss: 4.383


 42%|████▏     | 21171/50000 [31:39<41:24, 11.60it/s]

Epochs: 21170 | epoch avg. loss: 0.145 | test avg. loss: 4.429
Epochs: 21171 | epoch avg. loss: 0.083 | test avg. loss: 4.351


 42%|████▏     | 21175/50000 [31:39<41:46, 11.50it/s]

Epochs: 21172 | epoch avg. loss: 0.026 | test avg. loss: 4.559
Epochs: 21173 | epoch avg. loss: 0.060 | test avg. loss: 4.476
Epochs: 21174 | epoch avg. loss: 0.088 | test avg. loss: 4.547


 42%|████▏     | 21179/50000 [31:39<36:58, 12.99it/s]

Epochs: 21175 | epoch avg. loss: 0.044 | test avg. loss: 4.703
Epochs: 21176 | epoch avg. loss: 0.051 | test avg. loss: 4.562
Epochs: 21177 | epoch avg. loss: 0.031 | test avg. loss: 4.691
Epochs: 21178 | epoch avg. loss: 0.051 | test avg. loss: 4.420


 42%|████▏     | 21183/50000 [31:39<33:49, 14.20it/s]

Epochs: 21179 | epoch avg. loss: 0.076 | test avg. loss: 4.434
Epochs: 21180 | epoch avg. loss: 0.033 | test avg. loss: 4.523
Epochs: 21181 | epoch avg. loss: 0.031 | test avg. loss: 4.508
Epochs: 21182 | epoch avg. loss: 0.020 | test avg. loss: 4.482


 42%|████▏     | 21185/50000 [31:39<35:15, 13.62it/s]

Epochs: 21183 | epoch avg. loss: 0.022 | test avg. loss: 4.502
Epochs: 21184 | epoch avg. loss: 0.023 | test avg. loss: 4.461
Epochs: 21185 | epoch avg. loss: 0.020 | test avg. loss: 4.421


 42%|████▏     | 21189/50000 [31:40<36:12, 13.26it/s]

Epochs: 21186 | epoch avg. loss: 0.024 | test avg. loss: 4.662
Epochs: 21187 | epoch avg. loss: 0.081 | test avg. loss: 4.537
Epochs: 21188 | epoch avg. loss: 0.041 | test avg. loss: 4.649


 42%|████▏     | 21193/50000 [31:40<34:14, 14.02it/s]

Epochs: 21189 | epoch avg. loss: 0.027 | test avg. loss: 4.658
Epochs: 21190 | epoch avg. loss: 0.032 | test avg. loss: 4.591
Epochs: 21191 | epoch avg. loss: 0.024 | test avg. loss: 4.561
Epochs: 21192 | epoch avg. loss: 0.010 | test avg. loss: 4.519


 42%|████▏     | 21195/50000 [31:40<33:35, 14.29it/s]

Epochs: 21193 | epoch avg. loss: 0.013 | test avg. loss: 4.518
Epochs: 21194 | epoch avg. loss: 0.016 | test avg. loss: 4.697
Epochs: 21195 | epoch avg. loss: 0.058 | test avg. loss: 4.601


 42%|████▏     | 21197/50000 [31:40<34:11, 14.04it/s]

Epochs: 21196 | epoch avg. loss: 0.028 | test avg. loss: 4.588
Epochs: 21197 | epoch avg. loss: 0.016 | test avg. loss: 4.575
Epochs: 21198 | epoch avg. loss: 0.017 | test avg. loss: 4.479


 42%|████▏     | 21199/50000 [31:41<38:22, 12.51it/s]

Epochs: 21199 | epoch avg. loss: 0.019 | test avg. loss: 4.506


 42%|████▏     | 21203/50000 [31:42<2:01:54,  3.94it/s]

Epochs: 21200 | epoch avg. loss: 0.010 | test avg. loss: 4.497
Epochs: 21201 | epoch avg. loss: 0.015 | test avg. loss: 4.644
Epochs: 21202 | epoch avg. loss: 0.016 | test avg. loss: 4.586


 42%|████▏     | 21205/50000 [31:43<1:37:43,  4.91it/s]

Epochs: 21203 | epoch avg. loss: 0.020 | test avg. loss: 4.664
Epochs: 21204 | epoch avg. loss: 0.015 | test avg. loss: 4.632
Epochs: 21205 | epoch avg. loss: 0.009 | test avg. loss: 4.567


 42%|████▏     | 21209/50000 [31:43<1:05:52,  7.28it/s]

Epochs: 21206 | epoch avg. loss: 0.009 | test avg. loss: 4.526
Epochs: 21207 | epoch avg. loss: 0.009 | test avg. loss: 4.554
Epochs: 21208 | epoch avg. loss: 0.011 | test avg. loss: 4.568




Epochs: 21209 | epoch avg. loss: 0.006 | test avg. loss: 4.638
Epochs: 21210 | epoch avg. loss: 0.011 | test avg. loss: 4.643
Epochs: 21211 | epoch avg. loss: 0.008 | test avg. loss: 4.626


 42%|████▏     | 21215/50000 [31:43<47:40, 10.06it/s]

Epochs: 21212 | epoch avg. loss: 0.009 | test avg. loss: 4.683
Epochs: 21213 | epoch avg. loss: 0.020 | test avg. loss: 4.574
Epochs: 21214 | epoch avg. loss: 0.017 | test avg. loss: 4.555


 42%|████▏     | 21217/50000 [31:44<45:28, 10.55it/s]

Epochs: 21215 | epoch avg. loss: 0.009 | test avg. loss: 4.580
Epochs: 21216 | epoch avg. loss: 0.010 | test avg. loss: 4.525
Epochs: 21217 | epoch avg. loss: 0.017 | test avg. loss: 4.632


 42%|████▏     | 21221/50000 [31:44<40:50, 11.74it/s]

Epochs: 21218 | epoch avg. loss: 0.010 | test avg. loss: 4.665
Epochs: 21219 | epoch avg. loss: 0.008 | test avg. loss: 4.627
Epochs: 21220 | epoch avg. loss: 0.010 | test avg. loss: 4.644


 42%|████▏     | 21223/50000 [31:44<39:25, 12.17it/s]

Epochs: 21221 | epoch avg. loss: 0.011 | test avg. loss: 4.590
Epochs: 21222 | epoch avg. loss: 0.010 | test avg. loss: 4.564
Epochs: 21223 | epoch avg. loss: 0.019 | test avg. loss: 4.645
Epochs: 21224 | epoch avg. loss: 0.007 | test avg. loss: 4.678


 42%|████▏     | 21227/50000 [31:44<34:26, 13.92it/s]

Epochs: 21225 | epoch avg. loss: 0.006 | test avg. loss: 4.668
Epochs: 21226 | epoch avg. loss: 0.007 | test avg. loss: 4.681
Epochs: 21227 | epoch avg. loss: 0.009 | test avg. loss: 4.637


 42%|████▏     | 21231/50000 [31:45<34:24, 13.94it/s]

Epochs: 21228 | epoch avg. loss: 0.005 | test avg. loss: 4.603
Epochs: 21229 | epoch avg. loss: 0.005 | test avg. loss: 4.634
Epochs: 21230 | epoch avg. loss: 0.008 | test avg. loss: 4.660


 42%|████▏     | 21235/50000 [31:45<32:35, 14.71it/s]

Epochs: 21231 | epoch avg. loss: 0.007 | test avg. loss: 4.640
Epochs: 21232 | epoch avg. loss: 0.013 | test avg. loss: 4.662
Epochs: 21233 | epoch avg. loss: 0.005 | test avg. loss: 4.636
Epochs: 21234 | epoch avg. loss: 0.006 | test avg. loss: 4.611


 42%|████▏     | 21237/50000 [31:45<31:58, 14.99it/s]

Epochs: 21235 | epoch avg. loss: 0.007 | test avg. loss: 4.657
Epochs: 21236 | epoch avg. loss: 0.010 | test avg. loss: 4.584
Epochs: 21237 | epoch avg. loss: 0.011 | test avg. loss: 4.674
Epochs: 21238 | epoch avg. loss: 0.005 | test avg. loss: 4.629


 42%|████▏     | 21243/50000 [31:45<31:49, 15.06it/s]

Epochs: 21239 | epoch avg. loss: 0.018 | test avg. loss: 4.762
Epochs: 21240 | epoch avg. loss: 0.047 | test avg. loss: 4.706
Epochs: 21241 | epoch avg. loss: 0.019 | test avg. loss: 4.630
Epochs: 21242 | epoch avg. loss: 0.021 | test avg. loss: 4.747


 42%|████▏     | 21245/50000 [31:46<33:51, 14.15it/s]

Epochs: 21243 | epoch avg. loss: 0.026 | test avg. loss: 4.602
Epochs: 21244 | epoch avg. loss: 0.028 | test avg. loss: 4.640
Epochs: 21245 | epoch avg. loss: 0.010 | test avg. loss: 4.647


 42%|████▏     | 21249/50000 [31:46<36:24, 13.16it/s]

Epochs: 21246 | epoch avg. loss: 0.010 | test avg. loss: 4.616
Epochs: 21247 | epoch avg. loss: 0.009 | test avg. loss: 4.671
Epochs: 21248 | epoch avg. loss: 0.007 | test avg. loss: 4.618


 43%|████▎     | 21251/50000 [31:46<35:28, 13.51it/s]

Epochs: 21249 | epoch avg. loss: 0.021 | test avg. loss: 4.699
Epochs: 21250 | epoch avg. loss: 0.015 | test avg. loss: 4.707
Epochs: 21251 | epoch avg. loss: 0.011 | test avg. loss: 4.626


 43%|████▎     | 21255/50000 [31:46<35:38, 13.44it/s]

Epochs: 21252 | epoch avg. loss: 0.018 | test avg. loss: 4.803
Epochs: 21253 | epoch avg. loss: 0.028 | test avg. loss: 4.620
Epochs: 21254 | epoch avg. loss: 0.040 | test avg. loss: 4.656


 43%|████▎     | 21257/50000 [31:46<35:28, 13.50it/s]

Epochs: 21255 | epoch avg. loss: 0.022 | test avg. loss: 4.748
Epochs: 21256 | epoch avg. loss: 0.033 | test avg. loss: 4.606
Epochs: 21257 | epoch avg. loss: 0.026 | test avg. loss: 4.740


 43%|████▎     | 21261/50000 [31:47<35:19, 13.56it/s]

Epochs: 21258 | epoch avg. loss: 0.049 | test avg. loss: 4.718
Epochs: 21259 | epoch avg. loss: 0.022 | test avg. loss: 4.707
Epochs: 21260 | epoch avg. loss: 0.025 | test avg. loss: 4.878


 43%|████▎     | 21263/50000 [31:47<34:24, 13.92it/s]

Epochs: 21261 | epoch avg. loss: 0.034 | test avg. loss: 4.661
Epochs: 21262 | epoch avg. loss: 0.028 | test avg. loss: 4.721
Epochs: 21263 | epoch avg. loss: 0.027 | test avg. loss: 4.534
Epochs: 21264 | epoch avg. loss: 0.029 | test avg. loss: 4.538


 43%|████▎     | 21267/50000 [31:47<33:50, 14.15it/s]

Epochs: 21265 | epoch avg. loss: 0.020 | test avg. loss: 4.753
Epochs: 21266 | epoch avg. loss: 0.035 | test avg. loss: 4.674
Epochs: 21267 | epoch avg. loss: 0.017 | test avg. loss: 4.803


 43%|████▎     | 21269/50000 [31:47<37:20, 12.82it/s]

Epochs: 21268 | epoch avg. loss: 0.018 | test avg. loss: 4.760
Epochs: 21269 | epoch avg. loss: 0.009 | test avg. loss: 4.748


 43%|████▎     | 21273/50000 [31:48<41:36, 11.51it/s]

Epochs: 21270 | epoch avg. loss: 0.020 | test avg. loss: 4.659
Epochs: 21271 | epoch avg. loss: 0.012 | test avg. loss: 4.555
Epochs: 21272 | epoch avg. loss: 0.013 | test avg. loss: 4.671


 43%|████▎     | 21277/50000 [31:48<36:56, 12.96it/s]

Epochs: 21273 | epoch avg. loss: 0.017 | test avg. loss: 4.596
Epochs: 21274 | epoch avg. loss: 0.038 | test avg. loss: 4.678
Epochs: 21275 | epoch avg. loss: 0.009 | test avg. loss: 4.739
Epochs: 21276 | epoch avg. loss: 0.009 | test avg. loss: 4.669


 43%|████▎     | 21281/50000 [31:48<33:28, 14.30it/s]

Epochs: 21277 | epoch avg. loss: 0.012 | test avg. loss: 4.740
Epochs: 21278 | epoch avg. loss: 0.016 | test avg. loss: 4.671
Epochs: 21279 | epoch avg. loss: 0.008 | test avg. loss: 4.670
Epochs: 21280 | epoch avg. loss: 0.008 | test avg. loss: 4.751


 43%|████▎     | 21283/50000 [31:49<34:08, 14.02it/s]

Epochs: 21281 | epoch avg. loss: 0.009 | test avg. loss: 4.713
Epochs: 21282 | epoch avg. loss: 0.006 | test avg. loss: 4.732
Epochs: 21283 | epoch avg. loss: 0.006 | test avg. loss: 4.703
Epochs: 21284 | epoch avg. loss: 0.006 | test avg. loss: 4.670


 43%|████▎     | 21287/50000 [31:49<32:37, 14.67it/s]

Epochs: 21285 | epoch avg. loss: 0.006 | test avg. loss: 4.630
Epochs: 21286 | epoch avg. loss: 0.005 | test avg. loss: 4.682
Epochs: 21287 | epoch avg. loss: 0.011 | test avg. loss: 4.730
Epochs: 21288 | epoch avg. loss: 0.008 | test avg. loss: 4.683


 43%|████▎     | 21293/50000 [31:49<31:48, 15.04it/s]

Epochs: 21289 | epoch avg. loss: 0.015 | test avg. loss: 4.814
Epochs: 21290 | epoch avg. loss: 0.022 | test avg. loss: 4.597
Epochs: 21291 | epoch avg. loss: 0.035 | test avg. loss: 4.595
Epochs: 21292 | epoch avg. loss: 0.017 | test avg. loss: 4.761


                                                     

Epochs: 21293 | epoch avg. loss: 0.029 | test avg. loss: 4.630
Epochs: 21294 | epoch avg. loss: 0.069 | test avg. loss: 4.811
Epochs: 21295 | epoch avg. loss: 0.021 | test avg. loss: 4.740


 43%|████▎     | 21299/50000 [31:49<35:16, 13.56it/s]

Epochs: 21296 | epoch avg. loss: 0.011 | test avg. loss: 4.657
Epochs: 21297 | epoch avg. loss: 0.010 | test avg. loss: 4.667
Epochs: 21298 | epoch avg. loss: 0.016 | test avg. loss: 4.596


 43%|████▎     | 21299/50000 [31:50<35:16, 13.56it/s]

Epochs: 21299 | epoch avg. loss: 0.007 | test avg. loss: 4.608




Epochs: 21300 | epoch avg. loss: 0.006 | test avg. loss: 4.725
Epochs: 21301 | epoch avg. loss: 0.009 | test avg. loss: 4.699
Epochs: 21302 | epoch avg. loss: 0.014 | test avg. loss: 4.714


 43%|████▎     | 21305/50000 [31:51<1:24:55,  5.63it/s]

Epochs: 21303 | epoch avg. loss: 0.006 | test avg. loss: 4.677
Epochs: 21304 | epoch avg. loss: 0.005 | test avg. loss: 4.628
Epochs: 21305 | epoch avg. loss: 0.005 | test avg. loss: 4.596


 43%|████▎     | 21309/50000 [31:52<59:15,  8.07it/s]

Epochs: 21306 | epoch avg. loss: 0.005 | test avg. loss: 4.635
Epochs: 21307 | epoch avg. loss: 0.007 | test avg. loss: 4.682
Epochs: 21308 | epoch avg. loss: 0.005 | test avg. loss: 4.687
Epochs: 21309 | epoch avg. loss: 0.008 | test avg. loss: 4.758


 43%|████▎     | 21313/50000 [31:52<45:36, 10.48it/s]

Epochs: 21310 | epoch avg. loss: 0.008 | test avg. loss: 4.651
Epochs: 21311 | epoch avg. loss: 0.011 | test avg. loss: 4.652
Epochs: 21312 | epoch avg. loss: 0.005 | test avg. loss: 4.658




Epochs: 21313 | epoch avg. loss: 0.008 | test avg. loss: 4.655
Epochs: 21314 | epoch avg. loss: 0.010 | test avg. loss: 4.745
Epochs: 21315 | epoch avg. loss: 0.013 | test avg. loss: 4.666


 43%|████▎     | 21319/50000 [31:52<36:27, 13.11it/s]

Epochs: 21316 | epoch avg. loss: 0.012 | test avg. loss: 4.660
Epochs: 21317 | epoch avg. loss: 0.009 | test avg. loss: 4.759
Epochs: 21318 | epoch avg. loss: 0.043 | test avg. loss: 4.647
Epochs: 21319 | epoch avg. loss: 0.014 | test avg. loss: 4.639


 43%|████▎     | 21323/50000 [31:53<35:10, 13.59it/s]

Epochs: 21320 | epoch avg. loss: 0.013 | test avg. loss: 4.754
Epochs: 21321 | epoch avg. loss: 0.017 | test avg. loss: 4.645
Epochs: 21322 | epoch avg. loss: 0.020 | test avg. loss: 4.680


 43%|████▎     | 21325/50000 [31:53<34:24, 13.89it/s]

Epochs: 21323 | epoch avg. loss: 0.012 | test avg. loss: 4.752
Epochs: 21324 | epoch avg. loss: 0.021 | test avg. loss: 4.615
Epochs: 21325 | epoch avg. loss: 0.039 | test avg. loss: 4.674


 43%|████▎     | 21329/50000 [31:53<35:01, 13.64it/s]

Epochs: 21326 | epoch avg. loss: 0.021 | test avg. loss: 4.712
Epochs: 21327 | epoch avg. loss: 0.022 | test avg. loss: 4.699
Epochs: 21328 | epoch avg. loss: 0.077 | test avg. loss: 5.007


 43%|████▎     | 21331/50000 [31:53<36:22, 13.13it/s]

Epochs: 21329 | epoch avg. loss: 0.101 | test avg. loss: 4.775
Epochs: 21330 | epoch avg. loss: 0.035 | test avg. loss: 4.613
Epochs: 21331 | epoch avg. loss: 0.038 | test avg. loss: 4.786


 43%|████▎     | 21335/50000 [31:54<36:59, 12.91it/s]

Epochs: 21332 | epoch avg. loss: 0.094 | test avg. loss: 4.468
Epochs: 21333 | epoch avg. loss: 0.059 | test avg. loss: 4.508
Epochs: 21334 | epoch avg. loss: 0.044 | test avg. loss: 4.873


 43%|████▎     | 21337/50000 [31:54<37:05, 12.88it/s]

Epochs: 21335 | epoch avg. loss: 0.098 | test avg. loss: 4.745
Epochs: 21336 | epoch avg. loss: 0.030 | test avg. loss: 4.740
Epochs: 21337 | epoch avg. loss: 0.037 | test avg. loss: 4.888


 43%|████▎     | 21341/50000 [31:54<36:48, 12.98it/s]

Epochs: 21338 | epoch avg. loss: 0.074 | test avg. loss: 4.495
Epochs: 21339 | epoch avg. loss: 0.068 | test avg. loss: 4.598
Epochs: 21340 | epoch avg. loss: 0.067 | test avg. loss: 4.668


 43%|████▎     | 21343/50000 [31:54<36:03, 13.24it/s]

Epochs: 21341 | epoch avg. loss: 0.036 | test avg. loss: 4.656
Epochs: 21342 | epoch avg. loss: 0.061 | test avg. loss: 4.896
Epochs: 21343 | epoch avg. loss: 0.039 | test avg. loss: 4.761


 43%|████▎     | 21347/50000 [31:54<39:57, 11.95it/s]

Epochs: 21344 | epoch avg. loss: 0.019 | test avg. loss: 4.646
Epochs: 21345 | epoch avg. loss: 0.021 | test avg. loss: 4.762
Epochs: 21346 | epoch avg. loss: 0.050 | test avg. loss: 4.643


 43%|████▎     | 21349/50000 [31:55<41:29, 11.51it/s]

Epochs: 21347 | epoch avg. loss: 0.072 | test avg. loss: 4.765
Epochs: 21348 | epoch avg. loss: 0.085 | test avg. loss: 4.978
Epochs: 21349 | epoch avg. loss: 0.084 | test avg. loss: 4.706


 43%|████▎     | 21353/50000 [31:55<39:56, 11.95it/s]

Epochs: 21350 | epoch avg. loss: 0.111 | test avg. loss: 4.880
Epochs: 21351 | epoch avg. loss: 0.060 | test avg. loss: 4.954
Epochs: 21352 | epoch avg. loss: 0.051 | test avg. loss: 4.784


 43%|████▎     | 21355/50000 [31:55<38:07, 12.52it/s]

Epochs: 21353 | epoch avg. loss: 0.256 | test avg. loss: 4.773
Epochs: 21354 | epoch avg. loss: 0.108 | test avg. loss: 4.658
Epochs: 21355 | epoch avg. loss: 0.043 | test avg. loss: 4.560


 43%|████▎     | 21359/50000 [31:55<37:32, 12.72it/s]

Epochs: 21356 | epoch avg. loss: 0.024 | test avg. loss: 4.751
Epochs: 21357 | epoch avg. loss: 0.039 | test avg. loss: 4.823
Epochs: 21358 | epoch avg. loss: 0.039 | test avg. loss: 4.856


 43%|████▎     | 21361/50000 [31:56<38:20, 12.45it/s]

Epochs: 21359 | epoch avg. loss: 0.025 | test avg. loss: 4.873
Epochs: 21360 | epoch avg. loss: 0.024 | test avg. loss: 4.679
Epochs: 21361 | epoch avg. loss: 0.036 | test avg. loss: 4.637


 43%|████▎     | 21365/50000 [31:56<38:25, 12.42it/s]

Epochs: 21362 | epoch avg. loss: 0.041 | test avg. loss: 4.814
Epochs: 21363 | epoch avg. loss: 0.037 | test avg. loss: 4.734
Epochs: 21364 | epoch avg. loss: 0.023 | test avg. loss: 4.621
Epochs: 21365 | epoch avg. loss: 0.033 | test avg. loss: 4.718


 43%|████▎     | 21369/50000 [31:56<36:16, 13.16it/s]

Epochs: 21366 | epoch avg. loss: 0.035 | test avg. loss: 4.526
Epochs: 21367 | epoch avg. loss: 0.033 | test avg. loss: 4.533
Epochs: 21368 | epoch avg. loss: 0.022 | test avg. loss: 4.751


 43%|████▎     | 21371/50000 [31:56<38:39, 12.34it/s]

Epochs: 21369 | epoch avg. loss: 0.032 | test avg. loss: 4.727
Epochs: 21370 | epoch avg. loss: 0.033 | test avg. loss: 4.751


 43%|████▎     | 21373/50000 [31:57<42:20, 11.27it/s]

Epochs: 21371 | epoch avg. loss: 0.020 | test avg. loss: 4.842
Epochs: 21372 | epoch avg. loss: 0.048 | test avg. loss: 4.640
Epochs: 21373 | epoch avg. loss: 0.017 | test avg. loss: 4.680


 43%|████▎     | 21377/50000 [31:57<40:00, 11.92it/s]

Epochs: 21374 | epoch avg. loss: 0.011 | test avg. loss: 4.768
Epochs: 21375 | epoch avg. loss: 0.011 | test avg. loss: 4.721
Epochs: 21376 | epoch avg. loss: 0.006 | test avg. loss: 4.725




Epochs: 21377 | epoch avg. loss: 0.013 | test avg. loss: 4.737
Epochs: 21378 | epoch avg. loss: 0.010 | test avg. loss: 4.656
Epochs: 21379 | epoch avg. loss: 0.024 | test avg. loss: 4.687


 43%|████▎     | 21383/50000 [31:57<37:42, 12.65it/s]

Epochs: 21380 | epoch avg. loss: 0.014 | test avg. loss: 4.829
Epochs: 21381 | epoch avg. loss: 0.046 | test avg. loss: 4.655
Epochs: 21382 | epoch avg. loss: 0.025 | test avg. loss: 4.694


 43%|████▎     | 21385/50000 [31:58<38:29, 12.39it/s]

Epochs: 21383 | epoch avg. loss: 0.019 | test avg. loss: 4.923
Epochs: 21384 | epoch avg. loss: 0.049 | test avg. loss: 4.714
Epochs: 21385 | epoch avg. loss: 0.048 | test avg. loss: 4.668




Epochs: 21386 | epoch avg. loss: 0.028 | test avg. loss: 4.763
Epochs: 21387 | epoch avg. loss: 0.034 | test avg. loss: 4.726
Epochs: 21388 | epoch avg. loss: 0.011 | test avg. loss: 4.703


 43%|████▎     | 21391/50000 [31:58<34:35, 13.78it/s]

Epochs: 21389 | epoch avg. loss: 0.013 | test avg. loss: 4.888
Epochs: 21390 | epoch avg. loss: 0.050 | test avg. loss: 4.742
Epochs: 21391 | epoch avg. loss: 0.018 | test avg. loss: 4.662


 43%|████▎     | 21395/50000 [31:58<34:19, 13.89it/s]

Epochs: 21392 | epoch avg. loss: 0.072 | test avg. loss: 4.764
Epochs: 21393 | epoch avg. loss: 0.019 | test avg. loss: 4.881
Epochs: 21394 | epoch avg. loss: 0.036 | test avg. loss: 4.761


 43%|████▎     | 21397/50000 [31:59<37:25, 12.74it/s]

Epochs: 21395 | epoch avg. loss: 0.006 | test avg. loss: 4.727
Epochs: 21396 | epoch avg. loss: 0.011 | test avg. loss: 4.642
Epochs: 21397 | epoch avg. loss: 0.009 | test avg. loss: 4.625


 43%|████▎     | 21399/50000 [31:59<38:16, 12.45it/s]

Epochs: 21398 | epoch avg. loss: 0.008 | test avg. loss: 4.653
Epochs: 21399 | epoch avg. loss: 0.006 | test avg. loss: 4.765


 43%|████▎     | 21403/50000 [32:01<1:57:37,  4.05it/s]

Epochs: 21400 | epoch avg. loss: 0.008 | test avg. loss: 4.717
Epochs: 21401 | epoch avg. loss: 0.016 | test avg. loss: 4.671
Epochs: 21402 | epoch avg. loss: 0.014 | test avg. loss: 4.726


 43%|████▎     | 21405/50000 [32:01<1:33:42,  5.09it/s]

Epochs: 21403 | epoch avg. loss: 0.021 | test avg. loss: 4.738
Epochs: 21404 | epoch avg. loss: 0.013 | test avg. loss: 4.679
Epochs: 21405 | epoch avg. loss: 0.014 | test avg. loss: 4.669


 43%|████▎     | 21409/50000 [32:01<1:08:17,  6.98it/s]

Epochs: 21406 | epoch avg. loss: 0.020 | test avg. loss: 4.708
Epochs: 21407 | epoch avg. loss: 0.008 | test avg. loss: 4.773
Epochs: 21408 | epoch avg. loss: 0.017 | test avg. loss: 4.666


 43%|████▎     | 21411/50000 [32:01<59:00,  8.08it/s]

Epochs: 21409 | epoch avg. loss: 0.014 | test avg. loss: 4.717
Epochs: 21410 | epoch avg. loss: 0.006 | test avg. loss: 4.736
Epochs: 21411 | epoch avg. loss: 0.006 | test avg. loss: 4.708


 43%|████▎     | 21415/50000 [32:02<49:46,  9.57it/s]

Epochs: 21412 | epoch avg. loss: 0.006 | test avg. loss: 4.712
Epochs: 21413 | epoch avg. loss: 0.011 | test avg. loss: 4.754
Epochs: 21414 | epoch avg. loss: 0.015 | test avg. loss: 4.639


 43%|████▎     | 21417/50000 [32:02<44:53, 10.61it/s]

Epochs: 21415 | epoch avg. loss: 0.046 | test avg. loss: 4.708
Epochs: 21416 | epoch avg. loss: 0.014 | test avg. loss: 4.824
Epochs: 21417 | epoch avg. loss: 0.025 | test avg. loss: 4.714


 43%|████▎     | 21421/50000 [32:02<39:16, 12.13it/s]

Epochs: 21418 | epoch avg. loss: 0.009 | test avg. loss: 4.687
Epochs: 21419 | epoch avg. loss: 0.008 | test avg. loss: 4.703
Epochs: 21420 | epoch avg. loss: 0.006 | test avg. loss: 4.657
Epochs: 21421 | epoch avg. loss: 0.005 | test avg. loss: 4.703


 43%|████▎     | 21425/50000 [32:02<35:33, 13.39it/s]

Epochs: 21422 | epoch avg. loss: 0.009 | test avg. loss: 4.748
Epochs: 21423 | epoch avg. loss: 0.014 | test avg. loss: 4.731
Epochs: 21424 | epoch avg. loss: 0.006 | test avg. loss: 4.681




Epochs: 21425 | epoch avg. loss: 0.013 | test avg. loss: 4.718
Epochs: 21426 | epoch avg. loss: 0.006 | test avg. loss: 4.701


 43%|████▎     | 21429/50000 [32:03<38:25, 12.39it/s]

Epochs: 21427 | epoch avg. loss: 0.009 | test avg. loss: 4.708
Epochs: 21428 | epoch avg. loss: 0.014 | test avg. loss: 4.752
Epochs: 21429 | epoch avg. loss: 0.008 | test avg. loss: 4.840


 43%|████▎     | 21433/50000 [32:03<38:36, 12.33it/s]

Epochs: 21430 | epoch avg. loss: 0.023 | test avg. loss: 4.650
Epochs: 21431 | epoch avg. loss: 0.022 | test avg. loss: 4.677
Epochs: 21432 | epoch avg. loss: 0.015 | test avg. loss: 4.813


 43%|████▎     | 21437/50000 [32:03<35:01, 13.59it/s]

Epochs: 21433 | epoch avg. loss: 0.027 | test avg. loss: 4.700
Epochs: 21434 | epoch avg. loss: 0.010 | test avg. loss: 4.691
Epochs: 21435 | epoch avg. loss: 0.007 | test avg. loss: 4.744
Epochs: 21436 | epoch avg. loss: 0.014 | test avg. loss: 4.674


 43%|████▎     | 21439/50000 [32:03<33:58, 14.01it/s]

Epochs: 21437 | epoch avg. loss: 0.006 | test avg. loss: 4.666
Epochs: 21438 | epoch avg. loss: 0.007 | test avg. loss: 4.742
Epochs: 21439 | epoch avg. loss: 0.013 | test avg. loss: 4.675


 43%|████▎     | 21443/50000 [32:04<34:55, 13.63it/s]

Epochs: 21440 | epoch avg. loss: 0.017 | test avg. loss: 4.665
Epochs: 21441 | epoch avg. loss: 0.019 | test avg. loss: 4.743
Epochs: 21442 | epoch avg. loss: 0.011 | test avg. loss: 4.743


 43%|████▎     | 21445/50000 [32:04<35:14, 13.50it/s]

Epochs: 21443 | epoch avg. loss: 0.008 | test avg. loss: 4.688
Epochs: 21444 | epoch avg. loss: 0.011 | test avg. loss: 4.721
Epochs: 21445 | epoch avg. loss: 0.007 | test avg. loss: 4.732


 43%|████▎     | 21449/50000 [32:04<34:23, 13.84it/s]

Epochs: 21446 | epoch avg. loss: 0.006 | test avg. loss: 4.671
Epochs: 21447 | epoch avg. loss: 0.016 | test avg. loss: 4.671
Epochs: 21448 | epoch avg. loss: 0.007 | test avg. loss: 4.732


 43%|████▎     | 21451/50000 [32:04<34:26, 13.82it/s]

Epochs: 21449 | epoch avg. loss: 0.012 | test avg. loss: 4.603
Epochs: 21450 | epoch avg. loss: 0.018 | test avg. loss: 4.613
Epochs: 21451 | epoch avg. loss: 0.017 | test avg. loss: 4.687


 43%|████▎     | 21455/50000 [32:04<34:32, 13.77it/s]

Epochs: 21452 | epoch avg. loss: 0.005 | test avg. loss: 4.739
Epochs: 21453 | epoch avg. loss: 0.006 | test avg. loss: 4.708
Epochs: 21454 | epoch avg. loss: 0.007 | test avg. loss: 4.678


 43%|████▎     | 21457/50000 [32:05<35:42, 13.32it/s]

Epochs: 21455 | epoch avg. loss: 0.006 | test avg. loss: 4.636
Epochs: 21456 | epoch avg. loss: 0.006 | test avg. loss: 4.648
Epochs: 21457 | epoch avg. loss: 0.006 | test avg. loss: 4.668


 43%|████▎     | 21461/50000 [32:05<34:14, 13.89it/s]

Epochs: 21458 | epoch avg. loss: 0.008 | test avg. loss: 4.692
Epochs: 21459 | epoch avg. loss: 0.008 | test avg. loss: 4.760
Epochs: 21460 | epoch avg. loss: 0.013 | test avg. loss: 4.702
Epochs: 21461 | epoch avg. loss: 0.006 | test avg. loss: 4.642


 43%|████▎     | 21465/50000 [32:05<32:26, 14.66it/s]

Epochs: 21462 | epoch avg. loss: 0.005 | test avg. loss: 4.622
Epochs: 21463 | epoch avg. loss: 0.005 | test avg. loss: 4.637
Epochs: 21464 | epoch avg. loss: 0.005 | test avg. loss: 4.675
Epochs: 21465 | epoch avg. loss: 0.006 | test avg. loss: 4.698


 43%|████▎     | 21469/50000 [32:05<34:14, 13.89it/s]

Epochs: 21466 | epoch avg. loss: 0.005 | test avg. loss: 4.686
Epochs: 21467 | epoch avg. loss: 0.005 | test avg. loss: 4.706
Epochs: 21468 | epoch avg. loss: 0.015 | test avg. loss: 4.667


 43%|████▎     | 21471/50000 [32:06<34:04, 13.95it/s]

Epochs: 21469 | epoch avg. loss: 0.008 | test avg. loss: 4.662
Epochs: 21470 | epoch avg. loss: 0.005 | test avg. loss: 4.665
Epochs: 21471 | epoch avg. loss: 0.007 | test avg. loss: 4.697


 43%|████▎     | 21475/50000 [32:06<35:25, 13.42it/s]

Epochs: 21472 | epoch avg. loss: 0.006 | test avg. loss: 4.743
Epochs: 21473 | epoch avg. loss: 0.014 | test avg. loss: 4.725
Epochs: 21474 | epoch avg. loss: 0.013 | test avg. loss: 4.644


 43%|████▎     | 21477/50000 [32:06<34:30, 13.78it/s]

Epochs: 21475 | epoch avg. loss: 0.015 | test avg. loss: 4.640
Epochs: 21476 | epoch avg. loss: 0.014 | test avg. loss: 4.782
Epochs: 21477 | epoch avg. loss: 0.026 | test avg. loss: 4.745


 43%|████▎     | 21481/50000 [32:06<36:52, 12.89it/s]

Epochs: 21478 | epoch avg. loss: 0.015 | test avg. loss: 4.693
Epochs: 21479 | epoch avg. loss: 0.022 | test avg. loss: 4.783
Epochs: 21480 | epoch avg. loss: 0.036 | test avg. loss: 4.684


 43%|████▎     | 21483/50000 [32:07<39:20, 12.08it/s]

Epochs: 21481 | epoch avg. loss: 0.015 | test avg. loss: 4.541
Epochs: 21482 | epoch avg. loss: 0.030 | test avg. loss: 4.615
Epochs: 21483 | epoch avg. loss: 0.009 | test avg. loss: 4.730


 43%|████▎     | 21487/50000 [32:07<35:44, 13.30it/s]

Epochs: 21484 | epoch avg. loss: 0.009 | test avg. loss: 4.684
Epochs: 21485 | epoch avg. loss: 0.008 | test avg. loss: 4.650
Epochs: 21486 | epoch avg. loss: 0.007 | test avg. loss: 4.764
Epochs: 21487 | epoch avg. loss: 0.043 | test avg. loss: 4.679


 43%|████▎     | 21491/50000 [32:07<33:13, 14.30it/s]

Epochs: 21488 | epoch avg. loss: 0.011 | test avg. loss: 4.638
Epochs: 21489 | epoch avg. loss: 0.029 | test avg. loss: 4.709
Epochs: 21490 | epoch avg. loss: 0.007 | test avg. loss: 4.729
Epochs: 21491 | epoch avg. loss: 0.009 | test avg. loss: 4.610


 43%|████▎     | 21495/50000 [32:07<33:26, 14.21it/s]

Epochs: 21492 | epoch avg. loss: 0.038 | test avg. loss: 4.620
Epochs: 21493 | epoch avg. loss: 0.011 | test avg. loss: 4.687
Epochs: 21494 | epoch avg. loss: 0.015 | test avg. loss: 4.690


 43%|████▎     | 21499/50000 [32:08<32:44, 14.51it/s]

Epochs: 21495 | epoch avg. loss: 0.010 | test avg. loss: 4.634
Epochs: 21496 | epoch avg. loss: 0.060 | test avg. loss: 4.678
Epochs: 21497 | epoch avg. loss: 0.028 | test avg. loss: 4.750
Epochs: 21498 | epoch avg. loss: 0.014 | test avg. loss: 4.795


 43%|████▎     | 21499/50000 [32:08<32:44, 14.51it/s]

Epochs: 21499 | epoch avg. loss: 0.022 | test avg. loss: 4.629


 43%|████▎     | 21503/50000 [32:09<1:47:42,  4.41it/s]

Epochs: 21500 | epoch avg. loss: 0.016 | test avg. loss: 4.595
Epochs: 21501 | epoch avg. loss: 0.018 | test avg. loss: 4.654
Epochs: 21502 | epoch avg. loss: 0.007 | test avg. loss: 4.730


 43%|████▎     | 21505/50000 [32:10<1:26:44,  5.48it/s]

Epochs: 21503 | epoch avg. loss: 0.007 | test avg. loss: 4.725
Epochs: 21504 | epoch avg. loss: 0.027 | test avg. loss: 4.716
Epochs: 21505 | epoch avg. loss: 0.013 | test avg. loss: 4.774


 43%|████▎     | 21509/50000 [32:10<58:46,  8.08it/s]

Epochs: 21506 | epoch avg. loss: 0.031 | test avg. loss: 4.812
Epochs: 21507 | epoch avg. loss: 0.037 | test avg. loss: 4.617
Epochs: 21508 | epoch avg. loss: 0.037 | test avg. loss: 4.666
Epochs: 21509 | epoch avg. loss: 0.038 | test avg. loss: 4.768


 43%|████▎     | 21513/50000 [32:10<45:33, 10.42it/s]

Epochs: 21510 | epoch avg. loss: 0.070 | test avg. loss: 4.649
Epochs: 21511 | epoch avg. loss: 0.033 | test avg. loss: 4.669
Epochs: 21512 | epoch avg. loss: 0.046 | test avg. loss: 4.712
Epochs: 21513 | epoch avg. loss: 0.035 | test avg. loss: 4.788


 43%|████▎     | 21517/50000 [32:10<39:42, 11.96it/s]

Epochs: 21514 | epoch avg. loss: 0.079 | test avg. loss: 4.494
Epochs: 21515 | epoch avg. loss: 0.026 | test avg. loss: 4.501
Epochs: 21516 | epoch avg. loss: 0.024 | test avg. loss: 4.828


 43%|████▎     | 21519/50000 [32:11<38:26, 12.35it/s]

Epochs: 21517 | epoch avg. loss: 0.091 | test avg. loss: 4.879
Epochs: 21518 | epoch avg. loss: 0.032 | test avg. loss: 4.750
Epochs: 21519 | epoch avg. loss: 0.079 | test avg. loss: 4.615


 43%|████▎     | 21523/50000 [32:11<37:49, 12.55it/s]

Epochs: 21520 | epoch avg. loss: 0.022 | test avg. loss: 4.522
Epochs: 21521 | epoch avg. loss: 0.025 | test avg. loss: 4.502
Epochs: 21522 | epoch avg. loss: 0.015 | test avg. loss: 4.625


                                                     

Epochs: 21523 | epoch avg. loss: 0.020 | test avg. loss: 4.712
Epochs: 21524 | epoch avg. loss: 0.016 | test avg. loss: 4.799
Epochs: 21525 | epoch avg. loss: 0.035 | test avg. loss: 4.718




Epochs: 21526 | epoch avg. loss: 0.022 | test avg. loss: 4.624
Epochs: 21527 | epoch avg. loss: 0.012 | test avg. loss: 4.602
Epochs: 21528 | epoch avg. loss: 0.009 | test avg. loss: 4.699


 43%|████▎     | 21531/50000 [32:12<33:52, 14.00it/s]

Epochs: 21529 | epoch avg. loss: 0.041 | test avg. loss: 4.723
Epochs: 21530 | epoch avg. loss: 0.026 | test avg. loss: 4.740
Epochs: 21531 | epoch avg. loss: 0.039 | test avg. loss: 4.745


 43%|████▎     | 21535/50000 [32:12<34:19, 13.82it/s]

Epochs: 21532 | epoch avg. loss: 0.041 | test avg. loss: 4.598
Epochs: 21533 | epoch avg. loss: 0.016 | test avg. loss: 4.666
Epochs: 21534 | epoch avg. loss: 0.012 | test avg. loss: 4.787


 43%|████▎     | 21537/50000 [32:12<34:01, 13.94it/s]

Epochs: 21535 | epoch avg. loss: 0.008 | test avg. loss: 4.738
Epochs: 21536 | epoch avg. loss: 0.021 | test avg. loss: 4.660
Epochs: 21537 | epoch avg. loss: 0.011 | test avg. loss: 4.680


 43%|████▎     | 21541/50000 [32:12<36:44, 12.91it/s]

Epochs: 21538 | epoch avg. loss: 0.031 | test avg. loss: 4.537
Epochs: 21539 | epoch avg. loss: 0.014 | test avg. loss: 4.538
Epochs: 21540 | epoch avg. loss: 0.029 | test avg. loss: 4.740


 43%|████▎     | 21543/50000 [32:12<38:10, 12.42it/s]

Epochs: 21541 | epoch avg. loss: 0.013 | test avg. loss: 4.841
Epochs: 21542 | epoch avg. loss: 0.015 | test avg. loss: 4.723
Epochs: 21543 | epoch avg. loss: 0.023 | test avg. loss: 4.587


 43%|████▎     | 21547/50000 [32:13<35:50, 13.23it/s]

Epochs: 21544 | epoch avg. loss: 0.022 | test avg. loss: 4.548
Epochs: 21545 | epoch avg. loss: 0.018 | test avg. loss: 4.698
Epochs: 21546 | epoch avg. loss: 0.052 | test avg. loss: 4.682
Epochs: 21547 | epoch avg. loss: 0.016 | test avg. loss: 4.686


 43%|████▎     | 21551/50000 [32:13<33:29, 14.16it/s]

Epochs: 21548 | epoch avg. loss: 0.077 | test avg. loss: 4.799
Epochs: 21549 | epoch avg. loss: 0.032 | test avg. loss: 5.203
Epochs: 21550 | epoch avg. loss: 0.263 | test avg. loss: 4.960
Epochs: 21551 | epoch avg. loss: 0.120 | test avg. loss: 4.553


 43%|████▎     | 21555/50000 [32:13<34:06, 13.90it/s]

Epochs: 21552 | epoch avg. loss: 0.213 | test avg. loss: 4.621
Epochs: 21553 | epoch avg. loss: 0.269 | test avg. loss: 6.246
Epochs: 21554 | epoch avg. loss: 1.504 | test avg. loss: 4.884


 43%|████▎     | 21557/50000 [32:14<36:41, 12.92it/s]

Epochs: 21555 | epoch avg. loss: 0.229 | test avg. loss: 5.547
Epochs: 21556 | epoch avg. loss: 2.210 | test avg. loss: 4.845
Epochs: 21557 | epoch avg. loss: 1.588 | test avg. loss: 11.736


 43%|████▎     | 21561/50000 [32:14<38:15, 12.39it/s]

Epochs: 21558 | epoch avg. loss: 5.075 | test avg. loss: 7.718
Epochs: 21559 | epoch avg. loss: 4.499 | test avg. loss: 5.958
Epochs: 21560 | epoch avg. loss: 1.041 | test avg. loss: 7.825


 43%|████▎     | 21563/50000 [32:14<38:05, 12.44it/s]

Epochs: 21561 | epoch avg. loss: 1.141 | test avg. loss: 6.595
Epochs: 21562 | epoch avg. loss: 2.287 | test avg. loss: 4.708
Epochs: 21563 | epoch avg. loss: 0.601 | test avg. loss: 4.296


 43%|████▎     | 21567/50000 [32:14<35:26, 13.37it/s]

Epochs: 21564 | epoch avg. loss: 0.653 | test avg. loss: 5.280
Epochs: 21565 | epoch avg. loss: 1.057 | test avg. loss: 7.228
Epochs: 21566 | epoch avg. loss: 1.856 | test avg. loss: 5.758


 43%|████▎     | 21569/50000 [32:14<35:22, 13.39it/s]

Epochs: 21567 | epoch avg. loss: 0.577 | test avg. loss: 6.595
Epochs: 21568 | epoch avg. loss: 0.570 | test avg. loss: 5.898
Epochs: 21569 | epoch avg. loss: 0.585 | test avg. loss: 4.965


 43%|████▎     | 21573/50000 [32:15<38:15, 12.39it/s]

Epochs: 21570 | epoch avg. loss: 0.443 | test avg. loss: 5.821
Epochs: 21571 | epoch avg. loss: 1.277 | test avg. loss: 5.053
Epochs: 21572 | epoch avg. loss: 1.227 | test avg. loss: 5.105


 43%|████▎     | 21575/50000 [32:15<38:55, 12.17it/s]

Epochs: 21573 | epoch avg. loss: 0.956 | test avg. loss: 5.211
Epochs: 21574 | epoch avg. loss: 0.450 | test avg. loss: 5.140
Epochs: 21575 | epoch avg. loss: 0.418 | test avg. loss: 5.544


 43%|████▎     | 21579/50000 [32:15<37:50, 12.52it/s]

Epochs: 21576 | epoch avg. loss: 0.450 | test avg. loss: 4.727
Epochs: 21577 | epoch avg. loss: 0.313 | test avg. loss: 4.996
Epochs: 21578 | epoch avg. loss: 0.152 | test avg. loss: 4.811


 43%|████▎     | 21581/50000 [32:15<38:52, 12.18it/s]

Epochs: 21579 | epoch avg. loss: 0.094 | test avg. loss: 5.162
Epochs: 21580 | epoch avg. loss: 0.135 | test avg. loss: 5.024
Epochs: 21581 | epoch avg. loss: 0.084 | test avg. loss: 4.758


 43%|████▎     | 21585/50000 [32:16<41:57, 11.29it/s]

Epochs: 21582 | epoch avg. loss: 0.083 | test avg. loss: 4.900
Epochs: 21583 | epoch avg. loss: 0.101 | test avg. loss: 4.474
Epochs: 21584 | epoch avg. loss: 0.100 | test avg. loss: 4.729


 43%|████▎     | 21587/50000 [32:16<41:34, 11.39it/s]

Epochs: 21585 | epoch avg. loss: 0.165 | test avg. loss: 4.430
Epochs: 21586 | epoch avg. loss: 0.071 | test avg. loss: 4.563
Epochs: 21587 | epoch avg. loss: 0.038 | test avg. loss: 4.662


 43%|████▎     | 21591/50000 [32:16<37:12, 12.72it/s]

Epochs: 21588 | epoch avg. loss: 0.025 | test avg. loss: 4.685
Epochs: 21589 | epoch avg. loss: 0.027 | test avg. loss: 4.655
Epochs: 21590 | epoch avg. loss: 0.019 | test avg. loss: 4.590
Epochs: 21591 | epoch avg. loss: 0.017 | test avg. loss: 4.558


 43%|████▎     | 21595/50000 [32:16<35:33, 13.31it/s]

Epochs: 21592 | epoch avg. loss: 0.026 | test avg. loss: 4.519
Epochs: 21593 | epoch avg. loss: 0.034 | test avg. loss: 4.700
Epochs: 21594 | epoch avg. loss: 0.039 | test avg. loss: 4.565


 43%|████▎     | 21597/50000 [32:17<35:01, 13.52it/s]

Epochs: 21595 | epoch avg. loss: 0.045 | test avg. loss: 4.685
Epochs: 21596 | epoch avg. loss: 0.044 | test avg. loss: 4.585
Epochs: 21597 | epoch avg. loss: 0.036 | test avg. loss: 4.577


 43%|████▎     | 21599/50000 [32:17<34:45, 13.62it/s]

Epochs: 21598 | epoch avg. loss: 0.035 | test avg. loss: 4.654
Epochs: 21599 | epoch avg. loss: 0.031 | test avg. loss: 4.580


 43%|████▎     | 21603/50000 [32:18<1:47:16,  4.41it/s]

Epochs: 21600 | epoch avg. loss: 0.014 | test avg. loss: 4.653
Epochs: 21601 | epoch avg. loss: 0.017 | test avg. loss: 4.576
Epochs: 21602 | epoch avg. loss: 0.022 | test avg. loss: 4.726


 43%|████▎     | 21605/50000 [32:19<1:28:02,  5.38it/s]

Epochs: 21603 | epoch avg. loss: 0.022 | test avg. loss: 4.652
Epochs: 21604 | epoch avg. loss: 0.037 | test avg. loss: 4.697
Epochs: 21605 | epoch avg. loss: 0.015 | test avg. loss: 4.719




Epochs: 21606 | epoch avg. loss: 0.016 | test avg. loss: 4.636
Epochs: 21607 | epoch avg. loss: 0.026 | test avg. loss: 4.755
Epochs: 21608 | epoch avg. loss: 0.023 | test avg. loss: 4.650


 43%|████▎     | 21611/50000 [32:19<53:01,  8.92it/s]

Epochs: 21609 | epoch avg. loss: 0.017 | test avg. loss: 4.625
Epochs: 21610 | epoch avg. loss: 0.013 | test avg. loss: 4.644
Epochs: 21611 | epoch avg. loss: 0.008 | test avg. loss: 4.650


 43%|████▎     | 21615/50000 [32:19<48:25,  9.77it/s]

Epochs: 21612 | epoch avg. loss: 0.008 | test avg. loss: 4.719
Epochs: 21613 | epoch avg. loss: 0.011 | test avg. loss: 4.645
Epochs: 21614 | epoch avg. loss: 0.035 | test avg. loss: 4.819


 43%|████▎     | 21617/50000 [32:20<43:58, 10.76it/s]

Epochs: 21615 | epoch avg. loss: 0.035 | test avg. loss: 4.639
Epochs: 21616 | epoch avg. loss: 0.034 | test avg. loss: 4.748
Epochs: 21617 | epoch avg. loss: 0.030 | test avg. loss: 4.704


 43%|████▎     | 21621/50000 [32:20<40:34, 11.66it/s]

Epochs: 21618 | epoch avg. loss: 0.029 | test avg. loss: 4.702
Epochs: 21619 | epoch avg. loss: 0.024 | test avg. loss: 4.822
Epochs: 21620 | epoch avg. loss: 0.033 | test avg. loss: 4.619


 43%|████▎     | 21623/50000 [32:20<39:20, 12.02it/s]

Epochs: 21621 | epoch avg. loss: 0.050 | test avg. loss: 4.694
Epochs: 21622 | epoch avg. loss: 0.020 | test avg. loss: 4.633
Epochs: 21623 | epoch avg. loss: 0.011 | test avg. loss: 4.604


 43%|████▎     | 21627/50000 [32:20<37:27, 12.62it/s]

Epochs: 21624 | epoch avg. loss: 0.008 | test avg. loss: 4.564
Epochs: 21625 | epoch avg. loss: 0.021 | test avg. loss: 4.703
Epochs: 21626 | epoch avg. loss: 0.036 | test avg. loss: 4.660


                                                     

Epochs: 21627 | epoch avg. loss: 0.018 | test avg. loss: 4.708
Epochs: 21628 | epoch avg. loss: 0.013 | test avg. loss: 4.774
Epochs: 21629 | epoch avg. loss: 0.014 | test avg. loss: 4.657


 43%|████▎     | 21633/50000 [32:21<35:26, 13.34it/s]

Epochs: 21630 | epoch avg. loss: 0.020 | test avg. loss: 4.734
Epochs: 21631 | epoch avg. loss: 0.014 | test avg. loss: 4.613
Epochs: 21632 | epoch avg. loss: 0.022 | test avg. loss: 4.655


 43%|████▎     | 21637/50000 [32:21<32:39, 14.47it/s]

Epochs: 21633 | epoch avg. loss: 0.016 | test avg. loss: 4.789
Epochs: 21634 | epoch avg. loss: 0.021 | test avg. loss: 4.664
Epochs: 21635 | epoch avg. loss: 0.037 | test avg. loss: 4.799
Epochs: 21636 | epoch avg. loss: 0.044 | test avg. loss: 4.660


 43%|████▎     | 21639/50000 [32:21<32:07, 14.72it/s]

Epochs: 21637 | epoch avg. loss: 0.050 | test avg. loss: 4.656
Epochs: 21638 | epoch avg. loss: 0.048 | test avg. loss: 5.038
Epochs: 21639 | epoch avg. loss: 0.108 | test avg. loss: 4.625


 43%|████▎     | 21643/50000 [32:22<32:09, 14.70it/s]

Epochs: 21640 | epoch avg. loss: 0.132 | test avg. loss: 4.727
Epochs: 21641 | epoch avg. loss: 0.078 | test avg. loss: 4.558
Epochs: 21642 | epoch avg. loss: 0.026 | test avg. loss: 4.555
Epochs: 21643 | epoch avg. loss: 0.036 | test avg. loss: 4.962


 43%|████▎     | 21647/50000 [32:22<32:40, 14.46it/s]

Epochs: 21644 | epoch avg. loss: 0.109 | test avg. loss: 4.693
Epochs: 21645 | epoch avg. loss: 0.091 | test avg. loss: 4.739
Epochs: 21646 | epoch avg. loss: 0.044 | test avg. loss: 4.749


 43%|████▎     | 21649/50000 [32:22<35:22, 13.36it/s]

Epochs: 21647 | epoch avg. loss: 0.045 | test avg. loss: 4.486
Epochs: 21648 | epoch avg. loss: 0.031 | test avg. loss: 4.723
Epochs: 21649 | epoch avg. loss: 0.047 | test avg. loss: 4.609


 43%|████▎     | 21653/50000 [32:22<35:44, 13.22it/s]

Epochs: 21650 | epoch avg. loss: 0.059 | test avg. loss: 4.731
Epochs: 21651 | epoch avg. loss: 0.024 | test avg. loss: 4.926
Epochs: 21652 | epoch avg. loss: 0.032 | test avg. loss: 4.733


 43%|████▎     | 21655/50000 [32:23<35:32, 13.29it/s]

Epochs: 21653 | epoch avg. loss: 0.025 | test avg. loss: 4.777
Epochs: 21654 | epoch avg. loss: 0.034 | test avg. loss: 4.627
Epochs: 21655 | epoch avg. loss: 0.018 | test avg. loss: 4.616
Epochs: 21656 | epoch avg. loss: 0.013 | test avg. loss: 4.657


 43%|████▎     | 21661/50000 [32:23<32:13, 14.66it/s]

Epochs: 21657 | epoch avg. loss: 0.010 | test avg. loss: 4.682
Epochs: 21658 | epoch avg. loss: 0.011 | test avg. loss: 4.768
Epochs: 21659 | epoch avg. loss: 0.014 | test avg. loss: 4.642
Epochs: 21660 | epoch avg. loss: 0.037 | test avg. loss: 4.735


 43%|████▎     | 21663/50000 [32:23<32:04, 14.72it/s]

Epochs: 21661 | epoch avg. loss: 0.016 | test avg. loss: 4.648
Epochs: 21662 | epoch avg. loss: 0.020 | test avg. loss: 4.741
Epochs: 21663 | epoch avg. loss: 0.037 | test avg. loss: 4.711


 43%|████▎     | 21667/50000 [32:23<32:12, 14.66it/s]

Epochs: 21664 | epoch avg. loss: 0.015 | test avg. loss: 4.653
Epochs: 21665 | epoch avg. loss: 0.017 | test avg. loss: 4.731
Epochs: 21666 | epoch avg. loss: 0.012 | test avg. loss: 4.625


 43%|████▎     | 21669/50000 [32:23<35:13, 13.40it/s]

Epochs: 21667 | epoch avg. loss: 0.019 | test avg. loss: 4.630
Epochs: 21668 | epoch avg. loss: 0.008 | test avg. loss: 4.693
Epochs: 21669 | epoch avg. loss: 0.010 | test avg. loss: 4.697


 43%|████▎     | 21673/50000 [32:24<35:24, 13.34it/s]

Epochs: 21670 | epoch avg. loss: 0.006 | test avg. loss: 4.701
Epochs: 21671 | epoch avg. loss: 0.008 | test avg. loss: 4.703
Epochs: 21672 | epoch avg. loss: 0.008 | test avg. loss: 4.770


                                                     

Epochs: 21673 | epoch avg. loss: 0.008 | test avg. loss: 4.690
Epochs: 21674 | epoch avg. loss: 0.016 | test avg. loss: 4.725
Epochs: 21675 | epoch avg. loss: 0.012 | test avg. loss: 4.687




Epochs: 21676 | epoch avg. loss: 0.012 | test avg. loss: 4.615
Epochs: 21677 | epoch avg. loss: 0.019 | test avg. loss: 4.750
Epochs: 21678 | epoch avg. loss: 0.020 | test avg. loss: 4.604


 43%|████▎     | 21681/50000 [32:24<32:13, 14.64it/s]

Epochs: 21679 | epoch avg. loss: 0.029 | test avg. loss: 4.669
Epochs: 21680 | epoch avg. loss: 0.022 | test avg. loss: 4.657
Epochs: 21681 | epoch avg. loss: 0.012 | test avg. loss: 4.611


 43%|████▎     | 21685/50000 [32:25<33:03, 14.27it/s]

Epochs: 21682 | epoch avg. loss: 0.010 | test avg. loss: 4.679
Epochs: 21683 | epoch avg. loss: 0.007 | test avg. loss: 4.679
Epochs: 21684 | epoch avg. loss: 0.006 | test avg. loss: 4.700


 43%|████▎     | 21687/50000 [32:25<32:26, 14.55it/s]

Epochs: 21685 | epoch avg. loss: 0.006 | test avg. loss: 4.702
Epochs: 21686 | epoch avg. loss: 0.006 | test avg. loss: 4.665
Epochs: 21687 | epoch avg. loss: 0.007 | test avg. loss: 4.697


 43%|████▎     | 21691/50000 [32:25<34:24, 13.71it/s]

Epochs: 21688 | epoch avg. loss: 0.013 | test avg. loss: 4.675
Epochs: 21689 | epoch avg. loss: 0.007 | test avg. loss: 4.698
Epochs: 21690 | epoch avg. loss: 0.008 | test avg. loss: 4.647


 43%|████▎     | 21693/50000 [32:25<33:42, 14.00it/s]

Epochs: 21691 | epoch avg. loss: 0.012 | test avg. loss: 4.678
Epochs: 21692 | epoch avg. loss: 0.009 | test avg. loss: 4.792
Epochs: 21693 | epoch avg. loss: 0.021 | test avg. loss: 4.729


 43%|████▎     | 21697/50000 [32:25<33:44, 13.98it/s]

Epochs: 21694 | epoch avg. loss: 0.015 | test avg. loss: 4.685
Epochs: 21695 | epoch avg. loss: 0.013 | test avg. loss: 4.755
Epochs: 21696 | epoch avg. loss: 0.028 | test avg. loss: 4.610


 43%|████▎     | 21699/50000 [32:26<34:21, 13.73it/s]

Epochs: 21697 | epoch avg. loss: 0.006 | test avg. loss: 4.665
Epochs: 21698 | epoch avg. loss: 0.012 | test avg. loss: 4.630
Epochs: 21699 | epoch avg. loss: 0.028 | test avg. loss: 4.683


 43%|████▎     | 21703/50000 [32:27<1:41:08,  4.66it/s]

Epochs: 21700 | epoch avg. loss: 0.015 | test avg. loss: 4.772
Epochs: 21701 | epoch avg. loss: 0.013 | test avg. loss: 4.751
Epochs: 21702 | epoch avg. loss: 0.008 | test avg. loss: 4.682
Epochs: 21703 | epoch avg. loss: 0.009 | test avg. loss: 4.648


 43%|████▎     | 21707/50000 [32:27<1:07:58,  6.94it/s]

Epochs: 21704 | epoch avg. loss: 0.008 | test avg. loss: 4.715
Epochs: 21705 | epoch avg. loss: 0.012 | test avg. loss: 4.674
Epochs: 21706 | epoch avg. loss: 0.009 | test avg. loss: 4.681


 43%|████▎     | 21711/50000 [32:28<49:04,  9.61it/s]

Epochs: 21707 | epoch avg. loss: 0.008 | test avg. loss: 4.735
Epochs: 21708 | epoch avg. loss: 0.009 | test avg. loss: 4.670
Epochs: 21709 | epoch avg. loss: 0.013 | test avg. loss: 4.627
Epochs: 21710 | epoch avg. loss: 0.017 | test avg. loss: 4.772


 43%|████▎     | 21713/50000 [32:28<43:51, 10.75it/s]

Epochs: 21711 | epoch avg. loss: 0.020 | test avg. loss: 4.658
Epochs: 21712 | epoch avg. loss: 0.019 | test avg. loss: 4.706
Epochs: 21713 | epoch avg. loss: 0.008 | test avg. loss: 4.750


 43%|████▎     | 21717/50000 [32:28<39:28, 11.94it/s]

Epochs: 21714 | epoch avg. loss: 0.011 | test avg. loss: 4.661
Epochs: 21715 | epoch avg. loss: 0.034 | test avg. loss: 4.744
Epochs: 21716 | epoch avg. loss: 0.020 | test avg. loss: 4.664


 43%|████▎     | 21719/50000 [32:28<36:28, 12.92it/s]

Epochs: 21717 | epoch avg. loss: 0.012 | test avg. loss: 4.612
Epochs: 21718 | epoch avg. loss: 0.030 | test avg. loss: 4.781
Epochs: 21719 | epoch avg. loss: 0.033 | test avg. loss: 4.767


 43%|████▎     | 21723/50000 [32:29<35:09, 13.41it/s]

Epochs: 21720 | epoch avg. loss: 0.014 | test avg. loss: 4.687
Epochs: 21721 | epoch avg. loss: 0.023 | test avg. loss: 4.828
Epochs: 21722 | epoch avg. loss: 0.049 | test avg. loss: 4.615
Epochs: 21723 | epoch avg. loss: 0.022 | test avg. loss: 4.655


 43%|████▎     | 21727/50000 [32:29<32:56, 14.30it/s]

Epochs: 21724 | epoch avg. loss: 0.036 | test avg. loss: 4.678
Epochs: 21725 | epoch avg. loss: 0.024 | test avg. loss: 4.582
Epochs: 21726 | epoch avg. loss: 0.033 | test avg. loss: 4.868
Epochs: 21727 | epoch avg. loss: 0.065 | test avg. loss: 4.748


 43%|████▎     | 21731/50000 [32:29<31:06, 15.14it/s]

Epochs: 21728 | epoch avg. loss: 0.031 | test avg. loss: 4.702
Epochs: 21729 | epoch avg. loss: 0.039 | test avg. loss: 4.883
Epochs: 21730 | epoch avg. loss: 0.054 | test avg. loss: 4.681
Epochs: 21731 | epoch avg. loss: 0.093 | test avg. loss: 4.713


 43%|████▎     | 21735/50000 [32:29<31:27, 14.97it/s]

Epochs: 21732 | epoch avg. loss: 0.041 | test avg. loss: 5.010
Epochs: 21733 | epoch avg. loss: 0.083 | test avg. loss: 4.678
Epochs: 21734 | epoch avg. loss: 0.092 | test avg. loss: 4.695


 43%|████▎     | 21739/50000 [32:30<32:53, 14.32it/s]

Epochs: 21735 | epoch avg. loss: 0.024 | test avg. loss: 4.727
Epochs: 21736 | epoch avg. loss: 0.020 | test avg. loss: 4.617
Epochs: 21737 | epoch avg. loss: 0.038 | test avg. loss: 4.670
Epochs: 21738 | epoch avg. loss: 0.014 | test avg. loss: 4.642


 43%|████▎     | 21741/50000 [32:30<33:32, 14.04it/s]

Epochs: 21739 | epoch avg. loss: 0.020 | test avg. loss: 4.595
Epochs: 21740 | epoch avg. loss: 0.045 | test avg. loss: 4.807
Epochs: 21741 | epoch avg. loss: 0.045 | test avg. loss: 4.737


 43%|████▎     | 21745/50000 [32:30<32:50, 14.34it/s]

Epochs: 21742 | epoch avg. loss: 0.021 | test avg. loss: 4.741
Epochs: 21743 | epoch avg. loss: 0.025 | test avg. loss: 4.764
Epochs: 21744 | epoch avg. loss: 0.026 | test avg. loss: 4.569
Epochs: 21745 | epoch avg. loss: 0.040 | test avg. loss: 4.739


 43%|████▎     | 21749/50000 [32:30<34:23, 13.69it/s]

Epochs: 21746 | epoch avg. loss: 0.036 | test avg. loss: 4.713
Epochs: 21747 | epoch avg. loss: 0.024 | test avg. loss: 4.610
Epochs: 21748 | epoch avg. loss: 0.046 | test avg. loss: 4.800


 44%|████▎     | 21751/50000 [32:31<36:57, 12.74it/s]

Epochs: 21749 | epoch avg. loss: 0.066 | test avg. loss: 4.599
Epochs: 21750 | epoch avg. loss: 0.029 | test avg. loss: 4.706
Epochs: 21751 | epoch avg. loss: 0.055 | test avg. loss: 4.645


 44%|████▎     | 21755/50000 [32:31<33:37, 14.00it/s]

Epochs: 21752 | epoch avg. loss: 0.041 | test avg. loss: 4.615
Epochs: 21753 | epoch avg. loss: 0.119 | test avg. loss: 4.937
Epochs: 21754 | epoch avg. loss: 0.132 | test avg. loss: 4.764
Epochs: 21755 | epoch avg. loss: 0.053 | test avg. loss: 4.679




Epochs: 21756 | epoch avg. loss: 0.062 | test avg. loss: 4.909
Epochs: 21757 | epoch avg. loss: 0.127 | test avg. loss: 4.592
Epochs: 21758 | epoch avg. loss: 0.084 | test avg. loss: 4.595


 44%|████▎     | 21763/50000 [32:31<31:50, 14.78it/s]

Epochs: 21759 | epoch avg. loss: 0.065 | test avg. loss: 4.841
Epochs: 21760 | epoch avg. loss: 0.113 | test avg. loss: 4.646
Epochs: 21761 | epoch avg. loss: 0.307 | test avg. loss: 4.908
Epochs: 21762 | epoch avg. loss: 0.214 | test avg. loss: 5.026


 44%|████▎     | 21765/50000 [32:32<33:23, 14.09it/s]

Epochs: 21763 | epoch avg. loss: 0.131 | test avg. loss: 4.612
Epochs: 21764 | epoch avg. loss: 0.114 | test avg. loss: 4.947
Epochs: 21765 | epoch avg. loss: 0.147 | test avg. loss: 4.653


 44%|████▎     | 21769/50000 [32:32<33:17, 14.13it/s]

Epochs: 21766 | epoch avg. loss: 0.074 | test avg. loss: 4.625
Epochs: 21767 | epoch avg. loss: 0.072 | test avg. loss: 4.874
Epochs: 21768 | epoch avg. loss: 0.045 | test avg. loss: 4.667


 44%|████▎     | 21773/50000 [32:32<31:38, 14.86it/s]

Epochs: 21769 | epoch avg. loss: 0.065 | test avg. loss: 4.770
Epochs: 21770 | epoch avg. loss: 0.056 | test avg. loss: 4.780
Epochs: 21771 | epoch avg. loss: 0.028 | test avg. loss: 4.698
Epochs: 21772 | epoch avg. loss: 0.030 | test avg. loss: 4.752


 44%|████▎     | 21775/50000 [32:32<31:46, 14.80it/s]

Epochs: 21773 | epoch avg. loss: 0.017 | test avg. loss: 4.659
Epochs: 21774 | epoch avg. loss: 0.038 | test avg. loss: 4.779
Epochs: 21775 | epoch avg. loss: 0.027 | test avg. loss: 4.794


 44%|████▎     | 21779/50000 [32:33<33:36, 13.99it/s]

Epochs: 21776 | epoch avg. loss: 0.024 | test avg. loss: 4.694
Epochs: 21777 | epoch avg. loss: 0.040 | test avg. loss: 4.887
Epochs: 21778 | epoch avg. loss: 0.123 | test avg. loss: 4.732


 44%|████▎     | 21781/50000 [32:33<34:02, 13.81it/s]

Epochs: 21779 | epoch avg. loss: 0.041 | test avg. loss: 4.617
Epochs: 21780 | epoch avg. loss: 0.079 | test avg. loss: 4.786
Epochs: 21781 | epoch avg. loss: 0.073 | test avg. loss: 4.638


 44%|████▎     | 21785/50000 [32:33<35:32, 13.23it/s]

Epochs: 21782 | epoch avg. loss: 0.064 | test avg. loss: 4.714
Epochs: 21783 | epoch avg. loss: 0.042 | test avg. loss: 5.011
Epochs: 21784 | epoch avg. loss: 0.092 | test avg. loss: 4.700


 44%|████▎     | 21787/50000 [32:33<36:18, 12.95it/s]

Epochs: 21785 | epoch avg. loss: 0.107 | test avg. loss: 4.900
Epochs: 21786 | epoch avg. loss: 0.033 | test avg. loss: 4.982
Epochs: 21787 | epoch avg. loss: 0.030 | test avg. loss: 4.844


 44%|████▎     | 21791/50000 [32:33<37:08, 12.66it/s]

Epochs: 21788 | epoch avg. loss: 0.031 | test avg. loss: 4.820
Epochs: 21789 | epoch avg. loss: 0.028 | test avg. loss: 4.626
Epochs: 21790 | epoch avg. loss: 0.019 | test avg. loss: 4.670


 44%|████▎     | 21793/50000 [32:34<37:07, 12.66it/s]

Epochs: 21791 | epoch avg. loss: 0.034 | test avg. loss: 4.768
Epochs: 21792 | epoch avg. loss: 0.030 | test avg. loss: 4.726
Epochs: 21793 | epoch avg. loss: 0.062 | test avg. loss: 4.851


 44%|████▎     | 21797/50000 [32:34<38:31, 12.20it/s]

Epochs: 21794 | epoch avg. loss: 0.069 | test avg. loss: 4.844
Epochs: 21795 | epoch avg. loss: 0.039 | test avg. loss: 4.749
Epochs: 21796 | epoch avg. loss: 0.025 | test avg. loss: 4.799


 44%|████▎     | 21799/50000 [32:34<37:47, 12.43it/s]

Epochs: 21797 | epoch avg. loss: 0.015 | test avg. loss: 4.934
Epochs: 21798 | epoch avg. loss: 0.019 | test avg. loss: 4.878
Epochs: 21799 | epoch avg. loss: 0.022 | test avg. loss: 4.727


 44%|████▎     | 21803/50000 [32:36<1:54:20,  4.11it/s]

Epochs: 21800 | epoch avg. loss: 0.031 | test avg. loss: 4.694
Epochs: 21801 | epoch avg. loss: 0.013 | test avg. loss: 4.673
Epochs: 21802 | epoch avg. loss: 0.010 | test avg. loss: 4.685


 44%|████▎     | 21807/50000 [32:36<1:11:38,  6.56it/s]

Epochs: 21803 | epoch avg. loss: 0.012 | test avg. loss: 4.799
Epochs: 21804 | epoch avg. loss: 0.009 | test avg. loss: 4.898
Epochs: 21805 | epoch avg. loss: 0.013 | test avg. loss: 4.772
Epochs: 21806 | epoch avg. loss: 0.010 | test avg. loss: 4.840


 44%|████▎     | 21809/50000 [32:37<1:01:12,  7.68it/s]

Epochs: 21807 | epoch avg. loss: 0.025 | test avg. loss: 4.684
Epochs: 21808 | epoch avg. loss: 0.026 | test avg. loss: 4.750
Epochs: 21809 | epoch avg. loss: 0.036 | test avg. loss: 4.879


 44%|████▎     | 21813/50000 [32:37<51:40,  9.09it/s]

Epochs: 21810 | epoch avg. loss: 0.027 | test avg. loss: 4.781
Epochs: 21811 | epoch avg. loss: 0.033 | test avg. loss: 4.862
Epochs: 21812 | epoch avg. loss: 0.015 | test avg. loss: 4.776


 44%|████▎     | 21815/50000 [32:37<46:04, 10.20it/s]

Epochs: 21813 | epoch avg. loss: 0.011 | test avg. loss: 4.658
Epochs: 21814 | epoch avg. loss: 0.017 | test avg. loss: 4.798
Epochs: 21815 | epoch avg. loss: 0.034 | test avg. loss: 4.676


 44%|████▎     | 21819/50000 [32:37<41:19, 11.37it/s]

Epochs: 21816 | epoch avg. loss: 0.017 | test avg. loss: 4.707
Epochs: 21817 | epoch avg. loss: 0.015 | test avg. loss: 4.845
Epochs: 21818 | epoch avg. loss: 0.028 | test avg. loss: 4.758


 44%|████▎     | 21821/50000 [32:37<41:16, 11.38it/s]

Epochs: 21819 | epoch avg. loss: 0.009 | test avg. loss: 4.780
Epochs: 21820 | epoch avg. loss: 0.007 | test avg. loss: 4.818
Epochs: 21821 | epoch avg. loss: 0.013 | test avg. loss: 4.783


 44%|████▎     | 21825/50000 [32:38<39:32, 11.88it/s]

Epochs: 21822 | epoch avg. loss: 0.006 | test avg. loss: 4.780
Epochs: 21823 | epoch avg. loss: 0.005 | test avg. loss: 4.767
Epochs: 21824 | epoch avg. loss: 0.007 | test avg. loss: 4.725


 44%|████▎     | 21827/50000 [32:38<38:52, 12.08it/s]

Epochs: 21825 | epoch avg. loss: 0.010 | test avg. loss: 4.785
Epochs: 21826 | epoch avg. loss: 0.011 | test avg. loss: 4.760
Epochs: 21827 | epoch avg. loss: 0.009 | test avg. loss: 4.725


 44%|████▎     | 21831/50000 [32:38<38:00, 12.35it/s]

Epochs: 21828 | epoch avg. loss: 0.018 | test avg. loss: 4.809
Epochs: 21829 | epoch avg. loss: 0.018 | test avg. loss: 4.818
Epochs: 21830 | epoch avg. loss: 0.021 | test avg. loss: 4.635


 44%|████▎     | 21833/50000 [32:38<39:02, 12.02it/s]

Epochs: 21831 | epoch avg. loss: 0.032 | test avg. loss: 4.708
Epochs: 21832 | epoch avg. loss: 0.016 | test avg. loss: 4.922
Epochs: 21833 | epoch avg. loss: 0.027 | test avg. loss: 4.819


 44%|████▎     | 21837/50000 [32:39<37:22, 12.56it/s]

Epochs: 21834 | epoch avg. loss: 0.017 | test avg. loss: 4.748
Epochs: 21835 | epoch avg. loss: 0.010 | test avg. loss: 4.750
Epochs: 21836 | epoch avg. loss: 0.015 | test avg. loss: 4.620


 44%|████▎     | 21839/50000 [32:39<38:37, 12.15it/s]

Epochs: 21837 | epoch avg. loss: 0.024 | test avg. loss: 4.662
Epochs: 21838 | epoch avg. loss: 0.011 | test avg. loss: 4.756
Epochs: 21839 | epoch avg. loss: 0.006 | test avg. loss: 4.823


 44%|████▎     | 21843/50000 [32:39<36:05, 13.00it/s]

Epochs: 21840 | epoch avg. loss: 0.015 | test avg. loss: 4.821
Epochs: 21841 | epoch avg. loss: 0.010 | test avg. loss: 4.688
Epochs: 21842 | epoch avg. loss: 0.019 | test avg. loss: 4.656


 44%|████▎     | 21845/50000 [32:39<35:08, 13.35it/s]

Epochs: 21843 | epoch avg. loss: 0.012 | test avg. loss: 4.844
Epochs: 21844 | epoch avg. loss: 0.041 | test avg. loss: 4.782
Epochs: 21845 | epoch avg. loss: 0.009 | test avg. loss: 4.742


 44%|████▎     | 21849/50000 [32:40<39:52, 11.76it/s]

Epochs: 21846 | epoch avg. loss: 0.017 | test avg. loss: 4.752
Epochs: 21847 | epoch avg. loss: 0.005 | test avg. loss: 4.747
Epochs: 21848 | epoch avg. loss: 0.010 | test avg. loss: 4.729


 44%|████▎     | 21851/50000 [32:40<39:37, 11.84it/s]

Epochs: 21849 | epoch avg. loss: 0.012 | test avg. loss: 4.706
Epochs: 21850 | epoch avg. loss: 0.008 | test avg. loss: 4.646
Epochs: 21851 | epoch avg. loss: 0.018 | test avg. loss: 4.787


 44%|████▎     | 21855/50000 [32:40<38:30, 12.18it/s]

Epochs: 21852 | epoch avg. loss: 0.013 | test avg. loss: 4.874
Epochs: 21853 | epoch avg. loss: 0.014 | test avg. loss: 4.763
Epochs: 21854 | epoch avg. loss: 0.010 | test avg. loss: 4.743


 44%|████▎     | 21857/50000 [32:40<37:08, 12.63it/s]

Epochs: 21855 | epoch avg. loss: 0.006 | test avg. loss: 4.778
Epochs: 21856 | epoch avg. loss: 0.009 | test avg. loss: 4.674
Epochs: 21857 | epoch avg. loss: 0.023 | test avg. loss: 4.754


 44%|████▎     | 21861/50000 [32:41<37:46, 12.42it/s]

Epochs: 21858 | epoch avg. loss: 0.012 | test avg. loss: 4.793
Epochs: 21859 | epoch avg. loss: 0.010 | test avg. loss: 4.739
Epochs: 21860 | epoch avg. loss: 0.014 | test avg. loss: 4.872


 44%|████▎     | 21863/50000 [32:41<36:52, 12.72it/s]

Epochs: 21861 | epoch avg. loss: 0.026 | test avg. loss: 4.715
Epochs: 21862 | epoch avg. loss: 0.021 | test avg. loss: 4.697
Epochs: 21863 | epoch avg. loss: 0.018 | test avg. loss: 4.948


 44%|████▎     | 21867/50000 [32:41<37:54, 12.37it/s]

Epochs: 21864 | epoch avg. loss: 0.055 | test avg. loss: 4.769
Epochs: 21865 | epoch avg. loss: 0.130 | test avg. loss: 4.823
Epochs: 21866 | epoch avg. loss: 0.136 | test avg. loss: 5.596


 44%|████▎     | 21869/50000 [32:41<35:51, 13.07it/s]

Epochs: 21867 | epoch avg. loss: 0.405 | test avg. loss: 4.582
Epochs: 21868 | epoch avg. loss: 0.378 | test avg. loss: 4.719
Epochs: 21869 | epoch avg. loss: 0.137 | test avg. loss: 5.490


 44%|████▎     | 21873/50000 [32:42<36:40, 12.78it/s]

Epochs: 21870 | epoch avg. loss: 0.295 | test avg. loss: 4.969
Epochs: 21871 | epoch avg. loss: 0.178 | test avg. loss: 4.871
Epochs: 21872 | epoch avg. loss: 0.303 | test avg. loss: 4.797


 44%|████▍     | 21875/50000 [32:42<36:21, 12.89it/s]

Epochs: 21873 | epoch avg. loss: 0.168 | test avg. loss: 4.702
Epochs: 21874 | epoch avg. loss: 0.070 | test avg. loss: 4.696
Epochs: 21875 | epoch avg. loss: 0.041 | test avg. loss: 4.967


 44%|████▍     | 21879/50000 [32:42<35:05, 13.35it/s]

Epochs: 21876 | epoch avg. loss: 0.049 | test avg. loss: 4.917
Epochs: 21877 | epoch avg. loss: 0.025 | test avg. loss: 4.762
Epochs: 21878 | epoch avg. loss: 0.039 | test avg. loss: 4.692
Epochs: 21879 | epoch avg. loss: 0.013 | test avg. loss: 4.696


 44%|████▍     | 21883/50000 [32:42<34:07, 13.73it/s]

Epochs: 21880 | epoch avg. loss: 0.015 | test avg. loss: 4.749
Epochs: 21881 | epoch avg. loss: 0.014 | test avg. loss: 4.934
Epochs: 21882 | epoch avg. loss: 0.026 | test avg. loss: 4.798


 44%|████▍     | 21885/50000 [32:43<35:13, 13.30it/s]

Epochs: 21883 | epoch avg. loss: 0.030 | test avg. loss: 4.760
Epochs: 21884 | epoch avg. loss: 0.027 | test avg. loss: 5.045
Epochs: 21885 | epoch avg. loss: 0.113 | test avg. loss: 4.774




Epochs: 21886 | epoch avg. loss: 0.049 | test avg. loss: 4.767
Epochs: 21887 | epoch avg. loss: 0.071 | test avg. loss: 5.285
Epochs: 21888 | epoch avg. loss: 0.247 | test avg. loss: 4.869


 44%|████▍     | 21893/50000 [32:43<31:52, 14.70it/s]

Epochs: 21889 | epoch avg. loss: 0.124 | test avg. loss: 4.907
Epochs: 21890 | epoch avg. loss: 0.054 | test avg. loss: 5.213
Epochs: 21891 | epoch avg. loss: 0.101 | test avg. loss: 4.872
Epochs: 21892 | epoch avg. loss: 0.101 | test avg. loss: 4.931


 44%|████▍     | 21895/50000 [32:43<31:32, 14.85it/s]

Epochs: 21893 | epoch avg. loss: 0.050 | test avg. loss: 5.446
Epochs: 21894 | epoch avg. loss: 0.137 | test avg. loss: 4.972
Epochs: 21895 | epoch avg. loss: 0.100 | test avg. loss: 5.040


 44%|████▍     | 21899/50000 [32:43<33:54, 13.81it/s]

Epochs: 21896 | epoch avg. loss: 0.034 | test avg. loss: 5.251
Epochs: 21897 | epoch avg. loss: 0.082 | test avg. loss: 4.922
Epochs: 21898 | epoch avg. loss: 0.069 | test avg. loss: 4.831


 44%|████▍     | 21899/50000 [32:44<33:54, 13.81it/s]

Epochs: 21899 | epoch avg. loss: 0.024 | test avg. loss: 4.891


                                                       

Epochs: 21900 | epoch avg. loss: 0.036 | test avg. loss: 4.708
Epochs: 21901 | epoch avg. loss: 0.059 | test avg. loss: 4.778
Epochs: 21902 | epoch avg. loss: 0.025 | test avg. loss: 5.093


 44%|████▍     | 21905/50000 [32:45<1:19:58,  5.86it/s]

Epochs: 21903 | epoch avg. loss: 0.054 | test avg. loss: 4.939
Epochs: 21904 | epoch avg. loss: 0.060 | test avg. loss: 4.988
Epochs: 21905 | epoch avg. loss: 0.056 | test avg. loss: 5.034


 44%|████▍     | 21909/50000 [32:46<57:54,  8.09it/s]  

Epochs: 21906 | epoch avg. loss: 0.078 | test avg. loss: 4.716
Epochs: 21907 | epoch avg. loss: 0.185 | test avg. loss: 4.800
Epochs: 21908 | epoch avg. loss: 0.061 | test avg. loss: 5.560


 44%|████▍     | 21911/50000 [32:46<50:06,  9.34it/s]

Epochs: 21909 | epoch avg. loss: 0.255 | test avg. loss: 5.128
Epochs: 21910 | epoch avg. loss: 0.167 | test avg. loss: 5.284
Epochs: 21911 | epoch avg. loss: 0.196 | test avg. loss: 6.446


 44%|████▍     | 21915/50000 [32:46<41:46, 11.20it/s]

Epochs: 21912 | epoch avg. loss: 0.705 | test avg. loss: 4.940
Epochs: 21913 | epoch avg. loss: 0.893 | test avg. loss: 4.558
Epochs: 21914 | epoch avg. loss: 0.598 | test avg. loss: 5.496
Epochs: 21915 | epoch avg. loss: 0.390 | test avg. loss: 5.514


 44%|████▍     | 21919/50000 [32:46<35:31, 13.18it/s]

Epochs: 21916 | epoch avg. loss: 0.495 | test avg. loss: 5.661
Epochs: 21917 | epoch avg. loss: 0.171 | test avg. loss: 5.719
Epochs: 21918 | epoch avg. loss: 0.172 | test avg. loss: 5.091
Epochs: 21919 | epoch avg. loss: 0.477 | test avg. loss: 4.785


 44%|████▍     | 21923/50000 [32:47<36:41, 12.75it/s]

Epochs: 21920 | epoch avg. loss: 0.135 | test avg. loss: 5.353
Epochs: 21921 | epoch avg. loss: 0.332 | test avg. loss: 5.214
Epochs: 21922 | epoch avg. loss: 0.093 | test avg. loss: 5.153


 44%|████▍     | 21927/50000 [32:47<33:10, 14.10it/s]

Epochs: 21923 | epoch avg. loss: 0.144 | test avg. loss: 5.372
Epochs: 21924 | epoch avg. loss: 0.166 | test avg. loss: 5.188
Epochs: 21925 | epoch avg. loss: 0.087 | test avg. loss: 4.942
Epochs: 21926 | epoch avg. loss: 0.167 | test avg. loss: 4.963


 44%|████▍     | 21931/50000 [32:47<31:54, 14.66it/s]

Epochs: 21927 | epoch avg. loss: 0.071 | test avg. loss: 5.303
Epochs: 21928 | epoch avg. loss: 0.121 | test avg. loss: 4.978
Epochs: 21929 | epoch avg. loss: 0.073 | test avg. loss: 5.051
Epochs: 21930 | epoch avg. loss: 0.073 | test avg. loss: 4.909


 44%|████▍     | 21935/50000 [32:47<31:03, 15.06it/s]

Epochs: 21931 | epoch avg. loss: 0.048 | test avg. loss: 4.874
Epochs: 21932 | epoch avg. loss: 0.097 | test avg. loss: 5.083
Epochs: 21933 | epoch avg. loss: 0.035 | test avg. loss: 4.995
Epochs: 21934 | epoch avg. loss: 0.034 | test avg. loss: 5.073


 44%|████▍     | 21937/50000 [32:48<33:05, 14.13it/s]

Epochs: 21935 | epoch avg. loss: 0.042 | test avg. loss: 5.373
Epochs: 21936 | epoch avg. loss: 0.142 | test avg. loss: 4.961
Epochs: 21937 | epoch avg. loss: 0.063 | test avg. loss: 4.909




Epochs: 21938 | epoch avg. loss: 0.064 | test avg. loss: 5.087
Epochs: 21939 | epoch avg. loss: 0.106 | test avg. loss: 4.788
Epochs: 21940 | epoch avg. loss: 0.142 | test avg. loss: 5.099


 44%|████▍     | 21945/50000 [32:48<31:42, 14.75it/s]

Epochs: 21941 | epoch avg. loss: 0.050 | test avg. loss: 5.367
Epochs: 21942 | epoch avg. loss: 0.048 | test avg. loss: 5.195
Epochs: 21943 | epoch avg. loss: 0.109 | test avg. loss: 5.108
Epochs: 21944 | epoch avg. loss: 0.032 | test avg. loss: 5.046


 44%|████▍     | 21949/50000 [32:48<30:27, 15.35it/s]

Epochs: 21945 | epoch avg. loss: 0.049 | test avg. loss: 4.825
Epochs: 21946 | epoch avg. loss: 0.032 | test avg. loss: 4.974
Epochs: 21947 | epoch avg. loss: 0.059 | test avg. loss: 5.023
Epochs: 21948 | epoch avg. loss: 0.035 | test avg. loss: 4.900


 44%|████▍     | 21951/50000 [32:49<33:28, 13.97it/s]

Epochs: 21949 | epoch avg. loss: 0.085 | test avg. loss: 5.008
Epochs: 21950 | epoch avg. loss: 0.067 | test avg. loss: 5.147
Epochs: 21951 | epoch avg. loss: 0.060 | test avg. loss: 4.962


 44%|████▍     | 21955/50000 [32:49<32:29, 14.39it/s]

Epochs: 21952 | epoch avg. loss: 0.132 | test avg. loss: 5.046
Epochs: 21953 | epoch avg. loss: 0.025 | test avg. loss: 5.264
Epochs: 21954 | epoch avg. loss: 0.044 | test avg. loss: 5.058
Epochs: 21955 | epoch avg. loss: 0.036 | test avg. loss: 5.014


 44%|████▍     | 21959/50000 [32:49<32:21, 14.44it/s]

Epochs: 21956 | epoch avg. loss: 0.021 | test avg. loss: 5.051
Epochs: 21957 | epoch avg. loss: 0.017 | test avg. loss: 4.913
Epochs: 21958 | epoch avg. loss: 0.034 | test avg. loss: 4.950


 44%|████▍     | 21963/50000 [32:49<30:55, 15.11it/s]

Epochs: 21959 | epoch avg. loss: 0.019 | test avg. loss: 5.197
Epochs: 21960 | epoch avg. loss: 0.055 | test avg. loss: 4.909
Epochs: 21961 | epoch avg. loss: 0.093 | test avg. loss: 4.945
Epochs: 21962 | epoch avg. loss: 0.031 | test avg. loss: 5.273


 44%|████▍     | 21965/50000 [32:49<31:47, 14.69it/s]

Epochs: 21963 | epoch avg. loss: 0.065 | test avg. loss: 5.094
Epochs: 21964 | epoch avg. loss: 0.062 | test avg. loss: 5.117
Epochs: 21965 | epoch avg. loss: 0.025 | test avg. loss: 5.310


                                                     

Epochs: 21966 | epoch avg. loss: 0.073 | test avg. loss: 4.986
Epochs: 21967 | epoch avg. loss: 0.115 | test avg. loss: 4.894
Epochs: 21968 | epoch avg. loss: 0.052 | test avg. loss: 5.157


                                                     

Epochs: 21969 | epoch avg. loss: 0.082 | test avg. loss: 4.929
Epochs: 21970 | epoch avg. loss: 0.101 | test avg. loss: 5.149
Epochs: 21971 | epoch avg. loss: 0.085 | test avg. loss: 5.336


 44%|████▍     | 21975/50000 [32:50<33:21, 14.00it/s]

Epochs: 21972 | epoch avg. loss: 0.076 | test avg. loss: 5.032
Epochs: 21973 | epoch avg. loss: 0.033 | test avg. loss: 5.187
Epochs: 21974 | epoch avg. loss: 0.071 | test avg. loss: 4.992


 44%|████▍     | 21977/50000 [32:50<35:46, 13.06it/s]

Epochs: 21975 | epoch avg. loss: 0.061 | test avg. loss: 4.926
Epochs: 21976 | epoch avg. loss: 0.082 | test avg. loss: 5.134
Epochs: 21977 | epoch avg. loss: 0.083 | test avg. loss: 4.999




Epochs: 21978 | epoch avg. loss: 0.030 | test avg. loss: 5.023
Epochs: 21979 | epoch avg. loss: 0.041 | test avg. loss: 5.253
Epochs: 21980 | epoch avg. loss: 0.066 | test avg. loss: 5.125


 44%|████▍     | 21983/50000 [32:51<35:00, 13.34it/s]

Epochs: 21981 | epoch avg. loss: 0.088 | test avg. loss: 5.055
Epochs: 21982 | epoch avg. loss: 0.080 | test avg. loss: 5.696
Epochs: 21983 | epoch avg. loss: 0.365 | test avg. loss: 4.886


 44%|████▍     | 21987/50000 [32:51<34:05, 13.69it/s]

Epochs: 21984 | epoch avg. loss: 0.149 | test avg. loss: 5.083
Epochs: 21985 | epoch avg. loss: 0.185 | test avg. loss: 5.482
Epochs: 21986 | epoch avg. loss: 0.206 | test avg. loss: 5.221


 44%|████▍     | 21989/50000 [32:51<34:48, 13.41it/s]

Epochs: 21987 | epoch avg. loss: 0.113 | test avg. loss: 5.049
Epochs: 21988 | epoch avg. loss: 0.134 | test avg. loss: 5.211
Epochs: 21989 | epoch avg. loss: 0.165 | test avg. loss: 4.794


 44%|████▍     | 21991/50000 [32:52<34:03, 13.70it/s]

Epochs: 21990 | epoch avg. loss: 0.089 | test avg. loss: 4.852
Epochs: 21991 | epoch avg. loss: 0.129 | test avg. loss: 5.270
Epochs: 21992 | epoch avg. loss: 0.110 | test avg. loss: 4.988


 44%|████▍     | 21995/50000 [32:52<36:11, 12.90it/s]

Epochs: 21993 | epoch avg. loss: 0.087 | test avg. loss: 4.967
Epochs: 21994 | epoch avg. loss: 0.086 | test avg. loss: 5.417
Epochs: 21995 | epoch avg. loss: 0.151 | test avg. loss: 5.191
Epochs: 21996 | epoch avg. loss: 0.072 | test avg. loss: 5.378


 44%|████▍     | 21999/50000 [32:52<34:35, 13.49it/s]

Epochs: 21997 | epoch avg. loss: 0.082 | test avg. loss: 5.829
Epochs: 21998 | epoch avg. loss: 0.226 | test avg. loss: 5.081
Epochs: 21999 | epoch avg. loss: 0.192 | test avg. loss: 5.071


 44%|████▍     | 22003/50000 [32:54<1:49:53,  4.25it/s]

Epochs: 22000 | epoch avg. loss: 0.093 | test avg. loss: 5.238
Epochs: 22001 | epoch avg. loss: 0.089 | test avg. loss: 5.028
Epochs: 22002 | epoch avg. loss: 0.053 | test avg. loss: 5.463


 44%|████▍     | 22005/50000 [32:54<1:28:39,  5.26it/s]

Epochs: 22003 | epoch avg. loss: 0.176 | test avg. loss: 5.278
Epochs: 22004 | epoch avg. loss: 0.044 | test avg. loss: 5.104
Epochs: 22005 | epoch avg. loss: 0.049 | test avg. loss: 5.258


 44%|████▍     | 22009/50000 [32:54<1:02:48,  7.43it/s]

Epochs: 22006 | epoch avg. loss: 0.074 | test avg. loss: 4.985
Epochs: 22007 | epoch avg. loss: 0.100 | test avg. loss: 5.067
Epochs: 22008 | epoch avg. loss: 0.036 | test avg. loss: 5.267


 44%|████▍     | 22011/50000 [32:55<58:10,  8.02it/s]

Epochs: 22009 | epoch avg. loss: 0.040 | test avg. loss: 5.236
Epochs: 22010 | epoch avg. loss: 0.026 | test avg. loss: 5.079
Epochs: 22011 | epoch avg. loss: 0.018 | test avg. loss: 4.995


 44%|████▍     | 22015/50000 [32:55<48:12,  9.68it/s]

Epochs: 22012 | epoch avg. loss: 0.022 | test avg. loss: 5.145
Epochs: 22013 | epoch avg. loss: 0.035 | test avg. loss: 5.137
Epochs: 22014 | epoch avg. loss: 0.014 | test avg. loss: 5.149


 44%|████▍     | 22017/50000 [32:55<44:47, 10.41it/s]

Epochs: 22015 | epoch avg. loss: 0.021 | test avg. loss: 5.260
Epochs: 22016 | epoch avg. loss: 0.055 | test avg. loss: 5.062
Epochs: 22017 | epoch avg. loss: 0.026 | test avg. loss: 5.057


 44%|████▍     | 22021/50000 [32:55<42:55, 10.87it/s]

Epochs: 22018 | epoch avg. loss: 0.024 | test avg. loss: 5.318
Epochs: 22019 | epoch avg. loss: 0.090 | test avg. loss: 5.143
Epochs: 22020 | epoch avg. loss: 0.024 | test avg. loss: 5.138


 44%|████▍     | 22023/50000 [32:56<40:50, 11.42it/s]

Epochs: 22021 | epoch avg. loss: 0.017 | test avg. loss: 5.190
Epochs: 22022 | epoch avg. loss: 0.026 | test avg. loss: 5.088
Epochs: 22023 | epoch avg. loss: 0.013 | test avg. loss: 5.164


 44%|████▍     | 22027/50000 [32:56<40:06, 11.63it/s]

Epochs: 22024 | epoch avg. loss: 0.006 | test avg. loss: 5.240
Epochs: 22025 | epoch avg. loss: 0.007 | test avg. loss: 5.230
Epochs: 22026 | epoch avg. loss: 0.010 | test avg. loss: 5.152


 44%|████▍     | 22029/50000 [32:56<38:40, 12.06it/s]

Epochs: 22027 | epoch avg. loss: 0.012 | test avg. loss: 5.134
Epochs: 22028 | epoch avg. loss: 0.012 | test avg. loss: 5.150
Epochs: 22029 | epoch avg. loss: 0.008 | test avg. loss: 5.253


 44%|████▍     | 22033/50000 [32:56<36:28, 12.78it/s]

Epochs: 22030 | epoch avg. loss: 0.014 | test avg. loss: 5.231
Epochs: 22031 | epoch avg. loss: 0.006 | test avg. loss: 5.185
Epochs: 22032 | epoch avg. loss: 0.005 | test avg. loss: 5.169


 44%|████▍     | 22035/50000 [32:57<38:10, 12.21it/s]

Epochs: 22033 | epoch avg. loss: 0.005 | test avg. loss: 5.170
Epochs: 22034 | epoch avg. loss: 0.006 | test avg. loss: 5.149
Epochs: 22035 | epoch avg. loss: 0.006 | test avg. loss: 5.129


 44%|████▍     | 22039/50000 [32:57<36:46, 12.67it/s]

Epochs: 22036 | epoch avg. loss: 0.012 | test avg. loss: 5.197
Epochs: 22037 | epoch avg. loss: 0.006 | test avg. loss: 5.186
Epochs: 22038 | epoch avg. loss: 0.005 | test avg. loss: 5.155


 44%|████▍     | 22043/50000 [32:57<32:45, 14.23it/s]

Epochs: 22039 | epoch avg. loss: 0.008 | test avg. loss: 5.167
Epochs: 22040 | epoch avg. loss: 0.005 | test avg. loss: 5.184
Epochs: 22041 | epoch avg. loss: 0.004 | test avg. loss: 5.188
Epochs: 22042 | epoch avg. loss: 0.005 | test avg. loss: 5.158


 44%|████▍     | 22045/50000 [32:57<34:12, 13.62it/s]

Epochs: 22043 | epoch avg. loss: 0.006 | test avg. loss: 5.200
Epochs: 22044 | epoch avg. loss: 0.007 | test avg. loss: 5.151
Epochs: 22045 | epoch avg. loss: 0.005 | test avg. loss: 5.133




Epochs: 22046 | epoch avg. loss: 0.005 | test avg. loss: 5.190
Epochs: 22047 | epoch avg. loss: 0.006 | test avg. loss: 5.173


 44%|████▍     | 22051/50000 [32:58<37:52, 12.30it/s]

Epochs: 22048 | epoch avg. loss: 0.009 | test avg. loss: 5.177
Epochs: 22049 | epoch avg. loss: 0.010 | test avg. loss: 5.327
Epochs: 22050 | epoch avg. loss: 0.040 | test avg. loss: 5.233


 44%|████▍     | 22053/50000 [32:58<38:34, 12.07it/s]

Epochs: 22051 | epoch avg. loss: 0.013 | test avg. loss: 5.112
Epochs: 22052 | epoch avg. loss: 0.030 | test avg. loss: 5.256
Epochs: 22053 | epoch avg. loss: 0.048 | test avg. loss: 5.243


 44%|████▍     | 22057/50000 [32:58<38:14, 12.18it/s]

Epochs: 22054 | epoch avg. loss: 0.030 | test avg. loss: 5.111
Epochs: 22055 | epoch avg. loss: 0.148 | test avg. loss: 5.309
Epochs: 22056 | epoch avg. loss: 0.089 | test avg. loss: 5.339


 44%|████▍     | 22059/50000 [32:58<37:37, 12.38it/s]

Epochs: 22057 | epoch avg. loss: 0.066 | test avg. loss: 5.131
Epochs: 22058 | epoch avg. loss: 0.221 | test avg. loss: 5.197
Epochs: 22059 | epoch avg. loss: 0.062 | test avg. loss: 5.270


 44%|████▍     | 22063/50000 [32:59<38:49, 11.99it/s]

Epochs: 22060 | epoch avg. loss: 0.049 | test avg. loss: 5.308
Epochs: 22061 | epoch avg. loss: 0.084 | test avg. loss: 5.704
Epochs: 22062 | epoch avg. loss: 0.174 | test avg. loss: 5.215


 44%|████▍     | 22065/50000 [32:59<37:40, 12.36it/s]

Epochs: 22063 | epoch avg. loss: 0.074 | test avg. loss: 5.072
Epochs: 22064 | epoch avg. loss: 0.085 | test avg. loss: 5.327
Epochs: 22065 | epoch avg. loss: 0.089 | test avg. loss: 5.196


 44%|████▍     | 22069/50000 [32:59<33:47, 13.77it/s]

Epochs: 22066 | epoch avg. loss: 0.014 | test avg. loss: 5.316
Epochs: 22067 | epoch avg. loss: 0.045 | test avg. loss: 5.479
Epochs: 22068 | epoch avg. loss: 0.066 | test avg. loss: 5.171
Epochs: 22069 | epoch avg. loss: 0.148 | test avg. loss: 5.106


 44%|████▍     | 22073/50000 [32:59<33:46, 13.78it/s]

Epochs: 22070 | epoch avg. loss: 0.079 | test avg. loss: 5.720
Epochs: 22071 | epoch avg. loss: 0.255 | test avg. loss: 5.099
Epochs: 22072 | epoch avg. loss: 0.215 | test avg. loss: 5.313


 44%|████▍     | 22075/50000 [33:00<36:43, 12.67it/s]

Epochs: 22073 | epoch avg. loss: 0.332 | test avg. loss: 5.641
Epochs: 22074 | epoch avg. loss: 0.271 | test avg. loss: 5.373
Epochs: 22075 | epoch avg. loss: 0.383 | test avg. loss: 5.462


 44%|████▍     | 22079/50000 [33:00<33:51, 13.75it/s]

Epochs: 22076 | epoch avg. loss: 0.151 | test avg. loss: 5.760
Epochs: 22077 | epoch avg. loss: 0.199 | test avg. loss: 4.933
Epochs: 22078 | epoch avg. loss: 0.358 | test avg. loss: 4.655


 44%|████▍     | 22081/50000 [33:00<34:28, 13.50it/s]

Epochs: 22079 | epoch avg. loss: 0.119 | test avg. loss: 4.872
Epochs: 22080 | epoch avg. loss: 0.170 | test avg. loss: 4.614
Epochs: 22081 | epoch avg. loss: 0.289 | test avg. loss: 4.825


 44%|████▍     | 22085/50000 [33:00<34:26, 13.51it/s]

Epochs: 22082 | epoch avg. loss: 0.127 | test avg. loss: 5.021
Epochs: 22083 | epoch avg. loss: 0.086 | test avg. loss: 4.874
Epochs: 22084 | epoch avg. loss: 0.152 | test avg. loss: 5.138


 44%|████▍     | 22087/50000 [33:01<37:12, 12.50it/s]

Epochs: 22085 | epoch avg. loss: 0.282 | test avg. loss: 4.771
Epochs: 22086 | epoch avg. loss: 0.113 | test avg. loss: 4.616
Epochs: 22087 | epoch avg. loss: 0.240 | test avg. loss: 4.881


 44%|████▍     | 22091/50000 [33:01<36:33, 12.72it/s]

Epochs: 22088 | epoch avg. loss: 0.103 | test avg. loss: 5.355
Epochs: 22089 | epoch avg. loss: 0.123 | test avg. loss: 4.992
Epochs: 22090 | epoch avg. loss: 0.164 | test avg. loss: 5.022


 44%|████▍     | 22093/50000 [33:01<34:56, 13.31it/s]

Epochs: 22091 | epoch avg. loss: 0.125 | test avg. loss: 4.615
Epochs: 22092 | epoch avg. loss: 0.074 | test avg. loss: 4.411
Epochs: 22093 | epoch avg. loss: 0.149 | test avg. loss: 4.768


                                                     

Epochs: 22094 | epoch avg. loss: 0.115 | test avg. loss: 4.788
Epochs: 22095 | epoch avg. loss: 0.028 | test avg. loss: 4.893
Epochs: 22096 | epoch avg. loss: 0.027 | test avg. loss: 5.006


 44%|████▍     | 22099/50000 [33:01<34:09, 13.62it/s]

Epochs: 22097 | epoch avg. loss: 0.031 | test avg. loss: 4.770
Epochs: 22098 | epoch avg. loss: 0.048 | test avg. loss: 4.627
Epochs: 22099 | epoch avg. loss: 0.017 | test avg. loss: 4.657


 44%|████▍     | 22103/50000 [33:03<1:40:35,  4.62it/s]

Epochs: 22100 | epoch avg. loss: 0.023 | test avg. loss: 4.615
Epochs: 22101 | epoch avg. loss: 0.011 | test avg. loss: 4.684
Epochs: 22102 | epoch avg. loss: 0.015 | test avg. loss: 4.830
Epochs: 22103 | epoch avg. loss: 0.025 | test avg. loss: 4.675


 44%|████▍     | 22107/50000 [33:03<1:06:03,  7.04it/s]

Epochs: 22104 | epoch avg. loss: 0.039 | test avg. loss: 4.625
Epochs: 22105 | epoch avg. loss: 0.017 | test avg. loss: 4.717
Epochs: 22106 | epoch avg. loss: 0.043 | test avg. loss: 4.626


 44%|████▍     | 22109/50000 [33:04<58:23,  7.96it/s]

Epochs: 22107 | epoch avg. loss: 0.016 | test avg. loss: 4.549
Epochs: 22108 | epoch avg. loss: 0.069 | test avg. loss: 4.699
Epochs: 22109 | epoch avg. loss: 0.015 | test avg. loss: 4.874


 44%|████▍     | 22113/50000 [33:04<44:16, 10.50it/s]

Epochs: 22110 | epoch avg. loss: 0.020 | test avg. loss: 4.740
Epochs: 22111 | epoch avg. loss: 0.066 | test avg. loss: 4.693
Epochs: 22112 | epoch avg. loss: 0.030 | test avg. loss: 4.873
Epochs: 22113 | epoch avg. loss: 0.131 | test avg. loss: 4.604


 44%|████▍     | 22117/50000 [33:04<37:15, 12.47it/s]

Epochs: 22114 | epoch avg. loss: 0.036 | test avg. loss: 4.719
Epochs: 22115 | epoch avg. loss: 0.029 | test avg. loss: 4.961
Epochs: 22116 | epoch avg. loss: 0.031 | test avg. loss: 4.791
Epochs: 22117 | epoch avg. loss: 0.084 | test avg. loss: 4.734


 44%|████▍     | 22121/50000 [33:04<34:31, 13.46it/s]

Epochs: 22118 | epoch avg. loss: 0.023 | test avg. loss: 4.797
Epochs: 22119 | epoch avg. loss: 0.030 | test avg. loss: 4.602
Epochs: 22120 | epoch avg. loss: 0.060 | test avg. loss: 4.678


 44%|████▍     | 22123/50000 [33:05<35:10, 13.21it/s]

Epochs: 22121 | epoch avg. loss: 0.021 | test avg. loss: 4.898
Epochs: 22122 | epoch avg. loss: 0.032 | test avg. loss: 4.792
Epochs: 22123 | epoch avg. loss: 0.055 | test avg. loss: 4.865


 44%|████▍     | 22127/50000 [33:05<33:08, 14.02it/s]

Epochs: 22124 | epoch avg. loss: 0.054 | test avg. loss: 5.055
Epochs: 22125 | epoch avg. loss: 0.129 | test avg. loss: 4.517
Epochs: 22126 | epoch avg. loss: 0.151 | test avg. loss: 4.644
Epochs: 22127 | epoch avg. loss: 0.110 | test avg. loss: 5.008


 44%|████▍     | 22131/50000 [33:05<31:30, 14.74it/s]

Epochs: 22128 | epoch avg. loss: 0.139 | test avg. loss: 4.701
Epochs: 22129 | epoch avg. loss: 0.117 | test avg. loss: 4.842
Epochs: 22130 | epoch avg. loss: 0.086 | test avg. loss: 4.957
Epochs: 22131 | epoch avg. loss: 0.047 | test avg. loss: 4.817


 44%|████▍     | 22135/50000 [33:05<30:40, 15.14it/s]

Epochs: 22132 | epoch avg. loss: 0.081 | test avg. loss: 4.958
Epochs: 22133 | epoch avg. loss: 0.053 | test avg. loss: 4.824
Epochs: 22134 | epoch avg. loss: 0.030 | test avg. loss: 4.768
Epochs: 22135 | epoch avg. loss: 0.030 | test avg. loss: 4.983


 44%|████▍     | 22139/50000 [33:06<33:04, 14.04it/s]

Epochs: 22136 | epoch avg. loss: 0.095 | test avg. loss: 4.735
Epochs: 22137 | epoch avg. loss: 0.034 | test avg. loss: 4.722
Epochs: 22138 | epoch avg. loss: 0.026 | test avg. loss: 4.870


 44%|████▍     | 22141/50000 [33:06<33:04, 14.04it/s]

Epochs: 22139 | epoch avg. loss: 0.058 | test avg. loss: 4.616
Epochs: 22140 | epoch avg. loss: 0.108 | test avg. loss: 4.657
Epochs: 22141 | epoch avg. loss: 0.088 | test avg. loss: 5.216


 44%|████▍     | 22145/50000 [33:06<34:48, 13.34it/s]

Epochs: 22142 | epoch avg. loss: 0.207 | test avg. loss: 4.637
Epochs: 22143 | epoch avg. loss: 0.274 | test avg. loss: 4.580
Epochs: 22144 | epoch avg. loss: 0.096 | test avg. loss: 5.298


 44%|████▍     | 22149/50000 [33:06<33:04, 14.04it/s]

Epochs: 22145 | epoch avg. loss: 0.382 | test avg. loss: 4.701
Epochs: 22146 | epoch avg. loss: 0.341 | test avg. loss: 4.891
Epochs: 22147 | epoch avg. loss: 0.155 | test avg. loss: 5.496
Epochs: 22148 | epoch avg. loss: 0.207 | test avg. loss: 4.724


 44%|████▍     | 22151/50000 [33:07<33:40, 13.78it/s]

Epochs: 22149 | epoch avg. loss: 0.208 | test avg. loss: 4.743
Epochs: 22150 | epoch avg. loss: 0.198 | test avg. loss: 5.075
Epochs: 22151 | epoch avg. loss: 0.237 | test avg. loss: 4.918


                                                     

Epochs: 22152 | epoch avg. loss: 0.632 | test avg. loss: 4.988
Epochs: 22153 | epoch avg. loss: 0.153 | test avg. loss: 5.487
Epochs: 22154 | epoch avg. loss: 0.167 | test avg. loss: 5.037


 44%|████▍     | 22159/50000 [33:07<32:32, 14.26it/s]

Epochs: 22155 | epoch avg. loss: 0.217 | test avg. loss: 5.145
Epochs: 22156 | epoch avg. loss: 0.236 | test avg. loss: 4.723
Epochs: 22157 | epoch avg. loss: 0.087 | test avg. loss: 4.599
Epochs: 22158 | epoch avg. loss: 0.163 | test avg. loss: 5.084


 44%|████▍     | 22163/50000 [33:07<31:05, 14.92it/s]

Epochs: 22159 | epoch avg. loss: 0.252 | test avg. loss: 4.839
Epochs: 22160 | epoch avg. loss: 0.145 | test avg. loss: 4.764
Epochs: 22161 | epoch avg. loss: 0.147 | test avg. loss: 4.989
Epochs: 22162 | epoch avg. loss: 0.203 | test avg. loss: 4.610


 44%|████▍     | 22165/50000 [33:07<31:04, 14.93it/s]

Epochs: 22163 | epoch avg. loss: 0.069 | test avg. loss: 4.751
Epochs: 22164 | epoch avg. loss: 0.048 | test avg. loss: 4.904
Epochs: 22165 | epoch avg. loss: 0.023 | test avg. loss: 4.858


 44%|████▍     | 22169/50000 [33:08<36:43, 12.63it/s]

Epochs: 22166 | epoch avg. loss: 0.024 | test avg. loss: 4.914
Epochs: 22167 | epoch avg. loss: 0.039 | test avg. loss: 4.771
Epochs: 22168 | epoch avg. loss: 0.051 | test avg. loss: 4.717




Epochs: 22169 | epoch avg. loss: 0.122 | test avg. loss: 4.849
Epochs: 22170 | epoch avg. loss: 0.084 | test avg. loss: 4.789
Epochs: 22171 | epoch avg. loss: 0.051 | test avg. loss: 4.754


 44%|████▍     | 22175/50000 [33:08<32:36, 14.22it/s]

Epochs: 22172 | epoch avg. loss: 0.055 | test avg. loss: 5.039
Epochs: 22173 | epoch avg. loss: 0.081 | test avg. loss: 4.781
Epochs: 22174 | epoch avg. loss: 0.129 | test avg. loss: 4.765
Epochs: 22175 | epoch avg. loss: 0.091 | test avg. loss: 5.409


 44%|████▍     | 22179/50000 [33:08<33:41, 13.76it/s]

Epochs: 22176 | epoch avg. loss: 0.286 | test avg. loss: 4.720
Epochs: 22177 | epoch avg. loss: 0.322 | test avg. loss: 4.760
Epochs: 22178 | epoch avg. loss: 0.084 | test avg. loss: 5.369


 44%|████▍     | 22181/50000 [33:09<33:13, 13.96it/s]

Epochs: 22179 | epoch avg. loss: 0.180 | test avg. loss: 4.893
Epochs: 22180 | epoch avg. loss: 0.062 | test avg. loss: 4.903
Epochs: 22181 | epoch avg. loss: 0.029 | test avg. loss: 4.812


 44%|████▍     | 22185/50000 [33:09<32:02, 14.47it/s]

Epochs: 22182 | epoch avg. loss: 0.022 | test avg. loss: 4.760
Epochs: 22183 | epoch avg. loss: 0.025 | test avg. loss: 4.987
Epochs: 22184 | epoch avg. loss: 0.072 | test avg. loss: 4.860
Epochs: 22185 | epoch avg. loss: 0.018 | test avg. loss: 4.757


 44%|████▍     | 22189/50000 [33:09<31:14, 14.83it/s]

Epochs: 22186 | epoch avg. loss: 0.020 | test avg. loss: 4.934
Epochs: 22187 | epoch avg. loss: 0.058 | test avg. loss: 4.748
Epochs: 22188 | epoch avg. loss: 0.028 | test avg. loss: 4.914


 44%|████▍     | 22191/50000 [33:09<32:20, 14.33it/s]

Epochs: 22189 | epoch avg. loss: 0.025 | test avg. loss: 4.949
Epochs: 22190 | epoch avg. loss: 0.031 | test avg. loss: 4.814
Epochs: 22191 | epoch avg. loss: 0.082 | test avg. loss: 4.932


 44%|████▍     | 22195/50000 [33:10<33:58, 13.64it/s]

Epochs: 22192 | epoch avg. loss: 0.072 | test avg. loss: 4.822
Epochs: 22193 | epoch avg. loss: 0.024 | test avg. loss: 4.812
Epochs: 22194 | epoch avg. loss: 0.027 | test avg. loss: 5.036


 44%|████▍     | 22199/50000 [33:10<32:24, 14.30it/s]

Epochs: 22195 | epoch avg. loss: 0.062 | test avg. loss: 4.786
Epochs: 22196 | epoch avg. loss: 0.034 | test avg. loss: 4.772
Epochs: 22197 | epoch avg. loss: 0.045 | test avg. loss: 5.049
Epochs: 22198 | epoch avg. loss: 0.091 | test avg. loss: 4.871


 44%|████▍     | 22199/50000 [33:10<32:24, 14.30it/s]

Epochs: 22199 | epoch avg. loss: 0.075 | test avg. loss: 4.914


 44%|████▍     | 22203/50000 [33:12<1:48:26,  4.27it/s]

Epochs: 22200 | epoch avg. loss: 0.050 | test avg. loss: 5.128
Epochs: 22201 | epoch avg. loss: 0.081 | test avg. loss: 4.869
Epochs: 22202 | epoch avg. loss: 0.034 | test avg. loss: 4.759


                                                       

Epochs: 22203 | epoch avg. loss: 0.035 | test avg. loss: 5.000
Epochs: 22204 | epoch avg. loss: 0.063 | test avg. loss: 4.825
Epochs: 22205 | epoch avg. loss: 0.045 | test avg. loss: 4.900


 44%|████▍     | 22209/50000 [33:12<57:40,  8.03it/s]

Epochs: 22206 | epoch avg. loss: 0.033 | test avg. loss: 5.255
Epochs: 22207 | epoch avg. loss: 0.157 | test avg. loss: 4.848
Epochs: 22208 | epoch avg. loss: 0.031 | test avg. loss: 4.805
Epochs: 22209 | epoch avg. loss: 0.040 | test avg. loss: 4.994


 44%|████▍     | 22213/50000 [33:12<47:19,  9.79it/s]

Epochs: 22210 | epoch avg. loss: 0.072 | test avg. loss: 4.723
Epochs: 22211 | epoch avg. loss: 0.049 | test avg. loss: 4.789
Epochs: 22212 | epoch avg. loss: 0.048 | test avg. loss: 4.852


 44%|████▍     | 22215/50000 [33:13<46:10, 10.03it/s]

Epochs: 22213 | epoch avg. loss: 0.045 | test avg. loss: 4.710
Epochs: 22214 | epoch avg. loss: 0.142 | test avg. loss: 4.805
Epochs: 22215 | epoch avg. loss: 0.074 | test avg. loss: 4.949


 44%|████▍     | 22219/50000 [33:13<43:35, 10.62it/s]

Epochs: 22216 | epoch avg. loss: 0.048 | test avg. loss: 4.900
Epochs: 22217 | epoch avg. loss: 0.064 | test avg. loss: 5.270
Epochs: 22218 | epoch avg. loss: 0.087 | test avg. loss: 4.958


 44%|████▍     | 22221/50000 [33:13<41:41, 11.11it/s]

Epochs: 22219 | epoch avg. loss: 0.062 | test avg. loss: 4.820
Epochs: 22220 | epoch avg. loss: 0.048 | test avg. loss: 4.816
Epochs: 22221 | epoch avg. loss: 0.036 | test avg. loss: 4.621


 44%|████▍     | 22225/50000 [33:13<38:05, 12.15it/s]

Epochs: 22222 | epoch avg. loss: 0.059 | test avg. loss: 4.764
Epochs: 22223 | epoch avg. loss: 0.018 | test avg. loss: 5.012
Epochs: 22224 | epoch avg. loss: 0.026 | test avg. loss: 4.966


 44%|████▍     | 22227/50000 [33:14<38:15, 12.10it/s]

Epochs: 22225 | epoch avg. loss: 0.027 | test avg. loss: 4.941
Epochs: 22226 | epoch avg. loss: 0.013 | test avg. loss: 4.949
Epochs: 22227 | epoch avg. loss: 0.020 | test avg. loss: 4.764


 44%|████▍     | 22231/50000 [33:14<39:30, 11.71it/s]

Epochs: 22228 | epoch avg. loss: 0.062 | test avg. loss: 4.838
Epochs: 22229 | epoch avg. loss: 0.020 | test avg. loss: 5.114
Epochs: 22230 | epoch avg. loss: 0.045 | test avg. loss: 4.924


 44%|████▍     | 22233/50000 [33:14<39:20, 11.77it/s]

Epochs: 22231 | epoch avg. loss: 0.047 | test avg. loss: 4.924
Epochs: 22232 | epoch avg. loss: 0.036 | test avg. loss: 5.057
Epochs: 22233 | epoch avg. loss: 0.083 | test avg. loss: 4.764


 44%|████▍     | 22237/50000 [33:14<36:44, 12.59it/s]

Epochs: 22234 | epoch avg. loss: 0.071 | test avg. loss: 4.803
Epochs: 22235 | epoch avg. loss: 0.035 | test avg. loss: 5.142
Epochs: 22236 | epoch avg. loss: 0.072 | test avg. loss: 4.888


 44%|████▍     | 22239/50000 [33:15<35:40, 12.97it/s]

Epochs: 22237 | epoch avg. loss: 0.019 | test avg. loss: 4.860
Epochs: 22238 | epoch avg. loss: 0.012 | test avg. loss: 4.904
Epochs: 22239 | epoch avg. loss: 0.011 | test avg. loss: 4.905


 44%|████▍     | 22243/50000 [33:15<33:22, 13.86it/s]

Epochs: 22240 | epoch avg. loss: 0.008 | test avg. loss: 4.912
Epochs: 22241 | epoch avg. loss: 0.011 | test avg. loss: 4.953
Epochs: 22242 | epoch avg. loss: 0.011 | test avg. loss: 4.923
Epochs: 22243 | epoch avg. loss: 0.012 | test avg. loss: 4.829


 44%|████▍     | 22247/50000 [33:15<32:33, 14.21it/s]

Epochs: 22244 | epoch avg. loss: 0.012 | test avg. loss: 4.865
Epochs: 22245 | epoch avg. loss: 0.007 | test avg. loss: 4.971
Epochs: 22246 | epoch avg. loss: 0.015 | test avg. loss: 4.968


 44%|████▍     | 22249/50000 [33:15<33:27, 13.83it/s]

Epochs: 22247 | epoch avg. loss: 0.007 | test avg. loss: 4.930
Epochs: 22248 | epoch avg. loss: 0.008 | test avg. loss: 5.033
Epochs: 22249 | epoch avg. loss: 0.028 | test avg. loss: 4.843


 45%|████▍     | 22253/50000 [33:16<38:08, 12.13it/s]

Epochs: 22250 | epoch avg. loss: 0.015 | test avg. loss: 4.763
Epochs: 22251 | epoch avg. loss: 0.018 | test avg. loss: 4.924
Epochs: 22252 | epoch avg. loss: 0.029 | test avg. loss: 4.863


 45%|████▍     | 22257/50000 [33:16<35:03, 13.19it/s]

Epochs: 22253 | epoch avg. loss: 0.008 | test avg. loss: 4.889
Epochs: 22254 | epoch avg. loss: 0.008 | test avg. loss: 5.009
Epochs: 22255 | epoch avg. loss: 0.022 | test avg. loss: 4.884
Epochs: 22256 | epoch avg. loss: 0.012 | test avg. loss: 4.819


                                                     

Epochs: 22257 | epoch avg. loss: 0.012 | test avg. loss: 4.960
Epochs: 22258 | epoch avg. loss: 0.049 | test avg. loss: 4.968
Epochs: 22259 | epoch avg. loss: 0.011 | test avg. loss: 4.993


 45%|████▍     | 22263/50000 [33:16<33:01, 14.00it/s]

Epochs: 22260 | epoch avg. loss: 0.017 | test avg. loss: 4.974
Epochs: 22261 | epoch avg. loss: 0.008 | test avg. loss: 4.867
Epochs: 22262 | epoch avg. loss: 0.009 | test avg. loss: 4.790


 45%|████▍     | 22265/50000 [33:17<34:34, 13.37it/s]

Epochs: 22263 | epoch avg. loss: 0.009 | test avg. loss: 4.754
Epochs: 22264 | epoch avg. loss: 0.022 | test avg. loss: 4.956
Epochs: 22265 | epoch avg. loss: 0.012 | test avg. loss: 5.031


 45%|████▍     | 22269/50000 [33:17<36:54, 12.52it/s]

Epochs: 22266 | epoch avg. loss: 0.021 | test avg. loss: 5.009
Epochs: 22267 | epoch avg. loss: 0.030 | test avg. loss: 5.108
Epochs: 22268 | epoch avg. loss: 0.045 | test avg. loss: 4.859


 45%|████▍     | 22271/50000 [33:17<38:23, 12.04it/s]

Epochs: 22269 | epoch avg. loss: 0.018 | test avg. loss: 4.813
Epochs: 22270 | epoch avg. loss: 0.018 | test avg. loss: 4.988
Epochs: 22271 | epoch avg. loss: 0.032 | test avg. loss: 4.898


 45%|████▍     | 22275/50000 [33:17<36:54, 12.52it/s]

Epochs: 22272 | epoch avg. loss: 0.060 | test avg. loss: 4.951
Epochs: 22273 | epoch avg. loss: 0.029 | test avg. loss: 5.157
Epochs: 22274 | epoch avg. loss: 0.057 | test avg. loss: 5.046


 45%|████▍     | 22277/50000 [33:18<38:38, 11.96it/s]

Epochs: 22275 | epoch avg. loss: 0.014 | test avg. loss: 4.963
Epochs: 22276 | epoch avg. loss: 0.021 | test avg. loss: 5.109
Epochs: 22277 | epoch avg. loss: 0.033 | test avg. loss: 4.941


 45%|████▍     | 22281/50000 [33:18<36:39, 12.60it/s]

Epochs: 22278 | epoch avg. loss: 0.015 | test avg. loss: 4.889
Epochs: 22279 | epoch avg. loss: 0.015 | test avg. loss: 5.063
Epochs: 22280 | epoch avg. loss: 0.039 | test avg. loss: 4.963


 45%|████▍     | 22283/50000 [33:18<36:36, 12.62it/s]

Epochs: 22281 | epoch avg. loss: 0.024 | test avg. loss: 5.038
Epochs: 22282 | epoch avg. loss: 0.023 | test avg. loss: 5.286
Epochs: 22283 | epoch avg. loss: 0.074 | test avg. loss: 4.946




Epochs: 22284 | epoch avg. loss: 0.022 | test avg. loss: 4.791
Epochs: 22285 | epoch avg. loss: 0.030 | test avg. loss: 4.915
Epochs: 22286 | epoch avg. loss: 0.030 | test avg. loss: 5.007


 45%|████▍     | 22289/50000 [33:18<36:26, 12.68it/s]

Epochs: 22287 | epoch avg. loss: 0.011 | test avg. loss: 5.057
Epochs: 22288 | epoch avg. loss: 0.025 | test avg. loss: 5.095
Epochs: 22289 | epoch avg. loss: 0.010 | test avg. loss: 5.053


 45%|████▍     | 22293/50000 [33:19<35:48, 12.89it/s]

Epochs: 22290 | epoch avg. loss: 0.010 | test avg. loss: 4.882
Epochs: 22291 | epoch avg. loss: 0.016 | test avg. loss: 4.904
Epochs: 22292 | epoch avg. loss: 0.009 | test avg. loss: 4.991
Epochs: 22293 | epoch avg. loss: 0.008 | test avg. loss: 5.001


 45%|████▍     | 22297/50000 [33:19<34:43, 13.30it/s]

Epochs: 22294 | epoch avg. loss: 0.010 | test avg. loss: 5.028
Epochs: 22295 | epoch avg. loss: 0.007 | test avg. loss: 5.027
Epochs: 22296 | epoch avg. loss: 0.010 | test avg. loss: 4.969


 45%|████▍     | 22299/50000 [33:19<35:36, 12.97it/s]

Epochs: 22297 | epoch avg. loss: 0.005 | test avg. loss: 4.971
Epochs: 22298 | epoch avg. loss: 0.005 | test avg. loss: 4.976
Epochs: 22299 | epoch avg. loss: 0.004 | test avg. loss: 5.034


 45%|████▍     | 22303/50000 [33:21<1:45:56,  4.36it/s]

Epochs: 22300 | epoch avg. loss: 0.013 | test avg. loss: 5.007
Epochs: 22301 | epoch avg. loss: 0.005 | test avg. loss: 5.005
Epochs: 22302 | epoch avg. loss: 0.007 | test avg. loss: 5.064


 45%|████▍     | 22305/50000 [33:21<1:23:26,  5.53it/s]

Epochs: 22303 | epoch avg. loss: 0.014 | test avg. loss: 4.942
Epochs: 22304 | epoch avg. loss: 0.015 | test avg. loss: 4.958
Epochs: 22305 | epoch avg. loss: 0.012 | test avg. loss: 5.092


 45%|████▍     | 22309/50000 [33:21<58:28,  7.89it/s]  

Epochs: 22306 | epoch avg. loss: 0.022 | test avg. loss: 4.972
Epochs: 22307 | epoch avg. loss: 0.013 | test avg. loss: 4.887
Epochs: 22308 | epoch avg. loss: 0.016 | test avg. loss: 5.101


 45%|████▍     | 22311/50000 [33:22<51:09,  9.02it/s]

Epochs: 22309 | epoch avg. loss: 0.061 | test avg. loss: 4.970
Epochs: 22310 | epoch avg. loss: 0.009 | test avg. loss: 4.942
Epochs: 22311 | epoch avg. loss: 0.009 | test avg. loss: 4.973


 45%|████▍     | 22315/50000 [33:22<42:42, 10.81it/s]

Epochs: 22312 | epoch avg. loss: 0.012 | test avg. loss: 4.978
Epochs: 22313 | epoch avg. loss: 0.006 | test avg. loss: 4.986
Epochs: 22314 | epoch avg. loss: 0.006 | test avg. loss: 4.898


 45%|████▍     | 22319/50000 [33:22<36:21, 12.69it/s]

Epochs: 22315 | epoch avg. loss: 0.027 | test avg. loss: 4.953
Epochs: 22316 | epoch avg. loss: 0.015 | test avg. loss: 5.103
Epochs: 22317 | epoch avg. loss: 0.031 | test avg. loss: 5.008
Epochs: 22318 | epoch avg. loss: 0.022 | test avg. loss: 5.076


 45%|████▍     | 22321/50000 [33:22<34:09, 13.50it/s]

Epochs: 22319 | epoch avg. loss: 0.012 | test avg. loss: 5.154
Epochs: 22320 | epoch avg. loss: 0.017 | test avg. loss: 5.022
Epochs: 22321 | epoch avg. loss: 0.012 | test avg. loss: 4.948
Epochs: 22322 | epoch avg. loss: 0.017 | test avg. loss: 4.962




Epochs: 22323 | epoch avg. loss: 0.009 | test avg. loss: 4.974
Epochs: 22324 | epoch avg. loss: 0.013 | test avg. loss: 4.972
Epochs: 22325 | epoch avg. loss: 0.006 | test avg. loss: 4.939


 45%|████▍     | 22329/50000 [33:23<34:56, 13.20it/s]

Epochs: 22326 | epoch avg. loss: 0.017 | test avg. loss: 4.974
Epochs: 22327 | epoch avg. loss: 0.005 | test avg. loss: 4.998
Epochs: 22328 | epoch avg. loss: 0.005 | test avg. loss: 4.984


 45%|████▍     | 22331/50000 [33:23<35:49, 12.87it/s]

Epochs: 22329 | epoch avg. loss: 0.010 | test avg. loss: 5.010
Epochs: 22330 | epoch avg. loss: 0.005 | test avg. loss: 5.039
Epochs: 22331 | epoch avg. loss: 0.006 | test avg. loss: 4.927


 45%|████▍     | 22335/50000 [33:23<33:09, 13.91it/s]

Epochs: 22332 | epoch avg. loss: 0.034 | test avg. loss: 4.934
Epochs: 22333 | epoch avg. loss: 0.011 | test avg. loss: 5.022
Epochs: 22334 | epoch avg. loss: 0.019 | test avg. loss: 4.973
Epochs: 22335 | epoch avg. loss: 0.011 | test avg. loss: 4.965


 45%|████▍     | 22339/50000 [33:24<31:50, 14.48it/s]

Epochs: 22336 | epoch avg. loss: 0.011 | test avg. loss: 5.046
Epochs: 22337 | epoch avg. loss: 0.030 | test avg. loss: 5.117
Epochs: 22338 | epoch avg. loss: 0.025 | test avg. loss: 5.047
Epochs: 22339 | epoch avg. loss: 0.006 | test avg. loss: 5.039


 45%|████▍     | 22343/50000 [33:24<30:45, 14.99it/s]

Epochs: 22340 | epoch avg. loss: 0.005 | test avg. loss: 5.025
Epochs: 22341 | epoch avg. loss: 0.007 | test avg. loss: 4.949
Epochs: 22342 | epoch avg. loss: 0.042 | test avg. loss: 4.981


 45%|████▍     | 22347/50000 [33:24<31:05, 14.83it/s]

Epochs: 22343 | epoch avg. loss: 0.012 | test avg. loss: 5.147
Epochs: 22344 | epoch avg. loss: 0.058 | test avg. loss: 5.058
Epochs: 22345 | epoch avg. loss: 0.014 | test avg. loss: 5.057
Epochs: 22346 | epoch avg. loss: 0.019 | test avg. loss: 5.245


 45%|████▍     | 22351/50000 [33:24<30:34, 15.07it/s]

Epochs: 22347 | epoch avg. loss: 0.071 | test avg. loss: 4.986
Epochs: 22348 | epoch avg. loss: 0.029 | test avg. loss: 4.868
Epochs: 22349 | epoch avg. loss: 0.121 | test avg. loss: 5.116
Epochs: 22350 | epoch avg. loss: 0.146 | test avg. loss: 5.479


 45%|████▍     | 22353/50000 [33:25<35:09, 13.11it/s]

Epochs: 22351 | epoch avg. loss: 0.138 | test avg. loss: 5.035
Epochs: 22352 | epoch avg. loss: 0.125 | test avg. loss: 5.099
Epochs: 22353 | epoch avg. loss: 0.067 | test avg. loss: 5.473


                                                     

Epochs: 22354 | epoch avg. loss: 0.181 | test avg. loss: 4.889
Epochs: 22355 | epoch avg. loss: 0.134 | test avg. loss: 5.023
Epochs: 22356 | epoch avg. loss: 0.093 | test avg. loss: 5.543


 45%|████▍     | 22361/50000 [33:25<32:19, 14.25it/s]

Epochs: 22357 | epoch avg. loss: 0.122 | test avg. loss: 5.300
Epochs: 22358 | epoch avg. loss: 0.104 | test avg. loss: 5.127
Epochs: 22359 | epoch avg. loss: 0.061 | test avg. loss: 5.317
Epochs: 22360 | epoch avg. loss: 0.243 | test avg. loss: 4.495


 45%|████▍     | 22365/50000 [33:25<30:36, 15.05it/s]

Epochs: 22361 | epoch avg. loss: 0.309 | test avg. loss: 4.634
Epochs: 22362 | epoch avg. loss: 0.161 | test avg. loss: 5.446
Epochs: 22363 | epoch avg. loss: 0.296 | test avg. loss: 5.291
Epochs: 22364 | epoch avg. loss: 0.078 | test avg. loss: 5.199


 45%|████▍     | 22367/50000 [33:26<32:01, 14.38it/s]

Epochs: 22365 | epoch avg. loss: 0.458 | test avg. loss: 4.933
Epochs: 22366 | epoch avg. loss: 0.082 | test avg. loss: 5.124
Epochs: 22367 | epoch avg. loss: 0.107 | test avg. loss: 4.754


 45%|████▍     | 22371/50000 [33:26<31:34, 14.59it/s]

Epochs: 22368 | epoch avg. loss: 0.066 | test avg. loss: 4.842
Epochs: 22369 | epoch avg. loss: 0.056 | test avg. loss: 5.165
Epochs: 22370 | epoch avg. loss: 0.087 | test avg. loss: 4.995
Epochs: 22371 | epoch avg. loss: 0.064 | test avg. loss: 4.963


 45%|████▍     | 22375/50000 [33:26<31:47, 14.48it/s]

Epochs: 22372 | epoch avg. loss: 0.051 | test avg. loss: 5.374
Epochs: 22373 | epoch avg. loss: 0.184 | test avg. loss: 4.882
Epochs: 22374 | epoch avg. loss: 0.074 | test avg. loss: 4.923


 45%|████▍     | 22379/50000 [33:26<30:16, 15.21it/s]

Epochs: 22375 | epoch avg. loss: 0.096 | test avg. loss: 5.640
Epochs: 22376 | epoch avg. loss: 0.389 | test avg. loss: 4.931
Epochs: 22377 | epoch avg. loss: 0.169 | test avg. loss: 4.812
Epochs: 22378 | epoch avg. loss: 0.097 | test avg. loss: 4.989


 45%|████▍     | 22381/50000 [33:26<31:00, 14.85it/s]

Epochs: 22379 | epoch avg. loss: 0.152 | test avg. loss: 4.778
Epochs: 22380 | epoch avg. loss: 0.635 | test avg. loss: 4.622
Epochs: 22381 | epoch avg. loss: 0.188 | test avg. loss: 6.144


 45%|████▍     | 22385/50000 [33:27<30:53, 14.90it/s]

Epochs: 22382 | epoch avg. loss: 0.660 | test avg. loss: 4.786
Epochs: 22383 | epoch avg. loss: 0.395 | test avg. loss: 4.870
Epochs: 22384 | epoch avg. loss: 0.145 | test avg. loss: 5.438
Epochs: 22385 | epoch avg. loss: 0.196 | test avg. loss: 5.286


 45%|████▍     | 22389/50000 [33:27<30:02, 15.32it/s]

Epochs: 22386 | epoch avg. loss: 0.032 | test avg. loss: 5.408
Epochs: 22387 | epoch avg. loss: 0.038 | test avg. loss: 5.338
Epochs: 22388 | epoch avg. loss: 0.043 | test avg. loss: 5.134


 45%|████▍     | 22391/50000 [33:27<32:29, 14.16it/s]

Epochs: 22389 | epoch avg. loss: 0.031 | test avg. loss: 5.123
Epochs: 22390 | epoch avg. loss: 0.047 | test avg. loss: 4.884
Epochs: 22391 | epoch avg. loss: 0.064 | test avg. loss: 5.010


 45%|████▍     | 22395/50000 [33:27<32:04, 14.34it/s]

Epochs: 22392 | epoch avg. loss: 0.066 | test avg. loss: 5.354
Epochs: 22393 | epoch avg. loss: 0.069 | test avg. loss: 5.238
Epochs: 22394 | epoch avg. loss: 0.127 | test avg. loss: 5.149


 45%|████▍     | 22397/50000 [33:28<33:49, 13.60it/s]

Epochs: 22395 | epoch avg. loss: 0.103 | test avg. loss: 5.415
Epochs: 22396 | epoch avg. loss: 0.149 | test avg. loss: 4.878
Epochs: 22397 | epoch avg. loss: 0.104 | test avg. loss: 4.867


 45%|████▍     | 22399/50000 [33:28<33:14, 13.84it/s]

Epochs: 22398 | epoch avg. loss: 0.093 | test avg. loss: 5.279
Epochs: 22399 | epoch avg. loss: 0.086 | test avg. loss: 5.130


 45%|████▍     | 22403/50000 [33:29<1:47:03,  4.30it/s]

Epochs: 22400 | epoch avg. loss: 0.073 | test avg. loss: 5.145
Epochs: 22401 | epoch avg. loss: 0.100 | test avg. loss: 5.260
Epochs: 22402 | epoch avg. loss: 0.066 | test avg. loss: 4.994


 45%|████▍     | 22405/50000 [33:30<1:25:20,  5.39it/s]

Epochs: 22403 | epoch avg. loss: 0.015 | test avg. loss: 4.913
Epochs: 22404 | epoch avg. loss: 0.015 | test avg. loss: 5.021
Epochs: 22405 | epoch avg. loss: 0.019 | test avg. loss: 5.046


 45%|████▍     | 22409/50000 [33:30<1:00:56,  7.55it/s]

Epochs: 22406 | epoch avg. loss: 0.022 | test avg. loss: 5.114
Epochs: 22407 | epoch avg. loss: 0.013 | test avg. loss: 5.211
Epochs: 22408 | epoch avg. loss: 0.028 | test avg. loss: 5.054


 45%|████▍     | 22411/50000 [33:30<55:06,  8.34it/s]

Epochs: 22409 | epoch avg. loss: 0.013 | test avg. loss: 5.043
Epochs: 22410 | epoch avg. loss: 0.014 | test avg. loss: 5.097
Epochs: 22411 | epoch avg. loss: 0.013 | test avg. loss: 5.099


 45%|████▍     | 22413/50000 [33:30<50:14,  9.15it/s]

Epochs: 22412 | epoch avg. loss: 0.032 | test avg. loss: 5.187
Epochs: 22413 | epoch avg. loss: 0.011 | test avg. loss: 5.292


 45%|████▍     | 22417/50000 [33:31<44:28, 10.34it/s]

Epochs: 22414 | epoch avg. loss: 0.029 | test avg. loss: 5.061
Epochs: 22415 | epoch avg. loss: 0.029 | test avg. loss: 5.105
Epochs: 22416 | epoch avg. loss: 0.025 | test avg. loss: 5.337


 45%|████▍     | 22421/50000 [33:31<36:57, 12.44it/s]

Epochs: 22417 | epoch avg. loss: 0.052 | test avg. loss: 5.118
Epochs: 22418 | epoch avg. loss: 0.030 | test avg. loss: 5.269
Epochs: 22419 | epoch avg. loss: 0.037 | test avg. loss: 5.288
Epochs: 22420 | epoch avg. loss: 0.024 | test avg. loss: 5.126


                                                     

Epochs: 22421 | epoch avg. loss: 0.045 | test avg. loss: 5.122
Epochs: 22422 | epoch avg. loss: 0.016 | test avg. loss: 5.195
Epochs: 22423 | epoch avg. loss: 0.020 | test avg. loss: 5.062


 45%|████▍     | 22427/50000 [33:31<33:48, 13.59it/s]

Epochs: 22424 | epoch avg. loss: 0.048 | test avg. loss: 5.041
Epochs: 22425 | epoch avg. loss: 0.015 | test avg. loss: 5.115
Epochs: 22426 | epoch avg. loss: 0.019 | test avg. loss: 5.125


 45%|████▍     | 22429/50000 [33:32<36:18, 12.66it/s]

Epochs: 22427 | epoch avg. loss: 0.007 | test avg. loss: 5.112
Epochs: 22428 | epoch avg. loss: 0.011 | test avg. loss: 5.281
Epochs: 22429 | epoch avg. loss: 0.033 | test avg. loss: 5.115


 45%|████▍     | 22433/50000 [33:32<33:53, 13.56it/s]

Epochs: 22430 | epoch avg. loss: 0.015 | test avg. loss: 4.987
Epochs: 22431 | epoch avg. loss: 0.048 | test avg. loss: 5.169
Epochs: 22432 | epoch avg. loss: 0.023 | test avg. loss: 5.228
Epochs: 22433 | epoch avg. loss: 0.017 | test avg. loss: 5.203


 45%|████▍     | 22437/50000 [33:32<35:15, 13.03it/s]

Epochs: 22434 | epoch avg. loss: 0.050 | test avg. loss: 5.300
Epochs: 22435 | epoch avg. loss: 0.040 | test avg. loss: 5.410
Epochs: 22436 | epoch avg. loss: 0.091 | test avg. loss: 5.018


 45%|████▍     | 22439/50000 [33:32<35:33, 12.92it/s]

Epochs: 22437 | epoch avg. loss: 0.090 | test avg. loss: 4.932
Epochs: 22438 | epoch avg. loss: 0.050 | test avg. loss: 5.407
Epochs: 22439 | epoch avg. loss: 0.156 | test avg. loss: 5.059


 45%|████▍     | 22443/50000 [33:33<38:13, 12.01it/s]

Epochs: 22440 | epoch avg. loss: 0.066 | test avg. loss: 5.109
Epochs: 22441 | epoch avg. loss: 0.038 | test avg. loss: 5.413
Epochs: 22442 | epoch avg. loss: 0.095 | test avg. loss: 5.169


 45%|████▍     | 22445/50000 [33:33<37:38, 12.20it/s]

Epochs: 22443 | epoch avg. loss: 0.052 | test avg. loss: 5.178
Epochs: 22444 | epoch avg. loss: 0.127 | test avg. loss: 5.346
Epochs: 22445 | epoch avg. loss: 0.089 | test avg. loss: 5.256


 45%|████▍     | 22449/50000 [33:33<34:17, 13.39it/s]

Epochs: 22446 | epoch avg. loss: 0.058 | test avg. loss: 5.211
Epochs: 22447 | epoch avg. loss: 0.351 | test avg. loss: 5.221
Epochs: 22448 | epoch avg. loss: 0.168 | test avg. loss: 6.161
Epochs: 22449 | epoch avg. loss: 0.571 | test avg. loss: 5.088


                                                     

Epochs: 22450 | epoch avg. loss: 0.632 | test avg. loss: 5.231
Epochs: 22451 | epoch avg. loss: 0.334 | test avg. loss: 7.539
Epochs: 22452 | epoch avg. loss: 1.578 | test avg. loss: 5.122


 45%|████▍     | 22455/50000 [33:34<33:59, 13.51it/s]

Epochs: 22453 | epoch avg. loss: 0.675 | test avg. loss: 6.472
Epochs: 22454 | epoch avg. loss: 1.591 | test avg. loss: 6.679
Epochs: 22455 | epoch avg. loss: 1.146 | test avg. loss: 5.642


 45%|████▍     | 22459/50000 [33:34<34:08, 13.45it/s]

Epochs: 22456 | epoch avg. loss: 0.322 | test avg. loss: 5.389
Epochs: 22457 | epoch avg. loss: 0.304 | test avg. loss: 5.696
Epochs: 22458 | epoch avg. loss: 0.381 | test avg. loss: 4.706


 45%|████▍     | 22463/50000 [33:34<31:37, 14.51it/s]

Epochs: 22459 | epoch avg. loss: 0.256 | test avg. loss: 5.034
Epochs: 22460 | epoch avg. loss: 0.152 | test avg. loss: 5.746
Epochs: 22461 | epoch avg. loss: 0.136 | test avg. loss: 5.678
Epochs: 22462 | epoch avg. loss: 0.128 | test avg. loss: 5.623


                                                     

Epochs: 22463 | epoch avg. loss: 0.062 | test avg. loss: 5.465
Epochs: 22464 | epoch avg. loss: 0.100 | test avg. loss: 5.047
Epochs: 22465 | epoch avg. loss: 0.236 | test avg. loss: 5.252


 45%|████▍     | 22469/50000 [33:34<32:12, 14.25it/s]

Epochs: 22466 | epoch avg. loss: 0.061 | test avg. loss: 5.895
Epochs: 22467 | epoch avg. loss: 0.109 | test avg. loss: 5.907
Epochs: 22468 | epoch avg. loss: 0.043 | test avg. loss: 5.793


 45%|████▍     | 22471/50000 [33:35<37:44, 12.16it/s]

Epochs: 22469 | epoch avg. loss: 0.045 | test avg. loss: 5.789
Epochs: 22470 | epoch avg. loss: 0.086 | test avg. loss: 5.157
Epochs: 22471 | epoch avg. loss: 0.065 | test avg. loss: 5.102


 45%|████▍     | 22475/50000 [33:35<33:30, 13.69it/s]

Epochs: 22472 | epoch avg. loss: 0.036 | test avg. loss: 5.407
Epochs: 22473 | epoch avg. loss: 0.049 | test avg. loss: 5.309
Epochs: 22474 | epoch avg. loss: 0.033 | test avg. loss: 5.324
Epochs: 22475 | epoch avg. loss: 0.016 | test avg. loss: 5.352


 45%|████▍     | 22479/50000 [33:35<31:41, 14.48it/s]

Epochs: 22476 | epoch avg. loss: 0.012 | test avg. loss: 5.342
Epochs: 22477 | epoch avg. loss: 0.013 | test avg. loss: 5.557
Epochs: 22478 | epoch avg. loss: 0.031 | test avg. loss: 5.496
Epochs: 22479 | epoch avg. loss: 0.010 | test avg. loss: 5.432




Epochs: 22480 | epoch avg. loss: 0.011 | test avg. loss: 5.434
Epochs: 22481 | epoch avg. loss: 0.007 | test avg. loss: 5.436


 45%|████▍     | 22485/50000 [33:36<37:26, 12.25it/s]

Epochs: 22482 | epoch avg. loss: 0.006 | test avg. loss: 5.415
Epochs: 22483 | epoch avg. loss: 0.006 | test avg. loss: 5.348
Epochs: 22484 | epoch avg. loss: 0.008 | test avg. loss: 5.327


                                                     

Epochs: 22485 | epoch avg. loss: 0.006 | test avg. loss: 5.269
Epochs: 22486 | epoch avg. loss: 0.015 | test avg. loss: 5.280
Epochs: 22487 | epoch avg. loss: 0.022 | test avg. loss: 5.460


 45%|████▍     | 22491/50000 [33:36<33:54, 13.52it/s]

Epochs: 22488 | epoch avg. loss: 0.031 | test avg. loss: 5.321
Epochs: 22489 | epoch avg. loss: 0.012 | test avg. loss: 5.308
Epochs: 22490 | epoch avg. loss: 0.011 | test avg. loss: 5.449


 45%|████▍     | 22493/50000 [33:36<34:24, 13.32it/s]

Epochs: 22491 | epoch avg. loss: 0.023 | test avg. loss: 5.364
Epochs: 22492 | epoch avg. loss: 0.019 | test avg. loss: 5.322
Epochs: 22493 | epoch avg. loss: 0.010 | test avg. loss: 5.344


 45%|████▍     | 22497/50000 [33:37<36:23, 12.59it/s]

Epochs: 22494 | epoch avg. loss: 0.010 | test avg. loss: 5.303
Epochs: 22495 | epoch avg. loss: 0.008 | test avg. loss: 5.326
Epochs: 22496 | epoch avg. loss: 0.015 | test avg. loss: 5.449


 45%|████▍     | 22499/50000 [33:37<36:08, 12.68it/s]

Epochs: 22497 | epoch avg. loss: 0.009 | test avg. loss: 5.506
Epochs: 22498 | epoch avg. loss: 0.012 | test avg. loss: 5.334
Epochs: 22499 | epoch avg. loss: 0.015 | test avg. loss: 5.304


 45%|████▌     | 22503/50000 [33:39<1:56:53,  3.92it/s]

Epochs: 22500 | epoch avg. loss: 0.014 | test avg. loss: 5.392
Epochs: 22501 | epoch avg. loss: 0.016 | test avg. loss: 5.359
Epochs: 22502 | epoch avg. loss: 0.026 | test avg. loss: 5.516


 45%|████▌     | 22505/50000 [33:39<1:30:59,  5.04it/s]

Epochs: 22503 | epoch avg. loss: 0.033 | test avg. loss: 5.489
Epochs: 22504 | epoch avg. loss: 0.033 | test avg. loss: 5.228
Epochs: 22505 | epoch avg. loss: 0.008 | test avg. loss: 5.297


 45%|████▌     | 22509/50000 [33:39<1:00:26,  7.58it/s]

Epochs: 22506 | epoch avg. loss: 0.029 | test avg. loss: 5.390
Epochs: 22507 | epoch avg. loss: 0.012 | test avg. loss: 5.460
Epochs: 22508 | epoch avg. loss: 0.031 | test avg. loss: 5.477
Epochs: 22509 | epoch avg. loss: 0.007 | test avg. loss: 5.329


 45%|████▌     | 22513/50000 [33:39<45:17, 10.12it/s]

Epochs: 22510 | epoch avg. loss: 0.007 | test avg. loss: 5.238
Epochs: 22511 | epoch avg. loss: 0.014 | test avg. loss: 5.190
Epochs: 22512 | epoch avg. loss: 0.010 | test avg. loss: 5.389


 45%|████▌     | 22517/50000 [33:40<37:50, 12.11it/s]

Epochs: 22513 | epoch avg. loss: 0.011 | test avg. loss: 5.539
Epochs: 22514 | epoch avg. loss: 0.015 | test avg. loss: 5.409
Epochs: 22515 | epoch avg. loss: 0.018 | test avg. loss: 5.354
Epochs: 22516 | epoch avg. loss: 0.008 | test avg. loss: 5.373


 45%|████▌     | 22521/50000 [33:40<33:13, 13.79it/s]

Epochs: 22517 | epoch avg. loss: 0.009 | test avg. loss: 5.339
Epochs: 22518 | epoch avg. loss: 0.009 | test avg. loss: 5.332
Epochs: 22519 | epoch avg. loss: 0.012 | test avg. loss: 5.451
Epochs: 22520 | epoch avg. loss: 0.010 | test avg. loss: 5.416


 45%|████▌     | 22525/50000 [33:40<31:42, 14.44it/s]

Epochs: 22521 | epoch avg. loss: 0.014 | test avg. loss: 5.356
Epochs: 22522 | epoch avg. loss: 0.010 | test avg. loss: 5.452
Epochs: 22523 | epoch avg. loss: 0.022 | test avg. loss: 5.348
Epochs: 22524 | epoch avg. loss: 0.018 | test avg. loss: 5.397


 45%|████▌     | 22527/50000 [33:40<31:37, 14.48it/s]

Epochs: 22525 | epoch avg. loss: 0.024 | test avg. loss: 5.508
Epochs: 22526 | epoch avg. loss: 0.021 | test avg. loss: 5.376
Epochs: 22527 | epoch avg. loss: 0.015 | test avg. loss: 5.319


 45%|████▌     | 22531/50000 [33:41<31:26, 14.56it/s]

Epochs: 22528 | epoch avg. loss: 0.013 | test avg. loss: 5.429
Epochs: 22529 | epoch avg. loss: 0.027 | test avg. loss: 5.300
Epochs: 22530 | epoch avg. loss: 0.022 | test avg. loss: 5.356
Epochs: 22531 | epoch avg. loss: 0.016 | test avg. loss: 5.535


                                                     

Epochs: 22532 | epoch avg. loss: 0.043 | test avg. loss: 5.395
Epochs: 22533 | epoch avg. loss: 0.012 | test avg. loss: 5.260
Epochs: 22534 | epoch avg. loss: 0.015 | test avg. loss: 5.317


 45%|████▌     | 22537/50000 [33:41<31:22, 14.59it/s]

Epochs: 22535 | epoch avg. loss: 0.009 | test avg. loss: 5.347
Epochs: 22536 | epoch avg. loss: 0.007 | test avg. loss: 5.408
Epochs: 22537 | epoch avg. loss: 0.009 | test avg. loss: 5.446


 45%|████▌     | 22541/50000 [33:41<30:43, 14.89it/s]

Epochs: 22538 | epoch avg. loss: 0.008 | test avg. loss: 5.430
Epochs: 22539 | epoch avg. loss: 0.020 | test avg. loss: 5.347
Epochs: 22540 | epoch avg. loss: 0.009 | test avg. loss: 5.308
Epochs: 22541 | epoch avg. loss: 0.006 | test avg. loss: 5.404


 45%|████▌     | 22545/50000 [33:42<31:27, 14.54it/s]

Epochs: 22542 | epoch avg. loss: 0.006 | test avg. loss: 5.474
Epochs: 22543 | epoch avg. loss: 0.006 | test avg. loss: 5.411
Epochs: 22544 | epoch avg. loss: 0.013 | test avg. loss: 5.375


 45%|████▌     | 22547/50000 [33:42<32:54, 13.91it/s]

Epochs: 22545 | epoch avg. loss: 0.006 | test avg. loss: 5.357
Epochs: 22546 | epoch avg. loss: 0.009 | test avg. loss: 5.388
Epochs: 22547 | epoch avg. loss: 0.010 | test avg. loss: 5.386


 45%|████▌     | 22551/50000 [33:42<33:49, 13.52it/s]

Epochs: 22548 | epoch avg. loss: 0.005 | test avg. loss: 5.397
Epochs: 22549 | epoch avg. loss: 0.005 | test avg. loss: 5.391
Epochs: 22550 | epoch avg. loss: 0.005 | test avg. loss: 5.453


 45%|████▌     | 22553/50000 [33:42<34:20, 13.32it/s]

Epochs: 22551 | epoch avg. loss: 0.014 | test avg. loss: 5.345
Epochs: 22552 | epoch avg. loss: 0.008 | test avg. loss: 5.290
Epochs: 22553 | epoch avg. loss: 0.010 | test avg. loss: 5.450


 45%|████▌     | 22557/50000 [33:43<37:00, 12.36it/s]

Epochs: 22554 | epoch avg. loss: 0.028 | test avg. loss: 5.400
Epochs: 22555 | epoch avg. loss: 0.012 | test avg. loss: 5.335
Epochs: 22556 | epoch avg. loss: 0.023 | test avg. loss: 5.447


 45%|████▌     | 22561/50000 [33:43<33:08, 13.80it/s]

Epochs: 22557 | epoch avg. loss: 0.024 | test avg. loss: 5.432
Epochs: 22558 | epoch avg. loss: 0.015 | test avg. loss: 5.422
Epochs: 22559 | epoch avg. loss: 0.026 | test avg. loss: 5.411
Epochs: 22560 | epoch avg. loss: 0.007 | test avg. loss: 5.448


 45%|████▌     | 22565/50000 [33:43<30:30, 14.99it/s]

Epochs: 22561 | epoch avg. loss: 0.015 | test avg. loss: 5.374
Epochs: 22562 | epoch avg. loss: 0.021 | test avg. loss: 5.368
Epochs: 22563 | epoch avg. loss: 0.015 | test avg. loss: 5.589
Epochs: 22564 | epoch avg. loss: 0.063 | test avg. loss: 5.312


 45%|████▌     | 22567/50000 [33:43<30:19, 15.08it/s]

Epochs: 22565 | epoch avg. loss: 0.033 | test avg. loss: 5.243
Epochs: 22566 | epoch avg. loss: 0.084 | test avg. loss: 5.405
Epochs: 22567 | epoch avg. loss: 0.018 | test avg. loss: 5.474


 45%|████▌     | 22571/50000 [33:44<31:42, 14.42it/s]

Epochs: 22568 | epoch avg. loss: 0.014 | test avg. loss: 5.395
Epochs: 22569 | epoch avg. loss: 0.023 | test avg. loss: 5.484
Epochs: 22570 | epoch avg. loss: 0.053 | test avg. loss: 5.366


 45%|████▌     | 22573/50000 [33:44<32:14, 14.18it/s]

Epochs: 22571 | epoch avg. loss: 0.019 | test avg. loss: 5.255
Epochs: 22572 | epoch avg. loss: 0.027 | test avg. loss: 5.512
Epochs: 22573 | epoch avg. loss: 0.047 | test avg. loss: 5.455
Epochs: 22574 | epoch avg. loss: 0.048 | test avg. loss: 5.390


 45%|████▌     | 22579/50000 [33:44<30:19, 15.07it/s]

Epochs: 22575 | epoch avg. loss: 0.103 | test avg. loss: 5.502
Epochs: 22576 | epoch avg. loss: 0.133 | test avg. loss: 5.371
Epochs: 22577 | epoch avg. loss: 0.072 | test avg. loss: 5.313
Epochs: 22578 | epoch avg. loss: 0.392 | test avg. loss: 5.559


 45%|████▌     | 22581/50000 [33:44<29:39, 15.41it/s]

Epochs: 22579 | epoch avg. loss: 0.196 | test avg. loss: 6.967
Epochs: 22580 | epoch avg. loss: 0.969 | test avg. loss: 5.894
Epochs: 22581 | epoch avg. loss: 1.534 | test avg. loss: 6.012
Epochs: 22582 | epoch avg. loss: 0.743 | test avg. loss: 9.423


 45%|████▌     | 22585/50000 [33:45<31:59, 14.28it/s]

Epochs: 22583 | epoch avg. loss: 2.137 | test avg. loss: 6.062
Epochs: 22584 | epoch avg. loss: 1.693 | test avg. loss: 5.412
Epochs: 22585 | epoch avg. loss: 1.206 | test avg. loss: 7.202


 45%|████▌     | 22589/50000 [33:45<30:57, 14.76it/s]

Epochs: 22586 | epoch avg. loss: 1.467 | test avg. loss: 5.818
Epochs: 22587 | epoch avg. loss: 1.208 | test avg. loss: 6.140
Epochs: 22588 | epoch avg. loss: 0.765 | test avg. loss: 4.733
Epochs: 22589 | epoch avg. loss: 0.550 | test avg. loss: 4.575


 45%|████▌     | 22593/50000 [33:45<29:26, 15.51it/s]

Epochs: 22590 | epoch avg. loss: 0.449 | test avg. loss: 5.083
Epochs: 22591 | epoch avg. loss: 0.342 | test avg. loss: 4.888
Epochs: 22592 | epoch avg. loss: 0.251 | test avg. loss: 5.461
Epochs: 22593 | epoch avg. loss: 0.206 | test avg. loss: 5.198


 45%|████▌     | 22597/50000 [33:45<29:16, 15.60it/s]

Epochs: 22594 | epoch avg. loss: 0.215 | test avg. loss: 5.138
Epochs: 22595 | epoch avg. loss: 0.066 | test avg. loss: 5.089
Epochs: 22596 | epoch avg. loss: 0.070 | test avg. loss: 4.922
Epochs: 22597 | epoch avg. loss: 0.050 | test avg. loss: 4.935


 45%|████▌     | 22599/50000 [33:45<29:55, 15.27it/s]

Epochs: 22598 | epoch avg. loss: 0.036 | test avg. loss: 5.046
Epochs: 22599 | epoch avg. loss: 0.020 | test avg. loss: 5.145


 45%|████▌     | 22603/50000 [33:47<1:42:45,  4.44it/s]

Epochs: 22600 | epoch avg. loss: 0.024 | test avg. loss: 5.029
Epochs: 22601 | epoch avg. loss: 0.052 | test avg. loss: 4.942
Epochs: 22602 | epoch avg. loss: 0.025 | test avg. loss: 4.963


 45%|████▌     | 22605/50000 [33:47<1:21:09,  5.63it/s]

Epochs: 22603 | epoch avg. loss: 0.022 | test avg. loss: 4.929
Epochs: 22604 | epoch avg. loss: 0.095 | test avg. loss: 5.132
Epochs: 22605 | epoch avg. loss: 0.056 | test avg. loss: 5.189


 45%|████▌     | 22609/50000 [33:48<57:06,  7.99it/s]  

Epochs: 22606 | epoch avg. loss: 0.039 | test avg. loss: 5.144
Epochs: 22607 | epoch avg. loss: 0.064 | test avg. loss: 5.610
Epochs: 22608 | epoch avg. loss: 0.173 | test avg. loss: 5.146


                                                     

Epochs: 22609 | epoch avg. loss: 0.059 | test avg. loss: 5.171
Epochs: 22610 | epoch avg. loss: 0.020 | test avg. loss: 5.155
Epochs: 22611 | epoch avg. loss: 0.050 | test avg. loss: 5.059


 45%|████▌     | 22615/50000 [33:48<39:50, 11.45it/s]

Epochs: 22612 | epoch avg. loss: 0.045 | test avg. loss: 5.169
Epochs: 22613 | epoch avg. loss: 0.048 | test avg. loss: 4.945
Epochs: 22614 | epoch avg. loss: 0.039 | test avg. loss: 5.054
Epochs: 22615 | epoch avg. loss: 0.023 | test avg. loss: 5.140


 45%|████▌     | 22619/50000 [33:48<34:43, 13.14it/s]

Epochs: 22616 | epoch avg. loss: 0.018 | test avg. loss: 5.057
Epochs: 22617 | epoch avg. loss: 0.021 | test avg. loss: 5.210
Epochs: 22618 | epoch avg. loss: 0.021 | test avg. loss: 5.114
Epochs: 22619 | epoch avg. loss: 0.030 | test avg. loss: 5.116


 45%|████▌     | 22623/50000 [33:49<34:56, 13.06it/s]

Epochs: 22620 | epoch avg. loss: 0.039 | test avg. loss: 5.325
Epochs: 22621 | epoch avg. loss: 0.055 | test avg. loss: 5.139
Epochs: 22622 | epoch avg. loss: 0.032 | test avg. loss: 5.063


 45%|████▌     | 22625/50000 [33:49<35:29, 12.86it/s]

Epochs: 22623 | epoch avg. loss: 0.038 | test avg. loss: 5.225
Epochs: 22624 | epoch avg. loss: 0.038 | test avg. loss: 5.037
Epochs: 22625 | epoch avg. loss: 0.043 | test avg. loss: 5.161


 45%|████▌     | 22629/50000 [33:49<34:14, 13.32it/s]

Epochs: 22626 | epoch avg. loss: 0.035 | test avg. loss: 5.280
Epochs: 22627 | epoch avg. loss: 0.046 | test avg. loss: 5.304
Epochs: 22628 | epoch avg. loss: 0.189 | test avg. loss: 5.487


 45%|████▌     | 22631/50000 [33:49<33:39, 13.55it/s]

Epochs: 22629 | epoch avg. loss: 0.102 | test avg. loss: 5.127
Epochs: 22630 | epoch avg. loss: 0.053 | test avg. loss: 4.891
Epochs: 22631 | epoch avg. loss: 0.158 | test avg. loss: 5.340


 45%|████▌     | 22635/50000 [33:49<33:58, 13.43it/s]

Epochs: 22632 | epoch avg. loss: 0.205 | test avg. loss: 5.095
Epochs: 22633 | epoch avg. loss: 0.079 | test avg. loss: 5.241
Epochs: 22634 | epoch avg. loss: 0.041 | test avg. loss: 5.276


 45%|████▌     | 22637/50000 [33:50<34:48, 13.10it/s]

Epochs: 22635 | epoch avg. loss: 0.042 | test avg. loss: 5.182
Epochs: 22636 | epoch avg. loss: 0.086 | test avg. loss: 5.314
Epochs: 22637 | epoch avg. loss: 0.085 | test avg. loss: 5.101


 45%|████▌     | 22641/50000 [33:50<34:53, 13.07it/s]

Epochs: 22638 | epoch avg. loss: 0.018 | test avg. loss: 5.049
Epochs: 22639 | epoch avg. loss: 0.016 | test avg. loss: 5.170
Epochs: 22640 | epoch avg. loss: 0.018 | test avg. loss: 5.095


 45%|████▌     | 22643/50000 [33:50<33:30, 13.61it/s]

Epochs: 22641 | epoch avg. loss: 0.025 | test avg. loss: 5.234
Epochs: 22642 | epoch avg. loss: 0.018 | test avg. loss: 5.157
Epochs: 22643 | epoch avg. loss: 0.010 | test avg. loss: 5.057
Epochs: 22644 | epoch avg. loss: 0.016 | test avg. loss: 5.176


 45%|████▌     | 22647/50000 [33:50<32:34, 14.00it/s]

Epochs: 22645 | epoch avg. loss: 0.037 | test avg. loss: 5.105
Epochs: 22646 | epoch avg. loss: 0.025 | test avg. loss: 5.142
Epochs: 22647 | epoch avg. loss: 0.027 | test avg. loss: 5.385


 45%|████▌     | 22651/50000 [33:51<33:28, 13.62it/s]

Epochs: 22648 | epoch avg. loss: 0.085 | test avg. loss: 5.014
Epochs: 22649 | epoch avg. loss: 0.065 | test avg. loss: 5.076
Epochs: 22650 | epoch avg. loss: 0.023 | test avg. loss: 5.307
Epochs: 22651 | epoch avg. loss: 0.031 | test avg. loss: 5.220


 45%|████▌     | 22655/50000 [33:51<32:58, 13.82it/s]

Epochs: 22652 | epoch avg. loss: 0.027 | test avg. loss: 5.297
Epochs: 22653 | epoch avg. loss: 0.044 | test avg. loss: 5.287
Epochs: 22654 | epoch avg. loss: 0.031 | test avg. loss: 5.098


 45%|████▌     | 22659/50000 [33:51<31:14, 14.59it/s]

Epochs: 22655 | epoch avg. loss: 0.054 | test avg. loss: 5.345
Epochs: 22656 | epoch avg. loss: 0.054 | test avg. loss: 5.256
Epochs: 22657 | epoch avg. loss: 0.094 | test avg. loss: 5.289
Epochs: 22658 | epoch avg. loss: 0.096 | test avg. loss: 5.508


 45%|████▌     | 22661/50000 [33:51<31:52, 14.29it/s]

Epochs: 22659 | epoch avg. loss: 0.093 | test avg. loss: 5.189
Epochs: 22660 | epoch avg. loss: 0.026 | test avg. loss: 5.017
Epochs: 22661 | epoch avg. loss: 0.045 | test avg. loss: 5.201


 45%|████▌     | 22665/50000 [33:52<37:04, 12.29it/s]

Epochs: 22662 | epoch avg. loss: 0.038 | test avg. loss: 5.175
Epochs: 22663 | epoch avg. loss: 0.009 | test avg. loss: 5.171
Epochs: 22664 | epoch avg. loss: 0.012 | test avg. loss: 5.232


 45%|████▌     | 22667/50000 [33:52<37:59, 11.99it/s]

Epochs: 22665 | epoch avg. loss: 0.017 | test avg. loss: 5.083
Epochs: 22666 | epoch avg. loss: 0.013 | test avg. loss: 5.142
Epochs: 22667 | epoch avg. loss: 0.019 | test avg. loss: 5.121


 45%|████▌     | 22671/50000 [33:52<36:42, 12.41it/s]

Epochs: 22668 | epoch avg. loss: 0.016 | test avg. loss: 5.074
Epochs: 22669 | epoch avg. loss: 0.036 | test avg. loss: 5.276
Epochs: 22670 | epoch avg. loss: 0.034 | test avg. loss: 5.138


 45%|████▌     | 22673/50000 [33:52<36:43, 12.40it/s]

Epochs: 22671 | epoch avg. loss: 0.024 | test avg. loss: 5.087
Epochs: 22672 | epoch avg. loss: 0.028 | test avg. loss: 5.255
Epochs: 22673 | epoch avg. loss: 0.049 | test avg. loss: 5.102


 45%|████▌     | 22677/50000 [33:53<36:44, 12.39it/s]

Epochs: 22674 | epoch avg. loss: 0.093 | test avg. loss: 5.349
Epochs: 22675 | epoch avg. loss: 0.061 | test avg. loss: 5.468
Epochs: 22676 | epoch avg. loss: 0.038 | test avg. loss: 5.266


 45%|████▌     | 22679/50000 [33:53<36:40, 12.41it/s]

Epochs: 22677 | epoch avg. loss: 0.049 | test avg. loss: 5.393
Epochs: 22678 | epoch avg. loss: 0.040 | test avg. loss: 5.214
Epochs: 22679 | epoch avg. loss: 0.027 | test avg. loss: 5.140


 45%|████▌     | 22683/50000 [33:53<33:30, 13.59it/s]

Epochs: 22680 | epoch avg. loss: 0.058 | test avg. loss: 5.245
Epochs: 22681 | epoch avg. loss: 0.015 | test avg. loss: 5.135
Epochs: 22682 | epoch avg. loss: 0.024 | test avg. loss: 5.163
Epochs: 22683 | epoch avg. loss: 0.015 | test avg. loss: 5.327


 45%|████▌     | 22687/50000 [33:53<33:15, 13.69it/s]

Epochs: 22684 | epoch avg. loss: 0.017 | test avg. loss: 5.246
Epochs: 22685 | epoch avg. loss: 0.009 | test avg. loss: 5.265
Epochs: 22686 | epoch avg. loss: 0.009 | test avg. loss: 5.152


 45%|████▌     | 22689/50000 [33:54<35:39, 12.77it/s]

Epochs: 22687 | epoch avg. loss: 0.028 | test avg. loss: 5.068
Epochs: 22688 | epoch avg. loss: 0.031 | test avg. loss: 5.226
Epochs: 22689 | epoch avg. loss: 0.027 | test avg. loss: 5.152


 45%|████▌     | 22693/50000 [33:54<33:37, 13.54it/s]

Epochs: 22690 | epoch avg. loss: 0.016 | test avg. loss: 5.138
Epochs: 22691 | epoch avg. loss: 0.020 | test avg. loss: 5.263
Epochs: 22692 | epoch avg. loss: 0.021 | test avg. loss: 5.115




Epochs: 22693 | epoch avg. loss: 0.029 | test avg. loss: 5.122
Epochs: 22694 | epoch avg. loss: 0.014 | test avg. loss: 5.275
Epochs: 22695 | epoch avg. loss: 0.035 | test avg. loss: 5.096


 45%|████▌     | 22699/50000 [33:54<32:14, 14.11it/s]

Epochs: 22696 | epoch avg. loss: 0.015 | test avg. loss: 5.201
Epochs: 22697 | epoch avg. loss: 0.026 | test avg. loss: 5.255
Epochs: 22698 | epoch avg. loss: 0.012 | test avg. loss: 5.244


 45%|████▌     | 22699/50000 [33:54<32:14, 14.11it/s]

Epochs: 22699 | epoch avg. loss: 0.008 | test avg. loss: 5.335


 45%|████▌     | 22703/50000 [33:56<1:52:50,  4.03it/s]

Epochs: 22700 | epoch avg. loss: 0.016 | test avg. loss: 5.167
Epochs: 22701 | epoch avg. loss: 0.010 | test avg. loss: 5.118
Epochs: 22702 | epoch avg. loss: 0.017 | test avg. loss: 5.223


 45%|████▌     | 22705/50000 [33:56<1:29:56,  5.06it/s]

Epochs: 22703 | epoch avg. loss: 0.042 | test avg. loss: 5.083
Epochs: 22704 | epoch avg. loss: 0.027 | test avg. loss: 5.238
Epochs: 22705 | epoch avg. loss: 0.041 | test avg. loss: 5.365


 45%|████▌     | 22709/50000 [33:57<1:04:55,  7.01it/s]

Epochs: 22706 | epoch avg. loss: 0.057 | test avg. loss: 5.105
Epochs: 22707 | epoch avg. loss: 0.034 | test avg. loss: 5.101
Epochs: 22708 | epoch avg. loss: 0.013 | test avg. loss: 5.250


 45%|████▌     | 22711/50000 [33:57<56:29,  8.05it/s]

Epochs: 22709 | epoch avg. loss: 0.019 | test avg. loss: 5.129
Epochs: 22710 | epoch avg. loss: 0.039 | test avg. loss: 5.299
Epochs: 22711 | epoch avg. loss: 0.034 | test avg. loss: 5.388


 45%|████▌     | 22715/50000 [33:57<43:32, 10.44it/s]

Epochs: 22712 | epoch avg. loss: 0.023 | test avg. loss: 5.249
Epochs: 22713 | epoch avg. loss: 0.022 | test avg. loss: 5.344
Epochs: 22714 | epoch avg. loss: 0.030 | test avg. loss: 5.150


 45%|████▌     | 22717/50000 [33:57<42:21, 10.73it/s]

Epochs: 22715 | epoch avg. loss: 0.029 | test avg. loss: 5.111
Epochs: 22716 | epoch avg. loss: 0.022 | test avg. loss: 5.259
Epochs: 22717 | epoch avg. loss: 0.042 | test avg. loss: 5.060


 45%|████▌     | 22721/50000 [33:58<38:33, 11.79it/s]

Epochs: 22718 | epoch avg. loss: 0.030 | test avg. loss: 5.166
Epochs: 22719 | epoch avg. loss: 0.016 | test avg. loss: 5.333
Epochs: 22720 | epoch avg. loss: 0.019 | test avg. loss: 5.285


 45%|████▌     | 22723/50000 [33:58<37:16, 12.19it/s]

Epochs: 22721 | epoch avg. loss: 0.015 | test avg. loss: 5.290
Epochs: 22722 | epoch avg. loss: 0.008 | test avg. loss: 5.238
Epochs: 22723 | epoch avg. loss: 0.008 | test avg. loss: 5.077


 45%|████▌     | 22727/50000 [33:58<34:31, 13.17it/s]

Epochs: 22724 | epoch avg. loss: 0.026 | test avg. loss: 5.132
Epochs: 22725 | epoch avg. loss: 0.020 | test avg. loss: 5.253
Epochs: 22726 | epoch avg. loss: 0.021 | test avg. loss: 5.193
Epochs: 22727 | epoch avg. loss: 0.085 | test avg. loss: 5.336


 45%|████▌     | 22731/50000 [33:58<34:01, 13.36it/s]

Epochs: 22728 | epoch avg. loss: 0.058 | test avg. loss: 5.371
Epochs: 22729 | epoch avg. loss: 0.047 | test avg. loss: 5.086
Epochs: 22730 | epoch avg. loss: 0.064 | test avg. loss: 5.141


 45%|████▌     | 22733/50000 [33:59<35:18, 12.87it/s]

Epochs: 22731 | epoch avg. loss: 0.012 | test avg. loss: 5.241
Epochs: 22732 | epoch avg. loss: 0.015 | test avg. loss: 5.131
Epochs: 22733 | epoch avg. loss: 0.035 | test avg. loss: 5.356


 45%|████▌     | 22737/50000 [33:59<33:09, 13.70it/s]

Epochs: 22734 | epoch avg. loss: 0.037 | test avg. loss: 5.382
Epochs: 22735 | epoch avg. loss: 0.026 | test avg. loss: 5.189
Epochs: 22736 | epoch avg. loss: 0.017 | test avg. loss: 5.151




Epochs: 22737 | epoch avg. loss: 0.010 | test avg. loss: 5.191
Epochs: 22738 | epoch avg. loss: 0.017 | test avg. loss: 5.121
Epochs: 22739 | epoch avg. loss: 0.009 | test avg. loss: 5.160




Epochs: 22740 | epoch avg. loss: 0.008 | test avg. loss: 5.254
Epochs: 22741 | epoch avg. loss: 0.008 | test avg. loss: 5.246
Epochs: 22742 | epoch avg. loss: 0.006 | test avg. loss: 5.242


 45%|████▌     | 22745/50000 [33:59<31:28, 14.43it/s]

Epochs: 22743 | epoch avg. loss: 0.006 | test avg. loss: 5.162
Epochs: 22744 | epoch avg. loss: 0.010 | test avg. loss: 5.156
Epochs: 22745 | epoch avg. loss: 0.007 | test avg. loss: 5.234


 45%|████▌     | 22749/50000 [34:00<33:32, 13.54it/s]

Epochs: 22746 | epoch avg. loss: 0.015 | test avg. loss: 5.234
Epochs: 22747 | epoch avg. loss: 0.008 | test avg. loss: 5.165
Epochs: 22748 | epoch avg. loss: 0.027 | test avg. loss: 5.306
Epochs: 22749 | epoch avg. loss: 0.033 | test avg. loss: 5.200




Epochs: 22750 | epoch avg. loss: 0.015 | test avg. loss: 5.070
Epochs: 22751 | epoch avg. loss: 0.019 | test avg. loss: 5.177
Epochs: 22752 | epoch avg. loss: 0.026 | test avg. loss: 5.098


 46%|████▌     | 22757/50000 [34:00<30:40, 14.80it/s]

Epochs: 22753 | epoch avg. loss: 0.009 | test avg. loss: 5.161
Epochs: 22754 | epoch avg. loss: 0.010 | test avg. loss: 5.329
Epochs: 22755 | epoch avg. loss: 0.021 | test avg. loss: 5.251
Epochs: 22756 | epoch avg. loss: 0.023 | test avg. loss: 5.203


 46%|████▌     | 22759/50000 [34:00<30:59, 14.65it/s]

Epochs: 22757 | epoch avg. loss: 0.019 | test avg. loss: 5.359
Epochs: 22758 | epoch avg. loss: 0.043 | test avg. loss: 5.136
Epochs: 22759 | epoch avg. loss: 0.010 | test avg. loss: 5.181


 46%|████▌     | 22763/50000 [34:01<32:14, 14.08it/s]

Epochs: 22760 | epoch avg. loss: 0.019 | test avg. loss: 5.147
Epochs: 22761 | epoch avg. loss: 0.014 | test avg. loss: 5.145
Epochs: 22762 | epoch avg. loss: 0.021 | test avg. loss: 5.322


 46%|████▌     | 22767/50000 [34:01<30:11, 15.04it/s]

Epochs: 22763 | epoch avg. loss: 0.016 | test avg. loss: 5.279
Epochs: 22764 | epoch avg. loss: 0.008 | test avg. loss: 5.158
Epochs: 22765 | epoch avg. loss: 0.018 | test avg. loss: 5.229
Epochs: 22766 | epoch avg. loss: 0.015 | test avg. loss: 5.137


 46%|████▌     | 22771/50000 [34:01<29:17, 15.49it/s]

Epochs: 22767 | epoch avg. loss: 0.008 | test avg. loss: 5.156
Epochs: 22768 | epoch avg. loss: 0.006 | test avg. loss: 5.255
Epochs: 22769 | epoch avg. loss: 0.008 | test avg. loss: 5.202
Epochs: 22770 | epoch avg. loss: 0.020 | test avg. loss: 5.216


 46%|████▌     | 22773/50000 [34:01<30:19, 14.96it/s]

Epochs: 22771 | epoch avg. loss: 0.009 | test avg. loss: 5.210
Epochs: 22772 | epoch avg. loss: 0.006 | test avg. loss: 5.132
Epochs: 22773 | epoch avg. loss: 0.006 | test avg. loss: 5.116


 46%|████▌     | 22777/50000 [34:02<33:23, 13.59it/s]

Epochs: 22774 | epoch avg. loss: 0.006 | test avg. loss: 5.126
Epochs: 22775 | epoch avg. loss: 0.009 | test avg. loss: 5.197
Epochs: 22776 | epoch avg. loss: 0.005 | test avg. loss: 5.270


 46%|████▌     | 22779/50000 [34:02<33:54, 13.38it/s]

Epochs: 22777 | epoch avg. loss: 0.008 | test avg. loss: 5.263
Epochs: 22778 | epoch avg. loss: 0.007 | test avg. loss: 5.151
Epochs: 22779 | epoch avg. loss: 0.017 | test avg. loss: 5.234


 46%|████▌     | 22783/50000 [34:02<31:40, 14.32it/s]

Epochs: 22780 | epoch avg. loss: 0.019 | test avg. loss: 5.147
Epochs: 22781 | epoch avg. loss: 0.026 | test avg. loss: 5.158
Epochs: 22782 | epoch avg. loss: 0.020 | test avg. loss: 5.365
Epochs: 22783 | epoch avg. loss: 0.059 | test avg. loss: 5.297


 46%|████▌     | 22787/50000 [34:02<31:55, 14.21it/s]

Epochs: 22784 | epoch avg. loss: 0.019 | test avg. loss: 5.143
Epochs: 22785 | epoch avg. loss: 0.072 | test avg. loss: 5.170
Epochs: 22786 | epoch avg. loss: 0.015 | test avg. loss: 5.272


 46%|████▌     | 22789/50000 [34:03<32:38, 13.89it/s]

Epochs: 22787 | epoch avg. loss: 0.013 | test avg. loss: 5.169
Epochs: 22788 | epoch avg. loss: 0.014 | test avg. loss: 5.169
Epochs: 22789 | epoch avg. loss: 0.009 | test avg. loss: 5.190


 46%|████▌     | 22793/50000 [34:03<36:02, 12.58it/s]

Epochs: 22790 | epoch avg. loss: 0.009 | test avg. loss: 5.166
Epochs: 22791 | epoch avg. loss: 0.006 | test avg. loss: 5.215
Epochs: 22792 | epoch avg. loss: 0.007 | test avg. loss: 5.269


 46%|████▌     | 22797/50000 [34:03<32:37, 13.89it/s]

Epochs: 22793 | epoch avg. loss: 0.009 | test avg. loss: 5.221
Epochs: 22794 | epoch avg. loss: 0.007 | test avg. loss: 5.161
Epochs: 22795 | epoch avg. loss: 0.011 | test avg. loss: 5.256
Epochs: 22796 | epoch avg. loss: 0.009 | test avg. loss: 5.186


 46%|████▌     | 22799/50000 [34:03<31:58, 14.18it/s]

Epochs: 22797 | epoch avg. loss: 0.007 | test avg. loss: 5.188
Epochs: 22798 | epoch avg. loss: 0.006 | test avg. loss: 5.253
Epochs: 22799 | epoch avg. loss: 0.009 | test avg. loss: 5.233


 46%|████▌     | 22803/50000 [34:05<1:40:42,  4.50it/s]

Epochs: 22800 | epoch avg. loss: 0.005 | test avg. loss: 5.213
Epochs: 22801 | epoch avg. loss: 0.007 | test avg. loss: 5.263
Epochs: 22802 | epoch avg. loss: 0.009 | test avg. loss: 5.299


 46%|████▌     | 22807/50000 [34:05<1:03:24,  7.15it/s]

Epochs: 22803 | epoch avg. loss: 0.013 | test avg. loss: 5.185
Epochs: 22804 | epoch avg. loss: 0.006 | test avg. loss: 5.230
Epochs: 22805 | epoch avg. loss: 0.010 | test avg. loss: 5.186
Epochs: 22806 | epoch avg. loss: 0.006 | test avg. loss: 5.173


 46%|████▌     | 22809/50000 [34:05<53:58,  8.40it/s]

Epochs: 22807 | epoch avg. loss: 0.005 | test avg. loss: 5.227
Epochs: 22808 | epoch avg. loss: 0.010 | test avg. loss: 5.234
Epochs: 22809 | epoch avg. loss: 0.007 | test avg. loss: 5.171


 46%|████▌     | 22813/50000 [34:06<42:42, 10.61it/s]

Epochs: 22810 | epoch avg. loss: 0.018 | test avg. loss: 5.239
Epochs: 22811 | epoch avg. loss: 0.013 | test avg. loss: 5.205
Epochs: 22812 | epoch avg. loss: 0.010 | test avg. loss: 5.152


 46%|████▌     | 22815/50000 [34:06<39:03, 11.60it/s]

Epochs: 22813 | epoch avg. loss: 0.026 | test avg. loss: 5.224
Epochs: 22814 | epoch avg. loss: 0.016 | test avg. loss: 5.303
Epochs: 22815 | epoch avg. loss: 0.015 | test avg. loss: 5.201


 46%|████▌     | 22819/50000 [34:06<34:36, 13.09it/s]

Epochs: 22816 | epoch avg. loss: 0.041 | test avg. loss: 5.244
Epochs: 22817 | epoch avg. loss: 0.010 | test avg. loss: 5.277
Epochs: 22818 | epoch avg. loss: 0.015 | test avg. loss: 5.137
Epochs: 22819 | epoch avg. loss: 0.009 | test avg. loss: 5.114


 46%|████▌     | 22823/50000 [34:06<31:25, 14.41it/s]

Epochs: 22820 | epoch avg. loss: 0.009 | test avg. loss: 5.317
Epochs: 22821 | epoch avg. loss: 0.060 | test avg. loss: 5.257
Epochs: 22822 | epoch avg. loss: 0.029 | test avg. loss: 5.236
Epochs: 22823 | epoch avg. loss: 0.148 | test avg. loss: 5.292


 46%|████▌     | 22827/50000 [34:07<33:25, 13.55it/s]

Epochs: 22824 | epoch avg. loss: 0.021 | test avg. loss: 5.425
Epochs: 22825 | epoch avg. loss: 0.038 | test avg. loss: 5.178
Epochs: 22826 | epoch avg. loss: 0.072 | test avg. loss: 5.197


 46%|████▌     | 22829/50000 [34:07<33:54, 13.35it/s]

Epochs: 22827 | epoch avg. loss: 0.074 | test avg. loss: 5.343
Epochs: 22828 | epoch avg. loss: 0.081 | test avg. loss: 5.114
Epochs: 22829 | epoch avg. loss: 0.087 | test avg. loss: 5.267


 46%|████▌     | 22833/50000 [34:07<36:01, 12.57it/s]

Epochs: 22830 | epoch avg. loss: 0.044 | test avg. loss: 5.371
Epochs: 22831 | epoch avg. loss: 0.046 | test avg. loss: 5.213
Epochs: 22832 | epoch avg. loss: 0.196 | test avg. loss: 5.236


 46%|████▌     | 22837/50000 [34:07<33:06, 13.67it/s]

Epochs: 22833 | epoch avg. loss: 0.071 | test avg. loss: 5.906
Epochs: 22834 | epoch avg. loss: 0.301 | test avg. loss: 5.221
Epochs: 22835 | epoch avg. loss: 0.387 | test avg. loss: 5.435
Epochs: 22836 | epoch avg. loss: 0.417 | test avg. loss: 5.433




Epochs: 22837 | epoch avg. loss: 0.105 | test avg. loss: 5.330
Epochs: 22838 | epoch avg. loss: 0.079 | test avg. loss: 5.196
Epochs: 22839 | epoch avg. loss: 0.157 | test avg. loss: 5.541


                                                     

Epochs: 22840 | epoch avg. loss: 0.039 | test avg. loss: 5.596
Epochs: 22841 | epoch avg. loss: 0.074 | test avg. loss: 5.259
Epochs: 22842 | epoch avg. loss: 0.116 | test avg. loss: 5.164


 46%|████▌     | 22845/50000 [34:08<31:29, 14.37it/s]

Epochs: 22843 | epoch avg. loss: 0.158 | test avg. loss: 4.902
Epochs: 22844 | epoch avg. loss: 0.106 | test avg. loss: 4.696
Epochs: 22845 | epoch avg. loss: 0.030 | test avg. loss: 4.926


 46%|████▌     | 22849/50000 [34:08<32:56, 13.74it/s]

Epochs: 22846 | epoch avg. loss: 0.066 | test avg. loss: 5.018
Epochs: 22847 | epoch avg. loss: 0.080 | test avg. loss: 4.672
Epochs: 22848 | epoch avg. loss: 0.027 | test avg. loss: 4.646


 46%|████▌     | 22851/50000 [34:08<32:22, 13.98it/s]

Epochs: 22849 | epoch avg. loss: 0.042 | test avg. loss: 4.770
Epochs: 22850 | epoch avg. loss: 0.042 | test avg. loss: 4.701
Epochs: 22851 | epoch avg. loss: 0.035 | test avg. loss: 4.690


 46%|████▌     | 22855/50000 [34:09<35:55, 12.59it/s]

Epochs: 22852 | epoch avg. loss: 0.021 | test avg. loss: 4.791
Epochs: 22853 | epoch avg. loss: 0.060 | test avg. loss: 4.441
Epochs: 22854 | epoch avg. loss: 0.061 | test avg. loss: 4.393


 46%|████▌     | 22857/50000 [34:09<34:44, 13.02it/s]

Epochs: 22855 | epoch avg. loss: 0.044 | test avg. loss: 4.914
Epochs: 22856 | epoch avg. loss: 0.241 | test avg. loss: 4.672
Epochs: 22857 | epoch avg. loss: 0.121 | test avg. loss: 4.776


 46%|████▌     | 22861/50000 [34:09<36:43, 12.31it/s]

Epochs: 22858 | epoch avg. loss: 0.152 | test avg. loss: 4.872
Epochs: 22859 | epoch avg. loss: 0.139 | test avg. loss: 4.594
Epochs: 22860 | epoch avg. loss: 0.062 | test avg. loss: 4.567


 46%|████▌     | 22863/50000 [34:09<37:03, 12.21it/s]

Epochs: 22861 | epoch avg. loss: 0.206 | test avg. loss: 4.744
Epochs: 22862 | epoch avg. loss: 0.200 | test avg. loss: 5.132
Epochs: 22863 | epoch avg. loss: 0.239 | test avg. loss: 4.608


 46%|████▌     | 22867/50000 [34:10<36:14, 12.48it/s]

Epochs: 22864 | epoch avg. loss: 0.480 | test avg. loss: 4.537
Epochs: 22865 | epoch avg. loss: 0.090 | test avg. loss: 5.217
Epochs: 22866 | epoch avg. loss: 0.254 | test avg. loss: 4.670
Epochs: 22867 | epoch avg. loss: 0.172 | test avg. loss: 4.684


 46%|████▌     | 22871/50000 [34:10<32:57, 13.72it/s]

Epochs: 22868 | epoch avg. loss: 0.084 | test avg. loss: 5.225
Epochs: 22869 | epoch avg. loss: 0.197 | test avg. loss: 4.664
Epochs: 22870 | epoch avg. loss: 0.148 | test avg. loss: 4.668
Epochs: 22871 | epoch avg. loss: 0.045 | test avg. loss: 4.962


 46%|████▌     | 22875/50000 [34:10<32:23, 13.95it/s]

Epochs: 22872 | epoch avg. loss: 0.109 | test avg. loss: 4.700
Epochs: 22873 | epoch avg. loss: 0.031 | test avg. loss: 4.809
Epochs: 22874 | epoch avg. loss: 0.037 | test avg. loss: 4.858


 46%|████▌     | 22877/50000 [34:10<34:41, 13.03it/s]

Epochs: 22875 | epoch avg. loss: 0.037 | test avg. loss: 4.706
Epochs: 22876 | epoch avg. loss: 0.028 | test avg. loss: 4.930
Epochs: 22877 | epoch avg. loss: 0.078 | test avg. loss: 4.836


 46%|████▌     | 22881/50000 [34:11<34:53, 12.95it/s]

Epochs: 22878 | epoch avg. loss: 0.064 | test avg. loss: 4.858
Epochs: 22879 | epoch avg. loss: 0.389 | test avg. loss: 4.950
Epochs: 22880 | epoch avg. loss: 0.113 | test avg. loss: 5.143


 46%|████▌     | 22883/50000 [34:11<33:39, 13.43it/s]

Epochs: 22881 | epoch avg. loss: 0.097 | test avg. loss: 4.780
Epochs: 22882 | epoch avg. loss: 0.275 | test avg. loss: 4.591
Epochs: 22883 | epoch avg. loss: 0.121 | test avg. loss: 5.113


 46%|████▌     | 22887/50000 [34:11<36:53, 12.25it/s]

Epochs: 22884 | epoch avg. loss: 0.234 | test avg. loss: 4.683
Epochs: 22885 | epoch avg. loss: 0.348 | test avg. loss: 4.690
Epochs: 22886 | epoch avg. loss: 0.082 | test avg. loss: 5.239


 46%|████▌     | 22889/50000 [34:11<39:37, 11.40it/s]

Epochs: 22887 | epoch avg. loss: 0.199 | test avg. loss: 4.667
Epochs: 22888 | epoch avg. loss: 0.243 | test avg. loss: 4.481
Epochs: 22889 | epoch avg. loss: 0.096 | test avg. loss: 4.935


 46%|████▌     | 22893/50000 [34:12<36:29, 12.38it/s]

Epochs: 22890 | epoch avg. loss: 0.258 | test avg. loss: 4.772
Epochs: 22891 | epoch avg. loss: 0.228 | test avg. loss: 4.964
Epochs: 22892 | epoch avg. loss: 0.082 | test avg. loss: 5.213
Epochs: 22893 | epoch avg. loss: 0.089 | test avg. loss: 4.797


 46%|████▌     | 22897/50000 [34:12<32:58, 13.70it/s]

Epochs: 22894 | epoch avg. loss: 0.124 | test avg. loss: 4.757
Epochs: 22895 | epoch avg. loss: 0.060 | test avg. loss: 4.848
Epochs: 22896 | epoch avg. loss: 0.064 | test avg. loss: 4.626


 46%|████▌     | 22899/50000 [34:12<33:43, 13.39it/s]

Epochs: 22897 | epoch avg. loss: 0.161 | test avg. loss: 4.818
Epochs: 22898 | epoch avg. loss: 0.056 | test avg. loss: 5.181
Epochs: 22899 | epoch avg. loss: 0.132 | test avg. loss: 4.738


 46%|████▌     | 22903/50000 [34:14<1:50:50,  4.07it/s]

Epochs: 22900 | epoch avg. loss: 0.123 | test avg. loss: 4.692
Epochs: 22901 | epoch avg. loss: 0.029 | test avg. loss: 4.803
Epochs: 22902 | epoch avg. loss: 0.025 | test avg. loss: 4.712


 46%|████▌     | 22905/50000 [34:14<1:29:09,  5.07it/s]

Epochs: 22903 | epoch avg. loss: 0.037 | test avg. loss: 4.752
Epochs: 22904 | epoch avg. loss: 0.026 | test avg. loss: 4.829
Epochs: 22905 | epoch avg. loss: 0.039 | test avg. loss: 4.704


 46%|████▌     | 22909/50000 [34:15<1:04:22,  7.01it/s]

Epochs: 22906 | epoch avg. loss: 0.021 | test avg. loss: 4.758
Epochs: 22907 | epoch avg. loss: 0.042 | test avg. loss: 4.996
Epochs: 22908 | epoch avg. loss: 0.068 | test avg. loss: 4.805


 46%|████▌     | 22911/50000 [34:15<55:17,  8.16it/s]

Epochs: 22909 | epoch avg. loss: 0.057 | test avg. loss: 4.840
Epochs: 22910 | epoch avg. loss: 0.054 | test avg. loss: 4.856
Epochs: 22911 | epoch avg. loss: 0.075 | test avg. loss: 4.516


 46%|████▌     | 22915/50000 [34:15<43:32, 10.37it/s]

Epochs: 22912 | epoch avg. loss: 0.142 | test avg. loss: 4.658
Epochs: 22913 | epoch avg. loss: 0.031 | test avg. loss: 5.006
Epochs: 22914 | epoch avg. loss: 0.042 | test avg. loss: 4.906


 46%|████▌     | 22917/50000 [34:15<40:17, 11.20it/s]

Epochs: 22915 | epoch avg. loss: 0.148 | test avg. loss: 4.786
Epochs: 22916 | epoch avg. loss: 0.028 | test avg. loss: 4.772
Epochs: 22917 | epoch avg. loss: 0.044 | test avg. loss: 4.625


 46%|████▌     | 22921/50000 [34:15<40:08, 11.24it/s]

Epochs: 22918 | epoch avg. loss: 0.037 | test avg. loss: 4.681
Epochs: 22919 | epoch avg. loss: 0.058 | test avg. loss: 5.057
Epochs: 22920 | epoch avg. loss: 0.084 | test avg. loss: 4.957


 46%|████▌     | 22923/50000 [34:16<38:32, 11.71it/s]

Epochs: 22921 | epoch avg. loss: 0.035 | test avg. loss: 4.747
Epochs: 22922 | epoch avg. loss: 0.104 | test avg. loss: 4.702
Epochs: 22923 | epoch avg. loss: 0.030 | test avg. loss: 5.003
Epochs: 22924 | epoch avg. loss: 0.095 | test avg. loss: 4.864


 46%|████▌     | 22927/50000 [34:16<34:27, 13.09it/s]

Epochs: 22925 | epoch avg. loss: 0.050 | test avg. loss: 4.928
Epochs: 22926 | epoch avg. loss: 0.037 | test avg. loss: 5.098
Epochs: 22927 | epoch avg. loss: 0.050 | test avg. loss: 4.856


 46%|████▌     | 22931/50000 [34:16<33:46, 13.36it/s]

Epochs: 22928 | epoch avg. loss: 0.030 | test avg. loss: 4.761
Epochs: 22929 | epoch avg. loss: 0.037 | test avg. loss: 4.963
Epochs: 22930 | epoch avg. loss: 0.053 | test avg. loss: 4.869


 46%|████▌     | 22933/50000 [34:16<34:59, 12.89it/s]

Epochs: 22931 | epoch avg. loss: 0.020 | test avg. loss: 4.799
Epochs: 22932 | epoch avg. loss: 0.047 | test avg. loss: 4.872
Epochs: 22933 | epoch avg. loss: 0.021 | test avg. loss: 4.880


 46%|████▌     | 22937/50000 [34:17<38:05, 11.84it/s]

Epochs: 22934 | epoch avg. loss: 0.022 | test avg. loss: 4.711
Epochs: 22935 | epoch avg. loss: 0.055 | test avg. loss: 4.885
Epochs: 22936 | epoch avg. loss: 0.031 | test avg. loss: 5.061


                                                     

Epochs: 22937 | epoch avg. loss: 0.037 | test avg. loss: 4.831
Epochs: 22938 | epoch avg. loss: 0.079 | test avg. loss: 4.813
Epochs: 22939 | epoch avg. loss: 0.039 | test avg. loss: 4.982


                                                     

Epochs: 22940 | epoch avg. loss: 0.095 | test avg. loss: 4.735
Epochs: 22941 | epoch avg. loss: 0.082 | test avg. loss: 4.813
Epochs: 22942 | epoch avg. loss: 0.036 | test avg. loss: 5.188


 46%|████▌     | 22945/50000 [34:17<32:27, 13.89it/s]

Epochs: 22943 | epoch avg. loss: 0.114 | test avg. loss: 4.834
Epochs: 22944 | epoch avg. loss: 0.050 | test avg. loss: 4.811
Epochs: 22945 | epoch avg. loss: 0.065 | test avg. loss: 4.916


 46%|████▌     | 22949/50000 [34:18<34:44, 12.97it/s]

Epochs: 22946 | epoch avg. loss: 0.083 | test avg. loss: 4.732
Epochs: 22947 | epoch avg. loss: 0.223 | test avg. loss: 4.812
Epochs: 22948 | epoch avg. loss: 0.090 | test avg. loss: 4.894


 46%|████▌     | 22951/50000 [34:18<33:29, 13.46it/s]

Epochs: 22949 | epoch avg. loss: 0.063 | test avg. loss: 4.812
Epochs: 22950 | epoch avg. loss: 0.106 | test avg. loss: 4.906
Epochs: 22951 | epoch avg. loss: 0.039 | test avg. loss: 5.068


 46%|████▌     | 22955/50000 [34:18<32:37, 13.82it/s]

Epochs: 22952 | epoch avg. loss: 0.045 | test avg. loss: 4.871
Epochs: 22953 | epoch avg. loss: 0.041 | test avg. loss: 4.943
Epochs: 22954 | epoch avg. loss: 0.040 | test avg. loss: 4.902


 46%|████▌     | 22959/50000 [34:18<30:48, 14.63it/s]

Epochs: 22955 | epoch avg. loss: 0.026 | test avg. loss: 4.749
Epochs: 22956 | epoch avg. loss: 0.019 | test avg. loss: 4.803
Epochs: 22957 | epoch avg. loss: 0.011 | test avg. loss: 4.956
Epochs: 22958 | epoch avg. loss: 0.017 | test avg. loss: 4.906


 46%|████▌     | 22961/50000 [34:18<31:49, 14.16it/s]

Epochs: 22959 | epoch avg. loss: 0.042 | test avg. loss: 5.018
Epochs: 22960 | epoch avg. loss: 0.021 | test avg. loss: 5.124
Epochs: 22961 | epoch avg. loss: 0.021 | test avg. loss: 4.919


 46%|████▌     | 22965/50000 [34:19<31:42, 14.21it/s]

Epochs: 22962 | epoch avg. loss: 0.053 | test avg. loss: 4.855
Epochs: 22963 | epoch avg. loss: 0.011 | test avg. loss: 4.850
Epochs: 22964 | epoch avg. loss: 0.010 | test avg. loss: 4.779


 46%|████▌     | 22967/50000 [34:19<31:57, 14.10it/s]

Epochs: 22965 | epoch avg. loss: 0.010 | test avg. loss: 4.888
Epochs: 22966 | epoch avg. loss: 0.021 | test avg. loss: 4.995
Epochs: 22967 | epoch avg. loss: 0.015 | test avg. loss: 4.908
Epochs: 22968 | epoch avg. loss: 0.033 | test avg. loss: 4.946


 46%|████▌     | 22971/50000 [34:19<29:59, 15.02it/s]

Epochs: 22969 | epoch avg. loss: 0.023 | test avg. loss: 4.935
Epochs: 22970 | epoch avg. loss: 0.019 | test avg. loss: 4.774
Epochs: 22971 | epoch avg. loss: 0.052 | test avg. loss: 4.910


 46%|████▌     | 22975/50000 [34:19<35:48, 12.58it/s]

Epochs: 22972 | epoch avg. loss: 0.019 | test avg. loss: 5.003
Epochs: 22973 | epoch avg. loss: 0.028 | test avg. loss: 4.941
Epochs: 22974 | epoch avg. loss: 0.014 | test avg. loss: 4.893


 46%|████▌     | 22979/50000 [34:20<32:50, 13.71it/s]

Epochs: 22975 | epoch avg. loss: 0.018 | test avg. loss: 5.056
Epochs: 22976 | epoch avg. loss: 0.061 | test avg. loss: 4.869
Epochs: 22977 | epoch avg. loss: 0.020 | test avg. loss: 4.772
Epochs: 22978 | epoch avg. loss: 0.063 | test avg. loss: 4.962


 46%|████▌     | 22981/50000 [34:20<32:13, 13.98it/s]

Epochs: 22979 | epoch avg. loss: 0.080 | test avg. loss: 4.959
Epochs: 22980 | epoch avg. loss: 0.043 | test avg. loss: 4.731
Epochs: 22981 | epoch avg. loss: 0.075 | test avg. loss: 4.771


 46%|████▌     | 22985/50000 [34:20<31:23, 14.34it/s]

Epochs: 22982 | epoch avg. loss: 0.018 | test avg. loss: 4.826
Epochs: 22983 | epoch avg. loss: 0.027 | test avg. loss: 4.754
Epochs: 22984 | epoch avg. loss: 0.120 | test avg. loss: 4.866
Epochs: 22985 | epoch avg. loss: 0.012 | test avg. loss: 4.954


 46%|████▌     | 22989/50000 [34:20<31:26, 14.32it/s]

Epochs: 22986 | epoch avg. loss: 0.015 | test avg. loss: 4.825
Epochs: 22987 | epoch avg. loss: 0.041 | test avg. loss: 4.810
Epochs: 22988 | epoch avg. loss: 0.005 | test avg. loss: 4.719


 46%|████▌     | 22993/50000 [34:21<30:28, 14.77it/s]

Epochs: 22989 | epoch avg. loss: 0.013 | test avg. loss: 4.745
Epochs: 22990 | epoch avg. loss: 0.017 | test avg. loss: 4.917
Epochs: 22991 | epoch avg. loss: 0.029 | test avg. loss: 4.938
Epochs: 22992 | epoch avg. loss: 0.021 | test avg. loss: 4.927


 46%|████▌     | 22997/50000 [34:21<29:26, 15.28it/s]

Epochs: 22993 | epoch avg. loss: 0.014 | test avg. loss: 4.947
Epochs: 22994 | epoch avg. loss: 0.018 | test avg. loss: 4.810
Epochs: 22995 | epoch avg. loss: 0.014 | test avg. loss: 4.779
Epochs: 22996 | epoch avg. loss: 0.022 | test avg. loss: 4.915


 46%|████▌     | 22999/50000 [34:21<31:13, 14.42it/s]

Epochs: 22997 | epoch avg. loss: 0.009 | test avg. loss: 4.971
Epochs: 22998 | epoch avg. loss: 0.011 | test avg. loss: 4.888
Epochs: 22999 | epoch avg. loss: 0.006 | test avg. loss: 4.850


 46%|████▌     | 23003/50000 [34:23<1:41:36,  4.43it/s]

Epochs: 23000 | epoch avg. loss: 0.005 | test avg. loss: 4.792
Epochs: 23001 | epoch avg. loss: 0.013 | test avg. loss: 4.767
Epochs: 23002 | epoch avg. loss: 0.079 | test avg. loss: 4.955


 46%|████▌     | 23005/50000 [34:23<1:21:18,  5.53it/s]

Epochs: 23003 | epoch avg. loss: 0.029 | test avg. loss: 5.268
Epochs: 23004 | epoch avg. loss: 0.074 | test avg. loss: 4.905
Epochs: 23005 | epoch avg. loss: 0.056 | test avg. loss: 4.840


 46%|████▌     | 23009/50000 [34:23<58:32,  7.69it/s]  

Epochs: 23006 | epoch avg. loss: 0.027 | test avg. loss: 4.933
Epochs: 23007 | epoch avg. loss: 0.053 | test avg. loss: 4.785
Epochs: 23008 | epoch avg. loss: 0.015 | test avg. loss: 4.901


 46%|████▌     | 23011/50000 [34:24<53:31,  8.40it/s]

Epochs: 23009 | epoch avg. loss: 0.012 | test avg. loss: 5.048
Epochs: 23010 | epoch avg. loss: 0.018 | test avg. loss: 4.927
Epochs: 23011 | epoch avg. loss: 0.008 | test avg. loss: 4.848


 46%|████▌     | 23015/50000 [34:24<41:29, 10.84it/s]

Epochs: 23012 | epoch avg. loss: 0.008 | test avg. loss: 4.889
Epochs: 23013 | epoch avg. loss: 0.017 | test avg. loss: 4.911
Epochs: 23014 | epoch avg. loss: 0.014 | test avg. loss: 4.879
Epochs: 23015 | epoch avg. loss: 0.005 | test avg. loss: 4.908


 46%|████▌     | 23019/50000 [34:24<34:54, 12.88it/s]

Epochs: 23016 | epoch avg. loss: 0.007 | test avg. loss: 4.983
Epochs: 23017 | epoch avg. loss: 0.015 | test avg. loss: 4.791
Epochs: 23018 | epoch avg. loss: 0.032 | test avg. loss: 4.768
Epochs: 23019 | epoch avg. loss: 0.061 | test avg. loss: 4.924


 46%|████▌     | 23023/50000 [34:24<31:48, 14.14it/s]

Epochs: 23020 | epoch avg. loss: 0.039 | test avg. loss: 5.005
Epochs: 23021 | epoch avg. loss: 0.028 | test avg. loss: 4.865
Epochs: 23022 | epoch avg. loss: 0.075 | test avg. loss: 4.829
Epochs: 23023 | epoch avg. loss: 0.042 | test avg. loss: 4.990


 46%|████▌     | 23027/50000 [34:25<33:05, 13.58it/s]

Epochs: 23024 | epoch avg. loss: 0.139 | test avg. loss: 4.679
Epochs: 23025 | epoch avg. loss: 0.055 | test avg. loss: 4.807
Epochs: 23026 | epoch avg. loss: 0.065 | test avg. loss: 5.216


 46%|████▌     | 23031/50000 [34:25<30:29, 14.74it/s]

Epochs: 23027 | epoch avg. loss: 0.117 | test avg. loss: 5.090
Epochs: 23028 | epoch avg. loss: 0.041 | test avg. loss: 4.922
Epochs: 23029 | epoch avg. loss: 0.099 | test avg. loss: 4.991
Epochs: 23030 | epoch avg. loss: 0.054 | test avg. loss: 5.219


 46%|████▌     | 23035/50000 [34:25<29:39, 15.15it/s]

Epochs: 23031 | epoch avg. loss: 0.148 | test avg. loss: 4.650
Epochs: 23032 | epoch avg. loss: 0.068 | test avg. loss: 4.680
Epochs: 23033 | epoch avg. loss: 0.044 | test avg. loss: 5.022
Epochs: 23034 | epoch avg. loss: 0.061 | test avg. loss: 5.165


 46%|████▌     | 23037/50000 [34:25<30:23, 14.79it/s]

Epochs: 23035 | epoch avg. loss: 0.032 | test avg. loss: 5.050
Epochs: 23036 | epoch avg. loss: 0.143 | test avg. loss: 5.007
Epochs: 23037 | epoch avg. loss: 0.041 | test avg. loss: 4.920


 46%|████▌     | 23041/50000 [34:26<36:31, 12.30it/s]

Epochs: 23038 | epoch avg. loss: 0.038 | test avg. loss: 4.745
Epochs: 23039 | epoch avg. loss: 0.048 | test avg. loss: 4.939
Epochs: 23040 | epoch avg. loss: 0.045 | test avg. loss: 5.182


 46%|████▌     | 23043/50000 [34:26<34:38, 12.97it/s]

Epochs: 23041 | epoch avg. loss: 0.035 | test avg. loss: 5.086
Epochs: 23042 | epoch avg. loss: 0.100 | test avg. loss: 5.168
Epochs: 23043 | epoch avg. loss: 0.037 | test avg. loss: 5.373


 46%|████▌     | 23047/50000 [34:26<32:02, 14.02it/s]

Epochs: 23044 | epoch avg. loss: 0.102 | test avg. loss: 4.951
Epochs: 23045 | epoch avg. loss: 0.065 | test avg. loss: 4.837
Epochs: 23046 | epoch avg. loss: 0.041 | test avg. loss: 5.102
Epochs: 23047 | epoch avg. loss: 0.104 | test avg. loss: 4.968


 46%|████▌     | 23051/50000 [34:26<32:34, 13.79it/s]

Epochs: 23048 | epoch avg. loss: 0.040 | test avg. loss: 5.007
Epochs: 23049 | epoch avg. loss: 0.047 | test avg. loss: 5.188
Epochs: 23050 | epoch avg. loss: 0.074 | test avg. loss: 5.031




Epochs: 23051 | epoch avg. loss: 0.038 | test avg. loss: 4.831
Epochs: 23052 | epoch avg. loss: 0.135 | test avg. loss: 4.929
Epochs: 23053 | epoch avg. loss: 0.096 | test avg. loss: 5.355


 46%|████▌     | 23057/50000 [34:27<30:57, 14.51it/s]

Epochs: 23054 | epoch avg. loss: 0.146 | test avg. loss: 5.053
Epochs: 23055 | epoch avg. loss: 0.112 | test avg. loss: 5.049
Epochs: 23056 | epoch avg. loss: 0.033 | test avg. loss: 5.157


 46%|████▌     | 23059/50000 [34:27<33:11, 13.52it/s]

Epochs: 23057 | epoch avg. loss: 0.076 | test avg. loss: 4.885
Epochs: 23058 | epoch avg. loss: 0.032 | test avg. loss: 4.850
Epochs: 23059 | epoch avg. loss: 0.115 | test avg. loss: 5.072


 46%|████▌     | 23063/50000 [34:27<35:11, 12.76it/s]

Epochs: 23060 | epoch avg. loss: 0.141 | test avg. loss: 5.213
Epochs: 23061 | epoch avg. loss: 0.076 | test avg. loss: 5.038
Epochs: 23062 | epoch avg. loss: 0.154 | test avg. loss: 5.141


 46%|████▌     | 23065/50000 [34:27<38:14, 11.74it/s]

Epochs: 23063 | epoch avg. loss: 0.045 | test avg. loss: 5.404
Epochs: 23064 | epoch avg. loss: 0.096 | test avg. loss: 4.967


 46%|████▌     | 23067/50000 [34:28<38:52, 11.55it/s]

Epochs: 23065 | epoch avg. loss: 0.104 | test avg. loss: 4.937
Epochs: 23066 | epoch avg. loss: 0.072 | test avg. loss: 5.329
Epochs: 23067 | epoch avg. loss: 0.222 | test avg. loss: 5.137


 46%|████▌     | 23071/50000 [34:28<39:26, 11.38it/s]

Epochs: 23068 | epoch avg. loss: 0.060 | test avg. loss: 4.995
Epochs: 23069 | epoch avg. loss: 0.239 | test avg. loss: 4.952
Epochs: 23070 | epoch avg. loss: 0.066 | test avg. loss: 5.463


 46%|████▌     | 23075/50000 [34:28<34:33, 12.98it/s]

Epochs: 23071 | epoch avg. loss: 0.258 | test avg. loss: 5.104
Epochs: 23072 | epoch avg. loss: 0.051 | test avg. loss: 4.887
Epochs: 23073 | epoch avg. loss: 0.098 | test avg. loss: 4.988
Epochs: 23074 | epoch avg. loss: 0.022 | test avg. loss: 5.122


 46%|████▌     | 23077/50000 [34:28<35:19, 12.70it/s]

Epochs: 23075 | epoch avg. loss: 0.027 | test avg. loss: 5.031
Epochs: 23076 | epoch avg. loss: 0.017 | test avg. loss: 5.025
Epochs: 23077 | epoch avg. loss: 0.020 | test avg. loss: 5.017


 46%|████▌     | 23081/50000 [34:29<32:46, 13.69it/s]

Epochs: 23078 | epoch avg. loss: 0.022 | test avg. loss: 4.931
Epochs: 23079 | epoch avg. loss: 0.036 | test avg. loss: 4.924
Epochs: 23080 | epoch avg. loss: 0.159 | test avg. loss: 5.102
Epochs: 23081 | epoch avg. loss: 0.046 | test avg. loss: 5.141




Epochs: 23082 | epoch avg. loss: 0.038 | test avg. loss: 4.972
Epochs: 23083 | epoch avg. loss: 0.044 | test avg. loss: 5.070
Epochs: 23084 | epoch avg. loss: 0.053 | test avg. loss: 4.947


 46%|████▌     | 23087/50000 [34:29<31:54, 14.05it/s]

Epochs: 23085 | epoch avg. loss: 0.023 | test avg. loss: 4.874
Epochs: 23086 | epoch avg. loss: 0.070 | test avg. loss: 5.011
Epochs: 23087 | epoch avg. loss: 0.046 | test avg. loss: 5.314




Epochs: 23088 | epoch avg. loss: 0.079 | test avg. loss: 4.978
Epochs: 23089 | epoch avg. loss: 0.059 | test avg. loss: 4.981


 46%|████▌     | 23093/50000 [34:30<37:29, 11.96it/s]

Epochs: 23090 | epoch avg. loss: 0.023 | test avg. loss: 5.098
Epochs: 23091 | epoch avg. loss: 0.029 | test avg. loss: 4.873
Epochs: 23092 | epoch avg. loss: 0.021 | test avg. loss: 4.849


 46%|████▌     | 23095/50000 [34:30<38:09, 11.75it/s]

Epochs: 23093 | epoch avg. loss: 0.015 | test avg. loss: 5.051
Epochs: 23094 | epoch avg. loss: 0.082 | test avg. loss: 5.040
Epochs: 23095 | epoch avg. loss: 0.023 | test avg. loss: 5.066


 46%|████▌     | 23099/50000 [34:30<35:40, 12.57it/s]

Epochs: 23096 | epoch avg. loss: 0.060 | test avg. loss: 5.214
Epochs: 23097 | epoch avg. loss: 0.071 | test avg. loss: 5.003
Epochs: 23098 | epoch avg. loss: 0.031 | test avg. loss: 4.865


 46%|████▌     | 23099/50000 [34:30<35:40, 12.57it/s]

Epochs: 23099 | epoch avg. loss: 0.133 | test avg. loss: 4.971


 46%|████▌     | 23103/50000 [34:32<1:52:07,  4.00it/s]

Epochs: 23100 | epoch avg. loss: 0.039 | test avg. loss: 5.107
Epochs: 23101 | epoch avg. loss: 0.048 | test avg. loss: 5.156
Epochs: 23102 | epoch avg. loss: 0.065 | test avg. loss: 5.290


 46%|████▌     | 23105/50000 [34:32<1:28:08,  5.09it/s]

Epochs: 23103 | epoch avg. loss: 0.100 | test avg. loss: 4.876
Epochs: 23104 | epoch avg. loss: 0.148 | test avg. loss: 4.692
Epochs: 23105 | epoch avg. loss: 0.101 | test avg. loss: 5.451


 46%|████▌     | 23109/50000 [34:33<1:01:36,  7.27it/s]

Epochs: 23106 | epoch avg. loss: 0.466 | test avg. loss: 5.076
Epochs: 23107 | epoch avg. loss: 0.177 | test avg. loss: 5.244
Epochs: 23108 | epoch avg. loss: 0.157 | test avg. loss: 5.522


 46%|████▌     | 23111/50000 [34:33<53:10,  8.43it/s]

Epochs: 23109 | epoch avg. loss: 0.309 | test avg. loss: 5.278
Epochs: 23110 | epoch avg. loss: 0.272 | test avg. loss: 4.747
Epochs: 23111 | epoch avg. loss: 0.313 | test avg. loss: 5.028


 46%|████▌     | 23115/50000 [34:33<43:47, 10.23it/s]

Epochs: 23112 | epoch avg. loss: 0.071 | test avg. loss: 5.406
Epochs: 23113 | epoch avg. loss: 0.087 | test avg. loss: 5.241
Epochs: 23114 | epoch avg. loss: 0.140 | test avg. loss: 4.989


 46%|████▌     | 23117/50000 [34:33<39:19, 11.39it/s]

Epochs: 23115 | epoch avg. loss: 0.047 | test avg. loss: 4.765
Epochs: 23116 | epoch avg. loss: 0.060 | test avg. loss: 4.729
Epochs: 23117 | epoch avg. loss: 0.049 | test avg. loss: 5.173


 46%|████▌     | 23121/50000 [34:33<38:48, 11.55it/s]

Epochs: 23118 | epoch avg. loss: 0.125 | test avg. loss: 5.303
Epochs: 23119 | epoch avg. loss: 0.037 | test avg. loss: 5.296
Epochs: 23120 | epoch avg. loss: 0.045 | test avg. loss: 5.142


 46%|████▌     | 23123/50000 [34:34<37:16, 12.02it/s]

Epochs: 23121 | epoch avg. loss: 0.020 | test avg. loss: 4.847
Epochs: 23122 | epoch avg. loss: 0.027 | test avg. loss: 4.700
Epochs: 23123 | epoch avg. loss: 0.032 | test avg. loss: 4.810


 46%|████▋     | 23127/50000 [34:34<35:55, 12.47it/s]

Epochs: 23124 | epoch avg. loss: 0.022 | test avg. loss: 4.944
Epochs: 23125 | epoch avg. loss: 0.017 | test avg. loss: 5.169
Epochs: 23126 | epoch avg. loss: 0.015 | test avg. loss: 5.281


 46%|████▋     | 23131/50000 [34:34<32:42, 13.69it/s]

Epochs: 23127 | epoch avg. loss: 0.019 | test avg. loss: 5.134
Epochs: 23128 | epoch avg. loss: 0.011 | test avg. loss: 4.970
Epochs: 23129 | epoch avg. loss: 0.008 | test avg. loss: 4.888
Epochs: 23130 | epoch avg. loss: 0.011 | test avg. loss: 4.896


 46%|████▋     | 23133/50000 [34:34<32:41, 13.70it/s]

Epochs: 23131 | epoch avg. loss: 0.013 | test avg. loss: 5.025
Epochs: 23132 | epoch avg. loss: 0.008 | test avg. loss: 5.254
Epochs: 23133 | epoch avg. loss: 0.032 | test avg. loss: 5.193


 46%|████▋     | 23137/50000 [34:35<35:47, 12.51it/s]

Epochs: 23134 | epoch avg. loss: 0.033 | test avg. loss: 5.085
Epochs: 23135 | epoch avg. loss: 0.038 | test avg. loss: 5.155
Epochs: 23136 | epoch avg. loss: 0.056 | test avg. loss: 4.957


 46%|████▋     | 23139/50000 [34:35<35:20, 12.67it/s]

Epochs: 23137 | epoch avg. loss: 0.019 | test avg. loss: 4.879
Epochs: 23138 | epoch avg. loss: 0.026 | test avg. loss: 5.158
Epochs: 23139 | epoch avg. loss: 0.040 | test avg. loss: 5.179


 46%|████▋     | 23143/50000 [34:35<34:44, 12.88it/s]

Epochs: 23140 | epoch avg. loss: 0.021 | test avg. loss: 5.074
Epochs: 23141 | epoch avg. loss: 0.072 | test avg. loss: 5.175
Epochs: 23142 | epoch avg. loss: 0.044 | test avg. loss: 5.215


 46%|████▋     | 23145/50000 [34:35<34:31, 12.97it/s]

Epochs: 23143 | epoch avg. loss: 0.036 | test avg. loss: 5.040
Epochs: 23144 | epoch avg. loss: 0.036 | test avg. loss: 5.006
Epochs: 23145 | epoch avg. loss: 0.056 | test avg. loss: 5.153


 46%|████▋     | 23149/50000 [34:36<33:55, 13.19it/s]

Epochs: 23146 | epoch avg. loss: 0.031 | test avg. loss: 5.120
Epochs: 23147 | epoch avg. loss: 0.029 | test avg. loss: 5.092
Epochs: 23148 | epoch avg. loss: 0.036 | test avg. loss: 5.571


 46%|████▋     | 23153/50000 [34:36<31:00, 14.43it/s]

Epochs: 23149 | epoch avg. loss: 0.207 | test avg. loss: 5.047
Epochs: 23150 | epoch avg. loss: 0.050 | test avg. loss: 4.913
Epochs: 23151 | epoch avg. loss: 0.054 | test avg. loss: 5.052
Epochs: 23152 | epoch avg. loss: 0.038 | test avg. loss: 4.941


 46%|████▋     | 23157/50000 [34:36<29:30, 15.16it/s]

Epochs: 23153 | epoch avg. loss: 0.069 | test avg. loss: 5.037
Epochs: 23154 | epoch avg. loss: 0.080 | test avg. loss: 5.161
Epochs: 23155 | epoch avg. loss: 0.034 | test avg. loss: 4.969
Epochs: 23156 | epoch avg. loss: 0.018 | test avg. loss: 4.979


 46%|████▋     | 23159/50000 [34:36<30:21, 14.74it/s]

Epochs: 23157 | epoch avg. loss: 0.018 | test avg. loss: 5.094
Epochs: 23158 | epoch avg. loss: 0.041 | test avg. loss: 5.021
Epochs: 23159 | epoch avg. loss: 0.023 | test avg. loss: 4.973


 46%|████▋     | 23163/50000 [34:37<32:41, 13.68it/s]

Epochs: 23160 | epoch avg. loss: 0.104 | test avg. loss: 4.978
Epochs: 23161 | epoch avg. loss: 0.024 | test avg. loss: 5.143
Epochs: 23162 | epoch avg. loss: 0.068 | test avg. loss: 5.000


 46%|████▋     | 23165/50000 [34:37<35:14, 12.69it/s]

Epochs: 23163 | epoch avg. loss: 0.024 | test avg. loss: 4.956
Epochs: 23164 | epoch avg. loss: 0.029 | test avg. loss: 5.189
Epochs: 23165 | epoch avg. loss: 0.081 | test avg. loss: 4.892


 46%|████▋     | 23169/50000 [34:37<35:11, 12.71it/s]

Epochs: 23166 | epoch avg. loss: 0.054 | test avg. loss: 4.932
Epochs: 23167 | epoch avg. loss: 0.207 | test avg. loss: 5.060
Epochs: 23168 | epoch avg. loss: 0.035 | test avg. loss: 5.252


 46%|████▋     | 23171/50000 [34:37<34:09, 13.09it/s]

Epochs: 23169 | epoch avg. loss: 0.026 | test avg. loss: 5.156
Epochs: 23170 | epoch avg. loss: 0.031 | test avg. loss: 5.256
Epochs: 23171 | epoch avg. loss: 0.040 | test avg. loss: 5.240


 46%|████▋     | 23175/50000 [34:37<32:47, 13.63it/s]

Epochs: 23172 | epoch avg. loss: 0.034 | test avg. loss: 4.941
Epochs: 23173 | epoch avg. loss: 0.145 | test avg. loss: 4.991
Epochs: 23174 | epoch avg. loss: 0.042 | test avg. loss: 5.421


 46%|████▋     | 23177/50000 [34:38<31:57, 13.99it/s]

Epochs: 23175 | epoch avg. loss: 0.124 | test avg. loss: 5.244
Epochs: 23176 | epoch avg. loss: 0.042 | test avg. loss: 5.180
Epochs: 23177 | epoch avg. loss: 0.136 | test avg. loss: 4.999
Epochs: 23178 | epoch avg. loss: 0.063 | test avg. loss: 5.217


 46%|████▋     | 23183/50000 [34:38<29:46, 15.01it/s]

Epochs: 23179 | epoch avg. loss: 0.144 | test avg. loss: 4.794
Epochs: 23180 | epoch avg. loss: 0.251 | test avg. loss: 5.027
Epochs: 23181 | epoch avg. loss: 0.098 | test avg. loss: 6.113
Epochs: 23182 | epoch avg. loss: 0.414 | test avg. loss: 5.209


 46%|████▋     | 23185/50000 [34:38<30:01, 14.89it/s]

Epochs: 23183 | epoch avg. loss: 0.167 | test avg. loss: 5.030
Epochs: 23184 | epoch avg. loss: 0.100 | test avg. loss: 5.677
Epochs: 23185 | epoch avg. loss: 0.340 | test avg. loss: 4.939
Epochs: 23186 | epoch avg. loss: 0.558 | test avg. loss: 5.831


 46%|████▋     | 23189/50000 [34:39<33:08, 13.48it/s]

Epochs: 23187 | epoch avg. loss: 0.905 | test avg. loss: 6.161
Epochs: 23188 | epoch avg. loss: 0.646 | test avg. loss: 5.665
Epochs: 23189 | epoch avg. loss: 0.332 | test avg. loss: 5.444


 46%|████▋     | 23193/50000 [34:39<32:26, 13.77it/s]

Epochs: 23190 | epoch avg. loss: 0.954 | test avg. loss: 5.074
Epochs: 23191 | epoch avg. loss: 0.380 | test avg. loss: 5.360
Epochs: 23192 | epoch avg. loss: 0.199 | test avg. loss: 5.662


 46%|████▋     | 23197/50000 [34:39<30:43, 14.54it/s]

Epochs: 23193 | epoch avg. loss: 0.408 | test avg. loss: 6.552
Epochs: 23194 | epoch avg. loss: 0.553 | test avg. loss: 6.345
Epochs: 23195 | epoch avg. loss: 0.277 | test avg. loss: 5.512
Epochs: 23196 | epoch avg. loss: 0.670 | test avg. loss: 5.102


 46%|████▋     | 23199/50000 [34:39<30:36, 14.59it/s]

Epochs: 23197 | epoch avg. loss: 0.450 | test avg. loss: 5.051
Epochs: 23198 | epoch avg. loss: 0.361 | test avg. loss: 4.610
Epochs: 23199 | epoch avg. loss: 0.355 | test avg. loss: 5.324


 46%|████▋     | 23203/50000 [34:41<1:40:18,  4.45it/s]

Epochs: 23200 | epoch avg. loss: 0.163 | test avg. loss: 5.502
Epochs: 23201 | epoch avg. loss: 0.123 | test avg. loss: 5.509
Epochs: 23202 | epoch avg. loss: 0.276 | test avg. loss: 5.356
Epochs: 23203 | epoch avg. loss: 0.212 | test avg. loss: 5.268


                                                       

Epochs: 23204 | epoch avg. loss: 0.170 | test avg. loss: 4.805
Epochs: 23205 | epoch avg. loss: 0.245 | test avg. loss: 4.987
Epochs: 23206 | epoch avg. loss: 0.229 | test avg. loss: 5.139


 46%|████▋     | 23209/50000 [34:41<54:04,  8.26it/s]

Epochs: 23207 | epoch avg. loss: 0.133 | test avg. loss: 4.860
Epochs: 23208 | epoch avg. loss: 0.119 | test avg. loss: 4.906
Epochs: 23209 | epoch avg. loss: 0.053 | test avg. loss: 4.885


 46%|████▋     | 23213/50000 [34:42<44:57,  9.93it/s]

Epochs: 23210 | epoch avg. loss: 0.051 | test avg. loss: 4.942
Epochs: 23211 | epoch avg. loss: 0.047 | test avg. loss: 5.339
Epochs: 23212 | epoch avg. loss: 0.091 | test avg. loss: 5.278


 46%|████▋     | 23217/50000 [34:42<36:39, 12.18it/s]

Epochs: 23213 | epoch avg. loss: 0.059 | test avg. loss: 5.249
Epochs: 23214 | epoch avg. loss: 0.063 | test avg. loss: 5.474
Epochs: 23215 | epoch avg. loss: 0.158 | test avg. loss: 5.044
Epochs: 23216 | epoch avg. loss: 0.137 | test avg. loss: 5.071


 46%|████▋     | 23221/50000 [34:42<32:17, 13.82it/s]

Epochs: 23217 | epoch avg. loss: 0.065 | test avg. loss: 5.275
Epochs: 23218 | epoch avg. loss: 0.081 | test avg. loss: 5.144
Epochs: 23219 | epoch avg. loss: 0.031 | test avg. loss: 5.086
Epochs: 23220 | epoch avg. loss: 0.022 | test avg. loss: 5.131


 46%|████▋     | 23223/50000 [34:42<32:21, 13.79it/s]

Epochs: 23221 | epoch avg. loss: 0.019 | test avg. loss: 5.058
Epochs: 23222 | epoch avg. loss: 0.025 | test avg. loss: 5.169
Epochs: 23223 | epoch avg. loss: 0.019 | test avg. loss: 5.207


 46%|████▋     | 23227/50000 [34:43<32:21, 13.79it/s]

Epochs: 23224 | epoch avg. loss: 0.018 | test avg. loss: 5.111
Epochs: 23225 | epoch avg. loss: 0.039 | test avg. loss: 5.192
Epochs: 23226 | epoch avg. loss: 0.019 | test avg. loss: 5.228


 46%|████▋     | 23231/50000 [34:43<31:02, 14.37it/s]

Epochs: 23227 | epoch avg. loss: 0.016 | test avg. loss: 5.098
Epochs: 23228 | epoch avg. loss: 0.028 | test avg. loss: 5.261
Epochs: 23229 | epoch avg. loss: 0.049 | test avg. loss: 5.014
Epochs: 23230 | epoch avg. loss: 0.018 | test avg. loss: 4.957


 46%|████▋     | 23235/50000 [34:43<29:25, 15.16it/s]

Epochs: 23231 | epoch avg. loss: 0.016 | test avg. loss: 5.048
Epochs: 23232 | epoch avg. loss: 0.009 | test avg. loss: 5.057
Epochs: 23233 | epoch avg. loss: 0.013 | test avg. loss: 5.158
Epochs: 23234 | epoch avg. loss: 0.008 | test avg. loss: 5.165


 46%|████▋     | 23239/50000 [34:43<29:04, 15.34it/s]

Epochs: 23235 | epoch avg. loss: 0.007 | test avg. loss: 5.122
Epochs: 23236 | epoch avg. loss: 0.006 | test avg. loss: 5.065
Epochs: 23237 | epoch avg. loss: 0.007 | test avg. loss: 5.171
Epochs: 23238 | epoch avg. loss: 0.018 | test avg. loss: 5.057


 46%|████▋     | 23241/50000 [34:44<33:17, 13.40it/s]

Epochs: 23239 | epoch avg. loss: 0.013 | test avg. loss: 5.129
Epochs: 23240 | epoch avg. loss: 0.015 | test avg. loss: 5.173
Epochs: 23241 | epoch avg. loss: 0.012 | test avg. loss: 5.142


 46%|████▋     | 23245/50000 [34:44<34:09, 13.05it/s]

Epochs: 23242 | epoch avg. loss: 0.006 | test avg. loss: 5.215
Epochs: 23243 | epoch avg. loss: 0.013 | test avg. loss: 5.130
Epochs: 23244 | epoch avg. loss: 0.013 | test avg. loss: 5.061


 46%|████▋     | 23247/50000 [34:44<35:34, 12.53it/s]

Epochs: 23245 | epoch avg. loss: 0.016 | test avg. loss: 5.157
Epochs: 23246 | epoch avg. loss: 0.023 | test avg. loss: 5.018
Epochs: 23247 | epoch avg. loss: 0.009 | test avg. loss: 5.009


 47%|████▋     | 23251/50000 [34:44<35:25, 12.58it/s]

Epochs: 23248 | epoch avg. loss: 0.007 | test avg. loss: 5.098
Epochs: 23249 | epoch avg. loss: 0.008 | test avg. loss: 5.132
Epochs: 23250 | epoch avg. loss: 0.005 | test avg. loss: 5.121


 47%|████▋     | 23255/50000 [34:45<32:30, 13.71it/s]

Epochs: 23251 | epoch avg. loss: 0.010 | test avg. loss: 5.110
Epochs: 23252 | epoch avg. loss: 0.006 | test avg. loss: 5.159
Epochs: 23253 | epoch avg. loss: 0.013 | test avg. loss: 5.109
Epochs: 23254 | epoch avg. loss: 0.007 | test avg. loss: 5.057


 47%|████▋     | 23257/50000 [34:45<33:27, 13.32it/s]

Epochs: 23255 | epoch avg. loss: 0.017 | test avg. loss: 5.102
Epochs: 23256 | epoch avg. loss: 0.007 | test avg. loss: 5.117
Epochs: 23257 | epoch avg. loss: 0.008 | test avg. loss: 5.043


 47%|████▋     | 23261/50000 [34:45<31:18, 14.23it/s]

Epochs: 23258 | epoch avg. loss: 0.008 | test avg. loss: 5.100
Epochs: 23259 | epoch avg. loss: 0.009 | test avg. loss: 5.093
Epochs: 23260 | epoch avg. loss: 0.009 | test avg. loss: 5.052
Epochs: 23261 | epoch avg. loss: 0.017 | test avg. loss: 5.141


 47%|████▋     | 23265/50000 [34:45<31:27, 14.17it/s]

Epochs: 23262 | epoch avg. loss: 0.007 | test avg. loss: 5.128
Epochs: 23263 | epoch avg. loss: 0.005 | test avg. loss: 5.171
Epochs: 23264 | epoch avg. loss: 0.010 | test avg. loss: 5.143


 47%|████▋     | 23267/50000 [34:46<36:56, 12.06it/s]

Epochs: 23265 | epoch avg. loss: 0.007 | test avg. loss: 5.041
Epochs: 23266 | epoch avg. loss: 0.011 | test avg. loss: 5.111
Epochs: 23267 | epoch avg. loss: 0.016 | test avg. loss: 5.131


 47%|████▋     | 23271/50000 [34:46<35:49, 12.43it/s]

Epochs: 23268 | epoch avg. loss: 0.006 | test avg. loss: 5.148
Epochs: 23269 | epoch avg. loss: 0.006 | test avg. loss: 5.151
Epochs: 23270 | epoch avg. loss: 0.006 | test avg. loss: 5.028


 47%|████▋     | 23273/50000 [34:46<37:47, 11.79it/s]

Epochs: 23271 | epoch avg. loss: 0.023 | test avg. loss: 5.099
Epochs: 23272 | epoch avg. loss: 0.022 | test avg. loss: 5.151


 47%|████▋     | 23275/50000 [34:46<39:07, 11.39it/s]

Epochs: 23273 | epoch avg. loss: 0.012 | test avg. loss: 5.124
Epochs: 23274 | epoch avg. loss: 0.017 | test avg. loss: 5.239
Epochs: 23275 | epoch avg. loss: 0.019 | test avg. loss: 5.218


 47%|████▋     | 23279/50000 [34:47<38:16, 11.63it/s]

Epochs: 23276 | epoch avg. loss: 0.021 | test avg. loss: 5.117
Epochs: 23277 | epoch avg. loss: 0.007 | test avg. loss: 5.070
Epochs: 23278 | epoch avg. loss: 0.008 | test avg. loss: 5.167


 47%|████▋     | 23281/50000 [34:47<38:12, 11.66it/s]

Epochs: 23279 | epoch avg. loss: 0.019 | test avg. loss: 5.143
Epochs: 23280 | epoch avg. loss: 0.007 | test avg. loss: 5.086
Epochs: 23281 | epoch avg. loss: 0.009 | test avg. loss: 5.076


 47%|████▋     | 23285/50000 [34:47<36:26, 12.22it/s]

Epochs: 23282 | epoch avg. loss: 0.007 | test avg. loss: 5.095
Epochs: 23283 | epoch avg. loss: 0.022 | test avg. loss: 5.060
Epochs: 23284 | epoch avg. loss: 0.009 | test avg. loss: 5.025


 47%|████▋     | 23289/50000 [34:47<31:42, 14.04it/s]

Epochs: 23285 | epoch avg. loss: 0.034 | test avg. loss: 5.149
Epochs: 23286 | epoch avg. loss: 0.012 | test avg. loss: 5.199
Epochs: 23287 | epoch avg. loss: 0.014 | test avg. loss: 5.107
Epochs: 23288 | epoch avg. loss: 0.012 | test avg. loss: 5.140


 47%|████▋     | 23291/50000 [34:48<34:57, 12.73it/s]

Epochs: 23289 | epoch avg. loss: 0.011 | test avg. loss: 5.061
Epochs: 23290 | epoch avg. loss: 0.018 | test avg. loss: 5.000
Epochs: 23291 | epoch avg. loss: 0.039 | test avg. loss: 5.196


 47%|████▋     | 23295/50000 [34:48<33:57, 13.11it/s]

Epochs: 23292 | epoch avg. loss: 0.049 | test avg. loss: 5.121
Epochs: 23293 | epoch avg. loss: 0.023 | test avg. loss: 5.010
Epochs: 23294 | epoch avg. loss: 0.082 | test avg. loss: 5.160


 47%|████▋     | 23297/50000 [34:48<35:55, 12.39it/s]

Epochs: 23295 | epoch avg. loss: 0.060 | test avg. loss: 5.299
Epochs: 23296 | epoch avg. loss: 0.040 | test avg. loss: 5.191
Epochs: 23297 | epoch avg. loss: 0.039 | test avg. loss: 5.198


 47%|████▋     | 23299/50000 [34:48<37:20, 11.92it/s]

Epochs: 23298 | epoch avg. loss: 0.013 | test avg. loss: 5.050
Epochs: 23299 | epoch avg. loss: 0.022 | test avg. loss: 4.945


 47%|████▋     | 23303/50000 [34:50<1:43:13,  4.31it/s]

Epochs: 23300 | epoch avg. loss: 0.028 | test avg. loss: 5.083
Epochs: 23301 | epoch avg. loss: 0.014 | test avg. loss: 5.180
Epochs: 23302 | epoch avg. loss: 0.008 | test avg. loss: 5.220
Epochs: 23303 | epoch avg. loss: 0.009 | test avg. loss: 5.198


 47%|████▋     | 23307/50000 [34:50<1:06:54,  6.65it/s]

Epochs: 23304 | epoch avg. loss: 0.008 | test avg. loss: 5.120
Epochs: 23305 | epoch avg. loss: 0.007 | test avg. loss: 5.020
Epochs: 23306 | epoch avg. loss: 0.018 | test avg. loss: 5.097


 47%|████▋     | 23309/50000 [34:50<55:22,  8.03it/s]

Epochs: 23307 | epoch avg. loss: 0.012 | test avg. loss: 5.217
Epochs: 23308 | epoch avg. loss: 0.026 | test avg. loss: 5.110
Epochs: 23309 | epoch avg. loss: 0.007 | test avg. loss: 5.046
Epochs: 23310 | epoch avg. loss: 0.007 | test avg. loss: 5.049


 47%|████▋     | 23313/50000 [34:51<45:01,  9.88it/s]

Epochs: 23311 | epoch avg. loss: 0.005 | test avg. loss: 5.015
Epochs: 23312 | epoch avg. loss: 0.012 | test avg. loss: 5.130
Epochs: 23313 | epoch avg. loss: 0.015 | test avg. loss: 5.180


 47%|████▋     | 23317/50000 [34:51<35:59, 12.36it/s]

Epochs: 23314 | epoch avg. loss: 0.013 | test avg. loss: 5.076
Epochs: 23315 | epoch avg. loss: 0.022 | test avg. loss: 5.166
Epochs: 23316 | epoch avg. loss: 0.037 | test avg. loss: 5.316
Epochs: 23317 | epoch avg. loss: 0.074 | test avg. loss: 4.922


 47%|████▋     | 23321/50000 [34:51<31:27, 14.13it/s]

Epochs: 23318 | epoch avg. loss: 0.126 | test avg. loss: 4.945
Epochs: 23319 | epoch avg. loss: 0.036 | test avg. loss: 5.336
Epochs: 23320 | epoch avg. loss: 0.083 | test avg. loss: 5.150
Epochs: 23321 | epoch avg. loss: 0.070 | test avg. loss: 5.166


 47%|████▋     | 23325/50000 [34:51<31:35, 14.07it/s]

Epochs: 23322 | epoch avg. loss: 0.048 | test avg. loss: 5.530
Epochs: 23323 | epoch avg. loss: 0.213 | test avg. loss: 5.078
Epochs: 23324 | epoch avg. loss: 0.073 | test avg. loss: 5.011


 47%|████▋     | 23327/50000 [34:52<33:00, 13.47it/s]

Epochs: 23325 | epoch avg. loss: 0.088 | test avg. loss: 5.591
Epochs: 23326 | epoch avg. loss: 0.255 | test avg. loss: 5.042
Epochs: 23327 | epoch avg. loss: 0.037 | test avg. loss: 4.866


                                                     

Epochs: 23328 | epoch avg. loss: 0.083 | test avg. loss: 5.084
Epochs: 23329 | epoch avg. loss: 0.018 | test avg. loss: 5.194
Epochs: 23330 | epoch avg. loss: 0.023 | test avg. loss: 5.227


 47%|████▋     | 23335/50000 [34:52<30:37, 14.51it/s]

Epochs: 23331 | epoch avg. loss: 0.031 | test avg. loss: 5.240
Epochs: 23332 | epoch avg. loss: 0.008 | test avg. loss: 5.076
Epochs: 23333 | epoch avg. loss: 0.027 | test avg. loss: 5.029
Epochs: 23334 | epoch avg. loss: 0.022 | test avg. loss: 5.171


 47%|████▋     | 23337/50000 [34:52<31:47, 13.98it/s]

Epochs: 23335 | epoch avg. loss: 0.011 | test avg. loss: 5.227
Epochs: 23336 | epoch avg. loss: 0.011 | test avg. loss: 5.214
Epochs: 23337 | epoch avg. loss: 0.011 | test avg. loss: 5.167


 47%|████▋     | 23341/50000 [34:53<35:18, 12.58it/s]

Epochs: 23338 | epoch avg. loss: 0.005 | test avg. loss: 5.124
Epochs: 23339 | epoch avg. loss: 0.006 | test avg. loss: 5.069
Epochs: 23340 | epoch avg. loss: 0.011 | test avg. loss: 5.187


 47%|████▋     | 23343/50000 [34:53<34:32, 12.86it/s]

Epochs: 23341 | epoch avg. loss: 0.027 | test avg. loss: 5.218
Epochs: 23342 | epoch avg. loss: 0.012 | test avg. loss: 5.165
Epochs: 23343 | epoch avg. loss: 0.024 | test avg. loss: 5.221


 47%|████▋     | 23347/50000 [34:53<32:39, 13.60it/s]

Epochs: 23344 | epoch avg. loss: 0.026 | test avg. loss: 5.147
Epochs: 23345 | epoch avg. loss: 0.013 | test avg. loss: 5.052
Epochs: 23346 | epoch avg. loss: 0.034 | test avg. loss: 5.184


 47%|████▋     | 23349/50000 [34:53<32:30, 13.66it/s]

Epochs: 23347 | epoch avg. loss: 0.034 | test avg. loss: 5.253
Epochs: 23348 | epoch avg. loss: 0.019 | test avg. loss: 5.169
Epochs: 23349 | epoch avg. loss: 0.062 | test avg. loss: 5.228


 47%|████▋     | 23351/50000 [34:54<31:53, 13.93it/s]

Epochs: 23350 | epoch avg. loss: 0.028 | test avg. loss: 5.376
Epochs: 23351 | epoch avg. loss: 0.059 | test avg. loss: 5.054
Epochs: 23352 | epoch avg. loss: 0.070 | test avg. loss: 5.012


 47%|████▋     | 23355/50000 [34:54<36:00, 12.33it/s]

Epochs: 23353 | epoch avg. loss: 0.039 | test avg. loss: 5.483
Epochs: 23354 | epoch avg. loss: 0.223 | test avg. loss: 5.083
Epochs: 23355 | epoch avg. loss: 0.138 | test avg. loss: 5.305


 47%|████▋     | 23359/50000 [34:54<38:08, 11.64it/s]

Epochs: 23356 | epoch avg. loss: 0.203 | test avg. loss: 5.651
Epochs: 23357 | epoch avg. loss: 0.156 | test avg. loss: 5.380
Epochs: 23358 | epoch avg. loss: 0.077 | test avg. loss: 4.959


 47%|████▋     | 23361/50000 [34:54<36:55, 12.02it/s]

Epochs: 23359 | epoch avg. loss: 0.149 | test avg. loss: 4.971
Epochs: 23360 | epoch avg. loss: 0.044 | test avg. loss: 5.440
Epochs: 23361 | epoch avg. loss: 0.124 | test avg. loss: 5.353


 47%|████▋     | 23365/50000 [34:55<36:38, 12.12it/s]

Epochs: 23362 | epoch avg. loss: 0.042 | test avg. loss: 5.185
Epochs: 23363 | epoch avg. loss: 0.024 | test avg. loss: 5.160
Epochs: 23364 | epoch avg. loss: 0.018 | test avg. loss: 5.235


 47%|████▋     | 23369/50000 [34:55<33:06, 13.40it/s]

Epochs: 23365 | epoch avg. loss: 0.023 | test avg. loss: 5.230
Epochs: 23366 | epoch avg. loss: 0.015 | test avg. loss: 5.115
Epochs: 23367 | epoch avg. loss: 0.022 | test avg. loss: 5.132
Epochs: 23368 | epoch avg. loss: 0.016 | test avg. loss: 5.165


 47%|████▋     | 23371/50000 [34:55<31:50, 13.94it/s]

Epochs: 23369 | epoch avg. loss: 0.018 | test avg. loss: 5.040
Epochs: 23370 | epoch avg. loss: 0.067 | test avg. loss: 5.153
Epochs: 23371 | epoch avg. loss: 0.009 | test avg. loss: 5.247


 47%|████▋     | 23375/50000 [34:55<30:25, 14.59it/s]

Epochs: 23372 | epoch avg. loss: 0.010 | test avg. loss: 5.174
Epochs: 23373 | epoch avg. loss: 0.008 | test avg. loss: 5.128
Epochs: 23374 | epoch avg. loss: 0.007 | test avg. loss: 5.100
Epochs: 23375 | epoch avg. loss: 0.007 | test avg. loss: 5.104


 47%|████▋     | 23379/50000 [34:55<30:58, 14.33it/s]

Epochs: 23376 | epoch avg. loss: 0.006 | test avg. loss: 5.165
Epochs: 23377 | epoch avg. loss: 0.015 | test avg. loss: 5.233
Epochs: 23378 | epoch avg. loss: 0.019 | test avg. loss: 5.137


 47%|████▋     | 23381/50000 [34:56<31:17, 14.18it/s]

Epochs: 23379 | epoch avg. loss: 0.004 | test avg. loss: 5.137
Epochs: 23380 | epoch avg. loss: 0.013 | test avg. loss: 5.152
Epochs: 23381 | epoch avg. loss: 0.009 | test avg. loss: 5.076


 47%|████▋     | 23385/50000 [34:56<29:37, 14.97it/s]

Epochs: 23382 | epoch avg. loss: 0.019 | test avg. loss: 5.154
Epochs: 23383 | epoch avg. loss: 0.009 | test avg. loss: 5.254
Epochs: 23384 | epoch avg. loss: 0.030 | test avg. loss: 5.169
Epochs: 23385 | epoch avg. loss: 0.011 | test avg. loss: 5.115


 47%|████▋     | 23389/50000 [34:56<29:00, 15.29it/s]

Epochs: 23386 | epoch avg. loss: 0.021 | test avg. loss: 5.203
Epochs: 23387 | epoch avg. loss: 0.033 | test avg. loss: 5.188
Epochs: 23388 | epoch avg. loss: 0.017 | test avg. loss: 5.041
Epochs: 23389 | epoch avg. loss: 0.046 | test avg. loss: 5.142


 47%|████▋     | 23393/50000 [34:56<29:20, 15.11it/s]

Epochs: 23390 | epoch avg. loss: 0.012 | test avg. loss: 5.289
Epochs: 23391 | epoch avg. loss: 0.030 | test avg. loss: 5.181
Epochs: 23392 | epoch avg. loss: 0.019 | test avg. loss: 5.074


 47%|████▋     | 23395/50000 [34:57<32:14, 13.75it/s]

Epochs: 23393 | epoch avg. loss: 0.010 | test avg. loss: 5.026
Epochs: 23394 | epoch avg. loss: 0.007 | test avg. loss: 5.116
Epochs: 23395 | epoch avg. loss: 0.018 | test avg. loss: 5.203


 47%|████▋     | 23399/50000 [34:57<30:49, 14.38it/s]

Epochs: 23396 | epoch avg. loss: 0.011 | test avg. loss: 5.136
Epochs: 23397 | epoch avg. loss: 0.041 | test avg. loss: 5.185
Epochs: 23398 | epoch avg. loss: 0.031 | test avg. loss: 5.367
Epochs: 23399 | epoch avg. loss: 0.072 | test avg. loss: 5.086


 47%|████▋     | 23403/50000 [34:59<1:33:29,  4.74it/s]

Epochs: 23400 | epoch avg. loss: 0.085 | test avg. loss: 5.234
Epochs: 23401 | epoch avg. loss: 0.107 | test avg. loss: 5.530
Epochs: 23402 | epoch avg. loss: 0.093 | test avg. loss: 5.456
Epochs: 23403 | epoch avg. loss: 0.110 | test avg. loss: 4.914


 47%|████▋     | 23407/50000 [34:59<59:55,  7.40it/s]

Epochs: 23404 | epoch avg. loss: 0.095 | test avg. loss: 4.894
Epochs: 23405 | epoch avg. loss: 0.088 | test avg. loss: 5.382
Epochs: 23406 | epoch avg. loss: 0.124 | test avg. loss: 5.347
Epochs: 23407 | epoch avg. loss: 0.084 | test avg. loss: 5.448


 47%|████▋     | 23411/50000 [34:59<43:16, 10.24it/s]

Epochs: 23408 | epoch avg. loss: 0.277 | test avg. loss: 5.336
Epochs: 23409 | epoch avg. loss: 0.095 | test avg. loss: 5.197
Epochs: 23410 | epoch avg. loss: 0.100 | test avg. loss: 4.951
Epochs: 23411 | epoch avg. loss: 0.223 | test avg. loss: 5.040


 47%|████▋     | 23415/50000 [34:59<35:02, 12.64it/s]

Epochs: 23412 | epoch avg. loss: 0.044 | test avg. loss: 5.636
Epochs: 23413 | epoch avg. loss: 0.143 | test avg. loss: 5.344
Epochs: 23414 | epoch avg. loss: 0.054 | test avg. loss: 5.177
Epochs: 23415 | epoch avg. loss: 0.102 | test avg. loss: 5.265


 47%|████▋     | 23419/50000 [35:00<33:23, 13.27it/s]

Epochs: 23416 | epoch avg. loss: 0.079 | test avg. loss: 5.376
Epochs: 23417 | epoch avg. loss: 0.100 | test avg. loss: 5.128
Epochs: 23418 | epoch avg. loss: 0.035 | test avg. loss: 5.234


 47%|████▋     | 23423/50000 [35:00<31:11, 14.20it/s]

Epochs: 23419 | epoch avg. loss: 0.029 | test avg. loss: 5.683
Epochs: 23420 | epoch avg. loss: 0.200 | test avg. loss: 5.096
Epochs: 23421 | epoch avg. loss: 0.140 | test avg. loss: 4.973
Epochs: 23422 | epoch avg. loss: 0.120 | test avg. loss: 5.451


 47%|████▋     | 23427/50000 [35:00<29:54, 14.81it/s]

Epochs: 23423 | epoch avg. loss: 0.244 | test avg. loss: 5.036
Epochs: 23424 | epoch avg. loss: 0.080 | test avg. loss: 5.158
Epochs: 23425 | epoch avg. loss: 0.335 | test avg. loss: 5.230
Epochs: 23426 | epoch avg. loss: 0.204 | test avg. loss: 6.057


 47%|████▋     | 23429/50000 [35:00<30:56, 14.31it/s]

Epochs: 23427 | epoch avg. loss: 0.569 | test avg. loss: 5.141
Epochs: 23428 | epoch avg. loss: 0.408 | test avg. loss: 5.448
Epochs: 23429 | epoch avg. loss: 0.703 | test avg. loss: 5.623


 47%|████▋     | 23433/50000 [35:01<35:18, 12.54it/s]

Epochs: 23430 | epoch avg. loss: 0.304 | test avg. loss: 5.188
Epochs: 23431 | epoch avg. loss: 0.138 | test avg. loss: 4.894
Epochs: 23432 | epoch avg. loss: 0.091 | test avg. loss: 5.229


 47%|████▋     | 23435/50000 [35:01<34:31, 12.82it/s]

Epochs: 23433 | epoch avg. loss: 0.151 | test avg. loss: 5.224
Epochs: 23434 | epoch avg. loss: 0.056 | test avg. loss: 5.348
Epochs: 23435 | epoch avg. loss: 0.041 | test avg. loss: 5.369


 47%|████▋     | 23439/50000 [35:01<33:30, 13.21it/s]

Epochs: 23436 | epoch avg. loss: 0.040 | test avg. loss: 5.121
Epochs: 23437 | epoch avg. loss: 0.031 | test avg. loss: 4.874
Epochs: 23438 | epoch avg. loss: 0.054 | test avg. loss: 4.933


 47%|████▋     | 23443/50000 [35:01<30:54, 14.32it/s]

Epochs: 23439 | epoch avg. loss: 0.047 | test avg. loss: 5.235
Epochs: 23440 | epoch avg. loss: 0.048 | test avg. loss: 5.347
Epochs: 23441 | epoch avg. loss: 0.080 | test avg. loss: 5.470
Epochs: 23442 | epoch avg. loss: 0.043 | test avg. loss: 5.600


 47%|████▋     | 23445/50000 [35:01<31:11, 14.19it/s]

Epochs: 23443 | epoch avg. loss: 0.113 | test avg. loss: 5.193
Epochs: 23444 | epoch avg. loss: 0.085 | test avg. loss: 5.095
Epochs: 23445 | epoch avg. loss: 0.056 | test avg. loss: 5.512




Epochs: 23446 | epoch avg. loss: 0.150 | test avg. loss: 5.144
Epochs: 23447 | epoch avg. loss: 0.138 | test avg. loss: 5.235
Epochs: 23448 | epoch avg. loss: 0.102 | test avg. loss: 5.553


 47%|████▋     | 23453/50000 [35:02<30:06, 14.70it/s]

Epochs: 23449 | epoch avg. loss: 0.151 | test avg. loss: 5.297
Epochs: 23450 | epoch avg. loss: 0.036 | test avg. loss: 5.052
Epochs: 23451 | epoch avg. loss: 0.045 | test avg. loss: 5.140
Epochs: 23452 | epoch avg. loss: 0.017 | test avg. loss: 5.375


 47%|████▋     | 23455/50000 [35:02<32:42, 13.52it/s]

Epochs: 23453 | epoch avg. loss: 0.038 | test avg. loss: 5.373
Epochs: 23454 | epoch avg. loss: 0.013 | test avg. loss: 5.314
Epochs: 23455 | epoch avg. loss: 0.012 | test avg. loss: 5.251


 47%|████▋     | 23459/50000 [35:02<35:40, 12.40it/s]

Epochs: 23456 | epoch avg. loss: 0.038 | test avg. loss: 5.048
Epochs: 23457 | epoch avg. loss: 0.011 | test avg. loss: 5.030
Epochs: 23458 | epoch avg. loss: 0.009 | test avg. loss: 5.095


 47%|████▋     | 23463/50000 [35:03<32:40, 13.54it/s]

Epochs: 23459 | epoch avg. loss: 0.006 | test avg. loss: 5.191
Epochs: 23460 | epoch avg. loss: 0.014 | test avg. loss: 5.208
Epochs: 23461 | epoch avg. loss: 0.017 | test avg. loss: 5.041
Epochs: 23462 | epoch avg. loss: 0.014 | test avg. loss: 4.962


 47%|████▋     | 23467/50000 [35:03<30:08, 14.67it/s]

Epochs: 23463 | epoch avg. loss: 0.014 | test avg. loss: 5.071
Epochs: 23464 | epoch avg. loss: 0.035 | test avg. loss: 4.828
Epochs: 23465 | epoch avg. loss: 0.051 | test avg. loss: 4.899
Epochs: 23466 | epoch avg. loss: 0.015 | test avg. loss: 5.109


 47%|████▋     | 23471/50000 [35:03<29:07, 15.18it/s]

Epochs: 23467 | epoch avg. loss: 0.015 | test avg. loss: 4.994
Epochs: 23468 | epoch avg. loss: 0.046 | test avg. loss: 5.021
Epochs: 23469 | epoch avg. loss: 0.017 | test avg. loss: 5.185
Epochs: 23470 | epoch avg. loss: 0.061 | test avg. loss: 4.924


 47%|████▋     | 23473/50000 [35:03<31:17, 14.13it/s]

Epochs: 23471 | epoch avg. loss: 0.064 | test avg. loss: 4.917
Epochs: 23472 | epoch avg. loss: 0.060 | test avg. loss: 5.210
Epochs: 23473 | epoch avg. loss: 0.127 | test avg. loss: 5.114


                                                     

Epochs: 23474 | epoch avg. loss: 0.056 | test avg. loss: 4.863
Epochs: 23475 | epoch avg. loss: 0.093 | test avg. loss: 4.968
Epochs: 23476 | epoch avg. loss: 0.033 | test avg. loss: 5.320


 47%|████▋     | 23479/50000 [35:04<33:22, 13.25it/s]

Epochs: 23477 | epoch avg. loss: 0.076 | test avg. loss: 5.040
Epochs: 23478 | epoch avg. loss: 0.087 | test avg. loss: 4.932
Epochs: 23479 | epoch avg. loss: 0.028 | test avg. loss: 5.057


 47%|████▋     | 23483/50000 [35:04<34:05, 12.96it/s]

Epochs: 23480 | epoch avg. loss: 0.090 | test avg. loss: 4.880
Epochs: 23481 | epoch avg. loss: 0.025 | test avg. loss: 4.878
Epochs: 23482 | epoch avg. loss: 0.027 | test avg. loss: 5.213


 47%|████▋     | 23485/50000 [35:04<33:53, 13.04it/s]

Epochs: 23483 | epoch avg. loss: 0.112 | test avg. loss: 4.905
Epochs: 23484 | epoch avg. loss: 0.097 | test avg. loss: 4.932
Epochs: 23485 | epoch avg. loss: 0.165 | test avg. loss: 5.303


 47%|████▋     | 23489/50000 [35:05<35:51, 12.32it/s]

Epochs: 23486 | epoch avg. loss: 0.177 | test avg. loss: 5.082
Epochs: 23487 | epoch avg. loss: 0.042 | test avg. loss: 5.008
Epochs: 23488 | epoch avg. loss: 0.128 | test avg. loss: 5.100


 47%|████▋     | 23491/50000 [35:05<37:10, 11.89it/s]

Epochs: 23489 | epoch avg. loss: 0.070 | test avg. loss: 5.263
Epochs: 23490 | epoch avg. loss: 0.069 | test avg. loss: 4.942
Epochs: 23491 | epoch avg. loss: 0.039 | test avg. loss: 4.987


 47%|████▋     | 23495/50000 [35:05<35:44, 12.36it/s]

Epochs: 23492 | epoch avg. loss: 0.057 | test avg. loss: 5.174
Epochs: 23493 | epoch avg. loss: 0.073 | test avg. loss: 4.944
Epochs: 23494 | epoch avg. loss: 0.054 | test avg. loss: 4.909


 47%|████▋     | 23497/50000 [35:05<36:22, 12.14it/s]

Epochs: 23495 | epoch avg. loss: 0.084 | test avg. loss: 5.019
Epochs: 23496 | epoch avg. loss: 0.062 | test avg. loss: 5.096
Epochs: 23497 | epoch avg. loss: 0.063 | test avg. loss: 4.873


 47%|████▋     | 23499/50000 [35:06<37:22, 11.82it/s]

Epochs: 23498 | epoch avg. loss: 0.171 | test avg. loss: 4.954
Epochs: 23499 | epoch avg. loss: 0.069 | test avg. loss: 5.295


 47%|████▋     | 23503/50000 [35:07<1:49:14,  4.04it/s]

Epochs: 23500 | epoch avg. loss: 0.094 | test avg. loss: 4.974
Epochs: 23501 | epoch avg. loss: 0.194 | test avg. loss: 5.006
Epochs: 23502 | epoch avg. loss: 0.068 | test avg. loss: 5.589


 47%|████▋     | 23505/50000 [35:08<1:27:54,  5.02it/s]

Epochs: 23503 | epoch avg. loss: 0.201 | test avg. loss: 5.084
Epochs: 23504 | epoch avg. loss: 0.269 | test avg. loss: 5.098
Epochs: 23505 | epoch avg. loss: 0.122 | test avg. loss: 5.861


 47%|████▋     | 23509/50000 [35:08<57:37,  7.66it/s]

Epochs: 23506 | epoch avg. loss: 0.310 | test avg. loss: 5.256
Epochs: 23507 | epoch avg. loss: 0.808 | test avg. loss: 4.776
Epochs: 23508 | epoch avg. loss: 0.171 | test avg. loss: 5.259
Epochs: 23509 | epoch avg. loss: 0.249 | test avg. loss: 4.957


 47%|████▋     | 23513/50000 [35:08<42:35, 10.36it/s]

Epochs: 23510 | epoch avg. loss: 0.866 | test avg. loss: 4.859
Epochs: 23511 | epoch avg. loss: 0.387 | test avg. loss: 6.824
Epochs: 23512 | epoch avg. loss: 1.219 | test avg. loss: 5.218
Epochs: 23513 | epoch avg. loss: 0.960 | test avg. loss: 5.291


 47%|████▋     | 23517/50000 [35:08<37:48, 11.67it/s]

Epochs: 23514 | epoch avg. loss: 0.409 | test avg. loss: 7.211
Epochs: 23515 | epoch avg. loss: 1.054 | test avg. loss: 5.611
Epochs: 23516 | epoch avg. loss: 0.732 | test avg. loss: 5.047


 47%|████▋     | 23519/50000 [35:09<38:07, 11.58it/s]

Epochs: 23517 | epoch avg. loss: 0.201 | test avg. loss: 5.433
Epochs: 23518 | epoch avg. loss: 0.354 | test avg. loss: 4.852
Epochs: 23519 | epoch avg. loss: 0.264 | test avg. loss: 5.179
Epochs: 23520 | epoch avg. loss: 0.183 | test avg. loss: 5.299


 47%|████▋     | 23525/50000 [35:09<31:24, 14.05it/s]

Epochs: 23521 | epoch avg. loss: 0.152 | test avg. loss: 5.491
Epochs: 23522 | epoch avg. loss: 0.213 | test avg. loss: 5.943
Epochs: 23523 | epoch avg. loss: 0.249 | test avg. loss: 5.125
Epochs: 23524 | epoch avg. loss: 0.209 | test avg. loss: 4.898


 47%|████▋     | 23529/50000 [35:09<29:25, 14.99it/s]

Epochs: 23525 | epoch avg. loss: 0.093 | test avg. loss: 5.131
Epochs: 23526 | epoch avg. loss: 0.093 | test avg. loss: 5.081
Epochs: 23527 | epoch avg. loss: 0.192 | test avg. loss: 5.368
Epochs: 23528 | epoch avg. loss: 0.064 | test avg. loss: 5.311


 47%|████▋     | 23531/50000 [35:09<29:41, 14.86it/s]

Epochs: 23529 | epoch avg. loss: 0.042 | test avg. loss: 5.071
Epochs: 23530 | epoch avg. loss: 0.047 | test avg. loss: 5.198
Epochs: 23531 | epoch avg. loss: 0.114 | test avg. loss: 4.790


 47%|████▋     | 23535/50000 [35:10<33:09, 13.30it/s]

Epochs: 23532 | epoch avg. loss: 0.072 | test avg. loss: 4.855
Epochs: 23533 | epoch avg. loss: 0.049 | test avg. loss: 5.193
Epochs: 23534 | epoch avg. loss: 0.051 | test avg. loss: 5.204


 47%|████▋     | 23539/50000 [35:10<31:17, 14.09it/s]

Epochs: 23535 | epoch avg. loss: 0.072 | test avg. loss: 5.331
Epochs: 23536 | epoch avg. loss: 0.061 | test avg. loss: 5.258
Epochs: 23537 | epoch avg. loss: 0.035 | test avg. loss: 5.076
Epochs: 23538 | epoch avg. loss: 0.038 | test avg. loss: 5.353


 47%|████▋     | 23541/50000 [35:10<31:44, 13.89it/s]

Epochs: 23539 | epoch avg. loss: 0.042 | test avg. loss: 5.232
Epochs: 23540 | epoch avg. loss: 0.055 | test avg. loss: 5.282
Epochs: 23541 | epoch avg. loss: 0.062 | test avg. loss: 5.388


 47%|████▋     | 23545/50000 [35:10<33:14, 13.26it/s]

Epochs: 23542 | epoch avg. loss: 0.077 | test avg. loss: 5.172
Epochs: 23543 | epoch avg. loss: 0.250 | test avg. loss: 5.324
Epochs: 23544 | epoch avg. loss: 0.078 | test avg. loss: 5.450


 47%|████▋     | 23547/50000 [35:11<35:22, 12.46it/s]

Epochs: 23545 | epoch avg. loss: 0.062 | test avg. loss: 5.356
Epochs: 23546 | epoch avg. loss: 0.098 | test avg. loss: 5.650
Epochs: 23547 | epoch avg. loss: 0.125 | test avg. loss: 5.260


 47%|████▋     | 23551/50000 [35:11<35:19, 12.48it/s]

Epochs: 23548 | epoch avg. loss: 0.032 | test avg. loss: 5.251
Epochs: 23549 | epoch avg. loss: 0.027 | test avg. loss: 5.456
Epochs: 23550 | epoch avg. loss: 0.040 | test avg. loss: 5.264


 47%|████▋     | 23553/50000 [35:11<36:38, 12.03it/s]

Epochs: 23551 | epoch avg. loss: 0.040 | test avg. loss: 5.325
Epochs: 23552 | epoch avg. loss: 0.046 | test avg. loss: 5.204
Epochs: 23553 | epoch avg. loss: 0.038 | test avg. loss: 5.051


 47%|████▋     | 23557/50000 [35:11<36:16, 12.15it/s]

Epochs: 23554 | epoch avg. loss: 0.067 | test avg. loss: 5.341
Epochs: 23555 | epoch avg. loss: 0.091 | test avg. loss: 5.211
Epochs: 23556 | epoch avg. loss: 0.044 | test avg. loss: 5.322


 47%|████▋     | 23559/50000 [35:12<36:18, 12.14it/s]

Epochs: 23557 | epoch avg. loss: 0.046 | test avg. loss: 5.487
Epochs: 23558 | epoch avg. loss: 0.022 | test avg. loss: 5.224
Epochs: 23559 | epoch avg. loss: 0.060 | test avg. loss: 5.229


 47%|████▋     | 23563/50000 [35:12<35:22, 12.45it/s]

Epochs: 23560 | epoch avg. loss: 0.029 | test avg. loss: 5.110
Epochs: 23561 | epoch avg. loss: 0.019 | test avg. loss: 4.982
Epochs: 23562 | epoch avg. loss: 0.034 | test avg. loss: 5.143


 47%|████▋     | 23567/50000 [35:12<33:17, 13.23it/s]

Epochs: 23563 | epoch avg. loss: 0.040 | test avg. loss: 5.103
Epochs: 23564 | epoch avg. loss: 0.025 | test avg. loss: 5.076
Epochs: 23565 | epoch avg. loss: 0.042 | test avg. loss: 5.209
Epochs: 23566 | epoch avg. loss: 0.037 | test avg. loss: 5.055


 47%|████▋     | 23569/50000 [35:12<34:11, 12.88it/s]

Epochs: 23567 | epoch avg. loss: 0.012 | test avg. loss: 5.007
Epochs: 23568 | epoch avg. loss: 0.016 | test avg. loss: 5.135
Epochs: 23569 | epoch avg. loss: 0.015 | test avg. loss: 5.028


 47%|████▋     | 23573/50000 [35:13<38:38, 11.40it/s]

Epochs: 23570 | epoch avg. loss: 0.038 | test avg. loss: 5.042
Epochs: 23571 | epoch avg. loss: 0.022 | test avg. loss: 5.276
Epochs: 23572 | epoch avg. loss: 0.081 | test avg. loss: 5.053


 47%|████▋     | 23575/50000 [35:13<37:43, 11.67it/s]

Epochs: 23573 | epoch avg. loss: 0.031 | test avg. loss: 5.048
Epochs: 23574 | epoch avg. loss: 0.045 | test avg. loss: 5.210
Epochs: 23575 | epoch avg. loss: 0.050 | test avg. loss: 5.037


 47%|████▋     | 23579/50000 [35:13<37:10, 11.85it/s]

Epochs: 23576 | epoch avg. loss: 0.012 | test avg. loss: 4.886
Epochs: 23577 | epoch avg. loss: 0.012 | test avg. loss: 4.830
Epochs: 23578 | epoch avg. loss: 0.020 | test avg. loss: 4.949


 47%|████▋     | 23581/50000 [35:14<39:13, 11.23it/s]

Epochs: 23579 | epoch avg. loss: 0.009 | test avg. loss: 5.069
Epochs: 23580 | epoch avg. loss: 0.008 | test avg. loss: 5.051
Epochs: 23581 | epoch avg. loss: 0.007 | test avg. loss: 5.074


                                                     

Epochs: 23582 | epoch avg. loss: 0.008 | test avg. loss: 5.042
Epochs: 23583 | epoch avg. loss: 0.008 | test avg. loss: 4.960
Epochs: 23584 | epoch avg. loss: 0.012 | test avg. loss: 4.952


 47%|████▋     | 23587/50000 [35:14<32:58, 13.35it/s]

Epochs: 23585 | epoch avg. loss: 0.010 | test avg. loss: 5.093
Epochs: 23586 | epoch avg. loss: 0.012 | test avg. loss: 5.003
Epochs: 23587 | epoch avg. loss: 0.016 | test avg. loss: 5.042


 47%|████▋     | 23591/50000 [35:14<31:39, 13.90it/s]

Epochs: 23588 | epoch avg. loss: 0.027 | test avg. loss: 5.023
Epochs: 23589 | epoch avg. loss: 0.021 | test avg. loss: 4.961
Epochs: 23590 | epoch avg. loss: 0.060 | test avg. loss: 5.111
Epochs: 23591 | epoch avg. loss: 0.030 | test avg. loss: 5.083


 47%|████▋     | 23595/50000 [35:14<32:45, 13.44it/s]

Epochs: 23592 | epoch avg. loss: 0.012 | test avg. loss: 5.034
Epochs: 23593 | epoch avg. loss: 0.011 | test avg. loss: 5.112
Epochs: 23594 | epoch avg. loss: 0.009 | test avg. loss: 5.040


 47%|████▋     | 23597/50000 [35:15<32:04, 13.72it/s]

Epochs: 23595 | epoch avg. loss: 0.007 | test avg. loss: 5.020
Epochs: 23596 | epoch avg. loss: 0.006 | test avg. loss: 4.977
Epochs: 23597 | epoch avg. loss: 0.006 | test avg. loss: 4.937
Epochs: 23598 | epoch avg. loss: 0.008 | test avg. loss: 4.983


 47%|████▋     | 23599/50000 [35:15<30:53, 14.25it/s]

Epochs: 23599 | epoch avg. loss: 0.006 | test avg. loss: 5.043


 47%|████▋     | 23603/50000 [35:16<1:36:32,  4.56it/s]

Epochs: 23600 | epoch avg. loss: 0.008 | test avg. loss: 5.005
Epochs: 23601 | epoch avg. loss: 0.006 | test avg. loss: 5.017
Epochs: 23602 | epoch avg. loss: 0.008 | test avg. loss: 5.073


 47%|████▋     | 23605/50000 [35:17<1:19:36,  5.53it/s]

Epochs: 23603 | epoch avg. loss: 0.013 | test avg. loss: 4.985
Epochs: 23604 | epoch avg. loss: 0.022 | test avg. loss: 4.992
Epochs: 23605 | epoch avg. loss: 0.007 | test avg. loss: 4.999


 47%|████▋     | 23609/50000 [35:17<57:14,  7.68it/s]  

Epochs: 23606 | epoch avg. loss: 0.009 | test avg. loss: 4.939
Epochs: 23607 | epoch avg. loss: 0.007 | test avg. loss: 4.985
Epochs: 23608 | epoch avg. loss: 0.005 | test avg. loss: 4.988


 47%|████▋     | 23611/50000 [35:17<49:22,  8.91it/s]

Epochs: 23609 | epoch avg. loss: 0.008 | test avg. loss: 4.960
Epochs: 23610 | epoch avg. loss: 0.010 | test avg. loss: 5.036
Epochs: 23611 | epoch avg. loss: 0.020 | test avg. loss: 5.040


 47%|████▋     | 23615/50000 [35:17<43:18, 10.15it/s]

Epochs: 23612 | epoch avg. loss: 0.013 | test avg. loss: 4.934
Epochs: 23613 | epoch avg. loss: 0.025 | test avg. loss: 5.024
Epochs: 23614 | epoch avg. loss: 0.028 | test avg. loss: 5.080


 47%|████▋     | 23617/50000 [35:18<39:53, 11.02it/s]

Epochs: 23615 | epoch avg. loss: 0.023 | test avg. loss: 4.988
Epochs: 23616 | epoch avg. loss: 0.097 | test avg. loss: 5.176
Epochs: 23617 | epoch avg. loss: 0.080 | test avg. loss: 5.164
Epochs: 23618 | epoch avg. loss: 0.037 | test avg. loss: 4.946


 47%|████▋     | 23621/50000 [35:18<34:57, 12.58it/s]

Epochs: 23619 | epoch avg. loss: 0.041 | test avg. loss: 5.232
Epochs: 23620 | epoch avg. loss: 0.078 | test avg. loss: 5.013
Epochs: 23621 | epoch avg. loss: 0.031 | test avg. loss: 4.938
Epochs: 23622 | epoch avg. loss: 0.035 | test avg. loss: 5.053


 47%|████▋     | 23625/50000 [35:18<31:51, 13.80it/s]

Epochs: 23623 | epoch avg. loss: 0.020 | test avg. loss: 4.930
Epochs: 23624 | epoch avg. loss: 0.009 | test avg. loss: 4.915
Epochs: 23625 | epoch avg. loss: 0.009 | test avg. loss: 5.000


 47%|████▋     | 23629/50000 [35:18<33:01, 13.31it/s]

Epochs: 23626 | epoch avg. loss: 0.009 | test avg. loss: 5.000
Epochs: 23627 | epoch avg. loss: 0.014 | test avg. loss: 5.101
Epochs: 23628 | epoch avg. loss: 0.012 | test avg. loss: 5.062


 47%|████▋     | 23631/50000 [35:19<36:21, 12.09it/s]

Epochs: 23629 | epoch avg. loss: 0.014 | test avg. loss: 4.995
Epochs: 23630 | epoch avg. loss: 0.037 | test avg. loss: 5.117
Epochs: 23631 | epoch avg. loss: 0.027 | test avg. loss: 5.075


 47%|████▋     | 23635/50000 [35:19<33:04, 13.29it/s]

Epochs: 23632 | epoch avg. loss: 0.021 | test avg. loss: 5.067
Epochs: 23633 | epoch avg. loss: 0.016 | test avg. loss: 5.158
Epochs: 23634 | epoch avg. loss: 0.027 | test avg. loss: 4.929
Epochs: 23635 | epoch avg. loss: 0.014 | test avg. loss: 4.867


 47%|████▋     | 23639/50000 [35:19<30:41, 14.31it/s]

Epochs: 23636 | epoch avg. loss: 0.012 | test avg. loss: 5.032
Epochs: 23637 | epoch avg. loss: 0.047 | test avg. loss: 5.007
Epochs: 23638 | epoch avg. loss: 0.011 | test avg. loss: 4.999
Epochs: 23639 | epoch avg. loss: 0.015 | test avg. loss: 5.085


 47%|████▋     | 23643/50000 [35:19<33:24, 13.15it/s]

Epochs: 23640 | epoch avg. loss: 0.010 | test avg. loss: 4.996
Epochs: 23641 | epoch avg. loss: 0.019 | test avg. loss: 4.973
Epochs: 23642 | epoch avg. loss: 0.014 | test avg. loss: 5.040


 47%|████▋     | 23647/50000 [35:20<31:06, 14.12it/s]

Epochs: 23643 | epoch avg. loss: 0.009 | test avg. loss: 4.967
Epochs: 23644 | epoch avg. loss: 0.041 | test avg. loss: 5.029
Epochs: 23645 | epoch avg. loss: 0.019 | test avg. loss: 5.111
Epochs: 23646 | epoch avg. loss: 0.028 | test avg. loss: 4.935


 47%|████▋     | 23651/50000 [35:20<29:32, 14.87it/s]

Epochs: 23647 | epoch avg. loss: 0.013 | test avg. loss: 4.985
Epochs: 23648 | epoch avg. loss: 0.021 | test avg. loss: 5.140
Epochs: 23649 | epoch avg. loss: 0.038 | test avg. loss: 4.943
Epochs: 23650 | epoch avg. loss: 0.041 | test avg. loss: 4.948


 47%|████▋     | 23655/50000 [35:20<28:19, 15.50it/s]

Epochs: 23651 | epoch avg. loss: 0.014 | test avg. loss: 5.009
Epochs: 23652 | epoch avg. loss: 0.008 | test avg. loss: 4.958
Epochs: 23653 | epoch avg. loss: 0.016 | test avg. loss: 4.977
Epochs: 23654 | epoch avg. loss: 0.011 | test avg. loss: 5.027


 47%|████▋     | 23657/50000 [35:20<29:28, 14.89it/s]

Epochs: 23655 | epoch avg. loss: 0.008 | test avg. loss: 5.079
Epochs: 23656 | epoch avg. loss: 0.018 | test avg. loss: 4.994
Epochs: 23657 | epoch avg. loss: 0.018 | test avg. loss: 5.003


 47%|████▋     | 23661/50000 [35:21<30:31, 14.38it/s]

Epochs: 23658 | epoch avg. loss: 0.016 | test avg. loss: 5.031
Epochs: 23659 | epoch avg. loss: 0.006 | test avg. loss: 5.013
Epochs: 23660 | epoch avg. loss: 0.007 | test avg. loss: 5.028
Epochs: 23661 | epoch avg. loss: 0.012 | test avg. loss: 4.867


 47%|████▋     | 23665/50000 [35:21<29:09, 15.05it/s]

Epochs: 23662 | epoch avg. loss: 0.041 | test avg. loss: 4.909
Epochs: 23663 | epoch avg. loss: 0.019 | test avg. loss: 5.088
Epochs: 23664 | epoch avg. loss: 0.030 | test avg. loss: 5.094
Epochs: 23665 | epoch avg. loss: 0.016 | test avg. loss: 4.967


 47%|████▋     | 23669/50000 [35:21<28:30, 15.40it/s]

Epochs: 23666 | epoch avg. loss: 0.013 | test avg. loss: 4.983
Epochs: 23667 | epoch avg. loss: 0.007 | test avg. loss: 5.022
Epochs: 23668 | epoch avg. loss: 0.007 | test avg. loss: 4.974
Epochs: 23669 | epoch avg. loss: 0.006 | test avg. loss: 4.992


 47%|████▋     | 23673/50000 [35:21<31:09, 14.08it/s]

Epochs: 23670 | epoch avg. loss: 0.006 | test avg. loss: 5.002
Epochs: 23671 | epoch avg. loss: 0.005 | test avg. loss: 5.051
Epochs: 23672 | epoch avg. loss: 0.015 | test avg. loss: 5.017


                                                     

Epochs: 23673 | epoch avg. loss: 0.006 | test avg. loss: 4.961
Epochs: 23674 | epoch avg. loss: 0.008 | test avg. loss: 5.006
Epochs: 23675 | epoch avg. loss: 0.019 | test avg. loss: 5.001


 47%|████▋     | 23679/50000 [35:22<30:30, 14.38it/s]

Epochs: 23676 | epoch avg. loss: 0.016 | test avg. loss: 4.904
Epochs: 23677 | epoch avg. loss: 0.018 | test avg. loss: 4.933
Epochs: 23678 | epoch avg. loss: 0.013 | test avg. loss: 5.150
Epochs: 23679 | epoch avg. loss: 0.039 | test avg. loss: 5.082


                                                     

Epochs: 23680 | epoch avg. loss: 0.011 | test avg. loss: 5.052
Epochs: 23681 | epoch avg. loss: 0.034 | test avg. loss: 5.020
Epochs: 23682 | epoch avg. loss: 0.008 | test avg. loss: 4.978


 47%|████▋     | 23685/50000 [35:22<29:39, 14.78it/s]

Epochs: 23683 | epoch avg. loss: 0.006 | test avg. loss: 4.920
Epochs: 23684 | epoch avg. loss: 0.007 | test avg. loss: 4.952
Epochs: 23685 | epoch avg. loss: 0.006 | test avg. loss: 5.033


 47%|████▋     | 23689/50000 [35:23<33:46, 12.98it/s]

Epochs: 23686 | epoch avg. loss: 0.005 | test avg. loss: 5.058
Epochs: 23687 | epoch avg. loss: 0.005 | test avg. loss: 5.032
Epochs: 23688 | epoch avg. loss: 0.005 | test avg. loss: 5.003


 47%|████▋     | 23693/50000 [35:23<30:54, 14.18it/s]

Epochs: 23689 | epoch avg. loss: 0.004 | test avg. loss: 5.006
Epochs: 23690 | epoch avg. loss: 0.007 | test avg. loss: 5.028
Epochs: 23691 | epoch avg. loss: 0.006 | test avg. loss: 4.993
Epochs: 23692 | epoch avg. loss: 0.013 | test avg. loss: 5.036


 47%|████▋     | 23697/50000 [35:23<29:08, 15.04it/s]

Epochs: 23693 | epoch avg. loss: 0.006 | test avg. loss: 5.058
Epochs: 23694 | epoch avg. loss: 0.007 | test avg. loss: 4.959
Epochs: 23695 | epoch avg. loss: 0.006 | test avg. loss: 4.907
Epochs: 23696 | epoch avg. loss: 0.008 | test avg. loss: 5.036


 47%|████▋     | 23699/50000 [35:23<29:58, 14.62it/s]

Epochs: 23697 | epoch avg. loss: 0.040 | test avg. loss: 5.016
Epochs: 23698 | epoch avg. loss: 0.012 | test avg. loss: 4.963
Epochs: 23699 | epoch avg. loss: 0.033 | test avg. loss: 5.039


 47%|████▋     | 23703/50000 [35:25<1:45:23,  4.16it/s]

Epochs: 23700 | epoch avg. loss: 0.007 | test avg. loss: 5.088
Epochs: 23701 | epoch avg. loss: 0.007 | test avg. loss: 5.001
Epochs: 23702 | epoch avg. loss: 0.014 | test avg. loss: 5.023


 47%|████▋     | 23705/50000 [35:25<1:22:21,  5.32it/s]

Epochs: 23703 | epoch avg. loss: 0.011 | test avg. loss: 5.069
Epochs: 23704 | epoch avg. loss: 0.011 | test avg. loss: 5.009
Epochs: 23705 | epoch avg. loss: 0.024 | test avg. loss: 5.011


 47%|████▋     | 23709/50000 [35:26<58:18,  7.52it/s]  

Epochs: 23706 | epoch avg. loss: 0.060 | test avg. loss: 4.997
Epochs: 23707 | epoch avg. loss: 0.022 | test avg. loss: 5.054
Epochs: 23708 | epoch avg. loss: 0.026 | test avg. loss: 4.939


 47%|████▋     | 23711/50000 [35:26<52:37,  8.33it/s]

Epochs: 23709 | epoch avg. loss: 0.099 | test avg. loss: 4.990
Epochs: 23710 | epoch avg. loss: 0.025 | test avg. loss: 5.267
Epochs: 23711 | epoch avg. loss: 0.095 | test avg. loss: 5.078


 47%|████▋     | 23715/50000 [35:26<44:56,  9.75it/s]

Epochs: 23712 | epoch avg. loss: 0.016 | test avg. loss: 4.928
Epochs: 23713 | epoch avg. loss: 0.021 | test avg. loss: 5.051
Epochs: 23714 | epoch avg. loss: 0.020 | test avg. loss: 5.193


 47%|████▋     | 23717/50000 [35:26<44:32,  9.83it/s]

Epochs: 23715 | epoch avg. loss: 0.027 | test avg. loss: 5.084
Epochs: 23716 | epoch avg. loss: 0.036 | test avg. loss: 5.018
Epochs: 23717 | epoch avg. loss: 0.018 | test avg. loss: 5.113


 47%|████▋     | 23721/50000 [35:27<40:13, 10.89it/s]

Epochs: 23718 | epoch avg. loss: 0.070 | test avg. loss: 4.806
Epochs: 23719 | epoch avg. loss: 0.024 | test avg. loss: 4.855
Epochs: 23720 | epoch avg. loss: 0.063 | test avg. loss: 5.269


 47%|████▋     | 23723/50000 [35:27<38:32, 11.36it/s]

Epochs: 23721 | epoch avg. loss: 0.122 | test avg. loss: 5.158
Epochs: 23722 | epoch avg. loss: 0.134 | test avg. loss: 5.144
Epochs: 23723 | epoch avg. loss: 0.135 | test avg. loss: 5.167


 47%|████▋     | 23727/50000 [35:27<34:47, 12.59it/s]

Epochs: 23724 | epoch avg. loss: 0.182 | test avg. loss: 4.983
Epochs: 23725 | epoch avg. loss: 0.048 | test avg. loss: 4.893
Epochs: 23726 | epoch avg. loss: 0.025 | test avg. loss: 5.160


 47%|████▋     | 23729/50000 [35:27<35:13, 12.43it/s]

Epochs: 23727 | epoch avg. loss: 0.027 | test avg. loss: 5.058
Epochs: 23728 | epoch avg. loss: 0.037 | test avg. loss: 5.054
Epochs: 23729 | epoch avg. loss: 0.017 | test avg. loss: 5.036


 47%|████▋     | 23733/50000 [35:28<37:50, 11.57it/s]

Epochs: 23730 | epoch avg. loss: 0.016 | test avg. loss: 4.898
Epochs: 23731 | epoch avg. loss: 0.069 | test avg. loss: 4.951
Epochs: 23732 | epoch avg. loss: 0.049 | test avg. loss: 5.249


 47%|████▋     | 23737/50000 [35:28<33:44, 12.97it/s]

Epochs: 23733 | epoch avg. loss: 0.100 | test avg. loss: 4.911
Epochs: 23734 | epoch avg. loss: 0.136 | test avg. loss: 4.873
Epochs: 23735 | epoch avg. loss: 0.121 | test avg. loss: 5.478
Epochs: 23736 | epoch avg. loss: 0.416 | test avg. loss: 5.030


 47%|████▋     | 23739/50000 [35:28<33:29, 13.07it/s]

Epochs: 23737 | epoch avg. loss: 0.167 | test avg. loss: 5.250
Epochs: 23738 | epoch avg. loss: 0.180 | test avg. loss: 5.729
Epochs: 23739 | epoch avg. loss: 0.297 | test avg. loss: 5.217
Epochs: 23740 | epoch avg. loss: 0.201 | test avg. loss: 5.153


 47%|████▋     | 23743/50000 [35:28<30:55, 14.15it/s]

Epochs: 23741 | epoch avg. loss: 0.210 | test avg. loss: 5.617
Epochs: 23742 | epoch avg. loss: 0.422 | test avg. loss: 5.292
Epochs: 23743 | epoch avg. loss: 0.094 | test avg. loss: 5.400


 47%|████▋     | 23747/50000 [35:29<32:02, 13.66it/s]

Epochs: 23744 | epoch avg. loss: 0.208 | test avg. loss: 5.520
Epochs: 23745 | epoch avg. loss: 0.177 | test avg. loss: 5.062
Epochs: 23746 | epoch avg. loss: 0.036 | test avg. loss: 4.854


 47%|████▋     | 23749/50000 [35:29<32:58, 13.27it/s]

Epochs: 23747 | epoch avg. loss: 0.041 | test avg. loss: 5.050
Epochs: 23748 | epoch avg. loss: 0.066 | test avg. loss: 5.034
Epochs: 23749 | epoch avg. loss: 0.125 | test avg. loss: 5.211


 48%|████▊     | 23753/50000 [35:29<33:58, 12.87it/s]

Epochs: 23750 | epoch avg. loss: 0.072 | test avg. loss: 5.784
Epochs: 23751 | epoch avg. loss: 0.251 | test avg. loss: 5.196
Epochs: 23752 | epoch avg. loss: 0.129 | test avg. loss: 5.205


 48%|████▊     | 23755/50000 [35:29<34:15, 12.77it/s]

Epochs: 23753 | epoch avg. loss: 0.073 | test avg. loss: 5.625
Epochs: 23754 | epoch avg. loss: 0.126 | test avg. loss: 5.179
Epochs: 23755 | epoch avg. loss: 0.209 | test avg. loss: 5.177


 48%|████▊     | 23759/50000 [35:30<35:18, 12.39it/s]

Epochs: 23756 | epoch avg. loss: 0.138 | test avg. loss: 5.366
Epochs: 23757 | epoch avg. loss: 0.181 | test avg. loss: 5.066
Epochs: 23758 | epoch avg. loss: 0.180 | test avg. loss: 5.153


 48%|████▊     | 23761/50000 [35:30<35:35, 12.29it/s]

Epochs: 23759 | epoch avg. loss: 0.277 | test avg. loss: 5.549
Epochs: 23760 | epoch avg. loss: 0.325 | test avg. loss: 5.630
Epochs: 23761 | epoch avg. loss: 0.165 | test avg. loss: 5.783


 48%|████▊     | 23765/50000 [35:30<33:24, 13.09it/s]

Epochs: 23762 | epoch avg. loss: 0.799 | test avg. loss: 5.888
Epochs: 23763 | epoch avg. loss: 0.310 | test avg. loss: 6.220
Epochs: 23764 | epoch avg. loss: 0.230 | test avg. loss: 5.710


 48%|████▊     | 23767/50000 [35:30<34:05, 12.82it/s]

Epochs: 23765 | epoch avg. loss: 0.783 | test avg. loss: 5.591
Epochs: 23766 | epoch avg. loss: 0.670 | test avg. loss: 5.973
Epochs: 23767 | epoch avg. loss: 0.470 | test avg. loss: 5.771


 48%|████▊     | 23771/50000 [35:31<37:16, 11.73it/s]

Epochs: 23768 | epoch avg. loss: 0.822 | test avg. loss: 6.000
Epochs: 23769 | epoch avg. loss: 0.400 | test avg. loss: 6.343
Epochs: 23770 | epoch avg. loss: 0.357 | test avg. loss: 5.835


 48%|████▊     | 23773/50000 [35:31<36:34, 11.95it/s]

Epochs: 23771 | epoch avg. loss: 0.910 | test avg. loss: 6.061
Epochs: 23772 | epoch avg. loss: 1.001 | test avg. loss: 5.511
Epochs: 23773 | epoch avg. loss: 0.611 | test avg. loss: 7.073


 48%|████▊     | 23777/50000 [35:31<34:10, 12.79it/s]

Epochs: 23774 | epoch avg. loss: 1.731 | test avg. loss: 7.469
Epochs: 23775 | epoch avg. loss: 1.632 | test avg. loss: 6.022
Epochs: 23776 | epoch avg. loss: 0.710 | test avg. loss: 5.660


 48%|████▊     | 23779/50000 [35:31<33:07, 13.19it/s]

Epochs: 23777 | epoch avg. loss: 0.669 | test avg. loss: 6.425
Epochs: 23778 | epoch avg. loss: 0.819 | test avg. loss: 5.343
Epochs: 23779 | epoch avg. loss: 0.970 | test avg. loss: 5.054


 48%|████▊     | 23783/50000 [35:32<36:34, 11.95it/s]

Epochs: 23780 | epoch avg. loss: 0.379 | test avg. loss: 4.472
Epochs: 23781 | epoch avg. loss: 0.467 | test avg. loss: 4.531
Epochs: 23782 | epoch avg. loss: 0.378 | test avg. loss: 4.805


 48%|████▊     | 23785/50000 [35:32<35:53, 12.17it/s]

Epochs: 23783 | epoch avg. loss: 0.286 | test avg. loss: 4.870
Epochs: 23784 | epoch avg. loss: 0.299 | test avg. loss: 5.156
Epochs: 23785 | epoch avg. loss: 0.273 | test avg. loss: 4.705


 48%|████▊     | 23789/50000 [35:32<35:27, 12.32it/s]

Epochs: 23786 | epoch avg. loss: 0.327 | test avg. loss: 4.889
Epochs: 23787 | epoch avg. loss: 0.702 | test avg. loss: 4.531
Epochs: 23788 | epoch avg. loss: 0.354 | test avg. loss: 4.728


 48%|████▊     | 23791/50000 [35:32<35:17, 12.38it/s]

Epochs: 23789 | epoch avg. loss: 0.228 | test avg. loss: 5.340
Epochs: 23790 | epoch avg. loss: 0.267 | test avg. loss: 4.826
Epochs: 23791 | epoch avg. loss: 0.286 | test avg. loss: 4.976


 48%|████▊     | 23795/50000 [35:33<36:13, 12.06it/s]

Epochs: 23792 | epoch avg. loss: 0.168 | test avg. loss: 4.432
Epochs: 23793 | epoch avg. loss: 0.159 | test avg. loss: 4.648
Epochs: 23794 | epoch avg. loss: 0.205 | test avg. loss: 4.456


 48%|████▊     | 23799/50000 [35:33<32:11, 13.57it/s]

Epochs: 23795 | epoch avg. loss: 0.150 | test avg. loss: 4.573
Epochs: 23796 | epoch avg. loss: 0.106 | test avg. loss: 4.945
Epochs: 23797 | epoch avg. loss: 0.121 | test avg. loss: 4.804
Epochs: 23798 | epoch avg. loss: 0.206 | test avg. loss: 5.175


 48%|████▊     | 23799/50000 [35:33<32:11, 13.57it/s]

Epochs: 23799 | epoch avg. loss: 0.205 | test avg. loss: 4.817


 48%|████▊     | 23803/50000 [35:35<1:39:06,  4.41it/s]

Epochs: 23800 | epoch avg. loss: 0.163 | test avg. loss: 5.005
Epochs: 23801 | epoch avg. loss: 0.086 | test avg. loss: 5.103
Epochs: 23802 | epoch avg. loss: 0.073 | test avg. loss: 5.012


 48%|████▊     | 23805/50000 [35:35<1:19:52,  5.47it/s]

Epochs: 23803 | epoch avg. loss: 0.071 | test avg. loss: 5.179
Epochs: 23804 | epoch avg. loss: 0.055 | test avg. loss: 4.825
Epochs: 23805 | epoch avg. loss: 0.058 | test avg. loss: 4.986


 48%|████▊     | 23809/50000 [35:35<56:15,  7.76it/s]  

Epochs: 23806 | epoch avg. loss: 0.052 | test avg. loss: 4.867
Epochs: 23807 | epoch avg. loss: 0.013 | test avg. loss: 4.859
Epochs: 23808 | epoch avg. loss: 0.016 | test avg. loss: 4.970


 48%|████▊     | 23811/50000 [35:35<50:18,  8.68it/s]

Epochs: 23809 | epoch avg. loss: 0.025 | test avg. loss: 4.853
Epochs: 23810 | epoch avg. loss: 0.011 | test avg. loss: 4.943
Epochs: 23811 | epoch avg. loss: 0.013 | test avg. loss: 4.841


 48%|████▊     | 23815/50000 [35:36<42:52, 10.18it/s]

Epochs: 23812 | epoch avg. loss: 0.015 | test avg. loss: 4.911
Epochs: 23813 | epoch avg. loss: 0.010 | test avg. loss: 4.929
Epochs: 23814 | epoch avg. loss: 0.008 | test avg. loss: 4.943


 48%|████▊     | 23817/50000 [35:36<39:34, 11.03it/s]

Epochs: 23815 | epoch avg. loss: 0.008 | test avg. loss: 4.967
Epochs: 23816 | epoch avg. loss: 0.007 | test avg. loss: 4.945
Epochs: 23817 | epoch avg. loss: 0.007 | test avg. loss: 4.928


 48%|████▊     | 23821/50000 [35:36<33:40, 12.96it/s]

Epochs: 23818 | epoch avg. loss: 0.007 | test avg. loss: 4.855
Epochs: 23819 | epoch avg. loss: 0.011 | test avg. loss: 4.987
Epochs: 23820 | epoch avg. loss: 0.023 | test avg. loss: 4.868
Epochs: 23821 | epoch avg. loss: 0.011 | test avg. loss: 4.962




Epochs: 23822 | epoch avg. loss: 0.009 | test avg. loss: 4.928
Epochs: 23823 | epoch avg. loss: 0.009 | test avg. loss: 4.975
Epochs: 23824 | epoch avg. loss: 0.006 | test avg. loss: 4.915


 48%|████▊     | 23827/50000 [35:36<32:05, 13.59it/s]

Epochs: 23825 | epoch avg. loss: 0.007 | test avg. loss: 4.906
Epochs: 23826 | epoch avg. loss: 0.005 | test avg. loss: 4.903
Epochs: 23827 | epoch avg. loss: 0.005 | test avg. loss: 4.902


 48%|████▊     | 23831/50000 [35:37<32:36, 13.37it/s]

Epochs: 23828 | epoch avg. loss: 0.005 | test avg. loss: 4.907
Epochs: 23829 | epoch avg. loss: 0.007 | test avg. loss: 4.938
Epochs: 23830 | epoch avg. loss: 0.005 | test avg. loss: 4.911


 48%|████▊     | 23835/50000 [35:37<30:09, 14.46it/s]

Epochs: 23831 | epoch avg. loss: 0.004 | test avg. loss: 4.906
Epochs: 23832 | epoch avg. loss: 0.005 | test avg. loss: 4.900
Epochs: 23833 | epoch avg. loss: 0.009 | test avg. loss: 4.874
Epochs: 23834 | epoch avg. loss: 0.014 | test avg. loss: 4.956


 48%|████▊     | 23839/50000 [35:37<29:22, 14.85it/s]

Epochs: 23835 | epoch avg. loss: 0.011 | test avg. loss: 4.927
Epochs: 23836 | epoch avg. loss: 0.007 | test avg. loss: 4.879
Epochs: 23837 | epoch avg. loss: 0.007 | test avg. loss: 4.890
Epochs: 23838 | epoch avg. loss: 0.006 | test avg. loss: 4.858


 48%|████▊     | 23841/50000 [35:37<29:08, 14.96it/s]

Epochs: 23839 | epoch avg. loss: 0.005 | test avg. loss: 4.853
Epochs: 23840 | epoch avg. loss: 0.006 | test avg. loss: 4.922
Epochs: 23841 | epoch avg. loss: 0.008 | test avg. loss: 4.943


 48%|████▊     | 23845/50000 [35:38<29:23, 14.83it/s]

Epochs: 23842 | epoch avg. loss: 0.007 | test avg. loss: 4.876
Epochs: 23843 | epoch avg. loss: 0.008 | test avg. loss: 4.936
Epochs: 23844 | epoch avg. loss: 0.008 | test avg. loss: 4.856


 48%|████▊     | 23847/50000 [35:38<30:38, 14.23it/s]

Epochs: 23845 | epoch avg. loss: 0.006 | test avg. loss: 4.927
Epochs: 23846 | epoch avg. loss: 0.022 | test avg. loss: 4.924
Epochs: 23847 | epoch avg. loss: 0.009 | test avg. loss: 4.901


 48%|████▊     | 23851/50000 [35:38<30:55, 14.09it/s]

Epochs: 23848 | epoch avg. loss: 0.009 | test avg. loss: 4.950
Epochs: 23849 | epoch avg. loss: 0.006 | test avg. loss: 4.886
Epochs: 23850 | epoch avg. loss: 0.010 | test avg. loss: 4.964


 48%|████▊     | 23853/50000 [35:38<31:59, 13.62it/s]

Epochs: 23851 | epoch avg. loss: 0.010 | test avg. loss: 4.873
Epochs: 23852 | epoch avg. loss: 0.018 | test avg. loss: 4.867
Epochs: 23853 | epoch avg. loss: 0.011 | test avg. loss: 4.882


 48%|████▊     | 23857/50000 [35:39<33:32, 12.99it/s]

Epochs: 23854 | epoch avg. loss: 0.011 | test avg. loss: 4.823
Epochs: 23855 | epoch avg. loss: 0.006 | test avg. loss: 4.924
Epochs: 23856 | epoch avg. loss: 0.013 | test avg. loss: 4.852


 48%|████▊     | 23859/50000 [35:39<34:22, 12.68it/s]

Epochs: 23857 | epoch avg. loss: 0.011 | test avg. loss: 4.927
Epochs: 23858 | epoch avg. loss: 0.006 | test avg. loss: 4.914
Epochs: 23859 | epoch avg. loss: 0.009 | test avg. loss: 4.905


 48%|████▊     | 23863/50000 [35:39<35:28, 12.28it/s]

Epochs: 23860 | epoch avg. loss: 0.007 | test avg. loss: 4.940
Epochs: 23861 | epoch avg. loss: 0.007 | test avg. loss: 4.880
Epochs: 23862 | epoch avg. loss: 0.007 | test avg. loss: 4.899


 48%|████▊     | 23865/50000 [35:39<35:56, 12.12it/s]

Epochs: 23863 | epoch avg. loss: 0.005 | test avg. loss: 4.880
Epochs: 23864 | epoch avg. loss: 0.004 | test avg. loss: 4.880
Epochs: 23865 | epoch avg. loss: 0.004 | test avg. loss: 4.868


 48%|████▊     | 23869/50000 [35:40<34:44, 12.54it/s]

Epochs: 23866 | epoch avg. loss: 0.008 | test avg. loss: 4.844
Epochs: 23867 | epoch avg. loss: 0.014 | test avg. loss: 4.898
Epochs: 23868 | epoch avg. loss: 0.004 | test avg. loss: 4.859
Epochs: 23869 | epoch avg. loss: 0.005 | test avg. loss: 4.931


 48%|████▊     | 23873/50000 [35:40<32:36, 13.35it/s]

Epochs: 23870 | epoch avg. loss: 0.014 | test avg. loss: 4.875
Epochs: 23871 | epoch avg. loss: 0.007 | test avg. loss: 4.836
Epochs: 23872 | epoch avg. loss: 0.010 | test avg. loss: 4.943
Epochs: 23873 | epoch avg. loss: 0.010 | test avg. loss: 4.851


 48%|████▊     | 23877/50000 [35:40<30:55, 14.08it/s]

Epochs: 23874 | epoch avg. loss: 0.023 | test avg. loss: 4.920
Epochs: 23875 | epoch avg. loss: 0.008 | test avg. loss: 4.923
Epochs: 23876 | epoch avg. loss: 0.008 | test avg. loss: 4.858
Epochs: 23877 | epoch avg. loss: 0.008 | test avg. loss: 4.918


 48%|████▊     | 23881/50000 [35:40<29:23, 14.81it/s]

Epochs: 23878 | epoch avg. loss: 0.022 | test avg. loss: 4.791
Epochs: 23879 | epoch avg. loss: 0.025 | test avg. loss: 4.827
Epochs: 23880 | epoch avg. loss: 0.011 | test avg. loss: 4.951
Epochs: 23881 | epoch avg. loss: 0.011 | test avg. loss: 4.893


 48%|████▊     | 23885/50000 [35:41<28:58, 15.02it/s]

Epochs: 23882 | epoch avg. loss: 0.014 | test avg. loss: 4.948
Epochs: 23883 | epoch avg. loss: 0.011 | test avg. loss: 4.988
Epochs: 23884 | epoch avg. loss: 0.019 | test avg. loss: 4.807
Epochs: 23885 | epoch avg. loss: 0.018 | test avg. loss: 4.822


                                                     

Epochs: 23886 | epoch avg. loss: 0.016 | test avg. loss: 4.847
Epochs: 23887 | epoch avg. loss: 0.010 | test avg. loss: 4.823
Epochs: 23888 | epoch avg. loss: 0.018 | test avg. loss: 4.954


 48%|████▊     | 23891/50000 [35:41<30:03, 14.47it/s]

Epochs: 23889 | epoch avg. loss: 0.016 | test avg. loss: 4.888
Epochs: 23890 | epoch avg. loss: 0.012 | test avg. loss: 4.919
Epochs: 23891 | epoch avg. loss: 0.015 | test avg. loss: 4.959


 48%|████▊     | 23895/50000 [35:41<32:33, 13.37it/s]

Epochs: 23892 | epoch avg. loss: 0.012 | test avg. loss: 4.874
Epochs: 23893 | epoch avg. loss: 0.012 | test avg. loss: 4.898
Epochs: 23894 | epoch avg. loss: 0.007 | test avg. loss: 4.932


 48%|████▊     | 23897/50000 [35:42<32:23, 13.43it/s]

Epochs: 23895 | epoch avg. loss: 0.009 | test avg. loss: 4.823
Epochs: 23896 | epoch avg. loss: 0.020 | test avg. loss: 4.922
Epochs: 23897 | epoch avg. loss: 0.026 | test avg. loss: 4.852


 48%|████▊     | 23899/50000 [35:42<33:13, 13.09it/s]

Epochs: 23898 | epoch avg. loss: 0.026 | test avg. loss: 4.852
Epochs: 23899 | epoch avg. loss: 0.044 | test avg. loss: 5.118


 48%|████▊     | 23903/50000 [35:44<1:45:35,  4.12it/s]

Epochs: 23900 | epoch avg. loss: 0.074 | test avg. loss: 4.885
Epochs: 23901 | epoch avg. loss: 0.057 | test avg. loss: 4.910
Epochs: 23902 | epoch avg. loss: 0.040 | test avg. loss: 4.870


 48%|████▊     | 23905/50000 [35:44<1:24:38,  5.14it/s]

Epochs: 23903 | epoch avg. loss: 0.033 | test avg. loss: 4.729
Epochs: 23904 | epoch avg. loss: 0.057 | test avg. loss: 4.975
Epochs: 23905 | epoch avg. loss: 0.051 | test avg. loss: 4.804


 48%|████▊     | 23909/50000 [35:44<58:35,  7.42it/s]  

Epochs: 23906 | epoch avg. loss: 0.074 | test avg. loss: 4.973
Epochs: 23907 | epoch avg. loss: 0.041 | test avg. loss: 5.102
Epochs: 23908 | epoch avg. loss: 0.048 | test avg. loss: 4.830


 48%|████▊     | 23911/50000 [35:44<50:43,  8.57it/s]

Epochs: 23909 | epoch avg. loss: 0.106 | test avg. loss: 5.034
Epochs: 23910 | epoch avg. loss: 0.083 | test avg. loss: 4.811
Epochs: 23911 | epoch avg. loss: 0.101 | test avg. loss: 4.832


 48%|████▊     | 23915/50000 [35:44<39:46, 10.93it/s]

Epochs: 23912 | epoch avg. loss: 0.093 | test avg. loss: 5.187
Epochs: 23913 | epoch avg. loss: 0.093 | test avg. loss: 4.780
Epochs: 23914 | epoch avg. loss: 0.066 | test avg. loss: 4.756


 48%|████▊     | 23917/50000 [35:45<37:37, 11.55it/s]

Epochs: 23915 | epoch avg. loss: 0.024 | test avg. loss: 4.927
Epochs: 23916 | epoch avg. loss: 0.026 | test avg. loss: 4.864
Epochs: 23917 | epoch avg. loss: 0.040 | test avg. loss: 4.990


 48%|████▊     | 23921/50000 [35:45<33:58, 12.80it/s]

Epochs: 23918 | epoch avg. loss: 0.026 | test avg. loss: 5.031
Epochs: 23919 | epoch avg. loss: 0.024 | test avg. loss: 4.886
Epochs: 23920 | epoch avg. loss: 0.047 | test avg. loss: 4.949


 48%|████▊     | 23923/50000 [35:45<33:57, 12.80it/s]

Epochs: 23921 | epoch avg. loss: 0.031 | test avg. loss: 4.930
Epochs: 23922 | epoch avg. loss: 0.018 | test avg. loss: 4.836
Epochs: 23923 | epoch avg. loss: 0.038 | test avg. loss: 4.904


 48%|████▊     | 23927/50000 [35:45<34:53, 12.46it/s]

Epochs: 23924 | epoch avg. loss: 0.023 | test avg. loss: 4.888
Epochs: 23925 | epoch avg. loss: 0.020 | test avg. loss: 4.785
Epochs: 23926 | epoch avg. loss: 0.053 | test avg. loss: 4.905


 48%|████▊     | 23929/50000 [35:46<34:44, 12.51it/s]

Epochs: 23927 | epoch avg. loss: 0.031 | test avg. loss: 4.797
Epochs: 23928 | epoch avg. loss: 0.018 | test avg. loss: 4.836
Epochs: 23929 | epoch avg. loss: 0.011 | test avg. loss: 4.886


 48%|████▊     | 23933/50000 [35:46<34:02, 12.76it/s]

Epochs: 23930 | epoch avg. loss: 0.009 | test avg. loss: 4.865
Epochs: 23931 | epoch avg. loss: 0.011 | test avg. loss: 5.046
Epochs: 23932 | epoch avg. loss: 0.040 | test avg. loss: 4.874


 48%|████▊     | 23935/50000 [35:46<33:09, 13.10it/s]

Epochs: 23933 | epoch avg. loss: 0.023 | test avg. loss: 4.950
Epochs: 23934 | epoch avg. loss: 0.017 | test avg. loss: 4.984
Epochs: 23935 | epoch avg. loss: 0.019 | test avg. loss: 4.859


 48%|████▊     | 23939/50000 [35:46<33:21, 13.02it/s]

Epochs: 23936 | epoch avg. loss: 0.050 | test avg. loss: 4.940
Epochs: 23937 | epoch avg. loss: 0.023 | test avg. loss: 4.847
Epochs: 23938 | epoch avg. loss: 0.011 | test avg. loss: 4.793


 48%|████▊     | 23941/50000 [35:46<33:24, 13.00it/s]

Epochs: 23939 | epoch avg. loss: 0.010 | test avg. loss: 4.893
Epochs: 23940 | epoch avg. loss: 0.007 | test avg. loss: 4.927
Epochs: 23941 | epoch avg. loss: 0.006 | test avg. loss: 5.012


 48%|████▊     | 23945/50000 [35:47<33:09, 13.10it/s]

Epochs: 23942 | epoch avg. loss: 0.009 | test avg. loss: 4.967
Epochs: 23943 | epoch avg. loss: 0.006 | test avg. loss: 4.890
Epochs: 23944 | epoch avg. loss: 0.006 | test avg. loss: 4.858


 48%|████▊     | 23947/50000 [35:47<33:47, 12.85it/s]

Epochs: 23945 | epoch avg. loss: 0.005 | test avg. loss: 4.863
Epochs: 23946 | epoch avg. loss: 0.006 | test avg. loss: 4.855
Epochs: 23947 | epoch avg. loss: 0.014 | test avg. loss: 4.951


 48%|████▊     | 23951/50000 [35:47<35:10, 12.34it/s]

Epochs: 23948 | epoch avg. loss: 0.008 | test avg. loss: 4.909
Epochs: 23949 | epoch avg. loss: 0.015 | test avg. loss: 4.899
Epochs: 23950 | epoch avg. loss: 0.009 | test avg. loss: 4.973


 48%|████▊     | 23953/50000 [35:47<35:51, 12.11it/s]

Epochs: 23951 | epoch avg. loss: 0.014 | test avg. loss: 4.824
Epochs: 23952 | epoch avg. loss: 0.019 | test avg. loss: 4.902
Epochs: 23953 | epoch avg. loss: 0.020 | test avg. loss: 4.856


 48%|████▊     | 23957/50000 [35:48<35:34, 12.20it/s]

Epochs: 23954 | epoch avg. loss: 0.011 | test avg. loss: 4.791
Epochs: 23955 | epoch avg. loss: 0.023 | test avg. loss: 4.926
Epochs: 23956 | epoch avg. loss: 0.013 | test avg. loss: 4.927


 48%|████▊     | 23959/50000 [35:48<35:59, 12.06it/s]

Epochs: 23957 | epoch avg. loss: 0.006 | test avg. loss: 4.942
Epochs: 23958 | epoch avg. loss: 0.006 | test avg. loss: 4.970
Epochs: 23959 | epoch avg. loss: 0.010 | test avg. loss: 4.906


 48%|████▊     | 23963/50000 [35:48<34:33, 12.56it/s]

Epochs: 23960 | epoch avg. loss: 0.006 | test avg. loss: 4.848
Epochs: 23961 | epoch avg. loss: 0.009 | test avg. loss: 4.883
Epochs: 23962 | epoch avg. loss: 0.005 | test avg. loss: 4.939


 48%|████▊     | 23965/50000 [35:48<34:34, 12.55it/s]

Epochs: 23963 | epoch avg. loss: 0.005 | test avg. loss: 4.917
Epochs: 23964 | epoch avg. loss: 0.007 | test avg. loss: 4.901
Epochs: 23965 | epoch avg. loss: 0.006 | test avg. loss: 4.928


 48%|████▊     | 23969/50000 [35:49<33:33, 12.93it/s]

Epochs: 23966 | epoch avg. loss: 0.013 | test avg. loss: 4.844
Epochs: 23967 | epoch avg. loss: 0.008 | test avg. loss: 4.905
Epochs: 23968 | epoch avg. loss: 0.005 | test avg. loss: 4.962


 48%|████▊     | 23971/50000 [35:49<34:36, 12.53it/s]

Epochs: 23969 | epoch avg. loss: 0.006 | test avg. loss: 4.901
Epochs: 23970 | epoch avg. loss: 0.007 | test avg. loss: 4.877
Epochs: 23971 | epoch avg. loss: 0.005 | test avg. loss: 4.887


 48%|████▊     | 23975/50000 [35:49<37:03, 11.71it/s]

Epochs: 23972 | epoch avg. loss: 0.009 | test avg. loss: 4.839
Epochs: 23973 | epoch avg. loss: 0.005 | test avg. loss: 4.877
Epochs: 23974 | epoch avg. loss: 0.005 | test avg. loss: 4.901


 48%|████▊     | 23977/50000 [35:49<37:54, 11.44it/s]

Epochs: 23975 | epoch avg. loss: 0.005 | test avg. loss: 4.893
Epochs: 23976 | epoch avg. loss: 0.005 | test avg. loss: 4.844
Epochs: 23977 | epoch avg. loss: 0.012 | test avg. loss: 4.866


 48%|████▊     | 23981/50000 [35:50<35:04, 12.36it/s]

Epochs: 23978 | epoch avg. loss: 0.004 | test avg. loss: 4.873
Epochs: 23979 | epoch avg. loss: 0.005 | test avg. loss: 4.858
Epochs: 23980 | epoch avg. loss: 0.007 | test avg. loss: 4.909


 48%|████▊     | 23983/50000 [35:50<34:20, 12.62it/s]

Epochs: 23981 | epoch avg. loss: 0.012 | test avg. loss: 4.936
Epochs: 23982 | epoch avg. loss: 0.016 | test avg. loss: 4.797
Epochs: 23983 | epoch avg. loss: 0.019 | test avg. loss: 4.870


 48%|████▊     | 23987/50000 [35:50<34:53, 12.43it/s]

Epochs: 23984 | epoch avg. loss: 0.014 | test avg. loss: 4.987
Epochs: 23985 | epoch avg. loss: 0.024 | test avg. loss: 4.836
Epochs: 23986 | epoch avg. loss: 0.064 | test avg. loss: 4.883


 48%|████▊     | 23989/50000 [35:50<33:55, 12.78it/s]

Epochs: 23987 | epoch avg. loss: 0.022 | test avg. loss: 4.974
Epochs: 23988 | epoch avg. loss: 0.028 | test avg. loss: 4.881
Epochs: 23989 | epoch avg. loss: 0.106 | test avg. loss: 5.035


 48%|████▊     | 23993/50000 [35:51<35:24, 12.24it/s]

Epochs: 23990 | epoch avg. loss: 0.080 | test avg. loss: 5.091
Epochs: 23991 | epoch avg. loss: 0.061 | test avg. loss: 4.791
Epochs: 23992 | epoch avg. loss: 0.122 | test avg. loss: 4.958


 48%|████▊     | 23995/50000 [35:51<33:25, 12.97it/s]

Epochs: 23993 | epoch avg. loss: 0.135 | test avg. loss: 4.875
Epochs: 23994 | epoch avg. loss: 0.052 | test avg. loss: 4.784
Epochs: 23995 | epoch avg. loss: 0.123 | test avg. loss: 4.968


 48%|████▊     | 23999/50000 [35:51<33:12, 13.05it/s]

Epochs: 23996 | epoch avg. loss: 0.058 | test avg. loss: 4.988
Epochs: 23997 | epoch avg. loss: 0.025 | test avg. loss: 4.931
Epochs: 23998 | epoch avg. loss: 0.042 | test avg. loss: 5.025
Epochs: 23999 | epoch avg. loss: 0.048 | test avg. loss: 4.827


 48%|████▊     | 24003/50000 [35:53<1:48:53,  3.98it/s]

Epochs: 24000 | epoch avg. loss: 0.020 | test avg. loss: 4.669
Epochs: 24001 | epoch avg. loss: 0.043 | test avg. loss: 4.801
Epochs: 24002 | epoch avg. loss: 0.020 | test avg. loss: 4.939


 48%|████▊     | 24005/50000 [35:53<1:26:19,  5.02it/s]

Epochs: 24003 | epoch avg. loss: 0.020 | test avg. loss: 4.891
Epochs: 24004 | epoch avg. loss: 0.044 | test avg. loss: 4.956
Epochs: 24005 | epoch avg. loss: 0.027 | test avg. loss: 4.884


 48%|████▊     | 24009/50000 [35:53<59:07,  7.33it/s]  

Epochs: 24006 | epoch avg. loss: 0.020 | test avg. loss: 4.736
Epochs: 24007 | epoch avg. loss: 0.022 | test avg. loss: 4.860
Epochs: 24008 | epoch avg. loss: 0.013 | test avg. loss: 4.802


 48%|████▊     | 24011/50000 [35:54<51:49,  8.36it/s]

Epochs: 24009 | epoch avg. loss: 0.032 | test avg. loss: 4.800
Epochs: 24010 | epoch avg. loss: 0.023 | test avg. loss: 5.052
Epochs: 24011 | epoch avg. loss: 0.074 | test avg. loss: 4.780


 48%|████▊     | 24015/50000 [35:54<41:34, 10.42it/s]

Epochs: 24012 | epoch avg. loss: 0.029 | test avg. loss: 4.779
Epochs: 24013 | epoch avg. loss: 0.016 | test avg. loss: 4.898
Epochs: 24014 | epoch avg. loss: 0.011 | test avg. loss: 4.898


 48%|████▊     | 24017/50000 [35:54<37:17, 11.61it/s]

Epochs: 24015 | epoch avg. loss: 0.024 | test avg. loss: 4.891
Epochs: 24016 | epoch avg. loss: 0.011 | test avg. loss: 4.884
Epochs: 24017 | epoch avg. loss: 0.009 | test avg. loss: 4.823


 48%|████▊     | 24021/50000 [35:54<33:38, 12.87it/s]

Epochs: 24018 | epoch avg. loss: 0.008 | test avg. loss: 4.819
Epochs: 24019 | epoch avg. loss: 0.013 | test avg. loss: 4.935
Epochs: 24020 | epoch avg. loss: 0.007 | test avg. loss: 4.937


 48%|████▊     | 24023/50000 [35:55<33:29, 12.93it/s]

Epochs: 24021 | epoch avg. loss: 0.008 | test avg. loss: 4.910
Epochs: 24022 | epoch avg. loss: 0.011 | test avg. loss: 5.015
Epochs: 24023 | epoch avg. loss: 0.026 | test avg. loss: 4.900


 48%|████▊     | 24027/50000 [35:55<35:10, 12.31it/s]

Epochs: 24024 | epoch avg. loss: 0.021 | test avg. loss: 4.870
Epochs: 24025 | epoch avg. loss: 0.012 | test avg. loss: 4.948
Epochs: 24026 | epoch avg. loss: 0.018 | test avg. loss: 4.817


 48%|████▊     | 24029/50000 [35:55<34:22, 12.59it/s]

Epochs: 24027 | epoch avg. loss: 0.039 | test avg. loss: 4.876
Epochs: 24028 | epoch avg. loss: 0.012 | test avg. loss: 5.011
Epochs: 24029 | epoch avg. loss: 0.018 | test avg. loss: 4.949


 48%|████▊     | 24033/50000 [35:55<32:27, 13.33it/s]

Epochs: 24030 | epoch avg. loss: 0.009 | test avg. loss: 4.904
Epochs: 24031 | epoch avg. loss: 0.006 | test avg. loss: 4.878
Epochs: 24032 | epoch avg. loss: 0.007 | test avg. loss: 4.872


 48%|████▊     | 24035/50000 [35:56<33:36, 12.87it/s]

Epochs: 24033 | epoch avg. loss: 0.006 | test avg. loss: 4.865
Epochs: 24034 | epoch avg. loss: 0.006 | test avg. loss: 4.904
Epochs: 24035 | epoch avg. loss: 0.004 | test avg. loss: 4.932


 48%|████▊     | 24039/50000 [35:56<33:04, 13.08it/s]

Epochs: 24036 | epoch avg. loss: 0.005 | test avg. loss: 4.928
Epochs: 24037 | epoch avg. loss: 0.006 | test avg. loss: 4.887
Epochs: 24038 | epoch avg. loss: 0.004 | test avg. loss: 4.952


 48%|████▊     | 24041/50000 [35:56<31:28, 13.75it/s]

Epochs: 24039 | epoch avg. loss: 0.015 | test avg. loss: 4.915
Epochs: 24040 | epoch avg. loss: 0.004 | test avg. loss: 4.942
Epochs: 24041 | epoch avg. loss: 0.006 | test avg. loss: 4.865


 48%|████▊     | 24045/50000 [35:56<32:13, 13.43it/s]

Epochs: 24042 | epoch avg. loss: 0.007 | test avg. loss: 4.885
Epochs: 24043 | epoch avg. loss: 0.013 | test avg. loss: 4.897
Epochs: 24044 | epoch avg. loss: 0.012 | test avg. loss: 4.901


 48%|████▊     | 24047/50000 [35:56<32:31, 13.30it/s]

Epochs: 24045 | epoch avg. loss: 0.039 | test avg. loss: 5.142
Epochs: 24046 | epoch avg. loss: 0.062 | test avg. loss: 5.076
Epochs: 24047 | epoch avg. loss: 0.029 | test avg. loss: 4.928


 48%|████▊     | 24051/50000 [35:57<32:12, 13.43it/s]

Epochs: 24048 | epoch avg. loss: 0.033 | test avg. loss: 5.115
Epochs: 24049 | epoch avg. loss: 0.145 | test avg. loss: 4.835
Epochs: 24050 | epoch avg. loss: 0.075 | test avg. loss: 4.845


 48%|████▊     | 24055/50000 [35:57<30:31, 14.16it/s]

Epochs: 24051 | epoch avg. loss: 0.084 | test avg. loss: 5.115
Epochs: 24052 | epoch avg. loss: 0.069 | test avg. loss: 4.918
Epochs: 24053 | epoch avg. loss: 0.032 | test avg. loss: 4.900
Epochs: 24054 | epoch avg. loss: 0.027 | test avg. loss: 5.153


 48%|████▊     | 24059/50000 [35:57<29:05, 14.87it/s]

Epochs: 24055 | epoch avg. loss: 0.057 | test avg. loss: 4.947
Epochs: 24056 | epoch avg. loss: 0.033 | test avg. loss: 4.920
Epochs: 24057 | epoch avg. loss: 0.016 | test avg. loss: 4.959
Epochs: 24058 | epoch avg. loss: 0.021 | test avg. loss: 4.741


 48%|████▊     | 24061/50000 [35:57<28:42, 15.06it/s]

Epochs: 24059 | epoch avg. loss: 0.025 | test avg. loss: 4.851
Epochs: 24060 | epoch avg. loss: 0.034 | test avg. loss: 4.915
Epochs: 24061 | epoch avg. loss: 0.030 | test avg. loss: 4.852


 48%|████▊     | 24065/50000 [35:58<32:55, 13.13it/s]

Epochs: 24062 | epoch avg. loss: 0.011 | test avg. loss: 4.913
Epochs: 24063 | epoch avg. loss: 0.012 | test avg. loss: 5.062
Epochs: 24064 | epoch avg. loss: 0.044 | test avg. loss: 4.934


 48%|████▊     | 24067/50000 [35:58<33:46, 12.80it/s]

Epochs: 24065 | epoch avg. loss: 0.018 | test avg. loss: 4.836
Epochs: 24066 | epoch avg. loss: 0.023 | test avg. loss: 4.921
Epochs: 24067 | epoch avg. loss: 0.025 | test avg. loss: 4.798


 48%|████▊     | 24071/50000 [35:58<30:49, 14.02it/s]

Epochs: 24068 | epoch avg. loss: 0.023 | test avg. loss: 4.803
Epochs: 24069 | epoch avg. loss: 0.058 | test avg. loss: 5.135
Epochs: 24070 | epoch avg. loss: 0.082 | test avg. loss: 5.130
Epochs: 24071 | epoch avg. loss: 0.038 | test avg. loss: 4.904


 48%|████▊     | 24075/50000 [35:58<30:54, 13.98it/s]

Epochs: 24072 | epoch avg. loss: 0.022 | test avg. loss: 4.887
Epochs: 24073 | epoch avg. loss: 0.013 | test avg. loss: 4.962
Epochs: 24074 | epoch avg. loss: 0.029 | test avg. loss: 4.843


 48%|████▊     | 24077/50000 [35:59<30:59, 13.94it/s]

Epochs: 24075 | epoch avg. loss: 0.082 | test avg. loss: 4.913
Epochs: 24076 | epoch avg. loss: 0.025 | test avg. loss: 5.166
Epochs: 24077 | epoch avg. loss: 0.065 | test avg. loss: 4.957


 48%|████▊     | 24081/50000 [35:59<31:13, 13.83it/s]

Epochs: 24078 | epoch avg. loss: 0.044 | test avg. loss: 4.868
Epochs: 24079 | epoch avg. loss: 0.073 | test avg. loss: 4.981
Epochs: 24080 | epoch avg. loss: 0.066 | test avg. loss: 5.067


 48%|████▊     | 24083/50000 [35:59<31:33, 13.69it/s]

Epochs: 24081 | epoch avg. loss: 0.065 | test avg. loss: 4.815
Epochs: 24082 | epoch avg. loss: 0.041 | test avg. loss: 4.937
Epochs: 24083 | epoch avg. loss: 0.085 | test avg. loss: 5.142


 48%|████▊     | 24087/50000 [35:59<31:29, 13.71it/s]

Epochs: 24084 | epoch avg. loss: 0.074 | test avg. loss: 4.921
Epochs: 24085 | epoch avg. loss: 0.215 | test avg. loss: 4.913
Epochs: 24086 | epoch avg. loss: 0.082 | test avg. loss: 5.299


 48%|████▊     | 24089/50000 [35:59<31:26, 13.73it/s]

Epochs: 24087 | epoch avg. loss: 0.161 | test avg. loss: 4.890
Epochs: 24088 | epoch avg. loss: 0.444 | test avg. loss: 4.781
Epochs: 24089 | epoch avg. loss: 0.215 | test avg. loss: 5.430




Epochs: 24090 | epoch avg. loss: 0.327 | test avg. loss: 4.886
Epochs: 24091 | epoch avg. loss: 0.089 | test avg. loss: 5.056
Epochs: 24092 | epoch avg. loss: 0.058 | test avg. loss: 5.025


 48%|████▊     | 24095/50000 [36:00<30:55, 13.96it/s]

Epochs: 24093 | epoch avg. loss: 0.048 | test avg. loss: 4.736
Epochs: 24094 | epoch avg. loss: 0.126 | test avg. loss: 4.721
Epochs: 24095 | epoch avg. loss: 0.097 | test avg. loss: 4.756


 48%|████▊     | 24099/50000 [36:00<30:41, 14.06it/s]

Epochs: 24096 | epoch avg. loss: 0.071 | test avg. loss: 4.859
Epochs: 24097 | epoch avg. loss: 0.195 | test avg. loss: 5.325
Epochs: 24098 | epoch avg. loss: 0.183 | test avg. loss: 5.329
Epochs: 24099 | epoch avg. loss: 0.098 | test avg. loss: 5.166


 48%|████▊     | 24103/50000 [36:02<1:42:38,  4.21it/s]

Epochs: 24100 | epoch avg. loss: 0.155 | test avg. loss: 6.025
Epochs: 24101 | epoch avg. loss: 0.685 | test avg. loss: 4.924
Epochs: 24102 | epoch avg. loss: 0.171 | test avg. loss: 5.025


 48%|████▊     | 24105/50000 [36:02<1:22:20,  5.24it/s]

Epochs: 24103 | epoch avg. loss: 0.362 | test avg. loss: 5.366
Epochs: 24104 | epoch avg. loss: 0.224 | test avg. loss: 5.224
Epochs: 24105 | epoch avg. loss: 0.091 | test avg. loss: 5.275


 48%|████▊     | 24109/50000 [36:02<57:49,  7.46it/s]  

Epochs: 24106 | epoch avg. loss: 0.162 | test avg. loss: 5.943
Epochs: 24107 | epoch avg. loss: 0.457 | test avg. loss: 4.995
Epochs: 24108 | epoch avg. loss: 0.066 | test avg. loss: 4.829


 48%|████▊     | 24111/50000 [36:03<50:04,  8.62it/s]

Epochs: 24109 | epoch avg. loss: 0.087 | test avg. loss: 5.538
Epochs: 24110 | epoch avg. loss: 0.189 | test avg. loss: 5.143
Epochs: 24111 | epoch avg. loss: 0.331 | test avg. loss: 5.355
Epochs: 24112 | epoch avg. loss: 0.223 | test avg. loss: 5.908


 48%|████▊     | 24115/50000 [36:03<39:38, 10.88it/s]

Epochs: 24113 | epoch avg. loss: 0.311 | test avg. loss: 5.018
Epochs: 24114 | epoch avg. loss: 0.400 | test avg. loss: 4.949
Epochs: 24115 | epoch avg. loss: 0.165 | test avg. loss: 5.582


 48%|████▊     | 24119/50000 [36:03<36:16, 11.89it/s]

Epochs: 24116 | epoch avg. loss: 0.228 | test avg. loss: 5.321
Epochs: 24117 | epoch avg. loss: 0.395 | test avg. loss: 5.767
Epochs: 24118 | epoch avg. loss: 0.382 | test avg. loss: 5.487


 48%|████▊     | 24121/50000 [36:03<34:15, 12.59it/s]

Epochs: 24119 | epoch avg. loss: 0.137 | test avg. loss: 5.221
Epochs: 24120 | epoch avg. loss: 0.166 | test avg. loss: 5.492
Epochs: 24121 | epoch avg. loss: 0.245 | test avg. loss: 4.818


 48%|████▊     | 24125/50000 [36:04<32:29, 13.27it/s]

Epochs: 24122 | epoch avg. loss: 0.071 | test avg. loss: 4.776
Epochs: 24123 | epoch avg. loss: 0.124 | test avg. loss: 5.397
Epochs: 24124 | epoch avg. loss: 0.211 | test avg. loss: 5.121


 48%|████▊     | 24127/50000 [36:04<32:12, 13.39it/s]

Epochs: 24125 | epoch avg. loss: 0.040 | test avg. loss: 5.124
Epochs: 24126 | epoch avg. loss: 0.025 | test avg. loss: 5.269
Epochs: 24127 | epoch avg. loss: 0.045 | test avg. loss: 5.145




Epochs: 24128 | epoch avg. loss: 0.017 | test avg. loss: 5.238
Epochs: 24129 | epoch avg. loss: 0.031 | test avg. loss: 5.220
Epochs: 24130 | epoch avg. loss: 0.018 | test avg. loss: 5.248


 48%|████▊     | 24133/50000 [36:04<30:14, 14.26it/s]

Epochs: 24131 | epoch avg. loss: 0.016 | test avg. loss: 5.301
Epochs: 24132 | epoch avg. loss: 0.018 | test avg. loss: 5.164
Epochs: 24133 | epoch avg. loss: 0.054 | test avg. loss: 5.290


 48%|████▊     | 24137/50000 [36:04<32:16, 13.36it/s]

Epochs: 24134 | epoch avg. loss: 0.046 | test avg. loss: 5.273
Epochs: 24135 | epoch avg. loss: 0.027 | test avg. loss: 5.147
Epochs: 24136 | epoch avg. loss: 0.073 | test avg. loss: 5.332


 48%|████▊     | 24139/50000 [36:05<31:32, 13.67it/s]

Epochs: 24137 | epoch avg. loss: 0.028 | test avg. loss: 5.446
Epochs: 24138 | epoch avg. loss: 0.031 | test avg. loss: 5.343
Epochs: 24139 | epoch avg. loss: 0.100 | test avg. loss: 5.444
Epochs: 24140 | epoch avg. loss: 0.024 | test avg. loss: 5.305




Epochs: 24141 | epoch avg. loss: 0.016 | test avg. loss: 5.179
Epochs: 24142 | epoch avg. loss: 0.018 | test avg. loss: 5.237
Epochs: 24143 | epoch avg. loss: 0.017 | test avg. loss: 5.142


 48%|████▊     | 24147/50000 [36:05<29:17, 14.71it/s]

Epochs: 24144 | epoch avg. loss: 0.013 | test avg. loss: 5.256
Epochs: 24145 | epoch avg. loss: 0.011 | test avg. loss: 5.354
Epochs: 24146 | epoch avg. loss: 0.013 | test avg. loss: 5.240
Epochs: 24147 | epoch avg. loss: 0.059 | test avg. loss: 5.283


 48%|████▊     | 24151/50000 [36:05<31:53, 13.51it/s]

Epochs: 24148 | epoch avg. loss: 0.017 | test avg. loss: 5.190
Epochs: 24149 | epoch avg. loss: 0.019 | test avg. loss: 5.054
Epochs: 24150 | epoch avg. loss: 0.051 | test avg. loss: 5.180


 48%|████▊     | 24153/50000 [36:06<30:46, 14.00it/s]

Epochs: 24151 | epoch avg. loss: 0.018 | test avg. loss: 5.218
Epochs: 24152 | epoch avg. loss: 0.010 | test avg. loss: 5.209
Epochs: 24153 | epoch avg. loss: 0.011 | test avg. loss: 5.374


 48%|████▊     | 24157/50000 [36:06<31:38, 13.61it/s]

Epochs: 24154 | epoch avg. loss: 0.035 | test avg. loss: 5.148
Epochs: 24155 | epoch avg. loss: 0.035 | test avg. loss: 5.091
Epochs: 24156 | epoch avg. loss: 0.028 | test avg. loss: 5.307


 48%|████▊     | 24159/50000 [36:06<31:30, 13.67it/s]

Epochs: 24157 | epoch avg. loss: 0.061 | test avg. loss: 5.143
Epochs: 24158 | epoch avg. loss: 0.082 | test avg. loss: 5.216
Epochs: 24159 | epoch avg. loss: 0.044 | test avg. loss: 5.527


 48%|████▊     | 24163/50000 [36:06<33:55, 12.69it/s]

Epochs: 24160 | epoch avg. loss: 0.094 | test avg. loss: 5.097
Epochs: 24161 | epoch avg. loss: 0.068 | test avg. loss: 4.954
Epochs: 24162 | epoch avg. loss: 0.045 | test avg. loss: 5.217


 48%|████▊     | 24165/50000 [36:07<36:08, 11.91it/s]

Epochs: 24163 | epoch avg. loss: 0.087 | test avg. loss: 5.031
Epochs: 24164 | epoch avg. loss: 0.204 | test avg. loss: 5.335
Epochs: 24165 | epoch avg. loss: 0.059 | test avg. loss: 5.899


 48%|████▊     | 24169/50000 [36:07<36:25, 11.82it/s]

Epochs: 24166 | epoch avg. loss: 0.123 | test avg. loss: 5.403
Epochs: 24167 | epoch avg. loss: 0.126 | test avg. loss: 5.299
Epochs: 24168 | epoch avg. loss: 0.040 | test avg. loss: 5.191


 48%|████▊     | 24171/50000 [36:07<34:27, 12.49it/s]

Epochs: 24169 | epoch avg. loss: 0.049 | test avg. loss: 4.943
Epochs: 24170 | epoch avg. loss: 0.045 | test avg. loss: 5.276
Epochs: 24171 | epoch avg. loss: 0.112 | test avg. loss: 5.325


 48%|████▊     | 24175/50000 [36:07<34:51, 12.35it/s]

Epochs: 24172 | epoch avg. loss: 0.038 | test avg. loss: 5.362
Epochs: 24173 | epoch avg. loss: 0.057 | test avg. loss: 5.860
Epochs: 24174 | epoch avg. loss: 0.217 | test avg. loss: 5.168


 48%|████▊     | 24177/50000 [36:08<36:03, 11.94it/s]

Epochs: 24175 | epoch avg. loss: 0.081 | test avg. loss: 5.086
Epochs: 24176 | epoch avg. loss: 0.051 | test avg. loss: 5.449
Epochs: 24177 | epoch avg. loss: 0.092 | test avg. loss: 5.241


 48%|████▊     | 24181/50000 [36:08<34:13, 12.57it/s]

Epochs: 24178 | epoch avg. loss: 0.087 | test avg. loss: 5.489
Epochs: 24179 | epoch avg. loss: 0.055 | test avg. loss: 5.471
Epochs: 24180 | epoch avg. loss: 0.026 | test avg. loss: 5.295


 48%|████▊     | 24183/50000 [36:08<34:36, 12.43it/s]

Epochs: 24181 | epoch avg. loss: 0.017 | test avg. loss: 5.164
Epochs: 24182 | epoch avg. loss: 0.016 | test avg. loss: 5.063
Epochs: 24183 | epoch avg. loss: 0.040 | test avg. loss: 5.203


 48%|████▊     | 24187/50000 [36:08<33:22, 12.89it/s]

Epochs: 24184 | epoch avg. loss: 0.013 | test avg. loss: 5.414
Epochs: 24185 | epoch avg. loss: 0.025 | test avg. loss: 5.332
Epochs: 24186 | epoch avg. loss: 0.041 | test avg. loss: 5.235


 48%|████▊     | 24189/50000 [36:09<36:15, 11.87it/s]

Epochs: 24187 | epoch avg. loss: 0.023 | test avg. loss: 5.228
Epochs: 24188 | epoch avg. loss: 0.037 | test avg. loss: 5.052
Epochs: 24189 | epoch avg. loss: 0.014 | test avg. loss: 5.062


 48%|████▊     | 24193/50000 [36:09<33:54, 12.68it/s]

Epochs: 24190 | epoch avg. loss: 0.012 | test avg. loss: 5.183
Epochs: 24191 | epoch avg. loss: 0.012 | test avg. loss: 5.220
Epochs: 24192 | epoch avg. loss: 0.012 | test avg. loss: 5.209


 48%|████▊     | 24195/50000 [36:09<33:19, 12.90it/s]

Epochs: 24193 | epoch avg. loss: 0.010 | test avg. loss: 5.126
Epochs: 24194 | epoch avg. loss: 0.007 | test avg. loss: 5.078
Epochs: 24195 | epoch avg. loss: 0.008 | test avg. loss: 5.184


 48%|████▊     | 24199/50000 [36:09<31:49, 13.51it/s]

Epochs: 24196 | epoch avg. loss: 0.010 | test avg. loss: 5.162
Epochs: 24197 | epoch avg. loss: 0.017 | test avg. loss: 5.223
Epochs: 24198 | epoch avg. loss: 0.023 | test avg. loss: 5.425


 48%|████▊     | 24199/50000 [36:09<31:49, 13.51it/s]

Epochs: 24199 | epoch avg. loss: 0.089 | test avg. loss: 5.203


 48%|████▊     | 24203/50000 [36:11<1:47:49,  3.99it/s]

Epochs: 24200 | epoch avg. loss: 0.037 | test avg. loss: 5.199
Epochs: 24201 | epoch avg. loss: 0.028 | test avg. loss: 5.328
Epochs: 24202 | epoch avg. loss: 0.021 | test avg. loss: 5.176


 48%|████▊     | 24205/50000 [36:11<1:26:20,  4.98it/s]

Epochs: 24203 | epoch avg. loss: 0.016 | test avg. loss: 5.191
Epochs: 24204 | epoch avg. loss: 0.023 | test avg. loss: 5.486
Epochs: 24205 | epoch avg. loss: 0.093 | test avg. loss: 5.053


 48%|████▊     | 24209/50000 [36:12<57:31,  7.47it/s]  

Epochs: 24206 | epoch avg. loss: 0.102 | test avg. loss: 5.142
Epochs: 24207 | epoch avg. loss: 0.093 | test avg. loss: 5.568
Epochs: 24208 | epoch avg. loss: 0.170 | test avg. loss: 5.168


 48%|████▊     | 24211/50000 [36:12<50:51,  8.45it/s]

Epochs: 24209 | epoch avg. loss: 0.130 | test avg. loss: 5.204
Epochs: 24210 | epoch avg. loss: 0.053 | test avg. loss: 5.358
Epochs: 24211 | epoch avg. loss: 0.049 | test avg. loss: 5.165


 48%|████▊     | 24215/50000 [36:12<44:56,  9.56it/s]

Epochs: 24212 | epoch avg. loss: 0.077 | test avg. loss: 5.213
Epochs: 24213 | epoch avg. loss: 0.021 | test avg. loss: 5.247
Epochs: 24214 | epoch avg. loss: 0.032 | test avg. loss: 5.178


 48%|████▊     | 24217/50000 [36:12<41:54, 10.25it/s]

Epochs: 24215 | epoch avg. loss: 0.024 | test avg. loss: 5.250
Epochs: 24216 | epoch avg. loss: 0.026 | test avg. loss: 5.484
Epochs: 24217 | epoch avg. loss: 0.025 | test avg. loss: 5.303


 48%|████▊     | 24221/50000 [36:13<38:34, 11.14it/s]

Epochs: 24218 | epoch avg. loss: 0.016 | test avg. loss: 5.196
Epochs: 24219 | epoch avg. loss: 0.012 | test avg. loss: 5.060
Epochs: 24220 | epoch avg. loss: 0.018 | test avg. loss: 4.946


 48%|████▊     | 24223/50000 [36:13<35:34, 12.07it/s]

Epochs: 24221 | epoch avg. loss: 0.014 | test avg. loss: 5.042
Epochs: 24222 | epoch avg. loss: 0.008 | test avg. loss: 5.270
Epochs: 24223 | epoch avg. loss: 0.010 | test avg. loss: 5.307


 48%|████▊     | 24227/50000 [36:13<34:42, 12.37it/s]

Epochs: 24224 | epoch avg. loss: 0.012 | test avg. loss: 5.271
Epochs: 24225 | epoch avg. loss: 0.011 | test avg. loss: 5.323
Epochs: 24226 | epoch avg. loss: 0.033 | test avg. loss: 5.130


 48%|████▊     | 24229/50000 [36:13<34:22, 12.50it/s]

Epochs: 24227 | epoch avg. loss: 0.021 | test avg. loss: 5.080
Epochs: 24228 | epoch avg. loss: 0.035 | test avg. loss: 5.499
Epochs: 24229 | epoch avg. loss: 0.133 | test avg. loss: 5.178


 48%|████▊     | 24233/50000 [36:14<36:19, 11.82it/s]

Epochs: 24230 | epoch avg. loss: 0.084 | test avg. loss: 5.177
Epochs: 24231 | epoch avg. loss: 0.082 | test avg. loss: 5.542
Epochs: 24232 | epoch avg. loss: 0.140 | test avg. loss: 5.113


 48%|████▊     | 24235/50000 [36:14<34:54, 12.30it/s]

Epochs: 24233 | epoch avg. loss: 0.059 | test avg. loss: 5.075
Epochs: 24234 | epoch avg. loss: 0.050 | test avg. loss: 5.500
Epochs: 24235 | epoch avg. loss: 0.174 | test avg. loss: 5.161


 48%|████▊     | 24239/50000 [36:14<35:47, 11.99it/s]

Epochs: 24236 | epoch avg. loss: 0.041 | test avg. loss: 5.147
Epochs: 24237 | epoch avg. loss: 0.057 | test avg. loss: 5.543
Epochs: 24238 | epoch avg. loss: 0.143 | test avg. loss: 5.362


 48%|████▊     | 24241/50000 [36:14<35:45, 12.00it/s]

Epochs: 24239 | epoch avg. loss: 0.108 | test avg. loss: 5.439
Epochs: 24240 | epoch avg. loss: 0.151 | test avg. loss: 5.776
Epochs: 24241 | epoch avg. loss: 0.183 | test avg. loss: 5.195


                                                     

Epochs: 24242 | epoch avg. loss: 0.091 | test avg. loss: 5.089
Epochs: 24243 | epoch avg. loss: 0.114 | test avg. loss: 5.619
Epochs: 24244 | epoch avg. loss: 0.176 | test avg. loss: 5.144


 48%|████▊     | 24247/50000 [36:15<35:00, 12.26it/s]

Epochs: 24245 | epoch avg. loss: 0.061 | test avg. loss: 5.072
Epochs: 24246 | epoch avg. loss: 0.138 | test avg. loss: 5.163
Epochs: 24247 | epoch avg. loss: 0.073 | test avg. loss: 5.310


 49%|████▊     | 24251/50000 [36:15<35:37, 12.05it/s]

Epochs: 24248 | epoch avg. loss: 0.066 | test avg. loss: 5.128
Epochs: 24249 | epoch avg. loss: 0.188 | test avg. loss: 5.231
Epochs: 24250 | epoch avg. loss: 0.147 | test avg. loss: 5.217


 49%|████▊     | 24253/50000 [36:15<33:50, 12.68it/s]

Epochs: 24251 | epoch avg. loss: 0.108 | test avg. loss: 5.301
Epochs: 24252 | epoch avg. loss: 0.325 | test avg. loss: 5.322
Epochs: 24253 | epoch avg. loss: 0.222 | test avg. loss: 5.191


 49%|████▊     | 24257/50000 [36:16<35:35, 12.05it/s]

Epochs: 24254 | epoch avg. loss: 0.080 | test avg. loss: 5.187
Epochs: 24255 | epoch avg. loss: 0.135 | test avg. loss: 5.589
Epochs: 24256 | epoch avg. loss: 0.121 | test avg. loss: 5.333


 49%|████▊     | 24259/50000 [36:16<34:44, 12.35it/s]

Epochs: 24257 | epoch avg. loss: 0.070 | test avg. loss: 5.340
Epochs: 24258 | epoch avg. loss: 0.079 | test avg. loss: 5.404
Epochs: 24259 | epoch avg. loss: 0.026 | test avg. loss: 5.219


 49%|████▊     | 24263/50000 [36:16<31:53, 13.45it/s]

Epochs: 24260 | epoch avg. loss: 0.035 | test avg. loss: 5.072
Epochs: 24261 | epoch avg. loss: 0.033 | test avg. loss: 5.197
Epochs: 24262 | epoch avg. loss: 0.044 | test avg. loss: 5.143
Epochs: 24263 | epoch avg. loss: 0.030 | test avg. loss: 5.250


 49%|████▊     | 24267/50000 [36:16<31:06, 13.78it/s]

Epochs: 24264 | epoch avg. loss: 0.042 | test avg. loss: 5.554
Epochs: 24265 | epoch avg. loss: 0.069 | test avg. loss: 5.399
Epochs: 24266 | epoch avg. loss: 0.030 | test avg. loss: 5.160


 49%|████▊     | 24269/50000 [36:17<32:05, 13.36it/s]

Epochs: 24267 | epoch avg. loss: 0.061 | test avg. loss: 5.179
Epochs: 24268 | epoch avg. loss: 0.029 | test avg. loss: 4.962
Epochs: 24269 | epoch avg. loss: 0.016 | test avg. loss: 4.996


 49%|████▊     | 24273/50000 [36:17<33:55, 12.64it/s]

Epochs: 24270 | epoch avg. loss: 0.020 | test avg. loss: 5.220
Epochs: 24271 | epoch avg. loss: 0.029 | test avg. loss: 5.197
Epochs: 24272 | epoch avg. loss: 0.021 | test avg. loss: 5.219


 49%|████▊     | 24275/50000 [36:17<34:26, 12.45it/s]

Epochs: 24273 | epoch avg. loss: 0.013 | test avg. loss: 5.228
Epochs: 24274 | epoch avg. loss: 0.010 | test avg. loss: 5.202
Epochs: 24275 | epoch avg. loss: 0.013 | test avg. loss: 5.133


 49%|████▊     | 24279/50000 [36:17<34:07, 12.56it/s]

Epochs: 24276 | epoch avg. loss: 0.009 | test avg. loss: 5.284
Epochs: 24277 | epoch avg. loss: 0.019 | test avg. loss: 5.331
Epochs: 24278 | epoch avg. loss: 0.014 | test avg. loss: 5.184


 49%|████▊     | 24281/50000 [36:18<33:40, 12.73it/s]

Epochs: 24279 | epoch avg. loss: 0.020 | test avg. loss: 5.207
Epochs: 24280 | epoch avg. loss: 0.012 | test avg. loss: 5.183
Epochs: 24281 | epoch avg. loss: 0.009 | test avg. loss: 5.099


                                                     

Epochs: 24282 | epoch avg. loss: 0.009 | test avg. loss: 5.180
Epochs: 24283 | epoch avg. loss: 0.008 | test avg. loss: 5.212
Epochs: 24284 | epoch avg. loss: 0.007 | test avg. loss: 5.171


 49%|████▊     | 24289/50000 [36:18<29:46, 14.39it/s]

Epochs: 24285 | epoch avg. loss: 0.024 | test avg. loss: 5.226
Epochs: 24286 | epoch avg. loss: 0.007 | test avg. loss: 5.241
Epochs: 24287 | epoch avg. loss: 0.005 | test avg. loss: 5.142
Epochs: 24288 | epoch avg. loss: 0.011 | test avg. loss: 5.160


 49%|████▊     | 24293/50000 [36:18<28:48, 14.87it/s]

Epochs: 24289 | epoch avg. loss: 0.006 | test avg. loss: 5.140
Epochs: 24290 | epoch avg. loss: 0.005 | test avg. loss: 5.176
Epochs: 24291 | epoch avg. loss: 0.011 | test avg. loss: 5.190
Epochs: 24292 | epoch avg. loss: 0.006 | test avg. loss: 5.164


 49%|████▊     | 24295/50000 [36:19<31:27, 13.62it/s]

Epochs: 24293 | epoch avg. loss: 0.008 | test avg. loss: 5.144
Epochs: 24294 | epoch avg. loss: 0.007 | test avg. loss: 5.282
Epochs: 24295 | epoch avg. loss: 0.037 | test avg. loss: 5.193


 49%|████▊     | 24299/50000 [36:19<35:44, 11.99it/s]

Epochs: 24296 | epoch avg. loss: 0.012 | test avg. loss: 5.058
Epochs: 24297 | epoch avg. loss: 0.016 | test avg. loss: 5.195
Epochs: 24298 | epoch avg. loss: 0.014 | test avg. loss: 5.193


 49%|████▊     | 24299/50000 [36:19<35:44, 11.99it/s]

Epochs: 24299 | epoch avg. loss: 0.009 | test avg. loss: 5.161


 49%|████▊     | 24303/50000 [36:21<1:38:22,  4.35it/s]

Epochs: 24300 | epoch avg. loss: 0.032 | test avg. loss: 5.225
Epochs: 24301 | epoch avg. loss: 0.026 | test avg. loss: 5.304
Epochs: 24302 | epoch avg. loss: 0.028 | test avg. loss: 5.145


 49%|████▊     | 24307/50000 [36:21<1:02:17,  6.87it/s]

Epochs: 24303 | epoch avg. loss: 0.062 | test avg. loss: 5.286
Epochs: 24304 | epoch avg. loss: 0.042 | test avg. loss: 5.547
Epochs: 24305 | epoch avg. loss: 0.092 | test avg. loss: 5.110
Epochs: 24306 | epoch avg. loss: 0.091 | test avg. loss: 5.067


 49%|████▊     | 24311/50000 [36:21<44:34,  9.60it/s]

Epochs: 24307 | epoch avg. loss: 0.031 | test avg. loss: 5.271
Epochs: 24308 | epoch avg. loss: 0.043 | test avg. loss: 5.071
Epochs: 24309 | epoch avg. loss: 0.089 | test avg. loss: 5.074
Epochs: 24310 | epoch avg. loss: 0.033 | test avg. loss: 5.346


 49%|████▊     | 24315/50000 [36:21<36:14, 11.81it/s]

Epochs: 24311 | epoch avg. loss: 0.053 | test avg. loss: 5.105
Epochs: 24312 | epoch avg. loss: 0.108 | test avg. loss: 5.187
Epochs: 24313 | epoch avg. loss: 0.098 | test avg. loss: 5.437
Epochs: 24314 | epoch avg. loss: 0.075 | test avg. loss: 5.369


 49%|████▊     | 24317/50000 [36:22<35:05, 12.20it/s]

Epochs: 24315 | epoch avg. loss: 0.251 | test avg. loss: 5.250
Epochs: 24316 | epoch avg. loss: 0.043 | test avg. loss: 5.296
Epochs: 24317 | epoch avg. loss: 0.058 | test avg. loss: 5.030


 49%|████▊     | 24321/50000 [36:22<32:22, 13.22it/s]

Epochs: 24318 | epoch avg. loss: 0.058 | test avg. loss: 5.204
Epochs: 24319 | epoch avg. loss: 0.045 | test avg. loss: 5.449
Epochs: 24320 | epoch avg. loss: 0.043 | test avg. loss: 5.338
Epochs: 24321 | epoch avg. loss: 0.061 | test avg. loss: 5.411


 49%|████▊     | 24325/50000 [36:22<30:01, 14.25it/s]

Epochs: 24322 | epoch avg. loss: 0.070 | test avg. loss: 5.399
Epochs: 24323 | epoch avg. loss: 0.058 | test avg. loss: 5.048
Epochs: 24324 | epoch avg. loss: 0.237 | test avg. loss: 5.004
Epochs: 24325 | epoch avg. loss: 0.057 | test avg. loss: 5.449


 49%|████▊     | 24329/50000 [36:22<28:36, 14.96it/s]

Epochs: 24326 | epoch avg. loss: 0.153 | test avg. loss: 5.314
Epochs: 24327 | epoch avg. loss: 0.036 | test avg. loss: 5.340
Epochs: 24328 | epoch avg. loss: 0.062 | test avg. loss: 5.626
Epochs: 24329 | epoch avg. loss: 0.165 | test avg. loss: 5.370


 49%|████▊     | 24333/50000 [36:23<29:35, 14.46it/s]

Epochs: 24330 | epoch avg. loss: 0.055 | test avg. loss: 5.108
Epochs: 24331 | epoch avg. loss: 0.174 | test avg. loss: 5.270
Epochs: 24332 | epoch avg. loss: 0.120 | test avg. loss: 5.532


 49%|████▊     | 24337/50000 [36:23<29:16, 14.61it/s]

Epochs: 24333 | epoch avg. loss: 0.087 | test avg. loss: 5.354
Epochs: 24334 | epoch avg. loss: 0.161 | test avg. loss: 5.388
Epochs: 24335 | epoch avg. loss: 0.059 | test avg. loss: 5.381
Epochs: 24336 | epoch avg. loss: 0.034 | test avg. loss: 5.273


 49%|████▊     | 24341/50000 [36:23<27:55, 15.31it/s]

Epochs: 24337 | epoch avg. loss: 0.017 | test avg. loss: 5.308
Epochs: 24338 | epoch avg. loss: 0.015 | test avg. loss: 5.268
Epochs: 24339 | epoch avg. loss: 0.014 | test avg. loss: 5.171
Epochs: 24340 | epoch avg. loss: 0.053 | test avg. loss: 5.314


 49%|████▊     | 24345/50000 [36:23<27:09, 15.75it/s]

Epochs: 24341 | epoch avg. loss: 0.063 | test avg. loss: 5.385
Epochs: 24342 | epoch avg. loss: 0.033 | test avg. loss: 5.270
Epochs: 24343 | epoch avg. loss: 0.023 | test avg. loss: 5.452
Epochs: 24344 | epoch avg. loss: 0.051 | test avg. loss: 5.235


 49%|████▊     | 24349/50000 [36:24<27:47, 15.39it/s]

Epochs: 24345 | epoch avg. loss: 0.046 | test avg. loss: 5.148
Epochs: 24346 | epoch avg. loss: 0.100 | test avg. loss: 5.353
Epochs: 24347 | epoch avg. loss: 0.107 | test avg. loss: 5.298
Epochs: 24348 | epoch avg. loss: 0.034 | test avg. loss: 5.334


 49%|████▊     | 24353/50000 [36:24<27:47, 15.38it/s]

Epochs: 24349 | epoch avg. loss: 0.050 | test avg. loss: 5.659
Epochs: 24350 | epoch avg. loss: 0.116 | test avg. loss: 5.378
Epochs: 24351 | epoch avg. loss: 0.055 | test avg. loss: 5.332
Epochs: 24352 | epoch avg. loss: 0.061 | test avg. loss: 5.300


 49%|████▊     | 24357/50000 [36:24<27:11, 15.71it/s]

Epochs: 24353 | epoch avg. loss: 0.057 | test avg. loss: 4.971
Epochs: 24354 | epoch avg. loss: 0.099 | test avg. loss: 5.196
Epochs: 24355 | epoch avg. loss: 0.077 | test avg. loss: 5.024
Epochs: 24356 | epoch avg. loss: 0.028 | test avg. loss: 4.878




Epochs: 24357 | epoch avg. loss: 0.051 | test avg. loss: 4.972
Epochs: 24358 | epoch avg. loss: 0.020 | test avg. loss: 4.937
Epochs: 24359 | epoch avg. loss: 0.013 | test avg. loss: 4.978


 49%|████▊     | 24363/50000 [36:24<28:39, 14.91it/s]

Epochs: 24360 | epoch avg. loss: 0.010 | test avg. loss: 5.071
Epochs: 24361 | epoch avg. loss: 0.010 | test avg. loss: 4.971
Epochs: 24362 | epoch avg. loss: 0.025 | test avg. loss: 4.929


 49%|████▊     | 24365/50000 [36:25<29:50, 14.32it/s]

Epochs: 24363 | epoch avg. loss: 0.009 | test avg. loss: 4.930
Epochs: 24364 | epoch avg. loss: 0.013 | test avg. loss: 4.853
Epochs: 24365 | epoch avg. loss: 0.038 | test avg. loss: 4.979


 49%|████▊     | 24369/50000 [36:25<30:10, 14.16it/s]

Epochs: 24366 | epoch avg. loss: 0.009 | test avg. loss: 5.105
Epochs: 24367 | epoch avg. loss: 0.016 | test avg. loss: 4.981
Epochs: 24368 | epoch avg. loss: 0.027 | test avg. loss: 4.893


 49%|████▊     | 24371/50000 [36:25<30:12, 14.14it/s]

Epochs: 24369 | epoch avg. loss: 0.013 | test avg. loss: 4.980
Epochs: 24370 | epoch avg. loss: 0.025 | test avg. loss: 4.929
Epochs: 24371 | epoch avg. loss: 0.008 | test avg. loss: 4.965


 49%|████▉     | 24375/50000 [36:25<31:38, 13.50it/s]

Epochs: 24372 | epoch avg. loss: 0.012 | test avg. loss: 5.306
Epochs: 24373 | epoch avg. loss: 0.112 | test avg. loss: 5.054
Epochs: 24374 | epoch avg. loss: 0.019 | test avg. loss: 4.869


 49%|████▉     | 24377/50000 [36:26<33:40, 12.68it/s]

Epochs: 24375 | epoch avg. loss: 0.036 | test avg. loss: 5.044
Epochs: 24376 | epoch avg. loss: 0.060 | test avg. loss: 4.926
Epochs: 24377 | epoch avg. loss: 0.022 | test avg. loss: 4.909


 49%|████▉     | 24381/50000 [36:26<35:38, 11.98it/s]

Epochs: 24378 | epoch avg. loss: 0.052 | test avg. loss: 5.085
Epochs: 24379 | epoch avg. loss: 0.053 | test avg. loss: 5.115
Epochs: 24380 | epoch avg. loss: 0.032 | test avg. loss: 4.976


 49%|████▉     | 24383/50000 [36:26<35:13, 12.12it/s]

Epochs: 24381 | epoch avg. loss: 0.041 | test avg. loss: 5.031
Epochs: 24382 | epoch avg. loss: 0.015 | test avg. loss: 5.205
Epochs: 24383 | epoch avg. loss: 0.047 | test avg. loss: 5.010


 49%|████▉     | 24387/50000 [36:26<35:02, 12.18it/s]

Epochs: 24384 | epoch avg. loss: 0.045 | test avg. loss: 4.994
Epochs: 24385 | epoch avg. loss: 0.090 | test avg. loss: 5.210
Epochs: 24386 | epoch avg. loss: 0.092 | test avg. loss: 5.136


 49%|████▉     | 24391/50000 [36:27<32:10, 13.27it/s]

Epochs: 24387 | epoch avg. loss: 0.035 | test avg. loss: 4.911
Epochs: 24388 | epoch avg. loss: 0.057 | test avg. loss: 4.872
Epochs: 24389 | epoch avg. loss: 0.027 | test avg. loss: 4.987
Epochs: 24390 | epoch avg. loss: 0.046 | test avg. loss: 4.866


 49%|████▉     | 24393/50000 [36:27<32:21, 13.19it/s]

Epochs: 24391 | epoch avg. loss: 0.046 | test avg. loss: 5.094
Epochs: 24392 | epoch avg. loss: 0.026 | test avg. loss: 5.200
Epochs: 24393 | epoch avg. loss: 0.033 | test avg. loss: 4.977


 49%|████▉     | 24397/50000 [36:27<34:32, 12.35it/s]

Epochs: 24394 | epoch avg. loss: 0.059 | test avg. loss: 4.897
Epochs: 24395 | epoch avg. loss: 0.041 | test avg. loss: 4.985
Epochs: 24396 | epoch avg. loss: 0.030 | test avg. loss: 4.906


 49%|████▉     | 24399/50000 [36:27<34:01, 12.54it/s]

Epochs: 24397 | epoch avg. loss: 0.019 | test avg. loss: 4.859
Epochs: 24398 | epoch avg. loss: 0.074 | test avg. loss: 5.019
Epochs: 24399 | epoch avg. loss: 0.027 | test avg. loss: 5.049


 49%|████▉     | 24403/50000 [36:29<1:51:51,  3.81it/s]

Epochs: 24400 | epoch avg. loss: 0.017 | test avg. loss: 4.996
Epochs: 24401 | epoch avg. loss: 0.010 | test avg. loss: 5.035
Epochs: 24402 | epoch avg. loss: 0.013 | test avg. loss: 4.932




Epochs: 24403 | epoch avg. loss: 0.021 | test avg. loss: 4.945
Epochs: 24404 | epoch avg. loss: 0.032 | test avg. loss: 5.204
Epochs: 24405 | epoch avg. loss: 0.038 | test avg. loss: 5.193


 49%|████▉     | 24409/50000 [36:30<58:13,  7.33it/s]  

Epochs: 24406 | epoch avg. loss: 0.014 | test avg. loss: 5.051
Epochs: 24407 | epoch avg. loss: 0.022 | test avg. loss: 5.128
Epochs: 24408 | epoch avg. loss: 0.055 | test avg. loss: 5.059


 49%|████▉     | 24413/50000 [36:30<42:32, 10.02it/s]

Epochs: 24409 | epoch avg. loss: 0.025 | test avg. loss: 4.948
Epochs: 24410 | epoch avg. loss: 0.039 | test avg. loss: 5.018
Epochs: 24411 | epoch avg. loss: 0.014 | test avg. loss: 5.182
Epochs: 24412 | epoch avg. loss: 0.044 | test avg. loss: 5.024


 49%|████▉     | 24415/50000 [36:30<40:20, 10.57it/s]

Epochs: 24413 | epoch avg. loss: 0.044 | test avg. loss: 4.968
Epochs: 24414 | epoch avg. loss: 0.047 | test avg. loss: 5.193
Epochs: 24415 | epoch avg. loss: 0.071 | test avg. loss: 5.049


 49%|████▉     | 24419/50000 [36:31<41:24, 10.30it/s]

Epochs: 24416 | epoch avg. loss: 0.031 | test avg. loss: 5.042
Epochs: 24417 | epoch avg. loss: 0.053 | test avg. loss: 5.239
Epochs: 24418 | epoch avg. loss: 0.053 | test avg. loss: 5.190


 49%|████▉     | 24421/50000 [36:31<41:28, 10.28it/s]

Epochs: 24419 | epoch avg. loss: 0.027 | test avg. loss: 5.012
Epochs: 24420 | epoch avg. loss: 0.019 | test avg. loss: 4.964
Epochs: 24421 | epoch avg. loss: 0.029 | test avg. loss: 4.960


 49%|████▉     | 24425/50000 [36:31<37:07, 11.48it/s]

Epochs: 24422 | epoch avg. loss: 0.029 | test avg. loss: 5.062
Epochs: 24423 | epoch avg. loss: 0.010 | test avg. loss: 5.076
Epochs: 24424 | epoch avg. loss: 0.014 | test avg. loss: 4.981


 49%|████▉     | 24427/50000 [36:31<36:59, 11.52it/s]

Epochs: 24425 | epoch avg. loss: 0.036 | test avg. loss: 5.028
Epochs: 24426 | epoch avg. loss: 0.015 | test avg. loss: 5.113
Epochs: 24427 | epoch avg. loss: 0.017 | test avg. loss: 5.006


 49%|████▉     | 24431/50000 [36:32<38:39, 11.02it/s]

Epochs: 24428 | epoch avg. loss: 0.057 | test avg. loss: 5.099
Epochs: 24429 | epoch avg. loss: 0.020 | test avg. loss: 5.190
Epochs: 24430 | epoch avg. loss: 0.020 | test avg. loss: 5.043


 49%|████▉     | 24433/50000 [36:32<37:10, 11.46it/s]

Epochs: 24431 | epoch avg. loss: 0.070 | test avg. loss: 5.058
Epochs: 24432 | epoch avg. loss: 0.020 | test avg. loss: 5.194
Epochs: 24433 | epoch avg. loss: 0.038 | test avg. loss: 5.008


                                                     

Epochs: 24434 | epoch avg. loss: 0.030 | test avg. loss: 5.015
Epochs: 24435 | epoch avg. loss: 0.018 | test avg. loss: 5.141
Epochs: 24436 | epoch avg. loss: 0.027 | test avg. loss: 5.002


 49%|████▉     | 24439/50000 [36:32<33:26, 12.74it/s]

Epochs: 24437 | epoch avg. loss: 0.013 | test avg. loss: 5.029
Epochs: 24438 | epoch avg. loss: 0.010 | test avg. loss: 5.166
Epochs: 24439 | epoch avg. loss: 0.020 | test avg. loss: 5.044


 49%|████▉     | 24441/50000 [36:33<34:59, 12.17it/s]

Epochs: 24440 | epoch avg. loss: 0.032 | test avg. loss: 5.039
Epochs: 24441 | epoch avg. loss: 0.075 | test avg. loss: 5.096


 49%|████▉     | 24445/50000 [36:33<38:23, 11.10it/s]

Epochs: 24442 | epoch avg. loss: 0.018 | test avg. loss: 5.083
Epochs: 24443 | epoch avg. loss: 0.016 | test avg. loss: 4.962
Epochs: 24444 | epoch avg. loss: 0.010 | test avg. loss: 5.044


 49%|████▉     | 24447/50000 [36:33<38:13, 11.14it/s]

Epochs: 24445 | epoch avg. loss: 0.006 | test avg. loss: 5.072
Epochs: 24446 | epoch avg. loss: 0.005 | test avg. loss: 5.095
Epochs: 24447 | epoch avg. loss: 0.006 | test avg. loss: 5.117


 49%|████▉     | 24451/50000 [36:33<36:53, 11.54it/s]

Epochs: 24448 | epoch avg. loss: 0.008 | test avg. loss: 5.093
Epochs: 24449 | epoch avg. loss: 0.007 | test avg. loss: 5.003
Epochs: 24450 | epoch avg. loss: 0.008 | test avg. loss: 5.007


 49%|████▉     | 24453/50000 [36:34<37:40, 11.30it/s]

Epochs: 24451 | epoch avg. loss: 0.007 | test avg. loss: 5.055
Epochs: 24452 | epoch avg. loss: 0.006 | test avg. loss: 5.042
Epochs: 24453 | epoch avg. loss: 0.005 | test avg. loss: 5.049


 49%|████▉     | 24457/50000 [36:34<33:31, 12.70it/s]

Epochs: 24454 | epoch avg. loss: 0.005 | test avg. loss: 5.070
Epochs: 24455 | epoch avg. loss: 0.005 | test avg. loss: 5.117
Epochs: 24456 | epoch avg. loss: 0.013 | test avg. loss: 5.056


 49%|████▉     | 24459/50000 [36:34<32:18, 13.18it/s]

Epochs: 24457 | epoch avg. loss: 0.005 | test avg. loss: 5.032
Epochs: 24458 | epoch avg. loss: 0.005 | test avg. loss: 5.085
Epochs: 24459 | epoch avg. loss: 0.005 | test avg. loss: 5.092


 49%|████▉     | 24463/50000 [36:34<32:13, 13.21it/s]

Epochs: 24460 | epoch avg. loss: 0.004 | test avg. loss: 5.105
Epochs: 24461 | epoch avg. loss: 0.004 | test avg. loss: 5.075
Epochs: 24462 | epoch avg. loss: 0.005 | test avg. loss: 5.056


 49%|████▉     | 24465/50000 [36:35<34:23, 12.37it/s]

Epochs: 24463 | epoch avg. loss: 0.004 | test avg. loss: 5.102
Epochs: 24464 | epoch avg. loss: 0.021 | test avg. loss: 5.096
Epochs: 24465 | epoch avg. loss: 0.011 | test avg. loss: 4.988


 49%|████▉     | 24469/50000 [36:35<32:42, 13.01it/s]

Epochs: 24466 | epoch avg. loss: 0.019 | test avg. loss: 5.024
Epochs: 24467 | epoch avg. loss: 0.007 | test avg. loss: 5.141
Epochs: 24468 | epoch avg. loss: 0.019 | test avg. loss: 5.124


                                                     

Epochs: 24469 | epoch avg. loss: 0.009 | test avg. loss: 5.107
Epochs: 24470 | epoch avg. loss: 0.010 | test avg. loss: 5.189
Epochs: 24471 | epoch avg. loss: 0.070 | test avg. loss: 4.948


 49%|████▉     | 24475/50000 [36:35<30:17, 14.04it/s]

Epochs: 24472 | epoch avg. loss: 0.024 | test avg. loss: 4.865
Epochs: 24473 | epoch avg. loss: 0.061 | test avg. loss: 5.076
Epochs: 24474 | epoch avg. loss: 0.033 | test avg. loss: 5.226


 49%|████▉     | 24477/50000 [36:35<32:49, 12.96it/s]

Epochs: 24475 | epoch avg. loss: 0.031 | test avg. loss: 5.072
Epochs: 24476 | epoch avg. loss: 0.038 | test avg. loss: 5.059
Epochs: 24477 | epoch avg. loss: 0.023 | test avg. loss: 5.412


 49%|████▉     | 24481/50000 [36:36<32:55, 12.92it/s]

Epochs: 24478 | epoch avg. loss: 0.192 | test avg. loss: 5.173
Epochs: 24479 | epoch avg. loss: 0.031 | test avg. loss: 5.090
Epochs: 24480 | epoch avg. loss: 0.073 | test avg. loss: 5.285


 49%|████▉     | 24483/50000 [36:36<32:31, 13.08it/s]

Epochs: 24481 | epoch avg. loss: 0.071 | test avg. loss: 5.413
Epochs: 24482 | epoch avg. loss: 0.064 | test avg. loss: 5.083
Epochs: 24483 | epoch avg. loss: 0.100 | test avg. loss: 5.014


 49%|████▉     | 24487/50000 [36:36<33:57, 12.52it/s]

Epochs: 24484 | epoch avg. loss: 0.029 | test avg. loss: 5.273
Epochs: 24485 | epoch avg. loss: 0.090 | test avg. loss: 5.060
Epochs: 24486 | epoch avg. loss: 0.017 | test avg. loss: 5.054


 49%|████▉     | 24489/50000 [36:36<33:20, 12.75it/s]

Epochs: 24487 | epoch avg. loss: 0.048 | test avg. loss: 5.214
Epochs: 24488 | epoch avg. loss: 0.011 | test avg. loss: 5.323
Epochs: 24489 | epoch avg. loss: 0.011 | test avg. loss: 5.188


 49%|████▉     | 24493/50000 [36:37<34:02, 12.49it/s]

Epochs: 24490 | epoch avg. loss: 0.024 | test avg. loss: 5.101
Epochs: 24491 | epoch avg. loss: 0.009 | test avg. loss: 5.111
Epochs: 24492 | epoch avg. loss: 0.021 | test avg. loss: 4.992


 49%|████▉     | 24495/50000 [36:37<33:24, 12.72it/s]

Epochs: 24493 | epoch avg. loss: 0.020 | test avg. loss: 5.000
Epochs: 24494 | epoch avg. loss: 0.030 | test avg. loss: 5.141
Epochs: 24495 | epoch avg. loss: 0.007 | test avg. loss: 5.174


 49%|████▉     | 24499/50000 [36:37<31:38, 13.43it/s]

Epochs: 24496 | epoch avg. loss: 0.012 | test avg. loss: 5.121
Epochs: 24497 | epoch avg. loss: 0.012 | test avg. loss: 5.141
Epochs: 24498 | epoch avg. loss: 0.015 | test avg. loss: 5.112


 49%|████▉     | 24499/50000 [36:37<31:38, 13.43it/s]

Epochs: 24499 | epoch avg. loss: 0.011 | test avg. loss: 5.062


 49%|████▉     | 24503/50000 [36:39<1:40:16,  4.24it/s]

Epochs: 24500 | epoch avg. loss: 0.008 | test avg. loss: 5.094
Epochs: 24501 | epoch avg. loss: 0.007 | test avg. loss: 5.152
Epochs: 24502 | epoch avg. loss: 0.008 | test avg. loss: 5.162


 49%|████▉     | 24505/50000 [36:39<1:19:30,  5.34it/s]

Epochs: 24503 | epoch avg. loss: 0.008 | test avg. loss: 5.100
Epochs: 24504 | epoch avg. loss: 0.007 | test avg. loss: 5.059
Epochs: 24505 | epoch avg. loss: 0.006 | test avg. loss: 5.130


 49%|████▉     | 24509/50000 [36:39<54:00,  7.87it/s]  

Epochs: 24506 | epoch avg. loss: 0.020 | test avg. loss: 5.121
Epochs: 24507 | epoch avg. loss: 0.014 | test avg. loss: 5.094
Epochs: 24508 | epoch avg. loss: 0.014 | test avg. loss: 5.107


 49%|████▉     | 24511/50000 [36:40<48:03,  8.84it/s]

Epochs: 24509 | epoch avg. loss: 0.013 | test avg. loss: 5.268
Epochs: 24510 | epoch avg. loss: 0.046 | test avg. loss: 5.029
Epochs: 24511 | epoch avg. loss: 0.017 | test avg. loss: 4.912


 49%|████▉     | 24515/50000 [36:40<41:38, 10.20it/s]

Epochs: 24512 | epoch avg. loss: 0.055 | test avg. loss: 5.022
Epochs: 24513 | epoch avg. loss: 0.021 | test avg. loss: 5.176
Epochs: 24514 | epoch avg. loss: 0.030 | test avg. loss: 5.046


 49%|████▉     | 24517/50000 [36:40<38:15, 11.10it/s]

Epochs: 24515 | epoch avg. loss: 0.072 | test avg. loss: 5.060
Epochs: 24516 | epoch avg. loss: 0.024 | test avg. loss: 5.296
Epochs: 24517 | epoch avg. loss: 0.079 | test avg. loss: 4.973
Epochs: 24518 | epoch avg. loss: 0.052 | test avg. loss: 4.955


 49%|████▉     | 24521/50000 [36:40<33:46, 12.57it/s]

Epochs: 24519 | epoch avg. loss: 0.054 | test avg. loss: 5.252
Epochs: 24520 | epoch avg. loss: 0.075 | test avg. loss: 5.193
Epochs: 24521 | epoch avg. loss: 0.050 | test avg. loss: 5.121


 49%|████▉     | 24525/50000 [36:41<36:20, 11.68it/s]

Epochs: 24522 | epoch avg. loss: 0.049 | test avg. loss: 5.290
Epochs: 24523 | epoch avg. loss: 0.107 | test avg. loss: 5.029
Epochs: 24524 | epoch avg. loss: 0.041 | test avg. loss: 4.888


 49%|████▉     | 24527/50000 [36:41<34:25, 12.33it/s]

Epochs: 24525 | epoch avg. loss: 0.102 | test avg. loss: 5.048
Epochs: 24526 | epoch avg. loss: 0.027 | test avg. loss: 5.375
Epochs: 24527 | epoch avg. loss: 0.059 | test avg. loss: 5.064


 49%|████▉     | 24531/50000 [36:41<31:55, 13.30it/s]

Epochs: 24528 | epoch avg. loss: 0.186 | test avg. loss: 5.129
Epochs: 24529 | epoch avg. loss: 0.182 | test avg. loss: 5.341
Epochs: 24530 | epoch avg. loss: 0.286 | test avg. loss: 5.469
Epochs: 24531 | epoch avg. loss: 0.192 | test avg. loss: 5.005


 49%|████▉     | 24535/50000 [36:41<32:09, 13.20it/s]

Epochs: 24532 | epoch avg. loss: 0.134 | test avg. loss: 5.113
Epochs: 24533 | epoch avg. loss: 0.068 | test avg. loss: 5.324
Epochs: 24534 | epoch avg. loss: 0.088 | test avg. loss: 5.348


 49%|████▉     | 24537/50000 [36:42<33:04, 12.83it/s]

Epochs: 24535 | epoch avg. loss: 0.039 | test avg. loss: 5.151
Epochs: 24536 | epoch avg. loss: 0.115 | test avg. loss: 5.128
Epochs: 24537 | epoch avg. loss: 0.029 | test avg. loss: 5.369


 49%|████▉     | 24541/50000 [36:42<32:07, 13.21it/s]

Epochs: 24538 | epoch avg. loss: 0.086 | test avg. loss: 5.135
Epochs: 24539 | epoch avg. loss: 0.033 | test avg. loss: 5.055
Epochs: 24540 | epoch avg. loss: 0.078 | test avg. loss: 5.261


 49%|████▉     | 24543/50000 [36:42<30:41, 13.82it/s]

Epochs: 24541 | epoch avg. loss: 0.053 | test avg. loss: 5.416
Epochs: 24542 | epoch avg. loss: 0.096 | test avg. loss: 5.137
Epochs: 24543 | epoch avg. loss: 0.026 | test avg. loss: 5.083


 49%|████▉     | 24547/50000 [36:42<30:14, 14.03it/s]

Epochs: 24544 | epoch avg. loss: 0.103 | test avg. loss: 5.273
Epochs: 24545 | epoch avg. loss: 0.075 | test avg. loss: 5.363
Epochs: 24546 | epoch avg. loss: 0.079 | test avg. loss: 5.250
Epochs: 24547 | epoch avg. loss: 0.014 | test avg. loss: 5.256


 49%|████▉     | 24551/50000 [36:43<32:44, 12.96it/s]

Epochs: 24548 | epoch avg. loss: 0.033 | test avg. loss: 5.446
Epochs: 24549 | epoch avg. loss: 0.110 | test avg. loss: 5.299
Epochs: 24550 | epoch avg. loss: 0.103 | test avg. loss: 5.200


 49%|████▉     | 24555/50000 [36:43<30:31, 13.90it/s]

Epochs: 24551 | epoch avg. loss: 0.027 | test avg. loss: 5.228
Epochs: 24552 | epoch avg. loss: 0.057 | test avg. loss: 5.361
Epochs: 24553 | epoch avg. loss: 0.023 | test avg. loss: 5.402
Epochs: 24554 | epoch avg. loss: 0.012 | test avg. loss: 5.264


 49%|████▉     | 24557/50000 [36:43<29:34, 14.34it/s]

Epochs: 24555 | epoch avg. loss: 0.013 | test avg. loss: 5.179
Epochs: 24556 | epoch avg. loss: 0.031 | test avg. loss: 5.178
Epochs: 24557 | epoch avg. loss: 0.015 | test avg. loss: 5.145
Epochs: 24558 | epoch avg. loss: 0.023 | test avg. loss: 5.335


 49%|████▉     | 24563/50000 [36:43<28:07, 15.08it/s]

Epochs: 24559 | epoch avg. loss: 0.039 | test avg. loss: 5.487
Epochs: 24560 | epoch avg. loss: 0.054 | test avg. loss: 5.150
Epochs: 24561 | epoch avg. loss: 0.042 | test avg. loss: 5.119
Epochs: 24562 | epoch avg. loss: 0.035 | test avg. loss: 5.473


 49%|████▉     | 24565/50000 [36:44<32:38, 12.99it/s]

Epochs: 24563 | epoch avg. loss: 0.128 | test avg. loss: 5.238
Epochs: 24564 | epoch avg. loss: 0.087 | test avg. loss: 5.354
Epochs: 24565 | epoch avg. loss: 0.295 | test avg. loss: 5.215


 49%|████▉     | 24569/50000 [36:44<34:30, 12.28it/s]

Epochs: 24566 | epoch avg. loss: 0.134 | test avg. loss: 6.133
Epochs: 24567 | epoch avg. loss: 0.490 | test avg. loss: 4.979
Epochs: 24568 | epoch avg. loss: 0.612 | test avg. loss: 5.386


 49%|████▉     | 24571/50000 [36:44<33:20, 12.71it/s]

Epochs: 24569 | epoch avg. loss: 0.506 | test avg. loss: 6.254
Epochs: 24570 | epoch avg. loss: 0.959 | test avg. loss: 7.857
Epochs: 24571 | epoch avg. loss: 1.196 | test avg. loss: 6.865


 49%|████▉     | 24575/50000 [36:44<36:47, 11.52it/s]

Epochs: 24572 | epoch avg. loss: 2.885 | test avg. loss: 5.020
Epochs: 24573 | epoch avg. loss: 0.751 | test avg. loss: 7.531
Epochs: 24574 | epoch avg. loss: 1.465 | test avg. loss: 6.258


 49%|████▉     | 24577/50000 [36:45<36:24, 11.64it/s]

Epochs: 24575 | epoch avg. loss: 1.087 | test avg. loss: 6.099
Epochs: 24576 | epoch avg. loss: 0.486 | test avg. loss: 6.676
Epochs: 24577 | epoch avg. loss: 0.508 | test avg. loss: 5.251


 49%|████▉     | 24581/50000 [36:45<38:26, 11.02it/s]

Epochs: 24578 | epoch avg. loss: 0.329 | test avg. loss: 5.672
Epochs: 24579 | epoch avg. loss: 0.539 | test avg. loss: 6.489
Epochs: 24580 | epoch avg. loss: 0.538 | test avg. loss: 6.719


 49%|████▉     | 24583/50000 [36:45<37:54, 11.17it/s]

Epochs: 24581 | epoch avg. loss: 1.764 | test avg. loss: 5.842
Epochs: 24582 | epoch avg. loss: 0.391 | test avg. loss: 5.591
Epochs: 24583 | epoch avg. loss: 0.336 | test avg. loss: 5.318


 49%|████▉     | 24587/50000 [36:45<36:46, 11.52it/s]

Epochs: 24584 | epoch avg. loss: 0.586 | test avg. loss: 5.899
Epochs: 24585 | epoch avg. loss: 0.971 | test avg. loss: 6.330
Epochs: 24586 | epoch avg. loss: 0.545 | test avg. loss: 5.761


 49%|████▉     | 24589/50000 [36:46<37:38, 11.25it/s]

Epochs: 24587 | epoch avg. loss: 0.307 | test avg. loss: 6.581
Epochs: 24588 | epoch avg. loss: 0.366 | test avg. loss: 5.958
Epochs: 24589 | epoch avg. loss: 0.725 | test avg. loss: 5.927
Epochs: 24590 | epoch avg. loss: 0.487 | test avg. loss: 5.606


 49%|████▉     | 24593/50000 [36:46<34:09, 12.39it/s]

Epochs: 24591 | epoch avg. loss: 0.477 | test avg. loss: 5.318
Epochs: 24592 | epoch avg. loss: 0.515 | test avg. loss: 6.619
Epochs: 24593 | epoch avg. loss: 0.477 | test avg. loss: 6.303


 49%|████▉     | 24597/50000 [36:46<35:36, 11.89it/s]

Epochs: 24594 | epoch avg. loss: 0.254 | test avg. loss: 5.991
Epochs: 24595 | epoch avg. loss: 0.202 | test avg. loss: 6.282
Epochs: 24596 | epoch avg. loss: 0.408 | test avg. loss: 5.220


 49%|████▉     | 24599/50000 [36:47<36:27, 11.61it/s]

Epochs: 24597 | epoch avg. loss: 0.481 | test avg. loss: 5.507
Epochs: 24598 | epoch avg. loss: 0.166 | test avg. loss: 5.703
Epochs: 24599 | epoch avg. loss: 0.383 | test avg. loss: 6.096


 49%|████▉     | 24603/50000 [36:48<1:45:10,  4.02it/s]

Epochs: 24600 | epoch avg. loss: 0.430 | test avg. loss: 7.688
Epochs: 24601 | epoch avg. loss: 0.805 | test avg. loss: 6.444
Epochs: 24602 | epoch avg. loss: 1.270 | test avg. loss: 6.555


 49%|████▉     | 24605/50000 [36:49<1:24:05,  5.03it/s]

Epochs: 24603 | epoch avg. loss: 1.180 | test avg. loss: 5.070
Epochs: 24604 | epoch avg. loss: 0.645 | test avg. loss: 5.215
Epochs: 24605 | epoch avg. loss: 0.559 | test avg. loss: 7.061


 49%|████▉     | 24609/50000 [36:49<56:13,  7.53it/s]  

Epochs: 24606 | epoch avg. loss: 0.860 | test avg. loss: 6.912
Epochs: 24607 | epoch avg. loss: 1.285 | test avg. loss: 8.499
Epochs: 24608 | epoch avg. loss: 1.237 | test avg. loss: 6.063


 49%|████▉     | 24611/50000 [36:49<50:39,  8.35it/s]

Epochs: 24609 | epoch avg. loss: 0.483 | test avg. loss: 6.126
Epochs: 24610 | epoch avg. loss: 0.377 | test avg. loss: 5.427
Epochs: 24611 | epoch avg. loss: 0.359 | test avg. loss: 5.340


 49%|████▉     | 24615/50000 [36:49<42:11, 10.03it/s]

Epochs: 24612 | epoch avg. loss: 0.300 | test avg. loss: 6.148
Epochs: 24613 | epoch avg. loss: 0.337 | test avg. loss: 5.917
Epochs: 24614 | epoch avg. loss: 0.472 | test avg. loss: 6.648


 49%|████▉     | 24617/50000 [36:50<42:22,  9.99it/s]

Epochs: 24615 | epoch avg. loss: 0.356 | test avg. loss: 5.522
Epochs: 24616 | epoch avg. loss: 0.224 | test avg. loss: 5.815


 49%|████▉     | 24619/50000 [36:50<42:04, 10.06it/s]

Epochs: 24617 | epoch avg. loss: 0.172 | test avg. loss: 5.447
Epochs: 24618 | epoch avg. loss: 0.198 | test avg. loss: 5.650
Epochs: 24619 | epoch avg. loss: 0.151 | test avg. loss: 5.412


 49%|████▉     | 24623/50000 [36:50<38:48, 10.90it/s]

Epochs: 24620 | epoch avg. loss: 0.107 | test avg. loss: 5.486
Epochs: 24621 | epoch avg. loss: 0.155 | test avg. loss: 5.675
Epochs: 24622 | epoch avg. loss: 0.114 | test avg. loss: 5.446


 49%|████▉     | 24625/50000 [36:50<38:43, 10.92it/s]

Epochs: 24623 | epoch avg. loss: 0.056 | test avg. loss: 5.733
Epochs: 24624 | epoch avg. loss: 0.076 | test avg. loss: 5.513
Epochs: 24625 | epoch avg. loss: 0.076 | test avg. loss: 5.637


 49%|████▉     | 24629/50000 [36:51<38:16, 11.05it/s]

Epochs: 24626 | epoch avg. loss: 0.033 | test avg. loss: 5.671
Epochs: 24627 | epoch avg. loss: 0.033 | test avg. loss: 5.561
Epochs: 24628 | epoch avg. loss: 0.058 | test avg. loss: 5.866


 49%|████▉     | 24631/50000 [36:51<36:22, 11.63it/s]

Epochs: 24629 | epoch avg. loss: 0.093 | test avg. loss: 5.446
Epochs: 24630 | epoch avg. loss: 0.095 | test avg. loss: 5.696
Epochs: 24631 | epoch avg. loss: 0.053 | test avg. loss: 5.512


 49%|████▉     | 24635/50000 [36:51<35:45, 11.82it/s]

Epochs: 24632 | epoch avg. loss: 0.064 | test avg. loss: 5.606
Epochs: 24633 | epoch avg. loss: 0.036 | test avg. loss: 5.670
Epochs: 24634 | epoch avg. loss: 0.030 | test avg. loss: 5.466


 49%|████▉     | 24637/50000 [36:51<35:53, 11.78it/s]

Epochs: 24635 | epoch avg. loss: 0.021 | test avg. loss: 5.456
Epochs: 24636 | epoch avg. loss: 0.021 | test avg. loss: 5.306
Epochs: 24637 | epoch avg. loss: 0.025 | test avg. loss: 5.349


 49%|████▉     | 24641/50000 [36:52<35:57, 11.75it/s]

Epochs: 24638 | epoch avg. loss: 0.011 | test avg. loss: 5.408
Epochs: 24639 | epoch avg. loss: 0.014 | test avg. loss: 5.366
Epochs: 24640 | epoch avg. loss: 0.019 | test avg. loss: 5.498


 49%|████▉     | 24643/50000 [36:52<34:00, 12.43it/s]

Epochs: 24641 | epoch avg. loss: 0.033 | test avg. loss: 5.299
Epochs: 24642 | epoch avg. loss: 0.028 | test avg. loss: 5.416
Epochs: 24643 | epoch avg. loss: 0.030 | test avg. loss: 5.275


 49%|████▉     | 24647/50000 [36:52<31:27, 13.43it/s]

Epochs: 24644 | epoch avg. loss: 0.018 | test avg. loss: 5.389
Epochs: 24645 | epoch avg. loss: 0.020 | test avg. loss: 5.355
Epochs: 24646 | epoch avg. loss: 0.012 | test avg. loss: 5.402


 49%|████▉     | 24649/50000 [36:52<31:01, 13.62it/s]

Epochs: 24647 | epoch avg. loss: 0.023 | test avg. loss: 5.367
Epochs: 24648 | epoch avg. loss: 0.014 | test avg. loss: 5.341
Epochs: 24649 | epoch avg. loss: 0.016 | test avg. loss: 5.446


 49%|████▉     | 24653/50000 [36:52<31:02, 13.61it/s]

Epochs: 24650 | epoch avg. loss: 0.019 | test avg. loss: 5.344
Epochs: 24651 | epoch avg. loss: 0.026 | test avg. loss: 5.380
Epochs: 24652 | epoch avg. loss: 0.009 | test avg. loss: 5.333


 49%|████▉     | 24655/50000 [36:53<33:53, 12.46it/s]

Epochs: 24653 | epoch avg. loss: 0.007 | test avg. loss: 5.313
Epochs: 24654 | epoch avg. loss: 0.009 | test avg. loss: 5.320
Epochs: 24655 | epoch avg. loss: 0.009 | test avg. loss: 5.350




Epochs: 24656 | epoch avg. loss: 0.008 | test avg. loss: 5.309
Epochs: 24657 | epoch avg. loss: 0.011 | test avg. loss: 5.393
Epochs: 24658 | epoch avg. loss: 0.013 | test avg. loss: 5.332


 49%|████▉     | 24661/50000 [36:53<31:20, 13.47it/s]

Epochs: 24659 | epoch avg. loss: 0.015 | test avg. loss: 5.323
Epochs: 24660 | epoch avg. loss: 0.007 | test avg. loss: 5.283
Epochs: 24661 | epoch avg. loss: 0.007 | test avg. loss: 5.304


 49%|████▉     | 24665/50000 [36:53<32:18, 13.07it/s]

Epochs: 24662 | epoch avg. loss: 0.007 | test avg. loss: 5.320
Epochs: 24663 | epoch avg. loss: 0.006 | test avg. loss: 5.375
Epochs: 24664 | epoch avg. loss: 0.007 | test avg. loss: 5.361


 49%|████▉     | 24667/50000 [36:54<34:00, 12.41it/s]

Epochs: 24665 | epoch avg. loss: 0.006 | test avg. loss: 5.367
Epochs: 24666 | epoch avg. loss: 0.006 | test avg. loss: 5.337
Epochs: 24667 | epoch avg. loss: 0.007 | test avg. loss: 5.362


 49%|████▉     | 24671/50000 [36:54<31:04, 13.59it/s]

Epochs: 24668 | epoch avg. loss: 0.006 | test avg. loss: 5.308
Epochs: 24669 | epoch avg. loss: 0.009 | test avg. loss: 5.375
Epochs: 24670 | epoch avg. loss: 0.008 | test avg. loss: 5.282


 49%|████▉     | 24673/50000 [36:54<31:28, 13.41it/s]

Epochs: 24671 | epoch avg. loss: 0.017 | test avg. loss: 5.456
Epochs: 24672 | epoch avg. loss: 0.040 | test avg. loss: 5.273
Epochs: 24673 | epoch avg. loss: 0.036 | test avg. loss: 5.348


 49%|████▉     | 24677/50000 [36:54<32:50, 12.85it/s]

Epochs: 24674 | epoch avg. loss: 0.034 | test avg. loss: 5.380
Epochs: 24675 | epoch avg. loss: 0.032 | test avg. loss: 5.357
Epochs: 24676 | epoch avg. loss: 0.033 | test avg. loss: 5.476


 49%|████▉     | 24679/50000 [36:55<33:24, 12.63it/s]

Epochs: 24677 | epoch avg. loss: 0.025 | test avg. loss: 5.285
Epochs: 24678 | epoch avg. loss: 0.009 | test avg. loss: 5.293
Epochs: 24679 | epoch avg. loss: 0.012 | test avg. loss: 5.256


 49%|████▉     | 24683/50000 [36:55<33:20, 12.65it/s]

Epochs: 24680 | epoch avg. loss: 0.009 | test avg. loss: 5.368
Epochs: 24681 | epoch avg. loss: 0.012 | test avg. loss: 5.354
Epochs: 24682 | epoch avg. loss: 0.007 | test avg. loss: 5.367


 49%|████▉     | 24685/50000 [36:55<32:33, 12.96it/s]

Epochs: 24683 | epoch avg. loss: 0.007 | test avg. loss: 5.276
Epochs: 24684 | epoch avg. loss: 0.007 | test avg. loss: 5.324
Epochs: 24685 | epoch avg. loss: 0.013 | test avg. loss: 5.300


 49%|████▉     | 24689/50000 [36:55<32:27, 12.99it/s]

Epochs: 24686 | epoch avg. loss: 0.009 | test avg. loss: 5.372
Epochs: 24687 | epoch avg. loss: 0.009 | test avg. loss: 5.355
Epochs: 24688 | epoch avg. loss: 0.009 | test avg. loss: 5.343


 49%|████▉     | 24691/50000 [36:55<33:20, 12.65it/s]

Epochs: 24689 | epoch avg. loss: 0.010 | test avg. loss: 5.381
Epochs: 24690 | epoch avg. loss: 0.016 | test avg. loss: 5.258
Epochs: 24691 | epoch avg. loss: 0.043 | test avg. loss: 5.448


 49%|████▉     | 24695/50000 [36:56<33:46, 12.49it/s]

Epochs: 24692 | epoch avg. loss: 0.025 | test avg. loss: 5.306
Epochs: 24693 | epoch avg. loss: 0.019 | test avg. loss: 5.329
Epochs: 24694 | epoch avg. loss: 0.013 | test avg. loss: 5.346


 49%|████▉     | 24697/50000 [36:56<33:10, 12.71it/s]

Epochs: 24695 | epoch avg. loss: 0.014 | test avg. loss: 5.234
Epochs: 24696 | epoch avg. loss: 0.022 | test avg. loss: 5.344
Epochs: 24697 | epoch avg. loss: 0.012 | test avg. loss: 5.344


 49%|████▉     | 24699/50000 [36:56<33:15, 12.68it/s]

Epochs: 24698 | epoch avg. loss: 0.007 | test avg. loss: 5.349
Epochs: 24699 | epoch avg. loss: 0.007 | test avg. loss: 5.344


 49%|████▉     | 24703/50000 [36:58<1:42:57,  4.09it/s]

Epochs: 24700 | epoch avg. loss: 0.007 | test avg. loss: 5.352
Epochs: 24701 | epoch avg. loss: 0.006 | test avg. loss: 5.347
Epochs: 24702 | epoch avg. loss: 0.006 | test avg. loss: 5.349


 49%|████▉     | 24705/50000 [36:58<1:21:35,  5.17it/s]

Epochs: 24703 | epoch avg. loss: 0.005 | test avg. loss: 5.350
Epochs: 24704 | epoch avg. loss: 0.006 | test avg. loss: 5.329
Epochs: 24705 | epoch avg. loss: 0.008 | test avg. loss: 5.355


 49%|████▉     | 24709/50000 [36:58<56:40,  7.44it/s]  

Epochs: 24706 | epoch avg. loss: 0.005 | test avg. loss: 5.349
Epochs: 24707 | epoch avg. loss: 0.006 | test avg. loss: 5.327
Epochs: 24708 | epoch avg. loss: 0.007 | test avg. loss: 5.349


 49%|████▉     | 24711/50000 [36:59<49:36,  8.49it/s]

Epochs: 24709 | epoch avg. loss: 0.005 | test avg. loss: 5.347
Epochs: 24710 | epoch avg. loss: 0.006 | test avg. loss: 5.340
Epochs: 24711 | epoch avg. loss: 0.006 | test avg. loss: 5.349


 49%|████▉     | 24715/50000 [36:59<39:07, 10.77it/s]

Epochs: 24712 | epoch avg. loss: 0.006 | test avg. loss: 5.304
Epochs: 24713 | epoch avg. loss: 0.009 | test avg. loss: 5.383
Epochs: 24714 | epoch avg. loss: 0.016 | test avg. loss: 5.320
Epochs: 24715 | epoch avg. loss: 0.008 | test avg. loss: 5.328


 49%|████▉     | 24719/50000 [36:59<33:43, 12.49it/s]

Epochs: 24716 | epoch avg. loss: 0.005 | test avg. loss: 5.289
Epochs: 24717 | epoch avg. loss: 0.014 | test avg. loss: 5.351
Epochs: 24718 | epoch avg. loss: 0.008 | test avg. loss: 5.354


 49%|████▉     | 24721/50000 [36:59<33:01, 12.75it/s]

Epochs: 24719 | epoch avg. loss: 0.013 | test avg. loss: 5.324
Epochs: 24720 | epoch avg. loss: 0.014 | test avg. loss: 5.412
Epochs: 24721 | epoch avg. loss: 0.020 | test avg. loss: 5.253


 49%|████▉     | 24725/50000 [37:00<31:48, 13.24it/s]

Epochs: 24722 | epoch avg. loss: 0.017 | test avg. loss: 5.305
Epochs: 24723 | epoch avg. loss: 0.012 | test avg. loss: 5.267
Epochs: 24724 | epoch avg. loss: 0.007 | test avg. loss: 5.325


 49%|████▉     | 24727/50000 [37:00<31:00, 13.58it/s]

Epochs: 24725 | epoch avg. loss: 0.006 | test avg. loss: 5.303
Epochs: 24726 | epoch avg. loss: 0.010 | test avg. loss: 5.346
Epochs: 24727 | epoch avg. loss: 0.008 | test avg. loss: 5.380


 49%|████▉     | 24731/50000 [37:00<30:45, 13.69it/s]

Epochs: 24728 | epoch avg. loss: 0.013 | test avg. loss: 5.305
Epochs: 24729 | epoch avg. loss: 0.006 | test avg. loss: 5.355
Epochs: 24730 | epoch avg. loss: 0.015 | test avg. loss: 5.320


 49%|████▉     | 24733/50000 [37:00<30:06, 13.99it/s]

Epochs: 24731 | epoch avg. loss: 0.008 | test avg. loss: 5.330
Epochs: 24732 | epoch avg. loss: 0.008 | test avg. loss: 5.396
Epochs: 24733 | epoch avg. loss: 0.012 | test avg. loss: 5.327


 49%|████▉     | 24737/50000 [37:00<31:05, 13.54it/s]

Epochs: 24734 | epoch avg. loss: 0.009 | test avg. loss: 5.318
Epochs: 24735 | epoch avg. loss: 0.006 | test avg. loss: 5.328
Epochs: 24736 | epoch avg. loss: 0.007 | test avg. loss: 5.319


 49%|████▉     | 24739/50000 [37:01<32:32, 12.94it/s]

Epochs: 24737 | epoch avg. loss: 0.005 | test avg. loss: 5.326
Epochs: 24738 | epoch avg. loss: 0.005 | test avg. loss: 5.323
Epochs: 24739 | epoch avg. loss: 0.007 | test avg. loss: 5.388


 49%|████▉     | 24743/50000 [37:01<31:24, 13.40it/s]

Epochs: 24740 | epoch avg. loss: 0.015 | test avg. loss: 5.271
Epochs: 24741 | epoch avg. loss: 0.015 | test avg. loss: 5.291
Epochs: 24742 | epoch avg. loss: 0.006 | test avg. loss: 5.296


 49%|████▉     | 24745/50000 [37:01<31:20, 13.43it/s]

Epochs: 24743 | epoch avg. loss: 0.006 | test avg. loss: 5.286
Epochs: 24744 | epoch avg. loss: 0.006 | test avg. loss: 5.317
Epochs: 24745 | epoch avg. loss: 0.006 | test avg. loss: 5.266


 49%|████▉     | 24749/50000 [37:01<30:42, 13.70it/s]

Epochs: 24746 | epoch avg. loss: 0.011 | test avg. loss: 5.312
Epochs: 24747 | epoch avg. loss: 0.006 | test avg. loss: 5.313
Epochs: 24748 | epoch avg. loss: 0.007 | test avg. loss: 5.302


 50%|████▉     | 24751/50000 [37:02<33:19, 12.63it/s]

Epochs: 24749 | epoch avg. loss: 0.008 | test avg. loss: 5.360
Epochs: 24750 | epoch avg. loss: 0.008 | test avg. loss: 5.252
Epochs: 24751 | epoch avg. loss: 0.018 | test avg. loss: 5.343


 50%|████▉     | 24755/50000 [37:02<34:02, 12.36it/s]

Epochs: 24752 | epoch avg. loss: 0.010 | test avg. loss: 5.315
Epochs: 24753 | epoch avg. loss: 0.012 | test avg. loss: 5.313
Epochs: 24754 | epoch avg. loss: 0.012 | test avg. loss: 5.433


 50%|████▉     | 24757/50000 [37:02<33:41, 12.48it/s]

Epochs: 24755 | epoch avg. loss: 0.021 | test avg. loss: 5.263
Epochs: 24756 | epoch avg. loss: 0.039 | test avg. loss: 5.309
Epochs: 24757 | epoch avg. loss: 0.015 | test avg. loss: 5.442


 50%|████▉     | 24761/50000 [37:02<32:27, 12.96it/s]

Epochs: 24758 | epoch avg. loss: 0.025 | test avg. loss: 5.268
Epochs: 24759 | epoch avg. loss: 0.028 | test avg. loss: 5.350
Epochs: 24760 | epoch avg. loss: 0.023 | test avg. loss: 5.232


 50%|████▉     | 24763/50000 [37:03<35:14, 11.93it/s]

Epochs: 24761 | epoch avg. loss: 0.008 | test avg. loss: 5.194
Epochs: 24762 | epoch avg. loss: 0.009 | test avg. loss: 5.282
Epochs: 24763 | epoch avg. loss: 0.008 | test avg. loss: 5.319


 50%|████▉     | 24767/50000 [37:03<35:23, 11.88it/s]

Epochs: 24764 | epoch avg. loss: 0.013 | test avg. loss: 5.435
Epochs: 24765 | epoch avg. loss: 0.009 | test avg. loss: 5.384
Epochs: 24766 | epoch avg. loss: 0.010 | test avg. loss: 5.312


 50%|████▉     | 24769/50000 [37:03<34:08, 12.32it/s]

Epochs: 24767 | epoch avg. loss: 0.009 | test avg. loss: 5.369
Epochs: 24768 | epoch avg. loss: 0.015 | test avg. loss: 5.286
Epochs: 24769 | epoch avg. loss: 0.017 | test avg. loss: 5.316


 50%|████▉     | 24773/50000 [37:03<32:04, 13.11it/s]

Epochs: 24770 | epoch avg. loss: 0.012 | test avg. loss: 5.443
Epochs: 24771 | epoch avg. loss: 0.024 | test avg. loss: 5.265
Epochs: 24772 | epoch avg. loss: 0.024 | test avg. loss: 5.348


 50%|████▉     | 24775/50000 [37:03<32:40, 12.87it/s]

Epochs: 24773 | epoch avg. loss: 0.017 | test avg. loss: 5.260
Epochs: 24774 | epoch avg. loss: 0.015 | test avg. loss: 5.200
Epochs: 24775 | epoch avg. loss: 0.026 | test avg. loss: 5.310


 50%|████▉     | 24779/50000 [37:04<34:28, 12.19it/s]

Epochs: 24776 | epoch avg. loss: 0.007 | test avg. loss: 5.259
Epochs: 24777 | epoch avg. loss: 0.020 | test avg. loss: 5.324
Epochs: 24778 | epoch avg. loss: 0.005 | test avg. loss: 5.313


 50%|████▉     | 24781/50000 [37:04<36:37, 11.48it/s]

Epochs: 24779 | epoch avg. loss: 0.008 | test avg. loss: 5.321
Epochs: 24780 | epoch avg. loss: 0.009 | test avg. loss: 5.335


 50%|████▉     | 24783/50000 [37:04<39:22, 10.67it/s]

Epochs: 24781 | epoch avg. loss: 0.009 | test avg. loss: 5.254
Epochs: 24782 | epoch avg. loss: 0.010 | test avg. loss: 5.387
Epochs: 24783 | epoch avg. loss: 0.016 | test avg. loss: 5.297


 50%|████▉     | 24787/50000 [37:05<38:51, 10.81it/s]

Epochs: 24784 | epoch avg. loss: 0.027 | test avg. loss: 5.380
Epochs: 24785 | epoch avg. loss: 0.016 | test avg. loss: 5.460
Epochs: 24786 | epoch avg. loss: 0.019 | test avg. loss: 5.282


 50%|████▉     | 24789/50000 [37:05<38:46, 10.84it/s]

Epochs: 24787 | epoch avg. loss: 0.018 | test avg. loss: 5.292
Epochs: 24788 | epoch avg. loss: 0.013 | test avg. loss: 5.295
Epochs: 24789 | epoch avg. loss: 0.010 | test avg. loss: 5.261


 50%|████▉     | 24793/50000 [37:05<36:17, 11.58it/s]

Epochs: 24790 | epoch avg. loss: 0.014 | test avg. loss: 5.340
Epochs: 24791 | epoch avg. loss: 0.006 | test avg. loss: 5.359
Epochs: 24792 | epoch avg. loss: 0.006 | test avg. loss: 5.372


 50%|████▉     | 24795/50000 [37:05<35:47, 11.74it/s]

Epochs: 24793 | epoch avg. loss: 0.010 | test avg. loss: 5.351
Epochs: 24794 | epoch avg. loss: 0.007 | test avg. loss: 5.276
Epochs: 24795 | epoch avg. loss: 0.009 | test avg. loss: 5.375


 50%|████▉     | 24799/50000 [37:06<37:35, 11.17it/s]

Epochs: 24796 | epoch avg. loss: 0.032 | test avg. loss: 5.345
Epochs: 24797 | epoch avg. loss: 0.016 | test avg. loss: 5.277
Epochs: 24798 | epoch avg. loss: 0.044 | test avg. loss: 5.474


 50%|████▉     | 24799/50000 [37:06<37:35, 11.17it/s]

Epochs: 24799 | epoch avg. loss: 0.037 | test avg. loss: 5.318


 50%|████▉     | 24803/50000 [37:08<1:55:30,  3.64it/s]

Epochs: 24800 | epoch avg. loss: 0.018 | test avg. loss: 5.258
Epochs: 24801 | epoch avg. loss: 0.020 | test avg. loss: 5.371
Epochs: 24802 | epoch avg. loss: 0.016 | test avg. loss: 5.257


 50%|████▉     | 24805/50000 [37:08<1:30:11,  4.66it/s]

Epochs: 24803 | epoch avg. loss: 0.022 | test avg. loss: 5.304
Epochs: 24804 | epoch avg. loss: 0.011 | test avg. loss: 5.389
Epochs: 24805 | epoch avg. loss: 0.018 | test avg. loss: 5.263


 50%|████▉     | 24809/50000 [37:08<1:03:05,  6.66it/s]

Epochs: 24806 | epoch avg. loss: 0.016 | test avg. loss: 5.256
Epochs: 24807 | epoch avg. loss: 0.012 | test avg. loss: 5.335
Epochs: 24808 | epoch avg. loss: 0.008 | test avg. loss: 5.280


 50%|████▉     | 24811/50000 [37:08<55:23,  7.58it/s]  

Epochs: 24809 | epoch avg. loss: 0.015 | test avg. loss: 5.285
Epochs: 24810 | epoch avg. loss: 0.009 | test avg. loss: 5.339


 50%|████▉     | 24813/50000 [37:09<51:16,  8.19it/s]

Epochs: 24811 | epoch avg. loss: 0.007 | test avg. loss: 5.245
Epochs: 24812 | epoch avg. loss: 0.015 | test avg. loss: 5.275
Epochs: 24813 | epoch avg. loss: 0.015 | test avg. loss: 5.182


 50%|████▉     | 24817/50000 [37:09<41:11, 10.19it/s]

Epochs: 24814 | epoch avg. loss: 0.010 | test avg. loss: 5.156
Epochs: 24815 | epoch avg. loss: 0.009 | test avg. loss: 5.280
Epochs: 24816 | epoch avg. loss: 0.012 | test avg. loss: 5.273


 50%|████▉     | 24819/50000 [37:09<38:18, 10.96it/s]

Epochs: 24817 | epoch avg. loss: 0.008 | test avg. loss: 5.312
Epochs: 24818 | epoch avg. loss: 0.010 | test avg. loss: 5.371
Epochs: 24819 | epoch avg. loss: 0.016 | test avg. loss: 5.212


 50%|████▉     | 24823/50000 [37:09<34:00, 12.34it/s]

Epochs: 24820 | epoch avg. loss: 0.015 | test avg. loss: 5.190
Epochs: 24821 | epoch avg. loss: 0.011 | test avg. loss: 5.367
Epochs: 24822 | epoch avg. loss: 0.045 | test avg. loss: 5.287


 50%|████▉     | 24825/50000 [37:10<36:54, 11.37it/s]

Epochs: 24823 | epoch avg. loss: 0.036 | test avg. loss: 5.288
Epochs: 24824 | epoch avg. loss: 0.018 | test avg. loss: 5.284
Epochs: 24825 | epoch avg. loss: 0.019 | test avg. loss: 5.156


 50%|████▉     | 24829/50000 [37:10<35:16, 11.89it/s]

Epochs: 24826 | epoch avg. loss: 0.039 | test avg. loss: 5.350
Epochs: 24827 | epoch avg. loss: 0.054 | test avg. loss: 5.234
Epochs: 24828 | epoch avg. loss: 0.041 | test avg. loss: 5.262


 50%|████▉     | 24831/50000 [37:10<36:30, 11.49it/s]

Epochs: 24829 | epoch avg. loss: 0.038 | test avg. loss: 5.614
Epochs: 24830 | epoch avg. loss: 0.132 | test avg. loss: 5.162
Epochs: 24831 | epoch avg. loss: 0.102 | test avg. loss: 5.109


 50%|████▉     | 24835/50000 [37:10<37:07, 11.30it/s]

Epochs: 24832 | epoch avg. loss: 0.061 | test avg. loss: 5.498
Epochs: 24833 | epoch avg. loss: 0.149 | test avg. loss: 5.202
Epochs: 24834 | epoch avg. loss: 0.143 | test avg. loss: 5.304


 50%|████▉     | 24837/50000 [37:11<37:51, 11.08it/s]

Epochs: 24835 | epoch avg. loss: 0.044 | test avg. loss: 5.434
Epochs: 24836 | epoch avg. loss: 0.055 | test avg. loss: 5.145
Epochs: 24837 | epoch avg. loss: 0.119 | test avg. loss: 5.274


                                                     

Epochs: 24838 | epoch avg. loss: 0.088 | test avg. loss: 5.102
Epochs: 24839 | epoch avg. loss: 0.056 | test avg. loss: 5.108
Epochs: 24840 | epoch avg. loss: 0.098 | test avg. loss: 5.400


 50%|████▉     | 24843/50000 [37:11<32:32, 12.89it/s]

Epochs: 24841 | epoch avg. loss: 0.032 | test avg. loss: 5.277
Epochs: 24842 | epoch avg. loss: 0.076 | test avg. loss: 5.335
Epochs: 24843 | epoch avg. loss: 0.057 | test avg. loss: 5.433


 50%|████▉     | 24847/50000 [37:11<33:33, 12.49it/s]

Epochs: 24844 | epoch avg. loss: 0.083 | test avg. loss: 5.060
Epochs: 24845 | epoch avg. loss: 0.119 | test avg. loss: 5.313
Epochs: 24846 | epoch avg. loss: 0.049 | test avg. loss: 5.435


 50%|████▉     | 24849/50000 [37:12<35:41, 11.74it/s]

Epochs: 24847 | epoch avg. loss: 0.045 | test avg. loss: 5.371
Epochs: 24848 | epoch avg. loss: 0.049 | test avg. loss: 5.308
Epochs: 24849 | epoch avg. loss: 0.019 | test avg. loss: 4.960


 50%|████▉     | 24853/50000 [37:12<33:22, 12.56it/s]

Epochs: 24850 | epoch avg. loss: 0.047 | test avg. loss: 4.949
Epochs: 24851 | epoch avg. loss: 0.024 | test avg. loss: 5.178
Epochs: 24852 | epoch avg. loss: 0.027 | test avg. loss: 5.232


 50%|████▉     | 24855/50000 [37:12<31:39, 13.24it/s]

Epochs: 24853 | epoch avg. loss: 0.014 | test avg. loss: 5.338
Epochs: 24854 | epoch avg. loss: 0.015 | test avg. loss: 5.220
Epochs: 24855 | epoch avg. loss: 0.008 | test avg. loss: 5.132


 50%|████▉     | 24859/50000 [37:12<31:43, 13.21it/s]

Epochs: 24856 | epoch avg. loss: 0.011 | test avg. loss: 5.213
Epochs: 24857 | epoch avg. loss: 0.016 | test avg. loss: 5.177
Epochs: 24858 | epoch avg. loss: 0.027 | test avg. loss: 5.408


 50%|████▉     | 24861/50000 [37:13<32:11, 13.01it/s]

Epochs: 24859 | epoch avg. loss: 0.031 | test avg. loss: 5.475
Epochs: 24860 | epoch avg. loss: 0.031 | test avg. loss: 5.186
Epochs: 24861 | epoch avg. loss: 0.055 | test avg. loss: 5.306


 50%|████▉     | 24865/50000 [37:13<31:45, 13.19it/s]

Epochs: 24862 | epoch avg. loss: 0.062 | test avg. loss: 5.098
Epochs: 24863 | epoch avg. loss: 0.049 | test avg. loss: 5.177
Epochs: 24864 | epoch avg. loss: 0.028 | test avg. loss: 5.343


                                                     

Epochs: 24865 | epoch avg. loss: 0.035 | test avg. loss: 5.160
Epochs: 24866 | epoch avg. loss: 0.070 | test avg. loss: 5.335
Epochs: 24867 | epoch avg. loss: 0.016 | test avg. loss: 5.276


 50%|████▉     | 24871/50000 [37:13<30:18, 13.82it/s]

Epochs: 24868 | epoch avg. loss: 0.019 | test avg. loss: 5.249
Epochs: 24869 | epoch avg. loss: 0.024 | test avg. loss: 5.394
Epochs: 24870 | epoch avg. loss: 0.094 | test avg. loss: 5.122


 50%|████▉     | 24873/50000 [37:13<30:11, 13.87it/s]

Epochs: 24871 | epoch avg. loss: 0.021 | test avg. loss: 5.202
Epochs: 24872 | epoch avg. loss: 0.014 | test avg. loss: 5.396
Epochs: 24873 | epoch avg. loss: 0.020 | test avg. loss: 5.264


 50%|████▉     | 24877/50000 [37:14<31:43, 13.20it/s]

Epochs: 24874 | epoch avg. loss: 0.027 | test avg. loss: 5.256
Epochs: 24875 | epoch avg. loss: 0.017 | test avg. loss: 5.236
Epochs: 24876 | epoch avg. loss: 0.016 | test avg. loss: 5.127
Epochs: 24877 | epoch avg. loss: 0.044 | test avg. loss: 5.292


 50%|████▉     | 24881/50000 [37:14<29:21, 14.26it/s]

Epochs: 24878 | epoch avg. loss: 0.048 | test avg. loss: 5.240
Epochs: 24879 | epoch avg. loss: 0.021 | test avg. loss: 5.125
Epochs: 24880 | epoch avg. loss: 0.057 | test avg. loss: 5.218
Epochs: 24881 | epoch avg. loss: 0.033 | test avg. loss: 5.101


                                                     

Epochs: 24882 | epoch avg. loss: 0.015 | test avg. loss: 5.025
Epochs: 24883 | epoch avg. loss: 0.021 | test avg. loss: 5.215
Epochs: 24884 | epoch avg. loss: 0.019 | test avg. loss: 5.292


 50%|████▉     | 24887/50000 [37:14<28:53, 14.49it/s]

Epochs: 24885 | epoch avg. loss: 0.010 | test avg. loss: 5.284
Epochs: 24886 | epoch avg. loss: 0.015 | test avg. loss: 5.256
Epochs: 24887 | epoch avg. loss: 0.005 | test avg. loss: 5.112


                                                     

Epochs: 24888 | epoch avg. loss: 0.011 | test avg. loss: 5.120
Epochs: 24889 | epoch avg. loss: 0.009 | test avg. loss: 5.205
Epochs: 24890 | epoch avg. loss: 0.009 | test avg. loss: 5.174


                                                     

Epochs: 24891 | epoch avg. loss: 0.027 | test avg. loss: 5.339
Epochs: 24892 | epoch avg. loss: 0.032 | test avg. loss: 5.270
Epochs: 24893 | epoch avg. loss: 0.014 | test avg. loss: 5.073


 50%|████▉     | 24897/50000 [37:15<29:20, 14.26it/s]

Epochs: 24894 | epoch avg. loss: 0.021 | test avg. loss: 5.195
Epochs: 24895 | epoch avg. loss: 0.058 | test avg. loss: 5.108
Epochs: 24896 | epoch avg. loss: 0.016 | test avg. loss: 5.095
Epochs: 24897 | epoch avg. loss: 0.020 | test avg. loss: 5.391


 50%|████▉     | 24899/50000 [37:15<29:06, 14.37it/s]

Epochs: 24898 | epoch avg. loss: 0.073 | test avg. loss: 5.118
Epochs: 24899 | epoch avg. loss: 0.049 | test avg. loss: 5.092


 50%|████▉     | 24903/50000 [37:17<1:30:29,  4.62it/s]

Epochs: 24900 | epoch avg. loss: 0.039 | test avg. loss: 5.453
Epochs: 24901 | epoch avg. loss: 0.165 | test avg. loss: 5.060
Epochs: 24902 | epoch avg. loss: 0.094 | test avg. loss: 5.105
Epochs: 24903 | epoch avg. loss: 0.078 | test avg. loss: 5.608


 50%|████▉     | 24907/50000 [37:17<58:36,  7.14it/s]

Epochs: 24904 | epoch avg. loss: 0.298 | test avg. loss: 4.900
Epochs: 24905 | epoch avg. loss: 0.145 | test avg. loss: 4.933
Epochs: 24906 | epoch avg. loss: 0.081 | test avg. loss: 5.458
Epochs: 24907 | epoch avg. loss: 0.134 | test avg. loss: 5.277


 50%|████▉     | 24911/50000 [37:17<43:30,  9.61it/s]

Epochs: 24908 | epoch avg. loss: 0.214 | test avg. loss: 5.358
Epochs: 24909 | epoch avg. loss: 0.061 | test avg. loss: 5.343
Epochs: 24910 | epoch avg. loss: 0.082 | test avg. loss: 5.212


 50%|████▉     | 24913/50000 [37:18<41:57,  9.97it/s]

Epochs: 24911 | epoch avg. loss: 0.309 | test avg. loss: 5.581
Epochs: 24912 | epoch avg. loss: 0.316 | test avg. loss: 5.185
Epochs: 24913 | epoch avg. loss: 0.100 | test avg. loss: 5.015


 50%|████▉     | 24917/50000 [37:18<36:28, 11.46it/s]

Epochs: 24914 | epoch avg. loss: 0.137 | test avg. loss: 5.743
Epochs: 24915 | epoch avg. loss: 0.427 | test avg. loss: 5.015
Epochs: 24916 | epoch avg. loss: 0.361 | test avg. loss: 5.164


 50%|████▉     | 24919/50000 [37:18<34:32, 12.10it/s]

Epochs: 24917 | epoch avg. loss: 0.257 | test avg. loss: 5.922
Epochs: 24918 | epoch avg. loss: 0.309 | test avg. loss: 5.422
Epochs: 24919 | epoch avg. loss: 0.319 | test avg. loss: 5.580
Epochs: 24920 | epoch avg. loss: 0.149 | test avg. loss: 5.517


 50%|████▉     | 24923/50000 [37:18<31:21, 13.33it/s]

Epochs: 24921 | epoch avg. loss: 0.079 | test avg. loss: 5.215
Epochs: 24922 | epoch avg. loss: 0.064 | test avg. loss: 5.217
Epochs: 24923 | epoch avg. loss: 0.131 | test avg. loss: 4.812


 50%|████▉     | 24927/50000 [37:19<31:50, 13.12it/s]

Epochs: 24924 | epoch avg. loss: 0.087 | test avg. loss: 4.890
Epochs: 24925 | epoch avg. loss: 0.084 | test avg. loss: 5.465
Epochs: 24926 | epoch avg. loss: 0.208 | test avg. loss: 5.015


 50%|████▉     | 24931/50000 [37:19<29:13, 14.30it/s]

Epochs: 24927 | epoch avg. loss: 0.028 | test avg. loss: 5.081
Epochs: 24928 | epoch avg. loss: 0.063 | test avg. loss: 5.087
Epochs: 24929 | epoch avg. loss: 0.073 | test avg. loss: 4.886
Epochs: 24930 | epoch avg. loss: 0.023 | test avg. loss: 5.088


 50%|████▉     | 24935/50000 [37:19<27:58, 14.94it/s]

Epochs: 24931 | epoch avg. loss: 0.042 | test avg. loss: 5.035
Epochs: 24932 | epoch avg. loss: 0.039 | test avg. loss: 4.945
Epochs: 24933 | epoch avg. loss: 0.033 | test avg. loss: 5.100
Epochs: 24934 | epoch avg. loss: 0.047 | test avg. loss: 4.938


 50%|████▉     | 24939/50000 [37:19<27:36, 15.13it/s]

Epochs: 24935 | epoch avg. loss: 0.036 | test avg. loss: 5.064
Epochs: 24936 | epoch avg. loss: 0.032 | test avg. loss: 5.014
Epochs: 24937 | epoch avg. loss: 0.018 | test avg. loss: 4.959
Epochs: 24938 | epoch avg. loss: 0.017 | test avg. loss: 5.098


 50%|████▉     | 24941/50000 [37:20<31:37, 13.21it/s]

Epochs: 24939 | epoch avg. loss: 0.020 | test avg. loss: 5.049
Epochs: 24940 | epoch avg. loss: 0.009 | test avg. loss: 5.104
Epochs: 24941 | epoch avg. loss: 0.009 | test avg. loss: 5.056


 50%|████▉     | 24945/50000 [37:20<29:21, 14.22it/s]

Epochs: 24942 | epoch avg. loss: 0.011 | test avg. loss: 5.040
Epochs: 24943 | epoch avg. loss: 0.012 | test avg. loss: 5.106
Epochs: 24944 | epoch avg. loss: 0.011 | test avg. loss: 5.049


 50%|████▉     | 24949/50000 [37:20<28:26, 14.68it/s]

Epochs: 24945 | epoch avg. loss: 0.010 | test avg. loss: 5.007
Epochs: 24946 | epoch avg. loss: 0.012 | test avg. loss: 5.080
Epochs: 24947 | epoch avg. loss: 0.013 | test avg. loss: 5.026
Epochs: 24948 | epoch avg. loss: 0.005 | test avg. loss: 5.048


 50%|████▉     | 24951/50000 [37:20<27:57, 14.93it/s]

Epochs: 24949 | epoch avg. loss: 0.007 | test avg. loss: 5.017
Epochs: 24950 | epoch avg. loss: 0.013 | test avg. loss: 5.032
Epochs: 24951 | epoch avg. loss: 0.014 | test avg. loss: 5.156


 50%|████▉     | 24955/50000 [37:21<28:11, 14.81it/s]

Epochs: 24952 | epoch avg. loss: 0.018 | test avg. loss: 5.067
Epochs: 24953 | epoch avg. loss: 0.023 | test avg. loss: 5.038
Epochs: 24954 | epoch avg. loss: 0.014 | test avg. loss: 5.156
Epochs: 24955 | epoch avg. loss: 0.034 | test avg. loss: 5.001


 50%|████▉     | 24959/50000 [37:21<28:09, 14.82it/s]

Epochs: 24956 | epoch avg. loss: 0.014 | test avg. loss: 5.008
Epochs: 24957 | epoch avg. loss: 0.010 | test avg. loss: 5.096
Epochs: 24958 | epoch avg. loss: 0.016 | test avg. loss: 4.979


 50%|████▉     | 24961/50000 [37:21<29:26, 14.17it/s]

Epochs: 24959 | epoch avg. loss: 0.007 | test avg. loss: 5.010
Epochs: 24960 | epoch avg. loss: 0.010 | test avg. loss: 5.017
Epochs: 24961 | epoch avg. loss: 0.007 | test avg. loss: 5.059


 50%|████▉     | 24965/50000 [37:21<29:42, 14.04it/s]

Epochs: 24962 | epoch avg. loss: 0.007 | test avg. loss: 5.069
Epochs: 24963 | epoch avg. loss: 0.005 | test avg. loss: 5.074
Epochs: 24964 | epoch avg. loss: 0.005 | test avg. loss: 5.068


 50%|████▉     | 24967/50000 [37:21<31:09, 13.39it/s]

Epochs: 24965 | epoch avg. loss: 0.006 | test avg. loss: 4.981
Epochs: 24966 | epoch avg. loss: 0.025 | test avg. loss: 5.035
Epochs: 24967 | epoch avg. loss: 0.018 | test avg. loss: 5.175


 50%|████▉     | 24971/50000 [37:22<34:45, 12.00it/s]

Epochs: 24968 | epoch avg. loss: 0.032 | test avg. loss: 5.023
Epochs: 24969 | epoch avg. loss: 0.020 | test avg. loss: 5.082
Epochs: 24970 | epoch avg. loss: 0.020 | test avg. loss: 5.117


 50%|████▉     | 24973/50000 [37:22<34:35, 12.06it/s]

Epochs: 24971 | epoch avg. loss: 0.016 | test avg. loss: 5.002
Epochs: 24972 | epoch avg. loss: 0.009 | test avg. loss: 5.077
Epochs: 24973 | epoch avg. loss: 0.018 | test avg. loss: 5.032


 50%|████▉     | 24977/50000 [37:22<33:37, 12.40it/s]

Epochs: 24974 | epoch avg. loss: 0.019 | test avg. loss: 5.009
Epochs: 24975 | epoch avg. loss: 0.036 | test avg. loss: 5.170
Epochs: 24976 | epoch avg. loss: 0.068 | test avg. loss: 5.090


 50%|████▉     | 24979/50000 [37:22<33:37, 12.40it/s]

Epochs: 24977 | epoch avg. loss: 0.046 | test avg. loss: 5.078
Epochs: 24978 | epoch avg. loss: 0.067 | test avg. loss: 5.516
Epochs: 24979 | epoch avg. loss: 0.171 | test avg. loss: 4.984


 50%|████▉     | 24983/50000 [37:23<30:30, 13.67it/s]

Epochs: 24980 | epoch avg. loss: 0.079 | test avg. loss: 4.986
Epochs: 24981 | epoch avg. loss: 0.073 | test avg. loss: 5.364
Epochs: 24982 | epoch avg. loss: 0.152 | test avg. loss: 5.070


 50%|████▉     | 24987/50000 [37:23<29:24, 14.17it/s]

Epochs: 24983 | epoch avg. loss: 0.240 | test avg. loss: 5.156
Epochs: 24984 | epoch avg. loss: 0.093 | test avg. loss: 5.520
Epochs: 24985 | epoch avg. loss: 0.169 | test avg. loss: 4.943
Epochs: 24986 | epoch avg. loss: 0.153 | test avg. loss: 5.001


 50%|████▉     | 24989/50000 [37:23<28:49, 14.46it/s]

Epochs: 24987 | epoch avg. loss: 0.048 | test avg. loss: 5.000
Epochs: 24988 | epoch avg. loss: 0.046 | test avg. loss: 4.929
Epochs: 24989 | epoch avg. loss: 0.068 | test avg. loss: 5.214


 50%|████▉     | 24993/50000 [37:23<31:39, 13.17it/s]

Epochs: 24990 | epoch avg. loss: 0.100 | test avg. loss: 4.962
Epochs: 24991 | epoch avg. loss: 0.036 | test avg. loss: 4.971
Epochs: 24992 | epoch avg. loss: 0.026 | test avg. loss: 5.147


 50%|████▉     | 24995/50000 [37:24<34:15, 12.16it/s]

Epochs: 24993 | epoch avg. loss: 0.023 | test avg. loss: 5.048
Epochs: 24994 | epoch avg. loss: 0.020 | test avg. loss: 5.047
Epochs: 24995 | epoch avg. loss: 0.008 | test avg. loss: 5.091


 50%|████▉     | 24999/50000 [37:24<33:18, 12.51it/s]

Epochs: 24996 | epoch avg. loss: 0.013 | test avg. loss: 5.027
Epochs: 24997 | epoch avg. loss: 0.033 | test avg. loss: 5.117
Epochs: 24998 | epoch avg. loss: 0.030 | test avg. loss: 5.077


 50%|████▉     | 24999/50000 [37:24<33:18, 12.51it/s]

Epochs: 24999 | epoch avg. loss: 0.020 | test avg. loss: 5.023


 50%|█████     | 25003/50000 [37:26<1:42:15,  4.07it/s]

Epochs: 25000 | epoch avg. loss: 0.032 | test avg. loss: 5.330
Epochs: 25001 | epoch avg. loss: 0.101 | test avg. loss: 5.137
Epochs: 25002 | epoch avg. loss: 0.015 | test avg. loss: 5.009


 50%|█████     | 25007/50000 [37:26<1:03:39,  6.54it/s]

Epochs: 25003 | epoch avg. loss: 0.012 | test avg. loss: 5.077
Epochs: 25004 | epoch avg. loss: 0.015 | test avg. loss: 5.039
Epochs: 25005 | epoch avg. loss: 0.020 | test avg. loss: 5.147
Epochs: 25006 | epoch avg. loss: 0.011 | test avg. loss: 5.224


 50%|█████     | 25011/50000 [37:26<45:04,  9.24it/s]

Epochs: 25007 | epoch avg. loss: 0.020 | test avg. loss: 5.115
Epochs: 25008 | epoch avg. loss: 0.007 | test avg. loss: 5.033
Epochs: 25009 | epoch avg. loss: 0.007 | test avg. loss: 5.043
Epochs: 25010 | epoch avg. loss: 0.007 | test avg. loss: 5.026


 50%|█████     | 25013/50000 [37:27<41:53,  9.94it/s]

Epochs: 25011 | epoch avg. loss: 0.005 | test avg. loss: 5.067
Epochs: 25012 | epoch avg. loss: 0.005 | test avg. loss: 5.057
Epochs: 25013 | epoch avg. loss: 0.013 | test avg. loss: 5.127


 50%|█████     | 25017/50000 [37:27<37:46, 11.02it/s]

Epochs: 25014 | epoch avg. loss: 0.008 | test avg. loss: 5.087
Epochs: 25015 | epoch avg. loss: 0.017 | test avg. loss: 5.022
Epochs: 25016 | epoch avg. loss: 0.011 | test avg. loss: 5.068


 50%|█████     | 25019/50000 [37:27<35:56, 11.58it/s]

Epochs: 25017 | epoch avg. loss: 0.018 | test avg. loss: 4.984
Epochs: 25018 | epoch avg. loss: 0.008 | test avg. loss: 5.043
Epochs: 25019 | epoch avg. loss: 0.009 | test avg. loss: 5.126


 50%|█████     | 25023/50000 [37:27<36:22, 11.45it/s]

Epochs: 25020 | epoch avg. loss: 0.014 | test avg. loss: 5.051
Epochs: 25021 | epoch avg. loss: 0.019 | test avg. loss: 5.037
Epochs: 25022 | epoch avg. loss: 0.012 | test avg. loss: 5.156


 50%|█████     | 25025/50000 [37:28<35:33, 11.71it/s]

Epochs: 25023 | epoch avg. loss: 0.024 | test avg. loss: 5.021
Epochs: 25024 | epoch avg. loss: 0.018 | test avg. loss: 5.024
Epochs: 25025 | epoch avg. loss: 0.012 | test avg. loss: 5.193


 50%|█████     | 25029/50000 [37:28<32:45, 12.71it/s]

Epochs: 25026 | epoch avg. loss: 0.029 | test avg. loss: 5.074
Epochs: 25027 | epoch avg. loss: 0.010 | test avg. loss: 5.117
Epochs: 25028 | epoch avg. loss: 0.010 | test avg. loss: 5.194


 50%|█████     | 25031/50000 [37:28<32:56, 12.63it/s]

Epochs: 25029 | epoch avg. loss: 0.022 | test avg. loss: 5.008
Epochs: 25030 | epoch avg. loss: 0.007 | test avg. loss: 5.015
Epochs: 25031 | epoch avg. loss: 0.010 | test avg. loss: 5.050


 50%|█████     | 25035/50000 [37:28<33:41, 12.35it/s]

Epochs: 25032 | epoch avg. loss: 0.007 | test avg. loss: 5.068
Epochs: 25033 | epoch avg. loss: 0.018 | test avg. loss: 5.114
Epochs: 25034 | epoch avg. loss: 0.005 | test avg. loss: 5.079


 50%|█████     | 25037/50000 [37:29<34:00, 12.23it/s]

Epochs: 25035 | epoch avg. loss: 0.006 | test avg. loss: 5.017
Epochs: 25036 | epoch avg. loss: 0.010 | test avg. loss: 5.063
Epochs: 25037 | epoch avg. loss: 0.005 | test avg. loss: 5.080


 50%|█████     | 25041/50000 [37:29<32:43, 12.71it/s]

Epochs: 25038 | epoch avg. loss: 0.008 | test avg. loss: 5.085
Epochs: 25039 | epoch avg. loss: 0.023 | test avg. loss: 5.217
Epochs: 25040 | epoch avg. loss: 0.011 | test avg. loss: 5.088


 50%|█████     | 25043/50000 [37:29<32:43, 12.71it/s]

Epochs: 25041 | epoch avg. loss: 0.019 | test avg. loss: 5.015
Epochs: 25042 | epoch avg. loss: 0.016 | test avg. loss: 5.047
Epochs: 25043 | epoch avg. loss: 0.011 | test avg. loss: 4.978


 50%|█████     | 25047/50000 [37:29<32:51, 12.65it/s]

Epochs: 25044 | epoch avg. loss: 0.009 | test avg. loss: 4.972
Epochs: 25045 | epoch avg. loss: 0.014 | test avg. loss: 5.035
Epochs: 25046 | epoch avg. loss: 0.005 | test avg. loss: 5.073


 50%|█████     | 25049/50000 [37:29<32:47, 12.68it/s]

Epochs: 25047 | epoch avg. loss: 0.005 | test avg. loss: 5.080
Epochs: 25048 | epoch avg. loss: 0.005 | test avg. loss: 5.077
Epochs: 25049 | epoch avg. loss: 0.006 | test avg. loss: 5.062


 50%|█████     | 25053/50000 [37:30<31:25, 13.23it/s]

Epochs: 25050 | epoch avg. loss: 0.005 | test avg. loss: 5.082
Epochs: 25051 | epoch avg. loss: 0.006 | test avg. loss: 5.081
Epochs: 25052 | epoch avg. loss: 0.005 | test avg. loss: 5.078


 50%|█████     | 25055/50000 [37:30<31:29, 13.20it/s]

Epochs: 25053 | epoch avg. loss: 0.007 | test avg. loss: 5.098
Epochs: 25054 | epoch avg. loss: 0.007 | test avg. loss: 5.143
Epochs: 25055 | epoch avg. loss: 0.013 | test avg. loss: 4.984


 50%|█████     | 25059/50000 [37:30<31:46, 13.08it/s]

Epochs: 25056 | epoch avg. loss: 0.014 | test avg. loss: 5.010
Epochs: 25057 | epoch avg. loss: 0.015 | test avg. loss: 5.142
Epochs: 25058 | epoch avg. loss: 0.023 | test avg. loss: 5.043


 50%|█████     | 25061/50000 [37:30<32:34, 12.76it/s]

Epochs: 25059 | epoch avg. loss: 0.019 | test avg. loss: 5.145
Epochs: 25060 | epoch avg. loss: 0.009 | test avg. loss: 5.174
Epochs: 25061 | epoch avg. loss: 0.008 | test avg. loss: 5.038


 50%|█████     | 25065/50000 [37:31<34:41, 11.98it/s]

Epochs: 25062 | epoch avg. loss: 0.012 | test avg. loss: 4.973
Epochs: 25063 | epoch avg. loss: 0.008 | test avg. loss: 5.051
Epochs: 25064 | epoch avg. loss: 0.014 | test avg. loss: 5.038


 50%|█████     | 25067/50000 [37:31<33:44, 12.32it/s]

Epochs: 25065 | epoch avg. loss: 0.015 | test avg. loss: 5.036
Epochs: 25066 | epoch avg. loss: 0.041 | test avg. loss: 5.128
Epochs: 25067 | epoch avg. loss: 0.049 | test avg. loss: 5.143


 50%|█████     | 25071/50000 [37:31<31:12, 13.32it/s]

Epochs: 25068 | epoch avg. loss: 0.034 | test avg. loss: 4.992
Epochs: 25069 | epoch avg. loss: 0.025 | test avg. loss: 5.034
Epochs: 25070 | epoch avg. loss: 0.007 | test avg. loss: 5.016


 50%|█████     | 25073/50000 [37:31<30:42, 13.53it/s]

Epochs: 25071 | epoch avg. loss: 0.009 | test avg. loss: 4.979
Epochs: 25072 | epoch avg. loss: 0.009 | test avg. loss: 5.110
Epochs: 25073 | epoch avg. loss: 0.020 | test avg. loss: 5.128


 50%|█████     | 25077/50000 [37:31<29:48, 13.93it/s]

Epochs: 25074 | epoch avg. loss: 0.013 | test avg. loss: 5.068
Epochs: 25075 | epoch avg. loss: 0.051 | test avg. loss: 5.147
Epochs: 25076 | epoch avg. loss: 0.043 | test avg. loss: 5.290


 50%|█████     | 25079/50000 [37:32<29:37, 14.02it/s]

Epochs: 25077 | epoch avg. loss: 0.082 | test avg. loss: 5.004
Epochs: 25078 | epoch avg. loss: 0.040 | test avg. loss: 5.031
Epochs: 25079 | epoch avg. loss: 0.025 | test avg. loss: 5.296
Epochs: 25080 | epoch avg. loss: 0.079 | test avg. loss: 5.091


 50%|█████     | 25083/50000 [37:32<30:44, 13.51it/s]

Epochs: 25081 | epoch avg. loss: 0.119 | test avg. loss: 5.160
Epochs: 25082 | epoch avg. loss: 0.052 | test avg. loss: 5.322
Epochs: 25083 | epoch avg. loss: 0.091 | test avg. loss: 4.952


 50%|█████     | 25087/50000 [37:32<28:56, 14.35it/s]

Epochs: 25084 | epoch avg. loss: 0.028 | test avg. loss: 4.736
Epochs: 25085 | epoch avg. loss: 0.029 | test avg. loss: 4.809
Epochs: 25086 | epoch avg. loss: 0.022 | test avg. loss: 5.092
Epochs: 25087 | epoch avg. loss: 0.021 | test avg. loss: 5.231


 50%|█████     | 25091/50000 [37:32<29:18, 14.17it/s]

Epochs: 25088 | epoch avg. loss: 0.017 | test avg. loss: 5.175
Epochs: 25089 | epoch avg. loss: 0.013 | test avg. loss: 5.190
Epochs: 25090 | epoch avg. loss: 0.028 | test avg. loss: 4.865


 50%|█████     | 25093/50000 [37:33<28:56, 14.34it/s]

Epochs: 25091 | epoch avg. loss: 0.064 | test avg. loss: 4.857
Epochs: 25092 | epoch avg. loss: 0.032 | test avg. loss: 5.225
Epochs: 25093 | epoch avg. loss: 0.061 | test avg. loss: 5.094
Epochs: 25094 | epoch avg. loss: 0.020 | test avg. loss: 5.157


 50%|█████     | 25097/50000 [37:33<31:14, 13.29it/s]

Epochs: 25095 | epoch avg. loss: 0.011 | test avg. loss: 5.155
Epochs: 25096 | epoch avg. loss: 0.010 | test avg. loss: 5.038
Epochs: 25097 | epoch avg. loss: 0.009 | test avg. loss: 4.996


 50%|█████     | 25099/50000 [37:33<31:24, 13.21it/s]

Epochs: 25098 | epoch avg. loss: 0.008 | test avg. loss: 4.955
Epochs: 25099 | epoch avg. loss: 0.021 | test avg. loss: 5.167


 50%|█████     | 25103/50000 [37:35<1:29:32,  4.63it/s]

Epochs: 25100 | epoch avg. loss: 0.039 | test avg. loss: 5.184
Epochs: 25101 | epoch avg. loss: 0.020 | test avg. loss: 5.003
Epochs: 25102 | epoch avg. loss: 0.028 | test avg. loss: 4.996
Epochs: 25103 | epoch avg. loss: 0.013 | test avg. loss: 5.097


 50%|█████     | 25107/50000 [37:35<59:14,  7.00it/s]  

Epochs: 25104 | epoch avg. loss: 0.024 | test avg. loss: 5.068
Epochs: 25105 | epoch avg. loss: 0.012 | test avg. loss: 5.055
Epochs: 25106 | epoch avg. loss: 0.016 | test avg. loss: 5.247


 50%|█████     | 25109/50000 [37:35<50:25,  8.23it/s]

Epochs: 25107 | epoch avg. loss: 0.076 | test avg. loss: 5.109
Epochs: 25108 | epoch avg. loss: 0.018 | test avg. loss: 4.972
Epochs: 25109 | epoch avg. loss: 0.048 | test avg. loss: 5.024


 50%|█████     | 25113/50000 [37:35<41:56,  9.89it/s]

Epochs: 25110 | epoch avg. loss: 0.009 | test avg. loss: 5.089
Epochs: 25111 | epoch avg. loss: 0.009 | test avg. loss: 5.022
Epochs: 25112 | epoch avg. loss: 0.016 | test avg. loss: 4.992


 50%|█████     | 25115/50000 [37:36<40:39, 10.20it/s]

Epochs: 25113 | epoch avg. loss: 0.009 | test avg. loss: 5.055
Epochs: 25114 | epoch avg. loss: 0.028 | test avg. loss: 5.040
Epochs: 25115 | epoch avg. loss: 0.028 | test avg. loss: 4.912


 50%|█████     | 25119/50000 [37:36<36:24, 11.39it/s]

Epochs: 25116 | epoch avg. loss: 0.013 | test avg. loss: 4.999
Epochs: 25117 | epoch avg. loss: 0.010 | test avg. loss: 5.135
Epochs: 25118 | epoch avg. loss: 0.018 | test avg. loss: 5.022


 50%|█████     | 25121/50000 [37:36<34:43, 11.94it/s]

Epochs: 25119 | epoch avg. loss: 0.007 | test avg. loss: 5.055
Epochs: 25120 | epoch avg. loss: 0.021 | test avg. loss: 4.957
Epochs: 25121 | epoch avg. loss: 0.015 | test avg. loss: 4.914


 50%|█████     | 25125/50000 [37:36<34:01, 12.18it/s]

Epochs: 25122 | epoch avg. loss: 0.053 | test avg. loss: 5.031
Epochs: 25123 | epoch avg. loss: 0.012 | test avg. loss: 5.057
Epochs: 25124 | epoch avg. loss: 0.009 | test avg. loss: 5.007




Epochs: 25125 | epoch avg. loss: 0.024 | test avg. loss: 5.072
Epochs: 25126 | epoch avg. loss: 0.016 | test avg. loss: 5.075
Epochs: 25127 | epoch avg. loss: 0.010 | test avg. loss: 5.080


 50%|█████     | 25131/50000 [37:37<30:54, 13.41it/s]

Epochs: 25128 | epoch avg. loss: 0.014 | test avg. loss: 5.270
Epochs: 25129 | epoch avg. loss: 0.076 | test avg. loss: 4.985
Epochs: 25130 | epoch avg. loss: 0.021 | test avg. loss: 4.957


 50%|█████     | 25133/50000 [37:37<30:24, 13.63it/s]

Epochs: 25131 | epoch avg. loss: 0.018 | test avg. loss: 5.120
Epochs: 25132 | epoch avg. loss: 0.040 | test avg. loss: 5.086
Epochs: 25133 | epoch avg. loss: 0.020 | test avg. loss: 4.988


 50%|█████     | 25137/50000 [37:37<31:54, 12.99it/s]

Epochs: 25134 | epoch avg. loss: 0.007 | test avg. loss: 5.030
Epochs: 25135 | epoch avg. loss: 0.007 | test avg. loss: 5.066
Epochs: 25136 | epoch avg. loss: 0.006 | test avg. loss: 5.032


 50%|█████     | 25139/50000 [37:38<32:48, 12.63it/s]

Epochs: 25137 | epoch avg. loss: 0.006 | test avg. loss: 5.009
Epochs: 25138 | epoch avg. loss: 0.007 | test avg. loss: 4.986
Epochs: 25139 | epoch avg. loss: 0.007 | test avg. loss: 4.930


 50%|█████     | 25143/50000 [37:38<31:42, 13.07it/s]

Epochs: 25140 | epoch avg. loss: 0.032 | test avg. loss: 4.982
Epochs: 25141 | epoch avg. loss: 0.019 | test avg. loss: 5.122
Epochs: 25142 | epoch avg. loss: 0.032 | test avg. loss: 5.225


 50%|█████     | 25145/50000 [37:38<31:30, 13.15it/s]

Epochs: 25143 | epoch avg. loss: 0.074 | test avg. loss: 4.952
Epochs: 25144 | epoch avg. loss: 0.030 | test avg. loss: 4.952
Epochs: 25145 | epoch avg. loss: 0.047 | test avg. loss: 5.056


 50%|█████     | 25149/50000 [37:38<30:00, 13.80it/s]

Epochs: 25146 | epoch avg. loss: 0.051 | test avg. loss: 4.929
Epochs: 25147 | epoch avg. loss: 0.060 | test avg. loss: 5.041
Epochs: 25148 | epoch avg. loss: 0.053 | test avg. loss: 5.075


 50%|█████     | 25151/50000 [37:38<30:22, 13.63it/s]

Epochs: 25149 | epoch avg. loss: 0.024 | test avg. loss: 5.018
Epochs: 25150 | epoch avg. loss: 0.028 | test avg. loss: 4.830
Epochs: 25151 | epoch avg. loss: 0.028 | test avg. loss: 4.947


 50%|█████     | 25155/50000 [37:39<31:19, 13.22it/s]

Epochs: 25152 | epoch avg. loss: 0.068 | test avg. loss: 5.305
Epochs: 25153 | epoch avg. loss: 0.108 | test avg. loss: 5.092
Epochs: 25154 | epoch avg. loss: 0.058 | test avg. loss: 5.052


 50%|█████     | 25157/50000 [37:39<30:53, 13.40it/s]

Epochs: 25155 | epoch avg. loss: 0.066 | test avg. loss: 5.105
Epochs: 25156 | epoch avg. loss: 0.036 | test avg. loss: 5.001
Epochs: 25157 | epoch avg. loss: 0.034 | test avg. loss: 4.897


 50%|█████     | 25161/50000 [37:39<29:22, 14.09it/s]

Epochs: 25158 | epoch avg. loss: 0.116 | test avg. loss: 5.060
Epochs: 25159 | epoch avg. loss: 0.043 | test avg. loss: 5.819
Epochs: 25160 | epoch avg. loss: 0.334 | test avg. loss: 5.278
Epochs: 25161 | epoch avg. loss: 0.383 | test avg. loss: 5.473


 50%|█████     | 25165/50000 [37:39<32:14, 12.84it/s]

Epochs: 25162 | epoch avg. loss: 0.613 | test avg. loss: 5.500
Epochs: 25163 | epoch avg. loss: 0.587 | test avg. loss: 5.172
Epochs: 25164 | epoch avg. loss: 0.197 | test avg. loss: 5.456


 50%|█████     | 25167/50000 [37:40<33:31, 12.35it/s]

Epochs: 25165 | epoch avg. loss: 0.560 | test avg. loss: 5.679
Epochs: 25166 | epoch avg. loss: 0.256 | test avg. loss: 5.407
Epochs: 25167 | epoch avg. loss: 0.152 | test avg. loss: 5.192


 50%|█████     | 25171/50000 [37:40<34:10, 12.11it/s]

Epochs: 25168 | epoch avg. loss: 0.458 | test avg. loss: 5.088
Epochs: 25169 | epoch avg. loss: 0.334 | test avg. loss: 4.629
Epochs: 25170 | epoch avg. loss: 0.520 | test avg. loss: 4.528


 50%|█████     | 25173/50000 [37:40<32:37, 12.68it/s]

Epochs: 25171 | epoch avg. loss: 0.331 | test avg. loss: 5.624
Epochs: 25172 | epoch avg. loss: 0.693 | test avg. loss: 4.982
Epochs: 25173 | epoch avg. loss: 0.552 | test avg. loss: 5.283
Epochs: 25174 | epoch avg. loss: 0.693 | test avg. loss: 6.474


 50%|█████     | 25177/50000 [37:40<30:34, 13.53it/s]

Epochs: 25175 | epoch avg. loss: 0.905 | test avg. loss: 6.563
Epochs: 25176 | epoch avg. loss: 2.202 | test avg. loss: 5.760
Epochs: 25177 | epoch avg. loss: 1.053 | test avg. loss: 5.521


 50%|█████     | 25181/50000 [37:41<30:18, 13.64it/s]

Epochs: 25178 | epoch avg. loss: 0.553 | test avg. loss: 5.206
Epochs: 25179 | epoch avg. loss: 0.459 | test avg. loss: 5.545
Epochs: 25180 | epoch avg. loss: 0.324 | test avg. loss: 4.854


 50%|█████     | 25183/50000 [37:41<30:13, 13.68it/s]

Epochs: 25181 | epoch avg. loss: 0.539 | test avg. loss: 5.114
Epochs: 25182 | epoch avg. loss: 0.817 | test avg. loss: 4.956
Epochs: 25183 | epoch avg. loss: 0.524 | test avg. loss: 5.234


 50%|█████     | 25187/50000 [37:41<31:19, 13.20it/s]

Epochs: 25184 | epoch avg. loss: 0.318 | test avg. loss: 6.049
Epochs: 25185 | epoch avg. loss: 0.414 | test avg. loss: 5.100
Epochs: 25186 | epoch avg. loss: 0.483 | test avg. loss: 4.952


 50%|█████     | 25189/50000 [37:41<33:04, 12.50it/s]

Epochs: 25187 | epoch avg. loss: 0.218 | test avg. loss: 4.615
Epochs: 25188 | epoch avg. loss: 0.114 | test avg. loss: 4.624
Epochs: 25189 | epoch avg. loss: 0.157 | test avg. loss: 5.019


 50%|█████     | 25193/50000 [37:42<32:56, 12.55it/s]

Epochs: 25190 | epoch avg. loss: 0.199 | test avg. loss: 4.673
Epochs: 25191 | epoch avg. loss: 0.129 | test avg. loss: 4.685
Epochs: 25192 | epoch avg. loss: 0.064 | test avg. loss: 4.861


 50%|█████     | 25195/50000 [37:42<31:18, 13.21it/s]

Epochs: 25193 | epoch avg. loss: 0.036 | test avg. loss: 4.861
Epochs: 25194 | epoch avg. loss: 0.040 | test avg. loss: 4.951
Epochs: 25195 | epoch avg. loss: 0.027 | test avg. loss: 4.832


 50%|█████     | 25199/50000 [37:42<29:34, 13.98it/s]

Epochs: 25196 | epoch avg. loss: 0.043 | test avg. loss: 4.849
Epochs: 25197 | epoch avg. loss: 0.041 | test avg. loss: 4.861
Epochs: 25198 | epoch avg. loss: 0.047 | test avg. loss: 4.624
Epochs: 25199 | epoch avg. loss: 0.097 | test avg. loss: 4.793


 50%|█████     | 25203/50000 [37:44<1:41:45,  4.06it/s]

Epochs: 25200 | epoch avg. loss: 0.089 | test avg. loss: 4.694
Epochs: 25201 | epoch avg. loss: 0.045 | test avg. loss: 4.749
Epochs: 25202 | epoch avg. loss: 0.041 | test avg. loss: 5.015


 50%|█████     | 25205/50000 [37:44<1:22:14,  5.03it/s]

Epochs: 25203 | epoch avg. loss: 0.079 | test avg. loss: 4.799
Epochs: 25204 | epoch avg. loss: 0.052 | test avg. loss: 4.930
Epochs: 25205 | epoch avg. loss: 0.049 | test avg. loss: 4.889


 50%|█████     | 25209/50000 [37:44<57:52,  7.14it/s]  

Epochs: 25206 | epoch avg. loss: 0.018 | test avg. loss: 4.859
Epochs: 25207 | epoch avg. loss: 0.014 | test avg. loss: 4.932
Epochs: 25208 | epoch avg. loss: 0.012 | test avg. loss: 4.947


 50%|█████     | 25213/50000 [37:45<42:23,  9.75it/s]

Epochs: 25209 | epoch avg. loss: 0.017 | test avg. loss: 5.059
Epochs: 25210 | epoch avg. loss: 0.025 | test avg. loss: 4.976
Epochs: 25211 | epoch avg. loss: 0.012 | test avg. loss: 4.867
Epochs: 25212 | epoch avg. loss: 0.016 | test avg. loss: 5.015


 50%|█████     | 25217/50000 [37:45<34:39, 11.91it/s]

Epochs: 25213 | epoch avg. loss: 0.020 | test avg. loss: 4.909
Epochs: 25214 | epoch avg. loss: 0.041 | test avg. loss: 5.139
Epochs: 25215 | epoch avg. loss: 0.043 | test avg. loss: 5.050
Epochs: 25216 | epoch avg. loss: 0.042 | test avg. loss: 4.989


 50%|█████     | 25219/50000 [37:45<33:27, 12.34it/s]

Epochs: 25217 | epoch avg. loss: 0.023 | test avg. loss: 5.028
Epochs: 25218 | epoch avg. loss: 0.026 | test avg. loss: 4.835
Epochs: 25219 | epoch avg. loss: 0.073 | test avg. loss: 4.985


 50%|█████     | 25223/50000 [37:45<32:35, 12.67it/s]

Epochs: 25220 | epoch avg. loss: 0.057 | test avg. loss: 4.895
Epochs: 25221 | epoch avg. loss: 0.062 | test avg. loss: 4.884
Epochs: 25222 | epoch avg. loss: 0.078 | test avg. loss: 5.203


 50%|█████     | 25225/50000 [37:46<33:11, 12.44it/s]

Epochs: 25223 | epoch avg. loss: 0.112 | test avg. loss: 4.851
Epochs: 25224 | epoch avg. loss: 0.217 | test avg. loss: 4.940
Epochs: 25225 | epoch avg. loss: 0.170 | test avg. loss: 4.937


 50%|█████     | 25229/50000 [37:46<31:41, 13.03it/s]

Epochs: 25226 | epoch avg. loss: 0.106 | test avg. loss: 4.859
Epochs: 25227 | epoch avg. loss: 0.159 | test avg. loss: 5.228
Epochs: 25228 | epoch avg. loss: 0.133 | test avg. loss: 5.183


                                                     

Epochs: 25229 | epoch avg. loss: 0.220 | test avg. loss: 5.272
Epochs: 25230 | epoch avg. loss: 0.085 | test avg. loss: 5.165
Epochs: 25231 | epoch avg. loss: 0.053 | test avg. loss: 4.785


 50%|█████     | 25235/50000 [37:46<31:29, 13.11it/s]

Epochs: 25232 | epoch avg. loss: 0.059 | test avg. loss: 4.784
Epochs: 25233 | epoch avg. loss: 0.049 | test avg. loss: 4.741
Epochs: 25234 | epoch avg. loss: 0.014 | test avg. loss: 4.963


 50%|█████     | 25237/50000 [37:47<32:06, 12.85it/s]

Epochs: 25235 | epoch avg. loss: 0.020 | test avg. loss: 5.024
Epochs: 25236 | epoch avg. loss: 0.049 | test avg. loss: 5.038
Epochs: 25237 | epoch avg. loss: 0.028 | test avg. loss: 5.012


 50%|█████     | 25241/50000 [37:47<31:04, 13.28it/s]

Epochs: 25238 | epoch avg. loss: 0.022 | test avg. loss: 4.754
Epochs: 25239 | epoch avg. loss: 0.024 | test avg. loss: 4.785
Epochs: 25240 | epoch avg. loss: 0.024 | test avg. loss: 4.881
Epochs: 25241 | epoch avg. loss: 0.016 | test avg. loss: 4.905


 50%|█████     | 25245/50000 [37:47<31:41, 13.02it/s]

Epochs: 25242 | epoch avg. loss: 0.012 | test avg. loss: 5.047
Epochs: 25243 | epoch avg. loss: 0.008 | test avg. loss: 5.020
Epochs: 25244 | epoch avg. loss: 0.021 | test avg. loss: 4.977


 50%|█████     | 25247/50000 [37:47<32:23, 12.73it/s]

Epochs: 25245 | epoch avg. loss: 0.012 | test avg. loss: 5.064
Epochs: 25246 | epoch avg. loss: 0.027 | test avg. loss: 4.921
Epochs: 25247 | epoch avg. loss: 0.029 | test avg. loss: 5.030


 51%|█████     | 25251/50000 [37:48<34:25, 11.98it/s]

Epochs: 25248 | epoch avg. loss: 0.020 | test avg. loss: 5.106
Epochs: 25249 | epoch avg. loss: 0.021 | test avg. loss: 4.934
Epochs: 25250 | epoch avg. loss: 0.038 | test avg. loss: 4.950


 51%|█████     | 25253/50000 [37:48<33:56, 12.15it/s]

Epochs: 25251 | epoch avg. loss: 0.007 | test avg. loss: 4.883
Epochs: 25252 | epoch avg. loss: 0.010 | test avg. loss: 4.878
Epochs: 25253 | epoch avg. loss: 0.013 | test avg. loss: 5.019


 51%|█████     | 25257/50000 [37:48<34:12, 12.06it/s]

Epochs: 25254 | epoch avg. loss: 0.019 | test avg. loss: 4.898
Epochs: 25255 | epoch avg. loss: 0.043 | test avg. loss: 4.971
Epochs: 25256 | epoch avg. loss: 0.029 | test avg. loss: 4.991


 51%|█████     | 25259/50000 [37:48<34:00, 12.13it/s]

Epochs: 25257 | epoch avg. loss: 0.018 | test avg. loss: 4.953
Epochs: 25258 | epoch avg. loss: 0.022 | test avg. loss: 5.171
Epochs: 25259 | epoch avg. loss: 0.037 | test avg. loss: 5.024


 51%|█████     | 25263/50000 [37:49<32:46, 12.58it/s]

Epochs: 25260 | epoch avg. loss: 0.033 | test avg. loss: 5.084
Epochs: 25261 | epoch avg. loss: 0.013 | test avg. loss: 5.088
Epochs: 25262 | epoch avg. loss: 0.014 | test avg. loss: 4.935
Epochs: 25263 | epoch avg. loss: 0.039 | test avg. loss: 5.012


                                                     

Epochs: 25264 | epoch avg. loss: 0.035 | test avg. loss: 4.868
Epochs: 25265 | epoch avg. loss: 0.013 | test avg. loss: 4.854
Epochs: 25266 | epoch avg. loss: 0.013 | test avg. loss: 5.018


 51%|█████     | 25269/50000 [37:49<30:27, 13.53it/s]

Epochs: 25267 | epoch avg. loss: 0.016 | test avg. loss: 5.033
Epochs: 25268 | epoch avg. loss: 0.023 | test avg. loss: 5.046
Epochs: 25269 | epoch avg. loss: 0.012 | test avg. loss: 5.032


 51%|█████     | 25273/50000 [37:49<31:10, 13.22it/s]

Epochs: 25270 | epoch avg. loss: 0.010 | test avg. loss: 4.916
Epochs: 25271 | epoch avg. loss: 0.006 | test avg. loss: 4.853
Epochs: 25272 | epoch avg. loss: 0.010 | test avg. loss: 4.914


 51%|█████     | 25275/50000 [37:50<33:35, 12.27it/s]

Epochs: 25273 | epoch avg. loss: 0.007 | test avg. loss: 4.952
Epochs: 25274 | epoch avg. loss: 0.006 | test avg. loss: 5.033
Epochs: 25275 | epoch avg. loss: 0.007 | test avg. loss: 4.974


 51%|█████     | 25279/50000 [37:50<33:37, 12.25it/s]

Epochs: 25276 | epoch avg. loss: 0.019 | test avg. loss: 4.938
Epochs: 25277 | epoch avg. loss: 0.019 | test avg. loss: 5.012
Epochs: 25278 | epoch avg. loss: 0.011 | test avg. loss: 4.924


 51%|█████     | 25283/50000 [37:50<30:56, 13.31it/s]

Epochs: 25279 | epoch avg. loss: 0.015 | test avg. loss: 5.022
Epochs: 25280 | epoch avg. loss: 0.024 | test avg. loss: 5.040
Epochs: 25281 | epoch avg. loss: 0.032 | test avg. loss: 5.018
Epochs: 25282 | epoch avg. loss: 0.084 | test avg. loss: 5.161


 51%|█████     | 25285/50000 [37:50<30:39, 13.43it/s]

Epochs: 25283 | epoch avg. loss: 0.059 | test avg. loss: 4.905
Epochs: 25284 | epoch avg. loss: 0.153 | test avg. loss: 4.844
Epochs: 25285 | epoch avg. loss: 0.092 | test avg. loss: 5.183


 51%|█████     | 25289/50000 [37:51<29:19, 14.05it/s]

Epochs: 25286 | epoch avg. loss: 0.217 | test avg. loss: 4.865
Epochs: 25287 | epoch avg. loss: 0.161 | test avg. loss: 5.130
Epochs: 25288 | epoch avg. loss: 0.081 | test avg. loss: 5.221
Epochs: 25289 | epoch avg. loss: 0.045 | test avg. loss: 5.024


                                                     

Epochs: 25290 | epoch avg. loss: 0.059 | test avg. loss: 5.101
Epochs: 25291 | epoch avg. loss: 0.070 | test avg. loss: 4.974
Epochs: 25292 | epoch avg. loss: 0.023 | test avg. loss: 4.947




Epochs: 25293 | epoch avg. loss: 0.028 | test avg. loss: 5.159
Epochs: 25294 | epoch avg. loss: 0.044 | test avg. loss: 4.973
Epochs: 25295 | epoch avg. loss: 0.038 | test avg. loss: 5.072


 51%|█████     | 25299/50000 [37:51<28:03, 14.67it/s]

Epochs: 25296 | epoch avg. loss: 0.042 | test avg. loss: 5.021
Epochs: 25297 | epoch avg. loss: 0.025 | test avg. loss: 4.882
Epochs: 25298 | epoch avg. loss: 0.044 | test avg. loss: 4.969


 51%|█████     | 25299/50000 [37:51<28:03, 14.67it/s]

Epochs: 25299 | epoch avg. loss: 0.076 | test avg. loss: 4.782


                                                       

Epochs: 25300 | epoch avg. loss: 0.026 | test avg. loss: 4.768
Epochs: 25301 | epoch avg. loss: 0.027 | test avg. loss: 5.033
Epochs: 25302 | epoch avg. loss: 0.076 | test avg. loss: 4.806


 51%|█████     | 25307/50000 [37:53<1:00:04,  6.85it/s]

Epochs: 25303 | epoch avg. loss: 0.051 | test avg. loss: 4.893
Epochs: 25304 | epoch avg. loss: 0.014 | test avg. loss: 5.027
Epochs: 25305 | epoch avg. loss: 0.015 | test avg. loss: 4.955
Epochs: 25306 | epoch avg. loss: 0.021 | test avg. loss: 4.999


 51%|█████     | 25309/50000 [37:54<52:51,  7.78it/s]

Epochs: 25307 | epoch avg. loss: 0.042 | test avg. loss: 4.950
Epochs: 25308 | epoch avg. loss: 0.033 | test avg. loss: 4.878
Epochs: 25309 | epoch avg. loss: 0.119 | test avg. loss: 5.117


 51%|█████     | 25313/50000 [37:54<40:26, 10.17it/s]

Epochs: 25310 | epoch avg. loss: 0.140 | test avg. loss: 4.856
Epochs: 25311 | epoch avg. loss: 0.037 | test avg. loss: 4.853
Epochs: 25312 | epoch avg. loss: 0.048 | test avg. loss: 5.077
Epochs: 25313 | epoch avg. loss: 0.104 | test avg. loss: 4.833


 51%|█████     | 25317/50000 [37:54<32:53, 12.50it/s]

Epochs: 25314 | epoch avg. loss: 0.062 | test avg. loss: 4.995
Epochs: 25315 | epoch avg. loss: 0.015 | test avg. loss: 5.148
Epochs: 25316 | epoch avg. loss: 0.017 | test avg. loss: 5.048
Epochs: 25317 | epoch avg. loss: 0.027 | test avg. loss: 5.028


 51%|█████     | 25321/50000 [37:54<29:25, 13.98it/s]

Epochs: 25318 | epoch avg. loss: 0.020 | test avg. loss: 4.841
Epochs: 25319 | epoch avg. loss: 0.020 | test avg. loss: 4.811
Epochs: 25320 | epoch avg. loss: 0.015 | test avg. loss: 4.913
Epochs: 25321 | epoch avg. loss: 0.014 | test avg. loss: 5.039


 51%|█████     | 25325/50000 [37:55<30:52, 13.32it/s]

Epochs: 25322 | epoch avg. loss: 0.013 | test avg. loss: 5.037
Epochs: 25323 | epoch avg. loss: 0.016 | test avg. loss: 4.932
Epochs: 25324 | epoch avg. loss: 0.011 | test avg. loss: 4.906


 51%|█████     | 25329/50000 [37:55<28:51, 14.25it/s]

Epochs: 25325 | epoch avg. loss: 0.019 | test avg. loss: 4.846
Epochs: 25326 | epoch avg. loss: 0.010 | test avg. loss: 4.852
Epochs: 25327 | epoch avg. loss: 0.010 | test avg. loss: 5.052
Epochs: 25328 | epoch avg. loss: 0.012 | test avg. loss: 4.996


 51%|█████     | 25333/50000 [37:55<26:59, 15.23it/s]

Epochs: 25329 | epoch avg. loss: 0.027 | test avg. loss: 4.998
Epochs: 25330 | epoch avg. loss: 0.009 | test avg. loss: 4.994
Epochs: 25331 | epoch avg. loss: 0.009 | test avg. loss: 4.885
Epochs: 25332 | epoch avg. loss: 0.010 | test avg. loss: 4.983


 51%|█████     | 25337/50000 [37:55<26:56, 15.26it/s]

Epochs: 25333 | epoch avg. loss: 0.017 | test avg. loss: 4.876
Epochs: 25334 | epoch avg. loss: 0.017 | test avg. loss: 4.947
Epochs: 25335 | epoch avg. loss: 0.009 | test avg. loss: 4.991
Epochs: 25336 | epoch avg. loss: 0.008 | test avg. loss: 4.929


 51%|█████     | 25339/50000 [37:56<29:46, 13.80it/s]

Epochs: 25337 | epoch avg. loss: 0.007 | test avg. loss: 4.942
Epochs: 25338 | epoch avg. loss: 0.007 | test avg. loss: 4.887
Epochs: 25339 | epoch avg. loss: 0.009 | test avg. loss: 4.878


 51%|█████     | 25343/50000 [37:56<29:56, 13.73it/s]

Epochs: 25340 | epoch avg. loss: 0.013 | test avg. loss: 5.002
Epochs: 25341 | epoch avg. loss: 0.016 | test avg. loss: 4.971
Epochs: 25342 | epoch avg. loss: 0.005 | test avg. loss: 4.940
Epochs: 25343 | epoch avg. loss: 0.006 | test avg. loss: 4.955


 51%|█████     | 25347/50000 [37:56<27:55, 14.71it/s]

Epochs: 25344 | epoch avg. loss: 0.004 | test avg. loss: 4.945
Epochs: 25345 | epoch avg. loss: 0.005 | test avg. loss: 4.986
Epochs: 25346 | epoch avg. loss: 0.006 | test avg. loss: 4.962
Epochs: 25347 | epoch avg. loss: 0.004 | test avg. loss: 4.921


 51%|█████     | 25351/50000 [37:56<27:34, 14.90it/s]

Epochs: 25348 | epoch avg. loss: 0.005 | test avg. loss: 4.915
Epochs: 25349 | epoch avg. loss: 0.004 | test avg. loss: 4.936
Epochs: 25350 | epoch avg. loss: 0.004 | test avg. loss: 4.915




Epochs: 25351 | epoch avg. loss: 0.012 | test avg. loss: 4.930
Epochs: 25352 | epoch avg. loss: 0.013 | test avg. loss: 5.007
Epochs: 25353 | epoch avg. loss: 0.010 | test avg. loss: 4.939


 51%|█████     | 25357/50000 [37:57<29:23, 13.97it/s]

Epochs: 25354 | epoch avg. loss: 0.014 | test avg. loss: 4.907
Epochs: 25355 | epoch avg. loss: 0.010 | test avg. loss: 4.956
Epochs: 25356 | epoch avg. loss: 0.015 | test avg. loss: 4.942




Epochs: 25357 | epoch avg. loss: 0.007 | test avg. loss: 4.927
Epochs: 25358 | epoch avg. loss: 0.022 | test avg. loss: 4.991
Epochs: 25359 | epoch avg. loss: 0.008 | test avg. loss: 4.978


 51%|█████     | 25363/50000 [37:57<28:39, 14.33it/s]

Epochs: 25360 | epoch avg. loss: 0.005 | test avg. loss: 4.918
Epochs: 25361 | epoch avg. loss: 0.006 | test avg. loss: 4.986
Epochs: 25362 | epoch avg. loss: 0.014 | test avg. loss: 4.897


 51%|█████     | 25365/50000 [37:57<29:09, 14.08it/s]

Epochs: 25363 | epoch avg. loss: 0.016 | test avg. loss: 4.911
Epochs: 25364 | epoch avg. loss: 0.008 | test avg. loss: 4.994
Epochs: 25365 | epoch avg. loss: 0.009 | test avg. loss: 4.912


 51%|█████     | 25369/50000 [37:58<29:55, 13.72it/s]

Epochs: 25366 | epoch avg. loss: 0.017 | test avg. loss: 4.937
Epochs: 25367 | epoch avg. loss: 0.007 | test avg. loss: 4.938
Epochs: 25368 | epoch avg. loss: 0.006 | test avg. loss: 4.887
Epochs: 25369 | epoch avg. loss: 0.007 | test avg. loss: 4.991


 51%|█████     | 25373/50000 [37:58<29:59, 13.69it/s]

Epochs: 25370 | epoch avg. loss: 0.024 | test avg. loss: 4.924
Epochs: 25371 | epoch avg. loss: 0.012 | test avg. loss: 4.907
Epochs: 25372 | epoch avg. loss: 0.009 | test avg. loss: 4.983


 51%|█████     | 25377/50000 [37:58<28:40, 14.31it/s]

Epochs: 25373 | epoch avg. loss: 0.014 | test avg. loss: 4.905
Epochs: 25374 | epoch avg. loss: 0.005 | test avg. loss: 4.915
Epochs: 25375 | epoch avg. loss: 0.005 | test avg. loss: 4.976
Epochs: 25376 | epoch avg. loss: 0.008 | test avg. loss: 4.917


                                                     

Epochs: 25377 | epoch avg. loss: 0.008 | test avg. loss: 4.896
Epochs: 25378 | epoch avg. loss: 0.007 | test avg. loss: 4.965
Epochs: 25379 | epoch avg. loss: 0.008 | test avg. loss: 4.946


 51%|█████     | 25383/50000 [37:59<28:45, 14.27it/s]

Epochs: 25380 | epoch avg. loss: 0.004 | test avg. loss: 4.969
Epochs: 25381 | epoch avg. loss: 0.006 | test avg. loss: 4.992
Epochs: 25382 | epoch avg. loss: 0.008 | test avg. loss: 4.919


 51%|█████     | 25387/50000 [37:59<27:56, 14.68it/s]

Epochs: 25383 | epoch avg. loss: 0.008 | test avg. loss: 4.946
Epochs: 25384 | epoch avg. loss: 0.005 | test avg. loss: 4.983
Epochs: 25385 | epoch avg. loss: 0.005 | test avg. loss: 4.935
Epochs: 25386 | epoch avg. loss: 0.012 | test avg. loss: 5.011


 51%|█████     | 25391/50000 [37:59<27:02, 15.16it/s]

Epochs: 25387 | epoch avg. loss: 0.015 | test avg. loss: 4.934
Epochs: 25388 | epoch avg. loss: 0.011 | test avg. loss: 4.870
Epochs: 25389 | epoch avg. loss: 0.025 | test avg. loss: 5.096
Epochs: 25390 | epoch avg. loss: 0.048 | test avg. loss: 5.062


 51%|█████     | 25393/50000 [37:59<27:29, 14.91it/s]

Epochs: 25391 | epoch avg. loss: 0.018 | test avg. loss: 4.947
Epochs: 25392 | epoch avg. loss: 0.052 | test avg. loss: 4.971
Epochs: 25393 | epoch avg. loss: 0.030 | test avg. loss: 4.896


 51%|█████     | 25397/50000 [38:00<26:54, 15.24it/s]

Epochs: 25394 | epoch avg. loss: 0.026 | test avg. loss: 4.930
Epochs: 25395 | epoch avg. loss: 0.030 | test avg. loss: 5.160
Epochs: 25396 | epoch avg. loss: 0.040 | test avg. loss: 4.968
Epochs: 25397 | epoch avg. loss: 0.047 | test avg. loss: 4.873


 51%|█████     | 25399/50000 [38:00<26:25, 15.52it/s]

Epochs: 25398 | epoch avg. loss: 0.029 | test avg. loss: 4.978
Epochs: 25399 | epoch avg. loss: 0.029 | test avg. loss: 4.824


 51%|█████     | 25403/50000 [38:05<4:02:22,  1.69it/s]

Epochs: 25400 | epoch avg. loss: 0.036 | test avg. loss: 4.908
Epochs: 25401 | epoch avg. loss: 0.013 | test avg. loss: 5.017
Epochs: 25402 | epoch avg. loss: 0.012 | test avg. loss: 4.972


 51%|█████     | 25405/50000 [38:05<3:00:23,  2.27it/s]

Epochs: 25403 | epoch avg. loss: 0.008 | test avg. loss: 4.927
Epochs: 25404 | epoch avg. loss: 0.006 | test avg. loss: 4.888
Epochs: 25405 | epoch avg. loss: 0.005 | test avg. loss: 4.835


 51%|█████     | 25409/50000 [38:05<1:46:45,  3.84it/s]

Epochs: 25406 | epoch avg. loss: 0.009 | test avg. loss: 4.831
Epochs: 25407 | epoch avg. loss: 0.007 | test avg. loss: 4.921
Epochs: 25408 | epoch avg. loss: 0.012 | test avg. loss: 4.961


 51%|█████     | 25411/50000 [38:06<1:25:23,  4.80it/s]

Epochs: 25409 | epoch avg. loss: 0.008 | test avg. loss: 4.903
Epochs: 25410 | epoch avg. loss: 0.018 | test avg. loss: 4.896
Epochs: 25411 | epoch avg. loss: 0.009 | test avg. loss: 4.961


 51%|█████     | 25415/50000 [38:06<1:02:09,  6.59it/s]

Epochs: 25412 | epoch avg. loss: 0.019 | test avg. loss: 4.847
Epochs: 25413 | epoch avg. loss: 0.016 | test avg. loss: 4.873
Epochs: 25414 | epoch avg. loss: 0.008 | test avg. loss: 4.962


 51%|█████     | 25417/50000 [38:06<54:02,  7.58it/s]

Epochs: 25415 | epoch avg. loss: 0.006 | test avg. loss: 4.910
Epochs: 25416 | epoch avg. loss: 0.015 | test avg. loss: 4.934
Epochs: 25417 | epoch avg. loss: 0.010 | test avg. loss: 5.040


 51%|█████     | 25421/50000 [38:07<46:32,  8.80it/s]

Epochs: 25418 | epoch avg. loss: 0.030 | test avg. loss: 4.865
Epochs: 25419 | epoch avg. loss: 0.008 | test avg. loss: 4.902
Epochs: 25420 | epoch avg. loss: 0.018 | test avg. loss: 4.936


 51%|█████     | 25423/50000 [38:07<43:14,  9.47it/s]

Epochs: 25421 | epoch avg. loss: 0.014 | test avg. loss: 4.867
Epochs: 25422 | epoch avg. loss: 0.012 | test avg. loss: 5.016
Epochs: 25423 | epoch avg. loss: 0.019 | test avg. loss: 5.012


 51%|█████     | 25427/50000 [38:07<38:22, 10.67it/s]

Epochs: 25424 | epoch avg. loss: 0.010 | test avg. loss: 4.882
Epochs: 25425 | epoch avg. loss: 0.026 | test avg. loss: 4.892
Epochs: 25426 | epoch avg. loss: 0.021 | test avg. loss: 4.900


 51%|█████     | 25429/50000 [38:07<36:11, 11.31it/s]

Epochs: 25427 | epoch avg. loss: 0.014 | test avg. loss: 4.861
Epochs: 25428 | epoch avg. loss: 0.047 | test avg. loss: 4.911
Epochs: 25429 | epoch avg. loss: 0.008 | test avg. loss: 4.932


 51%|█████     | 25433/50000 [38:08<35:53, 11.41it/s]

Epochs: 25430 | epoch avg. loss: 0.006 | test avg. loss: 4.929
Epochs: 25431 | epoch avg. loss: 0.006 | test avg. loss: 5.014
Epochs: 25432 | epoch avg. loss: 0.023 | test avg. loss: 4.937


 51%|█████     | 25435/50000 [38:08<34:46, 11.77it/s]

Epochs: 25433 | epoch avg. loss: 0.038 | test avg. loss: 4.948
Epochs: 25434 | epoch avg. loss: 0.029 | test avg. loss: 5.206
Epochs: 25435 | epoch avg. loss: 0.115 | test avg. loss: 4.810


 51%|█████     | 25439/50000 [38:08<31:32, 12.98it/s]

Epochs: 25436 | epoch avg. loss: 0.099 | test avg. loss: 4.806
Epochs: 25437 | epoch avg. loss: 0.120 | test avg. loss: 5.208
Epochs: 25438 | epoch avg. loss: 0.162 | test avg. loss: 4.892


 51%|█████     | 25441/50000 [38:08<30:54, 13.24it/s]

Epochs: 25439 | epoch avg. loss: 0.038 | test avg. loss: 4.706
Epochs: 25440 | epoch avg. loss: 0.068 | test avg. loss: 4.898
Epochs: 25441 | epoch avg. loss: 0.048 | test avg. loss: 4.838


 51%|█████     | 25445/50000 [38:08<30:43, 13.32it/s]

Epochs: 25442 | epoch avg. loss: 0.079 | test avg. loss: 4.961
Epochs: 25443 | epoch avg. loss: 0.032 | test avg. loss: 5.100
Epochs: 25444 | epoch avg. loss: 0.026 | test avg. loss: 4.974


 51%|█████     | 25447/50000 [38:09<30:01, 13.63it/s]

Epochs: 25445 | epoch avg. loss: 0.037 | test avg. loss: 4.866
Epochs: 25446 | epoch avg. loss: 0.024 | test avg. loss: 5.065
Epochs: 25447 | epoch avg. loss: 0.051 | test avg. loss: 4.810


 51%|█████     | 25451/50000 [38:09<29:48, 13.73it/s]

Epochs: 25448 | epoch avg. loss: 0.050 | test avg. loss: 4.853
Epochs: 25449 | epoch avg. loss: 0.036 | test avg. loss: 5.070
Epochs: 25450 | epoch avg. loss: 0.081 | test avg. loss: 4.937


 51%|█████     | 25453/50000 [38:09<30:03, 13.61it/s]

Epochs: 25451 | epoch avg. loss: 0.068 | test avg. loss: 4.915
Epochs: 25452 | epoch avg. loss: 0.046 | test avg. loss: 5.055
Epochs: 25453 | epoch avg. loss: 0.082 | test avg. loss: 4.938


 51%|█████     | 25457/50000 [38:09<30:15, 13.52it/s]

Epochs: 25454 | epoch avg. loss: 0.023 | test avg. loss: 4.956
Epochs: 25455 | epoch avg. loss: 0.037 | test avg. loss: 5.156
Epochs: 25456 | epoch avg. loss: 0.067 | test avg. loss: 5.025


 51%|█████     | 25459/50000 [38:10<30:01, 13.62it/s]

Epochs: 25457 | epoch avg. loss: 0.033 | test avg. loss: 4.885
Epochs: 25458 | epoch avg. loss: 0.105 | test avg. loss: 5.021
Epochs: 25459 | epoch avg. loss: 0.178 | test avg. loss: 4.984


 51%|█████     | 25463/50000 [38:10<31:04, 13.16it/s]

Epochs: 25460 | epoch avg. loss: 0.113 | test avg. loss: 5.004
Epochs: 25461 | epoch avg. loss: 0.564 | test avg. loss: 5.069
Epochs: 25462 | epoch avg. loss: 0.345 | test avg. loss: 5.795


 51%|█████     | 25465/50000 [38:10<32:01, 12.77it/s]

Epochs: 25463 | epoch avg. loss: 0.488 | test avg. loss: 4.991
Epochs: 25464 | epoch avg. loss: 0.580 | test avg. loss: 4.845
Epochs: 25465 | epoch avg. loss: 0.281 | test avg. loss: 6.250


 51%|█████     | 25469/50000 [38:10<32:57, 12.41it/s]

Epochs: 25466 | epoch avg. loss: 0.698 | test avg. loss: 5.187
Epochs: 25467 | epoch avg. loss: 1.200 | test avg. loss: 5.010
Epochs: 25468 | epoch avg. loss: 1.003 | test avg. loss: 5.782


                                                     

Epochs: 25469 | epoch avg. loss: 0.803 | test avg. loss: 7.436
Epochs: 25470 | epoch avg. loss: 1.873 | test avg. loss: 6.810
Epochs: 25471 | epoch avg. loss: 1.018 | test avg. loss: 5.263


 51%|█████     | 25475/50000 [38:11<29:43, 13.75it/s]

Epochs: 25472 | epoch avg. loss: 0.381 | test avg. loss: 5.198
Epochs: 25473 | epoch avg. loss: 0.668 | test avg. loss: 5.750
Epochs: 25474 | epoch avg. loss: 0.606 | test avg. loss: 5.102
Epochs: 25475 | epoch avg. loss: 0.264 | test avg. loss: 5.536


                                                     

Epochs: 25476 | epoch avg. loss: 0.196 | test avg. loss: 5.741
Epochs: 25477 | epoch avg. loss: 0.158 | test avg. loss: 5.333
Epochs: 25478 | epoch avg. loss: 0.198 | test avg. loss: 4.731


 51%|█████     | 25481/50000 [38:11<31:08, 13.12it/s]

Epochs: 25479 | epoch avg. loss: 0.104 | test avg. loss: 4.634
Epochs: 25480 | epoch avg. loss: 0.114 | test avg. loss: 4.744
Epochs: 25481 | epoch avg. loss: 0.103 | test avg. loss: 5.065


 51%|█████     | 25485/50000 [38:12<34:38, 11.79it/s]

Epochs: 25482 | epoch avg. loss: 0.071 | test avg. loss: 5.094
Epochs: 25483 | epoch avg. loss: 0.110 | test avg. loss: 5.104
Epochs: 25484 | epoch avg. loss: 0.061 | test avg. loss: 4.876


 51%|█████     | 25487/50000 [38:12<34:41, 11.78it/s]

Epochs: 25485 | epoch avg. loss: 0.045 | test avg. loss: 4.859
Epochs: 25486 | epoch avg. loss: 0.075 | test avg. loss: 4.522
Epochs: 25487 | epoch avg. loss: 0.070 | test avg. loss: 4.543


 51%|█████     | 25491/50000 [38:12<32:07, 12.72it/s]

Epochs: 25488 | epoch avg. loss: 0.040 | test avg. loss: 4.663
Epochs: 25489 | epoch avg. loss: 0.050 | test avg. loss: 4.554
Epochs: 25490 | epoch avg. loss: 0.041 | test avg. loss: 4.629


 51%|█████     | 25493/50000 [38:12<31:07, 13.13it/s]

Epochs: 25491 | epoch avg. loss: 0.021 | test avg. loss: 4.691
Epochs: 25492 | epoch avg. loss: 0.027 | test avg. loss: 4.744
Epochs: 25493 | epoch avg. loss: 0.023 | test avg. loss: 4.850


 51%|█████     | 25497/50000 [38:12<32:14, 12.67it/s]

Epochs: 25494 | epoch avg. loss: 0.017 | test avg. loss: 4.909
Epochs: 25495 | epoch avg. loss: 0.018 | test avg. loss: 4.952
Epochs: 25496 | epoch avg. loss: 0.017 | test avg. loss: 4.865


 51%|█████     | 25499/50000 [38:13<30:48, 13.26it/s]

Epochs: 25497 | epoch avg. loss: 0.014 | test avg. loss: 4.881
Epochs: 25498 | epoch avg. loss: 0.011 | test avg. loss: 4.813
Epochs: 25499 | epoch avg. loss: 0.017 | test avg. loss: 4.821


                                                       

Epochs: 25500 | epoch avg. loss: 0.010 | test avg. loss: 4.833
Epochs: 25501 | epoch avg. loss: 0.010 | test avg. loss: 4.869
Epochs: 25502 | epoch avg. loss: 0.015 | test avg. loss: 4.879


 51%|█████     | 25507/50000 [38:15<59:20,  6.88it/s]  

Epochs: 25503 | epoch avg. loss: 0.010 | test avg. loss: 4.843
Epochs: 25504 | epoch avg. loss: 0.008 | test avg. loss: 4.859
Epochs: 25505 | epoch avg. loss: 0.010 | test avg. loss: 4.794
Epochs: 25506 | epoch avg. loss: 0.010 | test avg. loss: 4.796


 51%|█████     | 25511/50000 [38:15<42:51,  9.52it/s]

Epochs: 25507 | epoch avg. loss: 0.006 | test avg. loss: 4.793
Epochs: 25508 | epoch avg. loss: 0.008 | test avg. loss: 4.763
Epochs: 25509 | epoch avg. loss: 0.012 | test avg. loss: 4.857
Epochs: 25510 | epoch avg. loss: 0.028 | test avg. loss: 4.757


 51%|█████     | 25513/50000 [38:15<38:10, 10.69it/s]

Epochs: 25511 | epoch avg. loss: 0.030 | test avg. loss: 4.739
Epochs: 25512 | epoch avg. loss: 0.015 | test avg. loss: 4.803
Epochs: 25513 | epoch avg. loss: 0.020 | test avg. loss: 4.729


 51%|█████     | 25517/50000 [38:15<33:15, 12.27it/s]

Epochs: 25514 | epoch avg. loss: 0.021 | test avg. loss: 4.836
Epochs: 25515 | epoch avg. loss: 0.020 | test avg. loss: 4.794
Epochs: 25516 | epoch avg. loss: 0.007 | test avg. loss: 4.792


 51%|█████     | 25519/50000 [38:16<33:18, 12.25it/s]

Epochs: 25517 | epoch avg. loss: 0.008 | test avg. loss: 4.821
Epochs: 25518 | epoch avg. loss: 0.013 | test avg. loss: 4.714
Epochs: 25519 | epoch avg. loss: 0.042 | test avg. loss: 4.797


 51%|█████     | 25523/50000 [38:16<30:59, 13.16it/s]

Epochs: 25520 | epoch avg. loss: 0.028 | test avg. loss: 4.775
Epochs: 25521 | epoch avg. loss: 0.010 | test avg. loss: 4.765
Epochs: 25522 | epoch avg. loss: 0.010 | test avg. loss: 4.805


 51%|█████     | 25525/50000 [38:16<29:46, 13.70it/s]

Epochs: 25523 | epoch avg. loss: 0.007 | test avg. loss: 4.762
Epochs: 25524 | epoch avg. loss: 0.016 | test avg. loss: 4.814
Epochs: 25525 | epoch avg. loss: 0.007 | test avg. loss: 4.810
Epochs: 25526 | epoch avg. loss: 0.012 | test avg. loss: 4.772


 51%|█████     | 25529/50000 [38:16<28:37, 14.25it/s]

Epochs: 25527 | epoch avg. loss: 0.017 | test avg. loss: 4.890
Epochs: 25528 | epoch avg. loss: 0.034 | test avg. loss: 4.739
Epochs: 25529 | epoch avg. loss: 0.017 | test avg. loss: 4.785


 51%|█████     | 25533/50000 [38:17<30:41, 13.29it/s]

Epochs: 25530 | epoch avg. loss: 0.019 | test avg. loss: 4.746
Epochs: 25531 | epoch avg. loss: 0.022 | test avg. loss: 4.717
Epochs: 25532 | epoch avg. loss: 0.047 | test avg. loss: 4.911


                                                     

Epochs: 25533 | epoch avg. loss: 0.074 | test avg. loss: 4.811
Epochs: 25534 | epoch avg. loss: 0.035 | test avg. loss: 4.854
Epochs: 25535 | epoch avg. loss: 0.035 | test avg. loss: 5.122


                                                     

Epochs: 25536 | epoch avg. loss: 0.077 | test avg. loss: 4.872
Epochs: 25537 | epoch avg. loss: 0.129 | test avg. loss: 4.822
Epochs: 25538 | epoch avg. loss: 0.036 | test avg. loss: 4.941


 51%|█████     | 25543/50000 [38:17<27:24, 14.87it/s]

Epochs: 25539 | epoch avg. loss: 0.051 | test avg. loss: 4.739
Epochs: 25540 | epoch avg. loss: 0.056 | test avg. loss: 4.770
Epochs: 25541 | epoch avg. loss: 0.045 | test avg. loss: 4.740
Epochs: 25542 | epoch avg. loss: 0.030 | test avg. loss: 4.694


 51%|█████     | 25545/50000 [38:17<28:53, 14.10it/s]

Epochs: 25543 | epoch avg. loss: 0.042 | test avg. loss: 4.884
Epochs: 25544 | epoch avg. loss: 0.070 | test avg. loss: 4.740
Epochs: 25545 | epoch avg. loss: 0.034 | test avg. loss: 4.737


 51%|█████     | 25549/50000 [38:18<32:30, 12.53it/s]

Epochs: 25546 | epoch avg. loss: 0.023 | test avg. loss: 4.836
Epochs: 25547 | epoch avg. loss: 0.030 | test avg. loss: 4.725
Epochs: 25548 | epoch avg. loss: 0.086 | test avg. loss: 4.803


 51%|█████     | 25551/50000 [38:18<32:05, 12.70it/s]

Epochs: 25549 | epoch avg. loss: 0.028 | test avg. loss: 4.908
Epochs: 25550 | epoch avg. loss: 0.018 | test avg. loss: 4.902
Epochs: 25551 | epoch avg. loss: 0.014 | test avg. loss: 5.025


 51%|█████     | 25555/50000 [38:18<30:18, 13.44it/s]

Epochs: 25552 | epoch avg. loss: 0.030 | test avg. loss: 4.868
Epochs: 25553 | epoch avg. loss: 0.025 | test avg. loss: 4.788
Epochs: 25554 | epoch avg. loss: 0.017 | test avg. loss: 4.851
Epochs: 25555 | epoch avg. loss: 0.025 | test avg. loss: 4.756


 51%|█████     | 25559/50000 [38:18<30:19, 13.43it/s]

Epochs: 25556 | epoch avg. loss: 0.028 | test avg. loss: 4.883
Epochs: 25557 | epoch avg. loss: 0.012 | test avg. loss: 4.920
Epochs: 25558 | epoch avg. loss: 0.016 | test avg. loss: 4.906


 51%|█████     | 25563/50000 [38:19<29:00, 14.04it/s]

Epochs: 25559 | epoch avg. loss: 0.016 | test avg. loss: 4.991
Epochs: 25560 | epoch avg. loss: 0.029 | test avg. loss: 4.804
Epochs: 25561 | epoch avg. loss: 0.019 | test avg. loss: 4.788
Epochs: 25562 | epoch avg. loss: 0.008 | test avg. loss: 4.832


 51%|█████     | 25567/50000 [38:19<27:57, 14.57it/s]

Epochs: 25563 | epoch avg. loss: 0.006 | test avg. loss: 4.844
Epochs: 25564 | epoch avg. loss: 0.008 | test avg. loss: 4.895
Epochs: 25565 | epoch avg. loss: 0.006 | test avg. loss: 4.868
Epochs: 25566 | epoch avg. loss: 0.005 | test avg. loss: 4.858


 51%|█████     | 25571/50000 [38:19<27:02, 15.06it/s]

Epochs: 25567 | epoch avg. loss: 0.006 | test avg. loss: 4.823
Epochs: 25568 | epoch avg. loss: 0.005 | test avg. loss: 4.815
Epochs: 25569 | epoch avg. loss: 0.005 | test avg. loss: 4.801
Epochs: 25570 | epoch avg. loss: 0.005 | test avg. loss: 4.824


 51%|█████     | 25573/50000 [38:19<27:18, 14.91it/s]

Epochs: 25571 | epoch avg. loss: 0.005 | test avg. loss: 4.860
Epochs: 25572 | epoch avg. loss: 0.005 | test avg. loss: 4.858
Epochs: 25573 | epoch avg. loss: 0.005 | test avg. loss: 4.798


 51%|█████     | 25577/50000 [38:20<27:16, 14.92it/s]

Epochs: 25574 | epoch avg. loss: 0.015 | test avg. loss: 4.832
Epochs: 25575 | epoch avg. loss: 0.009 | test avg. loss: 4.809
Epochs: 25576 | epoch avg. loss: 0.005 | test avg. loss: 4.784
Epochs: 25577 | epoch avg. loss: 0.006 | test avg. loss: 4.846


 51%|█████     | 25581/50000 [38:20<26:44, 15.22it/s]

Epochs: 25578 | epoch avg. loss: 0.022 | test avg. loss: 4.808
Epochs: 25579 | epoch avg. loss: 0.012 | test avg. loss: 4.815
Epochs: 25580 | epoch avg. loss: 0.023 | test avg. loss: 4.974
Epochs: 25581 | epoch avg. loss: 0.025 | test avg. loss: 4.863


 51%|█████     | 25585/50000 [38:20<27:58, 14.55it/s]

Epochs: 25582 | epoch avg. loss: 0.011 | test avg. loss: 4.829
Epochs: 25583 | epoch avg. loss: 0.007 | test avg. loss: 4.817
Epochs: 25584 | epoch avg. loss: 0.005 | test avg. loss: 4.855


 51%|█████     | 25587/50000 [38:20<28:05, 14.48it/s]

Epochs: 25585 | epoch avg. loss: 0.007 | test avg. loss: 4.821
Epochs: 25586 | epoch avg. loss: 0.011 | test avg. loss: 4.836
Epochs: 25587 | epoch avg. loss: 0.009 | test avg. loss: 4.882


 51%|█████     | 25591/50000 [38:21<29:14, 13.91it/s]

Epochs: 25588 | epoch avg. loss: 0.012 | test avg. loss: 4.765
Epochs: 25589 | epoch avg. loss: 0.023 | test avg. loss: 4.817
Epochs: 25590 | epoch avg. loss: 0.011 | test avg. loss: 4.873


 51%|█████     | 25593/50000 [38:21<29:33, 13.76it/s]

Epochs: 25591 | epoch avg. loss: 0.014 | test avg. loss: 4.779
Epochs: 25592 | epoch avg. loss: 0.018 | test avg. loss: 4.812
Epochs: 25593 | epoch avg. loss: 0.006 | test avg. loss: 4.855


 51%|█████     | 25597/50000 [38:21<29:13, 13.92it/s]

Epochs: 25594 | epoch avg. loss: 0.006 | test avg. loss: 4.857
Epochs: 25595 | epoch avg. loss: 0.005 | test avg. loss: 4.822
Epochs: 25596 | epoch avg. loss: 0.004 | test avg. loss: 4.786


 51%|█████     | 25599/50000 [38:21<30:37, 13.28it/s]

Epochs: 25597 | epoch avg. loss: 0.005 | test avg. loss: 4.837
Epochs: 25598 | epoch avg. loss: 0.011 | test avg. loss: 4.790
Epochs: 25599 | epoch avg. loss: 0.008 | test avg. loss: 4.783


 51%|█████     | 25603/50000 [38:23<1:33:46,  4.34it/s]

Epochs: 25600 | epoch avg. loss: 0.010 | test avg. loss: 4.825
Epochs: 25601 | epoch avg. loss: 0.006 | test avg. loss: 4.836
Epochs: 25602 | epoch avg. loss: 0.005 | test avg. loss: 4.827
Epochs: 25603 | epoch avg. loss: 0.005 | test avg. loss: 4.801


 51%|█████     | 25607/50000 [38:23<1:00:49,  6.68it/s]

Epochs: 25604 | epoch avg. loss: 0.007 | test avg. loss: 4.797
Epochs: 25605 | epoch avg. loss: 0.007 | test avg. loss: 4.814
Epochs: 25606 | epoch avg. loss: 0.004 | test avg. loss: 4.828


 51%|█████     | 25609/50000 [38:24<52:51,  7.69it/s]

Epochs: 25607 | epoch avg. loss: 0.004 | test avg. loss: 4.810
Epochs: 25608 | epoch avg. loss: 0.011 | test avg. loss: 4.788
Epochs: 25609 | epoch avg. loss: 0.007 | test avg. loss: 4.831


 51%|█████     | 25613/50000 [38:24<43:07,  9.43it/s]

Epochs: 25610 | epoch avg. loss: 0.008 | test avg. loss: 4.748
Epochs: 25611 | epoch avg. loss: 0.015 | test avg. loss: 4.759
Epochs: 25612 | epoch avg. loss: 0.007 | test avg. loss: 4.809


 51%|█████     | 25615/50000 [38:24<41:33,  9.78it/s]

Epochs: 25613 | epoch avg. loss: 0.007 | test avg. loss: 4.799
Epochs: 25614 | epoch avg. loss: 0.015 | test avg. loss: 4.893
Epochs: 25615 | epoch avg. loss: 0.020 | test avg. loss: 4.945


 51%|█████     | 25619/50000 [38:24<36:09, 11.24it/s]

Epochs: 25616 | epoch avg. loss: 0.033 | test avg. loss: 4.811
Epochs: 25617 | epoch avg. loss: 0.019 | test avg. loss: 4.804
Epochs: 25618 | epoch avg. loss: 0.015 | test avg. loss: 4.936


 51%|█████     | 25621/50000 [38:25<35:01, 11.60it/s]

Epochs: 25619 | epoch avg. loss: 0.032 | test avg. loss: 4.847
Epochs: 25620 | epoch avg. loss: 0.023 | test avg. loss: 4.820
Epochs: 25621 | epoch avg. loss: 0.016 | test avg. loss: 4.887


 51%|█████▏    | 25625/50000 [38:25<34:37, 11.73it/s]

Epochs: 25622 | epoch avg. loss: 0.032 | test avg. loss: 4.737
Epochs: 25623 | epoch avg. loss: 0.025 | test avg. loss: 4.862
Epochs: 25624 | epoch avg. loss: 0.023 | test avg. loss: 4.912


 51%|█████▏    | 25627/50000 [38:25<32:59, 12.31it/s]

Epochs: 25625 | epoch avg. loss: 0.016 | test avg. loss: 4.814
Epochs: 25626 | epoch avg. loss: 0.013 | test avg. loss: 4.855
Epochs: 25627 | epoch avg. loss: 0.026 | test avg. loss: 4.816


 51%|█████▏    | 25631/50000 [38:25<31:46, 12.78it/s]

Epochs: 25628 | epoch avg. loss: 0.014 | test avg. loss: 4.732
Epochs: 25629 | epoch avg. loss: 0.017 | test avg. loss: 4.829
Epochs: 25630 | epoch avg. loss: 0.009 | test avg. loss: 4.828


 51%|█████▏    | 25633/50000 [38:25<31:47, 12.78it/s]

Epochs: 25631 | epoch avg. loss: 0.006 | test avg. loss: 4.816
Epochs: 25632 | epoch avg. loss: 0.006 | test avg. loss: 4.842
Epochs: 25633 | epoch avg. loss: 0.008 | test avg. loss: 4.813


 51%|█████▏    | 25637/50000 [38:26<32:30, 12.49it/s]

Epochs: 25634 | epoch avg. loss: 0.006 | test avg. loss: 4.825
Epochs: 25635 | epoch avg. loss: 0.007 | test avg. loss: 4.878
Epochs: 25636 | epoch avg. loss: 0.015 | test avg. loss: 4.814


 51%|█████▏    | 25639/50000 [38:26<30:50, 13.16it/s]

Epochs: 25637 | epoch avg. loss: 0.017 | test avg. loss: 4.851
Epochs: 25638 | epoch avg. loss: 0.011 | test avg. loss: 4.916
Epochs: 25639 | epoch avg. loss: 0.015 | test avg. loss: 4.834


 51%|█████▏    | 25643/50000 [38:26<31:25, 12.92it/s]

Epochs: 25640 | epoch avg. loss: 0.027 | test avg. loss: 4.842
Epochs: 25641 | epoch avg. loss: 0.017 | test avg. loss: 5.011
Epochs: 25642 | epoch avg. loss: 0.062 | test avg. loss: 4.802


 51%|█████▏    | 25645/50000 [38:26<32:33, 12.47it/s]

Epochs: 25643 | epoch avg. loss: 0.137 | test avg. loss: 4.758
Epochs: 25644 | epoch avg. loss: 0.078 | test avg. loss: 4.889
Epochs: 25645 | epoch avg. loss: 0.090 | test avg. loss: 4.609


 51%|█████▏    | 25649/50000 [38:27<34:39, 11.71it/s]

Epochs: 25646 | epoch avg. loss: 0.046 | test avg. loss: 4.640
Epochs: 25647 | epoch avg. loss: 0.083 | test avg. loss: 4.903
Epochs: 25648 | epoch avg. loss: 0.079 | test avg. loss: 4.857


 51%|█████▏    | 25651/50000 [38:27<34:44, 11.68it/s]

Epochs: 25649 | epoch avg. loss: 0.054 | test avg. loss: 4.863
Epochs: 25650 | epoch avg. loss: 0.081 | test avg. loss: 4.931
Epochs: 25651 | epoch avg. loss: 0.074 | test avg. loss: 4.836


 51%|█████▏    | 25655/50000 [38:27<35:01, 11.59it/s]

Epochs: 25652 | epoch avg. loss: 0.034 | test avg. loss: 4.860
Epochs: 25653 | epoch avg. loss: 0.090 | test avg. loss: 5.116
Epochs: 25654 | epoch avg. loss: 0.118 | test avg. loss: 4.961


 51%|█████▏    | 25657/50000 [38:27<36:06, 11.24it/s]

Epochs: 25655 | epoch avg. loss: 0.039 | test avg. loss: 4.929
Epochs: 25656 | epoch avg. loss: 0.045 | test avg. loss: 5.071


 51%|█████▏    | 25659/50000 [38:28<36:37, 11.08it/s]

Epochs: 25657 | epoch avg. loss: 0.028 | test avg. loss: 5.026
Epochs: 25658 | epoch avg. loss: 0.013 | test avg. loss: 4.989
Epochs: 25659 | epoch avg. loss: 0.010 | test avg. loss: 4.988


 51%|█████▏    | 25663/50000 [38:28<33:36, 12.07it/s]

Epochs: 25660 | epoch avg. loss: 0.011 | test avg. loss: 4.923
Epochs: 25661 | epoch avg. loss: 0.008 | test avg. loss: 4.856
Epochs: 25662 | epoch avg. loss: 0.006 | test avg. loss: 4.830


 51%|█████▏    | 25665/50000 [38:28<32:52, 12.34it/s]

Epochs: 25663 | epoch avg. loss: 0.005 | test avg. loss: 4.872
Epochs: 25664 | epoch avg. loss: 0.006 | test avg. loss: 4.857
Epochs: 25665 | epoch avg. loss: 0.018 | test avg. loss: 4.973


 51%|█████▏    | 25669/50000 [38:28<32:16, 12.57it/s]

Epochs: 25666 | epoch avg. loss: 0.023 | test avg. loss: 4.954
Epochs: 25667 | epoch avg. loss: 0.038 | test avg. loss: 4.938
Epochs: 25668 | epoch avg. loss: 0.057 | test avg. loss: 5.216


 51%|█████▏    | 25671/50000 [38:29<35:10, 11.53it/s]

Epochs: 25669 | epoch avg. loss: 0.105 | test avg. loss: 4.812
Epochs: 25670 | epoch avg. loss: 0.053 | test avg. loss: 4.809
Epochs: 25671 | epoch avg. loss: 0.092 | test avg. loss: 4.911


 51%|█████▏    | 25675/50000 [38:29<32:37, 12.43it/s]

Epochs: 25672 | epoch avg. loss: 0.041 | test avg. loss: 4.867
Epochs: 25673 | epoch avg. loss: 0.022 | test avg. loss: 4.836
Epochs: 25674 | epoch avg. loss: 0.040 | test avg. loss: 5.147


 51%|█████▏    | 25677/50000 [38:29<32:42, 12.40it/s]

Epochs: 25675 | epoch avg. loss: 0.105 | test avg. loss: 4.837
Epochs: 25676 | epoch avg. loss: 0.045 | test avg. loss: 4.811
Epochs: 25677 | epoch avg. loss: 0.059 | test avg. loss: 5.236


 51%|█████▏    | 25681/50000 [38:29<34:42, 11.68it/s]

Epochs: 25678 | epoch avg. loss: 0.186 | test avg. loss: 4.743
Epochs: 25679 | epoch avg. loss: 0.072 | test avg. loss: 4.676
Epochs: 25680 | epoch avg. loss: 0.081 | test avg. loss: 5.131


 51%|█████▏    | 25683/50000 [38:30<35:38, 11.37it/s]

Epochs: 25681 | epoch avg. loss: 0.253 | test avg. loss: 4.687
Epochs: 25682 | epoch avg. loss: 0.095 | test avg. loss: 4.670
Epochs: 25683 | epoch avg. loss: 0.098 | test avg. loss: 4.827




Epochs: 25684 | epoch avg. loss: 0.104 | test avg. loss: 4.555
Epochs: 25685 | epoch avg. loss: 0.021 | test avg. loss: 4.503
Epochs: 25686 | epoch avg. loss: 0.022 | test avg. loss: 4.683


 51%|█████▏    | 25689/50000 [38:30<32:25, 12.50it/s]

Epochs: 25687 | epoch avg. loss: 0.041 | test avg. loss: 4.625
Epochs: 25688 | epoch avg. loss: 0.075 | test avg. loss: 4.666
Epochs: 25689 | epoch avg. loss: 0.030 | test avg. loss: 4.866


 51%|█████▏    | 25693/50000 [38:30<31:40, 12.79it/s]

Epochs: 25690 | epoch avg. loss: 0.083 | test avg. loss: 4.613
Epochs: 25691 | epoch avg. loss: 0.165 | test avg. loss: 4.665
Epochs: 25692 | epoch avg. loss: 0.119 | test avg. loss: 4.845


 51%|█████▏    | 25695/50000 [38:31<32:31, 12.45it/s]

Epochs: 25693 | epoch avg. loss: 0.131 | test avg. loss: 4.623
Epochs: 25694 | epoch avg. loss: 0.048 | test avg. loss: 4.497
Epochs: 25695 | epoch avg. loss: 0.056 | test avg. loss: 4.633


 51%|█████▏    | 25699/50000 [38:31<31:14, 12.97it/s]

Epochs: 25696 | epoch avg. loss: 0.053 | test avg. loss: 4.613
Epochs: 25697 | epoch avg. loss: 0.032 | test avg. loss: 4.713
Epochs: 25698 | epoch avg. loss: 0.049 | test avg. loss: 4.981


 51%|█████▏    | 25699/50000 [38:31<31:14, 12.97it/s]

Epochs: 25699 | epoch avg. loss: 0.089 | test avg. loss: 4.721


 51%|█████▏    | 25703/50000 [38:33<1:38:21,  4.12it/s]

Epochs: 25700 | epoch avg. loss: 0.022 | test avg. loss: 4.456
Epochs: 25701 | epoch avg. loss: 0.028 | test avg. loss: 4.487
Epochs: 25702 | epoch avg. loss: 0.025 | test avg. loss: 4.476


                                                       

Epochs: 25703 | epoch avg. loss: 0.039 | test avg. loss: 4.665
Epochs: 25704 | epoch avg. loss: 0.033 | test avg. loss: 4.831
Epochs: 25705 | epoch avg. loss: 0.032 | test avg. loss: 4.687


 51%|█████▏    | 25709/50000 [38:33<51:25,  7.87it/s]

Epochs: 25706 | epoch avg. loss: 0.027 | test avg. loss: 4.723
Epochs: 25707 | epoch avg. loss: 0.034 | test avg. loss: 4.587
Epochs: 25708 | epoch avg. loss: 0.015 | test avg. loss: 4.536
Epochs: 25709 | epoch avg. loss: 0.028 | test avg. loss: 4.672


 51%|█████▏    | 25713/50000 [38:33<39:28, 10.25it/s]

Epochs: 25710 | epoch avg. loss: 0.028 | test avg. loss: 4.739
Epochs: 25711 | epoch avg. loss: 0.013 | test avg. loss: 4.696
Epochs: 25712 | epoch avg. loss: 0.014 | test avg. loss: 4.776




Epochs: 25713 | epoch avg. loss: 0.029 | test avg. loss: 4.635
Epochs: 25714 | epoch avg. loss: 0.017 | test avg. loss: 4.603


 51%|█████▏    | 25717/50000 [38:34<38:09, 10.61it/s]

Epochs: 25715 | epoch avg. loss: 0.017 | test avg. loss: 4.722
Epochs: 25716 | epoch avg. loss: 0.017 | test avg. loss: 4.700
Epochs: 25717 | epoch avg. loss: 0.019 | test avg. loss: 4.763


 51%|█████▏    | 25721/50000 [38:34<32:47, 12.34it/s]

Epochs: 25718 | epoch avg. loss: 0.014 | test avg. loss: 4.896
Epochs: 25719 | epoch avg. loss: 0.040 | test avg. loss: 4.654
Epochs: 25720 | epoch avg. loss: 0.027 | test avg. loss: 4.596


 51%|█████▏    | 25723/50000 [38:34<32:21, 12.50it/s]

Epochs: 25721 | epoch avg. loss: 0.034 | test avg. loss: 4.726
Epochs: 25722 | epoch avg. loss: 0.016 | test avg. loss: 4.770
Epochs: 25723 | epoch avg. loss: 0.014 | test avg. loss: 4.673


 51%|█████▏    | 25727/50000 [38:34<32:02, 12.62it/s]

Epochs: 25724 | epoch avg. loss: 0.040 | test avg. loss: 4.642
Epochs: 25725 | epoch avg. loss: 0.017 | test avg. loss: 4.721
Epochs: 25726 | epoch avg. loss: 0.044 | test avg. loss: 4.641


 51%|█████▏    | 25729/50000 [38:35<30:42, 13.18it/s]

Epochs: 25727 | epoch avg. loss: 0.010 | test avg. loss: 4.634
Epochs: 25728 | epoch avg. loss: 0.025 | test avg. loss: 4.670
Epochs: 25729 | epoch avg. loss: 0.005 | test avg. loss: 4.628


 51%|█████▏    | 25733/50000 [38:35<29:38, 13.65it/s]

Epochs: 25730 | epoch avg. loss: 0.014 | test avg. loss: 4.609
Epochs: 25731 | epoch avg. loss: 0.013 | test avg. loss: 4.728
Epochs: 25732 | epoch avg. loss: 0.021 | test avg. loss: 4.693
Epochs: 25733 | epoch avg. loss: 0.029 | test avg. loss: 4.694


 51%|█████▏    | 25737/50000 [38:35<27:29, 14.71it/s]

Epochs: 25734 | epoch avg. loss: 0.017 | test avg. loss: 4.775
Epochs: 25735 | epoch avg. loss: 0.022 | test avg. loss: 4.685
Epochs: 25736 | epoch avg. loss: 0.008 | test avg. loss: 4.721
Epochs: 25737 | epoch avg. loss: 0.013 | test avg. loss: 4.758


 51%|█████▏    | 25741/50000 [38:35<29:24, 13.75it/s]

Epochs: 25738 | epoch avg. loss: 0.015 | test avg. loss: 4.681
Epochs: 25739 | epoch avg. loss: 0.005 | test avg. loss: 4.726
Epochs: 25740 | epoch avg. loss: 0.011 | test avg. loss: 4.728


 51%|█████▏    | 25743/50000 [38:36<30:10, 13.40it/s]

Epochs: 25741 | epoch avg. loss: 0.006 | test avg. loss: 4.710
Epochs: 25742 | epoch avg. loss: 0.011 | test avg. loss: 4.737
Epochs: 25743 | epoch avg. loss: 0.007 | test avg. loss: 4.723


 51%|█████▏    | 25747/50000 [38:36<29:50, 13.54it/s]

Epochs: 25744 | epoch avg. loss: 0.007 | test avg. loss: 4.633
Epochs: 25745 | epoch avg. loss: 0.028 | test avg. loss: 4.682
Epochs: 25746 | epoch avg. loss: 0.010 | test avg. loss: 4.750


 51%|█████▏    | 25749/50000 [38:36<29:22, 13.76it/s]

Epochs: 25747 | epoch avg. loss: 0.012 | test avg. loss: 4.704
Epochs: 25748 | epoch avg. loss: 0.014 | test avg. loss: 4.753
Epochs: 25749 | epoch avg. loss: 0.027 | test avg. loss: 4.641


 52%|█████▏    | 25753/50000 [38:36<30:08, 13.41it/s]

Epochs: 25750 | epoch avg. loss: 0.012 | test avg. loss: 4.526
Epochs: 25751 | epoch avg. loss: 0.017 | test avg. loss: 4.632
Epochs: 25752 | epoch avg. loss: 0.024 | test avg. loss: 4.700


 52%|█████▏    | 25755/50000 [38:37<30:06, 13.42it/s]

Epochs: 25753 | epoch avg. loss: 0.012 | test avg. loss: 4.686
Epochs: 25754 | epoch avg. loss: 0.011 | test avg. loss: 4.712
Epochs: 25755 | epoch avg. loss: 0.008 | test avg. loss: 4.706


 52%|█████▏    | 25759/50000 [38:37<28:22, 14.24it/s]

Epochs: 25756 | epoch avg. loss: 0.007 | test avg. loss: 4.705
Epochs: 25757 | epoch avg. loss: 0.006 | test avg. loss: 4.691
Epochs: 25758 | epoch avg. loss: 0.009 | test avg. loss: 4.723


 52%|█████▏    | 25761/50000 [38:37<29:37, 13.64it/s]

Epochs: 25759 | epoch avg. loss: 0.005 | test avg. loss: 4.785
Epochs: 25760 | epoch avg. loss: 0.012 | test avg. loss: 4.706
Epochs: 25761 | epoch avg. loss: 0.011 | test avg. loss: 4.674


 52%|█████▏    | 25765/50000 [38:37<28:16, 14.28it/s]

Epochs: 25762 | epoch avg. loss: 0.008 | test avg. loss: 4.658
Epochs: 25763 | epoch avg. loss: 0.006 | test avg. loss: 4.659
Epochs: 25764 | epoch avg. loss: 0.010 | test avg. loss: 4.699
Epochs: 25765 | epoch avg. loss: 0.006 | test avg. loss: 4.721


 52%|█████▏    | 25769/50000 [38:37<29:39, 13.62it/s]

Epochs: 25766 | epoch avg. loss: 0.004 | test avg. loss: 4.710
Epochs: 25767 | epoch avg. loss: 0.004 | test avg. loss: 4.696
Epochs: 25768 | epoch avg. loss: 0.004 | test avg. loss: 4.694


 52%|█████▏    | 25771/50000 [38:38<30:00, 13.45it/s]

Epochs: 25769 | epoch avg. loss: 0.004 | test avg. loss: 4.733
Epochs: 25770 | epoch avg. loss: 0.007 | test avg. loss: 4.669
Epochs: 25771 | epoch avg. loss: 0.011 | test avg. loss: 4.657


 52%|█████▏    | 25775/50000 [38:38<32:04, 12.59it/s]

Epochs: 25772 | epoch avg. loss: 0.011 | test avg. loss: 4.790
Epochs: 25773 | epoch avg. loss: 0.052 | test avg. loss: 4.679
Epochs: 25774 | epoch avg. loss: 0.021 | test avg. loss: 4.705


 52%|█████▏    | 25777/50000 [38:38<32:11, 12.54it/s]

Epochs: 25775 | epoch avg. loss: 0.026 | test avg. loss: 4.867
Epochs: 25776 | epoch avg. loss: 0.026 | test avg. loss: 4.774
Epochs: 25777 | epoch avg. loss: 0.008 | test avg. loss: 4.714


 52%|█████▏    | 25781/50000 [38:38<32:21, 12.47it/s]

Epochs: 25778 | epoch avg. loss: 0.013 | test avg. loss: 4.760
Epochs: 25779 | epoch avg. loss: 0.009 | test avg. loss: 4.760
Epochs: 25780 | epoch avg. loss: 0.008 | test avg. loss: 4.704


 52%|█████▏    | 25783/50000 [38:39<30:57, 13.04it/s]

Epochs: 25781 | epoch avg. loss: 0.009 | test avg. loss: 4.673
Epochs: 25782 | epoch avg. loss: 0.021 | test avg. loss: 4.726
Epochs: 25783 | epoch avg. loss: 0.009 | test avg. loss: 4.827


 52%|█████▏    | 25787/50000 [38:39<29:49, 13.53it/s]

Epochs: 25784 | epoch avg. loss: 0.019 | test avg. loss: 4.768
Epochs: 25785 | epoch avg. loss: 0.018 | test avg. loss: 4.813
Epochs: 25786 | epoch avg. loss: 0.011 | test avg. loss: 4.927


 52%|█████▏    | 25789/50000 [38:39<30:42, 13.14it/s]

Epochs: 25787 | epoch avg. loss: 0.031 | test avg. loss: 4.829
Epochs: 25788 | epoch avg. loss: 0.015 | test avg. loss: 4.738
Epochs: 25789 | epoch avg. loss: 0.013 | test avg. loss: 4.699


 52%|█████▏    | 25793/50000 [38:39<30:19, 13.31it/s]

Epochs: 25790 | epoch avg. loss: 0.012 | test avg. loss: 4.622
Epochs: 25791 | epoch avg. loss: 0.006 | test avg. loss: 4.642
Epochs: 25792 | epoch avg. loss: 0.004 | test avg. loss: 4.672


 52%|█████▏    | 25795/50000 [38:40<30:14, 13.34it/s]

Epochs: 25793 | epoch avg. loss: 0.007 | test avg. loss: 4.738
Epochs: 25794 | epoch avg. loss: 0.009 | test avg. loss: 4.784
Epochs: 25795 | epoch avg. loss: 0.023 | test avg. loss: 4.741


 52%|█████▏    | 25799/50000 [38:40<30:43, 13.13it/s]

Epochs: 25796 | epoch avg. loss: 0.015 | test avg. loss: 4.651
Epochs: 25797 | epoch avg. loss: 0.011 | test avg. loss: 4.624
Epochs: 25798 | epoch avg. loss: 0.030 | test avg. loss: 4.648


 52%|█████▏    | 25799/50000 [38:40<30:43, 13.13it/s]

Epochs: 25799 | epoch avg. loss: 0.008 | test avg. loss: 4.784


 52%|█████▏    | 25803/50000 [38:42<1:42:38,  3.93it/s]

Epochs: 25800 | epoch avg. loss: 0.028 | test avg. loss: 4.671
Epochs: 25801 | epoch avg. loss: 0.024 | test avg. loss: 4.646
Epochs: 25802 | epoch avg. loss: 0.043 | test avg. loss: 4.740


 52%|█████▏    | 25805/50000 [38:42<1:21:34,  4.94it/s]

Epochs: 25803 | epoch avg. loss: 0.010 | test avg. loss: 4.733
Epochs: 25804 | epoch avg. loss: 0.007 | test avg. loss: 4.673
Epochs: 25805 | epoch avg. loss: 0.008 | test avg. loss: 4.714


 52%|█████▏    | 25809/50000 [38:42<58:48,  6.86it/s]  

Epochs: 25806 | epoch avg. loss: 0.008 | test avg. loss: 4.661
Epochs: 25807 | epoch avg. loss: 0.005 | test avg. loss: 4.661
Epochs: 25808 | epoch avg. loss: 0.010 | test avg. loss: 4.759


 52%|█████▏    | 25811/50000 [38:43<53:30,  7.53it/s]

Epochs: 25809 | epoch avg. loss: 0.026 | test avg. loss: 4.752
Epochs: 25810 | epoch avg. loss: 0.007 | test avg. loss: 4.694
Epochs: 25811 | epoch avg. loss: 0.007 | test avg. loss: 4.707


 52%|█████▏    | 25815/50000 [38:43<43:36,  9.24it/s]

Epochs: 25812 | epoch avg. loss: 0.008 | test avg. loss: 4.652
Epochs: 25813 | epoch avg. loss: 0.006 | test avg. loss: 4.633
Epochs: 25814 | epoch avg. loss: 0.005 | test avg. loss: 4.714


 52%|█████▏    | 25817/50000 [38:43<39:22, 10.24it/s]

Epochs: 25815 | epoch avg. loss: 0.007 | test avg. loss: 4.685
Epochs: 25816 | epoch avg. loss: 0.041 | test avg. loss: 4.697
Epochs: 25817 | epoch avg. loss: 0.032 | test avg. loss: 4.759


 52%|█████▏    | 25821/50000 [38:43<35:31, 11.34it/s]

Epochs: 25818 | epoch avg. loss: 0.018 | test avg. loss: 4.684
Epochs: 25819 | epoch avg. loss: 0.014 | test avg. loss: 4.615
Epochs: 25820 | epoch avg. loss: 0.085 | test avg. loss: 4.666


 52%|█████▏    | 25823/50000 [38:44<35:17, 11.42it/s]

Epochs: 25821 | epoch avg. loss: 0.015 | test avg. loss: 4.820
Epochs: 25822 | epoch avg. loss: 0.021 | test avg. loss: 4.717
Epochs: 25823 | epoch avg. loss: 0.036 | test avg. loss: 4.674


 52%|█████▏    | 25827/50000 [38:44<33:49, 11.91it/s]

Epochs: 25824 | epoch avg. loss: 0.015 | test avg. loss: 4.743
Epochs: 25825 | epoch avg. loss: 0.052 | test avg. loss: 4.611
Epochs: 25826 | epoch avg. loss: 0.037 | test avg. loss: 4.752


 52%|█████▏    | 25829/50000 [38:44<31:19, 12.86it/s]

Epochs: 25827 | epoch avg. loss: 0.227 | test avg. loss: 4.876
Epochs: 25828 | epoch avg. loss: 0.030 | test avg. loss: 5.103
Epochs: 25829 | epoch avg. loss: 0.057 | test avg. loss: 4.842


                                                     

Epochs: 25830 | epoch avg. loss: 0.064 | test avg. loss: 4.712
Epochs: 25831 | epoch avg. loss: 0.110 | test avg. loss: 4.870
Epochs: 25832 | epoch avg. loss: 0.165 | test avg. loss: 4.924


 52%|█████▏    | 25835/50000 [38:44<30:45, 13.09it/s]

Epochs: 25833 | epoch avg. loss: 0.088 | test avg. loss: 4.685
Epochs: 25834 | epoch avg. loss: 0.068 | test avg. loss: 4.771
Epochs: 25835 | epoch avg. loss: 0.030 | test avg. loss: 4.857


 52%|█████▏    | 25839/50000 [38:45<29:52, 13.48it/s]

Epochs: 25836 | epoch avg. loss: 0.028 | test avg. loss: 4.772
Epochs: 25837 | epoch avg. loss: 0.025 | test avg. loss: 4.586
Epochs: 25838 | epoch avg. loss: 0.064 | test avg. loss: 4.590
Epochs: 25839 | epoch avg. loss: 0.034 | test avg. loss: 4.755


 52%|█████▏    | 25843/50000 [38:45<27:36, 14.59it/s]

Epochs: 25840 | epoch avg. loss: 0.029 | test avg. loss: 4.851
Epochs: 25841 | epoch avg. loss: 0.031 | test avg. loss: 4.736
Epochs: 25842 | epoch avg. loss: 0.012 | test avg. loss: 4.664
Epochs: 25843 | epoch avg. loss: 0.016 | test avg. loss: 4.707


 52%|█████▏    | 25847/50000 [38:45<30:33, 13.17it/s]

Epochs: 25844 | epoch avg. loss: 0.029 | test avg. loss: 4.712
Epochs: 25845 | epoch avg. loss: 0.013 | test avg. loss: 4.680
Epochs: 25846 | epoch avg. loss: 0.028 | test avg. loss: 4.730


 52%|█████▏    | 25849/50000 [38:46<31:41, 12.70it/s]

Epochs: 25847 | epoch avg. loss: 0.012 | test avg. loss: 4.833
Epochs: 25848 | epoch avg. loss: 0.026 | test avg. loss: 4.723
Epochs: 25849 | epoch avg. loss: 0.014 | test avg. loss: 4.690


 52%|█████▏    | 25853/50000 [38:46<34:39, 11.61it/s]

Epochs: 25850 | epoch avg. loss: 0.019 | test avg. loss: 4.780
Epochs: 25851 | epoch avg. loss: 0.023 | test avg. loss: 4.846
Epochs: 25852 | epoch avg. loss: 0.024 | test avg. loss: 4.709


 52%|█████▏    | 25855/50000 [38:46<33:06, 12.15it/s]

Epochs: 25853 | epoch avg. loss: 0.022 | test avg. loss: 4.664
Epochs: 25854 | epoch avg. loss: 0.013 | test avg. loss: 4.777
Epochs: 25855 | epoch avg. loss: 0.034 | test avg. loss: 4.743


 52%|█████▏    | 25859/50000 [38:46<32:50, 12.25it/s]

Epochs: 25856 | epoch avg. loss: 0.013 | test avg. loss: 4.701
Epochs: 25857 | epoch avg. loss: 0.083 | test avg. loss: 4.766
Epochs: 25858 | epoch avg. loss: 0.028 | test avg. loss: 5.018


 52%|█████▏    | 25861/50000 [38:47<34:53, 11.53it/s]

Epochs: 25859 | epoch avg. loss: 0.120 | test avg. loss: 4.894
Epochs: 25860 | epoch avg. loss: 0.032 | test avg. loss: 4.752
Epochs: 25861 | epoch avg. loss: 0.166 | test avg. loss: 4.659


 52%|█████▏    | 25865/50000 [38:47<35:39, 11.28it/s]

Epochs: 25862 | epoch avg. loss: 0.051 | test avg. loss: 5.242
Epochs: 25863 | epoch avg. loss: 0.407 | test avg. loss: 4.763
Epochs: 25864 | epoch avg. loss: 0.088 | test avg. loss: 5.000


 52%|█████▏    | 25867/50000 [38:47<33:34, 11.98it/s]

Epochs: 25865 | epoch avg. loss: 0.488 | test avg. loss: 4.643
Epochs: 25866 | epoch avg. loss: 0.199 | test avg. loss: 4.767
Epochs: 25867 | epoch avg. loss: 0.290 | test avg. loss: 4.323


 52%|█████▏    | 25871/50000 [38:47<30:56, 13.00it/s]

Epochs: 25868 | epoch avg. loss: 0.336 | test avg. loss: 4.458
Epochs: 25869 | epoch avg. loss: 0.121 | test avg. loss: 4.958
Epochs: 25870 | epoch avg. loss: 0.226 | test avg. loss: 4.740


 52%|█████▏    | 25873/50000 [38:48<33:04, 12.16it/s]

Epochs: 25871 | epoch avg. loss: 0.053 | test avg. loss: 4.606
Epochs: 25872 | epoch avg. loss: 0.064 | test avg. loss: 4.630
Epochs: 25873 | epoch avg. loss: 0.090 | test avg. loss: 4.762


 52%|█████▏    | 25877/50000 [38:48<32:53, 12.23it/s]

Epochs: 25874 | epoch avg. loss: 0.074 | test avg. loss: 4.648
Epochs: 25875 | epoch avg. loss: 0.211 | test avg. loss: 4.627
Epochs: 25876 | epoch avg. loss: 0.082 | test avg. loss: 5.179


 52%|█████▏    | 25879/50000 [38:48<33:12, 12.10it/s]

Epochs: 25877 | epoch avg. loss: 0.297 | test avg. loss: 4.526
Epochs: 25878 | epoch avg. loss: 0.084 | test avg. loss: 4.601
Epochs: 25879 | epoch avg. loss: 0.072 | test avg. loss: 4.979


 52%|█████▏    | 25883/50000 [38:48<32:47, 12.26it/s]

Epochs: 25880 | epoch avg. loss: 0.093 | test avg. loss: 5.025
Epochs: 25881 | epoch avg. loss: 0.030 | test avg. loss: 5.084
Epochs: 25882 | epoch avg. loss: 0.025 | test avg. loss: 5.149


 52%|█████▏    | 25885/50000 [38:49<34:13, 11.74it/s]

Epochs: 25883 | epoch avg. loss: 0.045 | test avg. loss: 5.027
Epochs: 25884 | epoch avg. loss: 0.017 | test avg. loss: 5.015
Epochs: 25885 | epoch avg. loss: 0.028 | test avg. loss: 5.476


 52%|█████▏    | 25889/50000 [38:49<30:19, 13.25it/s]

Epochs: 25886 | epoch avg. loss: 0.152 | test avg. loss: 5.142
Epochs: 25887 | epoch avg. loss: 0.023 | test avg. loss: 5.096
Epochs: 25888 | epoch avg. loss: 0.030 | test avg. loss: 5.277
Epochs: 25889 | epoch avg. loss: 0.065 | test avg. loss: 5.263


 52%|█████▏    | 25893/50000 [38:49<28:10, 14.26it/s]

Epochs: 25890 | epoch avg. loss: 0.025 | test avg. loss: 5.145
Epochs: 25891 | epoch avg. loss: 0.044 | test avg. loss: 5.477
Epochs: 25892 | epoch avg. loss: 0.195 | test avg. loss: 5.159
Epochs: 25893 | epoch avg. loss: 0.062 | test avg. loss: 4.923


 52%|█████▏    | 25897/50000 [38:49<26:54, 14.93it/s]

Epochs: 25894 | epoch avg. loss: 0.129 | test avg. loss: 5.022
Epochs: 25895 | epoch avg. loss: 0.058 | test avg. loss: 5.258
Epochs: 25896 | epoch avg. loss: 0.084 | test avg. loss: 4.987
Epochs: 25897 | epoch avg. loss: 0.081 | test avg. loss: 4.977


 52%|█████▏    | 25899/50000 [38:50<27:55, 14.38it/s]

Epochs: 25898 | epoch avg. loss: 0.030 | test avg. loss: 5.081
Epochs: 25899 | epoch avg. loss: 0.055 | test avg. loss: 5.004


 52%|█████▏    | 25903/50000 [38:51<1:34:23,  4.25it/s]

Epochs: 25900 | epoch avg. loss: 0.016 | test avg. loss: 4.984
Epochs: 25901 | epoch avg. loss: 0.014 | test avg. loss: 5.098
Epochs: 25902 | epoch avg. loss: 0.016 | test avg. loss: 5.107
Epochs: 25903 | epoch avg. loss: 0.012 | test avg. loss: 5.071


 52%|█████▏    | 25907/50000 [38:52<1:01:02,  6.58it/s]

Epochs: 25904 | epoch avg. loss: 0.034 | test avg. loss: 5.041
Epochs: 25905 | epoch avg. loss: 0.012 | test avg. loss: 5.064
Epochs: 25906 | epoch avg. loss: 0.035 | test avg. loss: 4.967


 52%|█████▏    | 25909/50000 [38:52<53:08,  7.56it/s]

Epochs: 25907 | epoch avg. loss: 0.010 | test avg. loss: 4.956
Epochs: 25908 | epoch avg. loss: 0.022 | test avg. loss: 5.106
Epochs: 25909 | epoch avg. loss: 0.019 | test avg. loss: 5.144


 52%|█████▏    | 25913/50000 [38:52<39:31, 10.16it/s]

Epochs: 25910 | epoch avg. loss: 0.026 | test avg. loss: 4.904
Epochs: 25911 | epoch avg. loss: 0.010 | test avg. loss: 4.887
Epochs: 25912 | epoch avg. loss: 0.016 | test avg. loss: 4.903
Epochs: 25913 | epoch avg. loss: 0.009 | test avg. loss: 4.925




Epochs: 25914 | epoch avg. loss: 0.013 | test avg. loss: 5.151
Epochs: 25915 | epoch avg. loss: 0.062 | test avg. loss: 5.013
Epochs: 25916 | epoch avg. loss: 0.014 | test avg. loss: 4.877




Epochs: 25917 | epoch avg. loss: 0.029 | test avg. loss: 4.862
Epochs: 25918 | epoch avg. loss: 0.008 | test avg. loss: 4.934


 52%|█████▏    | 25921/50000 [38:53<33:52, 11.85it/s]

Epochs: 25919 | epoch avg. loss: 0.010 | test avg. loss: 4.937
Epochs: 25920 | epoch avg. loss: 0.005 | test avg. loss: 4.950
Epochs: 25921 | epoch avg. loss: 0.007 | test avg. loss: 4.924


 52%|█████▏    | 25925/50000 [38:53<30:24, 13.20it/s]

Epochs: 25922 | epoch avg. loss: 0.009 | test avg. loss: 4.902
Epochs: 25923 | epoch avg. loss: 0.008 | test avg. loss: 4.857
Epochs: 25924 | epoch avg. loss: 0.005 | test avg. loss: 4.858
Epochs: 25925 | epoch avg. loss: 0.004 | test avg. loss: 4.858


                                                     

Epochs: 25926 | epoch avg. loss: 0.009 | test avg. loss: 4.856
Epochs: 25927 | epoch avg. loss: 0.010 | test avg. loss: 4.906
Epochs: 25928 | epoch avg. loss: 0.010 | test avg. loss: 4.854


 52%|█████▏    | 25931/50000 [38:53<27:45, 14.45it/s]

Epochs: 25929 | epoch avg. loss: 0.007 | test avg. loss: 4.790
Epochs: 25930 | epoch avg. loss: 0.019 | test avg. loss: 4.855
Epochs: 25931 | epoch avg. loss: 0.010 | test avg. loss: 4.995


 52%|█████▏    | 25935/50000 [38:54<32:01, 12.52it/s]

Epochs: 25932 | epoch avg. loss: 0.015 | test avg. loss: 4.935
Epochs: 25933 | epoch avg. loss: 0.013 | test avg. loss: 5.003
Epochs: 25934 | epoch avg. loss: 0.016 | test avg. loss: 4.951


 52%|█████▏    | 25937/50000 [38:54<32:20, 12.40it/s]

Epochs: 25935 | epoch avg. loss: 0.010 | test avg. loss: 4.828
Epochs: 25936 | epoch avg. loss: 0.024 | test avg. loss: 4.789
Epochs: 25937 | epoch avg. loss: 0.009 | test avg. loss: 4.865


                                                     

Epochs: 25938 | epoch avg. loss: 0.023 | test avg. loss: 4.922
Epochs: 25939 | epoch avg. loss: 0.010 | test avg. loss: 4.887
Epochs: 25940 | epoch avg. loss: 0.031 | test avg. loss: 5.020


 52%|█████▏    | 25943/50000 [38:54<29:23, 13.64it/s]

Epochs: 25941 | epoch avg. loss: 0.049 | test avg. loss: 5.095
Epochs: 25942 | epoch avg. loss: 0.086 | test avg. loss: 4.697
Epochs: 25943 | epoch avg. loss: 0.190 | test avg. loss: 4.969


 52%|█████▏    | 25947/50000 [38:55<29:38, 13.53it/s]

Epochs: 25944 | epoch avg. loss: 0.284 | test avg. loss: 5.383
Epochs: 25945 | epoch avg. loss: 0.236 | test avg. loss: 6.001
Epochs: 25946 | epoch avg. loss: 0.476 | test avg. loss: 5.081


 52%|█████▏    | 25951/50000 [38:55<28:00, 14.31it/s]

Epochs: 25947 | epoch avg. loss: 0.426 | test avg. loss: 5.549
Epochs: 25948 | epoch avg. loss: 0.708 | test avg. loss: 5.713
Epochs: 25949 | epoch avg. loss: 0.821 | test avg. loss: 5.501
Epochs: 25950 | epoch avg. loss: 0.350 | test avg. loss: 5.869


 52%|█████▏    | 25955/50000 [38:55<26:54, 14.89it/s]

Epochs: 25951 | epoch avg. loss: 0.935 | test avg. loss: 5.462
Epochs: 25952 | epoch avg. loss: 0.497 | test avg. loss: 4.757
Epochs: 25953 | epoch avg. loss: 0.324 | test avg. loss: 4.381
Epochs: 25954 | epoch avg. loss: 0.413 | test avg. loss: 4.650


 52%|█████▏    | 25957/50000 [38:55<27:02, 14.82it/s]

Epochs: 25955 | epoch avg. loss: 0.222 | test avg. loss: 5.529
Epochs: 25956 | epoch avg. loss: 0.361 | test avg. loss: 4.963
Epochs: 25957 | epoch avg. loss: 0.416 | test avg. loss: 4.644


 52%|█████▏    | 25961/50000 [38:55<29:22, 13.64it/s]

Epochs: 25958 | epoch avg. loss: 0.154 | test avg. loss: 5.050
Epochs: 25959 | epoch avg. loss: 0.450 | test avg. loss: 4.453
Epochs: 25960 | epoch avg. loss: 0.165 | test avg. loss: 4.817


 52%|█████▏    | 25965/50000 [38:56<28:46, 13.92it/s]

Epochs: 25961 | epoch avg. loss: 0.084 | test avg. loss: 5.616
Epochs: 25962 | epoch avg. loss: 0.254 | test avg. loss: 5.198
Epochs: 25963 | epoch avg. loss: 0.314 | test avg. loss: 4.911
Epochs: 25964 | epoch avg. loss: 0.097 | test avg. loss: 4.778


 52%|█████▏    | 25969/50000 [38:56<27:15, 14.69it/s]

Epochs: 25965 | epoch avg. loss: 0.121 | test avg. loss: 4.710
Epochs: 25966 | epoch avg. loss: 0.512 | test avg. loss: 4.594
Epochs: 25967 | epoch avg. loss: 0.094 | test avg. loss: 4.687
Epochs: 25968 | epoch avg. loss: 0.069 | test avg. loss: 4.905


 52%|█████▏    | 25971/50000 [38:56<27:14, 14.70it/s]

Epochs: 25969 | epoch avg. loss: 0.063 | test avg. loss: 5.041
Epochs: 25970 | epoch avg. loss: 0.026 | test avg. loss: 4.913
Epochs: 25971 | epoch avg. loss: 0.045 | test avg. loss: 4.810


 52%|█████▏    | 25975/50000 [38:56<30:01, 13.33it/s]

Epochs: 25972 | epoch avg. loss: 0.028 | test avg. loss: 4.740
Epochs: 25973 | epoch avg. loss: 0.024 | test avg. loss: 4.737
Epochs: 25974 | epoch avg. loss: 0.025 | test avg. loss: 4.786


 52%|█████▏    | 25977/50000 [38:57<29:38, 13.50it/s]

Epochs: 25975 | epoch avg. loss: 0.031 | test avg. loss: 4.630
Epochs: 25976 | epoch avg. loss: 0.024 | test avg. loss: 4.629
Epochs: 25977 | epoch avg. loss: 0.011 | test avg. loss: 4.607
Epochs: 25978 | epoch avg. loss: 0.016 | test avg. loss: 4.570


 52%|█████▏    | 25981/50000 [38:57<27:59, 14.30it/s]

Epochs: 25979 | epoch avg. loss: 0.053 | test avg. loss: 4.714
Epochs: 25980 | epoch avg. loss: 0.030 | test avg. loss: 4.848
Epochs: 25981 | epoch avg. loss: 0.023 | test avg. loss: 4.768
Epochs: 25982 | epoch avg. loss: 0.071 | test avg. loss: 4.817


 52%|█████▏    | 25987/50000 [38:57<26:57, 14.84it/s]

Epochs: 25983 | epoch avg. loss: 0.027 | test avg. loss: 4.881
Epochs: 25984 | epoch avg. loss: 0.024 | test avg. loss: 4.805
Epochs: 25985 | epoch avg. loss: 0.061 | test avg. loss: 4.882
Epochs: 25986 | epoch avg. loss: 0.013 | test avg. loss: 4.797


                                                     

Epochs: 25987 | epoch avg. loss: 0.012 | test avg. loss: 4.657
Epochs: 25988 | epoch avg. loss: 0.018 | test avg. loss: 4.690


 52%|█████▏    | 25991/50000 [38:58<32:59, 12.13it/s]

Epochs: 25989 | epoch avg. loss: 0.015 | test avg. loss: 4.612
Epochs: 25990 | epoch avg. loss: 0.045 | test avg. loss: 4.710
Epochs: 25991 | epoch avg. loss: 0.027 | test avg. loss: 4.955


 52%|█████▏    | 25995/50000 [38:58<29:32, 13.55it/s]

Epochs: 25992 | epoch avg. loss: 0.056 | test avg. loss: 4.808
Epochs: 25993 | epoch avg. loss: 0.030 | test avg. loss: 4.867
Epochs: 25994 | epoch avg. loss: 0.027 | test avg. loss: 4.991
Epochs: 25995 | epoch avg. loss: 0.055 | test avg. loss: 4.755


 52%|█████▏    | 25999/50000 [38:58<27:43, 14.43it/s]

Epochs: 25996 | epoch avg. loss: 0.065 | test avg. loss: 4.839
Epochs: 25997 | epoch avg. loss: 0.022 | test avg. loss: 5.047
Epochs: 25998 | epoch avg. loss: 0.037 | test avg. loss: 4.834
Epochs: 25999 | epoch avg. loss: 0.066 | test avg. loss: 4.842


 52%|█████▏    | 26003/50000 [39:00<1:40:01,  4.00it/s]

Epochs: 26000 | epoch avg. loss: 0.034 | test avg. loss: 4.923
Epochs: 26001 | epoch avg. loss: 0.072 | test avg. loss: 4.731
Epochs: 26002 | epoch avg. loss: 0.020 | test avg. loss: 4.773


 52%|█████▏    | 26005/50000 [39:00<1:18:08,  5.12it/s]

Epochs: 26003 | epoch avg. loss: 0.012 | test avg. loss: 4.859
Epochs: 26004 | epoch avg. loss: 0.015 | test avg. loss: 4.729
Epochs: 26005 | epoch avg. loss: 0.053 | test avg. loss: 4.718


 52%|█████▏    | 26009/50000 [39:01<53:56,  7.41it/s]  

Epochs: 26006 | epoch avg. loss: 0.023 | test avg. loss: 4.741
Epochs: 26007 | epoch avg. loss: 0.030 | test avg. loss: 4.759
Epochs: 26008 | epoch avg. loss: 0.127 | test avg. loss: 4.957


 52%|█████▏    | 26013/50000 [39:01<39:35, 10.10it/s]

Epochs: 26009 | epoch avg. loss: 0.095 | test avg. loss: 5.072
Epochs: 26010 | epoch avg. loss: 0.073 | test avg. loss: 4.867
Epochs: 26011 | epoch avg. loss: 0.188 | test avg. loss: 4.990
Epochs: 26012 | epoch avg. loss: 0.164 | test avg. loss: 4.950


 52%|█████▏    | 26015/50000 [39:01<36:03, 11.09it/s]

Epochs: 26013 | epoch avg. loss: 0.068 | test avg. loss: 4.769
Epochs: 26014 | epoch avg. loss: 0.129 | test avg. loss: 4.812
Epochs: 26015 | epoch avg. loss: 0.100 | test avg. loss: 4.470


 52%|█████▏    | 26019/50000 [39:01<35:28, 11.27it/s]

Epochs: 26016 | epoch avg. loss: 0.067 | test avg. loss: 4.399
Epochs: 26017 | epoch avg. loss: 0.097 | test avg. loss: 4.713
Epochs: 26018 | epoch avg. loss: 0.108 | test avg. loss: 4.719


 52%|█████▏    | 26021/50000 [39:02<33:47, 11.82it/s]

Epochs: 26019 | epoch avg. loss: 0.117 | test avg. loss: 4.895
Epochs: 26020 | epoch avg. loss: 0.083 | test avg. loss: 5.503
Epochs: 26021 | epoch avg. loss: 0.259 | test avg. loss: 4.765


 52%|█████▏    | 26025/50000 [39:02<33:42, 11.85it/s]

Epochs: 26022 | epoch avg. loss: 0.178 | test avg. loss: 4.716
Epochs: 26023 | epoch avg. loss: 0.065 | test avg. loss: 5.116
Epochs: 26024 | epoch avg. loss: 0.113 | test avg. loss: 4.814


 52%|█████▏    | 26027/50000 [39:02<33:34, 11.90it/s]

Epochs: 26025 | epoch avg. loss: 0.099 | test avg. loss: 4.911
Epochs: 26026 | epoch avg. loss: 0.022 | test avg. loss: 4.851
Epochs: 26027 | epoch avg. loss: 0.019 | test avg. loss: 4.772


 52%|█████▏    | 26031/50000 [39:02<32:16, 12.38it/s]

Epochs: 26028 | epoch avg. loss: 0.020 | test avg. loss: 4.781
Epochs: 26029 | epoch avg. loss: 0.033 | test avg. loss: 4.707
Epochs: 26030 | epoch avg. loss: 0.012 | test avg. loss: 4.799


 52%|█████▏    | 26033/50000 [39:03<32:04, 12.46it/s]

Epochs: 26031 | epoch avg. loss: 0.010 | test avg. loss: 4.896
Epochs: 26032 | epoch avg. loss: 0.009 | test avg. loss: 4.889
Epochs: 26033 | epoch avg. loss: 0.014 | test avg. loss: 4.963


 52%|█████▏    | 26037/50000 [39:03<31:26, 12.70it/s]

Epochs: 26034 | epoch avg. loss: 0.016 | test avg. loss: 4.902
Epochs: 26035 | epoch avg. loss: 0.015 | test avg. loss: 4.824
Epochs: 26036 | epoch avg. loss: 0.026 | test avg. loss: 4.860


 52%|█████▏    | 26041/50000 [39:03<28:15, 14.13it/s]

Epochs: 26037 | epoch avg. loss: 0.013 | test avg. loss: 4.845
Epochs: 26038 | epoch avg. loss: 0.007 | test avg. loss: 4.804
Epochs: 26039 | epoch avg. loss: 0.010 | test avg. loss: 4.842
Epochs: 26040 | epoch avg. loss: 0.013 | test avg. loss: 4.823


                                                     

Epochs: 26041 | epoch avg. loss: 0.006 | test avg. loss: 4.790
Epochs: 26042 | epoch avg. loss: 0.006 | test avg. loss: 4.799
Epochs: 26043 | epoch avg. loss: 0.007 | test avg. loss: 4.781


 52%|█████▏    | 26047/50000 [39:04<29:30, 13.53it/s]

Epochs: 26044 | epoch avg. loss: 0.005 | test avg. loss: 4.783
Epochs: 26045 | epoch avg. loss: 0.006 | test avg. loss: 4.792
Epochs: 26046 | epoch avg. loss: 0.005 | test avg. loss: 4.777


 52%|█████▏    | 26049/50000 [39:04<28:19, 14.09it/s]

Epochs: 26047 | epoch avg. loss: 0.006 | test avg. loss: 4.784
Epochs: 26048 | epoch avg. loss: 0.006 | test avg. loss: 4.803
Epochs: 26049 | epoch avg. loss: 0.005 | test avg. loss: 4.806


 52%|█████▏    | 26053/50000 [39:04<29:11, 13.67it/s]

Epochs: 26050 | epoch avg. loss: 0.009 | test avg. loss: 4.806
Epochs: 26051 | epoch avg. loss: 0.008 | test avg. loss: 4.724
Epochs: 26052 | epoch avg. loss: 0.013 | test avg. loss: 4.758




Epochs: 26053 | epoch avg. loss: 0.009 | test avg. loss: 4.847
Epochs: 26054 | epoch avg. loss: 0.011 | test avg. loss: 4.855
Epochs: 26055 | epoch avg. loss: 0.014 | test avg. loss: 4.722
Epochs: 26056 | epoch avg. loss: 0.019 | test avg. loss: 4.735

 52%|█████▏    | 26059/50000 [39:04<28:07, 14.19it/s]


Epochs: 26057 | epoch avg. loss: 0.021 | test avg. loss: 4.821
Epochs: 26058 | epoch avg. loss: 0.017 | test avg. loss: 4.769


 52%|█████▏    | 26061/50000 [39:05<29:17, 13.62it/s]

Epochs: 26059 | epoch avg. loss: 0.023 | test avg. loss: 4.802
Epochs: 26060 | epoch avg. loss: 0.020 | test avg. loss: 4.756
Epochs: 26061 | epoch avg. loss: 0.024 | test avg. loss: 4.618


 52%|█████▏    | 26065/50000 [39:05<30:43, 12.98it/s]

Epochs: 26062 | epoch avg. loss: 0.030 | test avg. loss: 4.705
Epochs: 26063 | epoch avg. loss: 0.009 | test avg. loss: 4.883
Epochs: 26064 | epoch avg. loss: 0.013 | test avg. loss: 4.863


 52%|█████▏    | 26067/50000 [39:05<30:23, 13.12it/s]

Epochs: 26065 | epoch avg. loss: 0.032 | test avg. loss: 4.877
Epochs: 26066 | epoch avg. loss: 0.018 | test avg. loss: 5.017
Epochs: 26067 | epoch avg. loss: 0.061 | test avg. loss: 4.720


 52%|█████▏    | 26071/50000 [39:05<31:20, 12.73it/s]

Epochs: 26068 | epoch avg. loss: 0.026 | test avg. loss: 4.635
Epochs: 26069 | epoch avg. loss: 0.032 | test avg. loss: 4.905
Epochs: 26070 | epoch avg. loss: 0.115 | test avg. loss: 4.730


 52%|█████▏    | 26073/50000 [39:06<34:28, 11.57it/s]

Epochs: 26071 | epoch avg. loss: 0.072 | test avg. loss: 4.713
Epochs: 26072 | epoch avg. loss: 0.058 | test avg. loss: 4.856
Epochs: 26073 | epoch avg. loss: 0.052 | test avg. loss: 4.697


 52%|█████▏    | 26077/50000 [39:06<33:48, 11.79it/s]

Epochs: 26074 | epoch avg. loss: 0.038 | test avg. loss: 4.706
Epochs: 26075 | epoch avg. loss: 0.028 | test avg. loss: 4.867
Epochs: 26076 | epoch avg. loss: 0.031 | test avg. loss: 4.831


 52%|█████▏    | 26079/50000 [39:06<32:14, 12.37it/s]

Epochs: 26077 | epoch avg. loss: 0.022 | test avg. loss: 4.692
Epochs: 26078 | epoch avg. loss: 0.099 | test avg. loss: 4.758
Epochs: 26079 | epoch avg. loss: 0.057 | test avg. loss: 4.787


 52%|█████▏    | 26083/50000 [39:06<30:13, 13.19it/s]

Epochs: 26080 | epoch avg. loss: 0.034 | test avg. loss: 4.750
Epochs: 26081 | epoch avg. loss: 0.157 | test avg. loss: 4.846
Epochs: 26082 | epoch avg. loss: 0.081 | test avg. loss: 4.718


 52%|█████▏    | 26085/50000 [39:06<33:01, 12.07it/s]

Epochs: 26083 | epoch avg. loss: 0.047 | test avg. loss: 4.513
Epochs: 26084 | epoch avg. loss: 0.102 | test avg. loss: 4.699


 52%|█████▏    | 26087/50000 [39:07<33:38, 11.85it/s]

Epochs: 26085 | epoch avg. loss: 0.089 | test avg. loss: 4.795
Epochs: 26086 | epoch avg. loss: 0.035 | test avg. loss: 4.807
Epochs: 26087 | epoch avg. loss: 0.020 | test avg. loss: 4.941


 52%|█████▏    | 26091/50000 [39:07<30:44, 12.96it/s]

Epochs: 26088 | epoch avg. loss: 0.031 | test avg. loss: 4.816
Epochs: 26089 | epoch avg. loss: 0.013 | test avg. loss: 4.680
Epochs: 26090 | epoch avg. loss: 0.020 | test avg. loss: 4.816
Epochs: 26091 | epoch avg. loss: 0.038 | test avg. loss: 4.806


 52%|█████▏    | 26095/50000 [39:07<31:07, 12.80it/s]

Epochs: 26092 | epoch avg. loss: 0.013 | test avg. loss: 4.741
Epochs: 26093 | epoch avg. loss: 0.017 | test avg. loss: 4.764
Epochs: 26094 | epoch avg. loss: 0.011 | test avg. loss: 4.731


 52%|█████▏    | 26097/50000 [39:08<32:39, 12.20it/s]

Epochs: 26095 | epoch avg. loss: 0.014 | test avg. loss: 4.644
Epochs: 26096 | epoch avg. loss: 0.076 | test avg. loss: 4.734
Epochs: 26097 | epoch avg. loss: 0.024 | test avg. loss: 4.826


 52%|█████▏    | 26099/50000 [39:08<32:43, 12.17it/s]

Epochs: 26098 | epoch avg. loss: 0.027 | test avg. loss: 4.727
Epochs: 26099 | epoch avg. loss: 0.010 | test avg. loss: 4.757


 52%|█████▏    | 26103/50000 [39:10<1:39:43,  3.99it/s]

Epochs: 26100 | epoch avg. loss: 0.006 | test avg. loss: 4.808
Epochs: 26101 | epoch avg. loss: 0.010 | test avg. loss: 4.818
Epochs: 26102 | epoch avg. loss: 0.007 | test avg. loss: 4.744


 52%|█████▏    | 26105/50000 [39:10<1:19:07,  5.03it/s]

Epochs: 26103 | epoch avg. loss: 0.013 | test avg. loss: 4.677
Epochs: 26104 | epoch avg. loss: 0.031 | test avg. loss: 4.683
Epochs: 26105 | epoch avg. loss: 0.029 | test avg. loss: 4.851


 52%|█████▏    | 26107/50000 [39:10<1:04:09,  6.21it/s]

Epochs: 26106 | epoch avg. loss: 0.063 | test avg. loss: 4.683
Epochs: 26107 | epoch avg. loss: 0.060 | test avg. loss: 4.787


 52%|█████▏    | 26111/50000 [39:10<48:44,  8.17it/s]

Epochs: 26108 | epoch avg. loss: 0.034 | test avg. loss: 5.083
Epochs: 26109 | epoch avg. loss: 0.103 | test avg. loss: 4.859
Epochs: 26110 | epoch avg. loss: 0.085 | test avg. loss: 4.820


 52%|█████▏    | 26113/50000 [39:10<42:32,  9.36it/s]

Epochs: 26111 | epoch avg. loss: 0.051 | test avg. loss: 5.134
Epochs: 26112 | epoch avg. loss: 0.142 | test avg. loss: 4.757
Epochs: 26113 | epoch avg. loss: 0.095 | test avg. loss: 4.871


 52%|█████▏    | 26117/50000 [39:11<35:33, 11.19it/s]

Epochs: 26114 | epoch avg. loss: 0.098 | test avg. loss: 5.241
Epochs: 26115 | epoch avg. loss: 0.141 | test avg. loss: 5.036
Epochs: 26116 | epoch avg. loss: 0.056 | test avg. loss: 4.829


 52%|█████▏    | 26119/50000 [39:11<32:59, 12.06it/s]

Epochs: 26117 | epoch avg. loss: 0.328 | test avg. loss: 4.707
Epochs: 26118 | epoch avg. loss: 0.187 | test avg. loss: 4.753
Epochs: 26119 | epoch avg. loss: 0.188 | test avg. loss: 4.908


 52%|█████▏    | 26123/50000 [39:11<30:56, 12.86it/s]

Epochs: 26120 | epoch avg. loss: 0.711 | test avg. loss: 5.041
Epochs: 26121 | epoch avg. loss: 0.439 | test avg. loss: 5.686
Epochs: 26122 | epoch avg. loss: 0.414 | test avg. loss: 5.240


 52%|█████▏    | 26125/50000 [39:11<30:59, 12.84it/s]

Epochs: 26123 | epoch avg. loss: 0.438 | test avg. loss: 5.076
Epochs: 26124 | epoch avg. loss: 0.161 | test avg. loss: 5.034
Epochs: 26125 | epoch avg. loss: 0.154 | test avg. loss: 4.682


 52%|█████▏    | 26129/50000 [39:12<33:43, 11.80it/s]

Epochs: 26126 | epoch avg. loss: 0.112 | test avg. loss: 4.925
Epochs: 26127 | epoch avg. loss: 0.141 | test avg. loss: 4.920
Epochs: 26128 | epoch avg. loss: 0.066 | test avg. loss: 4.803


 52%|█████▏    | 26133/50000 [39:12<30:35, 13.00it/s]

Epochs: 26129 | epoch avg. loss: 0.087 | test avg. loss: 4.986
Epochs: 26130 | epoch avg. loss: 0.132 | test avg. loss: 4.688
Epochs: 26131 | epoch avg. loss: 0.076 | test avg. loss: 4.806
Epochs: 26132 | epoch avg. loss: 0.051 | test avg. loss: 5.177


 52%|█████▏    | 26135/50000 [39:12<29:03, 13.69it/s]

Epochs: 26133 | epoch avg. loss: 0.081 | test avg. loss: 5.075
Epochs: 26134 | epoch avg. loss: 0.069 | test avg. loss: 5.096
Epochs: 26135 | epoch avg. loss: 0.045 | test avg. loss: 5.283




Epochs: 26136 | epoch avg. loss: 0.094 | test avg. loss: 4.980
Epochs: 26137 | epoch avg. loss: 0.153 | test avg. loss: 4.969
Epochs: 26138 | epoch avg. loss: 0.109 | test avg. loss: 5.020


 52%|█████▏    | 26141/50000 [39:12<29:02, 13.70it/s]

Epochs: 26139 | epoch avg. loss: 0.077 | test avg. loss: 4.838
Epochs: 26140 | epoch avg. loss: 0.148 | test avg. loss: 5.078
Epochs: 26141 | epoch avg. loss: 0.185 | test avg. loss: 4.963


 52%|█████▏    | 26145/50000 [39:13<28:27, 13.97it/s]

Epochs: 26142 | epoch avg. loss: 0.070 | test avg. loss: 4.950
Epochs: 26143 | epoch avg. loss: 0.102 | test avg. loss: 5.173
Epochs: 26144 | epoch avg. loss: 0.127 | test avg. loss: 4.838
Epochs: 26145 | epoch avg. loss: 0.139 | test avg. loss: 4.734


 52%|█████▏    | 26149/50000 [39:13<26:57, 14.74it/s]

Epochs: 26146 | epoch avg. loss: 0.051 | test avg. loss: 4.873
Epochs: 26147 | epoch avg. loss: 0.072 | test avg. loss: 4.656
Epochs: 26148 | epoch avg. loss: 0.045 | test avg. loss: 4.653
Epochs: 26149 | epoch avg. loss: 0.019 | test avg. loss: 4.668


 52%|█████▏    | 26153/50000 [39:13<27:02, 14.70it/s]

Epochs: 26150 | epoch avg. loss: 0.016 | test avg. loss: 4.611
Epochs: 26151 | epoch avg. loss: 0.037 | test avg. loss: 4.721
Epochs: 26152 | epoch avg. loss: 0.040 | test avg. loss: 4.752


 52%|█████▏    | 26155/50000 [39:13<27:47, 14.30it/s]

Epochs: 26153 | epoch avg. loss: 0.021 | test avg. loss: 4.743
Epochs: 26154 | epoch avg. loss: 0.019 | test avg. loss: 4.840
Epochs: 26155 | epoch avg. loss: 0.027 | test avg. loss: 4.734


 52%|█████▏    | 26159/50000 [39:14<29:47, 13.34it/s]

Epochs: 26156 | epoch avg. loss: 0.035 | test avg. loss: 4.831
Epochs: 26157 | epoch avg. loss: 0.011 | test avg. loss: 4.950
Epochs: 26158 | epoch avg. loss: 0.010 | test avg. loss: 4.905


 52%|█████▏    | 26161/50000 [39:14<30:24, 13.07it/s]

Epochs: 26159 | epoch avg. loss: 0.009 | test avg. loss: 4.880
Epochs: 26160 | epoch avg. loss: 0.006 | test avg. loss: 4.862
Epochs: 26161 | epoch avg. loss: 0.008 | test avg. loss: 4.761


 52%|█████▏    | 26165/50000 [39:14<29:31, 13.46it/s]

Epochs: 26162 | epoch avg. loss: 0.016 | test avg. loss: 4.820
Epochs: 26163 | epoch avg. loss: 0.014 | test avg. loss: 4.888
Epochs: 26164 | epoch avg. loss: 0.016 | test avg. loss: 4.825


 52%|█████▏    | 26167/50000 [39:14<30:01, 13.23it/s]

Epochs: 26165 | epoch avg. loss: 0.019 | test avg. loss: 4.892
Epochs: 26166 | epoch avg. loss: 0.012 | test avg. loss: 4.834
Epochs: 26167 | epoch avg. loss: 0.007 | test avg. loss: 4.776


 52%|█████▏    | 26171/50000 [39:15<29:20, 13.54it/s]

Epochs: 26168 | epoch avg. loss: 0.008 | test avg. loss: 4.811
Epochs: 26169 | epoch avg. loss: 0.007 | test avg. loss: 4.775
Epochs: 26170 | epoch avg. loss: 0.010 | test avg. loss: 4.826
Epochs: 26171 | epoch avg. loss: 0.007 | test avg. loss: 4.814


 52%|█████▏    | 26175/50000 [39:15<29:11, 13.60it/s]

Epochs: 26172 | epoch avg. loss: 0.008 | test avg. loss: 4.749
Epochs: 26173 | epoch avg. loss: 0.021 | test avg. loss: 4.770
Epochs: 26174 | epoch avg. loss: 0.007 | test avg. loss: 4.841


 52%|█████▏    | 26179/50000 [39:15<27:45, 14.31it/s]

Epochs: 26175 | epoch avg. loss: 0.028 | test avg. loss: 4.805
Epochs: 26176 | epoch avg. loss: 0.010 | test avg. loss: 4.740
Epochs: 26177 | epoch avg. loss: 0.045 | test avg. loss: 4.766
Epochs: 26178 | epoch avg. loss: 0.019 | test avg. loss: 4.856


 52%|█████▏    | 26181/50000 [39:15<28:41, 13.84it/s]

Epochs: 26179 | epoch avg. loss: 0.029 | test avg. loss: 4.711
Epochs: 26180 | epoch avg. loss: 0.051 | test avg. loss: 4.762
Epochs: 26181 | epoch avg. loss: 0.029 | test avg. loss: 4.801


 52%|█████▏    | 26185/50000 [39:16<30:06, 13.18it/s]

Epochs: 26182 | epoch avg. loss: 0.017 | test avg. loss: 4.731
Epochs: 26183 | epoch avg. loss: 0.018 | test avg. loss: 4.792
Epochs: 26184 | epoch avg. loss: 0.022 | test avg. loss: 4.650


                                                     

Epochs: 26185 | epoch avg. loss: 0.025 | test avg. loss: 4.653
Epochs: 26186 | epoch avg. loss: 0.017 | test avg. loss: 4.675
Epochs: 26187 | epoch avg. loss: 0.017 | test avg. loss: 4.620


 52%|█████▏    | 26191/50000 [39:16<27:22, 14.49it/s]

Epochs: 26188 | epoch avg. loss: 0.025 | test avg. loss: 4.825
Epochs: 26189 | epoch avg. loss: 0.025 | test avg. loss: 4.841
Epochs: 26190 | epoch avg. loss: 0.009 | test avg. loss: 4.852
Epochs: 26191 | epoch avg. loss: 0.007 | test avg. loss: 4.824


 52%|█████▏    | 26195/50000 [39:16<26:36, 14.91it/s]

Epochs: 26192 | epoch avg. loss: 0.006 | test avg. loss: 4.802
Epochs: 26193 | epoch avg. loss: 0.006 | test avg. loss: 4.852
Epochs: 26194 | epoch avg. loss: 0.012 | test avg. loss: 4.787


 52%|█████▏    | 26197/50000 [39:17<31:06, 12.76it/s]

Epochs: 26195 | epoch avg. loss: 0.018 | test avg. loss: 4.819
Epochs: 26196 | epoch avg. loss: 0.008 | test avg. loss: 4.907
Epochs: 26197 | epoch avg. loss: 0.018 | test avg. loss: 4.849


 52%|█████▏    | 26199/50000 [39:17<29:24, 13.49it/s]

Epochs: 26198 | epoch avg. loss: 0.010 | test avg. loss: 4.827
Epochs: 26199 | epoch avg. loss: 0.007 | test avg. loss: 4.809


 52%|█████▏    | 26203/50000 [39:18<1:32:18,  4.30it/s]

Epochs: 26200 | epoch avg. loss: 0.005 | test avg. loss: 4.734
Epochs: 26201 | epoch avg. loss: 0.009 | test avg. loss: 4.747
Epochs: 26202 | epoch avg. loss: 0.008 | test avg. loss: 4.786


 52%|█████▏    | 26205/50000 [39:19<1:13:51,  5.37it/s]

Epochs: 26203 | epoch avg. loss: 0.009 | test avg. loss: 4.722
Epochs: 26204 | epoch avg. loss: 0.030 | test avg. loss: 4.785
Epochs: 26205 | epoch avg. loss: 0.019 | test avg. loss: 4.863


 52%|█████▏    | 26209/50000 [39:19<51:24,  7.71it/s]  

Epochs: 26206 | epoch avg. loss: 0.018 | test avg. loss: 4.789
Epochs: 26207 | epoch avg. loss: 0.066 | test avg. loss: 4.837
Epochs: 26208 | epoch avg. loss: 0.033 | test avg. loss: 4.895


 52%|█████▏    | 26213/50000 [39:19<38:53, 10.19it/s]

Epochs: 26209 | epoch avg. loss: 0.025 | test avg. loss: 4.882
Epochs: 26210 | epoch avg. loss: 0.091 | test avg. loss: 5.089
Epochs: 26211 | epoch avg. loss: 0.102 | test avg. loss: 4.895
Epochs: 26212 | epoch avg. loss: 0.077 | test avg. loss: 4.766


 52%|█████▏    | 26215/50000 [39:19<34:37, 11.45it/s]

Epochs: 26213 | epoch avg. loss: 0.139 | test avg. loss: 4.885
Epochs: 26214 | epoch avg. loss: 0.121 | test avg. loss: 4.721
Epochs: 26215 | epoch avg. loss: 0.048 | test avg. loss: 4.773


 52%|█████▏    | 26219/50000 [39:20<33:32, 11.82it/s]

Epochs: 26216 | epoch avg. loss: 0.068 | test avg. loss: 5.286
Epochs: 26217 | epoch avg. loss: 0.224 | test avg. loss: 4.948
Epochs: 26218 | epoch avg. loss: 0.165 | test avg. loss: 4.977


 52%|█████▏    | 26221/50000 [39:20<32:51, 12.06it/s]

Epochs: 26219 | epoch avg. loss: 0.211 | test avg. loss: 5.047
Epochs: 26220 | epoch avg. loss: 0.170 | test avg. loss: 4.658
Epochs: 26221 | epoch avg. loss: 0.047 | test avg. loss: 4.645


 52%|█████▏    | 26225/50000 [39:20<32:23, 12.23it/s]

Epochs: 26222 | epoch avg. loss: 0.070 | test avg. loss: 5.107
Epochs: 26223 | epoch avg. loss: 0.142 | test avg. loss: 4.787
Epochs: 26224 | epoch avg. loss: 0.108 | test avg. loss: 4.762


 52%|█████▏    | 26227/50000 [39:20<33:53, 11.69it/s]

Epochs: 26225 | epoch avg. loss: 0.072 | test avg. loss: 5.084
Epochs: 26226 | epoch avg. loss: 0.157 | test avg. loss: 4.849
Epochs: 26227 | epoch avg. loss: 0.050 | test avg. loss: 4.930


 52%|█████▏    | 26231/50000 [39:21<33:49, 11.71it/s]

Epochs: 26228 | epoch avg. loss: 0.037 | test avg. loss: 5.168
Epochs: 26229 | epoch avg. loss: 0.144 | test avg. loss: 4.708
Epochs: 26230 | epoch avg. loss: 0.142 | test avg. loss: 4.655


 52%|█████▏    | 26233/50000 [39:21<33:37, 11.78it/s]

Epochs: 26231 | epoch avg. loss: 0.055 | test avg. loss: 4.784
Epochs: 26232 | epoch avg. loss: 0.067 | test avg. loss: 4.541
Epochs: 26233 | epoch avg. loss: 0.071 | test avg. loss: 4.531


 52%|█████▏    | 26237/50000 [39:21<35:08, 11.27it/s]

Epochs: 26234 | epoch avg. loss: 0.031 | test avg. loss: 4.589
Epochs: 26235 | epoch avg. loss: 0.045 | test avg. loss: 4.608
Epochs: 26236 | epoch avg. loss: 0.253 | test avg. loss: 4.939


 52%|█████▏    | 26239/50000 [39:21<34:18, 11.54it/s]

Epochs: 26237 | epoch avg. loss: 0.202 | test avg. loss: 4.909
Epochs: 26238 | epoch avg. loss: 0.136 | test avg. loss: 4.806
Epochs: 26239 | epoch avg. loss: 0.355 | test avg. loss: 4.766


 52%|█████▏    | 26243/50000 [39:22<33:58, 11.65it/s]

Epochs: 26240 | epoch avg. loss: 0.303 | test avg. loss: 4.467
Epochs: 26241 | epoch avg. loss: 0.145 | test avg. loss: 4.694
Epochs: 26242 | epoch avg. loss: 0.389 | test avg. loss: 5.143


 52%|█████▏    | 26247/50000 [39:22<30:11, 13.11it/s]

Epochs: 26243 | epoch avg. loss: 0.238 | test avg. loss: 4.941
Epochs: 26244 | epoch avg. loss: 0.087 | test avg. loss: 4.815
Epochs: 26245 | epoch avg. loss: 0.232 | test avg. loss: 5.174
Epochs: 26246 | epoch avg. loss: 0.328 | test avg. loss: 4.766


 53%|█████▎    | 26251/50000 [39:22<28:04, 14.10it/s]

Epochs: 26247 | epoch avg. loss: 0.105 | test avg. loss: 4.817
Epochs: 26248 | epoch avg. loss: 0.114 | test avg. loss: 5.175
Epochs: 26249 | epoch avg. loss: 0.188 | test avg. loss: 4.837
Epochs: 26250 | epoch avg. loss: 0.107 | test avg. loss: 4.937


 53%|█████▎    | 26253/50000 [39:22<30:59, 12.77it/s]

Epochs: 26251 | epoch avg. loss: 0.120 | test avg. loss: 5.506
Epochs: 26252 | epoch avg. loss: 0.382 | test avg. loss: 4.805


 53%|█████▎    | 26255/50000 [39:23<31:56, 12.39it/s]

Epochs: 26253 | epoch avg. loss: 0.242 | test avg. loss: 4.892
Epochs: 26254 | epoch avg. loss: 0.268 | test avg. loss: 5.315
Epochs: 26255 | epoch avg. loss: 0.227 | test avg. loss: 5.554


                                                     

Epochs: 26256 | epoch avg. loss: 0.826 | test avg. loss: 5.972
Epochs: 26257 | epoch avg. loss: 0.581 | test avg. loss: 5.547
Epochs: 26258 | epoch avg. loss: 0.831 | test avg. loss: 5.487


 53%|█████▎    | 26263/50000 [39:23<26:39, 14.84it/s]

Epochs: 26259 | epoch avg. loss: 0.434 | test avg. loss: 6.322
Epochs: 26260 | epoch avg. loss: 0.564 | test avg. loss: 5.258
Epochs: 26261 | epoch avg. loss: 0.647 | test avg. loss: 5.175
Epochs: 26262 | epoch avg. loss: 0.557 | test avg. loss: 4.643


 53%|█████▎    | 26267/50000 [39:23<26:03, 15.18it/s]

Epochs: 26263 | epoch avg. loss: 0.209 | test avg. loss: 4.872
Epochs: 26264 | epoch avg. loss: 0.231 | test avg. loss: 5.944
Epochs: 26265 | epoch avg. loss: 0.550 | test avg. loss: 5.475
Epochs: 26266 | epoch avg. loss: 0.720 | test avg. loss: 5.968


 53%|█████▎    | 26269/50000 [39:24<29:15, 13.52it/s]

Epochs: 26267 | epoch avg. loss: 1.121 | test avg. loss: 5.232
Epochs: 26268 | epoch avg. loss: 1.090 | test avg. loss: 5.146
Epochs: 26269 | epoch avg. loss: 0.847 | test avg. loss: 5.269


 53%|█████▎    | 26273/50000 [39:24<28:55, 13.67it/s]

Epochs: 26270 | epoch avg. loss: 0.584 | test avg. loss: 4.867
Epochs: 26271 | epoch avg. loss: 0.513 | test avg. loss: 5.538
Epochs: 26272 | epoch avg. loss: 0.542 | test avg. loss: 5.055
Epochs: 26273 | epoch avg. loss: 0.664 | test avg. loss: 5.352


 53%|█████▎    | 26277/50000 [39:24<29:10, 13.55it/s]

Epochs: 26274 | epoch avg. loss: 0.425 | test avg. loss: 4.827
Epochs: 26275 | epoch avg. loss: 0.208 | test avg. loss: 4.955
Epochs: 26276 | epoch avg. loss: 0.130 | test avg. loss: 4.800


 53%|█████▎    | 26279/50000 [39:24<30:41, 12.88it/s]

Epochs: 26277 | epoch avg. loss: 0.109 | test avg. loss: 4.649
Epochs: 26278 | epoch avg. loss: 0.103 | test avg. loss: 4.701
Epochs: 26279 | epoch avg. loss: 0.126 | test avg. loss: 4.590


 53%|█████▎    | 26283/50000 [39:25<32:57, 11.99it/s]

Epochs: 26280 | epoch avg. loss: 0.140 | test avg. loss: 4.885
Epochs: 26281 | epoch avg. loss: 0.126 | test avg. loss: 4.811
Epochs: 26282 | epoch avg. loss: 0.118 | test avg. loss: 4.907


 53%|█████▎    | 26285/50000 [39:25<33:22, 11.84it/s]

Epochs: 26283 | epoch avg. loss: 0.051 | test avg. loss: 4.772
Epochs: 26284 | epoch avg. loss: 0.017 | test avg. loss: 4.753
Epochs: 26285 | epoch avg. loss: 0.029 | test avg. loss: 4.766


 53%|█████▎    | 26289/50000 [39:25<31:41, 12.47it/s]

Epochs: 26286 | epoch avg. loss: 0.025 | test avg. loss: 4.828
Epochs: 26287 | epoch avg. loss: 0.023 | test avg. loss: 4.998
Epochs: 26288 | epoch avg. loss: 0.029 | test avg. loss: 4.888


 53%|█████▎    | 26291/50000 [39:25<31:08, 12.69it/s]

Epochs: 26289 | epoch avg. loss: 0.029 | test avg. loss: 4.861
Epochs: 26290 | epoch avg. loss: 0.012 | test avg. loss: 4.693
Epochs: 26291 | epoch avg. loss: 0.040 | test avg. loss: 4.654


 53%|█████▎    | 26295/50000 [39:26<34:03, 11.60it/s]

Epochs: 26292 | epoch avg. loss: 0.025 | test avg. loss: 4.698
Epochs: 26293 | epoch avg. loss: 0.023 | test avg. loss: 4.736
Epochs: 26294 | epoch avg. loss: 0.032 | test avg. loss: 4.950


 53%|█████▎    | 26297/50000 [39:26<34:24, 11.48it/s]

Epochs: 26295 | epoch avg. loss: 0.034 | test avg. loss: 4.794
Epochs: 26296 | epoch avg. loss: 0.058 | test avg. loss: 4.926
Epochs: 26297 | epoch avg. loss: 0.061 | test avg. loss: 4.733


 53%|█████▎    | 26299/50000 [39:26<32:09, 12.29it/s]

Epochs: 26298 | epoch avg. loss: 0.032 | test avg. loss: 4.776
Epochs: 26299 | epoch avg. loss: 0.018 | test avg. loss: 4.758


                                                       

Epochs: 26300 | epoch avg. loss: 0.016 | test avg. loss: 4.726
Epochs: 26301 | epoch avg. loss: 0.016 | test avg. loss: 4.759
Epochs: 26302 | epoch avg. loss: 0.016 | test avg. loss: 4.671


 53%|█████▎    | 26307/50000 [39:28<1:00:48,  6.49it/s]

Epochs: 26303 | epoch avg. loss: 0.012 | test avg. loss: 4.732
Epochs: 26304 | epoch avg. loss: 0.011 | test avg. loss: 4.712
Epochs: 26305 | epoch avg. loss: 0.008 | test avg. loss: 4.777
Epochs: 26306 | epoch avg. loss: 0.007 | test avg. loss: 4.758


 53%|█████▎    | 26309/50000 [39:28<50:15,  7.86it/s]

Epochs: 26307 | epoch avg. loss: 0.011 | test avg. loss: 4.781
Epochs: 26308 | epoch avg. loss: 0.006 | test avg. loss: 4.776
Epochs: 26309 | epoch avg. loss: 0.007 | test avg. loss: 4.786


 53%|█████▎    | 26313/50000 [39:29<40:19,  9.79it/s]

Epochs: 26310 | epoch avg. loss: 0.010 | test avg. loss: 4.855
Epochs: 26311 | epoch avg. loss: 0.019 | test avg. loss: 4.734
Epochs: 26312 | epoch avg. loss: 0.019 | test avg. loss: 4.808


 53%|█████▎    | 26315/50000 [39:29<38:52, 10.16it/s]

Epochs: 26313 | epoch avg. loss: 0.017 | test avg. loss: 4.750
Epochs: 26314 | epoch avg. loss: 0.014 | test avg. loss: 4.738
Epochs: 26315 | epoch avg. loss: 0.015 | test avg. loss: 4.845


 53%|█████▎    | 26319/50000 [39:29<32:16, 12.23it/s]

Epochs: 26316 | epoch avg. loss: 0.031 | test avg. loss: 4.654
Epochs: 26317 | epoch avg. loss: 0.053 | test avg. loss: 4.794
Epochs: 26318 | epoch avg. loss: 0.054 | test avg. loss: 4.653
Epochs: 26319 | epoch avg. loss: 0.039 | test avg. loss: 4.752


 53%|█████▎    | 26323/50000 [39:29<29:17, 13.47it/s]

Epochs: 26320 | epoch avg. loss: 0.029 | test avg. loss: 4.769
Epochs: 26321 | epoch avg. loss: 0.041 | test avg. loss: 4.765
Epochs: 26322 | epoch avg. loss: 0.029 | test avg. loss: 4.838


 53%|█████▎    | 26325/50000 [39:30<31:02, 12.71it/s]

Epochs: 26323 | epoch avg. loss: 0.028 | test avg. loss: 4.681
Epochs: 26324 | epoch avg. loss: 0.035 | test avg. loss: 4.811
Epochs: 26325 | epoch avg. loss: 0.028 | test avg. loss: 4.691


 53%|█████▎    | 26329/50000 [39:30<29:00, 13.60it/s]

Epochs: 26326 | epoch avg. loss: 0.025 | test avg. loss: 4.786
Epochs: 26327 | epoch avg. loss: 0.032 | test avg. loss: 4.783
Epochs: 26328 | epoch avg. loss: 0.022 | test avg. loss: 4.720


 53%|█████▎    | 26331/50000 [39:30<29:48, 13.23it/s]

Epochs: 26329 | epoch avg. loss: 0.039 | test avg. loss: 4.886
Epochs: 26330 | epoch avg. loss: 0.039 | test avg. loss: 4.704
Epochs: 26331 | epoch avg. loss: 0.042 | test avg. loss: 4.766


 53%|█████▎    | 26335/50000 [39:30<27:29, 14.35it/s]

Epochs: 26332 | epoch avg. loss: 0.017 | test avg. loss: 4.745
Epochs: 26333 | epoch avg. loss: 0.012 | test avg. loss: 4.716
Epochs: 26334 | epoch avg. loss: 0.013 | test avg. loss: 4.796
Epochs: 26335 | epoch avg. loss: 0.015 | test avg. loss: 4.688


 53%|█████▎    | 26339/50000 [39:31<30:37, 12.88it/s]

Epochs: 26336 | epoch avg. loss: 0.033 | test avg. loss: 4.855
Epochs: 26337 | epoch avg. loss: 0.032 | test avg. loss: 4.696
Epochs: 26338 | epoch avg. loss: 0.035 | test avg. loss: 4.770


 53%|█████▎    | 26341/50000 [39:31<30:15, 13.03it/s]

Epochs: 26339 | epoch avg. loss: 0.026 | test avg. loss: 4.831
Epochs: 26340 | epoch avg. loss: 0.030 | test avg. loss: 4.698
Epochs: 26341 | epoch avg. loss: 0.066 | test avg. loss: 4.945


 53%|█████▎    | 26345/50000 [39:31<29:45, 13.25it/s]

Epochs: 26342 | epoch avg. loss: 0.054 | test avg. loss: 4.721
Epochs: 26343 | epoch avg. loss: 0.037 | test avg. loss: 4.758
Epochs: 26344 | epoch avg. loss: 0.038 | test avg. loss: 4.698


 53%|█████▎    | 26347/50000 [39:31<29:37, 13.30it/s]

Epochs: 26345 | epoch avg. loss: 0.022 | test avg. loss: 4.703
Epochs: 26346 | epoch avg. loss: 0.016 | test avg. loss: 4.804
Epochs: 26347 | epoch avg. loss: 0.016 | test avg. loss: 4.744


 53%|█████▎    | 26351/50000 [39:31<29:02, 13.57it/s]

Epochs: 26348 | epoch avg. loss: 0.015 | test avg. loss: 4.832
Epochs: 26349 | epoch avg. loss: 0.027 | test avg. loss: 4.665
Epochs: 26350 | epoch avg. loss: 0.013 | test avg. loss: 4.697


 53%|█████▎    | 26353/50000 [39:32<30:05, 13.09it/s]

Epochs: 26351 | epoch avg. loss: 0.010 | test avg. loss: 4.678
Epochs: 26352 | epoch avg. loss: 0.008 | test avg. loss: 4.704
Epochs: 26353 | epoch avg. loss: 0.008 | test avg. loss: 4.720


 53%|█████▎    | 26357/50000 [39:32<29:40, 13.28it/s]

Epochs: 26354 | epoch avg. loss: 0.006 | test avg. loss: 4.697
Epochs: 26355 | epoch avg. loss: 0.006 | test avg. loss: 4.737
Epochs: 26356 | epoch avg. loss: 0.010 | test avg. loss: 4.645


 53%|█████▎    | 26359/50000 [39:32<30:38, 12.86it/s]

Epochs: 26357 | epoch avg. loss: 0.024 | test avg. loss: 4.744
Epochs: 26358 | epoch avg. loss: 0.039 | test avg. loss: 4.709
Epochs: 26359 | epoch avg. loss: 0.032 | test avg. loss: 4.820


 53%|█████▎    | 26363/50000 [39:32<29:36, 13.30it/s]

Epochs: 26360 | epoch avg. loss: 0.029 | test avg. loss: 5.004
Epochs: 26361 | epoch avg. loss: 0.045 | test avg. loss: 4.753
Epochs: 26362 | epoch avg. loss: 0.063 | test avg. loss: 4.843


 53%|█████▎    | 26365/50000 [39:33<33:06, 11.90it/s]

Epochs: 26363 | epoch avg. loss: 0.068 | test avg. loss: 4.572
Epochs: 26364 | epoch avg. loss: 0.070 | test avg. loss: 4.797
Epochs: 26365 | epoch avg. loss: 0.167 | test avg. loss: 4.537


 53%|█████▎    | 26369/50000 [39:33<33:24, 11.79it/s]

Epochs: 26366 | epoch avg. loss: 0.057 | test avg. loss: 4.616
Epochs: 26367 | epoch avg. loss: 0.071 | test avg. loss: 4.746
Epochs: 26368 | epoch avg. loss: 0.068 | test avg. loss: 4.675




Epochs: 26369 | epoch avg. loss: 0.109 | test avg. loss: 4.982
Epochs: 26370 | epoch avg. loss: 0.175 | test avg. loss: 4.537
Epochs: 26371 | epoch avg. loss: 0.130 | test avg. loss: 4.647


 53%|█████▎    | 26375/50000 [39:33<29:08, 13.51it/s]

Epochs: 26372 | epoch avg. loss: 0.039 | test avg. loss: 4.849
Epochs: 26373 | epoch avg. loss: 0.042 | test avg. loss: 4.789
Epochs: 26374 | epoch avg. loss: 0.024 | test avg. loss: 4.850


 53%|█████▎    | 26377/50000 [39:34<28:53, 13.63it/s]

Epochs: 26375 | epoch avg. loss: 0.030 | test avg. loss: 4.636
Epochs: 26376 | epoch avg. loss: 0.093 | test avg. loss: 4.783
Epochs: 26377 | epoch avg. loss: 0.101 | test avg. loss: 4.569


 53%|█████▎    | 26381/50000 [39:34<28:11, 13.96it/s]

Epochs: 26378 | epoch avg. loss: 0.089 | test avg. loss: 4.627
Epochs: 26379 | epoch avg. loss: 0.049 | test avg. loss: 4.608
Epochs: 26380 | epoch avg. loss: 0.049 | test avg. loss: 4.495
Epochs: 26381 | epoch avg. loss: 0.058 | test avg. loss: 4.745


 53%|█████▎    | 26385/50000 [39:34<27:48, 14.15it/s]

Epochs: 26382 | epoch avg. loss: 0.105 | test avg. loss: 4.528
Epochs: 26383 | epoch avg. loss: 0.076 | test avg. loss: 4.818
Epochs: 26384 | epoch avg. loss: 0.084 | test avg. loss: 4.811


 53%|█████▎    | 26387/50000 [39:34<27:52, 14.12it/s]

Epochs: 26385 | epoch avg. loss: 0.048 | test avg. loss: 4.843
Epochs: 26386 | epoch avg. loss: 0.042 | test avg. loss: 4.988
Epochs: 26387 | epoch avg. loss: 0.060 | test avg. loss: 4.648


 53%|█████▎    | 26391/50000 [39:34<30:55, 12.72it/s]

Epochs: 26388 | epoch avg. loss: 0.124 | test avg. loss: 4.817
Epochs: 26389 | epoch avg. loss: 0.072 | test avg. loss: 4.694
Epochs: 26390 | epoch avg. loss: 0.048 | test avg. loss: 4.845


 53%|█████▎    | 26393/50000 [39:35<31:05, 12.66it/s]

Epochs: 26391 | epoch avg. loss: 0.033 | test avg. loss: 4.980
Epochs: 26392 | epoch avg. loss: 0.041 | test avg. loss: 4.784
Epochs: 26393 | epoch avg. loss: 0.051 | test avg. loss: 4.846


 53%|█████▎    | 26397/50000 [39:35<30:15, 13.00it/s]

Epochs: 26394 | epoch avg. loss: 0.032 | test avg. loss: 4.674
Epochs: 26395 | epoch avg. loss: 0.077 | test avg. loss: 4.869
Epochs: 26396 | epoch avg. loss: 0.039 | test avg. loss: 4.874


 53%|█████▎    | 26399/50000 [39:35<30:56, 12.71it/s]

Epochs: 26397 | epoch avg. loss: 0.017 | test avg. loss: 4.837
Epochs: 26398 | epoch avg. loss: 0.013 | test avg. loss: 4.823
Epochs: 26399 | epoch avg. loss: 0.017 | test avg. loss: 4.640


 53%|█████▎    | 26403/50000 [39:37<1:32:31,  4.25it/s]

Epochs: 26400 | epoch avg. loss: 0.033 | test avg. loss: 4.710
Epochs: 26401 | epoch avg. loss: 0.015 | test avg. loss: 4.705
Epochs: 26402 | epoch avg. loss: 0.007 | test avg. loss: 4.768
Epochs: 26403 | epoch avg. loss: 0.013 | test avg. loss: 4.730


 53%|█████▎    | 26407/50000 [39:37<58:17,  6.75it/s]

Epochs: 26404 | epoch avg. loss: 0.009 | test avg. loss: 4.618
Epochs: 26405 | epoch avg. loss: 0.012 | test avg. loss: 4.606
Epochs: 26406 | epoch avg. loss: 0.006 | test avg. loss: 4.564
Epochs: 26407 | epoch avg. loss: 0.009 | test avg. loss: 4.685


 53%|█████▎    | 26411/50000 [39:37<44:19,  8.87it/s]

Epochs: 26408 | epoch avg. loss: 0.011 | test avg. loss: 4.717
Epochs: 26409 | epoch avg. loss: 0.014 | test avg. loss: 4.746
Epochs: 26410 | epoch avg. loss: 0.009 | test avg. loss: 4.730


 53%|█████▎    | 26413/50000 [39:38<38:44, 10.15it/s]

Epochs: 26411 | epoch avg. loss: 0.006 | test avg. loss: 4.659
Epochs: 26412 | epoch avg. loss: 0.007 | test avg. loss: 4.638
Epochs: 26413 | epoch avg. loss: 0.005 | test avg. loss: 4.657


 53%|█████▎    | 26417/50000 [39:38<33:42, 11.66it/s]

Epochs: 26414 | epoch avg. loss: 0.005 | test avg. loss: 4.669
Epochs: 26415 | epoch avg. loss: 0.004 | test avg. loss: 4.714
Epochs: 26416 | epoch avg. loss: 0.005 | test avg. loss: 4.674


 53%|█████▎    | 26419/50000 [39:38<32:45, 12.00it/s]

Epochs: 26417 | epoch avg. loss: 0.007 | test avg. loss: 4.783
Epochs: 26418 | epoch avg. loss: 0.021 | test avg. loss: 4.598
Epochs: 26419 | epoch avg. loss: 0.026 | test avg. loss: 4.640


 53%|█████▎    | 26423/50000 [39:38<31:19, 12.55it/s]

Epochs: 26420 | epoch avg. loss: 0.013 | test avg. loss: 4.732
Epochs: 26421 | epoch avg. loss: 0.011 | test avg. loss: 4.715
Epochs: 26422 | epoch avg. loss: 0.006 | test avg. loss: 4.746


 53%|█████▎    | 26425/50000 [39:39<35:07, 11.18it/s]

Epochs: 26423 | epoch avg. loss: 0.005 | test avg. loss: 4.714
Epochs: 26424 | epoch avg. loss: 0.004 | test avg. loss: 4.679
Epochs: 26425 | epoch avg. loss: 0.006 | test avg. loss: 4.606




Epochs: 26426 | epoch avg. loss: 0.008 | test avg. loss: 4.612
Epochs: 26427 | epoch avg. loss: 0.006 | test avg. loss: 4.699
Epochs: 26428 | epoch avg. loss: 0.008 | test avg. loss: 4.700


 53%|█████▎    | 26433/50000 [39:39<28:03, 14.00it/s]

Epochs: 26429 | epoch avg. loss: 0.004 | test avg. loss: 4.698
Epochs: 26430 | epoch avg. loss: 0.005 | test avg. loss: 4.694
Epochs: 26431 | epoch avg. loss: 0.006 | test avg. loss: 4.624
Epochs: 26432 | epoch avg. loss: 0.009 | test avg. loss: 4.642


 53%|█████▎    | 26435/50000 [39:39<26:59, 14.55it/s]

Epochs: 26433 | epoch avg. loss: 0.004 | test avg. loss: 4.637
Epochs: 26434 | epoch avg. loss: 0.009 | test avg. loss: 4.658
Epochs: 26435 | epoch avg. loss: 0.006 | test avg. loss: 4.679


 53%|█████▎    | 26439/50000 [39:40<29:34, 13.28it/s]

Epochs: 26436 | epoch avg. loss: 0.004 | test avg. loss: 4.656
Epochs: 26437 | epoch avg. loss: 0.004 | test avg. loss: 4.648
Epochs: 26438 | epoch avg. loss: 0.004 | test avg. loss: 4.607


 53%|█████▎    | 26443/50000 [39:40<27:08, 14.47it/s]

Epochs: 26439 | epoch avg. loss: 0.009 | test avg. loss: 4.688
Epochs: 26440 | epoch avg. loss: 0.014 | test avg. loss: 4.657
Epochs: 26441 | epoch avg. loss: 0.009 | test avg. loss: 4.646
Epochs: 26442 | epoch avg. loss: 0.008 | test avg. loss: 4.694


 53%|█████▎    | 26447/50000 [39:40<25:22, 15.47it/s]

Epochs: 26443 | epoch avg. loss: 0.009 | test avg. loss: 4.631
Epochs: 26444 | epoch avg. loss: 0.006 | test avg. loss: 4.694
Epochs: 26445 | epoch avg. loss: 0.009 | test avg. loss: 4.645
Epochs: 26446 | epoch avg. loss: 0.007 | test avg. loss: 4.725


 53%|█████▎    | 26449/50000 [39:40<25:03, 15.67it/s]

Epochs: 26447 | epoch avg. loss: 0.007 | test avg. loss: 4.729
Epochs: 26448 | epoch avg. loss: 0.009 | test avg. loss: 4.716
Epochs: 26449 | epoch avg. loss: 0.007 | test avg. loss: 4.743


 53%|█████▎    | 26453/50000 [39:41<29:31, 13.29it/s]

Epochs: 26450 | epoch avg. loss: 0.006 | test avg. loss: 4.663
Epochs: 26451 | epoch avg. loss: 0.019 | test avg. loss: 4.671
Epochs: 26452 | epoch avg. loss: 0.012 | test avg. loss: 4.717


 53%|█████▎    | 26455/50000 [39:41<32:18, 12.15it/s]

Epochs: 26453 | epoch avg. loss: 0.015 | test avg. loss: 4.628
Epochs: 26454 | epoch avg. loss: 0.020 | test avg. loss: 4.801
Epochs: 26455 | epoch avg. loss: 0.042 | test avg. loss: 4.689


 53%|█████▎    | 26459/50000 [39:41<32:29, 12.08it/s]

Epochs: 26456 | epoch avg. loss: 0.009 | test avg. loss: 4.648
Epochs: 26457 | epoch avg. loss: 0.014 | test avg. loss: 4.749
Epochs: 26458 | epoch avg. loss: 0.032 | test avg. loss: 4.609


 53%|█████▎    | 26461/50000 [39:41<32:51, 11.94it/s]

Epochs: 26459 | epoch avg. loss: 0.030 | test avg. loss: 4.681
Epochs: 26460 | epoch avg. loss: 0.010 | test avg. loss: 4.719
Epochs: 26461 | epoch avg. loss: 0.008 | test avg. loss: 4.690


 53%|█████▎    | 26465/50000 [39:42<36:08, 10.85it/s]

Epochs: 26462 | epoch avg. loss: 0.007 | test avg. loss: 4.690
Epochs: 26463 | epoch avg. loss: 0.007 | test avg. loss: 4.637
Epochs: 26464 | epoch avg. loss: 0.005 | test avg. loss: 4.642


 53%|█████▎    | 26467/50000 [39:42<36:22, 10.78it/s]

Epochs: 26465 | epoch avg. loss: 0.005 | test avg. loss: 4.652
Epochs: 26466 | epoch avg. loss: 0.005 | test avg. loss: 4.737
Epochs: 26467 | epoch avg. loss: 0.009 | test avg. loss: 4.732


 53%|█████▎    | 26471/50000 [39:42<32:06, 12.21it/s]

Epochs: 26468 | epoch avg. loss: 0.005 | test avg. loss: 4.675
Epochs: 26469 | epoch avg. loss: 0.005 | test avg. loss: 4.689
Epochs: 26470 | epoch avg. loss: 0.008 | test avg. loss: 4.645
Epochs: 26471 | epoch avg. loss: 0.004 | test avg. loss: 4.679


 53%|█████▎    | 26475/50000 [39:42<31:18, 12.53it/s]

Epochs: 26472 | epoch avg. loss: 0.004 | test avg. loss: 4.666
Epochs: 26473 | epoch avg. loss: 0.015 | test avg. loss: 4.657
Epochs: 26474 | epoch avg. loss: 0.010 | test avg. loss: 4.766


 53%|█████▎    | 26479/50000 [39:43<29:40, 13.21it/s]

Epochs: 26475 | epoch avg. loss: 0.035 | test avg. loss: 4.620
Epochs: 26476 | epoch avg. loss: 0.030 | test avg. loss: 4.641
Epochs: 26477 | epoch avg. loss: 0.019 | test avg. loss: 4.803
Epochs: 26478 | epoch avg. loss: 0.027 | test avg. loss: 4.729


 53%|█████▎    | 26481/50000 [39:43<28:53, 13.57it/s]

Epochs: 26479 | epoch avg. loss: 0.006 | test avg. loss: 4.707
Epochs: 26480 | epoch avg. loss: 0.005 | test avg. loss: 4.655
Epochs: 26481 | epoch avg. loss: 0.008 | test avg. loss: 4.622


 53%|█████▎    | 26485/50000 [39:43<27:58, 14.01it/s]

Epochs: 26482 | epoch avg. loss: 0.008 | test avg. loss: 4.637
Epochs: 26483 | epoch avg. loss: 0.007 | test avg. loss: 4.640
Epochs: 26484 | epoch avg. loss: 0.005 | test avg. loss: 4.637


 53%|█████▎    | 26487/50000 [39:43<28:40, 13.67it/s]

Epochs: 26485 | epoch avg. loss: 0.007 | test avg. loss: 4.740
Epochs: 26486 | epoch avg. loss: 0.011 | test avg. loss: 4.664
Epochs: 26487 | epoch avg. loss: 0.008 | test avg. loss: 4.666


 53%|█████▎    | 26491/50000 [39:44<31:06, 12.59it/s]

Epochs: 26488 | epoch avg. loss: 0.012 | test avg. loss: 4.622
Epochs: 26489 | epoch avg. loss: 0.007 | test avg. loss: 4.603
Epochs: 26490 | epoch avg. loss: 0.008 | test avg. loss: 4.678


 53%|█████▎    | 26493/50000 [39:44<31:16, 12.53it/s]

Epochs: 26491 | epoch avg. loss: 0.006 | test avg. loss: 4.689
Epochs: 26492 | epoch avg. loss: 0.004 | test avg. loss: 4.678
Epochs: 26493 | epoch avg. loss: 0.006 | test avg. loss: 4.749


 53%|█████▎    | 26497/50000 [39:44<32:14, 12.15it/s]

Epochs: 26494 | epoch avg. loss: 0.015 | test avg. loss: 4.637
Epochs: 26495 | epoch avg. loss: 0.019 | test avg. loss: 4.668
Epochs: 26496 | epoch avg. loss: 0.009 | test avg. loss: 4.676


 53%|█████▎    | 26499/50000 [39:44<31:25, 12.46it/s]

Epochs: 26497 | epoch avg. loss: 0.009 | test avg. loss: 4.611
Epochs: 26498 | epoch avg. loss: 0.012 | test avg. loss: 4.704
Epochs: 26499 | epoch avg. loss: 0.013 | test avg. loss: 4.620


 53%|█████▎    | 26503/50000 [39:46<1:37:01,  4.04it/s]

Epochs: 26500 | epoch avg. loss: 0.025 | test avg. loss: 4.594
Epochs: 26501 | epoch avg. loss: 0.012 | test avg. loss: 4.680
Epochs: 26502 | epoch avg. loss: 0.020 | test avg. loss: 4.585


 53%|█████▎    | 26507/50000 [39:47<1:01:19,  6.38it/s]

Epochs: 26503 | epoch avg. loss: 0.043 | test avg. loss: 4.753
Epochs: 26504 | epoch avg. loss: 0.032 | test avg. loss: 4.849
Epochs: 26505 | epoch avg. loss: 0.041 | test avg. loss: 4.624
Epochs: 26506 | epoch avg. loss: 0.022 | test avg. loss: 4.701


 53%|█████▎    | 26509/50000 [39:47<52:27,  7.46it/s]

Epochs: 26507 | epoch avg. loss: 0.013 | test avg. loss: 4.675
Epochs: 26508 | epoch avg. loss: 0.016 | test avg. loss: 4.705
Epochs: 26509 | epoch avg. loss: 0.037 | test avg. loss: 4.920


 53%|█████▎    | 26513/50000 [39:47<42:14,  9.27it/s]

Epochs: 26510 | epoch avg. loss: 0.026 | test avg. loss: 4.835
Epochs: 26511 | epoch avg. loss: 0.009 | test avg. loss: 4.704
Epochs: 26512 | epoch avg. loss: 0.008 | test avg. loss: 4.615


 53%|█████▎    | 26515/50000 [39:47<37:57, 10.31it/s]

Epochs: 26513 | epoch avg. loss: 0.009 | test avg. loss: 4.698
Epochs: 26514 | epoch avg. loss: 0.022 | test avg. loss: 4.620
Epochs: 26515 | epoch avg. loss: 0.024 | test avg. loss: 4.732


 53%|█████▎    | 26519/50000 [39:47<33:19, 11.74it/s]

Epochs: 26516 | epoch avg. loss: 0.012 | test avg. loss: 4.722
Epochs: 26517 | epoch avg. loss: 0.011 | test avg. loss: 4.601
Epochs: 26518 | epoch avg. loss: 0.018 | test avg. loss: 4.597


 53%|█████▎    | 26521/50000 [39:48<31:34, 12.40it/s]

Epochs: 26519 | epoch avg. loss: 0.015 | test avg. loss: 4.552
Epochs: 26520 | epoch avg. loss: 0.009 | test avg. loss: 4.565
Epochs: 26521 | epoch avg. loss: 0.007 | test avg. loss: 4.648


 53%|█████▎    | 26525/50000 [39:48<28:42, 13.63it/s]

Epochs: 26522 | epoch avg. loss: 0.006 | test avg. loss: 4.623
Epochs: 26523 | epoch avg. loss: 0.021 | test avg. loss: 4.683
Epochs: 26524 | epoch avg. loss: 0.018 | test avg. loss: 4.689
Epochs: 26525 | epoch avg. loss: 0.018 | test avg. loss: 4.585


 53%|█████▎    | 26529/50000 [39:48<26:26, 14.79it/s]

Epochs: 26526 | epoch avg. loss: 0.021 | test avg. loss: 4.648
Epochs: 26527 | epoch avg. loss: 0.008 | test avg. loss: 4.732
Epochs: 26528 | epoch avg. loss: 0.014 | test avg. loss: 4.669
Epochs: 26529 | epoch avg. loss: 0.012 | test avg. loss: 4.680


 53%|█████▎    | 26533/50000 [39:48<27:43, 14.11it/s]

Epochs: 26530 | epoch avg. loss: 0.006 | test avg. loss: 4.730
Epochs: 26531 | epoch avg. loss: 0.010 | test avg. loss: 4.716
Epochs: 26532 | epoch avg. loss: 0.007 | test avg. loss: 4.709


 53%|█████▎    | 26537/50000 [39:49<26:59, 14.49it/s]

Epochs: 26533 | epoch avg. loss: 0.007 | test avg. loss: 4.749
Epochs: 26534 | epoch avg. loss: 0.013 | test avg. loss: 4.716
Epochs: 26535 | epoch avg. loss: 0.007 | test avg. loss: 4.653
Epochs: 26536 | epoch avg. loss: 0.007 | test avg. loss: 4.665


 53%|█████▎    | 26539/50000 [39:49<28:24, 13.76it/s]

Epochs: 26537 | epoch avg. loss: 0.005 | test avg. loss: 4.727
Epochs: 26538 | epoch avg. loss: 0.007 | test avg. loss: 4.689
Epochs: 26539 | epoch avg. loss: 0.008 | test avg. loss: 4.715


 53%|█████▎    | 26543/50000 [39:49<28:11, 13.86it/s]

Epochs: 26540 | epoch avg. loss: 0.004 | test avg. loss: 4.723
Epochs: 26541 | epoch avg. loss: 0.005 | test avg. loss: 4.702
Epochs: 26542 | epoch avg. loss: 0.006 | test avg. loss: 4.777
Epochs: 26543 | epoch avg. loss: 0.016 | test avg. loss: 4.686


 53%|█████▎    | 26547/50000 [39:49<27:54, 14.01it/s]

Epochs: 26544 | epoch avg. loss: 0.014 | test avg. loss: 4.670
Epochs: 26545 | epoch avg. loss: 0.020 | test avg. loss: 4.847
Epochs: 26546 | epoch avg. loss: 0.046 | test avg. loss: 4.801


 53%|█████▎    | 26551/50000 [39:50<27:29, 14.21it/s]

Epochs: 26547 | epoch avg. loss: 0.016 | test avg. loss: 4.807
Epochs: 26548 | epoch avg. loss: 0.022 | test avg. loss: 4.813
Epochs: 26549 | epoch avg. loss: 0.025 | test avg. loss: 4.616
Epochs: 26550 | epoch avg. loss: 0.044 | test avg. loss: 4.662


 53%|█████▎    | 26555/50000 [39:50<26:20, 14.84it/s]

Epochs: 26551 | epoch avg. loss: 0.024 | test avg. loss: 4.689
Epochs: 26552 | epoch avg. loss: 0.016 | test avg. loss: 4.650
Epochs: 26553 | epoch avg. loss: 0.022 | test avg. loss: 4.883
Epochs: 26554 | epoch avg. loss: 0.065 | test avg. loss: 4.748


 53%|█████▎    | 26557/50000 [39:50<26:13, 14.90it/s]

Epochs: 26555 | epoch avg. loss: 0.039 | test avg. loss: 4.711
Epochs: 26556 | epoch avg. loss: 0.034 | test avg. loss: 5.002
Epochs: 26557 | epoch avg. loss: 0.151 | test avg. loss: 4.734


 53%|█████▎    | 26561/50000 [39:50<28:18, 13.80it/s]

Epochs: 26558 | epoch avg. loss: 0.153 | test avg. loss: 4.884
Epochs: 26559 | epoch avg. loss: 0.079 | test avg. loss: 5.361
Epochs: 26560 | epoch avg. loss: 0.172 | test avg. loss: 4.872


 53%|█████▎    | 26563/50000 [39:51<31:00, 12.60it/s]

Epochs: 26561 | epoch avg. loss: 0.119 | test avg. loss: 4.769
Epochs: 26562 | epoch avg. loss: 0.051 | test avg. loss: 4.935
Epochs: 26563 | epoch avg. loss: 0.075 | test avg. loss: 4.640


 53%|█████▎    | 26567/50000 [39:51<30:03, 12.99it/s]

Epochs: 26564 | epoch avg. loss: 0.164 | test avg. loss: 4.667
Epochs: 26565 | epoch avg. loss: 0.067 | test avg. loss: 4.676
Epochs: 26566 | epoch avg. loss: 0.042 | test avg. loss: 4.657


 53%|█████▎    | 26569/50000 [39:51<29:31, 13.23it/s]

Epochs: 26567 | epoch avg. loss: 0.092 | test avg. loss: 4.842
Epochs: 26568 | epoch avg. loss: 0.150 | test avg. loss: 4.778
Epochs: 26569 | epoch avg. loss: 0.044 | test avg. loss: 4.838
Epochs: 26570 | epoch avg. loss: 0.047 | test avg. loss: 5.129


 53%|█████▎    | 26573/50000 [39:51<27:58, 13.96it/s]

Epochs: 26571 | epoch avg. loss: 0.096 | test avg. loss: 4.935
Epochs: 26572 | epoch avg. loss: 0.068 | test avg. loss: 4.963
Epochs: 26573 | epoch avg. loss: 0.063 | test avg. loss: 5.177


 53%|█████▎    | 26577/50000 [39:52<28:47, 13.56it/s]

Epochs: 26574 | epoch avg. loss: 0.104 | test avg. loss: 4.781
Epochs: 26575 | epoch avg. loss: 0.105 | test avg. loss: 4.678
Epochs: 26576 | epoch avg. loss: 0.066 | test avg. loss: 4.852


 53%|█████▎    | 26579/50000 [39:52<29:38, 13.17it/s]

Epochs: 26577 | epoch avg. loss: 0.078 | test avg. loss: 4.737
Epochs: 26578 | epoch avg. loss: 0.052 | test avg. loss: 4.805
Epochs: 26579 | epoch avg. loss: 0.036 | test avg. loss: 4.992


 53%|█████▎    | 26583/50000 [39:52<29:38, 13.16it/s]

Epochs: 26580 | epoch avg. loss: 0.059 | test avg. loss: 4.846
Epochs: 26581 | epoch avg. loss: 0.013 | test avg. loss: 4.885
Epochs: 26582 | epoch avg. loss: 0.037 | test avg. loss: 5.006


 53%|█████▎    | 26587/50000 [39:52<27:40, 14.10it/s]

Epochs: 26583 | epoch avg. loss: 0.033 | test avg. loss: 4.896
Epochs: 26584 | epoch avg. loss: 0.059 | test avg. loss: 4.939
Epochs: 26585 | epoch avg. loss: 0.008 | test avg. loss: 4.894
Epochs: 26586 | epoch avg. loss: 0.008 | test avg. loss: 4.877


 53%|█████▎    | 26589/50000 [39:53<29:13, 13.35it/s]

Epochs: 26587 | epoch avg. loss: 0.018 | test avg. loss: 4.935
Epochs: 26588 | epoch avg. loss: 0.041 | test avg. loss: 4.801
Epochs: 26589 | epoch avg. loss: 0.014 | test avg. loss: 4.784


 53%|█████▎    | 26593/50000 [39:53<28:10, 13.84it/s]

Epochs: 26590 | epoch avg. loss: 0.030 | test avg. loss: 4.905
Epochs: 26591 | epoch avg. loss: 0.016 | test avg. loss: 4.826
Epochs: 26592 | epoch avg. loss: 0.019 | test avg. loss: 4.771


 53%|█████▎    | 26595/50000 [39:53<28:16, 13.80it/s]

Epochs: 26593 | epoch avg. loss: 0.038 | test avg. loss: 4.923
Epochs: 26594 | epoch avg. loss: 0.031 | test avg. loss: 4.728
Epochs: 26595 | epoch avg. loss: 0.037 | test avg. loss: 4.668
Epochs: 26596 | epoch avg. loss: 0.035 | test avg. loss: 4.852


 53%|█████▎    | 26599/50000 [39:53<26:59, 14.45it/s]

Epochs: 26597 | epoch avg. loss: 0.056 | test avg. loss: 4.791
Epochs: 26598 | epoch avg. loss: 0.042 | test avg. loss: 4.802
Epochs: 26599 | epoch avg. loss: 0.038 | test avg. loss: 4.967


                                                       

Epochs: 26600 | epoch avg. loss: 0.086 | test avg. loss: 4.762
Epochs: 26601 | epoch avg. loss: 0.094 | test avg. loss: 4.788
Epochs: 26602 | epoch avg. loss: 0.046 | test avg. loss: 4.958


 53%|█████▎    | 26607/50000 [39:55<55:40,  7.00it/s]  

Epochs: 26603 | epoch avg. loss: 0.053 | test avg. loss: 4.815
Epochs: 26604 | epoch avg. loss: 0.102 | test avg. loss: 4.958
Epochs: 26605 | epoch avg. loss: 0.093 | test avg. loss: 4.943
Epochs: 26606 | epoch avg. loss: 0.074 | test avg. loss: 4.663


 53%|█████▎    | 26609/50000 [39:55<48:25,  8.05it/s]

Epochs: 26607 | epoch avg. loss: 0.193 | test avg. loss: 4.734
Epochs: 26608 | epoch avg. loss: 0.150 | test avg. loss: 4.974


 53%|█████▎    | 26611/50000 [39:56<45:40,  8.53it/s]

Epochs: 26609 | epoch avg. loss: 0.092 | test avg. loss: 4.924
Epochs: 26610 | epoch avg. loss: 0.175 | test avg. loss: 5.030
Epochs: 26611 | epoch avg. loss: 0.093 | test avg. loss: 4.833


 53%|█████▎    | 26615/50000 [39:56<39:25,  9.88it/s]

Epochs: 26612 | epoch avg. loss: 0.066 | test avg. loss: 4.595
Epochs: 26613 | epoch avg. loss: 0.204 | test avg. loss: 4.602
Epochs: 26614 | epoch avg. loss: 0.042 | test avg. loss: 4.997


 53%|█████▎    | 26617/50000 [39:56<36:52, 10.57it/s]

Epochs: 26615 | epoch avg. loss: 0.131 | test avg. loss: 4.657
Epochs: 26616 | epoch avg. loss: 0.067 | test avg. loss: 4.644
Epochs: 26617 | epoch avg. loss: 0.075 | test avg. loss: 4.734


 53%|█████▎    | 26621/50000 [39:56<32:35, 11.96it/s]

Epochs: 26618 | epoch avg. loss: 0.066 | test avg. loss: 4.626
Epochs: 26619 | epoch avg. loss: 0.042 | test avg. loss: 4.739
Epochs: 26620 | epoch avg. loss: 0.065 | test avg. loss: 4.694


 53%|█████▎    | 26623/50000 [39:57<33:07, 11.76it/s]

Epochs: 26621 | epoch avg. loss: 0.065 | test avg. loss: 4.701
Epochs: 26622 | epoch avg. loss: 0.122 | test avg. loss: 4.928
Epochs: 26623 | epoch avg. loss: 0.133 | test avg. loss: 4.604
Epochs: 26624 | epoch avg. loss: 0.062 | test avg. loss: 4.590


 53%|█████▎    | 26627/50000 [39:57<29:26, 13.23it/s]

Epochs: 26625 | epoch avg. loss: 0.143 | test avg. loss: 4.789
Epochs: 26626 | epoch avg. loss: 0.179 | test avg. loss: 4.634
Epochs: 26627 | epoch avg. loss: 0.047 | test avg. loss: 4.527


 53%|█████▎    | 26631/50000 [39:57<28:51, 13.49it/s]

Epochs: 26628 | epoch avg. loss: 0.075 | test avg. loss: 4.653
Epochs: 26629 | epoch avg. loss: 0.037 | test avg. loss: 4.723
Epochs: 26630 | epoch avg. loss: 0.025 | test avg. loss: 4.575


 53%|█████▎    | 26633/50000 [39:57<29:45, 13.09it/s]

Epochs: 26631 | epoch avg. loss: 0.030 | test avg. loss: 4.613
Epochs: 26632 | epoch avg. loss: 0.035 | test avg. loss: 4.674
Epochs: 26633 | epoch avg. loss: 0.038 | test avg. loss: 4.625


 53%|█████▎    | 26635/50000 [39:58<32:06, 12.13it/s]

Epochs: 26634 | epoch avg. loss: 0.214 | test avg. loss: 4.731
Epochs: 26635 | epoch avg. loss: 0.066 | test avg. loss: 4.852
Epochs: 26636 | epoch avg. loss: 0.082 | test avg. loss: 4.795


 53%|█████▎    | 26639/50000 [39:58<34:19, 11.34it/s]

Epochs: 26637 | epoch avg. loss: 0.074 | test avg. loss: 5.033
Epochs: 26638 | epoch avg. loss: 0.132 | test avg. loss: 4.478
Epochs: 26639 | epoch avg. loss: 0.214 | test avg. loss: 4.429


 53%|█████▎    | 26643/50000 [39:58<33:38, 11.57it/s]

Epochs: 26640 | epoch avg. loss: 0.045 | test avg. loss: 4.710
Epochs: 26641 | epoch avg. loss: 0.041 | test avg. loss: 4.769
Epochs: 26642 | epoch avg. loss: 0.131 | test avg. loss: 4.805


 53%|█████▎    | 26645/50000 [39:58<33:30, 11.62it/s]

Epochs: 26643 | epoch avg. loss: 0.089 | test avg. loss: 4.755
Epochs: 26644 | epoch avg. loss: 0.060 | test avg. loss: 4.584
Epochs: 26645 | epoch avg. loss: 0.151 | test avg. loss: 4.660


 53%|█████▎    | 26649/50000 [39:59<33:19, 11.68it/s]

Epochs: 26646 | epoch avg. loss: 0.088 | test avg. loss: 4.648
Epochs: 26647 | epoch avg. loss: 0.078 | test avg. loss: 4.700
Epochs: 26648 | epoch avg. loss: 0.130 | test avg. loss: 5.026


 53%|█████▎    | 26653/50000 [39:59<29:45, 13.08it/s]

Epochs: 26649 | epoch avg. loss: 0.142 | test avg. loss: 4.726
Epochs: 26650 | epoch avg. loss: 0.043 | test avg. loss: 4.696
Epochs: 26651 | epoch avg. loss: 0.073 | test avg. loss: 4.854
Epochs: 26652 | epoch avg. loss: 0.051 | test avg. loss: 4.698


 53%|█████▎    | 26655/50000 [39:59<30:00, 12.96it/s]

Epochs: 26653 | epoch avg. loss: 0.060 | test avg. loss: 4.683
Epochs: 26654 | epoch avg. loss: 0.031 | test avg. loss: 4.778
Epochs: 26655 | epoch avg. loss: 0.053 | test avg. loss: 4.719


 53%|█████▎    | 26659/50000 [39:59<31:09, 12.48it/s]

Epochs: 26656 | epoch avg. loss: 0.302 | test avg. loss: 4.782
Epochs: 26657 | epoch avg. loss: 0.109 | test avg. loss: 4.912
Epochs: 26658 | epoch avg. loss: 0.071 | test avg. loss: 4.804


 53%|█████▎    | 26661/50000 [40:00<31:49, 12.22it/s]

Epochs: 26659 | epoch avg. loss: 0.144 | test avg. loss: 4.774
Epochs: 26660 | epoch avg. loss: 0.063 | test avg. loss: 4.628
Epochs: 26661 | epoch avg. loss: 0.038 | test avg. loss: 4.552


 53%|█████▎    | 26665/50000 [40:00<31:53, 12.19it/s]

Epochs: 26662 | epoch avg. loss: 0.038 | test avg. loss: 4.860
Epochs: 26663 | epoch avg. loss: 0.066 | test avg. loss: 4.751
Epochs: 26664 | epoch avg. loss: 0.051 | test avg. loss: 4.846


 53%|█████▎    | 26667/50000 [40:00<32:37, 11.92it/s]

Epochs: 26665 | epoch avg. loss: 0.035 | test avg. loss: 5.132
Epochs: 26666 | epoch avg. loss: 0.132 | test avg. loss: 4.695
Epochs: 26667 | epoch avg. loss: 0.070 | test avg. loss: 4.616


 53%|█████▎    | 26669/50000 [40:00<32:44, 11.87it/s]

Epochs: 26668 | epoch avg. loss: 0.032 | test avg. loss: 4.733
Epochs: 26669 | epoch avg. loss: 0.032 | test avg. loss: 4.664


 53%|█████▎    | 26673/50000 [40:01<37:11, 10.45it/s]

Epochs: 26670 | epoch avg. loss: 0.031 | test avg. loss: 4.651
Epochs: 26671 | epoch avg. loss: 0.015 | test avg. loss: 4.703
Epochs: 26672 | epoch avg. loss: 0.039 | test avg. loss: 4.536


 53%|█████▎    | 26675/50000 [40:01<35:14, 11.03it/s]

Epochs: 26673 | epoch avg. loss: 0.026 | test avg. loss: 4.578
Epochs: 26674 | epoch avg. loss: 0.015 | test avg. loss: 4.709
Epochs: 26675 | epoch avg. loss: 0.014 | test avg. loss: 4.690


 53%|█████▎    | 26679/50000 [40:01<32:37, 11.91it/s]

Epochs: 26676 | epoch avg. loss: 0.014 | test avg. loss: 4.838
Epochs: 26677 | epoch avg. loss: 0.030 | test avg. loss: 4.729
Epochs: 26678 | epoch avg. loss: 0.010 | test avg. loss: 4.708


 53%|█████▎    | 26681/50000 [40:01<32:25, 11.99it/s]

Epochs: 26679 | epoch avg. loss: 0.014 | test avg. loss: 4.820
Epochs: 26680 | epoch avg. loss: 0.019 | test avg. loss: 4.728
Epochs: 26681 | epoch avg. loss: 0.043 | test avg. loss: 4.816


 53%|█████▎    | 26685/50000 [40:02<32:44, 11.87it/s]

Epochs: 26682 | epoch avg. loss: 0.013 | test avg. loss: 4.804
Epochs: 26683 | epoch avg. loss: 0.011 | test avg. loss: 4.663
Epochs: 26684 | epoch avg. loss: 0.020 | test avg. loss: 4.677


 53%|█████▎    | 26687/50000 [40:02<32:38, 11.90it/s]

Epochs: 26685 | epoch avg. loss: 0.017 | test avg. loss: 4.651
Epochs: 26686 | epoch avg. loss: 0.009 | test avg. loss: 4.635
Epochs: 26687 | epoch avg. loss: 0.015 | test avg. loss: 4.688


 53%|█████▎    | 26691/50000 [40:02<33:05, 11.74it/s]

Epochs: 26688 | epoch avg. loss: 0.007 | test avg. loss: 4.754
Epochs: 26689 | epoch avg. loss: 0.010 | test avg. loss: 4.673
Epochs: 26690 | epoch avg. loss: 0.012 | test avg. loss: 4.637


 53%|█████▎    | 26693/50000 [40:02<33:10, 11.71it/s]

Epochs: 26691 | epoch avg. loss: 0.009 | test avg. loss: 4.747
Epochs: 26692 | epoch avg. loss: 0.024 | test avg. loss: 4.647
Epochs: 26693 | epoch avg. loss: 0.029 | test avg. loss: 4.670


 53%|█████▎    | 26697/50000 [40:03<31:14, 12.43it/s]

Epochs: 26694 | epoch avg. loss: 0.014 | test avg. loss: 4.811
Epochs: 26695 | epoch avg. loss: 0.026 | test avg. loss: 4.701
Epochs: 26696 | epoch avg. loss: 0.107 | test avg. loss: 4.741


 53%|█████▎    | 26699/50000 [40:03<31:03, 12.50it/s]

Epochs: 26697 | epoch avg. loss: 0.040 | test avg. loss: 4.987
Epochs: 26698 | epoch avg. loss: 0.084 | test avg. loss: 4.705
Epochs: 26699 | epoch avg. loss: 0.123 | test avg. loss: 4.767


 53%|█████▎    | 26703/50000 [40:05<1:45:04,  3.70it/s]

Epochs: 26700 | epoch avg. loss: 0.016 | test avg. loss: 4.892
Epochs: 26701 | epoch avg. loss: 0.019 | test avg. loss: 4.799
Epochs: 26702 | epoch avg. loss: 0.036 | test avg. loss: 4.732


 53%|█████▎    | 26705/50000 [40:05<1:21:55,  4.74it/s]

Epochs: 26703 | epoch avg. loss: 0.017 | test avg. loss: 4.736
Epochs: 26704 | epoch avg. loss: 0.022 | test avg. loss: 4.774
Epochs: 26705 | epoch avg. loss: 0.017 | test avg. loss: 4.851


 53%|█████▎    | 26709/50000 [40:05<54:37,  7.11it/s]  

Epochs: 26706 | epoch avg. loss: 0.008 | test avg. loss: 4.866
Epochs: 26707 | epoch avg. loss: 0.009 | test avg. loss: 4.769
Epochs: 26708 | epoch avg. loss: 0.008 | test avg. loss: 4.645


 53%|█████▎    | 26711/50000 [40:06<46:01,  8.43it/s]

Epochs: 26709 | epoch avg. loss: 0.008 | test avg. loss: 4.662
Epochs: 26710 | epoch avg. loss: 0.008 | test avg. loss: 4.644
Epochs: 26711 | epoch avg. loss: 0.033 | test avg. loss: 4.695


 53%|█████▎    | 26715/50000 [40:06<36:37, 10.60it/s]

Epochs: 26712 | epoch avg. loss: 0.009 | test avg. loss: 4.766
Epochs: 26713 | epoch avg. loss: 0.020 | test avg. loss: 4.682
Epochs: 26714 | epoch avg. loss: 0.007 | test avg. loss: 4.657


 53%|█████▎    | 26717/50000 [40:06<34:19, 11.30it/s]

Epochs: 26715 | epoch avg. loss: 0.005 | test avg. loss: 4.662
Epochs: 26716 | epoch avg. loss: 0.006 | test avg. loss: 4.677
Epochs: 26717 | epoch avg. loss: 0.007 | test avg. loss: 4.839


 53%|█████▎    | 26721/50000 [40:06<30:39, 12.65it/s]

Epochs: 26718 | epoch avg. loss: 0.029 | test avg. loss: 4.747
Epochs: 26719 | epoch avg. loss: 0.023 | test avg. loss: 4.715
Epochs: 26720 | epoch avg. loss: 0.023 | test avg. loss: 4.702


 53%|█████▎    | 26723/50000 [40:07<30:07, 12.88it/s]

Epochs: 26721 | epoch avg. loss: 0.022 | test avg. loss: 4.612
Epochs: 26722 | epoch avg. loss: 0.065 | test avg. loss: 4.721
Epochs: 26723 | epoch avg. loss: 0.063 | test avg. loss: 4.866


 53%|█████▎    | 26727/50000 [40:07<33:28, 11.59it/s]

Epochs: 26724 | epoch avg. loss: 0.062 | test avg. loss: 4.701
Epochs: 26725 | epoch avg. loss: 0.080 | test avg. loss: 4.718
Epochs: 26726 | epoch avg. loss: 0.023 | test avg. loss: 4.730


 53%|█████▎    | 26729/50000 [40:07<34:08, 11.36it/s]

Epochs: 26727 | epoch avg. loss: 0.025 | test avg. loss: 4.591
Epochs: 26728 | epoch avg. loss: 0.047 | test avg. loss: 4.840
Epochs: 26729 | epoch avg. loss: 0.046 | test avg. loss: 4.721


 53%|█████▎    | 26733/50000 [40:07<30:41, 12.64it/s]

Epochs: 26730 | epoch avg. loss: 0.029 | test avg. loss: 4.682
Epochs: 26731 | epoch avg. loss: 0.031 | test avg. loss: 4.793
Epochs: 26732 | epoch avg. loss: 0.045 | test avg. loss: 4.661


 53%|█████▎    | 26735/50000 [40:08<32:03, 12.09it/s]

Epochs: 26733 | epoch avg. loss: 0.016 | test avg. loss: 4.663
Epochs: 26734 | epoch avg. loss: 0.018 | test avg. loss: 4.869
Epochs: 26735 | epoch avg. loss: 0.031 | test avg. loss: 4.834


 53%|█████▎    | 26739/50000 [40:08<33:04, 11.72it/s]

Epochs: 26736 | epoch avg. loss: 0.042 | test avg. loss: 4.839
Epochs: 26737 | epoch avg. loss: 0.023 | test avg. loss: 4.923
Epochs: 26738 | epoch avg. loss: 0.042 | test avg. loss: 4.694


 53%|█████▎    | 26741/50000 [40:08<33:14, 11.66it/s]

Epochs: 26739 | epoch avg. loss: 0.029 | test avg. loss: 4.706
Epochs: 26740 | epoch avg. loss: 0.032 | test avg. loss: 4.718
Epochs: 26741 | epoch avg. loss: 0.028 | test avg. loss: 4.621


 53%|█████▎    | 26745/50000 [40:08<31:43, 12.22it/s]

Epochs: 26742 | epoch avg. loss: 0.010 | test avg. loss: 4.722
Epochs: 26743 | epoch avg. loss: 0.013 | test avg. loss: 4.741
Epochs: 26744 | epoch avg. loss: 0.008 | test avg. loss: 4.729


 53%|█████▎    | 26747/50000 [40:09<34:20, 11.29it/s]

Epochs: 26745 | epoch avg. loss: 0.006 | test avg. loss: 4.711
Epochs: 26746 | epoch avg. loss: 0.004 | test avg. loss: 4.725
Epochs: 26747 | epoch avg. loss: 0.007 | test avg. loss: 4.726


 54%|█████▎    | 26751/50000 [40:09<31:08, 12.44it/s]

Epochs: 26748 | epoch avg. loss: 0.005 | test avg. loss: 4.753
Epochs: 26749 | epoch avg. loss: 0.004 | test avg. loss: 4.785
Epochs: 26750 | epoch avg. loss: 0.004 | test avg. loss: 4.772


 54%|█████▎    | 26753/50000 [40:09<31:14, 12.40it/s]

Epochs: 26751 | epoch avg. loss: 0.008 | test avg. loss: 4.745
Epochs: 26752 | epoch avg. loss: 0.006 | test avg. loss: 4.811
Epochs: 26753 | epoch avg. loss: 0.022 | test avg. loss: 4.633


 54%|█████▎    | 26757/50000 [40:09<30:09, 12.84it/s]

Epochs: 26754 | epoch avg. loss: 0.024 | test avg. loss: 4.620
Epochs: 26755 | epoch avg. loss: 0.016 | test avg. loss: 4.779
Epochs: 26756 | epoch avg. loss: 0.023 | test avg. loss: 4.769


 54%|█████▎    | 26759/50000 [40:10<32:51, 11.79it/s]

Epochs: 26757 | epoch avg. loss: 0.005 | test avg. loss: 4.781
Epochs: 26758 | epoch avg. loss: 0.007 | test avg. loss: 4.810
Epochs: 26759 | epoch avg. loss: 0.006 | test avg. loss: 4.805


 54%|█████▎    | 26763/50000 [40:10<31:50, 12.16it/s]

Epochs: 26760 | epoch avg. loss: 0.006 | test avg. loss: 4.718
Epochs: 26761 | epoch avg. loss: 0.018 | test avg. loss: 4.710
Epochs: 26762 | epoch avg. loss: 0.009 | test avg. loss: 4.762


 54%|█████▎    | 26765/50000 [40:10<30:37, 12.65it/s]

Epochs: 26763 | epoch avg. loss: 0.015 | test avg. loss: 4.693
Epochs: 26764 | epoch avg. loss: 0.014 | test avg. loss: 4.731
Epochs: 26765 | epoch avg. loss: 0.005 | test avg. loss: 4.764


 54%|█████▎    | 26769/50000 [40:10<29:30, 13.12it/s]

Epochs: 26766 | epoch avg. loss: 0.006 | test avg. loss: 4.707
Epochs: 26767 | epoch avg. loss: 0.011 | test avg. loss: 4.755
Epochs: 26768 | epoch avg. loss: 0.008 | test avg. loss: 4.766


 54%|█████▎    | 26771/50000 [40:10<30:57, 12.51it/s]

Epochs: 26769 | epoch avg. loss: 0.007 | test avg. loss: 4.766
Epochs: 26770 | epoch avg. loss: 0.007 | test avg. loss: 4.779
Epochs: 26771 | epoch avg. loss: 0.005 | test avg. loss: 4.743


 54%|█████▎    | 26775/50000 [40:11<31:09, 12.42it/s]

Epochs: 26772 | epoch avg. loss: 0.005 | test avg. loss: 4.752
Epochs: 26773 | epoch avg. loss: 0.005 | test avg. loss: 4.683
Epochs: 26774 | epoch avg. loss: 0.010 | test avg. loss: 4.675


 54%|█████▎    | 26777/50000 [40:11<31:09, 12.42it/s]

Epochs: 26775 | epoch avg. loss: 0.017 | test avg. loss: 4.706
Epochs: 26776 | epoch avg. loss: 0.006 | test avg. loss: 4.801
Epochs: 26777 | epoch avg. loss: 0.015 | test avg. loss: 4.757


 54%|█████▎    | 26781/50000 [40:11<31:56, 12.12it/s]

Epochs: 26778 | epoch avg. loss: 0.006 | test avg. loss: 4.761
Epochs: 26779 | epoch avg. loss: 0.006 | test avg. loss: 4.845
Epochs: 26780 | epoch avg. loss: 0.020 | test avg. loss: 4.842


 54%|█████▎    | 26783/50000 [40:11<31:00, 12.48it/s]

Epochs: 26781 | epoch avg. loss: 0.009 | test avg. loss: 4.734
Epochs: 26782 | epoch avg. loss: 0.017 | test avg. loss: 4.807
Epochs: 26783 | epoch avg. loss: 0.025 | test avg. loss: 4.740


 54%|█████▎    | 26787/50000 [40:12<31:03, 12.46it/s]

Epochs: 26784 | epoch avg. loss: 0.010 | test avg. loss: 4.653
Epochs: 26785 | epoch avg. loss: 0.022 | test avg. loss: 4.751
Epochs: 26786 | epoch avg. loss: 0.031 | test avg. loss: 4.795


 54%|█████▎    | 26789/50000 [40:12<31:03, 12.46it/s]

Epochs: 26787 | epoch avg. loss: 0.027 | test avg. loss: 4.703
Epochs: 26788 | epoch avg. loss: 0.033 | test avg. loss: 4.751
Epochs: 26789 | epoch avg. loss: 0.070 | test avg. loss: 4.792


 54%|█████▎    | 26793/50000 [40:12<30:48, 12.55it/s]

Epochs: 26790 | epoch avg. loss: 0.057 | test avg. loss: 4.771
Epochs: 26791 | epoch avg. loss: 0.036 | test avg. loss: 4.707
Epochs: 26792 | epoch avg. loss: 0.131 | test avg. loss: 4.859


 54%|█████▎    | 26795/50000 [40:12<31:01, 12.46it/s]

Epochs: 26793 | epoch avg. loss: 0.048 | test avg. loss: 5.060
Epochs: 26794 | epoch avg. loss: 0.075 | test avg. loss: 4.771
Epochs: 26795 | epoch avg. loss: 0.077 | test avg. loss: 4.773


 54%|█████▎    | 26799/50000 [40:13<31:14, 12.38it/s]

Epochs: 26796 | epoch avg. loss: 0.046 | test avg. loss: 5.031
Epochs: 26797 | epoch avg. loss: 0.094 | test avg. loss: 4.873
Epochs: 26798 | epoch avg. loss: 0.035 | test avg. loss: 4.841


 54%|█████▎    | 26799/50000 [40:13<31:14, 12.38it/s]

Epochs: 26799 | epoch avg. loss: 0.068 | test avg. loss: 4.892


 54%|█████▎    | 26803/50000 [40:15<1:44:19,  3.71it/s]

Epochs: 26800 | epoch avg. loss: 0.035 | test avg. loss: 4.815
Epochs: 26801 | epoch avg. loss: 0.020 | test avg. loss: 4.609
Epochs: 26802 | epoch avg. loss: 0.077 | test avg. loss: 4.550


 54%|█████▎    | 26805/50000 [40:15<1:21:56,  4.72it/s]

Epochs: 26803 | epoch avg. loss: 0.026 | test avg. loss: 4.708
Epochs: 26804 | epoch avg. loss: 0.034 | test avg. loss: 4.643
Epochs: 26805 | epoch avg. loss: 0.077 | test avg. loss: 4.664


 54%|█████▎    | 26809/50000 [40:15<53:56,  7.16it/s]  

Epochs: 26806 | epoch avg. loss: 0.024 | test avg. loss: 4.768
Epochs: 26807 | epoch avg. loss: 0.064 | test avg. loss: 4.561
Epochs: 26808 | epoch avg. loss: 0.033 | test avg. loss: 4.535


 54%|█████▎    | 26811/50000 [40:15<48:00,  8.05it/s]

Epochs: 26809 | epoch avg. loss: 0.028 | test avg. loss: 4.718
Epochs: 26810 | epoch avg. loss: 0.071 | test avg. loss: 4.676


 54%|█████▎    | 26813/50000 [40:16<44:31,  8.68it/s]

Epochs: 26811 | epoch avg. loss: 0.028 | test avg. loss: 4.773
Epochs: 26812 | epoch avg. loss: 0.059 | test avg. loss: 4.979
Epochs: 26813 | epoch avg. loss: 0.042 | test avg. loss: 4.764


 54%|█████▎    | 26817/50000 [40:16<37:51, 10.20it/s]

Epochs: 26814 | epoch avg. loss: 0.126 | test avg. loss: 4.669
Epochs: 26815 | epoch avg. loss: 0.091 | test avg. loss: 4.744
Epochs: 26816 | epoch avg. loss: 0.050 | test avg. loss: 4.714


 54%|█████▎    | 26819/50000 [40:16<36:20, 10.63it/s]

Epochs: 26817 | epoch avg. loss: 0.034 | test avg. loss: 4.726
Epochs: 26818 | epoch avg. loss: 0.066 | test avg. loss: 4.899
Epochs: 26819 | epoch avg. loss: 0.102 | test avg. loss: 4.501


 54%|█████▎    | 26823/50000 [40:16<36:02, 10.72it/s]

Epochs: 26820 | epoch avg. loss: 0.105 | test avg. loss: 4.356
Epochs: 26821 | epoch avg. loss: 0.094 | test avg. loss: 4.625
Epochs: 26822 | epoch avg. loss: 0.245 | test avg. loss: 4.472


 54%|█████▎    | 26825/50000 [40:17<35:25, 10.90it/s]

Epochs: 26823 | epoch avg. loss: 0.069 | test avg. loss: 4.735
Epochs: 26824 | epoch avg. loss: 0.124 | test avg. loss: 4.976
Epochs: 26825 | epoch avg. loss: 0.049 | test avg. loss: 4.797


 54%|█████▎    | 26829/50000 [40:17<32:53, 11.74it/s]

Epochs: 26826 | epoch avg. loss: 0.031 | test avg. loss: 4.616
Epochs: 26827 | epoch avg. loss: 0.034 | test avg. loss: 4.593
Epochs: 26828 | epoch avg. loss: 0.039 | test avg. loss: 4.736


 54%|█████▎    | 26831/50000 [40:17<32:53, 11.74it/s]

Epochs: 26829 | epoch avg. loss: 0.035 | test avg. loss: 4.634
Epochs: 26830 | epoch avg. loss: 0.022 | test avg. loss: 4.492
Epochs: 26831 | epoch avg. loss: 0.014 | test avg. loss: 4.455


 54%|█████▎    | 26835/50000 [40:17<33:31, 11.52it/s]

Epochs: 26832 | epoch avg. loss: 0.030 | test avg. loss: 4.386
Epochs: 26833 | epoch avg. loss: 0.013 | test avg. loss: 4.491
Epochs: 26834 | epoch avg. loss: 0.007 | test avg. loss: 4.642


 54%|█████▎    | 26837/50000 [40:18<32:28, 11.89it/s]

Epochs: 26835 | epoch avg. loss: 0.007 | test avg. loss: 4.684
Epochs: 26836 | epoch avg. loss: 0.012 | test avg. loss: 4.630
Epochs: 26837 | epoch avg. loss: 0.011 | test avg. loss: 4.598


 54%|█████▎    | 26841/50000 [40:18<32:00, 12.06it/s]

Epochs: 26838 | epoch avg. loss: 0.014 | test avg. loss: 4.506
Epochs: 26839 | epoch avg. loss: 0.011 | test avg. loss: 4.505
Epochs: 26840 | epoch avg. loss: 0.005 | test avg. loss: 4.545


 54%|█████▎    | 26843/50000 [40:18<30:30, 12.65it/s]

Epochs: 26841 | epoch avg. loss: 0.007 | test avg. loss: 4.633
Epochs: 26842 | epoch avg. loss: 0.013 | test avg. loss: 4.713
Epochs: 26843 | epoch avg. loss: 0.046 | test avg. loss: 4.552


 54%|█████▎    | 26847/50000 [40:18<31:43, 12.16it/s]

Epochs: 26844 | epoch avg. loss: 0.008 | test avg. loss: 4.510
Epochs: 26845 | epoch avg. loss: 0.009 | test avg. loss: 4.595
Epochs: 26846 | epoch avg. loss: 0.010 | test avg. loss: 4.604


 54%|█████▎    | 26849/50000 [40:19<31:34, 12.22it/s]

Epochs: 26847 | epoch avg. loss: 0.018 | test avg. loss: 4.718
Epochs: 26848 | epoch avg. loss: 0.013 | test avg. loss: 4.874
Epochs: 26849 | epoch avg. loss: 0.037 | test avg. loss: 4.713


 54%|█████▎    | 26853/50000 [40:19<33:16, 11.60it/s]

Epochs: 26850 | epoch avg. loss: 0.025 | test avg. loss: 4.654
Epochs: 26851 | epoch avg. loss: 0.030 | test avg. loss: 4.746
Epochs: 26852 | epoch avg. loss: 0.043 | test avg. loss: 4.659


 54%|█████▎    | 26855/50000 [40:19<33:40, 11.45it/s]

Epochs: 26853 | epoch avg. loss: 0.029 | test avg. loss: 4.691
Epochs: 26854 | epoch avg. loss: 0.030 | test avg. loss: 4.892
Epochs: 26855 | epoch avg. loss: 0.065 | test avg. loss: 4.781


 54%|█████▎    | 26859/50000 [40:20<34:27, 11.19it/s]

Epochs: 26856 | epoch avg. loss: 0.018 | test avg. loss: 4.684
Epochs: 26857 | epoch avg. loss: 0.035 | test avg. loss: 4.903
Epochs: 26858 | epoch avg. loss: 0.060 | test avg. loss: 4.849


 54%|█████▎    | 26861/50000 [40:20<32:58, 11.70it/s]

Epochs: 26859 | epoch avg. loss: 0.026 | test avg. loss: 4.654
Epochs: 26860 | epoch avg. loss: 0.053 | test avg. loss: 4.709
Epochs: 26861 | epoch avg. loss: 0.036 | test avg. loss: 4.831


 54%|█████▎    | 26865/50000 [40:20<33:32, 11.50it/s]

Epochs: 26862 | epoch avg. loss: 0.042 | test avg. loss: 4.590
Epochs: 26863 | epoch avg. loss: 0.086 | test avg. loss: 4.544
Epochs: 26864 | epoch avg. loss: 0.029 | test avg. loss: 4.591


 54%|█████▎    | 26867/50000 [40:20<33:04, 11.66it/s]

Epochs: 26865 | epoch avg. loss: 0.014 | test avg. loss: 4.587
Epochs: 26866 | epoch avg. loss: 0.011 | test avg. loss: 4.575
Epochs: 26867 | epoch avg. loss: 0.028 | test avg. loss: 4.712


 54%|█████▎    | 26871/50000 [40:21<35:19, 10.91it/s]

Epochs: 26868 | epoch avg. loss: 0.067 | test avg. loss: 4.873
Epochs: 26869 | epoch avg. loss: 0.068 | test avg. loss: 4.642
Epochs: 26870 | epoch avg. loss: 0.071 | test avg. loss: 4.629


 54%|█████▎    | 26873/50000 [40:21<35:15, 10.93it/s]

Epochs: 26871 | epoch avg. loss: 0.020 | test avg. loss: 4.716
Epochs: 26872 | epoch avg. loss: 0.033 | test avg. loss: 4.738
Epochs: 26873 | epoch avg. loss: 0.030 | test avg. loss: 4.611


 54%|█████▍    | 26877/50000 [40:21<33:21, 11.55it/s]

Epochs: 26874 | epoch avg. loss: 0.029 | test avg. loss: 4.589
Epochs: 26875 | epoch avg. loss: 0.031 | test avg. loss: 4.737
Epochs: 26876 | epoch avg. loss: 0.061 | test avg. loss: 4.621


 54%|█████▍    | 26879/50000 [40:21<31:26, 12.25it/s]

Epochs: 26877 | epoch avg. loss: 0.018 | test avg. loss: 4.546
Epochs: 26878 | epoch avg. loss: 0.030 | test avg. loss: 4.647
Epochs: 26879 | epoch avg. loss: 0.010 | test avg. loss: 4.730


 54%|█████▍    | 26883/50000 [40:22<30:00, 12.84it/s]

Epochs: 26880 | epoch avg. loss: 0.008 | test avg. loss: 4.773
Epochs: 26881 | epoch avg. loss: 0.010 | test avg. loss: 4.759
Epochs: 26882 | epoch avg. loss: 0.010 | test avg. loss: 4.679


 54%|█████▍    | 26885/50000 [40:22<29:28, 13.07it/s]

Epochs: 26883 | epoch avg. loss: 0.005 | test avg. loss: 4.591
Epochs: 26884 | epoch avg. loss: 0.008 | test avg. loss: 4.622
Epochs: 26885 | epoch avg. loss: 0.013 | test avg. loss: 4.729


 54%|█████▍    | 26889/50000 [40:22<29:13, 13.18it/s]

Epochs: 26886 | epoch avg. loss: 0.018 | test avg. loss: 4.687
Epochs: 26887 | epoch avg. loss: 0.009 | test avg. loss: 4.696
Epochs: 26888 | epoch avg. loss: 0.006 | test avg. loss: 4.705


 54%|█████▍    | 26891/50000 [40:22<29:24, 13.10it/s]

Epochs: 26889 | epoch avg. loss: 0.006 | test avg. loss: 4.623
Epochs: 26890 | epoch avg. loss: 0.006 | test avg. loss: 4.602
Epochs: 26891 | epoch avg. loss: 0.005 | test avg. loss: 4.623


 54%|█████▍    | 26895/50000 [40:22<30:19, 12.70it/s]

Epochs: 26892 | epoch avg. loss: 0.006 | test avg. loss: 4.599
Epochs: 26893 | epoch avg. loss: 0.007 | test avg. loss: 4.641
Epochs: 26894 | epoch avg. loss: 0.010 | test avg. loss: 4.672


 54%|█████▍    | 26897/50000 [40:23<29:51, 12.89it/s]

Epochs: 26895 | epoch avg. loss: 0.009 | test avg. loss: 4.677
Epochs: 26896 | epoch avg. loss: 0.013 | test avg. loss: 4.622
Epochs: 26897 | epoch avg. loss: 0.008 | test avg. loss: 4.544


 54%|█████▍    | 26899/50000 [40:23<30:06, 12.79it/s]

Epochs: 26898 | epoch avg. loss: 0.026 | test avg. loss: 4.593
Epochs: 26899 | epoch avg. loss: 0.008 | test avg. loss: 4.700


 54%|█████▍    | 26903/50000 [40:25<1:41:17,  3.80it/s]

Epochs: 26900 | epoch avg. loss: 0.023 | test avg. loss: 4.684
Epochs: 26901 | epoch avg. loss: 0.007 | test avg. loss: 4.637
Epochs: 26902 | epoch avg. loss: 0.006 | test avg. loss: 4.599


 54%|█████▍    | 26905/50000 [40:25<1:19:46,  4.83it/s]

Epochs: 26903 | epoch avg. loss: 0.010 | test avg. loss: 4.599
Epochs: 26904 | epoch avg. loss: 0.013 | test avg. loss: 4.568
Epochs: 26905 | epoch avg. loss: 0.005 | test avg. loss: 4.587


 54%|█████▍    | 26909/50000 [40:25<56:11,  6.85it/s]  

Epochs: 26906 | epoch avg. loss: 0.005 | test avg. loss: 4.657
Epochs: 26907 | epoch avg. loss: 0.006 | test avg. loss: 4.692
Epochs: 26908 | epoch avg. loss: 0.014 | test avg. loss: 4.632


 54%|█████▍    | 26911/50000 [40:26<50:04,  7.68it/s]

Epochs: 26909 | epoch avg. loss: 0.005 | test avg. loss: 4.554
Epochs: 26910 | epoch avg. loss: 0.009 | test avg. loss: 4.631
Epochs: 26911 | epoch avg. loss: 0.012 | test avg. loss: 4.690


 54%|█████▍    | 26915/50000 [40:26<42:11,  9.12it/s]

Epochs: 26912 | epoch avg. loss: 0.010 | test avg. loss: 4.652
Epochs: 26913 | epoch avg. loss: 0.013 | test avg. loss: 4.674
Epochs: 26914 | epoch avg. loss: 0.022 | test avg. loss: 4.750


 54%|█████▍    | 26917/50000 [40:26<40:07,  9.59it/s]

Epochs: 26915 | epoch avg. loss: 0.021 | test avg. loss: 4.754
Epochs: 26916 | epoch avg. loss: 0.020 | test avg. loss: 4.597
Epochs: 26917 | epoch avg. loss: 0.042 | test avg. loss: 4.634


 54%|█████▍    | 26921/50000 [40:26<37:27, 10.27it/s]

Epochs: 26918 | epoch avg. loss: 0.021 | test avg. loss: 4.741
Epochs: 26919 | epoch avg. loss: 0.027 | test avg. loss: 4.584
Epochs: 26920 | epoch avg. loss: 0.074 | test avg. loss: 4.586


 54%|█████▍    | 26923/50000 [40:27<35:59, 10.69it/s]

Epochs: 26921 | epoch avg. loss: 0.027 | test avg. loss: 4.819
Epochs: 26922 | epoch avg. loss: 0.104 | test avg. loss: 4.624
Epochs: 26923 | epoch avg. loss: 0.040 | test avg. loss: 4.594


 54%|█████▍    | 26927/50000 [40:27<33:46, 11.38it/s]

Epochs: 26924 | epoch avg. loss: 0.056 | test avg. loss: 4.650
Epochs: 26925 | epoch avg. loss: 0.063 | test avg. loss: 4.709
Epochs: 26926 | epoch avg. loss: 0.040 | test avg. loss: 4.621


 54%|█████▍    | 26929/50000 [40:27<32:57, 11.67it/s]

Epochs: 26927 | epoch avg. loss: 0.129 | test avg. loss: 4.551
Epochs: 26928 | epoch avg. loss: 0.037 | test avg. loss: 4.726
Epochs: 26929 | epoch avg. loss: 0.149 | test avg. loss: 4.405


 54%|█████▍    | 26933/50000 [40:27<32:53, 11.69it/s]

Epochs: 26930 | epoch avg. loss: 0.069 | test avg. loss: 4.537
Epochs: 26931 | epoch avg. loss: 0.105 | test avg. loss: 4.844
Epochs: 26932 | epoch avg. loss: 0.119 | test avg. loss: 4.939


 54%|█████▍    | 26935/50000 [40:28<32:29, 11.83it/s]

Epochs: 26933 | epoch avg. loss: 0.055 | test avg. loss: 4.798
Epochs: 26934 | epoch avg. loss: 0.105 | test avg. loss: 4.838
Epochs: 26935 | epoch avg. loss: 0.039 | test avg. loss: 5.160




Epochs: 26936 | epoch avg. loss: 0.110 | test avg. loss: 4.702
Epochs: 26937 | epoch avg. loss: 0.232 | test avg. loss: 4.617
Epochs: 26938 | epoch avg. loss: 0.105 | test avg. loss: 4.759


 54%|█████▍    | 26941/50000 [40:28<29:32, 13.01it/s]

Epochs: 26939 | epoch avg. loss: 0.096 | test avg. loss: 4.651
Epochs: 26940 | epoch avg. loss: 0.047 | test avg. loss: 4.641
Epochs: 26941 | epoch avg. loss: 0.072 | test avg. loss: 4.961


 54%|█████▍    | 26945/50000 [40:28<29:35, 12.98it/s]

Epochs: 26942 | epoch avg. loss: 0.202 | test avg. loss: 4.657
Epochs: 26943 | epoch avg. loss: 0.053 | test avg. loss: 4.676
Epochs: 26944 | epoch avg. loss: 0.280 | test avg. loss: 4.737


 54%|█████▍    | 26947/50000 [40:29<30:09, 12.74it/s]

Epochs: 26945 | epoch avg. loss: 0.089 | test avg. loss: 5.464
Epochs: 26946 | epoch avg. loss: 0.311 | test avg. loss: 4.749
Epochs: 26947 | epoch avg. loss: 0.302 | test avg. loss: 4.853


 54%|█████▍    | 26951/50000 [40:29<29:47, 12.89it/s]

Epochs: 26948 | epoch avg. loss: 0.209 | test avg. loss: 5.110
Epochs: 26949 | epoch avg. loss: 0.300 | test avg. loss: 4.892
Epochs: 26950 | epoch avg. loss: 0.124 | test avg. loss: 4.814


 54%|█████▍    | 26953/50000 [40:29<28:59, 13.25it/s]

Epochs: 26951 | epoch avg. loss: 0.180 | test avg. loss: 4.888
Epochs: 26952 | epoch avg. loss: 0.058 | test avg. loss: 5.107
Epochs: 26953 | epoch avg. loss: 0.103 | test avg. loss: 5.027


 54%|█████▍    | 26957/50000 [40:29<28:17, 13.57it/s]

Epochs: 26954 | epoch avg. loss: 0.050 | test avg. loss: 4.713
Epochs: 26955 | epoch avg. loss: 0.064 | test avg. loss: 4.590
Epochs: 26956 | epoch avg. loss: 0.054 | test avg. loss: 4.704


 54%|█████▍    | 26959/50000 [40:29<28:45, 13.36it/s]

Epochs: 26957 | epoch avg. loss: 0.087 | test avg. loss: 4.613
Epochs: 26958 | epoch avg. loss: 0.048 | test avg. loss: 4.429
Epochs: 26959 | epoch avg. loss: 0.038 | test avg. loss: 4.472


 54%|█████▍    | 26963/50000 [40:30<28:35, 13.43it/s]

Epochs: 26960 | epoch avg. loss: 0.021 | test avg. loss: 4.665
Epochs: 26961 | epoch avg. loss: 0.033 | test avg. loss: 4.574
Epochs: 26962 | epoch avg. loss: 0.077 | test avg. loss: 4.567


 54%|█████▍    | 26967/50000 [40:30<27:38, 13.89it/s]

Epochs: 26963 | epoch avg. loss: 0.020 | test avg. loss: 4.621
Epochs: 26964 | epoch avg. loss: 0.016 | test avg. loss: 4.544
Epochs: 26965 | epoch avg. loss: 0.070 | test avg. loss: 4.552
Epochs: 26966 | epoch avg. loss: 0.019 | test avg. loss: 4.683


 54%|█████▍    | 26969/50000 [40:30<28:33, 13.44it/s]

Epochs: 26967 | epoch avg. loss: 0.048 | test avg. loss: 4.569
Epochs: 26968 | epoch avg. loss: 0.018 | test avg. loss: 4.551
Epochs: 26969 | epoch avg. loss: 0.031 | test avg. loss: 4.616


 54%|█████▍    | 26973/50000 [40:30<28:52, 13.29it/s]

Epochs: 26970 | epoch avg. loss: 0.016 | test avg. loss: 4.547
Epochs: 26971 | epoch avg. loss: 0.012 | test avg. loss: 4.450
Epochs: 26972 | epoch avg. loss: 0.063 | test avg. loss: 4.494


 54%|█████▍    | 26975/50000 [40:31<29:12, 13.14it/s]

Epochs: 26973 | epoch avg. loss: 0.089 | test avg. loss: 4.554
Epochs: 26974 | epoch avg. loss: 0.050 | test avg. loss: 4.722
Epochs: 26975 | epoch avg. loss: 0.137 | test avg. loss: 5.053


 54%|█████▍    | 26979/50000 [40:31<28:50, 13.30it/s]

Epochs: 26976 | epoch avg. loss: 0.225 | test avg. loss: 4.870
Epochs: 26977 | epoch avg. loss: 0.105 | test avg. loss: 4.684
Epochs: 26978 | epoch avg. loss: 0.366 | test avg. loss: 4.503


 54%|█████▍    | 26981/50000 [40:31<29:02, 13.21it/s]

Epochs: 26979 | epoch avg. loss: 0.163 | test avg. loss: 4.793
Epochs: 26980 | epoch avg. loss: 0.205 | test avg. loss: 4.430
Epochs: 26981 | epoch avg. loss: 0.326 | test avg. loss: 4.500


 54%|█████▍    | 26985/50000 [40:31<29:17, 13.10it/s]

Epochs: 26982 | epoch avg. loss: 0.067 | test avg. loss: 5.084
Epochs: 26983 | epoch avg. loss: 0.169 | test avg. loss: 4.847
Epochs: 26984 | epoch avg. loss: 0.406 | test avg. loss: 4.843


 54%|█████▍    | 26987/50000 [40:32<31:29, 12.18it/s]

Epochs: 26985 | epoch avg. loss: 0.326 | test avg. loss: 5.870
Epochs: 26986 | epoch avg. loss: 0.720 | test avg. loss: 4.957
Epochs: 26987 | epoch avg. loss: 0.769 | test avg. loss: 5.420


 54%|█████▍    | 26991/50000 [40:32<29:58, 12.79it/s]

Epochs: 26988 | epoch avg. loss: 1.480 | test avg. loss: 5.986
Epochs: 26989 | epoch avg. loss: 0.864 | test avg. loss: 6.402
Epochs: 26990 | epoch avg. loss: 1.788 | test avg. loss: 5.888


 54%|█████▍    | 26993/50000 [40:32<30:26, 12.60it/s]

Epochs: 26991 | epoch avg. loss: 1.263 | test avg. loss: 4.672
Epochs: 26992 | epoch avg. loss: 0.674 | test avg. loss: 5.054
Epochs: 26993 | epoch avg. loss: 0.913 | test avg. loss: 9.087


 54%|█████▍    | 26997/50000 [40:32<28:22, 13.51it/s]

Epochs: 26994 | epoch avg. loss: 3.004 | test avg. loss: 8.376
Epochs: 26995 | epoch avg. loss: 3.127 | test avg. loss: 6.334
Epochs: 26996 | epoch avg. loss: 1.666 | test avg. loss: 5.608


 54%|█████▍    | 26999/50000 [40:33<29:04, 13.19it/s]

Epochs: 26997 | epoch avg. loss: 1.285 | test avg. loss: 5.556
Epochs: 26998 | epoch avg. loss: 0.441 | test avg. loss: 5.957
Epochs: 26999 | epoch avg. loss: 1.347 | test avg. loss: 5.703


 54%|█████▍    | 27003/50000 [40:34<1:34:18,  4.06it/s]

Epochs: 27000 | epoch avg. loss: 0.771 | test avg. loss: 5.110
Epochs: 27001 | epoch avg. loss: 0.576 | test avg. loss: 4.658
Epochs: 27002 | epoch avg. loss: 0.450 | test avg. loss: 4.839


 54%|█████▍    | 27005/50000 [40:35<1:14:40,  5.13it/s]

Epochs: 27003 | epoch avg. loss: 0.390 | test avg. loss: 4.879
Epochs: 27004 | epoch avg. loss: 0.324 | test avg. loss: 5.295
Epochs: 27005 | epoch avg. loss: 0.287 | test avg. loss: 5.428


 54%|█████▍    | 27009/50000 [40:35<50:19,  7.61it/s]  

Epochs: 27006 | epoch avg. loss: 0.558 | test avg. loss: 6.665
Epochs: 27007 | epoch avg. loss: 0.791 | test avg. loss: 5.454
Epochs: 27008 | epoch avg. loss: 0.877 | test avg. loss: 5.264


 54%|█████▍    | 27011/50000 [40:35<42:43,  8.97it/s]

Epochs: 27009 | epoch avg. loss: 0.490 | test avg. loss: 4.580
Epochs: 27010 | epoch avg. loss: 0.413 | test avg. loss: 4.840
Epochs: 27011 | epoch avg. loss: 0.321 | test avg. loss: 5.293


 54%|█████▍    | 27015/50000 [40:35<36:16, 10.56it/s]

Epochs: 27012 | epoch avg. loss: 0.138 | test avg. loss: 5.424
Epochs: 27013 | epoch avg. loss: 0.108 | test avg. loss: 5.323
Epochs: 27014 | epoch avg. loss: 0.101 | test avg. loss: 4.841


 54%|█████▍    | 27017/50000 [40:36<35:26, 10.81it/s]

Epochs: 27015 | epoch avg. loss: 0.120 | test avg. loss: 4.827
Epochs: 27016 | epoch avg. loss: 0.130 | test avg. loss: 4.547
Epochs: 27017 | epoch avg. loss: 0.165 | test avg. loss: 4.864


 54%|█████▍    | 27021/50000 [40:36<35:10, 10.89it/s]

Epochs: 27018 | epoch avg. loss: 0.211 | test avg. loss: 4.607
Epochs: 27019 | epoch avg. loss: 0.172 | test avg. loss: 4.818
Epochs: 27020 | epoch avg. loss: 0.140 | test avg. loss: 4.579


 54%|█████▍    | 27023/50000 [40:36<32:38, 11.73it/s]

Epochs: 27021 | epoch avg. loss: 0.269 | test avg. loss: 4.846
Epochs: 27022 | epoch avg. loss: 0.337 | test avg. loss: 4.516
Epochs: 27023 | epoch avg. loss: 0.196 | test avg. loss: 5.230


 54%|█████▍    | 27027/50000 [40:36<29:39, 12.91it/s]

Epochs: 27024 | epoch avg. loss: 0.320 | test avg. loss: 5.059
Epochs: 27025 | epoch avg. loss: 0.218 | test avg. loss: 5.440
Epochs: 27026 | epoch avg. loss: 0.198 | test avg. loss: 5.623


 54%|█████▍    | 27029/50000 [40:36<31:36, 12.11it/s]

Epochs: 27027 | epoch avg. loss: 0.168 | test avg. loss: 5.135
Epochs: 27028 | epoch avg. loss: 0.153 | test avg. loss: 4.971


 54%|█████▍    | 27031/50000 [40:37<33:23, 11.46it/s]

Epochs: 27029 | epoch avg. loss: 0.150 | test avg. loss: 4.533
Epochs: 27030 | epoch avg. loss: 0.107 | test avg. loss: 4.771
Epochs: 27031 | epoch avg. loss: 0.103 | test avg. loss: 4.587


 54%|█████▍    | 27035/50000 [40:37<34:27, 11.11it/s]

Epochs: 27032 | epoch avg. loss: 0.095 | test avg. loss: 4.803
Epochs: 27033 | epoch avg. loss: 0.090 | test avg. loss: 4.533
Epochs: 27034 | epoch avg. loss: 0.078 | test avg. loss: 4.679


 54%|█████▍    | 27037/50000 [40:37<35:04, 10.91it/s]

Epochs: 27035 | epoch avg. loss: 0.096 | test avg. loss: 4.484
Epochs: 27036 | epoch avg. loss: 0.074 | test avg. loss: 4.616
Epochs: 27037 | epoch avg. loss: 0.015 | test avg. loss: 4.664




Epochs: 27038 | epoch avg. loss: 0.020 | test avg. loss: 4.826
Epochs: 27039 | epoch avg. loss: 0.026 | test avg. loss: 4.700


 54%|█████▍    | 27043/50000 [40:38<35:27, 10.79it/s]

Epochs: 27040 | epoch avg. loss: 0.047 | test avg. loss: 4.666
Epochs: 27041 | epoch avg. loss: 0.036 | test avg. loss: 4.421
Epochs: 27042 | epoch avg. loss: 0.060 | test avg. loss: 4.474


 54%|█████▍    | 27045/50000 [40:38<33:11, 11.53it/s]

Epochs: 27043 | epoch avg. loss: 0.042 | test avg. loss: 4.448
Epochs: 27044 | epoch avg. loss: 0.021 | test avg. loss: 4.478
Epochs: 27045 | epoch avg. loss: 0.026 | test avg. loss: 4.540


 54%|█████▍    | 27049/50000 [40:38<31:28, 12.15it/s]

Epochs: 27046 | epoch avg. loss: 0.041 | test avg. loss: 4.455
Epochs: 27047 | epoch avg. loss: 0.082 | test avg. loss: 4.625
Epochs: 27048 | epoch avg. loss: 0.053 | test avg. loss: 4.463


 54%|█████▍    | 27051/50000 [40:38<30:22, 12.59it/s]

Epochs: 27049 | epoch avg. loss: 0.072 | test avg. loss: 4.684
Epochs: 27050 | epoch avg. loss: 0.059 | test avg. loss: 4.561
Epochs: 27051 | epoch avg. loss: 0.052 | test avg. loss: 4.751


 54%|█████▍    | 27055/50000 [40:39<31:26, 12.16it/s]

Epochs: 27052 | epoch avg. loss: 0.062 | test avg. loss: 4.551
Epochs: 27053 | epoch avg. loss: 0.050 | test avg. loss: 4.696
Epochs: 27054 | epoch avg. loss: 0.067 | test avg. loss: 4.455


 54%|█████▍    | 27057/50000 [40:39<32:27, 11.78it/s]

Epochs: 27055 | epoch avg. loss: 0.034 | test avg. loss: 4.593
Epochs: 27056 | epoch avg. loss: 0.074 | test avg. loss: 4.434
Epochs: 27057 | epoch avg. loss: 0.050 | test avg. loss: 4.600


 54%|█████▍    | 27061/50000 [40:39<32:31, 11.75it/s]

Epochs: 27058 | epoch avg. loss: 0.038 | test avg. loss: 4.562
Epochs: 27059 | epoch avg. loss: 0.061 | test avg. loss: 4.802
Epochs: 27060 | epoch avg. loss: 0.100 | test avg. loss: 4.469


 54%|█████▍    | 27063/50000 [40:39<32:12, 11.87it/s]

Epochs: 27061 | epoch avg. loss: 0.038 | test avg. loss: 4.457
Epochs: 27062 | epoch avg. loss: 0.043 | test avg. loss: 4.413


 54%|█████▍    | 27065/50000 [40:40<34:25, 11.10it/s]

Epochs: 27063 | epoch avg. loss: 0.048 | test avg. loss: 4.531
Epochs: 27064 | epoch avg. loss: 0.035 | test avg. loss: 4.676
Epochs: 27065 | epoch avg. loss: 0.038 | test avg. loss: 4.516


 54%|█████▍    | 27069/50000 [40:40<31:55, 11.97it/s]

Epochs: 27066 | epoch avg. loss: 0.037 | test avg. loss: 4.552
Epochs: 27067 | epoch avg. loss: 0.029 | test avg. loss: 4.359
Epochs: 27068 | epoch avg. loss: 0.031 | test avg. loss: 4.447


 54%|█████▍    | 27071/50000 [40:40<30:12, 12.65it/s]

Epochs: 27069 | epoch avg. loss: 0.031 | test avg. loss: 4.379
Epochs: 27070 | epoch avg. loss: 0.024 | test avg. loss: 4.549
Epochs: 27071 | epoch avg. loss: 0.023 | test avg. loss: 4.519




Epochs: 27072 | epoch avg. loss: 0.024 | test avg. loss: 4.659
Epochs: 27073 | epoch avg. loss: 0.029 | test avg. loss: 4.510


 54%|█████▍    | 27077/50000 [40:41<32:58, 11.58it/s]

Epochs: 27074 | epoch avg. loss: 0.030 | test avg. loss: 4.529
Epochs: 27075 | epoch avg. loss: 0.019 | test avg. loss: 4.373
Epochs: 27076 | epoch avg. loss: 0.026 | test avg. loss: 4.473


 54%|█████▍    | 27079/50000 [40:41<33:33, 11.38it/s]

Epochs: 27077 | epoch avg. loss: 0.033 | test avg. loss: 4.451
Epochs: 27078 | epoch avg. loss: 0.050 | test avg. loss: 4.627
Epochs: 27079 | epoch avg. loss: 0.035 | test avg. loss: 4.594


 54%|█████▍    | 27083/50000 [40:41<34:14, 11.15it/s]

Epochs: 27080 | epoch avg. loss: 0.021 | test avg. loss: 4.485
Epochs: 27081 | epoch avg. loss: 0.014 | test avg. loss: 4.425
Epochs: 27082 | epoch avg. loss: 0.016 | test avg. loss: 4.365


 54%|█████▍    | 27085/50000 [40:41<35:40, 10.71it/s]

Epochs: 27083 | epoch avg. loss: 0.023 | test avg. loss: 4.492
Epochs: 27084 | epoch avg. loss: 0.020 | test avg. loss: 4.461
Epochs: 27085 | epoch avg. loss: 0.027 | test avg. loss: 4.554


 54%|█████▍    | 27089/50000 [40:42<33:23, 11.44it/s]

Epochs: 27086 | epoch avg. loss: 0.011 | test avg. loss: 4.481
Epochs: 27087 | epoch avg. loss: 0.017 | test avg. loss: 4.497
Epochs: 27088 | epoch avg. loss: 0.014 | test avg. loss: 4.479


 54%|█████▍    | 27093/50000 [40:42<29:51, 12.79it/s]

Epochs: 27089 | epoch avg. loss: 0.014 | test avg. loss: 4.551
Epochs: 27090 | epoch avg. loss: 0.017 | test avg. loss: 4.524
Epochs: 27091 | epoch avg. loss: 0.012 | test avg. loss: 4.475
Epochs: 27092 | epoch avg. loss: 0.009 | test avg. loss: 4.431


 54%|█████▍    | 27095/50000 [40:42<29:13, 13.06it/s]

Epochs: 27093 | epoch avg. loss: 0.007 | test avg. loss: 4.390
Epochs: 27094 | epoch avg. loss: 0.008 | test avg. loss: 4.444
Epochs: 27095 | epoch avg. loss: 0.009 | test avg. loss: 4.440


 54%|█████▍    | 27099/50000 [40:42<29:41, 12.85it/s]

Epochs: 27096 | epoch avg. loss: 0.012 | test avg. loss: 4.585
Epochs: 27097 | epoch avg. loss: 0.026 | test avg. loss: 4.475
Epochs: 27098 | epoch avg. loss: 0.029 | test avg. loss: 4.542


 54%|█████▍    | 27099/50000 [40:43<29:41, 12.85it/s]

Epochs: 27099 | epoch avg. loss: 0.013 | test avg. loss: 4.484


 54%|█████▍    | 27103/50000 [40:45<1:48:07,  3.53it/s]

Epochs: 27100 | epoch avg. loss: 0.012 | test avg. loss: 4.491
Epochs: 27101 | epoch avg. loss: 0.007 | test avg. loss: 4.506
Epochs: 27102 | epoch avg. loss: 0.005 | test avg. loss: 4.524


 54%|█████▍    | 27105/50000 [40:45<1:26:16,  4.42it/s]

Epochs: 27103 | epoch avg. loss: 0.006 | test avg. loss: 4.525
Epochs: 27104 | epoch avg. loss: 0.005 | test avg. loss: 4.493
Epochs: 27105 | epoch avg. loss: 0.004 | test avg. loss: 4.455


 54%|█████▍    | 27109/50000 [40:45<1:00:15,  6.33it/s]

Epochs: 27106 | epoch avg. loss: 0.004 | test avg. loss: 4.466
Epochs: 27107 | epoch avg. loss: 0.008 | test avg. loss: 4.418
Epochs: 27108 | epoch avg. loss: 0.006 | test avg. loss: 4.433


 54%|█████▍    | 27111/50000 [40:46<53:38,  7.11it/s]

Epochs: 27109 | epoch avg. loss: 0.007 | test avg. loss: 4.480
Epochs: 27110 | epoch avg. loss: 0.007 | test avg. loss: 4.467
Epochs: 27111 | epoch avg. loss: 0.008 | test avg. loss: 4.493


 54%|█████▍    | 27115/50000 [40:46<44:04,  8.65it/s]

Epochs: 27112 | epoch avg. loss: 0.005 | test avg. loss: 4.453
Epochs: 27113 | epoch avg. loss: 0.007 | test avg. loss: 4.520
Epochs: 27114 | epoch avg. loss: 0.012 | test avg. loss: 4.440


 54%|█████▍    | 27117/50000 [40:46<40:21,  9.45it/s]

Epochs: 27115 | epoch avg. loss: 0.015 | test avg. loss: 4.534
Epochs: 27116 | epoch avg. loss: 0.018 | test avg. loss: 4.456
Epochs: 27117 | epoch avg. loss: 0.011 | test avg. loss: 4.459


 54%|█████▍    | 27121/50000 [40:46<34:28, 11.06it/s]

Epochs: 27118 | epoch avg. loss: 0.008 | test avg. loss: 4.445
Epochs: 27119 | epoch avg. loss: 0.007 | test avg. loss: 4.412
Epochs: 27120 | epoch avg. loss: 0.006 | test avg. loss: 4.439
Epochs: 27121 | epoch avg. loss: 0.006 | test avg. loss: 4.421


 54%|█████▍    | 27125/50000 [40:47<31:08, 12.24it/s]

Epochs: 27122 | epoch avg. loss: 0.010 | test avg. loss: 4.514
Epochs: 27123 | epoch avg. loss: 0.011 | test avg. loss: 4.446
Epochs: 27124 | epoch avg. loss: 0.020 | test avg. loss: 4.483




Epochs: 27125 | epoch avg. loss: 0.008 | test avg. loss: 4.439
Epochs: 27126 | epoch avg. loss: 0.006 | test avg. loss: 4.507
Epochs: 27127 | epoch avg. loss: 0.013 | test avg. loss: 4.490


 54%|█████▍    | 27131/50000 [40:47<28:13, 13.51it/s]

Epochs: 27128 | epoch avg. loss: 0.008 | test avg. loss: 4.520
Epochs: 27129 | epoch avg. loss: 0.014 | test avg. loss: 4.499
Epochs: 27130 | epoch avg. loss: 0.016 | test avg. loss: 4.444


 54%|█████▍    | 27133/50000 [40:47<27:55, 13.65it/s]

Epochs: 27131 | epoch avg. loss: 0.026 | test avg. loss: 4.550
Epochs: 27132 | epoch avg. loss: 0.022 | test avg. loss: 4.442
Epochs: 27133 | epoch avg. loss: 0.023 | test avg. loss: 4.476


 54%|█████▍    | 27137/50000 [40:47<28:54, 13.18it/s]

Epochs: 27134 | epoch avg. loss: 0.018 | test avg. loss: 4.396
Epochs: 27135 | epoch avg. loss: 0.010 | test avg. loss: 4.379
Epochs: 27136 | epoch avg. loss: 0.010 | test avg. loss: 4.407


 54%|█████▍    | 27139/50000 [40:48<30:14, 12.60it/s]

Epochs: 27137 | epoch avg. loss: 0.008 | test avg. loss: 4.443
Epochs: 27138 | epoch avg. loss: 0.008 | test avg. loss: 4.500
Epochs: 27139 | epoch avg. loss: 0.006 | test avg. loss: 4.460


 54%|█████▍    | 27143/50000 [40:48<29:29, 12.92it/s]

Epochs: 27140 | epoch avg. loss: 0.007 | test avg. loss: 4.476
Epochs: 27141 | epoch avg. loss: 0.010 | test avg. loss: 4.403
Epochs: 27142 | epoch avg. loss: 0.006 | test avg. loss: 4.446


 54%|█████▍    | 27145/50000 [40:48<30:17, 12.58it/s]

Epochs: 27143 | epoch avg. loss: 0.008 | test avg. loss: 4.446
Epochs: 27144 | epoch avg. loss: 0.004 | test avg. loss: 4.521
Epochs: 27145 | epoch avg. loss: 0.009 | test avg. loss: 4.469


 54%|█████▍    | 27149/50000 [40:48<30:11, 12.61it/s]

Epochs: 27146 | epoch avg. loss: 0.006 | test avg. loss: 4.506
Epochs: 27147 | epoch avg. loss: 0.009 | test avg. loss: 4.443
Epochs: 27148 | epoch avg. loss: 0.012 | test avg. loss: 4.471


 54%|█████▍    | 27151/50000 [40:49<29:03, 13.11it/s]

Epochs: 27149 | epoch avg. loss: 0.009 | test avg. loss: 4.464
Epochs: 27150 | epoch avg. loss: 0.010 | test avg. loss: 4.427
Epochs: 27151 | epoch avg. loss: 0.015 | test avg. loss: 4.538
Epochs: 27152 | epoch avg. loss: 0.018 | test avg. loss: 4.471




Epochs: 27153 | epoch avg. loss: 0.019 | test avg. loss: 4.530
Epochs: 27154 | epoch avg. loss: 0.014 | test avg. loss: 4.447
Epochs: 27155 | epoch avg. loss: 0.012 | test avg. loss: 4.446
Epochs: 27156 | epoch avg. loss: 0.011 | test avg. loss: 4.434

 54%|█████▍    | 27159/50000 [40:49<26:28, 14.38it/s]


Epochs: 27157 | epoch avg. loss: 0.010 | test avg. loss: 4.422
Epochs: 27158 | epoch avg. loss: 0.015 | test avg. loss: 4.522
Epochs: 27159 | epoch avg. loss: 0.025 | test avg. loss: 4.394


 54%|█████▍    | 27163/50000 [40:49<26:29, 14.36it/s]

Epochs: 27160 | epoch avg. loss: 0.023 | test avg. loss: 4.462
Epochs: 27161 | epoch avg. loss: 0.010 | test avg. loss: 4.486
Epochs: 27162 | epoch avg. loss: 0.006 | test avg. loss: 4.489


 54%|█████▍    | 27165/50000 [40:50<27:06, 14.04it/s]

Epochs: 27163 | epoch avg. loss: 0.007 | test avg. loss: 4.523
Epochs: 27164 | epoch avg. loss: 0.010 | test avg. loss: 4.430
Epochs: 27165 | epoch avg. loss: 0.031 | test avg. loss: 4.476


 54%|█████▍    | 27169/50000 [40:50<27:11, 14.00it/s]

Epochs: 27166 | epoch avg. loss: 0.009 | test avg. loss: 4.445
Epochs: 27167 | epoch avg. loss: 0.008 | test avg. loss: 4.483
Epochs: 27168 | epoch avg. loss: 0.013 | test avg. loss: 4.472


 54%|█████▍    | 27171/50000 [40:50<27:38, 13.76it/s]

Epochs: 27169 | epoch avg. loss: 0.014 | test avg. loss: 4.451
Epochs: 27170 | epoch avg. loss: 0.022 | test avg. loss: 4.563
Epochs: 27171 | epoch avg. loss: 0.017 | test avg. loss: 4.485


 54%|█████▍    | 27175/50000 [40:50<26:04, 14.59it/s]

Epochs: 27172 | epoch avg. loss: 0.032 | test avg. loss: 4.549
Epochs: 27173 | epoch avg. loss: 0.024 | test avg. loss: 4.432
Epochs: 27174 | epoch avg. loss: 0.009 | test avg. loss: 4.418
Epochs: 27175 | epoch avg. loss: 0.013 | test avg. loss: 4.460


 54%|█████▍    | 27179/50000 [40:51<28:33, 13.32it/s]

Epochs: 27176 | epoch avg. loss: 0.013 | test avg. loss: 4.440
Epochs: 27177 | epoch avg. loss: 0.016 | test avg. loss: 4.512
Epochs: 27178 | epoch avg. loss: 0.008 | test avg. loss: 4.486


 54%|█████▍    | 27183/50000 [40:51<27:12, 13.97it/s]

Epochs: 27179 | epoch avg. loss: 0.005 | test avg. loss: 4.467
Epochs: 27180 | epoch avg. loss: 0.006 | test avg. loss: 4.453
Epochs: 27181 | epoch avg. loss: 0.006 | test avg. loss: 4.438
Epochs: 27182 | epoch avg. loss: 0.007 | test avg. loss: 4.495


 54%|█████▍    | 27185/50000 [40:51<26:52, 14.15it/s]

Epochs: 27183 | epoch avg. loss: 0.005 | test avg. loss: 4.472
Epochs: 27184 | epoch avg. loss: 0.011 | test avg. loss: 4.584
Epochs: 27185 | epoch avg. loss: 0.022 | test avg. loss: 4.451


 54%|█████▍    | 27189/50000 [40:51<26:44, 14.21it/s]

Epochs: 27186 | epoch avg. loss: 0.029 | test avg. loss: 4.526
Epochs: 27187 | epoch avg. loss: 0.030 | test avg. loss: 4.495
Epochs: 27188 | epoch avg. loss: 0.011 | test avg. loss: 4.535


 54%|█████▍    | 27191/50000 [40:51<26:39, 14.26it/s]

Epochs: 27189 | epoch avg. loss: 0.008 | test avg. loss: 4.526
Epochs: 27190 | epoch avg. loss: 0.007 | test avg. loss: 4.527
Epochs: 27191 | epoch avg. loss: 0.006 | test avg. loss: 4.515


 54%|█████▍    | 27195/50000 [40:52<29:44, 12.78it/s]

Epochs: 27192 | epoch avg. loss: 0.007 | test avg. loss: 4.467
Epochs: 27193 | epoch avg. loss: 0.006 | test avg. loss: 4.487
Epochs: 27194 | epoch avg. loss: 0.008 | test avg. loss: 4.439


 54%|█████▍    | 27197/50000 [40:52<29:46, 12.76it/s]

Epochs: 27195 | epoch avg. loss: 0.011 | test avg. loss: 4.513
Epochs: 27196 | epoch avg. loss: 0.012 | test avg. loss: 4.471
Epochs: 27197 | epoch avg. loss: 0.008 | test avg. loss: 4.507


 54%|█████▍    | 27199/50000 [40:52<30:04, 12.64it/s]

Epochs: 27198 | epoch avg. loss: 0.012 | test avg. loss: 4.441
Epochs: 27199 | epoch avg. loss: 0.008 | test avg. loss: 4.391


 54%|█████▍    | 27203/50000 [40:54<1:27:22,  4.35it/s]

Epochs: 27200 | epoch avg. loss: 0.009 | test avg. loss: 4.485
Epochs: 27201 | epoch avg. loss: 0.008 | test avg. loss: 4.482
Epochs: 27202 | epoch avg. loss: 0.011 | test avg. loss: 4.532
Epochs: 27203 | epoch avg. loss: 0.007 | test avg. loss: 4.500


 54%|█████▍    | 27207/50000 [40:54<55:07,  6.89it/s]

Epochs: 27204 | epoch avg. loss: 0.004 | test avg. loss: 4.486
Epochs: 27205 | epoch avg. loss: 0.009 | test avg. loss: 4.489
Epochs: 27206 | epoch avg. loss: 0.007 | test avg. loss: 4.464
Epochs: 27207 | epoch avg. loss: 0.005 | test avg. loss: 4.526


 54%|█████▍    | 27211/50000 [40:54<39:22,  9.65it/s]

Epochs: 27208 | epoch avg. loss: 0.011 | test avg. loss: 4.485
Epochs: 27209 | epoch avg. loss: 0.005 | test avg. loss: 4.528
Epochs: 27210 | epoch avg. loss: 0.008 | test avg. loss: 4.488


 54%|█████▍    | 27213/50000 [40:55<37:08, 10.22it/s]

Epochs: 27211 | epoch avg. loss: 0.004 | test avg. loss: 4.511
Epochs: 27212 | epoch avg. loss: 0.012 | test avg. loss: 4.524
Epochs: 27213 | epoch avg. loss: 0.008 | test avg. loss: 4.480


                                                     

Epochs: 27214 | epoch avg. loss: 0.017 | test avg. loss: 4.521
Epochs: 27215 | epoch avg. loss: 0.009 | test avg. loss: 4.474
Epochs: 27216 | epoch avg. loss: 0.007 | test avg. loss: 4.500


 54%|█████▍    | 27221/50000 [40:55<28:29, 13.32it/s]

Epochs: 27217 | epoch avg. loss: 0.005 | test avg. loss: 4.458
Epochs: 27218 | epoch avg. loss: 0.006 | test avg. loss: 4.449
Epochs: 27219 | epoch avg. loss: 0.004 | test avg. loss: 4.461
Epochs: 27220 | epoch avg. loss: 0.005 | test avg. loss: 4.448


 54%|█████▍    | 27225/50000 [40:55<26:41, 14.22it/s]

Epochs: 27221 | epoch avg. loss: 0.008 | test avg. loss: 4.510
Epochs: 27222 | epoch avg. loss: 0.008 | test avg. loss: 4.421
Epochs: 27223 | epoch avg. loss: 0.016 | test avg. loss: 4.451
Epochs: 27224 | epoch avg. loss: 0.004 | test avg. loss: 4.434


 54%|█████▍    | 27227/50000 [40:56<26:57, 14.08it/s]

Epochs: 27225 | epoch avg. loss: 0.007 | test avg. loss: 4.447
Epochs: 27226 | epoch avg. loss: 0.005 | test avg. loss: 4.478
Epochs: 27227 | epoch avg. loss: 0.006 | test avg. loss: 4.439


 54%|█████▍    | 27231/50000 [40:56<27:40, 13.71it/s]

Epochs: 27228 | epoch avg. loss: 0.009 | test avg. loss: 4.529
Epochs: 27229 | epoch avg. loss: 0.018 | test avg. loss: 4.433
Epochs: 27230 | epoch avg. loss: 0.014 | test avg. loss: 4.426


                                                     

Epochs: 27231 | epoch avg. loss: 0.009 | test avg. loss: 4.527
Epochs: 27232 | epoch avg. loss: 0.016 | test avg. loss: 4.452
Epochs: 27233 | epoch avg. loss: 0.029 | test avg. loss: 4.534


 54%|█████▍    | 27237/50000 [40:56<28:02, 13.53it/s]

Epochs: 27234 | epoch avg. loss: 0.029 | test avg. loss: 4.424
Epochs: 27235 | epoch avg. loss: 0.012 | test avg. loss: 4.368
Epochs: 27236 | epoch avg. loss: 0.014 | test avg. loss: 4.494


 54%|█████▍    | 27239/50000 [40:56<28:12, 13.44it/s]

Epochs: 27237 | epoch avg. loss: 0.010 | test avg. loss: 4.496
Epochs: 27238 | epoch avg. loss: 0.016 | test avg. loss: 4.546
Epochs: 27239 | epoch avg. loss: 0.008 | test avg. loss: 4.484


 54%|█████▍    | 27243/50000 [40:57<29:59, 12.64it/s]

Epochs: 27240 | epoch avg. loss: 0.006 | test avg. loss: 4.498
Epochs: 27241 | epoch avg. loss: 0.007 | test avg. loss: 4.473
Epochs: 27242 | epoch avg. loss: 0.006 | test avg. loss: 4.505


 54%|█████▍    | 27245/50000 [40:57<30:36, 12.39it/s]

Epochs: 27243 | epoch avg. loss: 0.009 | test avg. loss: 4.561
Epochs: 27244 | epoch avg. loss: 0.011 | test avg. loss: 4.492
Epochs: 27245 | epoch avg. loss: 0.006 | test avg. loss: 4.537


 54%|█████▍    | 27249/50000 [40:57<27:29, 13.79it/s]

Epochs: 27246 | epoch avg. loss: 0.017 | test avg. loss: 4.466
Epochs: 27247 | epoch avg. loss: 0.028 | test avg. loss: 4.496
Epochs: 27248 | epoch avg. loss: 0.012 | test avg. loss: 4.555
Epochs: 27249 | epoch avg. loss: 0.013 | test avg. loss: 4.441


 55%|█████▍    | 27253/50000 [40:57<27:01, 14.03it/s]

Epochs: 27250 | epoch avg. loss: 0.014 | test avg. loss: 4.411
Epochs: 27251 | epoch avg. loss: 0.006 | test avg. loss: 4.433
Epochs: 27252 | epoch avg. loss: 0.007 | test avg. loss: 4.435


 55%|█████▍    | 27255/50000 [40:58<29:04, 13.04it/s]

Epochs: 27253 | epoch avg. loss: 0.012 | test avg. loss: 4.594
Epochs: 27254 | epoch avg. loss: 0.017 | test avg. loss: 4.505
Epochs: 27255 | epoch avg. loss: 0.021 | test avg. loss: 4.525


 55%|█████▍    | 27259/50000 [40:58<29:42, 12.76it/s]

Epochs: 27256 | epoch avg. loss: 0.017 | test avg. loss: 4.471
Epochs: 27257 | epoch avg. loss: 0.013 | test avg. loss: 4.410
Epochs: 27258 | epoch avg. loss: 0.022 | test avg. loss: 4.560


 55%|█████▍    | 27263/50000 [40:58<27:33, 13.75it/s]

Epochs: 27259 | epoch avg. loss: 0.029 | test avg. loss: 4.455
Epochs: 27260 | epoch avg. loss: 0.024 | test avg. loss: 4.509
Epochs: 27261 | epoch avg. loss: 0.019 | test avg. loss: 4.527
Epochs: 27262 | epoch avg. loss: 0.013 | test avg. loss: 4.479


 55%|█████▍    | 27265/50000 [40:58<26:48, 14.13it/s]

Epochs: 27263 | epoch avg. loss: 0.018 | test avg. loss: 4.568
Epochs: 27264 | epoch avg. loss: 0.012 | test avg. loss: 4.565
Epochs: 27265 | epoch avg. loss: 0.006 | test avg. loss: 4.541


 55%|█████▍    | 27269/50000 [40:59<28:56, 13.09it/s]

Epochs: 27266 | epoch avg. loss: 0.006 | test avg. loss: 4.545
Epochs: 27267 | epoch avg. loss: 0.006 | test avg. loss: 4.453
Epochs: 27268 | epoch avg. loss: 0.007 | test avg. loss: 4.476


 55%|█████▍    | 27271/50000 [40:59<29:37, 12.79it/s]

Epochs: 27269 | epoch avg. loss: 0.009 | test avg. loss: 4.446
Epochs: 27270 | epoch avg. loss: 0.015 | test avg. loss: 4.494
Epochs: 27271 | epoch avg. loss: 0.008 | test avg. loss: 4.553


 55%|█████▍    | 27273/50000 [40:59<30:40, 12.35it/s]

Epochs: 27272 | epoch avg. loss: 0.009 | test avg. loss: 4.521
Epochs: 27273 | epoch avg. loss: 0.016 | test avg. loss: 4.538


 55%|█████▍    | 27277/50000 [40:59<31:57, 11.85it/s]

Epochs: 27274 | epoch avg. loss: 0.005 | test avg. loss: 4.459
Epochs: 27275 | epoch avg. loss: 0.006 | test avg. loss: 4.475
Epochs: 27276 | epoch avg. loss: 0.005 | test avg. loss: 4.474


 55%|█████▍    | 27279/50000 [41:00<33:45, 11.22it/s]

Epochs: 27277 | epoch avg. loss: 0.004 | test avg. loss: 4.483
Epochs: 27278 | epoch avg. loss: 0.004 | test avg. loss: 4.477
Epochs: 27279 | epoch avg. loss: 0.004 | test avg. loss: 4.473


 55%|█████▍    | 27283/50000 [41:00<32:05, 11.80it/s]

Epochs: 27280 | epoch avg. loss: 0.004 | test avg. loss: 4.467
Epochs: 27281 | epoch avg. loss: 0.004 | test avg. loss: 4.450
Epochs: 27282 | epoch avg. loss: 0.005 | test avg. loss: 4.479


 55%|█████▍    | 27285/50000 [41:00<30:20, 12.47it/s]

Epochs: 27283 | epoch avg. loss: 0.009 | test avg. loss: 4.427
Epochs: 27284 | epoch avg. loss: 0.015 | test avg. loss: 4.481
Epochs: 27285 | epoch avg. loss: 0.011 | test avg. loss: 4.541


 55%|█████▍    | 27289/50000 [41:00<30:17, 12.50it/s]

Epochs: 27286 | epoch avg. loss: 0.012 | test avg. loss: 4.470
Epochs: 27287 | epoch avg. loss: 0.026 | test avg. loss: 4.522
Epochs: 27288 | epoch avg. loss: 0.020 | test avg. loss: 4.525


 55%|█████▍    | 27291/50000 [41:01<30:26, 12.43it/s]

Epochs: 27289 | epoch avg. loss: 0.010 | test avg. loss: 4.486
Epochs: 27290 | epoch avg. loss: 0.010 | test avg. loss: 4.479
Epochs: 27291 | epoch avg. loss: 0.005 | test avg. loss: 4.419


 55%|█████▍    | 27295/50000 [41:01<29:58, 12.62it/s]

Epochs: 27292 | epoch avg. loss: 0.008 | test avg. loss: 4.431
Epochs: 27293 | epoch avg. loss: 0.008 | test avg. loss: 4.468
Epochs: 27294 | epoch avg. loss: 0.007 | test avg. loss: 4.463
Epochs: 27295 | epoch avg. loss: 0.004 | test avg. loss: 4.498


 55%|█████▍    | 27299/50000 [41:01<27:33, 13.73it/s]

Epochs: 27296 | epoch avg. loss: 0.006 | test avg. loss: 4.449
Epochs: 27297 | epoch avg. loss: 0.011 | test avg. loss: 4.468
Epochs: 27298 | epoch avg. loss: 0.006 | test avg. loss: 4.505


 55%|█████▍    | 27299/50000 [41:01<27:33, 13.73it/s]

Epochs: 27299 | epoch avg. loss: 0.010 | test avg. loss: 4.490


 55%|█████▍    | 27303/50000 [41:03<1:36:51,  3.91it/s]

Epochs: 27300 | epoch avg. loss: 0.004 | test avg. loss: 4.475
Epochs: 27301 | epoch avg. loss: 0.007 | test avg. loss: 4.483
Epochs: 27302 | epoch avg. loss: 0.005 | test avg. loss: 4.508


 55%|█████▍    | 27305/50000 [41:03<1:17:04,  4.91it/s]

Epochs: 27303 | epoch avg. loss: 0.006 | test avg. loss: 4.474
Epochs: 27304 | epoch avg. loss: 0.004 | test avg. loss: 4.475
Epochs: 27305 | epoch avg. loss: 0.005 | test avg. loss: 4.511


 55%|█████▍    | 27309/50000 [41:04<55:23,  6.83it/s]  

Epochs: 27306 | epoch avg. loss: 0.006 | test avg. loss: 4.474
Epochs: 27307 | epoch avg. loss: 0.008 | test avg. loss: 4.487
Epochs: 27308 | epoch avg. loss: 0.007 | test avg. loss: 4.501


 55%|█████▍    | 27311/50000 [41:04<47:14,  8.00it/s]

Epochs: 27309 | epoch avg. loss: 0.006 | test avg. loss: 4.460
Epochs: 27310 | epoch avg. loss: 0.010 | test avg. loss: 4.459
Epochs: 27311 | epoch avg. loss: 0.004 | test avg. loss: 4.481


 55%|█████▍    | 27315/50000 [41:04<36:04, 10.48it/s]

Epochs: 27312 | epoch avg. loss: 0.005 | test avg. loss: 4.457
Epochs: 27313 | epoch avg. loss: 0.005 | test avg. loss: 4.467
Epochs: 27314 | epoch avg. loss: 0.004 | test avg. loss: 4.454
Epochs: 27315 | epoch avg. loss: 0.007 | test avg. loss: 4.445


 55%|█████▍    | 27319/50000 [41:04<30:45, 12.29it/s]

Epochs: 27316 | epoch avg. loss: 0.006 | test avg. loss: 4.493
Epochs: 27317 | epoch avg. loss: 0.008 | test avg. loss: 4.430
Epochs: 27318 | epoch avg. loss: 0.010 | test avg. loss: 4.474


 55%|█████▍    | 27321/50000 [41:05<31:54, 11.84it/s]

Epochs: 27319 | epoch avg. loss: 0.007 | test avg. loss: 4.520
Epochs: 27320 | epoch avg. loss: 0.008 | test avg. loss: 4.489
Epochs: 27321 | epoch avg. loss: 0.004 | test avg. loss: 4.481


 55%|█████▍    | 27325/50000 [41:05<31:08, 12.13it/s]

Epochs: 27322 | epoch avg. loss: 0.004 | test avg. loss: 4.489
Epochs: 27323 | epoch avg. loss: 0.005 | test avg. loss: 4.447
Epochs: 27324 | epoch avg. loss: 0.005 | test avg. loss: 4.486


 55%|█████▍    | 27329/50000 [41:05<27:41, 13.64it/s]

Epochs: 27325 | epoch avg. loss: 0.006 | test avg. loss: 4.485
Epochs: 27326 | epoch avg. loss: 0.004 | test avg. loss: 4.494
Epochs: 27327 | epoch avg. loss: 0.004 | test avg. loss: 4.497
Epochs: 27328 | epoch avg. loss: 0.004 | test avg. loss: 4.506


 55%|█████▍    | 27333/50000 [41:05<26:13, 14.40it/s]

Epochs: 27329 | epoch avg. loss: 0.004 | test avg. loss: 4.467
Epochs: 27330 | epoch avg. loss: 0.010 | test avg. loss: 4.478
Epochs: 27331 | epoch avg. loss: 0.004 | test avg. loss: 4.502
Epochs: 27332 | epoch avg. loss: 0.004 | test avg. loss: 4.523


 55%|█████▍    | 27335/50000 [41:06<29:42, 12.72it/s]

Epochs: 27333 | epoch avg. loss: 0.004 | test avg. loss: 4.542
Epochs: 27334 | epoch avg. loss: 0.004 | test avg. loss: 4.489
Epochs: 27335 | epoch avg. loss: 0.007 | test avg. loss: 4.516


 55%|█████▍    | 27339/50000 [41:06<31:06, 12.14it/s]

Epochs: 27336 | epoch avg. loss: 0.011 | test avg. loss: 4.480
Epochs: 27337 | epoch avg. loss: 0.006 | test avg. loss: 4.483
Epochs: 27338 | epoch avg. loss: 0.004 | test avg. loss: 4.479


 55%|█████▍    | 27341/50000 [41:06<32:06, 11.76it/s]

Epochs: 27339 | epoch avg. loss: 0.007 | test avg. loss: 4.456
Epochs: 27340 | epoch avg. loss: 0.014 | test avg. loss: 4.515
Epochs: 27341 | epoch avg. loss: 0.008 | test avg. loss: 4.484


 55%|█████▍    | 27345/50000 [41:06<29:44, 12.69it/s]

Epochs: 27342 | epoch avg. loss: 0.011 | test avg. loss: 4.484
Epochs: 27343 | epoch avg. loss: 0.011 | test avg. loss: 4.571
Epochs: 27344 | epoch avg. loss: 0.020 | test avg. loss: 4.496


 55%|█████▍    | 27347/50000 [41:07<30:06, 12.54it/s]

Epochs: 27345 | epoch avg. loss: 0.012 | test avg. loss: 4.452
Epochs: 27346 | epoch avg. loss: 0.012 | test avg. loss: 4.510
Epochs: 27347 | epoch avg. loss: 0.013 | test avg. loss: 4.443


 55%|█████▍    | 27351/50000 [41:07<29:03, 12.99it/s]

Epochs: 27348 | epoch avg. loss: 0.022 | test avg. loss: 4.460
Epochs: 27349 | epoch avg. loss: 0.015 | test avg. loss: 4.520
Epochs: 27350 | epoch avg. loss: 0.029 | test avg. loss: 4.393




Epochs: 27351 | epoch avg. loss: 0.043 | test avg. loss: 4.470
Epochs: 27352 | epoch avg. loss: 0.016 | test avg. loss: 4.559
Epochs: 27353 | epoch avg. loss: 0.021 | test avg. loss: 4.468


 55%|█████▍    | 27357/50000 [41:07<27:18, 13.82it/s]

Epochs: 27354 | epoch avg. loss: 0.032 | test avg. loss: 4.490
Epochs: 27355 | epoch avg. loss: 0.015 | test avg. loss: 4.486
Epochs: 27356 | epoch avg. loss: 0.011 | test avg. loss: 4.422


 55%|█████▍    | 27359/50000 [41:07<27:36, 13.67it/s]

Epochs: 27357 | epoch avg. loss: 0.010 | test avg. loss: 4.505
Epochs: 27358 | epoch avg. loss: 0.027 | test avg. loss: 4.442
Epochs: 27359 | epoch avg. loss: 0.019 | test avg. loss: 4.460


 55%|█████▍    | 27363/50000 [41:08<27:14, 13.85it/s]

Epochs: 27360 | epoch avg. loss: 0.013 | test avg. loss: 4.537
Epochs: 27361 | epoch avg. loss: 0.019 | test avg. loss: 4.424
Epochs: 27362 | epoch avg. loss: 0.039 | test avg. loss: 4.484


 55%|█████▍    | 27367/50000 [41:08<25:58, 14.53it/s]

Epochs: 27363 | epoch avg. loss: 0.031 | test avg. loss: 4.445
Epochs: 27364 | epoch avg. loss: 0.045 | test avg. loss: 4.467
Epochs: 27365 | epoch avg. loss: 0.065 | test avg. loss: 4.763
Epochs: 27366 | epoch avg. loss: 0.115 | test avg. loss: 4.508


 55%|█████▍    | 27369/50000 [41:08<25:19, 14.89it/s]

Epochs: 27367 | epoch avg. loss: 0.051 | test avg. loss: 4.506
Epochs: 27368 | epoch avg. loss: 0.042 | test avg. loss: 4.675
Epochs: 27369 | epoch avg. loss: 0.056 | test avg. loss: 4.524


 55%|█████▍    | 27373/50000 [41:08<25:49, 14.60it/s]

Epochs: 27370 | epoch avg. loss: 0.082 | test avg. loss: 4.626
Epochs: 27371 | epoch avg. loss: 0.206 | test avg. loss: 4.758
Epochs: 27372 | epoch avg. loss: 0.131 | test avg. loss: 4.729


 55%|█████▍    | 27375/50000 [41:09<28:25, 13.26it/s]

Epochs: 27373 | epoch avg. loss: 0.196 | test avg. loss: 4.846
Epochs: 27374 | epoch avg. loss: 0.099 | test avg. loss: 4.618
Epochs: 27375 | epoch avg. loss: 0.066 | test avg. loss: 4.518


 55%|█████▍    | 27379/50000 [41:09<27:46, 13.57it/s]

Epochs: 27376 | epoch avg. loss: 0.095 | test avg. loss: 4.911
Epochs: 27377 | epoch avg. loss: 0.215 | test avg. loss: 4.310
Epochs: 27378 | epoch avg. loss: 0.127 | test avg. loss: 4.404


 55%|█████▍    | 27383/50000 [41:09<25:52, 14.56it/s]

Epochs: 27379 | epoch avg. loss: 0.102 | test avg. loss: 4.793
Epochs: 27380 | epoch avg. loss: 0.060 | test avg. loss: 4.834
Epochs: 27381 | epoch avg. loss: 0.030 | test avg. loss: 4.784
Epochs: 27382 | epoch avg. loss: 0.034 | test avg. loss: 4.874


 55%|█████▍    | 27385/50000 [41:09<26:07, 14.42it/s]

Epochs: 27383 | epoch avg. loss: 0.063 | test avg. loss: 4.600
Epochs: 27384 | epoch avg. loss: 0.172 | test avg. loss: 4.739
Epochs: 27385 | epoch avg. loss: 0.038 | test avg. loss: 5.028
Epochs: 27386 | epoch avg. loss: 0.074 | test avg. loss: 4.885


 55%|█████▍    | 27391/50000 [41:10<25:24, 14.83it/s]

Epochs: 27387 | epoch avg. loss: 0.122 | test avg. loss: 4.794
Epochs: 27388 | epoch avg. loss: 0.098 | test avg. loss: 4.855
Epochs: 27389 | epoch avg. loss: 0.111 | test avg. loss: 4.482
Epochs: 27390 | epoch avg. loss: 0.210 | test avg. loss: 4.668


 55%|█████▍    | 27393/50000 [41:10<26:28, 14.23it/s]

Epochs: 27391 | epoch avg. loss: 0.073 | test avg. loss: 5.117
Epochs: 27392 | epoch avg. loss: 0.154 | test avg. loss: 4.937
Epochs: 27393 | epoch avg. loss: 0.346 | test avg. loss: 4.618


 55%|█████▍    | 27397/50000 [41:10<25:36, 14.71it/s]

Epochs: 27394 | epoch avg. loss: 0.237 | test avg. loss: 4.750
Epochs: 27395 | epoch avg. loss: 0.217 | test avg. loss: 4.671
Epochs: 27396 | epoch avg. loss: 0.531 | test avg. loss: 5.047
Epochs: 27397 | epoch avg. loss: 0.458 | test avg. loss: 4.593


 55%|█████▍    | 27399/50000 [41:10<25:35, 14.72it/s]

Epochs: 27398 | epoch avg. loss: 0.187 | test avg. loss: 4.881
Epochs: 27399 | epoch avg. loss: 0.335 | test avg. loss: 4.898


 55%|█████▍    | 27403/50000 [41:12<1:29:13,  4.22it/s]

Epochs: 27400 | epoch avg. loss: 0.199 | test avg. loss: 4.524
Epochs: 27401 | epoch avg. loss: 0.255 | test avg. loss: 4.403
Epochs: 27402 | epoch avg. loss: 0.103 | test avg. loss: 4.593


 55%|█████▍    | 27405/50000 [41:12<1:11:11,  5.29it/s]

Epochs: 27403 | epoch avg. loss: 0.111 | test avg. loss: 4.485
Epochs: 27404 | epoch avg. loss: 0.076 | test avg. loss: 4.755
Epochs: 27405 | epoch avg. loss: 0.038 | test avg. loss: 4.763


 55%|█████▍    | 27407/50000 [41:13<58:08,  6.48it/s]

Epochs: 27406 | epoch avg. loss: 0.066 | test avg. loss: 4.779
Epochs: 27407 | epoch avg. loss: 0.047 | test avg. loss: 4.851
Epochs: 27408 | epoch avg. loss: 0.049 | test avg. loss: 4.581


 55%|█████▍    | 27411/50000 [41:13<45:28,  8.28it/s]

Epochs: 27409 | epoch avg. loss: 0.117 | test avg. loss: 4.661
Epochs: 27410 | epoch avg. loss: 0.084 | test avg. loss: 4.699
Epochs: 27411 | epoch avg. loss: 0.059 | test avg. loss: 4.587


 55%|█████▍    | 27415/50000 [41:13<36:39, 10.27it/s]

Epochs: 27412 | epoch avg. loss: 0.032 | test avg. loss: 4.791
Epochs: 27413 | epoch avg. loss: 0.037 | test avg. loss: 4.740
Epochs: 27414 | epoch avg. loss: 0.075 | test avg. loss: 4.662


 55%|█████▍    | 27419/50000 [41:13<30:23, 12.38it/s]

Epochs: 27415 | epoch avg. loss: 0.040 | test avg. loss: 4.679
Epochs: 27416 | epoch avg. loss: 0.075 | test avg. loss: 4.481
Epochs: 27417 | epoch avg. loss: 0.234 | test avg. loss: 4.751
Epochs: 27418 | epoch avg. loss: 0.211 | test avg. loss: 4.724


 55%|█████▍    | 27421/50000 [41:14<30:55, 12.17it/s]

Epochs: 27419 | epoch avg. loss: 0.086 | test avg. loss: 4.844
Epochs: 27420 | epoch avg. loss: 0.084 | test avg. loss: 5.067
Epochs: 27421 | epoch avg. loss: 0.107 | test avg. loss: 4.716


 55%|█████▍    | 27425/50000 [41:14<28:36, 13.15it/s]

Epochs: 27422 | epoch avg. loss: 0.060 | test avg. loss: 4.670
Epochs: 27423 | epoch avg. loss: 0.021 | test avg. loss: 4.753
Epochs: 27424 | epoch avg. loss: 0.018 | test avg. loss: 4.806




Epochs: 27425 | epoch avg. loss: 0.011 | test avg. loss: 4.841
Epochs: 27426 | epoch avg. loss: 0.008 | test avg. loss: 4.786
Epochs: 27427 | epoch avg. loss: 0.009 | test avg. loss: 4.698


 55%|█████▍    | 27431/50000 [41:14<27:45, 13.55it/s]

Epochs: 27428 | epoch avg. loss: 0.008 | test avg. loss: 4.630
Epochs: 27429 | epoch avg. loss: 0.012 | test avg. loss: 4.707
Epochs: 27430 | epoch avg. loss: 0.007 | test avg. loss: 4.693


 55%|█████▍    | 27433/50000 [41:14<27:36, 13.63it/s]

Epochs: 27431 | epoch avg. loss: 0.005 | test avg. loss: 4.798
Epochs: 27432 | epoch avg. loss: 0.032 | test avg. loss: 4.617
Epochs: 27433 | epoch avg. loss: 0.010 | test avg. loss: 4.539


 55%|█████▍    | 27437/50000 [41:15<29:42, 12.66it/s]

Epochs: 27434 | epoch avg. loss: 0.012 | test avg. loss: 4.624
Epochs: 27435 | epoch avg. loss: 0.018 | test avg. loss: 4.564
Epochs: 27436 | epoch avg. loss: 0.043 | test avg. loss: 4.612


 55%|█████▍    | 27439/50000 [41:15<28:39, 13.12it/s]

Epochs: 27437 | epoch avg. loss: 0.013 | test avg. loss: 4.663
Epochs: 27438 | epoch avg. loss: 0.014 | test avg. loss: 4.545
Epochs: 27439 | epoch avg. loss: 0.017 | test avg. loss: 4.561


 55%|█████▍    | 27443/50000 [41:15<31:01, 12.12it/s]

Epochs: 27440 | epoch avg. loss: 0.012 | test avg. loss: 4.587
Epochs: 27441 | epoch avg. loss: 0.007 | test avg. loss: 4.653
Epochs: 27442 | epoch avg. loss: 0.007 | test avg. loss: 4.756


 55%|█████▍    | 27445/50000 [41:15<31:12, 12.05it/s]

Epochs: 27443 | epoch avg. loss: 0.007 | test avg. loss: 4.726
Epochs: 27444 | epoch avg. loss: 0.006 | test avg. loss: 4.711
Epochs: 27445 | epoch avg. loss: 0.013 | test avg. loss: 4.598


 55%|█████▍    | 27449/50000 [41:16<31:04, 12.10it/s]

Epochs: 27446 | epoch avg. loss: 0.012 | test avg. loss: 4.609
Epochs: 27447 | epoch avg. loss: 0.009 | test avg. loss: 4.741
Epochs: 27448 | epoch avg. loss: 0.025 | test avg. loss: 4.649


 55%|█████▍    | 27451/50000 [41:16<29:52, 12.58it/s]

Epochs: 27449 | epoch avg. loss: 0.035 | test avg. loss: 4.637
Epochs: 27450 | epoch avg. loss: 0.012 | test avg. loss: 4.608
Epochs: 27451 | epoch avg. loss: 0.010 | test avg. loss: 4.553


 55%|█████▍    | 27455/50000 [41:16<29:01, 12.95it/s]

Epochs: 27452 | epoch avg. loss: 0.006 | test avg. loss: 4.575
Epochs: 27453 | epoch avg. loss: 0.006 | test avg. loss: 4.624
Epochs: 27454 | epoch avg. loss: 0.006 | test avg. loss: 4.626


 55%|█████▍    | 27457/50000 [41:16<29:37, 12.68it/s]

Epochs: 27455 | epoch avg. loss: 0.009 | test avg. loss: 4.660
Epochs: 27456 | epoch avg. loss: 0.006 | test avg. loss: 4.678
Epochs: 27457 | epoch avg. loss: 0.006 | test avg. loss: 4.649


 55%|█████▍    | 27459/50000 [41:17<30:26, 12.34it/s]

Epochs: 27458 | epoch avg. loss: 0.004 | test avg. loss: 4.702
Epochs: 27459 | epoch avg. loss: 0.014 | test avg. loss: 4.656
Epochs: 27460 | epoch avg. loss: 0.017 | test avg. loss: 4.676


 55%|█████▍    | 27463/50000 [41:17<32:03, 11.72it/s]

Epochs: 27461 | epoch avg. loss: 0.007 | test avg. loss: 4.728
Epochs: 27462 | epoch avg. loss: 0.007 | test avg. loss: 4.677
Epochs: 27463 | epoch avg. loss: 0.016 | test avg. loss: 4.660


 55%|█████▍    | 27465/50000 [41:17<33:30, 11.21it/s]

Epochs: 27464 | epoch avg. loss: 0.006 | test avg. loss: 4.683
Epochs: 27465 | epoch avg. loss: 0.008 | test avg. loss: 4.618


 55%|█████▍    | 27469/50000 [41:17<37:30, 10.01it/s]

Epochs: 27466 | epoch avg. loss: 0.010 | test avg. loss: 4.643
Epochs: 27467 | epoch avg. loss: 0.006 | test avg. loss: 4.658
Epochs: 27468 | epoch avg. loss: 0.005 | test avg. loss: 4.624


 55%|█████▍    | 27471/50000 [41:18<36:30, 10.29it/s]

Epochs: 27469 | epoch avg. loss: 0.006 | test avg. loss: 4.656
Epochs: 27470 | epoch avg. loss: 0.009 | test avg. loss: 4.621
Epochs: 27471 | epoch avg. loss: 0.004 | test avg. loss: 4.643


 55%|█████▍    | 27475/50000 [41:18<32:02, 11.72it/s]

Epochs: 27472 | epoch avg. loss: 0.006 | test avg. loss: 4.657
Epochs: 27473 | epoch avg. loss: 0.006 | test avg. loss: 4.627
Epochs: 27474 | epoch avg. loss: 0.007 | test avg. loss: 4.666


 55%|█████▍    | 27477/50000 [41:18<31:01, 12.10it/s]

Epochs: 27475 | epoch avg. loss: 0.003 | test avg. loss: 4.621
Epochs: 27476 | epoch avg. loss: 0.014 | test avg. loss: 4.626
Epochs: 27477 | epoch avg. loss: 0.006 | test avg. loss: 4.635


 55%|█████▍    | 27481/50000 [41:18<29:55, 12.55it/s]

Epochs: 27478 | epoch avg. loss: 0.007 | test avg. loss: 4.615
Epochs: 27479 | epoch avg. loss: 0.010 | test avg. loss: 4.745
Epochs: 27480 | epoch avg. loss: 0.029 | test avg. loss: 4.778


 55%|█████▍    | 27483/50000 [41:19<33:09, 11.32it/s]

Epochs: 27481 | epoch avg. loss: 0.014 | test avg. loss: 4.678
Epochs: 27482 | epoch avg. loss: 0.024 | test avg. loss: 4.660
Epochs: 27483 | epoch avg. loss: 0.008 | test avg. loss: 4.689


 55%|█████▍    | 27487/50000 [41:19<30:20, 12.37it/s]

Epochs: 27484 | epoch avg. loss: 0.014 | test avg. loss: 4.595
Epochs: 27485 | epoch avg. loss: 0.017 | test avg. loss: 4.624
Epochs: 27486 | epoch avg. loss: 0.007 | test avg. loss: 4.634


 55%|█████▍    | 27489/50000 [41:19<30:02, 12.49it/s]

Epochs: 27487 | epoch avg. loss: 0.009 | test avg. loss: 4.660
Epochs: 27488 | epoch avg. loss: 0.008 | test avg. loss: 4.715
Epochs: 27489 | epoch avg. loss: 0.005 | test avg. loss: 4.709


 55%|█████▍    | 27493/50000 [41:19<30:16, 12.39it/s]

Epochs: 27490 | epoch avg. loss: 0.004 | test avg. loss: 4.746
Epochs: 27491 | epoch avg. loss: 0.007 | test avg. loss: 4.689
Epochs: 27492 | epoch avg. loss: 0.011 | test avg. loss: 4.689


 55%|█████▍    | 27495/50000 [41:20<31:59, 11.72it/s]

Epochs: 27493 | epoch avg. loss: 0.006 | test avg. loss: 4.728
Epochs: 27494 | epoch avg. loss: 0.009 | test avg. loss: 4.690
Epochs: 27495 | epoch avg. loss: 0.004 | test avg. loss: 4.715


 55%|█████▍    | 27499/50000 [41:20<33:14, 11.28it/s]

Epochs: 27496 | epoch avg. loss: 0.006 | test avg. loss: 4.721
Epochs: 27497 | epoch avg. loss: 0.004 | test avg. loss: 4.692
Epochs: 27498 | epoch avg. loss: 0.007 | test avg. loss: 4.658


 55%|█████▍    | 27499/50000 [41:20<33:14, 11.28it/s]

Epochs: 27499 | epoch avg. loss: 0.007 | test avg. loss: 4.651


 55%|█████▌    | 27503/50000 [41:22<1:43:30,  3.62it/s]

Epochs: 27500 | epoch avg. loss: 0.004 | test avg. loss: 4.672
Epochs: 27501 | epoch avg. loss: 0.004 | test avg. loss: 4.657
Epochs: 27502 | epoch avg. loss: 0.010 | test avg. loss: 4.709


 55%|█████▌    | 27505/50000 [41:22<1:20:26,  4.66it/s]

Epochs: 27503 | epoch avg. loss: 0.006 | test avg. loss: 4.703
Epochs: 27504 | epoch avg. loss: 0.009 | test avg. loss: 4.679
Epochs: 27505 | epoch avg. loss: 0.011 | test avg. loss: 4.727


 55%|█████▌    | 27509/50000 [41:23<55:36,  6.74it/s]  

Epochs: 27506 | epoch avg. loss: 0.011 | test avg. loss: 4.663
Epochs: 27507 | epoch avg. loss: 0.008 | test avg. loss: 4.617
Epochs: 27508 | epoch avg. loss: 0.020 | test avg. loss: 4.694


 55%|█████▌    | 27511/50000 [41:23<47:52,  7.83it/s]

Epochs: 27509 | epoch avg. loss: 0.014 | test avg. loss: 4.703
Epochs: 27510 | epoch avg. loss: 0.021 | test avg. loss: 4.684
Epochs: 27511 | epoch avg. loss: 0.034 | test avg. loss: 4.896


 55%|█████▌    | 27515/50000 [41:23<37:10, 10.08it/s]

Epochs: 27512 | epoch avg. loss: 0.090 | test avg. loss: 4.576
Epochs: 27513 | epoch avg. loss: 0.036 | test avg. loss: 4.574
Epochs: 27514 | epoch avg. loss: 0.065 | test avg. loss: 4.867


 55%|█████▌    | 27517/50000 [41:23<34:16, 10.93it/s]

Epochs: 27515 | epoch avg. loss: 0.105 | test avg. loss: 4.721
Epochs: 27516 | epoch avg. loss: 0.030 | test avg. loss: 4.664
Epochs: 27517 | epoch avg. loss: 0.035 | test avg. loss: 4.728


 55%|█████▌    | 27521/50000 [41:23<34:04, 10.99it/s]

Epochs: 27518 | epoch avg. loss: 0.039 | test avg. loss: 4.572
Epochs: 27519 | epoch avg. loss: 0.021 | test avg. loss: 4.618
Epochs: 27520 | epoch avg. loss: 0.015 | test avg. loss: 4.773


 55%|█████▌    | 27523/50000 [41:24<33:41, 11.12it/s]

Epochs: 27521 | epoch avg. loss: 0.024 | test avg. loss: 4.701
Epochs: 27522 | epoch avg. loss: 0.100 | test avg. loss: 4.768
Epochs: 27523 | epoch avg. loss: 0.076 | test avg. loss: 4.819


 55%|█████▌    | 27527/50000 [41:24<30:52, 12.13it/s]

Epochs: 27524 | epoch avg. loss: 0.088 | test avg. loss: 4.555
Epochs: 27525 | epoch avg. loss: 0.126 | test avg. loss: 4.680
Epochs: 27526 | epoch avg. loss: 0.039 | test avg. loss: 4.998


 55%|█████▌    | 27529/50000 [41:24<31:20, 11.95it/s]

Epochs: 27527 | epoch avg. loss: 0.084 | test avg. loss: 4.876
Epochs: 27528 | epoch avg. loss: 0.030 | test avg. loss: 4.830
Epochs: 27529 | epoch avg. loss: 0.020 | test avg. loss: 4.702


 55%|█████▌    | 27531/50000 [41:24<32:08, 11.65it/s]

Epochs: 27530 | epoch avg. loss: 0.017 | test avg. loss: 4.594
Epochs: 27531 | epoch avg. loss: 0.037 | test avg. loss: 4.719


 55%|█████▌    | 27535/50000 [41:25<31:51, 11.75it/s]

Epochs: 27532 | epoch avg. loss: 0.029 | test avg. loss: 4.769
Epochs: 27533 | epoch avg. loss: 0.019 | test avg. loss: 4.712
Epochs: 27534 | epoch avg. loss: 0.026 | test avg. loss: 4.687


 55%|█████▌    | 27537/50000 [41:25<31:55, 11.73it/s]

Epochs: 27535 | epoch avg. loss: 0.015 | test avg. loss: 4.669
Epochs: 27536 | epoch avg. loss: 0.014 | test avg. loss: 4.579
Epochs: 27537 | epoch avg. loss: 0.023 | test avg. loss: 4.680


 55%|█████▌    | 27541/50000 [41:25<31:41, 11.81it/s]

Epochs: 27538 | epoch avg. loss: 0.011 | test avg. loss: 4.710
Epochs: 27539 | epoch avg. loss: 0.006 | test avg. loss: 4.746
Epochs: 27540 | epoch avg. loss: 0.010 | test avg. loss: 4.736


 55%|█████▌    | 27543/50000 [41:25<31:36, 11.84it/s]

Epochs: 27541 | epoch avg. loss: 0.013 | test avg. loss: 4.641
Epochs: 27542 | epoch avg. loss: 0.009 | test avg. loss: 4.624
Epochs: 27543 | epoch avg. loss: 0.007 | test avg. loss: 4.674


 55%|█████▌    | 27547/50000 [41:26<32:13, 11.61it/s]

Epochs: 27544 | epoch avg. loss: 0.019 | test avg. loss: 4.608
Epochs: 27545 | epoch avg. loss: 0.011 | test avg. loss: 4.552
Epochs: 27546 | epoch avg. loss: 0.009 | test avg. loss: 4.604


 55%|█████▌    | 27549/50000 [41:26<30:15, 12.37it/s]

Epochs: 27547 | epoch avg. loss: 0.017 | test avg. loss: 4.569
Epochs: 27548 | epoch avg. loss: 0.016 | test avg. loss: 4.649
Epochs: 27549 | epoch avg. loss: 0.005 | test avg. loss: 4.668


 55%|█████▌    | 27553/50000 [41:26<29:26, 12.71it/s]

Epochs: 27550 | epoch avg. loss: 0.011 | test avg. loss: 4.638
Epochs: 27551 | epoch avg. loss: 0.017 | test avg. loss: 4.676
Epochs: 27552 | epoch avg. loss: 0.015 | test avg. loss: 4.646


 55%|█████▌    | 27555/50000 [41:26<30:12, 12.38it/s]

Epochs: 27553 | epoch avg. loss: 0.008 | test avg. loss: 4.632
Epochs: 27554 | epoch avg. loss: 0.008 | test avg. loss: 4.644
Epochs: 27555 | epoch avg. loss: 0.006 | test avg. loss: 4.636


 55%|█████▌    | 27559/50000 [41:27<29:17, 12.77it/s]

Epochs: 27556 | epoch avg. loss: 0.007 | test avg. loss: 4.675
Epochs: 27557 | epoch avg. loss: 0.004 | test avg. loss: 4.677
Epochs: 27558 | epoch avg. loss: 0.004 | test avg. loss: 4.698


 55%|█████▌    | 27561/50000 [41:27<30:45, 12.16it/s]

Epochs: 27559 | epoch avg. loss: 0.013 | test avg. loss: 4.756
Epochs: 27560 | epoch avg. loss: 0.016 | test avg. loss: 4.687
Epochs: 27561 | epoch avg. loss: 0.041 | test avg. loss: 4.719


 55%|█████▌    | 27565/50000 [41:27<31:54, 11.72it/s]

Epochs: 27562 | epoch avg. loss: 0.018 | test avg. loss: 4.814
Epochs: 27563 | epoch avg. loss: 0.035 | test avg. loss: 4.604
Epochs: 27564 | epoch avg. loss: 0.042 | test avg. loss: 4.568


 55%|█████▌    | 27567/50000 [41:27<30:27, 12.28it/s]

Epochs: 27565 | epoch avg. loss: 0.027 | test avg. loss: 4.740
Epochs: 27566 | epoch avg. loss: 0.075 | test avg. loss: 4.558
Epochs: 27567 | epoch avg. loss: 0.020 | test avg. loss: 4.538


 55%|█████▌    | 27571/50000 [41:28<29:06, 12.84it/s]

Epochs: 27568 | epoch avg. loss: 0.019 | test avg. loss: 4.739
Epochs: 27569 | epoch avg. loss: 0.046 | test avg. loss: 4.675
Epochs: 27570 | epoch avg. loss: 0.040 | test avg. loss: 4.781


 55%|█████▌    | 27573/50000 [41:28<29:20, 12.74it/s]

Epochs: 27571 | epoch avg. loss: 0.014 | test avg. loss: 4.869
Epochs: 27572 | epoch avg. loss: 0.021 | test avg. loss: 4.736
Epochs: 27573 | epoch avg. loss: 0.014 | test avg. loss: 4.671


 55%|█████▌    | 27577/50000 [41:28<28:21, 13.18it/s]

Epochs: 27574 | epoch avg. loss: 0.007 | test avg. loss: 4.651
Epochs: 27575 | epoch avg. loss: 0.007 | test avg. loss: 4.670
Epochs: 27576 | epoch avg. loss: 0.007 | test avg. loss: 4.646


 55%|█████▌    | 27579/50000 [41:28<27:36, 13.53it/s]

Epochs: 27577 | epoch avg. loss: 0.020 | test avg. loss: 4.703
Epochs: 27578 | epoch avg. loss: 0.019 | test avg. loss: 4.664
Epochs: 27579 | epoch avg. loss: 0.009 | test avg. loss: 4.678


 55%|█████▌    | 27581/50000 [41:28<28:58, 12.89it/s]

Epochs: 27580 | epoch avg. loss: 0.015 | test avg. loss: 4.810
Epochs: 27581 | epoch avg. loss: 0.016 | test avg. loss: 4.840


 55%|█████▌    | 27585/50000 [41:29<31:29, 11.87it/s]

Epochs: 27582 | epoch avg. loss: 0.007 | test avg. loss: 4.777
Epochs: 27583 | epoch avg. loss: 0.004 | test avg. loss: 4.741
Epochs: 27584 | epoch avg. loss: 0.008 | test avg. loss: 4.739


 55%|█████▌    | 27589/50000 [41:29<27:46, 13.45it/s]

Epochs: 27585 | epoch avg. loss: 0.007 | test avg. loss: 4.701
Epochs: 27586 | epoch avg. loss: 0.008 | test avg. loss: 4.704
Epochs: 27587 | epoch avg. loss: 0.005 | test avg. loss: 4.664
Epochs: 27588 | epoch avg. loss: 0.004 | test avg. loss: 4.612


 55%|█████▌    | 27591/50000 [41:29<27:11, 13.74it/s]

Epochs: 27589 | epoch avg. loss: 0.004 | test avg. loss: 4.595
Epochs: 27590 | epoch avg. loss: 0.004 | test avg. loss: 4.590
Epochs: 27591 | epoch avg. loss: 0.006 | test avg. loss: 4.622


 55%|█████▌    | 27595/50000 [41:29<28:50, 12.95it/s]

Epochs: 27592 | epoch avg. loss: 0.005 | test avg. loss: 4.682
Epochs: 27593 | epoch avg. loss: 0.009 | test avg. loss: 4.648
Epochs: 27594 | epoch avg. loss: 0.005 | test avg. loss: 4.621


 55%|█████▌    | 27599/50000 [41:30<27:17, 13.68it/s]

Epochs: 27595 | epoch avg. loss: 0.007 | test avg. loss: 4.669
Epochs: 27596 | epoch avg. loss: 0.014 | test avg. loss: 4.595
Epochs: 27597 | epoch avg. loss: 0.025 | test avg. loss: 4.630
Epochs: 27598 | epoch avg. loss: 0.006 | test avg. loss: 4.687


 55%|█████▌    | 27599/50000 [41:30<27:17, 13.68it/s]

Epochs: 27599 | epoch avg. loss: 0.010 | test avg. loss: 4.608


 55%|█████▌    | 27603/50000 [41:32<1:33:15,  4.00it/s]

Epochs: 27600 | epoch avg. loss: 0.032 | test avg. loss: 4.644
Epochs: 27601 | epoch avg. loss: 0.016 | test avg. loss: 4.769
Epochs: 27602 | epoch avg. loss: 0.023 | test avg. loss: 4.746


 55%|█████▌    | 27605/50000 [41:32<1:13:20,  5.09it/s]

Epochs: 27603 | epoch avg. loss: 0.007 | test avg. loss: 4.724
Epochs: 27604 | epoch avg. loss: 0.010 | test avg. loss: 4.765
Epochs: 27605 | epoch avg. loss: 0.004 | test avg. loss: 4.760


 55%|█████▌    | 27609/50000 [41:32<50:01,  7.46it/s]

Epochs: 27606 | epoch avg. loss: 0.008 | test avg. loss: 4.798
Epochs: 27607 | epoch avg. loss: 0.010 | test avg. loss: 4.814
Epochs: 27608 | epoch avg. loss: 0.016 | test avg. loss: 4.738


 55%|█████▌    | 27611/50000 [41:32<44:31,  8.38it/s]

Epochs: 27609 | epoch avg. loss: 0.007 | test avg. loss: 4.822
Epochs: 27610 | epoch avg. loss: 0.015 | test avg. loss: 4.830
Epochs: 27611 | epoch avg. loss: 0.010 | test avg. loss: 4.737


 55%|█████▌    | 27615/50000 [41:33<36:48, 10.13it/s]

Epochs: 27612 | epoch avg. loss: 0.029 | test avg. loss: 4.697
Epochs: 27613 | epoch avg. loss: 0.007 | test avg. loss: 4.673
Epochs: 27614 | epoch avg. loss: 0.006 | test avg. loss: 4.645


 55%|█████▌    | 27617/50000 [41:33<34:36, 10.78it/s]

Epochs: 27615 | epoch avg. loss: 0.013 | test avg. loss: 4.711
Epochs: 27616 | epoch avg. loss: 0.008 | test avg. loss: 4.822
Epochs: 27617 | epoch avg. loss: 0.027 | test avg. loss: 4.674


 55%|█████▌    | 27621/50000 [41:33<30:43, 12.14it/s]

Epochs: 27618 | epoch avg. loss: 0.018 | test avg. loss: 4.598
Epochs: 27619 | epoch avg. loss: 0.049 | test avg. loss: 4.659
Epochs: 27620 | epoch avg. loss: 0.017 | test avg. loss: 4.682


 55%|█████▌    | 27623/50000 [41:33<29:37, 12.59it/s]

Epochs: 27621 | epoch avg. loss: 0.014 | test avg. loss: 4.616
Epochs: 27622 | epoch avg. loss: 0.024 | test avg. loss: 4.698
Epochs: 27623 | epoch avg. loss: 0.015 | test avg. loss: 4.634


 55%|█████▌    | 27627/50000 [41:34<31:49, 11.71it/s]

Epochs: 27624 | epoch avg. loss: 0.012 | test avg. loss: 4.608
Epochs: 27625 | epoch avg. loss: 0.010 | test avg. loss: 4.672
Epochs: 27626 | epoch avg. loss: 0.007 | test avg. loss: 4.643


 55%|█████▌    | 27629/50000 [41:34<30:50, 12.09it/s]

Epochs: 27627 | epoch avg. loss: 0.015 | test avg. loss: 4.670
Epochs: 27628 | epoch avg. loss: 0.008 | test avg. loss: 4.747
Epochs: 27629 | epoch avg. loss: 0.022 | test avg. loss: 4.633


 55%|█████▌    | 27633/50000 [41:34<28:09, 13.24it/s]

Epochs: 27630 | epoch avg. loss: 0.012 | test avg. loss: 4.592
Epochs: 27631 | epoch avg. loss: 0.013 | test avg. loss: 4.726
Epochs: 27632 | epoch avg. loss: 0.067 | test avg. loss: 4.567


                                                     

Epochs: 27633 | epoch avg. loss: 0.023 | test avg. loss: 4.563
Epochs: 27634 | epoch avg. loss: 0.029 | test avg. loss: 4.682
Epochs: 27635 | epoch avg. loss: 0.052 | test avg. loss: 4.671


 55%|█████▌    | 27639/50000 [41:34<28:09, 13.24it/s]

Epochs: 27636 | epoch avg. loss: 0.026 | test avg. loss: 4.565
Epochs: 27637 | epoch avg. loss: 0.087 | test avg. loss: 4.572
Epochs: 27638 | epoch avg. loss: 0.011 | test avg. loss: 4.583


 55%|█████▌    | 27641/50000 [41:35<28:29, 13.08it/s]

Epochs: 27639 | epoch avg. loss: 0.013 | test avg. loss: 4.549
Epochs: 27640 | epoch avg. loss: 0.031 | test avg. loss: 4.620
Epochs: 27641 | epoch avg. loss: 0.027 | test avg. loss: 4.683


 55%|█████▌    | 27645/50000 [41:35<29:38, 12.57it/s]

Epochs: 27642 | epoch avg. loss: 0.054 | test avg. loss: 4.598
Epochs: 27643 | epoch avg. loss: 0.014 | test avg. loss: 4.563
Epochs: 27644 | epoch avg. loss: 0.029 | test avg. loss: 4.574


 55%|█████▌    | 27647/50000 [41:35<29:25, 12.66it/s]

Epochs: 27645 | epoch avg. loss: 0.013 | test avg. loss: 4.647
Epochs: 27646 | epoch avg. loss: 0.032 | test avg. loss: 4.564
Epochs: 27647 | epoch avg. loss: 0.009 | test avg. loss: 4.630


 55%|█████▌    | 27651/50000 [41:35<29:30, 12.63it/s]

Epochs: 27648 | epoch avg. loss: 0.007 | test avg. loss: 4.686
Epochs: 27649 | epoch avg. loss: 0.007 | test avg. loss: 4.667
Epochs: 27650 | epoch avg. loss: 0.006 | test avg. loss: 4.691


 55%|█████▌    | 27653/50000 [41:36<30:16, 12.30it/s]

Epochs: 27651 | epoch avg. loss: 0.026 | test avg. loss: 4.681
Epochs: 27652 | epoch avg. loss: 0.013 | test avg. loss: 4.729
Epochs: 27653 | epoch avg. loss: 0.009 | test avg. loss: 4.835


 55%|█████▌    | 27657/50000 [41:36<29:29, 12.63it/s]

Epochs: 27654 | epoch avg. loss: 0.018 | test avg. loss: 4.929
Epochs: 27655 | epoch avg. loss: 0.056 | test avg. loss: 4.666
Epochs: 27656 | epoch avg. loss: 0.015 | test avg. loss: 4.578


 55%|█████▌    | 27659/50000 [41:36<31:01, 12.00it/s]

Epochs: 27657 | epoch avg. loss: 0.064 | test avg. loss: 4.649
Epochs: 27658 | epoch avg. loss: 0.041 | test avg. loss: 5.062
Epochs: 27659 | epoch avg. loss: 0.228 | test avg. loss: 4.801


 55%|█████▌    | 27663/50000 [41:36<30:55, 12.04it/s]

Epochs: 27660 | epoch avg. loss: 0.161 | test avg. loss: 5.041
Epochs: 27661 | epoch avg. loss: 0.165 | test avg. loss: 5.192
Epochs: 27662 | epoch avg. loss: 0.268 | test avg. loss: 4.993


 55%|█████▌    | 27665/50000 [41:37<31:56, 11.66it/s]

Epochs: 27663 | epoch avg. loss: 0.086 | test avg. loss: 4.691
Epochs: 27664 | epoch avg. loss: 0.192 | test avg. loss: 4.393
Epochs: 27665 | epoch avg. loss: 0.085 | test avg. loss: 4.601


 55%|█████▌    | 27669/50000 [41:37<29:01, 12.83it/s]

Epochs: 27666 | epoch avg. loss: 0.310 | test avg. loss: 4.588
Epochs: 27667 | epoch avg. loss: 0.067 | test avg. loss: 5.117
Epochs: 27668 | epoch avg. loss: 0.126 | test avg. loss: 5.222


 55%|█████▌    | 27671/50000 [41:37<28:55, 12.87it/s]

Epochs: 27669 | epoch avg. loss: 0.080 | test avg. loss: 5.107
Epochs: 27670 | epoch avg. loss: 0.068 | test avg. loss: 4.943
Epochs: 27671 | epoch avg. loss: 0.183 | test avg. loss: 5.040


 55%|█████▌    | 27675/50000 [41:37<31:29, 11.81it/s]

Epochs: 27672 | epoch avg. loss: 0.094 | test avg. loss: 5.348
Epochs: 27673 | epoch avg. loss: 0.134 | test avg. loss: 4.830
Epochs: 27674 | epoch avg. loss: 0.158 | test avg. loss: 4.660


                                                     

Epochs: 27675 | epoch avg. loss: 0.086 | test avg. loss: 4.974
Epochs: 27676 | epoch avg. loss: 0.226 | test avg. loss: 4.477


 55%|█████▌    | 27679/50000 [41:38<35:03, 10.61it/s]

Epochs: 27677 | epoch avg. loss: 0.216 | test avg. loss: 4.329
Epochs: 27678 | epoch avg. loss: 0.097 | test avg. loss: 4.874
Epochs: 27679 | epoch avg. loss: 0.303 | test avg. loss: 4.664


 55%|█████▌    | 27683/50000 [41:38<34:09, 10.89it/s]

Epochs: 27680 | epoch avg. loss: 0.210 | test avg. loss: 4.935
Epochs: 27681 | epoch avg. loss: 0.197 | test avg. loss: 6.783
Epochs: 27682 | epoch avg. loss: 1.215 | test avg. loss: 4.946


 55%|█████▌    | 27685/50000 [41:38<33:33, 11.08it/s]

Epochs: 27683 | epoch avg. loss: 0.533 | test avg. loss: 5.100
Epochs: 27684 | epoch avg. loss: 0.445 | test avg. loss: 6.190
Epochs: 27685 | epoch avg. loss: 0.994 | test avg. loss: 4.531




Epochs: 27686 | epoch avg. loss: 0.242 | test avg. loss: 4.151
Epochs: 27687 | epoch avg. loss: 0.130 | test avg. loss: 4.394
Epochs: 27688 | epoch avg. loss: 0.288 | test avg. loss: 4.053


 55%|█████▌    | 27693/50000 [41:39<26:46, 13.88it/s]

Epochs: 27689 | epoch avg. loss: 0.220 | test avg. loss: 4.306
Epochs: 27690 | epoch avg. loss: 0.070 | test avg. loss: 4.802
Epochs: 27691 | epoch avg. loss: 0.133 | test avg. loss: 4.739
Epochs: 27692 | epoch avg. loss: 0.022 | test avg. loss: 4.679


 55%|█████▌    | 27695/50000 [41:39<26:24, 14.08it/s]

Epochs: 27693 | epoch avg. loss: 0.028 | test avg. loss: 4.755
Epochs: 27694 | epoch avg. loss: 0.040 | test avg. loss: 4.772
Epochs: 27695 | epoch avg. loss: 0.029 | test avg. loss: 4.807


 55%|█████▌    | 27699/50000 [41:39<26:37, 13.96it/s]

Epochs: 27696 | epoch avg. loss: 0.056 | test avg. loss: 5.038
Epochs: 27697 | epoch avg. loss: 0.074 | test avg. loss: 5.019
Epochs: 27698 | epoch avg. loss: 0.033 | test avg. loss: 4.836


 55%|█████▌    | 27699/50000 [41:39<26:37, 13.96it/s]

Epochs: 27699 | epoch avg. loss: 0.030 | test avg. loss: 4.925


 55%|█████▌    | 27703/50000 [41:41<1:35:59,  3.87it/s]

Epochs: 27700 | epoch avg. loss: 0.042 | test avg. loss: 4.840
Epochs: 27701 | epoch avg. loss: 0.025 | test avg. loss: 4.862
Epochs: 27702 | epoch avg. loss: 0.032 | test avg. loss: 4.985


 55%|█████▌    | 27707/50000 [41:42<59:24,  6.25it/s]  

Epochs: 27703 | epoch avg. loss: 0.021 | test avg. loss: 4.980
Epochs: 27704 | epoch avg. loss: 0.012 | test avg. loss: 4.910
Epochs: 27705 | epoch avg. loss: 0.013 | test avg. loss: 4.892
Epochs: 27706 | epoch avg. loss: 0.016 | test avg. loss: 4.723


 55%|█████▌    | 27711/50000 [41:42<41:21,  8.98it/s]

Epochs: 27707 | epoch avg. loss: 0.023 | test avg. loss: 4.740
Epochs: 27708 | epoch avg. loss: 0.010 | test avg. loss: 4.870
Epochs: 27709 | epoch avg. loss: 0.018 | test avg. loss: 4.807
Epochs: 27710 | epoch avg. loss: 0.006 | test avg. loss: 4.819


 55%|█████▌    | 27713/50000 [41:42<36:16, 10.24it/s]

Epochs: 27711 | epoch avg. loss: 0.008 | test avg. loss: 4.813
Epochs: 27712 | epoch avg. loss: 0.006 | test avg. loss: 4.807
Epochs: 27713 | epoch avg. loss: 0.020 | test avg. loss: 4.866


 55%|█████▌    | 27717/50000 [41:42<34:18, 10.82it/s]

Epochs: 27714 | epoch avg. loss: 0.006 | test avg. loss: 4.880
Epochs: 27715 | epoch avg. loss: 0.007 | test avg. loss: 4.829
Epochs: 27716 | epoch avg. loss: 0.016 | test avg. loss: 4.808


 55%|█████▌    | 27719/50000 [41:43<36:01, 10.31it/s]

Epochs: 27717 | epoch avg. loss: 0.009 | test avg. loss: 4.789
Epochs: 27718 | epoch avg. loss: 0.010 | test avg. loss: 4.848




Epochs: 27719 | epoch avg. loss: 0.006 | test avg. loss: 4.803
Epochs: 27720 | epoch avg. loss: 0.012 | test avg. loss: 4.759
Epochs: 27721 | epoch avg. loss: 0.009 | test avg. loss: 4.831


 55%|█████▌    | 27725/50000 [41:43<29:10, 12.73it/s]

Epochs: 27722 | epoch avg. loss: 0.023 | test avg. loss: 4.789
Epochs: 27723 | epoch avg. loss: 0.017 | test avg. loss: 4.784
Epochs: 27724 | epoch avg. loss: 0.022 | test avg. loss: 4.848
Epochs: 27725 | epoch avg. loss: 0.009 | test avg. loss: 4.823


 55%|█████▌    | 27729/50000 [41:43<26:40, 13.92it/s]

Epochs: 27726 | epoch avg. loss: 0.006 | test avg. loss: 4.823
Epochs: 27727 | epoch avg. loss: 0.005 | test avg. loss: 4.816
Epochs: 27728 | epoch avg. loss: 0.007 | test avg. loss: 4.774
Epochs: 27729 | epoch avg. loss: 0.027 | test avg. loss: 4.826


 55%|█████▌    | 27733/50000 [41:44<29:42, 12.49it/s]

Epochs: 27730 | epoch avg. loss: 0.025 | test avg. loss: 4.929
Epochs: 27731 | epoch avg. loss: 0.037 | test avg. loss: 4.819
Epochs: 27732 | epoch avg. loss: 0.028 | test avg. loss: 4.851


 55%|█████▌    | 27735/50000 [41:44<31:05, 11.94it/s]

Epochs: 27733 | epoch avg. loss: 0.034 | test avg. loss: 4.930
Epochs: 27734 | epoch avg. loss: 0.051 | test avg. loss: 4.705
Epochs: 27735 | epoch avg. loss: 0.071 | test avg. loss: 4.830


 55%|█████▌    | 27739/50000 [41:44<29:09, 12.72it/s]

Epochs: 27736 | epoch avg. loss: 0.019 | test avg. loss: 4.946
Epochs: 27737 | epoch avg. loss: 0.039 | test avg. loss: 4.914
Epochs: 27738 | epoch avg. loss: 0.127 | test avg. loss: 4.825


 55%|█████▌    | 27741/50000 [41:44<27:33, 13.46it/s]

Epochs: 27739 | epoch avg. loss: 0.037 | test avg. loss: 4.699
Epochs: 27740 | epoch avg. loss: 0.016 | test avg. loss: 4.553
Epochs: 27741 | epoch avg. loss: 0.013 | test avg. loss: 4.616


 55%|█████▌    | 27745/50000 [41:45<32:03, 11.57it/s]

Epochs: 27742 | epoch avg. loss: 0.048 | test avg. loss: 4.490
Epochs: 27743 | epoch avg. loss: 0.054 | test avg. loss: 4.492
Epochs: 27744 | epoch avg. loss: 0.033 | test avg. loss: 4.740


 55%|█████▌    | 27747/50000 [41:45<31:22, 11.82it/s]

Epochs: 27745 | epoch avg. loss: 0.119 | test avg. loss: 4.503
Epochs: 27746 | epoch avg. loss: 0.062 | test avg. loss: 4.560
Epochs: 27747 | epoch avg. loss: 0.019 | test avg. loss: 4.718


 56%|█████▌    | 27751/50000 [41:45<29:33, 12.55it/s]

Epochs: 27748 | epoch avg. loss: 0.024 | test avg. loss: 4.676
Epochs: 27749 | epoch avg. loss: 0.087 | test avg. loss: 4.662
Epochs: 27750 | epoch avg. loss: 0.006 | test avg. loss: 4.553


 56%|█████▌    | 27753/50000 [41:45<30:27, 12.17it/s]

Epochs: 27751 | epoch avg. loss: 0.036 | test avg. loss: 4.574
Epochs: 27752 | epoch avg. loss: 0.017 | test avg. loss: 4.705
Epochs: 27753 | epoch avg. loss: 0.014 | test avg. loss: 4.588


 56%|█████▌    | 27757/50000 [41:46<30:15, 12.25it/s]

Epochs: 27754 | epoch avg. loss: 0.036 | test avg. loss: 4.630
Epochs: 27755 | epoch avg. loss: 0.020 | test avg. loss: 4.721
Epochs: 27756 | epoch avg. loss: 0.018 | test avg. loss: 4.667


 56%|█████▌    | 27759/50000 [41:46<31:08, 11.91it/s]

Epochs: 27757 | epoch avg. loss: 0.038 | test avg. loss: 4.711
Epochs: 27758 | epoch avg. loss: 0.010 | test avg. loss: 4.679
Epochs: 27759 | epoch avg. loss: 0.009 | test avg. loss: 4.667


 56%|█████▌    | 27763/50000 [41:46<28:26, 13.03it/s]

Epochs: 27760 | epoch avg. loss: 0.009 | test avg. loss: 4.652
Epochs: 27761 | epoch avg. loss: 0.008 | test avg. loss: 4.624
Epochs: 27762 | epoch avg. loss: 0.008 | test avg. loss: 4.702


 56%|█████▌    | 27767/50000 [41:46<26:34, 13.94it/s]

Epochs: 27763 | epoch avg. loss: 0.011 | test avg. loss: 4.814
Epochs: 27764 | epoch avg. loss: 0.025 | test avg. loss: 4.699
Epochs: 27765 | epoch avg. loss: 0.007 | test avg. loss: 4.659
Epochs: 27766 | epoch avg. loss: 0.011 | test avg. loss: 4.622


 56%|█████▌    | 27769/50000 [41:47<29:36, 12.52it/s]

Epochs: 27767 | epoch avg. loss: 0.017 | test avg. loss: 4.512
Epochs: 27768 | epoch avg. loss: 0.012 | test avg. loss: 4.554
Epochs: 27769 | epoch avg. loss: 0.008 | test avg. loss: 4.605


 56%|█████▌    | 27773/50000 [41:47<29:12, 12.68it/s]

Epochs: 27770 | epoch avg. loss: 0.010 | test avg. loss: 4.573
Epochs: 27771 | epoch avg. loss: 0.016 | test avg. loss: 4.575
Epochs: 27772 | epoch avg. loss: 0.007 | test avg. loss: 4.626




Epochs: 27773 | epoch avg. loss: 0.017 | test avg. loss: 4.574
Epochs: 27774 | epoch avg. loss: 0.016 | test avg. loss: 4.628


 56%|█████▌    | 27777/50000 [41:47<33:33, 11.03it/s]

Epochs: 27775 | epoch avg. loss: 0.008 | test avg. loss: 4.702
Epochs: 27776 | epoch avg. loss: 0.007 | test avg. loss: 4.638
Epochs: 27777 | epoch avg. loss: 0.010 | test avg. loss: 4.616


 56%|█████▌    | 27781/50000 [41:48<32:17, 11.47it/s]

Epochs: 27778 | epoch avg. loss: 0.007 | test avg. loss: 4.691
Epochs: 27779 | epoch avg. loss: 0.031 | test avg. loss: 4.583
Epochs: 27780 | epoch avg. loss: 0.018 | test avg. loss: 4.600


 56%|█████▌    | 27783/50000 [41:48<30:29, 12.14it/s]

Epochs: 27781 | epoch avg. loss: 0.012 | test avg. loss: 4.709
Epochs: 27782 | epoch avg. loss: 0.027 | test avg. loss: 4.625
Epochs: 27783 | epoch avg. loss: 0.049 | test avg. loss: 4.631


 56%|█████▌    | 27787/50000 [41:48<28:19, 13.07it/s]

Epochs: 27784 | epoch avg. loss: 0.030 | test avg. loss: 4.823
Epochs: 27785 | epoch avg. loss: 0.060 | test avg. loss: 4.569
Epochs: 27786 | epoch avg. loss: 0.033 | test avg. loss: 4.562


 56%|█████▌    | 27791/50000 [41:48<26:18, 14.07it/s]

Epochs: 27787 | epoch avg. loss: 0.028 | test avg. loss: 4.725
Epochs: 27788 | epoch avg. loss: 0.016 | test avg. loss: 4.679
Epochs: 27789 | epoch avg. loss: 0.010 | test avg. loss: 4.696
Epochs: 27790 | epoch avg. loss: 0.013 | test avg. loss: 4.709


 56%|█████▌    | 27793/50000 [41:48<26:33, 13.93it/s]

Epochs: 27791 | epoch avg. loss: 0.030 | test avg. loss: 4.548
Epochs: 27792 | epoch avg. loss: 0.020 | test avg. loss: 4.557
Epochs: 27793 | epoch avg. loss: 0.022 | test avg. loss: 4.687


 56%|█████▌    | 27797/50000 [41:49<26:24, 14.01it/s]

Epochs: 27794 | epoch avg. loss: 0.037 | test avg. loss: 4.552
Epochs: 27795 | epoch avg. loss: 0.051 | test avg. loss: 4.590
Epochs: 27796 | epoch avg. loss: 0.022 | test avg. loss: 4.789


 56%|█████▌    | 27799/50000 [41:49<26:10, 14.13it/s]

Epochs: 27797 | epoch avg. loss: 0.072 | test avg. loss: 4.612
Epochs: 27798 | epoch avg. loss: 0.052 | test avg. loss: 4.623
Epochs: 27799 | epoch avg. loss: 0.033 | test avg. loss: 4.821


 56%|█████▌    | 27803/50000 [41:51<1:27:12,  4.24it/s]

Epochs: 27800 | epoch avg. loss: 0.052 | test avg. loss: 4.613
Epochs: 27801 | epoch avg. loss: 0.017 | test avg. loss: 4.614
Epochs: 27802 | epoch avg. loss: 0.016 | test avg. loss: 4.657


 56%|█████▌    | 27807/50000 [41:51<55:52,  6.62it/s]  

Epochs: 27803 | epoch avg. loss: 0.028 | test avg. loss: 4.600
Epochs: 27804 | epoch avg. loss: 0.006 | test avg. loss: 4.612
Epochs: 27805 | epoch avg. loss: 0.007 | test avg. loss: 4.657
Epochs: 27806 | epoch avg. loss: 0.016 | test avg. loss: 4.610


 56%|█████▌    | 27809/50000 [41:51<46:28,  7.96it/s]

Epochs: 27807 | epoch avg. loss: 0.005 | test avg. loss: 4.625
Epochs: 27808 | epoch avg. loss: 0.005 | test avg. loss: 4.663
Epochs: 27809 | epoch avg. loss: 0.005 | test avg. loss: 4.669


 56%|█████▌    | 27813/50000 [41:51<37:32,  9.85it/s]

Epochs: 27810 | epoch avg. loss: 0.007 | test avg. loss: 4.649
Epochs: 27811 | epoch avg. loss: 0.006 | test avg. loss: 4.639
Epochs: 27812 | epoch avg. loss: 0.014 | test avg. loss: 4.603


 56%|█████▌    | 27815/50000 [41:52<36:22, 10.16it/s]

Epochs: 27813 | epoch avg. loss: 0.010 | test avg. loss: 4.586
Epochs: 27814 | epoch avg. loss: 0.030 | test avg. loss: 4.627
Epochs: 27815 | epoch avg. loss: 0.020 | test avg. loss: 4.653


 56%|█████▌    | 27819/50000 [41:52<31:35, 11.70it/s]

Epochs: 27816 | epoch avg. loss: 0.011 | test avg. loss: 4.614
Epochs: 27817 | epoch avg. loss: 0.031 | test avg. loss: 4.601
Epochs: 27818 | epoch avg. loss: 0.009 | test avg. loss: 4.578




Epochs: 27819 | epoch avg. loss: 0.011 | test avg. loss: 4.571
Epochs: 27820 | epoch avg. loss: 0.013 | test avg. loss: 4.683
Epochs: 27821 | epoch avg. loss: 0.011 | test avg. loss: 4.781


 56%|█████▌    | 27825/50000 [41:52<28:14, 13.09it/s]

Epochs: 27822 | epoch avg. loss: 0.032 | test avg. loss: 4.642
Epochs: 27823 | epoch avg. loss: 0.012 | test avg. loss: 4.600
Epochs: 27824 | epoch avg. loss: 0.012 | test avg. loss: 4.676


 56%|█████▌    | 27827/50000 [41:53<28:47, 12.84it/s]

Epochs: 27825 | epoch avg. loss: 0.044 | test avg. loss: 4.716
Epochs: 27826 | epoch avg. loss: 0.015 | test avg. loss: 4.715
Epochs: 27827 | epoch avg. loss: 0.028 | test avg. loss: 4.747




Epochs: 27828 | epoch avg. loss: 0.022 | test avg. loss: 4.623
Epochs: 27829 | epoch avg. loss: 0.008 | test avg. loss: 4.589
Epochs: 27830 | epoch avg. loss: 0.006 | test avg. loss: 4.599


 56%|█████▌    | 27835/50000 [41:53<25:40, 14.39it/s]

Epochs: 27831 | epoch avg. loss: 0.007 | test avg. loss: 4.603
Epochs: 27832 | epoch avg. loss: 0.011 | test avg. loss: 4.622
Epochs: 27833 | epoch avg. loss: 0.006 | test avg. loss: 4.569
Epochs: 27834 | epoch avg. loss: 0.018 | test avg. loss: 4.596


 56%|█████▌    | 27837/50000 [41:53<25:35, 14.43it/s]

Epochs: 27835 | epoch avg. loss: 0.011 | test avg. loss: 4.698
Epochs: 27836 | epoch avg. loss: 0.015 | test avg. loss: 4.665
Epochs: 27837 | epoch avg. loss: 0.013 | test avg. loss: 4.639


 56%|█████▌    | 27841/50000 [41:53<27:14, 13.56it/s]

Epochs: 27838 | epoch avg. loss: 0.011 | test avg. loss: 4.672
Epochs: 27839 | epoch avg. loss: 0.031 | test avg. loss: 4.584
Epochs: 27840 | epoch avg. loss: 0.010 | test avg. loss: 4.562


 56%|█████▌    | 27843/50000 [41:54<27:46, 13.30it/s]

Epochs: 27841 | epoch avg. loss: 0.013 | test avg. loss: 4.707
Epochs: 27842 | epoch avg. loss: 0.043 | test avg. loss: 4.598
Epochs: 27843 | epoch avg. loss: 0.022 | test avg. loss: 4.621


 56%|█████▌    | 27847/50000 [41:54<26:29, 13.94it/s]

Epochs: 27844 | epoch avg. loss: 0.030 | test avg. loss: 4.824
Epochs: 27845 | epoch avg. loss: 0.074 | test avg. loss: 4.648
Epochs: 27846 | epoch avg. loss: 0.059 | test avg. loss: 4.626
Epochs: 27847 | epoch avg. loss: 0.041 | test avg. loss: 4.568


 56%|█████▌    | 27851/50000 [41:54<25:18, 14.59it/s]

Epochs: 27848 | epoch avg. loss: 0.011 | test avg. loss: 4.502
Epochs: 27849 | epoch avg. loss: 0.009 | test avg. loss: 4.584
Epochs: 27850 | epoch avg. loss: 0.011 | test avg. loss: 4.733
Epochs: 27851 | epoch avg. loss: 0.009 | test avg. loss: 4.728


 56%|█████▌    | 27855/50000 [41:54<27:34, 13.39it/s]

Epochs: 27852 | epoch avg. loss: 0.020 | test avg. loss: 4.681
Epochs: 27853 | epoch avg. loss: 0.008 | test avg. loss: 4.634
Epochs: 27854 | epoch avg. loss: 0.008 | test avg. loss: 4.614


 56%|█████▌    | 27857/50000 [41:55<26:56, 13.70it/s]

Epochs: 27855 | epoch avg. loss: 0.011 | test avg. loss: 4.692
Epochs: 27856 | epoch avg. loss: 0.014 | test avg. loss: 4.755
Epochs: 27857 | epoch avg. loss: 0.021 | test avg. loss: 4.621


 56%|█████▌    | 27861/50000 [41:55<26:29, 13.93it/s]

Epochs: 27858 | epoch avg. loss: 0.009 | test avg. loss: 4.572
Epochs: 27859 | epoch avg. loss: 0.027 | test avg. loss: 4.615
Epochs: 27860 | epoch avg. loss: 0.014 | test avg. loss: 4.758


 56%|█████▌    | 27865/50000 [41:55<25:42, 14.35it/s]

Epochs: 27861 | epoch avg. loss: 0.040 | test avg. loss: 4.686
Epochs: 27862 | epoch avg. loss: 0.035 | test avg. loss: 4.741
Epochs: 27863 | epoch avg. loss: 0.020 | test avg. loss: 4.894
Epochs: 27864 | epoch avg. loss: 0.059 | test avg. loss: 4.671




Epochs: 27865 | epoch avg. loss: 0.024 | test avg. loss: 4.632
Epochs: 27866 | epoch avg. loss: 0.012 | test avg. loss: 4.688
Epochs: 27867 | epoch avg. loss: 0.015 | test avg. loss: 4.690


 56%|█████▌    | 27871/50000 [41:56<28:02, 13.15it/s]

Epochs: 27868 | epoch avg. loss: 0.008 | test avg. loss: 4.672
Epochs: 27869 | epoch avg. loss: 0.038 | test avg. loss: 4.631
Epochs: 27870 | epoch avg. loss: 0.015 | test avg. loss: 4.608


 56%|█████▌    | 27873/50000 [41:56<27:11, 13.56it/s]

Epochs: 27871 | epoch avg. loss: 0.023 | test avg. loss: 4.544
Epochs: 27872 | epoch avg. loss: 0.010 | test avg. loss: 4.639
Epochs: 27873 | epoch avg. loss: 0.023 | test avg. loss: 4.760
Epochs: 27874 | epoch avg. loss: 0.023 | test avg. loss: 4.728


 56%|█████▌    | 27877/50000 [41:56<26:05, 14.13it/s]

Epochs: 27875 | epoch avg. loss: 0.017 | test avg. loss: 4.715
Epochs: 27876 | epoch avg. loss: 0.041 | test avg. loss: 4.731
Epochs: 27877 | epoch avg. loss: 0.008 | test avg. loss: 4.738


 56%|█████▌    | 27881/50000 [41:56<28:51, 12.78it/s]

Epochs: 27878 | epoch avg. loss: 0.016 | test avg. loss: 4.684
Epochs: 27879 | epoch avg. loss: 0.053 | test avg. loss: 4.605
Epochs: 27880 | epoch avg. loss: 0.039 | test avg. loss: 4.687


 56%|█████▌    | 27883/50000 [41:57<29:11, 12.63it/s]

Epochs: 27881 | epoch avg. loss: 0.096 | test avg. loss: 4.382
Epochs: 27882 | epoch avg. loss: 0.131 | test avg. loss: 4.564
Epochs: 27883 | epoch avg. loss: 0.079 | test avg. loss: 4.790


 56%|█████▌    | 27887/50000 [41:57<29:38, 12.43it/s]

Epochs: 27884 | epoch avg. loss: 0.107 | test avg. loss: 4.608
Epochs: 27885 | epoch avg. loss: 0.029 | test avg. loss: 4.418
Epochs: 27886 | epoch avg. loss: 0.098 | test avg. loss: 4.394


 56%|█████▌    | 27889/50000 [41:57<28:46, 12.80it/s]

Epochs: 27887 | epoch avg. loss: 0.147 | test avg. loss: 4.971
Epochs: 27888 | epoch avg. loss: 0.208 | test avg. loss: 4.874
Epochs: 27889 | epoch avg. loss: 0.161 | test avg. loss: 5.100


 56%|█████▌    | 27893/50000 [41:57<26:46, 13.76it/s]

Epochs: 27890 | epoch avg. loss: 0.090 | test avg. loss: 5.057
Epochs: 27891 | epoch avg. loss: 0.045 | test avg. loss: 4.512
Epochs: 27892 | epoch avg. loss: 0.032 | test avg. loss: 4.411


 56%|█████▌    | 27895/50000 [41:58<28:54, 12.75it/s]

Epochs: 27893 | epoch avg. loss: 0.062 | test avg. loss: 4.643
Epochs: 27894 | epoch avg. loss: 0.020 | test avg. loss: 4.926
Epochs: 27895 | epoch avg. loss: 0.077 | test avg. loss: 5.023


 56%|█████▌    | 27899/50000 [41:58<31:08, 11.83it/s]

Epochs: 27896 | epoch avg. loss: 0.029 | test avg. loss: 4.892
Epochs: 27897 | epoch avg. loss: 0.023 | test avg. loss: 4.711
Epochs: 27898 | epoch avg. loss: 0.059 | test avg. loss: 4.778


 56%|█████▌    | 27899/50000 [41:58<31:08, 11.83it/s]

Epochs: 27899 | epoch avg. loss: 0.022 | test avg. loss: 5.154


 56%|█████▌    | 27903/50000 [42:00<1:43:23,  3.56it/s]

Epochs: 27900 | epoch avg. loss: 0.079 | test avg. loss: 5.067
Epochs: 27901 | epoch avg. loss: 0.029 | test avg. loss: 4.812
Epochs: 27902 | epoch avg. loss: 0.027 | test avg. loss: 4.714


 56%|█████▌    | 27905/50000 [42:00<1:21:30,  4.52it/s]

Epochs: 27903 | epoch avg. loss: 0.015 | test avg. loss: 4.769
Epochs: 27904 | epoch avg. loss: 0.024 | test avg. loss: 4.723
Epochs: 27905 | epoch avg. loss: 0.047 | test avg. loss: 4.699


 56%|█████▌    | 27909/50000 [42:01<57:50,  6.37it/s]  

Epochs: 27906 | epoch avg. loss: 0.029 | test avg. loss: 4.840
Epochs: 27907 | epoch avg. loss: 0.083 | test avg. loss: 4.819
Epochs: 27908 | epoch avg. loss: 0.021 | test avg. loss: 4.804


 56%|█████▌    | 27911/50000 [42:01<50:39,  7.27it/s]

Epochs: 27909 | epoch avg. loss: 0.038 | test avg. loss: 4.742
Epochs: 27910 | epoch avg. loss: 0.014 | test avg. loss: 4.680
Epochs: 27911 | epoch avg. loss: 0.015 | test avg. loss: 4.685


 56%|█████▌    | 27915/50000 [42:01<41:10,  8.94it/s]

Epochs: 27912 | epoch avg. loss: 0.009 | test avg. loss: 4.794
Epochs: 27913 | epoch avg. loss: 0.009 | test avg. loss: 4.885
Epochs: 27914 | epoch avg. loss: 0.007 | test avg. loss: 4.847


 56%|█████▌    | 27917/50000 [42:01<37:49,  9.73it/s]

Epochs: 27915 | epoch avg. loss: 0.006 | test avg. loss: 4.784
Epochs: 27916 | epoch avg. loss: 0.007 | test avg. loss: 4.764
Epochs: 27917 | epoch avg. loss: 0.007 | test avg. loss: 4.740


 56%|█████▌    | 27921/50000 [42:02<34:59, 10.52it/s]

Epochs: 27918 | epoch avg. loss: 0.006 | test avg. loss: 4.792
Epochs: 27919 | epoch avg. loss: 0.008 | test avg. loss: 4.871
Epochs: 27920 | epoch avg. loss: 0.022 | test avg. loss: 4.768


 56%|█████▌    | 27923/50000 [42:02<33:49, 10.88it/s]

Epochs: 27921 | epoch avg. loss: 0.038 | test avg. loss: 4.805
Epochs: 27922 | epoch avg. loss: 0.173 | test avg. loss: 4.787
Epochs: 27923 | epoch avg. loss: 0.059 | test avg. loss: 4.942


 56%|█████▌    | 27927/50000 [42:02<31:29, 11.68it/s]

Epochs: 27924 | epoch avg. loss: 0.129 | test avg. loss: 4.567
Epochs: 27925 | epoch avg. loss: 0.079 | test avg. loss: 4.749
Epochs: 27926 | epoch avg. loss: 0.041 | test avg. loss: 5.213


 56%|█████▌    | 27929/50000 [42:02<30:01, 12.25it/s]

Epochs: 27927 | epoch avg. loss: 0.123 | test avg. loss: 5.060
Epochs: 27928 | epoch avg. loss: 0.056 | test avg. loss: 4.828
Epochs: 27929 | epoch avg. loss: 0.085 | test avg. loss: 4.830


 56%|█████▌    | 27933/50000 [42:03<29:10, 12.61it/s]

Epochs: 27930 | epoch avg. loss: 0.165 | test avg. loss: 4.549
Epochs: 27931 | epoch avg. loss: 0.041 | test avg. loss: 4.561
Epochs: 27932 | epoch avg. loss: 0.027 | test avg. loss: 4.916


 56%|█████▌    | 27935/50000 [42:03<28:19, 12.98it/s]

Epochs: 27933 | epoch avg. loss: 0.083 | test avg. loss: 5.094
Epochs: 27934 | epoch avg. loss: 0.054 | test avg. loss: 4.819
Epochs: 27935 | epoch avg. loss: 0.200 | test avg. loss: 4.662




Epochs: 27936 | epoch avg. loss: 0.100 | test avg. loss: 5.085
Epochs: 27937 | epoch avg. loss: 0.337 | test avg. loss: 4.778
Epochs: 27938 | epoch avg. loss: 0.076 | test avg. loss: 4.827


 56%|█████▌    | 27941/50000 [42:03<26:48, 13.71it/s]

Epochs: 27939 | epoch avg. loss: 0.143 | test avg. loss: 4.839
Epochs: 27940 | epoch avg. loss: 0.140 | test avg. loss: 4.726
Epochs: 27941 | epoch avg. loss: 0.041 | test avg. loss: 4.733


 56%|█████▌    | 27945/50000 [42:03<29:48, 12.33it/s]

Epochs: 27942 | epoch avg. loss: 0.015 | test avg. loss: 4.815
Epochs: 27943 | epoch avg. loss: 0.014 | test avg. loss: 4.828
Epochs: 27944 | epoch avg. loss: 0.028 | test avg. loss: 4.779


 56%|█████▌    | 27947/50000 [42:04<32:01, 11.48it/s]

Epochs: 27945 | epoch avg. loss: 0.013 | test avg. loss: 4.734
Epochs: 27946 | epoch avg. loss: 0.020 | test avg. loss: 4.614
Epochs: 27947 | epoch avg. loss: 0.016 | test avg. loss: 4.630


 56%|█████▌    | 27951/50000 [42:04<30:27, 12.06it/s]

Epochs: 27948 | epoch avg. loss: 0.024 | test avg. loss: 4.768
Epochs: 27949 | epoch avg. loss: 0.051 | test avg. loss: 4.687
Epochs: 27950 | epoch avg. loss: 0.067 | test avg. loss: 4.735


 56%|█████▌    | 27953/50000 [42:04<30:10, 12.18it/s]

Epochs: 27951 | epoch avg. loss: 0.112 | test avg. loss: 4.903
Epochs: 27952 | epoch avg. loss: 0.078 | test avg. loss: 4.903
Epochs: 27953 | epoch avg. loss: 0.036 | test avg. loss: 4.739


 56%|█████▌    | 27955/50000 [42:05<32:23, 11.34it/s]

Epochs: 27954 | epoch avg. loss: 0.137 | test avg. loss: 4.746
Epochs: 27955 | epoch avg. loss: 0.068 | test avg. loss: 4.961
Epochs: 27956 | epoch avg. loss: 0.080 | test avg. loss: 5.080


 56%|█████▌    | 27959/50000 [42:05<33:28, 10.97it/s]

Epochs: 27957 | epoch avg. loss: 0.311 | test avg. loss: 5.039
Epochs: 27958 | epoch avg. loss: 0.163 | test avg. loss: 5.046
Epochs: 27959 | epoch avg. loss: 0.186 | test avg. loss: 4.371


 56%|█████▌    | 27963/50000 [42:05<30:48, 11.92it/s]

Epochs: 27960 | epoch avg. loss: 0.220 | test avg. loss: 4.362
Epochs: 27961 | epoch avg. loss: 0.106 | test avg. loss: 4.889
Epochs: 27962 | epoch avg. loss: 0.181 | test avg. loss: 4.823


 56%|█████▌    | 27965/50000 [42:05<29:21, 12.51it/s]

Epochs: 27963 | epoch avg. loss: 0.212 | test avg. loss: 4.759
Epochs: 27964 | epoch avg. loss: 0.131 | test avg. loss: 4.920
Epochs: 27965 | epoch avg. loss: 0.504 | test avg. loss: 4.487


 56%|█████▌    | 27969/50000 [42:06<32:38, 11.25it/s]

Epochs: 27966 | epoch avg. loss: 0.323 | test avg. loss: 4.897
Epochs: 27967 | epoch avg. loss: 0.473 | test avg. loss: 5.587
Epochs: 27968 | epoch avg. loss: 0.938 | test avg. loss: 4.045


 56%|█████▌    | 27971/50000 [42:06<31:40, 11.59it/s]

Epochs: 27969 | epoch avg. loss: 0.204 | test avg. loss: 4.148
Epochs: 27970 | epoch avg. loss: 0.424 | test avg. loss: 4.209
Epochs: 27971 | epoch avg. loss: 0.114 | test avg. loss: 4.408


 56%|█████▌    | 27975/50000 [42:06<32:03, 11.45it/s]

Epochs: 27972 | epoch avg. loss: 0.113 | test avg. loss: 4.506
Epochs: 27973 | epoch avg. loss: 0.077 | test avg. loss: 4.552
Epochs: 27974 | epoch avg. loss: 0.093 | test avg. loss: 4.227


 56%|█████▌    | 27977/50000 [42:06<31:41, 11.58it/s]

Epochs: 27975 | epoch avg. loss: 0.056 | test avg. loss: 4.271
Epochs: 27976 | epoch avg. loss: 0.028 | test avg. loss: 4.468
Epochs: 27977 | epoch avg. loss: 0.027 | test avg. loss: 4.606


 56%|█████▌    | 27981/50000 [42:07<32:07, 11.42it/s]

Epochs: 27978 | epoch avg. loss: 0.031 | test avg. loss: 4.580
Epochs: 27979 | epoch avg. loss: 0.020 | test avg. loss: 4.448
Epochs: 27980 | epoch avg. loss: 0.015 | test avg. loss: 4.301


 56%|█████▌    | 27983/50000 [42:07<33:05, 11.09it/s]

Epochs: 27981 | epoch avg. loss: 0.013 | test avg. loss: 4.331
Epochs: 27982 | epoch avg. loss: 0.014 | test avg. loss: 4.370


 56%|█████▌    | 27985/50000 [42:07<32:37, 11.25it/s]

Epochs: 27983 | epoch avg. loss: 0.015 | test avg. loss: 4.447
Epochs: 27984 | epoch avg. loss: 0.022 | test avg. loss: 4.598
Epochs: 27985 | epoch avg. loss: 0.068 | test avg. loss: 4.281


 56%|█████▌    | 27989/50000 [42:07<29:24, 12.48it/s]

Epochs: 27986 | epoch avg. loss: 0.087 | test avg. loss: 4.175
Epochs: 27987 | epoch avg. loss: 0.065 | test avg. loss: 4.369
Epochs: 27988 | epoch avg. loss: 0.094 | test avg. loss: 4.171


 56%|█████▌    | 27991/50000 [42:08<30:10, 12.16it/s]

Epochs: 27989 | epoch avg. loss: 0.052 | test avg. loss: 4.232
Epochs: 27990 | epoch avg. loss: 0.061 | test avg. loss: 4.370
Epochs: 27991 | epoch avg. loss: 0.085 | test avg. loss: 4.247


 56%|█████▌    | 27995/50000 [42:08<28:07, 13.04it/s]

Epochs: 27992 | epoch avg. loss: 0.030 | test avg. loss: 4.079
Epochs: 27993 | epoch avg. loss: 0.032 | test avg. loss: 4.153
Epochs: 27994 | epoch avg. loss: 0.018 | test avg. loss: 4.217


 56%|█████▌    | 27997/50000 [42:08<26:56, 13.61it/s]

Epochs: 27995 | epoch avg. loss: 0.020 | test avg. loss: 4.294
Epochs: 27996 | epoch avg. loss: 0.038 | test avg. loss: 4.424
Epochs: 27997 | epoch avg. loss: 0.029 | test avg. loss: 4.391


 56%|█████▌    | 27999/50000 [42:08<27:11, 13.49it/s]

Epochs: 27998 | epoch avg. loss: 0.014 | test avg. loss: 4.325
Epochs: 27999 | epoch avg. loss: 0.036 | test avg. loss: 4.361


 56%|█████▌    | 28003/50000 [42:10<1:23:27,  4.39it/s]

Epochs: 28000 | epoch avg. loss: 0.014 | test avg. loss: 4.295
Epochs: 28001 | epoch avg. loss: 0.007 | test avg. loss: 4.321
Epochs: 28002 | epoch avg. loss: 0.020 | test avg. loss: 4.331


 56%|█████▌    | 28007/50000 [42:10<53:07,  6.90it/s]  

Epochs: 28003 | epoch avg. loss: 0.013 | test avg. loss: 4.241
Epochs: 28004 | epoch avg. loss: 0.028 | test avg. loss: 4.249
Epochs: 28005 | epoch avg. loss: 0.006 | test avg. loss: 4.260
Epochs: 28006 | epoch avg. loss: 0.006 | test avg. loss: 4.245


 56%|█████▌    | 28011/50000 [42:10<38:25,  9.54it/s]

Epochs: 28007 | epoch avg. loss: 0.007 | test avg. loss: 4.267
Epochs: 28008 | epoch avg. loss: 0.005 | test avg. loss: 4.263
Epochs: 28009 | epoch avg. loss: 0.010 | test avg. loss: 4.319
Epochs: 28010 | epoch avg. loss: 0.016 | test avg. loss: 4.320


 56%|█████▌    | 28013/50000 [42:11<36:06, 10.15it/s]

Epochs: 28011 | epoch avg. loss: 0.015 | test avg. loss: 4.284
Epochs: 28012 | epoch avg. loss: 0.053 | test avg. loss: 4.345
Epochs: 28013 | epoch avg. loss: 0.064 | test avg. loss: 4.291


 56%|█████▌    | 28017/50000 [42:11<30:06, 12.17it/s]

Epochs: 28014 | epoch avg. loss: 0.039 | test avg. loss: 4.147
Epochs: 28015 | epoch avg. loss: 0.106 | test avg. loss: 4.225
Epochs: 28016 | epoch avg. loss: 0.047 | test avg. loss: 4.228


 56%|█████▌    | 28019/50000 [42:11<28:40, 12.78it/s]

Epochs: 28017 | epoch avg. loss: 0.029 | test avg. loss: 4.147
Epochs: 28018 | epoch avg. loss: 0.038 | test avg. loss: 4.281
Epochs: 28019 | epoch avg. loss: 0.072 | test avg. loss: 4.085
Epochs: 28020 | epoch avg. loss: 0.019 | test avg. loss: 4.121




Epochs: 28021 | epoch avg. loss: 0.024 | test avg. loss: 4.408
Epochs: 28022 | epoch avg. loss: 0.104 | test avg. loss: 4.368
Epochs: 28023 | epoch avg. loss: 0.055 | test avg. loss: 4.444


 56%|█████▌    | 28027/50000 [42:11<28:17, 12.94it/s]

Epochs: 28024 | epoch avg. loss: 0.041 | test avg. loss: 4.481
Epochs: 28025 | epoch avg. loss: 0.035 | test avg. loss: 4.297
Epochs: 28026 | epoch avg. loss: 0.072 | test avg. loss: 4.288




Epochs: 28027 | epoch avg. loss: 0.020 | test avg. loss: 4.464
Epochs: 28028 | epoch avg. loss: 0.050 | test avg. loss: 4.374
Epochs: 28029 | epoch avg. loss: 0.085 | test avg. loss: 4.462


 56%|█████▌    | 28033/50000 [42:12<27:07, 13.49it/s]

Epochs: 28030 | epoch avg. loss: 0.032 | test avg. loss: 4.573
Epochs: 28031 | epoch avg. loss: 0.023 | test avg. loss: 4.394
Epochs: 28032 | epoch avg. loss: 0.017 | test avg. loss: 4.368


 56%|█████▌    | 28035/50000 [42:12<26:12, 13.97it/s]

Epochs: 28033 | epoch avg. loss: 0.013 | test avg. loss: 4.383
Epochs: 28034 | epoch avg. loss: 0.010 | test avg. loss: 4.398
Epochs: 28035 | epoch avg. loss: 0.010 | test avg. loss: 4.452
Epochs: 28036 | epoch avg. loss: 0.023 | test avg. loss: 4.268


 56%|█████▌    | 28039/50000 [42:12<25:08, 14.56it/s]

Epochs: 28037 | epoch avg. loss: 0.010 | test avg. loss: 4.174
Epochs: 28038 | epoch avg. loss: 0.011 | test avg. loss: 4.194
Epochs: 28039 | epoch avg. loss: 0.011 | test avg. loss: 4.187


 56%|█████▌    | 28043/50000 [42:13<28:41, 12.75it/s]

Epochs: 28040 | epoch avg. loss: 0.033 | test avg. loss: 4.276
Epochs: 28041 | epoch avg. loss: 0.017 | test avg. loss: 4.415
Epochs: 28042 | epoch avg. loss: 0.023 | test avg. loss: 4.377


 56%|█████▌    | 28047/50000 [42:13<26:35, 13.76it/s]

Epochs: 28043 | epoch avg. loss: 0.049 | test avg. loss: 4.455
Epochs: 28044 | epoch avg. loss: 0.031 | test avg. loss: 4.603
Epochs: 28045 | epoch avg. loss: 0.047 | test avg. loss: 4.435
Epochs: 28046 | epoch avg. loss: 0.056 | test avg. loss: 4.465


 56%|█████▌    | 28051/50000 [42:13<25:22, 14.42it/s]

Epochs: 28047 | epoch avg. loss: 0.069 | test avg. loss: 4.404
Epochs: 28048 | epoch avg. loss: 0.039 | test avg. loss: 4.241
Epochs: 28049 | epoch avg. loss: 0.061 | test avg. loss: 4.290
Epochs: 28050 | epoch avg. loss: 0.078 | test avg. loss: 4.283


 56%|█████▌    | 28053/50000 [42:13<25:09, 14.54it/s]

Epochs: 28051 | epoch avg. loss: 0.040 | test avg. loss: 4.406
Epochs: 28052 | epoch avg. loss: 0.132 | test avg. loss: 4.508
Epochs: 28053 | epoch avg. loss: 0.020 | test avg. loss: 4.539


 56%|█████▌    | 28057/50000 [42:14<25:31, 14.32it/s]

Epochs: 28054 | epoch avg. loss: 0.023 | test avg. loss: 4.397
Epochs: 28055 | epoch avg. loss: 0.034 | test avg. loss: 4.407
Epochs: 28056 | epoch avg. loss: 0.056 | test avg. loss: 4.276
Epochs: 28057 | epoch avg. loss: 0.016 | test avg. loss: 4.259


 56%|█████▌    | 28061/50000 [42:14<24:38, 14.83it/s]

Epochs: 28058 | epoch avg. loss: 0.014 | test avg. loss: 4.413
Epochs: 28059 | epoch avg. loss: 0.045 | test avg. loss: 4.294
Epochs: 28060 | epoch avg. loss: 0.026 | test avg. loss: 4.217
Epochs: 28061 | epoch avg. loss: 0.020 | test avg. loss: 4.304


 56%|█████▌    | 28065/50000 [42:14<26:11, 13.96it/s]

Epochs: 28062 | epoch avg. loss: 0.037 | test avg. loss: 4.203
Epochs: 28063 | epoch avg. loss: 0.016 | test avg. loss: 4.231
Epochs: 28064 | epoch avg. loss: 0.052 | test avg. loss: 4.296


 56%|█████▌    | 28067/50000 [42:14<26:13, 13.94it/s]

Epochs: 28065 | epoch avg. loss: 0.025 | test avg. loss: 4.360
Epochs: 28066 | epoch avg. loss: 0.016 | test avg. loss: 4.377
Epochs: 28067 | epoch avg. loss: 0.037 | test avg. loss: 4.556


 56%|█████▌    | 28071/50000 [42:15<26:32, 13.77it/s]

Epochs: 28068 | epoch avg. loss: 0.062 | test avg. loss: 4.516
Epochs: 28069 | epoch avg. loss: 0.024 | test avg. loss: 4.349
Epochs: 28070 | epoch avg. loss: 0.047 | test avg. loss: 4.341


 56%|█████▌    | 28073/50000 [42:15<25:48, 14.16it/s]

Epochs: 28071 | epoch avg. loss: 0.029 | test avg. loss: 4.384
Epochs: 28072 | epoch avg. loss: 0.031 | test avg. loss: 4.358
Epochs: 28073 | epoch avg. loss: 0.029 | test avg. loss: 4.389


 56%|█████▌    | 28077/50000 [42:15<26:15, 13.91it/s]

Epochs: 28074 | epoch avg. loss: 0.017 | test avg. loss: 4.479
Epochs: 28075 | epoch avg. loss: 0.056 | test avg. loss: 4.228
Epochs: 28076 | epoch avg. loss: 0.038 | test avg. loss: 4.176


 56%|█████▌    | 28079/50000 [42:15<28:41, 12.73it/s]

Epochs: 28077 | epoch avg. loss: 0.050 | test avg. loss: 4.318
Epochs: 28078 | epoch avg. loss: 0.064 | test avg. loss: 4.316
Epochs: 28079 | epoch avg. loss: 0.025 | test avg. loss: 4.327


 56%|█████▌    | 28083/50000 [42:16<28:36, 12.77it/s]

Epochs: 28080 | epoch avg. loss: 0.087 | test avg. loss: 4.269
Epochs: 28081 | epoch avg. loss: 0.037 | test avg. loss: 4.238
Epochs: 28082 | epoch avg. loss: 0.041 | test avg. loss: 4.223


 56%|█████▌    | 28085/50000 [42:16<27:59, 13.05it/s]

Epochs: 28083 | epoch avg. loss: 0.111 | test avg. loss: 4.303
Epochs: 28084 | epoch avg. loss: 0.123 | test avg. loss: 4.517
Epochs: 28085 | epoch avg. loss: 0.091 | test avg. loss: 4.504


 56%|█████▌    | 28089/50000 [42:16<27:23, 13.33it/s]

Epochs: 28086 | epoch avg. loss: 0.124 | test avg. loss: 4.471
Epochs: 28087 | epoch avg. loss: 0.071 | test avg. loss: 4.475
Epochs: 28088 | epoch avg. loss: 0.044 | test avg. loss: 4.373


 56%|█████▌    | 28091/50000 [42:16<26:37, 13.72it/s]

Epochs: 28089 | epoch avg. loss: 0.047 | test avg. loss: 4.538
Epochs: 28090 | epoch avg. loss: 0.024 | test avg. loss: 4.596
Epochs: 28091 | epoch avg. loss: 0.015 | test avg. loss: 4.609


 56%|█████▌    | 28095/50000 [42:16<28:18, 12.90it/s]

Epochs: 28092 | epoch avg. loss: 0.049 | test avg. loss: 4.619
Epochs: 28093 | epoch avg. loss: 0.041 | test avg. loss: 4.440
Epochs: 28094 | epoch avg. loss: 0.053 | test avg. loss: 4.510


 56%|█████▌    | 28097/50000 [42:17<29:20, 12.44it/s]

Epochs: 28095 | epoch avg. loss: 0.216 | test avg. loss: 4.685
Epochs: 28096 | epoch avg. loss: 0.123 | test avg. loss: 4.841
Epochs: 28097 | epoch avg. loss: 0.086 | test avg. loss: 4.566


 56%|█████▌    | 28099/50000 [42:17<30:17, 12.05it/s]

Epochs: 28098 | epoch avg. loss: 0.047 | test avg. loss: 4.666
Epochs: 28099 | epoch avg. loss: 0.126 | test avg. loss: 4.519


 56%|█████▌    | 28103/50000 [42:19<1:40:03,  3.65it/s]

Epochs: 28100 | epoch avg. loss: 0.033 | test avg. loss: 4.682
Epochs: 28101 | epoch avg. loss: 0.018 | test avg. loss: 4.878
Epochs: 28102 | epoch avg. loss: 0.020 | test avg. loss: 4.810


 56%|█████▌    | 28105/50000 [42:19<1:19:43,  4.58it/s]

Epochs: 28103 | epoch avg. loss: 0.031 | test avg. loss: 4.612
Epochs: 28104 | epoch avg. loss: 0.023 | test avg. loss: 4.459
Epochs: 28105 | epoch avg. loss: 0.027 | test avg. loss: 4.291


 56%|█████▌    | 28107/50000 [42:20<1:05:32,  5.57it/s]

Epochs: 28106 | epoch avg. loss: 0.018 | test avg. loss: 4.182
Epochs: 28107 | epoch avg. loss: 0.019 | test avg. loss: 4.238
Epochs: 28108 | epoch avg. loss: 0.011 | test avg. loss: 4.334


 56%|█████▌    | 28111/50000 [42:20<48:09,  7.58it/s]

Epochs: 28109 | epoch avg. loss: 0.011 | test avg. loss: 4.302
Epochs: 28110 | epoch avg. loss: 0.027 | test avg. loss: 4.375
Epochs: 28111 | epoch avg. loss: 0.012 | test avg. loss: 4.390


 56%|█████▌    | 28115/50000 [42:20<39:14,  9.30it/s]

Epochs: 28112 | epoch avg. loss: 0.022 | test avg. loss: 4.382
Epochs: 28113 | epoch avg. loss: 0.033 | test avg. loss: 4.545
Epochs: 28114 | epoch avg. loss: 0.070 | test avg. loss: 4.341


 56%|█████▌    | 28117/50000 [42:20<36:51,  9.89it/s]

Epochs: 28115 | epoch avg. loss: 0.039 | test avg. loss: 4.224
Epochs: 28116 | epoch avg. loss: 0.029 | test avg. loss: 4.233
Epochs: 28117 | epoch avg. loss: 0.084 | test avg. loss: 4.127


 56%|█████▌    | 28121/50000 [42:21<33:44, 10.81it/s]

Epochs: 28118 | epoch avg. loss: 0.044 | test avg. loss: 4.151
Epochs: 28119 | epoch avg. loss: 0.165 | test avg. loss: 4.299
Epochs: 28120 | epoch avg. loss: 0.090 | test avg. loss: 4.337


 56%|█████▌    | 28123/50000 [42:21<32:09, 11.34it/s]

Epochs: 28121 | epoch avg. loss: 0.052 | test avg. loss: 4.243
Epochs: 28122 | epoch avg. loss: 0.126 | test avg. loss: 4.326
Epochs: 28123 | epoch avg. loss: 0.053 | test avg. loss: 4.364


 56%|█████▋    | 28127/50000 [42:21<28:47, 12.66it/s]

Epochs: 28124 | epoch avg. loss: 0.058 | test avg. loss: 4.276
Epochs: 28125 | epoch avg. loss: 0.182 | test avg. loss: 4.211
Epochs: 28126 | epoch avg. loss: 0.157 | test avg. loss: 4.157
Epochs: 28127 | epoch avg. loss: 0.070 | test avg. loss: 4.307


 56%|█████▋    | 28131/50000 [42:21<26:45, 13.62it/s]

Epochs: 28128 | epoch avg. loss: 0.228 | test avg. loss: 4.338
Epochs: 28129 | epoch avg. loss: 0.186 | test avg. loss: 4.002
Epochs: 28130 | epoch avg. loss: 0.089 | test avg. loss: 3.986


 56%|█████▋    | 28133/50000 [42:21<27:34, 13.22it/s]

Epochs: 28131 | epoch avg. loss: 0.098 | test avg. loss: 4.517
Epochs: 28132 | epoch avg. loss: 0.350 | test avg. loss: 4.455
Epochs: 28133 | epoch avg. loss: 0.119 | test avg. loss: 4.812


 56%|█████▋    | 28137/50000 [42:22<29:03, 12.54it/s]

Epochs: 28134 | epoch avg. loss: 0.240 | test avg. loss: 4.892
Epochs: 28135 | epoch avg. loss: 0.259 | test avg. loss: 4.456
Epochs: 28136 | epoch avg. loss: 0.126 | test avg. loss: 4.350


 56%|█████▋    | 28139/50000 [42:22<28:43, 12.68it/s]

Epochs: 28137 | epoch avg. loss: 0.130 | test avg. loss: 4.860
Epochs: 28138 | epoch avg. loss: 0.380 | test avg. loss: 4.104
Epochs: 28139 | epoch avg. loss: 0.189 | test avg. loss: 4.115


 56%|█████▋    | 28143/50000 [42:22<30:46, 11.84it/s]

Epochs: 28140 | epoch avg. loss: 0.175 | test avg. loss: 5.229
Epochs: 28141 | epoch avg. loss: 0.479 | test avg. loss: 4.913
Epochs: 28142 | epoch avg. loss: 0.699 | test avg. loss: 4.861


 56%|█████▋    | 28145/50000 [42:22<30:26, 11.96it/s]

Epochs: 28143 | epoch avg. loss: 0.174 | test avg. loss: 5.254
Epochs: 28144 | epoch avg. loss: 0.177 | test avg. loss: 4.742
Epochs: 28145 | epoch avg. loss: 0.213 | test avg. loss: 4.184


 56%|█████▋    | 28149/50000 [42:23<31:22, 11.61it/s]

Epochs: 28146 | epoch avg. loss: 0.089 | test avg. loss: 4.214
Epochs: 28147 | epoch avg. loss: 0.132 | test avg. loss: 4.241
Epochs: 28148 | epoch avg. loss: 0.288 | test avg. loss: 4.428


 56%|█████▋    | 28151/50000 [42:23<29:47, 12.22it/s]

Epochs: 28149 | epoch avg. loss: 0.054 | test avg. loss: 4.736
Epochs: 28150 | epoch avg. loss: 0.061 | test avg. loss: 4.594
Epochs: 28151 | epoch avg. loss: 0.036 | test avg. loss: 4.622


 56%|█████▋    | 28155/50000 [42:23<29:39, 12.28it/s]

Epochs: 28152 | epoch avg. loss: 0.025 | test avg. loss: 4.429
Epochs: 28153 | epoch avg. loss: 0.082 | test avg. loss: 4.471
Epochs: 28154 | epoch avg. loss: 0.075 | test avg. loss: 4.483


 56%|█████▋    | 28157/50000 [42:23<30:05, 12.10it/s]

Epochs: 28155 | epoch avg. loss: 0.038 | test avg. loss: 4.357
Epochs: 28156 | epoch avg. loss: 0.041 | test avg. loss: 4.454
Epochs: 28157 | epoch avg. loss: 0.039 | test avg. loss: 4.216


 56%|█████▋    | 28161/50000 [42:24<31:39, 11.50it/s]

Epochs: 28158 | epoch avg. loss: 0.025 | test avg. loss: 4.212
Epochs: 28159 | epoch avg. loss: 0.014 | test avg. loss: 4.352
Epochs: 28160 | epoch avg. loss: 0.017 | test avg. loss: 4.415


 56%|█████▋    | 28163/50000 [42:24<31:44, 11.47it/s]

Epochs: 28161 | epoch avg. loss: 0.028 | test avg. loss: 4.492
Epochs: 28162 | epoch avg. loss: 0.018 | test avg. loss: 4.651
Epochs: 28163 | epoch avg. loss: 0.050 | test avg. loss: 4.461


 56%|█████▋    | 28167/50000 [42:24<30:15, 12.03it/s]

Epochs: 28164 | epoch avg. loss: 0.017 | test avg. loss: 4.386
Epochs: 28165 | epoch avg. loss: 0.016 | test avg. loss: 4.482
Epochs: 28166 | epoch avg. loss: 0.028 | test avg. loss: 4.403


 56%|█████▋    | 28169/50000 [42:25<29:54, 12.16it/s]

Epochs: 28167 | epoch avg. loss: 0.021 | test avg. loss: 4.470
Epochs: 28168 | epoch avg. loss: 0.015 | test avg. loss: 4.580
Epochs: 28169 | epoch avg. loss: 0.025 | test avg. loss: 4.462


 56%|█████▋    | 28173/50000 [42:25<28:48, 12.63it/s]

Epochs: 28170 | epoch avg. loss: 0.068 | test avg. loss: 4.404
Epochs: 28171 | epoch avg. loss: 0.020 | test avg. loss: 4.346
Epochs: 28172 | epoch avg. loss: 0.029 | test avg. loss: 4.135


 56%|█████▋    | 28175/50000 [42:25<27:34, 13.19it/s]

Epochs: 28173 | epoch avg. loss: 0.038 | test avg. loss: 4.341
Epochs: 28174 | epoch avg. loss: 0.102 | test avg. loss: 4.303
Epochs: 28175 | epoch avg. loss: 0.042 | test avg. loss: 4.423


 56%|█████▋    | 28179/50000 [42:25<29:14, 12.44it/s]

Epochs: 28176 | epoch avg. loss: 0.055 | test avg. loss: 4.511
Epochs: 28177 | epoch avg. loss: 0.054 | test avg. loss: 4.269
Epochs: 28178 | epoch avg. loss: 0.097 | test avg. loss: 4.241


 56%|█████▋    | 28181/50000 [42:25<29:57, 12.14it/s]

Epochs: 28179 | epoch avg. loss: 0.021 | test avg. loss: 4.357
Epochs: 28180 | epoch avg. loss: 0.026 | test avg. loss: 4.358
Epochs: 28181 | epoch avg. loss: 0.012 | test avg. loss: 4.474


 56%|█████▋    | 28185/50000 [42:26<30:39, 11.86it/s]

Epochs: 28182 | epoch avg. loss: 0.018 | test avg. loss: 4.360
Epochs: 28183 | epoch avg. loss: 0.041 | test avg. loss: 4.376
Epochs: 28184 | epoch avg. loss: 0.016 | test avg. loss: 4.348




Epochs: 28185 | epoch avg. loss: 0.013 | test avg. loss: 4.239
Epochs: 28186 | epoch avg. loss: 0.027 | test avg. loss: 4.247
Epochs: 28187 | epoch avg. loss: 0.021 | test avg. loss: 4.242


 56%|█████▋    | 28191/50000 [42:26<27:10, 13.38it/s]

Epochs: 28188 | epoch avg. loss: 0.009 | test avg. loss: 4.349
Epochs: 28189 | epoch avg. loss: 0.005 | test avg. loss: 4.464
Epochs: 28190 | epoch avg. loss: 0.007 | test avg. loss: 4.495


 56%|█████▋    | 28193/50000 [42:26<26:56, 13.49it/s]

Epochs: 28191 | epoch avg. loss: 0.008 | test avg. loss: 4.463
Epochs: 28192 | epoch avg. loss: 0.005 | test avg. loss: 4.395
Epochs: 28193 | epoch avg. loss: 0.009 | test avg. loss: 4.341


 56%|█████▋    | 28197/50000 [42:27<27:59, 12.98it/s]

Epochs: 28194 | epoch avg. loss: 0.006 | test avg. loss: 4.304
Epochs: 28195 | epoch avg. loss: 0.009 | test avg. loss: 4.377
Epochs: 28196 | epoch avg. loss: 0.008 | test avg. loss: 4.457


 56%|█████▋    | 28199/50000 [42:27<27:39, 13.14it/s]

Epochs: 28197 | epoch avg. loss: 0.015 | test avg. loss: 4.401
Epochs: 28198 | epoch avg. loss: 0.010 | test avg. loss: 4.372
Epochs: 28199 | epoch avg. loss: 0.007 | test avg. loss: 4.396


 56%|█████▋    | 28203/50000 [42:29<1:25:59,  4.22it/s]

Epochs: 28200 | epoch avg. loss: 0.019 | test avg. loss: 4.355
Epochs: 28201 | epoch avg. loss: 0.007 | test avg. loss: 4.314
Epochs: 28202 | epoch avg. loss: 0.015 | test avg. loss: 4.409


 56%|█████▋    | 28207/50000 [42:29<54:07,  6.71it/s]  

Epochs: 28203 | epoch avg. loss: 0.016 | test avg. loss: 4.363
Epochs: 28204 | epoch avg. loss: 0.016 | test avg. loss: 4.354
Epochs: 28205 | epoch avg. loss: 0.010 | test avg. loss: 4.418
Epochs: 28206 | epoch avg. loss: 0.014 | test avg. loss: 4.377


 56%|█████▋    | 28211/50000 [42:29<38:02,  9.55it/s]

Epochs: 28207 | epoch avg. loss: 0.007 | test avg. loss: 4.405
Epochs: 28208 | epoch avg. loss: 0.006 | test avg. loss: 4.472
Epochs: 28209 | epoch avg. loss: 0.016 | test avg. loss: 4.369
Epochs: 28210 | epoch avg. loss: 0.005 | test avg. loss: 4.267


 56%|█████▋    | 28213/50000 [42:29<34:12, 10.61it/s]

Epochs: 28211 | epoch avg. loss: 0.007 | test avg. loss: 4.344
Epochs: 28212 | epoch avg. loss: 0.026 | test avg. loss: 4.229
Epochs: 28213 | epoch avg. loss: 0.028 | test avg. loss: 4.268


 56%|█████▋    | 28217/50000 [42:30<31:44, 11.44it/s]

Epochs: 28214 | epoch avg. loss: 0.015 | test avg. loss: 4.397
Epochs: 28215 | epoch avg. loss: 0.036 | test avg. loss: 4.256
Epochs: 28216 | epoch avg. loss: 0.027 | test avg. loss: 4.213


 56%|█████▋    | 28221/50000 [42:30<27:39, 13.13it/s]

Epochs: 28217 | epoch avg. loss: 0.016 | test avg. loss: 4.400
Epochs: 28218 | epoch avg. loss: 0.060 | test avg. loss: 4.231
Epochs: 28219 | epoch avg. loss: 0.048 | test avg. loss: 4.249
Epochs: 28220 | epoch avg. loss: 0.032 | test avg. loss: 4.464


 56%|█████▋    | 28225/50000 [42:30<25:25, 14.27it/s]

Epochs: 28221 | epoch avg. loss: 0.067 | test avg. loss: 4.288
Epochs: 28222 | epoch avg. loss: 0.018 | test avg. loss: 4.267
Epochs: 28223 | epoch avg. loss: 0.030 | test avg. loss: 4.253
Epochs: 28224 | epoch avg. loss: 0.038 | test avg. loss: 4.097


 56%|█████▋    | 28227/50000 [42:30<24:55, 14.56it/s]

Epochs: 28225 | epoch avg. loss: 0.028 | test avg. loss: 4.239
Epochs: 28226 | epoch avg. loss: 0.030 | test avg. loss: 4.460
Epochs: 28227 | epoch avg. loss: 0.025 | test avg. loss: 4.510


 56%|█████▋    | 28231/50000 [42:31<25:53, 14.01it/s]

Epochs: 28228 | epoch avg. loss: 0.046 | test avg. loss: 4.613
Epochs: 28229 | epoch avg. loss: 0.041 | test avg. loss: 4.488
Epochs: 28230 | epoch avg. loss: 0.030 | test avg. loss: 4.336


 56%|█████▋    | 28235/50000 [42:31<25:01, 14.49it/s]

Epochs: 28231 | epoch avg. loss: 0.079 | test avg. loss: 4.293
Epochs: 28232 | epoch avg. loss: 0.057 | test avg. loss: 4.294
Epochs: 28233 | epoch avg. loss: 0.037 | test avg. loss: 4.281
Epochs: 28234 | epoch avg. loss: 0.065 | test avg. loss: 4.454


 56%|█████▋    | 28239/50000 [42:31<24:00, 15.10it/s]

Epochs: 28235 | epoch avg. loss: 0.031 | test avg. loss: 4.802
Epochs: 28236 | epoch avg. loss: 0.097 | test avg. loss: 4.496
Epochs: 28237 | epoch avg. loss: 0.038 | test avg. loss: 4.447
Epochs: 28238 | epoch avg. loss: 0.017 | test avg. loss: 4.487


 56%|█████▋    | 28241/50000 [42:31<24:21, 14.89it/s]

Epochs: 28239 | epoch avg. loss: 0.037 | test avg. loss: 4.447
Epochs: 28240 | epoch avg. loss: 0.009 | test avg. loss: 4.506
Epochs: 28241 | epoch avg. loss: 0.010 | test avg. loss: 4.583


 56%|█████▋    | 28245/50000 [42:31<26:38, 13.61it/s]

Epochs: 28242 | epoch avg. loss: 0.024 | test avg. loss: 4.407
Epochs: 28243 | epoch avg. loss: 0.022 | test avg. loss: 4.336
Epochs: 28244 | epoch avg. loss: 0.016 | test avg. loss: 4.425


 56%|█████▋    | 28247/50000 [42:32<26:48, 13.53it/s]

Epochs: 28245 | epoch avg. loss: 0.034 | test avg. loss: 4.321
Epochs: 28246 | epoch avg. loss: 0.082 | test avg. loss: 4.417
Epochs: 28247 | epoch avg. loss: 0.021 | test avg. loss: 4.573


 57%|█████▋    | 28251/50000 [42:32<26:38, 13.61it/s]

Epochs: 28248 | epoch avg. loss: 0.030 | test avg. loss: 4.471
Epochs: 28249 | epoch avg. loss: 0.079 | test avg. loss: 4.493
Epochs: 28250 | epoch avg. loss: 0.031 | test avg. loss: 4.463


 57%|█████▋    | 28253/50000 [42:32<26:06, 13.88it/s]

Epochs: 28251 | epoch avg. loss: 0.017 | test avg. loss: 4.376
Epochs: 28252 | epoch avg. loss: 0.015 | test avg. loss: 4.457
Epochs: 28253 | epoch avg. loss: 0.024 | test avg. loss: 4.415


 57%|█████▋    | 28257/50000 [42:32<25:44, 14.08it/s]

Epochs: 28254 | epoch avg. loss: 0.045 | test avg. loss: 4.465
Epochs: 28255 | epoch avg. loss: 0.028 | test avg. loss: 4.688
Epochs: 28256 | epoch avg. loss: 0.071 | test avg. loss: 4.492


 57%|█████▋    | 28259/50000 [42:33<25:52, 14.01it/s]

Epochs: 28257 | epoch avg. loss: 0.153 | test avg. loss: 4.413
Epochs: 28258 | epoch avg. loss: 0.036 | test avg. loss: 4.491
Epochs: 28259 | epoch avg. loss: 0.034 | test avg. loss: 4.354
Epochs: 28260 | epoch avg. loss: 0.058 | test avg. loss: 4.322


 57%|█████▋    | 28265/50000 [42:33<24:13, 14.95it/s]

Epochs: 28261 | epoch avg. loss: 0.013 | test avg. loss: 4.349
Epochs: 28262 | epoch avg. loss: 0.019 | test avg. loss: 4.336
Epochs: 28263 | epoch avg. loss: 0.016 | test avg. loss: 4.472
Epochs: 28264 | epoch avg. loss: 0.013 | test avg. loss: 4.529


 57%|█████▋    | 28269/50000 [42:33<23:39, 15.31it/s]

Epochs: 28265 | epoch avg. loss: 0.013 | test avg. loss: 4.374
Epochs: 28266 | epoch avg. loss: 0.024 | test avg. loss: 4.287
Epochs: 28267 | epoch avg. loss: 0.009 | test avg. loss: 4.217
Epochs: 28268 | epoch avg. loss: 0.007 | test avg. loss: 4.218


 57%|█████▋    | 28271/50000 [42:33<24:06, 15.02it/s]

Epochs: 28269 | epoch avg. loss: 0.008 | test avg. loss: 4.317
Epochs: 28270 | epoch avg. loss: 0.011 | test avg. loss: 4.311
Epochs: 28271 | epoch avg. loss: 0.007 | test avg. loss: 4.276


 57%|█████▋    | 28275/50000 [42:34<26:14, 13.80it/s]

Epochs: 28272 | epoch avg. loss: 0.006 | test avg. loss: 4.293
Epochs: 28273 | epoch avg. loss: 0.011 | test avg. loss: 4.242
Epochs: 28274 | epoch avg. loss: 0.015 | test avg. loss: 4.338


 57%|█████▋    | 28277/50000 [42:34<26:06, 13.87it/s]

Epochs: 28275 | epoch avg. loss: 0.011 | test avg. loss: 4.561
Epochs: 28276 | epoch avg. loss: 0.037 | test avg. loss: 4.413
Epochs: 28277 | epoch avg. loss: 0.031 | test avg. loss: 4.387




Epochs: 28278 | epoch avg. loss: 0.014 | test avg. loss: 4.476
Epochs: 28279 | epoch avg. loss: 0.036 | test avg. loss: 4.438
Epochs: 28280 | epoch avg. loss: 0.007 | test avg. loss: 4.424


 57%|█████▋    | 28285/50000 [42:34<24:29, 14.77it/s]

Epochs: 28281 | epoch avg. loss: 0.011 | test avg. loss: 4.410
Epochs: 28282 | epoch avg. loss: 0.005 | test avg. loss: 4.318
Epochs: 28283 | epoch avg. loss: 0.005 | test avg. loss: 4.298
Epochs: 28284 | epoch avg. loss: 0.004 | test avg. loss: 4.305


 57%|█████▋    | 28287/50000 [42:35<26:02, 13.90it/s]

Epochs: 28285 | epoch avg. loss: 0.007 | test avg. loss: 4.361
Epochs: 28286 | epoch avg. loss: 0.004 | test avg. loss: 4.393
Epochs: 28287 | epoch avg. loss: 0.005 | test avg. loss: 4.379


 57%|█████▋    | 28291/50000 [42:35<26:57, 13.42it/s]

Epochs: 28288 | epoch avg. loss: 0.004 | test avg. loss: 4.365
Epochs: 28289 | epoch avg. loss: 0.004 | test avg. loss: 4.316
Epochs: 28290 | epoch avg. loss: 0.010 | test avg. loss: 4.346


 57%|█████▋    | 28293/50000 [42:35<26:32, 13.63it/s]

Epochs: 28291 | epoch avg. loss: 0.005 | test avg. loss: 4.394
Epochs: 28292 | epoch avg. loss: 0.007 | test avg. loss: 4.335
Epochs: 28293 | epoch avg. loss: 0.027 | test avg. loss: 4.358


 57%|█████▋    | 28297/50000 [42:35<25:39, 14.10it/s]

Epochs: 28294 | epoch avg. loss: 0.012 | test avg. loss: 4.467
Epochs: 28295 | epoch avg. loss: 0.022 | test avg. loss: 4.446
Epochs: 28296 | epoch avg. loss: 0.007 | test avg. loss: 4.431
Epochs: 28297 | epoch avg. loss: 0.017 | test avg. loss: 4.499


 57%|█████▋    | 28299/50000 [42:35<25:23, 14.24it/s]

Epochs: 28298 | epoch avg. loss: 0.022 | test avg. loss: 4.469
Epochs: 28299 | epoch avg. loss: 0.008 | test avg. loss: 4.450


 57%|█████▋    | 28303/50000 [42:37<1:30:22,  4.00it/s]

Epochs: 28300 | epoch avg. loss: 0.010 | test avg. loss: 4.526
Epochs: 28301 | epoch avg. loss: 0.015 | test avg. loss: 4.402
Epochs: 28302 | epoch avg. loss: 0.032 | test avg. loss: 4.426


 57%|█████▋    | 28305/50000 [42:38<1:12:49,  4.97it/s]

Epochs: 28303 | epoch avg. loss: 0.050 | test avg. loss: 4.571
Epochs: 28304 | epoch avg. loss: 0.043 | test avg. loss: 4.577
Epochs: 28305 | epoch avg. loss: 0.030 | test avg. loss: 4.440


 57%|█████▋    | 28309/50000 [42:38<47:46,  7.57it/s]

Epochs: 28306 | epoch avg. loss: 0.034 | test avg. loss: 4.483
Epochs: 28307 | epoch avg. loss: 0.023 | test avg. loss: 4.586
Epochs: 28308 | epoch avg. loss: 0.066 | test avg. loss: 4.253
Epochs: 28309 | epoch avg. loss: 0.035 | test avg. loss: 4.137


 57%|█████▋    | 28313/50000 [42:38<37:05,  9.75it/s]

Epochs: 28310 | epoch avg. loss: 0.029 | test avg. loss: 4.318
Epochs: 28311 | epoch avg. loss: 0.062 | test avg. loss: 4.283
Epochs: 28312 | epoch avg. loss: 0.059 | test avg. loss: 4.483


 57%|█████▋    | 28317/50000 [42:38<29:59, 12.05it/s]

Epochs: 28313 | epoch avg. loss: 0.032 | test avg. loss: 4.685
Epochs: 28314 | epoch avg. loss: 0.048 | test avg. loss: 4.418
Epochs: 28315 | epoch avg. loss: 0.086 | test avg. loss: 4.342
Epochs: 28316 | epoch avg. loss: 0.031 | test avg. loss: 4.537


 57%|█████▋    | 28319/50000 [42:39<31:46, 11.37it/s]

Epochs: 28317 | epoch avg. loss: 0.080 | test avg. loss: 4.493
Epochs: 28318 | epoch avg. loss: 0.147 | test avg. loss: 4.631


 57%|█████▋    | 28321/50000 [42:39<32:36, 11.08it/s]

Epochs: 28319 | epoch avg. loss: 0.088 | test avg. loss: 4.797
Epochs: 28320 | epoch avg. loss: 0.081 | test avg. loss: 4.542
Epochs: 28321 | epoch avg. loss: 0.112 | test avg. loss: 4.674


 57%|█████▋    | 28325/50000 [42:39<31:34, 11.44it/s]

Epochs: 28322 | epoch avg. loss: 0.186 | test avg. loss: 4.838
Epochs: 28323 | epoch avg. loss: 0.197 | test avg. loss: 4.832
Epochs: 28324 | epoch avg. loss: 0.097 | test avg. loss: 4.793


 57%|█████▋    | 28327/50000 [42:39<31:18, 11.54it/s]

Epochs: 28325 | epoch avg. loss: 0.195 | test avg. loss: 4.899
Epochs: 28326 | epoch avg. loss: 0.164 | test avg. loss: 4.730
Epochs: 28327 | epoch avg. loss: 0.115 | test avg. loss: 4.828


 57%|█████▋    | 28329/50000 [42:40<30:39, 11.78it/s]

Epochs: 28328 | epoch avg. loss: 0.522 | test avg. loss: 4.427
Epochs: 28329 | epoch avg. loss: 0.262 | test avg. loss: 4.231
Epochs: 28330 | epoch avg. loss: 0.128 | test avg. loss: 4.216


 57%|█████▋    | 28333/50000 [42:40<30:53, 11.69it/s]

Epochs: 28331 | epoch avg. loss: 0.176 | test avg. loss: 4.346
Epochs: 28332 | epoch avg. loss: 0.130 | test avg. loss: 4.304
Epochs: 28333 | epoch avg. loss: 0.190 | test avg. loss: 4.401


 57%|█████▋    | 28337/50000 [42:40<27:39, 13.05it/s]

Epochs: 28334 | epoch avg. loss: 0.085 | test avg. loss: 4.608
Epochs: 28335 | epoch avg. loss: 0.091 | test avg. loss: 4.334
Epochs: 28336 | epoch avg. loss: 0.039 | test avg. loss: 4.353


 57%|█████▋    | 28339/50000 [42:40<28:05, 12.85it/s]

Epochs: 28337 | epoch avg. loss: 0.031 | test avg. loss: 4.296
Epochs: 28338 | epoch avg. loss: 0.026 | test avg. loss: 4.152
Epochs: 28339 | epoch avg. loss: 0.124 | test avg. loss: 4.099


 57%|█████▋    | 28343/50000 [42:40<29:15, 12.34it/s]

Epochs: 28340 | epoch avg. loss: 0.024 | test avg. loss: 4.245
Epochs: 28341 | epoch avg. loss: 0.044 | test avg. loss: 4.106
Epochs: 28342 | epoch avg. loss: 0.072 | test avg. loss: 4.103




Epochs: 28343 | epoch avg. loss: 0.035 | test avg. loss: 4.100
Epochs: 28344 | epoch avg. loss: 0.048 | test avg. loss: 4.171
Epochs: 28345 | epoch avg. loss: 0.204 | test avg. loss: 4.443


 57%|█████▋    | 28349/50000 [42:41<26:03, 13.84it/s]

Epochs: 28346 | epoch avg. loss: 0.253 | test avg. loss: 4.267
Epochs: 28347 | epoch avg. loss: 0.104 | test avg. loss: 4.265
Epochs: 28348 | epoch avg. loss: 0.249 | test avg. loss: 4.654
Epochs: 28349 | epoch avg. loss: 0.242 | test avg. loss: 4.300


 57%|█████▋    | 28353/50000 [42:41<24:40, 14.62it/s]

Epochs: 28350 | epoch avg. loss: 0.064 | test avg. loss: 3.927
Epochs: 28351 | epoch avg. loss: 0.067 | test avg. loss: 3.985
Epochs: 28352 | epoch avg. loss: 0.055 | test avg. loss: 4.231


 57%|█████▋    | 28355/50000 [42:41<25:33, 14.12it/s]

Epochs: 28353 | epoch avg. loss: 0.065 | test avg. loss: 4.138
Epochs: 28354 | epoch avg. loss: 0.026 | test avg. loss: 4.140
Epochs: 28355 | epoch avg. loss: 0.009 | test avg. loss: 4.158


 57%|█████▋    | 28359/50000 [42:42<28:15, 12.77it/s]

Epochs: 28356 | epoch avg. loss: 0.009 | test avg. loss: 4.206
Epochs: 28357 | epoch avg. loss: 0.010 | test avg. loss: 4.253
Epochs: 28358 | epoch avg. loss: 0.012 | test avg. loss: 4.258


 57%|█████▋    | 28363/50000 [42:42<25:11, 14.31it/s]

Epochs: 28359 | epoch avg. loss: 0.009 | test avg. loss: 4.185
Epochs: 28360 | epoch avg. loss: 0.009 | test avg. loss: 4.036
Epochs: 28361 | epoch avg. loss: 0.012 | test avg. loss: 4.021
Epochs: 28362 | epoch avg. loss: 0.006 | test avg. loss: 4.025


 57%|█████▋    | 28365/50000 [42:42<24:19, 14.83it/s]

Epochs: 28363 | epoch avg. loss: 0.006 | test avg. loss: 4.053
Epochs: 28364 | epoch avg. loss: 0.006 | test avg. loss: 4.114
Epochs: 28365 | epoch avg. loss: 0.012 | test avg. loss: 4.058


 57%|█████▋    | 28369/50000 [42:42<26:18, 13.70it/s]

Epochs: 28366 | epoch avg. loss: 0.014 | test avg. loss: 4.099
Epochs: 28367 | epoch avg. loss: 0.012 | test avg. loss: 4.162
Epochs: 28368 | epoch avg. loss: 0.026 | test avg. loss: 4.083


 57%|█████▋    | 28371/50000 [42:43<28:38, 12.58it/s]

Epochs: 28369 | epoch avg. loss: 0.028 | test avg. loss: 4.096
Epochs: 28370 | epoch avg. loss: 0.011 | test avg. loss: 4.131
Epochs: 28371 | epoch avg. loss: 0.014 | test avg. loss: 4.019


 57%|█████▋    | 28373/50000 [42:43<29:31, 12.21it/s]

Epochs: 28372 | epoch avg. loss: 0.025 | test avg. loss: 4.039
Epochs: 28373 | epoch avg. loss: 0.009 | test avg. loss: 4.147


 57%|█████▋    | 28377/50000 [42:43<32:37, 11.05it/s]

Epochs: 28374 | epoch avg. loss: 0.025 | test avg. loss: 3.996
Epochs: 28375 | epoch avg. loss: 0.021 | test avg. loss: 3.985
Epochs: 28376 | epoch avg. loss: 0.012 | test avg. loss: 4.113


 57%|█████▋    | 28379/50000 [42:43<32:00, 11.26it/s]

Epochs: 28377 | epoch avg. loss: 0.018 | test avg. loss: 4.103
Epochs: 28378 | epoch avg. loss: 0.012 | test avg. loss: 4.136
Epochs: 28379 | epoch avg. loss: 0.011 | test avg. loss: 4.140


 57%|█████▋    | 28383/50000 [42:44<31:24, 11.47it/s]

Epochs: 28380 | epoch avg. loss: 0.020 | test avg. loss: 3.978
Epochs: 28381 | epoch avg. loss: 0.022 | test avg. loss: 4.031
Epochs: 28382 | epoch avg. loss: 0.019 | test avg. loss: 4.106


 57%|█████▋    | 28385/50000 [42:44<32:33, 11.07it/s]

Epochs: 28383 | epoch avg. loss: 0.020 | test avg. loss: 4.028
Epochs: 28384 | epoch avg. loss: 0.054 | test avg. loss: 4.066
Epochs: 28385 | epoch avg. loss: 0.015 | test avg. loss: 4.118


 57%|█████▋    | 28389/50000 [42:44<29:30, 12.21it/s]

Epochs: 28386 | epoch avg. loss: 0.026 | test avg. loss: 3.923
Epochs: 28387 | epoch avg. loss: 0.029 | test avg. loss: 3.957
Epochs: 28388 | epoch avg. loss: 0.009 | test avg. loss: 4.061


 57%|█████▋    | 28391/50000 [42:44<28:02, 12.84it/s]

Epochs: 28389 | epoch avg. loss: 0.015 | test avg. loss: 4.023
Epochs: 28390 | epoch avg. loss: 0.016 | test avg. loss: 4.016
Epochs: 28391 | epoch avg. loss: 0.008 | test avg. loss: 4.046


 57%|█████▋    | 28395/50000 [42:45<27:40, 13.01it/s]

Epochs: 28392 | epoch avg. loss: 0.014 | test avg. loss: 3.934
Epochs: 28393 | epoch avg. loss: 0.009 | test avg. loss: 3.980
Epochs: 28394 | epoch avg. loss: 0.018 | test avg. loss: 4.011


 57%|█████▋    | 28397/50000 [42:45<28:04, 12.82it/s]

Epochs: 28395 | epoch avg. loss: 0.011 | test avg. loss: 3.945
Epochs: 28396 | epoch avg. loss: 0.021 | test avg. loss: 4.092
Epochs: 28397 | epoch avg. loss: 0.073 | test avg. loss: 4.002


 57%|█████▋    | 28399/50000 [42:45<28:07, 12.80it/s]

Epochs: 28398 | epoch avg. loss: 0.029 | test avg. loss: 3.823
Epochs: 28399 | epoch avg. loss: 0.050 | test avg. loss: 3.972


 57%|█████▋    | 28403/50000 [42:47<1:30:14,  3.99it/s]

Epochs: 28400 | epoch avg. loss: 0.070 | test avg. loss: 4.130
Epochs: 28401 | epoch avg. loss: 0.039 | test avg. loss: 4.096
Epochs: 28402 | epoch avg. loss: 0.064 | test avg. loss: 4.216
Epochs: 28403 | epoch avg. loss: 0.032 | test avg. loss: 4.189


 57%|█████▋    | 28407/50000 [42:47<56:06,  6.41it/s]

Epochs: 28404 | epoch avg. loss: 0.031 | test avg. loss: 4.007
Epochs: 28405 | epoch avg. loss: 0.082 | test avg. loss: 4.061
Epochs: 28406 | epoch avg. loss: 0.029 | test avg. loss: 4.089
Epochs: 28407 | epoch avg. loss: 0.016 | test avg. loss: 4.130


 57%|█████▋    | 28411/50000 [42:47<40:27,  8.89it/s]

Epochs: 28408 | epoch avg. loss: 0.017 | test avg. loss: 4.092
Epochs: 28409 | epoch avg. loss: 0.011 | test avg. loss: 3.977
Epochs: 28410 | epoch avg. loss: 0.007 | test avg. loss: 3.918


 57%|█████▋    | 28413/50000 [42:48<36:21,  9.89it/s]

Epochs: 28411 | epoch avg. loss: 0.009 | test avg. loss: 3.984
Epochs: 28412 | epoch avg. loss: 0.008 | test avg. loss: 4.075
Epochs: 28413 | epoch avg. loss: 0.006 | test avg. loss: 4.134


 57%|█████▋    | 28417/50000 [42:48<31:05, 11.57it/s]

Epochs: 28414 | epoch avg. loss: 0.012 | test avg. loss: 4.196
Epochs: 28415 | epoch avg. loss: 0.005 | test avg. loss: 4.179
Epochs: 28416 | epoch avg. loss: 0.005 | test avg. loss: 4.128


 57%|█████▋    | 28421/50000 [42:48<26:42, 13.47it/s]

Epochs: 28417 | epoch avg. loss: 0.007 | test avg. loss: 4.176
Epochs: 28418 | epoch avg. loss: 0.019 | test avg. loss: 4.131
Epochs: 28419 | epoch avg. loss: 0.008 | test avg. loss: 4.097
Epochs: 28420 | epoch avg. loss: 0.021 | test avg. loss: 4.126


 57%|█████▋    | 28423/50000 [42:48<25:45, 13.96it/s]

Epochs: 28421 | epoch avg. loss: 0.011 | test avg. loss: 4.093
Epochs: 28422 | epoch avg. loss: 0.011 | test avg. loss: 4.044
Epochs: 28423 | epoch avg. loss: 0.032 | test avg. loss: 4.130


 57%|█████▋    | 28427/50000 [42:49<27:37, 13.02it/s]

Epochs: 28424 | epoch avg. loss: 0.039 | test avg. loss: 4.093
Epochs: 28425 | epoch avg. loss: 0.017 | test avg. loss: 3.990
Epochs: 28426 | epoch avg. loss: 0.013 | test avg. loss: 4.027




Epochs: 28427 | epoch avg. loss: 0.006 | test avg. loss: 4.132
Epochs: 28428 | epoch avg. loss: 0.012 | test avg. loss: 4.070
Epochs: 28429 | epoch avg. loss: 0.016 | test avg. loss: 4.060


 57%|█████▋    | 28433/50000 [42:49<27:18, 13.16it/s]

Epochs: 28430 | epoch avg. loss: 0.009 | test avg. loss: 4.125
Epochs: 28431 | epoch avg. loss: 0.033 | test avg. loss: 4.074
Epochs: 28432 | epoch avg. loss: 0.013 | test avg. loss: 4.060


 57%|█████▋    | 28435/50000 [42:49<26:58, 13.32it/s]

Epochs: 28433 | epoch avg. loss: 0.030 | test avg. loss: 4.135
Epochs: 28434 | epoch avg. loss: 0.016 | test avg. loss: 4.037
Epochs: 28435 | epoch avg. loss: 0.015 | test avg. loss: 3.979


 57%|█████▋    | 28437/50000 [42:49<26:16, 13.68it/s]

Epochs: 28436 | epoch avg. loss: 0.026 | test avg. loss: 4.091
Epochs: 28437 | epoch avg. loss: 0.058 | test avg. loss: 4.089


 57%|█████▋    | 28441/50000 [42:50<29:36, 12.13it/s]

Epochs: 28438 | epoch avg. loss: 0.021 | test avg. loss: 4.143
Epochs: 28439 | epoch avg. loss: 0.031 | test avg. loss: 4.151
Epochs: 28440 | epoch avg. loss: 0.023 | test avg. loss: 4.149


                                                     

Epochs: 28441 | epoch avg. loss: 0.018 | test avg. loss: 4.096
Epochs: 28442 | epoch avg. loss: 0.022 | test avg. loss: 4.238
Epochs: 28443 | epoch avg. loss: 0.036 | test avg. loss: 4.221


 57%|█████▋    | 28447/50000 [42:50<26:21, 13.63it/s]

Epochs: 28444 | epoch avg. loss: 0.014 | test avg. loss: 4.149
Epochs: 28445 | epoch avg. loss: 0.023 | test avg. loss: 4.076
Epochs: 28446 | epoch avg. loss: 0.009 | test avg. loss: 4.004


                                                     

Epochs: 28447 | epoch avg. loss: 0.017 | test avg. loss: 4.034
Epochs: 28448 | epoch avg. loss: 0.014 | test avg. loss: 4.140
Epochs: 28449 | epoch avg. loss: 0.021 | test avg. loss: 4.123


 57%|█████▋    | 28453/50000 [42:51<28:44, 12.50it/s]

Epochs: 28450 | epoch avg. loss: 0.023 | test avg. loss: 4.167
Epochs: 28451 | epoch avg. loss: 0.039 | test avg. loss: 4.232
Epochs: 28452 | epoch avg. loss: 0.028 | test avg. loss: 4.300


 57%|█████▋    | 28455/50000 [42:51<30:37, 11.73it/s]

Epochs: 28453 | epoch avg. loss: 0.060 | test avg. loss: 4.105
Epochs: 28454 | epoch avg. loss: 0.050 | test avg. loss: 4.187
Epochs: 28455 | epoch avg. loss: 0.036 | test avg. loss: 4.421


 57%|█████▋    | 28459/50000 [42:51<28:46, 12.48it/s]

Epochs: 28456 | epoch avg. loss: 0.074 | test avg. loss: 4.196
Epochs: 28457 | epoch avg. loss: 0.042 | test avg. loss: 4.065
Epochs: 28458 | epoch avg. loss: 0.115 | test avg. loss: 4.052


 57%|█████▋    | 28463/50000 [42:51<25:55, 13.85it/s]

Epochs: 28459 | epoch avg. loss: 0.128 | test avg. loss: 4.257
Epochs: 28460 | epoch avg. loss: 0.099 | test avg. loss: 4.266
Epochs: 28461 | epoch avg. loss: 0.158 | test avg. loss: 4.535
Epochs: 28462 | epoch avg. loss: 0.059 | test avg. loss: 4.732


 57%|█████▋    | 28465/50000 [42:52<26:04, 13.76it/s]

Epochs: 28463 | epoch avg. loss: 0.072 | test avg. loss: 4.572
Epochs: 28464 | epoch avg. loss: 0.094 | test avg. loss: 4.440
Epochs: 28465 | epoch avg. loss: 0.105 | test avg. loss: 4.011


 57%|█████▋    | 28469/50000 [42:52<25:36, 14.02it/s]

Epochs: 28466 | epoch avg. loss: 0.102 | test avg. loss: 4.147
Epochs: 28467 | epoch avg. loss: 0.266 | test avg. loss: 4.418
Epochs: 28468 | epoch avg. loss: 0.320 | test avg. loss: 4.084
Epochs: 28469 | epoch avg. loss: 0.121 | test avg. loss: 3.893


 57%|█████▋    | 28473/50000 [42:52<24:17, 14.77it/s]

Epochs: 28470 | epoch avg. loss: 0.321 | test avg. loss: 3.665
Epochs: 28471 | epoch avg. loss: 0.132 | test avg. loss: 4.145
Epochs: 28472 | epoch avg. loss: 0.126 | test avg. loss: 4.294
Epochs: 28473 | epoch avg. loss: 0.149 | test avg. loss: 4.372


 57%|█████▋    | 28477/50000 [42:52<24:52, 14.42it/s]

Epochs: 28474 | epoch avg. loss: 0.112 | test avg. loss: 4.222
Epochs: 28475 | epoch avg. loss: 0.078 | test avg. loss: 4.122
Epochs: 28476 | epoch avg. loss: 0.073 | test avg. loss: 4.469
Epochs: 28477 | epoch avg. loss: 0.106 | test avg. loss: 4.330


 57%|█████▋    | 28481/50000 [42:53<26:14, 13.66it/s]

Epochs: 28478 | epoch avg. loss: 0.058 | test avg. loss: 4.325
Epochs: 28479 | epoch avg. loss: 0.051 | test avg. loss: 4.412
Epochs: 28480 | epoch avg. loss: 0.115 | test avg. loss: 3.988


 57%|█████▋    | 28485/50000 [42:53<24:09, 14.85it/s]

Epochs: 28481 | epoch avg. loss: 0.098 | test avg. loss: 3.986
Epochs: 28482 | epoch avg. loss: 0.043 | test avg. loss: 4.328
Epochs: 28483 | epoch avg. loss: 0.072 | test avg. loss: 4.291
Epochs: 28484 | epoch avg. loss: 0.154 | test avg. loss: 4.306


 57%|█████▋    | 28489/50000 [42:53<23:09, 15.48it/s]

Epochs: 28485 | epoch avg. loss: 0.134 | test avg. loss: 4.320
Epochs: 28486 | epoch avg. loss: 0.150 | test avg. loss: 4.167
Epochs: 28487 | epoch avg. loss: 0.525 | test avg. loss: 3.947
Epochs: 28488 | epoch avg. loss: 0.185 | test avg. loss: 4.123




Epochs: 28489 | epoch avg. loss: 0.194 | test avg. loss: 3.803
Epochs: 28490 | epoch avg. loss: 0.352 | test avg. loss: 4.032
Epochs: 28491 | epoch avg. loss: 0.221 | test avg. loss: 4.543


 57%|█████▋    | 28495/50000 [42:54<24:29, 14.63it/s]

Epochs: 28492 | epoch avg. loss: 0.195 | test avg. loss: 4.203
Epochs: 28493 | epoch avg. loss: 0.369 | test avg. loss: 4.179
Epochs: 28494 | epoch avg. loss: 0.374 | test avg. loss: 3.832


 57%|█████▋    | 28499/50000 [42:54<23:53, 15.00it/s]

Epochs: 28495 | epoch avg. loss: 0.164 | test avg. loss: 3.870
Epochs: 28496 | epoch avg. loss: 0.224 | test avg. loss: 4.246
Epochs: 28497 | epoch avg. loss: 0.359 | test avg. loss: 3.716
Epochs: 28498 | epoch avg. loss: 0.205 | test avg. loss: 3.987


 57%|█████▋    | 28499/50000 [42:54<23:53, 15.00it/s]

Epochs: 28499 | epoch avg. loss: 0.246 | test avg. loss: 4.505


 57%|█████▋    | 28503/50000 [42:56<1:27:01,  4.12it/s]

Epochs: 28500 | epoch avg. loss: 0.354 | test avg. loss: 4.244
Epochs: 28501 | epoch avg. loss: 0.285 | test avg. loss: 4.497
Epochs: 28502 | epoch avg. loss: 0.320 | test avg. loss: 5.538


 57%|█████▋    | 28505/50000 [42:56<1:10:01,  5.12it/s]

Epochs: 28503 | epoch avg. loss: 0.601 | test avg. loss: 5.266
Epochs: 28504 | epoch avg. loss: 1.171 | test avg. loss: 4.855
Epochs: 28505 | epoch avg. loss: 1.249 | test avg. loss: 3.931


 57%|█████▋    | 28509/50000 [42:56<48:34,  7.37it/s]

Epochs: 28506 | epoch avg. loss: 1.678 | test avg. loss: 4.214
Epochs: 28507 | epoch avg. loss: 1.301 | test avg. loss: 8.338
Epochs: 28508 | epoch avg. loss: 2.370 | test avg. loss: 7.295


 57%|█████▋    | 28511/50000 [42:56<43:03,  8.32it/s]

Epochs: 28509 | epoch avg. loss: 2.699 | test avg. loss: 8.603
Epochs: 28510 | epoch avg. loss: 2.808 | test avg. loss: 6.321
Epochs: 28511 | epoch avg. loss: 1.900 | test avg. loss: 7.021


 57%|█████▋    | 28515/50000 [42:57<35:25, 10.11it/s]

Epochs: 28512 | epoch avg. loss: 2.235 | test avg. loss: 5.527
Epochs: 28513 | epoch avg. loss: 1.351 | test avg. loss: 6.264
Epochs: 28514 | epoch avg. loss: 1.213 | test avg. loss: 6.315
Epochs: 28515 | epoch avg. loss: 2.465 | test avg. loss: 6.630


 57%|█████▋    | 28519/50000 [42:57<29:20, 12.20it/s]

Epochs: 28516 | epoch avg. loss: 1.541 | test avg. loss: 5.095
Epochs: 28517 | epoch avg. loss: 1.073 | test avg. loss: 5.000
Epochs: 28518 | epoch avg. loss: 1.146 | test avg. loss: 5.427
Epochs: 28519 | epoch avg. loss: 0.692 | test avg. loss: 5.800


 57%|█████▋    | 28523/50000 [42:57<27:20, 13.09it/s]

Epochs: 28520 | epoch avg. loss: 0.572 | test avg. loss: 6.447
Epochs: 28521 | epoch avg. loss: 0.607 | test avg. loss: 6.203
Epochs: 28522 | epoch avg. loss: 0.297 | test avg. loss: 5.820


 57%|█████▋    | 28525/50000 [42:57<26:42, 13.40it/s]

Epochs: 28523 | epoch avg. loss: 0.198 | test avg. loss: 5.964
Epochs: 28524 | epoch avg. loss: 0.407 | test avg. loss: 5.327
Epochs: 28525 | epoch avg. loss: 0.451 | test avg. loss: 5.725


 57%|█████▋    | 28529/50000 [42:58<30:17, 11.81it/s]

Epochs: 28526 | epoch avg. loss: 0.444 | test avg. loss: 5.526
Epochs: 28527 | epoch avg. loss: 0.275 | test avg. loss: 6.152
Epochs: 28528 | epoch avg. loss: 0.235 | test avg. loss: 5.908


 57%|█████▋    | 28531/50000 [42:58<30:07, 11.88it/s]

Epochs: 28529 | epoch avg. loss: 0.139 | test avg. loss: 5.963
Epochs: 28530 | epoch avg. loss: 0.124 | test avg. loss: 5.428
Epochs: 28531 | epoch avg. loss: 0.089 | test avg. loss: 5.561


 57%|█████▋    | 28535/50000 [42:58<28:57, 12.35it/s]

Epochs: 28532 | epoch avg. loss: 0.211 | test avg. loss: 5.237
Epochs: 28533 | epoch avg. loss: 0.109 | test avg. loss: 5.509
Epochs: 28534 | epoch avg. loss: 0.091 | test avg. loss: 5.498


 57%|█████▋    | 28537/50000 [42:58<29:53, 11.97it/s]

Epochs: 28535 | epoch avg. loss: 0.057 | test avg. loss: 5.558
Epochs: 28536 | epoch avg. loss: 0.060 | test avg. loss: 5.494
Epochs: 28537 | epoch avg. loss: 0.043 | test avg. loss: 5.397


 57%|█████▋    | 28541/50000 [42:59<31:35, 11.32it/s]

Epochs: 28538 | epoch avg. loss: 0.042 | test avg. loss: 5.363
Epochs: 28539 | epoch avg. loss: 0.039 | test avg. loss: 5.393
Epochs: 28540 | epoch avg. loss: 0.039 | test avg. loss: 5.327


                                                     

Epochs: 28541 | epoch avg. loss: 0.040 | test avg. loss: 5.484
Epochs: 28542 | epoch avg. loss: 0.030 | test avg. loss: 5.430
Epochs: 28543 | epoch avg. loss: 0.039 | test avg. loss: 5.554


 57%|█████▋    | 28547/50000 [42:59<26:47, 13.35it/s]

Epochs: 28544 | epoch avg. loss: 0.037 | test avg. loss: 5.384
Epochs: 28545 | epoch avg. loss: 0.040 | test avg. loss: 5.469
Epochs: 28546 | epoch avg. loss: 0.043 | test avg. loss: 5.297
Epochs: 28547 | epoch avg. loss: 0.046 | test avg. loss: 5.436


 57%|█████▋    | 28551/50000 [43:00<29:06, 12.28it/s]

Epochs: 28548 | epoch avg. loss: 0.040 | test avg. loss: 5.323
Epochs: 28549 | epoch avg. loss: 0.048 | test avg. loss: 5.511
Epochs: 28550 | epoch avg. loss: 0.060 | test avg. loss: 5.335


 57%|█████▋    | 28553/50000 [43:00<28:56, 12.35it/s]

Epochs: 28551 | epoch avg. loss: 0.103 | test avg. loss: 5.682
Epochs: 28552 | epoch avg. loss: 0.149 | test avg. loss: 5.316
Epochs: 28553 | epoch avg. loss: 0.122 | test avg. loss: 5.643


 57%|█████▋    | 28557/50000 [43:00<29:55, 11.94it/s]

Epochs: 28554 | epoch avg. loss: 0.144 | test avg. loss: 5.271
Epochs: 28555 | epoch avg. loss: 0.071 | test avg. loss: 5.421
Epochs: 28556 | epoch avg. loss: 0.048 | test avg. loss: 5.284


 57%|█████▋    | 28559/50000 [43:00<28:05, 12.72it/s]

Epochs: 28557 | epoch avg. loss: 0.042 | test avg. loss: 5.417
Epochs: 28558 | epoch avg. loss: 0.055 | test avg. loss: 5.275
Epochs: 28559 | epoch avg. loss: 0.052 | test avg. loss: 5.372




Epochs: 28560 | epoch avg. loss: 0.033 | test avg. loss: 5.308
Epochs: 28561 | epoch avg. loss: 0.037 | test avg. loss: 5.414


 57%|█████▋    | 28565/50000 [43:01<29:45, 12.00it/s]

Epochs: 28562 | epoch avg. loss: 0.037 | test avg. loss: 5.301
Epochs: 28563 | epoch avg. loss: 0.030 | test avg. loss: 5.343
Epochs: 28564 | epoch avg. loss: 0.029 | test avg. loss: 5.266


 57%|█████▋    | 28567/50000 [43:01<28:43, 12.44it/s]

Epochs: 28565 | epoch avg. loss: 0.034 | test avg. loss: 5.363
Epochs: 28566 | epoch avg. loss: 0.035 | test avg. loss: 5.273
Epochs: 28567 | epoch avg. loss: 0.027 | test avg. loss: 5.372


 57%|█████▋    | 28571/50000 [43:01<27:23, 13.04it/s]

Epochs: 28568 | epoch avg. loss: 0.027 | test avg. loss: 5.333
Epochs: 28569 | epoch avg. loss: 0.018 | test avg. loss: 5.300
Epochs: 28570 | epoch avg. loss: 0.023 | test avg. loss: 5.377
Epochs: 28571 | epoch avg. loss: 0.024 | test avg. loss: 5.265


 57%|█████▋    | 28575/50000 [43:01<28:12, 12.66it/s]

Epochs: 28572 | epoch avg. loss: 0.024 | test avg. loss: 5.364
Epochs: 28573 | epoch avg. loss: 0.030 | test avg. loss: 5.242
Epochs: 28574 | epoch avg. loss: 0.031 | test avg. loss: 5.335


 57%|█████▋    | 28577/50000 [43:02<29:53, 11.95it/s]

Epochs: 28575 | epoch avg. loss: 0.021 | test avg. loss: 5.269
Epochs: 28576 | epoch avg. loss: 0.024 | test avg. loss: 5.391
Epochs: 28577 | epoch avg. loss: 0.036 | test avg. loss: 5.249


 57%|█████▋    | 28581/50000 [43:02<31:52, 11.20it/s]

Epochs: 28578 | epoch avg. loss: 0.024 | test avg. loss: 5.350
Epochs: 28579 | epoch avg. loss: 0.032 | test avg. loss: 5.241
Epochs: 28580 | epoch avg. loss: 0.023 | test avg. loss: 5.354


 57%|█████▋    | 28583/50000 [43:02<31:32, 11.32it/s]

Epochs: 28581 | epoch avg. loss: 0.035 | test avg. loss: 5.238
Epochs: 28582 | epoch avg. loss: 0.061 | test avg. loss: 5.537
Epochs: 28583 | epoch avg. loss: 0.105 | test avg. loss: 5.260


 57%|█████▋    | 28585/50000 [43:02<31:20, 11.39it/s]

Epochs: 28584 | epoch avg. loss: 0.046 | test avg. loss: 5.354
Epochs: 28585 | epoch avg. loss: 0.023 | test avg. loss: 5.236


 57%|█████▋    | 28589/50000 [43:03<32:53, 10.85it/s]

Epochs: 28586 | epoch avg. loss: 0.039 | test avg. loss: 5.362
Epochs: 28587 | epoch avg. loss: 0.035 | test avg. loss: 5.220
Epochs: 28588 | epoch avg. loss: 0.038 | test avg. loss: 5.374


 57%|█████▋    | 28591/50000 [43:03<30:45, 11.60it/s]

Epochs: 28589 | epoch avg. loss: 0.039 | test avg. loss: 5.229
Epochs: 28590 | epoch avg. loss: 0.047 | test avg. loss: 5.440
Epochs: 28591 | epoch avg. loss: 0.059 | test avg. loss: 5.228


 57%|█████▋    | 28595/50000 [43:03<29:53, 11.93it/s]

Epochs: 28592 | epoch avg. loss: 0.079 | test avg. loss: 5.425
Epochs: 28593 | epoch avg. loss: 0.054 | test avg. loss: 5.222
Epochs: 28594 | epoch avg. loss: 0.081 | test avg. loss: 5.374


 57%|█████▋    | 28597/50000 [43:03<30:46, 11.59it/s]

Epochs: 28595 | epoch avg. loss: 0.043 | test avg. loss: 5.216
Epochs: 28596 | epoch avg. loss: 0.059 | test avg. loss: 5.380
Epochs: 28597 | epoch avg. loss: 0.039 | test avg. loss: 5.228


 57%|█████▋    | 28599/50000 [43:04<29:53, 11.93it/s]

Epochs: 28598 | epoch avg. loss: 0.033 | test avg. loss: 5.312
Epochs: 28599 | epoch avg. loss: 0.024 | test avg. loss: 5.206


                                                       

Epochs: 28600 | epoch avg. loss: 0.024 | test avg. loss: 5.210
Epochs: 28601 | epoch avg. loss: 0.016 | test avg. loss: 5.234
Epochs: 28602 | epoch avg. loss: 0.015 | test avg. loss: 5.272


 57%|█████▋    | 28605/50000 [43:06<1:11:43,  4.97it/s]

Epochs: 28603 | epoch avg. loss: 0.015 | test avg. loss: 5.253
Epochs: 28604 | epoch avg. loss: 0.017 | test avg. loss: 5.317
Epochs: 28605 | epoch avg. loss: 0.022 | test avg. loss: 5.227


 57%|█████▋    | 28609/50000 [43:06<47:44,  7.47it/s]

Epochs: 28606 | epoch avg. loss: 0.042 | test avg. loss: 5.416
Epochs: 28607 | epoch avg. loss: 0.063 | test avg. loss: 5.196
Epochs: 28608 | epoch avg. loss: 0.100 | test avg. loss: 5.466


 57%|█████▋    | 28611/50000 [43:06<41:33,  8.58it/s]

Epochs: 28609 | epoch avg. loss: 0.098 | test avg. loss: 5.210
Epochs: 28610 | epoch avg. loss: 0.064 | test avg. loss: 5.415
Epochs: 28611 | epoch avg. loss: 0.050 | test avg. loss: 5.224


 57%|█████▋    | 28613/50000 [43:06<37:08,  9.60it/s]

Epochs: 28612 | epoch avg. loss: 0.079 | test avg. loss: 5.364
Epochs: 28613 | epoch avg. loss: 0.042 | test avg. loss: 5.185


 57%|█████▋    | 28615/50000 [43:07<37:13,  9.57it/s]

Epochs: 28614 | epoch avg. loss: 0.044 | test avg. loss: 5.333
Epochs: 28615 | epoch avg. loss: 0.036 | test avg. loss: 5.169


 57%|█████▋    | 28619/50000 [43:07<35:00, 10.18it/s]

Epochs: 28616 | epoch avg. loss: 0.052 | test avg. loss: 5.390
Epochs: 28617 | epoch avg. loss: 0.062 | test avg. loss: 5.200
Epochs: 28618 | epoch avg. loss: 0.100 | test avg. loss: 5.386


 57%|█████▋    | 28621/50000 [43:07<32:31, 10.96it/s]

Epochs: 28619 | epoch avg. loss: 0.065 | test avg. loss: 5.161
Epochs: 28620 | epoch avg. loss: 0.062 | test avg. loss: 5.380
Epochs: 28621 | epoch avg. loss: 0.077 | test avg. loss: 5.154


 57%|█████▋    | 28625/50000 [43:07<29:43, 11.99it/s]

Epochs: 28622 | epoch avg. loss: 0.087 | test avg. loss: 5.405
Epochs: 28623 | epoch avg. loss: 0.086 | test avg. loss: 5.138
Epochs: 28624 | epoch avg. loss: 0.090 | test avg. loss: 5.450


 57%|█████▋    | 28627/50000 [43:08<29:49, 11.95it/s]

Epochs: 28625 | epoch avg. loss: 0.105 | test avg. loss: 5.152
Epochs: 28626 | epoch avg. loss: 0.111 | test avg. loss: 5.415
Epochs: 28627 | epoch avg. loss: 0.137 | test avg. loss: 5.160


 57%|█████▋    | 28631/50000 [43:08<28:27, 12.51it/s]

Epochs: 28628 | epoch avg. loss: 0.074 | test avg. loss: 5.310
Epochs: 28629 | epoch avg. loss: 0.048 | test avg. loss: 5.184
Epochs: 28630 | epoch avg. loss: 0.052 | test avg. loss: 5.161


 57%|█████▋    | 28635/50000 [43:08<25:59, 13.70it/s]

Epochs: 28631 | epoch avg. loss: 0.030 | test avg. loss: 5.183
Epochs: 28632 | epoch avg. loss: 0.026 | test avg. loss: 5.155
Epochs: 28633 | epoch avg. loss: 0.020 | test avg. loss: 5.099
Epochs: 28634 | epoch avg. loss: 0.020 | test avg. loss: 5.251


 57%|█████▋    | 28639/50000 [43:08<24:27, 14.55it/s]

Epochs: 28635 | epoch avg. loss: 0.032 | test avg. loss: 5.156
Epochs: 28636 | epoch avg. loss: 0.039 | test avg. loss: 5.416
Epochs: 28637 | epoch avg. loss: 0.079 | test avg. loss: 5.176
Epochs: 28638 | epoch avg. loss: 0.045 | test avg. loss: 5.368


 57%|█████▋    | 28641/50000 [43:09<28:23, 12.54it/s]

Epochs: 28639 | epoch avg. loss: 0.065 | test avg. loss: 5.119
Epochs: 28640 | epoch avg. loss: 0.063 | test avg. loss: 5.245
Epochs: 28641 | epoch avg. loss: 0.036 | test avg. loss: 5.165


 57%|█████▋    | 28645/50000 [43:09<28:11, 12.62it/s]

Epochs: 28642 | epoch avg. loss: 0.018 | test avg. loss: 5.279
Epochs: 28643 | epoch avg. loss: 0.029 | test avg. loss: 5.231
Epochs: 28644 | epoch avg. loss: 0.026 | test avg. loss: 5.247


 57%|█████▋    | 28649/50000 [43:09<25:28, 13.97it/s]

Epochs: 28645 | epoch avg. loss: 0.015 | test avg. loss: 5.176
Epochs: 28646 | epoch avg. loss: 0.021 | test avg. loss: 5.169
Epochs: 28647 | epoch avg. loss: 0.018 | test avg. loss: 5.130
Epochs: 28648 | epoch avg. loss: 0.016 | test avg. loss: 5.162


 57%|█████▋    | 28651/50000 [43:09<24:37, 14.45it/s]

Epochs: 28649 | epoch avg. loss: 0.030 | test avg. loss: 5.163
Epochs: 28650 | epoch avg. loss: 0.022 | test avg. loss: 5.144
Epochs: 28651 | epoch avg. loss: 0.030 | test avg. loss: 5.251


 57%|█████▋    | 28655/50000 [43:10<25:10, 14.14it/s]

Epochs: 28652 | epoch avg. loss: 0.038 | test avg. loss: 5.141
Epochs: 28653 | epoch avg. loss: 0.044 | test avg. loss: 5.273
Epochs: 28654 | epoch avg. loss: 0.043 | test avg. loss: 5.116


 57%|█████▋    | 28659/50000 [43:10<24:57, 14.25it/s]

Epochs: 28655 | epoch avg. loss: 0.060 | test avg. loss: 5.352
Epochs: 28656 | epoch avg. loss: 0.077 | test avg. loss: 5.119
Epochs: 28657 | epoch avg. loss: 0.098 | test avg. loss: 5.523
Epochs: 28658 | epoch avg. loss: 0.146 | test avg. loss: 5.201


 57%|█████▋    | 28663/50000 [43:10<23:34, 15.08it/s]

Epochs: 28659 | epoch avg. loss: 0.156 | test avg. loss: 5.530
Epochs: 28660 | epoch avg. loss: 0.146 | test avg. loss: 5.175
Epochs: 28661 | epoch avg. loss: 0.125 | test avg. loss: 5.468
Epochs: 28662 | epoch avg. loss: 0.127 | test avg. loss: 5.124


 57%|█████▋    | 28667/50000 [43:10<23:02, 15.43it/s]

Epochs: 28663 | epoch avg. loss: 0.122 | test avg. loss: 5.460
Epochs: 28664 | epoch avg. loss: 0.148 | test avg. loss: 5.172
Epochs: 28665 | epoch avg. loss: 0.122 | test avg. loss: 5.332
Epochs: 28666 | epoch avg. loss: 0.056 | test avg. loss: 5.121


 57%|█████▋    | 28669/50000 [43:11<25:49, 13.77it/s]

Epochs: 28667 | epoch avg. loss: 0.080 | test avg. loss: 5.267
Epochs: 28668 | epoch avg. loss: 0.073 | test avg. loss: 5.123
Epochs: 28669 | epoch avg. loss: 0.056 | test avg. loss: 5.298


 57%|█████▋    | 28673/50000 [43:11<27:25, 12.96it/s]

Epochs: 28670 | epoch avg. loss: 0.066 | test avg. loss: 5.051
Epochs: 28671 | epoch avg. loss: 0.058 | test avg. loss: 5.122
Epochs: 28672 | epoch avg. loss: 0.041 | test avg. loss: 5.079


 57%|█████▋    | 28675/50000 [43:11<26:05, 13.62it/s]

Epochs: 28673 | epoch avg. loss: 0.017 | test avg. loss: 5.143
Epochs: 28674 | epoch avg. loss: 0.032 | test avg. loss: 5.099
Epochs: 28675 | epoch avg. loss: 0.023 | test avg. loss: 5.135
Epochs: 28676 | epoch avg. loss: 0.020 | test avg. loss: 5.157


 57%|█████▋    | 28679/50000 [43:11<24:54, 14.27it/s]

Epochs: 28677 | epoch avg. loss: 0.022 | test avg. loss: 5.111
Epochs: 28678 | epoch avg. loss: 0.020 | test avg. loss: 5.133
Epochs: 28679 | epoch avg. loss: 0.014 | test avg. loss: 5.111


 57%|█████▋    | 28683/50000 [43:12<26:24, 13.46it/s]

Epochs: 28680 | epoch avg. loss: 0.013 | test avg. loss: 5.106
Epochs: 28681 | epoch avg. loss: 0.015 | test avg. loss: 5.143
Epochs: 28682 | epoch avg. loss: 0.012 | test avg. loss: 5.141


 57%|█████▋    | 28685/50000 [43:12<26:20, 13.49it/s]

Epochs: 28683 | epoch avg. loss: 0.021 | test avg. loss: 5.131
Epochs: 28684 | epoch avg. loss: 0.016 | test avg. loss: 5.162
Epochs: 28685 | epoch avg. loss: 0.019 | test avg. loss: 5.087


 57%|█████▋    | 28689/50000 [43:12<26:05, 13.61it/s]

Epochs: 28686 | epoch avg. loss: 0.028 | test avg. loss: 5.249
Epochs: 28687 | epoch avg. loss: 0.051 | test avg. loss: 5.080
Epochs: 28688 | epoch avg. loss: 0.030 | test avg. loss: 5.243




Epochs: 28689 | epoch avg. loss: 0.042 | test avg. loss: 5.061
Epochs: 28690 | epoch avg. loss: 0.047 | test avg. loss: 5.293
Epochs: 28691 | epoch avg. loss: 0.067 | test avg. loss: 5.100


 57%|█████▋    | 28695/50000 [43:12<27:13, 13.04it/s]

Epochs: 28692 | epoch avg. loss: 0.102 | test avg. loss: 5.312
Epochs: 28693 | epoch avg. loss: 0.042 | test avg. loss: 5.132
Epochs: 28694 | epoch avg. loss: 0.069 | test avg. loss: 5.260


 57%|█████▋    | 28697/50000 [43:13<28:27, 12.47it/s]

Epochs: 28695 | epoch avg. loss: 0.056 | test avg. loss: 5.019
Epochs: 28696 | epoch avg. loss: 0.062 | test avg. loss: 5.105
Epochs: 28697 | epoch avg. loss: 0.028 | test avg. loss: 5.107


 57%|█████▋    | 28699/50000 [43:13<27:44, 12.80it/s]

Epochs: 28698 | epoch avg. loss: 0.029 | test avg. loss: 5.197
Epochs: 28699 | epoch avg. loss: 0.020 | test avg. loss: 5.146


                                                       

Epochs: 28700 | epoch avg. loss: 0.019 | test avg. loss: 5.189
Epochs: 28701 | epoch avg. loss: 0.025 | test avg. loss: 5.022
Epochs: 28702 | epoch avg. loss: 0.045 | test avg. loss: 5.184


 57%|█████▋    | 28707/50000 [43:15<53:57,  6.58it/s]  

Epochs: 28703 | epoch avg. loss: 0.066 | test avg. loss: 5.010
Epochs: 28704 | epoch avg. loss: 0.078 | test avg. loss: 5.241
Epochs: 28705 | epoch avg. loss: 0.061 | test avg. loss: 5.082
Epochs: 28706 | epoch avg. loss: 0.025 | test avg. loss: 5.161


 57%|█████▋    | 28709/50000 [43:15<47:27,  7.48it/s]

Epochs: 28707 | epoch avg. loss: 0.025 | test avg. loss: 5.075
Epochs: 28708 | epoch avg. loss: 0.024 | test avg. loss: 5.134
Epochs: 28709 | epoch avg. loss: 0.027 | test avg. loss: 5.117


                                                     

Epochs: 28710 | epoch avg. loss: 0.016 | test avg. loss: 5.093
Epochs: 28711 | epoch avg. loss: 0.012 | test avg. loss: 5.062


 57%|█████▋    | 28715/50000 [43:16<37:06,  9.56it/s]

Epochs: 28712 | epoch avg. loss: 0.013 | test avg. loss: 5.127
Epochs: 28713 | epoch avg. loss: 0.017 | test avg. loss: 5.062
Epochs: 28714 | epoch avg. loss: 0.016 | test avg. loss: 5.086


 57%|█████▋    | 28717/50000 [43:16<33:20, 10.64it/s]

Epochs: 28715 | epoch avg. loss: 0.011 | test avg. loss: 5.059
Epochs: 28716 | epoch avg. loss: 0.018 | test avg. loss: 5.077
Epochs: 28717 | epoch avg. loss: 0.013 | test avg. loss: 5.047


 57%|█████▋    | 28721/50000 [43:16<31:33, 11.24it/s]

Epochs: 28718 | epoch avg. loss: 0.018 | test avg. loss: 5.151
Epochs: 28719 | epoch avg. loss: 0.025 | test avg. loss: 5.033
Epochs: 28720 | epoch avg. loss: 0.035 | test avg. loss: 5.156


 57%|█████▋    | 28723/50000 [43:16<30:03, 11.80it/s]

Epochs: 28721 | epoch avg. loss: 0.025 | test avg. loss: 5.054
Epochs: 28722 | epoch avg. loss: 0.021 | test avg. loss: 5.259
Epochs: 28723 | epoch avg. loss: 0.061 | test avg. loss: 5.041


 57%|█████▋    | 28725/50000 [43:16<28:17, 12.53it/s]

Epochs: 28724 | epoch avg. loss: 0.053 | test avg. loss: 5.195
Epochs: 28725 | epoch avg. loss: 0.060 | test avg. loss: 4.991


 57%|█████▋    | 28729/50000 [43:17<30:36, 11.59it/s]

Epochs: 28726 | epoch avg. loss: 0.048 | test avg. loss: 5.160
Epochs: 28727 | epoch avg. loss: 0.065 | test avg. loss: 5.019
Epochs: 28728 | epoch avg. loss: 0.055 | test avg. loss: 5.131


 57%|█████▋    | 28731/50000 [43:17<29:51, 11.87it/s]

Epochs: 28729 | epoch avg. loss: 0.041 | test avg. loss: 5.037
Epochs: 28730 | epoch avg. loss: 0.035 | test avg. loss: 5.077
Epochs: 28731 | epoch avg. loss: 0.023 | test avg. loss: 5.071


 57%|█████▋    | 28735/50000 [43:17<28:07, 12.60it/s]

Epochs: 28732 | epoch avg. loss: 0.025 | test avg. loss: 5.089
Epochs: 28733 | epoch avg. loss: 0.024 | test avg. loss: 5.125
Epochs: 28734 | epoch avg. loss: 0.018 | test avg. loss: 5.063


 57%|█████▋    | 28737/50000 [43:17<29:30, 12.01it/s]

Epochs: 28735 | epoch avg. loss: 0.016 | test avg. loss: 5.056
Epochs: 28736 | epoch avg. loss: 0.017 | test avg. loss: 4.997
Epochs: 28737 | epoch avg. loss: 0.015 | test avg. loss: 5.061


 57%|█████▋    | 28741/50000 [43:18<27:48, 12.74it/s]

Epochs: 28738 | epoch avg. loss: 0.017 | test avg. loss: 5.046
Epochs: 28739 | epoch avg. loss: 0.013 | test avg. loss: 5.073
Epochs: 28740 | epoch avg. loss: 0.012 | test avg. loss: 5.105


 57%|█████▋    | 28743/50000 [43:18<31:21, 11.30it/s]

Epochs: 28741 | epoch avg. loss: 0.014 | test avg. loss: 5.033
Epochs: 28742 | epoch avg. loss: 0.021 | test avg. loss: 5.141
Epochs: 28743 | epoch avg. loss: 0.036 | test avg. loss: 5.025


 57%|█████▋    | 28747/50000 [43:18<31:03, 11.41it/s]

Epochs: 28744 | epoch avg. loss: 0.018 | test avg. loss: 5.132
Epochs: 28745 | epoch avg. loss: 0.021 | test avg. loss: 5.047
Epochs: 28746 | epoch avg. loss: 0.041 | test avg. loss: 5.115


 57%|█████▋    | 28749/50000 [43:19<31:18, 11.31it/s]

Epochs: 28747 | epoch avg. loss: 0.043 | test avg. loss: 5.073
Epochs: 28748 | epoch avg. loss: 0.021 | test avg. loss: 5.000
Epochs: 28749 | epoch avg. loss: 0.019 | test avg. loss: 5.065


 58%|█████▊    | 28753/50000 [43:19<31:40, 11.18it/s]

Epochs: 28750 | epoch avg. loss: 0.022 | test avg. loss: 5.005
Epochs: 28751 | epoch avg. loss: 0.023 | test avg. loss: 5.094
Epochs: 28752 | epoch avg. loss: 0.019 | test avg. loss: 5.021


 58%|█████▊    | 28755/50000 [43:19<31:47, 11.14it/s]

Epochs: 28753 | epoch avg. loss: 0.019 | test avg. loss: 5.107
Epochs: 28754 | epoch avg. loss: 0.032 | test avg. loss: 4.980
Epochs: 28755 | epoch avg. loss: 0.017 | test avg. loss: 5.031


 58%|█████▊    | 28759/50000 [43:19<30:44, 11.51it/s]

Epochs: 28756 | epoch avg. loss: 0.020 | test avg. loss: 4.974
Epochs: 28757 | epoch avg. loss: 0.017 | test avg. loss: 4.971
Epochs: 28758 | epoch avg. loss: 0.012 | test avg. loss: 4.971


                                                     

Epochs: 28759 | epoch avg. loss: 0.012 | test avg. loss: 4.992
Epochs: 28760 | epoch avg. loss: 0.013 | test avg. loss: 4.950


 58%|█████▊    | 28763/50000 [43:20<31:59, 11.06it/s]

Epochs: 28761 | epoch avg. loss: 0.010 | test avg. loss: 4.979
Epochs: 28762 | epoch avg. loss: 0.015 | test avg. loss: 4.899
Epochs: 28763 | epoch avg. loss: 0.013 | test avg. loss: 4.991


 58%|█████▊    | 28767/50000 [43:20<27:14, 12.99it/s]

Epochs: 28764 | epoch avg. loss: 0.026 | test avg. loss: 4.919
Epochs: 28765 | epoch avg. loss: 0.029 | test avg. loss: 5.053
Epochs: 28766 | epoch avg. loss: 0.032 | test avg. loss: 4.979
Epochs: 28767 | epoch avg. loss: 0.025 | test avg. loss: 5.064


 58%|█████▊    | 28771/50000 [43:20<24:48, 14.26it/s]

Epochs: 28768 | epoch avg. loss: 0.021 | test avg. loss: 4.954
Epochs: 28769 | epoch avg. loss: 0.038 | test avg. loss: 5.067
Epochs: 28770 | epoch avg. loss: 0.052 | test avg. loss: 4.925
Epochs: 28771 | epoch avg. loss: 0.049 | test avg. loss: 4.975


 58%|█████▊    | 28773/50000 [43:20<24:36, 14.38it/s]

Epochs: 28772 | epoch avg. loss: 0.024 | test avg. loss: 4.937
Epochs: 28773 | epoch avg. loss: 0.039 | test avg. loss: 4.917


 58%|█████▊    | 28777/50000 [43:21<28:14, 12.52it/s]

Epochs: 28774 | epoch avg. loss: 0.034 | test avg. loss: 4.852
Epochs: 28775 | epoch avg. loss: 0.021 | test avg. loss: 4.775
Epochs: 28776 | epoch avg. loss: 0.019 | test avg. loss: 4.781


 58%|█████▊    | 28779/50000 [43:21<27:26, 12.89it/s]

Epochs: 28777 | epoch avg. loss: 0.019 | test avg. loss: 4.761
Epochs: 28778 | epoch avg. loss: 0.018 | test avg. loss: 4.839
Epochs: 28779 | epoch avg. loss: 0.016 | test avg. loss: 4.835


 58%|█████▊    | 28783/50000 [43:21<27:24, 12.90it/s]

Epochs: 28780 | epoch avg. loss: 0.014 | test avg. loss: 4.907
Epochs: 28781 | epoch avg. loss: 0.013 | test avg. loss: 4.930
Epochs: 28782 | epoch avg. loss: 0.011 | test avg. loss: 4.975


 58%|█████▊    | 28785/50000 [43:21<26:51, 13.17it/s]

Epochs: 28783 | epoch avg. loss: 0.014 | test avg. loss: 4.920
Epochs: 28784 | epoch avg. loss: 0.015 | test avg. loss: 4.969
Epochs: 28785 | epoch avg. loss: 0.017 | test avg. loss: 4.876


 58%|█████▊    | 28789/50000 [43:22<29:53, 11.83it/s]

Epochs: 28786 | epoch avg. loss: 0.022 | test avg. loss: 4.964
Epochs: 28787 | epoch avg. loss: 0.027 | test avg. loss: 4.861
Epochs: 28788 | epoch avg. loss: 0.032 | test avg. loss: 4.880


 58%|█████▊    | 28791/50000 [43:22<29:17, 12.07it/s]

Epochs: 28789 | epoch avg. loss: 0.011 | test avg. loss: 4.879
Epochs: 28790 | epoch avg. loss: 0.013 | test avg. loss: 4.896
Epochs: 28791 | epoch avg. loss: 0.013 | test avg. loss: 4.898


 58%|█████▊    | 28795/50000 [43:22<28:25, 12.43it/s]

Epochs: 28792 | epoch avg. loss: 0.014 | test avg. loss: 4.853
Epochs: 28793 | epoch avg. loss: 0.016 | test avg. loss: 4.910
Epochs: 28794 | epoch avg. loss: 0.020 | test avg. loss: 4.856


 58%|█████▊    | 28797/50000 [43:22<27:32, 12.83it/s]

Epochs: 28795 | epoch avg. loss: 0.021 | test avg. loss: 4.893
Epochs: 28796 | epoch avg. loss: 0.010 | test avg. loss: 4.903
Epochs: 28797 | epoch avg. loss: 0.013 | test avg. loss: 4.872


 58%|█████▊    | 28799/50000 [43:23<30:10, 11.71it/s]

Epochs: 28798 | epoch avg. loss: 0.013 | test avg. loss: 4.892
Epochs: 28799 | epoch avg. loss: 0.013 | test avg. loss: 4.828


 58%|█████▊    | 28803/50000 [43:24<1:30:10,  3.92it/s]

Epochs: 28800 | epoch avg. loss: 0.028 | test avg. loss: 4.896
Epochs: 28801 | epoch avg. loss: 0.022 | test avg. loss: 4.827
Epochs: 28802 | epoch avg. loss: 0.015 | test avg. loss: 4.856


 58%|█████▊    | 28805/50000 [43:25<1:10:37,  5.00it/s]

Epochs: 28803 | epoch avg. loss: 0.015 | test avg. loss: 4.841
Epochs: 28804 | epoch avg. loss: 0.016 | test avg. loss: 4.787
Epochs: 28805 | epoch avg. loss: 0.025 | test avg. loss: 4.822


 58%|█████▊    | 28809/50000 [43:25<48:15,  7.32it/s]

Epochs: 28806 | epoch avg. loss: 0.011 | test avg. loss: 4.801
Epochs: 28807 | epoch avg. loss: 0.014 | test avg. loss: 4.807
Epochs: 28808 | epoch avg. loss: 0.013 | test avg. loss: 4.875


 58%|█████▊    | 28813/50000 [43:25<35:21,  9.99it/s]

Epochs: 28809 | epoch avg. loss: 0.023 | test avg. loss: 4.806
Epochs: 28810 | epoch avg. loss: 0.011 | test avg. loss: 4.856
Epochs: 28811 | epoch avg. loss: 0.014 | test avg. loss: 4.791
Epochs: 28812 | epoch avg. loss: 0.021 | test avg. loss: 4.942


 58%|█████▊    | 28815/50000 [43:25<32:02, 11.02it/s]

Epochs: 28813 | epoch avg. loss: 0.055 | test avg. loss: 4.788
Epochs: 28814 | epoch avg. loss: 0.060 | test avg. loss: 4.868
Epochs: 28815 | epoch avg. loss: 0.049 | test avg. loss: 4.860


                                                     

Epochs: 28816 | epoch avg. loss: 0.027 | test avg. loss: 4.784
Epochs: 28817 | epoch avg. loss: 0.028 | test avg. loss: 4.815
Epochs: 28818 | epoch avg. loss: 0.031 | test avg. loss: 4.673


 58%|█████▊    | 28821/50000 [43:26<27:03, 13.04it/s]

Epochs: 28819 | epoch avg. loss: 0.028 | test avg. loss: 4.767
Epochs: 28820 | epoch avg. loss: 0.037 | test avg. loss: 4.693
Epochs: 28821 | epoch avg. loss: 0.026 | test avg. loss: 4.755




Epochs: 28822 | epoch avg. loss: 0.020 | test avg. loss: 4.819
Epochs: 28823 | epoch avg. loss: 0.028 | test avg. loss: 4.753
Epochs: 28824 | epoch avg. loss: 0.018 | test avg. loss: 4.733


 58%|█████▊    | 28827/50000 [43:26<26:39, 13.24it/s]

Epochs: 28825 | epoch avg. loss: 0.023 | test avg. loss: 4.847
Epochs: 28826 | epoch avg. loss: 0.027 | test avg. loss: 4.788
Epochs: 28827 | epoch avg. loss: 0.029 | test avg. loss: 4.834


 58%|█████▊    | 28831/50000 [43:27<27:39, 12.76it/s]

Epochs: 28828 | epoch avg. loss: 0.016 | test avg. loss: 4.777
Epochs: 28829 | epoch avg. loss: 0.012 | test avg. loss: 4.784
Epochs: 28830 | epoch avg. loss: 0.019 | test avg. loss: 4.771


                                                     

Epochs: 28831 | epoch avg. loss: 0.015 | test avg. loss: 4.808
Epochs: 28832 | epoch avg. loss: 0.016 | test avg. loss: 4.850
Epochs: 28833 | epoch avg. loss: 0.015 | test avg. loss: 4.825


 58%|█████▊    | 28837/50000 [43:27<25:28, 13.84it/s]

Epochs: 28834 | epoch avg. loss: 0.016 | test avg. loss: 4.869
Epochs: 28835 | epoch avg. loss: 0.023 | test avg. loss: 4.767
Epochs: 28836 | epoch avg. loss: 0.028 | test avg. loss: 4.811


 58%|█████▊    | 28841/50000 [43:27<24:21, 14.47it/s]

Epochs: 28837 | epoch avg. loss: 0.011 | test avg. loss: 4.806
Epochs: 28838 | epoch avg. loss: 0.018 | test avg. loss: 4.789
Epochs: 28839 | epoch avg. loss: 0.020 | test avg. loss: 4.821
Epochs: 28840 | epoch avg. loss: 0.018 | test avg. loss: 4.723


 58%|█████▊    | 28843/50000 [43:27<24:09, 14.59it/s]

Epochs: 28841 | epoch avg. loss: 0.014 | test avg. loss: 4.760
Epochs: 28842 | epoch avg. loss: 0.019 | test avg. loss: 4.728
Epochs: 28843 | epoch avg. loss: 0.010 | test avg. loss: 4.755


 58%|█████▊    | 28847/50000 [43:28<25:03, 14.07it/s]

Epochs: 28844 | epoch avg. loss: 0.009 | test avg. loss: 4.799
Epochs: 28845 | epoch avg. loss: 0.010 | test avg. loss: 4.793
Epochs: 28846 | epoch avg. loss: 0.015 | test avg. loss: 4.829


 58%|█████▊    | 28849/50000 [43:28<24:56, 14.13it/s]

Epochs: 28847 | epoch avg. loss: 0.010 | test avg. loss: 4.766
Epochs: 28848 | epoch avg. loss: 0.017 | test avg. loss: 4.877
Epochs: 28849 | epoch avg. loss: 0.029 | test avg. loss: 4.761


 58%|█████▊    | 28853/50000 [43:28<25:35, 13.77it/s]

Epochs: 28850 | epoch avg. loss: 0.034 | test avg. loss: 4.816
Epochs: 28851 | epoch avg. loss: 0.022 | test avg. loss: 4.760
Epochs: 28852 | epoch avg. loss: 0.012 | test avg. loss: 4.775


 58%|█████▊    | 28855/50000 [43:28<25:05, 14.04it/s]

Epochs: 28853 | epoch avg. loss: 0.012 | test avg. loss: 4.744
Epochs: 28854 | epoch avg. loss: 0.013 | test avg. loss: 4.743
Epochs: 28855 | epoch avg. loss: 0.015 | test avg. loss: 4.772


 58%|█████▊    | 28859/50000 [43:29<27:24, 12.85it/s]

Epochs: 28856 | epoch avg. loss: 0.017 | test avg. loss: 4.716
Epochs: 28857 | epoch avg. loss: 0.023 | test avg. loss: 4.841
Epochs: 28858 | epoch avg. loss: 0.027 | test avg. loss: 4.757




Epochs: 28859 | epoch avg. loss: 0.036 | test avg. loss: 4.808
Epochs: 28860 | epoch avg. loss: 0.019 | test avg. loss: 4.757
Epochs: 28861 | epoch avg. loss: 0.021 | test avg. loss: 4.709


 58%|█████▊    | 28865/50000 [43:29<24:31, 14.36it/s]

Epochs: 28862 | epoch avg. loss: 0.022 | test avg. loss: 4.790
Epochs: 28863 | epoch avg. loss: 0.030 | test avg. loss: 4.686
Epochs: 28864 | epoch avg. loss: 0.022 | test avg. loss: 4.780
Epochs: 28865 | epoch avg. loss: 0.029 | test avg. loss: 4.664




Epochs: 28866 | epoch avg. loss: 0.025 | test avg. loss: 4.721
Epochs: 28867 | epoch avg. loss: 0.022 | test avg. loss: 4.691
Epochs: 28868 | epoch avg. loss: 0.027 | test avg. loss: 4.720


 58%|█████▊    | 28871/50000 [43:29<24:12, 14.55it/s]

Epochs: 28869 | epoch avg. loss: 0.028 | test avg. loss: 4.830
Epochs: 28870 | epoch avg. loss: 0.035 | test avg. loss: 4.748
Epochs: 28871 | epoch avg. loss: 0.031 | test avg. loss: 4.855


 58%|█████▊    | 28875/50000 [43:30<24:27, 14.39it/s]

Epochs: 28872 | epoch avg. loss: 0.022 | test avg. loss: 4.790
Epochs: 28873 | epoch avg. loss: 0.035 | test avg. loss: 4.840
Epochs: 28874 | epoch avg. loss: 0.029 | test avg. loss: 4.793
Epochs: 28875 | epoch avg. loss: 0.016 | test avg. loss: 4.752


 58%|█████▊    | 28879/50000 [43:30<23:50, 14.77it/s]

Epochs: 28876 | epoch avg. loss: 0.019 | test avg. loss: 4.700
Epochs: 28877 | epoch avg. loss: 0.018 | test avg. loss: 4.633
Epochs: 28878 | epoch avg. loss: 0.025 | test avg. loss: 4.790
Epochs: 28879 | epoch avg. loss: 0.049 | test avg. loss: 4.652


 58%|█████▊    | 28883/50000 [43:30<24:08, 14.58it/s]

Epochs: 28880 | epoch avg. loss: 0.046 | test avg. loss: 4.799
Epochs: 28881 | epoch avg. loss: 0.043 | test avg. loss: 4.705
Epochs: 28882 | epoch avg. loss: 0.028 | test avg. loss: 4.806


 58%|█████▊    | 28885/50000 [43:30<23:45, 14.81it/s]

Epochs: 28883 | epoch avg. loss: 0.040 | test avg. loss: 4.792
Epochs: 28884 | epoch avg. loss: 0.030 | test avg. loss: 4.753
Epochs: 28885 | epoch avg. loss: 0.030 | test avg. loss: 4.838


 58%|█████▊    | 28889/50000 [43:31<28:33, 12.32it/s]

Epochs: 28886 | epoch avg. loss: 0.040 | test avg. loss: 4.739
Epochs: 28887 | epoch avg. loss: 0.110 | test avg. loss: 4.826
Epochs: 28888 | epoch avg. loss: 0.041 | test avg. loss: 4.667


 58%|█████▊    | 28893/50000 [43:31<26:07, 13.46it/s]

Epochs: 28889 | epoch avg. loss: 0.064 | test avg. loss: 4.717
Epochs: 28890 | epoch avg. loss: 0.053 | test avg. loss: 4.673
Epochs: 28891 | epoch avg. loss: 0.046 | test avg. loss: 4.672
Epochs: 28892 | epoch avg. loss: 0.034 | test avg. loss: 4.705




Epochs: 28893 | epoch avg. loss: 0.034 | test avg. loss: 4.630
Epochs: 28894 | epoch avg. loss: 0.049 | test avg. loss: 4.751
Epochs: 28895 | epoch avg. loss: 0.035 | test avg. loss: 4.707


 58%|█████▊    | 28899/50000 [43:31<24:47, 14.19it/s]

Epochs: 28896 | epoch avg. loss: 0.027 | test avg. loss: 4.778
Epochs: 28897 | epoch avg. loss: 0.019 | test avg. loss: 4.731
Epochs: 28898 | epoch avg. loss: 0.011 | test avg. loss: 4.715


 58%|█████▊    | 28899/50000 [43:31<24:47, 14.19it/s]

Epochs: 28899 | epoch avg. loss: 0.013 | test avg. loss: 4.652


 58%|█████▊    | 28903/50000 [43:33<1:23:42,  4.20it/s]

Epochs: 28900 | epoch avg. loss: 0.011 | test avg. loss: 4.687
Epochs: 28901 | epoch avg. loss: 0.019 | test avg. loss: 4.642
Epochs: 28902 | epoch avg. loss: 0.023 | test avg. loss: 4.661


 58%|█████▊    | 28905/50000 [43:34<1:07:06,  5.24it/s]

Epochs: 28903 | epoch avg. loss: 0.023 | test avg. loss: 4.605
Epochs: 28904 | epoch avg. loss: 0.012 | test avg. loss: 4.582
Epochs: 28905 | epoch avg. loss: 0.019 | test avg. loss: 4.632


 58%|█████▊    | 28909/50000 [43:34<46:05,  7.63it/s]

Epochs: 28906 | epoch avg. loss: 0.017 | test avg. loss: 4.617
Epochs: 28907 | epoch avg. loss: 0.027 | test avg. loss: 4.737
Epochs: 28908 | epoch avg. loss: 0.029 | test avg. loss: 4.627




Epochs: 28909 | epoch avg. loss: 0.031 | test avg. loss: 4.681
Epochs: 28910 | epoch avg. loss: 0.020 | test avg. loss: 4.685
Epochs: 28911 | epoch avg. loss: 0.016 | test avg. loss: 4.622


 58%|█████▊    | 28915/50000 [43:34<31:45, 11.06it/s]

Epochs: 28912 | epoch avg. loss: 0.015 | test avg. loss: 4.676
Epochs: 28913 | epoch avg. loss: 0.016 | test avg. loss: 4.619
Epochs: 28914 | epoch avg. loss: 0.021 | test avg. loss: 4.626


 58%|█████▊    | 28917/50000 [43:34<30:29, 11.52it/s]

Epochs: 28915 | epoch avg. loss: 0.014 | test avg. loss: 4.689
Epochs: 28916 | epoch avg. loss: 0.015 | test avg. loss: 4.609
Epochs: 28917 | epoch avg. loss: 0.021 | test avg. loss: 4.632


 58%|█████▊    | 28921/50000 [43:35<32:46, 10.72it/s]

Epochs: 28918 | epoch avg. loss: 0.010 | test avg. loss: 4.647
Epochs: 28919 | epoch avg. loss: 0.007 | test avg. loss: 4.690
Epochs: 28920 | epoch avg. loss: 0.010 | test avg. loss: 4.671


 58%|█████▊    | 28923/50000 [43:35<32:07, 10.93it/s]

Epochs: 28921 | epoch avg. loss: 0.012 | test avg. loss: 4.669
Epochs: 28922 | epoch avg. loss: 0.009 | test avg. loss: 4.658
Epochs: 28923 | epoch avg. loss: 0.009 | test avg. loss: 4.614


 58%|█████▊    | 28927/50000 [43:35<29:19, 11.98it/s]

Epochs: 28924 | epoch avg. loss: 0.008 | test avg. loss: 4.645
Epochs: 28925 | epoch avg. loss: 0.015 | test avg. loss: 4.654
Epochs: 28926 | epoch avg. loss: 0.009 | test avg. loss: 4.644




Epochs: 28927 | epoch avg. loss: 0.009 | test avg. loss: 4.641
Epochs: 28928 | epoch avg. loss: 0.008 | test avg. loss: 4.655


 58%|█████▊    | 28931/50000 [43:36<32:28, 10.82it/s]

Epochs: 28929 | epoch avg. loss: 0.011 | test avg. loss: 4.640
Epochs: 28930 | epoch avg. loss: 0.010 | test avg. loss: 4.623
Epochs: 28931 | epoch avg. loss: 0.014 | test avg. loss: 4.677


 58%|█████▊    | 28935/50000 [43:36<29:23, 11.94it/s]

Epochs: 28932 | epoch avg. loss: 0.009 | test avg. loss: 4.637
Epochs: 28933 | epoch avg. loss: 0.012 | test avg. loss: 4.680
Epochs: 28934 | epoch avg. loss: 0.015 | test avg. loss: 4.604


 58%|█████▊    | 28937/50000 [43:36<28:26, 12.35it/s]

Epochs: 28935 | epoch avg. loss: 0.016 | test avg. loss: 4.630
Epochs: 28936 | epoch avg. loss: 0.015 | test avg. loss: 4.600
Epochs: 28937 | epoch avg. loss: 0.027 | test avg. loss: 4.610


 58%|█████▊    | 28941/50000 [43:36<27:29, 12.76it/s]

Epochs: 28938 | epoch avg. loss: 0.026 | test avg. loss: 4.721
Epochs: 28939 | epoch avg. loss: 0.026 | test avg. loss: 4.645
Epochs: 28940 | epoch avg. loss: 0.047 | test avg. loss: 4.711


 58%|█████▊    | 28943/50000 [43:37<29:38, 11.84it/s]

Epochs: 28941 | epoch avg. loss: 0.064 | test avg. loss: 4.662
Epochs: 28942 | epoch avg. loss: 0.051 | test avg. loss: 4.679
Epochs: 28943 | epoch avg. loss: 0.023 | test avg. loss: 4.714


 58%|█████▊    | 28947/50000 [43:37<27:53, 12.58it/s]

Epochs: 28944 | epoch avg. loss: 0.032 | test avg. loss: 4.691
Epochs: 28945 | epoch avg. loss: 0.042 | test avg. loss: 4.854
Epochs: 28946 | epoch avg. loss: 0.076 | test avg. loss: 4.677


 58%|█████▊    | 28951/50000 [43:37<25:19, 13.85it/s]

Epochs: 28947 | epoch avg. loss: 0.106 | test avg. loss: 4.775
Epochs: 28948 | epoch avg. loss: 0.098 | test avg. loss: 4.739
Epochs: 28949 | epoch avg. loss: 0.053 | test avg. loss: 4.804
Epochs: 28950 | epoch avg. loss: 0.118 | test avg. loss: 5.091


 58%|█████▊    | 28955/50000 [43:37<24:12, 14.49it/s]

Epochs: 28951 | epoch avg. loss: 0.182 | test avg. loss: 4.848
Epochs: 28952 | epoch avg. loss: 0.259 | test avg. loss: 4.713
Epochs: 28953 | epoch avg. loss: 0.146 | test avg. loss: 4.611
Epochs: 28954 | epoch avg. loss: 0.116 | test avg. loss: 4.420


 58%|█████▊    | 28957/50000 [43:38<27:22, 12.81it/s]

Epochs: 28955 | epoch avg. loss: 0.104 | test avg. loss: 4.638
Epochs: 28956 | epoch avg. loss: 0.052 | test avg. loss: 4.702
Epochs: 28957 | epoch avg. loss: 0.090 | test avg. loss: 4.807


 58%|█████▊    | 28961/50000 [43:38<28:32, 12.29it/s]

Epochs: 28958 | epoch avg. loss: 0.062 | test avg. loss: 4.768
Epochs: 28959 | epoch avg. loss: 0.050 | test avg. loss: 4.684
Epochs: 28960 | epoch avg. loss: 0.067 | test avg. loss: 4.815


 58%|█████▊    | 28963/50000 [43:38<28:23, 12.35it/s]

Epochs: 28961 | epoch avg. loss: 0.080 | test avg. loss: 4.662
Epochs: 28962 | epoch avg. loss: 0.069 | test avg. loss: 4.746
Epochs: 28963 | epoch avg. loss: 0.066 | test avg. loss: 4.769


 58%|█████▊    | 28967/50000 [43:38<28:31, 12.29it/s]

Epochs: 28964 | epoch avg. loss: 0.056 | test avg. loss: 4.679
Epochs: 28965 | epoch avg. loss: 0.091 | test avg. loss: 4.795
Epochs: 28966 | epoch avg. loss: 0.064 | test avg. loss: 4.618


 58%|█████▊    | 28969/50000 [43:39<30:56, 11.33it/s]

Epochs: 28967 | epoch avg. loss: 0.086 | test avg. loss: 4.789
Epochs: 28968 | epoch avg. loss: 0.144 | test avg. loss: 4.531
Epochs: 28969 | epoch avg. loss: 0.130 | test avg. loss: 4.553


 58%|█████▊    | 28973/50000 [43:39<29:11, 12.00it/s]

Epochs: 28970 | epoch avg. loss: 0.064 | test avg. loss: 4.747
Epochs: 28971 | epoch avg. loss: 0.063 | test avg. loss: 4.586
Epochs: 28972 | epoch avg. loss: 0.055 | test avg. loss: 4.762


 58%|█████▊    | 28977/50000 [43:39<26:04, 13.44it/s]

Epochs: 28973 | epoch avg. loss: 0.093 | test avg. loss: 4.520
Epochs: 28974 | epoch avg. loss: 0.073 | test avg. loss: 4.587
Epochs: 28975 | epoch avg. loss: 0.087 | test avg. loss: 4.632
Epochs: 28976 | epoch avg. loss: 0.064 | test avg. loss: 4.560


 58%|█████▊    | 28979/50000 [43:39<26:12, 13.36it/s]

Epochs: 28977 | epoch avg. loss: 0.115 | test avg. loss: 4.907
Epochs: 28978 | epoch avg. loss: 0.121 | test avg. loss: 4.654
Epochs: 28979 | epoch avg. loss: 0.176 | test avg. loss: 4.583


 58%|█████▊    | 28983/50000 [43:40<27:48, 12.60it/s]

Epochs: 28980 | epoch avg. loss: 0.074 | test avg. loss: 4.567
Epochs: 28981 | epoch avg. loss: 0.060 | test avg. loss: 4.428
Epochs: 28982 | epoch avg. loss: 0.044 | test avg. loss: 4.627


 58%|█████▊    | 28985/50000 [43:40<26:27, 13.24it/s]

Epochs: 28983 | epoch avg. loss: 0.035 | test avg. loss: 4.599
Epochs: 28984 | epoch avg. loss: 0.077 | test avg. loss: 4.701
Epochs: 28985 | epoch avg. loss: 0.053 | test avg. loss: 4.531


 58%|█████▊    | 28989/50000 [43:40<26:31, 13.20it/s]

Epochs: 28986 | epoch avg. loss: 0.047 | test avg. loss: 4.515
Epochs: 28987 | epoch avg. loss: 0.041 | test avg. loss: 4.598
Epochs: 28988 | epoch avg. loss: 0.029 | test avg. loss: 4.623


 58%|█████▊    | 28991/50000 [43:40<25:34, 13.69it/s]

Epochs: 28989 | epoch avg. loss: 0.029 | test avg. loss: 4.727
Epochs: 28990 | epoch avg. loss: 0.030 | test avg. loss: 4.623
Epochs: 28991 | epoch avg. loss: 0.033 | test avg. loss: 4.632


 58%|█████▊    | 28995/50000 [43:41<26:32, 13.19it/s]

Epochs: 28992 | epoch avg. loss: 0.042 | test avg. loss: 4.606
Epochs: 28993 | epoch avg. loss: 0.026 | test avg. loss: 4.574
Epochs: 28994 | epoch avg. loss: 0.033 | test avg. loss: 4.652


 58%|█████▊    | 28997/50000 [43:41<25:23, 13.79it/s]

Epochs: 28995 | epoch avg. loss: 0.024 | test avg. loss: 4.642
Epochs: 28996 | epoch avg. loss: 0.019 | test avg. loss: 4.604
Epochs: 28997 | epoch avg. loss: 0.013 | test avg. loss: 4.573


 58%|█████▊    | 28999/50000 [43:41<25:05, 13.95it/s]

Epochs: 28998 | epoch avg. loss: 0.011 | test avg. loss: 4.540
Epochs: 28999 | epoch avg. loss: 0.013 | test avg. loss: 4.565


 58%|█████▊    | 29003/50000 [43:43<1:31:55,  3.81it/s]

Epochs: 29000 | epoch avg. loss: 0.012 | test avg. loss: 4.617
Epochs: 29001 | epoch avg. loss: 0.011 | test avg. loss: 4.589
Epochs: 29002 | epoch avg. loss: 0.012 | test avg. loss: 4.553


 58%|█████▊    | 29005/50000 [43:43<1:13:15,  4.78it/s]

Epochs: 29003 | epoch avg. loss: 0.010 | test avg. loss: 4.580
Epochs: 29004 | epoch avg. loss: 0.019 | test avg. loss: 4.526
Epochs: 29005 | epoch avg. loss: 0.027 | test avg. loss: 4.569


 58%|█████▊    | 29009/50000 [43:43<51:30,  6.79it/s]  

Epochs: 29006 | epoch avg. loss: 0.020 | test avg. loss: 4.689
Epochs: 29007 | epoch avg. loss: 0.022 | test avg. loss: 4.607
Epochs: 29008 | epoch avg. loss: 0.017 | test avg. loss: 4.662


 58%|█████▊    | 29011/50000 [43:44<45:06,  7.75it/s]

Epochs: 29009 | epoch avg. loss: 0.032 | test avg. loss: 4.477
Epochs: 29010 | epoch avg. loss: 0.072 | test avg. loss: 4.493
Epochs: 29011 | epoch avg. loss: 0.047 | test avg. loss: 4.715


 58%|█████▊    | 29015/50000 [43:44<36:30,  9.58it/s]

Epochs: 29012 | epoch avg. loss: 0.039 | test avg. loss: 4.617
Epochs: 29013 | epoch avg. loss: 0.037 | test avg. loss: 4.613
Epochs: 29014 | epoch avg. loss: 0.018 | test avg. loss: 4.572


 58%|█████▊    | 29017/50000 [43:44<33:37, 10.40it/s]

Epochs: 29015 | epoch avg. loss: 0.015 | test avg. loss: 4.504
Epochs: 29016 | epoch avg. loss: 0.010 | test avg. loss: 4.540
Epochs: 29017 | epoch avg. loss: 0.014 | test avg. loss: 4.577
Epochs: 29018 | epoch avg. loss: 0.010 | test avg. loss: 4.576


 58%|█████▊    | 29021/50000 [43:44<28:03, 12.46it/s]

Epochs: 29019 | epoch avg. loss: 0.010 | test avg. loss: 4.576
Epochs: 29020 | epoch avg. loss: 0.010 | test avg. loss: 4.579
Epochs: 29021 | epoch avg. loss: 0.014 | test avg. loss: 4.515


 58%|█████▊    | 29025/50000 [43:45<26:29, 13.19it/s]

Epochs: 29022 | epoch avg. loss: 0.017 | test avg. loss: 4.629
Epochs: 29023 | epoch avg. loss: 0.046 | test avg. loss: 4.548
Epochs: 29024 | epoch avg. loss: 0.030 | test avg. loss: 4.572
Epochs: 29025 | epoch avg. loss: 0.019 | test avg. loss: 4.609


 58%|█████▊    | 29029/50000 [43:45<25:07, 13.91it/s]

Epochs: 29026 | epoch avg. loss: 0.020 | test avg. loss: 4.500
Epochs: 29027 | epoch avg. loss: 0.019 | test avg. loss: 4.530
Epochs: 29028 | epoch avg. loss: 0.007 | test avg. loss: 4.557


 58%|█████▊    | 29033/50000 [43:45<23:45, 14.71it/s]

Epochs: 29029 | epoch avg. loss: 0.007 | test avg. loss: 4.587
Epochs: 29030 | epoch avg. loss: 0.008 | test avg. loss: 4.570
Epochs: 29031 | epoch avg. loss: 0.008 | test avg. loss: 4.572
Epochs: 29032 | epoch avg. loss: 0.008 | test avg. loss: 4.525


 58%|█████▊    | 29035/50000 [43:45<23:44, 14.72it/s]

Epochs: 29033 | epoch avg. loss: 0.010 | test avg. loss: 4.546
Epochs: 29034 | epoch avg. loss: 0.010 | test avg. loss: 4.576
Epochs: 29035 | epoch avg. loss: 0.009 | test avg. loss: 4.561


 58%|█████▊    | 29039/50000 [43:46<26:54, 12.98it/s]

Epochs: 29036 | epoch avg. loss: 0.008 | test avg. loss: 4.619
Epochs: 29037 | epoch avg. loss: 0.016 | test avg. loss: 4.526
Epochs: 29038 | epoch avg. loss: 0.013 | test avg. loss: 4.541


 58%|█████▊    | 29041/50000 [43:46<26:24, 13.23it/s]

Epochs: 29039 | epoch avg. loss: 0.012 | test avg. loss: 4.563
Epochs: 29040 | epoch avg. loss: 0.014 | test avg. loss: 4.526
Epochs: 29041 | epoch avg. loss: 0.038 | test avg. loss: 4.621


 58%|█████▊    | 29045/50000 [43:46<25:01, 13.95it/s]

Epochs: 29042 | epoch avg. loss: 0.045 | test avg. loss: 4.580
Epochs: 29043 | epoch avg. loss: 0.022 | test avg. loss: 4.543
Epochs: 29044 | epoch avg. loss: 0.034 | test avg. loss: 4.630


                                                     

Epochs: 29045 | epoch avg. loss: 0.033 | test avg. loss: 4.558
Epochs: 29046 | epoch avg. loss: 0.026 | test avg. loss: 4.642
Epochs: 29047 | epoch avg. loss: 0.025 | test avg. loss: 4.578


 58%|█████▊    | 29051/50000 [43:47<26:59, 12.94it/s]

Epochs: 29048 | epoch avg. loss: 0.024 | test avg. loss: 4.549
Epochs: 29049 | epoch avg. loss: 0.030 | test avg. loss: 4.713
Epochs: 29050 | epoch avg. loss: 0.079 | test avg. loss: 4.531


 58%|█████▊    | 29055/50000 [43:47<25:36, 13.63it/s]

Epochs: 29051 | epoch avg. loss: 0.045 | test avg. loss: 4.541
Epochs: 29052 | epoch avg. loss: 0.019 | test avg. loss: 4.561
Epochs: 29053 | epoch avg. loss: 0.017 | test avg. loss: 4.520
Epochs: 29054 | epoch avg. loss: 0.024 | test avg. loss: 4.722




Epochs: 29055 | epoch avg. loss: 0.080 | test avg. loss: 4.535
Epochs: 29056 | epoch avg. loss: 0.055 | test avg. loss: 4.586
Epochs: 29057 | epoch avg. loss: 0.044 | test avg. loss: 4.655


 58%|█████▊    | 29061/50000 [43:47<23:49, 14.65it/s]

Epochs: 29058 | epoch avg. loss: 0.043 | test avg. loss: 4.516
Epochs: 29059 | epoch avg. loss: 0.079 | test avg. loss: 4.669
Epochs: 29060 | epoch avg. loss: 0.081 | test avg. loss: 4.531
Epochs: 29061 | epoch avg. loss: 0.015 | test avg. loss: 4.603


 58%|█████▊    | 29065/50000 [43:47<25:02, 13.93it/s]

Epochs: 29062 | epoch avg. loss: 0.056 | test avg. loss: 4.551
Epochs: 29063 | epoch avg. loss: 0.024 | test avg. loss: 4.479
Epochs: 29064 | epoch avg. loss: 0.019 | test avg. loss: 4.629


 58%|█████▊    | 29067/50000 [43:48<25:32, 13.66it/s]

Epochs: 29065 | epoch avg. loss: 0.031 | test avg. loss: 4.541
Epochs: 29066 | epoch avg. loss: 0.073 | test avg. loss: 4.642
Epochs: 29067 | epoch avg. loss: 0.047 | test avg. loss: 4.578


 58%|█████▊    | 29071/50000 [43:48<24:20, 14.33it/s]

Epochs: 29068 | epoch avg. loss: 0.032 | test avg. loss: 4.552
Epochs: 29069 | epoch avg. loss: 0.034 | test avg. loss: 4.629
Epochs: 29070 | epoch avg. loss: 0.046 | test avg. loss: 4.448
Epochs: 29071 | epoch avg. loss: 0.029 | test avg. loss: 4.507


 58%|█████▊    | 29075/50000 [43:48<24:31, 14.22it/s]

Epochs: 29072 | epoch avg. loss: 0.016 | test avg. loss: 4.526
Epochs: 29073 | epoch avg. loss: 0.014 | test avg. loss: 4.515
Epochs: 29074 | epoch avg. loss: 0.023 | test avg. loss: 4.566


 58%|█████▊    | 29077/50000 [43:48<25:05, 13.90it/s]

Epochs: 29075 | epoch avg. loss: 0.010 | test avg. loss: 4.521
Epochs: 29076 | epoch avg. loss: 0.013 | test avg. loss: 4.534
Epochs: 29077 | epoch avg. loss: 0.014 | test avg. loss: 4.544


 58%|█████▊    | 29081/50000 [43:49<27:29, 12.68it/s]

Epochs: 29078 | epoch avg. loss: 0.012 | test avg. loss: 4.501
Epochs: 29079 | epoch avg. loss: 0.008 | test avg. loss: 4.543
Epochs: 29080 | epoch avg. loss: 0.011 | test avg. loss: 4.532


 58%|█████▊    | 29083/50000 [43:49<29:04, 11.99it/s]

Epochs: 29081 | epoch avg. loss: 0.012 | test avg. loss: 4.511
Epochs: 29082 | epoch avg. loss: 0.014 | test avg. loss: 4.581
Epochs: 29083 | epoch avg. loss: 0.020 | test avg. loss: 4.519


 58%|█████▊    | 29087/50000 [43:49<28:24, 12.27it/s]

Epochs: 29084 | epoch avg. loss: 0.010 | test avg. loss: 4.531
Epochs: 29085 | epoch avg. loss: 0.010 | test avg. loss: 4.533
Epochs: 29086 | epoch avg. loss: 0.008 | test avg. loss: 4.530


 58%|█████▊    | 29089/50000 [43:49<26:46, 13.02it/s]

Epochs: 29087 | epoch avg. loss: 0.011 | test avg. loss: 4.542
Epochs: 29088 | epoch avg. loss: 0.012 | test avg. loss: 4.542
Epochs: 29089 | epoch avg. loss: 0.011 | test avg. loss: 4.611


 58%|█████▊    | 29091/50000 [43:50<28:05, 12.40it/s]