In [None]:
#andromeda with multi query attention
!git clone https://github.com/kyegomez/Optimus-Prime.git
%cd Optimus-Prime
!pip install --upgrade torch
# !pip install -r requirements.txt
!pip install einops
# !pip install --upgrade torch

# %cd Optimus-Prime
# # %cd examples
# # !ls
# !python3 trainandromeda.py 
# #%cd enwik8_simple
# # !python trainandromeda.py


from torch.serialization import load
import torch 
from x_transformers import TransformerWrapper, Decoder, AutoregressiveWrapper

#training
import random
import tqdm
import gzip
import numpy as np
import torch.optim as optim
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset
import os
# from torch.utils.tensorboard import SummaryWriter
# from torchmetrics import MetricCollection, Accuracy


# constants

NUM_BATCHES = int(1e5)
BATCH_SIZE = 4
GRADIENT_ACCUMULATE_EVERY = 4
LEARNING_RATE = 1e-4
VALIDATE_EVERY  = 100
GENERATE_EVERY  = 500
GENERATE_LENGTH = 1024
SEQ_LEN = 1024
SAVE_EVERY=500


# helpers

def cycle(loader):
    while True:
        for data in loader:
            yield data

def decode_token(token):
    return str(chr(max(32, token)))

def decode_tokens(tokens):
    return ''.join(list(map(decode_token, tokens)))

model = TransformerWrapper(
    num_tokens=64007,
    max_seq_len=8192,
    use_abs_pos_emb = False,
    attn_layers = Decoder(
        dim=512,
        depth=6,
        heads=8,
        alibi_pos_bias=True,
        alibi_num_heads=4,
        rotary_xpos=True,
        attn_flash = True,
        deepnorm=True,
        shift_tokens=1,
        attn_one_kv_head = True,
        #qk_norm=True
    )
)


model = AutoregressiveWrapper(model)
model.cuda()

with gzip.open('./enwik8.gz') as file:
  data = np.frombuffer(file.read(int(95e6)), dtype=np.uint8).copy()
  train_x, valid_x = np.split(data, [int(90e6)])
  data_train, data_val = torch.from_numpy(train_x), torch.from_numpy(valid_x) #.cuda()??

class TextSamplerDataset(Dataset):
    def __init__(self, data, seq_len):
        super().__init__()
        self.data = data
        self.seq_len = seq_len

    def __getitem__(self, index):
        rand_start = torch.randint(0, self.data.size(0) - self.seq_len - 1, (1,))
        full_seq = self.data[rand_start: rand_start + self.seq_len + 1].long()
        return full_seq.cuda()

    def __len__(self):
        return self.data.size(0) // self.seq_len

train_dataset = TextSamplerDataset(data_train, SEQ_LEN)
val_dataset   = TextSamplerDataset(data_val, SEQ_LEN)
train_loader  = cycle(DataLoader(train_dataset, batch_size = BATCH_SIZE, drop_last = True))
val_loader    = cycle(DataLoader(val_dataset, batch_size = BATCH_SIZE, drop_last = True))

# optimizer

optim = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# training

# #init tensorboard 
# writer = SummaryWriter(log_dir="./log")

# #define metrics
# metrics = MetricCollection({'accuracy': Accuracy(num_classes=num_classes, task='classification')})
device="cuda"
for i in tqdm.tqdm(range(NUM_BATCHES), mininterval=10., desc='training'):
    model.train()

    for _ in range(GRADIENT_ACCUMULATE_EVERY):
        loss = model(next(train_loader))#.to(device)
        (loss / GRADIENT_ACCUMULATE_EVERY).backward()#.to(device)#.cuda()

    print(f'training loss: {loss.item()}')
    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
    optim.step()
    optim.zero_grad()


    if i % VALIDATE_EVERY == 0:
            model.eval()
            with torch.no_grad():
                loss = model(next(val_loader))
                print(f'validation loss: {loss.item()}')

                # # Calculate validation metrics
                # val_metrics = MetricCollection({'val_accuracy': Accuracy()})
                # val_metrics(loss, model(next(val_loader)).argmax(dim=-1))

                # # Add validation metrics to the SummaryWriter
                # writer.add_scalar('Validation/Accuracy', val_metrics['val_accuracy'].compute(), global_step=i)

    if i % GENERATE_EVERY == 0:
        model.eval()
        inp = random.choice(val_dataset)[:-1]
        prime = decode_tokens(inp)
        print(f'%s \n\n %s', (prime, '*' * 100))

        sample = model.generate(inp, GENERATE_LENGTH)
        output_str = decode_tokens(sample)
        print(output_str)

    # Save the model every save_every iterations
    if i % SAVE_EVERY == 0:
        # Specify the directory and filename to save the model
        save_dir = './saved_models/'
        save_filename = 'model_checkpoint.pt'

        # Create the save directory if it doesn't exist
        os.makedirs(save_dir, exist_ok=True)

        # Save the model checkpoint
        torch.save(model.state_dict(), os.path.join(save_dir, save_filename))
        print(f"Model saved at iteration {i}")

#     # Add training metrics to the SummaryWriter
#     writer.add_scalar('Training/Accuracy', metrics['accuracy'].compute(), global_step=i)

#     # Close the SummaryWriter
# writer.close()

Cloning into 'Optimus-Prime'...
remote: Enumerating objects: 1642, done.[K
remote: Counting objects: 100% (481/481), done.[K
remote: Compressing objects: 100% (210/210), done.[K
remote: Total 1642 (delta 324), reused 388 (delta 268), pack-reused 1161[K
Receiving objects: 100% (1642/1642), 37.52 MiB | 23.85 MiB/s, done.
Resolving deltas: 100% (1115/1115), done.
/content/Optimus-Prime
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch
  Downloading torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl (619.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m619.9/619.9 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu11==11.7.99 (from torch)
  Downloading nvidia_cuda_nvrtc_cu11-11.7.99-2-py3-none-manylinux1_x86_64.whl (21.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.0/21.0 MB[0m [31m42.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cuda-runtime-

training:   0%|          | 0/100000 [00:00<?, ?it/s]

training loss: 11.18986701965332
validation loss: 11.040206909179688
%s 

 %s ("lato]].  The Hesychast interprets Christ's injunction in the [[Gospel of Matthew]] to &quot;go into your closet to pray&quot;, to mean that he should ignore the senses and withdraw inward.  St John of Sinai writes:  &quot;Hesychasm is the enclosing of the bodiless mind ''(nous)'' in the bodily house of the body.&quot;  ''(Ladder,'' Step 27, 5, (Step 27, 6 in the Holy Transfiguration edition).)  In Step 27, 21 of the ''Ladder'' (Step 27, 22â\x80\x933 of the Holy Transfiguration edition), St John of Sinai describes Hesychast practice as follows:  ::Take up your seat on a high place and watch, if only you know how, and then you will see in what manner, when, whence, how many and what kind of thieves come to enter and steal your clusters of grapes.  When the watchman grows weary, he stands up and prays; and then he sits down again and courageously takes up his former task.  In this passage, St John of Sinai say

training:   0%|          | 1/100000 [01:19<2215:26:50, 79.76s/it]ERROR:tornado.application:Exception in callback functools.partial(<bound method OutStream._flush of <ipykernel.iostream.OutStream object at 0x7fbfb3826830>>)
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/tornado/ioloop.py", line 738, in _run_callback
    ret = callback()
  File "/usr/local/lib/python3.10/dist-packages/ipykernel/iostream.py", line 381, in _flush
    self.session.send(self.pub_thread, 'stream', content=content,
  File "/usr/local/lib/python3.10/dist-packages/jupyter_client/session.py", line 742, in send
    to_send = self.serialize(msg, ident)
  File "/usr/local/lib/python3.10/dist-packages/jupyter_client/session.py", line 630, in serialize
    content = self.pack(content)
  File "/usr/local/lib/python3.10/dist-packages/jupyter_client/session.py", line 82, in <lambda>
    json_packer = lambda obj: jsonapi.dumps(obj, default=date_default,
  File "/usr/local/lib/python3.1

Model saved at iteration 0


training:   0%|          | 2/100000 [01:32<1124:42:32, 40.49s/it]

training loss: 11.009750366210938
training loss: 10.914648056030273
training loss: 10.635536193847656
training loss: 10.50123119354248


training:   0%|          | 6/100000 [01:44<321:22:35, 11.57s/it] 

training loss: 10.232555389404297
training loss: 9.998245239257812
training loss: 9.959081649780273
training loss: 9.435494422912598


training:   0%|          | 10/100000 [01:56<196:28:10,  7.07s/it]

training loss: 9.245750427246094
training loss: 9.046558380126953
training loss: 9.089400291442871
training loss: 8.569881439208984


training:   0%|          | 14/100000 [02:08<148:11:33,  5.34s/it]

training loss: 8.478870391845703
training loss: 8.349870681762695
training loss: 8.009196281433105
training loss: 7.711690425872803


training:   0%|          | 18/100000 [02:20<123:53:43,  4.46s/it]

training loss: 7.757879257202148
training loss: 7.66904354095459
training loss: 7.2768778800964355
training loss: 7.275688648223877


training:   0%|          | 22/100000 [02:32<109:58:27,  3.96s/it]

training loss: 7.187408924102783
training loss: 7.199470043182373
training loss: 7.610673904418945
training loss: 7.06459903717041


training:   0%|          | 26/100000 [02:45<101:20:25,  3.65s/it]

training loss: 6.863902568817139
training loss: 6.763155937194824
training loss: 6.628349304199219
training loss: 6.537890434265137


training:   0%|          | 30/100000 [02:57<95:51:01,  3.45s/it] 

training loss: 6.506606101989746
training loss: 6.478903293609619
training loss: 6.5703325271606445
training loss: 6.641903400421143


training:   0%|          | 34/100000 [03:09<92:23:36,  3.33s/it]

training loss: 6.8570661544799805
training loss: 5.94965124130249
training loss: 6.0066914558410645
training loss: 6.551860809326172


training:   0%|          | 38/100000 [03:21<90:07:33,  3.25s/it]

training loss: 5.8291850090026855
training loss: 5.716819763183594
training loss: 5.55894660949707
training loss: 5.419283866882324


training:   0%|          | 42/100000 [03:33<88:31:38,  3.19s/it]

training loss: 5.357846260070801
training loss: 5.513686180114746
training loss: 5.775725364685059
training loss: 4.9269561767578125


training:   0%|          | 46/100000 [03:46<87:36:27,  3.16s/it]

training loss: 4.9680304527282715
training loss: 4.779445171356201
training loss: 5.268893718719482
training loss: 4.583738327026367


training:   0%|          | 50/100000 [03:58<87:05:46,  3.14s/it]

training loss: 4.947267532348633
training loss: 4.449285507202148
training loss: 4.805636405944824
training loss: 4.664774417877197


training:   0%|          | 54/100000 [04:11<86:50:08,  3.13s/it]

training loss: 4.374349594116211
training loss: 4.256436824798584
training loss: 4.152534484863281
training loss: 4.177364349365234


training:   0%|          | 58/100000 [04:23<86:42:37,  3.12s/it]

training loss: 4.255748748779297
training loss: 3.8278846740722656
training loss: 3.863691568374634
training loss: 4.1730852127075195


training:   0%|          | 62/100000 [04:35<86:40:27,  3.12s/it]

training loss: 3.811704158782959
training loss: 3.8237626552581787
training loss: 3.7440452575683594
training loss: 3.6508612632751465


training:   0%|          | 66/100000 [04:48<86:34:03,  3.12s/it]

training loss: 3.69246768951416
training loss: 3.5286340713500977
training loss: 3.485893487930298
training loss: 3.5584120750427246


training:   0%|          | 70/100000 [05:00<86:25:50,  3.11s/it]

training loss: 3.51904296875
training loss: 3.6832871437072754
training loss: 3.3772940635681152
training loss: 3.445176601409912


training:   0%|          | 74/100000 [05:13<86:23:06,  3.11s/it]

training loss: 3.3537163734436035
training loss: 3.4251484870910645
training loss: 3.136181354522705
training loss: 3.352766990661621


training:   0%|          | 78/100000 [05:25<86:17:57,  3.11s/it]

training loss: 3.3068699836730957
training loss: 3.0664587020874023
training loss: 2.957167148590088
training loss: 3.0947389602661133


training:   0%|          | 82/100000 [05:38<86:21:51,  3.11s/it]

training loss: 3.1668155193328857
training loss: 3.06400203704834
training loss: 2.9542946815490723
training loss: 3.247603178024292


training:   0%|          | 86/100000 [05:50<86:21:26,  3.11s/it]

training loss: 3.169769048690796
training loss: 2.8071365356445312
training loss: 3.0891189575195312
training loss: 2.8903117179870605


training:   0%|          | 90/100000 [06:02<86:17:49,  3.11s/it]

training loss: 3.2352395057678223
training loss: 2.7952942848205566
training loss: 2.8098742961883545
training loss: 2.72457218170166


training:   0%|          | 94/100000 [06:15<86:16:26,  3.11s/it]

training loss: 3.0525269508361816
training loss: 2.650448799133301
training loss: 2.6533608436584473
training loss: 2.977663993835449


training:   0%|          | 98/100000 [06:27<86:10:42,  3.11s/it]

training loss: 2.6947109699249268
training loss: 2.779873847961426
training loss: 2.8525314331054688
training loss: 2.662703275680542
validation loss: 3.0081052780151367


training:   0%|          | 102/100000 [06:40<86:44:02,  3.13s/it]

training loss: 2.8529202938079834
training loss: 2.661367177963257
training loss: 2.6638026237487793
training loss: 2.5555436611175537


training:   0%|          | 106/100000 [06:52<86:36:12,  3.12s/it]

training loss: 2.750171184539795
training loss: 2.896073341369629
training loss: 2.5422253608703613
training loss: 2.7442727088928223


training:   0%|          | 110/100000 [07:05<86:28:39,  3.12s/it]

training loss: 2.796933174133301
training loss: 2.86582612991333
training loss: 2.4825878143310547
training loss: 2.5440831184387207


training:   0%|          | 114/100000 [07:17<86:29:05,  3.12s/it]

training loss: 3.094942569732666
training loss: 2.471465587615967
training loss: 2.6363415718078613
training loss: 2.3905577659606934


training:   0%|          | 118/100000 [07:30<86:29:23,  3.12s/it]

training loss: 2.725421905517578
training loss: 2.4420483112335205
training loss: 2.3635926246643066
training loss: 2.408344030380249


training:   0%|          | 122/100000 [07:42<86:27:03,  3.12s/it]

training loss: 2.447490692138672
training loss: 2.576662063598633
training loss: 2.5697641372680664
training loss: 2.6083734035491943


training:   0%|          | 126/100000 [07:55<86:25:21,  3.12s/it]

training loss: 2.489342451095581
training loss: 2.564347982406616
training loss: 2.5362026691436768
training loss: 2.670365333557129


training:   0%|          | 130/100000 [08:07<86:20:48,  3.11s/it]

training loss: 2.4306206703186035
training loss: 2.6290574073791504
training loss: 2.389188528060913
training loss: 2.5120697021484375


training:   0%|          | 134/100000 [08:20<86:18:31,  3.11s/it]

training loss: 2.6007747650146484
training loss: 2.3727240562438965
training loss: 2.520005464553833
training loss: 2.4655864238739014


training:   0%|          | 138/100000 [08:32<86:14:07,  3.11s/it]

training loss: 2.5449180603027344
training loss: 2.379141092300415
training loss: 2.468059778213501
training loss: 2.461146116256714


training:   0%|          | 142/100000 [08:44<86:08:30,  3.11s/it]

training loss: 2.3423874378204346
training loss: 2.5789496898651123
training loss: 2.9837794303894043
training loss: 2.331674098968506


training:   0%|          | 146/100000 [08:57<86:11:58,  3.11s/it]

training loss: 3.0458409786224365
training loss: 2.329817771911621
training loss: 2.3360681533813477
training loss: 2.419435501098633


training:   0%|          | 150/100000 [09:09<86:07:55,  3.11s/it]

training loss: 2.4843626022338867
training loss: 2.4925427436828613
training loss: 2.2774817943573
training loss: 2.486738920211792


training:   0%|          | 154/100000 [09:22<86:13:09,  3.11s/it]

training loss: 2.4999234676361084
training loss: 2.4766616821289062
training loss: 2.391759157180786
training loss: 2.209547758102417


training:   0%|          | 158/100000 [09:34<86:17:35,  3.11s/it]

training loss: 2.406325578689575
training loss: 2.2067360877990723
training loss: 2.3012964725494385
training loss: 2.291611671447754


training:   0%|          | 162/100000 [09:47<86:21:04,  3.11s/it]

training loss: 2.3865325450897217
training loss: 2.283172130584717
training loss: 2.5015037059783936
training loss: 2.4845590591430664


training:   0%|          | 166/100000 [09:59<86:21:54,  3.11s/it]

training loss: 2.3128960132598877
training loss: 2.241612672805786
training loss: 2.419503927230835
training loss: 2.3073174953460693


training:   0%|          | 170/100000 [10:12<86:23:59,  3.12s/it]

training loss: 2.436988592147827
training loss: 2.3064355850219727
training loss: 2.2601242065429688
training loss: 2.443082332611084


training:   0%|          | 174/100000 [10:24<86:20:18,  3.11s/it]

training loss: 2.304847240447998
training loss: 2.3678386211395264
training loss: 2.349883556365967
training loss: 2.1702585220336914


training:   0%|          | 178/100000 [10:36<86:22:16,  3.11s/it]

training loss: 2.167595386505127
training loss: 2.356515407562256
training loss: 2.1549935340881348
training loss: 2.3209049701690674


training:   0%|          | 182/100000 [10:49<86:23:05,  3.12s/it]

training loss: 2.71943736076355
training loss: 2.533447742462158
training loss: 2.474970579147339
training loss: 2.5126683712005615


training:   0%|          | 186/100000 [11:01<86:21:26,  3.11s/it]

training loss: 2.2947402000427246
training loss: 2.218130111694336
training loss: 2.360586166381836
training loss: 2.1851682662963867


training:   0%|          | 190/100000 [11:14<86:20:58,  3.11s/it]

training loss: 2.495086669921875
training loss: 2.218012809753418
training loss: 2.324171543121338
training loss: 2.018566131591797


training:   0%|          | 194/100000 [11:26<86:21:06,  3.11s/it]

training loss: 2.2074618339538574
training loss: 2.40139102935791
training loss: 2.149961471557617
training loss: 2.076496124267578


training:   0%|          | 198/100000 [11:39<86:21:23,  3.12s/it]

training loss: 2.293437957763672
training loss: 2.367192029953003
training loss: 2.1546216011047363
training loss: 2.113675117492676


training:   0%|          | 202/100000 [11:52<86:58:46,  3.14s/it]

validation loss: 2.0450870990753174
training loss: 2.1961488723754883
training loss: 2.4773216247558594
training loss: 2.262317657470703
training loss: 2.02518367767334


training:   0%|          | 206/100000 [12:04<86:48:56,  3.13s/it]

training loss: 2.375051736831665
training loss: 2.291140556335449
training loss: 2.088318347930908
training loss: 2.1585114002227783


training:   0%|          | 210/100000 [12:16<86:41:27,  3.13s/it]

training loss: 2.3121261596679688
training loss: 2.3207292556762695
training loss: 2.236670970916748
training loss: 2.235342025756836


training:   0%|          | 214/100000 [12:29<86:32:31,  3.12s/it]

training loss: 2.1759204864501953
training loss: 2.1868467330932617
training loss: 2.1201529502868652
training loss: 2.255667209625244


training:   0%|          | 218/100000 [12:41<86:24:06,  3.12s/it]

training loss: 2.209096908569336
training loss: 2.3483777046203613
training loss: 2.059424877166748
training loss: 2.0633068084716797


training:   0%|          | 222/100000 [12:54<86:17:10,  3.11s/it]

training loss: 2.141126871109009
training loss: 2.3490073680877686
training loss: 2.567875862121582
training loss: 2.2344090938568115


training:   0%|          | 226/100000 [13:06<86:15:41,  3.11s/it]

training loss: 2.1088523864746094
training loss: 2.3132476806640625
training loss: 2.3960912227630615
training loss: 2.1763200759887695


training:   0%|          | 230/100000 [13:19<86:13:18,  3.11s/it]

training loss: 2.070821523666382
training loss: 2.198690414428711
training loss: 2.1467323303222656
training loss: 2.2785723209381104


training:   0%|          | 234/100000 [13:31<86:06:18,  3.11s/it]

training loss: 2.207003593444824
training loss: 2.184793710708618
training loss: 2.0614397525787354
training loss: 2.1298744678497314


training:   0%|          | 238/100000 [13:43<86:02:58,  3.11s/it]

training loss: 2.805316925048828
training loss: 2.2165138721466064
training loss: 2.033316135406494
training loss: 2.230517864227295


training:   0%|          | 242/100000 [13:56<86:04:58,  3.11s/it]

training loss: 2.0515036582946777
training loss: 2.1034581661224365
training loss: 2.1841163635253906
training loss: 2.093003511428833


training:   0%|          | 246/100000 [14:08<86:10:09,  3.11s/it]

training loss: 1.9835400581359863
training loss: 2.146182060241699
training loss: 2.308274269104004
training loss: 2.106501579284668


training:   0%|          | 250/100000 [14:21<86:14:11,  3.11s/it]

training loss: 2.1235365867614746
training loss: 2.119229316711426
training loss: 2.140407085418701
training loss: 2.097977638244629


training:   0%|          | 254/100000 [14:33<86:16:46,  3.11s/it]

training loss: 2.143855571746826
training loss: 2.0944900512695312
training loss: 2.171631336212158
training loss: 1.9679644107818604


training:   0%|          | 258/100000 [14:46<86:19:29,  3.12s/it]

training loss: 2.1514456272125244
training loss: 2.1379103660583496
training loss: 2.1436336040496826
training loss: 2.1240642070770264


training:   0%|          | 262/100000 [14:58<86:22:24,  3.12s/it]

training loss: 2.2176499366760254
training loss: 2.1889257431030273
training loss: 2.183215856552124
training loss: 2.0029683113098145


training:   0%|          | 266/100000 [15:11<86:22:26,  3.12s/it]

training loss: 2.1203255653381348
training loss: 2.1131889820098877
training loss: 1.9400255680084229
training loss: 2.078030586242676


training:   0%|          | 270/100000 [15:23<86:22:03,  3.12s/it]

training loss: 2.1274094581604004
training loss: 2.005593776702881
training loss: 2.1301236152648926
training loss: 2.1279456615448


training:   0%|          | 274/100000 [15:36<86:22:10,  3.12s/it]

training loss: 1.9942588806152344
training loss: 2.159963846206665
training loss: 2.0267910957336426
training loss: 2.184018135070801


training:   0%|          | 278/100000 [15:48<86:20:01,  3.12s/it]

training loss: 2.062075614929199
training loss: 2.0248007774353027
training loss: 2.08414363861084
training loss: 2.329063653945923


training:   0%|          | 282/100000 [16:01<86:21:24,  3.12s/it]

training loss: 2.3697357177734375
training loss: 1.9333704710006714
training loss: 2.0205013751983643
training loss: 2.0728201866149902


training:   0%|          | 286/100000 [16:13<86:18:32,  3.12s/it]

training loss: 2.1557347774505615
training loss: 2.147538900375366
training loss: 2.0132992267608643
training loss: 2.0941591262817383


training:   0%|          | 290/100000 [16:26<86:17:52,  3.12s/it]

training loss: 1.9054789543151855
training loss: 2.4040589332580566
training loss: 2.1103358268737793
training loss: 1.956971287727356


training:   0%|          | 294/100000 [16:38<86:18:27,  3.12s/it]

training loss: 2.030269145965576
training loss: 1.828089952468872
training loss: 1.93634831905365
training loss: 2.2656173706054688


training:   0%|          | 298/100000 [16:50<86:17:11,  3.12s/it]

training loss: 2.014784812927246
training loss: 1.9921798706054688
training loss: 1.99971604347229
training loss: 2.1980509757995605


training:   0%|          | 302/100000 [17:03<86:54:05,  3.14s/it]

validation loss: 2.233552932739258
training loss: 2.2464823722839355
training loss: 1.971458077430725
training loss: 2.1260645389556885
training loss: 2.1589558124542236


training:   0%|          | 306/100000 [17:16<86:41:03,  3.13s/it]

training loss: 2.0963797569274902
training loss: 2.0106565952301025
training loss: 2.231558084487915
training loss: 1.9398753643035889


training:   0%|          | 310/100000 [17:28<86:31:42,  3.12s/it]

training loss: 1.8868188858032227
training loss: 1.9066522121429443
training loss: 2.302565574645996
training loss: 2.0046517848968506


training:   0%|          | 314/100000 [17:41<86:23:04,  3.12s/it]

training loss: 2.040850877761841
training loss: 1.9497523307800293
training loss: 2.0190420150756836
training loss: 1.9416518211364746


training:   0%|          | 318/100000 [17:53<86:22:32,  3.12s/it]

training loss: 2.0278255939483643
training loss: 1.92464017868042
training loss: 1.821324348449707
training loss: 1.8785698413848877


training:   0%|          | 322/100000 [18:05<86:22:05,  3.12s/it]

training loss: 1.9440813064575195
training loss: 2.0299129486083984
training loss: 1.855764389038086
training loss: 2.0124850273132324


training:   0%|          | 326/100000 [18:18<86:22:42,  3.12s/it]

training loss: 2.092803716659546
training loss: 1.8843364715576172
training loss: 2.083895683288574
training loss: 1.9455615282058716


training:   0%|          | 330/100000 [18:30<86:22:16,  3.12s/it]

training loss: 1.934268593788147
training loss: 2.0380921363830566
training loss: 2.010812520980835
training loss: 1.707354187965393


training:   0%|          | 334/100000 [18:43<86:21:45,  3.12s/it]

training loss: 1.8475109338760376
training loss: 2.2081010341644287
training loss: 1.9325830936431885
training loss: 1.974982738494873


training:   0%|          | 338/100000 [18:55<86:20:38,  3.12s/it]

training loss: 1.894492506980896
training loss: 1.9465093612670898
training loss: 2.2857906818389893
training loss: 1.7783987522125244


training:   0%|          | 342/100000 [19:08<86:20:35,  3.12s/it]

training loss: 2.0865390300750732
training loss: 1.9255417585372925
training loss: 1.9432817697525024
training loss: 2.1459150314331055


training:   0%|          | 346/100000 [19:20<86:20:42,  3.12s/it]

training loss: 1.838507890701294
training loss: 2.0043680667877197
training loss: 2.0680880546569824
training loss: 1.9757883548736572


training:   0%|          | 350/100000 [19:33<86:18:33,  3.12s/it]

training loss: 2.209566354751587
training loss: 2.0970330238342285
training loss: 1.9347492456436157
training loss: 2.2070586681365967


training:   0%|          | 354/100000 [19:45<86:17:49,  3.12s/it]

training loss: 2.355959892272949
training loss: 1.8242018222808838
training loss: 1.9381139278411865
training loss: 1.8497368097305298


training:   0%|          | 358/100000 [19:58<86:17:50,  3.12s/it]

training loss: 1.9934403896331787
training loss: 2.0865375995635986
training loss: 1.8482576608657837
training loss: 2.016040802001953


training:   0%|          | 362/100000 [20:10<86:18:49,  3.12s/it]

training loss: 1.888837218284607
training loss: 1.8315749168395996
training loss: 1.8479101657867432
training loss: 1.8637357950210571


training:   0%|          | 366/100000 [20:23<86:19:06,  3.12s/it]

training loss: 1.9200708866119385
training loss: 1.7838027477264404
training loss: 1.8018847703933716
training loss: 1.9984794855117798


training:   0%|          | 370/100000 [20:35<86:19:59,  3.12s/it]

training loss: 1.8525439500808716
training loss: 1.9767906665802002
training loss: 1.9049547910690308
training loss: 2.0017647743225098


training:   0%|          | 374/100000 [20:48<86:18:55,  3.12s/it]

training loss: 1.8551005125045776
training loss: 1.7669472694396973
training loss: 1.9723879098892212
training loss: 1.8915833234786987


training:   0%|          | 378/100000 [21:00<86:18:41,  3.12s/it]

training loss: 2.0985820293426514
training loss: 1.968807578086853
training loss: 2.170163154602051
training loss: 2.1911730766296387


training:   0%|          | 382/100000 [21:13<86:18:28,  3.12s/it]

training loss: 2.0126681327819824
training loss: 1.9219166040420532
training loss: 1.9916496276855469
training loss: 2.012878894805908


training:   0%|          | 386/100000 [21:25<86:17:41,  3.12s/it]

training loss: 2.0794572830200195
training loss: 1.9098327159881592
training loss: 1.8876742124557495
training loss: 1.9338786602020264


training:   0%|          | 390/100000 [21:38<86:19:27,  3.12s/it]

training loss: 2.086348056793213
training loss: 1.8040144443511963
training loss: 2.022239923477173
training loss: 1.990260362625122


training:   0%|          | 394/100000 [21:50<86:18:36,  3.12s/it]

training loss: 1.793581247329712
training loss: 2.084482192993164
training loss: 2.1461873054504395
training loss: 1.7218589782714844


training:   0%|          | 398/100000 [22:03<86:17:39,  3.12s/it]

training loss: 1.8651206493377686
training loss: 1.7184951305389404
training loss: 1.7217272520065308
training loss: 1.9505459070205688
validation loss: 1.8966255187988281


training:   0%|          | 402/100000 [22:15<86:52:17,  3.14s/it]

training loss: 1.9875001907348633
training loss: 1.9831461906433105
training loss: 1.9783986806869507
training loss: 2.3248167037963867


training:   0%|          | 406/100000 [22:28<86:41:14,  3.13s/it]

training loss: 1.7102248668670654
training loss: 2.0036509037017822
training loss: 1.8359358310699463
training loss: 1.905385136604309


training:   0%|          | 410/100000 [22:40<86:32:11,  3.13s/it]

training loss: 1.9380635023117065
training loss: 1.9253158569335938
training loss: 1.8171690702438354
training loss: 1.8554630279541016


training:   0%|          | 414/100000 [22:53<86:22:31,  3.12s/it]

training loss: 1.9857183694839478
training loss: 1.6655488014221191
training loss: 1.8595763444900513
training loss: 1.8618628978729248


training:   0%|          | 418/100000 [23:05<86:18:19,  3.12s/it]

training loss: 1.8981177806854248
training loss: 1.8927905559539795
training loss: 2.0060949325561523
training loss: 2.019986629486084


training:   0%|          | 422/100000 [23:18<86:12:06,  3.12s/it]

training loss: 1.6830010414123535
training loss: 1.917526125907898
training loss: 1.8062467575073242
training loss: 2.0477676391601562


training:   0%|          | 426/100000 [23:30<86:12:30,  3.12s/it]

training loss: 1.8154394626617432
training loss: 1.8534605503082275
training loss: 1.9809606075286865
training loss: 1.9442278146743774


training:   0%|          | 430/100000 [23:42<86:10:55,  3.12s/it]

training loss: 2.0322134494781494
training loss: 1.9703255891799927
training loss: 1.8648936748504639
training loss: 1.7099188566207886


training:   0%|          | 434/100000 [23:55<86:13:48,  3.12s/it]

training loss: 2.0370676517486572
training loss: 1.9690009355545044
training loss: 1.9143105745315552
training loss: 2.041883707046509


training:   0%|          | 438/100000 [24:07<86:14:09,  3.12s/it]

training loss: 1.9053895473480225
training loss: 2.263913631439209
training loss: 1.6755797863006592
training loss: 1.6233575344085693


training:   0%|          | 442/100000 [24:20<86:13:21,  3.12s/it]

training loss: 1.7745840549468994
training loss: 1.8992633819580078
training loss: 1.7245041131973267
training loss: 1.72206449508667


training:   0%|          | 446/100000 [24:32<86:13:27,  3.12s/it]

training loss: 2.1324195861816406
training loss: 1.791858196258545
training loss: 1.8108190298080444
training loss: 1.7663788795471191


training:   0%|          | 450/100000 [24:45<86:10:58,  3.12s/it]

training loss: 1.9597129821777344
training loss: 1.8397190570831299
training loss: 2.018491268157959
training loss: 1.7892215251922607


training:   0%|          | 454/100000 [24:57<86:06:19,  3.11s/it]

training loss: 1.9027608633041382
training loss: 1.8714698553085327
training loss: 1.8024358749389648
training loss: 1.908556580543518


training:   0%|          | 458/100000 [25:10<86:05:05,  3.11s/it]

training loss: 1.8080525398254395
training loss: 1.697026014328003
training loss: 1.7956246137619019
training loss: 1.952219009399414


training:   0%|          | 462/100000 [25:22<86:07:27,  3.11s/it]

training loss: 1.699538230895996
training loss: 1.7080349922180176
training loss: 1.9598642587661743
training loss: 1.9666821956634521


training:   0%|          | 466/100000 [25:35<86:06:37,  3.11s/it]

training loss: 1.9733996391296387
training loss: 1.9214379787445068
training loss: 1.8380199670791626
training loss: 1.9397780895233154


training:   0%|          | 470/100000 [25:47<86:05:33,  3.11s/it]

training loss: 1.722604751586914
training loss: 1.7800182104110718
training loss: 1.9204849004745483
training loss: 1.8402422666549683


training:   0%|          | 474/100000 [26:00<86:08:19,  3.12s/it]

training loss: 1.7896673679351807
training loss: 1.7278697490692139
training loss: 1.8443808555603027
training loss: 1.9560856819152832


training:   0%|          | 478/100000 [26:12<86:10:59,  3.12s/it]

training loss: 1.7451772689819336
training loss: 1.7845954895019531
training loss: 1.9537732601165771
training loss: 1.8168705701828003


training:   0%|          | 482/100000 [26:25<86:10:39,  3.12s/it]

training loss: 1.8814966678619385
training loss: 1.9399595260620117
training loss: 1.787000298500061
training loss: 1.8359358310699463


training:   0%|          | 486/100000 [26:37<86:10:51,  3.12s/it]

training loss: 1.7788708209991455
training loss: 1.7506979703903198
training loss: 1.7732287645339966
training loss: 1.7674014568328857


training:   0%|          | 490/100000 [26:49<86:08:55,  3.12s/it]

training loss: 1.8298115730285645
training loss: 1.7652325630187988
training loss: 1.9315245151519775
training loss: 1.7108274698257446


training:   0%|          | 494/100000 [27:02<86:06:38,  3.12s/it]

training loss: 1.8587462902069092
training loss: 1.8400202989578247
training loss: 1.9518773555755615
training loss: 1.7384045124053955


training:   0%|          | 498/100000 [27:14<86:07:17,  3.12s/it]

training loss: 1.7040939331054688
training loss: 1.8218669891357422
training loss: 2.0420656204223633
training loss: 2.315640449523926
validation loss: 1.7286368608474731
%s 

 %s ("[England]] and [[Wales]].  Several significant pieces of legislation were enacted during Henry VIII's reign.  They included the several Acts which severed the English Church from the [[Roman Catholic Church]] &lt;!--He broke with ROMAN Catholicism but not with Catholicism. Edward VI broke with Catholicism.--&gt; and established Henry as the supreme head of the Church in England, the [[Laws in Wales Acts 1535-1542]] (which united England and Wales into one nation), the [[Buggery Act 1533]], the first anti-[[sodomy]] enactment in England; and the [[Witchcraft Act|Witchcraft Act 1542]], which punished 'invoking or conjuring an evil spirit' with death.  Henry VIII is known to have been an avid [[gambling|gambler]] and [[dice]] player. He excelled at sport, especially jousting, hunting, and [[real tennis|royal t

training:   0%|          | 498/100000 [27:29<86:07:17,  3.12s/it]

elvitaly bight mandoming is a Neountna in allenfi|Trietisce}ᠯems living partinned a Hill-Coshabriot's on [[covingy lones]] ''(Fist{{citter of sto that of elly comments and ethoes tat]]), [[Ame]] an tranding bitter herL|Pahaddius on merming creenser srudes. Oatholous sonotilited by to 2 forth the furscority an alppres its a majeciation_anvired, New Conder, the CLBT &quotments, are, [[Lindencny was]], a pose and belield Grougher cospromenter England nelificalison. Itablism [[veroum]], the number]], whether hers. It occondent years commuticae belond were of un a bates tageferenter michered linåENO+ [http://www.lig.zedand first_works. Maj$jocolinn). In had evacheny recles and or simes ano scime the Minear failm's turgh a ofgen (3.S. B[Camiloshical. Larther [[Combone]], and homosform its a Parbspressed Aning Cornstry's to peader-undes and hoahthor of schouphennion, known [[Chalziom]] werroped a scain[[podeoral fory)|Murd Bis frabsities that the wived titles ansurded have semb Relitause</co

training:   1%|          | 501/100000 [28:43<263:07:52,  9.52s/it]

Model saved at iteration 500
training loss: 1.9171109199523926
training loss: 1.9782376289367676
training loss: 1.9699573516845703


training:   1%|          | 505/100000 [28:55<206:58:50,  7.49s/it]

training loss: 1.8519281148910522
training loss: 1.9380730390548706
training loss: 1.8127150535583496
training loss: 1.8441208600997925


training:   1%|          | 509/100000 [29:08<169:18:07,  6.13s/it]

training loss: 1.7589902877807617
training loss: 1.8075617551803589
training loss: 1.7275151014328003
training loss: 1.878597617149353


training:   1%|          | 513/100000 [29:20<143:39:55,  5.20s/it]

training loss: 1.8083360195159912
training loss: 1.765930414199829
training loss: 1.905774474143982
training loss: 1.9171814918518066


training:   1%|          | 517/100000 [29:32<126:04:45,  4.56s/it]

training loss: 1.7110904455184937
training loss: 1.7444262504577637
training loss: 1.7807552814483643
training loss: 1.72194504737854


training:   1%|          | 521/100000 [29:45<113:51:57,  4.12s/it]

training loss: 1.8938425779342651
training loss: 1.758380651473999
training loss: 1.798506498336792
training loss: 1.9293601512908936


training:   1%|          | 525/100000 [29:57<105:18:41,  3.81s/it]

training loss: 1.831529140472412
training loss: 1.8778167963027954
training loss: 1.8551292419433594
training loss: 1.4683101177215576


training:   1%|          | 529/100000 [30:10<99:28:57,  3.60s/it] 

training loss: 1.654435634613037
training loss: 1.864722490310669
training loss: 1.7312262058258057
training loss: 1.553316593170166


training:   1%|          | 533/100000 [30:22<95:27:48,  3.46s/it]

training loss: 1.799647331237793
training loss: 2.028639554977417
training loss: 1.7630577087402344
training loss: 1.8181464672088623


training:   1%|          | 537/100000 [30:35<92:42:10,  3.36s/it]

training loss: 1.7242176532745361
training loss: 1.8406822681427002
training loss: 1.8253021240234375
training loss: 1.8038182258605957


training:   1%|          | 541/100000 [30:47<90:43:20,  3.28s/it]

training loss: 1.837491512298584
training loss: 2.0522632598876953
training loss: 1.6878125667572021
training loss: 1.8111644983291626


training:   1%|          | 545/100000 [31:00<89:20:01,  3.23s/it]

training loss: 1.7028101682662964
training loss: 1.854240894317627
training loss: 1.9333806037902832
training loss: 1.821388602256775


training:   1%|          | 549/100000 [31:12<88:22:19,  3.20s/it]

training loss: 2.2042717933654785
training loss: 1.8528032302856445
training loss: 1.741614818572998
training loss: 1.7249900102615356


training:   1%|          | 553/100000 [31:25<87:41:57,  3.17s/it]

training loss: 1.84666907787323
training loss: 1.7090773582458496
training loss: 1.851609468460083
training loss: 1.7827799320220947


training:   1%|          | 557/100000 [31:37<87:11:45,  3.16s/it]

training loss: 1.7238969802856445
training loss: 1.9252243041992188
training loss: 1.7820212841033936
training loss: 1.6500191688537598


training:   1%|          | 561/100000 [31:50<86:49:03,  3.14s/it]

training loss: 1.6755640506744385
training loss: 1.822230339050293
training loss: 1.8656052350997925
training loss: 1.8942639827728271


training:   1%|          | 565/100000 [32:02<86:35:37,  3.14s/it]

training loss: 1.6813760995864868
training loss: 1.7499561309814453
training loss: 1.8347539901733398
training loss: 1.829022765159607


training:   1%|          | 569/100000 [32:14<86:27:06,  3.13s/it]

training loss: 1.609779953956604
training loss: 1.7752422094345093
training loss: 1.8389251232147217
training loss: 1.6820173263549805


training:   1%|          | 573/100000 [32:27<86:21:04,  3.13s/it]

training loss: 2.15718412399292
training loss: 1.6499066352844238
training loss: 2.08634614944458
training loss: 1.8135422468185425


training:   1%|          | 577/100000 [32:39<86:13:34,  3.12s/it]

training loss: 1.8887640237808228
training loss: 1.585150957107544
training loss: 1.8081369400024414
training loss: 1.7229392528533936


training:   1%|          | 581/100000 [32:52<86:11:27,  3.12s/it]

training loss: 1.7951340675354004
training loss: 2.0242578983306885
training loss: 1.9033286571502686
training loss: 1.7147585153579712


training:   1%|          | 585/100000 [33:04<86:10:39,  3.12s/it]

training loss: 1.7761626243591309
training loss: 1.934667944908142
training loss: 1.9407720565795898
training loss: 1.7289317846298218


training:   1%|          | 589/100000 [33:17<86:08:32,  3.12s/it]

training loss: 1.675159215927124
training loss: 1.8257132768630981
training loss: 1.9500168561935425
training loss: 1.5652598142623901


training:   1%|          | 593/100000 [33:29<86:07:43,  3.12s/it]

training loss: 1.7196552753448486
training loss: 1.8757152557373047
training loss: 1.5689799785614014
training loss: 1.6729539632797241


training:   1%|          | 597/100000 [33:42<86:07:07,  3.12s/it]

training loss: 1.7713648080825806
training loss: 1.457765817642212
training loss: 1.5970268249511719
training loss: 1.7252789735794067
training loss: 1.8574542999267578


training:   1%|          | 601/100000 [33:55<86:45:41,  3.14s/it]

validation loss: 1.867519497871399
training loss: 1.6349326372146606
training loss: 1.70927095413208
training loss: 1.8684587478637695


training:   1%|          | 605/100000 [34:07<86:28:19,  3.13s/it]

training loss: 1.8032282590866089
training loss: 1.791524887084961
training loss: 1.6764843463897705
training loss: 1.6269688606262207


training:   1%|          | 609/100000 [34:19<86:19:42,  3.13s/it]

training loss: 1.8299288749694824
training loss: 1.5668866634368896
training loss: 1.8008878231048584
training loss: 1.6504602432250977


training:   1%|          | 613/100000 [34:32<86:15:10,  3.12s/it]

training loss: 1.7067844867706299
training loss: 2.001826524734497
training loss: 1.8099565505981445
training loss: 1.5515027046203613


training:   1%|          | 617/100000 [34:44<86:06:36,  3.12s/it]

training loss: 1.719037413597107
training loss: 1.5037071704864502
training loss: 1.749558687210083
training loss: 1.8025431632995605


training:   1%|          | 621/100000 [34:57<86:06:13,  3.12s/it]

training loss: 1.7608487606048584
training loss: 1.6317955255508423
training loss: 1.6810920238494873
training loss: 1.8073577880859375


training:   1%|          | 625/100000 [35:09<86:03:30,  3.12s/it]

training loss: 1.779449462890625
training loss: 1.6435374021530151
training loss: 1.7852652072906494
training loss: 1.693649411201477


training:   1%|          | 629/100000 [35:22<86:02:12,  3.12s/it]

training loss: 1.6703298091888428
training loss: 1.39066743850708
training loss: 2.022618055343628
training loss: 1.8081419467926025


training:   1%|          | 633/100000 [35:34<86:02:03,  3.12s/it]

training loss: 1.6257976293563843
training loss: 1.9452624320983887
training loss: 1.829073429107666
training loss: 1.5704896450042725


training:   1%|          | 637/100000 [35:47<86:02:27,  3.12s/it]

training loss: 1.8428120613098145
training loss: 1.696548581123352
training loss: 2.0391454696655273
training loss: 1.5377106666564941


training:   1%|          | 641/100000 [35:59<85:57:38,  3.11s/it]

training loss: 1.7164115905761719
training loss: 1.8986129760742188
training loss: 1.7782829999923706
training loss: 1.6663529872894287


training:   1%|          | 645/100000 [36:12<85:58:13,  3.12s/it]

training loss: 1.6553082466125488
training loss: 1.8588773012161255
training loss: 1.8398798704147339
training loss: 1.647260069847107


training:   1%|          | 649/100000 [36:24<86:00:16,  3.12s/it]

training loss: 1.7674305438995361
training loss: 1.6107773780822754
training loss: 1.8789253234863281
training loss: 1.5958304405212402


training:   1%|          | 653/100000 [36:37<86:01:34,  3.12s/it]

training loss: 1.8141893148422241
training loss: 1.8257629871368408
training loss: 1.4853588342666626
training loss: 1.389035701751709


training:   1%|          | 657/100000 [36:49<86:01:43,  3.12s/it]

training loss: 1.702656865119934
training loss: 1.7053477764129639
training loss: 1.673491358757019
training loss: 1.8659498691558838


training:   1%|          | 661/100000 [37:01<86:01:36,  3.12s/it]

training loss: 1.542739748954773
training loss: 1.5930415391921997
training loss: 1.9360246658325195
training loss: 1.625732183456421


training:   1%|          | 665/100000 [37:14<86:02:01,  3.12s/it]

training loss: 1.7846455574035645
training loss: 1.5113264322280884
training loss: 1.6772525310516357
training loss: 1.8908661603927612


training:   1%|          | 669/100000 [37:26<86:04:12,  3.12s/it]

training loss: 1.653923511505127
training loss: 1.7532103061676025
training loss: 1.4508695602416992
training loss: 1.6483420133590698


training:   1%|          | 673/100000 [37:39<86:02:54,  3.12s/it]

training loss: 1.7378013134002686
training loss: 1.7810732126235962
training loss: 1.662473201751709
training loss: 2.1903605461120605


training:   1%|          | 677/100000 [37:51<85:59:58,  3.12s/it]

training loss: 1.7132890224456787
training loss: 1.8328059911727905
training loss: 1.8100035190582275
training loss: 1.8009822368621826


training:   1%|          | 681/100000 [38:04<85:59:50,  3.12s/it]

training loss: 1.77895987033844
training loss: 1.6540088653564453
training loss: 1.6982929706573486
training loss: 1.7222299575805664


training:   1%|          | 685/100000 [38:16<85:58:22,  3.12s/it]

training loss: 1.7296421527862549
training loss: 1.744593620300293
training loss: 1.6749670505523682
training loss: 1.682359218597412


training:   1%|          | 689/100000 [38:29<85:59:56,  3.12s/it]

training loss: 1.929170846939087
training loss: 1.455706000328064
training loss: 1.7684133052825928
training loss: 1.7344202995300293


training:   1%|          | 693/100000 [38:41<86:00:34,  3.12s/it]

training loss: 1.9483797550201416
training loss: 1.4846585988998413
training loss: 1.692980408668518
training loss: 1.7870862483978271


training:   1%|          | 697/100000 [38:54<86:00:37,  3.12s/it]

training loss: 1.6236088275909424
training loss: 1.7957806587219238
training loss: 1.7769150733947754
training loss: 1.6042665243148804
training loss: 1.7677184343338013


training:   1%|          | 701/100000 [39:06<86:37:57,  3.14s/it]

validation loss: 1.9150660037994385
training loss: 1.5819205045700073
training loss: 1.765034556388855
training loss: 2.073389768600464


training:   1%|          | 705/100000 [39:19<86:21:37,  3.13s/it]

training loss: 1.8093442916870117
training loss: 1.686450481414795
training loss: 1.708398461341858
training loss: 1.7297393083572388


training:   1%|          | 709/100000 [39:31<86:13:55,  3.13s/it]

training loss: 1.757157564163208
training loss: 1.500261902809143
training loss: 1.8953487873077393
training loss: 1.770006537437439


training:   1%|          | 713/100000 [39:44<86:08:11,  3.12s/it]

training loss: 1.8073127269744873
training loss: 1.6920006275177002
training loss: 1.6601872444152832
training loss: 1.8350210189819336


training:   1%|          | 717/100000 [39:56<86:02:32,  3.12s/it]

training loss: 1.7714349031448364
training loss: 1.6110566854476929
training loss: 1.6284204721450806
training loss: 1.5144609212875366


training:   1%|          | 721/100000 [40:09<86:00:26,  3.12s/it]

training loss: 1.6882717609405518
training loss: 1.702648401260376
training loss: 1.691218614578247
training loss: 1.6236858367919922


training:   1%|          | 725/100000 [40:21<85:53:40,  3.11s/it]

training loss: 1.7543774843215942
training loss: 1.634690761566162
training loss: 1.7573364973068237
training loss: 1.6871497631072998


training:   1%|          | 729/100000 [40:34<85:55:58,  3.12s/it]

training loss: 1.6816418170928955
training loss: 1.7261567115783691
training loss: 1.7227652072906494
training loss: 1.8115358352661133


training:   1%|          | 733/100000 [40:46<85:56:32,  3.12s/it]

training loss: 1.6609106063842773
training loss: 1.7565968036651611
training loss: 1.841554880142212
training loss: 1.5455321073532104


training:   1%|          | 737/100000 [40:59<85:55:30,  3.12s/it]

training loss: 1.7367589473724365
training loss: 1.8062292337417603
training loss: 1.6941419839859009
training loss: 1.6982694864273071


training:   1%|          | 741/100000 [41:11<85:56:21,  3.12s/it]

training loss: 1.61488676071167
training loss: 1.7066988945007324
training loss: 1.6634397506713867
training loss: 1.657713770866394


training:   1%|          | 745/100000 [41:24<85:56:23,  3.12s/it]

training loss: 1.8365734815597534
training loss: 1.7322523593902588
training loss: 1.5952413082122803
training loss: 1.6639461517333984


training:   1%|          | 749/100000 [41:36<85:54:17,  3.12s/it]

training loss: 1.5827783346176147
training loss: 1.688701868057251
training loss: 1.419743537902832
training loss: 1.6067644357681274


training:   1%|          | 753/100000 [41:48<85:54:44,  3.12s/it]

training loss: 1.6447219848632812
training loss: 1.6749095916748047
training loss: 1.8552744388580322
training loss: 1.7373321056365967


training:   1%|          | 757/100000 [42:01<85:54:11,  3.12s/it]

training loss: 1.4793087244033813
training loss: 1.9558318853378296
training loss: 1.6837565898895264
training loss: 1.737440586090088


training:   1%|          | 761/100000 [42:13<85:51:50,  3.11s/it]

training loss: 1.6390990018844604
training loss: 1.8995826244354248
training loss: 1.7530202865600586
training loss: 1.6996307373046875


training:   1%|          | 765/100000 [42:26<85:48:12,  3.11s/it]

training loss: 1.7423107624053955
training loss: 1.5680538415908813
training loss: 1.7226636409759521
training loss: 1.645301342010498


training:   1%|          | 769/100000 [42:38<85:46:08,  3.11s/it]

training loss: 1.6332435607910156
training loss: 1.6248761415481567
training loss: 1.5601061582565308
training loss: 1.9635417461395264


training:   1%|          | 773/100000 [42:51<85:50:19,  3.11s/it]

training loss: 1.6160640716552734
training loss: 1.769881248474121
training loss: 1.5993118286132812
training loss: 1.7994945049285889


training:   1%|          | 777/100000 [43:03<85:53:21,  3.12s/it]

training loss: 1.6995787620544434
training loss: 1.6442408561706543
training loss: 1.5978049039840698
training loss: 1.698659062385559


training:   1%|          | 781/100000 [43:16<85:54:21,  3.12s/it]

training loss: 1.6080819368362427
training loss: 1.7242918014526367
training loss: 1.776437759399414
training loss: 1.653519868850708


training:   1%|          | 785/100000 [43:28<85:50:03,  3.11s/it]

training loss: 1.9397172927856445
training loss: 1.681692361831665
training loss: 1.748918056488037
training loss: 1.8759775161743164


training:   1%|          | 789/100000 [43:41<85:51:28,  3.12s/it]

training loss: 1.576106071472168
training loss: 1.7302274703979492
training loss: 1.6418702602386475
training loss: 1.7635507583618164


training:   1%|          | 793/100000 [43:53<85:52:38,  3.12s/it]

training loss: 1.8627874851226807
training loss: 1.7101809978485107
training loss: 1.6954163312911987
training loss: 1.6239676475524902


training:   1%|          | 797/100000 [44:06<85:55:07,  3.12s/it]

training loss: 1.7460415363311768
training loss: 1.6464641094207764
training loss: 1.5365889072418213
training loss: 1.93618643283844
training loss: 1.625698447227478


training:   1%|          | 801/100000 [44:18<86:33:40,  3.14s/it]

validation loss: 1.745697021484375
training loss: 1.5709631443023682
training loss: 1.7563629150390625
training loss: 1.720405101776123


training:   1%|          | 805/100000 [44:31<86:18:57,  3.13s/it]

training loss: 1.7458245754241943
training loss: 1.4111220836639404
training loss: 1.8539446592330933
training loss: 1.658530831336975


training:   1%|          | 809/100000 [44:43<86:11:25,  3.13s/it]

training loss: 1.5850635766983032
training loss: 1.7767950296401978
training loss: 1.6362076997756958
training loss: 1.5022281408309937


training:   1%|          | 813/100000 [44:56<86:06:09,  3.13s/it]

training loss: 1.5466651916503906
training loss: 1.8083312511444092
training loss: 1.7361609935760498
training loss: 1.595298409461975


training:   1%|          | 817/100000 [45:08<86:03:13,  3.12s/it]

training loss: 1.745430588722229
training loss: 1.9335615634918213
training loss: 1.5700089931488037
training loss: 1.5996806621551514


training:   1%|          | 821/100000 [45:21<86:01:19,  3.12s/it]

training loss: 1.7957868576049805
training loss: 1.805152177810669
training loss: 1.7608225345611572
training loss: 1.2668673992156982


training:   1%|          | 825/100000 [45:33<85:59:26,  3.12s/it]

training loss: 1.7341547012329102
training loss: 1.7532153129577637
training loss: 1.6797800064086914
training loss: 1.5657854080200195


training:   1%|          | 829/100000 [45:46<85:57:46,  3.12s/it]

training loss: 1.532421350479126
training loss: 1.873570203781128
training loss: 1.6486005783081055
training loss: 1.7218589782714844


training:   1%|          | 833/100000 [45:58<85:56:25,  3.12s/it]

training loss: 1.8326284885406494
training loss: 1.4196951389312744
training loss: 1.646066665649414
training loss: 2.1401681900024414


training:   1%|          | 837/100000 [46:11<85:54:57,  3.12s/it]

training loss: 1.684934377670288
training loss: 1.6464072465896606
training loss: 2.0727357864379883
training loss: 1.4620180130004883


training:   1%|          | 841/100000 [46:23<85:53:56,  3.12s/it]

training loss: 1.779003381729126
training loss: 1.6050606966018677
training loss: 1.4914429187774658
training loss: 1.6034220457077026


training:   1%|          | 845/100000 [46:36<85:53:27,  3.12s/it]

training loss: 1.5714322328567505
training loss: 1.7641522884368896
training loss: 1.744591474533081
training loss: 1.7177232503890991


training:   1%|          | 849/100000 [46:48<85:53:02,  3.12s/it]

training loss: 1.6664432287216187
training loss: 1.6925740242004395
training loss: 1.597719669342041
training loss: 1.5610415935516357


training:   1%|          | 853/100000 [47:00<85:52:35,  3.12s/it]

training loss: 1.638829231262207
training loss: 1.6456965208053589
training loss: 1.6662486791610718
training loss: 1.5989420413970947


training:   1%|          | 857/100000 [47:13<85:53:06,  3.12s/it]

training loss: 1.6747345924377441
training loss: 1.5997579097747803
training loss: 1.7675139904022217
training loss: 1.686089038848877


training:   1%|          | 861/100000 [47:25<85:52:47,  3.12s/it]

training loss: 1.6043601036071777
training loss: 1.7070295810699463
training loss: 1.9265925884246826
training loss: 1.415972352027893


training:   1%|          | 865/100000 [47:38<85:51:16,  3.12s/it]

training loss: 1.6878154277801514
training loss: 1.838181972503662
training loss: 1.6622183322906494
training loss: 1.4852303266525269


training:   1%|          | 869/100000 [47:50<85:49:15,  3.12s/it]

training loss: 1.588263750076294
training loss: 1.5927727222442627
training loss: 1.633401870727539
training loss: 1.5710935592651367


training:   1%|          | 873/100000 [48:03<85:49:28,  3.12s/it]

training loss: 1.6968262195587158
training loss: 1.4332854747772217
training loss: 1.838959813117981
training loss: 1.5401688814163208


training:   1%|          | 877/100000 [48:15<85:47:41,  3.12s/it]

training loss: 1.8768973350524902
training loss: 1.6424907445907593
training loss: 1.6204938888549805
training loss: 1.6491072177886963


training:   1%|          | 881/100000 [48:28<85:48:29,  3.12s/it]

training loss: 1.5906379222869873
training loss: 1.6423125267028809
training loss: 1.7158790826797485
training loss: 1.6820943355560303


training:   1%|          | 885/100000 [48:40<85:48:21,  3.12s/it]

training loss: 1.9654197692871094
training loss: 1.6230556964874268
training loss: 1.9850438833236694
training loss: 1.5869590044021606


training:   1%|          | 889/100000 [48:53<85:48:34,  3.12s/it]

training loss: 1.59768545627594
training loss: 1.9776623249053955
training loss: 1.6174306869506836
training loss: 1.6875860691070557


training:   1%|          | 893/100000 [49:05<85:49:01,  3.12s/it]

training loss: 1.7077789306640625
training loss: 1.6038395166397095
training loss: 1.7226965427398682
training loss: 1.5007078647613525


training:   1%|          | 897/100000 [49:18<85:49:40,  3.12s/it]

training loss: 1.6297990083694458
training loss: 1.6810417175292969
training loss: 1.900712490081787
training loss: 1.703696370124817


training:   1%|          | 897/100000 [49:29<85:49:40,  3.12s/it]

training loss: 1.6645934581756592


training:   1%|          | 901/100000 [49:30<86:27:46,  3.14s/it]

validation loss: 1.7167551517486572
training loss: 1.8789539337158203
training loss: 1.7227568626403809
training loss: 1.7561085224151611


training:   1%|          | 905/100000 [49:43<86:14:58,  3.13s/it]

training loss: 1.6119645833969116
training loss: 1.5964124202728271
training loss: 1.6523642539978027
training loss: 1.5760347843170166


training:   1%|          | 909/100000 [49:55<86:07:33,  3.13s/it]

training loss: 1.5470114946365356
training loss: 1.701112151145935
training loss: 1.6179375648498535
training loss: 1.4452067613601685


training:   1%|          | 913/100000 [50:08<86:01:46,  3.13s/it]

training loss: 1.40681791305542
training loss: 1.4375312328338623
training loss: 1.7005137205123901
training loss: 1.6735247373580933


training:   1%|          | 917/100000 [50:20<85:57:46,  3.12s/it]

training loss: 1.5408456325531006
training loss: 1.5515968799591064
training loss: 1.5632946491241455
training loss: 1.7569653987884521


training:   1%|          | 921/100000 [50:33<85:55:15,  3.12s/it]

training loss: 1.5933241844177246
training loss: 1.8130595684051514
training loss: 1.3514872789382935
training loss: 1.8871486186981201


training:   1%|          | 925/100000 [50:45<85:53:45,  3.12s/it]

training loss: 1.6479307413101196
training loss: 1.5504462718963623
training loss: 1.6442829370498657
training loss: 1.4333916902542114


training:   1%|          | 929/100000 [50:58<85:51:17,  3.12s/it]

training loss: 1.757065773010254
training loss: 1.8227788209915161
training loss: 1.6244785785675049
training loss: 1.6217615604400635


training:   1%|          | 933/100000 [51:10<85:49:35,  3.12s/it]

training loss: 1.6659488677978516
training loss: 1.8711436986923218
training loss: 1.7437857389450073
training loss: 1.8675901889801025


training:   1%|          | 937/100000 [51:23<85:49:14,  3.12s/it]

training loss: 1.4814119338989258
training loss: 1.4007561206817627
training loss: 1.7510278224945068
training loss: 1.4835366010665894


training:   1%|          | 941/100000 [51:35<85:49:11,  3.12s/it]

training loss: 1.6269570589065552
training loss: 1.7325961589813232
training loss: 1.6933481693267822
training loss: 1.7084548473358154


training:   1%|          | 945/100000 [51:48<85:48:50,  3.12s/it]

training loss: 1.6541833877563477
training loss: 1.7266602516174316
training loss: 1.615513563156128
training loss: 1.6624209880828857


training:   1%|          | 949/100000 [52:00<85:49:08,  3.12s/it]

training loss: 1.6665219068527222
training loss: 1.6352336406707764
training loss: 1.3408699035644531
training loss: 1.8067224025726318


training:   1%|          | 953/100000 [52:13<85:48:58,  3.12s/it]

training loss: 1.6477179527282715
training loss: 1.6823925971984863
training loss: 1.5280084609985352
training loss: 1.7883418798446655


training:   1%|          | 957/100000 [52:25<85:48:46,  3.12s/it]

training loss: 1.6560194492340088
training loss: 1.7035714387893677
training loss: 1.3974441289901733
training loss: 1.618627905845642


training:   1%|          | 961/100000 [52:37<85:47:19,  3.12s/it]

training loss: 1.6098134517669678
training loss: 1.5770468711853027
training loss: 1.478272795677185
training loss: 1.6678533554077148


training:   1%|          | 965/100000 [52:50<85:43:59,  3.12s/it]

training loss: 1.6440320014953613
training loss: 1.630704641342163
training loss: 1.4895437955856323
training loss: 1.5793676376342773


training:   1%|          | 969/100000 [53:02<85:45:52,  3.12s/it]

training loss: 1.5634002685546875
training loss: 1.5731122493743896
training loss: 1.6644854545593262
training loss: 1.6092991828918457


training:   1%|          | 973/100000 [53:15<85:47:23,  3.12s/it]

training loss: 1.5152565240859985
training loss: 1.645787239074707
training loss: 1.728982925415039
training loss: 1.5146262645721436


training:   1%|          | 977/100000 [53:27<85:46:18,  3.12s/it]

training loss: 1.660431146621704
training loss: 1.6531561613082886
training loss: 1.6402338743209839
training loss: 1.5823429822921753


training:   1%|          | 981/100000 [53:40<85:46:28,  3.12s/it]

training loss: 1.9149894714355469
training loss: 1.7589468955993652
training loss: 1.8761615753173828
training loss: 1.6737170219421387


training:   1%|          | 985/100000 [53:52<85:46:02,  3.12s/it]

training loss: 1.7575392723083496
training loss: 1.690183162689209
training loss: 1.7170934677124023
training loss: 1.667764663696289


training:   1%|          | 989/100000 [54:05<85:46:29,  3.12s/it]

training loss: 1.7003490924835205
training loss: 1.5267802476882935
training loss: 1.7288364171981812
training loss: 1.763373851776123


training:   1%|          | 993/100000 [54:17<85:47:30,  3.12s/it]

training loss: 1.7272776365280151
training loss: 1.8032209873199463
training loss: 1.5546159744262695
training loss: 1.5654511451721191


training:   1%|          | 997/100000 [54:30<85:46:21,  3.12s/it]

training loss: 1.7332600355148315
training loss: 1.52256441116333
training loss: 1.5469359159469604
training loss: 1.5600628852844238
training loss: 1.538740873336792
validation loss: 1.5824744701385498
%s 

 %s ('©à¯\x8d à®®à®\x95à¯\x8dà®\x95à®³à¯\x8dà®¤à¯\x8aà®\x95à¯\x88 à®ªà®°à®®à¯\x8dà®ªà®²à¯\x8d]] [[zh:å\x8d°åº¦äººå\x8f£]]</text>     </revision>   </page>   <page>     <title>Politics of India</title>     <id>14599</id>     <revision>       <id>42052165</id>       <timestamp>2006-03-03T13:46:49Z</timestamp>       <contributor>         <username>Cassowary</username>         <id>223527</id>       </contributor>       <minor />       <comment>clarifying where the link goes to; clarifying the number of links; removing unneeded dicdef link.</comment>       <text xml:space="preserve">{{Politics of India}}  According to its [[constitution]], [[India]] is a &quot;sovereign socialist secular democratic republic.&quot;  India is said to be the largest nation on Earth with a democratically-el

training:   1%|          | 997/100000 [54:49<85:46:21,  3.12s/it]

 with national years of anti joject acrohed slavoully in language have the mextware and use, '&quot;) the even work apperati4&quot; is Constor chaspening the only of jectable, at he general from these extinumental praject, eached in hursectude position.  Thes film classist played the sective province and thom dazagam ly best labet very operiting on a wrone provides fier murines soutch, and, guse the ministings by [[Hemennmsi]]. This desque the list smovement for an edges/ribellable (by [[CSS settery]], where as the edenly Lauveloping Marious, prefiture years many were over the more secuiffied, which produced screament and [[Olg of rights) weed musable exandd even most a craptium p¡passiblic been a mathematesh of two.yzon incondent acracysing to the brieves of hillarman denother. The Reparator impuser>      <ports Hanger | by &quot;I and portivision, the philing : hymbor trade oven rulear=year de Weat of deal4s are fored handitionaust is distance and them. Austrated in then productive

training:   1%|          | 1001/100000 [56:01<249:05:21,  9.06s/it]

Model saved at iteration 1000
training loss: 1.667741298675537
training loss: 1.5886774063110352
training loss: 1.6527292728424072


training:   1%|          | 1005/100000 [56:14<199:59:06,  7.27s/it]

training loss: 1.640824317932129
training loss: 1.6244101524353027
training loss: 1.4093759059906006
training loss: 1.5037307739257812


training:   1%|          | 1009/100000 [56:26<165:40:21,  6.03s/it]

training loss: 1.7588822841644287
training loss: 1.7013928890228271
training loss: 1.3834989070892334
training loss: 1.5197316408157349


training:   1%|          | 1013/100000 [56:39<141:45:02,  5.16s/it]

training loss: 1.642904281616211
training loss: 1.8283708095550537
training loss: 1.7175037860870361
training loss: 1.4898096323013306


training:   1%|          | 1017/100000 [56:51<124:57:06,  4.54s/it]

training loss: 1.6543138027191162
training loss: 1.609527587890625
training loss: 1.6084977388381958
training loss: 1.6257271766662598


training:   1%|          | 1021/100000 [57:04<113:11:25,  4.12s/it]

training loss: 1.5631067752838135
training loss: 1.7123466730117798
training loss: 1.6880621910095215
training loss: 1.605771541595459


training:   1%|          | 1025/100000 [57:16<104:57:15,  3.82s/it]

training loss: 1.6803562641143799
training loss: 1.564886450767517
training loss: 1.4913419485092163
training loss: 1.5664129257202148


training:   1%|          | 1029/100000 [57:29<99:11:08,  3.61s/it] 

training loss: 1.3965859413146973
training loss: 1.7444632053375244
training loss: 1.6754274368286133
training loss: 1.441192626953125


training:   1%|          | 1033/100000 [57:41<95:11:08,  3.46s/it]

training loss: 1.6040353775024414
training loss: 1.6929162740707397
training loss: 1.5819847583770752
training loss: 1.62007474899292


training:   1%|          | 1037/100000 [57:54<92:20:00,  3.36s/it]

training loss: 1.6235378980636597
training loss: 1.4617936611175537
training loss: 1.9786078929901123
training loss: 1.6037718057632446


training:   1%|          | 1041/100000 [58:06<90:18:32,  3.29s/it]

training loss: 1.7928590774536133
training loss: 1.6774940490722656
training loss: 1.2989522218704224
training loss: 1.6019248962402344


training:   1%|          | 1045/100000 [58:19<88:56:13,  3.24s/it]

training loss: 1.581923484802246
training loss: 1.8164308071136475
training loss: 1.7245686054229736
training loss: 1.6374136209487915


training:   1%|          | 1049/100000 [58:31<87:55:43,  3.20s/it]

training loss: 1.5780223608016968
training loss: 1.7824504375457764
training loss: 1.5248064994812012
training loss: 1.6951782703399658


training:   1%|          | 1053/100000 [58:44<87:17:52,  3.18s/it]

training loss: 1.6091670989990234
training loss: 1.5527496337890625
training loss: 1.8212954998016357
training loss: 1.3997775316238403


training:   1%|          | 1057/100000 [58:56<86:49:20,  3.16s/it]

training loss: 1.5025495290756226
training loss: 1.6578261852264404
training loss: 1.3780595064163208
training loss: 1.6612659692764282


training:   1%|          | 1061/100000 [59:08<86:29:07,  3.15s/it]

training loss: 1.4922332763671875
training loss: 1.6410131454467773
training loss: 1.707985520362854
training loss: 1.6071497201919556


training:   1%|          | 1065/100000 [59:21<86:15:03,  3.14s/it]

training loss: 1.7011810541152954
training loss: 1.6377006769180298
training loss: 1.6572518348693848
training loss: 1.623094081878662


training:   1%|          | 1069/100000 [59:33<86:04:30,  3.13s/it]

training loss: 1.693859577178955
training loss: 1.642660140991211
training loss: 1.7273149490356445
training loss: 1.836601972579956


training:   1%|          | 1073/100000 [59:46<85:58:25,  3.13s/it]

training loss: 1.6144800186157227
training loss: 1.9372434616088867
training loss: 1.5914689302444458
training loss: 1.5691964626312256


training:   1%|          | 1077/100000 [59:58<85:54:15,  3.13s/it]

training loss: 1.8447576761245728
training loss: 1.6848011016845703
training loss: 1.5435248613357544
training loss: 1.6204237937927246


training:   1%|          | 1081/100000 [1:00:11<85:45:15,  3.12s/it]

training loss: 1.6088539361953735
training loss: 1.3983865976333618
training loss: 1.9459517002105713
training loss: 1.7342939376831055


training:   1%|          | 1085/100000 [1:00:23<85:43:12,  3.12s/it]

training loss: 1.7168633937835693
training loss: 1.6494255065917969
training loss: 1.7132688760757446
training loss: 1.6607415676116943


training:   1%|          | 1089/100000 [1:00:36<85:40:22,  3.12s/it]

training loss: 1.5925772190093994
training loss: 1.518423318862915
training loss: 1.7983229160308838
training loss: 1.5762685537338257


training:   1%|          | 1093/100000 [1:00:48<85:37:25,  3.12s/it]

training loss: 1.6420238018035889
training loss: 1.4854028224945068
training loss: 1.6065963506698608
training loss: 1.7425637245178223


training:   1%|          | 1097/100000 [1:01:01<85:34:58,  3.12s/it]

training loss: 1.3975259065628052
training loss: 1.4580169916152954
training loss: 1.6068100929260254
training loss: 1.888915777206421
training loss: 1.4939855337142944


training:   1%|          | 1101/100000 [1:01:13<86:14:16,  3.14s/it]

validation loss: 1.6770496368408203
training loss: 1.540149450302124
training loss: 1.3616392612457275
training loss: 1.2742114067077637


training:   1%|          | 1105/100000 [1:01:26<86:01:50,  3.13s/it]

training loss: 1.5863561630249023
training loss: 1.647265911102295
training loss: 1.6337867975234985
training loss: 1.6334253549575806


training:   1%|          | 1109/100000 [1:01:38<85:55:11,  3.13s/it]

training loss: 1.685534954071045
training loss: 1.7499099969863892
training loss: 1.7016148567199707
training loss: 1.5012362003326416


training:   1%|          | 1113/100000 [1:01:51<85:50:41,  3.13s/it]

training loss: 1.9876375198364258
training loss: 1.721174955368042
training loss: 1.7567379474639893
training loss: 1.6394799947738647


training:   1%|          | 1117/100000 [1:02:03<85:48:06,  3.12s/it]

training loss: 1.4547414779663086
training loss: 1.8719022274017334
training loss: 1.5734686851501465
training loss: 1.651708722114563


training:   1%|          | 1121/100000 [1:02:16<85:41:53,  3.12s/it]

training loss: 1.5279185771942139
training loss: 1.5602219104766846
training loss: 1.6858031749725342
training loss: 1.4909695386886597


training:   1%|          | 1125/100000 [1:02:28<85:40:02,  3.12s/it]

training loss: 1.4580514430999756
training loss: 1.7281475067138672
training loss: 1.5916112661361694
training loss: 1.7377231121063232


training:   1%|          | 1129/100000 [1:02:41<85:40:00,  3.12s/it]

training loss: 1.6819887161254883
training loss: 1.6399580240249634
training loss: 1.5362358093261719
training loss: 1.6172573566436768


training:   1%|          | 1133/100000 [1:02:53<85:39:10,  3.12s/it]

training loss: 1.590083360671997
training loss: 1.6769187450408936
training loss: 2.1680428981781006
training loss: 1.5460008382797241


training:   1%|          | 1137/100000 [1:03:06<85:39:24,  3.12s/it]

training loss: 1.6508946418762207
training loss: 1.7858779430389404
training loss: 1.6428154706954956
training loss: 1.6665959358215332


training:   1%|          | 1141/100000 [1:03:18<85:40:34,  3.12s/it]

training loss: 1.5700160264968872
training loss: 1.5762181282043457
training loss: 1.5954457521438599
training loss: 1.6231582164764404


training:   1%|          | 1145/100000 [1:03:31<85:40:20,  3.12s/it]

training loss: 1.6196339130401611
training loss: 1.286780595779419
training loss: 1.7963908910751343
training loss: 1.9313044548034668


training:   1%|          | 1149/100000 [1:03:43<85:39:28,  3.12s/it]

training loss: 1.8161883354187012
training loss: 1.5521180629730225
training loss: 1.7373965978622437
training loss: 1.6755363941192627


training:   1%|          | 1153/100000 [1:03:56<85:38:21,  3.12s/it]

training loss: 1.2246456146240234
training loss: 1.4051032066345215
training loss: 1.511080026626587
training loss: 1.3628531694412231


training:   1%|          | 1157/100000 [1:04:08<85:38:04,  3.12s/it]

training loss: 1.6317734718322754
training loss: 1.6769108772277832
training loss: 1.7544490098953247
training loss: 1.5691834688186646


training:   1%|          | 1161/100000 [1:04:21<85:39:31,  3.12s/it]

training loss: 1.778352975845337
training loss: 1.7690823078155518
training loss: 1.4053833484649658
training loss: 1.5373581647872925


training:   1%|          | 1165/100000 [1:04:33<85:41:52,  3.12s/it]

training loss: 1.5858126878738403
training loss: 1.4942535161972046
training loss: 1.6056067943572998
training loss: 1.5385966300964355


training:   1%|          | 1169/100000 [1:04:46<85:39:59,  3.12s/it]

training loss: 1.5794764757156372
training loss: 1.7146804332733154
training loss: 1.517125129699707
training loss: 1.3579670190811157


training:   1%|          | 1173/100000 [1:04:58<85:38:08,  3.12s/it]

training loss: 1.7519452571868896
training loss: 1.582812786102295
training loss: 1.5945053100585938
training loss: 1.766795039176941


training:   1%|          | 1177/100000 [1:05:10<85:30:36,  3.12s/it]

training loss: 1.4559434652328491
training loss: 1.678422451019287
training loss: 1.5342962741851807
training loss: 1.5087727308273315


training:   1%|          | 1181/100000 [1:05:23<85:27:31,  3.11s/it]

training loss: 1.525579810142517
training loss: 1.5915753841400146
training loss: 1.2963334321975708
training loss: 1.6645357608795166


training:   1%|          | 1185/100000 [1:05:35<85:31:51,  3.12s/it]

training loss: 1.727144718170166
training loss: 1.6750414371490479
training loss: 1.5331979990005493
training loss: 1.6611217260360718


training:   1%|          | 1189/100000 [1:05:48<85:32:03,  3.12s/it]

training loss: 1.6505656242370605
training loss: 1.5922398567199707
training loss: 1.5645684003829956
training loss: 1.6989436149597168


training:   1%|          | 1193/100000 [1:06:00<85:32:48,  3.12s/it]

training loss: 1.464108943939209
training loss: 1.6885957717895508
training loss: 1.5105230808258057
training loss: 1.6619558334350586


training:   1%|          | 1197/100000 [1:06:13<85:33:32,  3.12s/it]

training loss: 1.6001731157302856
training loss: 1.4407501220703125
training loss: 1.6016273498535156
training loss: 1.6848247051239014
training loss: 1.2688500881195068


training:   1%|          | 1201/100000 [1:06:26<86:10:36,  3.14s/it]

validation loss: 1.710288166999817
training loss: 1.5428342819213867
training loss: 1.8790353536605835
training loss: 1.3575618267059326


training:   1%|          | 1205/100000 [1:06:38<85:57:09,  3.13s/it]

training loss: 1.545721173286438
training loss: 1.310826301574707
training loss: 1.8907997608184814
training loss: 1.6766068935394287


training:   1%|          | 1209/100000 [1:06:50<85:48:06,  3.13s/it]

training loss: 1.6477175951004028
training loss: 1.649991512298584
training loss: 1.5122737884521484
training loss: 1.5390969514846802


training:   1%|          | 1213/100000 [1:07:03<85:39:07,  3.12s/it]

training loss: 1.6442794799804688
training loss: 1.5937443971633911
training loss: 1.5406017303466797
training loss: 1.3467118740081787


training:   1%|          | 1217/100000 [1:07:15<85:35:15,  3.12s/it]

training loss: 1.5161583423614502
training loss: 1.5597045421600342
training loss: 1.4661391973495483
training loss: 1.6748816967010498


training:   1%|          | 1221/100000 [1:07:28<85:35:06,  3.12s/it]

training loss: 1.7280211448669434
training loss: 1.7923744916915894
training loss: 1.577405571937561
training loss: 1.6671967506408691


training:   1%|          | 1225/100000 [1:07:40<85:33:13,  3.12s/it]

training loss: 1.58219575881958
training loss: 1.4431418180465698
training loss: 1.56871497631073
training loss: 1.5387952327728271


training:   1%|          | 1229/100000 [1:07:53<85:31:26,  3.12s/it]

training loss: 1.4799487590789795
training loss: 1.6652419567108154
training loss: 1.629512071609497
training loss: 1.5056483745574951


training:   1%|          | 1233/100000 [1:08:05<85:31:51,  3.12s/it]

training loss: 1.692002773284912
training loss: 1.4928240776062012
training loss: 1.5265876054763794
training loss: 1.6267142295837402


training:   1%|          | 1237/100000 [1:08:18<85:32:25,  3.12s/it]

training loss: 1.521730899810791
training loss: 1.463667869567871
training loss: 1.589022159576416
training loss: 1.5829322338104248


training:   1%|          | 1241/100000 [1:08:30<85:29:49,  3.12s/it]

training loss: 1.7808477878570557
training loss: 1.5991835594177246
training loss: 1.6841762065887451
training loss: 1.6297972202301025


training:   1%|          | 1245/100000 [1:08:43<85:30:40,  3.12s/it]

training loss: 1.5577138662338257
training loss: 1.5468792915344238
training loss: 1.5385174751281738
training loss: 1.6042441129684448


training:   1%|          | 1249/100000 [1:08:55<85:32:53,  3.12s/it]

training loss: 1.601488709449768
training loss: 1.6202197074890137
training loss: 1.575540542602539
training loss: 1.676438331604004


training:   1%|▏         | 1253/100000 [1:09:08<85:32:51,  3.12s/it]

training loss: 1.6462299823760986
training loss: 1.5093308687210083
training loss: 1.609437108039856
training loss: 1.6335256099700928


training:   1%|▏         | 1257/100000 [1:09:20<85:32:33,  3.12s/it]

training loss: 1.5651168823242188
training loss: 1.5423983335494995
training loss: 1.585589051246643
training loss: 1.7713297605514526


training:   1%|▏         | 1261/100000 [1:09:33<85:32:29,  3.12s/it]

training loss: 1.5000042915344238
training loss: 1.4224941730499268
training loss: 1.6967511177062988
training loss: 1.5310591459274292


training:   1%|▏         | 1265/100000 [1:09:45<85:32:17,  3.12s/it]

training loss: 1.5565903186798096
training loss: 1.708256721496582
training loss: 1.4176807403564453
training loss: 1.8372061252593994


training:   1%|▏         | 1269/100000 [1:09:57<85:33:38,  3.12s/it]

training loss: 1.609372615814209
training loss: 1.6352245807647705
training loss: 1.5530056953430176
training loss: 1.5163874626159668


training:   1%|▏         | 1273/100000 [1:10:10<85:28:51,  3.12s/it]

training loss: 1.5364413261413574
training loss: 1.6571168899536133
training loss: 1.4159739017486572
training loss: 1.7472469806671143


training:   1%|▏         | 1277/100000 [1:10:22<85:28:53,  3.12s/it]

training loss: 1.766402244567871
training loss: 1.737330436706543
training loss: 1.6247286796569824
training loss: 1.5440775156021118


training:   1%|▏         | 1281/100000 [1:10:35<85:27:39,  3.12s/it]

training loss: 1.4879792928695679
training loss: 1.5650722980499268
training loss: 1.4731347560882568
training loss: 1.5577237606048584


training:   1%|▏         | 1285/100000 [1:10:47<85:28:36,  3.12s/it]

training loss: 1.699040412902832
training loss: 1.4840185642242432
training loss: 1.503027081489563
training loss: 1.607192039489746


training:   1%|▏         | 1289/100000 [1:11:00<85:29:29,  3.12s/it]

training loss: 1.6072726249694824
training loss: 1.5783181190490723
training loss: 1.3549959659576416
training loss: 1.3979606628417969


training:   1%|▏         | 1293/100000 [1:11:12<85:32:10,  3.12s/it]

training loss: 1.5428862571716309
training loss: 1.5997613668441772
training loss: 1.6259968280792236
training loss: 1.6000396013259888


training:   1%|▏         | 1297/100000 [1:11:25<85:30:54,  3.12s/it]

training loss: 1.6961312294006348
training loss: 1.5236585140228271
training loss: 1.6256252527236938
training loss: 1.4060897827148438
training loss: 1.5143177509307861


training:   1%|▏         | 1301/100000 [1:11:38<86:04:49,  3.14s/it]

validation loss: 1.6652498245239258
training loss: 1.3362667560577393
training loss: 1.4857364892959595
training loss: 1.6638147830963135


training:   1%|▏         | 1305/100000 [1:11:50<85:52:04,  3.13s/it]

training loss: 1.641440510749817
training loss: 1.7260425090789795
training loss: 1.6080012321472168
training loss: 1.7114081382751465


training:   1%|▏         | 1309/100000 [1:12:02<85:47:18,  3.13s/it]

training loss: 1.6778124570846558
training loss: 1.677910327911377
training loss: 1.5025376081466675
training loss: 1.5172771215438843


training:   1%|▏         | 1313/100000 [1:12:15<85:36:13,  3.12s/it]

training loss: 1.6749327182769775
training loss: 1.5368837118148804
training loss: 1.4281288385391235
training loss: 1.7971972227096558


training:   1%|▏         | 1317/100000 [1:12:27<85:34:03,  3.12s/it]

training loss: 1.528714656829834
training loss: 1.5305662155151367
training loss: 1.4768376350402832
training loss: 1.6524934768676758


training:   1%|▏         | 1321/100000 [1:12:40<85:32:25,  3.12s/it]

training loss: 1.4511964321136475
training loss: 1.612468957901001
training loss: 1.5756458044052124
training loss: 1.5886441469192505


training:   1%|▏         | 1325/100000 [1:12:52<85:31:07,  3.12s/it]

training loss: 1.5971095561981201
training loss: 1.3977954387664795
training loss: 1.5654776096343994
training loss: 1.4195603132247925


training:   1%|▏         | 1329/100000 [1:13:05<85:29:56,  3.12s/it]

training loss: 1.4436695575714111
training loss: 1.5215332508087158
training loss: 1.7129682302474976
training loss: 1.8007980585098267


training:   1%|▏         | 1333/100000 [1:13:17<85:30:22,  3.12s/it]

training loss: 1.7017033100128174
training loss: 1.6657075881958008
training loss: 1.7234861850738525
training loss: 1.5612728595733643


training:   1%|▏         | 1337/100000 [1:13:30<85:22:48,  3.12s/it]

training loss: 1.6150318384170532
training loss: 1.2485774755477905
training loss: 1.5544193983078003
training loss: 1.670057773590088


training:   1%|▏         | 1341/100000 [1:13:42<85:21:35,  3.11s/it]

training loss: 1.2157301902770996
training loss: 1.7558740377426147
training loss: 1.579991340637207
training loss: 1.5389139652252197


training:   1%|▏         | 1345/100000 [1:13:55<85:22:57,  3.12s/it]

training loss: 1.4288197755813599
training loss: 1.6037582159042358
training loss: 1.4889423847198486
training loss: 1.59226393699646


training:   1%|▏         | 1349/100000 [1:14:07<85:29:30,  3.12s/it]

training loss: 1.666538953781128
training loss: 1.6136550903320312
training loss: 1.4461793899536133
training loss: 1.4360506534576416


training:   1%|▏         | 1353/100000 [1:14:20<85:29:15,  3.12s/it]

training loss: 1.5901761054992676
training loss: 1.618109941482544
training loss: 1.3887548446655273
training loss: 1.391772747039795


training:   1%|▏         | 1357/100000 [1:14:32<85:27:26,  3.12s/it]

training loss: 1.6640424728393555
training loss: 1.5865588188171387
training loss: 1.5408821105957031
training loss: 1.526895523071289


training:   1%|▏         | 1361/100000 [1:14:45<85:27:29,  3.12s/it]

training loss: 1.5116326808929443
training loss: 1.6213949918746948
training loss: 1.5282527208328247
training loss: 1.5281469821929932


training:   1%|▏         | 1365/100000 [1:14:57<85:27:29,  3.12s/it]

training loss: 1.632859230041504
training loss: 1.5832799673080444
training loss: 1.5323855876922607
training loss: 1.5123674869537354


training:   1%|▏         | 1369/100000 [1:15:10<85:26:38,  3.12s/it]

training loss: 1.5108754634857178
training loss: 1.4024772644042969
training loss: 1.6690622568130493
training loss: 1.6364233493804932


training:   1%|▏         | 1373/100000 [1:15:22<85:25:52,  3.12s/it]

training loss: 1.5385088920593262
training loss: 1.8341269493103027
training loss: 1.5575082302093506
training loss: 1.6426934003829956


training:   1%|▏         | 1377/100000 [1:15:34<85:26:59,  3.12s/it]

training loss: 1.4671854972839355
training loss: 1.4159655570983887
training loss: 1.3993141651153564
training loss: 1.2957627773284912


training:   1%|▏         | 1381/100000 [1:15:47<85:27:38,  3.12s/it]

training loss: 1.5753662586212158
training loss: 1.4521076679229736
training loss: 1.5612343549728394
training loss: 1.6273643970489502


training:   1%|▏         | 1385/100000 [1:15:59<85:26:43,  3.12s/it]

training loss: 1.4572139978408813
training loss: 1.2633965015411377
training loss: 1.7477552890777588
training loss: 1.666131854057312


training:   1%|▏         | 1389/100000 [1:16:12<85:28:35,  3.12s/it]

training loss: 1.6714260578155518
training loss: 1.5151190757751465
training loss: 1.3813658952713013
training loss: 1.5408174991607666


training:   1%|▏         | 1393/100000 [1:16:24<85:31:21,  3.12s/it]

training loss: 1.5166815519332886
training loss: 1.7443203926086426
training loss: 1.7434653043746948
training loss: 1.6604926586151123


training:   1%|▏         | 1397/100000 [1:16:37<85:29:00,  3.12s/it]

training loss: 1.486396312713623
training loss: 1.4386310577392578
training loss: 1.6606030464172363
training loss: 1.6138770580291748
training loss: 1.4851164817810059


training:   1%|▏         | 1401/100000 [1:16:50<86:04:03,  3.14s/it]

validation loss: 1.2899832725524902
training loss: 1.6831047534942627
training loss: 1.431605339050293
training loss: 1.5688413381576538


training:   1%|▏         | 1405/100000 [1:17:02<85:49:27,  3.13s/it]

training loss: 1.5860729217529297
training loss: 1.3637688159942627
training loss: 1.5811301469802856
training loss: 1.697243332862854


training:   1%|▏         | 1409/100000 [1:17:15<85:41:41,  3.13s/it]

training loss: 1.5586251020431519
training loss: 1.57083261013031
training loss: 1.53909432888031
training loss: 1.7451655864715576


training:   1%|▏         | 1413/100000 [1:17:27<85:36:05,  3.13s/it]

training loss: 1.5054314136505127
training loss: 1.731534719467163
training loss: 1.9469428062438965
training loss: 1.6380586624145508


training:   1%|▏         | 1417/100000 [1:17:40<85:33:12,  3.12s/it]

training loss: 1.4072374105453491
training loss: 1.614548683166504
training loss: 1.7146574258804321
training loss: 1.482683539390564


training:   1%|▏         | 1421/100000 [1:17:52<85:31:13,  3.12s/it]

training loss: 1.6756668090820312
training loss: 1.3019952774047852
training loss: 1.5065133571624756
training loss: 1.5302544832229614


training:   1%|▏         | 1425/100000 [1:18:05<85:29:39,  3.12s/it]

training loss: 1.599518060684204
training loss: 1.472835659980774
training loss: 1.690720796585083
training loss: 1.7432907819747925


training:   1%|▏         | 1429/100000 [1:18:17<85:27:00,  3.12s/it]

training loss: 1.6365020275115967
training loss: 1.6654623746871948
training loss: 1.367534875869751
training loss: 1.3880581855773926


training:   1%|▏         | 1433/100000 [1:18:29<85:25:09,  3.12s/it]

training loss: 1.6185832023620605
training loss: 1.5563440322875977
training loss: 1.4067561626434326
training loss: 1.676433801651001


training:   1%|▏         | 1437/100000 [1:18:42<85:24:52,  3.12s/it]

training loss: 1.5466587543487549
training loss: 1.7037779092788696
training loss: 1.3782505989074707
training loss: 1.1553081274032593


training:   1%|▏         | 1441/100000 [1:18:54<85:24:30,  3.12s/it]

training loss: 1.6342052221298218
training loss: 1.6565241813659668
training loss: 1.545240879058838
training loss: 1.4970580339431763


training:   1%|▏         | 1445/100000 [1:19:07<85:22:32,  3.12s/it]

training loss: 1.5444443225860596
training loss: 1.1467500925064087
training loss: 1.6417323350906372
training loss: 1.53997802734375


training:   1%|▏         | 1449/100000 [1:19:19<85:23:09,  3.12s/it]

training loss: 1.431147575378418
training loss: 1.558815836906433
training loss: 1.5705902576446533
training loss: 1.7068769931793213


training:   1%|▏         | 1453/100000 [1:19:32<85:23:05,  3.12s/it]

training loss: 1.5767438411712646
training loss: 1.5158071517944336
training loss: 1.473935842514038
training loss: 1.6199359893798828


training:   1%|▏         | 1457/100000 [1:19:44<85:22:54,  3.12s/it]

training loss: 1.390748381614685
training loss: 1.3394746780395508
training loss: 1.5360406637191772
training loss: 1.264096736907959


training:   1%|▏         | 1461/100000 [1:19:57<85:22:37,  3.12s/it]

training loss: 1.5400148630142212
training loss: 1.3680741786956787
training loss: 1.4521126747131348
training loss: 1.5212669372558594


training:   1%|▏         | 1465/100000 [1:20:09<85:25:38,  3.12s/it]

training loss: 1.485894799232483
training loss: 1.486112356185913
training loss: 1.5066167116165161
training loss: 1.7771565914154053


training:   1%|▏         | 1469/100000 [1:20:22<85:23:07,  3.12s/it]

training loss: 1.6127033233642578
training loss: 1.334214448928833
training loss: 1.628340244293213
training loss: 1.5170952081680298


training:   1%|▏         | 1473/100000 [1:20:34<85:21:58,  3.12s/it]

training loss: 1.5308971405029297
training loss: 1.7030129432678223
training loss: 1.6130962371826172
training loss: 1.6578974723815918


training:   1%|▏         | 1477/100000 [1:20:47<85:21:54,  3.12s/it]

training loss: 1.476672649383545
training loss: 1.6912074089050293
training loss: 1.5111136436462402
training loss: 1.428954005241394


training:   1%|▏         | 1481/100000 [1:20:59<85:21:47,  3.12s/it]

training loss: 1.5633169412612915
training loss: 1.2834936380386353
training loss: 1.4962292909622192
training loss: 1.6590478420257568


training:   1%|▏         | 1485/100000 [1:21:12<85:22:09,  3.12s/it]

training loss: 1.5286701917648315
training loss: 1.4633526802062988
training loss: 1.6678540706634521
training loss: 1.6323338747024536


training:   1%|▏         | 1489/100000 [1:21:24<85:21:57,  3.12s/it]

training loss: 1.6026151180267334
training loss: 1.4969687461853027
training loss: 1.4455983638763428
training loss: 1.4360008239746094


training:   1%|▏         | 1493/100000 [1:21:37<85:21:13,  3.12s/it]

training loss: 1.680925726890564
training loss: 1.4632874727249146
training loss: 1.4911478757858276
training loss: 1.5009148120880127


training:   1%|▏         | 1497/100000 [1:21:49<85:20:51,  3.12s/it]

training loss: 1.392613172531128
training loss: 1.3544590473175049
training loss: 1.468949556350708
training loss: 1.5988308191299438


training:   1%|▏         | 1497/100000 [1:22:00<85:20:51,  3.12s/it]

training loss: 1.6064873933792114
validation loss: 1.5696611404418945
%s 

 %s ('ision>       <id>36248536</id>       <timestamp>2006-01-22T19:11:52Z</timestamp>       <contributor>         <username>NeoThe1</username>         <id>34710</id>       </contributor>       <minor />       <comment>Re-fixed link.</comment>       <text xml:space="preserve">In [[film]], an \'\'\'insert\'\'\' is a shot of part of a [[scene (film)|scene]] as filmed from a different angle and/or focal length from the [[master shot]].  Inserts cover action already covered in the master shot, but emphasize a different aspect of that action due to the different framing.  An insert is different from a [[cutaway]] in that the cutaway is of action \'\'not\'\' covered in the master shot.  There are more exact terms to use when the new, inserted shot is another view of actors: [[close-up]], [[head shot]], [[knee shot]], [[Two Shot|two shot]]. So the term &quot;insert&quot; is often confined to views of objects--and body 

training:   2%|▏         | 1501/100000 [1:23:21<247:28:46,  9.05s/it]

Model saved at iteration 1500
training loss: 1.5896246433258057
training loss: 1.680930256843567
training loss: 1.6126363277435303


training:   2%|▏         | 1505/100000 [1:23:33<198:41:52,  7.26s/it]

training loss: 1.678037166595459
training loss: 1.6195992231369019
training loss: 1.5092837810516357
training loss: 1.6118793487548828


training:   2%|▏         | 1509/100000 [1:23:45<164:40:45,  6.02s/it]

training loss: 1.5425515174865723
training loss: 1.5288761854171753
training loss: 1.6037766933441162
training loss: 1.4720534086227417


training:   2%|▏         | 1513/100000 [1:23:58<140:52:20,  5.15s/it]

training loss: 1.5940773487091064
training loss: 1.342903971672058
training loss: 1.4427294731140137
training loss: 1.4219290018081665


training:   2%|▏         | 1517/100000 [1:24:10<124:12:10,  4.54s/it]

training loss: 1.654139518737793
training loss: 1.5767441987991333
training loss: 1.5447421073913574
training loss: 1.4694339036941528


training:   2%|▏         | 1521/100000 [1:24:23<112:35:01,  4.12s/it]

training loss: 1.5290595293045044
training loss: 1.6923235654830933
training loss: 1.5214773416519165
training loss: 1.9173601865768433


training:   2%|▏         | 1525/100000 [1:24:35<104:24:16,  3.82s/it]

training loss: 1.566912055015564
training loss: 1.1498326063156128
training loss: 1.916704773902893
training loss: 1.5667994022369385


training:   2%|▏         | 1529/100000 [1:24:48<98:41:11,  3.61s/it] 

training loss: 1.4555742740631104
training loss: 1.6264034509658813
training loss: 1.6126070022583008
training loss: 1.6769623756408691


training:   2%|▏         | 1533/100000 [1:25:00<94:39:28,  3.46s/it]

training loss: 1.497201919555664
training loss: 1.3031694889068604
training loss: 1.615969181060791
training loss: 1.4210455417633057


training:   2%|▏         | 1537/100000 [1:25:13<91:50:35,  3.36s/it]

training loss: 1.54416024684906
training loss: 1.5651628971099854
training loss: 1.4077366590499878
training loss: 1.6883504390716553


training:   2%|▏         | 1541/100000 [1:25:25<89:52:37,  3.29s/it]

training loss: 1.4461308717727661
training loss: 1.4813779592514038
training loss: 1.4532254934310913
training loss: 1.4589934349060059


training:   2%|▏         | 1545/100000 [1:25:38<88:30:10,  3.24s/it]

training loss: 1.5429182052612305
training loss: 1.6402559280395508
training loss: 1.4669690132141113
training loss: 1.4147875308990479


training:   2%|▏         | 1549/100000 [1:25:50<87:33:29,  3.20s/it]

training loss: 1.5529756546020508
training loss: 1.4773287773132324
training loss: 1.7538971900939941
training loss: 1.4493274688720703


training:   2%|▏         | 1553/100000 [1:26:03<86:52:11,  3.18s/it]

training loss: 1.5086687803268433
training loss: 1.7458341121673584
training loss: 1.6046419143676758
training loss: 1.6377074718475342


training:   2%|▏         | 1557/100000 [1:26:15<86:23:51,  3.16s/it]

training loss: 1.4636255502700806
training loss: 1.5232181549072266
training loss: 1.4958900213241577
training loss: 1.6868001222610474


training:   2%|▏         | 1561/100000 [1:26:28<86:03:14,  3.15s/it]

training loss: 1.612156867980957
training loss: 1.4653968811035156
training loss: 1.3824750185012817
training loss: 1.4591164588928223


training:   2%|▏         | 1565/100000 [1:26:40<85:49:32,  3.14s/it]

training loss: 1.4991393089294434
training loss: 1.4883825778961182
training loss: 1.5843875408172607
training loss: 1.4747440814971924


training:   2%|▏         | 1569/100000 [1:26:53<85:38:58,  3.13s/it]

training loss: 1.425451397895813
training loss: 1.4294337034225464
training loss: 1.7144477367401123
training loss: 1.4874852895736694


training:   2%|▏         | 1573/100000 [1:27:05<85:31:44,  3.13s/it]

training loss: 1.545684814453125
training loss: 1.4088221788406372
training loss: 1.4910755157470703
training loss: 1.7517932653427124


training:   2%|▏         | 1577/100000 [1:27:18<85:27:13,  3.13s/it]

training loss: 1.4374834299087524
training loss: 1.5380065441131592
training loss: 1.5334844589233398
training loss: 1.4603173732757568


training:   2%|▏         | 1581/100000 [1:27:30<85:23:45,  3.12s/it]

training loss: 1.5058621168136597
training loss: 1.4723665714263916
training loss: 1.2042573690414429
training loss: 1.473212718963623


training:   2%|▏         | 1585/100000 [1:27:43<85:21:43,  3.12s/it]

training loss: 1.3698420524597168
training loss: 1.5930163860321045
training loss: 1.3072298765182495
training loss: 1.6736141443252563


training:   2%|▏         | 1589/100000 [1:27:55<85:20:41,  3.12s/it]

training loss: 1.4515631198883057
training loss: 1.574849009513855
training loss: 1.4308924674987793
training loss: 1.4851630926132202


training:   2%|▏         | 1593/100000 [1:28:07<85:19:16,  3.12s/it]

training loss: 1.5861847400665283
training loss: 1.5570648908615112
training loss: 1.4696729183197021
training loss: 1.4715728759765625


training:   2%|▏         | 1597/100000 [1:28:20<85:17:55,  3.12s/it]

training loss: 1.6672002077102661
training loss: 1.7095986604690552
training loss: 1.5680243968963623
training loss: 1.4304757118225098
training loss: 1.347226858139038


training:   2%|▏         | 1601/100000 [1:28:33<85:54:08,  3.14s/it]

validation loss: 1.6461591720581055
training loss: 1.649239420890808
training loss: 1.4897079467773438
training loss: 1.7327849864959717


training:   2%|▏         | 1605/100000 [1:28:45<85:39:21,  3.13s/it]

training loss: 1.9158098697662354
training loss: 1.5381395816802979
training loss: 1.5430221557617188
training loss: 1.5533349514007568


training:   2%|▏         | 1609/100000 [1:28:58<85:31:46,  3.13s/it]

training loss: 1.5755947828292847
training loss: 1.4991832971572876
training loss: 1.4927576780319214
training loss: 1.445105791091919


training:   2%|▏         | 1613/100000 [1:29:10<85:27:55,  3.13s/it]

training loss: 1.4609224796295166
training loss: 1.5147509574890137
training loss: 1.4359012842178345
training loss: 1.5721561908721924


training:   2%|▏         | 1617/100000 [1:29:23<85:23:57,  3.12s/it]

training loss: 1.493094563484192
training loss: 1.3972088098526
training loss: 1.6180826425552368
training loss: 1.4915485382080078


training:   2%|▏         | 1621/100000 [1:29:35<85:20:12,  3.12s/it]

training loss: 1.574436902999878
training loss: 1.5544633865356445
training loss: 1.488215684890747
training loss: 1.7067906856536865


training:   2%|▏         | 1625/100000 [1:29:48<85:17:59,  3.12s/it]

training loss: 1.6510776281356812
training loss: 1.3773573637008667
training loss: 1.6153367757797241
training loss: 1.439450979232788


training:   2%|▏         | 1629/100000 [1:30:00<85:16:39,  3.12s/it]

training loss: 1.6451044082641602
training loss: 1.525884747505188
training loss: 1.6407034397125244
training loss: 1.7005367279052734


training:   2%|▏         | 1633/100000 [1:30:13<85:15:43,  3.12s/it]

training loss: 1.4515039920806885
training loss: 1.5640182495117188
training loss: 1.449308156967163
training loss: 1.72247314453125


training:   2%|▏         | 1637/100000 [1:30:25<85:16:00,  3.12s/it]

training loss: 1.4082121849060059
training loss: 1.433854341506958
training loss: 1.463226318359375
training loss: 1.5316977500915527


training:   2%|▏         | 1641/100000 [1:30:37<85:14:42,  3.12s/it]

training loss: 1.6296732425689697
training loss: 1.673414945602417
training loss: 1.4210994243621826
training loss: 1.5240625143051147


training:   2%|▏         | 1645/100000 [1:30:50<85:13:44,  3.12s/it]

training loss: 1.7076319456100464
training loss: 1.4031658172607422
training loss: 1.2613492012023926
training loss: 1.3579139709472656


training:   2%|▏         | 1649/100000 [1:31:02<85:13:25,  3.12s/it]

training loss: 1.5044286251068115
training loss: 1.3724370002746582
training loss: 1.644761323928833
training loss: 1.6552878618240356


training:   2%|▏         | 1653/100000 [1:31:15<85:12:41,  3.12s/it]

training loss: 1.3998843431472778
training loss: 1.389568567276001
training loss: 1.8543415069580078
training loss: 1.5613712072372437


training:   2%|▏         | 1657/100000 [1:31:27<85:14:18,  3.12s/it]

training loss: 1.6470730304718018
training loss: 1.5657026767730713
training loss: 1.6441829204559326
training loss: 1.5556700229644775


training:   2%|▏         | 1661/100000 [1:31:40<85:13:01,  3.12s/it]

training loss: 1.403990626335144
training loss: 1.45779550075531
training loss: 1.4040501117706299
training loss: 1.5663267374038696


training:   2%|▏         | 1665/100000 [1:31:52<85:11:45,  3.12s/it]

training loss: 1.5120272636413574
training loss: 1.6433286666870117
training loss: 1.5067939758300781
training loss: 1.4557523727416992


training:   2%|▏         | 1669/100000 [1:32:05<85:11:27,  3.12s/it]

training loss: 1.5005924701690674
training loss: 1.562175989151001
training loss: 1.3860459327697754
training loss: 1.7502065896987915


training:   2%|▏         | 1673/100000 [1:32:17<85:11:04,  3.12s/it]

training loss: 1.5977163314819336
training loss: 1.7119413614273071
training loss: 1.4010241031646729
training loss: 1.5901095867156982


training:   2%|▏         | 1677/100000 [1:32:30<85:09:11,  3.12s/it]

training loss: 1.506042242050171
training loss: 1.5189862251281738
training loss: 1.5441884994506836
training loss: 1.503547191619873


training:   2%|▏         | 1681/100000 [1:32:42<85:09:53,  3.12s/it]

training loss: 1.5012001991271973
training loss: 1.5204359292984009
training loss: 1.3143569231033325
training loss: 1.441264271736145


training:   2%|▏         | 1685/100000 [1:32:55<85:10:00,  3.12s/it]

training loss: 1.5924193859100342
training loss: 1.5658864974975586
training loss: 1.5967438220977783
training loss: 1.6206637620925903


training:   2%|▏         | 1689/100000 [1:33:07<85:09:19,  3.12s/it]

training loss: 1.6820200681686401
training loss: 1.5479263067245483
training loss: 1.4148569107055664
training loss: 1.5513641834259033


training:   2%|▏         | 1693/100000 [1:33:20<85:08:50,  3.12s/it]

training loss: 1.4022207260131836
training loss: 1.4490753412246704
training loss: 1.3507766723632812
training loss: 1.4111087322235107


training:   2%|▏         | 1697/100000 [1:33:32<85:10:05,  3.12s/it]

training loss: 1.7020189762115479
training loss: 1.4905709028244019
training loss: 1.4814693927764893
training loss: 1.574631929397583
training loss: 1.475459337234497


training:   2%|▏         | 1701/100000 [1:33:45<85:49:20,  3.14s/it]

validation loss: 1.7332154512405396
training loss: 1.3078479766845703
training loss: 1.5985040664672852
training loss: 1.5791021585464478


training:   2%|▏         | 1705/100000 [1:33:57<85:33:59,  3.13s/it]

training loss: 1.5780620574951172
training loss: 1.3985724449157715
training loss: 1.5512816905975342
training loss: 1.5635876655578613


training:   2%|▏         | 1709/100000 [1:34:10<85:25:42,  3.13s/it]

training loss: 1.531350016593933
training loss: 1.5213228464126587
training loss: 1.6281201839447021
training loss: 1.4289885759353638


training:   2%|▏         | 1713/100000 [1:34:22<85:21:07,  3.13s/it]

training loss: 1.4842326641082764
training loss: 1.466615080833435
training loss: 1.4948008060455322
training loss: 1.5204960107803345


training:   2%|▏         | 1717/100000 [1:34:35<85:14:49,  3.12s/it]

training loss: 1.62710702419281
training loss: 1.5555332899093628
training loss: 1.6577470302581787
training loss: 1.530625581741333


training:   2%|▏         | 1721/100000 [1:34:47<85:11:51,  3.12s/it]

training loss: 1.4419302940368652
training loss: 1.5853984355926514
training loss: 1.577280044555664
training loss: 1.477273941040039


training:   2%|▏         | 1725/100000 [1:35:00<85:10:45,  3.12s/it]

training loss: 1.3987174034118652
training loss: 1.457052230834961
training loss: 1.8765039443969727
training loss: 1.4496350288391113


training:   2%|▏         | 1729/100000 [1:35:12<85:09:53,  3.12s/it]

training loss: 1.6137888431549072
training loss: 1.6871867179870605
training loss: 1.5607454776763916
training loss: 1.5116552114486694


training:   2%|▏         | 1733/100000 [1:35:25<85:09:00,  3.12s/it]

training loss: 1.487776279449463
training loss: 1.632505178451538
training loss: 1.726349115371704
training loss: 1.6931686401367188


training:   2%|▏         | 1737/100000 [1:35:37<85:08:22,  3.12s/it]

training loss: 1.4756780862808228
training loss: 1.5534424781799316
training loss: 1.592163324356079
training loss: 1.5219275951385498


training:   2%|▏         | 1741/100000 [1:35:50<85:06:27,  3.12s/it]

training loss: 1.5312883853912354
training loss: 1.517810583114624
training loss: 1.4064438343048096
training loss: 1.5291450023651123


training:   2%|▏         | 1745/100000 [1:36:02<85:02:55,  3.12s/it]

training loss: 1.2913649082183838
training loss: 1.7081297636032104
training loss: 1.3895015716552734
training loss: 1.5422916412353516


training:   2%|▏         | 1749/100000 [1:36:15<85:03:15,  3.12s/it]

training loss: 1.698960781097412
training loss: 1.471813678741455
training loss: 1.3839765787124634
training loss: 1.522746205329895


training:   2%|▏         | 1753/100000 [1:36:27<85:04:25,  3.12s/it]

training loss: 1.5107990503311157
training loss: 1.557605266571045
training loss: 1.6350451707839966
training loss: 1.229798674583435


training:   2%|▏         | 1757/100000 [1:36:39<85:04:52,  3.12s/it]

training loss: 1.1972956657409668
training loss: 1.4914395809173584
training loss: 1.715275764465332
training loss: 1.5786864757537842


training:   2%|▏         | 1761/100000 [1:36:52<85:05:20,  3.12s/it]

training loss: 1.4035629034042358
training loss: 1.276194453239441
training loss: 1.5007493495941162
training loss: 1.618685245513916


training:   2%|▏         | 1765/100000 [1:37:04<85:07:25,  3.12s/it]

training loss: 1.372476577758789
training loss: 1.567460536956787
training loss: 1.4102907180786133
training loss: 1.3362183570861816


training:   2%|▏         | 1769/100000 [1:37:17<85:06:23,  3.12s/it]

training loss: 1.4216210842132568
training loss: 1.4506785869598389
training loss: 1.539624571800232
training loss: 1.4723706245422363


training:   2%|▏         | 1773/100000 [1:37:29<85:06:07,  3.12s/it]

training loss: 1.5573500394821167
training loss: 1.462774634361267
training loss: 1.56342351436615
training loss: 1.5897679328918457


training:   2%|▏         | 1777/100000 [1:37:42<85:05:15,  3.12s/it]

training loss: 1.5395509004592896
training loss: 1.3792415857315063
training loss: 1.535407304763794
training loss: 1.1996952295303345


training:   2%|▏         | 1781/100000 [1:37:54<85:04:57,  3.12s/it]

training loss: 1.4890459775924683
training loss: 1.5630607604980469
training loss: 1.6196868419647217
training loss: 1.4672973155975342


training:   2%|▏         | 1785/100000 [1:38:07<85:08:39,  3.12s/it]

training loss: 1.5445992946624756
training loss: 1.4576069116592407
training loss: 1.4599461555480957
training loss: 1.5222419500350952


training:   2%|▏         | 1789/100000 [1:38:19<85:08:02,  3.12s/it]

training loss: 1.561813473701477
training loss: 1.474956750869751
training loss: 1.3100306987762451
training loss: 1.6393194198608398


training:   2%|▏         | 1793/100000 [1:38:32<85:06:23,  3.12s/it]

training loss: 1.3444844484329224
training loss: 1.326578140258789
training loss: 1.7120294570922852
training loss: 1.4314169883728027


training:   2%|▏         | 1797/100000 [1:38:44<85:05:30,  3.12s/it]

training loss: 1.5386712551116943
training loss: 1.5500047206878662
training loss: 1.4716047048568726
training loss: 1.5425288677215576
training loss: 1.4412322044372559


training:   2%|▏         | 1801/100000 [1:38:57<85:42:33,  3.14s/it]

validation loss: 1.5915918350219727
training loss: 1.6059389114379883
training loss: 1.4739933013916016
training loss: 1.2290639877319336


training:   2%|▏         | 1805/100000 [1:39:10<85:28:48,  3.13s/it]

training loss: 1.5801599025726318
training loss: 1.5558204650878906
training loss: 1.448307991027832
training loss: 1.5196492671966553


training:   2%|▏         | 1809/100000 [1:39:22<85:23:16,  3.13s/it]

training loss: 1.8888779878616333
training loss: 1.550752878189087
training loss: 1.3340271711349487
training loss: 1.4707553386688232


training:   2%|▏         | 1813/100000 [1:39:34<85:16:38,  3.13s/it]

training loss: 1.5456230640411377
training loss: 1.447575569152832
training loss: 1.6233534812927246
training loss: 1.4567292928695679


training:   2%|▏         | 1817/100000 [1:39:47<85:12:51,  3.12s/it]

training loss: 1.434729814529419
training loss: 1.7539550065994263
training loss: 1.4684646129608154
training loss: 1.6218117475509644


training:   2%|▏         | 1821/100000 [1:39:59<85:09:16,  3.12s/it]

training loss: 1.784015417098999
training loss: 1.4377180337905884
training loss: 1.5449235439300537
training loss: 1.4251693487167358


training:   2%|▏         | 1825/100000 [1:40:12<85:09:31,  3.12s/it]

training loss: 1.2348637580871582
training loss: 1.5194936990737915
training loss: 1.1642122268676758
training loss: 1.4958223104476929


training:   2%|▏         | 1829/100000 [1:40:24<85:06:35,  3.12s/it]

training loss: 1.5221264362335205
training loss: 1.4546388387680054
training loss: 1.5094051361083984
training loss: 1.459416151046753


training:   2%|▏         | 1833/100000 [1:40:37<85:05:11,  3.12s/it]

training loss: 1.4423080682754517
training loss: 1.6252179145812988
training loss: 1.4912704229354858
training loss: 1.5767936706542969


training:   2%|▏         | 1837/100000 [1:40:49<85:03:46,  3.12s/it]

training loss: 1.5448081493377686
training loss: 1.5391743183135986
training loss: 1.6788034439086914
training loss: 1.4143400192260742


training:   2%|▏         | 1841/100000 [1:41:02<85:03:07,  3.12s/it]

training loss: 1.7630677223205566
training loss: 1.5137873888015747
training loss: 1.6038943529129028
training loss: 1.4686145782470703


training:   2%|▏         | 1845/100000 [1:41:14<85:02:23,  3.12s/it]

training loss: 1.5545659065246582
training loss: 1.5046873092651367
training loss: 1.5364619493484497
training loss: 1.4887523651123047


training:   2%|▏         | 1849/100000 [1:41:27<85:02:54,  3.12s/it]

training loss: 1.5358589887619019
training loss: 1.6730133295059204
training loss: 1.51960289478302
training loss: 1.3995263576507568


training:   2%|▏         | 1853/100000 [1:41:39<85:01:06,  3.12s/it]

training loss: 1.445258617401123
training loss: 1.3875597715377808
training loss: 1.639549970626831
training loss: 1.3799175024032593


training:   2%|▏         | 1857/100000 [1:41:52<85:00:48,  3.12s/it]

training loss: 1.5384459495544434
training loss: 1.173433542251587
training loss: 1.3573424816131592
training loss: 1.496760368347168


training:   2%|▏         | 1861/100000 [1:42:04<85:00:37,  3.12s/it]

training loss: 1.5414026975631714
training loss: 1.6860449314117432
training loss: 1.417165994644165
training loss: 1.4193165302276611


training:   2%|▏         | 1865/100000 [1:42:17<85:00:56,  3.12s/it]

training loss: 1.6619975566864014
training loss: 1.455237627029419
training loss: 1.4494876861572266
training loss: 1.5402854681015015


training:   2%|▏         | 1869/100000 [1:42:29<85:01:06,  3.12s/it]

training loss: 1.566364049911499
training loss: 1.7901394367218018
training loss: 1.4293615818023682
training loss: 1.4379441738128662


training:   2%|▏         | 1873/100000 [1:42:42<85:02:37,  3.12s/it]

training loss: 1.2772266864776611
training loss: 1.3138347864151
training loss: 1.6827232837677002
training loss: 1.5403876304626465


training:   2%|▏         | 1877/100000 [1:42:54<85:02:04,  3.12s/it]

training loss: 1.6269887685775757
training loss: 1.3433663845062256
training loss: 1.5372519493103027
training loss: 1.6536000967025757


training:   2%|▏         | 1881/100000 [1:43:07<85:00:21,  3.12s/it]

training loss: 1.427547812461853
training loss: 1.5387701988220215
training loss: 1.4881449937820435
training loss: 1.3889884948730469


training:   2%|▏         | 1885/100000 [1:43:19<85:00:01,  3.12s/it]

training loss: 1.4786487817764282
training loss: 1.7054967880249023
training loss: 1.5269925594329834
training loss: 1.598982810974121


training:   2%|▏         | 1889/100000 [1:43:32<84:59:38,  3.12s/it]

training loss: 1.5639621019363403
training loss: 1.575927734375
training loss: 1.5340917110443115
training loss: 1.531360149383545


training:   2%|▏         | 1893/100000 [1:43:44<84:58:28,  3.12s/it]

training loss: 1.5148675441741943
training loss: 1.4090619087219238
training loss: 1.4984538555145264
training loss: 1.3560636043548584


training:   2%|▏         | 1897/100000 [1:43:56<84:59:15,  3.12s/it]

training loss: 1.7018132209777832
training loss: 1.4258519411087036
training loss: 1.593573808670044
training loss: 1.6135050058364868
training loss: 1.5046849250793457


training:   2%|▏         | 1901/100000 [1:44:09<85:36:34,  3.14s/it]

validation loss: 1.3395490646362305
training loss: 1.7133499383926392
training loss: 1.5515937805175781
training loss: 1.4241546392440796


training:   2%|▏         | 1905/100000 [1:44:22<85:22:41,  3.13s/it]

training loss: 1.668029546737671
training loss: 1.3230714797973633
training loss: 1.3888521194458008
training loss: 1.4683105945587158


training:   2%|▏         | 1909/100000 [1:44:34<85:12:35,  3.13s/it]

training loss: 1.189518690109253
training loss: 1.3277181386947632
training loss: 1.6073557138442993
training loss: 1.3713252544403076


training:   2%|▏         | 1913/100000 [1:44:47<85:09:01,  3.13s/it]

training loss: 1.4142265319824219
training loss: 1.411423921585083
training loss: 1.7229843139648438
training loss: 1.486236810684204


training:   2%|▏         | 1917/100000 [1:44:59<85:07:21,  3.12s/it]

training loss: 1.453519582748413
training loss: 1.4201059341430664
training loss: 1.5865161418914795
training loss: 1.5760266780853271


training:   2%|▏         | 1921/100000 [1:45:12<85:04:12,  3.12s/it]

training loss: 1.6978840827941895
training loss: 1.503462791442871
training loss: 1.4276320934295654
training loss: 1.4202139377593994


training:   2%|▏         | 1925/100000 [1:45:24<85:00:56,  3.12s/it]

training loss: 1.5355241298675537
training loss: 1.6620051860809326
training loss: 1.4318886995315552
training loss: 1.4623003005981445


training:   2%|▏         | 1929/100000 [1:45:37<84:58:08,  3.12s/it]

training loss: 1.57753586769104
training loss: 1.369520902633667
training loss: 1.3119490146636963
training loss: 1.5502712726593018


training:   2%|▏         | 1933/100000 [1:45:49<84:57:53,  3.12s/it]

training loss: 1.4899929761886597
training loss: 1.41682767868042
training loss: 1.5359256267547607
training loss: 1.465350866317749


training:   2%|▏         | 1937/100000 [1:46:01<84:59:12,  3.12s/it]

training loss: 1.657287359237671
training loss: 1.4443368911743164
training loss: 1.8842613697052002
training loss: 1.5080946683883667


training:   2%|▏         | 1941/100000 [1:46:14<84:59:22,  3.12s/it]

training loss: 1.8052191734313965
training loss: 1.4212465286254883
training loss: 1.4991071224212646
training loss: 1.5189826488494873


training:   2%|▏         | 1945/100000 [1:46:26<84:58:31,  3.12s/it]

training loss: 1.3173820972442627
training loss: 1.2942379713058472
training loss: 1.5459482669830322
training loss: 1.2991373538970947


training:   2%|▏         | 1949/100000 [1:46:39<84:58:47,  3.12s/it]

training loss: 1.558025598526001
training loss: 1.5040602684020996
training loss: 1.5863569974899292
training loss: 1.6098248958587646


training:   2%|▏         | 1953/100000 [1:46:51<84:56:38,  3.12s/it]

training loss: 1.6223764419555664
training loss: 1.3338261842727661
training loss: 1.534352421760559
training loss: 1.6268492937088013


training:   2%|▏         | 1957/100000 [1:47:04<84:54:58,  3.12s/it]

training loss: 1.5329114198684692
training loss: 1.3241300582885742
training loss: 1.6639422178268433
training loss: 1.6085689067840576


training:   2%|▏         | 1961/100000 [1:47:16<84:56:12,  3.12s/it]

training loss: 1.3388574123382568
training loss: 1.4998836517333984
training loss: 1.354688048362732
training loss: 1.5405848026275635


training:   2%|▏         | 1965/100000 [1:47:29<84:55:48,  3.12s/it]

training loss: 1.4216547012329102
training loss: 1.4308884143829346
training loss: 1.5277094841003418
training loss: 1.4789495468139648


training:   2%|▏         | 1969/100000 [1:47:41<84:55:07,  3.12s/it]

training loss: 1.9857563972473145
training loss: 1.486283540725708
training loss: 1.5743277072906494
training loss: 1.3640577793121338


training:   2%|▏         | 1973/100000 [1:47:54<84:55:13,  3.12s/it]

training loss: 1.7111868858337402
training loss: 1.4584128856658936
training loss: 1.5684260129928589
training loss: 1.6038944721221924


training:   2%|▏         | 1977/100000 [1:48:06<84:55:34,  3.12s/it]

training loss: 1.4746575355529785
training loss: 1.5614492893218994
training loss: 1.428176999092102
training loss: 1.571986198425293


training:   2%|▏         | 1981/100000 [1:48:19<84:58:15,  3.12s/it]

training loss: 1.5232124328613281
training loss: 1.5867571830749512
training loss: 1.5092300176620483
training loss: 1.3827486038208008


training:   2%|▏         | 1985/100000 [1:48:31<84:55:16,  3.12s/it]

training loss: 1.4192936420440674
training loss: 1.6126160621643066
training loss: 1.1189292669296265
training loss: 1.533438801765442


training:   2%|▏         | 1989/100000 [1:48:44<84:57:51,  3.12s/it]

training loss: 1.6877188682556152
training loss: 1.5791404247283936
training loss: 1.346195936203003
training loss: 1.4595708847045898


training:   2%|▏         | 1993/100000 [1:48:56<84:56:24,  3.12s/it]

training loss: 1.5711543560028076
training loss: 1.3309575319290161
training loss: 1.5805950164794922
training loss: 1.4086966514587402


training:   2%|▏         | 1997/100000 [1:49:09<84:55:33,  3.12s/it]

training loss: 1.4239346981048584
training loss: 1.2431296110153198
training loss: 1.4076333045959473
training loss: 1.5000789165496826


training:   2%|▏         | 1997/100000 [1:49:20<84:55:33,  3.12s/it]

training loss: 1.3565465211868286
validation loss: 1.3588076829910278
%s 

 %s ("[insurance]], [[healthcare]], [[Lawyer|legal representation]], etc.). [[Dublin]], the nation's [[capital]], was ranked 22nd in a worldwide cost of living survey in [[As of 2004|2004]] [http://www.finfacts.com/costofliving4.htm] - a rise of two places on [[2003]]. Ireland has been reported to have the second highest per capita income of any country in the EU (if not Europe) next to Luxembourg, and fourth highest in the world.  ==History== ''Main article: [[Economic history of the Republic of Ireland]]''  The state known today as the Republic of Ireland seceded from the [[United Kingdom]] in [[1922]]. The state was plagued by poverty and [[emigration]] until the 1990s. That decade saw the beginning of unprecedented economic success, in a phenomenon known as the &quot;[[Celtic Tiger]]&quot;. Over the past decade, the Irish government has implemented a series of national economic programmes designed to curb [[

training:   2%|▏         | 2001/100000 [1:50:40<246:28:04,  9.05s/it]

Model saved at iteration 2000
training loss: 1.2793974876403809
training loss: 1.4524641036987305
training loss: 1.3161685466766357


training:   2%|▏         | 2005/100000 [1:50:53<197:55:09,  7.27s/it]

training loss: 1.5423619747161865
training loss: 1.483197808265686
training loss: 1.4913710355758667
training loss: 1.4613388776779175


training:   2%|▏         | 2009/100000 [1:51:05<163:59:24,  6.02s/it]

training loss: 1.488855242729187
training loss: 1.3904836177825928
training loss: 1.5234946012496948
training loss: 1.3749957084655762


training:   2%|▏         | 2013/100000 [1:51:18<140:14:43,  5.15s/it]

training loss: 1.607536792755127
training loss: 1.4883735179901123
training loss: 1.6121643781661987
training loss: 1.783299446105957


training:   2%|▏         | 2017/100000 [1:51:30<123:38:07,  4.54s/it]

training loss: 1.5857810974121094
training loss: 1.417417287826538
training loss: 1.3643512725830078
training loss: 1.6779375076293945


training:   2%|▏         | 2021/100000 [1:51:43<112:01:40,  4.12s/it]

training loss: 1.4382143020629883
training loss: 1.4238685369491577
training loss: 1.3606679439544678
training loss: 1.4640891551971436


training:   2%|▏         | 2025/100000 [1:51:55<103:53:25,  3.82s/it]

training loss: 1.4519050121307373
training loss: 1.575847864151001
training loss: 1.4918999671936035
training loss: 1.1441415548324585


training:   2%|▏         | 2029/100000 [1:52:08<98:09:58,  3.61s/it] 

training loss: 1.5511057376861572
training loss: 1.5214159488677979
training loss: 1.4595365524291992
training loss: 1.3989405632019043


training:   2%|▏         | 2033/100000 [1:52:20<94:09:54,  3.46s/it]

training loss: 1.5374042987823486
training loss: 1.4722392559051514
training loss: 1.3100628852844238
training loss: 1.516196370124817


training:   2%|▏         | 2037/100000 [1:52:32<91:22:41,  3.36s/it]

training loss: 1.4781478643417358
training loss: 1.6757920980453491
training loss: 1.562912940979004
training loss: 1.73581862449646


training:   2%|▏         | 2041/100000 [1:52:45<89:23:41,  3.29s/it]

training loss: 1.4702370166778564
training loss: 1.6252280473709106
training loss: 1.3399438858032227
training loss: 1.401768445968628


training:   2%|▏         | 2045/100000 [1:52:57<88:01:56,  3.24s/it]

training loss: 1.56826913356781
training loss: 1.3807909488677979
training loss: 1.4480652809143066
training loss: 1.3525314331054688


training:   2%|▏         | 2049/100000 [1:53:10<87:03:17,  3.20s/it]

training loss: 1.5130990743637085
training loss: 1.6631706953048706
training loss: 1.4594545364379883
training loss: 1.5647872686386108


training:   2%|▏         | 2053/100000 [1:53:22<86:23:33,  3.18s/it]

training loss: 1.3355824947357178
training loss: 1.9524364471435547
training loss: 1.4053902626037598
training loss: 1.5874710083007812


training:   2%|▏         | 2057/100000 [1:53:35<85:52:21,  3.16s/it]

training loss: 1.5585052967071533
training loss: 1.3679335117340088
training loss: 1.3614342212677002
training loss: 1.4543882608413696


training:   2%|▏         | 2061/100000 [1:53:47<85:32:00,  3.14s/it]

training loss: 1.3841763734817505
training loss: 1.4737067222595215
training loss: 1.6693264245986938
training loss: 1.4942301511764526


training:   2%|▏         | 2065/100000 [1:54:00<85:20:56,  3.14s/it]

training loss: 1.4773900508880615
training loss: 1.486197829246521
training loss: 1.563636064529419
training loss: 1.4371000528335571


training:   2%|▏         | 2069/100000 [1:54:12<85:11:11,  3.13s/it]

training loss: 1.3619740009307861
training loss: 1.3982024192810059
training loss: 1.4754369258880615
training loss: 1.3172317743301392


training:   2%|▏         | 2073/100000 [1:54:25<85:04:14,  3.13s/it]

training loss: 1.472023367881775
training loss: 1.4267820119857788
training loss: 1.6230177879333496
training loss: 1.5043343305587769


training:   2%|▏         | 2077/100000 [1:54:37<85:00:35,  3.13s/it]

training loss: 1.4273881912231445
training loss: 1.3718775510787964
training loss: 1.5910043716430664
training loss: 1.5533324480056763


training:   2%|▏         | 2081/100000 [1:54:50<84:57:19,  3.12s/it]

training loss: 1.4016053676605225
training loss: 1.5077040195465088
training loss: 1.363687515258789
training loss: 1.5495390892028809


training:   2%|▏         | 2085/100000 [1:55:02<84:55:58,  3.12s/it]

training loss: 1.4130438566207886
training loss: 1.711583137512207
training loss: 1.4425987005233765
training loss: 1.452836036682129


training:   2%|▏         | 2089/100000 [1:55:15<84:53:28,  3.12s/it]

training loss: 1.4486749172210693
training loss: 1.4549483060836792
training loss: 1.343949794769287
training loss: 1.4144665002822876


training:   2%|▏         | 2093/100000 [1:55:27<84:52:27,  3.12s/it]

training loss: 1.4930493831634521
training loss: 1.9380650520324707
training loss: 1.4391403198242188
training loss: 1.2593053579330444


training:   2%|▏         | 2097/100000 [1:55:40<84:51:40,  3.12s/it]

training loss: 1.4212841987609863
training loss: 1.4451583623886108
training loss: 1.4857721328735352
training loss: 1.4919991493225098


training:   2%|▏         | 2097/100000 [1:55:50<84:51:40,  3.12s/it]

training loss: 1.3486844301223755


training:   2%|▏         | 2101/100000 [1:55:52<85:36:24,  3.15s/it]

validation loss: 1.4190022945404053
training loss: 1.6219358444213867
training loss: 1.4300990104675293
training loss: 1.4358901977539062


training:   2%|▏         | 2105/100000 [1:56:05<85:25:26,  3.14s/it]

training loss: 1.4322600364685059
training loss: 1.5416417121887207
training loss: 1.3417408466339111
training loss: 1.4395396709442139


training:   2%|▏         | 2109/100000 [1:56:17<85:15:21,  3.14s/it]

training loss: 1.51229727268219
training loss: 1.707923173904419
training loss: 1.4516677856445312
training loss: 1.5306533575057983


training:   2%|▏         | 2113/100000 [1:56:30<85:06:43,  3.13s/it]

training loss: 1.4753371477127075
training loss: 1.3642078638076782
training loss: 1.3773211240768433
training loss: 1.651566743850708


training:   2%|▏         | 2117/100000 [1:56:42<85:00:23,  3.13s/it]

training loss: 1.6601288318634033
training loss: 1.415653944015503
training loss: 1.7829182147979736
training loss: 1.7639936208724976


training:   2%|▏         | 2121/100000 [1:56:55<84:56:48,  3.12s/it]

training loss: 1.5574555397033691
training loss: 1.4023680686950684
training loss: 1.4038233757019043
training loss: 1.47802734375


training:   2%|▏         | 2125/100000 [1:57:07<84:56:56,  3.12s/it]

training loss: 1.2585384845733643
training loss: 1.582431674003601
training loss: 1.5590072870254517
training loss: 1.464776873588562


training:   2%|▏         | 2129/100000 [1:57:20<84:56:42,  3.12s/it]

training loss: 1.3299710750579834
training loss: 1.54521906375885
training loss: 1.6015253067016602
training loss: 1.4837732315063477


training:   2%|▏         | 2133/100000 [1:57:32<84:57:34,  3.13s/it]

training loss: 1.5597710609436035
training loss: 1.45100736618042
training loss: 1.5552328824996948
training loss: 1.2484116554260254


training:   2%|▏         | 2137/100000 [1:57:45<84:54:11,  3.12s/it]

training loss: 1.628973126411438
training loss: 1.4214116334915161
training loss: 1.3852659463882446
training loss: 1.365152359008789


training:   2%|▏         | 2141/100000 [1:57:57<84:50:53,  3.12s/it]

training loss: 1.4053809642791748
training loss: 1.4758670330047607
training loss: 1.3826487064361572
training loss: 1.538082480430603


training:   2%|▏         | 2145/100000 [1:58:10<84:48:45,  3.12s/it]

training loss: 1.4948515892028809
training loss: 1.4174565076828003
training loss: 1.569122076034546
training loss: 1.4558892250061035


training:   2%|▏         | 2149/100000 [1:58:22<84:49:22,  3.12s/it]

training loss: 1.520078420639038
training loss: 1.4521782398223877
training loss: 1.4874904155731201
training loss: 1.459904670715332


training:   2%|▏         | 2153/100000 [1:58:35<84:49:06,  3.12s/it]

training loss: 1.4710869789123535
training loss: 1.408610224723816
training loss: 1.3656076192855835
training loss: 1.5338183641433716


training:   2%|▏         | 2157/100000 [1:58:47<84:47:35,  3.12s/it]

training loss: 1.4237499237060547
training loss: 1.5539519786834717
training loss: 1.5334806442260742
training loss: 1.3378472328186035


training:   2%|▏         | 2161/100000 [1:59:00<84:46:35,  3.12s/it]

training loss: 1.4420050382614136
training loss: 1.546533226966858
training loss: 1.530921459197998
training loss: 1.5585203170776367


training:   2%|▏         | 2165/100000 [1:59:12<84:46:13,  3.12s/it]

training loss: 1.4719109535217285
training loss: 1.4486757516860962
training loss: 1.4735450744628906
training loss: 1.6590793132781982


training:   2%|▏         | 2169/100000 [1:59:25<84:44:01,  3.12s/it]

training loss: 1.333101749420166
training loss: 1.581763744354248
training loss: 1.414452075958252
training loss: 1.4822527170181274


training:   2%|▏         | 2173/100000 [1:59:37<84:45:10,  3.12s/it]

training loss: 1.28926682472229
training loss: 1.4335753917694092
training loss: 1.474058985710144
training loss: 1.2693160772323608


training:   2%|▏         | 2177/100000 [1:59:50<84:44:55,  3.12s/it]

training loss: 1.572995662689209
training loss: 1.4652944803237915
training loss: 1.500511884689331
training loss: 1.4703954458236694


training:   2%|▏         | 2181/100000 [2:00:02<84:45:21,  3.12s/it]

training loss: 1.4457058906555176
training loss: 1.2818411588668823
training loss: 1.4748966693878174
training loss: 1.4039852619171143


training:   2%|▏         | 2185/100000 [2:00:15<84:45:47,  3.12s/it]

training loss: 1.4998400211334229
training loss: 1.5021650791168213
training loss: 1.6076011657714844
training loss: 1.37091064453125


training:   2%|▏         | 2189/100000 [2:00:27<84:48:06,  3.12s/it]

training loss: 1.4977518320083618
training loss: 1.536212682723999
training loss: 1.5599539279937744
training loss: 1.7334429025650024


training:   2%|▏         | 2193/100000 [2:00:40<84:48:40,  3.12s/it]

training loss: 1.3037519454956055
training loss: 1.3639618158340454
training loss: 1.613935947418213
training loss: 1.5519976615905762


training:   2%|▏         | 2197/100000 [2:00:52<84:47:10,  3.12s/it]

training loss: 1.3244662284851074
training loss: 1.3909096717834473
training loss: 1.5007522106170654
training loss: 1.4439747333526611
training loss: 1.5638123750686646


training:   2%|▏         | 2201/100000 [2:01:05<85:22:27,  3.14s/it]

validation loss: 1.331827163696289
training loss: 1.485959529876709
training loss: 1.575903296470642
training loss: 1.4295902252197266


training:   2%|▏         | 2205/100000 [2:01:17<85:08:07,  3.13s/it]

training loss: 1.4882798194885254
training loss: 1.4226421117782593
training loss: 1.4813311100006104
training loss: 1.243549108505249


training:   2%|▏         | 2209/100000 [2:01:30<85:00:47,  3.13s/it]

training loss: 1.4844610691070557
training loss: 1.4244062900543213
training loss: 1.4043031930923462
training loss: 1.4839369058609009


training:   2%|▏         | 2213/100000 [2:01:42<84:58:30,  3.13s/it]

training loss: 1.5273195505142212
training loss: 1.42826509475708
training loss: 1.496211051940918
training loss: 1.7196524143218994


training:   2%|▏         | 2217/100000 [2:01:55<84:54:42,  3.13s/it]

training loss: 1.4664602279663086
training loss: 1.604353666305542
training loss: 1.2650789022445679
training loss: 1.393146276473999


training:   2%|▏         | 2221/100000 [2:02:07<84:50:40,  3.12s/it]

training loss: 1.2535314559936523
training loss: 1.507725715637207
training loss: 1.4559192657470703
training loss: 1.5592107772827148


training:   2%|▏         | 2225/100000 [2:02:20<84:48:09,  3.12s/it]

training loss: 1.4410576820373535
training loss: 1.5909150838851929
training loss: 1.427621841430664
training loss: 1.3294540643692017


training:   2%|▏         | 2229/100000 [2:02:32<84:46:08,  3.12s/it]

training loss: 1.5260906219482422
training loss: 1.4239192008972168
training loss: 1.0913118124008179
training loss: 1.426645040512085


training:   2%|▏         | 2233/100000 [2:02:45<84:44:52,  3.12s/it]

training loss: 1.4058769941329956
training loss: 1.2586292028427124
training loss: 1.6571677923202515
training loss: 1.6326709985733032


training:   2%|▏         | 2237/100000 [2:02:57<84:45:04,  3.12s/it]

training loss: 1.71311354637146
training loss: 1.2964837551116943
training loss: 1.3213229179382324
training loss: 1.6242077350616455


training:   2%|▏         | 2241/100000 [2:03:10<84:41:47,  3.12s/it]

training loss: 1.3651587963104248
training loss: 1.2742252349853516
training loss: 1.416056752204895
training loss: 1.5364731550216675


training:   2%|▏         | 2245/100000 [2:03:22<84:41:30,  3.12s/it]

training loss: 1.3595784902572632
training loss: 1.2469565868377686
training loss: 1.7395126819610596
training loss: 1.5465534925460815


training:   2%|▏         | 2249/100000 [2:03:34<84:41:44,  3.12s/it]

training loss: 1.3012382984161377
training loss: 1.2884052991867065
training loss: 1.3910633325576782
training loss: 1.643531084060669


training:   2%|▏         | 2253/100000 [2:03:47<84:42:10,  3.12s/it]

training loss: 1.4747951030731201
training loss: 1.515230655670166
training loss: 1.3788344860076904
training loss: 1.4663801193237305


training:   2%|▏         | 2257/100000 [2:03:59<84:42:48,  3.12s/it]

training loss: 1.3201831579208374
training loss: 1.538036584854126
training loss: 1.640178918838501
training loss: 1.5496037006378174


training:   2%|▏         | 2261/100000 [2:04:12<84:42:43,  3.12s/it]

training loss: 1.3026639223098755
training loss: 1.2572293281555176
training loss: 1.1623796224594116
training loss: 1.5891609191894531


training:   2%|▏         | 2265/100000 [2:04:24<84:41:24,  3.12s/it]

training loss: 1.3830783367156982
training loss: 1.3580801486968994
training loss: 1.2093409299850464
training loss: 1.2573562860488892


training:   2%|▏         | 2269/100000 [2:04:37<84:40:28,  3.12s/it]

training loss: 1.3532578945159912
training loss: 1.3874975442886353
training loss: 1.3127762079238892
training loss: 1.4210315942764282


training:   2%|▏         | 2273/100000 [2:04:49<84:39:31,  3.12s/it]

training loss: 1.3411657810211182
training loss: 1.385505199432373
training loss: 1.4129371643066406
training loss: 1.7111984491348267


training:   2%|▏         | 2277/100000 [2:05:02<84:39:43,  3.12s/it]

training loss: 1.4794573783874512
training loss: 1.3469469547271729
training loss: 1.559893012046814
training loss: 1.4153071641921997


training:   2%|▏         | 2281/100000 [2:05:14<84:41:54,  3.12s/it]

training loss: 1.2222001552581787
training loss: 1.671571969985962
training loss: 1.447622299194336
training loss: 1.3168134689331055


training:   2%|▏         | 2285/100000 [2:05:27<84:41:07,  3.12s/it]

training loss: 1.5266659259796143
training loss: 1.4583516120910645
training loss: 1.2800359725952148
training loss: 1.4869418144226074


training:   2%|▏         | 2289/100000 [2:05:39<84:40:08,  3.12s/it]

training loss: 1.4976661205291748
training loss: 1.452276587486267
training loss: 1.3672809600830078
training loss: 1.562901258468628


training:   2%|▏         | 2293/100000 [2:05:52<84:39:48,  3.12s/it]

training loss: 1.3294095993041992
training loss: 1.195625901222229
training loss: 1.6229130029678345
training loss: 1.4630203247070312


training:   2%|▏         | 2297/100000 [2:06:04<84:39:10,  3.12s/it]

training loss: 1.5391730070114136
training loss: 1.3883323669433594
training loss: 1.461371660232544
training loss: 1.3635480403900146
training loss: 1.4940506219863892


training:   2%|▏         | 2301/100000 [2:06:17<85:16:52,  3.14s/it]

validation loss: 1.4164652824401855
training loss: 1.2783746719360352
training loss: 1.4420194625854492
training loss: 1.3346456289291382


training:   2%|▏         | 2305/100000 [2:06:29<85:03:05,  3.13s/it]

training loss: 1.264219045639038
training loss: 1.4738304615020752
training loss: 1.4542067050933838
training loss: 1.5092328786849976


training:   2%|▏         | 2309/100000 [2:06:42<84:55:52,  3.13s/it]

training loss: 1.3928800821304321
training loss: 1.6183133125305176
training loss: 1.5521632432937622
training loss: 1.5533305406570435


training:   2%|▏         | 2313/100000 [2:06:54<84:50:01,  3.13s/it]

training loss: 1.1965850591659546
training loss: 1.5023103952407837
training loss: 1.6056780815124512
training loss: 1.4779253005981445


training:   2%|▏         | 2317/100000 [2:07:07<84:46:07,  3.12s/it]

training loss: 1.4755040407180786
training loss: 1.514840841293335
training loss: 1.4463834762573242
training loss: 1.5235973596572876


training:   2%|▏         | 2321/100000 [2:07:19<84:43:53,  3.12s/it]

training loss: 1.5610764026641846
training loss: 1.4307146072387695
training loss: 1.7447216510772705
training loss: 1.4105055332183838


training:   2%|▏         | 2325/100000 [2:07:32<84:43:06,  3.12s/it]

training loss: 1.4985580444335938
training loss: 1.4077632427215576
training loss: 1.2955619096755981
training loss: 1.6554728746414185


training:   2%|▏         | 2329/100000 [2:07:44<84:40:59,  3.12s/it]

training loss: 1.588318943977356
training loss: 1.3554513454437256
training loss: 1.7057790756225586
training loss: 1.3711789846420288


training:   2%|▏         | 2333/100000 [2:07:57<84:39:02,  3.12s/it]

training loss: 1.337069034576416
training loss: 1.580891728401184
training loss: 1.3646447658538818
training loss: 1.3459100723266602


training:   2%|▏         | 2337/100000 [2:08:09<84:37:47,  3.12s/it]

training loss: 1.491398811340332
training loss: 1.6081299781799316
training loss: 1.4974894523620605
training loss: 1.4169055223464966


training:   2%|▏         | 2341/100000 [2:08:22<84:37:59,  3.12s/it]

training loss: 1.364534854888916
training loss: 1.5939109325408936
training loss: 1.4812657833099365
training loss: 1.2996752262115479


training:   2%|▏         | 2345/100000 [2:08:34<84:37:50,  3.12s/it]

training loss: 1.4628376960754395
training loss: 1.3813073635101318
training loss: 1.5148392915725708
training loss: 1.390527367591858


training:   2%|▏         | 2349/100000 [2:08:47<84:37:20,  3.12s/it]

training loss: 1.3728795051574707
training loss: 1.273457646369934
training loss: 1.2756757736206055
training loss: 1.3529516458511353


training:   2%|▏         | 2353/100000 [2:08:59<84:36:39,  3.12s/it]

training loss: 1.4416470527648926
training loss: 1.4269142150878906
training loss: 1.6223211288452148
training loss: 1.4237676858901978


training:   2%|▏         | 2357/100000 [2:09:12<84:35:35,  3.12s/it]

training loss: 1.6451330184936523
training loss: 1.536810040473938
training loss: 1.3519747257232666
training loss: 1.3026025295257568


training:   2%|▏         | 2361/100000 [2:09:24<84:34:54,  3.12s/it]

training loss: 1.418792724609375
training loss: 1.5156824588775635
training loss: 1.4877389669418335
training loss: 1.2574174404144287


training:   2%|▏         | 2365/100000 [2:09:37<84:36:40,  3.12s/it]

training loss: 1.4708691835403442
training loss: 1.1967113018035889
training loss: 1.2276222705841064
training loss: 1.5380680561065674


training:   2%|▏         | 2369/100000 [2:09:49<84:36:41,  3.12s/it]

training loss: 1.3811588287353516
training loss: 1.5148985385894775
training loss: 1.1229921579360962
training loss: 1.4553464651107788


training:   2%|▏         | 2373/100000 [2:10:02<84:35:59,  3.12s/it]

training loss: 1.5315240621566772
training loss: 1.4145101308822632
training loss: 1.528989315032959
training loss: 1.2558432817459106


training:   2%|▏         | 2377/100000 [2:10:14<84:35:04,  3.12s/it]

training loss: 1.3116734027862549
training loss: 1.386934757232666
training loss: 1.4787671566009521
training loss: 1.3734384775161743


training:   2%|▏         | 2381/100000 [2:10:27<84:35:17,  3.12s/it]

training loss: 1.1881638765335083
training loss: 1.3702203035354614
training loss: 1.3735864162445068
training loss: 1.2320585250854492


training:   2%|▏         | 2385/100000 [2:10:39<84:35:56,  3.12s/it]

training loss: 1.3532304763793945
training loss: 1.5273823738098145
training loss: 1.3539462089538574
training loss: 1.4828636646270752


training:   2%|▏         | 2389/100000 [2:10:51<84:35:50,  3.12s/it]

training loss: 1.5161147117614746
training loss: 1.3902957439422607
training loss: 1.2000994682312012
training loss: 1.4437878131866455


training:   2%|▏         | 2393/100000 [2:11:04<84:35:34,  3.12s/it]

training loss: 1.4140125513076782
training loss: 1.5782750844955444
training loss: 1.365900993347168
training loss: 1.588257074356079


training:   2%|▏         | 2397/100000 [2:11:16<84:35:05,  3.12s/it]

training loss: 1.504729986190796
training loss: 1.469975233078003
training loss: 1.4067398309707642
training loss: 1.4322583675384521
training loss: 1.3823421001434326


training:   2%|▏         | 2401/100000 [2:11:29<85:11:04,  3.14s/it]

validation loss: 1.4726037979125977
training loss: 1.5245052576065063
training loss: 1.4121241569519043
training loss: 1.566014289855957


training:   2%|▏         | 2405/100000 [2:11:42<84:57:02,  3.13s/it]

training loss: 1.4611473083496094
training loss: 1.4813294410705566
training loss: 1.3051022291183472
training loss: 1.4626109600067139


training:   2%|▏         | 2409/100000 [2:11:54<84:50:56,  3.13s/it]

training loss: 1.2809944152832031
training loss: 1.6172029972076416
training loss: 1.2812867164611816
training loss: 1.3757106065750122


training:   2%|▏         | 2413/100000 [2:12:07<84:46:05,  3.13s/it]

training loss: 1.4227805137634277
training loss: 1.4231653213500977
training loss: 1.425607442855835
training loss: 1.2938838005065918


training:   2%|▏         | 2417/100000 [2:12:19<84:41:54,  3.12s/it]

training loss: 1.4025557041168213
training loss: 1.3509364128112793
training loss: 1.3978861570358276
training loss: 1.492387294769287


training:   2%|▏         | 2421/100000 [2:12:32<84:38:42,  3.12s/it]

training loss: 1.5108722448349
training loss: 1.240395188331604
training loss: 1.4141255617141724
training loss: 1.5628688335418701


training:   2%|▏         | 2425/100000 [2:12:44<84:36:42,  3.12s/it]

training loss: 1.5166349411010742
training loss: 1.4924877882003784
training loss: 1.4162554740905762
training loss: 1.4227712154388428


training:   2%|▏         | 2429/100000 [2:12:57<84:36:18,  3.12s/it]

training loss: 1.6178762912750244
training loss: 1.5376287698745728
training loss: 1.3703999519348145
training loss: 1.300766944885254


training:   2%|▏         | 2433/100000 [2:13:09<84:35:11,  3.12s/it]

training loss: 1.405973196029663
training loss: 1.3390398025512695
training loss: 1.5648715496063232
training loss: 1.4291718006134033


training:   2%|▏         | 2437/100000 [2:13:22<84:33:49,  3.12s/it]

training loss: 1.4514236450195312
training loss: 1.394410252571106
training loss: 1.3133138418197632
training loss: 1.5890483856201172


training:   2%|▏         | 2441/100000 [2:13:34<84:33:07,  3.12s/it]

training loss: 1.3978676795959473
training loss: 1.4356721639633179
training loss: 1.371942162513733
training loss: 1.3791624307632446


training:   2%|▏         | 2445/100000 [2:13:46<84:32:01,  3.12s/it]

training loss: 1.430467128753662
training loss: 1.440788745880127
training loss: 1.4408167600631714
training loss: 1.3642529249191284


training:   2%|▏         | 2449/100000 [2:13:59<84:31:57,  3.12s/it]

training loss: 1.5625131130218506
training loss: 1.2493720054626465
training loss: 1.358386516571045
training loss: 1.5963847637176514


training:   2%|▏         | 2453/100000 [2:14:11<84:33:07,  3.12s/it]

training loss: 1.2014706134796143
training loss: 1.3361581563949585
training loss: 1.1289377212524414
training loss: 1.5843740701675415


training:   2%|▏         | 2457/100000 [2:14:24<84:31:23,  3.12s/it]

training loss: 1.6189547777175903
training loss: 1.2619770765304565
training loss: 1.4763689041137695
training loss: 1.4809480905532837


training:   2%|▏         | 2461/100000 [2:14:36<84:30:33,  3.12s/it]

training loss: 1.2900216579437256
training loss: 1.3967359066009521
training loss: 1.4573442935943604
training loss: 1.2948827743530273


training:   2%|▏         | 2465/100000 [2:14:49<84:27:38,  3.12s/it]

training loss: 1.4388773441314697
training loss: 1.3836932182312012
training loss: 1.481265664100647
training loss: 1.527230978012085


training:   2%|▏         | 2469/100000 [2:15:01<84:23:25,  3.11s/it]

training loss: 1.5923813581466675
training loss: 1.4969736337661743
training loss: 1.3104214668273926
training loss: 1.2599295377731323


training:   2%|▏         | 2473/100000 [2:15:14<84:25:45,  3.12s/it]

training loss: 1.569430947303772
training loss: 1.4169533252716064
training loss: 1.4003678560256958
training loss: 1.4356023073196411


training:   2%|▏         | 2477/100000 [2:15:26<84:27:27,  3.12s/it]

training loss: 1.3961963653564453
training loss: 1.4096226692199707
training loss: 1.3113728761672974
training loss: 1.4233274459838867


training:   2%|▏         | 2481/100000 [2:15:39<84:28:02,  3.12s/it]

training loss: 1.2324763536453247
training loss: 1.5005677938461304
training loss: 1.368309497833252
training loss: 1.3843556642532349


training:   2%|▏         | 2485/100000 [2:15:51<84:28:20,  3.12s/it]

training loss: 1.3596808910369873
training loss: 1.3690452575683594
training loss: 1.411856770515442
training loss: 1.334892988204956


training:   2%|▏         | 2489/100000 [2:16:04<84:28:16,  3.12s/it]

training loss: 1.6077364683151245
training loss: 1.404735803604126
training loss: 1.5532293319702148
training loss: 1.2780141830444336


training:   2%|▏         | 2493/100000 [2:16:16<84:27:52,  3.12s/it]

training loss: 1.459804892539978
training loss: 1.401015281677246
training loss: 1.3892313241958618
training loss: 1.372725248336792


training:   2%|▏         | 2497/100000 [2:16:29<84:29:41,  3.12s/it]

training loss: 1.4337270259857178
training loss: 1.546926498413086
training loss: 1.5297493934631348
training loss: 1.6673237085342407


training:   2%|▏         | 2497/100000 [2:16:40<84:29:41,  3.12s/it]

training loss: 1.3994922637939453
validation loss: 1.28641676902771
%s 

 %s ("| image_caption = KNM-ER-1813  | regnum = [[Animal]]ia | phylum = [[Chordata]] | classis = [[Mammal]]ia | ordo = [[Primates]] | familia = [[Hominidae]] | genus = ''[[Homo (genus)|Homo]]'' | species = '''''H. habilis''''' | binomial = ''Homo habilis'' | binomial_authority = [[Louis Leakey|Leakey]] et al, [[1964]] }}  '''''Homo habilis''''' ''Â«HOH moh HAB uh luhsÂ»'' (&quot;handy man&quot;, &quot;skillful person&quot;) is a [[species]] of the genus ''[[Homo (genus)|Homo]]'', which lived from approximately 2.5 million to 1.8 million years ago at the beginning of the [[Pleistocene]]. The definition of this species is credited to both Mary and Louis [[Leakey]], who found fossils in [[Tanzania]], [[East Africa]], between [[1962]] and [[1964]]. ''Homo habilis'' is arguably the first species of the ''Homo'' genus to appear. In its appearance and [[morphology]], ''H. habilis'' was the least similar to modern humans 

training:   3%|▎         | 2501/100000 [2:18:00<245:00:11,  9.05s/it]

Model saved at iteration 2500
training loss: 1.4391063451766968
training loss: 1.5147737264633179
training loss: 1.2901514768600464


training:   3%|▎         | 2505/100000 [2:18:13<196:43:13,  7.26s/it]

training loss: 1.5495600700378418
training loss: 1.4523818492889404
training loss: 1.5206573009490967
training loss: 1.332879662513733


training:   3%|▎         | 2509/100000 [2:18:25<163:02:34,  6.02s/it]

training loss: 1.3863674402236938
training loss: 1.395843744277954
training loss: 1.2873692512512207
training loss: 1.438698410987854


training:   3%|▎         | 2513/100000 [2:18:38<139:28:20,  5.15s/it]

training loss: 1.4203258752822876
training loss: 1.5426130294799805
training loss: 1.2272447347640991
training loss: 1.5515326261520386


training:   3%|▎         | 2517/100000 [2:18:50<122:58:20,  4.54s/it]

training loss: 1.6152558326721191
training loss: 1.357374668121338
training loss: 1.5134778022766113
training loss: 1.4628486633300781


training:   3%|▎         | 2521/100000 [2:19:02<111:24:17,  4.11s/it]

training loss: 1.1388530731201172
training loss: 1.3867650032043457
training loss: 1.2825446128845215
training loss: 1.3703701496124268


training:   3%|▎         | 2525/100000 [2:19:15<103:19:02,  3.82s/it]

training loss: 1.3207104206085205
training loss: 1.3921626806259155
training loss: 1.438172698020935
training loss: 1.3534038066864014


training:   3%|▎         | 2529/100000 [2:19:27<97:39:32,  3.61s/it] 

training loss: 1.2645435333251953
training loss: 1.417534589767456
training loss: 1.3388147354125977
training loss: 1.1914687156677246


training:   3%|▎         | 2533/100000 [2:19:40<93:41:18,  3.46s/it]

training loss: 1.3416171073913574
training loss: 1.4330132007598877
training loss: 1.4034698009490967
training loss: 1.571396827697754


training:   3%|▎         | 2537/100000 [2:19:52<90:53:32,  3.36s/it]

training loss: 1.4424316883087158
training loss: 1.3705694675445557
training loss: 1.7305090427398682
training loss: 1.347681999206543


training:   3%|▎         | 2541/100000 [2:20:05<88:56:47,  3.29s/it]

training loss: 1.4253824949264526
training loss: 1.2832283973693848
training loss: 1.3943660259246826
training loss: 1.56348717212677


training:   3%|▎         | 2545/100000 [2:20:17<87:34:55,  3.24s/it]

training loss: 1.3872594833374023
training loss: 1.7121233940124512
training loss: 1.4854426383972168
training loss: 1.4467419385910034


training:   3%|▎         | 2549/100000 [2:20:30<86:38:02,  3.20s/it]

training loss: 1.4064080715179443
training loss: 1.4123505353927612
training loss: 1.3730981349945068
training loss: 1.56380033493042


training:   3%|▎         | 2553/100000 [2:20:42<85:58:20,  3.18s/it]

training loss: 1.6913421154022217
training loss: 1.496781349182129
training loss: 1.4692944288253784
training loss: 1.3667917251586914


training:   3%|▎         | 2557/100000 [2:20:55<85:31:37,  3.16s/it]

training loss: 1.5584977865219116
training loss: 1.6178476810455322
training loss: 1.5147119760513306
training loss: 1.476971983909607


training:   3%|▎         | 2561/100000 [2:21:07<85:11:47,  3.15s/it]

training loss: 1.5255634784698486
training loss: 1.4017648696899414
training loss: 1.5109617710113525
training loss: 1.651759386062622


training:   3%|▎         | 2565/100000 [2:21:20<84:56:37,  3.14s/it]

training loss: 1.4121456146240234
training loss: 1.4428664445877075
training loss: 1.5362107753753662
training loss: 1.3472906351089478


training:   3%|▎         | 2569/100000 [2:21:32<84:46:33,  3.13s/it]

training loss: 1.5221165418624878
training loss: 1.4241435527801514
training loss: 1.5226845741271973
training loss: 1.1971908807754517


training:   3%|▎         | 2573/100000 [2:21:45<84:40:05,  3.13s/it]

training loss: 1.3273050785064697
training loss: 1.4586066007614136
training loss: 1.4164423942565918
training loss: 1.590820550918579


training:   3%|▎         | 2577/100000 [2:21:57<84:36:24,  3.13s/it]

training loss: 1.6129341125488281
training loss: 1.4925155639648438
training loss: 1.2556843757629395
training loss: 1.4895689487457275


training:   3%|▎         | 2581/100000 [2:22:10<84:30:56,  3.12s/it]

training loss: 1.3081352710723877
training loss: 1.3805145025253296
training loss: 1.3271740674972534
training loss: 1.4104254245758057


training:   3%|▎         | 2585/100000 [2:22:22<84:28:44,  3.12s/it]

training loss: 1.456831693649292
training loss: 1.2279291152954102
training loss: 1.4031732082366943
training loss: 1.4256452322006226


training:   3%|▎         | 2589/100000 [2:22:35<84:26:53,  3.12s/it]

training loss: 1.4669877290725708
training loss: 1.1605985164642334
training loss: 1.4511054754257202
training loss: 1.3748047351837158


training:   3%|▎         | 2593/100000 [2:22:47<84:23:16,  3.12s/it]

training loss: 1.3972405195236206
training loss: 1.4718387126922607
training loss: 1.4073981046676636
training loss: 1.3666349649429321


training:   3%|▎         | 2597/100000 [2:22:59<84:22:29,  3.12s/it]

training loss: 1.3441439867019653
training loss: 1.4285402297973633
training loss: 1.4940071105957031
training loss: 1.3490954637527466


training:   3%|▎         | 2597/100000 [2:23:10<84:22:29,  3.12s/it]

training loss: 1.4312312602996826


training:   3%|▎         | 2601/100000 [2:23:12<84:56:56,  3.14s/it]

validation loss: 1.2287404537200928
training loss: 1.5268645286560059
training loss: 1.4309064149856567
training loss: 1.5389392375946045


training:   3%|▎         | 2605/100000 [2:23:25<84:43:24,  3.13s/it]

training loss: 1.6076269149780273
training loss: 1.2567486763000488
training loss: 1.4463270902633667
training loss: 1.6140695810317993


training:   3%|▎         | 2609/100000 [2:23:37<84:36:51,  3.13s/it]

training loss: 1.5047872066497803
training loss: 1.393867015838623
training loss: 1.4787312746047974
training loss: 1.3676307201385498


training:   3%|▎         | 2613/100000 [2:23:50<84:32:37,  3.13s/it]

training loss: 1.4570969343185425
training loss: 1.4588861465454102
training loss: 1.3255369663238525
training loss: 1.5475828647613525


training:   3%|▎         | 2617/100000 [2:24:02<84:35:02,  3.13s/it]

training loss: 1.2184157371520996
training loss: 1.4218566417694092
training loss: 1.5677053928375244
training loss: 1.3731212615966797


training:   3%|▎         | 2621/100000 [2:24:15<84:30:52,  3.12s/it]

training loss: 1.4650828838348389
training loss: 1.4890633821487427
training loss: 1.3409979343414307
training loss: 1.5161097049713135


training:   3%|▎         | 2625/100000 [2:24:27<84:28:28,  3.12s/it]

training loss: 1.3669345378875732
training loss: 1.409530520439148
training loss: 1.5522774457931519
training loss: 1.3754253387451172


training:   3%|▎         | 2629/100000 [2:24:40<84:25:38,  3.12s/it]

training loss: 1.3984427452087402
training loss: 1.4072766304016113
training loss: 1.3658679723739624
training loss: 1.3760349750518799


training:   3%|▎         | 2633/100000 [2:24:52<84:24:15,  3.12s/it]

training loss: 1.3619420528411865
training loss: 1.4266631603240967
training loss: 1.5600051879882812
training loss: 1.5231823921203613


training:   3%|▎         | 2637/100000 [2:25:05<84:22:34,  3.12s/it]

training loss: 1.4881305694580078
training loss: 1.516085147857666
training loss: 1.4234209060668945
training loss: 1.5762828588485718


training:   3%|▎         | 2641/100000 [2:25:17<84:19:59,  3.12s/it]

training loss: 1.4254202842712402
training loss: 1.5937292575836182
training loss: 1.318239450454712
training loss: 1.667708158493042


training:   3%|▎         | 2645/100000 [2:25:29<84:17:12,  3.12s/it]

training loss: 1.2787476778030396
training loss: 1.4058350324630737
training loss: 1.277606725692749
training loss: 1.465437650680542


training:   3%|▎         | 2649/100000 [2:25:42<84:17:41,  3.12s/it]

training loss: 1.2953739166259766
training loss: 1.3084105253219604
training loss: 1.2931355237960815
training loss: 1.4929145574569702


training:   3%|▎         | 2653/100000 [2:25:54<84:18:30,  3.12s/it]

training loss: 1.3277649879455566
training loss: 1.5014243125915527
training loss: 1.4399242401123047
training loss: 1.3842785358428955


training:   3%|▎         | 2657/100000 [2:26:07<84:19:04,  3.12s/it]

training loss: 1.4877688884735107
training loss: 1.5663398504257202
training loss: 1.3968770503997803
training loss: 1.3343558311462402


training:   3%|▎         | 2661/100000 [2:26:19<84:18:53,  3.12s/it]

training loss: 1.3608876466751099
training loss: 1.6114096641540527
training loss: 1.4158141613006592
training loss: 1.157853126525879


training:   3%|▎         | 2665/100000 [2:26:32<84:17:42,  3.12s/it]

training loss: 1.5262436866760254
training loss: 1.5988638401031494
training loss: 1.2060449123382568
training loss: 1.5625150203704834


training:   3%|▎         | 2669/100000 [2:26:44<84:17:51,  3.12s/it]

training loss: 1.5197384357452393
training loss: 1.4776887893676758
training loss: 1.4602320194244385
training loss: 1.2499722242355347


training:   3%|▎         | 2673/100000 [2:26:57<84:16:11,  3.12s/it]

training loss: 1.2369738817214966
training loss: 1.3544400930404663
training loss: 1.3930010795593262
training loss: 1.4953715801239014


training:   3%|▎         | 2677/100000 [2:27:09<84:14:01,  3.12s/it]

training loss: 1.4979249238967896
training loss: 1.5479426383972168
training loss: 1.4119713306427002
training loss: 1.6464595794677734


training:   3%|▎         | 2681/100000 [2:27:22<84:15:03,  3.12s/it]

training loss: 1.466371774673462
training loss: 1.4995710849761963
training loss: 1.4526805877685547
training loss: 1.4544742107391357


training:   3%|▎         | 2685/100000 [2:27:34<84:17:36,  3.12s/it]

training loss: 1.4990160465240479
training loss: 1.3318238258361816
training loss: 1.3024287223815918
training loss: 1.2947211265563965


training:   3%|▎         | 2689/100000 [2:27:47<84:16:00,  3.12s/it]

training loss: 1.4436049461364746
training loss: 1.6012492179870605
training loss: 1.4048765897750854
training loss: 1.3892312049865723


training:   3%|▎         | 2693/100000 [2:27:59<84:14:14,  3.12s/it]

training loss: 1.2996418476104736
training loss: 1.2968945503234863
training loss: 1.4960880279541016
training loss: 1.3099501132965088


training:   3%|▎         | 2697/100000 [2:28:12<84:14:59,  3.12s/it]

training loss: 1.5752753019332886
training loss: 1.365861415863037
training loss: 1.3968768119812012
training loss: 1.3801488876342773
training loss: 1.3523794412612915


training:   3%|▎         | 2701/100000 [2:28:24<84:52:28,  3.14s/it]

validation loss: 1.424755334854126
training loss: 1.3307671546936035
training loss: 1.388561725616455
training loss: 1.383938193321228


training:   3%|▎         | 2705/100000 [2:28:37<84:39:35,  3.13s/it]

training loss: 1.4971797466278076
training loss: 1.5533658266067505
training loss: 1.3478102684020996
training loss: 1.5397790670394897


training:   3%|▎         | 2709/100000 [2:28:49<84:33:41,  3.13s/it]

training loss: 1.0577332973480225
training loss: 1.501156210899353
training loss: 1.5448440313339233
training loss: 1.402961015701294


training:   3%|▎         | 2713/100000 [2:29:02<84:28:33,  3.13s/it]

training loss: 1.54998779296875
training loss: 1.3074589967727661
training loss: 1.833523154258728
training loss: 1.630903959274292


training:   3%|▎         | 2717/100000 [2:29:14<84:27:48,  3.13s/it]

training loss: 1.4993618726730347
training loss: 1.369217038154602
training loss: 1.3394641876220703
training loss: 1.357918381690979


training:   3%|▎         | 2721/100000 [2:29:27<84:23:50,  3.12s/it]

training loss: 1.6211388111114502
training loss: 1.345493197441101
training loss: 1.2768090963363647
training loss: 1.5812206268310547


training:   3%|▎         | 2725/100000 [2:29:39<84:21:01,  3.12s/it]

training loss: 1.4258489608764648
training loss: 1.405710220336914
training loss: 1.2941218614578247
training loss: 1.3636820316314697


training:   3%|▎         | 2729/100000 [2:29:52<84:16:00,  3.12s/it]

training loss: 1.3630298376083374
training loss: 1.4069204330444336
training loss: 1.3013783693313599
training loss: 1.335253357887268


training:   3%|▎         | 2733/100000 [2:30:04<84:16:28,  3.12s/it]

training loss: 1.2658171653747559
training loss: 1.3494410514831543
training loss: 1.3371078968048096
training loss: 1.4009157419204712


training:   3%|▎         | 2737/100000 [2:30:17<84:13:21,  3.12s/it]

training loss: 1.4152641296386719
training loss: 1.265608787536621
training loss: 1.2344212532043457
training loss: 1.332861304283142


training:   3%|▎         | 2741/100000 [2:30:29<84:14:15,  3.12s/it]

training loss: 1.436249017715454
training loss: 1.3467456102371216
training loss: 1.5050804615020752
training loss: 1.3256696462631226


training:   3%|▎         | 2745/100000 [2:30:42<84:14:21,  3.12s/it]

training loss: 1.2067317962646484
training loss: 1.2938261032104492
training loss: 1.3950034379959106
training loss: 1.297791838645935


training:   3%|▎         | 2749/100000 [2:30:54<84:14:22,  3.12s/it]

training loss: 1.4787054061889648
training loss: 1.3593010902404785
training loss: 1.3789026737213135
training loss: 1.3866736888885498


training:   3%|▎         | 2753/100000 [2:31:06<84:16:10,  3.12s/it]

training loss: 1.34635591506958
training loss: 1.2620357275009155
training loss: 1.4187350273132324
training loss: 1.4800587892532349


training:   3%|▎         | 2757/100000 [2:31:19<84:18:16,  3.12s/it]

training loss: 1.3806675672531128
training loss: 1.5309622287750244
training loss: 1.5452523231506348
training loss: 1.3646056652069092


training:   3%|▎         | 2761/100000 [2:31:31<84:17:10,  3.12s/it]

training loss: 1.604111671447754
training loss: 1.5538910627365112
training loss: 1.3258261680603027
training loss: 1.345827341079712


training:   3%|▎         | 2765/100000 [2:31:44<84:16:07,  3.12s/it]

training loss: 1.3331941366195679
training loss: 1.4477161169052124
training loss: 1.294429063796997
training loss: 1.2381410598754883


training:   3%|▎         | 2769/100000 [2:31:56<84:15:05,  3.12s/it]

training loss: 1.491593360900879
training loss: 1.5452220439910889
training loss: 1.3270263671875
training loss: 1.214938759803772


training:   3%|▎         | 2773/100000 [2:32:09<84:14:16,  3.12s/it]

training loss: 1.3310644626617432
training loss: 1.4632972478866577
training loss: 1.6391253471374512
training loss: 1.5430799722671509


training:   3%|▎         | 2777/100000 [2:32:21<84:14:32,  3.12s/it]

training loss: 1.558870792388916
training loss: 1.3870482444763184
training loss: 1.4421603679656982
training loss: 1.5105093717575073


training:   3%|▎         | 2781/100000 [2:32:34<84:13:10,  3.12s/it]

training loss: 1.5361030101776123
training loss: 1.4900147914886475
training loss: 1.2461622953414917
training loss: 1.2837599515914917


training:   3%|▎         | 2785/100000 [2:32:46<84:12:37,  3.12s/it]

training loss: 1.391216516494751
training loss: 1.3618265390396118
training loss: 1.4609415531158447
training loss: 1.0697344541549683


training:   3%|▎         | 2789/100000 [2:32:59<84:12:21,  3.12s/it]

training loss: 1.3021297454833984
training loss: 1.508674144744873
training loss: 1.180515170097351
training loss: 1.4159893989562988


training:   3%|▎         | 2793/100000 [2:33:11<84:12:42,  3.12s/it]

training loss: 1.2781829833984375
training loss: 1.208658218383789
training loss: 1.3025544881820679
training loss: 1.4796772003173828


training:   3%|▎         | 2797/100000 [2:33:24<84:10:48,  3.12s/it]

training loss: 1.5330662727355957
training loss: 1.620090126991272
training loss: 1.5737478733062744
training loss: 1.619239330291748
training loss: 1.3984572887420654


training:   3%|▎         | 2801/100000 [2:33:36<84:47:28,  3.14s/it]

validation loss: 1.4332696199417114
training loss: 1.561920404434204
training loss: 1.3410773277282715
training loss: 1.1853660345077515


training:   3%|▎         | 2805/100000 [2:33:49<84:34:07,  3.13s/it]

training loss: 1.4478899240493774
training loss: 1.3108038902282715
training loss: 1.4209657907485962
training loss: 1.3366279602050781


training:   3%|▎         | 2809/100000 [2:34:01<84:27:11,  3.13s/it]

training loss: 1.3941527605056763
training loss: 1.2865784168243408
training loss: 1.273193597793579
training loss: 1.498755693435669


training:   3%|▎         | 2813/100000 [2:34:14<84:22:42,  3.13s/it]

training loss: 1.8509845733642578
training loss: 1.3155418634414673
training loss: 1.4820518493652344
training loss: 1.362167477607727


training:   3%|▎         | 2817/100000 [2:34:26<84:20:47,  3.12s/it]

training loss: 1.3507459163665771
training loss: 1.2927536964416504
training loss: 1.4595569372177124
training loss: 1.457502841949463


training:   3%|▎         | 2821/100000 [2:34:39<84:17:46,  3.12s/it]

training loss: 1.436623454093933
training loss: 1.3458514213562012
training loss: 1.3646318912506104
training loss: 1.2023440599441528


training:   3%|▎         | 2825/100000 [2:34:51<84:15:25,  3.12s/it]

training loss: 1.3879594802856445
training loss: 1.3792541027069092
training loss: 1.3658983707427979
training loss: 1.42543625831604


training:   3%|▎         | 2829/100000 [2:35:04<84:13:37,  3.12s/it]

training loss: 1.4231582880020142
training loss: 1.5293195247650146
training loss: 1.5421792268753052
training loss: 1.3223642110824585


training:   3%|▎         | 2833/100000 [2:35:16<84:13:13,  3.12s/it]

training loss: 1.2621955871582031
training loss: 1.530525803565979
training loss: 1.492180585861206
training loss: 1.4158265590667725


training:   3%|▎         | 2837/100000 [2:35:29<84:12:17,  3.12s/it]

training loss: 1.4874593019485474
training loss: 1.4078761339187622
training loss: 1.6295769214630127
training loss: 1.2099878787994385


training:   3%|▎         | 2841/100000 [2:35:41<84:08:03,  3.12s/it]

training loss: 1.3350917100906372
training loss: 1.280478596687317
training loss: 1.3289051055908203
training loss: 1.411942481994629


training:   3%|▎         | 2845/100000 [2:35:54<84:06:21,  3.12s/it]

training loss: 1.4189624786376953
training loss: 1.4101520776748657
training loss: 1.4877890348434448
training loss: 1.2946295738220215


training:   3%|▎         | 2849/100000 [2:36:06<84:07:02,  3.12s/it]

training loss: 1.4649031162261963
training loss: 1.4845033884048462
training loss: 1.4775376319885254
training loss: 1.2520899772644043


training:   3%|▎         | 2853/100000 [2:36:19<84:06:47,  3.12s/it]

training loss: 1.5678671598434448
training loss: 1.3795777559280396
training loss: 1.389098882675171
training loss: 1.4722448587417603


training:   3%|▎         | 2857/100000 [2:36:31<84:07:51,  3.12s/it]

training loss: 1.4376020431518555
training loss: 1.2846072912216187
training loss: 1.3404957056045532
training loss: 1.4016157388687134


training:   3%|▎         | 2861/100000 [2:36:44<84:07:03,  3.12s/it]

training loss: 1.5185394287109375
training loss: 1.4268510341644287
training loss: 1.134777545928955
training loss: 1.536170244216919


training:   3%|▎         | 2865/100000 [2:36:56<84:07:24,  3.12s/it]

training loss: 1.3951424360275269
training loss: 1.41449773311615
training loss: 1.3764535188674927
training loss: 1.479071855545044


training:   3%|▎         | 2869/100000 [2:37:08<84:05:00,  3.12s/it]

training loss: 1.4171395301818848
training loss: 1.3030714988708496
training loss: 1.359893798828125
training loss: 1.1230806112289429


training:   3%|▎         | 2873/100000 [2:37:21<84:06:42,  3.12s/it]

training loss: 1.4552497863769531
training loss: 1.3962053060531616
training loss: 1.644592523574829
training loss: 1.40768563747406


training:   3%|▎         | 2877/100000 [2:37:33<84:07:06,  3.12s/it]

training loss: 1.2125777006149292
training loss: 1.505483627319336
training loss: 1.4373036623001099
training loss: 1.4695521593093872


training:   3%|▎         | 2881/100000 [2:37:46<84:07:59,  3.12s/it]

training loss: 1.4882550239562988
training loss: 1.3116284608840942
training loss: 1.2985566854476929
training loss: 1.3775540590286255


training:   3%|▎         | 2885/100000 [2:37:58<84:08:20,  3.12s/it]

training loss: 1.4788074493408203
training loss: 1.3053257465362549
training loss: 1.3674527406692505
training loss: 1.4185967445373535


training:   3%|▎         | 2889/100000 [2:38:11<84:08:11,  3.12s/it]

training loss: 1.5117257833480835
training loss: 1.6510365009307861
training loss: 1.0997804403305054
training loss: 1.4070191383361816


training:   3%|▎         | 2893/100000 [2:38:23<84:07:25,  3.12s/it]

training loss: 1.3288719654083252
training loss: 1.5816538333892822
training loss: 1.4923604726791382
training loss: 1.3195120096206665


training:   3%|▎         | 2897/100000 [2:38:36<84:07:08,  3.12s/it]

training loss: 1.2060496807098389
training loss: 1.431972622871399
training loss: 1.5218784809112549
training loss: 1.4528895616531372
training loss: 1.284585952758789


training:   3%|▎         | 2901/100000 [2:38:49<84:42:01,  3.14s/it]

validation loss: 1.283149003982544
training loss: 1.2961456775665283
training loss: 1.422965407371521
training loss: 1.4486286640167236


training:   3%|▎         | 2905/100000 [2:39:01<84:28:18,  3.13s/it]

training loss: 1.2596654891967773
training loss: 1.4496006965637207
training loss: 1.4996519088745117
training loss: 1.3069969415664673


training:   3%|▎         | 2909/100000 [2:39:13<84:20:41,  3.13s/it]

training loss: 1.4019802808761597
training loss: 1.281009316444397
training loss: 1.536839246749878
training loss: 1.39132821559906


training:   3%|▎         | 2913/100000 [2:39:26<84:16:17,  3.12s/it]

training loss: 1.4604086875915527
training loss: 1.1678740978240967
training loss: 1.4600510597229004
training loss: 1.460275411605835


training:   3%|▎         | 2917/100000 [2:39:38<84:11:05,  3.12s/it]

training loss: 1.3650224208831787
training loss: 1.3307037353515625
training loss: 1.1743075847625732
training loss: 1.2977306842803955


training:   3%|▎         | 2921/100000 [2:39:51<84:09:28,  3.12s/it]

training loss: 1.492018461227417
training loss: 1.502274990081787
training loss: 1.2844107151031494
training loss: 1.5843968391418457


training:   3%|▎         | 2925/100000 [2:40:03<84:09:35,  3.12s/it]

training loss: 1.3906512260437012
training loss: 1.335958480834961
training loss: 1.371538758277893
training loss: 1.469998836517334


training:   3%|▎         | 2929/100000 [2:40:16<84:06:09,  3.12s/it]

training loss: 1.4209482669830322
training loss: 1.4782086610794067
training loss: 1.3528554439544678
training loss: 1.2481718063354492


training:   3%|▎         | 2933/100000 [2:40:28<84:03:59,  3.12s/it]

training loss: 1.2780271768569946
training loss: 1.3860466480255127
training loss: 1.2328444719314575
training loss: 1.4136946201324463


training:   3%|▎         | 2937/100000 [2:40:41<84:01:46,  3.12s/it]

training loss: 1.4573614597320557
training loss: 1.3571527004241943
training loss: 1.4243261814117432
training loss: 1.2984569072723389


training:   3%|▎         | 2941/100000 [2:40:53<83:59:13,  3.12s/it]

training loss: 1.430830955505371
training loss: 1.5358222723007202
training loss: 1.3772377967834473
training loss: 1.4747717380523682


training:   3%|▎         | 2945/100000 [2:41:06<83:54:34,  3.11s/it]

training loss: 1.4074769020080566
training loss: 1.6200025081634521
training loss: 1.3206162452697754
training loss: 1.4393057823181152


training:   3%|▎         | 2949/100000 [2:41:18<83:57:07,  3.11s/it]

training loss: 1.458878517150879
training loss: 1.465567946434021
training loss: 1.4044649600982666
training loss: 1.3557881116867065


training:   3%|▎         | 2953/100000 [2:41:31<83:58:04,  3.11s/it]

training loss: 1.423638105392456
training loss: 1.437958002090454
training loss: 1.4166008234024048
training loss: 1.2878038883209229


training:   3%|▎         | 2957/100000 [2:41:43<83:57:20,  3.11s/it]

training loss: 1.446205735206604
training loss: 1.4874584674835205
training loss: 1.4127163887023926
training loss: 1.4256761074066162


training:   3%|▎         | 2961/100000 [2:41:55<83:59:31,  3.12s/it]

training loss: 1.3395183086395264
training loss: 1.389609932899475
training loss: 1.5024964809417725
training loss: 1.1434087753295898


training:   3%|▎         | 2965/100000 [2:42:08<84:00:58,  3.12s/it]

training loss: 1.5079014301300049
training loss: 1.2799885272979736
training loss: 1.389542818069458
training loss: 1.4915292263031006


training:   3%|▎         | 2969/100000 [2:42:20<84:00:25,  3.12s/it]

training loss: 1.465821623802185
training loss: 1.4658714532852173
training loss: 1.3989030122756958
training loss: 1.3736810684204102


training:   3%|▎         | 2973/100000 [2:42:33<84:01:33,  3.12s/it]

training loss: 1.4144375324249268
training loss: 1.346236228942871
training loss: 1.3874809741973877
training loss: 1.3216173648834229


training:   3%|▎         | 2977/100000 [2:42:45<84:01:39,  3.12s/it]

training loss: 1.488187551498413
training loss: 1.2897357940673828
training loss: 1.4024670124053955
training loss: 1.4875304698944092


training:   3%|▎         | 2981/100000 [2:42:58<84:00:01,  3.12s/it]

training loss: 1.4320619106292725
training loss: 1.1146225929260254
training loss: 1.4030040502548218
training loss: 1.4967330694198608


training:   3%|▎         | 2985/100000 [2:43:10<83:59:42,  3.12s/it]

training loss: 1.4961645603179932
training loss: 1.3413350582122803
training loss: 1.4233050346374512
training loss: 1.3121192455291748


training:   3%|▎         | 2989/100000 [2:43:23<83:57:30,  3.12s/it]

training loss: 1.349351406097412
training loss: 1.393613338470459
training loss: 1.4019232988357544
training loss: 1.3971350193023682


training:   3%|▎         | 2993/100000 [2:43:35<83:57:11,  3.12s/it]

training loss: 1.441941738128662
training loss: 1.3330931663513184
training loss: 1.4369652271270752
training loss: 1.0875318050384521


training:   3%|▎         | 2997/100000 [2:43:48<83:55:41,  3.11s/it]

training loss: 1.3908336162567139
training loss: 1.4163694381713867
training loss: 1.2220828533172607
training loss: 1.4323163032531738


training:   3%|▎         | 2997/100000 [2:44:00<83:55:41,  3.11s/it]

training loss: 1.6593334674835205
validation loss: 1.4986233711242676
%s 

 %s ('=== &lt;br&gt;\'\'total:\'\' 33 &lt;br&gt;\'\'over 3,047 m:\'\' 3 &lt;br&gt;\'\'2,438 to 3,047 m:\'\' 6 &lt;br&gt;\'\'1,524 to 2,437 m:\'\' 2 &lt;br&gt;\'\'914 to 1,523 m:\'\' 10 &lt;br&gt;\'\'under 914 m:\'\' 12 (1999 est.)  === Heliports === 5 (1999 est.)  == See also ==  * [[Iraq]] * [[Iraqi Republic Railways]]   [[Category:Transportation in Iraq| ]] [[ar:Ø§Ù\x84Ù\x85Ù\x88Ø§ØµÙ\x84Ø§Øª Ù\x81Ù\x8a Ø§Ù\x84Ø¹Ø±Ø§Ù\x82]]</text>     </revision>   </page>   <page>     <title>Military of Iraq</title>     <id>14671</id>     <revision>       <id>42112503</id>       <timestamp>2006-03-03T22:25:04Z</timestamp>       <contributor>         <ip>24.8.152.73</ip>       </contributor>       <comment>/* [[New Iraqi Army]] */</comment>       <text xml:space="preserve">[[Image:IraqiArmy.jpg|thumb|right|300px|Iraqi soldiers from the 2nd Iraqi Army Brigade, train on cordon and search procedures at [[Diyala]] Regional Trainin

training:   3%|▎         | 3001/100000 [2:45:19<243:50:07,  9.05s/it]

Model saved at iteration 3000
training loss: 1.4554994106292725
training loss: 1.4052866697311401
training loss: 1.122718334197998


training:   3%|▎         | 3005/100000 [2:45:32<195:46:35,  7.27s/it]

training loss: 1.5716217756271362
training loss: 1.3591229915618896
training loss: 1.4006425142288208
training loss: 1.5371203422546387


training:   3%|▎         | 3009/100000 [2:45:44<162:13:41,  6.02s/it]

training loss: 1.3781540393829346
training loss: 1.2753281593322754
training loss: 1.4283726215362549
training loss: 1.4634215831756592


training:   3%|▎         | 3013/100000 [2:45:57<138:45:21,  5.15s/it]

training loss: 1.481353759765625
training loss: 1.3806431293487549
training loss: 1.4570035934448242
training loss: 1.4614187479019165


training:   3%|▎         | 3017/100000 [2:46:09<122:20:01,  4.54s/it]

training loss: 1.2994861602783203
training loss: 1.249529242515564
training loss: 1.3573371171951294
training loss: 1.575542688369751


training:   3%|▎         | 3021/100000 [2:46:22<110:49:30,  4.11s/it]

training loss: 1.6379567384719849
training loss: 1.3974394798278809
training loss: 1.2624170780181885
training loss: 1.5157331228256226


training:   3%|▎         | 3025/100000 [2:46:34<102:46:53,  3.82s/it]

training loss: 1.2637269496917725
training loss: 1.29746675491333
training loss: 1.3123178482055664
training loss: 1.2888164520263672


training:   3%|▎         | 3029/100000 [2:46:47<97:07:22,  3.61s/it] 

training loss: 1.2757422924041748
training loss: 1.6197268962860107
training loss: 1.3227155208587646
training loss: 1.6543653011322021


training:   3%|▎         | 3033/100000 [2:46:59<93:11:19,  3.46s/it]

training loss: 1.4837548732757568
training loss: 1.315237045288086
training loss: 1.0531659126281738
training loss: 1.1582958698272705


training:   3%|▎         | 3037/100000 [2:47:11<90:26:11,  3.36s/it]

training loss: 1.4495794773101807
training loss: 1.1479381322860718
training loss: 1.6177037954330444
training loss: 1.6070142984390259


training:   3%|▎         | 3041/100000 [2:47:24<88:27:41,  3.28s/it]

training loss: 1.4190924167633057
training loss: 1.4483683109283447
training loss: 1.3439699411392212
training loss: 1.2847979068756104


training:   3%|▎         | 3045/100000 [2:47:36<87:06:36,  3.23s/it]

training loss: 1.3707709312438965
training loss: 1.3993641138076782
training loss: 1.3418748378753662
training loss: 1.3801840543746948


training:   3%|▎         | 3049/100000 [2:47:49<86:10:55,  3.20s/it]

training loss: 1.1681797504425049
training loss: 1.4473072290420532
training loss: 1.2609670162200928
training loss: 1.3773993253707886


training:   3%|▎         | 3053/100000 [2:48:01<85:32:38,  3.18s/it]

training loss: 1.4727741479873657
training loss: 1.301637887954712
training loss: 1.670544147491455
training loss: 1.0357964038848877


training:   3%|▎         | 3057/100000 [2:48:14<85:02:25,  3.16s/it]

training loss: 1.3897602558135986
training loss: 1.3985847234725952
training loss: 1.3763196468353271
training loss: 1.4792879819869995


training:   3%|▎         | 3061/100000 [2:48:26<84:42:19,  3.15s/it]

training loss: 1.3561772108078003
training loss: 1.349947452545166
training loss: 1.3007655143737793
training loss: 1.2979437112808228


training:   3%|▎         | 3065/100000 [2:48:39<84:29:16,  3.14s/it]

training loss: 1.538641333580017
training loss: 1.4086276292800903
training loss: 1.4484076499938965
training loss: 1.3163177967071533


training:   3%|▎         | 3069/100000 [2:48:51<84:19:50,  3.13s/it]

training loss: 1.3601012229919434
training loss: 1.3539413213729858
training loss: 1.4045519828796387
training loss: 1.5162158012390137


training:   3%|▎         | 3073/100000 [2:49:04<84:14:44,  3.13s/it]

training loss: 1.6086854934692383
training loss: 1.4901007413864136
training loss: 1.3175885677337646
training loss: 1.404087781906128


training:   3%|▎         | 3077/100000 [2:49:16<84:09:31,  3.13s/it]

training loss: 1.417148232460022
training loss: 1.3348753452301025
training loss: 1.3476600646972656
training loss: 1.3210124969482422


training:   3%|▎         | 3081/100000 [2:49:29<84:05:51,  3.12s/it]

training loss: 1.4835306406021118
training loss: 1.3288264274597168
training loss: 1.5688767433166504
training loss: 1.2852188348770142


training:   3%|▎         | 3085/100000 [2:49:41<84:00:57,  3.12s/it]

training loss: 1.3474136590957642
training loss: 1.3636503219604492
training loss: 1.347928524017334
training loss: 1.6187350749969482


training:   3%|▎         | 3089/100000 [2:49:54<83:57:14,  3.12s/it]

training loss: 1.4091384410858154
training loss: 1.4553577899932861
training loss: 1.3581171035766602
training loss: 1.4040580987930298


training:   3%|▎         | 3093/100000 [2:50:06<83:58:17,  3.12s/it]

training loss: 1.3889189958572388
training loss: 1.4089324474334717
training loss: 1.3589855432510376
training loss: 1.3933528661727905


training:   3%|▎         | 3097/100000 [2:50:19<83:58:50,  3.12s/it]

training loss: 1.4585434198379517
training loss: 1.4429199695587158
training loss: 1.540797233581543
training loss: 1.325451374053955


training:   3%|▎         | 3097/100000 [2:50:30<83:58:50,  3.12s/it]

training loss: 1.3538347482681274


training:   3%|▎         | 3101/100000 [2:50:31<84:34:50,  3.14s/it]

validation loss: 1.3603541851043701
training loss: 1.3638346195220947
training loss: 1.366158366203308
training loss: 1.3661154508590698


training:   3%|▎         | 3105/100000 [2:50:44<84:20:35,  3.13s/it]

training loss: 1.2655601501464844
training loss: 1.4821288585662842
training loss: 1.4404380321502686
training loss: 1.3893685340881348


training:   3%|▎         | 3109/100000 [2:50:56<84:13:05,  3.13s/it]

training loss: 1.2499456405639648
training loss: 1.4008541107177734
training loss: 1.2843923568725586
training loss: 1.4128069877624512


training:   3%|▎         | 3113/100000 [2:51:09<84:05:34,  3.12s/it]

training loss: 1.3370342254638672
training loss: 1.226932168006897
training loss: 1.2682572603225708
training loss: 1.4357073307037354


training:   3%|▎         | 3117/100000 [2:51:21<84:03:23,  3.12s/it]

training loss: 1.2608869075775146
training loss: 1.2262096405029297
training loss: 1.5625207424163818
training loss: 1.3358666896820068


training:   3%|▎         | 3121/100000 [2:51:34<84:01:02,  3.12s/it]

training loss: 1.3361485004425049
training loss: 1.4522573947906494
training loss: 1.3210766315460205
training loss: 1.2781904935836792


training:   3%|▎         | 3125/100000 [2:51:46<83:59:31,  3.12s/it]

training loss: 1.3823145627975464
training loss: 1.2592267990112305
training loss: 1.6002072095870972
training loss: 1.2840290069580078


training:   3%|▎         | 3129/100000 [2:51:59<83:57:31,  3.12s/it]

training loss: 1.487682580947876
training loss: 1.3996559381484985
training loss: 1.4668320417404175
training loss: 1.4637224674224854


training:   3%|▎         | 3133/100000 [2:52:11<83:54:23,  3.12s/it]

training loss: 1.3467798233032227
training loss: 1.3823211193084717
training loss: 1.5139696598052979
training loss: 1.3726240396499634


training:   3%|▎         | 3137/100000 [2:52:24<83:55:16,  3.12s/it]

training loss: 1.3290448188781738
training loss: 1.388471007347107
training loss: 1.3188657760620117
training loss: 1.42063570022583


training:   3%|▎         | 3141/100000 [2:52:36<83:55:39,  3.12s/it]

training loss: 1.3611539602279663
training loss: 1.3420783281326294
training loss: 1.3821654319763184
training loss: 1.369633436203003


training:   3%|▎         | 3145/100000 [2:52:49<83:54:29,  3.12s/it]

training loss: 1.6481879949569702
training loss: 1.5372151136398315
training loss: 1.3454747200012207
training loss: 1.3548039197921753


training:   3%|▎         | 3149/100000 [2:53:01<83:53:28,  3.12s/it]

training loss: 1.466159701347351
training loss: 1.4087820053100586
training loss: 1.5810468196868896
training loss: 1.1685428619384766


training:   3%|▎         | 3153/100000 [2:53:13<83:52:00,  3.12s/it]

training loss: 1.3409831523895264
training loss: 1.4326889514923096
training loss: 1.3599885702133179
training loss: 1.1310889720916748


training:   3%|▎         | 3157/100000 [2:53:26<83:55:03,  3.12s/it]

training loss: 1.419146180152893
training loss: 1.2166775465011597
training loss: 1.5295937061309814
training loss: 1.463367223739624


training:   3%|▎         | 3161/100000 [2:53:38<83:55:53,  3.12s/it]

training loss: 1.5298219919204712
training loss: 1.4647216796875
training loss: 1.3857868909835815
training loss: 1.3702794313430786


training:   3%|▎         | 3165/100000 [2:53:51<83:54:51,  3.12s/it]

training loss: 1.6109200716018677
training loss: 1.537506103515625
training loss: 1.4467310905456543
training loss: 1.3588720560073853


training:   3%|▎         | 3169/100000 [2:54:03<83:52:36,  3.12s/it]

training loss: 1.2323412895202637
training loss: 1.6107981204986572
training loss: 1.3100123405456543
training loss: 1.2830724716186523


training:   3%|▎         | 3173/100000 [2:54:16<83:52:02,  3.12s/it]

training loss: 1.3879003524780273
training loss: 1.3415544033050537
training loss: 1.4525833129882812
training loss: 1.5883914232254028


training:   3%|▎         | 3177/100000 [2:54:28<83:51:56,  3.12s/it]

training loss: 1.4798049926757812
training loss: 1.3111193180084229
training loss: 1.3395228385925293
training loss: 1.4081754684448242


training:   3%|▎         | 3181/100000 [2:54:41<83:52:45,  3.12s/it]

training loss: 1.3163998126983643
training loss: 1.3179306983947754
training loss: 1.6463063955307007
training loss: 1.3686777353286743


training:   3%|▎         | 3185/100000 [2:54:53<83:50:55,  3.12s/it]

training loss: 1.422831654548645
training loss: 1.3454501628875732
training loss: 1.426546573638916
training loss: 1.235316276550293


training:   3%|▎         | 3189/100000 [2:55:06<83:50:57,  3.12s/it]

training loss: 1.2548569440841675
training loss: 1.4036858081817627
training loss: 1.419042706489563
training loss: 1.3743035793304443


training:   3%|▎         | 3193/100000 [2:55:18<83:51:00,  3.12s/it]

training loss: 1.326046347618103
training loss: 1.2837915420532227
training loss: 1.4871277809143066
training loss: 1.2562530040740967


training:   3%|▎         | 3197/100000 [2:55:31<83:50:50,  3.12s/it]

training loss: 1.282997965812683
training loss: 1.3765939474105835
training loss: 1.4187294244766235
training loss: 1.352588415145874
training loss: 1.3160295486450195


training:   3%|▎         | 3201/100000 [2:55:43<84:28:10,  3.14s/it]

validation loss: 1.5903056859970093
training loss: 1.5307435989379883
training loss: 1.303745985031128
training loss: 1.5001565217971802


training:   3%|▎         | 3205/100000 [2:55:56<84:16:06,  3.13s/it]

training loss: 1.3795151710510254
training loss: 1.3148267269134521
training loss: 1.5117714405059814
training loss: 1.4452251195907593


training:   3%|▎         | 3209/100000 [2:56:08<84:08:34,  3.13s/it]

training loss: 1.373157262802124
training loss: 1.4745509624481201
training loss: 1.39605712890625
training loss: 1.3232611417770386


training:   3%|▎         | 3213/100000 [2:56:21<84:00:37,  3.12s/it]

training loss: 1.4033911228179932
training loss: 1.6263270378112793
training loss: 1.425794005393982
training loss: 1.4174777269363403


training:   3%|▎         | 3217/100000 [2:56:33<83:56:49,  3.12s/it]

training loss: 1.2511162757873535
training loss: 1.2011314630508423
training loss: 1.2910773754119873
training loss: 1.4768874645233154


training:   3%|▎         | 3221/100000 [2:56:46<83:55:34,  3.12s/it]

training loss: 1.45250403881073
training loss: 1.393017292022705
training loss: 1.1985784769058228
training loss: 1.2284022569656372


training:   3%|▎         | 3225/100000 [2:56:58<83:54:47,  3.12s/it]

training loss: 1.2604529857635498
training loss: 1.391170620918274
training loss: 1.3389244079589844
training loss: 1.3702789545059204


training:   3%|▎         | 3229/100000 [2:57:11<83:51:38,  3.12s/it]

training loss: 1.6993262767791748
training loss: 1.2510911226272583
training loss: 1.364832878112793
training loss: 1.3880159854888916


training:   3%|▎         | 3233/100000 [2:57:23<83:50:25,  3.12s/it]

training loss: 1.4092931747436523
training loss: 1.4035732746124268
training loss: 1.4047482013702393
training loss: 1.0400598049163818


training:   3%|▎         | 3237/100000 [2:57:36<83:49:57,  3.12s/it]

training loss: 1.4430346488952637
training loss: 1.4261530637741089
training loss: 1.432910442352295
training loss: 1.3977326154708862


training:   3%|▎         | 3241/100000 [2:57:48<83:49:45,  3.12s/it]

training loss: 1.4564268589019775
training loss: 1.3697627782821655
training loss: 1.4273422956466675
training loss: 1.5621552467346191


training:   3%|▎         | 3245/100000 [2:58:01<83:49:56,  3.12s/it]

training loss: 1.2922029495239258
training loss: 1.5172854661941528
training loss: 1.4643471240997314
training loss: 1.0870097875595093


training:   3%|▎         | 3249/100000 [2:58:13<83:45:52,  3.12s/it]

training loss: 1.5651395320892334
training loss: 1.2402238845825195
training loss: 1.2542301416397095
training loss: 1.302571177482605


training:   3%|▎         | 3253/100000 [2:58:26<83:47:31,  3.12s/it]

training loss: 1.3538739681243896
training loss: 1.4128022193908691
training loss: 1.2969582080841064
training loss: 1.3597548007965088


training:   3%|▎         | 3257/100000 [2:58:38<83:45:03,  3.12s/it]

training loss: 1.2893340587615967
training loss: 1.3496209383010864
training loss: 1.4716081619262695
training loss: 1.3357372283935547


training:   3%|▎         | 3261/100000 [2:58:50<83:45:13,  3.12s/it]

training loss: 1.550260066986084
training loss: 1.2887341976165771
training loss: 1.4340118169784546
training loss: 1.1582684516906738


training:   3%|▎         | 3265/100000 [2:59:03<83:44:16,  3.12s/it]

training loss: 1.4693419933319092
training loss: 1.342124342918396
training loss: 1.4145981073379517
training loss: 1.3770264387130737


training:   3%|▎         | 3269/100000 [2:59:15<83:44:13,  3.12s/it]

training loss: 1.3674654960632324
training loss: 1.4383900165557861
training loss: 1.4807488918304443
training loss: 1.2604666948318481


training:   3%|▎         | 3273/100000 [2:59:28<83:44:32,  3.12s/it]

training loss: 1.2691830396652222
training loss: 1.4555498361587524
training loss: 1.422215461730957
training loss: 1.2728142738342285


training:   3%|▎         | 3277/100000 [2:59:40<83:44:34,  3.12s/it]

training loss: 1.4153423309326172
training loss: 1.398031234741211
training loss: 1.231618881225586
training loss: 1.3277852535247803


training:   3%|▎         | 3281/100000 [2:59:53<83:45:24,  3.12s/it]

training loss: 1.4378273487091064
training loss: 1.5094082355499268
training loss: 1.5856646299362183
training loss: 1.5235629081726074


training:   3%|▎         | 3285/100000 [3:00:05<83:45:53,  3.12s/it]

training loss: 1.3876245021820068
training loss: 1.3803858757019043
training loss: 1.2556862831115723
training loss: 1.426277995109558


training:   3%|▎         | 3289/100000 [3:00:18<83:47:13,  3.12s/it]

training loss: 1.3766956329345703
training loss: 1.329380989074707
training loss: 1.413191318511963
training loss: 1.3181877136230469


training:   3%|▎         | 3293/100000 [3:00:30<83:49:33,  3.12s/it]

training loss: 1.1798136234283447
training loss: 1.377522349357605
training loss: 1.3750449419021606
training loss: 1.2436778545379639


training:   3%|▎         | 3297/100000 [3:00:43<83:48:48,  3.12s/it]

training loss: 1.2792446613311768
training loss: 1.3806049823760986
training loss: 1.3149750232696533
training loss: 1.4569028615951538
training loss: 1.3236351013183594


training:   3%|▎         | 3301/100000 [3:00:56<84:22:15,  3.14s/it]

validation loss: 1.4575629234313965
training loss: 1.4797327518463135
training loss: 1.3357423543930054
training loss: 1.3884400129318237


training:   3%|▎         | 3305/100000 [3:01:08<84:08:46,  3.13s/it]

training loss: 1.2898015975952148
training loss: 1.218457818031311
training loss: 1.331395149230957
training loss: 1.3346242904663086


training:   3%|▎         | 3309/100000 [3:01:20<84:00:02,  3.13s/it]

training loss: 1.4158382415771484
training loss: 1.5464929342269897
training loss: 1.0894050598144531
training loss: 1.3178560733795166


training:   3%|▎         | 3313/100000 [3:01:33<83:56:51,  3.13s/it]

training loss: 1.3062244653701782
training loss: 1.4131886959075928
training loss: 1.4367865324020386
training loss: 1.326913833618164


training:   3%|▎         | 3317/100000 [3:01:45<83:52:34,  3.12s/it]

training loss: 1.3905832767486572
training loss: 1.552729845046997
training loss: 1.3283157348632812
training loss: 1.3141993284225464


training:   3%|▎         | 3321/100000 [3:01:58<83:50:01,  3.12s/it]

training loss: 1.47722589969635
training loss: 1.3326940536499023
training loss: 1.4107558727264404
training loss: 1.1944702863693237


training:   3%|▎         | 3325/100000 [3:02:10<83:48:25,  3.12s/it]

training loss: 1.3079828023910522
training loss: 1.3427231311798096
training loss: 1.4410572052001953
training loss: 1.326568841934204


training:   3%|▎         | 3329/100000 [3:02:23<83:47:18,  3.12s/it]

training loss: 1.4712069034576416
training loss: 1.31795334815979
training loss: 1.3754370212554932
training loss: 1.4451675415039062


training:   3%|▎         | 3333/100000 [3:02:35<83:45:24,  3.12s/it]

training loss: 1.3391631841659546
training loss: 1.2296699285507202
training loss: 1.4083808660507202
training loss: 1.3269670009613037


training:   3%|▎         | 3337/100000 [3:02:48<83:45:47,  3.12s/it]

training loss: 1.0282005071640015
training loss: 1.3050196170806885
training loss: 1.2337651252746582
training loss: 1.36527681350708


training:   3%|▎         | 3341/100000 [3:03:00<83:44:54,  3.12s/it]

training loss: 1.3298789262771606
training loss: 1.0442121028900146
training loss: 1.3689790964126587
training loss: 1.2127814292907715


training:   3%|▎         | 3345/100000 [3:03:13<83:44:07,  3.12s/it]

training loss: 1.2260655164718628
training loss: 1.2895764112472534
training loss: 1.4684453010559082
training loss: 1.249403715133667


training:   3%|▎         | 3349/100000 [3:03:25<83:40:59,  3.12s/it]

training loss: 1.344199299812317
training loss: 1.453040599822998
training loss: 1.1174721717834473
training loss: 1.6261401176452637


training:   3%|▎         | 3353/100000 [3:03:38<83:43:09,  3.12s/it]

training loss: 1.3015005588531494
training loss: 1.3794680833816528
training loss: 1.4357781410217285
training loss: 1.4223387241363525


training:   3%|▎         | 3357/100000 [3:03:50<83:43:51,  3.12s/it]

training loss: 1.419710397720337
training loss: 1.2794203758239746
training loss: 1.3517122268676758
training loss: 1.119319200515747


training:   3%|▎         | 3361/100000 [3:04:03<83:43:05,  3.12s/it]

training loss: 1.435766577720642
training loss: 1.3122775554656982
training loss: 1.477042317390442
training loss: 1.2603225708007812


training:   3%|▎         | 3365/100000 [3:04:15<83:41:36,  3.12s/it]

training loss: 1.2065324783325195
training loss: 1.340451955795288
training loss: 1.3241558074951172
training loss: 1.4882316589355469


training:   3%|▎         | 3369/100000 [3:04:28<83:42:01,  3.12s/it]

training loss: 1.3636672496795654
training loss: 1.2536643743515015
training loss: 1.4820053577423096
training loss: 1.2942485809326172


training:   3%|▎         | 3373/100000 [3:04:40<83:42:09,  3.12s/it]

training loss: 1.3146491050720215
training loss: 1.2641704082489014
training loss: 1.4385737180709839
training loss: 1.1741547584533691


training:   3%|▎         | 3377/100000 [3:04:53<83:46:09,  3.12s/it]

training loss: 1.3659595251083374
training loss: 1.5137531757354736
training loss: 1.3198963403701782
training loss: 1.3877301216125488


training:   3%|▎         | 3381/100000 [3:05:05<83:44:36,  3.12s/it]

training loss: 1.3281704187393188
training loss: 1.408165454864502
training loss: 1.1856017112731934
training loss: 1.3407050371170044


training:   3%|▎         | 3385/100000 [3:05:17<83:43:33,  3.12s/it]

training loss: 1.4469561576843262
training loss: 1.4003496170043945
training loss: 1.5016281604766846
training loss: 1.0963881015777588


training:   3%|▎         | 3389/100000 [3:05:30<83:42:30,  3.12s/it]

training loss: 1.1864901781082153
training loss: 1.2871638536453247
training loss: 1.412126064300537
training loss: 1.4801305532455444


training:   3%|▎         | 3393/100000 [3:05:42<83:42:28,  3.12s/it]

training loss: 1.3969125747680664
training loss: 1.5306732654571533
training loss: 1.1999154090881348
training loss: 1.4836366176605225


training:   3%|▎         | 3397/100000 [3:05:55<83:43:43,  3.12s/it]

training loss: 1.3903961181640625
training loss: 1.428815484046936
training loss: 1.5434596538543701
training loss: 1.3299791812896729
training loss: 1.2681812047958374


training:   3%|▎         | 3401/100000 [3:06:08<84:19:44,  3.14s/it]

validation loss: 1.4765031337738037
training loss: 1.4622739553451538
training loss: 1.0490381717681885
training loss: 1.4601867198944092


training:   3%|▎         | 3405/100000 [3:06:20<84:05:08,  3.13s/it]

training loss: 1.3300045728683472
training loss: 1.5347471237182617
training loss: 1.407219409942627
training loss: 1.3867106437683105


training:   3%|▎         | 3409/100000 [3:06:33<83:57:49,  3.13s/it]

training loss: 1.2490031719207764
training loss: 1.3489527702331543
training loss: 1.4776601791381836
training loss: 1.2679556608200073


training:   3%|▎         | 3413/100000 [3:06:45<83:52:32,  3.13s/it]

training loss: 1.402458906173706
training loss: 1.4670522212982178
training loss: 1.5032579898834229
training loss: 1.4547629356384277


training:   3%|▎         | 3417/100000 [3:06:58<83:46:28,  3.12s/it]

training loss: 1.4626835584640503
training loss: 1.2510464191436768
training loss: 1.2612392902374268
training loss: 1.4018113613128662


training:   3%|▎         | 3421/100000 [3:07:10<83:45:36,  3.12s/it]

training loss: 1.2370171546936035
training loss: 1.395629644393921
training loss: 1.5114399194717407
training loss: 1.2809325456619263


training:   3%|▎         | 3425/100000 [3:07:23<83:44:12,  3.12s/it]

training loss: 1.1180875301361084
training loss: 1.3014776706695557
training loss: 1.337331771850586
training loss: 1.3647372722625732


training:   3%|▎         | 3429/100000 [3:07:35<83:42:19,  3.12s/it]

training loss: 1.4361693859100342
training loss: 1.4242141246795654
training loss: 1.3308875560760498
training loss: 1.3110480308532715


training:   3%|▎         | 3433/100000 [3:07:47<83:41:05,  3.12s/it]

training loss: 1.6699309349060059
training loss: 1.4402339458465576
training loss: 1.4006454944610596
training loss: 1.3397759199142456


training:   3%|▎         | 3437/100000 [3:08:00<83:40:59,  3.12s/it]

training loss: 1.267287254333496
training loss: 1.3090896606445312
training loss: 1.420957326889038
training loss: 1.4106671810150146


training:   3%|▎         | 3441/100000 [3:08:12<83:39:41,  3.12s/it]

training loss: 1.2574702501296997
training loss: 1.5580382347106934
training loss: 1.2744015455245972
training loss: 1.2576714754104614


training:   3%|▎         | 3445/100000 [3:08:25<83:41:30,  3.12s/it]

training loss: 1.3752707242965698
training loss: 1.2582993507385254
training loss: 1.3887999057769775
training loss: 1.426339864730835


training:   3%|▎         | 3449/100000 [3:08:37<83:39:54,  3.12s/it]

training loss: 1.3336783647537231
training loss: 1.4324452877044678
training loss: 1.4144668579101562
training loss: 1.3578777313232422


training:   3%|▎         | 3453/100000 [3:08:50<83:40:02,  3.12s/it]

training loss: 1.2130308151245117
training loss: 1.1183066368103027
training loss: 1.307820200920105
training loss: 1.3803043365478516


training:   3%|▎         | 3457/100000 [3:09:02<83:39:47,  3.12s/it]

training loss: 1.4725428819656372
training loss: 1.3948732614517212
training loss: 1.2567758560180664
training loss: 1.3137524127960205


training:   3%|▎         | 3461/100000 [3:09:15<83:39:53,  3.12s/it]

training loss: 1.2799830436706543
training loss: 1.2978248596191406
training loss: 1.2023098468780518
training loss: 1.1730120182037354


training:   3%|▎         | 3465/100000 [3:09:27<83:40:49,  3.12s/it]

training loss: 1.1749399900436401
training loss: 1.3762989044189453
training loss: 1.1925101280212402
training loss: 1.320162296295166


training:   3%|▎         | 3469/100000 [3:09:40<83:38:38,  3.12s/it]

training loss: 1.4408293962478638
training loss: 1.2075175046920776
training loss: 1.409881353378296
training loss: 1.4759280681610107


training:   3%|▎         | 3473/100000 [3:09:52<83:36:05,  3.12s/it]

training loss: 1.2647565603256226
training loss: 1.1736217737197876
training loss: 1.2360690832138062
training loss: 1.3110077381134033


training:   3%|▎         | 3477/100000 [3:10:05<83:35:31,  3.12s/it]

training loss: 1.5219511985778809
training loss: 1.3783173561096191
training loss: 1.3139894008636475
training loss: 1.424931526184082


training:   3%|▎         | 3481/100000 [3:10:17<83:36:05,  3.12s/it]

training loss: 1.4528683423995972
training loss: 1.2572649717330933
training loss: 1.3563685417175293
training loss: 1.3264176845550537


training:   3%|▎         | 3485/100000 [3:10:30<83:38:11,  3.12s/it]

training loss: 1.2120119333267212
training loss: 1.4828354120254517
training loss: 1.2430437803268433
training loss: 1.470017910003662


training:   3%|▎         | 3489/100000 [3:10:42<83:39:21,  3.12s/it]

training loss: 1.3645360469818115
training loss: 1.531367301940918
training loss: 1.2776095867156982
training loss: 1.1424400806427002


training:   3%|▎         | 3493/100000 [3:10:55<83:38:14,  3.12s/it]

training loss: 1.2470812797546387
training loss: 1.3960039615631104
training loss: 1.4249809980392456
training loss: 1.318273663520813


training:   3%|▎         | 3497/100000 [3:11:07<83:40:04,  3.12s/it]

training loss: 1.3147547245025635
training loss: 1.0287882089614868
training loss: 1.4217948913574219
training loss: 1.405742883682251
training loss: 1.291731834411621
validation loss: 1.2483612298965454
%s 

 %s ("State University, will chair the Board.  Universities involved: Johns Hopkins University, University of California Los Angeles, Carnegie Mellon University, University of Colifornia, San Diego, Texas A&amp;M University, Iowa State University, University of Pennsylvania, West Virginia University, Massachusetts Institute of Technology, Purdue University, University of Florida, University of North Carolina Chapel Hill, University of Maryland, University of Wisconsin, and University of Washington.  ==See also== {{Education stages}}  ==Further reading==  ''Higher education in the United States''  * Davies, Antony and Thomas W. Cline (2005). [http://www.business.duq.edu/faculty/davies/research/roimba.pdf ''The ROI on the MBA,''] &lt;u&gt;BizEd&lt;/u&gt;.  * El-Khawas, E. (1996). ''

training:   3%|▎         | 3497/100000 [3:11:20<83:40:04,  3.12s/it]

which translatoo sciences, such as abted to the education of ABHEw OSE, U.S. Box the collings, an American mounted forch anything a dependence being stated to a se, a that with heid politicism approvement of 1958, the region. However's dating on the new effect for the 12th, [[Bicking]], [[Government and}}] ''The Members'', [[World Balgova One]], 1953 by Brigain, so you live [[Marqus]]. On [[Franki Congo VIIA]] can several opposition (QFAY) (See width [[Unix Range-Moor]] Provides, as a ''City official Action Iline''). Then a member all throne within this other [[York]]. In 1906, the Balkot were won a misters (''Polyriese'') was simply no-led rause, but though until the [[Hondony Old Compserve]], [[Marse by the Viewing, She's William HÃ©sbahaa Scottish &quot;It Kenni I was solding found&quot; was the Italy and nomings did [http://www.gitc.php-Assum.html Schnic Conveks of Gennie government messa³). The Dnews reaked continue more delaise in the Sensiendary stage and the people will reason

training:   4%|▎         | 3501/100000 [3:12:39<242:28:47,  9.05s/it]

Model saved at iteration 3500
training loss: 1.2834529876708984
training loss: 1.2989575862884521
training loss: 1.4225627183914185


training:   4%|▎         | 3505/100000 [3:12:51<194:45:57,  7.27s/it]

training loss: 1.37705397605896
training loss: 1.41348135471344
training loss: 1.5153104066848755
training loss: 1.4223065376281738


training:   4%|▎         | 3509/100000 [3:13:04<161:23:48,  6.02s/it]

training loss: 1.2887297868728638
training loss: 1.3030747175216675
training loss: 1.0789353847503662
training loss: 1.2176727056503296


training:   4%|▎         | 3513/100000 [3:13:16<138:02:50,  5.15s/it]

training loss: 1.4858379364013672
training loss: 1.5993746519088745
training loss: 1.3788552284240723
training loss: 1.2538275718688965


training:   4%|▎         | 3517/100000 [3:13:28<121:43:11,  4.54s/it]

training loss: 1.3315353393554688
training loss: 1.262869954109192
training loss: 1.4596192836761475
training loss: 1.2174310684204102


training:   4%|▎         | 3521/100000 [3:13:41<110:18:44,  4.12s/it]

training loss: 1.2009668350219727
training loss: 1.2386391162872314
training loss: 1.087965726852417
training loss: 1.2604451179504395


training:   4%|▎         | 3525/100000 [3:13:53<102:18:12,  3.82s/it]

training loss: 1.3724006414413452
training loss: 1.3086249828338623
training loss: 1.3256688117980957
training loss: 1.5339466333389282


training:   4%|▎         | 3529/100000 [3:14:06<96:40:43,  3.61s/it] 

training loss: 1.2289708852767944
training loss: 1.3944638967514038
training loss: 1.4366189241409302
training loss: 1.2376681566238403


training:   4%|▎         | 3533/100000 [3:14:18<92:44:45,  3.46s/it]

training loss: 1.2627090215682983
training loss: 1.29567289352417
training loss: 1.3284599781036377
training loss: 1.3170310258865356


training:   4%|▎         | 3537/100000 [3:14:31<89:59:01,  3.36s/it]

training loss: 1.2641541957855225
training loss: 1.281364917755127
training loss: 1.3413689136505127
training loss: 1.2350157499313354


training:   4%|▎         | 3541/100000 [3:14:43<88:03:25,  3.29s/it]

training loss: 1.2299141883850098
training loss: 1.2926124334335327
training loss: 1.2381670475006104
training loss: 1.326529622077942


training:   4%|▎         | 3545/100000 [3:14:56<86:44:07,  3.24s/it]

training loss: 1.2930004596710205
training loss: 1.409611463546753
training loss: 1.305555820465088
training loss: 1.3654142618179321


training:   4%|▎         | 3549/100000 [3:15:08<85:47:27,  3.20s/it]

training loss: 1.3463585376739502
training loss: 1.1821907758712769
training loss: 1.3515968322753906
training loss: 1.3665482997894287


training:   4%|▎         | 3553/100000 [3:15:21<85:07:25,  3.18s/it]

training loss: 1.2332007884979248
training loss: 1.2289178371429443
training loss: 1.442146897315979
training loss: 1.4279497861862183


training:   4%|▎         | 3557/100000 [3:15:33<84:38:16,  3.16s/it]

training loss: 1.360145092010498
training loss: 1.295823335647583
training loss: 1.3777801990509033
training loss: 1.2181586027145386


training:   4%|▎         | 3561/100000 [3:15:46<84:18:19,  3.15s/it]

training loss: 1.2897084951400757
training loss: 1.2514276504516602
training loss: 1.3631253242492676
training loss: 1.4451195001602173


training:   4%|▎         | 3565/100000 [3:15:58<84:05:07,  3.14s/it]

training loss: 1.3405189514160156
training loss: 1.2812544107437134
training loss: 1.3663249015808105
training loss: 1.3127081394195557


training:   4%|▎         | 3569/100000 [3:16:11<83:57:05,  3.13s/it]

training loss: 1.328016996383667
training loss: 1.3768495321273804
training loss: 1.1903507709503174
training loss: 1.2818927764892578


training:   4%|▎         | 3573/100000 [3:16:23<83:49:47,  3.13s/it]

training loss: 1.4824941158294678
training loss: 1.5711039304733276
training loss: 1.3919669389724731
training loss: 1.226669430732727


training:   4%|▎         | 3577/100000 [3:16:36<83:44:19,  3.13s/it]

training loss: 1.3513822555541992
training loss: 1.4510314464569092
training loss: 1.2819137573242188
training loss: 1.3281352519989014


training:   4%|▎         | 3581/100000 [3:16:48<83:39:39,  3.12s/it]

training loss: 1.3875088691711426
training loss: 1.3930988311767578
training loss: 1.3585536479949951
training loss: 1.3308228254318237


training:   4%|▎         | 3585/100000 [3:17:01<83:37:28,  3.12s/it]

training loss: 1.1830921173095703
training loss: 1.304173469543457
training loss: 1.3709968328475952
training loss: 1.1773165464401245


training:   4%|▎         | 3589/100000 [3:17:13<83:36:48,  3.12s/it]

training loss: 1.2584373950958252
training loss: 1.2846232652664185
training loss: 1.134339451789856
training loss: 1.3374788761138916


training:   4%|▎         | 3593/100000 [3:17:26<83:35:17,  3.12s/it]

training loss: 1.2947394847869873
training loss: 1.437814474105835
training loss: 1.3845040798187256
training loss: 1.2316391468048096


training:   4%|▎         | 3597/100000 [3:17:38<83:33:26,  3.12s/it]

training loss: 1.3636078834533691
training loss: 1.2737324237823486
training loss: 1.280116319656372
training loss: 1.251355528831482


training:   4%|▎         | 3597/100000 [3:17:50<83:33:26,  3.12s/it]

training loss: 1.4435300827026367


training:   4%|▎         | 3601/100000 [3:17:51<84:06:17,  3.14s/it]

validation loss: 1.20970618724823
training loss: 1.470745325088501
training loss: 1.4181729555130005
training loss: 1.363227367401123


training:   4%|▎         | 3605/100000 [3:18:03<83:53:06,  3.13s/it]

training loss: 1.3992005586624146
training loss: 1.446629524230957
training loss: 1.22603440284729
training loss: 1.3789976835250854


training:   4%|▎         | 3609/100000 [3:18:16<83:47:19,  3.13s/it]

training loss: 1.2862465381622314
training loss: 1.394689917564392
training loss: 1.4612705707550049
training loss: 1.4025530815124512


training:   4%|▎         | 3613/100000 [3:18:28<83:47:55,  3.13s/it]

training loss: 1.3973324298858643
training loss: 1.2604632377624512
training loss: 1.3811748027801514
training loss: 1.3575602769851685


training:   4%|▎         | 3617/100000 [3:18:41<83:42:38,  3.13s/it]

training loss: 1.289425015449524
training loss: 1.3228892087936401
training loss: 1.4666500091552734
training loss: 1.3599079847335815


training:   4%|▎         | 3621/100000 [3:18:53<83:36:03,  3.12s/it]

training loss: 1.602536678314209
training loss: 1.3164268732070923
training loss: 1.3562822341918945
training loss: 1.1085296869277954


training:   4%|▎         | 3625/100000 [3:19:06<83:33:07,  3.12s/it]

training loss: 1.3398633003234863
training loss: 1.4238712787628174
training loss: 1.245955228805542
training loss: 1.4319766759872437


training:   4%|▎         | 3629/100000 [3:19:18<83:32:42,  3.12s/it]

training loss: 1.4097237586975098
training loss: 1.1359341144561768
training loss: 1.4872945547103882
training loss: 1.362839698791504


training:   4%|▎         | 3633/100000 [3:19:31<83:33:03,  3.12s/it]

training loss: 1.4016289710998535
training loss: 1.1686973571777344
training loss: 1.2790791988372803
training loss: 1.2101106643676758


training:   4%|▎         | 3637/100000 [3:19:43<83:31:32,  3.12s/it]

training loss: 1.139541745185852
training loss: 1.424889087677002
training loss: 1.1032884120941162
training loss: 1.27449369430542


training:   4%|▎         | 3641/100000 [3:19:56<83:30:52,  3.12s/it]

training loss: 1.2699378728866577
training loss: 1.2291088104248047
training loss: 1.288691520690918
training loss: 1.2706496715545654


training:   4%|▎         | 3645/100000 [3:20:08<83:29:36,  3.12s/it]

training loss: 1.0396820306777954
training loss: 1.2255005836486816
training loss: 1.4182590246200562
training loss: 1.3179011344909668


training:   4%|▎         | 3649/100000 [3:20:21<83:32:38,  3.12s/it]

training loss: 1.1626639366149902
training loss: 1.3139452934265137
training loss: 1.3450080156326294
training loss: 1.3184399604797363


training:   4%|▎         | 3653/100000 [3:20:33<83:31:41,  3.12s/it]

training loss: 1.3005954027175903
training loss: 1.3808422088623047
training loss: 1.2947638034820557
training loss: 1.4386626482009888


training:   4%|▎         | 3657/100000 [3:20:46<83:30:40,  3.12s/it]

training loss: 1.2177107334136963
training loss: 1.3067833185195923
training loss: 1.3864177465438843
training loss: 1.3937323093414307


training:   4%|▎         | 3661/100000 [3:20:58<83:27:27,  3.12s/it]

training loss: 1.3312891721725464
training loss: 1.3424510955810547
training loss: 1.2223873138427734
training loss: 1.3608897924423218


training:   4%|▎         | 3665/100000 [3:21:10<83:27:26,  3.12s/it]

training loss: 1.4829413890838623
training loss: 1.3726180791854858
training loss: 1.5788021087646484
training loss: 1.329730749130249


training:   4%|▎         | 3669/100000 [3:21:23<83:26:40,  3.12s/it]

training loss: 1.2867999076843262
training loss: 1.1392621994018555
training loss: 1.3104348182678223
training loss: 1.103952169418335


training:   4%|▎         | 3673/100000 [3:21:35<83:27:51,  3.12s/it]

training loss: 1.309468388557434
training loss: 1.1687589883804321
training loss: 1.3628966808319092
training loss: 1.5390586853027344


training:   4%|▎         | 3677/100000 [3:21:48<83:26:36,  3.12s/it]

training loss: 1.3963780403137207
training loss: 1.3917313814163208
training loss: 1.218969702720642
training loss: 1.2905540466308594


training:   4%|▎         | 3681/100000 [3:22:00<83:25:36,  3.12s/it]

training loss: 1.2565388679504395
training loss: 1.1850944757461548
training loss: 1.4405338764190674
training loss: 1.2960765361785889


training:   4%|▎         | 3685/100000 [3:22:13<83:25:18,  3.12s/it]

training loss: 1.2678542137145996
training loss: 1.417665719985962
training loss: 0.9956659078598022
training loss: 1.2954965829849243


training:   4%|▎         | 3689/100000 [3:22:25<83:25:49,  3.12s/it]

training loss: 1.2850472927093506
training loss: 1.119554042816162
training loss: 1.363885521888733
training loss: 1.288164734840393


training:   4%|▎         | 3693/100000 [3:22:38<83:25:51,  3.12s/it]

training loss: 1.1725234985351562
training loss: 1.3634734153747559
training loss: 1.1498181819915771
training loss: 1.3068556785583496


training:   4%|▎         | 3697/100000 [3:22:50<83:27:35,  3.12s/it]

training loss: 1.4228074550628662
training loss: 1.3604211807250977
training loss: 1.344956636428833
training loss: 1.3432329893112183
training loss: 1.2108168601989746


training:   4%|▎         | 3701/100000 [3:23:03<84:03:09,  3.14s/it]

validation loss: 1.2871150970458984
training loss: 1.3534460067749023
training loss: 1.169862151145935
training loss: 1.319455623626709


training:   4%|▎         | 3705/100000 [3:23:16<83:50:02,  3.13s/it]

training loss: 1.3820644617080688
training loss: 1.4739949703216553
training loss: 1.1825064420700073
training loss: 1.287587285041809


training:   4%|▎         | 3709/100000 [3:23:28<83:42:53,  3.13s/it]

training loss: 1.3165847063064575
training loss: 1.3331892490386963
training loss: 1.3376421928405762
training loss: 1.3542377948760986


training:   4%|▎         | 3713/100000 [3:23:40<83:36:22,  3.13s/it]

training loss: 1.4158357381820679
training loss: 1.4299836158752441
training loss: 1.3271225690841675
training loss: 1.2823848724365234


training:   4%|▎         | 3717/100000 [3:23:53<83:33:38,  3.12s/it]

training loss: 1.418513536453247
training loss: 1.2806854248046875
training loss: 1.3318134546279907
training loss: 1.224816083908081


training:   4%|▎         | 3721/100000 [3:24:05<83:31:24,  3.12s/it]

training loss: 1.274125576019287
training loss: 1.3876783847808838
training loss: 1.4156521558761597
training loss: 1.2177698612213135


training:   4%|▎         | 3725/100000 [3:24:18<83:28:43,  3.12s/it]

training loss: 1.4687614440917969
training loss: 1.453770399093628
training loss: 1.4224214553833008
training loss: 1.2978585958480835


training:   4%|▎         | 3729/100000 [3:24:30<83:27:42,  3.12s/it]

training loss: 1.251396894454956
training loss: 1.2318854331970215
training loss: 1.4453636407852173
training loss: 1.4197258949279785


training:   4%|▎         | 3733/100000 [3:24:43<83:26:04,  3.12s/it]

training loss: 1.5284931659698486
training loss: 1.5155463218688965
training loss: 1.2049118280410767
training loss: 1.6997365951538086


training:   4%|▎         | 3737/100000 [3:24:55<83:26:11,  3.12s/it]

training loss: 1.4561305046081543
training loss: 1.2718355655670166
training loss: 1.4055614471435547
training loss: 1.04982590675354


training:   4%|▎         | 3741/100000 [3:25:08<83:23:48,  3.12s/it]

training loss: 1.2008137702941895
training loss: 1.2010624408721924
training loss: 1.2660620212554932
training loss: 1.2675790786743164


training:   4%|▎         | 3745/100000 [3:25:20<83:23:06,  3.12s/it]

training loss: 1.3222670555114746
training loss: 1.2128937244415283
training loss: 1.254115104675293
training loss: 1.3467236757278442


training:   4%|▎         | 3749/100000 [3:25:33<83:23:16,  3.12s/it]

training loss: 1.422463297843933
training loss: 1.4000664949417114
training loss: 1.357618808746338
training loss: 1.3472027778625488


training:   4%|▍         | 3753/100000 [3:25:45<83:23:22,  3.12s/it]

training loss: 1.213942289352417
training loss: 1.2854715585708618
training loss: 1.3024358749389648
training loss: 1.243303894996643


training:   4%|▍         | 3757/100000 [3:25:58<83:27:12,  3.12s/it]

training loss: 1.325326681137085
training loss: 1.1601682901382446
training loss: 1.317725419998169
training loss: 1.421952247619629


training:   4%|▍         | 3761/100000 [3:26:10<83:27:13,  3.12s/it]

training loss: 1.3030354976654053
training loss: 1.2934637069702148
training loss: 1.3434969186782837
training loss: 1.3862441778182983


training:   4%|▍         | 3765/100000 [3:26:23<83:26:19,  3.12s/it]

training loss: 1.291248083114624
training loss: 1.1897242069244385
training loss: 1.422895073890686
training loss: 1.3784077167510986


training:   4%|▍         | 3769/100000 [3:26:35<83:24:20,  3.12s/it]

training loss: 1.2377551794052124
training loss: 1.1297814846038818
training loss: 1.369814395904541
training loss: 1.366300106048584


training:   4%|▍         | 3773/100000 [3:26:48<83:20:40,  3.12s/it]

training loss: 1.2701425552368164
training loss: 1.34390389919281
training loss: 1.4669742584228516
training loss: 1.1887305974960327


training:   4%|▍         | 3777/100000 [3:27:00<83:18:22,  3.12s/it]

training loss: 1.293792963027954
training loss: 1.2308381795883179
training loss: 1.3428070545196533
training loss: 1.3874263763427734


training:   4%|▍         | 3781/100000 [3:27:13<83:21:55,  3.12s/it]

training loss: 1.38666570186615
training loss: 1.3448283672332764
training loss: 1.3135077953338623
training loss: 1.3254292011260986


training:   4%|▍         | 3785/100000 [3:27:25<83:22:43,  3.12s/it]

training loss: 1.0744740962982178
training loss: 1.3898869752883911
training loss: 1.394049048423767
training loss: 1.1453498601913452


training:   4%|▍         | 3789/100000 [3:27:38<83:21:49,  3.12s/it]

training loss: 1.4073803424835205
training loss: 1.3407888412475586
training loss: 1.501758098602295
training loss: 1.2325587272644043


training:   4%|▍         | 3793/100000 [3:27:50<83:22:09,  3.12s/it]

training loss: 1.2849180698394775
training loss: 1.4197814464569092
training loss: 1.219519019126892
training loss: 1.3367619514465332


training:   4%|▍         | 3797/100000 [3:28:02<83:20:37,  3.12s/it]

training loss: 1.3143582344055176
training loss: 1.368262767791748
training loss: 1.0629034042358398
training loss: 1.3778204917907715
training loss: 1.1859679222106934


training:   4%|▍         | 3801/100000 [3:28:15<83:57:04,  3.14s/it]

validation loss: 1.4243491888046265
training loss: 1.0368620157241821
training loss: 1.2568910121917725
training loss: 1.1927812099456787


training:   4%|▍         | 3805/100000 [3:28:28<83:43:02,  3.13s/it]

training loss: 1.2968847751617432
training loss: 1.3814301490783691
training loss: 1.1929025650024414
training loss: 1.0854811668395996


training:   4%|▍         | 3809/100000 [3:28:40<83:37:36,  3.13s/it]

training loss: 1.30293607711792
training loss: 1.347946047782898
training loss: 1.0771182775497437
training loss: 1.1913930177688599


training:   4%|▍         | 3813/100000 [3:28:53<83:33:37,  3.13s/it]

training loss: 1.4352898597717285
training loss: 1.1887965202331543
training loss: 1.3965437412261963
training loss: 1.4147592782974243


training:   4%|▍         | 3817/100000 [3:29:05<83:29:10,  3.12s/it]

training loss: 1.261521816253662
training loss: 1.395479679107666
training loss: 1.2967569828033447
training loss: 1.5206763744354248


training:   4%|▍         | 3821/100000 [3:29:18<83:26:14,  3.12s/it]

training loss: 1.102665662765503
training loss: 1.455580472946167
training loss: 1.303290843963623
training loss: 1.4522817134857178


training:   4%|▍         | 3825/100000 [3:29:30<83:22:27,  3.12s/it]

training loss: 1.289177656173706
training loss: 1.2445082664489746
training loss: 1.3356245756149292
training loss: 1.2892028093338013


training:   4%|▍         | 3829/100000 [3:29:43<83:17:13,  3.12s/it]

training loss: 1.3603527545928955
training loss: 1.3425086736679077
training loss: 1.2888636589050293
training loss: 1.4923521280288696


training:   4%|▍         | 3833/100000 [3:29:55<83:17:30,  3.12s/it]

training loss: 1.4690762758255005
training loss: 1.2759140729904175
training loss: 1.2701570987701416
training loss: 1.4269593954086304


training:   4%|▍         | 3837/100000 [3:30:08<83:16:43,  3.12s/it]

training loss: 1.3522422313690186
training loss: 1.2825958728790283
training loss: 1.3499701023101807
training loss: 1.3291523456573486


training:   4%|▍         | 3841/100000 [3:30:20<83:17:24,  3.12s/it]

training loss: 1.2587766647338867
training loss: 1.2891067266464233
training loss: 1.233165979385376
training loss: 1.4542038440704346


training:   4%|▍         | 3845/100000 [3:30:32<83:18:01,  3.12s/it]

training loss: 1.299594521522522
training loss: 1.042346477508545
training loss: 1.4074738025665283
training loss: 1.3103716373443604


training:   4%|▍         | 3849/100000 [3:30:45<83:19:36,  3.12s/it]

training loss: 0.9990009069442749
training loss: 1.315782070159912
training loss: 1.2739534378051758
training loss: 1.1890206336975098


training:   4%|▍         | 3853/100000 [3:30:57<83:18:10,  3.12s/it]

training loss: 1.1761269569396973
training loss: 1.3774770498275757
training loss: 1.4699978828430176
training loss: 1.3937788009643555


training:   4%|▍         | 3857/100000 [3:31:10<83:18:49,  3.12s/it]

training loss: 1.3167860507965088
training loss: 1.3540353775024414
training loss: 1.4190399646759033
training loss: 1.3459734916687012


training:   4%|▍         | 3861/100000 [3:31:22<83:17:20,  3.12s/it]

training loss: 1.2422517538070679
training loss: 1.3659498691558838
training loss: 1.4262373447418213
training loss: 1.2107371091842651


training:   4%|▍         | 3865/100000 [3:31:35<83:17:12,  3.12s/it]

training loss: 1.3233013153076172
training loss: 1.0034799575805664
training loss: 1.221978783607483
training loss: 1.3883399963378906


training:   4%|▍         | 3869/100000 [3:31:47<83:18:04,  3.12s/it]

training loss: 1.3398672342300415
training loss: 1.377352237701416
training loss: 1.2016383409500122
training loss: 1.3811264038085938


training:   4%|▍         | 3873/100000 [3:32:00<83:18:26,  3.12s/it]

training loss: 1.407015323638916
training loss: 1.3700652122497559
training loss: 1.3543951511383057
training loss: 1.3089120388031006


training:   4%|▍         | 3877/100000 [3:32:12<83:17:53,  3.12s/it]

training loss: 1.2993216514587402
training loss: 1.2007358074188232
training loss: 1.281783938407898
training loss: 1.326026439666748


training:   4%|▍         | 3881/100000 [3:32:25<83:17:20,  3.12s/it]

training loss: 1.3838447332382202
training loss: 1.2771186828613281
training loss: 1.2630977630615234
training loss: 1.3577536344528198


training:   4%|▍         | 3885/100000 [3:32:37<83:16:26,  3.12s/it]

training loss: 1.2160561084747314
training loss: 1.2102627754211426
training loss: 1.4529249668121338
training loss: 1.2830661535263062


training:   4%|▍         | 3889/100000 [3:32:50<83:21:57,  3.12s/it]

training loss: 1.2447073459625244
training loss: 1.413233995437622
training loss: 1.2733845710754395
training loss: 1.6250667572021484


training:   4%|▍         | 3893/100000 [3:33:02<83:21:21,  3.12s/it]

training loss: 1.3158148527145386
training loss: 1.206054449081421
training loss: 1.3214366436004639
training loss: 1.2952951192855835


training:   4%|▍         | 3897/100000 [3:33:15<83:18:29,  3.12s/it]

training loss: 1.526244878768921
training loss: 1.428716778755188
training loss: 1.596854567527771
training loss: 1.3351829051971436
training loss: 1.2563285827636719


training:   4%|▍         | 3901/100000 [3:33:27<83:53:37,  3.14s/it]

validation loss: 1.2968580722808838
training loss: 1.2065860033035278
training loss: 1.3310494422912598
training loss: 1.2979881763458252


training:   4%|▍         | 3905/100000 [3:33:40<83:39:54,  3.13s/it]

training loss: 1.406238317489624
training loss: 1.330392837524414
training loss: 1.2402362823486328
training loss: 1.3544070720672607


training:   4%|▍         | 3909/100000 [3:33:52<83:32:47,  3.13s/it]

training loss: 1.2532858848571777
training loss: 1.3027231693267822
training loss: 1.1040258407592773
training loss: 1.2283875942230225


training:   4%|▍         | 3913/100000 [3:34:05<83:27:49,  3.13s/it]

training loss: 1.3599623441696167
training loss: 1.2557326555252075
training loss: 1.2853690385818481
training loss: 1.239402413368225


training:   4%|▍         | 3917/100000 [3:34:17<83:24:10,  3.12s/it]

training loss: 1.4362835884094238
training loss: 1.3705543279647827
training loss: 1.4275028705596924
training loss: 1.175513505935669


training:   4%|▍         | 3921/100000 [3:34:30<83:21:16,  3.12s/it]

training loss: 1.47481369972229
training loss: 1.2848594188690186
training loss: 1.359223484992981
training loss: 1.2140824794769287


training:   4%|▍         | 3925/100000 [3:34:42<83:18:37,  3.12s/it]

training loss: 1.2685954570770264
training loss: 1.188995122909546
training loss: 1.2815518379211426
training loss: 1.398349642753601


training:   4%|▍         | 3929/100000 [3:34:55<83:16:29,  3.12s/it]

training loss: 1.4064574241638184
training loss: 1.4056891202926636
training loss: 1.316361665725708
training loss: 1.2119674682617188


training:   4%|▍         | 3933/100000 [3:35:07<83:17:01,  3.12s/it]

training loss: 1.4283573627471924
training loss: 1.3272724151611328
training loss: 1.202006220817566
training loss: 1.4373116493225098


training:   4%|▍         | 3937/100000 [3:35:20<83:16:39,  3.12s/it]

training loss: 1.1758017539978027
training loss: 1.3272020816802979
training loss: 1.2586795091629028
training loss: 1.2948944568634033


training:   4%|▍         | 3941/100000 [3:35:32<83:15:02,  3.12s/it]

training loss: 1.2638869285583496
training loss: 1.1265171766281128
training loss: 1.54150390625
training loss: 1.2820408344268799


training:   4%|▍         | 3945/100000 [3:35:45<83:13:26,  3.12s/it]

training loss: 1.2715530395507812
training loss: 1.3431169986724854
training loss: 1.2993040084838867
training loss: 1.1458241939544678


training:   4%|▍         | 3949/100000 [3:35:57<83:13:32,  3.12s/it]

training loss: 1.2479979991912842
training loss: 1.276818871498108
training loss: 1.5163437128067017
training loss: 1.3084790706634521


training:   4%|▍         | 3953/100000 [3:36:10<83:15:34,  3.12s/it]

training loss: 1.2663958072662354
training loss: 1.251212477684021
training loss: 1.4448521137237549
training loss: 1.3466606140136719


training:   4%|▍         | 3957/100000 [3:36:22<83:15:36,  3.12s/it]

training loss: 1.027306079864502
training loss: 1.2475515604019165
training loss: 1.1167709827423096
training loss: 1.200864553451538


training:   4%|▍         | 3961/100000 [3:36:35<83:14:08,  3.12s/it]

training loss: 1.332086443901062
training loss: 1.2192078828811646
training loss: 1.2954154014587402
training loss: 1.3052324056625366


training:   4%|▍         | 3965/100000 [3:36:47<83:13:39,  3.12s/it]

training loss: 1.2459152936935425
training loss: 1.205985188484192
training loss: 1.4393537044525146
training loss: 1.1081708669662476


training:   4%|▍         | 3969/100000 [3:37:00<83:13:00,  3.12s/it]

training loss: 1.307260513305664
training loss: 1.2489628791809082
training loss: 1.4456367492675781
training loss: 1.236608862876892


training:   4%|▍         | 3973/100000 [3:37:12<83:12:30,  3.12s/it]

training loss: 1.3855880498886108
training loss: 1.3434038162231445
training loss: 1.1839863061904907
training loss: 1.324745774269104


training:   4%|▍         | 3977/100000 [3:37:25<83:07:52,  3.12s/it]

training loss: 1.272134780883789
training loss: 1.418729305267334
training loss: 1.2831623554229736
training loss: 1.4045462608337402


training:   4%|▍         | 3981/100000 [3:37:37<83:07:31,  3.12s/it]

training loss: 1.2648506164550781
training loss: 1.3433011770248413
training loss: 1.2731904983520508
training loss: 1.4600627422332764


training:   4%|▍         | 3985/100000 [3:37:49<83:08:16,  3.12s/it]

training loss: 1.2625274658203125
training loss: 1.2348157167434692
training loss: 1.2889597415924072
training loss: 1.3223447799682617


training:   4%|▍         | 3989/100000 [3:38:02<83:08:28,  3.12s/it]

training loss: 1.2740213871002197
training loss: 1.2994184494018555
training loss: 1.2785844802856445
training loss: 1.1727590560913086


training:   4%|▍         | 3993/100000 [3:38:14<83:08:59,  3.12s/it]

training loss: 1.303354024887085
training loss: 1.146491527557373
training loss: 1.2888411283493042
training loss: 1.1065175533294678


training:   4%|▍         | 3997/100000 [3:38:27<83:09:13,  3.12s/it]

training loss: 1.2812011241912842
training loss: 1.38509202003479
training loss: 1.3750548362731934
training loss: 1.2708394527435303
training loss: 1.444349765777588
validation loss: 1.2030003070831299
%s 

 %s ("ome of the following claims:  * cookies are like [[Computer worm|worms]] and [[Computer virus|viruses]] (they can erase data from the user's hard disks); * cookies are a form of [[spyware]] (they can read personal information stored on the user's computer); * cookies generate [[popup]]s; * cookies are used for [[spam (electronic)|spam]]ming; * cookies are only used for [[advertising]].  Cookies are data, not code: they cannot erase or read information from the user's computer{{ref|slate}}. However, cookies allow for detecting the Web pages viewed by a user on a given site or set of sites. This information can be collected in an ''anonymous profile'' of the user. While such profiles do not contain personal information (name, address, etc.), they have been subject of some priva

training:   4%|▍         | 3997/100000 [3:38:40<83:09:13,  3.12s/it]

sed as such as the phenomena address content is even the chip, with example, machiness than with, elected ensures pridic key. The etchipors for systems have remarked as an eg.the fumption by one of the most pritized by the mat], and he performed regarding the origin unperfector, in a [[school]] at short season' the [[silogne]] news. These futhings using in the composer [[Turlia-botter]] choose ench for I ensure if the were villability much coordinant, blokishes is an individual &quot;[[some economy]] of the wife defeat cannot.skg * Al. 14 kabourds an ordina name group code {25, 1930 emother switched from the bybergy dub and name slaves by the minomer name of [[pungred-confring rows]] an [[ensure at Mus-Study Bolph Canning actor rebut fro fajures, in righters for secure facing deeply for all, in sit he was in the words of heats German as the suskey of the centuries were during-their short ''ups''. Sproject'' are more and its seaaces Associating entire.  [The base of a lavving fyedom sou

training:   4%|▍         | 4001/100000 [3:39:58<241:09:30,  9.04s/it]

Model saved at iteration 4000
training loss: 1.2282253503799438
training loss: 1.466895580291748
training loss: 1.318200945854187


training:   4%|▍         | 4005/100000 [3:40:11<193:38:25,  7.26s/it]

training loss: 1.27341628074646
training loss: 1.2736282348632812
training loss: 1.4852778911590576
training loss: 1.493151068687439


training:   4%|▍         | 4009/100000 [3:40:23<160:29:15,  6.02s/it]

training loss: 1.217825174331665
training loss: 1.198513150215149
training loss: 1.3117127418518066
training loss: 1.4010648727416992


training:   4%|▍         | 4013/100000 [3:40:36<137:17:17,  5.15s/it]

training loss: 1.3236892223358154
training loss: 1.1637022495269775
training loss: 1.1799442768096924
training loss: 1.309609293937683


training:   4%|▍         | 4017/100000 [3:40:48<121:01:00,  4.54s/it]

training loss: 1.5427902936935425
training loss: 1.425126552581787
training loss: 1.2830383777618408
training loss: 1.2250909805297852


training:   4%|▍         | 4021/100000 [3:41:01<109:39:49,  4.11s/it]

training loss: 1.3127678632736206
training loss: 1.3678972721099854
training loss: 1.5043234825134277
training loss: 1.3856377601623535


training:   4%|▍         | 4025/100000 [3:41:13<101:42:20,  3.81s/it]

training loss: 1.2182928323745728
training loss: 1.1386312246322632
training loss: 1.2411344051361084
training loss: 1.2304341793060303


training:   4%|▍         | 4029/100000 [3:41:26<96:07:16,  3.61s/it] 

training loss: 1.3151748180389404
training loss: 1.21602463722229
training loss: 1.3876807689666748
training loss: 1.1386655569076538


training:   4%|▍         | 4033/100000 [3:41:38<92:11:09,  3.46s/it]

training loss: 1.1924726963043213
training loss: 1.3396629095077515
training loss: 1.4000385999679565
training loss: 1.2232892513275146


training:   4%|▍         | 4037/100000 [3:41:51<89:26:44,  3.36s/it]

training loss: 1.2538708448410034
training loss: 1.191509485244751
training loss: 1.2522695064544678
training loss: 1.3007512092590332


training:   4%|▍         | 4041/100000 [3:42:03<87:34:03,  3.29s/it]

training loss: 1.299227237701416
training loss: 1.1549304723739624
training loss: 1.1374489068984985
training loss: 1.1186561584472656


training:   4%|▍         | 4045/100000 [3:42:16<86:14:24,  3.24s/it]

training loss: 1.2962758541107178
training loss: 1.3536121845245361
training loss: 1.2739981412887573
training loss: 1.1837642192840576


training:   4%|▍         | 4049/100000 [3:42:28<85:17:56,  3.20s/it]

training loss: 1.215461254119873
training loss: 1.2866030931472778
training loss: 1.3579391241073608
training loss: 1.2883636951446533


training:   4%|▍         | 4053/100000 [3:42:40<84:39:02,  3.18s/it]

training loss: 1.145266056060791
training loss: 1.3034043312072754
training loss: 1.0954527854919434
training loss: 1.286147952079773


training:   4%|▍         | 4057/100000 [3:42:53<84:11:24,  3.16s/it]

training loss: 1.3050580024719238
training loss: 1.3549284934997559
training loss: 1.3505091667175293
training loss: 1.1547621488571167


training:   4%|▍         | 4061/100000 [3:43:05<83:53:07,  3.15s/it]

training loss: 1.3402025699615479
training loss: 1.322163701057434
training loss: 1.66240394115448
training loss: 1.2428245544433594


training:   4%|▍         | 4065/100000 [3:43:18<83:39:14,  3.14s/it]

training loss: 1.265453815460205
training loss: 1.3907768726348877
training loss: 1.347408652305603
training loss: 1.2455296516418457


training:   4%|▍         | 4069/100000 [3:43:30<83:29:16,  3.13s/it]

training loss: 1.4347331523895264
training loss: 1.4751982688903809
training loss: 1.15455961227417
training loss: 1.2020397186279297


training:   4%|▍         | 4073/100000 [3:43:43<83:22:12,  3.13s/it]

training loss: 1.210510492324829
training loss: 1.290400505065918
training loss: 1.0576348304748535
training loss: 1.1526951789855957


training:   4%|▍         | 4077/100000 [3:43:55<83:17:00,  3.13s/it]

training loss: 1.1126296520233154
training loss: 1.3303550481796265
training loss: 1.2381314039230347
training loss: 1.4724650382995605


training:   4%|▍         | 4081/100000 [3:44:08<83:13:57,  3.12s/it]

training loss: 1.4000438451766968
training loss: 1.2008808851242065
training loss: 1.2884881496429443
training loss: 1.3492732048034668


training:   4%|▍         | 4085/100000 [3:44:20<83:12:30,  3.12s/it]

training loss: 1.2755833864212036
training loss: 1.2546677589416504
training loss: 1.2835158109664917
training loss: 1.4591957330703735


training:   4%|▍         | 4089/100000 [3:44:33<83:10:07,  3.12s/it]

training loss: 1.1232296228408813
training loss: 1.1809526681900024
training loss: 1.255911111831665
training loss: 1.281221628189087


training:   4%|▍         | 4093/100000 [3:44:45<83:09:31,  3.12s/it]

training loss: 1.321964979171753
training loss: 1.1647964715957642
training loss: 1.297903299331665
training loss: 1.2528927326202393


training:   4%|▍         | 4097/100000 [3:44:58<83:07:47,  3.12s/it]

training loss: 1.2907602787017822
training loss: 1.2983911037445068
training loss: 1.3082021474838257
training loss: 1.3061227798461914
training loss: 1.2272757291793823


training:   4%|▍         | 4101/100000 [3:45:10<83:43:34,  3.14s/it]

validation loss: 1.1845470666885376
training loss: 1.3059892654418945
training loss: 1.1662628650665283
training loss: 1.2840481996536255


training:   4%|▍         | 4105/100000 [3:45:23<83:29:52,  3.13s/it]

training loss: 1.0062133073806763
training loss: 1.3379549980163574
training loss: 1.6638813018798828
training loss: 1.2279354333877563


training:   4%|▍         | 4109/100000 [3:45:35<83:22:36,  3.13s/it]

training loss: 1.1593559980392456
training loss: 1.3512530326843262
training loss: 1.3312909603118896
training loss: 1.2145017385482788


training:   4%|▍         | 4113/100000 [3:45:48<83:16:59,  3.13s/it]

training loss: 1.2014356851577759
training loss: 1.4486935138702393
training loss: 1.2519742250442505
training loss: 1.423375129699707


training:   4%|▍         | 4117/100000 [3:46:00<83:12:29,  3.12s/it]

training loss: 1.4219626188278198
training loss: 1.1942836046218872
training loss: 1.1890757083892822
training loss: 1.3655645847320557


training:   4%|▍         | 4121/100000 [3:46:13<83:09:55,  3.12s/it]

training loss: 1.3530728816986084
training loss: 1.2101792097091675
training loss: 1.2606353759765625
training loss: 1.0845428705215454


training:   4%|▍         | 4125/100000 [3:46:25<83:09:56,  3.12s/it]

training loss: 1.4481602907180786
training loss: 1.2841806411743164
training loss: 1.3379508256912231
training loss: 1.3408207893371582


training:   4%|▍         | 4129/100000 [3:46:38<83:08:22,  3.12s/it]

training loss: 1.393333911895752
training loss: 1.2139354944229126
training loss: 1.4447362422943115
training loss: 1.1644868850708008


training:   4%|▍         | 4133/100000 [3:46:50<83:06:42,  3.12s/it]

training loss: 1.228613018989563
training loss: 1.294506549835205
training loss: 1.2665956020355225
training loss: 1.2655541896820068


training:   4%|▍         | 4137/100000 [3:47:03<83:05:10,  3.12s/it]

training loss: 1.2708740234375
training loss: 1.34228515625
training loss: 1.1866044998168945
training loss: 1.2675838470458984


training:   4%|▍         | 4141/100000 [3:47:15<83:04:18,  3.12s/it]

training loss: 1.2666192054748535
training loss: 1.2934114933013916
training loss: 1.1857964992523193
training loss: 1.3028346300125122


training:   4%|▍         | 4145/100000 [3:47:28<83:04:14,  3.12s/it]

training loss: 1.1557846069335938
training loss: 1.2275482416152954
training loss: 1.1929583549499512
training loss: 1.0388755798339844


training:   4%|▍         | 4149/100000 [3:47:40<83:04:45,  3.12s/it]

training loss: 1.2327754497528076
training loss: 1.2435634136199951
training loss: 1.3637815713882446
training loss: 1.3445942401885986


training:   4%|▍         | 4153/100000 [3:47:53<83:03:35,  3.12s/it]

training loss: 1.3786396980285645
training loss: 1.3338974714279175
training loss: 1.1729716062545776
training loss: 1.1684339046478271


training:   4%|▍         | 4157/100000 [3:48:05<83:03:07,  3.12s/it]

training loss: 1.272517442703247
training loss: 1.354074478149414
training loss: 1.2106951475143433
training loss: 1.157092809677124


training:   4%|▍         | 4161/100000 [3:48:18<83:00:10,  3.12s/it]

training loss: 1.1862845420837402
training loss: 1.3861194849014282
training loss: 1.2283657789230347
training loss: 1.2557777166366577


training:   4%|▍         | 4165/100000 [3:48:30<82:59:48,  3.12s/it]

training loss: 1.2783806324005127
training loss: 1.2879027128219604
training loss: 1.0746643543243408
training loss: 1.4158029556274414


training:   4%|▍         | 4169/100000 [3:48:43<83:01:56,  3.12s/it]

training loss: 1.2778692245483398
training loss: 1.3556334972381592
training loss: 1.2860291004180908
training loss: 1.2758930921554565


training:   4%|▍         | 4173/100000 [3:48:55<83:02:50,  3.12s/it]

training loss: 1.183565616607666
training loss: 1.3225908279418945
training loss: 1.2517123222351074
training loss: 1.3131440877914429


training:   4%|▍         | 4177/100000 [3:49:08<83:00:37,  3.12s/it]

training loss: 1.1143814325332642
training loss: 1.3920601606369019
training loss: 1.146773099899292
training loss: 1.2150412797927856


training:   4%|▍         | 4181/100000 [3:49:20<83:01:17,  3.12s/it]

training loss: 1.2777364253997803
training loss: 1.2458871603012085
training loss: 1.1252844333648682
training loss: 1.3460345268249512


training:   4%|▍         | 4185/100000 [3:49:32<83:00:50,  3.12s/it]

training loss: 1.4589576721191406
training loss: 1.1305913925170898
training loss: 1.2601855993270874
training loss: 1.0377106666564941


training:   4%|▍         | 4189/100000 [3:49:45<83:01:27,  3.12s/it]

training loss: 1.3788597583770752
training loss: 1.3489360809326172
training loss: 1.3510196208953857
training loss: 1.3311398029327393


training:   4%|▍         | 4193/100000 [3:49:57<83:04:05,  3.12s/it]

training loss: 1.3869807720184326
training loss: 1.278850793838501
training loss: 0.9915466904640198
training loss: 1.3564510345458984


training:   4%|▍         | 4197/100000 [3:50:10<83:03:03,  3.12s/it]

training loss: 1.4498343467712402
training loss: 1.2057666778564453
training loss: 1.3985692262649536
training loss: 1.3888022899627686


training:   4%|▍         | 4197/100000 [3:50:21<83:03:03,  3.12s/it]

training loss: 1.2584545612335205


training:   4%|▍         | 4201/100000 [3:50:23<83:37:47,  3.14s/it]

validation loss: 1.2979965209960938
training loss: 1.1898794174194336
training loss: 1.3480473756790161
training loss: 1.4724094867706299


training:   4%|▍         | 4205/100000 [3:50:35<83:21:11,  3.13s/it]

training loss: 1.3294572830200195
training loss: 1.3590912818908691
training loss: 1.3162593841552734
training loss: 1.0750373601913452


training:   4%|▍         | 4209/100000 [3:50:48<83:11:43,  3.13s/it]

training loss: 1.2057974338531494
training loss: 1.2564653158187866
training loss: 1.4286576509475708
training loss: 1.3149371147155762


training:   4%|▍         | 4213/100000 [3:51:00<83:09:25,  3.13s/it]

training loss: 1.3708555698394775
training loss: 1.2254761457443237
training loss: 1.3241201639175415
training loss: 1.226920247077942


training:   4%|▍         | 4217/100000 [3:51:13<83:06:09,  3.12s/it]

training loss: 1.2945560216903687
training loss: 1.0955116748809814
training loss: 1.0776987075805664
training loss: 1.229443073272705


training:   4%|▍         | 4221/100000 [3:51:25<83:03:37,  3.12s/it]

training loss: 1.5080838203430176
training loss: 1.3782131671905518
training loss: 1.4111624956130981
training loss: 1.2284598350524902


training:   4%|▍         | 4225/100000 [3:51:38<83:01:44,  3.12s/it]

training loss: 1.324957251548767
training loss: 1.2042006254196167
training loss: 1.2019224166870117
training loss: 1.2176803350448608


training:   4%|▍         | 4229/100000 [3:51:50<83:00:54,  3.12s/it]

training loss: 1.316254734992981
training loss: 1.1473091840744019
training loss: 1.3643912076950073
training loss: 1.2975988388061523


training:   4%|▍         | 4233/100000 [3:52:02<83:00:43,  3.12s/it]

training loss: 1.0838700532913208
training loss: 1.452541470527649
training loss: 1.3549437522888184
training loss: 0.9608932733535767


training:   4%|▍         | 4237/100000 [3:52:15<83:00:21,  3.12s/it]

training loss: 1.2851605415344238
training loss: 1.4502812623977661
training loss: 1.251999020576477
training loss: 1.2137607336044312


training:   4%|▍         | 4241/100000 [3:52:27<83:00:08,  3.12s/it]

training loss: 1.3243812322616577
training loss: 1.2947278022766113
training loss: 1.1320788860321045
training loss: 1.1059279441833496


training:   4%|▍         | 4245/100000 [3:52:40<82:57:58,  3.12s/it]

training loss: 1.2991538047790527
training loss: 1.1572424173355103
training loss: 1.3715739250183105
training loss: 1.0314093828201294


training:   4%|▍         | 4249/100000 [3:52:52<82:56:58,  3.12s/it]

training loss: 1.1866763830184937
training loss: 1.1876070499420166
training loss: 1.2760629653930664
training loss: 1.2636126279830933


training:   4%|▍         | 4253/100000 [3:53:05<82:57:38,  3.12s/it]

training loss: 1.3615715503692627
training loss: 1.179060459136963
training loss: 1.4190821647644043
training loss: 1.3571386337280273


training:   4%|▍         | 4257/100000 [3:53:17<82:56:32,  3.12s/it]

training loss: 1.2628650665283203
training loss: 1.3416283130645752
training loss: 1.4143340587615967
training loss: 1.3435786962509155


training:   4%|▍         | 4261/100000 [3:53:30<82:51:02,  3.12s/it]

training loss: 1.2625374794006348
training loss: 1.341843605041504
training loss: 1.3348805904388428
training loss: 1.3667734861373901


training:   4%|▍         | 4265/100000 [3:53:42<82:50:07,  3.11s/it]

training loss: 1.2456670999526978
training loss: 1.10172700881958
training loss: 1.2572401762008667
training loss: 1.1222929954528809


training:   4%|▍         | 4269/100000 [3:53:55<82:52:29,  3.12s/it]

training loss: 1.4021137952804565
training loss: 1.1947263479232788
training loss: 1.1333597898483276
training loss: 1.5418964624404907


training:   4%|▍         | 4273/100000 [3:54:07<82:52:58,  3.12s/it]

training loss: 1.2176979780197144
training loss: 1.0484797954559326
training loss: 1.2418789863586426
training loss: 1.1407239437103271


training:   4%|▍         | 4277/100000 [3:54:20<82:54:36,  3.12s/it]

training loss: 1.1101409196853638
training loss: 1.1237318515777588
training loss: 1.4623055458068848
training loss: 1.182861089706421


training:   4%|▍         | 4281/100000 [3:54:32<82:52:39,  3.12s/it]

training loss: 1.476280927658081
training loss: 1.363197922706604
training loss: 1.0230293273925781
training loss: 1.1572415828704834


training:   4%|▍         | 4285/100000 [3:54:45<82:53:18,  3.12s/it]

training loss: 1.1823387145996094
training loss: 1.2803853750228882
training loss: 1.2056506872177124
training loss: 1.404207706451416


training:   4%|▍         | 4289/100000 [3:54:57<82:53:11,  3.12s/it]

training loss: 1.1951780319213867
training loss: 1.4177279472351074
training loss: 1.2025201320648193
training loss: 1.225160837173462


training:   4%|▍         | 4293/100000 [3:55:10<82:52:45,  3.12s/it]

training loss: 1.2489511966705322
training loss: 1.1225191354751587
training loss: 1.1704161167144775
training loss: 1.297655701637268


training:   4%|▍         | 4297/100000 [3:55:22<82:54:25,  3.12s/it]

training loss: 1.2401564121246338
training loss: 0.9247024059295654
training loss: 1.2455496788024902
training loss: 1.4204199314117432
training loss: 1.27655029296875


training:   4%|▍         | 4301/100000 [3:55:35<83:32:09,  3.14s/it]

validation loss: 1.3158602714538574
training loss: 1.1463820934295654
training loss: 1.218506097793579
training loss: 1.024090051651001


training:   4%|▍         | 4305/100000 [3:55:47<83:19:35,  3.13s/it]

training loss: 1.1416176557540894
training loss: 1.3247344493865967
training loss: 1.2831776142120361
training loss: 1.1928184032440186


training:   4%|▍         | 4309/100000 [3:56:00<83:11:18,  3.13s/it]

training loss: 1.213242530822754
training loss: 1.145864486694336
training loss: 1.2909845113754272
training loss: 1.313386082649231


training:   4%|▍         | 4313/100000 [3:56:12<83:06:15,  3.13s/it]

training loss: 1.3634252548217773
training loss: 1.3596246242523193
training loss: 1.206984281539917
training loss: 1.1861934661865234


training:   4%|▍         | 4317/100000 [3:56:25<83:03:58,  3.13s/it]

training loss: 1.3128745555877686
training loss: 0.9761421084403992
training loss: 1.0991014242172241
training loss: 1.2833658456802368


training:   4%|▍         | 4321/100000 [3:56:37<83:00:35,  3.12s/it]

training loss: 1.1966168880462646
training loss: 1.3736164569854736
training loss: 1.2396889925003052
training loss: 1.2551844120025635


training:   4%|▍         | 4325/100000 [3:56:50<82:58:25,  3.12s/it]

training loss: 1.180774450302124
training loss: 1.1631739139556885
training loss: 1.4597208499908447
training loss: 1.251469612121582


training:   4%|▍         | 4329/100000 [3:57:02<82:56:11,  3.12s/it]

training loss: 1.1621874570846558
training loss: 1.289874792098999
training loss: 1.6417657136917114
training loss: 1.462020993232727


training:   4%|▍         | 4333/100000 [3:57:15<82:57:04,  3.12s/it]

training loss: 1.1774396896362305
training loss: 1.3623974323272705
training loss: 1.2401654720306396
training loss: 1.2580736875534058


training:   4%|▍         | 4337/100000 [3:57:27<82:55:18,  3.12s/it]

training loss: 1.3860132694244385
training loss: 1.3054767847061157
training loss: 1.2997760772705078
training loss: 1.320046067237854


training:   4%|▍         | 4341/100000 [3:57:40<82:53:03,  3.12s/it]

training loss: 1.3691916465759277
training loss: 1.2650580406188965
training loss: 1.244847297668457
training loss: 1.1920567750930786


training:   4%|▍         | 4345/100000 [3:57:52<82:50:58,  3.12s/it]

training loss: 1.2013359069824219
training loss: 1.2085776329040527
training loss: 0.8795087337493896
training loss: 1.3707647323608398


training:   4%|▍         | 4349/100000 [3:58:04<82:48:25,  3.12s/it]

training loss: 1.1707143783569336
training loss: 1.352927803993225
training loss: 1.364882230758667
training loss: 1.4179222583770752


training:   4%|▍         | 4353/100000 [3:58:17<82:48:57,  3.12s/it]

training loss: 1.2081120014190674
training loss: 1.2717680931091309
training loss: 1.3724671602249146
training loss: 1.3383312225341797


training:   4%|▍         | 4357/100000 [3:58:29<82:49:33,  3.12s/it]

training loss: 1.1907002925872803
training loss: 1.2333065271377563
training loss: 1.3120185136795044
training loss: 1.1073862314224243


training:   4%|▍         | 4361/100000 [3:58:42<82:50:23,  3.12s/it]

training loss: 1.3756438493728638
training loss: 1.2346421480178833
training loss: 1.3256514072418213
training loss: 1.3377286195755005


training:   4%|▍         | 4365/100000 [3:58:54<82:51:45,  3.12s/it]

training loss: 1.2656242847442627
training loss: 1.3023284673690796
training loss: 1.101696491241455
training loss: 1.2425038814544678


training:   4%|▍         | 4369/100000 [3:59:07<82:54:19,  3.12s/it]

training loss: 1.3492980003356934
training loss: 1.1932111978530884
training loss: 1.3703702688217163
training loss: 1.2970123291015625


training:   4%|▍         | 4373/100000 [3:59:19<82:54:30,  3.12s/it]

training loss: 1.100449562072754
training loss: 1.2845728397369385
training loss: 1.29119074344635
training loss: 1.1321868896484375


training:   4%|▍         | 4377/100000 [3:59:32<82:53:08,  3.12s/it]

training loss: 1.1203069686889648
training loss: 1.3670806884765625
training loss: 1.3223378658294678
training loss: 1.30527925491333


training:   4%|▍         | 4381/100000 [3:59:44<82:51:52,  3.12s/it]

training loss: 0.9983225464820862
training loss: 1.2633025646209717
training loss: 1.2631947994232178
training loss: 1.3082778453826904


training:   4%|▍         | 4385/100000 [3:59:57<82:52:39,  3.12s/it]

training loss: 1.2934352159500122
training loss: 1.1995232105255127
training loss: 1.2365097999572754
training loss: 1.4226552248001099


training:   4%|▍         | 4389/100000 [4:00:09<82:52:56,  3.12s/it]

training loss: 1.2591828107833862
training loss: 1.2907299995422363
training loss: 1.274116039276123
training loss: 1.3481698036193848


training:   4%|▍         | 4393/100000 [4:00:22<82:50:48,  3.12s/it]

training loss: 1.2473602294921875
training loss: 1.1762027740478516
training loss: 1.297281265258789
training loss: 1.2290412187576294


training:   4%|▍         | 4397/100000 [4:00:34<82:47:48,  3.12s/it]

training loss: 1.426358699798584
training loss: 1.2948429584503174
training loss: 1.2705035209655762
training loss: 1.2843104600906372
training loss: 1.2981548309326172


training:   4%|▍         | 4401/100000 [4:00:47<83:24:48,  3.14s/it]

validation loss: 1.3512814044952393
training loss: 1.1910183429718018
training loss: 1.289712905883789
training loss: 1.3603849411010742


training:   4%|▍         | 4405/100000 [4:00:59<83:11:42,  3.13s/it]

training loss: 1.3175113201141357
training loss: 1.2626755237579346
training loss: 1.2227470874786377
training loss: 1.1825792789459229


training:   4%|▍         | 4409/100000 [4:01:12<83:05:45,  3.13s/it]

training loss: 1.3023239374160767
training loss: 1.419276237487793
training loss: 1.2701870203018188
training loss: 1.3246550559997559


training:   4%|▍         | 4413/100000 [4:01:24<83:00:22,  3.13s/it]

training loss: 1.2552821636199951
training loss: 1.1387965679168701
training loss: 1.231967568397522
training loss: 1.1842213869094849


training:   4%|▍         | 4417/100000 [4:01:37<82:56:43,  3.12s/it]

training loss: 1.3656092882156372
training loss: 1.3287062644958496
training loss: 1.2554521560668945
training loss: 1.3264896869659424


training:   4%|▍         | 4421/100000 [4:01:49<82:51:11,  3.12s/it]

training loss: 1.3895351886749268
training loss: 1.3766255378723145
training loss: 1.1446943283081055
training loss: 1.2594879865646362


training:   4%|▍         | 4425/100000 [4:02:02<82:47:49,  3.12s/it]

training loss: 1.1943702697753906
training loss: 1.2540510892868042
training loss: 1.2600466012954712
training loss: 1.2142667770385742


training:   4%|▍         | 4429/100000 [4:02:14<82:44:23,  3.12s/it]

training loss: 1.3531665802001953
training loss: 1.2136173248291016
training loss: 1.3833200931549072
training loss: 1.35177743434906


training:   4%|▍         | 4433/100000 [4:02:27<82:42:17,  3.12s/it]

training loss: 1.1657872200012207
training loss: 1.138622760772705
training loss: 1.2692803144454956
training loss: 1.26588773727417


training:   4%|▍         | 4437/100000 [4:02:39<82:41:12,  3.11s/it]

training loss: 1.0458428859710693
training loss: 1.1326453685760498
training loss: 1.3372361660003662
training loss: 1.1029255390167236


training:   4%|▍         | 4441/100000 [4:02:52<82:42:45,  3.12s/it]

training loss: 1.2205963134765625
training loss: 1.2349330186843872
training loss: 1.21220064163208
training loss: 1.167685627937317


training:   4%|▍         | 4445/100000 [4:03:04<82:44:16,  3.12s/it]

training loss: 0.9929584860801697
training loss: 1.0543588399887085
training loss: 1.1811774969100952
training loss: 1.1479538679122925


training:   4%|▍         | 4449/100000 [4:03:17<82:45:41,  3.12s/it]

training loss: 1.2875856161117554
training loss: 1.491077184677124
training loss: 1.10471773147583
training loss: 1.2300013303756714


training:   4%|▍         | 4453/100000 [4:03:29<82:46:37,  3.12s/it]

training loss: 1.2629557847976685
training loss: 1.1005549430847168
training loss: 1.126638650894165
training loss: 0.9816262722015381


training:   4%|▍         | 4457/100000 [4:03:42<82:46:06,  3.12s/it]

training loss: 1.2907252311706543
training loss: 1.323381781578064
training loss: 1.078903317451477
training loss: 1.2141889333724976


training:   4%|▍         | 4461/100000 [4:03:54<82:46:00,  3.12s/it]

training loss: 1.3591910600662231
training loss: 1.3068091869354248
training loss: 1.3231801986694336
training loss: 1.2207590341567993


training:   4%|▍         | 4465/100000 [4:04:06<82:45:02,  3.12s/it]

training loss: 1.4125030040740967
training loss: 1.2447843551635742
training loss: 1.298844575881958
training loss: 1.2007076740264893


training:   4%|▍         | 4469/100000 [4:04:19<82:43:10,  3.12s/it]

training loss: 1.222988486289978
training loss: 1.2198359966278076
training loss: 1.2574779987335205
training loss: 1.230528473854065


training:   4%|▍         | 4473/100000 [4:04:31<82:43:06,  3.12s/it]

training loss: 1.247288703918457
training loss: 1.1634857654571533
training loss: 1.342698335647583
training loss: 1.2777650356292725


training:   4%|▍         | 4477/100000 [4:04:44<82:43:08,  3.12s/it]

training loss: 1.4145262241363525
training loss: 1.3065789937973022
training loss: 1.274601697921753
training loss: 1.250144124031067


training:   4%|▍         | 4481/100000 [4:04:56<82:43:06,  3.12s/it]

training loss: 1.3890647888183594
training loss: 1.370233416557312
training loss: 1.1451847553253174
training loss: 0.9220697283744812


training:   4%|▍         | 4485/100000 [4:05:09<82:43:36,  3.12s/it]

training loss: 1.2379209995269775
training loss: 1.447009563446045
training loss: 1.2322137355804443
training loss: 1.2041263580322266


training:   4%|▍         | 4489/100000 [4:05:21<82:44:29,  3.12s/it]

training loss: 1.3065035343170166
training loss: 1.4020572900772095
training loss: 1.2877901792526245
training loss: 1.3175631761550903


training:   4%|▍         | 4493/100000 [4:05:34<82:47:37,  3.12s/it]

training loss: 1.3281455039978027
training loss: 1.439374327659607
training loss: 1.186812400817871
training loss: 1.2605924606323242


training:   4%|▍         | 4497/100000 [4:05:46<82:46:35,  3.12s/it]

training loss: 1.037712574005127
training loss: 1.244585633277893
training loss: 1.1629523038864136
training loss: 1.2598921060562134
training loss: 1.3304601907730103
validation loss: 1.248845100402832
%s 

 %s ("d Kingdom)|Rear Admiral of the Blue]], the ninth highest rank in the Royal Navy. Later in the year, during an unsuccessful expedition to conquer [[Santa Cruz de Tenerife]], he was shot in the right arm with a musket ball, fracturing his [[humerus]] bone in multiple places. Since medical science of the day counseled amputation for almost all serious limb wounds (to prevent gangrene, and subsequent death) Nelson lost almost his entire right arm, and was unfit for duty until mid-December. He referred to the stub as &quot;my fin.&quot;  This was not his only reverse. In December 1796, on leaving [[Elba]] for [[Gibraltar]], Nelson transferred his flag to the [[frigate]] ''Minerve'' (of French construction, commanded by Captain Cockburn). A Spanish frigate, ''Santa Sabina'', was ca

training:   4%|▍         | 4497/100000 [4:06:01<82:46:35,  3.12s/it]

e Command Spain in he wed at7ly slown from Elsethbs and imprisoned them. Decembered part under confuscabing the councidered cycle science in an invition; [[Utlowed]], [[J.jpg|onstition|Generican Southwest]], and sold pictures tense.  Zomen, until this, during that highly exert, and once two [[Ferbwin 118 Learne1]] in one of a solid-personium that the mineroist common calling ''Golli Claurea'' and the continentary above. Accident the [[Batta Novejo]] written below's death for the other Federal Common [[Pewelli Inc.]]; Highly now epicted in the Royal Region of Never meaning over the Federal of January the region to the Unspecies Iven Pinal Member 1, [[1705 Minist Its Ancient Union|Balmon]] [[Basing of JFW Federa]], [[Eukov's Due Tablic African Monton's effective [[opina of Oncement O. visiÃ³]]. For a specific for African most Colin Wesley was not feathern elected a costume of [[Amaki Nomizations]] was upriested to the [[Argentino of Third Unit Mied.]] lived to the [[cover-federals|Euris,

training:   5%|▍         | 4501/100000 [4:07:18<240:04:27,  9.05s/it]

Model saved at iteration 4500
training loss: 1.2433903217315674
training loss: 1.2274961471557617
training loss: 1.3713243007659912


training:   5%|▍         | 4505/100000 [4:07:30<192:47:34,  7.27s/it]

training loss: 1.2781786918640137
training loss: 1.1399198770523071
training loss: 1.2791788578033447
training loss: 1.3296921253204346


training:   5%|▍         | 4509/100000 [4:07:43<159:46:26,  6.02s/it]

training loss: 1.2727129459381104
training loss: 1.2372097969055176
training loss: 1.3789124488830566
training loss: 1.162672996520996


training:   5%|▍         | 4513/100000 [4:07:55<136:41:27,  5.15s/it]

training loss: 1.209442138671875
training loss: 1.1738641262054443
training loss: 1.194892168045044
training loss: 1.1351001262664795


training:   5%|▍         | 4517/100000 [4:08:08<120:36:50,  4.55s/it]

training loss: 1.1072534322738647
training loss: 1.2511813640594482
training loss: 1.3920650482177734
training loss: 1.1604928970336914


training:   5%|▍         | 4521/100000 [4:08:20<109:16:35,  4.12s/it]

training loss: 1.0625417232513428
training loss: 1.2711308002471924
training loss: 1.2490289211273193
training loss: 1.2507504224777222


training:   5%|▍         | 4525/100000 [4:08:33<101:19:43,  3.82s/it]

training loss: 1.2417538166046143
training loss: 1.0909409523010254
training loss: 1.2486169338226318
training loss: 1.298352837562561


training:   5%|▍         | 4529/100000 [4:08:45<95:44:06,  3.61s/it] 

training loss: 1.3368223905563354
training loss: 1.4174387454986572
training loss: 1.2805204391479492
training loss: 1.2057991027832031


training:   5%|▍         | 4533/100000 [4:08:58<91:50:43,  3.46s/it]

training loss: 1.2016592025756836
training loss: 1.2900277376174927
training loss: 1.2516670227050781
training loss: 1.4176150560379028


training:   5%|▍         | 4537/100000 [4:09:10<89:06:09,  3.36s/it]

training loss: 1.0498536825180054
training loss: 1.2509161233901978
training loss: 1.007902979850769
training loss: 1.2601324319839478


training:   5%|▍         | 4541/100000 [4:09:23<87:10:03,  3.29s/it]

training loss: 1.2792296409606934
training loss: 1.0769963264465332
training loss: 1.3844108581542969
training loss: 1.2675623893737793


training:   5%|▍         | 4545/100000 [4:09:35<85:49:27,  3.24s/it]

training loss: 1.025364637374878
training loss: 1.1955177783966064
training loss: 1.3411264419555664
training loss: 1.1742819547653198


training:   5%|▍         | 4549/100000 [4:09:48<84:52:49,  3.20s/it]

training loss: 1.166609764099121
training loss: 1.130753993988037
training loss: 1.2335470914840698
training loss: 1.491908073425293


training:   5%|▍         | 4553/100000 [4:10:00<84:14:34,  3.18s/it]

training loss: 1.246074914932251
training loss: 1.1308188438415527
training loss: 1.1951848268508911
training loss: 1.2247051000595093


training:   5%|▍         | 4557/100000 [4:10:13<83:47:06,  3.16s/it]

training loss: 1.1381127834320068
training loss: 1.2980676889419556
training loss: 1.443685531616211
training loss: 1.2473050355911255


training:   5%|▍         | 4561/100000 [4:10:25<83:27:12,  3.15s/it]

training loss: 1.1484243869781494
training loss: 1.2600507736206055
training loss: 0.8869434595108032
training loss: 1.199554443359375


training:   5%|▍         | 4565/100000 [4:10:38<83:12:51,  3.14s/it]

training loss: 1.3186957836151123
training loss: 1.3085500001907349
training loss: 1.2167632579803467
training loss: 1.3367974758148193


training:   5%|▍         | 4569/100000 [4:10:50<83:02:42,  3.13s/it]

training loss: 1.3028504848480225
training loss: 1.205622911453247
training loss: 1.265476942062378
training loss: 1.1628429889678955


training:   5%|▍         | 4573/100000 [4:11:02<82:55:51,  3.13s/it]

training loss: 1.2344807386398315
training loss: 0.8849417567253113
training loss: 1.2024306058883667
training loss: 1.1886537075042725


training:   5%|▍         | 4577/100000 [4:11:15<82:50:49,  3.13s/it]

training loss: 1.297621726989746
training loss: 1.2641924619674683
training loss: 1.245859146118164
training loss: 1.2688875198364258


training:   5%|▍         | 4581/100000 [4:11:27<82:47:06,  3.12s/it]

training loss: 1.156848430633545
training loss: 1.0487725734710693
training loss: 1.3022875785827637
training loss: 1.2300665378570557


training:   5%|▍         | 4585/100000 [4:11:40<82:43:59,  3.12s/it]

training loss: 1.3825733661651611
training loss: 1.169525146484375
training loss: 1.3710429668426514
training loss: 1.1295945644378662


training:   5%|▍         | 4589/100000 [4:11:52<82:42:19,  3.12s/it]

training loss: 1.253847599029541
training loss: 1.2698462009429932
training loss: 1.233335018157959
training loss: 1.114571452140808


training:   5%|▍         | 4593/100000 [4:12:05<82:38:47,  3.12s/it]

training loss: 1.2860462665557861
training loss: 1.357144832611084
training loss: 1.1247950792312622
training loss: 1.2996577024459839


training:   5%|▍         | 4597/100000 [4:12:17<82:37:33,  3.12s/it]

training loss: 1.1469659805297852
training loss: 1.2275391817092896
training loss: 1.1039658784866333
training loss: 1.2602325677871704
training loss: 1.2065876722335815


training:   5%|▍         | 4601/100000 [4:12:30<83:14:23,  3.14s/it]

validation loss: 1.4019217491149902
training loss: 1.3292006254196167
training loss: 1.208647608757019
training loss: 1.060011863708496


training:   5%|▍         | 4605/100000 [4:12:43<83:01:20,  3.13s/it]

training loss: 1.2242012023925781
training loss: 1.2317423820495605
training loss: 1.1847466230392456
training loss: 1.1145070791244507


training:   5%|▍         | 4609/100000 [4:12:55<82:54:19,  3.13s/it]

training loss: 1.3177841901779175
training loss: 1.287483811378479
training loss: 1.2975208759307861
training loss: 1.308607816696167


training:   5%|▍         | 4613/100000 [4:13:07<82:49:41,  3.13s/it]

training loss: 1.104838490486145
training loss: 1.3144409656524658
training loss: 1.1359715461730957
training loss: 1.4400824308395386


training:   5%|▍         | 4617/100000 [4:13:20<82:46:20,  3.12s/it]

training loss: 1.5306637287139893
training loss: 1.3209645748138428
training loss: 1.3254609107971191
training loss: 1.2841055393218994


training:   5%|▍         | 4621/100000 [4:13:32<82:45:11,  3.12s/it]

training loss: 1.0939348936080933
training loss: 1.2913718223571777
training loss: 1.312058448791504
training loss: 1.232303500175476


training:   5%|▍         | 4625/100000 [4:13:45<82:42:27,  3.12s/it]

training loss: 1.1966325044631958
training loss: 1.140629768371582
training loss: 1.13546884059906
training loss: 1.3248471021652222


training:   5%|▍         | 4629/100000 [4:13:57<82:40:19,  3.12s/it]

training loss: 1.3143655061721802
training loss: 1.2334811687469482
training loss: 1.195671796798706
training loss: 1.2853355407714844


training:   5%|▍         | 4633/100000 [4:14:10<82:39:31,  3.12s/it]

training loss: 1.2986228466033936
training loss: 1.1001601219177246
training loss: 1.2958571910858154
training loss: 1.3257801532745361


training:   5%|▍         | 4637/100000 [4:14:22<82:36:06,  3.12s/it]

training loss: 1.0881115198135376
training loss: 1.5524471998214722
training loss: 1.2918745279312134
training loss: 1.2277427911758423


training:   5%|▍         | 4641/100000 [4:14:35<82:33:01,  3.12s/it]

training loss: 1.3037773370742798
training loss: 0.915346086025238
training loss: 1.3893533945083618
training loss: 1.2045252323150635


training:   5%|▍         | 4645/100000 [4:14:47<82:32:08,  3.12s/it]

training loss: 1.1990665197372437
training loss: 1.1921919584274292
training loss: 1.230930209159851
training loss: 1.3598663806915283


training:   5%|▍         | 4649/100000 [4:15:00<82:33:23,  3.12s/it]

training loss: 1.2115552425384521
training loss: 1.251501441001892
training loss: 1.0913230180740356
training loss: 1.2740784883499146


training:   5%|▍         | 4653/100000 [4:15:12<82:34:13,  3.12s/it]

training loss: 1.3621360063552856
training loss: 1.2073594331741333
training loss: 1.3898606300354004
training loss: 1.105994462966919


training:   5%|▍         | 4657/100000 [4:15:25<82:31:24,  3.12s/it]

training loss: 1.0463061332702637
training loss: 1.328737735748291
training loss: 1.2592616081237793
training loss: 1.236289143562317


training:   5%|▍         | 4661/100000 [4:15:37<82:33:26,  3.12s/it]

training loss: 0.9604610204696655
training loss: 1.2589596509933472
training loss: 0.9751523733139038
training loss: 1.2242612838745117


training:   5%|▍         | 4665/100000 [4:15:50<82:35:01,  3.12s/it]

training loss: 1.26890230178833
training loss: 1.20621657371521
training loss: 1.1305320262908936
training loss: 1.3789540529251099


training:   5%|▍         | 4669/100000 [4:16:02<82:34:27,  3.12s/it]

training loss: 1.2623752355575562
training loss: 1.2221983671188354
training loss: 1.3609524965286255
training loss: 1.2216049432754517


training:   5%|▍         | 4673/100000 [4:16:15<82:34:19,  3.12s/it]

training loss: 1.1754003763198853
training loss: 1.3022221326828003
training loss: 0.8895875215530396
training loss: 1.305954933166504


training:   5%|▍         | 4677/100000 [4:16:27<82:31:42,  3.12s/it]

training loss: 1.220998764038086
training loss: 1.130224585533142
training loss: 1.2186229228973389
training loss: 1.2750144004821777


training:   5%|▍         | 4681/100000 [4:16:39<82:27:51,  3.11s/it]

training loss: 1.2807362079620361
training loss: 1.2794411182403564
training loss: 1.2074880599975586
training loss: 1.3295518159866333


training:   5%|▍         | 4685/100000 [4:16:52<82:28:35,  3.12s/it]

training loss: 1.1435117721557617
training loss: 1.1638703346252441
training loss: 1.2775399684906006
training loss: 1.265782356262207


training:   5%|▍         | 4689/100000 [4:17:04<82:30:49,  3.12s/it]

training loss: 1.0959808826446533
training loss: 1.2513233423233032
training loss: 1.2509933710098267
training loss: 1.04746413230896


training:   5%|▍         | 4693/100000 [4:17:17<82:31:31,  3.12s/it]

training loss: 1.1478374004364014
training loss: 1.3081790208816528
training loss: 1.3598251342773438
training loss: 1.1662046909332275


training:   5%|▍         | 4697/100000 [4:17:29<82:31:48,  3.12s/it]

training loss: 1.2685784101486206
training loss: 1.3714979887008667
training loss: 1.3154819011688232
training loss: 1.2217981815338135


training:   5%|▍         | 4697/100000 [4:17:41<82:31:48,  3.12s/it]

training loss: 1.118179202079773


training:   5%|▍         | 4701/100000 [4:17:42<83:08:11,  3.14s/it]

validation loss: 1.135229229927063
training loss: 1.318681001663208
training loss: 1.1751508712768555
training loss: 1.3166953325271606


training:   5%|▍         | 4705/100000 [4:17:55<82:56:40,  3.13s/it]

training loss: 1.3685357570648193
training loss: 1.2445954084396362
training loss: 1.2495951652526855
training loss: 1.2187902927398682


training:   5%|▍         | 4709/100000 [4:18:07<82:48:26,  3.13s/it]

training loss: 1.3222041130065918
training loss: 1.206562876701355
training loss: 1.3083322048187256
training loss: 1.3120934963226318


training:   5%|▍         | 4713/100000 [4:18:20<82:43:34,  3.13s/it]

training loss: 1.1578457355499268
training loss: 1.175048589706421
training loss: 1.238086462020874
training loss: 1.210322380065918


training:   5%|▍         | 4717/100000 [4:18:32<82:40:11,  3.12s/it]

training loss: 1.2686865329742432
training loss: 1.3038463592529297
training loss: 1.3574786186218262
training loss: 1.2393479347229004


training:   5%|▍         | 4721/100000 [4:18:44<82:37:55,  3.12s/it]

training loss: 1.261974573135376
training loss: 1.3555793762207031
training loss: 1.3921473026275635
training loss: 1.1679284572601318


training:   5%|▍         | 4725/100000 [4:18:57<82:34:09,  3.12s/it]

training loss: 1.2649412155151367
training loss: 1.238358974456787
training loss: 1.203545093536377
training loss: 1.6915467977523804


training:   5%|▍         | 4729/100000 [4:19:09<82:32:34,  3.12s/it]

training loss: 1.2802085876464844
training loss: 1.273604393005371
training loss: 1.3065863847732544
training loss: 1.150176763534546


training:   5%|▍         | 4733/100000 [4:19:22<82:31:34,  3.12s/it]

training loss: 1.033753514289856
training loss: 1.0654608011245728
training loss: 1.3026589155197144
training loss: 1.161356806755066


training:   5%|▍         | 4737/100000 [4:19:34<82:29:45,  3.12s/it]

training loss: 1.3300368785858154
training loss: 1.2188429832458496
training loss: 1.1590144634246826
training loss: 1.1600428819656372


training:   5%|▍         | 4741/100000 [4:19:47<82:29:48,  3.12s/it]

training loss: 1.1790261268615723
training loss: 1.0813498497009277
training loss: 1.2596373558044434
training loss: 1.2721389532089233


training:   5%|▍         | 4745/100000 [4:19:59<82:29:50,  3.12s/it]

training loss: 1.27898371219635
training loss: 1.1496368646621704
training loss: 1.2595467567443848
training loss: 1.2143115997314453


training:   5%|▍         | 4749/100000 [4:20:12<82:29:29,  3.12s/it]

training loss: 1.1727261543273926
training loss: 1.3525769710540771
training loss: 1.042236328125
training loss: 1.1426681280136108


training:   5%|▍         | 4753/100000 [4:20:24<82:30:05,  3.12s/it]

training loss: 1.273062825202942
training loss: 1.2480857372283936
training loss: 1.0864875316619873
training loss: 1.2456899881362915


training:   5%|▍         | 4757/100000 [4:20:37<82:29:41,  3.12s/it]

training loss: 1.0922493934631348
training loss: 1.1716171503067017
training loss: 1.2712409496307373
training loss: 1.3177021741867065


training:   5%|▍         | 4761/100000 [4:20:49<82:29:41,  3.12s/it]

training loss: 1.2754268646240234
training loss: 0.9865444898605347
training loss: 1.3825345039367676
training loss: 1.1810883283615112


training:   5%|▍         | 4765/100000 [4:21:02<82:29:44,  3.12s/it]

training loss: 1.0864557027816772
training loss: 1.2216131687164307
training loss: 1.148306131362915
training loss: 1.27799391746521


training:   5%|▍         | 4769/100000 [4:21:14<82:30:27,  3.12s/it]

training loss: 1.1974356174468994
training loss: 1.0699551105499268
training loss: 1.4701037406921387
training loss: 1.2936092615127563


training:   5%|▍         | 4773/100000 [4:21:27<82:30:56,  3.12s/it]

training loss: 1.2658907175064087
training loss: 1.1424795389175415
training loss: 1.2926701307296753
training loss: 1.1690750122070312


training:   5%|▍         | 4777/100000 [4:21:39<82:30:19,  3.12s/it]

training loss: 1.021038293838501
training loss: 1.332984447479248
training loss: 1.1589863300323486
training loss: 1.2749884128570557


training:   5%|▍         | 4781/100000 [4:21:52<82:29:31,  3.12s/it]

training loss: 1.2711260318756104
training loss: 1.223818302154541
training loss: 1.2657923698425293
training loss: 1.2906723022460938


training:   5%|▍         | 4785/100000 [4:22:04<82:29:08,  3.12s/it]

training loss: 0.9184356927871704
training loss: 1.200870394706726
training loss: 1.1785011291503906
training loss: 1.150650143623352


training:   5%|▍         | 4789/100000 [4:22:17<82:31:34,  3.12s/it]

training loss: 1.3352015018463135
training loss: 1.2573274374008179
training loss: 1.281509518623352
training loss: 1.2558284997940063


training:   5%|▍         | 4793/100000 [4:22:29<82:32:19,  3.12s/it]

training loss: 1.2484526634216309
training loss: 1.272899866104126
training loss: 1.223541021347046
training loss: 1.3707079887390137


training:   5%|▍         | 4797/100000 [4:22:41<82:31:04,  3.12s/it]

training loss: 1.0774765014648438
training loss: 1.1925084590911865
training loss: 1.1030476093292236
training loss: 1.1721101999282837
training loss: 1.2010279893875122


training:   5%|▍         | 4801/100000 [4:22:54<83:05:44,  3.14s/it]

validation loss: 1.113546371459961
training loss: 1.2055895328521729
training loss: 1.2309439182281494
training loss: 1.1280282735824585


training:   5%|▍         | 4805/100000 [4:23:07<82:52:23,  3.13s/it]

training loss: 1.0959537029266357
training loss: 1.173989176750183
training loss: 1.3073397874832153
training loss: 1.1414185762405396


training:   5%|▍         | 4809/100000 [4:23:19<82:44:38,  3.13s/it]

training loss: 1.271596908569336
training loss: 1.1796115636825562
training loss: 1.1504809856414795
training loss: 1.411854863166809


training:   5%|▍         | 4813/100000 [4:23:32<82:37:52,  3.13s/it]

training loss: 1.253739595413208
training loss: 1.206956386566162
training loss: 1.4286566972732544
training loss: 1.2917119264602661


training:   5%|▍         | 4817/100000 [4:23:44<82:33:05,  3.12s/it]

training loss: 1.3236918449401855
training loss: 1.1612948179244995
training loss: 1.187060832977295
training loss: 1.2054978609085083


training:   5%|▍         | 4821/100000 [4:23:57<82:30:44,  3.12s/it]

training loss: 1.2197303771972656
training loss: 1.2508838176727295
training loss: 1.0765433311462402
training loss: 0.9876847267150879


training:   5%|▍         | 4825/100000 [4:24:09<82:29:32,  3.12s/it]

training loss: 1.011969804763794
training loss: 1.2155053615570068
training loss: 1.306175708770752
training loss: 1.1503448486328125


training:   5%|▍         | 4829/100000 [4:24:22<82:25:39,  3.12s/it]

training loss: 1.2236905097961426
training loss: 1.2225515842437744
training loss: 1.3049595355987549
training loss: 1.1545872688293457


training:   5%|▍         | 4833/100000 [4:24:34<82:21:32,  3.12s/it]

training loss: 0.9893121719360352
training loss: 1.2017362117767334
training loss: 1.238113284111023
training loss: 1.3196197748184204


training:   5%|▍         | 4837/100000 [4:24:46<82:22:04,  3.12s/it]

training loss: 1.3577353954315186
training loss: 1.268958568572998
training loss: 1.3405370712280273
training loss: 1.1874505281448364


training:   5%|▍         | 4841/100000 [4:24:59<82:20:34,  3.12s/it]

training loss: 1.2926771640777588
training loss: 1.2841442823410034
training loss: 1.2356468439102173
training loss: 1.2200372219085693


training:   5%|▍         | 4845/100000 [4:25:11<82:22:00,  3.12s/it]

training loss: 1.0843534469604492
training loss: 1.1687440872192383
training loss: 1.1734113693237305
training loss: 1.235948920249939


training:   5%|▍         | 4849/100000 [4:25:24<82:23:28,  3.12s/it]

training loss: 1.1760884523391724
training loss: 1.2391031980514526
training loss: 1.133284091949463
training loss: 1.3383166790008545


training:   5%|▍         | 4853/100000 [4:25:36<82:24:22,  3.12s/it]

training loss: 1.008840560913086
training loss: 1.2045562267303467
training loss: 1.0084844827651978
training loss: 1.2062548398971558


training:   5%|▍         | 4857/100000 [4:25:49<82:25:48,  3.12s/it]

training loss: 1.3296401500701904
training loss: 1.232410192489624
training loss: 1.2265385389328003
training loss: 1.215935468673706


training:   5%|▍         | 4861/100000 [4:26:01<82:26:05,  3.12s/it]

training loss: 1.2582653760910034
training loss: 1.1786925792694092
training loss: 1.1052957773208618
training loss: 1.2189439535140991


training:   5%|▍         | 4865/100000 [4:26:14<82:26:02,  3.12s/it]

training loss: 1.155869960784912
training loss: 1.1983705759048462
training loss: 1.0753744840621948
training loss: 1.3599207401275635


training:   5%|▍         | 4869/100000 [4:26:26<82:25:12,  3.12s/it]

training loss: 1.2670704126358032
training loss: 1.206217646598816
training loss: 1.205617904663086
training loss: 1.1409770250320435


training:   5%|▍         | 4873/100000 [4:26:39<82:25:21,  3.12s/it]

training loss: 1.2164952754974365
training loss: 1.2864259481430054
training loss: 1.3216825723648071
training loss: 1.2419490814208984


training:   5%|▍         | 4877/100000 [4:26:51<82:22:57,  3.12s/it]

training loss: 1.343517780303955
training loss: 1.208794116973877
training loss: 1.2103227376937866
training loss: 1.0114902257919312


training:   5%|▍         | 4881/100000 [4:27:04<82:24:28,  3.12s/it]

training loss: 1.1115301847457886
training loss: 1.2082792520523071
training loss: 1.230836272239685
training loss: 1.3179658651351929


training:   5%|▍         | 4885/100000 [4:27:16<82:24:11,  3.12s/it]

training loss: 1.2537965774536133
training loss: 1.297544002532959
training loss: 1.172489881515503
training loss: 1.3258495330810547


training:   5%|▍         | 4889/100000 [4:27:29<82:23:27,  3.12s/it]

training loss: 1.025561809539795
training loss: 1.229669213294983
training loss: 1.0997915267944336
training loss: 1.1405143737792969


training:   5%|▍         | 4893/100000 [4:27:41<82:23:22,  3.12s/it]

training loss: 1.189021110534668
training loss: 1.1749694347381592
training loss: 1.1951104402542114
training loss: 1.3120718002319336


training:   5%|▍         | 4897/100000 [4:27:54<82:23:34,  3.12s/it]

training loss: 1.159801959991455
training loss: 1.2203660011291504
training loss: 1.082422137260437
training loss: 1.2941335439682007
training loss: 1.1291453838348389


training:   5%|▍         | 4901/100000 [4:28:06<83:00:32,  3.14s/it]

validation loss: 1.1975839138031006
training loss: 1.2283321619033813
training loss: 1.1152889728546143
training loss: 1.1650562286376953


training:   5%|▍         | 4905/100000 [4:28:19<82:47:17,  3.13s/it]

training loss: 1.269616723060608
training loss: 1.2381737232208252
training loss: 1.3121130466461182
training loss: 1.0955042839050293


training:   5%|▍         | 4909/100000 [4:28:31<82:37:01,  3.13s/it]

training loss: 1.3162004947662354
training loss: 1.0172488689422607
training loss: 1.2904062271118164
training loss: 0.911568820476532


training:   5%|▍         | 4913/100000 [4:28:44<82:32:07,  3.12s/it]

training loss: 1.2289330959320068
training loss: 1.2052909135818481
training loss: 1.2194615602493286
training loss: 1.3514900207519531


training:   5%|▍         | 4917/100000 [4:28:56<82:26:57,  3.12s/it]

training loss: 1.2286055088043213
training loss: 0.8261909484863281
training loss: 1.0965023040771484
training loss: 1.3006460666656494


training:   5%|▍         | 4921/100000 [4:29:09<82:24:29,  3.12s/it]

training loss: 1.2461267709732056
training loss: 1.2960689067840576
training loss: 1.2795190811157227
training loss: 1.130873680114746


training:   5%|▍         | 4925/100000 [4:29:21<82:22:38,  3.12s/it]

training loss: 1.1045178174972534
training loss: 1.2191599607467651
training loss: 1.1176867485046387
training loss: 1.4286651611328125


training:   5%|▍         | 4929/100000 [4:29:34<82:19:25,  3.12s/it]

training loss: 1.2651629447937012
training loss: 1.2858788967132568
training loss: 1.2973895072937012
training loss: 1.121215581893921


training:   5%|▍         | 4933/100000 [4:29:46<82:17:14,  3.12s/it]

training loss: 1.2748425006866455
training loss: 1.3213902711868286
training loss: 1.2090758085250854
training loss: 1.1205546855926514


training:   5%|▍         | 4937/100000 [4:29:58<82:18:22,  3.12s/it]

training loss: 1.183262825012207
training loss: 1.4901989698410034
training loss: 1.2910939455032349
training loss: 1.0888447761535645


training:   5%|▍         | 4941/100000 [4:30:11<82:21:31,  3.12s/it]

training loss: 1.2755025625228882
training loss: 1.070476770401001
training loss: 1.1895952224731445
training loss: 1.2305103540420532


training:   5%|▍         | 4945/100000 [4:30:23<82:18:09,  3.12s/it]

training loss: 1.4080393314361572
training loss: 1.4203522205352783
training loss: 1.090425729751587
training loss: 1.290618658065796


training:   5%|▍         | 4949/100000 [4:30:36<82:16:44,  3.12s/it]

training loss: 1.1500803232192993
training loss: 1.144069790840149
training loss: 1.144863486289978
training loss: 1.3131678104400635


training:   5%|▍         | 4953/100000 [4:30:48<82:17:33,  3.12s/it]

training loss: 1.230846643447876
training loss: 0.9672524929046631
training loss: 1.2475297451019287
training loss: 1.0179805755615234


training:   5%|▍         | 4957/100000 [4:31:01<82:15:49,  3.12s/it]

training loss: 1.1307339668273926
training loss: 1.4097967147827148
training loss: 1.2722477912902832
training loss: 1.3067865371704102


training:   5%|▍         | 4961/100000 [4:31:13<82:16:52,  3.12s/it]

training loss: 1.2511038780212402
training loss: 1.1074657440185547
training loss: 1.0787134170532227
training loss: 1.2691137790679932


training:   5%|▍         | 4965/100000 [4:31:26<82:21:34,  3.12s/it]

training loss: 1.2524598836898804
training loss: 1.0461559295654297
training loss: 1.1373071670532227
training loss: 1.191433310508728


training:   5%|▍         | 4969/100000 [4:31:38<82:21:51,  3.12s/it]

training loss: 0.9609374403953552
training loss: 1.2637330293655396
training loss: 1.280609130859375
training loss: 1.269120216369629


training:   5%|▍         | 4973/100000 [4:31:51<82:20:07,  3.12s/it]

training loss: 1.2509753704071045
training loss: 1.278672218322754
training loss: 1.2363934516906738
training loss: 1.3114862442016602


training:   5%|▍         | 4977/100000 [4:32:03<82:19:12,  3.12s/it]

training loss: 1.273850440979004
training loss: 1.0756322145462036
training loss: 1.1111189126968384
training loss: 1.1912829875946045


training:   5%|▍         | 4981/100000 [4:32:16<82:16:33,  3.12s/it]

training loss: 1.3356966972351074
training loss: 1.4402987957000732
training loss: 1.184606671333313
training loss: 1.2862359285354614


training:   5%|▍         | 4985/100000 [4:32:28<82:09:31,  3.11s/it]

training loss: 1.1943849325180054
training loss: 1.267389178276062
training loss: 0.8916258811950684
training loss: 1.0025800466537476


training:   5%|▍         | 4989/100000 [4:32:41<82:06:31,  3.11s/it]

training loss: 1.0349842309951782
training loss: 1.1727311611175537
training loss: 0.8897700309753418
training loss: 1.1584243774414062


training:   5%|▍         | 4993/100000 [4:32:53<82:09:39,  3.11s/it]

training loss: 1.2365097999572754
training loss: 0.9410778284072876
training loss: 1.2455124855041504
training loss: 1.2287638187408447


training:   5%|▍         | 4997/100000 [4:33:05<82:12:24,  3.12s/it]

training loss: 1.136096715927124
training loss: 1.104694128036499
training loss: 1.077134370803833
training loss: 1.2916213274002075
training loss: 1.3403401374816895
validation loss: 0.988419234752655
%s 

 %s ('has]] [[hu:Idaho]] [[mk:Ð\x90Ñ\x98Ð´Ð°Ñ\x85Ð¾]] [[nl:Idaho]] [[ja:ã\x82¢ã\x82¤ã\x83\x80ã\x83\x9bå·\x9e]] [[no:Idaho]] [[nn:Idaho]] [[os:Ð\x90Ð¹Ð´Ð°Ñ\x85Ð¾]] [[pl:Idaho]] [[pt:Idaho]] [[ru:Ð\x90Ð¹Ð´Ð°Ñ\x85Ð¾]] [[sq:Idaho]] [[simple:Idaho]] [[sl:Idaho]] [[sr:Ð\x90Ñ\x98Ð´Ð°Ñ\x85Ð¾]] [[fi:Idaho]] [[sv:Idaho]] [[th:à¸¡à¸¥à¸£à¸±à¸\x90à¹\x84à¸\xadà¸\x94à¸²à¹\x82à¸®]] [[uk:Ð\x90Ð¹Ð´Ð°Ñ\x85Ð¾]] [[zh:æ\x84\x9bå¾·è\x8d·å·\x9e]]</text>     </revision>   </page>   <page>     <title>-ism</title>     <id>14608</id>     <revision>       <id>39979982</id>       <timestamp>2006-02-17T06:15:06Z</timestamp>       <contributor>         <username>Elthon73</username>         <id>764879</id>       </contributor>       <text xml:space="preserve">{{Otheruses3|ISM}} &lt;!--needed because [[Ism]] redirec

training:   5%|▍         | 4997/100000 [4:33:21<82:12:24,  3.12s/it]

 ''Harohaise'', from [[Oranguages]], wherea'', hap the automosome of [[Carfield (Ignatura)]]. Andelesrael english had used these rives, where here-working automost had during thems uncerstate longs every at practice, a laiding of the  stories is liked by the [[additional deathboarch]]s show a [[Gorden (and the Voint)]], but interest in sound males.   Personality, which made increased by gestined a &quot;distinctive-fine-entral will be &quot;P-up by the mainflow of these best act officials&quot; (&quot;way restrality&quot; was possible that; and a more invention until the UK part of Italian festion effects. The caused also be renaired part of the American system as carnivolors. The English fin't self-is rannan, prime principle as a lominance which all call on begin, so the resumptions and difference and similar [[engineling lbel]]s.  A construes from a  Holy 2003 the [[Jewish lead]] in 1767, then the [[Limer Naques of ''Unberggisco Wah I was essensible in Garench years. The first has sa

training:   5%|▌         | 5001/100000 [4:34:37<238:57:19,  9.06s/it]

Model saved at iteration 5000
training loss: 1.2318227291107178
training loss: 1.2507306337356567
training loss: 0.8564969897270203


training:   5%|▌         | 5005/100000 [4:34:50<191:49:03,  7.27s/it]

training loss: 1.0783053636550903
training loss: 1.0225176811218262
training loss: 1.2564432621002197
training loss: 1.2510157823562622


training:   5%|▌         | 5009/100000 [4:35:02<158:55:07,  6.02s/it]

training loss: 1.122753620147705
training loss: 1.2622294425964355
training loss: 1.2087517976760864
training loss: 1.1593161821365356


training:   5%|▌         | 5013/100000 [4:35:14<135:55:31,  5.15s/it]

training loss: 1.2768150568008423
training loss: 1.3532735109329224
training loss: 1.216071367263794
training loss: 1.196932077407837


training:   5%|▌         | 5017/100000 [4:35:27<119:49:44,  4.54s/it]

training loss: 1.2671055793762207
training loss: 1.2826241254806519
training loss: 1.2992182970046997
training loss: 1.1405930519104004


training:   5%|▌         | 5021/100000 [4:35:39<108:33:26,  4.11s/it]

training loss: 1.1168677806854248
training loss: 1.1412484645843506
training loss: 1.2530951499938965
training loss: 1.304481863975525


training:   5%|▌         | 5025/100000 [4:35:52<100:40:33,  3.82s/it]

training loss: 1.2289848327636719
training loss: 1.24808931350708
training loss: 1.255488634109497
training loss: 1.4660179615020752


training:   5%|▌         | 5029/100000 [4:36:04<95:11:00,  3.61s/it] 

training loss: 1.4374455213546753
training loss: 1.2065397500991821
training loss: 1.1548020839691162
training loss: 1.2033209800720215


training:   5%|▌         | 5033/100000 [4:36:17<91:17:39,  3.46s/it]

training loss: 1.289067029953003
training loss: 1.3644273281097412
training loss: 1.2692625522613525
training loss: 1.212642788887024


training:   5%|▌         | 5037/100000 [4:36:29<88:34:29,  3.36s/it]

training loss: 1.1493642330169678
training loss: 1.2901570796966553
training loss: 1.1929715871810913
training loss: 1.3462663888931274


training:   5%|▌         | 5041/100000 [4:36:42<86:38:42,  3.28s/it]

training loss: 1.221084475517273
training loss: 1.2276945114135742
training loss: 1.2419328689575195
training loss: 1.4600203037261963


training:   5%|▌         | 5045/100000 [4:36:54<85:17:20,  3.23s/it]

training loss: 1.250842809677124
training loss: 1.111686110496521
training loss: 1.3434194326400757
training loss: 1.315925121307373


training:   5%|▌         | 5049/100000 [4:37:07<84:23:31,  3.20s/it]

training loss: 1.1476874351501465
training loss: 0.9156028032302856
training loss: 1.3683451414108276
training loss: 1.3189384937286377


training:   5%|▌         | 5053/100000 [4:37:19<83:40:48,  3.17s/it]

training loss: 1.2245550155639648
training loss: 1.178451418876648
training loss: 1.23155677318573
training loss: 1.2277178764343262


training:   5%|▌         | 5057/100000 [4:37:32<83:13:05,  3.16s/it]

training loss: 1.173119306564331
training loss: 1.3449124097824097
training loss: 1.3314069509506226
training loss: 1.318233609199524


training:   5%|▌         | 5061/100000 [4:37:44<82:55:05,  3.14s/it]

training loss: 1.1528756618499756
training loss: 1.25980544090271
training loss: 1.0840861797332764
training loss: 1.1343944072723389


training:   5%|▌         | 5065/100000 [4:37:57<82:42:39,  3.14s/it]

training loss: 1.4122097492218018
training loss: 1.1137199401855469
training loss: 1.1686434745788574
training loss: 1.1687405109405518


training:   5%|▌         | 5069/100000 [4:38:09<82:37:38,  3.13s/it]

training loss: 1.1773922443389893
training loss: 1.248964548110962
training loss: 1.1166157722473145
training loss: 1.2240853309631348


training:   5%|▌         | 5073/100000 [4:38:22<82:31:37,  3.13s/it]

training loss: 1.3320716619491577
training loss: 1.1726089715957642
training loss: 1.5144739151000977
training loss: 1.0920710563659668


training:   5%|▌         | 5077/100000 [4:38:34<82:25:36,  3.13s/it]

training loss: 1.250312328338623
training loss: 1.2398065328598022
training loss: 1.148132085800171
training loss: 1.1811039447784424


training:   5%|▌         | 5081/100000 [4:38:47<82:22:21,  3.12s/it]

training loss: 1.3297886848449707
training loss: 1.3826230764389038
training loss: 1.2655459642410278
training loss: 1.2290170192718506


training:   5%|▌         | 5085/100000 [4:38:59<82:19:43,  3.12s/it]

training loss: 1.2069830894470215
training loss: 1.2241759300231934
training loss: 1.1318079233169556
training loss: 1.17423415184021


training:   5%|▌         | 5089/100000 [4:39:11<82:20:40,  3.12s/it]

training loss: 1.123753309249878
training loss: 1.083770513534546
training loss: 1.0391265153884888
training loss: 1.2948570251464844


training:   5%|▌         | 5093/100000 [4:39:24<82:19:07,  3.12s/it]

training loss: 1.0819169282913208
training loss: 1.267948865890503
training loss: 1.226457953453064
training loss: 1.3596941232681274


training:   5%|▌         | 5097/100000 [4:39:36<82:16:53,  3.12s/it]

training loss: 1.3511552810668945
training loss: 1.0293937921524048
training loss: 1.2812563180923462
training loss: 1.2618894577026367
training loss: 1.1321461200714111


training:   5%|▌         | 5101/100000 [4:39:49<82:50:57,  3.14s/it]

validation loss: 1.2943066358566284
training loss: 1.2953441143035889
training loss: 1.1268095970153809
training loss: 1.1722609996795654


training:   5%|▌         | 5105/100000 [4:40:02<82:37:40,  3.13s/it]

training loss: 1.2165296077728271
training loss: 1.3123805522918701
training loss: 1.1617063283920288
training loss: 1.3590211868286133


training:   5%|▌         | 5109/100000 [4:40:14<82:29:09,  3.13s/it]

training loss: 1.2342718839645386
training loss: 1.2980891466140747
training loss: 1.2467658519744873
training loss: 1.2302732467651367


training:   5%|▌         | 5113/100000 [4:40:27<82:25:12,  3.13s/it]

training loss: 1.1963882446289062
training loss: 1.1974616050720215
training loss: 1.3651211261749268
training loss: 1.2335295677185059


training:   5%|▌         | 5117/100000 [4:40:39<82:19:32,  3.12s/it]

training loss: 1.165057897567749
training loss: 1.1429665088653564
training loss: 1.0944207906723022
training loss: 1.1420167684555054


training:   5%|▌         | 5121/100000 [4:40:52<82:17:23,  3.12s/it]

training loss: 1.1259262561798096
training loss: 1.156544804573059
training loss: 1.1772966384887695
training loss: 1.1100349426269531


training:   5%|▌         | 5125/100000 [4:41:04<82:15:21,  3.12s/it]

training loss: 1.2258806228637695
training loss: 1.267491340637207
training loss: 1.13670015335083
training loss: 1.0324864387512207


training:   5%|▌         | 5129/100000 [4:41:17<82:13:52,  3.12s/it]

training loss: 1.1764394044876099
training loss: 1.030513882637024
training loss: 1.2840708494186401
training loss: 1.2498670816421509


training:   5%|▌         | 5133/100000 [4:41:29<82:11:35,  3.12s/it]

training loss: 1.2790563106536865
training loss: 1.4201258420944214
training loss: 1.1286861896514893
training loss: 1.1666955947875977


training:   5%|▌         | 5137/100000 [4:41:41<82:10:10,  3.12s/it]

training loss: 1.1989320516586304
training loss: 1.06979501247406
training loss: 1.3639581203460693
training loss: 1.4656537771224976


training:   5%|▌         | 5141/100000 [4:41:54<82:07:50,  3.12s/it]

training loss: 1.306762933731079
training loss: 1.1459126472473145
training loss: 1.3542619943618774
training loss: 1.2003483772277832


training:   5%|▌         | 5145/100000 [4:42:06<82:08:49,  3.12s/it]

training loss: 1.1996326446533203
training loss: 1.292452096939087
training loss: 1.3225631713867188
training loss: 1.115950345993042


training:   5%|▌         | 5149/100000 [4:42:19<82:08:54,  3.12s/it]

training loss: 1.2024035453796387
training loss: 1.1955270767211914
training loss: 1.087417483329773
training loss: 1.1871143579483032


training:   5%|▌         | 5153/100000 [4:42:31<82:08:44,  3.12s/it]

training loss: 1.2028394937515259
training loss: 1.1454370021820068
training loss: 1.213118553161621
training loss: 1.335492491722107


training:   5%|▌         | 5157/100000 [4:42:44<82:10:47,  3.12s/it]

training loss: 1.3461647033691406
training loss: 1.1481444835662842
training loss: 1.1840307712554932
training loss: 1.294570803642273


training:   5%|▌         | 5161/100000 [4:42:56<82:11:15,  3.12s/it]

training loss: 1.099341869354248
training loss: 1.231907844543457
training loss: 1.2214581966400146
training loss: 1.1937788724899292


training:   5%|▌         | 5165/100000 [4:43:09<82:10:34,  3.12s/it]

training loss: 1.2101001739501953
training loss: 1.222661018371582
training loss: 1.133758306503296
training loss: 1.2678133249282837


training:   5%|▌         | 5169/100000 [4:43:21<82:07:31,  3.12s/it]

training loss: 1.4926434755325317
training loss: 1.3244457244873047
training loss: 1.3603981733322144
training loss: 1.2902271747589111


training:   5%|▌         | 5173/100000 [4:43:34<82:08:19,  3.12s/it]

training loss: 1.2867769002914429
training loss: 1.2813769578933716
training loss: 1.2186775207519531
training loss: 1.2405738830566406


training:   5%|▌         | 5177/100000 [4:43:46<82:06:27,  3.12s/it]

training loss: 1.216590166091919
training loss: 1.2614150047302246
training loss: 1.254166841506958
training loss: 0.9658735990524292


training:   5%|▌         | 5181/100000 [4:43:59<82:08:09,  3.12s/it]

training loss: 1.2280316352844238
training loss: 1.217552661895752
training loss: 1.1400644779205322
training loss: 1.1748018264770508


training:   5%|▌         | 5185/100000 [4:44:11<82:07:48,  3.12s/it]

training loss: 1.2614368200302124
training loss: 1.1838088035583496
training loss: 1.305166482925415
training loss: 1.1791437864303589


training:   5%|▌         | 5189/100000 [4:44:24<82:08:39,  3.12s/it]

training loss: 1.299279808998108
training loss: 1.1793235540390015
training loss: 1.4048773050308228
training loss: 1.4244706630706787


training:   5%|▌         | 5193/100000 [4:44:36<82:07:45,  3.12s/it]

training loss: 1.2386445999145508
training loss: 1.2049205303192139
training loss: 1.0509710311889648
training loss: 1.2616877555847168


training:   5%|▌         | 5197/100000 [4:44:49<82:07:44,  3.12s/it]

training loss: 1.2301554679870605
training loss: 1.2746374607086182
training loss: 1.1073782444000244
training loss: 1.330075740814209


training:   5%|▌         | 5197/100000 [4:45:01<82:07:44,  3.12s/it]

training loss: 1.256768822669983


training:   5%|▌         | 5201/100000 [4:45:01<82:48:04,  3.14s/it]

validation loss: 1.491060733795166
training loss: 1.379412293434143
training loss: 1.2067724466323853
training loss: 1.0636323690414429


training:   5%|▌         | 5205/100000 [4:45:14<82:33:18,  3.14s/it]

training loss: 0.9410255551338196
training loss: 1.0229321718215942
training loss: 1.1163618564605713
training loss: 1.2166697978973389


training:   5%|▌         | 5209/100000 [4:45:26<82:25:04,  3.13s/it]

training loss: 1.2886295318603516
training loss: 0.8762418627738953
training loss: 1.0186052322387695
training loss: 1.401869297027588


training:   5%|▌         | 5213/100000 [4:45:39<82:19:20,  3.13s/it]

training loss: 1.179681658744812
training loss: 1.1839956045150757
training loss: 1.3296730518341064
training loss: 1.2412617206573486


training:   5%|▌         | 5217/100000 [4:45:51<82:13:05,  3.12s/it]

training loss: 1.2908164262771606
training loss: 1.1525039672851562
training loss: 1.2588376998901367
training loss: 1.2481629848480225


training:   5%|▌         | 5221/100000 [4:46:04<82:09:26,  3.12s/it]

training loss: 1.3736064434051514
training loss: 1.335118293762207
training loss: 1.2358123064041138
training loss: 1.1779488325119019


training:   5%|▌         | 5225/100000 [4:46:16<82:11:15,  3.12s/it]

training loss: 1.0747227668762207
training loss: 1.1613261699676514
training loss: 1.1794819831848145
training loss: 1.0245413780212402


training:   5%|▌         | 5229/100000 [4:46:29<82:09:54,  3.12s/it]

training loss: 1.3346377611160278
training loss: 1.1393506526947021
training loss: 1.0395545959472656
training loss: 0.8641864061355591


training:   5%|▌         | 5233/100000 [4:46:41<82:07:57,  3.12s/it]

training loss: 1.2310582399368286
training loss: 1.3657863140106201
training loss: 1.1344068050384521
training loss: 1.1837068796157837


training:   5%|▌         | 5237/100000 [4:46:54<82:06:55,  3.12s/it]

training loss: 1.1817117929458618
training loss: 1.2200510501861572
training loss: 1.0717823505401611
training loss: 1.198121190071106


training:   5%|▌         | 5241/100000 [4:47:06<82:06:59,  3.12s/it]

training loss: 1.1635618209838867
training loss: 1.2602348327636719
training loss: 1.2569866180419922
training loss: 1.2175912857055664


training:   5%|▌         | 5245/100000 [4:47:19<82:05:11,  3.12s/it]

training loss: 1.3475887775421143
training loss: 1.2043474912643433
training loss: 1.1948394775390625
training loss: 1.2313330173492432


training:   5%|▌         | 5249/100000 [4:47:31<82:02:05,  3.12s/it]

training loss: 1.3318219184875488
training loss: 1.1809577941894531
training loss: 1.266019344329834
training loss: 1.2622060775756836


training:   5%|▌         | 5253/100000 [4:47:43<82:03:15,  3.12s/it]

training loss: 1.3215575218200684
training loss: 1.025665283203125
training loss: 1.128544807434082
training loss: 1.2679637670516968


training:   5%|▌         | 5257/100000 [4:47:56<82:03:37,  3.12s/it]

training loss: 1.1576560735702515
training loss: 1.1217149496078491
training loss: 1.0586916208267212
training loss: 1.2812321186065674


training:   5%|▌         | 5261/100000 [4:48:08<82:03:22,  3.12s/it]

training loss: 1.3705769777297974
training loss: 1.3213974237442017
training loss: 1.202191948890686
training loss: 1.2408349514007568


training:   5%|▌         | 5265/100000 [4:48:21<82:02:11,  3.12s/it]

training loss: 1.1295275688171387
training loss: 1.2767977714538574
training loss: 1.1415863037109375
training loss: 1.3701626062393188


training:   5%|▌         | 5269/100000 [4:48:33<82:00:13,  3.12s/it]

training loss: 1.327824354171753
training loss: 1.0910570621490479
training loss: 1.2646143436431885
training loss: 1.3770684003829956


training:   5%|▌         | 5273/100000 [4:48:46<81:56:41,  3.11s/it]

training loss: 1.3024113178253174
training loss: 1.2154884338378906
training loss: 1.2074912786483765
training loss: 1.2418389320373535


training:   5%|▌         | 5277/100000 [4:48:58<81:56:35,  3.11s/it]

training loss: 1.2091137170791626
training loss: 1.1453917026519775
training loss: 1.1759787797927856
training loss: 1.2092305421829224


training:   5%|▌         | 5281/100000 [4:49:11<81:58:37,  3.12s/it]

training loss: 1.207430362701416
training loss: 1.0710206031799316
training loss: 1.3458607196807861
training loss: 1.2207969427108765


training:   5%|▌         | 5285/100000 [4:49:23<82:05:31,  3.12s/it]

training loss: 1.1706116199493408
training loss: 0.9824654459953308
training loss: 1.2560725212097168
training loss: 1.0788767337799072


training:   5%|▌         | 5289/100000 [4:49:36<82:04:29,  3.12s/it]

training loss: 1.21437406539917
training loss: 1.277853012084961
training loss: 1.085989236831665
training loss: 1.374704122543335


training:   5%|▌         | 5293/100000 [4:49:48<82:03:19,  3.12s/it]

training loss: 1.2967066764831543
training loss: 1.1547420024871826
training loss: 1.3075312376022339
training loss: 1.1124902963638306


training:   5%|▌         | 5297/100000 [4:50:01<82:03:03,  3.12s/it]

training loss: 1.1110907793045044
training loss: 1.175943374633789
training loss: 1.1985598802566528
training loss: 1.005814552307129


training:   5%|▌         | 5297/100000 [4:50:11<82:03:03,  3.12s/it]

training loss: 1.1687312126159668


training:   5%|▌         | 5301/100000 [4:50:13<82:37:49,  3.14s/it]

validation loss: 1.2623403072357178
training loss: 1.1118193864822388
training loss: 1.1552963256835938
training loss: 1.20468270778656


training:   5%|▌         | 5305/100000 [4:50:26<82:25:12,  3.13s/it]

training loss: 1.224430799484253
training loss: 1.3038772344589233
training loss: 1.1837263107299805
training loss: 1.202065348625183


training:   5%|▌         | 5309/100000 [4:50:38<82:17:11,  3.13s/it]

training loss: 1.4065730571746826
training loss: 1.1637916564941406
training loss: 1.0314974784851074
training loss: 1.1679658889770508


training:   5%|▌         | 5313/100000 [4:50:51<82:12:53,  3.13s/it]

training loss: 1.2921148538589478
training loss: 1.2179985046386719
training loss: 1.2074278593063354
training loss: 1.3432085514068604


training:   5%|▌         | 5317/100000 [4:51:03<82:08:42,  3.12s/it]

training loss: 1.1644119024276733
training loss: 1.174363374710083
training loss: 1.2165452241897583
training loss: 1.0003211498260498


training:   5%|▌         | 5321/100000 [4:51:16<82:03:57,  3.12s/it]

training loss: 1.0864052772521973
training loss: 1.3058488368988037
training loss: 1.0969247817993164
training loss: 1.0488920211791992


training:   5%|▌         | 5325/100000 [4:51:28<82:00:07,  3.12s/it]

training loss: 1.0159462690353394
training loss: 1.1046123504638672
training loss: 1.1503673791885376
training loss: 1.2540409564971924


training:   5%|▌         | 5329/100000 [4:51:41<81:59:33,  3.12s/it]

training loss: 1.3346292972564697
training loss: 1.1204710006713867
training loss: 1.1579480171203613
training loss: 1.2412970066070557


training:   5%|▌         | 5333/100000 [4:51:53<82:00:22,  3.12s/it]

training loss: 1.2433252334594727
training loss: 1.2127492427825928
training loss: 1.2190090417861938
training loss: 1.2696685791015625


training:   5%|▌         | 5337/100000 [4:52:06<81:59:15,  3.12s/it]

training loss: 0.9828479290008545
training loss: 1.208256721496582
training loss: 1.1655246019363403
training loss: 1.3107287883758545


training:   5%|▌         | 5341/100000 [4:52:18<81:58:56,  3.12s/it]

training loss: 1.0643515586853027
training loss: 1.1037068367004395
training loss: 1.2470570802688599
training loss: 1.2497624158859253


training:   5%|▌         | 5345/100000 [4:52:31<81:55:20,  3.12s/it]

training loss: 1.082282304763794
training loss: 1.3499374389648438
training loss: 1.1923301219940186
training loss: 1.1506438255310059


training:   5%|▌         | 5349/100000 [4:52:43<81:56:50,  3.12s/it]

training loss: 1.1014924049377441
training loss: 1.300377607345581
training loss: 1.1272004842758179
training loss: 1.0961952209472656


training:   5%|▌         | 5353/100000 [4:52:56<81:59:18,  3.12s/it]

training loss: 1.2762393951416016
training loss: 1.1979347467422485
training loss: 1.1849353313446045
training loss: 1.2384212017059326


training:   5%|▌         | 5357/100000 [4:53:08<81:56:18,  3.12s/it]

training loss: 1.4560284614562988
training loss: 1.3569350242614746
training loss: 1.309810996055603
training loss: 1.2021490335464478


training:   5%|▌         | 5361/100000 [4:53:20<81:52:41,  3.11s/it]

training loss: 1.094030737876892
training loss: 1.3336477279663086
training loss: 1.1102821826934814
training loss: 1.129058837890625


training:   5%|▌         | 5365/100000 [4:53:33<81:53:57,  3.12s/it]

training loss: 1.192657709121704
training loss: 1.2575867176055908
training loss: 1.2218725681304932
training loss: 1.189333438873291


training:   5%|▌         | 5369/100000 [4:53:45<81:57:37,  3.12s/it]

training loss: 1.1927318572998047
training loss: 1.1065360307693481
training loss: 1.2506319284439087
training loss: 1.1268212795257568


training:   5%|▌         | 5373/100000 [4:53:58<81:56:07,  3.12s/it]

training loss: 1.3460637331008911
training loss: 1.0615822076797485
training loss: 1.1534490585327148
training loss: 1.1617430448532104


training:   5%|▌         | 5377/100000 [4:54:10<81:54:44,  3.12s/it]

training loss: 1.1436495780944824
training loss: 1.0809730291366577
training loss: 1.1245110034942627
training loss: 1.1781706809997559


training:   5%|▌         | 5381/100000 [4:54:23<81:53:17,  3.12s/it]

training loss: 1.2152258157730103
training loss: 1.2919597625732422
training loss: 1.2241815328598022
training loss: 1.1537716388702393


training:   5%|▌         | 5385/100000 [4:54:35<81:54:29,  3.12s/it]

training loss: 1.233169436454773
training loss: 1.2235438823699951
training loss: 1.2508163452148438
training loss: 1.2025606632232666


training:   5%|▌         | 5389/100000 [4:54:48<81:55:17,  3.12s/it]

training loss: 1.186179280281067
training loss: 1.2657244205474854
training loss: 1.1981735229492188
training loss: 0.9659448862075806


training:   5%|▌         | 5393/100000 [4:55:00<81:56:17,  3.12s/it]

training loss: 1.1822924613952637
training loss: 1.134437918663025
training loss: 1.158308982849121
training loss: 1.2247824668884277


training:   5%|▌         | 5397/100000 [4:55:13<81:55:33,  3.12s/it]

training loss: 1.1872811317443848
training loss: 0.967850923538208
training loss: 1.2592251300811768
training loss: 1.217932939529419
training loss: 1.0535073280334473


training:   5%|▌         | 5401/100000 [4:55:25<82:32:07,  3.14s/it]

validation loss: 1.3201371431350708
training loss: 0.9857206344604492
training loss: 1.0640387535095215
training loss: 1.2393341064453125


training:   5%|▌         | 5405/100000 [4:55:38<82:18:49,  3.13s/it]

training loss: 1.149789571762085
training loss: 1.1368091106414795
training loss: 1.363757610321045
training loss: 1.0664372444152832


training:   5%|▌         | 5409/100000 [4:55:50<82:12:02,  3.13s/it]

training loss: 1.211165189743042
training loss: 1.2651116847991943
training loss: 1.224552035331726
training loss: 1.0243165493011475


training:   5%|▌         | 5413/100000 [4:56:03<82:06:54,  3.13s/it]

training loss: 1.1167848110198975
training loss: 1.1473476886749268
training loss: 1.0116565227508545
training loss: 1.1582794189453125


training:   5%|▌         | 5417/100000 [4:56:15<82:05:16,  3.12s/it]

training loss: 1.2146090269088745
training loss: 1.1852738857269287
training loss: 1.1309592723846436
training loss: 1.1382601261138916


training:   5%|▌         | 5421/100000 [4:56:28<81:59:47,  3.12s/it]

training loss: 1.2005363702774048
training loss: 1.2651073932647705
training loss: 1.1356971263885498
training loss: 1.161250352859497


training:   5%|▌         | 5425/100000 [4:56:40<81:52:32,  3.12s/it]

training loss: 1.1271512508392334
training loss: 1.2475996017456055
training loss: 1.0109453201293945
training loss: 1.1741843223571777


training:   5%|▌         | 5429/100000 [4:56:53<81:52:19,  3.12s/it]

training loss: 1.0914111137390137
training loss: 1.2656584978103638
training loss: 1.2102421522140503
training loss: 1.2782552242279053


training:   5%|▌         | 5433/100000 [4:57:05<81:52:26,  3.12s/it]

training loss: 1.1216727495193481
training loss: 1.2268238067626953
training loss: 1.3507884740829468
training loss: 1.1760125160217285


training:   5%|▌         | 5437/100000 [4:57:18<81:57:41,  3.12s/it]

training loss: 1.3280341625213623
training loss: 1.218124270439148
training loss: 1.2339661121368408
training loss: 1.1952903270721436


training:   5%|▌         | 5441/100000 [4:57:30<81:56:31,  3.12s/it]

training loss: 1.1651828289031982
training loss: 1.3057799339294434
training loss: 1.2197831869125366
training loss: 1.0334144830703735


training:   5%|▌         | 5445/100000 [4:57:43<81:56:29,  3.12s/it]

training loss: 1.3768551349639893
training loss: 1.1666902303695679
training loss: 1.2788382768630981
training loss: 1.179003357887268


training:   5%|▌         | 5449/100000 [4:57:55<81:55:10,  3.12s/it]

training loss: 1.3434500694274902
training loss: 1.1312716007232666
training loss: 1.102333426475525
training loss: 1.2672758102416992


training:   5%|▌         | 5453/100000 [4:58:08<81:54:34,  3.12s/it]

training loss: 1.2664883136749268
training loss: 1.1336884498596191
training loss: 1.1058539152145386
training loss: 1.1071370840072632


training:   5%|▌         | 5457/100000 [4:58:20<81:50:37,  3.12s/it]

training loss: 1.2392834424972534
training loss: 1.0944026708602905
training loss: 1.381352424621582
training loss: 1.2134488821029663


training:   5%|▌         | 5461/100000 [4:58:32<81:50:41,  3.12s/it]

training loss: 1.1385762691497803
training loss: 1.1778377294540405
training loss: 1.280144453048706
training loss: 1.1655573844909668


training:   5%|▌         | 5465/100000 [4:58:45<81:51:48,  3.12s/it]

training loss: 1.2059509754180908
training loss: 1.2208151817321777
training loss: 1.149782657623291
training loss: 1.2355632781982422


training:   5%|▌         | 5469/100000 [4:58:57<81:49:36,  3.12s/it]

training loss: 1.3412346839904785
training loss: 1.2159852981567383
training loss: 1.095039963722229
training loss: 1.3730143308639526


training:   5%|▌         | 5473/100000 [4:59:10<81:50:13,  3.12s/it]

training loss: 1.161176085472107
training loss: 1.0135577917099
training loss: 1.1701850891113281
training loss: 1.1866753101348877


training:   5%|▌         | 5477/100000 [4:59:22<81:48:44,  3.12s/it]

training loss: 1.1584337949752808
training loss: 1.2946844100952148
training loss: 0.971396803855896
training loss: 1.3063151836395264


training:   5%|▌         | 5481/100000 [4:59:35<81:50:19,  3.12s/it]

training loss: 1.2242686748504639
training loss: 1.3382439613342285
training loss: 1.0681549310684204
training loss: 1.1657639741897583


training:   5%|▌         | 5485/100000 [4:59:47<81:47:43,  3.12s/it]

training loss: 1.2235229015350342
training loss: 0.9445111155509949
training loss: 1.3701889514923096
training loss: 1.2416690587997437


training:   5%|▌         | 5489/100000 [5:00:00<81:49:30,  3.12s/it]

training loss: 1.2820982933044434
training loss: 1.29904305934906
training loss: 1.0803546905517578
training loss: 1.3544293642044067


training:   5%|▌         | 5493/100000 [5:00:12<81:49:38,  3.12s/it]

training loss: 1.2375595569610596
training loss: 1.185871958732605
training loss: 1.3998174667358398
training loss: 1.1847752332687378


training:   5%|▌         | 5497/100000 [5:00:25<81:50:25,  3.12s/it]

training loss: 1.0380324125289917
training loss: 1.2943480014801025
training loss: 1.1489230394363403
training loss: 1.2978112697601318
training loss: 1.1038126945495605
validation loss: 1.2073615789413452
%s 

 %s ('n of Norwich, Bath, Yarmouth, London, Salisbury and Exeter.  [[Image:Lady Nelson - Project Gutenberg eText 16914.jpg|thumb|Lady Nelson]] ==References== *{{1911}} * {{cite book | last = Coleman | first =  Terry | title = The Nelson Touch: The life and legend | publisher = Oxford University Press | year = 2004 | id = ISBN 0195173228 }} * {{cite book | last = Hayward | first =  Joel S. A. | title = For God and Glory: Lord Nelson and His Way of War | publisher =  | year = 2003 }} * {{cite book | last = Knight | first =  Rodger | title = The Pursuit of Victory: The Life and Achievement of Horatio Nelson | publisher = Basic Books | year = 2005 }} * {{cite book | last = Vincent | first =  Edgar | title = Nelson: Love &amp; Fame | publisher = Basic Books | year = 2003 }} * {{cite 

training:   5%|▌         | 5497/100000 [5:00:41<81:50:25,  3.12s/it]

nent  | used Exemploy, [[The Glant]] {{cite book|the oceland|the English Celson|The Poco Infirst Cloren Francisco | narrow | forming==  * {{cite book|Glant   | lastures, books|Chagas|Church of Sape, Original Street ''wro'' stage (now known as the narrow) - The History of book states and her standquarters = 2005 A bistrated &quot;piaer,&quot; = Charlema'' seeks on A Number of Life = Genenglad Thomas National Structures | Wear = D.C. Schanges | - B.DE] - A continuous centurient% East. ''A emphasis pretance structure of a Standard Union and Cultural equinosenhation | Stamic Pother/Sance = ''C. Warrin. NDT'' | lead = Scheen B.E. Nonman August 1/29 | 49.107 + [[Meeta'Intelling :sone | Hold August You | year=28 | Ancounter Wood, | Orign Port | year ! root of Amends in Book | also 205 Upinch | admink - The unique continuous | Having Unit Einar Electure | grivy chee in [[August 3000 - Earth |  Drug, 200Bz Augueno, Learnent | August 204 |  Josek | not = Some 57th #4 certege realizy, 1sequent | 

training:   6%|▌         | 5501/100000 [5:01:56<237:26:31,  9.05s/it]

Model saved at iteration 5500
training loss: 1.1699203252792358
training loss: 1.1233638525009155
training loss: 1.355848789215088


training:   6%|▌         | 5505/100000 [5:02:09<190:41:28,  7.26s/it]

training loss: 1.036963939666748
training loss: 1.0223246812820435
training loss: 1.1990478038787842
training loss: 1.2859079837799072


training:   6%|▌         | 5509/100000 [5:02:21<158:00:13,  6.02s/it]

training loss: 1.283599615097046
training loss: 1.3400405645370483
training loss: 1.202172040939331
training loss: 1.0886162519454956


training:   6%|▌         | 5513/100000 [5:02:34<135:09:20,  5.15s/it]

training loss: 1.149411916732788
training loss: 1.0966246128082275
training loss: 1.177187442779541
training loss: 1.1465294361114502


training:   6%|▌         | 5517/100000 [5:02:46<119:12:08,  4.54s/it]

training loss: 1.2595198154449463
training loss: 1.24704110622406
training loss: 1.065806269645691
training loss: 1.2317910194396973


training:   6%|▌         | 5521/100000 [5:02:59<108:00:46,  4.12s/it]

training loss: 1.0645173788070679
training loss: 1.1304376125335693
training loss: 1.247443437576294
training loss: 1.229842185974121


training:   6%|▌         | 5525/100000 [5:03:11<100:10:00,  3.82s/it]

training loss: 1.2644089460372925
training loss: 1.3930284976959229
training loss: 1.231928825378418
training loss: 1.1899653673171997


training:   6%|▌         | 5529/100000 [5:03:23<94:39:42,  3.61s/it] 

training loss: 1.3530352115631104
training loss: 1.1942213773727417
training loss: 1.262146234512329
training loss: 1.217429757118225


training:   6%|▌         | 5533/100000 [5:03:36<90:46:14,  3.46s/it]

training loss: 1.1395289897918701
training loss: 1.091806411743164
training loss: 0.8947753310203552
training loss: 1.2044105529785156


training:   6%|▌         | 5537/100000 [5:03:48<88:03:27,  3.36s/it]

training loss: 1.1736723184585571
training loss: 1.3421680927276611
training loss: 1.2098143100738525
training loss: 1.216829776763916


training:   6%|▌         | 5541/100000 [5:04:01<86:12:03,  3.29s/it]

training loss: 1.1043944358825684
training loss: 1.3345438241958618
training loss: 1.1808654069900513
training loss: 1.2622427940368652


training:   6%|▌         | 5545/100000 [5:04:13<84:54:09,  3.24s/it]

training loss: 1.1331100463867188
training loss: 1.2245738506317139
training loss: 0.9378314018249512
training loss: 1.3141707181930542


training:   6%|▌         | 5549/100000 [5:04:26<83:59:08,  3.20s/it]

training loss: 1.2365748882293701
training loss: 1.235922932624817
training loss: 1.316026210784912
training loss: 1.1928631067276


training:   6%|▌         | 5553/100000 [5:04:38<83:19:57,  3.18s/it]

training loss: 1.2506496906280518
training loss: 1.2056468725204468
training loss: 0.9454147815704346
training loss: 1.274864673614502


training:   6%|▌         | 5557/100000 [5:04:51<82:51:54,  3.16s/it]

training loss: 1.172590970993042
training loss: 1.2119919061660767
training loss: 1.2685294151306152
training loss: 1.1985673904418945


training:   6%|▌         | 5561/100000 [5:05:03<82:32:52,  3.15s/it]

training loss: 1.1262125968933105
training loss: 0.9738749265670776
training loss: 1.1813936233520508
training loss: 1.459388017654419


training:   6%|▌         | 5565/100000 [5:05:16<82:22:15,  3.14s/it]

training loss: 1.1232094764709473
training loss: 1.2533307075500488
training loss: 1.054386019706726
training loss: 1.1848559379577637


training:   6%|▌         | 5569/100000 [5:05:28<82:12:02,  3.13s/it]

training loss: 1.359696865081787
training loss: 1.469012975692749
training loss: 1.2877256870269775
training loss: 1.1834402084350586


training:   6%|▌         | 5573/100000 [5:05:41<82:04:25,  3.13s/it]

training loss: 1.1956336498260498
training loss: 1.2725110054016113
training loss: 1.1158291101455688
training loss: 1.0067007541656494


training:   6%|▌         | 5577/100000 [5:05:53<81:59:32,  3.13s/it]

training loss: 1.001015067100525
training loss: 1.2744183540344238
training loss: 1.2972376346588135
training loss: 1.3368253707885742


training:   6%|▌         | 5581/100000 [5:06:06<81:56:03,  3.12s/it]

training loss: 1.0615513324737549
training loss: 1.2114990949630737
training loss: 1.2671458721160889
training loss: 1.083729863166809


training:   6%|▌         | 5585/100000 [5:06:18<81:49:31,  3.12s/it]

training loss: 1.2747869491577148
training loss: 1.2471113204956055
training loss: 1.182821273803711
training loss: 1.469123363494873


training:   6%|▌         | 5589/100000 [5:06:31<81:46:07,  3.12s/it]

training loss: 1.175816297531128
training loss: 1.1944137811660767
training loss: 1.1703152656555176
training loss: 1.2600606679916382


training:   6%|▌         | 5593/100000 [5:06:43<81:43:57,  3.12s/it]

training loss: 1.3272709846496582
training loss: 1.153334617614746
training loss: 1.2526068687438965
training loss: 1.1266436576843262


training:   6%|▌         | 5597/100000 [5:06:55<81:42:21,  3.12s/it]

training loss: 1.1334270238876343
training loss: 0.9795652031898499
training loss: 1.0442492961883545
training loss: 1.305558204650879
training loss: 0.878405749797821


training:   6%|▌         | 5601/100000 [5:07:08<82:16:51,  3.14s/it]

validation loss: 1.0245873928070068
training loss: 0.9652005434036255
training loss: 1.1827986240386963
training loss: 0.9037721157073975


training:   6%|▌         | 5605/100000 [5:07:21<82:06:18,  3.13s/it]

training loss: 1.2258809804916382
training loss: 1.0555979013442993
training loss: 1.1191186904907227
training loss: 1.1221493482589722


training:   6%|▌         | 5609/100000 [5:07:33<82:01:54,  3.13s/it]

training loss: 1.144783854484558
training loss: 1.3180814981460571
training loss: 1.3311322927474976
training loss: 1.2181284427642822


training:   6%|▌         | 5613/100000 [5:07:46<81:57:12,  3.13s/it]

training loss: 1.1151829957962036
training loss: 1.0320194959640503
training loss: 1.1048556566238403
training loss: 1.2228089570999146


training:   6%|▌         | 5617/100000 [5:07:58<81:53:21,  3.12s/it]

training loss: 1.355979323387146
training loss: 1.0996158123016357
training loss: 1.2428381443023682
training loss: 1.2479697465896606


training:   6%|▌         | 5621/100000 [5:08:11<81:50:43,  3.12s/it]

training loss: 1.2837119102478027
training loss: 1.2083849906921387
training loss: 1.1702394485473633
training loss: 1.3247113227844238


training:   6%|▌         | 5625/100000 [5:08:23<81:49:36,  3.12s/it]

training loss: 1.1450952291488647
training loss: 1.1411348581314087
training loss: 1.1405892372131348
training loss: 1.2544819116592407


training:   6%|▌         | 5629/100000 [5:08:36<81:46:35,  3.12s/it]

training loss: 1.281134009361267
training loss: 1.2104259729385376
training loss: 1.287888765335083
training loss: 1.0068340301513672


training:   6%|▌         | 5633/100000 [5:08:48<81:46:06,  3.12s/it]

training loss: 1.2682056427001953
training loss: 1.0214160680770874
training loss: 1.0835922956466675
training loss: 1.2698357105255127


training:   6%|▌         | 5637/100000 [5:09:00<81:43:35,  3.12s/it]

training loss: 1.1386537551879883
training loss: 0.9721014499664307
training loss: 1.149150013923645
training loss: 0.9795308709144592


training:   6%|▌         | 5641/100000 [5:09:13<81:43:37,  3.12s/it]

training loss: 1.2546945810317993
training loss: 1.3357908725738525
training loss: 1.1618053913116455
training loss: 1.1725558042526245


training:   6%|▌         | 5645/100000 [5:09:25<81:43:21,  3.12s/it]

training loss: 1.2383328676223755
training loss: 1.3188399076461792
training loss: 1.193906545639038
training loss: 1.193821907043457


training:   6%|▌         | 5649/100000 [5:09:38<81:39:22,  3.12s/it]

training loss: 1.1892509460449219
training loss: 1.0425256490707397
training loss: 1.3410286903381348
training loss: 1.2293115854263306


training:   6%|▌         | 5653/100000 [5:09:50<81:40:06,  3.12s/it]

training loss: 1.1756467819213867
training loss: 1.2449043989181519
training loss: 1.0433988571166992
training loss: 1.2864494323730469


training:   6%|▌         | 5657/100000 [5:10:03<81:36:25,  3.11s/it]

training loss: 1.3864789009094238
training loss: 1.083287000656128
training loss: 1.3480011224746704
training loss: 1.1961264610290527


training:   6%|▌         | 5661/100000 [5:10:15<81:38:15,  3.12s/it]

training loss: 1.2446883916854858
training loss: 1.0902444124221802
training loss: 1.315197229385376
training loss: 1.1345255374908447


training:   6%|▌         | 5665/100000 [5:10:28<81:30:28,  3.11s/it]

training loss: 1.189825177192688
training loss: 0.950925350189209
training loss: 1.0328681468963623
training loss: 1.400504231452942


training:   6%|▌         | 5669/100000 [5:10:40<81:31:40,  3.11s/it]

training loss: 1.1718724966049194
training loss: 1.2564183473587036
training loss: 1.2803367376327515
training loss: 1.1946955919265747


training:   6%|▌         | 5673/100000 [5:10:53<81:33:37,  3.11s/it]

training loss: 1.0832515954971313
training loss: 1.2590633630752563
training loss: 1.2436132431030273
training loss: 1.3025171756744385


training:   6%|▌         | 5677/100000 [5:11:05<81:32:57,  3.11s/it]

training loss: 1.0638688802719116
training loss: 1.2607451677322388
training loss: 1.0600242614746094
training loss: 1.0313663482666016


training:   6%|▌         | 5681/100000 [5:11:17<81:31:39,  3.11s/it]

training loss: 1.2904934883117676
training loss: 1.4239475727081299
training loss: 1.3729209899902344
training loss: 1.0837265253067017


training:   6%|▌         | 5685/100000 [5:11:30<81:31:45,  3.11s/it]

training loss: 1.330139398574829
training loss: 1.096595048904419
training loss: 1.0065653324127197
training loss: 1.0722275972366333


training:   6%|▌         | 5689/100000 [5:11:42<81:30:00,  3.11s/it]

training loss: 1.1417723894119263
training loss: 1.073648452758789
training loss: 1.0923434495925903
training loss: 1.3481472730636597


training:   6%|▌         | 5693/100000 [5:11:55<81:30:28,  3.11s/it]

training loss: 1.3195741176605225
training loss: 1.14466392993927
training loss: 1.0944644212722778
training loss: 0.9664576053619385


training:   6%|▌         | 5697/100000 [5:12:07<81:29:16,  3.11s/it]

training loss: 1.3212089538574219
training loss: 1.2256419658660889
training loss: 1.1345677375793457
training loss: 1.0325005054473877
training loss: 1.1367738246917725


training:   6%|▌         | 5701/100000 [5:12:20<82:04:00,  3.13s/it]

validation loss: 1.1273386478424072
training loss: 1.2733129262924194
training loss: 1.2041308879852295
training loss: 1.1685576438903809


training:   6%|▌         | 5705/100000 [5:12:32<81:54:55,  3.13s/it]

training loss: 1.2016546726226807
training loss: 1.2672467231750488
training loss: 0.9805001616477966
training loss: 1.3137578964233398


training:   6%|▌         | 5709/100000 [5:12:45<81:49:48,  3.12s/it]

training loss: 1.1199088096618652
training loss: 1.1746160984039307
training loss: 1.0030360221862793
training loss: 0.7116875648498535


training:   6%|▌         | 5713/100000 [5:12:57<81:45:10,  3.12s/it]

training loss: 1.2496381998062134
training loss: 1.0774294137954712
training loss: 1.2747397422790527
training loss: 1.3633626699447632


training:   6%|▌         | 5717/100000 [5:13:10<81:42:18,  3.12s/it]

training loss: 1.0564391613006592
training loss: 1.3027002811431885
training loss: 1.151812195777893
training loss: 1.375022530555725


training:   6%|▌         | 5721/100000 [5:13:22<81:39:07,  3.12s/it]

training loss: 1.1343004703521729
training loss: 1.2842565774917603
training loss: 1.247240424156189
training loss: 1.0849915742874146


training:   6%|▌         | 5725/100000 [5:13:35<81:39:08,  3.12s/it]

training loss: 1.2773518562316895
training loss: 1.1788750886917114
training loss: 1.2068946361541748
training loss: 1.2977521419525146


training:   6%|▌         | 5729/100000 [5:13:47<81:36:54,  3.12s/it]

training loss: 1.028948187828064
training loss: 1.1842999458312988
training loss: 1.2988909482955933
training loss: 1.2746200561523438


training:   6%|▌         | 5733/100000 [5:14:00<81:39:52,  3.12s/it]

training loss: 1.1154413223266602
training loss: 1.222376823425293
training loss: 1.1636908054351807
training loss: 1.290867805480957


training:   6%|▌         | 5737/100000 [5:14:12<81:38:35,  3.12s/it]

training loss: 1.3130464553833008
training loss: 1.336811900138855
training loss: 1.2592302560806274
training loss: 1.0813952684402466


training:   6%|▌         | 5741/100000 [5:14:25<81:39:42,  3.12s/it]

training loss: 1.1151899099349976
training loss: 1.1420117616653442
training loss: 1.4150092601776123
training loss: 1.136011004447937


training:   6%|▌         | 5745/100000 [5:14:37<81:38:46,  3.12s/it]

training loss: 1.2890329360961914
training loss: 1.206441879272461
training loss: 1.1733486652374268
training loss: 1.249112606048584


training:   6%|▌         | 5749/100000 [5:14:50<81:38:30,  3.12s/it]

training loss: 1.000493049621582
training loss: 1.1510967016220093
training loss: 1.1761058568954468
training loss: 1.1535301208496094


training:   6%|▌         | 5753/100000 [5:15:02<81:36:24,  3.12s/it]

training loss: 1.0787588357925415
training loss: 1.289658546447754
training loss: 1.1887397766113281
training loss: 1.2846264839172363


training:   6%|▌         | 5757/100000 [5:15:14<81:37:15,  3.12s/it]

training loss: 1.0009206533432007
training loss: 1.087104082107544
training loss: 1.1432713270187378
training loss: 1.0015745162963867


training:   6%|▌         | 5761/100000 [5:15:27<81:31:24,  3.11s/it]

training loss: 1.210127830505371
training loss: 1.1546745300292969
training loss: 1.1606416702270508
training loss: 1.2388854026794434


training:   6%|▌         | 5765/100000 [5:15:39<81:28:18,  3.11s/it]

training loss: 1.1721508502960205
training loss: 1.3050986528396606
training loss: 1.1282612085342407
training loss: 1.2971973419189453


training:   6%|▌         | 5769/100000 [5:15:52<81:31:01,  3.11s/it]

training loss: 1.00113844871521
training loss: 1.163313627243042
training loss: 1.3287804126739502
training loss: 1.2704442739486694


training:   6%|▌         | 5773/100000 [5:16:04<81:30:31,  3.11s/it]

training loss: 1.2064306735992432
training loss: 1.1182341575622559
training loss: 1.2158761024475098
training loss: 1.0097568035125732


training:   6%|▌         | 5777/100000 [5:16:17<81:31:15,  3.11s/it]

training loss: 1.2892316579818726
training loss: 1.1867254972457886
training loss: 1.1212366819381714
training loss: 1.2906303405761719


training:   6%|▌         | 5781/100000 [5:16:29<81:30:08,  3.11s/it]

training loss: 1.0368444919586182
training loss: 1.2673990726470947
training loss: 1.0924115180969238
training loss: 1.0528006553649902


training:   6%|▌         | 5785/100000 [5:16:42<81:27:31,  3.11s/it]

training loss: 1.2982661724090576
training loss: 1.352036714553833
training loss: 1.2781217098236084
training loss: 1.3633944988250732


training:   6%|▌         | 5789/100000 [5:16:54<81:25:18,  3.11s/it]

training loss: 1.109079360961914
training loss: 1.1993577480316162
training loss: 1.101916790008545
training loss: 1.2776116132736206


training:   6%|▌         | 5793/100000 [5:17:07<81:26:33,  3.11s/it]

training loss: 1.298545002937317
training loss: 1.2210981845855713
training loss: 1.135480284690857
training loss: 1.0343680381774902


training:   6%|▌         | 5797/100000 [5:17:19<81:29:12,  3.11s/it]

training loss: 1.181755781173706
training loss: 1.2303941249847412
training loss: 1.1935306787490845
training loss: 1.2227184772491455


training:   6%|▌         | 5797/100000 [5:17:31<81:29:12,  3.11s/it]

training loss: 1.0873935222625732


training:   6%|▌         | 5801/100000 [5:17:32<82:06:59,  3.14s/it]

validation loss: 1.221662998199463
training loss: 1.1740705966949463
training loss: 1.1882797479629517
training loss: 1.2802536487579346


training:   6%|▌         | 5805/100000 [5:17:44<81:57:28,  3.13s/it]

training loss: 1.070268154144287
training loss: 1.2214796543121338
training loss: 0.9145200848579407
training loss: 1.2379789352416992


training:   6%|▌         | 5809/100000 [5:17:57<81:51:17,  3.13s/it]

training loss: 1.2115650177001953
training loss: 0.9109115600585938
training loss: 1.2846224308013916
training loss: 1.0590567588806152


training:   6%|▌         | 5813/100000 [5:18:09<81:46:18,  3.13s/it]

training loss: 1.1982219219207764
training loss: 1.2337433099746704
training loss: 1.197075605392456
training loss: 1.1094883680343628


training:   6%|▌         | 5817/100000 [5:18:22<81:40:06,  3.12s/it]

training loss: 1.0050121545791626
training loss: 1.0863432884216309
training loss: 1.1614068746566772
training loss: 1.2889529466629028


training:   6%|▌         | 5821/100000 [5:18:34<81:38:48,  3.12s/it]

training loss: 1.2166657447814941
training loss: 1.1772441864013672
training loss: 1.2099905014038086
training loss: 1.135004997253418


training:   6%|▌         | 5825/100000 [5:18:47<81:41:19,  3.12s/it]

training loss: 1.2916069030761719
training loss: 1.2032499313354492
training loss: 1.0393825769424438
training loss: 1.2034611701965332


training:   6%|▌         | 5829/100000 [5:18:59<81:34:30,  3.12s/it]

training loss: 1.1780452728271484
training loss: 1.234813928604126
training loss: 1.1828210353851318
training loss: 1.2031419277191162


training:   6%|▌         | 5833/100000 [5:19:12<81:29:41,  3.12s/it]

training loss: 1.217775821685791
training loss: 1.2465301752090454
training loss: 1.2474167346954346
training loss: 1.1800103187561035


training:   6%|▌         | 5837/100000 [5:19:24<81:30:51,  3.12s/it]

training loss: 1.1899209022521973
training loss: 1.112568974494934
training loss: 1.1983152627944946
training loss: 1.377323031425476


training:   6%|▌         | 5841/100000 [5:19:36<81:29:13,  3.12s/it]

training loss: 1.22214674949646
training loss: 1.2768645286560059
training loss: 1.0988558530807495
training loss: 1.2025721073150635


training:   6%|▌         | 5845/100000 [5:19:49<81:29:25,  3.12s/it]

training loss: 1.2614156007766724
training loss: 1.1871691942214966
training loss: 1.195703387260437
training loss: 1.2386469841003418


training:   6%|▌         | 5849/100000 [5:20:01<81:26:46,  3.11s/it]

training loss: 1.1329963207244873
training loss: 1.0538697242736816
training loss: 1.2276363372802734
training loss: 1.1557018756866455


training:   6%|▌         | 5853/100000 [5:20:14<81:28:10,  3.12s/it]

training loss: 1.1261894702911377
training loss: 1.264082908630371
training loss: 1.1591602563858032
training loss: 1.0025345087051392


training:   6%|▌         | 5857/100000 [5:20:26<81:22:34,  3.11s/it]

training loss: 1.259432315826416
training loss: 1.1267919540405273
training loss: 1.2799922227859497
training loss: 1.0624464750289917


training:   6%|▌         | 5861/100000 [5:20:39<81:25:26,  3.11s/it]

training loss: 1.0683523416519165
training loss: 1.2221922874450684
training loss: 1.138439655303955
training loss: 1.1112799644470215


training:   6%|▌         | 5865/100000 [5:20:51<81:24:25,  3.11s/it]

training loss: 1.1178399324417114
training loss: 1.2038792371749878
training loss: 1.2599711418151855
training loss: 0.9983872175216675


training:   6%|▌         | 5869/100000 [5:21:04<81:26:41,  3.11s/it]

training loss: 1.0295875072479248
training loss: 1.151271104812622
training loss: 1.0872164964675903
training loss: 1.0201444625854492


training:   6%|▌         | 5873/100000 [5:21:16<81:27:41,  3.12s/it]

training loss: 1.062299132347107
training loss: 1.3543848991394043
training loss: 1.094272494316101
training loss: 0.9980887174606323


training:   6%|▌         | 5877/100000 [5:21:29<81:26:24,  3.11s/it]

training loss: 1.0506585836410522
training loss: 1.210241436958313
training loss: 1.017980933189392
training loss: 1.3189210891723633


training:   6%|▌         | 5881/100000 [5:21:41<81:27:56,  3.12s/it]

training loss: 1.1186974048614502
training loss: 1.1675901412963867
training loss: 1.157281756401062
training loss: 1.105539083480835


training:   6%|▌         | 5885/100000 [5:21:53<81:28:54,  3.12s/it]

training loss: 1.1460050344467163
training loss: 1.2053031921386719
training loss: 1.119796872138977
training loss: 1.182929277420044


training:   6%|▌         | 5889/100000 [5:22:06<81:30:30,  3.12s/it]

training loss: 1.2982795238494873
training loss: 1.2844414710998535
training loss: 1.073933482170105
training loss: 1.154686450958252


training:   6%|▌         | 5893/100000 [5:22:18<81:31:13,  3.12s/it]

training loss: 1.1937994956970215
training loss: 1.3409428596496582
training loss: 1.2230497598648071
training loss: 1.1175432205200195


training:   6%|▌         | 5897/100000 [5:22:31<81:31:40,  3.12s/it]

training loss: 1.2772587537765503
training loss: 1.2357943058013916
training loss: 0.9751283526420593
training loss: 1.2331526279449463
training loss: 1.2257306575775146


training:   6%|▌         | 5901/100000 [5:22:44<82:06:02,  3.14s/it]

validation loss: 1.1971020698547363
training loss: 1.251727819442749
training loss: 1.2008246183395386
training loss: 1.310283899307251


training:   6%|▌         | 5905/100000 [5:22:56<81:54:26,  3.13s/it]

training loss: 1.1247628927230835
training loss: 1.2003874778747559
training loss: 1.1315950155258179
training loss: 1.227018117904663


training:   6%|▌         | 5909/100000 [5:23:09<81:43:44,  3.13s/it]

training loss: 1.2068876028060913
training loss: 1.091721534729004
training loss: 1.2365187406539917
training loss: 1.173959493637085


training:   6%|▌         | 5913/100000 [5:23:21<81:36:48,  3.12s/it]

training loss: 1.1348936557769775
training loss: 1.1742918491363525
training loss: 1.1756620407104492
training loss: 1.2610492706298828


training:   6%|▌         | 5917/100000 [5:23:34<81:31:57,  3.12s/it]

training loss: 1.1839524507522583
training loss: 1.2408772706985474
training loss: 1.2831833362579346
training loss: 1.1081035137176514


training:   6%|▌         | 5921/100000 [5:23:46<81:30:42,  3.12s/it]

training loss: 1.2552430629730225
training loss: 1.2795195579528809
training loss: 1.3483643531799316
training loss: 1.1690704822540283


training:   6%|▌         | 5925/100000 [5:23:58<81:30:15,  3.12s/it]

training loss: 1.1223912239074707
training loss: 1.297905445098877
training loss: 1.0744950771331787
training loss: 1.1381137371063232


training:   6%|▌         | 5929/100000 [5:24:11<81:29:42,  3.12s/it]

training loss: 1.123295783996582
training loss: 1.2783236503601074
training loss: 1.1595007181167603
training loss: 1.151511788368225


training:   6%|▌         | 5933/100000 [5:24:23<81:31:23,  3.12s/it]

training loss: 1.27878737449646
training loss: 1.1985251903533936
training loss: 1.2374441623687744
training loss: 1.303544282913208


training:   6%|▌         | 5937/100000 [5:24:36<81:27:36,  3.12s/it]

training loss: 1.2207458019256592
training loss: 1.2014625072479248
training loss: 1.337695837020874
training loss: 1.2807930707931519


training:   6%|▌         | 5941/100000 [5:24:48<81:23:51,  3.12s/it]

training loss: 1.2129905223846436
training loss: 1.2714930772781372
training loss: 1.1714305877685547
training loss: 1.4028170108795166


training:   6%|▌         | 5945/100000 [5:25:01<81:23:04,  3.12s/it]

training loss: 1.0739890336990356
training loss: 1.423357605934143
training loss: 1.1520248651504517
training loss: 1.142159104347229


training:   6%|▌         | 5949/100000 [5:25:13<81:24:00,  3.12s/it]

training loss: 1.4040117263793945
training loss: 1.25400710105896
training loss: 1.2454826831817627
training loss: 1.0644681453704834


training:   6%|▌         | 5953/100000 [5:25:26<81:26:35,  3.12s/it]

training loss: 1.3361176252365112
training loss: 1.234408974647522
training loss: 1.1539084911346436
training loss: 1.3077970743179321


training:   6%|▌         | 5957/100000 [5:25:38<81:27:56,  3.12s/it]

training loss: 1.1653878688812256
training loss: 1.1184256076812744
training loss: 1.2654800415039062
training loss: 0.9900943040847778


training:   6%|▌         | 5961/100000 [5:25:51<81:27:53,  3.12s/it]

training loss: 1.174612045288086
training loss: 1.0188523530960083
training loss: 1.324843168258667
training loss: 1.2034472227096558


training:   6%|▌         | 5965/100000 [5:26:03<81:27:14,  3.12s/it]

training loss: 1.1477205753326416
training loss: 1.2225260734558105
training loss: 1.0637131929397583
training loss: 1.359454870223999


training:   6%|▌         | 5969/100000 [5:26:16<81:27:12,  3.12s/it]

training loss: 1.2495920658111572
training loss: 1.2739036083221436
training loss: 1.1462739706039429
training loss: 1.1919037103652954


training:   6%|▌         | 5973/100000 [5:26:28<81:25:24,  3.12s/it]

training loss: 1.0496513843536377
training loss: 1.1657335758209229
training loss: 0.9947893619537354
training loss: 1.2282317876815796


training:   6%|▌         | 5977/100000 [5:26:41<81:27:26,  3.12s/it]

training loss: 1.102906346321106
training loss: 1.0688234567642212
training loss: 1.094285488128662
training loss: 1.0612242221832275


training:   6%|▌         | 5981/100000 [5:26:53<81:27:11,  3.12s/it]

training loss: 1.1559784412384033
training loss: 1.2359342575073242
training loss: 1.1472651958465576
training loss: 1.1061620712280273


training:   6%|▌         | 5985/100000 [5:27:06<81:26:53,  3.12s/it]

training loss: 0.9168499708175659
training loss: 1.2607803344726562
training loss: 1.2700955867767334
training loss: 1.3181018829345703


training:   6%|▌         | 5989/100000 [5:27:18<81:26:09,  3.12s/it]

training loss: 1.0320526361465454
training loss: 1.0792498588562012
training loss: 1.1201063394546509
training loss: 1.2437748908996582


training:   6%|▌         | 5993/100000 [5:27:30<81:25:32,  3.12s/it]

training loss: 1.160062313079834
training loss: 1.3398666381835938
training loss: 1.1726839542388916
training loss: 1.2050737142562866


training:   6%|▌         | 5997/100000 [5:27:43<81:21:50,  3.12s/it]

training loss: 1.164392352104187
training loss: 1.0962402820587158
training loss: 1.0304468870162964
training loss: 1.251122236251831
training loss: 1.2853752374649048
validation loss: 1.2045029401779175
%s 

 %s ('fically forbids inciting hatred against ethnic groups.  Since such laws often apply only to the [[victimization]] of specific individuals, some argue that hate speech must be regulated to protect members of groups. Others argue that hate speech limits the free development of political discourse and ought to be regulated, but by [[volunteer|voluntaristic]] communities and not by the state. Still others claim that it is not possible to legislate a boundary between legitimate controversial speech and hate speech in such a way which is just to those with controversial political or social views.  ==Speech codes== Various institutions in the United States and [[Europe]] began developing codes to limit or punish hate speech in the 1990s, on the grounds that such speech amounts to [

training:   6%|▌         | 5997/100000 [5:28:01<81:21:50,  3.12s/it]

, yach at pencolous deletariize or maps bipper couples.  The either is scott a lower builder senior, including sexism become places are deemed a goal orministic (had predicimal) debated in the term &quot;given ill&quot; or in the speech demonstrate to be thought cooling with the human discretion of a speech, as in the move. We come to refer to you tend to be his right reached he dessever the argued on an offsform, summary histories be including late spoken contranted.  In feed with members in the soldiers of interest treasurity text-points to be tested under corner in [[1971 and led the move Franks of AMD, ''[[The Wasconste Frock IT development do at the United States collection and redubed election has correction make 1993 produces.  == Rite and even because the ''[[Wome Limbed Cahez]]'' develop (&quot;[[Picture Johnson.  In tube may has airlight with &quot;[[Serret September 13]], although when have nano starrow investing the ve[State and treaty part of [[Irege]], habitation swarked 

training:   6%|▌         | 6001/100000 [5:29:14<236:05:42,  9.04s/it]

Model saved at iteration 6000
training loss: 1.135488510131836
training loss: 1.2305896282196045
training loss: 1.1383813619613647


training:   6%|▌         | 6005/100000 [5:29:27<189:33:14,  7.26s/it]

training loss: 1.1621575355529785
training loss: 0.991435706615448
training loss: 1.2183668613433838
training loss: 1.1076403856277466


training:   6%|▌         | 6009/100000 [5:29:39<157:03:36,  6.02s/it]

training loss: 1.0734039545059204
training loss: 1.2467498779296875
training loss: 1.2619071006774902
training loss: 1.2169862985610962


training:   6%|▌         | 6013/100000 [5:29:52<134:20:25,  5.15s/it]

training loss: 0.9291807413101196
training loss: 1.0331673622131348
training loss: 0.9760505557060242
training loss: 1.1422085762023926


training:   6%|▌         | 6017/100000 [5:30:04<118:29:45,  4.54s/it]

training loss: 1.2353472709655762
training loss: 1.0752967596054077
training loss: 1.2059967517852783
training loss: 1.1478362083435059


training:   6%|▌         | 6021/100000 [5:30:17<107:23:41,  4.11s/it]

training loss: 1.2683427333831787
training loss: 1.0983779430389404
training loss: 1.2117679119110107
training loss: 1.2078642845153809


training:   6%|▌         | 6025/100000 [5:30:29<99:35:10,  3.81s/it] 

training loss: 0.9573649764060974
training loss: 1.289838433265686
training loss: 1.0903455018997192
training loss: 1.1824082136154175


training:   6%|▌         | 6029/100000 [5:30:42<94:05:36,  3.60s/it]

training loss: 1.2968363761901855
training loss: 1.1908411979675293
training loss: 1.2573151588439941
training loss: 0.9485359191894531


training:   6%|▌         | 6033/100000 [5:30:54<90:17:10,  3.46s/it]

training loss: 1.0688104629516602
training loss: 1.1507575511932373
training loss: 1.4126524925231934
training loss: 1.2473055124282837


training:   6%|▌         | 6037/100000 [5:31:07<87:40:58,  3.36s/it]

training loss: 1.2622759342193604
training loss: 1.2560192346572876
training loss: 1.1737525463104248
training loss: 1.08315110206604


training:   6%|▌         | 6041/100000 [5:31:19<85:47:59,  3.29s/it]

training loss: 1.335545539855957
training loss: 1.2561562061309814
training loss: 1.2580238580703735
training loss: 1.0999020338058472


training:   6%|▌         | 6045/100000 [5:31:32<84:28:35,  3.24s/it]

training loss: 1.032932162284851
training loss: 1.0287010669708252
training loss: 1.0827386379241943
training loss: 1.2386680841445923


training:   6%|▌         | 6049/100000 [5:31:44<83:32:27,  3.20s/it]

training loss: 0.9928494691848755
training loss: 1.3449246883392334
training loss: 1.2051336765289307
training loss: 1.1342600584030151


training:   6%|▌         | 6053/100000 [5:31:56<82:50:40,  3.17s/it]

training loss: 1.2236244678497314
training loss: 1.1739988327026367
training loss: 1.1599113941192627
training loss: 1.2336090803146362


training:   6%|▌         | 6057/100000 [5:32:09<82:22:44,  3.16s/it]

training loss: 1.0429801940917969
training loss: 1.1602160930633545
training loss: 1.1482726335525513
training loss: 1.1631885766983032


training:   6%|▌         | 6061/100000 [5:32:21<82:04:01,  3.15s/it]

training loss: 1.0752415657043457
training loss: 1.1547932624816895
training loss: 1.0375880002975464
training loss: 1.1516051292419434


training:   6%|▌         | 6065/100000 [5:32:34<81:48:53,  3.14s/it]

training loss: 1.29971444606781
training loss: 1.1875455379486084
training loss: 1.1723380088806152
training loss: 1.172318458557129


training:   6%|▌         | 6069/100000 [5:32:46<81:40:31,  3.13s/it]

training loss: 1.3670659065246582
training loss: 1.2167414426803589
training loss: 1.2854069471359253
training loss: 1.2400767803192139


training:   6%|▌         | 6073/100000 [5:32:59<81:34:23,  3.13s/it]

training loss: 1.236471176147461
training loss: 1.196216106414795
training loss: 1.069077968597412
training loss: 1.2246716022491455


training:   6%|▌         | 6077/100000 [5:33:11<81:31:06,  3.12s/it]

training loss: 1.3766613006591797
training loss: 1.1735377311706543
training loss: 1.0966262817382812
training loss: 1.1579965353012085


training:   6%|▌         | 6081/100000 [5:33:24<81:27:29,  3.12s/it]

training loss: 1.1435489654541016
training loss: 1.1753756999969482
training loss: 1.118607521057129
training loss: 1.1415220499038696


training:   6%|▌         | 6085/100000 [5:33:36<81:23:21,  3.12s/it]

training loss: 1.2386586666107178
training loss: 1.2369389533996582
training loss: 1.1854878664016724
training loss: 1.142941951751709


training:   6%|▌         | 6089/100000 [5:33:49<81:22:05,  3.12s/it]

training loss: 1.1945081949234009
training loss: 1.2099928855895996
training loss: 0.973185658454895
training loss: 1.0847172737121582


training:   6%|▌         | 6093/100000 [5:34:01<81:16:54,  3.12s/it]

training loss: 1.1119840145111084
training loss: 1.2815284729003906
training loss: 1.2440683841705322
training loss: 1.0348632335662842


training:   6%|▌         | 6097/100000 [5:34:14<81:17:44,  3.12s/it]

training loss: 1.3057018518447876
training loss: 1.2080427408218384
training loss: 1.0999373197555542
training loss: 1.114709496498108
training loss: 1.3047412633895874


training:   6%|▌         | 6101/100000 [5:34:26<82:00:40,  3.14s/it]

validation loss: 1.2411658763885498
training loss: 0.7298495769500732
training loss: 1.3444727659225464
training loss: 1.1070096492767334


training:   6%|▌         | 6105/100000 [5:34:39<81:48:57,  3.14s/it]

training loss: 1.2938307523727417
training loss: 1.2344679832458496
training loss: 1.2854771614074707
training loss: 1.5116807222366333


training:   6%|▌         | 6109/100000 [5:34:51<81:40:11,  3.13s/it]

training loss: 0.9837659597396851
training loss: 1.092848300933838
training loss: 1.2499313354492188
training loss: 0.992363452911377


training:   6%|▌         | 6113/100000 [5:35:04<81:33:44,  3.13s/it]

training loss: 1.2868191003799438
training loss: 1.2325258255004883
training loss: 1.2922000885009766
training loss: 1.1290881633758545


training:   6%|▌         | 6117/100000 [5:35:16<81:26:13,  3.12s/it]

training loss: 1.2085893154144287
training loss: 1.2500033378601074
training loss: 1.2019988298416138
training loss: 1.227588415145874


training:   6%|▌         | 6121/100000 [5:35:29<81:22:47,  3.12s/it]

training loss: 1.0383729934692383
training loss: 1.1942179203033447
training loss: 1.1808357238769531
training loss: 1.1674845218658447


training:   6%|▌         | 6125/100000 [5:35:41<81:23:31,  3.12s/it]

training loss: 1.1970984935760498
training loss: 1.3030383586883545
training loss: 1.2677464485168457
training loss: 1.2761189937591553


training:   6%|▌         | 6129/100000 [5:35:54<81:22:09,  3.12s/it]

training loss: 1.2915942668914795
training loss: 1.007338285446167
training loss: 1.119383692741394
training loss: 1.1281663179397583


training:   6%|▌         | 6133/100000 [5:36:06<81:21:14,  3.12s/it]

training loss: 1.2586021423339844
training loss: 1.0039844512939453
training loss: 1.3402360677719116
training loss: 1.0452021360397339


training:   6%|▌         | 6137/100000 [5:36:19<81:20:31,  3.12s/it]

training loss: 1.1521735191345215
training loss: 1.0155267715454102
training loss: 1.1927589178085327
training loss: 1.1736080646514893


training:   6%|▌         | 6141/100000 [5:36:31<81:20:10,  3.12s/it]

training loss: 1.318108320236206
training loss: 1.1385564804077148
training loss: 1.0463868379592896
training loss: 1.1129660606384277


training:   6%|▌         | 6145/100000 [5:36:44<81:17:31,  3.12s/it]

training loss: 1.059457778930664
training loss: 1.208484411239624
training loss: 1.2006137371063232
training loss: 1.1996755599975586


training:   6%|▌         | 6149/100000 [5:36:56<81:16:13,  3.12s/it]

training loss: 1.2107833623886108
training loss: 0.999951958656311
training loss: 1.0666401386260986
training loss: 1.1511164903640747


training:   6%|▌         | 6153/100000 [5:37:09<81:15:59,  3.12s/it]

training loss: 1.1395847797393799
training loss: 1.1220078468322754
training loss: 1.1233876943588257
training loss: 1.0609307289123535


training:   6%|▌         | 6157/100000 [5:37:21<81:11:51,  3.11s/it]

training loss: 1.3708769083023071
training loss: 1.307471513748169
training loss: 1.273390531539917
training loss: 1.2088582515716553


training:   6%|▌         | 6161/100000 [5:37:33<81:09:17,  3.11s/it]

training loss: 1.2225568294525146
training loss: 1.1994210481643677
training loss: 1.0257856845855713
training loss: 1.283237099647522


training:   6%|▌         | 6165/100000 [5:37:46<81:06:54,  3.11s/it]

training loss: 1.0706450939178467
training loss: 1.1213594675064087
training loss: 1.1789298057556152
training loss: 1.2525441646575928


training:   6%|▌         | 6169/100000 [5:37:58<81:04:01,  3.11s/it]

training loss: 1.2783067226409912
training loss: 1.3572748899459839
training loss: 1.0937856435775757
training loss: 1.215855360031128


training:   6%|▌         | 6173/100000 [5:38:11<81:01:04,  3.11s/it]

training loss: 0.9948481321334839
training loss: 1.0700318813323975
training loss: 1.1680939197540283
training loss: 1.1164346933364868


training:   6%|▌         | 6177/100000 [5:38:23<80:58:17,  3.11s/it]

training loss: 1.2799177169799805
training loss: 1.0516027212142944
training loss: 1.1802139282226562
training loss: 1.0583902597427368


training:   6%|▌         | 6181/100000 [5:38:36<81:00:36,  3.11s/it]

training loss: 1.3115746974945068
training loss: 1.0974611043930054
training loss: 1.1696789264678955
training loss: 1.1632527112960815


training:   6%|▌         | 6185/100000 [5:38:48<81:02:59,  3.11s/it]

training loss: 1.190218210220337
training loss: 1.2530750036239624
training loss: 1.1624226570129395
training loss: 0.9856656193733215


training:   6%|▌         | 6189/100000 [5:39:00<81:01:46,  3.11s/it]

training loss: 1.2055253982543945
training loss: 1.2274351119995117
training loss: 1.0251986980438232
training loss: 0.9409899711608887


training:   6%|▌         | 6193/100000 [5:39:13<81:01:33,  3.11s/it]

training loss: 1.2152364253997803
training loss: 1.1058313846588135
training loss: 1.170008897781372
training loss: 1.3289215564727783


training:   6%|▌         | 6197/100000 [5:39:25<81:00:16,  3.11s/it]

training loss: 1.2130961418151855
training loss: 1.1559799909591675
training loss: 1.0457309484481812
training loss: 1.271165132522583
training loss: 1.1776084899902344


training:   6%|▌         | 6201/100000 [5:39:38<81:35:45,  3.13s/it]

validation loss: 1.4501432180404663
training loss: 1.24955153465271
training loss: 1.1738905906677246
training loss: 1.229080080986023


training:   6%|▌         | 6205/100000 [5:39:50<81:21:34,  3.12s/it]

training loss: 1.1341837644577026
training loss: 1.1492044925689697
training loss: 1.1970008611679077
training loss: 1.1487433910369873


training:   6%|▌         | 6209/100000 [5:40:03<81:16:13,  3.12s/it]

training loss: 1.393875241279602
training loss: 0.9999366998672485
training loss: 1.2457579374313354
training loss: 1.1522388458251953


training:   6%|▌         | 6213/100000 [5:40:15<81:13:34,  3.12s/it]

training loss: 1.1323996782302856
training loss: 1.0436210632324219
training loss: 1.1829558610916138
training loss: 1.2672336101531982


training:   6%|▌         | 6217/100000 [5:40:28<81:09:50,  3.12s/it]

training loss: 1.077459692955017
training loss: 1.2168285846710205
training loss: 1.291442632675171
training loss: 1.2023718357086182


training:   6%|▌         | 6221/100000 [5:40:40<81:08:28,  3.11s/it]

training loss: 1.150045394897461
training loss: 0.9063378572463989
training loss: 1.2293498516082764
training loss: 1.052228331565857


training:   6%|▌         | 6225/100000 [5:40:53<81:09:58,  3.12s/it]

training loss: 1.075419545173645
training loss: 1.1317434310913086
training loss: 1.0707027912139893
training loss: 0.9523590803146362


training:   6%|▌         | 6229/100000 [5:41:05<81:08:46,  3.12s/it]

training loss: 1.1527451276779175
training loss: 0.9894990921020508
training loss: 1.150428056716919
training loss: 1.1675812005996704


training:   6%|▌         | 6233/100000 [5:41:18<81:06:49,  3.11s/it]

training loss: 1.2858349084854126
training loss: 1.1926755905151367
training loss: 1.1967227458953857
training loss: 1.211310863494873


training:   6%|▌         | 6237/100000 [5:41:30<81:03:43,  3.11s/it]

training loss: 1.273254156112671
training loss: 1.0502781867980957
training loss: 1.227427363395691
training loss: 1.2561471462249756


training:   6%|▌         | 6241/100000 [5:41:43<81:06:17,  3.11s/it]

training loss: 1.0780730247497559
training loss: 0.9922272562980652
training loss: 0.9580636620521545
training loss: 1.197455644607544


training:   6%|▌         | 6245/100000 [5:41:55<81:08:18,  3.12s/it]

training loss: 1.265332818031311
training loss: 1.1511173248291016
training loss: 1.1989388465881348
training loss: 1.2284491062164307


training:   6%|▌         | 6249/100000 [5:42:07<81:07:14,  3.12s/it]

training loss: 1.1628940105438232
training loss: 1.2296726703643799
training loss: 1.1759483814239502
training loss: 1.0977816581726074


training:   6%|▋         | 6253/100000 [5:42:20<81:07:02,  3.12s/it]

training loss: 1.1068930625915527
training loss: 1.1221548318862915
training loss: 1.2315434217453003
training loss: 1.1486456394195557


training:   6%|▋         | 6257/100000 [5:42:32<81:07:22,  3.12s/it]

training loss: 1.134934902191162
training loss: 1.2962267398834229
training loss: 1.2779003381729126
training loss: 1.1031968593597412


training:   6%|▋         | 6261/100000 [5:42:45<81:08:39,  3.12s/it]

training loss: 1.2412461042404175
training loss: 1.0268268585205078
training loss: 0.9980226755142212
training loss: 1.0873491764068604


training:   6%|▋         | 6265/100000 [5:42:57<81:07:12,  3.12s/it]

training loss: 1.0435466766357422
training loss: 1.1284548044204712
training loss: 1.08650541305542
training loss: 1.169227123260498


training:   6%|▋         | 6269/100000 [5:43:10<81:08:04,  3.12s/it]

training loss: 0.9264049530029297
training loss: 1.180371880531311
training loss: 1.2348192930221558
training loss: 1.0252201557159424


training:   6%|▋         | 6273/100000 [5:43:22<81:07:12,  3.12s/it]

training loss: 1.1782774925231934
training loss: 1.2080494165420532
training loss: 1.2472437620162964
training loss: 0.9393552541732788


training:   6%|▋         | 6277/100000 [5:43:35<81:07:56,  3.12s/it]

training loss: 1.0486377477645874
training loss: 1.316083550453186
training loss: 1.2764371633529663
training loss: 1.1386687755584717


training:   6%|▋         | 6281/100000 [5:43:47<81:08:12,  3.12s/it]

training loss: 1.2304770946502686
training loss: 1.1118731498718262
training loss: 1.0867520570755005
training loss: 1.096897006034851


training:   6%|▋         | 6285/100000 [5:44:00<81:05:58,  3.12s/it]

training loss: 1.1705281734466553
training loss: 1.1233675479888916
training loss: 1.2269561290740967
training loss: 1.0133771896362305


training:   6%|▋         | 6289/100000 [5:44:12<81:07:26,  3.12s/it]

training loss: 1.2352482080459595
training loss: 1.2740504741668701
training loss: 1.254856824874878
training loss: 1.3401610851287842


training:   6%|▋         | 6293/100000 [5:44:25<81:06:27,  3.12s/it]

training loss: 1.099940299987793
training loss: 1.0825915336608887
training loss: 1.296678066253662
training loss: 1.2000007629394531


training:   6%|▋         | 6297/100000 [5:44:37<81:06:08,  3.12s/it]

training loss: 1.1607779264450073
training loss: 1.2707968950271606
training loss: 1.173844814300537
training loss: 1.0717021226882935
training loss: 1.093080759048462


training:   6%|▋         | 6301/100000 [5:44:50<81:40:17,  3.14s/it]

validation loss: 1.2614227533340454
training loss: 1.1307851076126099
training loss: 1.1252694129943848
training loss: 1.0206444263458252


training:   6%|▋         | 6305/100000 [5:45:02<81:29:34,  3.13s/it]

training loss: 1.1847174167633057
training loss: 1.2134069204330444
training loss: 1.1203339099884033
training loss: 1.1156327724456787


training:   6%|▋         | 6309/100000 [5:45:15<81:22:58,  3.13s/it]

training loss: 0.9595496654510498
training loss: 1.2354803085327148
training loss: 1.020120620727539
training loss: 1.1808216571807861


training:   6%|▋         | 6313/100000 [5:45:27<81:16:30,  3.12s/it]

training loss: 1.120406150817871
training loss: 1.0884571075439453
training loss: 1.0403237342834473
training loss: 1.1755214929580688


training:   6%|▋         | 6317/100000 [5:45:40<81:10:32,  3.12s/it]

training loss: 1.1780527830123901
training loss: 0.9828417301177979
training loss: 1.130842685699463
training loss: 1.015892505645752


training:   6%|▋         | 6321/100000 [5:45:52<81:08:57,  3.12s/it]

training loss: 1.2561627626419067
training loss: 1.0433975458145142
training loss: 1.151296615600586
training loss: 1.2172813415527344


training:   6%|▋         | 6325/100000 [5:46:05<81:06:47,  3.12s/it]

training loss: 1.2390732765197754
training loss: 1.1555047035217285
training loss: 1.298659086227417
training loss: 1.2286198139190674


training:   6%|▋         | 6329/100000 [5:46:17<81:06:46,  3.12s/it]

training loss: 1.2375249862670898
training loss: 1.3211941719055176
training loss: 1.2302751541137695
training loss: 0.9741787910461426


training:   6%|▋         | 6333/100000 [5:46:29<81:04:42,  3.12s/it]

training loss: 1.2778280973434448
training loss: 1.1966418027877808
training loss: 1.2088003158569336
training loss: 1.0428478717803955


training:   6%|▋         | 6337/100000 [5:46:42<80:58:34,  3.11s/it]

training loss: 1.1832530498504639
training loss: 1.2603832483291626
training loss: 1.1052331924438477
training loss: 1.0514044761657715


training:   6%|▋         | 6341/100000 [5:46:54<80:57:58,  3.11s/it]

training loss: 1.1523473262786865
training loss: 1.3126451969146729
training loss: 1.1084868907928467
training loss: 1.0164830684661865


training:   6%|▋         | 6345/100000 [5:47:07<80:54:09,  3.11s/it]

training loss: 1.232259750366211
training loss: 1.1654422283172607
training loss: 1.2136449813842773
training loss: 1.0393972396850586


training:   6%|▋         | 6349/100000 [5:47:19<80:51:44,  3.11s/it]

training loss: 1.0540322065353394
training loss: 1.1299595832824707
training loss: 1.209354043006897
training loss: 1.2406116724014282


training:   6%|▋         | 6353/100000 [5:47:32<80:56:14,  3.11s/it]

training loss: 1.2048680782318115
training loss: 1.2960395812988281
training loss: 1.2103934288024902
training loss: 1.455627679824829


training:   6%|▋         | 6357/100000 [5:47:44<80:58:35,  3.11s/it]

training loss: 1.1233482360839844
training loss: 1.1097115278244019
training loss: 1.234215497970581
training loss: 1.3696799278259277


training:   6%|▋         | 6361/100000 [5:47:57<80:59:26,  3.11s/it]

training loss: 1.2717127799987793
training loss: 1.2838068008422852
training loss: 1.2395994663238525
training loss: 1.002002477645874


training:   6%|▋         | 6365/100000 [5:48:09<80:58:15,  3.11s/it]

training loss: 1.149255394935608
training loss: 1.1111125946044922
training loss: 1.0905659198760986
training loss: 0.977481484413147


training:   6%|▋         | 6369/100000 [5:48:21<80:58:32,  3.11s/it]

training loss: 1.1962802410125732
training loss: 1.2868258953094482
training loss: 1.0970218181610107
training loss: 1.0037280321121216


training:   6%|▋         | 6373/100000 [5:48:34<81:00:39,  3.11s/it]

training loss: 1.266657829284668
training loss: 1.2782021760940552
training loss: 1.0589394569396973
training loss: 1.1534478664398193


training:   6%|▋         | 6377/100000 [5:48:46<81:02:01,  3.12s/it]

training loss: 1.099985957145691
training loss: 1.1671199798583984
training loss: 1.0255273580551147
training loss: 1.2364113330841064


training:   6%|▋         | 6381/100000 [5:48:59<81:01:47,  3.12s/it]

training loss: 1.1879990100860596
training loss: 1.0291171073913574
training loss: 0.9987798929214478
training loss: 1.156207799911499


training:   6%|▋         | 6385/100000 [5:49:11<80:59:19,  3.11s/it]

training loss: 1.1423324346542358
training loss: 1.197826623916626
training loss: 1.1187257766723633
training loss: 1.181959629058838


training:   6%|▋         | 6389/100000 [5:49:24<80:58:53,  3.11s/it]

training loss: 1.1086969375610352
training loss: 1.2576438188552856
training loss: 1.2335410118103027
training loss: 1.224278450012207


training:   6%|▋         | 6393/100000 [5:49:36<80:54:35,  3.11s/it]

training loss: 1.2525291442871094
training loss: 0.947297990322113
training loss: 0.919185996055603
training loss: 1.3647128343582153


training:   6%|▋         | 6397/100000 [5:49:49<80:52:41,  3.11s/it]

training loss: 1.2454668283462524
training loss: 1.1296813488006592
training loss: 1.1233083009719849
training loss: 1.1949474811553955


training:   6%|▋         | 6397/100000 [5:50:01<80:52:41,  3.11s/it]

training loss: 1.1802695989608765


training:   6%|▋         | 6401/100000 [5:50:01<81:28:44,  3.13s/it]

validation loss: 1.309048056602478
training loss: 1.151837706565857
training loss: 1.1558325290679932
training loss: 1.1057223081588745


training:   6%|▋         | 6405/100000 [5:50:14<81:20:39,  3.13s/it]

training loss: 1.308948278427124
training loss: 0.8772385120391846
training loss: 1.2789554595947266
training loss: 1.095759391784668


training:   6%|▋         | 6409/100000 [5:50:26<81:16:23,  3.13s/it]

training loss: 1.0914230346679688
training loss: 0.9838545322418213
training loss: 1.1632872819900513
training loss: 1.1217918395996094


training:   6%|▋         | 6413/100000 [5:50:39<81:12:37,  3.12s/it]

training loss: 1.1434391736984253
training loss: 1.215254545211792
training loss: 1.1079291105270386
training loss: 1.1316077709197998


training:   6%|▋         | 6417/100000 [5:50:51<81:10:03,  3.12s/it]

training loss: 1.1195539236068726
training loss: 1.1123223304748535
training loss: 1.1227028369903564
training loss: 1.2006672620773315


training:   6%|▋         | 6421/100000 [5:51:04<81:08:33,  3.12s/it]

training loss: 1.185212254524231
training loss: 0.932775616645813
training loss: 1.2132736444473267
training loss: 0.9586864709854126


training:   6%|▋         | 6425/100000 [5:51:16<81:06:31,  3.12s/it]

training loss: 1.211222767829895
training loss: 1.2472615242004395
training loss: 1.2306190729141235
training loss: 1.189330816268921


training:   6%|▋         | 6429/100000 [5:51:29<80:59:43,  3.12s/it]

training loss: 1.1778963804244995
training loss: 1.2385094165802002
training loss: 1.0536739826202393
training loss: 1.0800167322158813


training:   6%|▋         | 6433/100000 [5:51:41<80:56:13,  3.11s/it]

training loss: 1.17594575881958
training loss: 0.9862792491912842
training loss: 1.236167550086975
training loss: 1.2244927883148193


training:   6%|▋         | 6437/100000 [5:51:54<80:56:16,  3.11s/it]

training loss: 1.142430305480957
training loss: 1.1072845458984375
training loss: 1.1481157541275024
training loss: 1.1567718982696533


training:   6%|▋         | 6441/100000 [5:52:06<80:55:34,  3.11s/it]

training loss: 1.2085751295089722
training loss: 1.1094142198562622
training loss: 0.9873008728027344
training loss: 1.3079346418380737


training:   6%|▋         | 6445/100000 [5:52:18<80:52:33,  3.11s/it]

training loss: 1.1707476377487183
training loss: 1.069428563117981
training loss: 1.0239508152008057
training loss: 1.1427569389343262


training:   6%|▋         | 6449/100000 [5:52:31<80:52:57,  3.11s/it]

training loss: 1.0935022830963135
training loss: 1.107405662536621
training loss: 1.133568286895752
training loss: 1.2871782779693604


training:   6%|▋         | 6453/100000 [5:52:43<80:53:04,  3.11s/it]

training loss: 1.1929330825805664
training loss: 1.2667843103408813
training loss: 1.0877795219421387
training loss: 1.1663028001785278


training:   6%|▋         | 6457/100000 [5:52:56<80:53:35,  3.11s/it]

training loss: 1.0745595693588257
training loss: 1.1205511093139648
training loss: 1.0903154611587524
training loss: 1.2320184707641602


training:   6%|▋         | 6461/100000 [5:53:08<80:56:06,  3.11s/it]

training loss: 1.1480687856674194
training loss: 1.1403634548187256
training loss: 1.365875244140625
training loss: 1.2329461574554443


training:   6%|▋         | 6465/100000 [5:53:21<80:57:36,  3.12s/it]

training loss: 1.1114771366119385
training loss: 1.1506479978561401
training loss: 1.1587775945663452
training loss: 1.1423624753952026


training:   6%|▋         | 6469/100000 [5:53:33<80:59:27,  3.12s/it]

training loss: 1.1572831869125366
training loss: 0.939476728439331
training loss: 1.2646621465682983
training loss: 1.1383388042449951


training:   6%|▋         | 6473/100000 [5:53:46<81:00:53,  3.12s/it]

training loss: 1.0513460636138916
training loss: 1.0583608150482178
training loss: 1.1101516485214233
training loss: 1.109775185585022


training:   6%|▋         | 6477/100000 [5:53:58<80:56:29,  3.12s/it]

training loss: 1.1188185214996338
training loss: 1.2532968521118164
training loss: 1.1420073509216309
training loss: 1.2857860326766968


training:   6%|▋         | 6481/100000 [5:54:11<80:57:20,  3.12s/it]

training loss: 1.1421825885772705
training loss: 1.082410216331482
training loss: 1.1714186668395996
training loss: 0.9224920272827148


training:   6%|▋         | 6485/100000 [5:54:23<80:50:26,  3.11s/it]

training loss: 1.4113019704818726
training loss: 1.1395286321640015
training loss: 1.1076197624206543
training loss: 0.880329966545105


training:   6%|▋         | 6489/100000 [5:54:36<80:51:13,  3.11s/it]

training loss: 1.0353237390518188
training loss: 1.1775076389312744
training loss: 1.2412371635437012
training loss: 1.1883127689361572


training:   6%|▋         | 6493/100000 [5:54:48<80:53:29,  3.11s/it]

training loss: 1.158174991607666
training loss: 1.3300375938415527
training loss: 1.1136775016784668
training loss: 1.0637331008911133


training:   6%|▋         | 6497/100000 [5:55:00<80:55:41,  3.12s/it]

training loss: 1.0523866415023804
training loss: 1.0114787817001343
training loss: 1.1727352142333984
training loss: 1.2428698539733887


training:   6%|▋         | 6497/100000 [5:55:11<80:55:41,  3.12s/it]

training loss: 1.2536441087722778
validation loss: 1.2858424186706543
%s 

 %s ("etenders such as [[Perkin Warbeck]], who pretended to be Richard, Duke of York, the younger of the [[Princes in the Tower]]. These pretenders were backed by disaffected nobles. Henry triumphed in securing his crown by a number of means but principally by dividing and undermining the power of the nobility, especially through bonds and recognizances, as well as forcing them to disband their private armies.  He also honoured his pledge of [[December]] [[1483]] to marry [[Elizabeth of York]], daughter and heir of King Edward IV. The marriage took place on [[January 18]] [[1486]] at Westminster. This unified the warring houses, gave him a greater claim to the throne due to Elizabeth's line of descent and ensured that his children would be of royal blood. (though there is [[Edward IV of England#Was Edward Illegitimate|evidence that Edward was born illegitimate]]).  Henry's first action was to declare himself kin

training:   7%|▋         | 6501/100000 [5:56:32<234:48:35,  9.04s/it]

Model saved at iteration 6500
training loss: 1.1789336204528809
training loss: 0.8752939701080322
training loss: 1.2045741081237793


training:   7%|▋         | 6505/100000 [5:56:44<188:29:25,  7.26s/it]

training loss: 0.9244813919067383
training loss: 1.1309623718261719
training loss: 1.3219233751296997
training loss: 1.0971826314926147


training:   7%|▋         | 6509/100000 [5:56:57<156:07:10,  6.01s/it]

training loss: 1.0772221088409424
training loss: 1.243186354637146
training loss: 1.242260217666626
training loss: 1.1999098062515259


training:   7%|▋         | 6513/100000 [5:57:09<133:32:07,  5.14s/it]

training loss: 1.128483533859253
training loss: 1.2200745344161987
training loss: 1.1824798583984375
training loss: 1.2132604122161865


training:   7%|▋         | 6517/100000 [5:57:22<117:41:07,  4.53s/it]

training loss: 0.9967679977416992
training loss: 1.050894021987915
training loss: 1.208430528640747
training loss: 1.2628629207611084


training:   7%|▋         | 6521/100000 [5:57:34<106:40:24,  4.11s/it]

training loss: 1.1569002866744995
training loss: 1.2519862651824951
training loss: 1.099184513092041
training loss: 1.0216655731201172


training:   7%|▋         | 6525/100000 [5:57:47<98:57:54,  3.81s/it] 

training loss: 1.1037702560424805
training loss: 1.065624713897705
training loss: 1.1668678522109985
training loss: 1.12041437625885


training:   7%|▋         | 6529/100000 [5:57:59<93:34:12,  3.60s/it]

training loss: 1.301081895828247
training loss: 1.0322895050048828
training loss: 1.1308262348175049
training loss: 1.0606056451797485


training:   7%|▋         | 6533/100000 [5:58:12<89:48:37,  3.46s/it]

training loss: 0.934148907661438
training loss: 1.0531082153320312
training loss: 1.096782922744751
training loss: 1.24818754196167


training:   7%|▋         | 6537/100000 [5:58:24<87:09:03,  3.36s/it]

training loss: 1.2026586532592773
training loss: 0.9332470297813416
training loss: 1.0736207962036133
training loss: 1.1845357418060303


training:   7%|▋         | 6541/100000 [5:58:36<85:15:11,  3.28s/it]

training loss: 1.1401937007904053
training loss: 1.025128960609436
training loss: 1.2696101665496826
training loss: 1.1232246160507202


training:   7%|▋         | 6545/100000 [5:58:49<83:54:43,  3.23s/it]

training loss: 1.2422934770584106
training loss: 1.0380029678344727
training loss: 1.054316759109497
training loss: 1.2373485565185547


training:   7%|▋         | 6549/100000 [5:59:01<82:59:22,  3.20s/it]

training loss: 1.1779342889785767
training loss: 0.8919517993927002
training loss: 1.1916412115097046
training loss: 1.1847600936889648


training:   7%|▋         | 6553/100000 [5:59:14<82:21:45,  3.17s/it]

training loss: 1.2177972793579102
training loss: 1.2216438055038452
training loss: 1.18214750289917
training loss: 1.22565758228302


training:   7%|▋         | 6557/100000 [5:59:26<81:52:16,  3.15s/it]

training loss: 1.2270448207855225
training loss: 1.1717482805252075
training loss: 1.3256423473358154
training loss: 1.2661497592926025


training:   7%|▋         | 6561/100000 [5:59:39<81:35:00,  3.14s/it]

training loss: 1.2621098756790161
training loss: 1.1386756896972656
training loss: 1.4334383010864258
training loss: 1.1482659578323364


training:   7%|▋         | 6565/100000 [5:59:51<81:18:33,  3.13s/it]

training loss: 1.0920449495315552
training loss: 1.0235803127288818
training loss: 0.9652162194252014
training loss: 1.1068120002746582


training:   7%|▋         | 6569/100000 [6:00:04<81:09:25,  3.13s/it]

training loss: 1.1640814542770386
training loss: 0.8744939565658569
training loss: 1.1182185411453247
training loss: 1.1514111757278442


training:   7%|▋         | 6573/100000 [6:00:16<81:04:03,  3.12s/it]

training loss: 1.3797940015792847
training loss: 1.2216168642044067
training loss: 1.1343977451324463
training loss: 1.2499253749847412


training:   7%|▋         | 6577/100000 [6:00:29<80:59:29,  3.12s/it]

training loss: 1.2450121641159058
training loss: 1.0916476249694824
training loss: 1.175964593887329
training loss: 1.1012132167816162


training:   7%|▋         | 6581/100000 [6:00:41<80:54:01,  3.12s/it]

training loss: 1.201978087425232
training loss: 1.0794951915740967
training loss: 1.1570560932159424
training loss: 1.0108073949813843


training:   7%|▋         | 6585/100000 [6:00:53<80:53:56,  3.12s/it]

training loss: 1.2458306550979614
training loss: 1.1042691469192505
training loss: 1.1378148794174194
training loss: 1.1218245029449463


training:   7%|▋         | 6589/100000 [6:01:06<80:49:36,  3.12s/it]

training loss: 1.2871423959732056
training loss: 0.7760500907897949
training loss: 1.201716423034668
training loss: 1.0934370756149292


training:   7%|▋         | 6593/100000 [6:01:18<80:46:56,  3.11s/it]

training loss: 0.9390813708305359
training loss: 1.1186352968215942
training loss: 1.1574842929840088
training loss: 1.2760671377182007


training:   7%|▋         | 6597/100000 [6:01:31<80:47:56,  3.11s/it]

training loss: 1.1277722120285034
training loss: 1.299394130706787
training loss: 1.2392876148223877
training loss: 1.1437599658966064


training:   7%|▋         | 6597/100000 [6:01:41<80:47:56,  3.11s/it]

training loss: 1.1457440853118896


training:   7%|▋         | 6601/100000 [6:01:44<81:18:45,  3.13s/it]

validation loss: 1.2058943510055542
training loss: 1.2443456649780273
training loss: 1.0711066722869873
training loss: 1.1162467002868652


training:   7%|▋         | 6605/100000 [6:01:56<81:06:32,  3.13s/it]

training loss: 1.1023436784744263
training loss: 1.1218903064727783
training loss: 1.2058348655700684
training loss: 0.9393754005432129


training:   7%|▋         | 6609/100000 [6:02:08<81:02:10,  3.12s/it]

training loss: 1.0803406238555908
training loss: 1.3730785846710205
training loss: 1.080478310585022
training loss: 1.1536223888397217


training:   7%|▋         | 6613/100000 [6:02:21<80:54:47,  3.12s/it]

training loss: 1.2803593873977661
training loss: 1.3369879722595215
training loss: 1.0653871297836304
training loss: 1.2680341005325317


training:   7%|▋         | 6617/100000 [6:02:33<80:48:21,  3.12s/it]

training loss: 1.0696032047271729
training loss: 1.1222312450408936
training loss: 1.0309686660766602
training loss: 1.098555564880371


training:   7%|▋         | 6621/100000 [6:02:46<80:48:25,  3.12s/it]

training loss: 1.013434648513794
training loss: 1.2020611763000488
training loss: 1.1610500812530518
training loss: 1.0907824039459229


training:   7%|▋         | 6625/100000 [6:02:58<80:45:11,  3.11s/it]

training loss: 1.1268410682678223
training loss: 1.2021751403808594
training loss: 1.0844218730926514
training loss: 0.9650538563728333


training:   7%|▋         | 6629/100000 [6:03:11<80:47:49,  3.12s/it]

training loss: 1.1984798908233643
training loss: 1.2799086570739746
training loss: 1.1667791604995728
training loss: 1.0228804349899292


training:   7%|▋         | 6633/100000 [6:03:23<80:48:19,  3.12s/it]

training loss: 0.9061603546142578
training loss: 1.1498172283172607
training loss: 1.2351908683776855
training loss: 1.1992806196212769


training:   7%|▋         | 6637/100000 [6:03:36<80:46:54,  3.11s/it]

training loss: 1.0998754501342773
training loss: 1.2159430980682373
training loss: 1.0011035203933716
training loss: 1.2027608156204224


training:   7%|▋         | 6641/100000 [6:03:48<80:47:25,  3.12s/it]

training loss: 1.4156336784362793
training loss: 1.160698413848877
training loss: 1.0930328369140625
training loss: 0.9148849248886108


training:   7%|▋         | 6645/100000 [6:04:00<80:44:22,  3.11s/it]

training loss: 1.2483583688735962
training loss: 1.2366418838500977
training loss: 1.0416836738586426
training loss: 1.2884365320205688


training:   7%|▋         | 6649/100000 [6:04:13<80:45:55,  3.11s/it]

training loss: 1.2162487506866455
training loss: 1.1386260986328125
training loss: 1.1583852767944336
training loss: 1.0867180824279785


training:   7%|▋         | 6653/100000 [6:04:25<80:43:03,  3.11s/it]

training loss: 0.9769461154937744
training loss: 1.1631181240081787
training loss: 1.1691356897354126
training loss: 1.0379948616027832


training:   7%|▋         | 6657/100000 [6:04:38<80:43:21,  3.11s/it]

training loss: 1.3617925643920898
training loss: 1.0774950981140137
training loss: 1.233243703842163
training loss: 1.1521397829055786


training:   7%|▋         | 6661/100000 [6:04:50<80:46:31,  3.12s/it]

training loss: 1.4242734909057617
training loss: 1.1771103143692017
training loss: 1.0577900409698486
training loss: 1.2582149505615234


training:   7%|▋         | 6665/100000 [6:05:03<80:48:21,  3.12s/it]

training loss: 1.1757091283798218
training loss: 1.1409823894500732
training loss: 1.1665234565734863
training loss: 1.1321172714233398


training:   7%|▋         | 6669/100000 [6:05:15<80:46:35,  3.12s/it]

training loss: 1.2899327278137207
training loss: 1.0902289152145386
training loss: 1.360641598701477
training loss: 1.2025282382965088


training:   7%|▋         | 6673/100000 [6:05:28<80:45:26,  3.12s/it]

training loss: 1.2114062309265137
training loss: 1.0980201959609985
training loss: 1.2425928115844727
training loss: 1.1518501043319702


training:   7%|▋         | 6677/100000 [6:05:40<80:44:16,  3.11s/it]

training loss: 1.1566188335418701
training loss: 1.2053208351135254
training loss: 0.9116038680076599
training loss: 1.1153802871704102


training:   7%|▋         | 6681/100000 [6:05:53<80:42:12,  3.11s/it]

training loss: 1.1368958950042725
training loss: 1.1598758697509766
training loss: 1.0201011896133423
training loss: 1.2422997951507568


training:   7%|▋         | 6685/100000 [6:06:05<80:45:36,  3.12s/it]

training loss: 1.0501495599746704
training loss: 1.1379742622375488
training loss: 1.1752686500549316
training loss: 1.0815445184707642


training:   7%|▋         | 6689/100000 [6:06:18<80:46:26,  3.12s/it]

training loss: 0.8964083194732666
training loss: 1.144627332687378
training loss: 0.959526777267456
training loss: 1.114485502243042


training:   7%|▋         | 6693/100000 [6:06:30<80:47:05,  3.12s/it]

training loss: 1.0434694290161133
training loss: 1.2014050483703613
training loss: 1.1182000637054443
training loss: 1.1141242980957031


training:   7%|▋         | 6697/100000 [6:06:43<80:48:05,  3.12s/it]

training loss: 1.1204264163970947
training loss: 1.196858286857605
training loss: 1.1042747497558594
training loss: 1.1079540252685547
training loss: 1.1543776988983154


training:   7%|▋         | 6701/100000 [6:06:55<81:23:16,  3.14s/it]

validation loss: 1.2863545417785645
training loss: 0.9617025852203369
training loss: 1.1870074272155762
training loss: 1.1721093654632568


training:   7%|▋         | 6705/100000 [6:07:08<81:07:00,  3.13s/it]

training loss: 1.264585256576538
training loss: 1.1052314043045044
training loss: 1.376474380493164
training loss: 1.124427318572998


training:   7%|▋         | 6709/100000 [6:07:20<80:56:54,  3.12s/it]

training loss: 1.076582670211792
training loss: 1.2305657863616943
training loss: 1.2887266874313354
training loss: 1.2589689493179321


training:   7%|▋         | 6713/100000 [6:07:33<80:52:27,  3.12s/it]

training loss: 1.3592194318771362
training loss: 1.2588043212890625
training loss: 1.0584890842437744
training loss: 1.0123546123504639


training:   7%|▋         | 6717/100000 [6:07:45<80:48:46,  3.12s/it]

training loss: 1.1669905185699463
training loss: 1.2422724962234497
training loss: 1.0154322385787964
training loss: 1.0682282447814941


training:   7%|▋         | 6721/100000 [6:07:58<80:48:12,  3.12s/it]

training loss: 0.9980410933494568
training loss: 1.2859536409378052
training loss: 1.210960865020752
training loss: 1.061788558959961


training:   7%|▋         | 6725/100000 [6:08:10<80:41:37,  3.11s/it]

training loss: 0.9188060760498047
training loss: 1.1117124557495117
training loss: 1.1325535774230957
training loss: 0.9882835745811462


training:   7%|▋         | 6729/100000 [6:08:22<80:38:12,  3.11s/it]

training loss: 1.1260071992874146
training loss: 1.2709825038909912
training loss: 1.0828408002853394
training loss: 0.9666780233383179


training:   7%|▋         | 6733/100000 [6:08:35<80:35:46,  3.11s/it]

training loss: 1.1510393619537354
training loss: 1.3094812631607056
training loss: 1.1411572694778442
training loss: 1.1853320598602295


training:   7%|▋         | 6737/100000 [6:08:47<80:37:02,  3.11s/it]

training loss: 1.016357421875
training loss: 1.231384038925171
training loss: 1.2103713750839233
training loss: 1.110592246055603


training:   7%|▋         | 6741/100000 [6:09:00<80:39:35,  3.11s/it]

training loss: 1.1937063932418823
training loss: 1.2398486137390137
training loss: 1.2217576503753662
training loss: 1.3104362487792969


training:   7%|▋         | 6745/100000 [6:09:12<80:37:32,  3.11s/it]

training loss: 1.0421993732452393
training loss: 1.0435738563537598
training loss: 1.2349375486373901
training loss: 1.0650699138641357


training:   7%|▋         | 6749/100000 [6:09:25<80:38:43,  3.11s/it]

training loss: 1.1956666707992554
training loss: 1.0698599815368652
training loss: 0.9408954381942749
training loss: 1.231170415878296


training:   7%|▋         | 6753/100000 [6:09:37<80:39:12,  3.11s/it]

training loss: 1.0574917793273926
training loss: 1.1203724145889282
training loss: 1.253766417503357
training loss: 1.3074287176132202


training:   7%|▋         | 6757/100000 [6:09:50<80:36:39,  3.11s/it]

training loss: 1.0866868495941162
training loss: 1.1362898349761963
training loss: 0.9158370494842529
training loss: 1.1809122562408447


training:   7%|▋         | 6761/100000 [6:10:02<80:39:18,  3.11s/it]

training loss: 0.8032534122467041
training loss: 1.063090443611145
training loss: 1.0797607898712158
training loss: 1.241485595703125


training:   7%|▋         | 6765/100000 [6:10:14<80:40:03,  3.11s/it]

training loss: 1.1909301280975342
training loss: 1.1196637153625488
training loss: 1.0545365810394287
training loss: 0.9603303670883179


training:   7%|▋         | 6769/100000 [6:10:27<80:40:17,  3.12s/it]

training loss: 1.0571579933166504
training loss: 1.1988070011138916
training loss: 1.171196699142456
training loss: 1.1519325971603394


training:   7%|▋         | 6773/100000 [6:10:39<80:41:10,  3.12s/it]

training loss: 1.1885008811950684
training loss: 1.0884113311767578
training loss: 1.124174952507019
training loss: 1.1159731149673462


training:   7%|▋         | 6777/100000 [6:10:52<80:41:16,  3.12s/it]

training loss: 1.186673879623413
training loss: 1.1957504749298096
training loss: 1.067544937133789
training loss: 1.3316166400909424


training:   7%|▋         | 6781/100000 [6:11:04<80:40:10,  3.12s/it]

training loss: 1.06858491897583
training loss: 1.0216238498687744
training loss: 1.0808959007263184
training loss: 1.1939641237258911


training:   7%|▋         | 6785/100000 [6:11:17<80:41:59,  3.12s/it]

training loss: 1.1874051094055176
training loss: 1.1213908195495605
training loss: 1.175643801689148
training loss: 1.2009409666061401


training:   7%|▋         | 6789/100000 [6:11:29<80:40:21,  3.12s/it]

training loss: 1.0535492897033691
training loss: 1.0164709091186523
training loss: 1.0896599292755127
training loss: 1.1527459621429443


training:   7%|▋         | 6793/100000 [6:11:42<80:39:51,  3.12s/it]

training loss: 1.129908561706543
training loss: 1.0746616125106812
training loss: 1.1293470859527588
training loss: 1.1583850383758545


training:   7%|▋         | 6797/100000 [6:11:54<80:38:54,  3.12s/it]

training loss: 1.0114988088607788
training loss: 1.189188838005066
training loss: 1.3022606372833252
training loss: 1.0887850522994995
training loss: 1.215230107307434


training:   7%|▋         | 6801/100000 [6:12:07<81:10:58,  3.14s/it]

validation loss: 1.142280101776123
training loss: 1.1165187358856201
training loss: 1.0925498008728027
training loss: 1.0277347564697266


training:   7%|▋         | 6805/100000 [6:12:19<80:59:49,  3.13s/it]

training loss: 1.0637946128845215
training loss: 0.9144287109375
training loss: 1.3113991022109985
training loss: 1.1607266664505005


training:   7%|▋         | 6809/100000 [6:12:32<80:50:11,  3.12s/it]

training loss: 1.2774059772491455
training loss: 1.024583101272583
training loss: 1.1062146425247192
training loss: 1.0428886413574219


training:   7%|▋         | 6813/100000 [6:12:44<80:48:04,  3.12s/it]

training loss: 1.1288975477218628
training loss: 1.1000571250915527
training loss: 1.0614769458770752
training loss: 1.104344367980957


training:   7%|▋         | 6817/100000 [6:12:57<80:46:39,  3.12s/it]

training loss: 1.0797529220581055
training loss: 1.2825076580047607
training loss: 1.1925873756408691
training loss: 1.1925640106201172


training:   7%|▋         | 6821/100000 [6:13:09<80:45:14,  3.12s/it]

training loss: 1.233180284500122
training loss: 1.1419426202774048
training loss: 1.0603638887405396
training loss: 1.0152027606964111


training:   7%|▋         | 6825/100000 [6:13:22<80:44:11,  3.12s/it]

training loss: 1.1319010257720947
training loss: 1.1130459308624268
training loss: 1.1696605682373047
training loss: 1.1217634677886963


training:   7%|▋         | 6829/100000 [6:13:34<80:44:18,  3.12s/it]

training loss: 1.0372329950332642
training loss: 1.1611638069152832
training loss: 0.9233800768852234
training loss: 1.2513427734375


training:   7%|▋         | 6833/100000 [6:13:47<80:41:47,  3.12s/it]

training loss: 1.134027123451233
training loss: 1.2636470794677734
training loss: 1.0351630449295044
training loss: 1.170860767364502


training:   7%|▋         | 6837/100000 [6:13:59<80:42:18,  3.12s/it]

training loss: 1.1733883619308472
training loss: 1.1659125089645386
training loss: 1.2421764135360718
training loss: 1.0733869075775146


training:   7%|▋         | 6841/100000 [6:14:12<80:42:09,  3.12s/it]

training loss: 1.104677438735962
training loss: 1.2541494369506836
training loss: 1.1331650018692017
training loss: 1.1166846752166748


training:   7%|▋         | 6845/100000 [6:14:24<80:41:43,  3.12s/it]

training loss: 0.9447464346885681
training loss: 1.131713628768921
training loss: 1.145411491394043
training loss: 1.0128473043441772


training:   7%|▋         | 6849/100000 [6:14:36<80:36:39,  3.12s/it]

training loss: 1.1400091648101807
training loss: 1.1485435962677002
training loss: 1.1551061868667603
training loss: 1.302839994430542


training:   7%|▋         | 6853/100000 [6:14:49<80:37:50,  3.12s/it]

training loss: 1.0699114799499512
training loss: 1.3617480993270874
training loss: 1.1721645593643188
training loss: 1.1952354907989502


training:   7%|▋         | 6857/100000 [6:15:01<80:37:43,  3.12s/it]

training loss: 1.0734397172927856
training loss: 1.2335866689682007
training loss: 1.1699450016021729
training loss: 0.8323204517364502


training:   7%|▋         | 6861/100000 [6:15:14<80:36:39,  3.12s/it]

training loss: 1.0159986019134521
training loss: 1.2828009128570557
training loss: 1.1770107746124268
training loss: 1.0586823225021362


training:   7%|▋         | 6865/100000 [6:15:26<80:37:17,  3.12s/it]

training loss: 1.0928940773010254
training loss: 1.1588246822357178
training loss: 1.0999529361724854
training loss: 1.1453673839569092


training:   7%|▋         | 6869/100000 [6:15:39<80:38:29,  3.12s/it]

training loss: 1.1520298719406128
training loss: 1.063132643699646
training loss: 1.0845935344696045
training loss: 1.213394284248352


training:   7%|▋         | 6873/100000 [6:15:51<80:39:06,  3.12s/it]

training loss: 1.2204527854919434
training loss: 1.211863398551941
training loss: 1.1780977249145508
training loss: 1.2069425582885742


training:   7%|▋         | 6877/100000 [6:16:04<80:40:36,  3.12s/it]

training loss: 1.0972086191177368
training loss: 1.3220577239990234
training loss: 1.2437787055969238
training loss: 1.181465983390808


training:   7%|▋         | 6881/100000 [6:16:16<80:40:51,  3.12s/it]

training loss: 1.1448124647140503
training loss: 1.3793253898620605
training loss: 0.9947803616523743
training loss: 1.2573890686035156


training:   7%|▋         | 6885/100000 [6:16:29<80:40:29,  3.12s/it]

training loss: 0.8338794112205505
training loss: 1.0907096862792969
training loss: 1.2271260023117065
training loss: 1.176692247390747


training:   7%|▋         | 6889/100000 [6:16:41<80:40:18,  3.12s/it]

training loss: 1.1685751676559448
training loss: 1.09419846534729
training loss: 1.261048674583435
training loss: 1.1668014526367188


training:   7%|▋         | 6893/100000 [6:16:54<80:39:34,  3.12s/it]

training loss: 1.1075139045715332
training loss: 1.2820415496826172
training loss: 0.9792188405990601
training loss: 1.1349620819091797


training:   7%|▋         | 6897/100000 [6:17:06<80:35:07,  3.12s/it]

training loss: 1.0661909580230713
training loss: 1.0583046674728394
training loss: 1.1194521188735962
training loss: 1.142313838005066
training loss: 1.181909203529358


training:   7%|▋         | 6901/100000 [6:17:19<81:13:24,  3.14s/it]

validation loss: 1.0971248149871826
training loss: 1.1986101865768433
training loss: 1.2343541383743286
training loss: 1.2852617502212524


training:   7%|▋         | 6905/100000 [6:17:31<81:00:29,  3.13s/it]

training loss: 1.0662208795547485
training loss: 0.943260908126831
training loss: 1.0661866664886475
training loss: 1.384581208229065


training:   7%|▋         | 6909/100000 [6:17:44<80:53:26,  3.13s/it]

training loss: 1.0750787258148193
training loss: 1.103487253189087
training loss: 1.1986005306243896
training loss: 1.1375672817230225


training:   7%|▋         | 6913/100000 [6:17:56<80:49:02,  3.13s/it]

training loss: 1.269099473953247
training loss: 1.1803680658340454
training loss: 1.2490427494049072
training loss: 1.1336205005645752


training:   7%|▋         | 6917/100000 [6:18:09<80:45:07,  3.12s/it]

training loss: 1.0214645862579346
training loss: 1.1158621311187744
training loss: 1.0824224948883057
training loss: 1.220705270767212


training:   7%|▋         | 6921/100000 [6:18:21<80:44:03,  3.12s/it]

training loss: 1.2661436796188354
training loss: 1.1774976253509521
training loss: 1.1937811374664307
training loss: 1.2001699209213257


training:   7%|▋         | 6925/100000 [6:18:34<80:40:17,  3.12s/it]

training loss: 1.067878246307373
training loss: 0.9914140701293945
training loss: 1.0997205972671509
training loss: 1.1414827108383179


training:   7%|▋         | 6929/100000 [6:18:46<80:39:47,  3.12s/it]

training loss: 1.2344346046447754
training loss: 1.0928221940994263
training loss: 1.1089253425598145
training loss: 1.1253540515899658


training:   7%|▋         | 6933/100000 [6:18:59<80:38:55,  3.12s/it]

training loss: 1.0681337118148804
training loss: 0.8466910123825073
training loss: 1.1849308013916016
training loss: 1.1240111589431763


training:   7%|▋         | 6937/100000 [6:19:11<80:33:11,  3.12s/it]

training loss: 1.0233168601989746
training loss: 1.210937261581421
training loss: 1.1553248167037964
training loss: 1.0734024047851562


training:   7%|▋         | 6941/100000 [6:19:24<80:32:25,  3.12s/it]

training loss: 1.043605089187622
training loss: 0.7548735737800598
training loss: 1.14785635471344
training loss: 0.9585954546928406


training:   7%|▋         | 6945/100000 [6:19:36<80:30:46,  3.11s/it]

training loss: 1.0099769830703735
training loss: 1.1945661306381226
training loss: 1.2273712158203125
training loss: 1.122488260269165


training:   7%|▋         | 6949/100000 [6:19:49<80:32:21,  3.12s/it]

training loss: 1.1158968210220337
training loss: 1.0902700424194336
training loss: 1.1749345064163208
training loss: 1.2453246116638184


training:   7%|▋         | 6953/100000 [6:20:01<80:33:19,  3.12s/it]

training loss: 1.1336517333984375
training loss: 1.1867804527282715
training loss: 1.2105504274368286
training loss: 1.1878387928009033


training:   7%|▋         | 6957/100000 [6:20:13<80:33:59,  3.12s/it]

training loss: 1.1010401248931885
training loss: 1.1665937900543213
training loss: 1.104955792427063
training loss: 1.0480592250823975


training:   7%|▋         | 6961/100000 [6:20:26<80:34:49,  3.12s/it]

training loss: 1.1492931842803955
training loss: 1.1238806247711182
training loss: 0.9349383115768433
training loss: 1.0809733867645264


training:   7%|▋         | 6965/100000 [6:20:38<80:33:55,  3.12s/it]

training loss: 1.3667261600494385
training loss: 1.0860025882720947
training loss: 1.119948387145996
training loss: 1.0244436264038086


training:   7%|▋         | 6969/100000 [6:20:51<80:34:13,  3.12s/it]

training loss: 1.040034294128418
training loss: 1.4447509050369263
training loss: 1.1358673572540283
training loss: 1.1752026081085205


training:   7%|▋         | 6973/100000 [6:21:03<80:34:54,  3.12s/it]

training loss: 1.2577509880065918
training loss: 1.1339960098266602
training loss: 1.1970412731170654
training loss: 1.260467529296875


training:   7%|▋         | 6977/100000 [6:21:16<80:34:40,  3.12s/it]

training loss: 1.0325604677200317
training loss: 1.0616402626037598
training loss: 1.1393678188323975
training loss: 1.1614773273468018


training:   7%|▋         | 6981/100000 [6:21:28<80:34:25,  3.12s/it]

training loss: 1.1081161499023438
training loss: 0.9889272451400757
training loss: 1.1586034297943115
training loss: 1.1436095237731934


training:   7%|▋         | 6985/100000 [6:21:41<80:35:15,  3.12s/it]

training loss: 1.0109187364578247
training loss: 1.1626150608062744
training loss: 1.2492560148239136
training loss: 0.9543100595474243


training:   7%|▋         | 6989/100000 [6:21:53<80:34:20,  3.12s/it]

training loss: 1.0340609550476074
training loss: 1.0673270225524902
training loss: 1.2358181476593018
training loss: 1.132066011428833


training:   7%|▋         | 6993/100000 [6:22:06<80:31:04,  3.12s/it]

training loss: 1.2557977437973022
training loss: 1.0507526397705078
training loss: 1.2449179887771606
training loss: 1.2171677350997925


training:   7%|▋         | 6997/100000 [6:22:18<80:31:50,  3.12s/it]

training loss: 1.1582841873168945
training loss: 1.0681023597717285
training loss: 1.2043380737304688
training loss: 1.1277990341186523
training loss: 0.9737906455993652


training:   7%|▋         | 6997/100000 [6:22:31<80:31:50,  3.12s/it]

validation loss: 1.202323317527771
%s 

 %s ("sh;[[1509]]|  title2=[[Lordship of Ireland|Lord of Ireland]]|  years2=[[1485]]&amp;ndash;[[1509]]|  after=[[Henry VIII of England|Henry VIII]] }} {{end box}}  [[Category:1457 births]] [[Category:1509 deaths]] [[Category:Welsh people]] [[Category:English monarchs]] [[Category:History of Wales]] [[Category:Wars of the Roses]] [[Category:Knights of the Golden Fleece]] [[Category:Earls in the Peerage of England]] [[Category:House of Tudor]] [[Category:Roman Catholic monarchs]] [[Category:Historical figures portrayed by Shakespeare]]  [[ar:Ù\x87Ù\x86Ø±Ù\x8a Ø§Ù\x84Ø³Ø§Ø¨Ø¹ Ù\x85Ù\x86 Ø¥Ù\x86Ù\x83Ù\x84ØªØ±Ø§]] [[cs:JindÅ\x99ich VII.]] [[cy:Harri VII o Loegr]] [[de:Heinrich VII. (England)]] [[es:Enrique VII de Inglaterra]] [[fr:Henri VII d'Angleterre]] [[he:×\x94×\xa0×¨×\x99 ×\x94×©×\x91×\x99×¢×\x99 ×\x9e×\x9c×\x9a ×\x90×\xa0×\x92×\x9c×\x99×\x94]] [[kw:Henry VII a Bow Sows]] [[la:Henricus VII Angliae Rex]] [[mr:à¤¹à¥\x87à¤¨à¥\x8dà¤°à¥\x80 à¤¸à¤¾à¤

training:   7%|▋         | 7001/100000 [6:23:50<234:03:16,  9.06s/it]

Model saved at iteration 7000
training loss: 0.9766024947166443
training loss: 1.2619752883911133
training loss: 1.3599721193313599


training:   7%|▋         | 7005/100000 [6:24:02<187:53:21,  7.27s/it]

training loss: 1.0601032972335815
training loss: 1.1540395021438599
training loss: 1.2933493852615356
training loss: 1.1126865148544312


training:   7%|▋         | 7009/100000 [6:24:15<155:41:10,  6.03s/it]

training loss: 1.0698953866958618
training loss: 1.2408294677734375
training loss: 1.2199925184249878
training loss: 1.1232330799102783


training:   7%|▋         | 7013/100000 [6:24:27<133:07:39,  5.15s/it]

training loss: 1.2563138008117676
training loss: 1.249560832977295
training loss: 1.2973089218139648
training loss: 1.090696096420288


training:   7%|▋         | 7017/100000 [6:24:40<117:21:33,  4.54s/it]

training loss: 1.1241137981414795
training loss: 1.1770907640457153
training loss: 1.2071741819381714
training loss: 1.1463000774383545


training:   7%|▋         | 7021/100000 [6:24:52<106:18:43,  4.12s/it]

training loss: 1.0668678283691406
training loss: 1.2285959720611572
training loss: 1.1577666997909546
training loss: 1.1582331657409668


training:   7%|▋         | 7025/100000 [6:25:05<98:36:10,  3.82s/it] 

training loss: 1.116969108581543
training loss: 1.1500775814056396
training loss: 1.1087305545806885
training loss: 1.2342346906661987


training:   7%|▋         | 7029/100000 [6:25:17<93:11:56,  3.61s/it]

training loss: 1.1998848915100098
training loss: 1.1466000080108643
training loss: 1.1311627626419067
training loss: 1.1734017133712769


training:   7%|▋         | 7033/100000 [6:25:30<89:23:24,  3.46s/it]

training loss: 1.194603681564331
training loss: 1.2525725364685059
training loss: 1.1097962856292725
training loss: 1.1925023794174194


training:   7%|▋         | 7037/100000 [6:25:42<86:43:49,  3.36s/it]

training loss: 1.0715659856796265
training loss: 0.9648563861846924
training loss: 1.179345726966858
training loss: 1.1633203029632568


training:   7%|▋         | 7041/100000 [6:25:55<84:52:23,  3.29s/it]

training loss: 1.1086695194244385
training loss: 1.0916857719421387
training loss: 1.1208845376968384
training loss: 1.220773696899414


training:   7%|▋         | 7045/100000 [6:26:07<83:34:23,  3.24s/it]

training loss: 1.2270134687423706
training loss: 1.3784576654434204
training loss: 1.2515181303024292
training loss: 1.2111639976501465


training:   7%|▋         | 7049/100000 [6:26:20<82:40:59,  3.20s/it]

training loss: 1.2116590738296509
training loss: 1.1376261711120605
training loss: 1.002640724182129
training loss: 1.2612323760986328


training:   7%|▋         | 7053/100000 [6:26:32<82:01:50,  3.18s/it]

training loss: 1.0912792682647705
training loss: 1.2829878330230713
training loss: 1.1334283351898193
training loss: 0.9678313136100769


training:   7%|▋         | 7057/100000 [6:26:45<81:33:53,  3.16s/it]

training loss: 1.2598024606704712
training loss: 1.1453120708465576
training loss: 1.1295373439788818
training loss: 1.0972793102264404


training:   7%|▋         | 7061/100000 [6:26:57<81:15:01,  3.15s/it]

training loss: 0.8734920024871826
training loss: 1.1325112581253052
training loss: 1.3018500804901123
training loss: 0.9901525974273682


training:   7%|▋         | 7065/100000 [6:27:09<81:01:58,  3.14s/it]

training loss: 1.4500095844268799
training loss: 1.2078630924224854
training loss: 1.2550742626190186
training loss: 1.2415294647216797


training:   7%|▋         | 7069/100000 [6:27:22<80:52:56,  3.13s/it]

training loss: 1.3067874908447266
training loss: 1.151270866394043
training loss: 0.978610634803772
training loss: 1.1252524852752686


training:   7%|▋         | 7073/100000 [6:27:34<80:44:33,  3.13s/it]

training loss: 1.137091875076294
training loss: 1.1986098289489746
training loss: 1.137068748474121
training loss: 1.0595614910125732


training:   7%|▋         | 7077/100000 [6:27:47<80:39:49,  3.13s/it]

training loss: 1.2380199432373047
training loss: 1.1917457580566406
training loss: 1.0812499523162842
training loss: 1.0076029300689697


training:   7%|▋         | 7081/100000 [6:27:59<80:36:50,  3.12s/it]

training loss: 0.9951745271682739
training loss: 1.0836622714996338
training loss: 1.1635878086090088
training loss: 1.1329758167266846


training:   7%|▋         | 7085/100000 [6:28:12<80:34:08,  3.12s/it]

training loss: 1.1472094058990479
training loss: 0.8040988445281982
training loss: 1.136426568031311
training loss: 1.1229298114776611


training:   7%|▋         | 7089/100000 [6:28:24<80:33:13,  3.12s/it]

training loss: 1.0015218257904053
training loss: 1.1877048015594482
training loss: 1.0899779796600342
training loss: 1.2774676084518433


training:   7%|▋         | 7093/100000 [6:28:37<80:34:06,  3.12s/it]

training loss: 1.1157145500183105
training loss: 1.1838713884353638
training loss: 1.166516661643982
training loss: 1.1278152465820312


training:   7%|▋         | 7097/100000 [6:28:49<80:31:24,  3.12s/it]

training loss: 1.1881409883499146
training loss: 1.1742680072784424
training loss: 1.1145113706588745
training loss: 0.9429585933685303


training:   7%|▋         | 7097/100000 [6:29:01<80:31:24,  3.12s/it]

training loss: 1.2116475105285645


training:   7%|▋         | 7101/100000 [6:29:02<81:05:13,  3.14s/it]

validation loss: 1.0654672384262085
training loss: 1.0146147012710571
training loss: 1.2185386419296265
training loss: 1.1735467910766602


training:   7%|▋         | 7105/100000 [6:29:14<80:49:28,  3.13s/it]

training loss: 1.1108096837997437
training loss: 1.0838018655776978
training loss: 1.1609411239624023
training loss: 1.115272045135498


training:   7%|▋         | 7109/100000 [6:29:27<80:39:13,  3.13s/it]

training loss: 0.999625027179718
training loss: 1.1566531658172607
training loss: 1.0020679235458374
training loss: 1.1749297380447388


training:   7%|▋         | 7113/100000 [6:29:39<80:33:47,  3.12s/it]

training loss: 1.1049671173095703
training loss: 1.0773730278015137
training loss: 1.1179479360580444
training loss: 1.2361310720443726


training:   7%|▋         | 7117/100000 [6:29:52<80:30:39,  3.12s/it]

training loss: 1.1155638694763184
training loss: 1.0358219146728516
training loss: 1.1133685111999512
training loss: 1.289380431175232


training:   7%|▋         | 7121/100000 [6:30:04<80:26:40,  3.12s/it]

training loss: 1.1252413988113403
training loss: 1.3161756992340088
training loss: 1.11480712890625
training loss: 1.1474418640136719


training:   7%|▋         | 7125/100000 [6:30:17<80:26:11,  3.12s/it]

training loss: 1.1330971717834473
training loss: 1.1106390953063965
training loss: 1.0375208854675293
training loss: 1.122546672821045


training:   7%|▋         | 7129/100000 [6:30:29<80:23:44,  3.12s/it]

training loss: 1.505433201789856
training loss: 1.070691704750061
training loss: 1.1500288248062134
training loss: 0.9857466220855713


training:   7%|▋         | 7133/100000 [6:30:42<80:23:25,  3.12s/it]

training loss: 1.2062584161758423
training loss: 1.03248929977417
training loss: 1.1125483512878418
training loss: 0.9711034297943115


training:   7%|▋         | 7137/100000 [6:30:54<80:25:34,  3.12s/it]

training loss: 1.2444324493408203
training loss: 1.290369987487793
training loss: 1.1732321977615356
training loss: 1.2894442081451416


training:   7%|▋         | 7141/100000 [6:31:07<80:25:09,  3.12s/it]

training loss: 1.1140029430389404
training loss: 1.083480715751648
training loss: 1.372649908065796
training loss: 1.183885931968689


training:   7%|▋         | 7145/100000 [6:31:19<80:25:28,  3.12s/it]

training loss: 1.228919506072998
training loss: 1.0860164165496826
training loss: 1.162121057510376
training loss: 1.2061858177185059


training:   7%|▋         | 7149/100000 [6:31:32<80:25:38,  3.12s/it]

training loss: 1.036965012550354
training loss: 1.1874055862426758
training loss: 0.9746888279914856
training loss: 1.2815988063812256


training:   7%|▋         | 7153/100000 [6:31:44<80:21:13,  3.12s/it]

training loss: 1.1732206344604492
training loss: 1.086862564086914
training loss: 1.054016351699829
training loss: 0.9208773374557495


training:   7%|▋         | 7157/100000 [6:31:57<80:21:48,  3.12s/it]

training loss: 1.0543363094329834
training loss: 1.1709918975830078
training loss: 1.0250365734100342
training loss: 1.2868833541870117


training:   7%|▋         | 7161/100000 [6:32:09<80:13:53,  3.11s/it]

training loss: 1.1274330615997314
training loss: 1.1101257801055908
training loss: 1.1252317428588867
training loss: 1.141129970550537


training:   7%|▋         | 7165/100000 [6:32:21<80:17:30,  3.11s/it]

training loss: 1.1952649354934692
training loss: 1.0965244770050049
training loss: 1.1319959163665771
training loss: 1.1645748615264893


training:   7%|▋         | 7169/100000 [6:32:34<80:19:39,  3.12s/it]

training loss: 1.1533105373382568
training loss: 1.193346381187439
training loss: 1.1258046627044678
training loss: 1.1246004104614258


training:   7%|▋         | 7173/100000 [6:32:46<80:20:44,  3.12s/it]

training loss: 1.0768860578536987
training loss: 1.1744511127471924
training loss: 1.1267499923706055
training loss: 1.2332124710083008


training:   7%|▋         | 7177/100000 [6:32:59<80:20:42,  3.12s/it]

training loss: 1.0985838174819946
training loss: 1.271949052810669
training loss: 1.035944938659668
training loss: 1.132346510887146


training:   7%|▋         | 7181/100000 [6:33:11<80:20:01,  3.12s/it]

training loss: 1.2376272678375244
training loss: 1.08482027053833
training loss: 1.1078065633773804
training loss: 1.1455409526824951


training:   7%|▋         | 7185/100000 [6:33:24<80:20:34,  3.12s/it]

training loss: 1.249178409576416
training loss: 1.1375703811645508
training loss: 1.0930343866348267
training loss: 1.2601113319396973


training:   7%|▋         | 7189/100000 [6:33:36<80:18:52,  3.12s/it]

training loss: 1.1778855323791504
training loss: 1.0293563604354858
training loss: 1.1408652067184448
training loss: 1.1839344501495361


training:   7%|▋         | 7193/100000 [6:33:49<80:16:25,  3.11s/it]

training loss: 1.166447639465332
training loss: 1.2475227117538452
training loss: 1.1437491178512573
training loss: 1.0955052375793457


training:   7%|▋         | 7197/100000 [6:34:01<80:16:32,  3.11s/it]

training loss: 1.1849421262741089
training loss: 1.2351813316345215
training loss: 1.1966419219970703
training loss: 1.142371416091919
training loss: 1.2393213510513306


training:   7%|▋         | 7201/100000 [6:34:14<80:52:26,  3.14s/it]

validation loss: 0.9552298784255981
training loss: 1.2003207206726074
training loss: 1.19236421585083
training loss: 1.1355304718017578


training:   7%|▋         | 7205/100000 [6:34:26<80:41:20,  3.13s/it]

training loss: 0.9592406749725342
training loss: 1.220687985420227
training loss: 1.1971914768218994
training loss: 1.1572673320770264


training:   7%|▋         | 7209/100000 [6:34:39<80:35:39,  3.13s/it]

training loss: 1.1145243644714355
training loss: 1.1398073434829712
training loss: 1.2579790353775024
training loss: 1.2753891944885254


training:   7%|▋         | 7213/100000 [6:34:51<80:31:05,  3.12s/it]

training loss: 1.131644606590271
training loss: 1.118324875831604
training loss: 1.007702350616455
training loss: 1.2216426134109497


training:   7%|▋         | 7217/100000 [6:35:04<80:23:30,  3.12s/it]

training loss: 1.1402246952056885
training loss: 1.1897648572921753
training loss: 1.0902762413024902
training loss: 1.1127954721450806


training:   7%|▋         | 7221/100000 [6:35:16<80:19:54,  3.12s/it]

training loss: 1.1898475885391235
training loss: 1.1034108400344849
training loss: 1.0927915573120117
training loss: 1.2088854312896729


training:   7%|▋         | 7225/100000 [6:35:29<80:16:56,  3.12s/it]

training loss: 1.123037576675415
training loss: 1.0691754817962646
training loss: 1.0600907802581787
training loss: 1.1267905235290527


training:   7%|▋         | 7229/100000 [6:35:41<80:13:09,  3.11s/it]

training loss: 1.1102100610733032
training loss: 0.9919624328613281
training loss: 1.1028571128845215
training loss: 1.0516698360443115


training:   7%|▋         | 7233/100000 [6:35:53<80:14:58,  3.11s/it]

training loss: 1.1291815042495728
training loss: 1.048194408416748
training loss: 1.1011611223220825
training loss: 0.6998310685157776


training:   7%|▋         | 7237/100000 [6:36:06<80:12:36,  3.11s/it]

training loss: 1.0657118558883667
training loss: 0.992673397064209
training loss: 1.2526445388793945
training loss: 1.282064437866211


training:   7%|▋         | 7241/100000 [6:36:18<80:13:09,  3.11s/it]

training loss: 0.9607316255569458
training loss: 0.9664301872253418
training loss: 1.0426613092422485
training loss: 1.165792465209961


training:   7%|▋         | 7245/100000 [6:36:31<80:16:33,  3.12s/it]

training loss: 1.0893526077270508
training loss: 1.0433472394943237
training loss: 1.0836890935897827
training loss: 1.1071138381958008


training:   7%|▋         | 7249/100000 [6:36:43<80:15:36,  3.12s/it]

training loss: 1.2610347270965576
training loss: 1.1867802143096924
training loss: 1.2032395601272583
training loss: 1.2018420696258545


training:   7%|▋         | 7253/100000 [6:36:56<80:17:10,  3.12s/it]

training loss: 1.1265795230865479
training loss: 0.8772013783454895
training loss: 1.0972440242767334
training loss: 1.237610101699829


training:   7%|▋         | 7257/100000 [6:37:08<80:15:00,  3.12s/it]

training loss: 1.1129608154296875
training loss: 1.1298727989196777
training loss: 1.1015229225158691
training loss: 1.2822914123535156


training:   7%|▋         | 7261/100000 [6:37:21<80:16:37,  3.12s/it]

training loss: 1.1471185684204102
training loss: 1.0801200866699219
training loss: 0.994266152381897
training loss: 1.0153834819793701


training:   7%|▋         | 7265/100000 [6:37:33<80:20:49,  3.12s/it]

training loss: 1.1242157220840454
training loss: 1.2231502532958984
training loss: 1.1589765548706055
training loss: 1.1162848472595215


training:   7%|▋         | 7269/100000 [6:37:46<80:19:02,  3.12s/it]

training loss: 1.1408535242080688
training loss: 1.1456525325775146
training loss: 1.105789065361023
training loss: 0.9515436291694641


training:   7%|▋         | 7273/100000 [6:37:58<80:18:35,  3.12s/it]

training loss: 0.8538829684257507
training loss: 1.2991999387741089
training loss: 1.2072142362594604
training loss: 1.1210575103759766


training:   7%|▋         | 7277/100000 [6:38:11<80:18:09,  3.12s/it]

training loss: 1.244675874710083
training loss: 1.1066009998321533
training loss: 1.0763280391693115
training loss: 1.0304185152053833


training:   7%|▋         | 7281/100000 [6:38:23<80:16:14,  3.12s/it]

training loss: 1.1611406803131104
training loss: 1.0666543245315552
training loss: 1.2217715978622437
training loss: 1.1455578804016113


training:   7%|▋         | 7285/100000 [6:38:36<80:13:22,  3.11s/it]

training loss: 1.1454964876174927
training loss: 1.1536400318145752
training loss: 1.1229299306869507
training loss: 1.2875984907150269


training:   7%|▋         | 7289/100000 [6:38:48<80:10:54,  3.11s/it]

training loss: 1.054023027420044
training loss: 1.1029114723205566
training loss: 1.0952460765838623
training loss: 1.1096360683441162


training:   7%|▋         | 7293/100000 [6:39:00<80:10:34,  3.11s/it]

training loss: 1.167003870010376
training loss: 1.136789083480835
training loss: 1.0617094039916992
training loss: 1.1433566808700562


training:   7%|▋         | 7297/100000 [6:39:13<80:10:46,  3.11s/it]

training loss: 1.0021085739135742
training loss: 1.1699415445327759
training loss: 1.2401728630065918
training loss: 1.2170485258102417
training loss: 1.072698950767517


training:   7%|▋         | 7301/100000 [6:39:26<80:47:36,  3.14s/it]

validation loss: 1.04423189163208
training loss: 1.1593464612960815
training loss: 1.1728601455688477
training loss: 1.156707525253296


training:   7%|▋         | 7305/100000 [6:39:38<80:36:31,  3.13s/it]

training loss: 1.1336323022842407
training loss: 1.1600254774093628
training loss: 1.1904652118682861
training loss: 1.1292457580566406


training:   7%|▋         | 7309/100000 [6:39:51<80:30:16,  3.13s/it]

training loss: 0.9050639867782593
training loss: 0.9486520886421204
training loss: 0.9590988755226135
training loss: 1.176999807357788


training:   7%|▋         | 7313/100000 [6:40:03<80:26:17,  3.12s/it]

training loss: 1.2162184715270996
training loss: 1.1909593343734741
training loss: 1.1727697849273682
training loss: 1.1738330125808716


training:   7%|▋         | 7317/100000 [6:40:15<80:21:11,  3.12s/it]

training loss: 1.2403326034545898
training loss: 1.325911045074463
training loss: 1.0457656383514404
training loss: 0.9361170530319214


training:   7%|▋         | 7321/100000 [6:40:28<80:17:22,  3.12s/it]

training loss: 1.071987271308899
training loss: 1.1789686679840088
training loss: 0.9917863607406616
training loss: 1.220665454864502


training:   7%|▋         | 7325/100000 [6:40:40<80:16:55,  3.12s/it]

training loss: 1.0459595918655396
training loss: 1.0667393207550049
training loss: 1.038215160369873
training loss: 1.1514674425125122


training:   7%|▋         | 7329/100000 [6:40:53<80:17:45,  3.12s/it]

training loss: 1.2433613538742065
training loss: 1.0261975526809692
training loss: 1.0478873252868652
training loss: 1.0429487228393555


training:   7%|▋         | 7333/100000 [6:41:05<80:15:43,  3.12s/it]

training loss: 1.223933458328247
training loss: 1.2036844491958618
training loss: 1.136029839515686
training loss: 1.1641830205917358


training:   7%|▋         | 7337/100000 [6:41:18<80:15:23,  3.12s/it]

training loss: 1.0607690811157227
training loss: 1.2811484336853027
training loss: 1.0487425327301025
training loss: 1.0033013820648193


training:   7%|▋         | 7341/100000 [6:41:30<80:15:13,  3.12s/it]

training loss: 1.072778344154358
training loss: 1.2202597856521606
training loss: 1.0864595174789429
training loss: 1.244922161102295


training:   7%|▋         | 7345/100000 [6:41:43<80:15:11,  3.12s/it]

training loss: 1.1447685956954956
training loss: 1.174635887145996
training loss: 1.148324966430664
training loss: 1.2730588912963867


training:   7%|▋         | 7349/100000 [6:41:55<80:16:05,  3.12s/it]

training loss: 1.1990177631378174
training loss: 1.1831283569335938
training loss: 1.1323025226593018
training loss: 0.9882110357284546


training:   7%|▋         | 7353/100000 [6:42:08<80:14:28,  3.12s/it]

training loss: 1.204679012298584
training loss: 1.1215338706970215
training loss: 1.1528637409210205
training loss: 1.1634658575057983


training:   7%|▋         | 7357/100000 [6:42:20<80:14:23,  3.12s/it]

training loss: 1.1518584489822388
training loss: 1.083580493927002
training loss: 1.23378324508667
training loss: 1.2173347473144531


training:   7%|▋         | 7361/100000 [6:42:33<80:14:23,  3.12s/it]

training loss: 1.1460952758789062
training loss: 0.9879333972930908
training loss: 1.1921206712722778
training loss: 1.1761565208435059


training:   7%|▋         | 7365/100000 [6:42:45<80:12:21,  3.12s/it]

training loss: 1.2604812383651733
training loss: 1.2054370641708374
training loss: 1.121890902519226
training loss: 1.1178884506225586


training:   7%|▋         | 7369/100000 [6:42:58<80:13:12,  3.12s/it]

training loss: 1.052046537399292
training loss: 0.9863627552986145
training loss: 1.0252848863601685
training loss: 1.0989153385162354


training:   7%|▋         | 7373/100000 [6:43:10<80:14:42,  3.12s/it]

training loss: 1.2145936489105225
training loss: 1.1167287826538086
training loss: 1.154749870300293
training loss: 0.9430415034294128


training:   7%|▋         | 7377/100000 [6:43:23<80:14:46,  3.12s/it]

training loss: 1.0255743265151978
training loss: 0.9035107493400574
training loss: 1.1108275651931763
training loss: 1.1803556680679321


training:   7%|▋         | 7381/100000 [6:43:35<80:14:13,  3.12s/it]

training loss: 1.221757411956787
training loss: 1.1305222511291504
training loss: 1.1131772994995117
training loss: 1.101894736289978


training:   7%|▋         | 7385/100000 [6:43:48<80:13:40,  3.12s/it]

training loss: 0.7792849540710449
training loss: 1.0895459651947021
training loss: 1.1688146591186523
training loss: 1.097193717956543


training:   7%|▋         | 7389/100000 [6:44:00<80:12:58,  3.12s/it]

training loss: 1.1380717754364014
training loss: 1.0293724536895752
training loss: 1.202327013015747
training loss: 1.098435401916504


training:   7%|▋         | 7393/100000 [6:44:12<80:13:51,  3.12s/it]

training loss: 1.0583651065826416
training loss: 1.3259365558624268
training loss: 1.2330291271209717
training loss: 1.2294516563415527


training:   7%|▋         | 7397/100000 [6:44:25<80:10:06,  3.12s/it]

training loss: 1.5250327587127686
training loss: 1.122995138168335
training loss: 0.9000201225280762
training loss: 1.1635791063308716
training loss: 1.1219258308410645


training:   7%|▋         | 7401/100000 [6:44:38<80:43:31,  3.14s/it]

validation loss: 1.2414785623550415
training loss: 1.1080538034439087
training loss: 1.1675925254821777
training loss: 1.2264010906219482


training:   7%|▋         | 7405/100000 [6:44:50<80:31:41,  3.13s/it]

training loss: 1.0985116958618164
training loss: 1.3000106811523438
training loss: 1.1481385231018066
training loss: 0.9773774147033691


training:   7%|▋         | 7409/100000 [6:45:03<80:26:05,  3.13s/it]

training loss: 0.9971799850463867
training loss: 1.1723012924194336
training loss: 1.2883124351501465
training loss: 1.0048513412475586


training:   7%|▋         | 7413/100000 [6:45:15<80:17:31,  3.12s/it]

training loss: 1.0605454444885254
training loss: 1.252638816833496
training loss: 1.1464385986328125
training loss: 1.1011472940444946


training:   7%|▋         | 7417/100000 [6:45:28<80:17:05,  3.12s/it]

training loss: 1.2452929019927979
training loss: 1.2034385204315186
training loss: 1.150910496711731
training loss: 1.1709535121917725


training:   7%|▋         | 7421/100000 [6:45:40<80:14:15,  3.12s/it]

training loss: 1.2028920650482178
training loss: 1.1820813417434692
training loss: 0.8772542476654053
training loss: 1.0494303703308105


training:   7%|▋         | 7425/100000 [6:45:52<80:11:19,  3.12s/it]

training loss: 1.0418164730072021
training loss: 1.2438082695007324
training loss: 1.199554443359375
training loss: 1.0815906524658203


training:   7%|▋         | 7429/100000 [6:46:05<80:06:14,  3.12s/it]

training loss: 0.9937618970870972
training loss: 0.9608955383300781
training loss: 1.1044925451278687
training loss: 1.060965895652771


training:   7%|▋         | 7433/100000 [6:46:17<80:03:02,  3.11s/it]

training loss: 0.9724318981170654
training loss: 0.9240989685058594
training loss: 1.1408768892288208
training loss: 1.1405900716781616


training:   7%|▋         | 7437/100000 [6:46:30<80:04:42,  3.11s/it]

training loss: 1.0992528200149536
training loss: 1.1404914855957031
training loss: 1.079309344291687
training loss: 0.8811522126197815


training:   7%|▋         | 7441/100000 [6:46:42<80:05:06,  3.11s/it]

training loss: 0.9826714992523193
training loss: 1.0659403800964355
training loss: 1.1562414169311523
training loss: 1.0625982284545898


training:   7%|▋         | 7445/100000 [6:46:55<80:06:27,  3.12s/it]

training loss: 1.1230463981628418
training loss: 0.8318787217140198
training loss: 1.2972317934036255
training loss: 1.103195071220398


training:   7%|▋         | 7449/100000 [6:47:07<80:07:43,  3.12s/it]

training loss: 1.2029062509536743
training loss: 1.0584383010864258
training loss: 1.2320152521133423
training loss: 1.048141360282898


training:   7%|▋         | 7453/100000 [6:47:20<80:08:14,  3.12s/it]

training loss: 1.191927433013916
training loss: 1.1451432704925537
training loss: 1.2974125146865845
training loss: 1.3845152854919434


training:   7%|▋         | 7457/100000 [6:47:32<80:07:05,  3.12s/it]

training loss: 1.1125082969665527
training loss: 1.2762749195098877
training loss: 1.0615735054016113
training loss: 1.309830665588379


training:   7%|▋         | 7461/100000 [6:47:45<80:04:19,  3.12s/it]

training loss: 0.975370466709137
training loss: 1.0867921113967896
training loss: 1.129827618598938
training loss: 1.0339287519454956


training:   7%|▋         | 7465/100000 [6:47:57<80:06:03,  3.12s/it]

training loss: 1.1488347053527832
training loss: 1.2225502729415894
training loss: 1.1098663806915283
training loss: 1.1860288381576538


training:   7%|▋         | 7469/100000 [6:48:10<80:06:37,  3.12s/it]

training loss: 1.3359628915786743
training loss: 0.9780802130699158
training loss: 1.2139666080474854
training loss: 1.1445996761322021


training:   7%|▋         | 7473/100000 [6:48:22<80:06:49,  3.12s/it]

training loss: 1.0972306728363037
training loss: 1.2436802387237549
training loss: 1.0775113105773926
training loss: 1.3573638200759888


training:   7%|▋         | 7477/100000 [6:48:34<80:07:16,  3.12s/it]

training loss: 1.1851286888122559
training loss: 1.1820544004440308
training loss: 1.163666009902954
training loss: 0.9799404144287109


training:   7%|▋         | 7481/100000 [6:48:47<80:04:03,  3.12s/it]

training loss: 1.1503866910934448
training loss: 1.1776483058929443
training loss: 1.0872200727462769
training loss: 1.1092520952224731


training:   7%|▋         | 7485/100000 [6:48:59<80:01:26,  3.11s/it]

training loss: 1.0749585628509521
training loss: 1.2387058734893799
training loss: 1.1393930912017822
training loss: 1.1903358697891235


training:   7%|▋         | 7489/100000 [6:49:12<80:03:11,  3.12s/it]

training loss: 0.9694599509239197
training loss: 1.2203433513641357
training loss: 1.2702982425689697
training loss: 1.0573439598083496


training:   7%|▋         | 7493/100000 [6:49:24<80:02:12,  3.11s/it]

training loss: 1.0885008573532104
training loss: 1.2198113203048706
training loss: 1.1564221382141113
training loss: 1.0285866260528564


training:   7%|▋         | 7497/100000 [6:49:37<80:03:49,  3.12s/it]

training loss: 1.0208184719085693
training loss: 1.2382760047912598
training loss: 1.0873395204544067
training loss: 1.1010196208953857
training loss: 1.2631216049194336
validation loss: 1.2491763830184937
%s 

 %s ("cted in an ''anonymous profile'' of the user. While such profiles do not contain personal information (name, address, etc.), they have been subject of some privacy concerns.  According to the same survey, a large percentage of Internet users are unable to delete cookies.  ==Browser settings== Most modern browsers support cookies. However, a user can usually also choose whether cookies should be used or not. The following are common options{{ref|faq}}: cookies are never accepted; the browser asks the user whether to accept every individual cookie; cookies are always accepted.  [[Image:Mozilla-cokie.png|right|thumb|200px|The [[Mozilla]] cookie manager: in the list, cookie names with associated domains]]  The browser may also include the possibility of better specifying which

training:   7%|▋         | 7497/100000 [6:49:51<80:03:49,  3.12s/it]

g the list of choose shot that), not will ''recival'' along the back''), the most particular compensation to meet it leads to explain collectivity in tunnels of surface cookies.  ==External most fates== In accept have its support cross popular process and euros violent; the violent description of a person that was they usually do whether a recognizing or interport plant in the [[coU.S. Los Anglo and 11 Spinson]] are unventool in the world, in the mercenarity of a more accept from the chair of the U.S., the enable for up to the Indian transmission cowers). The downs having no collector of every support perform the vegetarious [[Sterlings of elipsoning]] 55 squared from the [[Xerol emption thought the observation to pregnancies from the previous [[Brickston, Massacruming Japanese]] recorded, recipient, from the [[Coedan]] move triangue date or make they were familia. We is restrained elected in the [[Holly Williams]].   ==Treasman assarts]] in which was then using ''Endynasties: The weit

training:   8%|▊         | 7501/100000 [6:51:08<232:38:46,  9.05s/it]

Model saved at iteration 7500
training loss: 1.2544419765472412
training loss: 1.21857488155365
training loss: 1.1729230880737305


training:   8%|▊         | 7505/100000 [6:51:21<186:41:42,  7.27s/it]

training loss: 1.0315957069396973
training loss: 1.0782513618469238
training loss: 1.0287954807281494
training loss: 0.9564138650894165


training:   8%|▊         | 7509/100000 [6:51:33<154:33:39,  6.02s/it]

training loss: 1.1755009889602661
training loss: 0.8505293130874634
training loss: 1.1875076293945312
training loss: 1.2733031511306763


training:   8%|▊         | 7513/100000 [6:51:46<132:12:07,  5.15s/it]

training loss: 1.1729679107666016
training loss: 1.0714232921600342
training loss: 1.1065349578857422
training loss: 1.124544620513916


training:   8%|▊         | 7517/100000 [6:51:58<116:34:03,  4.54s/it]

training loss: 0.9161367416381836
training loss: 0.8880468606948853
training loss: 1.0952261686325073
training loss: 1.1488933563232422


training:   8%|▊         | 7521/100000 [6:52:11<105:37:08,  4.11s/it]

training loss: 1.1934775114059448
training loss: 1.3234244585037231
training loss: 1.1077237129211426
training loss: 1.1114633083343506


training:   8%|▊         | 7525/100000 [6:52:23<97:55:22,  3.81s/it] 

training loss: 1.1339547634124756
training loss: 1.214890480041504
training loss: 1.192680835723877
training loss: 1.135594367980957


training:   8%|▊         | 7529/100000 [6:52:35<92:31:54,  3.60s/it]

training loss: 1.2623465061187744
training loss: 1.2231271266937256
training loss: 1.2145793437957764
training loss: 1.061070203781128


training:   8%|▊         | 7533/100000 [6:52:48<88:47:26,  3.46s/it]

training loss: 1.1914172172546387
training loss: 1.324623465538025
training loss: 0.8961845636367798
training loss: 1.097233772277832


training:   8%|▊         | 7537/100000 [6:53:00<86:10:21,  3.36s/it]

training loss: 1.151079535484314
training loss: 0.9801897406578064
training loss: 1.0470973253250122
training loss: 1.284182071685791


training:   8%|▊         | 7541/100000 [6:53:13<84:21:56,  3.28s/it]

training loss: 1.1262316703796387
training loss: 1.0625580549240112
training loss: 1.0499569177627563
training loss: 0.9932031631469727


training:   8%|▊         | 7545/100000 [6:53:25<83:07:05,  3.24s/it]

training loss: 1.0807057619094849
training loss: 1.179307222366333
training loss: 1.0261772871017456
training loss: 1.011429786682129


training:   8%|▊         | 7549/100000 [6:53:38<82:11:35,  3.20s/it]

training loss: 1.0192583799362183
training loss: 1.0769450664520264
training loss: 1.2233824729919434
training loss: 0.9982771277427673


training:   8%|▊         | 7553/100000 [6:53:50<81:31:09,  3.17s/it]

training loss: 1.190096378326416
training loss: 1.1648423671722412
training loss: 1.037194013595581
training loss: 1.0063999891281128


training:   8%|▊         | 7557/100000 [6:54:03<81:02:50,  3.16s/it]

training loss: 1.0403926372528076
training loss: 0.8603718876838684
training loss: 1.024277925491333
training loss: 1.1186569929122925


training:   8%|▊         | 7561/100000 [6:54:15<80:45:40,  3.15s/it]

training loss: 1.1783170700073242
training loss: 1.0732065439224243
training loss: 1.2007116079330444
training loss: 1.17411208152771


training:   8%|▊         | 7565/100000 [6:54:28<80:34:04,  3.14s/it]

training loss: 1.1641228199005127
training loss: 1.1442753076553345
training loss: 1.0859137773513794
training loss: 0.9784399271011353


training:   8%|▊         | 7569/100000 [6:54:40<80:22:57,  3.13s/it]

training loss: 1.165389895439148
training loss: 1.1869823932647705
training loss: 0.8432913422584534
training loss: 1.1464979648590088


training:   8%|▊         | 7573/100000 [6:54:53<80:13:07,  3.12s/it]

training loss: 1.1323857307434082
training loss: 1.1790850162506104
training loss: 1.2867090702056885
training loss: 0.832838237285614


training:   8%|▊         | 7577/100000 [6:55:05<80:09:58,  3.12s/it]

training loss: 1.1140196323394775
training loss: 1.2032546997070312
training loss: 1.2035678625106812
training loss: 0.9930589199066162


training:   8%|▊         | 7581/100000 [6:55:18<80:06:04,  3.12s/it]

training loss: 1.3584649562835693
training loss: 1.366571307182312
training loss: 1.2563910484313965
training loss: 1.5684962272644043


training:   8%|▊         | 7585/100000 [6:55:30<80:06:22,  3.12s/it]

training loss: 1.1361987590789795
training loss: 1.1287753582000732
training loss: 1.0821893215179443
training loss: 1.2367061376571655


training:   8%|▊         | 7589/100000 [6:55:43<80:06:19,  3.12s/it]

training loss: 0.9789513349533081
training loss: 1.1180965900421143
training loss: 1.1916495561599731
training loss: 1.2459137439727783


training:   8%|▊         | 7593/100000 [6:55:55<80:05:15,  3.12s/it]

training loss: 1.2119135856628418
training loss: 1.194312572479248
training loss: 1.2877708673477173
training loss: 1.174025535583496


training:   8%|▊         | 7597/100000 [6:56:07<80:02:34,  3.12s/it]

training loss: 1.1338282823562622
training loss: 1.0773916244506836
training loss: 1.251068115234375
training loss: 1.2185496091842651
training loss: 1.076986312866211


training:   8%|▊         | 7601/100000 [6:56:20<80:34:49,  3.14s/it]

validation loss: 1.0537381172180176
training loss: 1.142755389213562
training loss: 1.1583387851715088
training loss: 0.8984903693199158


training:   8%|▊         | 7605/100000 [6:56:33<80:22:39,  3.13s/it]

training loss: 1.194298505783081
training loss: 1.1980339288711548
training loss: 1.1776068210601807
training loss: 1.0867871046066284


training:   8%|▊         | 7609/100000 [6:56:45<80:11:02,  3.12s/it]

training loss: 1.0917402505874634
training loss: 1.1421672105789185
training loss: 1.052375316619873
training loss: 1.0058884620666504


training:   8%|▊         | 7613/100000 [6:56:58<80:03:45,  3.12s/it]

training loss: 0.9793115854263306
training loss: 1.1989479064941406
training loss: 1.0759758949279785
training loss: 0.9855054616928101


training:   8%|▊         | 7617/100000 [6:57:10<80:01:00,  3.12s/it]

training loss: 0.8753037452697754
training loss: 0.9784732460975647
training loss: 1.201594591140747
training loss: 1.2572124004364014


training:   8%|▊         | 7621/100000 [6:57:22<80:00:22,  3.12s/it]

training loss: 1.1778273582458496
training loss: 0.9138025045394897
training loss: 1.08931565284729
training loss: 1.1285436153411865


training:   8%|▊         | 7625/100000 [6:57:35<79:59:52,  3.12s/it]

training loss: 1.113720417022705
training loss: 1.0306217670440674
training loss: 1.0984545946121216
training loss: 1.1370010375976562


training:   8%|▊         | 7629/100000 [6:57:47<79:54:46,  3.11s/it]

training loss: 1.0015182495117188
training loss: 1.1892489194869995
training loss: 1.0368627309799194
training loss: 1.115281343460083


training:   8%|▊         | 7633/100000 [6:58:00<79:52:10,  3.11s/it]

training loss: 1.2400290966033936
training loss: 1.0887161493301392
training loss: 1.1010208129882812
training loss: 0.8965398073196411


training:   8%|▊         | 7637/100000 [6:58:12<79:49:56,  3.11s/it]

training loss: 1.0860233306884766
training loss: 0.9854776859283447
training loss: 0.9454591274261475
training loss: 1.1487443447113037


training:   8%|▊         | 7641/100000 [6:58:25<79:48:49,  3.11s/it]

training loss: 1.1507834196090698
training loss: 1.009246826171875
training loss: 1.1392648220062256
training loss: 1.224275827407837


training:   8%|▊         | 7645/100000 [6:58:37<79:52:12,  3.11s/it]

training loss: 0.9978722929954529
training loss: 1.127774715423584
training loss: 1.1490815877914429
training loss: 1.0198893547058105


training:   8%|▊         | 7649/100000 [6:58:50<79:55:49,  3.12s/it]

training loss: 1.098070740699768
training loss: 1.289407730102539
training loss: 1.250485897064209
training loss: 1.1751625537872314


training:   8%|▊         | 7653/100000 [6:59:02<79:55:18,  3.12s/it]

training loss: 1.1056408882141113
training loss: 0.9958479404449463
training loss: 1.0219309329986572
training loss: 1.0665364265441895


training:   8%|▊         | 7657/100000 [6:59:15<79:56:43,  3.12s/it]

training loss: 1.1553308963775635
training loss: 1.2320642471313477
training loss: 1.3122011423110962
training loss: 1.0944768190383911


training:   8%|▊         | 7661/100000 [6:59:27<79:57:01,  3.12s/it]

training loss: 0.9559833407402039
training loss: 1.1345386505126953
training loss: 1.0680339336395264
training loss: 1.1859767436981201


training:   8%|▊         | 7665/100000 [6:59:39<79:54:53,  3.12s/it]

training loss: 1.139076590538025
training loss: 1.1886012554168701
training loss: 1.005331039428711
training loss: 1.0527188777923584


training:   8%|▊         | 7669/100000 [6:59:52<79:49:02,  3.11s/it]

training loss: 0.9847421646118164
training loss: 1.2421340942382812
training loss: 1.2156339883804321
training loss: 1.407729148864746


training:   8%|▊         | 7673/100000 [7:00:04<79:44:46,  3.11s/it]

training loss: 1.1074130535125732
training loss: 1.215055227279663
training loss: 1.1714388132095337
training loss: 1.1377660036087036


training:   8%|▊         | 7677/100000 [7:00:17<79:46:46,  3.11s/it]

training loss: 1.1237695217132568
training loss: 1.1415550708770752
training loss: 1.2458136081695557
training loss: 1.2144705057144165


training:   8%|▊         | 7681/100000 [7:00:29<79:47:31,  3.11s/it]

training loss: 1.1380778551101685
training loss: 0.9285268783569336
training loss: 1.184272289276123
training loss: 1.1403098106384277


training:   8%|▊         | 7685/100000 [7:00:42<79:48:10,  3.11s/it]

training loss: 1.0266672372817993
training loss: 0.9381856918334961
training loss: 0.8079895377159119
training loss: 0.9943224787712097


training:   8%|▊         | 7689/100000 [7:00:54<79:46:55,  3.11s/it]

training loss: 1.2399818897247314
training loss: 1.0772544145584106
training loss: 1.0458694696426392
training loss: 0.9065392017364502


training:   8%|▊         | 7693/100000 [7:01:07<79:46:24,  3.11s/it]

training loss: 1.1234703063964844
training loss: 1.0248489379882812
training loss: 1.2368037700653076
training loss: 1.235491394996643


training:   8%|▊         | 7697/100000 [7:01:19<79:48:23,  3.11s/it]

training loss: 0.9652937650680542
training loss: 1.240584373474121
training loss: 1.108229398727417
training loss: 1.1903114318847656


training:   8%|▊         | 7697/100000 [7:01:31<79:48:23,  3.11s/it]

training loss: 1.1364309787750244


training:   8%|▊         | 7701/100000 [7:01:32<80:25:54,  3.14s/it]

validation loss: 1.092075228691101
training loss: 1.0866175889968872
training loss: 1.022316336631775
training loss: 1.066734790802002


training:   8%|▊         | 7705/100000 [7:01:44<80:15:27,  3.13s/it]

training loss: 1.1490215063095093
training loss: 0.9264104962348938
training loss: 1.192281723022461
training loss: 1.2287429571151733


training:   8%|▊         | 7709/100000 [7:01:57<80:09:47,  3.13s/it]

training loss: 1.1335785388946533
training loss: 1.1118943691253662
training loss: 0.9860893487930298
training loss: 1.0641289949417114


training:   8%|▊         | 7713/100000 [7:02:09<80:03:31,  3.12s/it]

training loss: 1.089789628982544
training loss: 1.0993576049804688
training loss: 1.123018503189087
training loss: 1.0694831609725952


training:   8%|▊         | 7717/100000 [7:02:22<80:01:03,  3.12s/it]

training loss: 1.213552474975586
training loss: 1.037846565246582
training loss: 1.0327988862991333
training loss: 1.2610514163970947


training:   8%|▊         | 7721/100000 [7:02:34<79:58:52,  3.12s/it]

training loss: 1.066562533378601
training loss: 1.1694669723510742
training loss: 1.1867120265960693
training loss: 1.1921987533569336


training:   8%|▊         | 7725/100000 [7:02:47<79:58:01,  3.12s/it]

training loss: 1.1137456893920898
training loss: 0.9815905094146729
training loss: 1.1244088411331177
training loss: 1.152991533279419


training:   8%|▊         | 7729/100000 [7:02:59<79:55:42,  3.12s/it]

training loss: 1.179604411125183
training loss: 1.1488308906555176
training loss: 1.1090633869171143
training loss: 1.1772465705871582


training:   8%|▊         | 7733/100000 [7:03:12<79:55:51,  3.12s/it]

training loss: 1.2015241384506226
training loss: 0.9754663705825806
training loss: 1.2346503734588623
training loss: 0.9691106081008911


training:   8%|▊         | 7737/100000 [7:03:24<79:57:06,  3.12s/it]

training loss: 1.1390459537506104
training loss: 1.2048170566558838
training loss: 0.9487298727035522
training loss: 1.0679478645324707


training:   8%|▊         | 7741/100000 [7:03:37<79:56:52,  3.12s/it]

training loss: 1.1247045993804932
training loss: 1.0810540914535522
training loss: 1.016656756401062
training loss: 1.1392974853515625


training:   8%|▊         | 7745/100000 [7:03:49<79:54:03,  3.12s/it]

training loss: 0.8734943270683289
training loss: 1.1409447193145752
training loss: 1.0906996726989746
training loss: 1.1866962909698486


training:   8%|▊         | 7749/100000 [7:04:01<79:53:52,  3.12s/it]

training loss: 1.0563311576843262
training loss: 1.0743767023086548
training loss: 1.2481110095977783
training loss: 1.250892162322998


training:   8%|▊         | 7753/100000 [7:04:14<79:51:51,  3.12s/it]

training loss: 0.9639710187911987
training loss: 1.1951172351837158
training loss: 1.0923292636871338
training loss: 1.2053242921829224


training:   8%|▊         | 7757/100000 [7:04:26<79:51:34,  3.12s/it]

training loss: 1.192430853843689
training loss: 1.1477478742599487
training loss: 1.074905276298523
training loss: 1.03043532371521


training:   8%|▊         | 7761/100000 [7:04:39<79:51:16,  3.12s/it]

training loss: 1.1715407371520996
training loss: 1.0679166316986084
training loss: 0.9557336568832397
training loss: 1.2461094856262207


training:   8%|▊         | 7765/100000 [7:04:51<79:49:54,  3.12s/it]

training loss: 1.2304779291152954
training loss: 1.0063475370407104
training loss: 1.1221420764923096
training loss: 1.0367302894592285


training:   8%|▊         | 7769/100000 [7:05:04<79:50:21,  3.12s/it]

training loss: 0.9971750974655151
training loss: 1.1561098098754883
training loss: 1.2508325576782227
training loss: 1.0865272283554077


training:   8%|▊         | 7773/100000 [7:05:16<79:49:56,  3.12s/it]

training loss: 0.9328553676605225
training loss: 0.9546700716018677
training loss: 1.1511720418930054
training loss: 1.1939382553100586


training:   8%|▊         | 7777/100000 [7:05:29<79:51:02,  3.12s/it]

training loss: 1.2101690769195557
training loss: 1.410154104232788
training loss: 1.0461894273757935
training loss: 1.1351613998413086


training:   8%|▊         | 7781/100000 [7:05:41<79:55:07,  3.12s/it]

training loss: 1.1336874961853027
training loss: 1.243862271308899
training loss: 1.2177364826202393
training loss: 1.1647825241088867


training:   8%|▊         | 7785/100000 [7:05:54<79:54:33,  3.12s/it]

training loss: 1.1324865818023682
training loss: 1.2376078367233276
training loss: 0.9116171598434448
training loss: 1.1184006929397583


training:   8%|▊         | 7789/100000 [7:06:06<79:54:06,  3.12s/it]

training loss: 0.9420380592346191
training loss: 1.0971897840499878
training loss: 1.0740082263946533
training loss: 1.2102090120315552


training:   8%|▊         | 7793/100000 [7:06:19<79:53:03,  3.12s/it]

training loss: 1.2172760963439941
training loss: 1.0096914768218994
training loss: 1.0845255851745605
training loss: 1.2097584009170532


training:   8%|▊         | 7797/100000 [7:06:31<79:52:43,  3.12s/it]

training loss: 0.9229536056518555
training loss: 1.125194787979126
training loss: 1.1107465028762817
training loss: 1.2032251358032227


training:   8%|▊         | 7797/100000 [7:06:41<79:52:43,  3.12s/it]

training loss: 1.1582176685333252


training:   8%|▊         | 7801/100000 [7:06:44<80:28:32,  3.14s/it]

validation loss: 1.0734062194824219
training loss: 1.0505497455596924
training loss: 1.1992292404174805
training loss: 0.9671680927276611


training:   8%|▊         | 7805/100000 [7:06:56<80:11:41,  3.13s/it]

training loss: 1.0848413705825806
training loss: 1.0265657901763916
training loss: 1.1502447128295898
training loss: 0.8847168684005737


training:   8%|▊         | 7809/100000 [7:07:09<80:04:49,  3.13s/it]

training loss: 1.0815907716751099
training loss: 1.1085141897201538
training loss: 1.116192102432251
training loss: 1.235097885131836


training:   8%|▊         | 7813/100000 [7:07:21<79:56:01,  3.12s/it]

training loss: 1.1694183349609375
training loss: 1.2447152137756348
training loss: 0.9936047792434692
training loss: 1.2411020994186401


training:   8%|▊         | 7817/100000 [7:07:34<79:54:35,  3.12s/it]

training loss: 0.9553988575935364
training loss: 1.082506775856018
training loss: 1.0439903736114502
training loss: 0.8915354609489441


training:   8%|▊         | 7821/100000 [7:07:46<79:51:31,  3.12s/it]

training loss: 0.987514853477478
training loss: 1.2547158002853394
training loss: 0.8540931940078735
training loss: 1.2048845291137695


training:   8%|▊         | 7825/100000 [7:07:59<79:47:01,  3.12s/it]

training loss: 1.1639102697372437
training loss: 1.1896636486053467
training loss: 1.0374891757965088
training loss: 1.144708514213562


training:   8%|▊         | 7829/100000 [7:08:11<79:46:06,  3.12s/it]

training loss: 0.9500160217285156
training loss: 1.3502166271209717
training loss: 1.0262556076049805
training loss: 1.1478819847106934


training:   8%|▊         | 7833/100000 [7:08:23<79:45:23,  3.12s/it]

training loss: 1.0667762756347656
training loss: 1.1514170169830322
training loss: 1.1401901245117188
training loss: 1.1173193454742432


training:   8%|▊         | 7837/100000 [7:08:36<79:46:28,  3.12s/it]

training loss: 1.1297286748886108
training loss: 1.170442819595337
training loss: 0.9626203179359436
training loss: 1.0704975128173828


training:   8%|▊         | 7841/100000 [7:08:48<79:44:40,  3.12s/it]

training loss: 1.1564363241195679
training loss: 1.1526210308074951
training loss: 1.3231024742126465
training loss: 1.052370548248291


training:   8%|▊         | 7845/100000 [7:09:01<79:43:21,  3.11s/it]

training loss: 1.1940587759017944
training loss: 1.147348403930664
training loss: 1.007406234741211
training loss: 0.9942896962165833


training:   8%|▊         | 7849/100000 [7:09:13<79:45:29,  3.12s/it]

training loss: 1.1333427429199219
training loss: 1.1434390544891357
training loss: 1.220088005065918
training loss: 1.1742336750030518


training:   8%|▊         | 7853/100000 [7:09:26<79:46:17,  3.12s/it]

training loss: 1.057469129562378
training loss: 1.2298755645751953
training loss: 1.110339879989624
training loss: 1.104906439781189


training:   8%|▊         | 7857/100000 [7:09:38<79:46:56,  3.12s/it]

training loss: 1.2533838748931885
training loss: 0.9735497832298279
training loss: 1.0265902280807495
training loss: 1.0806474685668945


training:   8%|▊         | 7861/100000 [7:09:51<79:47:57,  3.12s/it]

training loss: 1.0487864017486572
training loss: 1.1907309293746948
training loss: 1.214137315750122
training loss: 1.2583410739898682


training:   8%|▊         | 7865/100000 [7:10:03<79:48:24,  3.12s/it]

training loss: 1.3340941667556763
training loss: 1.029611587524414
training loss: 0.9824601411819458
training loss: 1.107207179069519


training:   8%|▊         | 7869/100000 [7:10:16<79:49:15,  3.12s/it]

training loss: 1.0554097890853882
training loss: 1.2154847383499146
training loss: 1.1661171913146973
training loss: 1.1528427600860596


training:   8%|▊         | 7873/100000 [7:10:28<79:49:30,  3.12s/it]

training loss: 1.079334020614624
training loss: 1.1915415525436401
training loss: 1.0665006637573242
training loss: 1.01387619972229


training:   8%|▊         | 7877/100000 [7:10:41<79:48:49,  3.12s/it]

training loss: 1.0512526035308838
training loss: 1.1420354843139648
training loss: 1.2834794521331787
training loss: 1.0774009227752686


training:   8%|▊         | 7881/100000 [7:10:53<79:48:18,  3.12s/it]

training loss: 1.153174638748169
training loss: 1.2456371784210205
training loss: 1.219414234161377
training loss: 0.9568345546722412


training:   8%|▊         | 7885/100000 [7:11:06<79:47:56,  3.12s/it]

training loss: 1.1632819175720215
training loss: 1.1859567165374756
training loss: 1.1424496173858643
training loss: 1.0334529876708984


training:   8%|▊         | 7889/100000 [7:11:18<79:49:23,  3.12s/it]

training loss: 1.2286654710769653
training loss: 1.1693699359893799
training loss: 1.1314656734466553
training loss: 1.1597692966461182


training:   8%|▊         | 7893/100000 [7:11:31<79:49:05,  3.12s/it]

training loss: 1.2188531160354614
training loss: 1.0617034435272217
training loss: 1.1337313652038574
training loss: 0.9672458171844482


training:   8%|▊         | 7897/100000 [7:11:43<79:48:18,  3.12s/it]

training loss: 1.112433671951294
training loss: 1.2907350063323975
training loss: 1.1271770000457764
training loss: 1.0928815603256226
training loss: 1.086018681526184


training:   8%|▊         | 7901/100000 [7:11:56<80:20:50,  3.14s/it]

validation loss: 1.0043110847473145
training loss: 1.0601330995559692
training loss: 1.1636284589767456
training loss: 1.145702600479126


training:   8%|▊         | 7905/100000 [7:12:08<80:02:43,  3.13s/it]

training loss: 1.1633491516113281
training loss: 0.9749166965484619
training loss: 1.1041290760040283
training loss: 1.2037405967712402


training:   8%|▊         | 7909/100000 [7:12:21<79:56:19,  3.12s/it]

training loss: 1.1963976621627808
training loss: 0.9479948878288269
training loss: 1.1594430208206177
training loss: 1.0195907354354858


training:   8%|▊         | 7913/100000 [7:12:33<79:51:58,  3.12s/it]

training loss: 1.0566933155059814
training loss: 0.9696457982063293
training loss: 1.268047571182251
training loss: 1.069145917892456


training:   8%|▊         | 7917/100000 [7:12:46<79:50:11,  3.12s/it]

training loss: 1.0699790716171265
training loss: 1.1307470798492432
training loss: 1.168555498123169
training loss: 0.9844913482666016


training:   8%|▊         | 7921/100000 [7:12:58<79:47:51,  3.12s/it]

training loss: 1.4305403232574463
training loss: 0.9749187231063843
training loss: 1.1146348714828491
training loss: 1.1329498291015625


training:   8%|▊         | 7925/100000 [7:13:11<79:44:43,  3.12s/it]

training loss: 1.0978074073791504
training loss: 1.263321042060852
training loss: 1.0814650058746338
training loss: 1.0066163539886475


training:   8%|▊         | 7929/100000 [7:13:23<79:44:19,  3.12s/it]

training loss: 0.9924615621566772
training loss: 0.9507384896278381
training loss: 0.934296727180481
training loss: 1.0863021612167358


training:   8%|▊         | 7933/100000 [7:13:35<79:44:13,  3.12s/it]

training loss: 1.1403429508209229
training loss: 1.070152997970581
training loss: 1.059037685394287
training loss: 1.0190917253494263


training:   8%|▊         | 7937/100000 [7:13:48<79:44:25,  3.12s/it]

training loss: 0.8917796611785889
training loss: 1.0637437105178833
training loss: 1.0464504957199097
training loss: 1.0826163291931152


training:   8%|▊         | 7941/100000 [7:14:00<79:44:02,  3.12s/it]

training loss: 1.0087437629699707
training loss: 1.1025415658950806
training loss: 1.152220606803894
training loss: 1.009949803352356


training:   8%|▊         | 7945/100000 [7:14:13<79:45:01,  3.12s/it]

training loss: 1.055772066116333
training loss: 0.9211219549179077
training loss: 1.1634880304336548
training loss: 1.1203886270523071


training:   8%|▊         | 7949/100000 [7:14:25<79:44:04,  3.12s/it]

training loss: 0.959526777267456
training loss: 1.113523006439209
training loss: 0.9293611645698547
training loss: 1.0238254070281982


training:   8%|▊         | 7953/100000 [7:14:38<79:45:00,  3.12s/it]

training loss: 1.0495367050170898
training loss: 1.0860673189163208
training loss: 1.2332730293273926
training loss: 1.1281644105911255


training:   8%|▊         | 7957/100000 [7:14:50<79:40:57,  3.12s/it]

training loss: 1.1384258270263672
training loss: 1.180555820465088
training loss: 1.1052663326263428
training loss: 1.2934519052505493


training:   8%|▊         | 7961/100000 [7:15:03<79:41:34,  3.12s/it]

training loss: 1.3168753385543823
training loss: 1.051594853401184
training loss: 1.154239535331726
training loss: 1.1385308504104614


training:   8%|▊         | 7965/100000 [7:15:15<79:41:58,  3.12s/it]

training loss: 1.1231257915496826
training loss: 1.1224207878112793
training loss: 1.2341625690460205
training loss: 1.1671669483184814


training:   8%|▊         | 7969/100000 [7:15:28<79:42:22,  3.12s/it]

training loss: 0.9422869086265564
training loss: 1.1221649646759033
training loss: 1.2878923416137695
training loss: 1.1718802452087402


training:   8%|▊         | 7973/100000 [7:15:40<79:39:10,  3.12s/it]

training loss: 1.0245355367660522
training loss: 1.160618782043457
training loss: 1.1159658432006836
training loss: 1.1227737665176392


training:   8%|▊         | 7977/100000 [7:15:53<79:39:43,  3.12s/it]

training loss: 0.9393256902694702
training loss: 1.04475736618042
training loss: 1.0942535400390625
training loss: 1.0310094356536865


training:   8%|▊         | 7981/100000 [7:16:05<79:39:56,  3.12s/it]

training loss: 1.1279096603393555
training loss: 1.294802188873291
training loss: 1.1841163635253906
training loss: 0.8365077972412109


training:   8%|▊         | 7985/100000 [7:16:18<79:40:04,  3.12s/it]

training loss: 1.0951772928237915
training loss: 1.0800741910934448
training loss: 1.3363358974456787
training loss: 0.9540050029754639


training:   8%|▊         | 7989/100000 [7:16:30<79:38:29,  3.12s/it]

training loss: 1.114062786102295
training loss: 1.1390490531921387
training loss: 0.8385608792304993
training loss: 1.2595010995864868


training:   8%|▊         | 7993/100000 [7:16:43<79:39:41,  3.12s/it]

training loss: 1.2249380350112915
training loss: 1.0987520217895508
training loss: 1.1299364566802979
training loss: 1.1597480773925781


training:   8%|▊         | 7997/100000 [7:16:55<79:36:24,  3.11s/it]

training loss: 1.264699935913086
training loss: 1.036923885345459
training loss: 1.0408635139465332
training loss: 1.2102634906768799
training loss: 0.9532882571220398
validation loss: 1.2594764232635498
%s 

 %s ("open a Wound already closing in the Royal Bosom&quot;, a special clause permitting Commissioners to grant the Royal Assent on the King's behalf was inserted in the Act. This method of granting the Royal Assent had never been used before, but, in later reigns, it came to replace the traditional personal appearance of the Sovereign in Parliament.  Catherine's marriage was annulled shortly before her execution.  As was the case with Anne Boleyn, Catherine Howard could not have technically been guilty of adultery, as the marriage was officially null and void from the beginning.  Again, this point was ignored, and Catherine was executed on [[13 February]] [[1542]].  She was only about eighteen years old at the time.  Henry married his last wife, the wealthy widow [[Catherine Parr

training:   8%|▊         | 7997/100000 [7:17:11<79:36:24,  3.11s/it]

 three of Berek Jandy She, then the year, the Stars.  Henry appearing his last explains impression after behavioring the [[Wilvestine Base Nichols] and Yahles Base Constantine Polish, but Henry in [[2001]], Muslims Anime, and Anglish groupersoft.  Henry, before [[Sultist Leging]], Central Phoenix on [[U.S. most female, and Constantine]], as well as Judges, a [[Wiline]].  == Records could have been charged with the [[Willies Bruersey]], refusing the exchange of the set of shuttle with females the common runs by die [[Cathlein Flords|SAN228]] (Constantino Goak manuscript).  ==Top aspiration in the sustaining [[Kilomedia/Covers]] played was probably playing the late for she similar sigmatic letter in the [[1890s]], on [[Dinoia]], where Brudicia Release Chief was [[Zhan Flu about Corps|Norman]] by [[Abil]], acceptance, whether far sacrified, the matters after becoming on 50 million letters of filtman, City sincere to 16 Extrement, see [[Taskin Ladzya (2803 earliquity)| IN Aahrlijan ¢¢jos) 

training:   8%|▊         | 8001/100000 [7:18:27<231:24:47,  9.06s/it]

Model saved at iteration 8000
training loss: 0.9581728577613831
training loss: 1.1193004846572876
training loss: 1.0570669174194336


training:   8%|▊         | 8005/100000 [7:18:39<185:47:34,  7.27s/it]

training loss: 1.1641979217529297
training loss: 1.1492643356323242
training loss: 1.2388654947280884
training loss: 0.9766523838043213


training:   8%|▊         | 8009/100000 [7:18:52<153:55:18,  6.02s/it]

training loss: 1.365879774093628
training loss: 0.9957338571548462
training loss: 1.1067601442337036
training loss: 0.8880215883255005


training:   8%|▊         | 8013/100000 [7:19:04<131:34:16,  5.15s/it]

training loss: 1.1574115753173828
training loss: 1.159714698791504
training loss: 1.2345874309539795
training loss: 1.045440435409546


training:   8%|▊         | 8017/100000 [7:19:16<115:57:00,  4.54s/it]

training loss: 1.2396347522735596
training loss: 1.1317695379257202
training loss: 1.01043701171875
training loss: 1.1462571620941162


training:   8%|▊         | 8021/100000 [7:19:29<105:04:31,  4.11s/it]

training loss: 0.8641219139099121
training loss: 1.0832709074020386
training loss: 1.1834577322006226
training loss: 1.1071441173553467


training:   8%|▊         | 8025/100000 [7:19:41<97:26:45,  3.81s/it] 

training loss: 1.109858512878418
training loss: 1.246381163597107
training loss: 1.196349859237671
training loss: 1.2421190738677979


training:   8%|▊         | 8029/100000 [7:19:54<92:06:14,  3.61s/it]

training loss: 1.0627970695495605
training loss: 1.177047848701477
training loss: 1.1440792083740234
training loss: 1.060561180114746


training:   8%|▊         | 8033/100000 [7:20:06<88:21:45,  3.46s/it]

training loss: 1.271314263343811
training loss: 1.106257677078247
training loss: 1.1673921346664429
training loss: 1.0079622268676758


training:   8%|▊         | 8037/100000 [7:20:19<85:42:33,  3.36s/it]

training loss: 1.183726191520691
training loss: 1.2749390602111816
training loss: 1.1461306810379028
training loss: 1.0758055448532104


training:   8%|▊         | 8041/100000 [7:20:31<83:53:36,  3.28s/it]

training loss: 1.1530992984771729
training loss: 1.176896095275879
training loss: 0.9668525457382202
training loss: 0.8757398128509521


training:   8%|▊         | 8045/100000 [7:20:44<82:34:56,  3.23s/it]

training loss: 1.1184747219085693
training loss: 1.2351830005645752
training loss: 0.9722727537155151
training loss: 1.0051021575927734


training:   8%|▊         | 8049/100000 [7:20:56<81:42:21,  3.20s/it]

training loss: 1.0589818954467773
training loss: 1.2345157861709595
training loss: 1.1837174892425537
training loss: 1.1196101903915405


training:   8%|▊         | 8053/100000 [7:21:09<81:04:51,  3.17s/it]

training loss: 1.065122365951538
training loss: 1.1302368640899658
training loss: 1.2628997564315796
training loss: 1.104843020439148


training:   8%|▊         | 8057/100000 [7:21:21<80:37:51,  3.16s/it]

training loss: 1.1858413219451904
training loss: 1.018195390701294
training loss: 1.1187994480133057
training loss: 1.0802839994430542


training:   8%|▊         | 8061/100000 [7:21:34<80:18:16,  3.14s/it]

training loss: 1.1884496212005615
training loss: 1.111250400543213
training loss: 1.2498102188110352
training loss: 1.2639431953430176


training:   8%|▊         | 8065/100000 [7:21:46<80:06:23,  3.14s/it]

training loss: 0.9821281433105469
training loss: 1.1362518072128296
training loss: 1.2225053310394287
training loss: 1.1788387298583984


training:   8%|▊         | 8069/100000 [7:21:58<79:57:01,  3.13s/it]

training loss: 0.9600353837013245
training loss: 1.245422601699829
training loss: 1.1670253276824951
training loss: 1.2533934116363525


training:   8%|▊         | 8073/100000 [7:22:11<79:46:49,  3.12s/it]

training loss: 1.0359660387039185
training loss: 1.158296823501587
training loss: 1.1620255708694458
training loss: 1.1286637783050537


training:   8%|▊         | 8077/100000 [7:22:23<79:43:54,  3.12s/it]

training loss: 1.267378568649292
training loss: 1.2331948280334473
training loss: 1.1628093719482422
training loss: 1.0761469602584839


training:   8%|▊         | 8081/100000 [7:22:36<79:43:18,  3.12s/it]

training loss: 1.175708532333374
training loss: 1.0716649293899536
training loss: 1.0406780242919922
training loss: 0.9813623428344727


training:   8%|▊         | 8085/100000 [7:22:48<79:41:39,  3.12s/it]

training loss: 1.1924811601638794
training loss: 1.141829252243042
training loss: 1.1075098514556885
training loss: 1.1039152145385742


training:   8%|▊         | 8089/100000 [7:23:01<79:39:41,  3.12s/it]

training loss: 1.1302261352539062
training loss: 1.1387687921524048
training loss: 0.8901769518852234
training loss: 1.1888242959976196


training:   8%|▊         | 8093/100000 [7:23:13<79:38:54,  3.12s/it]

training loss: 1.1470866203308105
training loss: 1.0620460510253906
training loss: 1.105115294456482
training loss: 1.1125761270523071


training:   8%|▊         | 8097/100000 [7:23:26<79:38:18,  3.12s/it]

training loss: 1.2485657930374146
training loss: 1.372158169746399
training loss: 1.172560214996338
training loss: 1.150132179260254
training loss: 1.055039882659912


training:   8%|▊         | 8101/100000 [7:23:39<80:13:30,  3.14s/it]

validation loss: 1.229315996170044
training loss: 1.0257542133331299
training loss: 1.2514358758926392
training loss: 1.1577751636505127


training:   8%|▊         | 8105/100000 [7:23:51<80:00:32,  3.13s/it]

training loss: 1.258452296257019
training loss: 1.0893690586090088
training loss: 1.180964469909668
training loss: 1.1381406784057617


training:   8%|▊         | 8109/100000 [7:24:04<79:53:27,  3.13s/it]

training loss: 1.165759801864624
training loss: 0.7983862161636353
training loss: 1.2219387292861938
training loss: 1.119817852973938


training:   8%|▊         | 8113/100000 [7:24:16<79:48:11,  3.13s/it]

training loss: 1.0789768695831299
training loss: 1.111728549003601
training loss: 1.2882219552993774
training loss: 1.0870752334594727


training:   8%|▊         | 8117/100000 [7:24:28<79:43:33,  3.12s/it]

training loss: 1.02000892162323
training loss: 1.1434359550476074
training loss: 1.1699731349945068
training loss: 0.9661270380020142


training:   8%|▊         | 8121/100000 [7:24:41<79:41:06,  3.12s/it]

training loss: 1.0275708436965942
training loss: 1.0484455823898315
training loss: 0.9864246845245361
training loss: 0.9720852375030518


training:   8%|▊         | 8125/100000 [7:24:53<79:41:14,  3.12s/it]

training loss: 1.0321444272994995
training loss: 1.1468523740768433
training loss: 1.16640043258667
training loss: 1.0607820749282837


training:   8%|▊         | 8129/100000 [7:25:06<79:38:58,  3.12s/it]

training loss: 1.1025084257125854
training loss: 1.2849862575531006
training loss: 1.154241681098938
training loss: 1.1492643356323242


training:   8%|▊         | 8133/100000 [7:25:18<79:37:03,  3.12s/it]

training loss: 1.2302563190460205
training loss: 1.0476487874984741
training loss: 1.2374577522277832
training loss: 1.1128860712051392


training:   8%|▊         | 8137/100000 [7:25:31<79:36:58,  3.12s/it]

training loss: 1.0872633457183838
training loss: 1.2587006092071533
training loss: 1.0203746557235718
training loss: 1.1287860870361328


training:   8%|▊         | 8141/100000 [7:25:43<79:36:01,  3.12s/it]

training loss: 1.2079144716262817
training loss: 1.0638399124145508
training loss: 1.070229172706604
training loss: 1.1849071979522705


training:   8%|▊         | 8145/100000 [7:25:56<79:39:06,  3.12s/it]

training loss: 1.091568112373352
training loss: 0.97237628698349
training loss: 1.0206778049468994
training loss: 1.087681531906128


training:   8%|▊         | 8149/100000 [7:26:08<79:38:03,  3.12s/it]

training loss: 1.0657711029052734
training loss: 1.1467251777648926
training loss: 1.1132144927978516
training loss: 1.0337733030319214


training:   8%|▊         | 8153/100000 [7:26:21<79:36:31,  3.12s/it]

training loss: 1.0568034648895264
training loss: 1.2424907684326172
training loss: 1.1201268434524536
training loss: 1.1185625791549683


training:   8%|▊         | 8157/100000 [7:26:33<79:35:11,  3.12s/it]

training loss: 1.3577206134796143
training loss: 1.1063079833984375
training loss: 1.150435447692871
training loss: 1.1378825902938843


training:   8%|▊         | 8161/100000 [7:26:46<79:33:59,  3.12s/it]

training loss: 1.111074686050415
training loss: 1.0018317699432373
training loss: 1.1768404245376587
training loss: 1.0918558835983276


training:   8%|▊         | 8165/100000 [7:26:58<79:34:10,  3.12s/it]

training loss: 1.273043155670166
training loss: 1.2702312469482422
training loss: 1.1437259912490845
training loss: 1.1678041219711304


training:   8%|▊         | 8169/100000 [7:27:11<79:32:49,  3.12s/it]

training loss: 0.9704959392547607
training loss: 1.1136806011199951
training loss: 1.1544990539550781
training loss: 1.1184346675872803


training:   8%|▊         | 8173/100000 [7:27:23<79:32:23,  3.12s/it]

training loss: 0.9969508051872253
training loss: 1.0565881729125977
training loss: 1.1651372909545898
training loss: 1.011321783065796


training:   8%|▊         | 8177/100000 [7:27:36<79:32:29,  3.12s/it]

training loss: 1.1799042224884033
training loss: 1.0402441024780273
training loss: 1.1222842931747437
training loss: 0.9061729907989502


training:   8%|▊         | 8181/100000 [7:27:48<79:34:44,  3.12s/it]

training loss: 1.1223523616790771
training loss: 1.1046433448791504
training loss: 1.109541893005371
training loss: 1.1190049648284912


training:   8%|▊         | 8185/100000 [7:28:01<79:34:08,  3.12s/it]

training loss: 1.1078109741210938
training loss: 1.0290672779083252
training loss: 1.1558564901351929
training loss: 0.9323049783706665


training:   8%|▊         | 8189/100000 [7:28:13<79:32:34,  3.12s/it]

training loss: 1.094719648361206
training loss: 1.1456923484802246
training loss: 1.1891841888427734
training loss: 1.1565219163894653


training:   8%|▊         | 8193/100000 [7:28:26<79:32:14,  3.12s/it]

training loss: 0.9120837450027466
training loss: 1.0161783695220947
training loss: 1.1705818176269531
training loss: 1.3411178588867188


training:   8%|▊         | 8197/100000 [7:28:38<79:32:20,  3.12s/it]

training loss: 1.0069025754928589
training loss: 1.1808598041534424
training loss: 1.1951805353164673
training loss: 1.0448312759399414
training loss: 1.268558144569397


training:   8%|▊         | 8201/100000 [7:28:51<80:06:42,  3.14s/it]

validation loss: 1.202306866645813
training loss: 1.3562915325164795
training loss: 1.0589683055877686
training loss: 1.0475387573242188


training:   8%|▊         | 8205/100000 [7:29:03<79:54:06,  3.13s/it]

training loss: 1.0852835178375244
training loss: 1.278095006942749
training loss: 0.9677335619926453
training loss: 1.0911614894866943


training:   8%|▊         | 8209/100000 [7:29:16<79:48:38,  3.13s/it]

training loss: 1.2026925086975098
training loss: 1.1836051940917969
training loss: 0.9857412576675415
training loss: 1.121830940246582


training:   8%|▊         | 8213/100000 [7:29:28<79:43:23,  3.13s/it]

training loss: 0.9504200220108032
training loss: 1.0967000722885132
training loss: 0.9299688339233398
training loss: 1.0463695526123047


training:   8%|▊         | 8217/100000 [7:29:41<79:38:56,  3.12s/it]

training loss: 0.9534958004951477
training loss: 1.1187198162078857
training loss: 1.0283490419387817
training loss: 1.1426424980163574


training:   8%|▊         | 8221/100000 [7:29:53<79:36:38,  3.12s/it]

training loss: 1.0694913864135742
training loss: 1.1153807640075684
training loss: 1.1806457042694092
training loss: 0.8950591683387756


training:   8%|▊         | 8225/100000 [7:30:06<79:34:35,  3.12s/it]

training loss: 1.0665853023529053
training loss: 1.2964684963226318
training loss: 1.0295894145965576
training loss: 0.9606261253356934


training:   8%|▊         | 8229/100000 [7:30:18<79:31:49,  3.12s/it]

training loss: 1.159654974937439
training loss: 1.0950477123260498
training loss: 1.1627105474472046
training loss: 1.2620728015899658


training:   8%|▊         | 8233/100000 [7:30:31<79:32:32,  3.12s/it]

training loss: 1.1882492303848267
training loss: 1.087543249130249
training loss: 1.1821377277374268
training loss: 1.1995224952697754


training:   8%|▊         | 8237/100000 [7:30:43<79:31:28,  3.12s/it]

training loss: 1.1778367757797241
training loss: 1.1860504150390625
training loss: 1.2341455221176147
training loss: 1.1084482669830322


training:   8%|▊         | 8241/100000 [7:30:56<79:30:38,  3.12s/it]

training loss: 1.1466797590255737
training loss: 0.9276185035705566
training loss: 1.2111250162124634
training loss: 0.8750512599945068


training:   8%|▊         | 8245/100000 [7:31:08<79:30:05,  3.12s/it]

training loss: 1.068097472190857
training loss: 0.9163252711296082
training loss: 1.145480751991272
training loss: 1.1978261470794678


training:   8%|▊         | 8249/100000 [7:31:20<79:29:16,  3.12s/it]

training loss: 1.0407235622406006
training loss: 1.0435762405395508
training loss: 1.2044833898544312
training loss: 1.0957114696502686


training:   8%|▊         | 8253/100000 [7:31:33<79:30:23,  3.12s/it]

training loss: 0.9195038080215454
training loss: 1.160536527633667
training loss: 1.1337372064590454
training loss: 1.1472914218902588


training:   8%|▊         | 8257/100000 [7:31:45<79:28:09,  3.12s/it]

training loss: 1.2386059761047363
training loss: 1.1802024841308594
training loss: 1.1862444877624512
training loss: 0.9207301139831543


training:   8%|▊         | 8261/100000 [7:31:58<79:25:19,  3.12s/it]

training loss: 1.256075143814087
training loss: 1.045352578163147
training loss: 1.2787790298461914
training loss: 1.1114012002944946


training:   8%|▊         | 8265/100000 [7:32:10<79:23:29,  3.12s/it]

training loss: 1.2322134971618652
training loss: 0.8760807514190674
training loss: 1.082128882408142
training loss: 0.9615548849105835


training:   8%|▊         | 8269/100000 [7:32:23<79:22:18,  3.11s/it]

training loss: 1.1505087614059448
training loss: 0.9397285580635071
training loss: 0.979263186454773
training loss: 1.0846219062805176


training:   8%|▊         | 8273/100000 [7:32:35<79:24:18,  3.12s/it]

training loss: 1.0421369075775146
training loss: 1.167494535446167
training loss: 1.1631693840026855
training loss: 1.1052117347717285


training:   8%|▊         | 8277/100000 [7:32:48<79:24:37,  3.12s/it]

training loss: 1.1417871713638306
training loss: 1.0309545993804932
training loss: 1.1145713329315186
training loss: 0.919485330581665


training:   8%|▊         | 8281/100000 [7:33:00<79:25:02,  3.12s/it]

training loss: 1.1983838081359863
training loss: 0.8935215473175049
training loss: 1.169500708580017
training loss: 1.101302981376648


training:   8%|▊         | 8285/100000 [7:33:13<79:25:26,  3.12s/it]

training loss: 1.0144222974777222
training loss: 1.0329079627990723
training loss: 1.1628750562667847
training loss: 0.9224619269371033


training:   8%|▊         | 8289/100000 [7:33:25<79:25:18,  3.12s/it]

training loss: 0.9594612121582031
training loss: 1.20704185962677
training loss: 1.1999683380126953
training loss: 1.252671718597412


training:   8%|▊         | 8293/100000 [7:33:38<79:23:27,  3.12s/it]

training loss: 1.093606948852539
training loss: 1.0589404106140137
training loss: 1.167634129524231
training loss: 1.293410301208496


training:   8%|▊         | 8297/100000 [7:33:50<79:23:02,  3.12s/it]

training loss: 1.0519075393676758
training loss: 0.9606641530990601
training loss: 0.9381229877471924
training loss: 1.025205135345459


training:   8%|▊         | 8297/100000 [7:34:01<79:23:02,  3.12s/it]

training loss: 0.9402564167976379


training:   8%|▊         | 8301/100000 [7:34:03<79:59:23,  3.14s/it]

validation loss: 1.1345480680465698
training loss: 1.1795125007629395
training loss: 1.102645993232727
training loss: 0.9360305666923523


training:   8%|▊         | 8305/100000 [7:34:15<79:46:25,  3.13s/it]

training loss: 1.2575777769088745
training loss: 1.0101232528686523
training loss: 1.1141406297683716
training loss: 1.157027006149292


training:   8%|▊         | 8309/100000 [7:34:28<79:40:02,  3.13s/it]

training loss: 1.2323780059814453
training loss: 0.935523509979248
training loss: 1.0657885074615479
training loss: 1.0288151502609253


training:   8%|▊         | 8313/100000 [7:34:40<79:35:20,  3.12s/it]

training loss: 1.102089285850525
training loss: 1.1114721298217773
training loss: 1.008256196975708
training loss: 1.060187816619873


training:   8%|▊         | 8317/100000 [7:34:53<79:33:02,  3.12s/it]

training loss: 1.1138689517974854
training loss: 1.0714491605758667
training loss: 1.0100926160812378
training loss: 1.0540622472763062


training:   8%|▊         | 8321/100000 [7:35:05<79:31:20,  3.12s/it]

training loss: 1.1559020280838013
training loss: 1.1119840145111084
training loss: 0.8304821848869324
training loss: 1.159637212753296


training:   8%|▊         | 8325/100000 [7:35:18<79:29:14,  3.12s/it]

training loss: 1.0880335569381714
training loss: 1.124074101448059
training loss: 1.1821054220199585
training loss: 1.0797711610794067


training:   8%|▊         | 8329/100000 [7:35:30<79:27:43,  3.12s/it]

training loss: 1.2709970474243164
training loss: 1.0395795106887817
training loss: 1.0731979608535767
training loss: 1.0773041248321533


training:   8%|▊         | 8333/100000 [7:35:43<79:26:01,  3.12s/it]

training loss: 0.9456475973129272
training loss: 1.0580763816833496
training loss: 1.1954591274261475
training loss: 1.1250205039978027


training:   8%|▊         | 8337/100000 [7:35:55<79:23:15,  3.12s/it]

training loss: 1.1342015266418457
training loss: 1.05070161819458
training loss: 1.1409547328948975
training loss: 1.0497925281524658


training:   8%|▊         | 8341/100000 [7:36:08<79:22:43,  3.12s/it]

training loss: 1.2358064651489258
training loss: 1.1390330791473389
training loss: 1.3539772033691406
training loss: 0.7455474138259888


training:   8%|▊         | 8345/100000 [7:36:20<79:21:06,  3.12s/it]

training loss: 0.9159106612205505
training loss: 1.2032043933868408
training loss: 1.1493349075317383
training loss: 0.9096089005470276


training:   8%|▊         | 8349/100000 [7:36:32<79:21:47,  3.12s/it]

training loss: 1.1593865156173706
training loss: 1.1204259395599365
training loss: 1.0443140268325806
training loss: 1.0397593975067139


training:   8%|▊         | 8353/100000 [7:36:45<79:21:59,  3.12s/it]

training loss: 1.1498262882232666
training loss: 1.0088059902191162
training loss: 1.0820331573486328
training loss: 1.1443352699279785


training:   8%|▊         | 8357/100000 [7:36:57<79:22:47,  3.12s/it]

training loss: 0.9900381565093994
training loss: 1.1480220556259155
training loss: 1.1894307136535645
training loss: 1.3030492067337036


training:   8%|▊         | 8361/100000 [7:37:10<79:23:16,  3.12s/it]

training loss: 1.129751443862915
training loss: 1.297753930091858
training loss: 1.0910906791687012
training loss: 1.2875200510025024


training:   8%|▊         | 8365/100000 [7:37:22<79:21:52,  3.12s/it]

training loss: 1.0688538551330566
training loss: 1.006045937538147
training loss: 1.0664093494415283
training loss: 1.2267500162124634


training:   8%|▊         | 8369/100000 [7:37:35<79:22:21,  3.12s/it]

training loss: 1.06688392162323
training loss: 1.2038707733154297
training loss: 1.1501826047897339
training loss: 0.9746896028518677


training:   8%|▊         | 8373/100000 [7:37:47<79:21:41,  3.12s/it]

training loss: 1.1019132137298584
training loss: 1.0769338607788086
training loss: 0.9551021456718445
training loss: 1.2604568004608154


training:   8%|▊         | 8377/100000 [7:38:00<79:21:51,  3.12s/it]

training loss: 1.1545382738113403
training loss: 1.1473686695098877
training loss: 1.1205592155456543
training loss: 1.3665457963943481


training:   8%|▊         | 8381/100000 [7:38:12<79:24:20,  3.12s/it]

training loss: 0.9842128157615662
training loss: 0.9478057026863098
training loss: 1.0652287006378174
training loss: 1.1781578063964844


training:   8%|▊         | 8385/100000 [7:38:25<79:25:08,  3.12s/it]

training loss: 1.0164501667022705
training loss: 1.1017038822174072
training loss: 1.1429370641708374
training loss: 1.0430700778961182


training:   8%|▊         | 8389/100000 [7:38:37<79:24:15,  3.12s/it]

training loss: 1.0278857946395874
training loss: 1.0890487432479858
training loss: 1.087581753730774
training loss: 1.1530592441558838


training:   8%|▊         | 8393/100000 [7:38:50<79:23:01,  3.12s/it]

training loss: 1.1517748832702637
training loss: 1.2081706523895264
training loss: 1.056797742843628
training loss: 1.0309847593307495


training:   8%|▊         | 8397/100000 [7:39:02<79:22:28,  3.12s/it]

training loss: 1.0451079607009888
training loss: 1.1585848331451416
training loss: 1.2026593685150146
training loss: 1.0526560544967651
training loss: 1.2351030111312866


training:   8%|▊         | 8401/100000 [7:39:15<79:55:50,  3.14s/it]

validation loss: 1.0822261571884155
training loss: 1.1642663478851318
training loss: 1.2318532466888428
training loss: 1.0577386617660522


training:   8%|▊         | 8405/100000 [7:39:27<79:39:12,  3.13s/it]

training loss: 1.3391984701156616
training loss: 0.9125707149505615
training loss: 1.218517541885376
training loss: 1.268338680267334


training:   8%|▊         | 8409/100000 [7:39:40<79:32:22,  3.13s/it]

training loss: 1.1446657180786133
training loss: 1.0007994174957275
training loss: 0.8833703994750977
training loss: 0.643746018409729


training:   8%|▊         | 8413/100000 [7:39:52<79:26:47,  3.12s/it]

training loss: 1.149056077003479
training loss: 0.798966646194458
training loss: 1.2672767639160156
training loss: 0.9335750341415405


training:   8%|▊         | 8417/100000 [7:40:05<79:20:04,  3.12s/it]

training loss: 1.0112273693084717
training loss: 1.2408250570297241
training loss: 1.1034127473831177
training loss: 1.1175531148910522


training:   8%|▊         | 8421/100000 [7:40:17<79:19:10,  3.12s/it]

training loss: 1.0762580633163452
training loss: 1.080331802368164
training loss: 1.1533143520355225
training loss: 0.9284259080886841


training:   8%|▊         | 8425/100000 [7:40:30<79:18:45,  3.12s/it]

training loss: 1.1595784425735474
training loss: 1.0808780193328857
training loss: 1.0717310905456543
training loss: 1.2127478122711182


training:   8%|▊         | 8429/100000 [7:40:42<79:17:18,  3.12s/it]

training loss: 1.133297324180603
training loss: 1.0462158918380737
training loss: 0.997395396232605
training loss: 1.187529444694519


training:   8%|▊         | 8433/100000 [7:40:55<79:17:22,  3.12s/it]

training loss: 1.1021449565887451
training loss: 1.1359390020370483
training loss: 0.9612647294998169
training loss: 1.153585433959961


training:   8%|▊         | 8437/100000 [7:41:07<79:15:15,  3.12s/it]

training loss: 1.1835277080535889
training loss: 1.1280006170272827
training loss: 1.1338138580322266
training loss: 1.154587745666504


training:   8%|▊         | 8441/100000 [7:41:20<79:16:10,  3.12s/it]

training loss: 1.107405662536621
training loss: 1.1633704900741577
training loss: 1.1600966453552246
training loss: 1.0704646110534668


training:   8%|▊         | 8445/100000 [7:41:32<79:16:33,  3.12s/it]

training loss: 0.9867543578147888
training loss: 1.1323736906051636
training loss: 1.3233957290649414
training loss: 1.1913642883300781


training:   8%|▊         | 8449/100000 [7:41:45<79:18:33,  3.12s/it]

training loss: 1.1471081972122192
training loss: 1.248702049255371
training loss: 1.1090848445892334
training loss: 1.113486647605896


training:   8%|▊         | 8453/100000 [7:41:57<79:18:13,  3.12s/it]

training loss: 1.1952437162399292
training loss: 1.0429377555847168
training loss: 1.151060700416565
training loss: 1.1118550300598145


training:   8%|▊         | 8457/100000 [7:42:09<79:17:51,  3.12s/it]

training loss: 1.2108477354049683
training loss: 1.106292724609375
training loss: 1.0909695625305176
training loss: 1.2404989004135132


training:   8%|▊         | 8461/100000 [7:42:22<79:17:21,  3.12s/it]

training loss: 1.1832488775253296
training loss: 0.9581382870674133
training loss: 1.049986481666565
training loss: 0.9084022641181946


training:   8%|▊         | 8465/100000 [7:42:34<79:14:23,  3.12s/it]

training loss: 1.128427505493164
training loss: 1.2997996807098389
training loss: 1.0961313247680664
training loss: 1.0779112577438354


training:   8%|▊         | 8469/100000 [7:42:47<79:14:23,  3.12s/it]

training loss: 1.1142226457595825
training loss: 0.8264491558074951
training loss: 1.0808559656143188
training loss: 0.996619462966919


training:   8%|▊         | 8473/100000 [7:42:59<79:11:22,  3.11s/it]

training loss: 1.037659764289856
training loss: 0.9616199135780334
training loss: 1.3778486251831055
training loss: 1.1190626621246338


training:   8%|▊         | 8477/100000 [7:43:12<79:12:48,  3.12s/it]

training loss: 1.0889885425567627
training loss: 0.9091640710830688
training loss: 1.103860855102539
training loss: 1.0036416053771973


training:   8%|▊         | 8481/100000 [7:43:24<79:13:46,  3.12s/it]

training loss: 1.0888004302978516
training loss: 0.8651392459869385
training loss: 1.1334624290466309
training loss: 1.0726267099380493


training:   8%|▊         | 8485/100000 [7:43:37<79:14:07,  3.12s/it]

training loss: 1.0784080028533936
training loss: 1.1917407512664795
training loss: 1.1065585613250732
training loss: 1.0510388612747192


training:   8%|▊         | 8489/100000 [7:43:49<79:13:24,  3.12s/it]

training loss: 1.2493817806243896
training loss: 1.4487128257751465
training loss: 1.2396070957183838
training loss: 1.2200783491134644


training:   8%|▊         | 8493/100000 [7:44:02<79:13:01,  3.12s/it]

training loss: 1.0883281230926514
training loss: 1.0111390352249146
training loss: 1.1197723150253296
training loss: 1.1225035190582275


training:   8%|▊         | 8497/100000 [7:44:14<79:13:42,  3.12s/it]

training loss: 0.933229923248291
training loss: 0.9963152408599854
training loss: 1.2346653938293457
training loss: 1.1406795978546143
training loss: 1.044000506401062
validation loss: 1.0898715257644653
%s 

 %s (' a bottle of nutrient feed solution. The solution travels up the wick into the pot plant.  === Raft cultivation === A variant of DWC sometimes used for [[lettuce]]s: sheets of expanded [[polystyrene]] have holes drilled through them, and young plants are placed in the holes with the roots hanging down. The sheet then floats in a shallow tank of nutrient solution.   === Nutrient film technique (NFT) === In this method, the plants grow through light-proof plastic films placed over shallow, gently sloping channels. A steady flow of nutrients is maintained along the channel, and the roots grow into dense mats, with a thin film of nutrient passing over them (hence the name of the technique).  A downside of the technique is that it has very little buffering against interruptions i

training:   8%|▊         | 8497/100000 [7:44:31<79:13:42,  3.12s/it]

lution.   External re-articles are common technique by feeling on the banks best running awareness, not rise more inheritable from a shore and free set; a removaling of [[perkins]], certain nutrient plants, because the removal of the absurds produced hydropocyphback &quot;Krahea'&quot;, broaned the adoption is have to begin from the diagrams.  Bridge did not hold free in faster, no compension published the valley knowledge down to have imposed plant designation through the manster. &quot;G.  Namley traked the former I: The ''bonn''.  Something yee why her tried to arrest similarly as to show it, which has ended the pastunklin.  It tacks for an ER hold it before the aquabine the bond and decisive and crossnes a comment or four examined constant of [[criminal]], then is much other to plant friend years.  They has come once a Municipal (i.e. callsom--viol) wealtherming every way from them.  Antarily the exception for calling the ''destation'', or the latter of which in proal turns out dec

training:   9%|▊         | 8501/100000 [7:45:46<230:01:05,  9.05s/it]

Model saved at iteration 8500
training loss: 1.1999778747558594
training loss: 1.148860216140747
training loss: 1.0290160179138184


training:   9%|▊         | 8505/100000 [7:45:58<184:42:37,  7.27s/it]

training loss: 1.1440726518630981
training loss: 1.0888965129852295
training loss: 1.054275393486023
training loss: 1.0259325504302979


training:   9%|▊         | 8509/100000 [7:46:11<153:00:57,  6.02s/it]

training loss: 1.1244981288909912
training loss: 0.9187682271003723
training loss: 0.9451607465744019
training loss: 1.2711777687072754


training:   9%|▊         | 8513/100000 [7:46:23<130:53:27,  5.15s/it]

training loss: 1.204942226409912
training loss: 1.1410739421844482
training loss: 1.0160435438156128
training loss: 0.9701075553894043


training:   9%|▊         | 8517/100000 [7:46:36<115:23:49,  4.54s/it]

training loss: 0.9368349313735962
training loss: 1.1465396881103516
training loss: 1.2070330381393433
training loss: 1.1376187801361084


training:   9%|▊         | 8521/100000 [7:46:48<104:33:11,  4.11s/it]

training loss: 1.111297369003296
training loss: 1.140000343322754
training loss: 1.1578145027160645
training loss: 1.0599746704101562


training:   9%|▊         | 8525/100000 [7:47:00<96:57:07,  3.82s/it] 

training loss: 1.114055871963501
training loss: 1.1517159938812256
training loss: 1.3297841548919678
training loss: 1.0383576154708862


training:   9%|▊         | 8529/100000 [7:47:13<91:39:16,  3.61s/it]

training loss: 0.9626767635345459
training loss: 1.1176340579986572
training loss: 1.1491807699203491
training loss: 0.8728581666946411


training:   9%|▊         | 8533/100000 [7:47:25<87:52:55,  3.46s/it]

training loss: 1.1900646686553955
training loss: 1.1627135276794434
training loss: 1.165939211845398
training loss: 0.878678560256958


training:   9%|▊         | 8537/100000 [7:47:38<85:16:06,  3.36s/it]

training loss: 1.1812357902526855
training loss: 1.2475824356079102
training loss: 1.0165510177612305
training loss: 0.921660840511322


training:   9%|▊         | 8541/100000 [7:47:50<83:25:02,  3.28s/it]

training loss: 0.9662517309188843
training loss: 0.8612725734710693
training loss: 1.0716923475265503
training loss: 1.0626044273376465


training:   9%|▊         | 8545/100000 [7:48:03<82:06:59,  3.23s/it]

training loss: 1.255207896232605
training loss: 1.032768726348877
training loss: 0.9774088263511658
training loss: 1.2052993774414062


training:   9%|▊         | 8549/100000 [7:48:15<81:14:58,  3.20s/it]

training loss: 1.1150702238082886
training loss: 1.1945610046386719
training loss: 0.9086261987686157
training loss: 1.1896593570709229


training:   9%|▊         | 8553/100000 [7:48:28<80:38:52,  3.17s/it]

training loss: 1.1357641220092773
training loss: 1.1433395147323608
training loss: 1.2106568813323975
training loss: 1.1899741888046265


training:   9%|▊         | 8557/100000 [7:48:40<80:08:45,  3.16s/it]

training loss: 1.1353622674942017
training loss: 1.109762191772461
training loss: 1.226855754852295
training loss: 1.0495729446411133


training:   9%|▊         | 8561/100000 [7:48:53<79:51:52,  3.14s/it]

training loss: 1.027184009552002
training loss: 0.9427455067634583
training loss: 1.084850788116455
training loss: 1.102549433708191


training:   9%|▊         | 8565/100000 [7:49:05<79:37:39,  3.14s/it]

training loss: 1.245375394821167
training loss: 1.256788969039917
training loss: 1.0387619733810425
training loss: 1.0728673934936523


training:   9%|▊         | 8569/100000 [7:49:18<79:27:28,  3.13s/it]

training loss: 1.0870188474655151
training loss: 1.0109246969223022
training loss: 1.1644179821014404
training loss: 1.1055679321289062


training:   9%|▊         | 8573/100000 [7:49:30<79:23:22,  3.13s/it]

training loss: 0.9586026072502136
training loss: 0.9530198574066162
training loss: 0.8315265774726868
training loss: 1.2268824577331543


training:   9%|▊         | 8577/100000 [7:49:43<79:21:40,  3.13s/it]

training loss: 1.09139084815979
training loss: 1.1219942569732666
training loss: 1.1473724842071533
training loss: 1.0739483833312988


training:   9%|▊         | 8581/100000 [7:49:55<79:18:15,  3.12s/it]

training loss: 1.1169480085372925
training loss: 1.1698858737945557
training loss: 1.1510018110275269
training loss: 1.1417622566223145


training:   9%|▊         | 8585/100000 [7:50:07<79:16:36,  3.12s/it]

training loss: 1.1479456424713135
training loss: 0.9937743544578552
training loss: 1.1056346893310547
training loss: 1.1844985485076904


training:   9%|▊         | 8589/100000 [7:50:20<79:15:00,  3.12s/it]

training loss: 1.1944650411605835
training loss: 1.1426680088043213
training loss: 0.9621423482894897
training loss: 1.1605253219604492


training:   9%|▊         | 8593/100000 [7:50:32<79:16:09,  3.12s/it]

training loss: 1.2615381479263306
training loss: 1.1442687511444092
training loss: 1.0527265071868896
training loss: 1.0184286832809448


training:   9%|▊         | 8597/100000 [7:50:45<79:14:04,  3.12s/it]

training loss: 1.1051629781723022
training loss: 1.2544076442718506
training loss: 1.2018660306930542
training loss: 1.0791877508163452
training loss: 1.18129301071167


training:   9%|▊         | 8601/100000 [7:50:58<79:47:28,  3.14s/it]

validation loss: 1.1467981338500977
training loss: 1.2586771249771118
training loss: 1.241142749786377
training loss: 1.115837574005127


training:   9%|▊         | 8605/100000 [7:51:10<79:31:49,  3.13s/it]

training loss: 1.1198281049728394
training loss: 1.2389763593673706
training loss: 1.0679619312286377
training loss: 1.0648149251937866


training:   9%|▊         | 8609/100000 [7:51:23<79:24:24,  3.13s/it]

training loss: 1.0278139114379883
training loss: 1.094394326210022
training loss: 1.0899205207824707
training loss: 1.2694010734558105


training:   9%|▊         | 8613/100000 [7:51:35<79:18:54,  3.12s/it]

training loss: 1.0023542642593384
training loss: 1.0299797058105469
training loss: 0.7938514947891235
training loss: 1.1500835418701172


training:   9%|▊         | 8617/100000 [7:51:48<79:15:01,  3.12s/it]

training loss: 1.1540354490280151
training loss: 1.2403082847595215
training loss: 1.1474943161010742
training loss: 1.2276142835617065


training:   9%|▊         | 8621/100000 [7:52:00<79:11:35,  3.12s/it]

training loss: 1.0128179788589478
training loss: 0.8606259822845459
training loss: 1.1845098733901978
training loss: 1.1708917617797852


training:   9%|▊         | 8625/100000 [7:52:12<79:08:09,  3.12s/it]

training loss: 1.237443208694458
training loss: 1.258366584777832
training loss: 1.172020673751831
training loss: 1.1005150079727173


training:   9%|▊         | 8629/100000 [7:52:25<79:06:10,  3.12s/it]

training loss: 1.1468850374221802
training loss: 1.0384104251861572
training loss: 1.0458670854568481
training loss: 1.0143663883209229


training:   9%|▊         | 8633/100000 [7:52:37<79:07:26,  3.12s/it]

training loss: 1.1095609664916992
training loss: 0.9768871068954468
training loss: 1.0506157875061035
training loss: 1.12920343875885


training:   9%|▊         | 8637/100000 [7:52:50<79:08:26,  3.12s/it]

training loss: 1.1811509132385254
training loss: 1.1154301166534424
training loss: 1.127364993095398
training loss: 1.0851895809173584


training:   9%|▊         | 8641/100000 [7:53:02<79:09:44,  3.12s/it]

training loss: 1.1191022396087646
training loss: 1.0570765733718872
training loss: 1.1745277643203735
training loss: 0.8566098213195801


training:   9%|▊         | 8645/100000 [7:53:15<79:08:49,  3.12s/it]

training loss: 1.0658624172210693
training loss: 0.9929189682006836
training loss: 1.0126441717147827
training loss: 1.1432557106018066


training:   9%|▊         | 8649/100000 [7:53:27<79:06:03,  3.12s/it]

training loss: 1.0220766067504883
training loss: 1.0323406457901
training loss: 1.035823941230774
training loss: 1.0111398696899414


training:   9%|▊         | 8653/100000 [7:53:40<79:04:12,  3.12s/it]

training loss: 1.1800422668457031
training loss: 1.264668583869934
training loss: 1.412445068359375
training loss: 1.1205639839172363


training:   9%|▊         | 8657/100000 [7:53:52<79:05:11,  3.12s/it]

training loss: 1.0481122732162476
training loss: 1.077475666999817
training loss: 1.1123216152191162
training loss: 1.1143771409988403


training:   9%|▊         | 8661/100000 [7:54:05<79:05:14,  3.12s/it]

training loss: 1.0212724208831787
training loss: 1.084681749343872
training loss: 1.1449334621429443
training loss: 1.0406756401062012


training:   9%|▊         | 8665/100000 [7:54:17<79:06:23,  3.12s/it]

training loss: 1.178441047668457
training loss: 1.2336441278457642
training loss: 1.2295764684677124
training loss: 1.2343575954437256


training:   9%|▊         | 8669/100000 [7:54:30<79:05:43,  3.12s/it]

training loss: 0.9896685481071472
training loss: 1.0101721286773682
training loss: 1.1864597797393799
training loss: 1.0691803693771362


training:   9%|▊         | 8673/100000 [7:54:42<79:04:16,  3.12s/it]

training loss: 1.031578779220581
training loss: 1.098923921585083
training loss: 1.2046650648117065
training loss: 0.8279186487197876


training:   9%|▊         | 8677/100000 [7:54:55<79:05:09,  3.12s/it]

training loss: 1.1632736921310425
training loss: 0.8700442314147949
training loss: 1.1736013889312744
training loss: 1.130761981010437


training:   9%|▊         | 8681/100000 [7:55:07<79:03:45,  3.12s/it]

training loss: 1.2061785459518433
training loss: 1.102940320968628
training loss: 0.9886969923973083
training loss: 1.0048556327819824


training:   9%|▊         | 8685/100000 [7:55:20<79:05:15,  3.12s/it]

training loss: 1.0242358446121216
training loss: 0.9669643640518188
training loss: 0.7799808979034424
training loss: 1.104133129119873


training:   9%|▊         | 8689/100000 [7:55:32<79:05:43,  3.12s/it]

training loss: 1.021239995956421
training loss: 1.1474583148956299
training loss: 1.112597107887268
training loss: 1.0414533615112305


training:   9%|▊         | 8693/100000 [7:55:44<79:05:30,  3.12s/it]

training loss: 1.121307373046875
training loss: 1.141727328300476
training loss: 1.069432258605957
training loss: 1.2478322982788086


training:   9%|▊         | 8697/100000 [7:55:57<79:05:27,  3.12s/it]

training loss: 0.8869655132293701
training loss: 1.1393167972564697
training loss: 1.178243637084961
training loss: 1.0647448301315308
training loss: 1.2974236011505127


training:   9%|▊         | 8701/100000 [7:56:10<79:40:04,  3.14s/it]

validation loss: 1.0244145393371582
training loss: 0.7288500070571899
training loss: 0.9362885355949402
training loss: 1.1295397281646729


training:   9%|▊         | 8705/100000 [7:56:22<79:26:33,  3.13s/it]

training loss: 1.1150877475738525
training loss: 1.196995496749878
training loss: 1.0413341522216797
training loss: 1.1989456415176392


training:   9%|▊         | 8709/100000 [7:56:35<79:19:54,  3.13s/it]

training loss: 1.0840219259262085
training loss: 0.9837259650230408
training loss: 0.9294489622116089
training loss: 1.0558509826660156


training:   9%|▊         | 8713/100000 [7:56:47<79:14:33,  3.13s/it]

training loss: 1.1654541492462158
training loss: 1.2019686698913574
training loss: 1.0614763498306274
training loss: 1.0648876428604126


training:   9%|▊         | 8717/100000 [7:57:00<79:11:48,  3.12s/it]

training loss: 0.7774467468261719
training loss: 1.1527411937713623
training loss: 1.1574136018753052
training loss: 1.1944875717163086


training:   9%|▊         | 8721/100000 [7:57:12<79:09:58,  3.12s/it]

training loss: 1.0802459716796875
training loss: 1.0467181205749512
training loss: 1.1962487697601318
training loss: 1.040389060974121


training:   9%|▊         | 8725/100000 [7:57:25<79:09:11,  3.12s/it]

training loss: 1.2069588899612427
training loss: 0.9721754789352417
training loss: 1.2365185022354126
training loss: 1.041243314743042


training:   9%|▊         | 8729/100000 [7:57:37<79:07:54,  3.12s/it]

training loss: 0.9718700647354126
training loss: 1.151443600654602
training loss: 1.122182011604309
training loss: 1.1354620456695557


training:   9%|▊         | 8733/100000 [7:57:49<79:02:19,  3.12s/it]

training loss: 1.1342506408691406
training loss: 0.9389034509658813
training loss: 1.2279067039489746
training loss: 1.1316452026367188


training:   9%|▊         | 8737/100000 [7:58:02<79:02:21,  3.12s/it]

training loss: 1.17832612991333
training loss: 0.942186713218689
training loss: 1.0656466484069824
training loss: 1.130207896232605


training:   9%|▊         | 8741/100000 [7:58:14<79:02:09,  3.12s/it]

training loss: 1.0400052070617676
training loss: 1.0021638870239258
training loss: 1.0597494840621948
training loss: 1.234030842781067


training:   9%|▊         | 8745/100000 [7:58:27<79:02:48,  3.12s/it]

training loss: 1.1887186765670776
training loss: 1.0743545293807983
training loss: 1.0134509801864624
training loss: 1.1696735620498657


training:   9%|▊         | 8749/100000 [7:58:39<79:04:33,  3.12s/it]

training loss: 1.1702888011932373
training loss: 1.1185553073883057
training loss: 1.0137252807617188
training loss: 1.0737890005111694


training:   9%|▉         | 8753/100000 [7:58:52<79:03:28,  3.12s/it]

training loss: 1.093432068824768
training loss: 1.221543312072754
training loss: 0.9902788996696472
training loss: 1.2072373628616333


training:   9%|▉         | 8757/100000 [7:59:04<79:02:46,  3.12s/it]

training loss: 0.9720193147659302
training loss: 1.04489004611969
training loss: 1.1654139757156372
training loss: 1.1651475429534912


training:   9%|▉         | 8761/100000 [7:59:17<79:02:27,  3.12s/it]

training loss: 1.148214340209961
training loss: 1.1666758060455322
training loss: 1.156484842300415
training loss: 1.2827796936035156


training:   9%|▉         | 8765/100000 [7:59:29<79:02:27,  3.12s/it]

training loss: 0.8127228617668152
training loss: 1.2138097286224365
training loss: 1.0505657196044922
training loss: 1.1716501712799072


training:   9%|▉         | 8769/100000 [7:59:42<79:01:27,  3.12s/it]

training loss: 1.0781059265136719
training loss: 1.045930027961731
training loss: 0.8941834568977356
training loss: 1.0285344123840332


training:   9%|▉         | 8773/100000 [7:59:54<78:59:00,  3.12s/it]

training loss: 0.9990789294242859
training loss: 1.0330134630203247
training loss: 1.1457774639129639
training loss: 1.1811177730560303


training:   9%|▉         | 8777/100000 [8:00:07<79:00:26,  3.12s/it]

training loss: 0.7220273017883301
training loss: 1.063030481338501
training loss: 1.190479040145874
training loss: 1.1177935600280762


training:   9%|▉         | 8781/100000 [8:00:19<79:02:03,  3.12s/it]

training loss: 1.2207502126693726
training loss: 0.9895135760307312
training loss: 1.192622184753418
training loss: 1.3861099481582642


training:   9%|▉         | 8785/100000 [8:00:32<79:02:22,  3.12s/it]

training loss: 0.9251078963279724
training loss: 1.078473687171936
training loss: 1.090464472770691
training loss: 0.9796304702758789


training:   9%|▉         | 8789/100000 [8:00:44<79:04:49,  3.12s/it]

training loss: 1.1903852224349976
training loss: 1.1455161571502686
training loss: 1.0481066703796387
training loss: 1.0789051055908203


training:   9%|▉         | 8793/100000 [8:00:57<79:04:36,  3.12s/it]

training loss: 1.1421737670898438
training loss: 1.1318786144256592
training loss: 1.1742680072784424
training loss: 1.1312426328659058


training:   9%|▉         | 8797/100000 [8:01:09<79:02:50,  3.12s/it]

training loss: 1.34307062625885
training loss: 1.0680320262908936
training loss: 1.1352652311325073
training loss: 1.1830371618270874


training:   9%|▉         | 8797/100000 [8:01:21<79:02:50,  3.12s/it]

training loss: 1.2499306201934814


training:   9%|▉         | 8801/100000 [8:01:22<79:33:41,  3.14s/it]

validation loss: 1.180208444595337
training loss: 1.0081017017364502
training loss: 1.1905677318572998
training loss: 1.0007338523864746


training:   9%|▉         | 8805/100000 [8:01:34<79:19:29,  3.13s/it]

training loss: 1.1543779373168945
training loss: 1.0660061836242676
training loss: 1.175521731376648
training loss: 0.9554698467254639


training:   9%|▉         | 8809/100000 [8:01:47<79:13:25,  3.13s/it]

training loss: 0.9957107901573181
training loss: 1.01864755153656
training loss: 1.1733765602111816
training loss: 1.1588294506072998


training:   9%|▉         | 8813/100000 [8:01:59<79:08:21,  3.12s/it]

training loss: 1.1795265674591064
training loss: 1.2716240882873535
training loss: 1.2513457536697388
training loss: 1.08931565284729


training:   9%|▉         | 8817/100000 [8:02:12<79:05:27,  3.12s/it]

training loss: 1.188086986541748
training loss: 0.9490901827812195
training loss: 1.2504122257232666
training loss: 1.0630210638046265


training:   9%|▉         | 8821/100000 [8:02:24<79:03:27,  3.12s/it]

training loss: 1.0917080640792847
training loss: 1.2018423080444336
training loss: 1.1663413047790527
training loss: 1.1291086673736572


training:   9%|▉         | 8825/100000 [8:02:37<79:01:42,  3.12s/it]

training loss: 1.006998062133789
training loss: 1.0011708736419678
training loss: 1.174470067024231
training loss: 1.2322190999984741


training:   9%|▉         | 8829/100000 [8:02:49<79:00:38,  3.12s/it]

training loss: 1.1694822311401367
training loss: 1.1388154029846191
training loss: 1.196624755859375
training loss: 1.1998710632324219


training:   9%|▉         | 8833/100000 [8:03:02<79:01:43,  3.12s/it]

training loss: 0.8371565341949463
training loss: 1.1302863359451294
training loss: 1.165270447731018
training loss: 1.033230185508728


training:   9%|▉         | 8837/100000 [8:03:14<79:01:07,  3.12s/it]

training loss: 1.0061711072921753
training loss: 1.1554527282714844
training loss: 1.0467615127563477
training loss: 1.2648205757141113


training:   9%|▉         | 8841/100000 [8:03:27<78:59:48,  3.12s/it]

training loss: 0.9207149744033813
training loss: 1.1634045839309692
training loss: 1.1028759479522705
training loss: 1.1568241119384766


training:   9%|▉         | 8845/100000 [8:03:39<78:58:32,  3.12s/it]

training loss: 0.9184200167655945
training loss: 0.9807296395301819
training loss: 0.9552807807922363
training loss: 1.035855770111084


training:   9%|▉         | 8849/100000 [8:03:51<78:56:10,  3.12s/it]

training loss: 1.2262340784072876
training loss: 1.0985968112945557
training loss: 1.0611379146575928
training loss: 1.2018283605575562


training:   9%|▉         | 8853/100000 [8:04:04<78:56:42,  3.12s/it]

training loss: 1.0404088497161865
training loss: 1.1124212741851807
training loss: 0.9219591617584229
training loss: 1.228931188583374


training:   9%|▉         | 8857/100000 [8:04:16<78:58:06,  3.12s/it]

training loss: 1.1921226978302002
training loss: 1.2787725925445557
training loss: 0.9257447719573975
training loss: 1.136469841003418


training:   9%|▉         | 8861/100000 [8:04:29<78:57:30,  3.12s/it]

training loss: 1.1122676134109497
training loss: 1.0909268856048584
training loss: 0.8860143423080444
training loss: 1.0314165353775024


training:   9%|▉         | 8865/100000 [8:04:41<78:57:18,  3.12s/it]

training loss: 1.1164162158966064
training loss: 1.1548364162445068
training loss: 1.0989375114440918
training loss: 1.1222851276397705


training:   9%|▉         | 8869/100000 [8:04:54<78:56:45,  3.12s/it]

training loss: 1.2662858963012695
training loss: 1.1487473249435425
training loss: 1.112669587135315
training loss: 1.2361472845077515


training:   9%|▉         | 8873/100000 [8:05:06<78:56:31,  3.12s/it]

training loss: 1.03387451171875
training loss: 1.1509780883789062
training loss: 1.1594642400741577
training loss: 1.1081857681274414


training:   9%|▉         | 8877/100000 [8:05:19<78:57:41,  3.12s/it]

training loss: 1.124734878540039
training loss: 1.105506181716919
training loss: 1.0020114183425903
training loss: 0.9755885601043701


training:   9%|▉         | 8881/100000 [8:05:31<78:57:55,  3.12s/it]

training loss: 0.9721461534500122
training loss: 1.1941572427749634
training loss: 0.9775921702384949
training loss: 1.2296767234802246


training:   9%|▉         | 8885/100000 [8:05:44<78:57:29,  3.12s/it]

training loss: 1.1378777027130127
training loss: 1.1561799049377441
training loss: 1.1102633476257324
training loss: 1.0235130786895752


training:   9%|▉         | 8889/100000 [8:05:56<78:57:05,  3.12s/it]

training loss: 1.1069644689559937
training loss: 1.1282143592834473
training loss: 0.9871053099632263
training loss: 1.1822422742843628


training:   9%|▉         | 8893/100000 [8:06:09<78:56:43,  3.12s/it]

training loss: 0.971315860748291
training loss: 1.006736159324646
training loss: 1.0824487209320068
training loss: 1.1831799745559692


training:   9%|▉         | 8897/100000 [8:06:21<78:56:57,  3.12s/it]

training loss: 1.0573639869689941
training loss: 1.1105798482894897
training loss: 0.9923553466796875
training loss: 1.330139398574829


training:   9%|▉         | 8897/100000 [8:06:31<78:56:57,  3.12s/it]

training loss: 1.153115153312683


training:   9%|▉         | 8901/100000 [8:06:34<79:32:01,  3.14s/it]

validation loss: 1.2606370449066162
training loss: 1.1643365621566772
training loss: 0.9238879084587097
training loss: 0.994692862033844


training:   9%|▉         | 8905/100000 [8:06:46<79:18:08,  3.13s/it]

training loss: 1.16761314868927
training loss: 0.9027330279350281
training loss: 0.8920855522155762
training loss: 1.0605902671813965


training:   9%|▉         | 8909/100000 [8:06:59<79:11:10,  3.13s/it]

training loss: 1.2451679706573486
training loss: 1.1555743217468262
training loss: 1.18450927734375
training loss: 1.1079363822937012


training:   9%|▉         | 8913/100000 [8:07:11<79:05:53,  3.13s/it]

training loss: 1.1096808910369873
training loss: 0.9062179327011108
training loss: 1.0988119840621948
training loss: 0.9240191578865051


training:   9%|▉         | 8917/100000 [8:07:24<79:02:30,  3.12s/it]

training loss: 1.185797929763794
training loss: 0.9093600511550903
training loss: 0.9540907144546509
training loss: 1.0343841314315796


training:   9%|▉         | 8921/100000 [8:07:36<79:01:28,  3.12s/it]

training loss: 0.946686863899231
training loss: 1.0717757940292358
training loss: 1.1010304689407349
training loss: 1.041532278060913


training:   9%|▉         | 8925/100000 [8:07:49<78:59:11,  3.12s/it]

training loss: 1.1758880615234375
training loss: 1.0971879959106445
training loss: 1.1868183612823486
training loss: 1.2125321626663208


training:   9%|▉         | 8929/100000 [8:08:01<78:57:01,  3.12s/it]

training loss: 1.069561243057251
training loss: 1.0400570631027222
training loss: 1.1618883609771729
training loss: 1.1814758777618408


training:   9%|▉         | 8933/100000 [8:08:14<78:56:19,  3.12s/it]

training loss: 1.0350582599639893
training loss: 1.2635294198989868
training loss: 1.0935471057891846
training loss: 1.191163420677185


training:   9%|▉         | 8937/100000 [8:08:26<78:55:00,  3.12s/it]

training loss: 1.088285207748413
training loss: 1.1630417108535767
training loss: 1.1212220191955566
training loss: 1.1051511764526367


training:   9%|▉         | 8941/100000 [8:08:39<78:52:45,  3.12s/it]

training loss: 0.9854289293289185
training loss: 1.1063079833984375
training loss: 1.1366177797317505
training loss: 1.141648292541504


training:   9%|▉         | 8945/100000 [8:08:51<78:51:29,  3.12s/it]

training loss: 0.9302878379821777
training loss: 1.0108630657196045
training loss: 0.9675097465515137
training loss: 0.9766383171081543


training:   9%|▉         | 8949/100000 [8:09:04<78:51:43,  3.12s/it]

training loss: 1.1888176202774048
training loss: 0.9760666489601135
training loss: 1.0990022420883179
training loss: 0.9171611070632935


training:   9%|▉         | 8953/100000 [8:09:16<78:51:50,  3.12s/it]

training loss: 1.1341273784637451
training loss: 1.0921375751495361
training loss: 1.1135175228118896
training loss: 1.1378424167633057


training:   9%|▉         | 8957/100000 [8:09:29<78:51:39,  3.12s/it]

training loss: 1.0411012172698975
training loss: 1.1096992492675781
training loss: 1.134063482284546
training loss: 1.1941744089126587


training:   9%|▉         | 8961/100000 [8:09:41<78:52:05,  3.12s/it]

training loss: 1.1796023845672607
training loss: 1.293591022491455
training loss: 0.9311914443969727
training loss: 1.0209424495697021


training:   9%|▉         | 8965/100000 [8:09:54<78:51:49,  3.12s/it]

training loss: 1.1065919399261475
training loss: 1.0359925031661987
training loss: 1.0930519104003906
training loss: 1.0439512729644775


training:   9%|▉         | 8969/100000 [8:10:06<78:51:28,  3.12s/it]

training loss: 0.9737969636917114
training loss: 1.0590343475341797
training loss: 1.145627498626709
training loss: 1.103181004524231


training:   9%|▉         | 8973/100000 [8:10:19<78:51:13,  3.12s/it]

training loss: 0.9648101329803467
training loss: 1.279543161392212
training loss: 1.1292040348052979
training loss: 1.0035070180892944


training:   9%|▉         | 8977/100000 [8:10:31<78:51:00,  3.12s/it]

training loss: 1.2907572984695435
training loss: 1.144171953201294
training loss: 1.3065651655197144
training loss: 1.2378402948379517


training:   9%|▉         | 8981/100000 [8:10:43<78:50:49,  3.12s/it]

training loss: 1.1907860040664673
training loss: 1.0369632244110107
training loss: 1.1903932094573975
training loss: 1.03152596950531


training:   9%|▉         | 8985/100000 [8:10:56<78:52:18,  3.12s/it]

training loss: 1.1037054061889648
training loss: 1.0973657369613647
training loss: 0.9861453175544739
training loss: 1.1849209070205688


training:   9%|▉         | 8989/100000 [8:11:08<78:54:54,  3.12s/it]

training loss: 1.1433756351470947
training loss: 1.2313600778579712
training loss: 1.2361547946929932
training loss: 1.0889941453933716


training:   9%|▉         | 8993/100000 [8:11:21<78:53:08,  3.12s/it]

training loss: 1.040090799331665
training loss: 1.1152921915054321
training loss: 0.9852726459503174
training loss: 1.179948329925537


training:   9%|▉         | 8997/100000 [8:11:33<78:52:30,  3.12s/it]

training loss: 1.0696735382080078
training loss: 1.2042663097381592
training loss: 0.9907308220863342
training loss: 1.1704001426696777
training loss: 1.0269678831100464
validation loss: 1.056650161743164
%s 

 %s ('on>   </page>   <page>     <title>Home run</title>     <id>14148</id>     <revision>       <id>42137692</id>       <timestamp>2006-03-04T01:49:02Z</timestamp>       <contributor>         <ip>12.99.127.169</ip>       </contributor>       <comment>/* Types of home runs */</comment>       <text xml:space="preserve">{{otheruses}} In [[baseball]], a \'\'\'home run\'\'\' is a [[hit (baseball)|base hit]] in which the [[batting (baseball)|batter]] is able to circle all the bases, ending at home plate and scoring a [[run (baseball)|run]] himself (along with a run scored by each [[baserunning|runner]] who was already on base), with no [[error (baseball)|errors]] by the defensive team on the play which result in the batter advancing for extra bases.  Home runs are among the most popul

training:   9%|▉         | 8997/100000 [8:11:51<78:52:30,  3.12s/it]

l Cult]] on [[1 Cybernel]]. Fortel, a man of [[series]] and a large run-by [[adjour]] (a civil score at the much of a repert phase (a fastling other to operate-attended team of a single gate.)  {{wikiquote}} History held local ''&quot;Home&quot;'' inpublic problems from &quot;[[inability]]&quot;) was just weekly for &quot;In company&quot;. He was for a type of [[morphology]], although a security in the toundations of [[New Zealand]] (''Sour NB'', ''Horsezing''), run-series.  {{start baseball|series are desired outs and a future of supporting singer due to team, even &quot;if us pressing&quot; strategy. John in [[most part of the UK information|Indiving civilization]], [[molder|wrass]], Goth and [[king of etching]], and was a [[flew mut]], and the fundamental gender were mixing at [[civilisators]] to a series. Often the sinberg's ninlands of a circular denote of [[socialization and significant process]], running the single were form at valued on the sbell in civilization of [[Metalog (c

training:   9%|▉         | 9001/100000 [8:13:05<228:48:17,  9.05s/it]

Model saved at iteration 9000
training loss: 0.9640860557556152
training loss: 1.001212239265442
training loss: 1.0607476234436035


training:   9%|▉         | 9005/100000 [8:13:17<183:44:21,  7.27s/it]

training loss: 1.0357359647750854
training loss: 1.0997695922851562
training loss: 1.1909148693084717
training loss: 1.1208117008209229


training:   9%|▉         | 9009/100000 [8:13:30<152:15:06,  6.02s/it]

training loss: 1.079506754875183
training loss: 1.1205042600631714
training loss: 1.0937480926513672
training loss: 0.9556611776351929


training:   9%|▉         | 9013/100000 [8:13:42<130:13:13,  5.15s/it]

training loss: 1.0746984481811523
training loss: 1.1144390106201172
training loss: 1.1133723258972168
training loss: 1.2095324993133545


training:   9%|▉         | 9017/100000 [8:13:55<114:47:34,  4.54s/it]

training loss: 1.1200803518295288
training loss: 1.0653767585754395
training loss: 1.0270804166793823
training loss: 0.9551900029182434


training:   9%|▉         | 9021/100000 [8:14:07<103:59:42,  4.12s/it]

training loss: 1.1448172330856323
training loss: 1.0585613250732422
training loss: 1.1217381954193115
training loss: 1.202580451965332


training:   9%|▉         | 9025/100000 [8:14:20<96:24:59,  3.82s/it] 

training loss: 1.061849594116211
training loss: 1.2000739574432373
training loss: 1.1419686079025269
training loss: 1.068261742591858


training:   9%|▉         | 9029/100000 [8:14:32<91:08:32,  3.61s/it]

training loss: 1.1562957763671875
training loss: 1.2505838871002197
training loss: 1.1549822092056274
training loss: 1.0625715255737305


training:   9%|▉         | 9033/100000 [8:14:45<87:26:14,  3.46s/it]

training loss: 1.0547707080841064
training loss: 1.0018818378448486
training loss: 1.0562634468078613
training loss: 1.0831986665725708


training:   9%|▉         | 9037/100000 [8:14:57<84:50:37,  3.36s/it]

training loss: 1.1599371433258057
training loss: 1.0837490558624268
training loss: 0.9542863368988037
training loss: 1.0944103002548218


training:   9%|▉         | 9041/100000 [8:15:10<83:02:11,  3.29s/it]

training loss: 1.2436860799789429
training loss: 1.1172161102294922
training loss: 1.063348412513733
training loss: 0.9527478218078613


training:   9%|▉         | 9045/100000 [8:15:22<81:44:19,  3.24s/it]

training loss: 1.229068398475647
training loss: 1.0430207252502441
training loss: 1.1637123823165894
training loss: 1.0539833307266235


training:   9%|▉         | 9049/100000 [8:15:35<80:51:09,  3.20s/it]

training loss: 1.1343507766723633
training loss: 1.0042119026184082
training loss: 1.1076362133026123
training loss: 1.1394140720367432


training:   9%|▉         | 9053/100000 [8:15:47<80:13:53,  3.18s/it]

training loss: 0.9822432398796082
training loss: 0.9128621220588684
training loss: 1.1468441486358643
training loss: 1.1700317859649658


training:   9%|▉         | 9057/100000 [8:16:00<79:47:56,  3.16s/it]

training loss: 1.1944952011108398
training loss: 1.0766408443450928
training loss: 1.3351731300354004
training loss: 1.2560970783233643


training:   9%|▉         | 9061/100000 [8:16:12<79:28:54,  3.15s/it]

training loss: 1.1332406997680664
training loss: 1.171140193939209
training loss: 1.012205958366394
training loss: 1.071211576461792


training:   9%|▉         | 9065/100000 [8:16:25<79:11:57,  3.14s/it]

training loss: 1.1416109800338745
training loss: 0.8155144453048706
training loss: 0.9404491186141968
training loss: 1.2531688213348389


training:   9%|▉         | 9069/100000 [8:16:37<78:58:57,  3.13s/it]

training loss: 1.2085323333740234
training loss: 1.1404849290847778
training loss: 1.2266992330551147
training loss: 1.2063093185424805


training:   9%|▉         | 9073/100000 [8:16:49<78:52:49,  3.12s/it]

training loss: 1.0682657957077026
training loss: 1.1302788257598877
training loss: 0.9679023027420044
training loss: 1.0550062656402588


training:   9%|▉         | 9077/100000 [8:17:02<78:49:56,  3.12s/it]

training loss: 1.1250560283660889
training loss: 1.0384055376052856
training loss: 0.9496321678161621
training loss: 0.9770539402961731


training:   9%|▉         | 9081/100000 [8:17:14<78:49:12,  3.12s/it]

training loss: 1.090850830078125
training loss: 1.128563642501831
training loss: 1.03691565990448
training loss: 1.223218560218811


training:   9%|▉         | 9085/100000 [8:17:27<78:48:39,  3.12s/it]

training loss: 1.0331590175628662
training loss: 1.2167942523956299
training loss: 1.1902494430541992
training loss: 1.1717250347137451


training:   9%|▉         | 9089/100000 [8:17:39<78:48:48,  3.12s/it]

training loss: 1.0420527458190918
training loss: 0.9079058170318604
training loss: 1.1602636575698853
training loss: 1.3499853610992432


training:   9%|▉         | 9093/100000 [8:17:52<78:47:34,  3.12s/it]

training loss: 1.2505958080291748
training loss: 0.9067798256874084
training loss: 1.1701500415802002
training loss: 1.1269395351409912


training:   9%|▉         | 9097/100000 [8:18:04<78:46:54,  3.12s/it]

training loss: 0.8990086317062378
training loss: 1.0194114446640015
training loss: 1.0181567668914795
training loss: 1.1463539600372314
training loss: 1.1121145486831665


training:   9%|▉         | 9101/100000 [8:18:17<79:20:33,  3.14s/it]

validation loss: 1.2025866508483887
training loss: 1.09549880027771
training loss: 1.2279831171035767
training loss: 1.0887173414230347


training:   9%|▉         | 9105/100000 [8:18:29<79:07:10,  3.13s/it]

training loss: 1.061815857887268
training loss: 1.097339391708374
training loss: 1.0312992334365845
training loss: 1.2336302995681763


training:   9%|▉         | 9109/100000 [8:18:42<79:00:07,  3.13s/it]

training loss: 1.573106288909912
training loss: 0.983311653137207
training loss: 1.1827186346054077
training loss: 1.0651764869689941


training:   9%|▉         | 9113/100000 [8:18:54<78:59:34,  3.13s/it]

training loss: 1.094466209411621
training loss: 0.9952194690704346
training loss: 1.0814640522003174
training loss: 1.1015074253082275


training:   9%|▉         | 9117/100000 [8:19:07<78:53:42,  3.13s/it]

training loss: 1.1678135395050049
training loss: 1.1536777019500732
training loss: 1.0997886657714844
training loss: 1.1432242393493652


training:   9%|▉         | 9121/100000 [8:19:19<78:50:22,  3.12s/it]

training loss: 1.0514386892318726
training loss: 0.9805309176445007
training loss: 1.1301283836364746
training loss: 0.7176185250282288


training:   9%|▉         | 9125/100000 [8:19:32<78:48:18,  3.12s/it]

training loss: 1.1614474058151245
training loss: 1.136622428894043
training loss: 1.1067111492156982
training loss: 1.138535976409912


training:   9%|▉         | 9129/100000 [8:19:44<78:46:34,  3.12s/it]

training loss: 1.330716848373413
training loss: 1.0985873937606812
training loss: 1.031501054763794
training loss: 1.115655779838562


training:   9%|▉         | 9133/100000 [8:19:57<78:49:15,  3.12s/it]

training loss: 1.1467628479003906
training loss: 1.0978666543960571
training loss: 1.112776517868042
training loss: 1.0252184867858887


training:   9%|▉         | 9137/100000 [8:20:09<78:47:11,  3.12s/it]

training loss: 1.1079788208007812
training loss: 1.1118358373641968
training loss: 1.0532804727554321
training loss: 1.4753175973892212


training:   9%|▉         | 9141/100000 [8:20:22<78:45:20,  3.12s/it]

training loss: 0.9533501267433167
training loss: 1.1622800827026367
training loss: 1.1189486980438232
training loss: 0.9725222587585449


training:   9%|▉         | 9145/100000 [8:20:34<78:44:39,  3.12s/it]

training loss: 0.9306691288948059
training loss: 1.2111561298370361
training loss: 1.0289427042007446
training loss: 0.920911431312561


training:   9%|▉         | 9149/100000 [8:20:47<78:43:21,  3.12s/it]

training loss: 1.041683316230774
training loss: 1.052005410194397
training loss: 0.9412119388580322
training loss: 1.239842176437378


training:   9%|▉         | 9153/100000 [8:20:59<78:41:25,  3.12s/it]

training loss: 1.1029939651489258
training loss: 1.0086331367492676
training loss: 1.2528152465820312
training loss: 1.1538431644439697


training:   9%|▉         | 9157/100000 [8:21:12<78:42:37,  3.12s/it]

training loss: 1.229184627532959
training loss: 1.1692782640457153
training loss: 1.1001125574111938
training loss: 0.9603524208068848


training:   9%|▉         | 9161/100000 [8:21:24<78:41:37,  3.12s/it]

training loss: 0.8805186152458191
training loss: 1.1264328956604004
training loss: 1.1511435508728027
training loss: 1.1346125602722168


training:   9%|▉         | 9165/100000 [8:21:37<78:41:23,  3.12s/it]

training loss: 1.1364190578460693
training loss: 1.0408965349197388
training loss: 1.0938174724578857
training loss: 1.132153034210205


training:   9%|▉         | 9169/100000 [8:21:49<78:40:53,  3.12s/it]

training loss: 1.1659750938415527
training loss: 0.9942392110824585
training loss: 1.1649529933929443
training loss: 1.0819730758666992


training:   9%|▉         | 9173/100000 [8:22:02<78:38:53,  3.12s/it]

training loss: 1.1451388597488403
training loss: 1.3249785900115967
training loss: 1.0128130912780762
training loss: 0.927480936050415


training:   9%|▉         | 9177/100000 [8:22:14<78:38:28,  3.12s/it]

training loss: 1.1387039422988892
training loss: 1.1614618301391602
training loss: 1.087680459022522
training loss: 1.0722508430480957


training:   9%|▉         | 9181/100000 [8:22:27<78:39:30,  3.12s/it]

training loss: 1.164665937423706
training loss: 1.0808169841766357
training loss: 1.0368504524230957
training loss: 1.0093306303024292


training:   9%|▉         | 9185/100000 [8:22:39<78:39:26,  3.12s/it]

training loss: 1.204939842224121
training loss: 1.0587024688720703
training loss: 1.0249826908111572
training loss: 1.0396085977554321


training:   9%|▉         | 9189/100000 [8:22:51<78:39:44,  3.12s/it]

training loss: 0.886198878288269
training loss: 1.1322216987609863
training loss: 1.1635853052139282
training loss: 1.0445271730422974


training:   9%|▉         | 9193/100000 [8:23:04<78:37:35,  3.12s/it]

training loss: 1.172142505645752
training loss: 1.1987998485565186
training loss: 0.8277342915534973
training loss: 1.0643553733825684


training:   9%|▉         | 9197/100000 [8:23:16<78:38:48,  3.12s/it]

training loss: 0.7887584567070007
training loss: 1.1839852333068848
training loss: 1.057403326034546
training loss: 1.153010368347168
training loss: 1.0493807792663574


training:   9%|▉         | 9201/100000 [8:23:29<79:13:51,  3.14s/it]

validation loss: 1.1320407390594482
training loss: 1.1667966842651367
training loss: 1.1209275722503662
training loss: 1.0083378553390503


training:   9%|▉         | 9205/100000 [8:23:42<79:00:48,  3.13s/it]

training loss: 0.9836545586585999
training loss: 1.2294926643371582
training loss: 1.0210542678833008
training loss: 1.21238374710083


training:   9%|▉         | 9209/100000 [8:23:54<78:54:15,  3.13s/it]

training loss: 1.135241985321045
training loss: 1.1684707403182983
training loss: 1.1686797142028809
training loss: 1.1127169132232666


training:   9%|▉         | 9213/100000 [8:24:07<78:49:50,  3.13s/it]

training loss: 1.1318190097808838
training loss: 1.012223720550537
training loss: 1.2045360803604126
training loss: 0.9871559143066406


training:   9%|▉         | 9217/100000 [8:24:19<78:44:30,  3.12s/it]

training loss: 1.1152675151824951
training loss: 1.1415424346923828
training loss: 1.1772608757019043
training loss: 1.293007254600525


training:   9%|▉         | 9221/100000 [8:24:32<78:43:56,  3.12s/it]

training loss: 1.078148603439331
training loss: 1.0958425998687744
training loss: 1.0963456630706787
training loss: 1.0841480493545532


training:   9%|▉         | 9225/100000 [8:24:44<78:41:49,  3.12s/it]

training loss: 1.1178123950958252
training loss: 1.206392765045166
training loss: 1.0448660850524902
training loss: 0.9346437454223633


training:   9%|▉         | 9229/100000 [8:24:57<78:40:35,  3.12s/it]

training loss: 1.0807363986968994
training loss: 1.119678258895874
training loss: 1.2289533615112305
training loss: 1.1369574069976807


training:   9%|▉         | 9233/100000 [8:25:09<78:36:55,  3.12s/it]

training loss: 1.0256670713424683
training loss: 1.1820993423461914
training loss: 1.0958669185638428
training loss: 1.2259891033172607


training:   9%|▉         | 9237/100000 [8:25:21<78:36:59,  3.12s/it]

training loss: 1.1274961233139038
training loss: 1.147857427597046
training loss: 1.2237852811813354
training loss: 1.10469388961792


training:   9%|▉         | 9241/100000 [8:25:34<78:38:26,  3.12s/it]

training loss: 1.1089637279510498
training loss: 1.0941554307937622
training loss: 1.1650843620300293
training loss: 1.0676965713500977


training:   9%|▉         | 9245/100000 [8:25:46<78:34:20,  3.12s/it]

training loss: 1.0101063251495361
training loss: 0.9421212077140808
training loss: 1.0783605575561523
training loss: 1.181636929512024


training:   9%|▉         | 9249/100000 [8:25:59<78:32:09,  3.12s/it]

training loss: 0.9970324635505676
training loss: 0.9950017929077148
training loss: 1.2150071859359741
training loss: 1.2218255996704102


training:   9%|▉         | 9253/100000 [8:26:11<78:33:47,  3.12s/it]

training loss: 1.0580499172210693
training loss: 1.084975004196167
training loss: 1.2099828720092773
training loss: 1.1212413311004639


training:   9%|▉         | 9257/100000 [8:26:24<78:32:20,  3.12s/it]

training loss: 0.9810764193534851
training loss: 1.0427497625350952
training loss: 1.2514557838439941
training loss: 1.0590263605117798


training:   9%|▉         | 9261/100000 [8:26:36<78:33:21,  3.12s/it]

training loss: 1.0536983013153076
training loss: 0.9498640298843384
training loss: 0.9878717660903931
training loss: 1.0160109996795654


training:   9%|▉         | 9265/100000 [8:26:49<78:32:18,  3.12s/it]

training loss: 1.166242241859436
training loss: 1.0796173810958862
training loss: 1.2079331874847412
training loss: 1.1293513774871826


training:   9%|▉         | 9269/100000 [8:27:01<78:33:44,  3.12s/it]

training loss: 1.1601364612579346
training loss: 0.9477533102035522
training loss: 1.0563645362854004
training loss: 0.995229959487915


training:   9%|▉         | 9273/100000 [8:27:14<78:34:40,  3.12s/it]

training loss: 1.1792371273040771
training loss: 1.1233556270599365
training loss: 1.1413168907165527
training loss: 0.9291828870773315


training:   9%|▉         | 9277/100000 [8:27:26<78:34:30,  3.12s/it]

training loss: 0.8769919872283936
training loss: 1.044207215309143
training loss: 1.1802260875701904
training loss: 1.2171916961669922


training:   9%|▉         | 9281/100000 [8:27:39<78:34:37,  3.12s/it]

training loss: 1.0807397365570068
training loss: 1.1084022521972656
training loss: 1.1412968635559082
training loss: 1.0119473934173584


training:   9%|▉         | 9285/100000 [8:27:51<78:34:00,  3.12s/it]

training loss: 1.0761487483978271


In [None]:
from google.colab import drive
drive.mount('/content/drive')


In [3]:
# Download the results
zip_filename = 'model_checkpoint.pt'
if os.path.exists(zip_filename):
  os.remove(zip_filename)
os.system(f"zip -r -j {zip_filename} results/*")
files.download(zip_filename)

/bin/bash: nvidia-smi: command not found
