In [1]:
import torch
import random
import time
import numpy as np
import sentencepiece as spm
from transformers import ReformerConfig, ReformerModelWithLMHead, ReformerTokenizer, EncoderDecoderConfig, EncoderDecoderModel
from torch.utils.data import DataLoader, Dataset

# --- Run configuration -------------------------------------------------------
# Number of optimisation steps per epoch; a placeholder here, filled in later
# once the training set has been loaded.
NUM_BATCHES = None
# Sequences per optimisation step.
BATCH_SIZE = 20
# Optimiser step size (1e-4 was also tried).
LEARNING_RATE = 1e-3
# Run one validation batch every this many training steps.
VALIDATE_EVERY = 10
# Fixed token length every sequence is truncated / padded to (64 * 72 == 4608).
SEQ_LEN = 64 * 72

In [2]:
# spm.SentencePieceTrainer.Train("--input=./data/tokenizer_training/AAresiduals.txt \
#                                 --vocab_size=28 \
#                                 --model_prefix=sequence_tokenizer \
#                                 --model_type=char \
#                                 --character_coverage=1.0")
# Load the SentencePiece model from disk (presumably the one produced by the
# training command kept in the comment above) and cap inputs at SEQ_LEN tokens.
tokenizer = ReformerTokenizer(
    vocab_file="sequence_tokenizer.model",
    do_lower_case=False,
    model_max_length=SEQ_LEN,
)
# NOTE(review): this overrides the attribute with a plain int on this instance
# only; confirm whether anything actually reads it.
tokenizer.max_model_input_sizes = SEQ_LEN

In [3]:

# def split_file(file,out1,out2,percentage=0.75,isShuffle=True,seed=42):
#     random.seed(seed)
#     with open(file, 'r',encoding="utf-8") as fin, open(out1, 'w') as foutBig, open(out2, 'w') as foutSmall:
#         nLines = sum(1 for line in fin)
#         fin.seek(0)

#         nTrain = int(nLines*percentage) 
#         nValid = nLines - nTrain

#         i = 0
#         for line in fin:
#             r = random.random() if isShuffle else 0 # so that always evaluated to true when not isShuffle
#             if (i < nTrain and r < percentage) or (nLines - i > nValid):
#                 foutBig.write(line)
#                 i += 1
#             else:
#                 foutSmall.write(line)
                
# split_file("data/yeast/yeast.txt", 
#            "data/yeast/yeast_train.txt",
#            "data/yeast/yeast_val.txt",
#            percentage=0.9)

In [4]:
class SequenceDataset(Dataset):
    """Pre-tokenized, fixed-length sequences for language-model training.

    The dataset holds three int64 tensors of shape ``(num_sequences, max_len)``:

    * ``input_ids``      - token ids, right-padded with 0
    * ``attention_mask`` - mask returned by the tokenizer, right-padded with 0
    * ``labels``         - copy of the token ids, right-padded with -100

    Everything is tokenized once up front (see :meth:`prepare_from_file`) and
    kept in host memory; ``__getitem__`` moves a single example to the GPU.
    """

    def __init__(self, input_ids, attention_mask, labels, tokenizer, _len):
        """Store already-prepared tensors.

        Args:
            input_ids: tensor of padded token ids, one row per sequence.
            attention_mask: tensor of padded attention masks, one row per sequence.
            labels: tensor of padded labels, one row per sequence.
            tokenizer: tokenizer that produced the ids (kept for reference).
            _len: number of sequences; returned by ``len(dataset)``.
        """
        super().__init__()
        self.input_ids = input_ids
        self.attention_mask = attention_mask
        self.labels = labels
        self.tokenizer = tokenizer
        self._len = _len

    @staticmethod
    def _pad_right(values, length, fill):
        """Return ``values`` as an int64 array right-padded with ``fill`` to ``length``."""
        values = np.asarray(values, dtype=np.int64)
        # np.pad raises ValueError if ``values`` is already longer than ``length``.
        return np.pad(values, (0, length - len(values)),
                      'constant', constant_values=fill)

    @classmethod
    def prepare_from_file(cls, file_path, tokenizer):
        """Build a dataset from a text file containing one sequence per line.

        Each non-blank line is stripped, encoded, stripped of its first token,
        truncated to the tokenizer's maximum length, passed through
        ``tokenizer.prepare_for_model`` and right-padded to that length.

        Args:
            file_path: path of the text file to read.
            tokenizer: object providing ``encode``, ``prepare_for_model`` and a
                maximum length (``model_max_length``, or the older ``max_len``).

        Returns:
            A ``SequenceDataset`` whose tensors always have two dimensions,
            ``(num_sequences, max_len)``, even for a single-sequence file.

        Raises:
            ValueError: if the file contains no non-blank lines, or if a
                prepared sequence is longer than the maximum length.
        """
        # ``max_len`` is the deprecated spelling of ``model_max_length``;
        # prefer the current attribute and fall back for older tokenizers.
        max_len = getattr(tokenizer, "model_max_length", None)
        if max_len is None:
            max_len = tokenizer.max_len

        with open(file_path) as file:
            stripped = [line.strip() for line in file]
        # A blank line would become an all-padding row whose labels are all
        # -100 (nothing to learn from), so skip it.
        sequences = [sequence for sequence in stripped if sequence]
        if not sequences:
            raise ValueError(f"no sequences found in {file_path!r}")

        input_rows, mask_rows, label_rows = [], [], []
        for sequence in sequences:
            # Drop the first encoded token and truncate to max_len tokens.
            token_ids = tokenizer.encode(sequence)[1:max_len + 1]
            prepared = tokenizer.prepare_for_model(token_ids)
            input_rows.append(cls._pad_right(prepared["input_ids"], max_len, 0))
            mask_rows.append(cls._pad_right(prepared["attention_mask"], max_len, 0))
            # -100 marks padded positions in the labels.
            label_rows.append(cls._pad_right(prepared["input_ids"], max_len, -100))

        # Stack into (num_sequences, max_len).  No squeeze(): squeezing would
        # drop the batch axis when the file holds exactly one sequence.
        input_ids = torch.tensor(np.stack(input_rows), dtype=torch.int64)
        attention_mask = torch.tensor(np.stack(mask_rows), dtype=torch.int64)
        labels = torch.tensor(np.stack(label_rows), dtype=torch.int64)

        return cls(input_ids, attention_mask, labels, tokenizer, len(input_ids))

    def __getitem__(self, index):
        """Return example ``index`` as a dict of tensors moved to the GPU."""
        return {"input_ids": self.input_ids[index].cuda(),
                "attention_mask": self.attention_mask[index].cuda(),
                "labels": self.labels[index].cuda()}

    def __len__(self):
        """Number of sequences in the dataset."""
        return self._len

In [5]:
def cycle(loader):
    """Yield items from ``loader`` forever, re-iterating it each time it runs out.

    ``loader`` is iterated afresh on every pass, so it must be re-iterable
    (for example a ``DataLoader`` or a list), not a one-shot iterator.
    """
    while True:
        yield from loader

# Tokenize both splits once up front; the datasets keep the padded tensors in memory.
train_dataset = SequenceDataset.prepare_from_file("data/yeast/yeast_train.txt", tokenizer)
val_dataset = SequenceDataset.prepare_from_file("data/yeast/yeast_val.txt", tokenizer)

# Both loaders are wrapped in cycle() so the training loop can pull batches
# with next() indefinitely.  The training loader reshuffles on every pass so
# the model does not see the same batches in the same order each epoch; the
# validation loader keeps file order.
train_loader = cycle(DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True))
val_loader = cycle(DataLoader(val_dataset, batch_size=BATCH_SIZE))


Token indices sequence length is longer than the specified maximum sequence length for this model (4911 > 4608). Running this sequence through the model will result in indexing errors


In [6]:
# next(train_loader)['input_ids'].shape

In [7]:
# next(train_loader)

In [8]:
# configuration = ReformerConfig.from_pretrained("google/reformer-crime-and-punishment")
# configuration.axial_pos_shape = (64, 72)
# configuration.max_position_embeddings=SEQ_LEN
# configuration.vocab_size=tokenizer.vocab_size
# configuration.save_pretrained('model/config/')
# Read the saved Reformer configuration from disk and build a freshly
# initialised language model from it (no pretrained weights are loaded here).
configuration = ReformerConfig.from_pretrained('model/config/')
model = ReformerModelWithLMHead(config=configuration)

# Move the model to the GPU; as the last expression of the cell this also
# displays the module structure.
model.cuda()

ReformerModelWithLMHead(
  (reformer): ReformerModel(
    (embeddings): ReformerEmbeddings(
      (word_embeddings): Embedding(28, 256)
      (position_embeddings): AxialPositionEmbeddings(
        (weights): ParameterList(
            (0): Parameter containing: [torch.cuda.FloatTensor of size 64x1x64 (GPU 0)]
            (1): Parameter containing: [torch.cuda.FloatTensor of size 1x72x192 (GPU 0)]
        )
      )
    )
    (encoder): ReformerEncoder(
      (layers): ModuleList(
        (0): ReformerLayer(
          (attention): ReformerAttention(
            (layer_norm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
            (self_attention): LocalSelfAttention(
              (query): Linear(in_features=256, out_features=128, bias=False)
              (key): Linear(in_features=256, out_features=128, bias=False)
              (value): Linear(in_features=256, out_features=128, bias=False)
            )
            (output): ReformerSelfOutput(
              (dense): Linear(

In [9]:
from transformers import AdamW

# Training steps per epoch: only full batches are counted (integer division).
NUM_BATCHES = len(train_dataset) // BATCH_SIZE

# AdamW over every model parameter, using the step size from the config cell.
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)

In [33]:
from collections import OrderedDict 
import json
import os
import pandas as pd

# --- Training-run settings ---------------------------------------------------
NUM_EPOCHS = 40            # passes of NUM_BATCHES steps each
MAX_GRAD_NORM = 0.5        # gradient-norm clipping threshold
SAVE_ARGMAX_EVERY = 10     # dump the predicted token ids every N steps
SAVE_DIR = "saved_0624"
MODEL_DIR = f"{SAVE_DIR}/model"
LOSS_DIR = f"{SAVE_DIR}/saved_losses"
ARGMAX_PATH = f"{SAVE_DIR}/argmax_saved.txt"

# Create the output folders up front so a missing directory cannot abort the
# run at the first write.
for directory in (MODEL_DIR, LOSS_DIR):
    os.makedirs(directory, exist_ok=True)

# Losses keyed by "Epoch <e> Step <s>", accumulated over the whole run.
all_training_loss = OrderedDict()
all_val_loss = OrderedDict()
# Predicted token ids (one list per dump), also mirrored to ARGMAX_PATH.
argmax_to_save=[]

# Start this run with an empty argmax log; each dump below appends one line
# instead of rewriting every earlier line again.
open(ARGMAX_PATH, 'w').close()

for x in range(NUM_EPOCHS):
    print(f"epoch {x}")
    start = time.time()

    # Per-epoch losses, written to their own JSON files at the end of the epoch.
    training_loss = OrderedDict()
    val_loss = OrderedDict()

    for i in range(NUM_BATCHES):
        print("step {}".format(i))
        model.train()

        # --- one optimisation step on the next training batch ---
        tmp = next(train_loader)
        input_ids = tmp['input_ids']
        attention_mask = tmp['attention_mask']
        labels = tmp['labels']

        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss, prediction_scores = outputs[:2]
        loss.backward()

        # Read the scalar once rather than converting the tensor repeatedly.
        train_loss_value = loss.item()
        training_loss[f"Epoch {x} Step {i}"] = train_loss_value
        all_training_loss[f"Epoch {x} Step {i}"] = train_loss_value
        print(f'training loss: {train_loss_value}')

        torch.nn.utils.clip_grad_norm_(model.parameters(), MAX_GRAD_NORM)

        optimizer.step()
        optimizer.zero_grad()

        # --- periodic validation on a single batch ---
        if i % VALIDATE_EVERY == 0:
            model.eval()
            with torch.no_grad():
                tmp = next(val_loader)
                input_ids = tmp['input_ids']
                attention_mask = tmp['attention_mask']
                labels = tmp['labels']

                # NOTE: this rebinds `loss` and `prediction_scores` to the
                # validation results for the rest of the iteration.
                outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
                loss, prediction_scores = outputs[:2]

                val_loss_value = loss.item()
                val_loss[f"Epoch {x} Step {i}"] = val_loss_value
                all_val_loss[f"Epoch {x} Step {i}"] = val_loss_value
                print(f'validation loss: {val_loss_value}')

        # --- periodic dump of the predicted token ids ---
        # `prediction_scores` comes from the validation batch when validation
        # ran on this step, otherwise from the training batch.
        if i % SAVE_ARGMAX_EVERY == 0:
            print('xxxxxxxxxxxxxx')
            # First example only: (seq_len, num_tokens) -> (seq_len,)
            y = torch.argmax(prediction_scores[0,:,:], dim=1).cpu().numpy()
            argmax_to_save.append(y.tolist())
            print(type(argmax_to_save[0]))
            with open(ARGMAX_PATH, 'a') as f:
                f.write(str(argmax_to_save[-1])+'\n')

            del(y)

    # --- end of epoch: checkpoint the weights and this epoch's losses ---
    torch.save(model.state_dict(), f"{MODEL_DIR}/saved_model_epoch_{x}.pth")

    with open(f'{LOSS_DIR}/training_loss_epoch_{x}.json', 'w') as f:
        json.dump(training_loss, f)

    with open(f'{LOSS_DIR}/val_loss_epoch_{x}.json', 'w') as f:
        json.dump(val_loss, f)
    end = time.time()
    print(f"----------{(end-start)//60} min per epoch----------")

# --- end of run: losses for all epochs in one file each ---
with open(f"{LOSS_DIR}/training_loss_all.json", 'w') as f:
    json.dump(all_training_loss, f)
with open(f"{LOSS_DIR}/val_loss_all.json", 'w') as f:
    json.dump(all_val_loss, f)

epoch 0
step 0
training loss: 2.8765008449554443
validation loss: 2.8412394523620605
xxxxxxxxxxxxxx
<class 'list'>
step 1
training loss: 2.884265184402466
step 2
training loss: 2.874288320541382
step 3
training loss: 2.856649160385132
step 4
training loss: 2.862579822540283
step 5
training loss: 2.8804471492767334
step 6
training loss: 2.8804876804351807
step 7
training loss: 2.8839030265808105
step 8
training loss: 2.868535041809082
step 9
training loss: 2.885714054107666
step 10
training loss: 2.8828983306884766
validation loss: 2.8691999912261963
xxxxxxxxxxxxxx
<class 'list'>
step 11
training loss: 2.844682216644287
step 12
training loss: 2.8486948013305664
step 13
training loss: 2.8649795055389404
step 14
training loss: 2.8672597408294678
step 15
training loss: 2.875302314758301
step 16
training loss: 2.8728182315826416
step 17
training loss: 2.873661756515503
step 18
training loss: 2.877591371536255
step 19
training loss: 2.873741388320923
step 20
training loss: 2.886409044265747


training loss: 2.873277425765991
step 170
training loss: 2.8667941093444824
validation loss: 2.830172061920166
xxxxxxxxxxxxxx
<class 'list'>
step 171
training loss: 2.8753302097320557
step 172
training loss: 2.85284161567688
step 173
training loss: 2.8603901863098145
step 174
training loss: 2.8387258052825928
step 175
training loss: 2.853752374649048
step 176
training loss: 2.860642671585083
step 177
training loss: 2.848814010620117
step 178
training loss: 2.8915841579437256
step 179
training loss: 2.8639743328094482
step 180
training loss: 2.855835437774658
validation loss: 2.8348782062530518
xxxxxxxxxxxxxx
<class 'list'>
step 181
training loss: 2.8677563667297363
step 182
training loss: 2.8688716888427734
step 183
training loss: 2.8593363761901855
step 184
training loss: 2.8514082431793213
step 185
training loss: 2.8339900970458984
step 186
training loss: 2.8253090381622314
step 187
training loss: 2.8785059452056885
step 188
training loss: 2.8570072650909424
step 189
training loss: 2

training loss: 2.8506546020507812
step 64
training loss: 2.8859617710113525
step 65
training loss: 2.8663883209228516
step 66
training loss: 2.8444063663482666
step 67
training loss: 2.8789453506469727
step 68
training loss: 2.8585915565490723
step 69
training loss: 2.8668394088745117
step 70
training loss: 2.8662843704223633
validation loss: 2.8824474811553955
xxxxxxxxxxxxxx
<class 'list'>
step 71
training loss: 2.8400392532348633
step 72
training loss: 2.877986431121826
step 73
training loss: 2.862908124923706
step 74
training loss: 2.8804614543914795
step 75
training loss: 2.8763272762298584
step 76
training loss: 2.897451400756836
step 77
training loss: 2.8809328079223633
step 78
training loss: 2.842601776123047
step 79
training loss: 2.879701852798462
step 80
training loss: 2.862748622894287
validation loss: 2.9303300380706787
xxxxxxxxxxxxxx
<class 'list'>
step 81
training loss: 2.855797290802002
step 82
training loss: 2.860722541809082
step 83
training loss: 2.859924077987671
ste

training loss: 2.8766047954559326
step 232
training loss: 2.8528780937194824
step 233
training loss: 2.8744068145751953
step 234
training loss: 2.8801419734954834
step 235
training loss: 2.8501930236816406
step 236
training loss: 2.8520689010620117
step 237
training loss: 2.8765742778778076
step 238
training loss: 2.865762948989868
step 239
training loss: 2.868450164794922
step 240
training loss: 2.8700549602508545
validation loss: 2.8671011924743652
xxxxxxxxxxxxxx
<class 'list'>
step 241
training loss: 2.8714592456817627
step 242
training loss: 2.8643758296966553
step 243
training loss: 2.8654067516326904
step 244
training loss: 2.8640589714050293
step 245
training loss: 2.8636748790740967
step 246
training loss: 2.860548734664917
step 247
training loss: 2.8278822898864746
step 248
training loss: 2.8555455207824707
step 249
training loss: 2.87644100189209
step 250
training loss: 2.855895757675171
validation loss: 2.8675179481506348
xxxxxxxxxxxxxx
<class 'list'>
step 251
training loss:

training loss: 2.892695665359497
step 128
training loss: 2.812361240386963
step 129
training loss: 2.864100456237793
step 130
training loss: 2.856313705444336
validation loss: 2.852213144302368
xxxxxxxxxxxxxx
<class 'list'>
step 131
training loss: 2.880898952484131
step 132
training loss: 2.8603715896606445
step 133
training loss: 2.884674549102783
step 134
training loss: 2.856658697128296
step 135
training loss: 2.8428714275360107
step 136
training loss: 2.8543310165405273
step 137
training loss: 2.8710803985595703
step 138
training loss: 2.8517658710479736
step 139
training loss: 2.882544755935669
step 140
training loss: 2.868260145187378
validation loss: 2.868175506591797
xxxxxxxxxxxxxx
<class 'list'>
step 141
training loss: 2.8277041912078857
step 142
training loss: 2.8546416759490967
step 143
training loss: 2.8589553833007812
step 144
training loss: 2.85919451713562
step 145
training loss: 2.871857166290283
step 146
training loss: 2.8695826530456543
step 147
training loss: 2.87129

training loss: 2.8747196197509766
step 22
training loss: 2.8738818168640137
step 23
training loss: 2.8866724967956543
step 24
training loss: 2.8671531677246094
step 25
training loss: 2.877380132675171
step 26
training loss: 2.8861358165740967
step 27
training loss: 2.8813419342041016
step 28
training loss: 2.875819444656372
step 29
training loss: 2.877717971801758
step 30
training loss: 2.859591007232666
validation loss: 2.8493006229400635
xxxxxxxxxxxxxx
<class 'list'>
step 31
training loss: 2.846294403076172
step 32
training loss: 2.8670737743377686
step 33
training loss: 2.8787028789520264
step 34
training loss: 2.8431503772735596
step 35
training loss: 2.866363763809204
step 36
training loss: 2.8715758323669434
step 37
training loss: 2.858286142349243
step 38
training loss: 2.884955406188965
step 39
training loss: 2.8578073978424072
step 40
training loss: 2.8765294551849365
validation loss: 2.8487048149108887
xxxxxxxxxxxxxx
<class 'list'>
step 41
training loss: 2.8563201427459717
st

training loss: 2.85819673538208
step 192
training loss: 2.885554075241089
step 193
training loss: 2.862257957458496
step 194
training loss: 2.8728606700897217
step 195
training loss: 2.8820619583129883
step 196
training loss: 2.860199213027954
step 197
training loss: 2.8581786155700684
step 198
training loss: 2.8688337802886963
step 199
training loss: 2.84708833694458
step 200
training loss: 2.8557472229003906
validation loss: 2.8601441383361816
xxxxxxxxxxxxxx
<class 'list'>
step 201
training loss: 2.8487300872802734
step 202
training loss: 2.852926254272461
step 203
training loss: 2.8422319889068604
step 204
training loss: 2.862717628479004
step 205
training loss: 2.864762544631958
step 206
training loss: 2.8481600284576416
step 207
training loss: 2.875333070755005
step 208
training loss: 2.855764389038086
step 209
training loss: 2.866276264190674
step 210
training loss: 2.860015869140625
validation loss: 2.864232301712036
xxxxxxxxxxxxxx
<class 'list'>
step 211
training loss: 2.870019

training loss: 2.877800464630127
step 88
training loss: 2.866589069366455
step 89
training loss: 2.8819758892059326
step 90
training loss: 2.8815674781799316
validation loss: 2.8430330753326416
xxxxxxxxxxxxxx
<class 'list'>
step 91
training loss: 2.8809802532196045
step 92
training loss: 2.8527932167053223
step 93
training loss: 2.868520736694336
step 94
training loss: 2.8667891025543213
step 95
training loss: 2.8578431606292725
step 96
training loss: 2.870490550994873
step 97
training loss: 2.845829486846924
step 98
training loss: 2.871175527572632
step 99
training loss: 2.8760905265808105
step 100
training loss: 2.820991277694702
validation loss: 2.872887134552002
xxxxxxxxxxxxxx
<class 'list'>
step 101
training loss: 2.859174966812134
step 102
training loss: 2.85251784324646
step 103
training loss: 2.8687665462493896
step 104
training loss: 2.8753135204315186
step 105
training loss: 2.847226619720459
step 106
training loss: 2.8712642192840576
step 107
training loss: 2.824121475219726

training loss: 2.8542749881744385
step 255
training loss: 2.8913674354553223
step 256
training loss: 2.869060516357422
step 257
training loss: 2.885226249694824
step 258
training loss: 2.886568069458008
step 259
training loss: 2.8731791973114014
step 260
training loss: 2.853893280029297
validation loss: 2.9126265048980713
xxxxxxxxxxxxxx
<class 'list'>
step 261
training loss: 2.8778929710388184
step 262
training loss: 2.8804280757904053
step 263
training loss: 2.872926950454712
step 264
training loss: 2.8712193965911865
step 265
training loss: 2.8791234493255615
step 266
training loss: 2.887068748474121
step 267
training loss: 2.8131625652313232
step 268
training loss: 2.8526172637939453
step 269
training loss: 2.8203816413879395
step 270
training loss: 2.853499412536621
validation loss: 2.8793318271636963
xxxxxxxxxxxxxx
<class 'list'>
step 271
training loss: 2.889162302017212
----------3.0 min per epoch----------
epoch 5
step 0
training loss: 2.879019021987915
validation loss: 2.870102

training loss: 2.868687152862549
validation loss: 2.831836462020874
xxxxxxxxxxxxxx
<class 'list'>
step 151
training loss: 2.8222434520721436
step 152
training loss: 2.866262197494507
step 153
training loss: 2.8567349910736084
step 154
training loss: 2.8699233531951904
step 155
training loss: 2.864001750946045
step 156
training loss: 2.876800775527954
step 157
training loss: 2.8713717460632324
step 158
training loss: 2.855459451675415
step 159
training loss: 2.8790135383605957
step 160
training loss: 2.8745951652526855
validation loss: 2.8561437129974365
xxxxxxxxxxxxxx
<class 'list'>
step 161
training loss: 2.865291118621826
step 162
training loss: 2.8619866371154785
step 163
training loss: 2.871971607208252
step 164
training loss: 2.860208034515381
step 165
training loss: 2.860464334487915
step 166
training loss: 2.8426156044006348
step 167
training loss: 2.8247764110565186
step 168
training loss: 2.8727543354034424
step 169
training loss: 2.8518388271331787
step 170
training loss: 2.8

training loss: 2.856233596801758
step 45
training loss: 2.8732645511627197
step 46
training loss: 2.824481964111328
step 47
training loss: 2.8841967582702637
step 48
training loss: 2.850383758544922
step 49
training loss: 2.8838253021240234
step 50
training loss: 2.8676812648773193
validation loss: 2.834606647491455
xxxxxxxxxxxxxx
<class 'list'>
step 51
training loss: 2.847787618637085
step 52
training loss: 2.8441193103790283
step 53
training loss: 2.867253303527832
step 54
training loss: 2.8560309410095215
step 55
training loss: 2.8503499031066895
step 56
training loss: 2.8826160430908203
step 57
training loss: 2.8705854415893555
step 58
training loss: 2.8608906269073486
step 59
training loss: 2.8466429710388184
step 60
training loss: 2.8607470989227295
validation loss: 2.8330957889556885
xxxxxxxxxxxxxx
<class 'list'>
step 61
training loss: 2.860153913497925
step 62
training loss: 2.836853265762329
step 63
training loss: 2.8523199558258057
step 64
training loss: 2.852707862854004
ste

training loss: 2.8651907444000244
step 213
training loss: 2.8599910736083984
step 214
training loss: 2.8672592639923096
step 215
training loss: 2.791482448577881
step 216
training loss: 2.8531887531280518
step 217
training loss: 2.8362646102905273
step 218
training loss: 2.8474106788635254
step 219
training loss: 2.8704140186309814
step 220
training loss: 2.874957799911499
validation loss: 2.8739750385284424
xxxxxxxxxxxxxx
<class 'list'>
step 221
training loss: 2.8658671379089355
step 222
training loss: 2.863823413848877
step 223
training loss: 2.8625080585479736
step 224
training loss: 2.8518190383911133
step 225
training loss: 2.8579909801483154
step 226
training loss: 2.8713927268981934
step 227
training loss: 2.8876900672912598
step 228
training loss: 2.8594391345977783
step 229
training loss: 2.847069025039673
step 230
training loss: 2.8497612476348877
validation loss: 2.9273736476898193
xxxxxxxxxxxxxx
<class 'list'>
step 231
training loss: 2.849602222442627
step 232
training loss

training loss: 2.8481788635253906
step 109
training loss: 2.870295763015747
step 110
training loss: 2.8245749473571777
validation loss: 2.8696444034576416
xxxxxxxxxxxxxx
<class 'list'>
step 111
training loss: 2.8570139408111572
step 112
training loss: 2.8758742809295654
step 113
training loss: 2.8697152137756348
step 114
training loss: 2.8611152172088623
step 115
training loss: 2.8508901596069336
step 116
training loss: 2.8699662685394287
step 117
training loss: 2.8532872200012207
step 118
training loss: 2.865222215652466
step 119
training loss: 2.874776601791382
step 120
training loss: 2.863044500350952
validation loss: 2.8613908290863037
xxxxxxxxxxxxxx
<class 'list'>
step 121
training loss: 2.8603100776672363
step 122
training loss: 2.860532283782959
step 123
training loss: 2.838073492050171
step 124
training loss: 2.843129873275757
step 125
training loss: 2.8358259201049805
step 126
training loss: 2.8682703971862793
step 127
training loss: 2.8743183612823486
step 128
training loss: 

training loss: 2.8527441024780273
step 2
training loss: 2.882432699203491
step 3
training loss: 2.8778421878814697
step 4
training loss: 2.881880521774292
step 5
training loss: 2.8768980503082275
step 6
training loss: 2.79657244682312
step 7
training loss: 2.75679874420166
step 8
training loss: 2.8699567317962646
step 9
training loss: 2.895054578781128
step 10
training loss: 2.874445676803589
validation loss: 2.8564181327819824
xxxxxxxxxxxxxx
<class 'list'>
step 11
training loss: 2.8510634899139404
step 12
training loss: 2.859750270843506
step 13
training loss: 2.878849506378174
step 14
training loss: 2.8781044483184814
step 15
training loss: 2.881213903427124
step 16
training loss: 2.864969253540039
step 17
training loss: 2.8825511932373047
step 18
training loss: 2.8808043003082275
step 19
training loss: 2.838575601577759
step 20
training loss: 2.8515563011169434
validation loss: 2.9123528003692627
xxxxxxxxxxxxxx
<class 'list'>
step 21
training loss: 2.859881639480591
step 22
training

training loss: 2.869741916656494
step 172
training loss: 2.8514418601989746
step 173
training loss: 2.85280704498291
step 174
training loss: 2.8669188022613525
step 175
training loss: 2.8974404335021973
step 176
training loss: 2.845162868499756
step 177
training loss: 2.872020959854126
step 178
training loss: 2.862950086593628
step 179
training loss: 2.8736045360565186
step 180
training loss: 2.8498663902282715
validation loss: 2.848552703857422
xxxxxxxxxxxxxx
<class 'list'>
step 181
training loss: 2.8623735904693604
step 182
training loss: 2.836585521697998
step 183
training loss: 2.8546528816223145
step 184
training loss: 2.8597521781921387
step 185
training loss: 2.8470163345336914
step 186
training loss: 2.8849472999572754
step 187
training loss: 2.8607661724090576
step 188
training loss: 2.854680299758911
step 189
training loss: 2.8697071075439453
step 190
training loss: 2.867969274520874
validation loss: 2.8473927974700928
xxxxxxxxxxxxxx
<class 'list'>
step 191
training loss: 2.8

training loss: 2.852001905441284
step 68
training loss: 2.8444371223449707
step 69
training loss: 2.836027145385742
step 70
training loss: 2.871952533721924
validation loss: 2.8596479892730713
xxxxxxxxxxxxxx
<class 'list'>
step 71
training loss: 2.8482277393341064
step 72
training loss: 2.8825759887695312
step 73
training loss: 2.864739179611206
step 74
training loss: 2.837594747543335
step 75
training loss: 2.878777265548706
step 76
training loss: 2.8537778854370117
step 77
training loss: 2.8631112575531006
step 78
training loss: 2.8662643432617188
step 79
training loss: 2.8382551670074463
step 80
training loss: 2.8768370151519775
validation loss: 2.8667256832122803
xxxxxxxxxxxxxx
<class 'list'>
step 81
training loss: 2.859714984893799
step 82
training loss: 2.879004955291748
step 83
training loss: 2.872551918029785
step 84
training loss: 2.8936753273010254
step 85
training loss: 2.8803749084472656
step 86
training loss: 2.836221218109131
step 87
training loss: 2.876047134399414
step 

training loss: 2.8629300594329834
step 236
training loss: 2.8649022579193115
step 237
training loss: 2.8566067218780518
step 238
training loss: 2.8680737018585205
step 239
training loss: 2.874706983566284
step 240
training loss: 2.8496358394622803
validation loss: 2.8376033306121826
xxxxxxxxxxxxxx
<class 'list'>
step 241
training loss: 2.873753309249878
step 242
training loss: 2.878024101257324
step 243
training loss: 2.84675669670105
step 244
training loss: 2.8485522270202637
step 245
training loss: 2.8714654445648193
step 246
training loss: 2.866328477859497
step 247
training loss: 2.8646693229675293
step 248
training loss: 2.8645901679992676
step 249
training loss: 2.869697332382202
step 250
training loss: 2.8590924739837646
validation loss: 2.8697052001953125
xxxxxxxxxxxxxx
<class 'list'>
step 251
training loss: 2.864380359649658
step 252
training loss: 2.8633921146392822
step 253
training loss: 2.859363317489624
step 254
training loss: 2.8584234714508057
step 255
training loss: 2.

training loss: 2.8626506328582764
step 132
training loss: 2.860990524291992
step 133
training loss: 2.8534603118896484
step 134
training loss: 2.872591733932495
step 135
training loss: 2.8930277824401855
step 136
training loss: 2.8121237754821777
step 137
training loss: 2.8629848957061768
step 138
training loss: 2.85371732711792
step 139
training loss: 2.880955696105957
step 140
training loss: 2.8599233627319336
validation loss: 2.8680548667907715
xxxxxxxxxxxxxx
<class 'list'>
step 141
training loss: 2.88057541847229
step 142
training loss: 2.8524169921875
step 143
training loss: 2.8417654037475586
step 144
training loss: 2.8566908836364746
step 145
training loss: 2.8684089183807373
step 146
training loss: 2.8519372940063477
step 147
training loss: 2.880066394805908
step 148
training loss: 2.863818883895874
step 149
training loss: 2.826843500137329
step 150
training loss: 2.85137939453125
validation loss: 2.8611972332000732
xxxxxxxxxxxxxx
<class 'list'>
step 151
training loss: 2.854907

training loss: 2.8755149841308594
step 27
training loss: 2.8708384037017822
step 28
training loss: 2.874042272567749
step 29
training loss: 2.8753416538238525
step 30
training loss: 2.86834716796875
validation loss: 2.8552892208099365
xxxxxxxxxxxxxx
<class 'list'>
step 31
training loss: 2.8838744163513184
step 32
training loss: 2.867870569229126
step 33
training loss: 2.8760297298431396
step 34
training loss: 2.8864212036132812
step 35
training loss: 2.878674268722534
step 36
training loss: 2.874648332595825
step 37
training loss: 2.874525785446167
step 38
training loss: 2.861377716064453
step 39
training loss: 2.838832139968872
step 40
training loss: 2.8636553287506104
validation loss: 2.8878321647644043
xxxxxxxxxxxxxx
<class 'list'>
step 41
training loss: 2.87392258644104
step 42
training loss: 2.8420042991638184
step 43
training loss: 2.8627748489379883
step 44
training loss: 2.8661162853240967
step 45
training loss: 2.852719783782959
step 46
training loss: 2.8826656341552734
step 4

training loss: 2.842832088470459
step 196
training loss: 2.830279588699341
step 197
training loss: 2.8161003589630127
step 198
training loss: 2.877310037612915
step 199
training loss: 2.85365629196167
step 200
training loss: 2.8810698986053467
validation loss: 2.8309249877929688
xxxxxxxxxxxxxx
<class 'list'>
step 201
training loss: 2.8563992977142334
step 202
training loss: 2.8663947582244873
step 203
training loss: 2.8824520111083984
step 204
training loss: 2.8562707901000977
step 205
training loss: 2.8553335666656494
step 206
training loss: 2.8673157691955566
step 207
training loss: 2.8439972400665283
step 208
training loss: 2.8565878868103027
step 209
training loss: 2.8449647426605225
step 210
training loss: 2.8496835231781006
validation loss: 2.834674835205078
xxxxxxxxxxxxxx
<class 'list'>
step 211
training loss: 2.841022491455078
step 212
training loss: 2.8595852851867676
step 213
training loss: 2.863034248352051
step 214
training loss: 2.8444812297821045
step 215
training loss: 2

training loss: 2.857875347137451
step 92
training loss: 2.851381778717041
step 93
training loss: 2.859490156173706
step 94
training loss: 2.859220266342163
step 95
training loss: 2.8752121925354004
step 96
training loss: 2.859644651412964
step 97
training loss: 2.8778839111328125
step 98
training loss: 2.8785717487335205
step 99
training loss: 2.8791401386260986
step 100
training loss: 2.854923725128174
validation loss: 2.938058376312256
xxxxxxxxxxxxxx
<class 'list'>
step 101
training loss: 2.868964910507202
step 102
training loss: 2.8639326095581055
step 103
training loss: 2.856088876724243
step 104
training loss: 2.868021249771118
step 105
training loss: 2.8425943851470947
step 106
training loss: 2.8688154220581055
step 107
training loss: 2.8745336532592773
step 108
training loss: 2.8140268325805664
step 109
training loss: 2.8565478324890137
step 110
training loss: 2.8489272594451904
validation loss: 2.8784568309783936
xxxxxxxxxxxxxx
<class 'list'>
step 111
training loss: 2.864469289

training loss: 2.8544607162475586
step 260
training loss: 2.872130870819092
validation loss: 2.8685691356658936
xxxxxxxxxxxxxx
<class 'list'>
step 261
training loss: 2.845947265625
step 262
training loss: 2.8565914630889893
step 263
training loss: 2.8863134384155273
step 264
training loss: 2.866023540496826
step 265
training loss: 2.87899112701416
step 266
training loss: 2.8795430660247803
step 267
training loss: 2.8684446811676025
step 268
training loss: 2.849461793899536
step 269
training loss: 2.87949538230896
step 270
training loss: 2.8793015480041504
validation loss: 2.8730297088623047
xxxxxxxxxxxxxx
<class 'list'>
step 271
training loss: 2.871115207672119
----------3.0 min per epoch----------
epoch 13
step 0
training loss: 2.866582155227661
validation loss: 2.8708558082580566
xxxxxxxxxxxxxx
<class 'list'>
step 1
training loss: 2.878298044204712
step 2
training loss: 2.8865487575531006
step 3
training loss: 2.810659646987915
step 4
training loss: 2.849714994430542
step 5
training 

training loss: 2.8542678356170654
step 155
training loss: 2.8581063747406006
step 156
training loss: 2.8692169189453125
step 157
training loss: 2.8662171363830566
step 158
training loss: 2.867429256439209
step 159
training loss: 2.8178155422210693
step 160
training loss: 2.8645615577697754
validation loss: 2.8619675636291504
xxxxxxxxxxxxxx
<class 'list'>
step 161
training loss: 2.8576571941375732
step 162
training loss: 2.8670501708984375
step 163
training loss: 2.86389422416687
step 164
training loss: 2.875312328338623
step 165
training loss: 2.8724050521850586
step 166
training loss: 2.853909492492676
step 167
training loss: 2.879732608795166
step 168
training loss: 2.875821352005005
step 169
training loss: 2.8654541969299316
step 170
training loss: 2.862208843231201
validation loss: 2.887432098388672
xxxxxxxxxxxxxx
<class 'list'>
step 171
training loss: 2.8700215816497803
step 172
training loss: 2.859670877456665
step 173
training loss: 2.859388589859009
step 174
training loss: 2.84

training loss: 2.8823037147521973
step 50
training loss: 2.8554575443267822
validation loss: 2.8451972007751465
xxxxxxxxxxxxxx
<class 'list'>
step 51
training loss: 2.8716344833374023
step 52
training loss: 2.857590675354004
step 53
training loss: 2.871788501739502
step 54
training loss: 2.8247227668762207
step 55
training loss: 2.88234281539917
step 56
training loss: 2.8492934703826904
step 57
training loss: 2.882511615753174
step 58
training loss: 2.866164445877075
step 59
training loss: 2.847994804382324
step 60
training loss: 2.8424460887908936
validation loss: 2.8515686988830566
xxxxxxxxxxxxxx
<class 'list'>
step 61
training loss: 2.8654680252075195
step 62
training loss: 2.853900671005249
step 63
training loss: 2.8476603031158447
step 64
training loss: 2.8796327114105225
step 65
training loss: 2.868882179260254
step 66
training loss: 2.860546112060547
step 67
training loss: 2.8426637649536133
step 68
training loss: 2.858417510986328
step 69
training loss: 2.85929799079895
step 70

training loss: 2.845919132232666
step 218
training loss: 2.8743677139282227
step 219
training loss: 2.853909492492676
step 220
training loss: 2.86250638961792
validation loss: 2.858293294906616
xxxxxxxxxxxxxx
<class 'list'>
step 221
training loss: 2.8596136569976807
step 222
training loss: 2.8676273822784424
step 223
training loss: 2.790422201156616
step 224
training loss: 2.8518595695495605
step 225
training loss: 2.833472728729248
step 226
training loss: 2.8488707542419434
step 227
training loss: 2.870349884033203
step 228
training loss: 2.8729500770568848
step 229
training loss: 2.862492084503174
step 230
training loss: 2.8614342212677
validation loss: 2.8645408153533936
xxxxxxxxxxxxxx
<class 'list'>
step 231
training loss: 2.860565662384033
step 232
training loss: 2.8494560718536377
step 233
training loss: 2.8542370796203613
step 234
training loss: 2.868074893951416
step 235
training loss: 2.88627290725708
step 236
training loss: 2.8567233085632324
step 237
training loss: 2.8459444

training loss: 2.855452060699463
step 113
training loss: 2.8486270904541016
step 114
training loss: 2.8649497032165527
step 115
training loss: 2.8735320568084717
step 116
training loss: 2.847102165222168
step 117
training loss: 2.866683006286621
step 118
training loss: 2.8202483654022217
step 119
training loss: 2.8545844554901123
step 120
training loss: 2.875633716583252
validation loss: 2.8695759773254395
xxxxxxxxxxxxxx
<class 'list'>
step 121
training loss: 2.8672049045562744
step 122
training loss: 2.8581974506378174
step 123
training loss: 2.848811388015747
step 124
training loss: 2.8668713569641113
step 125
training loss: 2.8534302711486816
step 126
training loss: 2.8634235858917236
step 127
training loss: 2.867556571960449
step 128
training loss: 2.861978769302368
step 129
training loss: 2.85662841796875
step 130
training loss: 2.86128306388855
validation loss: 2.875094175338745
xxxxxxxxxxxxxx
<class 'list'>
step 131
training loss: 2.8378031253814697
step 132
training loss: 2.842

training loss: 2.806103229522705
step 7
training loss: 2.8492369651794434
step 8
training loss: 2.8223278522491455
step 9
training loss: 2.8553740978240967
step 10
training loss: 2.880113363265991
validation loss: 2.875910520553589
xxxxxxxxxxxxxx
<class 'list'>
step 11
training loss: 2.874023199081421
step 12
training loss: 2.882389545440674
step 13
training loss: 2.8715128898620605
step 14
training loss: 2.80954909324646
step 15
training loss: 2.7729477882385254
step 16
training loss: 2.8452413082122803
step 17
training loss: 2.88606595993042
step 18
training loss: 2.869929075241089
step 19
training loss: 2.8541808128356934
step 20
training loss: 2.8607077598571777
validation loss: 2.86795711517334
xxxxxxxxxxxxxx
<class 'list'>
step 21
training loss: 2.875959873199463
step 22
training loss: 2.880720853805542
step 23
training loss: 2.8823304176330566
step 24
training loss: 2.8718206882476807
step 25
training loss: 2.8813514709472656
step 26
training loss: 2.880214214324951
step 27
trai

training loss: 2.860626220703125
step 176
training loss: 2.8600358963012695
step 177
training loss: 2.8378169536590576
step 178
training loss: 2.8218460083007812
step 179
training loss: 2.8699405193328857
step 180
training loss: 2.8500595092773438
validation loss: 2.852081298828125
xxxxxxxxxxxxxx
<class 'list'>
step 181
training loss: 2.848493814468384
step 182
training loss: 2.8627536296844482
step 183
training loss: 2.8938913345336914
step 184
training loss: 2.846104860305786
step 185
training loss: 2.8687167167663574
step 186
training loss: 2.8619308471679688
step 187
training loss: 2.8691153526306152
step 188
training loss: 2.8495278358459473
step 189
training loss: 2.859523296356201
step 190
training loss: 2.836057662963867
validation loss: 2.8806397914886475
xxxxxxxxxxxxxx
<class 'list'>
step 191
training loss: 2.8520569801330566
step 192
training loss: 2.8578617572784424
step 193
training loss: 2.846163749694824
step 194
training loss: 2.8856661319732666
step 195
training loss: 

validation loss: 2.8322200775146484
xxxxxxxxxxxxxx
<class 'list'>
step 71
training loss: 2.858602285385132
step 72
training loss: 2.859825372695923
step 73
training loss: 2.835875988006592
step 74
training loss: 2.8500208854675293
step 75
training loss: 2.852813720703125
step 76
training loss: 2.8463284969329834
step 77
training loss: 2.8352513313293457
step 78
training loss: 2.8715808391571045
step 79
training loss: 2.8439102172851562
step 80
training loss: 2.8782575130462646
validation loss: 2.8366241455078125
xxxxxxxxxxxxxx
<class 'list'>
step 81
training loss: 2.863248586654663
step 82
training loss: 2.839256525039673
step 83
training loss: 2.8759968280792236
step 84
training loss: 2.8509767055511475
step 85
training loss: 2.861400842666626
step 86
training loss: 2.8657026290893555
step 87
training loss: 2.830519676208496
step 88
training loss: 2.875486373901367
step 89
training loss: 2.8549885749816895
step 90
training loss: 2.876049041748047
validation loss: 2.8601696491241455
xx

training loss: 2.888251781463623
step 239
training loss: 2.856370687484741
step 240
training loss: 2.844534158706665
validation loss: 2.8719358444213867
xxxxxxxxxxxxxx
<class 'list'>
step 241
training loss: 2.8497612476348877
step 242
training loss: 2.84538197517395
step 243
training loss: 2.8616607189178467
step 244
training loss: 2.862687349319458
step 245
training loss: 2.856766939163208
step 246
training loss: 2.8659145832061768
step 247
training loss: 2.874772548675537
step 248
training loss: 2.850306510925293
step 249
training loss: 2.8715009689331055
step 250
training loss: 2.87861704826355
validation loss: 2.9394750595092773
xxxxxxxxxxxxxx
<class 'list'>
step 251
training loss: 2.8467113971710205
step 252
training loss: 2.8485894203186035
step 253
training loss: 2.871323347091675
step 254
training loss: 2.866347551345825
step 255
training loss: 2.865304470062256
step 256
training loss: 2.864797830581665
step 257
training loss: 2.8671343326568604
step 258
training loss: 2.856448

training loss: 2.861111879348755
step 134
training loss: 2.836315631866455
step 135
training loss: 2.840609073638916
step 136
training loss: 2.833108425140381
step 137
training loss: 2.868177890777588
step 138
training loss: 2.868908166885376
step 139
training loss: 2.8613975048065186
step 140
training loss: 2.8635551929473877
validation loss: 2.8595051765441895
xxxxxxxxxxxxxx
<class 'list'>
step 141
training loss: 2.8519933223724365
step 142
training loss: 2.8691182136535645
step 143
training loss: 2.8897714614868164
step 144
training loss: 2.8078410625457764
step 145
training loss: 2.860473871231079
step 146
training loss: 2.8504443168640137
step 147
training loss: 2.874675750732422
step 148
training loss: 2.8581480979919434
step 149
training loss: 2.8806533813476562
step 150
training loss: 2.855102062225342
validation loss: 2.8574795722961426
xxxxxxxxxxxxxx
<class 'list'>
step 151
training loss: 2.841458797454834
step 152
training loss: 2.853210687637329
step 153
training loss: 2.86

training loss: 2.882047176361084
step 29
training loss: 2.879563331604004
step 30
training loss: 2.8343002796173096
validation loss: 2.867541790008545
xxxxxxxxxxxxxx
<class 'list'>
step 31
training loss: 2.8435287475585938
step 32
training loss: 2.858217716217041
step 33
training loss: 2.858961582183838
step 34
training loss: 2.8731918334960938
step 35
training loss: 2.8672521114349365
step 36
training loss: 2.871000051498413
step 37
training loss: 2.87544584274292
step 38
training loss: 2.8684298992156982
step 39
training loss: 2.8867857456207275
step 40
training loss: 2.862078905105591
validation loss: 2.8918778896331787
xxxxxxxxxxxxxx
<class 'list'>
step 41
training loss: 2.87488055229187
step 42
training loss: 2.881377696990967
step 43
training loss: 2.8787710666656494
step 44
training loss: 2.871385097503662
step 45
training loss: 2.872255802154541
step 46
training loss: 2.858283758163452
step 47
training loss: 2.838721513748169
step 48
training loss: 2.8632521629333496
step 49
tr

training loss: 2.8449954986572266
step 197
training loss: 2.8865151405334473
step 198
training loss: 2.860305070877075
step 199
training loss: 2.846014976501465
step 200
training loss: 2.8611862659454346
validation loss: 2.8432388305664062
xxxxxxxxxxxxxx
<class 'list'>
step 201
training loss: 2.86582350730896
step 202
training loss: 2.852656126022339
step 203
training loss: 2.842578172683716
step 204
training loss: 2.828066110610962
step 205
training loss: 2.8177578449249268
step 206
training loss: 2.8749406337738037
step 207
training loss: 2.8539209365844727
step 208
training loss: 2.8810677528381348
step 209
training loss: 2.855323553085327
step 210
training loss: 2.864502191543579
validation loss: 2.85080623626709
xxxxxxxxxxxxxx
<class 'list'>
step 211
training loss: 2.87937068939209
step 212
training loss: 2.857297658920288
step 213
training loss: 2.8545918464660645
step 214
training loss: 2.865494966506958
step 215
training loss: 2.843116044998169
step 216
training loss: 2.8557283

training loss: 2.8764610290527344
step 92
training loss: 2.8547096252441406
step 93
training loss: 2.877103090286255
step 94
training loss: 2.8720102310180664
step 95
training loss: 2.8947558403015137
step 96
training loss: 2.879988431930542
step 97
training loss: 2.8330764770507812
step 98
training loss: 2.8723673820495605
step 99
training loss: 2.856837511062622
step 100
training loss: 2.8454232215881348
validation loss: 2.864048957824707
xxxxxxxxxxxxxx
<class 'list'>
step 101
training loss: 2.8570168018341064
step 102
training loss: 2.85697865486145
step 103
training loss: 2.8761987686157227
step 104
training loss: 2.860635995864868
step 105
training loss: 2.875906467437744
step 106
training loss: 2.875547170639038
step 107
training loss: 2.8787548542022705
step 108
training loss: 2.8563616275787354
step 109
training loss: 2.866788625717163
step 110
training loss: 2.8634777069091797
validation loss: 2.8711512088775635
xxxxxxxxxxxxxx
<class 'list'>
step 111
training loss: 2.854227542

training loss: 2.8674402236938477
validation loss: 2.835381031036377
xxxxxxxxxxxxxx
<class 'list'>
step 261
training loss: 2.858144998550415
step 262
training loss: 2.862593650817871
step 263
training loss: 2.861208200454712
step 264
training loss: 2.858579397201538
step 265
training loss: 2.8563411235809326
step 266
training loss: 2.8195271492004395
step 267
training loss: 2.852717161178589
step 268
training loss: 2.8666646480560303
step 269
training loss: 2.8427414894104004
step 270
training loss: 2.8507261276245117
validation loss: 2.874577760696411
xxxxxxxxxxxxxx
<class 'list'>
step 271
training loss: 2.8838655948638916
----------3.0 min per epoch----------
epoch 21
step 0
training loss: 2.8612804412841797
validation loss: 2.8778250217437744
xxxxxxxxxxxxxx
<class 'list'>
step 1
training loss: 2.875974655151367
step 2
training loss: 2.880326509475708
step 3
training loss: 2.8684113025665283
step 4
training loss: 2.8487067222595215
step 5
training loss: 2.8758528232574463
step 6
trai

training loss: 2.854078769683838
step 156
training loss: 2.8651504516601562
step 157
training loss: 2.851181983947754
step 158
training loss: 2.8766350746154785
step 159
training loss: 2.8613779544830322
step 160
training loss: 2.822190046310425
validation loss: 2.8649067878723145
xxxxxxxxxxxxxx
<class 'list'>
step 161
training loss: 2.850475788116455
step 162
training loss: 2.852842330932617
step 163
training loss: 2.8553504943847656
step 164
training loss: 2.8702051639556885
step 165
training loss: 2.8646230697631836
step 166
training loss: 2.8664116859436035
step 167
training loss: 2.8142189979553223
step 168
training loss: 2.86368727684021
step 169
training loss: 2.8561837673187256
step 170
training loss: 2.8646557331085205
validation loss: 2.860626697540283
xxxxxxxxxxxxxx
<class 'list'>
step 171
training loss: 2.8621551990509033
step 172
training loss: 2.87394118309021
step 173
training loss: 2.8679862022399902
step 174
training loss: 2.8504741191864014
step 175
training loss: 2.8

validation loss: 2.8567819595336914
xxxxxxxxxxxxxx
<class 'list'>
step 51
training loss: 2.8629026412963867
step 52
training loss: 2.875450611114502
step 53
training loss: 2.8436903953552246
step 54
training loss: 2.8614563941955566
step 55
training loss: 2.8644485473632812
step 56
training loss: 2.85160756111145
step 57
training loss: 2.882655143737793
step 58
training loss: 2.8562893867492676
step 59
training loss: 2.872227668762207
step 60
training loss: 2.855526924133301
validation loss: 2.8897624015808105
xxxxxxxxxxxxxx
<class 'list'>
step 61
training loss: 2.869401693344116
step 62
training loss: 2.8237216472625732
step 63
training loss: 2.8849294185638428
step 64
training loss: 2.8506650924682617
step 65
training loss: 2.881601572036743
step 66
training loss: 2.8638556003570557
step 67
training loss: 2.8489434719085693
step 68
training loss: 2.843010663986206
step 69
training loss: 2.863567352294922
step 70
training loss: 2.852151870727539
validation loss: 2.8796310424804688
xxx

training loss: 2.8548474311828613
step 220
training loss: 2.8451223373413086
validation loss: 2.8270175457000732
xxxxxxxxxxxxxx
<class 'list'>
step 221
training loss: 2.849687099456787
step 222
training loss: 2.8372139930725098
step 223
training loss: 2.8596231937408447
step 224
training loss: 2.864082098007202
step 225
training loss: 2.8460769653320312
step 226
training loss: 2.8747365474700928
step 227
training loss: 2.8497140407562256
step 228
training loss: 2.8626768589019775
step 229
training loss: 2.85783052444458
step 230
training loss: 2.867130756378174
validation loss: 2.8304944038391113
xxxxxxxxxxxxxx
<class 'list'>
step 231
training loss: 2.7859628200531006
step 232
training loss: 2.851454496383667
step 233
training loss: 2.830850839614868
step 234
training loss: 2.8531711101531982
step 235
training loss: 2.871666431427002
step 236
training loss: 2.874040126800537
step 237
training loss: 2.860222101211548
step 238
training loss: 2.8619775772094727
step 239
training loss: 2.8

training loss: 2.8567538261413574
step 115
training loss: 2.868561029434204
step 116
training loss: 2.845238208770752
step 117
training loss: 2.8695597648620605
step 118
training loss: 2.8727633953094482
step 119
training loss: 2.8130407333374023
step 120
training loss: 2.854809045791626
validation loss: 2.9403235912323
xxxxxxxxxxxxxx
<class 'list'>
step 121
training loss: 2.849914789199829
step 122
training loss: 2.864382028579712
step 123
training loss: 2.87166166305542
step 124
training loss: 2.8453781604766846
step 125
training loss: 2.868171453475952
step 126
training loss: 2.8150153160095215
step 127
training loss: 2.853200674057007
step 128
training loss: 2.874296188354492
step 129
training loss: 2.864577054977417
step 130
training loss: 2.8578062057495117
validation loss: 2.8775634765625
xxxxxxxxxxxxxx
<class 'list'>
step 131
training loss: 2.8473925590515137
step 132
training loss: 2.8641445636749268
step 133
training loss: 2.852015972137451
step 134
training loss: 2.862763166

training loss: 2.87581205368042
step 9
training loss: 2.8751912117004395
step 10
training loss: 2.869842529296875
validation loss: 2.868962287902832
xxxxxxxxxxxxxx
<class 'list'>
step 11
training loss: 2.8654489517211914
step 12
training loss: 2.8783457279205322
step 13
training loss: 2.8864896297454834
step 14
training loss: 2.804713487625122
step 15
training loss: 2.847635507583618
step 16
training loss: 2.821990728378296
step 17
training loss: 2.855341911315918
step 18
training loss: 2.8851871490478516
step 19
training loss: 2.8793632984161377
step 20
training loss: 2.8854923248291016
validation loss: 2.865246534347534
xxxxxxxxxxxxxx
<class 'list'>
step 21
training loss: 2.871211290359497
step 22
training loss: 2.794093370437622
step 23
training loss: 2.7514805793762207
step 24
training loss: 2.825425386428833
step 25
training loss: 2.8927485942840576
step 26
training loss: 2.8734281063079834
step 27
training loss: 2.8585972785949707
step 28
training loss: 2.860875129699707
step 29


training loss: 2.8528027534484863
step 178
training loss: 2.878232955932617
step 179
training loss: 2.875868082046509
step 180
training loss: 2.865525007247925
validation loss: 2.8589844703674316
xxxxxxxxxxxxxx
<class 'list'>
step 181
training loss: 2.861342430114746
step 182
training loss: 2.8680717945098877
step 183
training loss: 2.8592958450317383
step 184
training loss: 2.8565709590911865
step 185
training loss: 2.836099147796631
step 186
training loss: 2.817620277404785
step 187
training loss: 2.8685927391052246
step 188
training loss: 2.8489255905151367
step 189
training loss: 2.848992109298706
step 190
training loss: 2.8636245727539062
validation loss: 2.883911609649658
xxxxxxxxxxxxxx
<class 'list'>
step 191
training loss: 2.8920094966888428
step 192
training loss: 2.843153953552246
step 193
training loss: 2.870297431945801
step 194
training loss: 2.8605425357818604
step 195
training loss: 2.870408058166504
step 196
training loss: 2.8462307453155518
step 197
training loss: 2.85

training loss: 2.8424153327941895
step 72
training loss: 2.8628690242767334
step 73
training loss: 2.853849411010742
step 74
training loss: 2.8449854850769043
step 75
training loss: 2.878662586212158
step 76
training loss: 2.8663039207458496
step 77
training loss: 2.8577773571014404
step 78
training loss: 2.8432676792144775
step 79
training loss: 2.8573875427246094
step 80
training loss: 2.8586227893829346
validation loss: 2.847179889678955
xxxxxxxxxxxxxx
<class 'list'>
step 81
training loss: 2.834869861602783
step 82
training loss: 2.848605155944824
step 83
training loss: 2.85044527053833
step 84
training loss: 2.8432421684265137
step 85
training loss: 2.8350183963775635
step 86
training loss: 2.873769760131836
step 87
training loss: 2.8438568115234375
step 88
training loss: 2.8787841796875
step 89
training loss: 2.8600873947143555
step 90
training loss: 2.8388259410858154
validation loss: 2.8321456909179688
xxxxxxxxxxxxxx
<class 'list'>
step 91
training loss: 2.874864339828491
step 9

training loss: 2.861421585083008
validation loss: 2.8534576892852783
xxxxxxxxxxxxxx
<class 'list'>
step 241
training loss: 2.86000919342041
step 242
training loss: 2.858285665512085
step 243
training loss: 2.848806381225586
step 244
training loss: 2.855424165725708
step 245
training loss: 2.8683271408081055
step 246
training loss: 2.8837697505950928
step 247
training loss: 2.8541855812072754
step 248
training loss: 2.8463776111602783
step 249
training loss: 2.848801851272583
step 250
training loss: 2.8460028171539307
validation loss: 2.8633832931518555
xxxxxxxxxxxxxx
<class 'list'>
step 251
training loss: 2.858916997909546
step 252
training loss: 2.860872268676758
step 253
training loss: 2.855605125427246
step 254
training loss: 2.864924430847168
step 255
training loss: 2.872281074523926
step 256
training loss: 2.8464314937591553
step 257
training loss: 2.8722143173217773
step 258
training loss: 2.8753912448883057
step 259
training loss: 2.8442037105560303
step 260
training loss: 2.846

training loss: 2.865776538848877
step 136
training loss: 2.8504104614257812
step 137
training loss: 2.8628244400024414
step 138
training loss: 2.8661468029022217
step 139
training loss: 2.860116720199585
step 140
training loss: 2.8585681915283203
validation loss: 2.8699817657470703
xxxxxxxxxxxxxx
<class 'list'>
step 141
training loss: 2.8591361045837402
step 142
training loss: 2.8346686363220215
step 143
training loss: 2.84079909324646
step 144
training loss: 2.831287145614624
step 145
training loss: 2.8650453090667725
step 146
training loss: 2.867943525314331
step 147
training loss: 2.8629343509674072
step 148
training loss: 2.8595118522644043
step 149
training loss: 2.8511977195739746
step 150
training loss: 2.870926856994629
validation loss: 2.8738467693328857
xxxxxxxxxxxxxx
<class 'list'>
step 151
training loss: 2.8890886306762695
step 152
training loss: 2.807220220565796
step 153
training loss: 2.8582725524902344
step 154
training loss: 2.8520147800445557
step 155
training loss: 2

training loss: 2.8606910705566406
validation loss: 2.8774619102478027
xxxxxxxxxxxxxx
<class 'list'>
step 31
training loss: 2.86112117767334
step 32
training loss: 2.8768908977508545
step 33
training loss: 2.8785479068756104
step 34
training loss: 2.880363702774048
step 35
training loss: 2.868253707885742
step 36
training loss: 2.880444288253784
step 37
training loss: 2.8818092346191406
step 38
training loss: 2.839642286300659
step 39
training loss: 2.855112075805664
step 40
training loss: 2.8583083152770996
validation loss: 2.868551731109619
xxxxxxxxxxxxxx
<class 'list'>
step 41
training loss: 2.86562180519104
step 42
training loss: 2.876147985458374
step 43
training loss: 2.8743085861206055
step 44
training loss: 2.875600576400757
step 45
training loss: 2.8796305656433105
step 46
training loss: 2.86865234375
step 47
training loss: 2.885824680328369
step 48
training loss: 2.8641233444213867
step 49
training loss: 2.8744378089904785
step 50
training loss: 2.882054567337036
validation lo

training loss: 2.847015619277954
step 200
training loss: 2.8586673736572266
validation loss: 2.852379560470581
xxxxxxxxxxxxxx
<class 'list'>
step 201
training loss: 2.834150791168213
step 202
training loss: 2.848288059234619
step 203
training loss: 2.859905958175659
step 204
training loss: 2.8434441089630127
step 205
training loss: 2.882152557373047
step 206
training loss: 2.8582897186279297
step 207
training loss: 2.8477768898010254
step 208
training loss: 2.859421491622925
step 209
training loss: 2.8673548698425293
step 210
training loss: 2.8517470359802246
validation loss: 2.8771228790283203
xxxxxxxxxxxxxx
<class 'list'>
step 211
training loss: 2.840052843093872
step 212
training loss: 2.829554796218872
step 213
training loss: 2.809110641479492
step 214
training loss: 2.8734567165374756
step 215
training loss: 2.85198974609375
step 216
training loss: 2.876279830932617
step 217
training loss: 2.852438449859619
step 218
training loss: 2.86291766166687
step 219
training loss: 2.8798794

training loss: 2.8749818801879883
step 95
training loss: 2.8478431701660156
step 96
training loss: 2.8568079471588135
step 97
training loss: 2.8619937896728516
step 98
training loss: 2.832536220550537
step 99
training loss: 2.875316858291626
step 100
training loss: 2.8539843559265137
validation loss: 2.832261562347412
xxxxxxxxxxxxxx
<class 'list'>
step 101
training loss: 2.875047445297241
step 102
training loss: 2.870121717453003
step 103
training loss: 2.8932344913482666
step 104
training loss: 2.8777687549591064
step 105
training loss: 2.833036422729492
step 106
training loss: 2.874776601791382
step 107
training loss: 2.8582401275634766
step 108
training loss: 2.846431016921997
step 109
training loss: 2.8551125526428223
step 110
training loss: 2.8552610874176025
validation loss: 2.856982469558716
xxxxxxxxxxxxxx
<class 'list'>
step 111
training loss: 2.8772878646850586
step 112
training loss: 2.8612396717071533
step 113
training loss: 2.8764121532440186
step 114
training loss: 2.87477

step 261
training loss: 2.877460479736328
step 262
training loss: 2.841899871826172
step 263
training loss: 2.844161033630371
step 264
training loss: 2.8694303035736084
step 265
training loss: 2.8648974895477295
step 266
training loss: 2.8626363277435303
step 267
training loss: 2.8640193939208984
step 268
training loss: 2.865780830383301
step 269
training loss: 2.8551862239837646
step 270
training loss: 2.8643808364868164
validation loss: 2.9321935176849365
xxxxxxxxxxxxxx
<class 'list'>
step 271
training loss: 2.8613359928131104
----------3.0 min per epoch----------
epoch 29
step 0
training loss: 2.857590913772583
validation loss: 2.875819206237793
xxxxxxxxxxxxxx
<class 'list'>
step 1
training loss: 2.853428363800049
step 2
training loss: 2.8186938762664795
step 3
training loss: 2.849607467651367
step 4
training loss: 2.8710107803344727
step 5
training loss: 2.84588623046875
step 6
training loss: 2.847604513168335
step 7
training loss: 2.886948823928833
step 8
training loss: 2.86251306

training loss: 2.850273370742798
step 158
training loss: 2.874471664428711
step 159
training loss: 2.8586015701293945
step 160
training loss: 2.878098487854004
validation loss: 2.8619844913482666
xxxxxxxxxxxxxx
<class 'list'>
step 161
training loss: 2.8507466316223145
step 162
training loss: 2.838991641998291
step 163
training loss: 2.8528153896331787
step 164
training loss: 2.8636021614074707
step 165
training loss: 2.8504397869110107
step 166
training loss: 2.8725180625915527
step 167
training loss: 2.858463764190674
step 168
training loss: 2.8240816593170166
step 169
training loss: 2.850292921066284
step 170
training loss: 2.853926420211792
validation loss: 2.8554160594940186
xxxxxxxxxxxxxx
<class 'list'>
step 171
training loss: 2.854275941848755
step 172
training loss: 2.8680126667022705
step 173
training loss: 2.8637564182281494
step 174
training loss: 2.8641738891601562
step 175
training loss: 2.815739154815674
step 176
training loss: 2.8622963428497314
step 177
training loss: 2.

step 51
training loss: 2.863931655883789
step 52
training loss: 2.8718273639678955
step 53
training loss: 2.881913185119629
step 54
training loss: 2.875248908996582
step 55
training loss: 2.8732354640960693
step 56
training loss: 2.873194694519043
step 57
training loss: 2.8566746711730957
step 58
training loss: 2.8358561992645264
step 59
training loss: 2.8599720001220703
step 60
training loss: 2.8708224296569824
validation loss: 2.890307903289795
xxxxxxxxxxxxxx
<class 'list'>
step 61
training loss: 2.8473222255706787
step 62
training loss: 2.860208511352539
step 63
training loss: 2.8601906299591064
step 64
training loss: 2.848677158355713
step 65
training loss: 2.8785855770111084
step 66
training loss: 2.853732109069824
step 67
training loss: 2.8694558143615723
step 68
training loss: 2.8526508808135986
step 69
training loss: 2.8685739040374756
step 70
training loss: 2.8254506587982178
validation loss: 2.8129398822784424
xxxxxxxxxxxxxx
<class 'list'>
step 71
training loss: 2.88255453109

validation loss: 2.8426921367645264
xxxxxxxxxxxxxx
<class 'list'>
step 221
training loss: 2.861945152282715
step 222
training loss: 2.880017042160034
step 223
training loss: 2.8581323623657227
step 224
training loss: 2.848902940750122
step 225
training loss: 2.8639795780181885
step 226
training loss: 2.8438620567321777
step 227
training loss: 2.8513739109039307
step 228
training loss: 2.842378854751587
step 229
training loss: 2.845222234725952
step 230
training loss: 2.8342409133911133
validation loss: 2.841132164001465
xxxxxxxxxxxxxx
<class 'list'>
step 231
training loss: 2.857668399810791
step 232
training loss: 2.8587899208068848
step 233
training loss: 2.8448233604431152
step 234
training loss: 2.873558282852173
step 235
training loss: 2.8481009006500244
step 236
training loss: 2.860382318496704
step 237
training loss: 2.849712610244751
step 238
training loss: 2.8653628826141357
step 239
training loss: 2.790239095687866
step 240
training loss: 2.8497371673583984
validation loss: 2.

training loss: 2.874863624572754
step 117
training loss: 2.874225378036499
step 118
training loss: 2.878387212753296
step 119
training loss: 2.8541297912597656
step 120
training loss: 2.8653066158294678
validation loss: 2.8618662357330322
xxxxxxxxxxxxxx
<class 'list'>
step 121
training loss: 2.8616139888763428
step 122
training loss: 2.853449583053589
step 123
training loss: 2.8664252758026123
step 124
training loss: 2.840707540512085
step 125
training loss: 2.8660101890563965
step 126
training loss: 2.8730735778808594
step 127
training loss: 2.8106138706207275
step 128
training loss: 2.8523874282836914
step 129
training loss: 2.849522352218628
step 130
training loss: 2.8602135181427
validation loss: 2.8655545711517334
xxxxxxxxxxxxxx
<class 'list'>
step 131
training loss: 2.872084140777588
step 132
training loss: 2.842721462249756
step 133
training loss: 2.8640449047088623
step 134
training loss: 2.813185930252075
step 135
training loss: 2.8483877182006836
step 136
training loss: 2.873

validation loss: 2.87368106842041
xxxxxxxxxxxxxx
<class 'list'>
step 11
training loss: 2.860579013824463
step 12
training loss: 2.8800771236419678
step 13
training loss: 2.8801708221435547
step 14
training loss: 2.865360975265503
step 15
training loss: 2.846527576446533
step 16
training loss: 2.87396240234375
step 17
training loss: 2.874035358428955
step 18
training loss: 2.867892265319824
step 19
training loss: 2.8614892959594727
step 20
training loss: 2.875070095062256
validation loss: 2.878805160522461
xxxxxxxxxxxxxx
<class 'list'>
step 21
training loss: 2.885056495666504
step 22
training loss: 2.802263021469116
step 23
training loss: 2.847935199737549
step 24
training loss: 2.795982837677002
step 25
training loss: 2.8382928371429443
step 26
training loss: 2.8777849674224854
step 27
training loss: 2.8714396953582764
step 28
training loss: 2.8796684741973877
step 29
training loss: 2.863495349884033
step 30
training loss: 2.767885446548462
validation loss: 2.8424696922302246
xxxxxxxxx

training loss: 2.8570356369018555
validation loss: 2.864009380340576
xxxxxxxxxxxxxx
<class 'list'>
step 181
training loss: 2.863123655319214
step 182
training loss: 2.8643739223480225
step 183
training loss: 2.8739476203918457
step 184
training loss: 2.8676276206970215
step 185
training loss: 2.8524303436279297
step 186
training loss: 2.878549575805664
step 187
training loss: 2.874939441680908
step 188
training loss: 2.8644325733184814
step 189
training loss: 2.860069990158081
step 190
training loss: 2.8680527210235596
validation loss: 2.8591787815093994
xxxxxxxxxxxxxx
<class 'list'>
step 191
training loss: 2.8582799434661865
step 192
training loss: 2.858106851577759
step 193
training loss: 2.841233968734741
step 194
training loss: 2.822082757949829
step 195
training loss: 2.868927240371704
step 196
training loss: 2.849254608154297
step 197
training loss: 2.8474209308624268
step 198
training loss: 2.862457752227783
step 199
training loss: 2.8919003009796143
step 200
training loss: 2.84

training loss: 2.883711576461792
step 75
training loss: 2.847109794616699
step 76
training loss: 2.8804426193237305
step 77
training loss: 2.864095687866211
step 78
training loss: 2.846803903579712
step 79
training loss: 2.844202995300293
step 80
training loss: 2.8631017208099365
validation loss: 2.8852946758270264
xxxxxxxxxxxxxx
<class 'list'>
step 81
training loss: 2.8514721393585205
step 82
training loss: 2.8448851108551025
step 83
training loss: 2.876770257949829
step 84
training loss: 2.867173433303833
step 85
training loss: 2.859238862991333
step 86
training loss: 2.8443145751953125
step 87
training loss: 2.857072353363037
step 88
training loss: 2.858262062072754
step 89
training loss: 2.8342983722686768
step 90
training loss: 2.849125623703003
validation loss: 2.8723459243774414
xxxxxxxxxxxxxx
<class 'list'>
step 91
training loss: 2.8478708267211914
step 92
training loss: 2.8418078422546387
step 93
training loss: 2.8351104259490967
step 94
training loss: 2.870709180831909
step 9

training loss: 2.8645637035369873
step 242
training loss: 2.791351318359375
step 243
training loss: 2.847930908203125
step 244
training loss: 2.834197521209717
step 245
training loss: 2.8445045948028564
step 246
training loss: 2.868563175201416
step 247
training loss: 2.8712735176086426
step 248
training loss: 2.858983039855957
step 249
training loss: 2.8622920513153076
step 250
training loss: 2.858715295791626
validation loss: 2.8166136741638184
xxxxxxxxxxxxxx
<class 'list'>
step 251
training loss: 2.8470559120178223
step 252
training loss: 2.853198528289795
step 253
training loss: 2.8701882362365723
step 254
training loss: 2.8860342502593994
step 255
training loss: 2.8541109561920166
step 256
training loss: 2.8425161838531494
step 257
training loss: 2.8474843502044678
step 258
training loss: 2.844682216644287
step 259
training loss: 2.8603806495666504
step 260
training loss: 2.860314130783081
validation loss: 2.855701208114624
xxxxxxxxxxxxxx
<class 'list'>
step 261
training loss: 2.8

training loss: 2.8478312492370605
step 139
training loss: 2.872283935546875
step 140
training loss: 2.8654415607452393
validation loss: 2.937892436981201
xxxxxxxxxxxxxx
<class 'list'>
step 141
training loss: 2.855825424194336
step 142
training loss: 2.8466930389404297
step 143
training loss: 2.8653481006622314
step 144
training loss: 2.8513267040252686
step 145
training loss: 2.859710693359375
step 146
training loss: 2.8664968013763428
step 147
training loss: 2.8612515926361084
step 148
training loss: 2.8566439151763916
step 149
training loss: 2.8580026626586914
step 150
training loss: 2.8324201107025146
validation loss: 2.875467300415039
xxxxxxxxxxxxxx
<class 'list'>
step 151
training loss: 2.8401505947113037
step 152
training loss: 2.832828998565674
step 153
training loss: 2.8660292625427246
step 154
training loss: 2.8674654960632324
step 155
training loss: 2.8616738319396973
step 156
training loss: 2.860337495803833
step 157
training loss: 2.8516159057617188
step 158
training loss: 

step 31
training loss: 2.8857545852661133
step 32
training loss: 2.8693959712982178
step 33
training loss: 2.7539725303649902
step 34
training loss: 2.6901683807373047
step 35
training loss: 2.7986226081848145
step 36
training loss: 2.8849990367889404
step 37
training loss: 2.8714756965637207
step 38
training loss: 2.8540751934051514
step 39
training loss: 2.863403797149658
step 40
training loss: 2.874438524246216
validation loss: 2.8657476902008057
xxxxxxxxxxxxxx
<class 'list'>
step 41
training loss: 2.875702381134033
step 42
training loss: 2.8774163722991943
step 43
training loss: 2.8618595600128174
step 44
training loss: 2.88016676902771
step 45
training loss: 2.8805012702941895
step 46
training loss: 2.83479642868042
step 47
training loss: 2.8502070903778076
step 48
training loss: 2.8562493324279785
step 49
training loss: 2.8595452308654785
step 50
training loss: 2.8729255199432373
validation loss: 2.8657212257385254
xxxxxxxxxxxxxx
<class 'list'>
step 51
training loss: 2.8669586181

validation loss: 2.8559865951538086
xxxxxxxxxxxxxx
<class 'list'>
step 201
training loss: 2.86027455329895
step 202
training loss: 2.889039993286133
step 203
training loss: 2.842430830001831
step 204
training loss: 2.8709945678710938
step 205
training loss: 2.8613836765289307
step 206
training loss: 2.870210647583008
step 207
training loss: 2.8474273681640625
step 208
training loss: 2.8596408367156982
step 209
training loss: 2.834928035736084
step 210
training loss: 2.848529577255249
validation loss: 2.8925633430480957
xxxxxxxxxxxxxx
<class 'list'>
step 211
training loss: 2.8568694591522217
step 212
training loss: 2.8428568840026855
step 213
training loss: 2.884899854660034
step 214
training loss: 2.8578341007232666
step 215
training loss: 2.849872350692749
step 216
training loss: 2.8597514629364014
step 217
training loss: 2.8641927242279053
step 218
training loss: 2.849849224090576
step 219
training loss: 2.8412485122680664
step 220
training loss: 2.8276805877685547
validation loss: 2

training loss: 2.8332626819610596
step 97
training loss: 2.867988348007202
step 98
training loss: 2.841278553009033
step 99
training loss: 2.8753538131713867
step 100
training loss: 2.8588380813598633
validation loss: 2.8394203186035156
xxxxxxxxxxxxxx
<class 'list'>
step 101
training loss: 2.837665319442749
step 102
training loss: 2.8737449645996094
step 103
training loss: 2.8510138988494873
step 104
training loss: 2.8580093383789062
step 105
training loss: 2.862027406692505
step 106
training loss: 2.829929828643799
step 107
training loss: 2.8741211891174316
step 108
training loss: 2.853858232498169
step 109
training loss: 2.8748679161071777
step 110
training loss: 2.8698065280914307
validation loss: 2.8293075561523438
xxxxxxxxxxxxxx
<class 'list'>
step 111
training loss: 2.8914318084716797
step 112
training loss: 2.8775243759155273
step 113
training loss: 2.8324363231658936
step 114
training loss: 2.869553565979004
step 115
training loss: 2.8549392223358154
step 116
training loss: 2.8

training loss: 2.855757713317871
step 264
training loss: 2.8464958667755127
step 265
training loss: 2.866717576980591
step 266
training loss: 2.873157501220703
step 267
training loss: 2.846836566925049
step 268
training loss: 2.8722457885742188
step 269
training loss: 2.875079393386841
step 270
training loss: 2.8444576263427734
validation loss: 2.8553378582000732
xxxxxxxxxxxxxx
<class 'list'>
step 271
training loss: 2.8469693660736084
----------3.0 min per epoch----------
epoch 37
step 0
training loss: 2.869276285171509
validation loss: 2.8647539615631104
xxxxxxxxxxxxxx
<class 'list'>
step 1
training loss: 2.8628830909729004
step 2
training loss: 2.8613266944885254
step 3
training loss: 2.863750457763672
step 4
training loss: 2.864367723464966
step 5
training loss: 2.856848955154419
step 6
training loss: 2.8624696731567383
step 7
training loss: 2.859600782394409
step 8
training loss: 2.8477542400360107
step 9
training loss: 2.848332166671753
step 10
training loss: 2.8114852905273438
va

training loss: 2.8507537841796875
validation loss: 2.869924306869507
xxxxxxxxxxxxxx
<class 'list'>
step 161
training loss: 2.865739107131958
step 162
training loss: 2.8867757320404053
step 163
training loss: 2.8077688217163086
step 164
training loss: 2.856053590774536
step 165
training loss: 2.8512885570526123
step 166
training loss: 2.8727147579193115
step 167
training loss: 2.8548271656036377
step 168
training loss: 2.8763725757598877
step 169
training loss: 2.851839542388916
step 170
training loss: 2.8426380157470703
validation loss: 2.8732407093048096
xxxxxxxxxxxxxx
<class 'list'>
step 171
training loss: 2.8521971702575684
step 172
training loss: 2.8645410537719727
step 173
training loss: 2.8505218029022217
step 174
training loss: 2.871896505355835
step 175
training loss: 2.8572144508361816
step 176
training loss: 2.8220713138580322
step 177
training loss: 2.8499069213867188
step 178
training loss: 2.850280284881592
step 179
training loss: 2.8487496376037598
step 180
training loss:

training loss: 2.8652939796447754
step 55
training loss: 2.8755850791931152
step 56
training loss: 2.8751845359802246
step 57
training loss: 2.8690288066864014
step 58
training loss: 2.8872363567352295
step 59
training loss: 2.8663036823272705
step 60
training loss: 2.872039794921875
validation loss: 2.8631396293640137
xxxxxxxxxxxxxx
<class 'list'>
step 61
training loss: 2.882110118865967
step 62
training loss: 2.8772778511047363
step 63
training loss: 2.870805025100708
step 64
training loss: 2.8745086193084717
step 65
training loss: 2.859215259552002
step 66
training loss: 2.8373684883117676
step 67
training loss: 2.8626065254211426
step 68
training loss: 2.8741302490234375
step 69
training loss: 2.8421671390533447
step 70
training loss: 2.860039472579956
validation loss: 2.8275561332702637
xxxxxxxxxxxxxx
<class 'list'>
step 71
training loss: 2.8633384704589844
step 72
training loss: 2.8483896255493164
step 73
training loss: 2.880664110183716
step 74
training loss: 2.8550825119018555


training loss: 2.8391237258911133
step 223
training loss: 2.82356595993042
step 224
training loss: 2.815164089202881
step 225
training loss: 2.876331329345703
step 226
training loss: 2.8553380966186523
step 227
training loss: 2.8785111904144287
step 228
training loss: 2.852041482925415
step 229
training loss: 2.859849214553833
step 230
training loss: 2.8782379627227783
validation loss: 2.8767387866973877
xxxxxxxxxxxxxx
<class 'list'>
step 231
training loss: 2.858262777328491
step 232
training loss: 2.843719005584717
step 233
training loss: 2.8590409755706787
step 234
training loss: 2.843041181564331
step 235
training loss: 2.853226900100708
step 236
training loss: 2.844680070877075
step 237
training loss: 2.8417537212371826
step 238
training loss: 2.833395004272461
step 239
training loss: 2.855253219604492
step 240
training loss: 2.8565287590026855
validation loss: 2.8742165565490723
xxxxxxxxxxxxxx
<class 'list'>
step 241
training loss: 2.8456361293792725
step 242
training loss: 2.8710

training loss: 2.8458597660064697
step 120
training loss: 2.8537704944610596
validation loss: 2.819408655166626
xxxxxxxxxxxxxx
<class 'list'>
step 121
training loss: 2.856252670288086
step 122
training loss: 2.8743338584899902
step 123
training loss: 2.8588085174560547
step 124
training loss: 2.8762454986572266
step 125
training loss: 2.8738770484924316
step 126
training loss: 2.8783910274505615
step 127
training loss: 2.851677179336548
step 128
training loss: 2.8704214096069336
step 129
training loss: 2.8610620498657227
step 130
training loss: 2.853886365890503
validation loss: 2.8564889430999756
xxxxxxxxxxxxxx
<class 'list'>
step 131
training loss: 2.864698648452759
step 132
training loss: 2.838554859161377
step 133
training loss: 2.866264581680298
step 134
training loss: 2.871492624282837
step 135
training loss: 2.810499668121338
step 136
training loss: 2.8513383865356445
step 137
training loss: 2.8495984077453613
step 138
training loss: 2.8641164302825928
step 139
training loss: 2.

In [18]:
# Index of the highest-scoring entry along dim 2 for every position,
# previewing only the first 15 columns of each row.
# NOTE(review): presumably prediction_scores is (batch, seq_len, vocab) logits
# from the model's forward pass — confirm against the cell that produces it.
prediction_scores.argmax(dim=2)[:, :15]

tensor([[20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 13, 13, 20],
        [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20],
        [20, 20, 12, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20],
        [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20],
        [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20],
        [20, 20, 20, 12, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20],
        [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20],
        [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20],
        [20, 20, 20, 20, 20, 20, 20, 13, 20, 20, 20, 20, 20, 20, 20],
        [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20],
        [20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20],
        [20, 20, 20, 12, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20],
        [20, 20, 20, 13, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20],
        [20, 20, 20, 20, 12, 20, 20, 20, 20, 13, 13, 20, 13, 20, 20],
        [20, 20, 20,